← Index
NYTProf Performance Profile   « line view »
For /Users/timbo/perl5/perlbrew/perls/perl-5.18.2/bin/perlcritic
  Run on Sat Mar 19 22:12:22 2016
Reported on Sat Mar 19 22:14:10 2016

Filename/Users/timbo/perl5/perlbrew/perls/perl-5.18.2/lib/5.18.2/Pod/Simple/BlackBox.pm
StatementsExecuted 17 statements in 7.67ms
Subroutines
Calls P F Exclusive
Time
Inclusive
Time
Subroutine
11113µs14µsPod::Simple::BlackBox::::BEGIN@22Pod::Simple::BlackBox::BEGIN@22
11111µs14µsPod::Simple::BlackBox::::BEGIN@1183Pod::Simple::BlackBox::BEGIN@1183
1116µs23µsPod::Simple::BlackBox::::BEGIN@25Pod::Simple::BlackBox::BEGIN@25
1116µs16µsPod::Simple::BlackBox::::BEGIN@23Pod::Simple::BlackBox::BEGIN@23
1114µs4µsPod::Simple::BlackBox::::BEGIN@28Pod::Simple::BlackBox::BEGIN@28
1113µs3µsPod::Simple::BlackBox::::BEGIN@24Pod::Simple::BlackBox::BEGIN@24
0000s0sPod::Simple::BlackBox::::_closers_for_all_curr_openPod::Simple::BlackBox::_closers_for_all_curr_open
0000s0sPod::Simple::BlackBox::::_dump_curr_openPod::Simple::BlackBox::_dump_curr_open
0000s0sPod::Simple::BlackBox::::_gen_errataPod::Simple::BlackBox::_gen_errata
0000s0sPod::Simple::BlackBox::::_handle_encoding_linePod::Simple::BlackBox::_handle_encoding_line
0000s0sPod::Simple::BlackBox::::_handle_encoding_second_levelPod::Simple::BlackBox::_handle_encoding_second_level
0000s0sPod::Simple::BlackBox::::_ponder_DataPod::Simple::BlackBox::_ponder_Data
0000s0sPod::Simple::BlackBox::::_ponder_PlainPod::Simple::BlackBox::_ponder_Plain
0000s0sPod::Simple::BlackBox::::_ponder_VerbatimPod::Simple::BlackBox::_ponder_Verbatim
0000s0sPod::Simple::BlackBox::::_ponder_backPod::Simple::BlackBox::_ponder_back
0000s0sPod::Simple::BlackBox::::_ponder_beginPod::Simple::BlackBox::_ponder_begin
0000s0sPod::Simple::BlackBox::::_ponder_doc_endPod::Simple::BlackBox::_ponder_doc_end
0000s0sPod::Simple::BlackBox::::_ponder_endPod::Simple::BlackBox::_ponder_end
0000s0sPod::Simple::BlackBox::::_ponder_forPod::Simple::BlackBox::_ponder_for
0000s0sPod::Simple::BlackBox::::_ponder_itemPod::Simple::BlackBox::_ponder_item
0000s0sPod::Simple::BlackBox::::_ponder_overPod::Simple::BlackBox::_ponder_over
0000s0sPod::Simple::BlackBox::::_ponder_paragraph_bufferPod::Simple::BlackBox::_ponder_paragraph_buffer
0000s0sPod::Simple::BlackBox::::_ponder_podPod::Simple::BlackBox::_ponder_pod
0000s0sPod::Simple::BlackBox::::_stringify_lolPod::Simple::BlackBox::_stringify_lol
0000s0sPod::Simple::BlackBox::::_traverse_treelet_bitPod::Simple::BlackBox::_traverse_treelet_bit
0000s0sPod::Simple::BlackBox::::_treelet_from_formatting_codesPod::Simple::BlackBox::_treelet_from_formatting_codes
0000s0sPod::Simple::BlackBox::::_verbatim_formatPod::Simple::BlackBox::_verbatim_format
0000s0sPod::Simple::BlackBox::::parse_linePod::Simple::BlackBox::parse_line
0000s0sPod::Simple::BlackBox::::parse_linesPod::Simple::BlackBox::parse_lines
0000s0sPod::Simple::BlackBox::::prettyPod::Simple::BlackBox::pretty
0000s0sPod::Simple::BlackBox::::reinitPod::Simple::BlackBox::reinit
0000s0sPod::Simple::BlackBox::::stringify_lolPod::Simple::BlackBox::stringify_lol
0000s0sPod::Simple::BlackBox::::text_content_of_treeletPod::Simple::BlackBox::text_content_of_treelet
Call graph for these subroutines as a Graphviz dot language file.
Line State
ments
Time
on line
Calls Time
in subs
Code
1
2package Pod::Simple::BlackBox;
3#
4# "What's in the box?" "Pain."
5#
6###########################################################################
7#
8# This is where all the scary things happen: parsing lines into
9# paragraphs; and then into directives, verbatims, and then also
10# turning formatting sequences into treelets.
11#
12# Are you really sure you want to read this code?
13#
14#-----------------------------------------------------------------------------
15#
16# The basic work of this module Pod::Simple::BlackBox is doing the dirty work
17# of parsing Pod into treelets (generally one per non-verbatim paragraph), and
18# to call the proper callbacks on the treelets.
19#
20# Every node in a treelet is a ['name', {attrhash}, ...children...]
21
22219µs216µs
# spent 14µs (13+2) within Pod::Simple::BlackBox::BEGIN@22 which was called: # once (13µs+2µs) by Pod::Simple::LinkSection::BEGIN@9 at line 22
use integer; # vroom!
# spent 14µs making 1 call to Pod::Simple::BlackBox::BEGIN@22 # spent 2µs making 1 call to integer::import
23218µs227µs
# spent 16µs (6+10) within Pod::Simple::BlackBox::BEGIN@23 which was called: # once (6µs+10µs) by Pod::Simple::LinkSection::BEGIN@9 at line 23
use strict;
# spent 16µs making 1 call to Pod::Simple::BlackBox::BEGIN@23 # spent 10µs making 1 call to strict::import
24218µs13µs
# spent 3µs within Pod::Simple::BlackBox::BEGIN@24 which was called: # once (3µs+0s) by Pod::Simple::LinkSection::BEGIN@9 at line 24
use Carp ();
# spent 3µs making 1 call to Pod::Simple::BlackBox::BEGIN@24
25237µs240µs
# spent 23µs (6+17) within Pod::Simple::BlackBox::BEGIN@25 which was called: # once (6µs+17µs) by Pod::Simple::LinkSection::BEGIN@9 at line 25
use vars qw($VERSION );
# spent 23µs making 1 call to Pod::Simple::BlackBox::BEGIN@25 # spent 17µs making 1 call to vars::import
261800ns$VERSION = '3.28';
27#use constant DEBUG => 7;
28
# spent 4µs within Pod::Simple::BlackBox::BEGIN@28 which was called: # once (4µs+0s) by Pod::Simple::LinkSection::BEGIN@9 at line 31
BEGIN {
291200ns require Pod::Simple;
3014µs *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG
3114.30ms14µs}
# spent 4µs making 1 call to Pod::Simple::BlackBox::BEGIN@28
32
33#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
34
35sub parse_line { shift->parse_lines(@_) } # alias
36
37# - - - Turn back now! Run away! - - -
38
39sub parse_lines { # Usage: $parser->parse_lines(@lines)
40 # an undef means end-of-stream
41 my $self = shift;
42
43 my $code_handler = $self->{'code_handler'};
44 my $cut_handler = $self->{'cut_handler'};
45 my $wl_handler = $self->{'whiteline_handler'};
46 $self->{'line_count'} ||= 0;
47
48 my $scratch;
49
50 DEBUG > 4 and
51 print "# Parsing starting at line ", $self->{'line_count'}, ".\n";
52
53 DEBUG > 5 and
54 print "# About to parse lines: ",
55 join(' ', map defined($_) ? "[$_]" : "EOF", @_), "\n";
56
57 my $paras = ($self->{'paras'} ||= []);
58 # paragraph buffer. Because we need to defer processing of =over
59 # directives and verbatim paragraphs. We call _ponder_paragraph_buffer
60 # to process this.
61
62 $self->{'pod_para_count'} ||= 0;
63
64 my $line;
65 foreach my $source_line (@_) {
66 if( $self->{'source_dead'} ) {
67 DEBUG > 4 and print "# Source is dead.\n";
68 last;
69 }
70
71 unless( defined $source_line ) {
72 DEBUG > 4 and print "# Undef-line seen.\n";
73
74 push @$paras, ['~end', {'start_line' => $self->{'line_count'}}];
75 push @$paras, $paras->[-1], $paras->[-1];
76 # So that it definitely fills the buffer.
77 $self->{'source_dead'} = 1;
78 $self->_ponder_paragraph_buffer;
79 next;
80 }
81
82
83 if( $self->{'line_count'}++ ) {
84 ($line = $source_line) =~ tr/\n\r//d;
85 # If we don't have two vars, we'll end up with that there
86 # tr/// modding the (potentially read-only) original source line!
87
88 } else {
89 DEBUG > 2 and print "First line: [$source_line]\n";
90
91 if( ($line = $source_line) =~ s/^\xEF\xBB\xBF//s ) {
92 DEBUG and print "UTF-8 BOM seen. Faking a '=encoding utf8'.\n";
93 $self->_handle_encoding_line( "=encoding utf8" );
94 delete $self->{'_processed_encoding'};
95 $line =~ tr/\n\r//d;
96
97 } elsif( $line =~ s/^\xFE\xFF//s ) {
98 DEBUG and print "Big-endian UTF-16 BOM seen. Aborting parsing.\n";
99 $self->scream(
100 $self->{'line_count'},
101 "UTF16-BE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet."
102 );
103 splice @_;
104 push @_, undef;
105 next;
106
107 # TODO: implement somehow?
108
109 } elsif( $line =~ s/^\xFF\xFE//s ) {
110 DEBUG and print "Little-endian UTF-16 BOM seen. Aborting parsing.\n";
111 $self->scream(
112 $self->{'line_count'},
113 "UTF16-LE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet."
114 );
115 splice @_;
116 push @_, undef;
117 next;
118
119 # TODO: implement somehow?
120
121 } else {
122 DEBUG > 2 and print "First line is BOM-less.\n";
123 ($line = $source_line) =~ tr/\n\r//d;
124 }
125 }
126
127 # Try to guess encoding. Inlined for performance reasons.
128 if(!$self->{'parse_characters'} && !$self->{'encoding'}
129 && ($self->{'in_pod'} || $line =~ /^=/s)
130 && $line =~ /[^\x00-\x7f]/
131 ) {
132 my $encoding = $line =~ /^[\x00-\x7f]*[\xC0-\xFD][\x80-\xBF]/ ? 'UTF-8' : 'ISO8859-1';
133 $self->_handle_encoding_line( "=encoding $encoding" );
134 $self->{'_transcoder'} && $self->{'_transcoder'}->($line);
135
136 my ($word) = $line =~ /(\S*[^\x00-\x7f]\S*)/;
137
138 $self->whine(
139 $self->{'line_count'},
140 "Non-ASCII character seen before =encoding in '$word'. Assuming $encoding"
141 );
142 }
143
144 DEBUG > 5 and print "# Parsing line: [$line]\n";
145
146 if(!$self->{'in_pod'}) {
147 if($line =~ m/^=([a-zA-Z]+)/s) {
148 if($1 eq 'cut') {
149 $self->scream(
150 $self->{'line_count'},
151 "=cut found outside a pod block. Skipping to next block."
152 );
153
154 ## Before there were errata sections in the world, it was
155 ## least-pessimal to abort processing the file. But now we can
156 ## just barrel on thru (but still not start a pod block).
157 #splice @_;
158 #push @_, undef;
159
160 next;
161 } else {
162 $self->{'in_pod'} = $self->{'start_of_pod_block'}
163 = $self->{'last_was_blank'} = 1;
164 # And fall thru to the pod-mode block further down
165 }
166 } else {
167 DEBUG > 5 and print "# It's a code-line.\n";
168 $code_handler->(map $_, $line, $self->{'line_count'}, $self)
169 if $code_handler;
170 # Note: this may cause code to be processed out of order relative
171 # to pods, but in order relative to cuts.
172
173 # Note also that we haven't yet applied the transcoding to $line
174 # by time we call $code_handler!
175
176 if( $line =~ m/^#\s*line\s+(\d+)\s*(?:\s"([^"]+)")?\s*$/ ) {
177 # That RE is from perlsyn, section "Plain Old Comments (Not!)",
178 #$fname = $2 if defined $2;
179 #DEBUG > 1 and defined $2 and print "# Setting fname to \"$fname\"\n";
180 DEBUG > 1 and print "# Setting nextline to $1\n";
181 $self->{'line_count'} = $1 - 1;
182 }
183
184 next;
185 }
186 }
187
188 # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
189 # Else we're in pod mode:
190
191 # Apply any necessary transcoding:
192 $self->{'_transcoder'} && $self->{'_transcoder'}->($line);
193
194 # HERE WE CATCH =encoding EARLY!
195 if( $line =~ m/^=encoding\s+\S+\s*$/s ) {
196 next if $self->parse_characters; # Ignore this line
197 $line = $self->_handle_encoding_line( $line );
198 }
199
200 if($line =~ m/^=cut/s) {
201 # here ends the pod block, and therefore the previous pod para
202 DEBUG > 1 and print "Noting =cut at line ${$self}{'line_count'}\n";
203 $self->{'in_pod'} = 0;
204 # ++$self->{'pod_para_count'};
205 $self->_ponder_paragraph_buffer();
206 # by now it's safe to consider the previous paragraph as done.
207 $cut_handler->(map $_, $line, $self->{'line_count'}, $self)
208 if $cut_handler;
209
210 # TODO: add to docs: Note: this may cause cuts to be processed out
211 # of order relative to pods, but in order relative to code.
212
213 } elsif($line =~ m/^(\s*)$/s) { # it's a blank line
214 if (defined $1 and $1 =~ /[^\S\r\n]/) { # it's a white line
215 $wl_handler->(map $_, $line, $self->{'line_count'}, $self)
216 if $wl_handler;
217 }
218
219 if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') {
220 DEBUG > 1 and print "Saving blank line at line ${$self}{'line_count'}\n";
221 push @{$paras->[-1]}, $line;
222 } # otherwise it's not interesting
223
224 if(!$self->{'start_of_pod_block'} and !$self->{'last_was_blank'}) {
225 DEBUG > 1 and print "Noting para ends with blank line at ${$self}{'line_count'}\n";
226 }
227
228 $self->{'last_was_blank'} = 1;
229
230 } elsif($self->{'last_was_blank'}) { # A non-blank line starting a new para...
231
232 if($line =~ m/^(=[a-zA-Z][a-zA-Z0-9]*)(?:\s+|$)(.*)/s) {
233 # THIS IS THE ONE PLACE WHERE WE CONSTRUCT NEW DIRECTIVE OBJECTS
234 my $new = [$1, {'start_line' => $self->{'line_count'}}, $2];
235 # Note that in "=head1 foo", the WS is lost.
236 # Example: ['=head1', {'start_line' => 123}, ' foo']
237
238 ++$self->{'pod_para_count'};
239
240 $self->_ponder_paragraph_buffer();
241 # by now it's safe to consider the previous paragraph as done.
242
243 push @$paras, $new; # the new incipient paragraph
244 DEBUG > 1 and print "Starting new ${$paras}[-1][0] para at line ${$self}{'line_count'}\n";
245
246 } elsif($line =~ m/^\s/s) {
247
248 if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') {
249 DEBUG > 1 and print "Resuming verbatim para at line ${$self}{'line_count'}\n";
250 push @{$paras->[-1]}, $line;
251 } else {
252 ++$self->{'pod_para_count'};
253 $self->_ponder_paragraph_buffer();
254 # by now it's safe to consider the previous paragraph as done.
255 DEBUG > 1 and print "Starting verbatim para at line ${$self}{'line_count'}\n";
256 push @$paras, ['~Verbatim', {'start_line' => $self->{'line_count'}}, $line];
257 }
258 } else {
259 ++$self->{'pod_para_count'};
260 $self->_ponder_paragraph_buffer();
261 # by now it's safe to consider the previous paragraph as done.
262 push @$paras, ['~Para', {'start_line' => $self->{'line_count'}}, $line];
263 DEBUG > 1 and print "Starting plain para at line ${$self}{'line_count'}\n";
264 }
265 $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0;
266
267 } else {
268 # It's a non-blank line /continuing/ the current para
269 if(@$paras) {
270 DEBUG > 2 and print "Line ${$self}{'line_count'} continues current paragraph\n";
271 push @{$paras->[-1]}, $line;
272 } else {
273 # Unexpected case!
274 die "Continuing a paragraph but \@\$paras is empty?";
275 }
276 $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0;
277 }
278
279 } # ends the big while loop
280
281 DEBUG > 1 and print(pretty(@$paras), "\n");
282 return $self;
283}
284
285#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
286
287sub _handle_encoding_line {
288 my($self, $line) = @_;
289
290 return if $self->parse_characters;
291
292 # The point of this routine is to set $self->{'_transcoder'} as indicated.
293
294 return $line unless $line =~ m/^=encoding\s+(\S+)\s*$/s;
295 DEBUG > 1 and print "Found an encoding line \"=encoding $1\"\n";
296
297 my $e = $1;
298 my $orig = $e;
299 push @{ $self->{'encoding_command_reqs'} }, "=encoding $orig";
300
301 my $enc_error;
302
303 # Cf. perldoc Encode and perldoc Encode::Supported
304
305 require Pod::Simple::Transcode;
306
307 if( $self->{'encoding'} ) {
308 my $norm_current = $self->{'encoding'};
309 my $norm_e = $e;
310 foreach my $that ($norm_current, $norm_e) {
311 $that = lc($that);
312 $that =~ s/[-_]//g;
313 }
314 if($norm_current eq $norm_e) {
315 DEBUG > 1 and print "The '=encoding $orig' line is ",
316 "redundant. ($norm_current eq $norm_e). Ignoring.\n";
317 $enc_error = '';
318 # But that doesn't necessarily mean that the earlier one went okay
319 } else {
320 $enc_error = "Encoding is already set to " . $self->{'encoding'};
321 DEBUG > 1 and print $enc_error;
322 }
323 } elsif (
324 # OK, let's turn on the encoding
325 do {
326 DEBUG > 1 and print " Setting encoding to $e\n";
327 $self->{'encoding'} = $e;
328 1;
329 }
330 and $e eq 'HACKRAW'
331 ) {
332 DEBUG and print " Putting in HACKRAW (no-op) encoding mode.\n";
333
334 } elsif( Pod::Simple::Transcode::->encoding_is_available($e) ) {
335
336 die($enc_error = "WHAT? _transcoder is already set?!")
337 if $self->{'_transcoder'}; # should never happen
338 require Pod::Simple::Transcode;
339 $self->{'_transcoder'} = Pod::Simple::Transcode::->make_transcoder($e);
340 eval {
341 my @x = ('', "abc", "123");
342 $self->{'_transcoder'}->(@x);
343 };
344 $@ && die( $enc_error =
345 "Really unexpected error setting up encoding $e: $@\nAborting"
346 );
347 $self->{'detected_encoding'} = $e;
348
349 } else {
350 my @supported = Pod::Simple::Transcode::->all_encodings;
351
352 # Note unsupported, and complain
353 DEBUG and print " Encoding [$e] is unsupported.",
354 "\nSupporteds: @supported\n";
355 my $suggestion = '';
356
357 # Look for a near match:
358 my $norm = lc($e);
359 $norm =~ tr[-_][]d;
360 my $n;
361 foreach my $enc (@supported) {
362 $n = lc($enc);
363 $n =~ tr[-_][]d;
364 next unless $n eq $norm;
365 $suggestion = " (Maybe \"$e\" should be \"$enc\"?)";
366 last;
367 }
368 my $encmodver = Pod::Simple::Transcode::->encmodver;
369 $enc_error = join '' =>
370 "This document probably does not appear as it should, because its ",
371 "\"=encoding $e\" line calls for an unsupported encoding.",
372 $suggestion, " [$encmodver\'s supported encodings are: @supported]"
373 ;
374
375 $self->scream( $self->{'line_count'}, $enc_error );
376 }
377 push @{ $self->{'encoding_command_statuses'} }, $enc_error;
378 if (defined($self->{'_processed_encoding'})) {
379 # Should never happen
380 die "Nested processed encoding.";
381 }
382 $self->{'_processed_encoding'} = $orig;
383
384 return $line;
385}
386
387# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
388
389sub _handle_encoding_second_level {
390 # By time this is called, the encoding (if well formed) will already
391 # have been acted one.
392 my($self, $para) = @_;
393 my @x = @$para;
394 my $content = join ' ', splice @x, 2;
395 $content =~ s/^\s+//s;
396 $content =~ s/\s+$//s;
397
398 DEBUG > 2 and print "Ogling encoding directive: =encoding $content\n";
399
400 if (defined($self->{'_processed_encoding'})) {
401 #if($content ne $self->{'_processed_encoding'}) {
402 # Could it happen?
403 #}
404 delete $self->{'_processed_encoding'};
405 # It's already been handled. Check for errors.
406 if(! $self->{'encoding_command_statuses'} ) {
407 DEBUG > 2 and print " CRAZY ERROR: It wasn't really handled?!\n";
408 } elsif( $self->{'encoding_command_statuses'}[-1] ) {
409 $self->whine( $para->[1]{'start_line'},
410 sprintf "Couldn't do %s: %s",
411 $self->{'encoding_command_reqs' }[-1],
412 $self->{'encoding_command_statuses'}[-1],
413 );
414 } else {
415 DEBUG > 2 and print " (Yup, it was successfully handled already.)\n";
416 }
417
418 } else {
419 # Otherwise it's a syntax error
420 $self->whine( $para->[1]{'start_line'},
421 "Invalid =encoding syntax: $content"
422 );
423 }
424
425 return;
426}
427
428#~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`
429
430{
43121µsmy $m = -321; # magic line number
432
433sub _gen_errata {
434 my $self = $_[0];
435 # Return 0 or more fake-o paragraphs explaining the accumulated
436 # errors on this document.
437
438 return() unless $self->{'errata'} and keys %{$self->{'errata'}};
439
440 my @out;
441
442 foreach my $line (sort {$a <=> $b} keys %{$self->{'errata'}}) {
443 push @out,
444 ['=item', {'start_line' => $m}, "Around line $line:"],
445 map( ['~Para', {'start_line' => $m, '~cooked' => 1},
446 #['~Top', {'start_line' => $m},
447 $_
448 #]
449 ],
450 @{$self->{'errata'}{$line}}
451 )
452 ;
453 }
454
455 # TODO: report of unknown entities? unrenderable characters?
456
457 unshift @out,
458 ['=head1', {'start_line' => $m, 'errata' => 1}, 'POD ERRORS'],
459 ['~Para', {'start_line' => $m, '~cooked' => 1, 'errata' => 1},
460 "Hey! ",
461 ['B', {},
462 'The above document had some coding errors, which are explained below:'
463 ]
464 ],
465 ['=over', {'start_line' => $m, 'errata' => 1}, ''],
466 ;
467
468 push @out,
469 ['=back', {'start_line' => $m, 'errata' => 1}, ''],
470 ;
471
472 DEBUG and print "\n<<\n", pretty(\@out), "\n>>\n\n";
473
474 return @out;
475}
476
477}
478
479#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
480
481##############################################################################
482##
483## stop reading now stop reading now stop reading now stop reading now stop
484##
485## HERE IT BECOMES REALLY SCARY
486##
487## stop reading now stop reading now stop reading now stop reading now stop
488##
489##############################################################################
490
491sub _ponder_paragraph_buffer {
492
493 # Para-token types as found in the buffer.
494 # ~Verbatim, ~Para, ~end, =head1..4, =for, =begin, =end,
495 # =over, =back, =item
496 # and the null =pod (to be complained about if over one line)
497 #
498 # "~data" paragraphs are something we generate at this level, depending on
499 # a currently open =over region
500
501 # Events fired: Begin and end for:
502 # directivename (like head1 .. head4), item, extend,
503 # for (from =begin...=end, =for),
504 # over-bullet, over-number, over-text, over-block,
505 # item-bullet, item-number, item-text,
506 # Document,
507 # Data, Para, Verbatim
508 # B, C, longdirname (TODO -- wha?), etc. for all directives
509 #
510
511 my $self = $_[0];
512 my $paras;
513 return unless @{$paras = $self->{'paras'}};
514 my $curr_open = ($self->{'curr_open'} ||= []);
515
516 my $scratch;
517
518 DEBUG > 10 and print "# Paragraph buffer: <<", pretty($paras), ">>\n";
519
520 # We have something in our buffer. So apparently the document has started.
521 unless($self->{'doc_has_started'}) {
522 $self->{'doc_has_started'} = 1;
523
524 my $starting_contentless;
525 $starting_contentless =
526 (
527 !@$curr_open
528 and @$paras and ! grep $_->[0] ne '~end', @$paras
529 # i.e., if the paras is all ~ends
530 )
531 ;
532 DEBUG and print "# Starting ",
533 $starting_contentless ? 'contentless' : 'contentful',
534 " document\n"
535 ;
536
537 $self->_handle_element_start(
538 ($scratch = 'Document'),
539 {
540 'start_line' => $paras->[0][1]{'start_line'},
541 $starting_contentless ? ( 'contentless' => 1 ) : (),
542 },
543 );
544 }
545
546 my($para, $para_type);
547 while(@$paras) {
548 last if @$paras == 1 and
549 ( $paras->[0][0] eq '=over' or $paras->[0][0] eq '~Verbatim'
550 or $paras->[0][0] eq '=item' )
551 ;
552 # Those're the three kinds of paragraphs that require lookahead.
553 # Actually, an "=item Foo" inside an <over type=text> region
554 # and any =item inside an <over type=block> region (rare)
555 # don't require any lookahead, but all others (bullets
556 # and numbers) do.
557
558# TODO: whinge about many kinds of directives in non-resolving =for regions?
559# TODO: many? like what? =head1 etc?
560
561 $para = shift @$paras;
562 $para_type = $para->[0];
563
564 DEBUG > 1 and print "Pondering a $para_type paragraph, given the stack: (",
565 $self->_dump_curr_open(), ")\n";
566
567 if($para_type eq '=for') {
568 next if $self->_ponder_for($para,$curr_open,$paras);
569
570 } elsif($para_type eq '=begin') {
571 next if $self->_ponder_begin($para,$curr_open,$paras);
572
573 } elsif($para_type eq '=end') {
574 next if $self->_ponder_end($para,$curr_open,$paras);
575
576 } elsif($para_type eq '~end') { # The virtual end-document signal
577 next if $self->_ponder_doc_end($para,$curr_open,$paras);
578 }
579
580
581 # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
582 #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
583 if(grep $_->[1]{'~ignore'}, @$curr_open) {
584 DEBUG > 1 and
585 print "Skipping $para_type paragraph because in ignore mode.\n";
586 next;
587 }
588 #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
589 # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
590
591 if($para_type eq '=pod') {
592 $self->_ponder_pod($para,$curr_open,$paras);
593
594 } elsif($para_type eq '=over') {
595 next if $self->_ponder_over($para,$curr_open,$paras);
596
597 } elsif($para_type eq '=back') {
598 next if $self->_ponder_back($para,$curr_open,$paras);
599
600 } else {
601
602 # All non-magical codes!!!
603
604 # Here we start using $para_type for our own twisted purposes, to
605 # mean how it should get treated, not as what the element name
606 # should be.
607
608 DEBUG > 1 and print "Pondering non-magical $para_type\n";
609
610 my $i;
611
612 # Enforce some =headN discipline
613 if($para_type =~ m/^=head\d$/s
614 and ! $self->{'accept_heads_anywhere'}
615 and @$curr_open
616 and $curr_open->[-1][0] eq '=over'
617 ) {
618 DEBUG > 2 and print "'=$para_type' inside an '=over'!\n";
619 $self->whine(
620 $para->[1]{'start_line'},
621 "You forgot a '=back' before '$para_type'"
622 );
623 unshift @$paras, ['=back', {}, ''], $para; # close the =over
624 next;
625 }
626
627
628 if($para_type eq '=item') {
629
630 my $over;
631 unless(@$curr_open and
632 $over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) {
633 $self->whine(
634 $para->[1]{'start_line'},
635 "'=item' outside of any '=over'"
636 );
637 unshift @$paras,
638 ['=over', {'start_line' => $para->[1]{'start_line'}}, ''],
639 $para
640 ;
641 next;
642 }
643
644
645 my $over_type = $over->[1]{'~type'};
646
647 if(!$over_type) {
648 # Shouldn't happen1
649 die "Typeless over in stack, starting at line "
650 . $over->[1]{'start_line'};
651
652 } elsif($over_type eq 'block') {
653 unless($curr_open->[-1][1]{'~bitched_about'}) {
654 $curr_open->[-1][1]{'~bitched_about'} = 1;
655 $self->whine(
656 $curr_open->[-1][1]{'start_line'},
657 "You can't have =items (as at line "
658 . $para->[1]{'start_line'}
659 . ") unless the first thing after the =over is an =item"
660 );
661 }
662 # Just turn it into a paragraph and reconsider it
663 $para->[0] = '~Para';
664 unshift @$paras, $para;
665 next;
666
667 } elsif($over_type eq 'text') {
668 my $item_type = $self->_get_item_type($para);
669 # That kills the content of the item if it's a number or bullet.
670 DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
671
672 if($item_type eq 'text') {
673 # Nothing special needs doing for 'text'
674 } elsif($item_type eq 'number' or $item_type eq 'bullet') {
675 $self->whine(
676 $para->[1]{'start_line'},
677 "Expected text after =item, not a $item_type"
678 );
679 # Undo our clobbering:
680 push @$para, $para->[1]{'~orig_content'};
681 delete $para->[1]{'number'};
682 # Only a PROPER item-number element is allowed
683 # to have a number attribute.
684 } else {
685 die "Unhandled item type $item_type"; # should never happen
686 }
687
688 # =item-text thingies don't need any assimilation, it seems.
689
690 } elsif($over_type eq 'number') {
691 my $item_type = $self->_get_item_type($para);
692 # That kills the content of the item if it's a number or bullet.
693 DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
694
695 my $expected_value = ++ $curr_open->[-1][1]{'~counter'};
696
697 if($item_type eq 'bullet') {
698 # Hm, it's not numeric. Correct for this.
699 $para->[1]{'number'} = $expected_value;
700 $self->whine(
701 $para->[1]{'start_line'},
702 "Expected '=item $expected_value'"
703 );
704 push @$para, $para->[1]{'~orig_content'};
705 # restore the bullet, blocking the assimilation of next para
706
707 } elsif($item_type eq 'text') {
708 # Hm, it's not numeric. Correct for this.
709 $para->[1]{'number'} = $expected_value;
710 $self->whine(
711 $para->[1]{'start_line'},
712 "Expected '=item $expected_value'"
713 );
714 # Text content will still be there and will block next ~Para
715
716 } elsif($item_type ne 'number') {
717 die "Unknown item type $item_type"; # should never happen
718
719 } elsif($expected_value == $para->[1]{'number'}) {
720 DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n";
721
722 } else {
723 DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'},
724 " instead of the expected value of $expected_value\n";
725 $self->whine(
726 $para->[1]{'start_line'},
727 "You have '=item " . $para->[1]{'number'} .
728 "' instead of the expected '=item $expected_value'"
729 );
730 $para->[1]{'number'} = $expected_value; # correcting!!
731 }
732
733 if(@$para == 2) {
734 # For the cases where we /didn't/ push to @$para
735 if($paras->[0][0] eq '~Para') {
736 DEBUG and print "Assimilating following ~Para content into $over_type item\n";
737 push @$para, splice @{shift @$paras},2;
738 } else {
739 DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
740 push @$para, ''; # Just so it's not contentless
741 }
742 }
743
744
745 } elsif($over_type eq 'bullet') {
746 my $item_type = $self->_get_item_type($para);
747 # That kills the content of the item if it's a number or bullet.
748 DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
749
750 if($item_type eq 'bullet') {
751 # as expected!
752
753 if( $para->[1]{'~_freaky_para_hack'} ) {
754 DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n";
755 push @$para, delete $para->[1]{'~_freaky_para_hack'};
756 }
757
758 } elsif($item_type eq 'number') {
759 $self->whine(
760 $para->[1]{'start_line'},
761 "Expected '=item *'"
762 );
763 push @$para, $para->[1]{'~orig_content'};
764 # and block assimilation of the next paragraph
765 delete $para->[1]{'number'};
766 # Only a PROPER item-number element is allowed
767 # to have a number attribute.
768 } elsif($item_type eq 'text') {
769 $self->whine(
770 $para->[1]{'start_line'},
771 "Expected '=item *'"
772 );
773 # But doesn't need processing. But it'll block assimilation
774 # of the next para.
775 } else {
776 die "Unhandled item type $item_type"; # should never happen
777 }
778
779 if(@$para == 2) {
780 # For the cases where we /didn't/ push to @$para
781 if($paras->[0][0] eq '~Para') {
782 DEBUG and print "Assimilating following ~Para content into $over_type item\n";
783 push @$para, splice @{shift @$paras},2;
784 } else {
785 DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
786 push @$para, ''; # Just so it's not contentless
787 }
788 }
789
790 } else {
791 die "Unhandled =over type \"$over_type\"?";
792 # Shouldn't happen!
793 }
794
795 $para_type = 'Plain';
796 $para->[0] .= '-' . $over_type;
797 # Whew. Now fall thru and process it.
798
799
800 } elsif($para_type eq '=extend') {
801 # Well, might as well implement it here.
802 $self->_ponder_extend($para);
803 next; # and skip
804 } elsif($para_type eq '=encoding') {
805 # Not actually acted on here, but we catch errors here.
806 $self->_handle_encoding_second_level($para);
807 next unless $self->keep_encoding_directive;
808 $para_type = 'Plain';
809 } elsif($para_type eq '~Verbatim') {
810 $para->[0] = 'Verbatim';
811 $para_type = '?Verbatim';
812 } elsif($para_type eq '~Para') {
813 $para->[0] = 'Para';
814 $para_type = '?Plain';
815 } elsif($para_type eq 'Data') {
816 $para->[0] = 'Data';
817 $para_type = '?Data';
818 } elsif( $para_type =~ s/^=//s
819 and defined( $para_type = $self->{'accept_directives'}{$para_type} )
820 ) {
821 DEBUG > 1 and print " Pondering known directive ${$para}[0] as $para_type\n";
822 } else {
823 # An unknown directive!
824 DEBUG > 1 and printf "Unhandled directive %s (Handled: %s)\n",
825 $para->[0], join(' ', sort keys %{$self->{'accept_directives'}} )
826 ;
827 $self->whine(
828 $para->[1]{'start_line'},
829 "Unknown directive: $para->[0]"
830 );
831
832 # And maybe treat it as text instead of just letting it go?
833 next;
834 }
835
836 if($para_type =~ s/^\?//s) {
837 if(! @$curr_open) { # usual case
838 DEBUG and print "Treating $para_type paragraph as such because stack is empty.\n";
839 } else {
840 my @fors = grep $_->[0] eq '=for', @$curr_open;
841 DEBUG > 1 and print "Containing fors: ",
842 join(',', map $_->[1]{'target'}, @fors), "\n";
843
844 if(! @fors) {
845 DEBUG and print "Treating $para_type paragraph as such because stack has no =for's\n";
846
847 #} elsif(grep $_->[1]{'~resolve'}, @fors) {
848 #} elsif(not grep !$_->[1]{'~resolve'}, @fors) {
849 } elsif( $fors[-1][1]{'~resolve'} ) {
850 # Look to the immediately containing for
851
852 if($para_type eq 'Data') {
853 DEBUG and print "Treating Data paragraph as Plain/Verbatim because the containing =for ($fors[-1][1]{'target'}) is a resolver\n";
854 $para->[0] = 'Para';
855 $para_type = 'Plain';
856 } else {
857 DEBUG and print "Treating $para_type paragraph as such because the containing =for ($fors[-1][1]{'target'}) is a resolver\n";
858 }
859 } else {
860 DEBUG and print "Treating $para_type paragraph as Data because the containing =for ($fors[-1][1]{'target'}) is a non-resolver\n";
861 $para->[0] = $para_type = 'Data';
862 }
863 }
864 }
865
866 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
867 if($para_type eq 'Plain') {
868 $self->_ponder_Plain($para);
869 } elsif($para_type eq 'Verbatim') {
870 $self->_ponder_Verbatim($para);
871 } elsif($para_type eq 'Data') {
872 $self->_ponder_Data($para);
873 } else {
874 die "\$para type is $para_type -- how did that happen?";
875 # Shouldn't happen.
876 }
877
878 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
879 $para->[0] =~ s/^[~=]//s;
880
881 DEBUG and print "\n", pretty($para), "\n";
882
883 # traverse the treelet (which might well be just one string scalar)
884 $self->{'content_seen'} ||= 1;
885 $self->_traverse_treelet_bit(@$para);
886 }
887 }
888
889 return;
890}
891
892###########################################################################
893# The sub-ponderers...
894
- -
897sub _ponder_for {
898 my ($self,$para,$curr_open,$paras) = @_;
899
900 # Fake it out as a begin/end
901 my $target;
902
903 if(grep $_->[1]{'~ignore'}, @$curr_open) {
904 DEBUG > 1 and print "Ignoring ignorable =for\n";
905 return 1;
906 }
907
908 for(my $i = 2; $i < @$para; ++$i) {
909 if($para->[$i] =~ s/^\s*(\S+)\s*//s) {
910 $target = $1;
911 last;
912 }
913 }
914 unless(defined $target) {
915 $self->whine(
916 $para->[1]{'start_line'},
917 "=for without a target?"
918 );
919 return 1;
920 }
921 DEBUG > 1 and
922 print "Faking out a =for $target as a =begin $target / =end $target\n";
923
924 $para->[0] = 'Data';
925
926 unshift @$paras,
927 ['=begin',
928 {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'},
929 $target,
930 ],
931 $para,
932 ['=end',
933 {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'},
934 $target,
935 ],
936 ;
937
938 return 1;
939}
940
941sub _ponder_begin {
942 my ($self,$para,$curr_open,$paras) = @_;
943 my $content = join ' ', splice @$para, 2;
944 $content =~ s/^\s+//s;
945 $content =~ s/\s+$//s;
946 unless(length($content)) {
947 $self->whine(
948 $para->[1]{'start_line'},
949 "=begin without a target?"
950 );
951 DEBUG and print "Ignoring targetless =begin\n";
952 return 1;
953 }
954
955 my ($target, $title) = $content =~ m/^(\S+)\s*(.*)$/;
956 $para->[1]{'title'} = $title if ($title);
957 $para->[1]{'target'} = $target; # without any ':'
958 $content = $target; # strip off the title
959
960 $content =~ s/^:!/!:/s;
961 my $neg; # whether this is a negation-match
962 $neg = 1 if $content =~ s/^!//s;
963 my $to_resolve; # whether to process formatting codes
964 $to_resolve = 1 if $content =~ s/^://s;
965
966 my $dont_ignore; # whether this target matches us
967
968 foreach my $target_name (
969 split(',', $content, -1),
970 $neg ? () : '*'
971 ) {
972 DEBUG > 2 and
973 print " Considering whether =begin $content matches $target_name\n";
974 next unless $self->{'accept_targets'}{$target_name};
975
976 DEBUG > 2 and
977 print " It DOES match the acceptable target $target_name!\n";
978 $to_resolve = 1
979 if $self->{'accept_targets'}{$target_name} eq 'force_resolve';
980 $dont_ignore = 1;
981 $para->[1]{'target_matching'} = $target_name;
982 last; # stop looking at other target names
983 }
984
985 if($neg) {
986 if( $dont_ignore ) {
987 $dont_ignore = '';
988 delete $para->[1]{'target_matching'};
989 DEBUG > 2 and print " But the leading ! means that this is a NON-match!\n";
990 } else {
991 $dont_ignore = 1;
992 $para->[1]{'target_matching'} = '!';
993 DEBUG > 2 and print " But the leading ! means that this IS a match!\n";
994 }
995 }
996
997 $para->[0] = '=for'; # Just what we happen to call these, internally
998 $para->[1]{'~really'} ||= '=begin';
999 $para->[1]{'~ignore'} = (! $dont_ignore) || 0;
1000 $para->[1]{'~resolve'} = $to_resolve || 0;
1001
1002 DEBUG > 1 and print " Making note to ", $dont_ignore ? 'not ' : '',
1003 "ignore contents of this region\n";
1004 DEBUG > 1 and $dont_ignore and print " Making note to treat contents as ",
1005 ($to_resolve ? 'verbatim/plain' : 'data'), " paragraphs\n";
1006 DEBUG > 1 and print " (Stack now: ", $self->_dump_curr_open(), ")\n";
1007
1008 push @$curr_open, $para;
1009 if(!$dont_ignore or scalar grep $_->[1]{'~ignore'}, @$curr_open) {
1010 DEBUG > 1 and print "Ignoring ignorable =begin\n";
1011 } else {
1012 $self->{'content_seen'} ||= 1;
1013 $self->_handle_element_start((my $scratch='for'), $para->[1]);
1014 }
1015
1016 return 1;
1017}
1018
1019sub _ponder_end {
1020 my ($self,$para,$curr_open,$paras) = @_;
1021 my $content = join ' ', splice @$para, 2;
1022 $content =~ s/^\s+//s;
1023 $content =~ s/\s+$//s;
1024 DEBUG and print "Ogling '=end $content' directive\n";
1025
1026 unless(length($content)) {
1027 $self->whine(
1028 $para->[1]{'start_line'},
1029 "'=end' without a target?" . (
1030 ( @$curr_open and $curr_open->[-1][0] eq '=for' )
1031 ? ( " (Should be \"=end " . $curr_open->[-1][1]{'target'} . '")' )
1032 : ''
1033 )
1034 );
1035 DEBUG and print "Ignoring targetless =end\n";
1036 return 1;
1037 }
1038
1039 unless($content =~ m/^\S+$/) { # i.e., unless it's one word
1040 $self->whine(
1041 $para->[1]{'start_line'},
1042 "'=end $content' is invalid. (Stack: "
1043 . $self->_dump_curr_open() . ')'
1044 );
1045 DEBUG and print "Ignoring mistargetted =end $content\n";
1046 return 1;
1047 }
1048
1049 unless(@$curr_open and $curr_open->[-1][0] eq '=for') {
1050 $self->whine(
1051 $para->[1]{'start_line'},
1052 "=end $content without matching =begin. (Stack: "
1053 . $self->_dump_curr_open() . ')'
1054 );
1055 DEBUG and print "Ignoring mistargetted =end $content\n";
1056 return 1;
1057 }
1058
1059 unless($content eq $curr_open->[-1][1]{'target'}) {
1060 $self->whine(
1061 $para->[1]{'start_line'},
1062 "=end $content doesn't match =begin "
1063 . $curr_open->[-1][1]{'target'}
1064 . ". (Stack: "
1065 . $self->_dump_curr_open() . ')'
1066 );
1067 DEBUG and print "Ignoring mistargetted =end $content at line $para->[1]{'start_line'}\n";
1068 return 1;
1069 }
1070
1071 # Else it's okay to close...
1072 if(grep $_->[1]{'~ignore'}, @$curr_open) {
1073 DEBUG > 1 and print "Not firing any event for this =end $content because in an ignored region\n";
1074 # And that may be because of this to-be-closed =for region, or some
1075 # other one, but it doesn't matter.
1076 } else {
1077 $curr_open->[-1][1]{'start_line'} = $para->[1]{'start_line'};
1078 # what's that for?
1079
1080 $self->{'content_seen'} ||= 1;
1081 $self->_handle_element_end( my $scratch = 'for', $para->[1]);
1082 }
1083 DEBUG > 1 and print "Popping $curr_open->[-1][0] $curr_open->[-1][1]{'target'} because of =end $content\n";
1084 pop @$curr_open;
1085
1086 return 1;
1087}
1088
1089sub _ponder_doc_end {
1090 my ($self,$para,$curr_open,$paras) = @_;
1091 if(@$curr_open) { # Deal with things left open
1092 DEBUG and print "Stack is nonempty at end-document: (",
1093 $self->_dump_curr_open(), ")\n";
1094
1095 DEBUG > 9 and print "Stack: ", pretty($curr_open), "\n";
1096 unshift @$paras, $self->_closers_for_all_curr_open;
1097 # Make sure there is exactly one ~end in the parastack, at the end:
1098 @$paras = grep $_->[0] ne '~end', @$paras;
1099 push @$paras, $para, $para;
1100 # We need two -- once for the next cycle where we
1101 # generate errata, and then another to be at the end
1102 # when that loop back around to process the errata.
1103 return 1;
1104
1105 } else {
1106 DEBUG and print "Okay, stack is empty now.\n";
1107 }
1108
1109 # Try generating errata section, if applicable
1110 unless($self->{'~tried_gen_errata'}) {
1111 $self->{'~tried_gen_errata'} = 1;
1112 my @extras = $self->_gen_errata();
1113 if(@extras) {
1114 unshift @$paras, @extras;
1115 DEBUG and print "Generated errata... relooping...\n";
1116 return 1; # I.e., loop around again to process these fake-o paragraphs
1117 }
1118 }
1119
1120 splice @$paras; # Well, that's that for this paragraph buffer.
1121 DEBUG and print "Throwing end-document event.\n";
1122
1123 $self->_handle_element_end( my $scratch = 'Document' );
1124 return 1; # Hasta la byebye
1125}
1126
1127sub _ponder_pod {
1128 my ($self,$para,$curr_open,$paras) = @_;
1129 $self->whine(
1130 $para->[1]{'start_line'},
1131 "=pod directives shouldn't be over one line long! Ignoring all "
1132 . (@$para - 2) . " lines of content"
1133 ) if @$para > 3;
1134
1135 # Content ignored unless 'pod_handler' is set
1136 if (my $pod_handler = $self->{'pod_handler'}) {
1137 my ($line_num, $line) = map $_, $para->[1]{'start_line'}, $para->[2];
1138 $line = $line eq '' ? "=pod" : "=pod $line"; # imitate cut_handler output
1139 $pod_handler->($line, $line_num, $self);
1140 }
1141
1142 # The surrounding methods set content_seen, so let us remain consistent.
1143 # I do not know why it was not here before -- should it not be here?
1144 # $self->{'content_seen'} ||= 1;
1145
1146 return;
1147}
1148
1149sub _ponder_over {
1150 my ($self,$para,$curr_open,$paras) = @_;
1151 return 1 unless @$paras;
1152 my $list_type;
1153
1154 if($paras->[0][0] eq '=item') { # most common case
1155 $list_type = $self->_get_initial_item_type($paras->[0]);
1156
1157 } elsif($paras->[0][0] eq '=back') {
1158 # Ignore empty lists by default
1159 if ($self->{'parse_empty_lists'}) {
1160 $list_type = 'empty';
1161 } else {
1162 shift @$paras;
1163 return 1;
1164 }
1165 } elsif($paras->[0][0] eq '~end') {
1166 $self->whine(
1167 $para->[1]{'start_line'},
1168 "=over is the last thing in the document?!"
1169 );
1170 return 1; # But feh, ignore it.
1171 } else {
1172 $list_type = 'block';
1173 }
1174 $para->[1]{'~type'} = $list_type;
1175 push @$curr_open, $para;
1176 # yes, we reuse the paragraph as a stack item
1177
1178 my $content = join ' ', splice @$para, 2;
1179 my $overness;
1180 if($content =~ m/^\s*$/s) {
1181 $para->[1]{'indent'} = 4;
1182 } elsif($content =~ m/^\s*((?:\d*\.)?\d+)\s*$/s) {
118323.26ms216µs
# spent 14µs (11+2) within Pod::Simple::BlackBox::BEGIN@1183 which was called: # once (11µs+2µs) by Pod::Simple::LinkSection::BEGIN@9 at line 1183
no integer;
# spent 14µs making 1 call to Pod::Simple::BlackBox::BEGIN@1183 # spent 2µs making 1 call to integer::unimport
1184 $para->[1]{'indent'} = $1;
1185 if($1 == 0) {
1186 $self->whine(
1187 $para->[1]{'start_line'},
1188 "Can't have a 0 in =over $content"
1189 );
1190 $para->[1]{'indent'} = 4;
1191 }
1192 } else {
1193 $self->whine(
1194 $para->[1]{'start_line'},
1195 "=over should be: '=over' or '=over positive_number'"
1196 );
1197 $para->[1]{'indent'} = 4;
1198 }
1199 DEBUG > 1 and print "=over found of type $list_type\n";
1200
1201 $self->{'content_seen'} ||= 1;
1202 $self->_handle_element_start((my $scratch = 'over-' . $list_type), $para->[1]);
1203
1204 return;
1205}
1206
1207sub _ponder_back {
1208 my ($self,$para,$curr_open,$paras) = @_;
1209 # TODO: fire off </item-number> or </item-bullet> or </item-text> ??
1210
1211 my $content = join ' ', splice @$para, 2;
1212 if($content =~ m/\S/) {
1213 $self->whine(
1214 $para->[1]{'start_line'},
1215 "=back doesn't take any parameters, but you said =back $content"
1216 );
1217 }
1218
1219 if(@$curr_open and $curr_open->[-1][0] eq '=over') {
1220 DEBUG > 1 and print "=back happily closes matching =over\n";
1221 # Expected case: we're closing the most recently opened thing
1222 #my $over = pop @$curr_open;
1223 $self->{'content_seen'} ||= 1;
1224 $self->_handle_element_end( my $scratch =
1225 'over-' . ( (pop @$curr_open)->[1]{'~type'} ), $para->[1]
1226 );
1227 } else {
1228 DEBUG > 1 and print "=back found without a matching =over. Stack: (",
1229 join(', ', map $_->[0], @$curr_open), ").\n";
1230 $self->whine(
1231 $para->[1]{'start_line'},
1232 '=back without =over'
1233 );
1234 return 1; # and ignore it
1235 }
1236}
1237
1238sub _ponder_item {
1239 my ($self,$para,$curr_open,$paras) = @_;
1240 my $over;
1241 unless(@$curr_open and
1242 $over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) {
1243 $self->whine(
1244 $para->[1]{'start_line'},
1245 "'=item' outside of any '=over'"
1246 );
1247 unshift @$paras,
1248 ['=over', {'start_line' => $para->[1]{'start_line'}}, ''],
1249 $para
1250 ;
1251 return 1;
1252 }
1253
1254
1255 my $over_type = $over->[1]{'~type'};
1256
1257 if(!$over_type) {
1258 # Shouldn't happen1
1259 die "Typeless over in stack, starting at line "
1260 . $over->[1]{'start_line'};
1261
1262 } elsif($over_type eq 'block') {
1263 unless($curr_open->[-1][1]{'~bitched_about'}) {
1264 $curr_open->[-1][1]{'~bitched_about'} = 1;
1265 $self->whine(
1266 $curr_open->[-1][1]{'start_line'},
1267 "You can't have =items (as at line "
1268 . $para->[1]{'start_line'}
1269 . ") unless the first thing after the =over is an =item"
1270 );
1271 }
1272 # Just turn it into a paragraph and reconsider it
1273 $para->[0] = '~Para';
1274 unshift @$paras, $para;
1275 return 1;
1276
1277 } elsif($over_type eq 'text') {
1278 my $item_type = $self->_get_item_type($para);
1279 # That kills the content of the item if it's a number or bullet.
1280 DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
1281
1282 if($item_type eq 'text') {
1283 # Nothing special needs doing for 'text'
1284 } elsif($item_type eq 'number' or $item_type eq 'bullet') {
1285 $self->whine(
1286 $para->[1]{'start_line'},
1287 "Expected text after =item, not a $item_type"
1288 );
1289 # Undo our clobbering:
1290 push @$para, $para->[1]{'~orig_content'};
1291 delete $para->[1]{'number'};
1292 # Only a PROPER item-number element is allowed
1293 # to have a number attribute.
1294 } else {
1295 die "Unhandled item type $item_type"; # should never happen
1296 }
1297
1298 # =item-text thingies don't need any assimilation, it seems.
1299
1300 } elsif($over_type eq 'number') {
1301 my $item_type = $self->_get_item_type($para);
1302 # That kills the content of the item if it's a number or bullet.
1303 DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
1304
1305 my $expected_value = ++ $curr_open->[-1][1]{'~counter'};
1306
1307 if($item_type eq 'bullet') {
1308 # Hm, it's not numeric. Correct for this.
1309 $para->[1]{'number'} = $expected_value;
1310 $self->whine(
1311 $para->[1]{'start_line'},
1312 "Expected '=item $expected_value'"
1313 );
1314 push @$para, $para->[1]{'~orig_content'};
1315 # restore the bullet, blocking the assimilation of next para
1316
1317 } elsif($item_type eq 'text') {
1318 # Hm, it's not numeric. Correct for this.
1319 $para->[1]{'number'} = $expected_value;
1320 $self->whine(
1321 $para->[1]{'start_line'},
1322 "Expected '=item $expected_value'"
1323 );
1324 # Text content will still be there and will block next ~Para
1325
1326 } elsif($item_type ne 'number') {
1327 die "Unknown item type $item_type"; # should never happen
1328
1329 } elsif($expected_value == $para->[1]{'number'}) {
1330 DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n";
1331
1332 } else {
1333 DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'},
1334 " instead of the expected value of $expected_value\n";
1335 $self->whine(
1336 $para->[1]{'start_line'},
1337 "You have '=item " . $para->[1]{'number'} .
1338 "' instead of the expected '=item $expected_value'"
1339 );
1340 $para->[1]{'number'} = $expected_value; # correcting!!
1341 }
1342
1343 if(@$para == 2) {
1344 # For the cases where we /didn't/ push to @$para
1345 if($paras->[0][0] eq '~Para') {
1346 DEBUG and print "Assimilating following ~Para content into $over_type item\n";
1347 push @$para, splice @{shift @$paras},2;
1348 } else {
1349 DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
1350 push @$para, ''; # Just so it's not contentless
1351 }
1352 }
1353
1354
1355 } elsif($over_type eq 'bullet') {
1356 my $item_type = $self->_get_item_type($para);
1357 # That kills the content of the item if it's a number or bullet.
1358 DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
1359
1360 if($item_type eq 'bullet') {
1361 # as expected!
1362
1363 if( $para->[1]{'~_freaky_para_hack'} ) {
1364 DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n";
1365 push @$para, delete $para->[1]{'~_freaky_para_hack'};
1366 }
1367
1368 } elsif($item_type eq 'number') {
1369 $self->whine(
1370 $para->[1]{'start_line'},
1371 "Expected '=item *'"
1372 );
1373 push @$para, $para->[1]{'~orig_content'};
1374 # and block assimilation of the next paragraph
1375 delete $para->[1]{'number'};
1376 # Only a PROPER item-number element is allowed
1377 # to have a number attribute.
1378 } elsif($item_type eq 'text') {
1379 $self->whine(
1380 $para->[1]{'start_line'},
1381 "Expected '=item *'"
1382 );
1383 # But doesn't need processing. But it'll block assimilation
1384 # of the next para.
1385 } else {
1386 die "Unhandled item type $item_type"; # should never happen
1387 }
1388
1389 if(@$para == 2) {
1390 # For the cases where we /didn't/ push to @$para
1391 if($paras->[0][0] eq '~Para') {
1392 DEBUG and print "Assimilating following ~Para content into $over_type item\n";
1393 push @$para, splice @{shift @$paras},2;
1394 } else {
1395 DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
1396 push @$para, ''; # Just so it's not contentless
1397 }
1398 }
1399
1400 } else {
1401 die "Unhandled =over type \"$over_type\"?";
1402 # Shouldn't happen!
1403 }
1404 $para->[0] .= '-' . $over_type;
1405
1406 return;
1407}
1408
1409sub _ponder_Plain {
1410 my ($self,$para) = @_;
1411 DEBUG and print " giving plain treatment...\n";
1412 unless( @$para == 2 or ( @$para == 3 and $para->[2] eq '' )
1413 or $para->[1]{'~cooked'}
1414 ) {
1415 push @$para,
1416 @{$self->_make_treelet(
1417 join("\n", splice(@$para, 2)),
1418 $para->[1]{'start_line'}
1419 )};
1420 }
1421 # Empty paragraphs don't need a treelet for any reason I can see.
1422 # And precooked paragraphs already have a treelet.
1423 return;
1424}
1425
1426sub _ponder_Verbatim {
1427 my ($self,$para) = @_;
1428 DEBUG and print " giving verbatim treatment...\n";
1429
1430 $para->[1]{'xml:space'} = 'preserve';
1431
1432 my $indent = $self->strip_verbatim_indent;
1433 if ($indent && ref $indent eq 'CODE') {
1434 my @shifted = (shift @{$para}, shift @{$para});
1435 $indent = $indent->($para);
1436 unshift @{$para}, @shifted;
1437 }
1438
1439 for(my $i = 2; $i < @$para; $i++) {
1440 foreach my $line ($para->[$i]) { # just for aliasing
1441 # Strip indentation.
1442 $line =~ s/^\Q$indent// if $indent
1443 && !($self->{accept_codes} && $self->{accept_codes}{VerbatimFormatted});
1444 while( $line =~
1445 # Sort of adapted from Text::Tabs -- yes, it's hardwired in that
1446 # tabs are at every EIGHTH column. For portability, it has to be
1447 # one setting everywhere, and 8th wins.
1448 s/^([^\t]*)(\t+)/$1.(" " x ((length($2)<<3)-(length($1)&7)))/e
1449 ) {}
1450
1451 # TODO: whinge about (or otherwise treat) unindented or overlong lines
1452
1453 }
1454 }
1455
1456 # Now the VerbatimFormatted hoodoo...
1457 if( $self->{'accept_codes'} and
1458 $self->{'accept_codes'}{'VerbatimFormatted'}
1459 ) {
1460 while(@$para > 3 and $para->[-1] !~ m/\S/) { pop @$para }
1461 # Kill any number of terminal newlines
1462 $self->_verbatim_format($para);
1463 } elsif ($self->{'codes_in_verbatim'}) {
1464 push @$para,
1465 @{$self->_make_treelet(
1466 join("\n", splice(@$para, 2)),
1467 $para->[1]{'start_line'}, $para->[1]{'xml:space'}
1468 )};
1469 $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines
1470 } else {
1471 push @$para, join "\n", splice(@$para, 2) if @$para > 3;
1472 $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines
1473 }
1474 return;
1475}
1476
1477sub _ponder_Data {
1478 my ($self,$para) = @_;
1479 DEBUG and print " giving data treatment...\n";
1480 $para->[1]{'xml:space'} = 'preserve';
1481 push @$para, join "\n", splice(@$para, 2) if @$para > 3;
1482 return;
1483}
1484
- -
1488###########################################################################
1489
1490sub _traverse_treelet_bit { # for use only by the routine above
1491 my($self, $name) = splice @_,0,2;
1492
1493 my $scratch;
1494 $self->_handle_element_start(($scratch=$name), shift @_);
1495
1496 while (@_) {
1497 my $x = shift;
1498 if (ref($x)) {
1499 &_traverse_treelet_bit($self, @$x);
1500 } else {
1501 $x .= shift while @_ && !ref($_[0]);
1502 $self->_handle_text($x);
1503 }
1504 }
1505
1506 $self->_handle_element_end($scratch=$name);
1507 return;
1508}
1509
1510#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1511
1512sub _closers_for_all_curr_open {
1513 my $self = $_[0];
1514 my @closers;
1515 foreach my $still_open (@{ $self->{'curr_open'} || return }) {
1516 my @copy = @$still_open;
1517 $copy[1] = {%{ $copy[1] }};
1518 #$copy[1]{'start_line'} = -1;
1519 if($copy[0] eq '=for') {
1520 $copy[0] = '=end';
1521 } elsif($copy[0] eq '=over') {
1522 $self->whine(
1523 $still_open->[1]{start_line} ,
1524 "=over without closing =back"
1525 );
1526
1527 $copy[0] = '=back';
1528 } else {
1529 die "I don't know how to auto-close an open $copy[0] region";
1530 }
1531
1532 unless( @copy > 2 ) {
1533 push @copy, $copy[1]{'target'};
1534 $copy[-1] = '' unless defined $copy[-1];
1535 # since =over's don't have targets
1536 }
1537
1538 $copy[1]{'fake-closer'} = 1;
1539
1540 DEBUG and print "Queuing up fake-o event: ", pretty(\@copy), "\n";
1541 unshift @closers, \@copy;
1542 }
1543 return @closers;
1544}
1545
1546#--------------------------------------------------------------------------
1547
1548sub _verbatim_format {
1549 my($it, $p) = @_;
1550
1551 my $formatting;
1552
1553 for(my $i = 2; $i < @$p; $i++) { # work backwards over the lines
1554 DEBUG and print "_verbatim_format appends a newline to $i: $p->[$i]\n";
1555 $p->[$i] .= "\n";
1556 # Unlike with simple Verbatim blocks, we don't end up just doing
1557 # a join("\n", ...) on the contents, so we have to append a
1558 # newline to ever line, and then nix the last one later.
1559 }
1560
1561 if( DEBUG > 4 ) {
1562 print "<<\n";
1563 for(my $i = $#$p; $i >= 2; $i--) { # work backwards over the lines
1564 print "_verbatim_format $i: $p->[$i]";
1565 }
1566 print ">>\n";
1567 }
1568
1569 for(my $i = $#$p; $i > 2; $i--) {
1570 # work backwards over the lines, except the first (#2)
1571
1572 #next unless $p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s
1573 # and $p->[$i-1] !~ m{^#:[ \^\/\%]*\n?$}s;
1574 # look at a formatty line preceding a nonformatty one
1575 DEBUG > 5 and print "Scrutinizing line $i: $$p[$i]\n";
1576 if($p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s) {
1577 DEBUG > 5 and print " It's a formatty line. ",
1578 "Peeking at previous line ", $i-1, ": $$p[$i-1]: \n";
1579
1580 if( $p->[$i-1] =~ m{^#:[ \^\/\%]*\n?$}s ) {
1581 DEBUG > 5 and print " Previous line is formatty! Skipping this one.\n";
1582 next;
1583 } else {
1584 DEBUG > 5 and print " Previous line is non-formatty! Yay!\n";
1585 }
1586 } else {
1587 DEBUG > 5 and print " It's not a formatty line. Ignoring\n";
1588 next;
1589 }
1590
1591 # A formatty line has to have #: in the first two columns, and uses
1592 # "^" to mean bold, "/" to mean underline, and "%" to mean bold italic.
1593 # Example:
1594 # What do you want? i like pie. [or whatever]
1595 # #:^^^^^^^^^^^^^^^^^ /////////////
1596
1597
1598 DEBUG > 4 and print "_verbatim_format considers:\n<$p->[$i-1]>\n<$p->[$i]>\n";
1599
1600 $formatting = ' ' . $1;
1601 $formatting =~ s/\s+$//s; # nix trailing whitespace
1602 unless(length $formatting and $p->[$i-1] =~ m/\S/) { # no-op
1603 splice @$p,$i,1; # remove this line
1604 $i--; # don't consider next line
1605 next;
1606 }
1607
1608 if( length($formatting) >= length($p->[$i-1]) ) {
1609 $formatting = substr($formatting, 0, length($p->[$i-1]) - 1) . ' ';
1610 } else {
1611 $formatting .= ' ' x (length($p->[$i-1]) - length($formatting));
1612 }
1613 # Make $formatting and the previous line be exactly the same length,
1614 # with $formatting having a " " as the last character.
1615
1616 DEBUG > 4 and print "Formatting <$formatting> on <", $p->[$i-1], ">\n";
1617
1618
1619 my @new_line;
1620 while( $formatting =~ m{\G(( +)|(\^+)|(\/+)|(\%+))}g ) {
1621 #print "Format matches $1\n";
1622
1623 if($2) {
1624 #print "SKIPPING <$2>\n";
1625 push @new_line,
1626 substr($p->[$i-1], pos($formatting)-length($1), length($1));
1627 } else {
1628 #print "SNARING $+\n";
1629 push @new_line, [
1630 (
1631 $3 ? 'VerbatimB' :
1632 $4 ? 'VerbatimI' :
1633 $5 ? 'VerbatimBI' : die("Should never get called")
1634 ), {},
1635 substr($p->[$i-1], pos($formatting)-length($1), length($1))
1636 ];
1637 #print "Formatting <$new_line[-1][-1]> as $new_line[-1][0]\n";
1638 }
1639 }
1640 my @nixed =
1641 splice @$p, $i-1, 2, @new_line; # replace myself and the next line
1642 DEBUG > 10 and print "Nixed count: ", scalar(@nixed), "\n";
1643
1644 DEBUG > 6 and print "New version of the above line is these tokens (",
1645 scalar(@new_line), "):",
1646 map( ref($_)?"<@$_> ":"<$_>", @new_line ), "\n";
1647 $i--; # So the next line we scrutinize is the line before the one
1648 # that we just went and formatted
1649 }
1650
1651 $p->[0] = 'VerbatimFormatted';
1652
1653 # Collapse adjacent text nodes, just for kicks.
1654 for( my $i = 2; $i > $#$p; $i++ ) { # work forwards over the tokens except for the last
1655 if( !ref($p->[$i]) and !ref($p->[$i + 1]) ) {
1656 DEBUG > 5 and print "_verbatim_format merges {$p->[$i]} and {$p->[$i+1]}\n";
1657 $p->[$i] .= splice @$p, $i+1, 1; # merge
1658 --$i; # and back up
1659 }
1660 }
1661
1662 # Now look for the last text token, and remove the terminal newline
1663 for( my $i = $#$p; $i >= 2; $i-- ) {
1664 # work backwards over the tokens, even the first
1665 if( !ref($p->[$i]) ) {
1666 if($p->[$i] =~ s/\n$//s) {
1667 DEBUG > 5 and print "_verbatim_format killed the terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]}\n";
1668 } else {
1669 DEBUG > 5 and print
1670 "No terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]} !?\n";
1671 }
1672 last; # we only want the next one
1673 }
1674 }
1675
1676 return;
1677}
1678
1679
1680#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1681
1682
1683sub _treelet_from_formatting_codes {
1684 # Given a paragraph, returns a treelet. Full of scary tokenizing code.
1685 # Like [ '~Top', {'start_line' => $start_line},
1686 # "I like ",
1687 # [ 'B', {}, "pie" ],
1688 # "!"
1689 # ]
1690
1691 my($self, $para, $start_line, $preserve_space) = @_;
1692
1693 my $treelet = ['~Top', {'start_line' => $start_line},];
1694
1695 unless ($preserve_space || $self->{'preserve_whitespace'}) {
1696 $para =~ s/\s+/ /g; # collapse and trim all whitespace first.
1697 $para =~ s/ $//;
1698 $para =~ s/^ //;
1699 }
1700
1701 # Only apparent problem the above code is that N<< >> turns into
1702 # N<< >>. But then, word wrapping does that too! So don't do that!
1703
1704 my @stack;
1705 my @lineage = ($treelet);
1706 my $raw = ''; # raw content of L<> fcode before splitting/processing
1707 # XXX 'raw' is not 100% accurate: all surrounding whitespace is condensed
1708 # into just 1 ' '. Is this the regex's doing or 'raw's?
1709 my $inL = 0;
1710
1711 DEBUG > 4 and print "Paragraph:\n$para\n\n";
1712
1713 # Here begins our frightening tokenizer RE. The following regex matches
1714 # text in four main parts:
1715 #
1716 # * Start-codes. The first alternative matches C< or C<<, the latter
1717 # followed by some whitespace. $1 will hold the entire start code
1718 # (including any space following a multiple-angle-bracket delimiter),
1719 # and $2 will hold only the additional brackets past the first in a
1720 # multiple-bracket delimiter. length($2) + 1 will be the number of
1721 # closing brackets we have to find.
1722 #
1723 # * Closing brackets. Match some amount of whitespace followed by
1724 # multiple close brackets. The logic to see if this closes anything
1725 # is down below. Note that in order to parse C<< >> correctly, we
1726 # have to use look-behind (?<=\s\s), since the match of the starting
1727 # code will have consumed the whitespace.
1728 #
1729 # * A single closing bracket, to close a simple code like C<>.
1730 #
1731 # * Something that isn't a start or end code. We have to be careful
1732 # about accepting whitespace, since perlpodspec says that any whitespace
1733 # before a multiple-bracket closing delimiter should be ignored.
1734 #
1735 while($para =~
1736 m/\G
1737 (?:
1738 # Match starting codes, including the whitespace following a
1739 # multiple-delimiter start code. $1 gets the whole start code and
1740 # $2 gets all but one of the <s in the multiple-bracket case.
1741 ([A-Z]<(?:(<+)\s+)?)
1742 |
1743 # Match multiple-bracket end codes. $3 gets the whitespace that
1744 # should be discarded before an end bracket but kept in other cases
1745 # and $4 gets the end brackets themselves.
1746 (\s+|(?<=\s\s))(>{2,})
1747 |
1748 (\s?>) # $5: simple end-codes
1749 |
1750 ( # $6: stuff containing no start-codes or end-codes
1751 (?:
1752 [^A-Z\s>]
1753 |
1754 (?:
1755 [A-Z](?!<)
1756 )
1757 |
1758 # whitespace is ok, but we don't want to eat the whitespace before
1759 # a multiple-bracket end code.
1760 # NOTE: we may still have problems with e.g. S<< >>
1761 (?:
1762 \s(?!\s*>{2,})
1763 )
1764 )+
1765 )
1766 )
1767 /xgo
1768 ) {
1769 DEBUG > 4 and print "\nParagraphic tokenstack = (@stack)\n";
1770 if(defined $1) {
1771 if(defined $2) {
1772 DEBUG > 3 and print "Found complex start-text code \"$1\"\n";
1773 push @stack, length($2) + 1;
1774 # length of the necessary complex end-code string
1775 } else {
1776 DEBUG > 3 and print "Found simple start-text code \"$1\"\n";
1777 push @stack, 0; # signal that we're looking for simple
1778 }
1779 push @lineage, [ substr($1,0,1), {}, ]; # new node object
1780 push @{ $lineage[-2] }, $lineage[-1];
1781 if ('L' eq substr($1,0,1)) {
1782 $raw = $inL ? $raw.$1 : ''; # reset raw content accumulator
1783 $inL = 1;
1784 } else {
1785 $raw .= $1 if $inL;
1786 }
1787
1788 } elsif(defined $4) {
1789 DEBUG > 3 and print "Found apparent complex end-text code \"$3$4\"\n";
1790 # This is where it gets messy...
1791 if(! @stack) {
1792 # We saw " >>>>" but needed nothing. This is ALL just stuff then.
1793 DEBUG > 4 and print " But it's really just stuff.\n";
1794 push @{ $lineage[-1] }, $3, $4;
1795 next;
1796 } elsif(!$stack[-1]) {
1797 # We saw " >>>>" but needed only ">". Back pos up.
1798 DEBUG > 4 and print " And that's more than we needed to close simple.\n";
1799 push @{ $lineage[-1] }, $3; # That was a for-real space, too.
1800 pos($para) = pos($para) - length($4) + 1;
1801 } elsif($stack[-1] == length($4)) {
1802 # We found " >>>>", and it was exactly what we needed. Commonest case.
1803 DEBUG > 4 and print " And that's exactly what we needed to close complex.\n";
1804 } elsif($stack[-1] < length($4)) {
1805 # We saw " >>>>" but needed only " >>". Back pos up.
1806 DEBUG > 4 and print " And that's more than we needed to close complex.\n";
1807 pos($para) = pos($para) - length($4) + $stack[-1];
1808 } else {
1809 # We saw " >>>>" but needed " >>>>>>". So this is all just stuff!
1810 DEBUG > 4 and print " But it's really just stuff, because we needed more.\n";
1811 push @{ $lineage[-1] }, $3, $4;
1812 next;
1813 }
1814 #print "\nHOOBOY ", scalar(@{$lineage[-1]}), "!!!\n";
1815
1816 push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };
1817 # Keep the element from being childless
1818
1819 pop @stack;
1820 pop @lineage;
1821
1822 unless (@stack) { # not in an L if there are no open fcodes
1823 $inL = 0;
1824 if (ref $lineage[-1][-1] && $lineage[-1][-1][0] eq 'L') {
1825 $lineage[-1][-1][1]{'raw'} = $raw
1826 }
1827 }
1828 $raw .= $3.$4 if $inL;
1829
1830 } elsif(defined $5) {
1831 DEBUG > 3 and print "Found apparent simple end-text code \"$5\"\n";
1832
1833 if(@stack and ! $stack[-1]) {
1834 # We're indeed expecting a simple end-code
1835 DEBUG > 4 and print " It's indeed an end-code.\n";
1836
1837 if(length($5) == 2) { # There was a space there: " >"
1838 push @{ $lineage[-1] }, ' ';
1839 } elsif( 2 == @{ $lineage[-1] } ) { # Closing a childless element
1840 push @{ $lineage[-1] }, ''; # keep it from being really childless
1841 }
1842
1843 pop @stack;
1844 pop @lineage;
1845 } else {
1846 DEBUG > 4 and print " It's just stuff.\n";
1847 push @{ $lineage[-1] }, $5;
1848 }
1849
1850 unless (@stack) { # not in an L if there are no open fcodes
1851 $inL = 0;
1852 if (ref $lineage[-1][-1] && $lineage[-1][-1][0] eq 'L') {
1853 $lineage[-1][-1][1]{'raw'} = $raw
1854 }
1855 }
1856 $raw .= $5 if $inL;
1857
1858 } elsif(defined $6) {
1859 DEBUG > 3 and print "Found stuff \"$6\"\n";
1860 push @{ $lineage[-1] }, $6;
1861 $raw .= $6 if $inL;
1862 # XXX does not capture multiplace whitespaces -- 'raw' ends up with
1863 # at most 1 leading/trailing whitespace, why not all of it?
1864
1865 } else {
1866 # should never ever ever ever happen
1867 DEBUG and print "AYYAYAAAAA at line ", __LINE__, "\n";
1868 die "SPORK 512512!";
1869 }
1870 }
1871
1872 if(@stack) { # Uhoh, some sequences weren't closed.
1873 my $x= "...";
1874 while(@stack) {
1875 push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };
1876 # Hmmmmm!
1877
1878 my $code = (pop @lineage)->[0];
1879 my $ender_length = pop @stack;
1880 if($ender_length) {
1881 --$ender_length;
1882 $x = $code . ("<" x $ender_length) . " $x " . (">" x $ender_length);
1883 } else {
1884 $x = $code . "<$x>";
1885 }
1886 }
1887 DEBUG > 1 and print "Unterminated $x sequence\n";
1888 $self->whine($start_line,
1889 "Unterminated $x sequence",
1890 );
1891 }
1892
1893 return $treelet;
1894}
1895
1896#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1897
1898sub text_content_of_treelet { # method: $parser->text_content_of_treelet($lol)
1899 return stringify_lol($_[1]);
1900}
1901
1902sub stringify_lol { # function: stringify_lol($lol)
1903 my $string_form = '';
1904 _stringify_lol( $_[0] => \$string_form );
1905 return $string_form;
1906}
1907
1908sub _stringify_lol { # the real recursor
1909 my($lol, $to) = @_;
1910 for(my $i = 2; $i < @$lol; ++$i) {
1911 if( ref($lol->[$i] || '') and UNIVERSAL::isa($lol->[$i], 'ARRAY') ) {
1912 _stringify_lol( $lol->[$i], $to); # recurse!
1913 } else {
1914 $$to .= $lol->[$i];
1915 }
1916 }
1917 return;
1918}
1919
1920#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1921
1922sub _dump_curr_open { # return a string representation of the stack
1923 my $curr_open = $_[0]{'curr_open'};
1924
1925 return '[empty]' unless @$curr_open;
1926 return join '; ',
1927 map {;
1928 ($_->[0] eq '=for')
1929 ? ( ($_->[1]{'~really'} || '=over')
1930 . ' ' . $_->[1]{'target'})
1931 : $_->[0]
1932 }
1933 @$curr_open
1934 ;
1935}
1936
1937###########################################################################
193817µsmy %pretty_form = (
1939 "\a" => '\a', # ding!
1940 "\b" => '\b', # BS
1941 "\e" => '\e', # ESC
1942 "\f" => '\f', # FF
1943 "\t" => '\t', # tab
1944 "\cm" => '\cm',
1945 "\cj" => '\cj',
1946 "\n" => '\n', # probably overrides one of either \cm or \cj
1947 '"' => '\"',
1948 '\\' => '\\\\',
1949 '$' => '\\$',
1950 '@' => '\\@',
1951 '%' => '\\%',
1952 '#' => '\\#',
1953);
1954
1955sub pretty { # adopted from Class::Classless
1956 # Not the most brilliant routine, but passable.
1957 # Don't give it a cyclic data structure!
1958 my @stuff = @_; # copy
1959 my $x;
1960 my $out =
1961 # join ",\n" .
1962 join ", ",
1963 map {;
1964 if(!defined($_)) {
1965 "undef";
1966 } elsif(ref($_) eq 'ARRAY' or ref($_) eq 'Pod::Simple::LinkSection') {
1967 $x = "[ " . pretty(@$_) . " ]" ;
1968 $x;
1969 } elsif(ref($_) eq 'SCALAR') {
1970 $x = "\\" . pretty($$_) ;
1971 $x;
1972 } elsif(ref($_) eq 'HASH') {
1973 my $hr = $_;
1974 $x = "{" . join(", ",
1975 map(pretty($_) . '=>' . pretty($hr->{$_}),
1976 sort keys %$hr ) ) . "}" ;
1977 $x;
1978 } elsif(!length($_)) { q{''} # empty string
1979 } elsif(
1980 $_ eq '0' # very common case
1981 or(
1982 m/^-?(?:[123456789]\d*|0)(?:\.\d+)?$/s
1983 and $_ ne '-0' # the strange case that that RE lets thru
1984 )
1985 ) { $_;
1986 } else {
1987 if( chr(65) eq 'A' ) {
1988 s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])>
1989 #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg;
1990 <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg;
1991 } else {
1992 # We're in some crazy non-ASCII world!
1993 s<([^abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])>
1994 #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg;
1995 <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg;
1996 }
1997 qq{"$_"};
1998 }
1999 } @stuff;
2000 # $out =~ s/\n */ /g if length($out) < 75;
2001 return $out;
2002}
2003
2004#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2005
2006# A rather unsubtle method of blowing away all the state information
2007# from a parser object so it can be reused. Provided as a utility for
2008# backward compatibility in Pod::Man, etc. but not recommended for
2009# general use.
2010
2011sub reinit {
2012 my $self = shift;
2013 foreach (qw(source_dead source_filename doc_has_started
2014start_of_pod_block content_seen last_was_blank paras curr_open
2015line_count pod_para_count in_pod ~tried_gen_errata errata errors_seen
2016Title)) {
2017
2018 delete $self->{$_};
2019 }
2020}
2021
2022#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
202317µs1;
2024