Profile of Pod/Simple/BlackBox.pm

Filename	/Users/timbo/perl5/perlbrew/perls/perl-5.18.2/lib/5.18.2/Pod/Simple/BlackBox.pm
Statements	Executed 17 statements in 7.67ms

Subroutines
Calls	P	F	Exclusive Time	Inclusive Time	Subroutine
1	1	1	13µs	14µs	Pod::Simple::BlackBox::::BEGIN@22Pod::Simple::BlackBox::BEGIN@22
1	1	1	11µs	14µs	Pod::Simple::BlackBox::::BEGIN@1183Pod::Simple::BlackBox::BEGIN@1183
1	1	1	6µs	23µs	Pod::Simple::BlackBox::::BEGIN@25Pod::Simple::BlackBox::BEGIN@25
1	1	1	6µs	16µs	Pod::Simple::BlackBox::::BEGIN@23Pod::Simple::BlackBox::BEGIN@23
1	1	1	4µs	4µs	Pod::Simple::BlackBox::::BEGIN@28Pod::Simple::BlackBox::BEGIN@28
1	1	1	3µs	3µs	Pod::Simple::BlackBox::::BEGIN@24Pod::Simple::BlackBox::BEGIN@24
0	0	0	0s	0s	Pod::Simple::BlackBox::_closers_for_all_curr_open
0	0	0	0s	0s	Pod::Simple::BlackBox::_dump_curr_open
0	0	0	0s	0s	Pod::Simple::BlackBox::::_gen_errataPod::Simple::BlackBox::_gen_errata
0	0	0	0s	0s	Pod::Simple::BlackBox::_handle_encoding_line
0	0	0	0s	0s	Pod::Simple::BlackBox::_handle_encoding_second_level
0	0	0	0s	0s	Pod::Simple::BlackBox::::_ponder_DataPod::Simple::BlackBox::_ponder_Data
0	0	0	0s	0s	Pod::Simple::BlackBox::::_ponder_PlainPod::Simple::BlackBox::_ponder_Plain
0	0	0	0s	0s	Pod::Simple::BlackBox::_ponder_Verbatim
0	0	0	0s	0s	Pod::Simple::BlackBox::::_ponder_backPod::Simple::BlackBox::_ponder_back
0	0	0	0s	0s	Pod::Simple::BlackBox::::_ponder_beginPod::Simple::BlackBox::_ponder_begin
0	0	0	0s	0s	Pod::Simple::BlackBox::_ponder_doc_end
0	0	0	0s	0s	Pod::Simple::BlackBox::::_ponder_endPod::Simple::BlackBox::_ponder_end
0	0	0	0s	0s	Pod::Simple::BlackBox::::_ponder_forPod::Simple::BlackBox::_ponder_for
0	0	0	0s	0s	Pod::Simple::BlackBox::::_ponder_itemPod::Simple::BlackBox::_ponder_item
0	0	0	0s	0s	Pod::Simple::BlackBox::::_ponder_overPod::Simple::BlackBox::_ponder_over
0	0	0	0s	0s	Pod::Simple::BlackBox::_ponder_paragraph_buffer
0	0	0	0s	0s	Pod::Simple::BlackBox::::_ponder_podPod::Simple::BlackBox::_ponder_pod
0	0	0	0s	0s	Pod::Simple::BlackBox::::_stringify_lolPod::Simple::BlackBox::_stringify_lol
0	0	0	0s	0s	Pod::Simple::BlackBox::_traverse_treelet_bit
0	0	0	0s	0s	Pod::Simple::BlackBox::_treelet_from_formatting_codes
0	0	0	0s	0s	Pod::Simple::BlackBox::_verbatim_format
0	0	0	0s	0s	Pod::Simple::BlackBox::::parse_linePod::Simple::BlackBox::parse_line
0	0	0	0s	0s	Pod::Simple::BlackBox::::parse_linesPod::Simple::BlackBox::parse_lines
0	0	0	0s	0s	Pod::Simple::BlackBox::::prettyPod::Simple::BlackBox::pretty
0	0	0	0s	0s	Pod::Simple::BlackBox::::reinitPod::Simple::BlackBox::reinit
0	0	0	0s	0s	Pod::Simple::BlackBox::::stringify_lolPod::Simple::BlackBox::stringify_lol
0	0	0	0s	0s	Pod::Simple::BlackBox::text_content_of_treelet

Call graph for these subroutines as a Graphviz dot language file.

Line	State ments	Time on line	Calls	Time in subs	Code
1
2					package Pod::Simple::BlackBox;
3					#
4					# "What's in the box?" "Pain."
5					#
6					###########################################################################
7					#
8					# This is where all the scary things happen: parsing lines into
9					# paragraphs; and then into directives, verbatims, and then also
10					# turning formatting sequences into treelets.
11					#
12					# Are you really sure you want to read this code?
13					#
14					#-----------------------------------------------------------------------------
15					#
16					# The basic work of this module Pod::Simple::BlackBox is doing the dirty work
17					# of parsing Pod into treelets (generally one per non-verbatim paragraph), and
18					# to call the proper callbacks on the treelets.
19					#
20					# Every node in a treelet is a ['name', {attrhash}, ...children...]
21
22	2	19µs	2	16µs	# spent 14µs (13+2) within Pod::Simple::BlackBox::BEGIN@22 which was called: # once (13µs+2µs) by Pod::Simple::LinkSection::BEGIN@9 at line 22 use integer; # vroom! # spent 14µs making 1 call to Pod::Simple::BlackBox::BEGIN@22 # spent 2µs making 1 call to integer::import
23	2	18µs	2	27µs	# spent 16µs (6+10) within Pod::Simple::BlackBox::BEGIN@23 which was called: # once (6µs+10µs) by Pod::Simple::LinkSection::BEGIN@9 at line 23 use strict; # spent 16µs making 1 call to Pod::Simple::BlackBox::BEGIN@23 # spent 10µs making 1 call to strict::import
24	2	18µs	1	3µs	# spent 3µs within Pod::Simple::BlackBox::BEGIN@24 which was called: # once (3µs+0s) by Pod::Simple::LinkSection::BEGIN@9 at line 24 use Carp (); # spent 3µs making 1 call to Pod::Simple::BlackBox::BEGIN@24
25	2	37µs	2	40µs	# spent 23µs (6+17) within Pod::Simple::BlackBox::BEGIN@25 which was called: # once (6µs+17µs) by Pod::Simple::LinkSection::BEGIN@9 at line 25 use vars qw($VERSION ); # spent 23µs making 1 call to Pod::Simple::BlackBox::BEGIN@25 # spent 17µs making 1 call to vars::import
26	1	800ns			$VERSION = '3.28';
27					#use constant DEBUG => 7;
28					# spent 4µs within Pod::Simple::BlackBox::BEGIN@28 which was called: # once (4µs+0s) by Pod::Simple::LinkSection::BEGIN@9 at line 31 BEGIN {
29	1	200ns			require Pod::Simple;
30	1	4µs			*DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG
31	1	4.30ms	1	4µs	} # spent 4µs making 1 call to Pod::Simple::BlackBox::BEGIN@28
32
33					#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
34
35					sub parse_line { shift->parse_lines(@_) } # alias
36
37					# - - - Turn back now! Run away! - - -
38
39					sub parse_lines { # Usage: $parser->parse_lines(@lines)
40					# an undef means end-of-stream
41					my $self = shift;
42
43					my $code_handler = $self->{'code_handler'};
44					my $cut_handler = $self->{'cut_handler'};
45					my $wl_handler = $self->{'whiteline_handler'};
46					$self->{'line_count'} \|\|= 0;
47
48					my $scratch;
49
50					DEBUG > 4 and
51					print "# Parsing starting at line ", $self->{'line_count'}, ".\n";
52
53					DEBUG > 5 and
54					print "# About to parse lines: ",
55					join(' ', map defined($_) ? "[$_]" : "EOF", @_), "\n";
56
57					my $paras = ($self->{'paras'} \|\|= []);
58					# paragraph buffer. Because we need to defer processing of =over
59					# directives and verbatim paragraphs. We call _ponder_paragraph_buffer
60					# to process this.
61
62					$self->{'pod_para_count'} \|\|= 0;
63
64					my $line;
65					foreach my $source_line (@_) {
66					if( $self->{'source_dead'} ) {
67					DEBUG > 4 and print "# Source is dead.\n";
68					last;
69					}
70
71					unless( defined $source_line ) {
72					DEBUG > 4 and print "# Undef-line seen.\n";
73
74					push @$paras, ['~end', {'start_line' => $self->{'line_count'}}];
75					push @$paras, $paras->[-1], $paras->[-1];
76					# So that it definitely fills the buffer.
77					$self->{'source_dead'} = 1;
78					$self->_ponder_paragraph_buffer;
79					next;
80					}
81
82
83					if( $self->{'line_count'}++ ) {
84					($line = $source_line) =~ tr/\n\r//d;
85					# If we don't have two vars, we'll end up with that there
86					# tr/// modding the (potentially read-only) original source line!
87
88					} else {
89					DEBUG > 2 and print "First line: [$source_line]\n";
90
91					if( ($line = $source_line) =~ s/^\xEF\xBB\xBF//s ) {
92					DEBUG and print "UTF-8 BOM seen. Faking a '=encoding utf8'.\n";
93					$self->_handle_encoding_line( "=encoding utf8" );
94					delete $self->{'_processed_encoding'};
95					$line =~ tr/\n\r//d;
96
97					} elsif( $line =~ s/^\xFE\xFF//s ) {
98					DEBUG and print "Big-endian UTF-16 BOM seen. Aborting parsing.\n";
99					$self->scream(
100					$self->{'line_count'},
101					"UTF16-BE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet."
102					);
103					splice @_;
104					push @_, undef;
105					next;
106
107					# TODO: implement somehow?
108
109					} elsif( $line =~ s/^\xFF\xFE//s ) {
110					DEBUG and print "Little-endian UTF-16 BOM seen. Aborting parsing.\n";
111					$self->scream(
112					$self->{'line_count'},
113					"UTF16-LE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet."
114					);
115					splice @_;
116					push @_, undef;
117					next;
118
119					# TODO: implement somehow?
120
121					} else {
122					DEBUG > 2 and print "First line is BOM-less.\n";
123					($line = $source_line) =~ tr/\n\r//d;
124					}
125					}
126
127					# Try to guess encoding. Inlined for performance reasons.
128					if(!$self->{'parse_characters'} && !$self->{'encoding'}
129					&& ($self->{'in_pod'} \|\| $line =~ /^=/s)
130					&& $line =~ /[^\x00-\x7f]/
131					) {
132					my $encoding = $line =~ /^[\x00-\x7f]*[\xC0-\xFD][\x80-\xBF]/ ? 'UTF-8' : 'ISO8859-1';
133					$self->_handle_encoding_line( "=encoding $encoding" );
134					$self->{'_transcoder'} && $self->{'_transcoder'}->($line);
135
136					my ($word) = $line =~ /(\S[^\x00-\x7f]\S)/;
137
138					$self->whine(
139					$self->{'line_count'},
140					"Non-ASCII character seen before =encoding in '$word'. Assuming $encoding"
141					);
142					}
143
144					DEBUG > 5 and print "# Parsing line: [$line]\n";
145
146					if(!$self->{'in_pod'}) {
147					if($line =~ m/^=([a-zA-Z]+)/s) {
148					if($1 eq 'cut') {
149					$self->scream(
150					$self->{'line_count'},
151					"=cut found outside a pod block. Skipping to next block."
152					);
153
154					## Before there were errata sections in the world, it was
155					## least-pessimal to abort processing the file. But now we can
156					## just barrel on thru (but still not start a pod block).
157					#splice @_;
158					#push @_, undef;
159
160					next;
161					} else {
162					$self->{'in_pod'} = $self->{'start_of_pod_block'}
163					= $self->{'last_was_blank'} = 1;
164					# And fall thru to the pod-mode block further down
165					}
166					} else {
167					DEBUG > 5 and print "# It's a code-line.\n";
168					$code_handler->(map $_, $line, $self->{'line_count'}, $self)
169					if $code_handler;
170					# Note: this may cause code to be processed out of order relative
171					# to pods, but in order relative to cuts.
172
173					# Note also that we haven't yet applied the transcoding to $line
174					# by time we call $code_handler!
175
176					if( $line =~ m/^#\sline\s+(\d+)\s(?:\s"([^"]+)")?\s*$/ ) {
177					# That RE is from perlsyn, section "Plain Old Comments (Not!)",
178					#$fname = $2 if defined $2;
179					#DEBUG > 1 and defined $2 and print "# Setting fname to \"$fname\"\n";
180					DEBUG > 1 and print "# Setting nextline to $1\n";
181					$self->{'line_count'} = $1 - 1;
182					}
183
184					next;
185					}
186					}
187
188					# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
189					# Else we're in pod mode:
190
191					# Apply any necessary transcoding:
192					$self->{'_transcoder'} && $self->{'_transcoder'}->($line);
193
194					# HERE WE CATCH =encoding EARLY!
195					if( $line =~ m/^=encoding\s+\S+\s*$/s ) {
196					next if $self->parse_characters; # Ignore this line
197					$line = $self->_handle_encoding_line( $line );
198					}
199
200					if($line =~ m/^=cut/s) {
201					# here ends the pod block, and therefore the previous pod para
202					DEBUG > 1 and print "Noting =cut at line ${$self}{'line_count'}\n";
203					$self->{'in_pod'} = 0;
204					# ++$self->{'pod_para_count'};
205					$self->_ponder_paragraph_buffer();
206					# by now it's safe to consider the previous paragraph as done.
207					$cut_handler->(map $_, $line, $self->{'line_count'}, $self)
208					if $cut_handler;
209
210					# TODO: add to docs: Note: this may cause cuts to be processed out
211					# of order relative to pods, but in order relative to code.
212
213					} elsif($line =~ m/^(\s*)$/s) { # it's a blank line
214					if (defined $1 and $1 =~ /[^\S\r\n]/) { # it's a white line
215					$wl_handler->(map $_, $line, $self->{'line_count'}, $self)
216					if $wl_handler;
217					}
218
219					if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') {
220					DEBUG > 1 and print "Saving blank line at line ${$self}{'line_count'}\n";
221					push @{$paras->[-1]}, $line;
222					} # otherwise it's not interesting
223
224					if(!$self->{'start_of_pod_block'} and !$self->{'last_was_blank'}) {
225					DEBUG > 1 and print "Noting para ends with blank line at ${$self}{'line_count'}\n";
226					}
227
228					$self->{'last_was_blank'} = 1;
229
230					} elsif($self->{'last_was_blank'}) { # A non-blank line starting a new para...
231
232					if($line =~ m/^(=[a-zA-Z][a-zA-Z0-9])(?:\s+\|$)(.)/s) {
233					# THIS IS THE ONE PLACE WHERE WE CONSTRUCT NEW DIRECTIVE OBJECTS
234					my $new = [$1, {'start_line' => $self->{'line_count'}}, $2];
235					# Note that in "=head1 foo", the WS is lost.
236					# Example: ['=head1', {'start_line' => 123}, ' foo']
237
238					++$self->{'pod_para_count'};
239
240					$self->_ponder_paragraph_buffer();
241					# by now it's safe to consider the previous paragraph as done.
242
243					push @$paras, $new; # the new incipient paragraph
244					DEBUG > 1 and print "Starting new ${$paras}[-1][0] para at line ${$self}{'line_count'}\n";
245
246					} elsif($line =~ m/^\s/s) {
247
248					if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') {
249					DEBUG > 1 and print "Resuming verbatim para at line ${$self}{'line_count'}\n";
250					push @{$paras->[-1]}, $line;
251					} else {
252					++$self->{'pod_para_count'};
253					$self->_ponder_paragraph_buffer();
254					# by now it's safe to consider the previous paragraph as done.
255					DEBUG > 1 and print "Starting verbatim para at line ${$self}{'line_count'}\n";
256					push @$paras, ['~Verbatim', {'start_line' => $self->{'line_count'}}, $line];
257					}
258					} else {
259					++$self->{'pod_para_count'};
260					$self->_ponder_paragraph_buffer();
261					# by now it's safe to consider the previous paragraph as done.
262					push @$paras, ['~Para', {'start_line' => $self->{'line_count'}}, $line];
263					DEBUG > 1 and print "Starting plain para at line ${$self}{'line_count'}\n";
264					}
265					$self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0;
266
267					} else {
268					# It's a non-blank line /continuing/ the current para
269					if(@$paras) {
270					DEBUG > 2 and print "Line ${$self}{'line_count'} continues current paragraph\n";
271					push @{$paras->[-1]}, $line;
272					} else {
273					# Unexpected case!
274					die "Continuing a paragraph but \@\$paras is empty?";
275					}
276					$self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0;
277					}
278
279					} # ends the big while loop
280
281					DEBUG > 1 and print(pretty(@$paras), "\n");
282					return $self;
283					}
284
285					#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
286
287					sub _handle_encoding_line {
288					my($self, $line) = @_;
289
290					return if $self->parse_characters;
291
292					# The point of this routine is to set $self->{'_transcoder'} as indicated.
293
294					return $line unless $line =~ m/^=encoding\s+(\S+)\s*$/s;
295					DEBUG > 1 and print "Found an encoding line \"=encoding $1\"\n";
296
297					my $e = $1;
298					my $orig = $e;
299					push @{ $self->{'encoding_command_reqs'} }, "=encoding $orig";
300
301					my $enc_error;
302
303					# Cf. perldoc Encode and perldoc Encode::Supported
304
305					require Pod::Simple::Transcode;
306
307					if( $self->{'encoding'} ) {
308					my $norm_current = $self->{'encoding'};
309					my $norm_e = $e;
310					foreach my $that ($norm_current, $norm_e) {
311					$that = lc($that);
312					$that =~ s/[-_]//g;
313					}
314					if($norm_current eq $norm_e) {
315					DEBUG > 1 and print "The '=encoding $orig' line is ",
316					"redundant. ($norm_current eq $norm_e). Ignoring.\n";
317					$enc_error = '';
318					# But that doesn't necessarily mean that the earlier one went okay
319					} else {
320					$enc_error = "Encoding is already set to " . $self->{'encoding'};
321					DEBUG > 1 and print $enc_error;
322					}
323					} elsif (
324					# OK, let's turn on the encoding
325					do {
326					DEBUG > 1 and print " Setting encoding to $e\n";
327					$self->{'encoding'} = $e;
328					1;
329					}
330					and $e eq 'HACKRAW'
331					) {
332					DEBUG and print " Putting in HACKRAW (no-op) encoding mode.\n";
333
334					} elsif( Pod::Simple::Transcode::->encoding_is_available($e) ) {
335
336					die($enc_error = "WHAT? _transcoder is already set?!")
337					if $self->{'_transcoder'}; # should never happen
338					require Pod::Simple::Transcode;
339					$self->{'_transcoder'} = Pod::Simple::Transcode::->make_transcoder($e);
340					eval {
341					my @x = ('', "abc", "123");
342					$self->{'_transcoder'}->(@x);
343					};
344					$@ && die( $enc_error =
345					"Really unexpected error setting up encoding $e: $@\nAborting"
346					);
347					$self->{'detected_encoding'} = $e;
348
349					} else {
350					my @supported = Pod::Simple::Transcode::->all_encodings;
351
352					# Note unsupported, and complain
353					DEBUG and print " Encoding [$e] is unsupported.",
354					"\nSupporteds: @supported\n";
355					my $suggestion = '';
356
357					# Look for a near match:
358					my $norm = lc($e);
359					$norm =~ tr[-_][]d;
360					my $n;
361					foreach my $enc (@supported) {
362					$n = lc($enc);
363					$n =~ tr[-_][]d;
364					next unless $n eq $norm;
365					$suggestion = " (Maybe \"$e\" should be \"$enc\"?)";
366					last;
367					}
368					my $encmodver = Pod::Simple::Transcode::->encmodver;
369					$enc_error = join '' =>
370					"This document probably does not appear as it should, because its ",
371					"\"=encoding $e\" line calls for an unsupported encoding.",
372					$suggestion, " [$encmodver\'s supported encodings are: @supported]"
373					;
374
375					$self->scream( $self->{'line_count'}, $enc_error );
376					}
377					push @{ $self->{'encoding_command_statuses'} }, $enc_error;
378					if (defined($self->{'_processed_encoding'})) {
379					# Should never happen
380					die "Nested processed encoding.";
381					}
382					$self->{'_processed_encoding'} = $orig;
383
384					return $line;
385					}
386
387					# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
388
389					sub _handle_encoding_second_level {
390					# By time this is called, the encoding (if well formed) will already
391					# have been acted one.
392					my($self, $para) = @_;
393					my @x = @$para;
394					my $content = join ' ', splice @x, 2;
395					$content =~ s/^\s+//s;
396					$content =~ s/\s+$//s;
397
398					DEBUG > 2 and print "Ogling encoding directive: =encoding $content\n";
399
400					if (defined($self->{'_processed_encoding'})) {
401					#if($content ne $self->{'_processed_encoding'}) {
402					# Could it happen?
403					#}
404					delete $self->{'_processed_encoding'};
405					# It's already been handled. Check for errors.
406					if(! $self->{'encoding_command_statuses'} ) {
407					DEBUG > 2 and print " CRAZY ERROR: It wasn't really handled?!\n";
408					} elsif( $self->{'encoding_command_statuses'}[-1] ) {
409					$self->whine( $para->[1]{'start_line'},
410					sprintf "Couldn't do %s: %s",
411					$self->{'encoding_command_reqs' }[-1],
412					$self->{'encoding_command_statuses'}[-1],
413					);
414					} else {
415					DEBUG > 2 and print " (Yup, it was successfully handled already.)\n";
416					}
417
418					} else {
419					# Otherwise it's a syntax error
420					$self->whine( $para->[1]{'start_line'},
421					"Invalid =encoding syntax: $content"
422					);
423					}
424
425					return;
426					}
427
428					#~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`
429
430					{
431	2	1µs			my $m = -321; # magic line number
432
433					sub _gen_errata {
434					my $self = $_[0];
435					# Return 0 or more fake-o paragraphs explaining the accumulated
436					# errors on this document.
437
438					return() unless $self->{'errata'} and keys %{$self->{'errata'}};
439
440					my @out;
441
442					foreach my $line (sort {$a <=> $b} keys %{$self->{'errata'}}) {
443					push @out,
444					['=item', {'start_line' => $m}, "Around line $line:"],
445					map( ['~Para', {'start_line' => $m, '~cooked' => 1},
446					#['~Top', {'start_line' => $m},
447					$_
448					#]
449					],
450					@{$self->{'errata'}{$line}}
451					)
452					;
453					}
454
455					# TODO: report of unknown entities? unrenderable characters?
456
457					unshift @out,
458					['=head1', {'start_line' => $m, 'errata' => 1}, 'POD ERRORS'],
459					['~Para', {'start_line' => $m, '~cooked' => 1, 'errata' => 1},
460					"Hey! ",
461					['B', {},
462					'The above document had some coding errors, which are explained below:'
463					]
464					],
465					['=over', {'start_line' => $m, 'errata' => 1}, ''],
466					;
467
468					push @out,
469					['=back', {'start_line' => $m, 'errata' => 1}, ''],
470					;
471
472					DEBUG and print "\n<<\n", pretty(\@out), "\n>>\n\n";
473
474					return @out;
475					}
476
477					}
478
479					#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
480
481					##############################################################################
482					##
483					## stop reading now stop reading now stop reading now stop reading now stop
484					##
485					## HERE IT BECOMES REALLY SCARY
486					##
487					## stop reading now stop reading now stop reading now stop reading now stop
488					##
489					##############################################################################
490
491					sub _ponder_paragraph_buffer {
492
493					# Para-token types as found in the buffer.
494					# ~Verbatim, ~Para, ~end, =head1..4, =for, =begin, =end,
495					# =over, =back, =item
496					# and the null =pod (to be complained about if over one line)
497					#
498					# "~data" paragraphs are something we generate at this level, depending on
499					# a currently open =over region
500
501					# Events fired: Begin and end for:
502					# directivename (like head1 .. head4), item, extend,
503					# for (from =begin...=end, =for),
504					# over-bullet, over-number, over-text, over-block,
505					# item-bullet, item-number, item-text,
506					# Document,
507					# Data, Para, Verbatim
508					# B, C, longdirname (TODO -- wha?), etc. for all directives
509					#
510
511					my $self = $_[0];
512					my $paras;
513					return unless @{$paras = $self->{'paras'}};
514					my $curr_open = ($self->{'curr_open'} \|\|= []);
515
516					my $scratch;
517
518					DEBUG > 10 and print "# Paragraph buffer: <<", pretty($paras), ">>\n";
519
520					# We have something in our buffer. So apparently the document has started.
521					unless($self->{'doc_has_started'}) {
522					$self->{'doc_has_started'} = 1;
523
524					my $starting_contentless;
525					$starting_contentless =
526					(
527					!@$curr_open
528					and @$paras and ! grep $_->[0] ne '~end', @$paras
529					# i.e., if the paras is all ~ends
530					)
531					;
532					DEBUG and print "# Starting ",
533					$starting_contentless ? 'contentless' : 'contentful',
534					" document\n"
535					;
536
537					$self->_handle_element_start(
538					($scratch = 'Document'),
539					{
540					'start_line' => $paras->[0][1]{'start_line'},
541					$starting_contentless ? ( 'contentless' => 1 ) : (),
542					},
543					);
544					}
545
546					my($para, $para_type);
547					while(@$paras) {
548					last if @$paras == 1 and
549					( $paras->[0][0] eq '=over' or $paras->[0][0] eq '~Verbatim'
550					or $paras->[0][0] eq '=item' )
551					;
552					# Those're the three kinds of paragraphs that require lookahead.
553					# Actually, an "=item Foo" inside an <over type=text> region
554					# and any =item inside an <over type=block> region (rare)
555					# don't require any lookahead, but all others (bullets
556					# and numbers) do.
557
558					# TODO: whinge about many kinds of directives in non-resolving =for regions?
559					# TODO: many? like what? =head1 etc?
560
561					$para = shift @$paras;
562					$para_type = $para->[0];
563
564					DEBUG > 1 and print "Pondering a $para_type paragraph, given the stack: (",
565					$self->_dump_curr_open(), ")\n";
566
567					if($para_type eq '=for') {
568					next if $self->_ponder_for($para,$curr_open,$paras);
569
570					} elsif($para_type eq '=begin') {
571					next if $self->_ponder_begin($para,$curr_open,$paras);
572
573					} elsif($para_type eq '=end') {
574					next if $self->_ponder_end($para,$curr_open,$paras);
575
576					} elsif($para_type eq '~end') { # The virtual end-document signal
577					next if $self->_ponder_doc_end($para,$curr_open,$paras);
578					}
579
580
581					# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
582					#~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
583					if(grep $_->[1]{'~ignore'}, @$curr_open) {
584					DEBUG > 1 and
585					print "Skipping $para_type paragraph because in ignore mode.\n";
586					next;
587					}
588					#~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
589					# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
590
591					if($para_type eq '=pod') {
592					$self->_ponder_pod($para,$curr_open,$paras);
593
594					} elsif($para_type eq '=over') {
595					next if $self->_ponder_over($para,$curr_open,$paras);
596
597					} elsif($para_type eq '=back') {
598					next if $self->_ponder_back($para,$curr_open,$paras);
599
600					} else {
601
602					# All non-magical codes!!!
603
604					# Here we start using $para_type for our own twisted purposes, to
605					# mean how it should get treated, not as what the element name
606					# should be.
607
608					DEBUG > 1 and print "Pondering non-magical $para_type\n";
609
610					my $i;
611
612					# Enforce some =headN discipline
613					if($para_type =~ m/^=head\d$/s
614					and ! $self->{'accept_heads_anywhere'}
615					and @$curr_open
616					and $curr_open->[-1][0] eq '=over'
617					) {
618					DEBUG > 2 and print "'=$para_type' inside an '=over'!\n";
619					$self->whine(
620					$para->[1]{'start_line'},
621					"You forgot a '=back' before '$para_type'"
622					);
623					unshift @$paras, ['=back', {}, ''], $para; # close the =over
624					next;
625					}
626
627
628					if($para_type eq '=item') {
629
630					my $over;
631					unless(@$curr_open and
632					$over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) {
633					$self->whine(
634					$para->[1]{'start_line'},
635					"'=item' outside of any '=over'"
636					);
637					unshift @$paras,
638					['=over', {'start_line' => $para->[1]{'start_line'}}, ''],
639					$para
640					;
641					next;
642					}
643
644
645					my $over_type = $over->[1]{'~type'};
646
647					if(!$over_type) {
648					# Shouldn't happen1
649					die "Typeless over in stack, starting at line "
650					. $over->[1]{'start_line'};
651
652					} elsif($over_type eq 'block') {
653					unless($curr_open->[-1][1]{'~bitched_about'}) {
654					$curr_open->[-1][1]{'~bitched_about'} = 1;
655					$self->whine(
656					$curr_open->[-1][1]{'start_line'},
657					"You can't have =items (as at line "
658					. $para->[1]{'start_line'}
659					. ") unless the first thing after the =over is an =item"
660					);
661					}
662					# Just turn it into a paragraph and reconsider it
663					$para->[0] = '~Para';
664					unshift @$paras, $para;
665					next;
666
667					} elsif($over_type eq 'text') {
668					my $item_type = $self->_get_item_type($para);
669					# That kills the content of the item if it's a number or bullet.
670					DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
671
672					if($item_type eq 'text') {
673					# Nothing special needs doing for 'text'
674					} elsif($item_type eq 'number' or $item_type eq 'bullet') {
675					$self->whine(
676					$para->[1]{'start_line'},
677					"Expected text after =item, not a $item_type"
678					);
679					# Undo our clobbering:
680					push @$para, $para->[1]{'~orig_content'};
681					delete $para->[1]{'number'};
682					# Only a PROPER item-number element is allowed
683					# to have a number attribute.
684					} else {
685					die "Unhandled item type $item_type"; # should never happen
686					}
687
688					# =item-text thingies don't need any assimilation, it seems.
689
690					} elsif($over_type eq 'number') {
691					my $item_type = $self->_get_item_type($para);
692					# That kills the content of the item if it's a number or bullet.
693					DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
694
695					my $expected_value = ++ $curr_open->[-1][1]{'~counter'};
696
697					if($item_type eq 'bullet') {
698					# Hm, it's not numeric. Correct for this.
699					$para->[1]{'number'} = $expected_value;
700					$self->whine(
701					$para->[1]{'start_line'},
702					"Expected '=item $expected_value'"
703					);
704					push @$para, $para->[1]{'~orig_content'};
705					# restore the bullet, blocking the assimilation of next para
706
707					} elsif($item_type eq 'text') {
708					# Hm, it's not numeric. Correct for this.
709					$para->[1]{'number'} = $expected_value;
710					$self->whine(
711					$para->[1]{'start_line'},
712					"Expected '=item $expected_value'"
713					);
714					# Text content will still be there and will block next ~Para
715
716					} elsif($item_type ne 'number') {
717					die "Unknown item type $item_type"; # should never happen
718
719					} elsif($expected_value == $para->[1]{'number'}) {
720					DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n";
721
722					} else {
723					DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'},
724					" instead of the expected value of $expected_value\n";
725					$self->whine(
726					$para->[1]{'start_line'},
727					"You have '=item " . $para->[1]{'number'} .
728					"' instead of the expected '=item $expected_value'"
729					);
730					$para->[1]{'number'} = $expected_value; # correcting!!
731					}
732
733					if(@$para == 2) {
734					# For the cases where we /didn't/ push to @$para
735					if($paras->[0][0] eq '~Para') {
736					DEBUG and print "Assimilating following ~Para content into $over_type item\n";
737					push @$para, splice @{shift @$paras},2;
738					} else {
739					DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
740					push @$para, ''; # Just so it's not contentless
741					}
742					}
743
744
745					} elsif($over_type eq 'bullet') {
746					my $item_type = $self->_get_item_type($para);
747					# That kills the content of the item if it's a number or bullet.
748					DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
749
750					if($item_type eq 'bullet') {
751					# as expected!
752
753					if( $para->[1]{'~_freaky_para_hack'} ) {
754					DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n";
755					push @$para, delete $para->[1]{'~_freaky_para_hack'};
756					}
757
758					} elsif($item_type eq 'number') {
759					$self->whine(
760					$para->[1]{'start_line'},
761					"Expected '=item *'"
762					);
763					push @$para, $para->[1]{'~orig_content'};
764					# and block assimilation of the next paragraph
765					delete $para->[1]{'number'};
766					# Only a PROPER item-number element is allowed
767					# to have a number attribute.
768					} elsif($item_type eq 'text') {
769					$self->whine(
770					$para->[1]{'start_line'},
771					"Expected '=item *'"
772					);
773					# But doesn't need processing. But it'll block assimilation
774					# of the next para.
775					} else {
776					die "Unhandled item type $item_type"; # should never happen
777					}
778
779					if(@$para == 2) {
780					# For the cases where we /didn't/ push to @$para
781					if($paras->[0][0] eq '~Para') {
782					DEBUG and print "Assimilating following ~Para content into $over_type item\n";
783					push @$para, splice @{shift @$paras},2;
784					} else {
785					DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
786					push @$para, ''; # Just so it's not contentless
787					}
788					}
789
790					} else {
791					die "Unhandled =over type \"$over_type\"?";
792					# Shouldn't happen!
793					}
794
795					$para_type = 'Plain';
796					$para->[0] .= '-' . $over_type;
797					# Whew. Now fall thru and process it.
798
799
800					} elsif($para_type eq '=extend') {
801					# Well, might as well implement it here.
802					$self->_ponder_extend($para);
803					next; # and skip
804					} elsif($para_type eq '=encoding') {
805					# Not actually acted on here, but we catch errors here.
806					$self->_handle_encoding_second_level($para);
807					next unless $self->keep_encoding_directive;
808					$para_type = 'Plain';
809					} elsif($para_type eq '~Verbatim') {
810					$para->[0] = 'Verbatim';
811					$para_type = '?Verbatim';
812					} elsif($para_type eq '~Para') {
813					$para->[0] = 'Para';
814					$para_type = '?Plain';
815					} elsif($para_type eq 'Data') {
816					$para->[0] = 'Data';
817					$para_type = '?Data';
818					} elsif( $para_type =~ s/^=//s
819					and defined( $para_type = $self->{'accept_directives'}{$para_type} )
820					) {
821					DEBUG > 1 and print " Pondering known directive ${$para}[0] as $para_type\n";
822					} else {
823					# An unknown directive!
824					DEBUG > 1 and printf "Unhandled directive %s (Handled: %s)\n",
825					$para->[0], join(' ', sort keys %{$self->{'accept_directives'}} )
826					;
827					$self->whine(
828					$para->[1]{'start_line'},
829					"Unknown directive: $para->[0]"
830					);
831
832					# And maybe treat it as text instead of just letting it go?
833					next;
834					}
835
836					if($para_type =~ s/^\?//s) {
837					if(! @$curr_open) { # usual case
838					DEBUG and print "Treating $para_type paragraph as such because stack is empty.\n";
839					} else {
840					my @fors = grep $_->[0] eq '=for', @$curr_open;
841					DEBUG > 1 and print "Containing fors: ",
842					join(',', map $_->[1]{'target'}, @fors), "\n";
843
844					if(! @fors) {
845					DEBUG and print "Treating $para_type paragraph as such because stack has no =for's\n";
846
847					#} elsif(grep $_->[1]{'~resolve'}, @fors) {
848					#} elsif(not grep !$_->[1]{'~resolve'}, @fors) {
849					} elsif( $fors[-1][1]{'~resolve'} ) {
850					# Look to the immediately containing for
851
852					if($para_type eq 'Data') {
853					DEBUG and print "Treating Data paragraph as Plain/Verbatim because the containing =for ($fors[-1][1]{'target'}) is a resolver\n";
854					$para->[0] = 'Para';
855					$para_type = 'Plain';
856					} else {
857					DEBUG and print "Treating $para_type paragraph as such because the containing =for ($fors[-1][1]{'target'}) is a resolver\n";
858					}
859					} else {
860					DEBUG and print "Treating $para_type paragraph as Data because the containing =for ($fors[-1][1]{'target'}) is a non-resolver\n";
861					$para->[0] = $para_type = 'Data';
862					}
863					}
864					}
865
866					#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
867					if($para_type eq 'Plain') {
868					$self->_ponder_Plain($para);
869					} elsif($para_type eq 'Verbatim') {
870					$self->_ponder_Verbatim($para);
871					} elsif($para_type eq 'Data') {
872					$self->_ponder_Data($para);
873					} else {
874					die "\$para type is $para_type -- how did that happen?";
875					# Shouldn't happen.
876					}
877
878					#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
879					$para->[0] =~ s/^[~=]//s;
880
881					DEBUG and print "\n", pretty($para), "\n";
882
883					# traverse the treelet (which might well be just one string scalar)
884					$self->{'content_seen'} \|\|= 1;
885					$self->_traverse_treelet_bit(@$para);
886					}
887					}
888
889					return;
890					}
891
892					###########################################################################
893					# The sub-ponderers...
894
- -
897					sub _ponder_for {
898					my ($self,$para,$curr_open,$paras) = @_;
899
900					# Fake it out as a begin/end
901					my $target;
902
903					if(grep $_->[1]{'~ignore'}, @$curr_open) {
904					DEBUG > 1 and print "Ignoring ignorable =for\n";
905					return 1;
906					}
907
908					for(my $i = 2; $i < @$para; ++$i) {
909					if($para->[$i] =~ s/^\s(\S+)\s//s) {
910					$target = $1;
911					last;
912					}
913					}
914					unless(defined $target) {
915					$self->whine(
916					$para->[1]{'start_line'},
917					"=for without a target?"
918					);
919					return 1;
920					}
921					DEBUG > 1 and
922					print "Faking out a =for $target as a =begin $target / =end $target\n";
923
924					$para->[0] = 'Data';
925
926					unshift @$paras,
927					['=begin',
928					{'start_line' => $para->[1]{'start_line'}, '~really' => '=for'},
929					$target,
930					],
931					$para,
932					['=end',
933					{'start_line' => $para->[1]{'start_line'}, '~really' => '=for'},
934					$target,
935					],
936					;
937
938					return 1;
939					}
940
941					sub _ponder_begin {
942					my ($self,$para,$curr_open,$paras) = @_;
943					my $content = join ' ', splice @$para, 2;
944					$content =~ s/^\s+//s;
945					$content =~ s/\s+$//s;
946					unless(length($content)) {
947					$self->whine(
948					$para->[1]{'start_line'},
949					"=begin without a target?"
950					);
951					DEBUG and print "Ignoring targetless =begin\n";
952					return 1;
953					}
954
955					my ($target, $title) = $content =~ m/^(\S+)\s(.)$/;
956					$para->[1]{'title'} = $title if ($title);
957					$para->[1]{'target'} = $target; # without any ':'
958					$content = $target; # strip off the title
959
960					$content =~ s/^:!/!:/s;
961					my $neg; # whether this is a negation-match
962					$neg = 1 if $content =~ s/^!//s;
963					my $to_resolve; # whether to process formatting codes
964					$to_resolve = 1 if $content =~ s/^://s;
965
966					my $dont_ignore; # whether this target matches us
967
968					foreach my $target_name (
969					split(',', $content, -1),
970					$neg ? () : '*'
971					) {
972					DEBUG > 2 and
973					print " Considering whether =begin $content matches $target_name\n";
974					next unless $self->{'accept_targets'}{$target_name};
975
976					DEBUG > 2 and
977					print " It DOES match the acceptable target $target_name!\n";
978					$to_resolve = 1
979					if $self->{'accept_targets'}{$target_name} eq 'force_resolve';
980					$dont_ignore = 1;
981					$para->[1]{'target_matching'} = $target_name;
982					last; # stop looking at other target names
983					}
984
985					if($neg) {
986					if( $dont_ignore ) {
987					$dont_ignore = '';
988					delete $para->[1]{'target_matching'};
989					DEBUG > 2 and print " But the leading ! means that this is a NON-match!\n";
990					} else {
991					$dont_ignore = 1;
992					$para->[1]{'target_matching'} = '!';
993					DEBUG > 2 and print " But the leading ! means that this IS a match!\n";
994					}
995					}
996
997					$para->[0] = '=for'; # Just what we happen to call these, internally
998					$para->[1]{'~really'} \|\|= '=begin';
999					$para->[1]{'~ignore'} = (! $dont_ignore) \|\| 0;
1000					$para->[1]{'~resolve'} = $to_resolve \|\| 0;
1001
1002					DEBUG > 1 and print " Making note to ", $dont_ignore ? 'not ' : '',
1003					"ignore contents of this region\n";
1004					DEBUG > 1 and $dont_ignore and print " Making note to treat contents as ",
1005					($to_resolve ? 'verbatim/plain' : 'data'), " paragraphs\n";
1006					DEBUG > 1 and print " (Stack now: ", $self->_dump_curr_open(), ")\n";
1007
1008					push @$curr_open, $para;
1009					if(!$dont_ignore or scalar grep $_->[1]{'~ignore'}, @$curr_open) {
1010					DEBUG > 1 and print "Ignoring ignorable =begin\n";
1011					} else {
1012					$self->{'content_seen'} \|\|= 1;
1013					$self->_handle_element_start((my $scratch='for'), $para->[1]);
1014					}
1015
1016					return 1;
1017					}
1018
1019					sub _ponder_end {
1020					my ($self,$para,$curr_open,$paras) = @_;
1021					my $content = join ' ', splice @$para, 2;
1022					$content =~ s/^\s+//s;
1023					$content =~ s/\s+$//s;
1024					DEBUG and print "Ogling '=end $content' directive\n";
1025
1026					unless(length($content)) {
1027					$self->whine(
1028					$para->[1]{'start_line'},
1029					"'=end' without a target?" . (
1030					( @$curr_open and $curr_open->[-1][0] eq '=for' )
1031					? ( " (Should be \"=end " . $curr_open->[-1][1]{'target'} . '")' )
1032					: ''
1033					)
1034					);
1035					DEBUG and print "Ignoring targetless =end\n";
1036					return 1;
1037					}
1038
1039					unless($content =~ m/^\S+$/) { # i.e., unless it's one word
1040					$self->whine(
1041					$para->[1]{'start_line'},
1042					"'=end $content' is invalid. (Stack: "
1043					. $self->_dump_curr_open() . ')'
1044					);
1045					DEBUG and print "Ignoring mistargetted =end $content\n";
1046					return 1;
1047					}
1048
1049					unless(@$curr_open and $curr_open->[-1][0] eq '=for') {
1050					$self->whine(
1051					$para->[1]{'start_line'},
1052					"=end $content without matching =begin. (Stack: "
1053					. $self->_dump_curr_open() . ')'
1054					);
1055					DEBUG and print "Ignoring mistargetted =end $content\n";
1056					return 1;
1057					}
1058
1059					unless($content eq $curr_open->[-1][1]{'target'}) {
1060					$self->whine(
1061					$para->[1]{'start_line'},
1062					"=end $content doesn't match =begin "
1063					. $curr_open->[-1][1]{'target'}
1064					. ". (Stack: "
1065					. $self->_dump_curr_open() . ')'
1066					);
1067					DEBUG and print "Ignoring mistargetted =end $content at line $para->[1]{'start_line'}\n";
1068					return 1;
1069					}
1070
1071					# Else it's okay to close...
1072					if(grep $_->[1]{'~ignore'}, @$curr_open) {
1073					DEBUG > 1 and print "Not firing any event for this =end $content because in an ignored region\n";
1074					# And that may be because of this to-be-closed =for region, or some
1075					# other one, but it doesn't matter.
1076					} else {
1077					$curr_open->[-1][1]{'start_line'} = $para->[1]{'start_line'};
1078					# what's that for?
1079
1080					$self->{'content_seen'} \|\|= 1;
1081					$self->_handle_element_end( my $scratch = 'for', $para->[1]);
1082					}
1083					DEBUG > 1 and print "Popping $curr_open->[-1][0] $curr_open->[-1][1]{'target'} because of =end $content\n";
1084					pop @$curr_open;
1085
1086					return 1;
1087					}
1088
1089					sub _ponder_doc_end {
1090					my ($self,$para,$curr_open,$paras) = @_;
1091					if(@$curr_open) { # Deal with things left open
1092					DEBUG and print "Stack is nonempty at end-document: (",
1093					$self->_dump_curr_open(), ")\n";
1094
1095					DEBUG > 9 and print "Stack: ", pretty($curr_open), "\n";
1096					unshift @$paras, $self->_closers_for_all_curr_open;
1097					# Make sure there is exactly one ~end in the parastack, at the end:
1098					@$paras = grep $_->[0] ne '~end', @$paras;
1099					push @$paras, $para, $para;
1100					# We need two -- once for the next cycle where we
1101					# generate errata, and then another to be at the end
1102					# when that loop back around to process the errata.
1103					return 1;
1104
1105					} else {
1106					DEBUG and print "Okay, stack is empty now.\n";
1107					}
1108
1109					# Try generating errata section, if applicable
1110					unless($self->{'~tried_gen_errata'}) {
1111					$self->{'~tried_gen_errata'} = 1;
1112					my @extras = $self->_gen_errata();
1113					if(@extras) {
1114					unshift @$paras, @extras;
1115					DEBUG and print "Generated errata... relooping...\n";
1116					return 1; # I.e., loop around again to process these fake-o paragraphs
1117					}
1118					}
1119
1120					splice @$paras; # Well, that's that for this paragraph buffer.
1121					DEBUG and print "Throwing end-document event.\n";
1122
1123					$self->_handle_element_end( my $scratch = 'Document' );
1124					return 1; # Hasta la byebye
1125					}
1126
1127					sub _ponder_pod {
1128					my ($self,$para,$curr_open,$paras) = @_;
1129					$self->whine(
1130					$para->[1]{'start_line'},
1131					"=pod directives shouldn't be over one line long! Ignoring all "
1132					. (@$para - 2) . " lines of content"
1133					) if @$para > 3;
1134
1135					# Content ignored unless 'pod_handler' is set
1136					if (my $pod_handler = $self->{'pod_handler'}) {
1137					my ($line_num, $line) = map $_, $para->[1]{'start_line'}, $para->[2];
1138					$line = $line eq '' ? "=pod" : "=pod $line"; # imitate cut_handler output
1139					$pod_handler->($line, $line_num, $self);
1140					}
1141
1142					# The surrounding methods set content_seen, so let us remain consistent.
1143					# I do not know why it was not here before -- should it not be here?
1144					# $self->{'content_seen'} \|\|= 1;
1145
1146					return;
1147					}
1148
1149					sub _ponder_over {
1150					my ($self,$para,$curr_open,$paras) = @_;
1151					return 1 unless @$paras;
1152					my $list_type;
1153
1154					if($paras->[0][0] eq '=item') { # most common case
1155					$list_type = $self->_get_initial_item_type($paras->[0]);
1156
1157					} elsif($paras->[0][0] eq '=back') {
1158					# Ignore empty lists by default
1159					if ($self->{'parse_empty_lists'}) {
1160					$list_type = 'empty';
1161					} else {
1162					shift @$paras;
1163					return 1;
1164					}
1165					} elsif($paras->[0][0] eq '~end') {
1166					$self->whine(
1167					$para->[1]{'start_line'},
1168					"=over is the last thing in the document?!"
1169					);
1170					return 1; # But feh, ignore it.
1171					} else {
1172					$list_type = 'block';
1173					}
1174					$para->[1]{'~type'} = $list_type;
1175					push @$curr_open, $para;
1176					# yes, we reuse the paragraph as a stack item
1177
1178					my $content = join ' ', splice @$para, 2;
1179					my $overness;
1180					if($content =~ m/^\s*$/s) {
1181					$para->[1]{'indent'} = 4;
1182					} elsif($content =~ m/^\s((?:\d\.)?\d+)\s*$/s) {
1183	2	3.26ms	2	16µs	# spent 14µs (11+2) within Pod::Simple::BlackBox::BEGIN@1183 which was called: # once (11µs+2µs) by Pod::Simple::LinkSection::BEGIN@9 at line 1183 no integer; # spent 14µs making 1 call to Pod::Simple::BlackBox::BEGIN@1183 # spent 2µs making 1 call to integer::unimport
1184					$para->[1]{'indent'} = $1;
1185					if($1 == 0) {
1186					$self->whine(
1187					$para->[1]{'start_line'},
1188					"Can't have a 0 in =over $content"
1189					);
1190					$para->[1]{'indent'} = 4;
1191					}
1192					} else {
1193					$self->whine(
1194					$para->[1]{'start_line'},
1195					"=over should be: '=over' or '=over positive_number'"
1196					);
1197					$para->[1]{'indent'} = 4;
1198					}
1199					DEBUG > 1 and print "=over found of type $list_type\n";
1200
1201					$self->{'content_seen'} \|\|= 1;
1202					$self->_handle_element_start((my $scratch = 'over-' . $list_type), $para->[1]);
1203
1204					return;
1205					}
1206
1207					sub _ponder_back {
1208					my ($self,$para,$curr_open,$paras) = @_;
1209					# TODO: fire off </item-number> or </item-bullet> or </item-text> ??
1210
1211					my $content = join ' ', splice @$para, 2;
1212					if($content =~ m/\S/) {
1213					$self->whine(
1214					$para->[1]{'start_line'},
1215					"=back doesn't take any parameters, but you said =back $content"
1216					);
1217					}
1218
1219					if(@$curr_open and $curr_open->[-1][0] eq '=over') {
1220					DEBUG > 1 and print "=back happily closes matching =over\n";
1221					# Expected case: we're closing the most recently opened thing
1222					#my $over = pop @$curr_open;
1223					$self->{'content_seen'} \|\|= 1;
1224					$self->_handle_element_end( my $scratch =
1225					'over-' . ( (pop @$curr_open)->[1]{'~type'} ), $para->[1]
1226					);
1227					} else {
1228					DEBUG > 1 and print "=back found without a matching =over. Stack: (",
1229					join(', ', map $_->[0], @$curr_open), ").\n";
1230					$self->whine(
1231					$para->[1]{'start_line'},
1232					'=back without =over'
1233					);
1234					return 1; # and ignore it
1235					}
1236					}
1237
1238					sub _ponder_item {
1239					my ($self,$para,$curr_open,$paras) = @_;
1240					my $over;
1241					unless(@$curr_open and
1242					$over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) {
1243					$self->whine(
1244					$para->[1]{'start_line'},
1245					"'=item' outside of any '=over'"
1246					);
1247					unshift @$paras,
1248					['=over', {'start_line' => $para->[1]{'start_line'}}, ''],
1249					$para
1250					;
1251					return 1;
1252					}
1253
1254
1255					my $over_type = $over->[1]{'~type'};
1256
1257					if(!$over_type) {
1258					# Shouldn't happen1
1259					die "Typeless over in stack, starting at line "
1260					. $over->[1]{'start_line'};
1261
1262					} elsif($over_type eq 'block') {
1263					unless($curr_open->[-1][1]{'~bitched_about'}) {
1264					$curr_open->[-1][1]{'~bitched_about'} = 1;
1265					$self->whine(
1266					$curr_open->[-1][1]{'start_line'},
1267					"You can't have =items (as at line "
1268					. $para->[1]{'start_line'}
1269					. ") unless the first thing after the =over is an =item"
1270					);
1271					}
1272					# Just turn it into a paragraph and reconsider it
1273					$para->[0] = '~Para';
1274					unshift @$paras, $para;
1275					return 1;
1276
1277					} elsif($over_type eq 'text') {
1278					my $item_type = $self->_get_item_type($para);
1279					# That kills the content of the item if it's a number or bullet.
1280					DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
1281
1282					if($item_type eq 'text') {
1283					# Nothing special needs doing for 'text'
1284					} elsif($item_type eq 'number' or $item_type eq 'bullet') {
1285					$self->whine(
1286					$para->[1]{'start_line'},
1287					"Expected text after =item, not a $item_type"
1288					);
1289					# Undo our clobbering:
1290					push @$para, $para->[1]{'~orig_content'};
1291					delete $para->[1]{'number'};
1292					# Only a PROPER item-number element is allowed
1293					# to have a number attribute.
1294					} else {
1295					die "Unhandled item type $item_type"; # should never happen
1296					}
1297
1298					# =item-text thingies don't need any assimilation, it seems.
1299
1300					} elsif($over_type eq 'number') {
1301					my $item_type = $self->_get_item_type($para);
1302					# That kills the content of the item if it's a number or bullet.
1303					DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
1304
1305					my $expected_value = ++ $curr_open->[-1][1]{'~counter'};
1306
1307					if($item_type eq 'bullet') {
1308					# Hm, it's not numeric. Correct for this.
1309					$para->[1]{'number'} = $expected_value;
1310					$self->whine(
1311					$para->[1]{'start_line'},
1312					"Expected '=item $expected_value'"
1313					);
1314					push @$para, $para->[1]{'~orig_content'};
1315					# restore the bullet, blocking the assimilation of next para
1316
1317					} elsif($item_type eq 'text') {
1318					# Hm, it's not numeric. Correct for this.
1319					$para->[1]{'number'} = $expected_value;
1320					$self->whine(
1321					$para->[1]{'start_line'},
1322					"Expected '=item $expected_value'"
1323					);
1324					# Text content will still be there and will block next ~Para
1325
1326					} elsif($item_type ne 'number') {
1327					die "Unknown item type $item_type"; # should never happen
1328
1329					} elsif($expected_value == $para->[1]{'number'}) {
1330					DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n";
1331
1332					} else {
1333					DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'},
1334					" instead of the expected value of $expected_value\n";
1335					$self->whine(
1336					$para->[1]{'start_line'},
1337					"You have '=item " . $para->[1]{'number'} .
1338					"' instead of the expected '=item $expected_value'"
1339					);
1340					$para->[1]{'number'} = $expected_value; # correcting!!
1341					}
1342
1343					if(@$para == 2) {
1344					# For the cases where we /didn't/ push to @$para
1345					if($paras->[0][0] eq '~Para') {
1346					DEBUG and print "Assimilating following ~Para content into $over_type item\n";
1347					push @$para, splice @{shift @$paras},2;
1348					} else {
1349					DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
1350					push @$para, ''; # Just so it's not contentless
1351					}
1352					}
1353
1354
1355					} elsif($over_type eq 'bullet') {
1356					my $item_type = $self->_get_item_type($para);
1357					# That kills the content of the item if it's a number or bullet.
1358					DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
1359
1360					if($item_type eq 'bullet') {
1361					# as expected!
1362
1363					if( $para->[1]{'~_freaky_para_hack'} ) {
1364					DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n";
1365					push @$para, delete $para->[1]{'~_freaky_para_hack'};
1366					}
1367
1368					} elsif($item_type eq 'number') {
1369					$self->whine(
1370					$para->[1]{'start_line'},
1371					"Expected '=item *'"
1372					);
1373					push @$para, $para->[1]{'~orig_content'};
1374					# and block assimilation of the next paragraph
1375					delete $para->[1]{'number'};
1376					# Only a PROPER item-number element is allowed
1377					# to have a number attribute.
1378					} elsif($item_type eq 'text') {
1379					$self->whine(
1380					$para->[1]{'start_line'},
1381					"Expected '=item *'"
1382					);
1383					# But doesn't need processing. But it'll block assimilation
1384					# of the next para.
1385					} else {
1386					die "Unhandled item type $item_type"; # should never happen
1387					}
1388
1389					if(@$para == 2) {
1390					# For the cases where we /didn't/ push to @$para
1391					if($paras->[0][0] eq '~Para') {
1392					DEBUG and print "Assimilating following ~Para content into $over_type item\n";
1393					push @$para, splice @{shift @$paras},2;
1394					} else {
1395					DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
1396					push @$para, ''; # Just so it's not contentless
1397					}
1398					}
1399
1400					} else {
1401					die "Unhandled =over type \"$over_type\"?";
1402					# Shouldn't happen!
1403					}
1404					$para->[0] .= '-' . $over_type;
1405
1406					return;
1407					}
1408
1409					sub _ponder_Plain {
1410					my ($self,$para) = @_;
1411					DEBUG and print " giving plain treatment...\n";
1412					unless( @$para == 2 or ( @$para == 3 and $para->[2] eq '' )
1413					or $para->[1]{'~cooked'}
1414					) {
1415					push @$para,
1416					@{$self->_make_treelet(
1417					join("\n", splice(@$para, 2)),
1418					$para->[1]{'start_line'}
1419					)};
1420					}
1421					# Empty paragraphs don't need a treelet for any reason I can see.
1422					# And precooked paragraphs already have a treelet.
1423					return;
1424					}
1425
1426					sub _ponder_Verbatim {
1427					my ($self,$para) = @_;
1428					DEBUG and print " giving verbatim treatment...\n";
1429
1430					$para->[1]{'xml:space'} = 'preserve';
1431
1432					my $indent = $self->strip_verbatim_indent;
1433					if ($indent && ref $indent eq 'CODE') {
1434					my @shifted = (shift @{$para}, shift @{$para});
1435					$indent = $indent->($para);
1436					unshift @{$para}, @shifted;
1437					}
1438
1439					for(my $i = 2; $i < @$para; $i++) {
1440					foreach my $line ($para->[$i]) { # just for aliasing
1441					# Strip indentation.
1442					$line =~ s/^\Q$indent// if $indent
1443					&& !($self->{accept_codes} && $self->{accept_codes}{VerbatimFormatted});
1444					while( $line =~
1445					# Sort of adapted from Text::Tabs -- yes, it's hardwired in that
1446					# tabs are at every EIGHTH column. For portability, it has to be
1447					# one setting everywhere, and 8th wins.
1448					s/^([^\t]*)(\t+)/$1.(" " x ((length($2)<<3)-(length($1)&7)))/e
1449					) {}
1450
1451					# TODO: whinge about (or otherwise treat) unindented or overlong lines
1452
1453					}
1454					}
1455
1456					# Now the VerbatimFormatted hoodoo...
1457					if( $self->{'accept_codes'} and
1458					$self->{'accept_codes'}{'VerbatimFormatted'}
1459					) {
1460					while(@$para > 3 and $para->[-1] !~ m/\S/) { pop @$para }
1461					# Kill any number of terminal newlines
1462					$self->_verbatim_format($para);
1463					} elsif ($self->{'codes_in_verbatim'}) {
1464					push @$para,
1465					@{$self->_make_treelet(
1466					join("\n", splice(@$para, 2)),
1467					$para->[1]{'start_line'}, $para->[1]{'xml:space'}
1468					)};
1469					$para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines
1470					} else {
1471					push @$para, join "\n", splice(@$para, 2) if @$para > 3;
1472					$para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines
1473					}
1474					return;
1475					}
1476
1477					sub _ponder_Data {
1478					my ($self,$para) = @_;
1479					DEBUG and print " giving data treatment...\n";
1480					$para->[1]{'xml:space'} = 'preserve';
1481					push @$para, join "\n", splice(@$para, 2) if @$para > 3;
1482					return;
1483					}
1484
- -
1488					###########################################################################
1489
1490					sub _traverse_treelet_bit { # for use only by the routine above
1491					my($self, $name) = splice @_,0,2;
1492
1493					my $scratch;
1494					$self->_handle_element_start(($scratch=$name), shift @_);
1495
1496					while (@_) {
1497					my $x = shift;
1498					if (ref($x)) {
1499					&_traverse_treelet_bit($self, @$x);
1500					} else {
1501					$x .= shift while @_ && !ref($_[0]);
1502					$self->_handle_text($x);
1503					}
1504					}
1505
1506					$self->_handle_element_end($scratch=$name);
1507					return;
1508					}
1509
1510					#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1511
1512					sub _closers_for_all_curr_open {
1513					my $self = $_[0];
1514					my @closers;
1515					foreach my $still_open (@{ $self->{'curr_open'} \|\| return }) {
1516					my @copy = @$still_open;
1517					$copy[1] = {%{ $copy[1] }};
1518					#$copy[1]{'start_line'} = -1;
1519					if($copy[0] eq '=for') {
1520					$copy[0] = '=end';
1521					} elsif($copy[0] eq '=over') {
1522					$self->whine(
1523					$still_open->[1]{start_line} ,
1524					"=over without closing =back"
1525					);
1526
1527					$copy[0] = '=back';
1528					} else {
1529					die "I don't know how to auto-close an open $copy[0] region";
1530					}
1531
1532					unless( @copy > 2 ) {
1533					push @copy, $copy[1]{'target'};
1534					$copy[-1] = '' unless defined $copy[-1];
1535					# since =over's don't have targets
1536					}
1537
1538					$copy[1]{'fake-closer'} = 1;
1539
1540					DEBUG and print "Queuing up fake-o event: ", pretty(\@copy), "\n";
1541					unshift @closers, \@copy;
1542					}
1543					return @closers;
1544					}
1545
1546					#--------------------------------------------------------------------------
1547
1548					sub _verbatim_format {
1549					my($it, $p) = @_;
1550
1551					my $formatting;
1552
1553					for(my $i = 2; $i < @$p; $i++) { # work backwards over the lines
1554					DEBUG and print "_verbatim_format appends a newline to $i: $p->[$i]\n";
1555					$p->[$i] .= "\n";
1556					# Unlike with simple Verbatim blocks, we don't end up just doing
1557					# a join("\n", ...) on the contents, so we have to append a
1558					# newline to ever line, and then nix the last one later.
1559					}
1560
1561					if( DEBUG > 4 ) {
1562					print "<<\n";
1563					for(my $i = $#$p; $i >= 2; $i--) { # work backwards over the lines
1564					print "_verbatim_format $i: $p->[$i]";
1565					}
1566					print ">>\n";
1567					}
1568
1569					for(my $i = $#$p; $i > 2; $i--) {
1570					# work backwards over the lines, except the first (#2)
1571
1572					#next unless $p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s
1573					# and $p->[$i-1] !~ m{^#:[ \^\/\%]*\n?$}s;
1574					# look at a formatty line preceding a nonformatty one
1575					DEBUG > 5 and print "Scrutinizing line $i: $$p[$i]\n";
1576					if($p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s) {
1577					DEBUG > 5 and print " It's a formatty line. ",
1578					"Peeking at previous line ", $i-1, ": $$p[$i-1]: \n";
1579
1580					if( $p->[$i-1] =~ m{^#:[ \^\/\%]*\n?$}s ) {
1581					DEBUG > 5 and print " Previous line is formatty! Skipping this one.\n";
1582					next;
1583					} else {
1584					DEBUG > 5 and print " Previous line is non-formatty! Yay!\n";
1585					}
1586					} else {
1587					DEBUG > 5 and print " It's not a formatty line. Ignoring\n";
1588					next;
1589					}
1590
1591					# A formatty line has to have #: in the first two columns, and uses
1592					# "^" to mean bold, "/" to mean underline, and "%" to mean bold italic.
1593					# Example:
1594					# What do you want? i like pie. [or whatever]
1595					# #:^^^^^^^^^^^^^^^^^ /////////////
1596
1597
1598					DEBUG > 4 and print "_verbatim_format considers:\n<$p->[$i-1]>\n<$p->[$i]>\n";
1599
1600					$formatting = ' ' . $1;
1601					$formatting =~ s/\s+$//s; # nix trailing whitespace
1602					unless(length $formatting and $p->[$i-1] =~ m/\S/) { # no-op
1603					splice @$p,$i,1; # remove this line
1604					$i--; # don't consider next line
1605					next;
1606					}
1607
1608					if( length($formatting) >= length($p->[$i-1]) ) {
1609					$formatting = substr($formatting, 0, length($p->[$i-1]) - 1) . ' ';
1610					} else {
1611					$formatting .= ' ' x (length($p->[$i-1]) - length($formatting));
1612					}
1613					# Make $formatting and the previous line be exactly the same length,
1614					# with $formatting having a " " as the last character.
1615
1616					DEBUG > 4 and print "Formatting <$formatting> on <", $p->[$i-1], ">\n";
1617
1618
1619					my @new_line;
1620					while( $formatting =~ m{\G(( +)\|(\^+)\|(\/+)\|(\%+))}g ) {
1621					#print "Format matches $1\n";
1622
1623					if($2) {
1624					#print "SKIPPING <$2>\n";
1625					push @new_line,
1626					substr($p->[$i-1], pos($formatting)-length($1), length($1));
1627					} else {
1628					#print "SNARING $+\n";
1629					push @new_line, [
1630					(
1631					$3 ? 'VerbatimB' :
1632					$4 ? 'VerbatimI' :
1633					$5 ? 'VerbatimBI' : die("Should never get called")
1634					), {},
1635					substr($p->[$i-1], pos($formatting)-length($1), length($1))
1636					];
1637					#print "Formatting <$new_line[-1][-1]> as $new_line[-1][0]\n";
1638					}
1639					}
1640					my @nixed =
1641					splice @$p, $i-1, 2, @new_line; # replace myself and the next line
1642					DEBUG > 10 and print "Nixed count: ", scalar(@nixed), "\n";
1643
1644					DEBUG > 6 and print "New version of the above line is these tokens (",
1645					scalar(@new_line), "):",
1646					map( ref($_)?"<@$_> ":"<$_>", @new_line ), "\n";
1647					$i--; # So the next line we scrutinize is the line before the one
1648					# that we just went and formatted
1649					}
1650
1651					$p->[0] = 'VerbatimFormatted';
1652
1653					# Collapse adjacent text nodes, just for kicks.
1654					for( my $i = 2; $i > $#$p; $i++ ) { # work forwards over the tokens except for the last
1655					if( !ref($p->[$i]) and !ref($p->[$i + 1]) ) {
1656					DEBUG > 5 and print "_verbatim_format merges {$p->[$i]} and {$p->[$i+1]}\n";
1657					$p->[$i] .= splice @$p, $i+1, 1; # merge
1658					--$i; # and back up
1659					}
1660					}
1661
1662					# Now look for the last text token, and remove the terminal newline
1663					for( my $i = $#$p; $i >= 2; $i-- ) {
1664					# work backwards over the tokens, even the first
1665					if( !ref($p->[$i]) ) {
1666					if($p->[$i] =~ s/\n$//s) {
1667					DEBUG > 5 and print "_verbatim_format killed the terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]}\n";
1668					} else {
1669					DEBUG > 5 and print
1670					"No terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]} !?\n";
1671					}
1672					last; # we only want the next one
1673					}
1674					}
1675
1676					return;
1677					}
1678
1679
1680					#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1681
1682
1683					sub _treelet_from_formatting_codes {
1684					# Given a paragraph, returns a treelet. Full of scary tokenizing code.
1685					# Like [ '~Top', {'start_line' => $start_line},
1686					# "I like ",
1687					# [ 'B', {}, "pie" ],
1688					# "!"
1689					# ]
1690
1691					my($self, $para, $start_line, $preserve_space) = @_;
1692
1693					my $treelet = ['~Top', {'start_line' => $start_line},];
1694
1695					unless ($preserve_space \|\| $self->{'preserve_whitespace'}) {
1696					$para =~ s/\s+/ /g; # collapse and trim all whitespace first.
1697					$para =~ s/ $//;
1698					$para =~ s/^ //;
1699					}
1700
1701					# Only apparent problem the above code is that N<< >> turns into
1702					# N<< >>. But then, word wrapping does that too! So don't do that!
1703
1704					my @stack;
1705					my @lineage = ($treelet);
1706					my $raw = ''; # raw content of L<> fcode before splitting/processing
1707					# XXX 'raw' is not 100% accurate: all surrounding whitespace is condensed
1708					# into just 1 ' '. Is this the regex's doing or 'raw's?
1709					my $inL = 0;
1710
1711					DEBUG > 4 and print "Paragraph:\n$para\n\n";
1712
1713					# Here begins our frightening tokenizer RE. The following regex matches
1714					# text in four main parts:
1715					#
1716					# * Start-codes. The first alternative matches C< or C<<, the latter
1717					# followed by some whitespace. $1 will hold the entire start code
1718					# (including any space following a multiple-angle-bracket delimiter),
1719					# and $2 will hold only the additional brackets past the first in a
1720					# multiple-bracket delimiter. length($2) + 1 will be the number of
1721					# closing brackets we have to find.
1722					#
1723					# * Closing brackets. Match some amount of whitespace followed by
1724					# multiple close brackets. The logic to see if this closes anything
1725					# is down below. Note that in order to parse C<< >> correctly, we
1726					# have to use look-behind (?<=\s\s), since the match of the starting
1727					# code will have consumed the whitespace.
1728					#
1729					# * A single closing bracket, to close a simple code like C<>.
1730					#
1731					# * Something that isn't a start or end code. We have to be careful
1732					# about accepting whitespace, since perlpodspec says that any whitespace
1733					# before a multiple-bracket closing delimiter should be ignored.
1734					#
1735					while($para =~
1736					m/\G
1737					(?:
1738					# Match starting codes, including the whitespace following a
1739					# multiple-delimiter start code. $1 gets the whole start code and
1740					# $2 gets all but one of the <s in the multiple-bracket case.
1741					([A-Z]<(?:(<+)\s+)?)
1742					\|
1743					# Match multiple-bracket end codes. $3 gets the whitespace that
1744					# should be discarded before an end bracket but kept in other cases
1745					# and $4 gets the end brackets themselves.
1746					(\s+\|(?<=\s\s))(>{2,})
1747					\|
1748					(\s?>) # $5: simple end-codes
1749					\|
1750					( # $6: stuff containing no start-codes or end-codes
1751					(?:
1752					[^A-Z\s>]
1753					\|
1754					(?:
1755					[A-Z](?!<)
1756					)
1757					\|
1758					# whitespace is ok, but we don't want to eat the whitespace before
1759					# a multiple-bracket end code.
1760					# NOTE: we may still have problems with e.g. S<< >>
1761					(?:
1762					\s(?!\s*>{2,})
1763					)
1764					)+
1765					)
1766					)
1767					/xgo
1768					) {
1769					DEBUG > 4 and print "\nParagraphic tokenstack = (@stack)\n";
1770					if(defined $1) {
1771					if(defined $2) {
1772					DEBUG > 3 and print "Found complex start-text code \"$1\"\n";
1773					push @stack, length($2) + 1;
1774					# length of the necessary complex end-code string
1775					} else {
1776					DEBUG > 3 and print "Found simple start-text code \"$1\"\n";
1777					push @stack, 0; # signal that we're looking for simple
1778					}
1779					push @lineage, [ substr($1,0,1), {}, ]; # new node object
1780					push @{ $lineage[-2] }, $lineage[-1];
1781					if ('L' eq substr($1,0,1)) {
1782					$raw = $inL ? $raw.$1 : ''; # reset raw content accumulator
1783					$inL = 1;
1784					} else {
1785					$raw .= $1 if $inL;
1786					}
1787
1788					} elsif(defined $4) {
1789					DEBUG > 3 and print "Found apparent complex end-text code \"$3$4\"\n";
1790					# This is where it gets messy...
1791					if(! @stack) {
1792					# We saw " >>>>" but needed nothing. This is ALL just stuff then.
1793					DEBUG > 4 and print " But it's really just stuff.\n";
1794					push @{ $lineage[-1] }, $3, $4;
1795					next;
1796					} elsif(!$stack[-1]) {
1797					# We saw " >>>>" but needed only ">". Back pos up.
1798					DEBUG > 4 and print " And that's more than we needed to close simple.\n";
1799					push @{ $lineage[-1] }, $3; # That was a for-real space, too.
1800					pos($para) = pos($para) - length($4) + 1;
1801					} elsif($stack[-1] == length($4)) {
1802					# We found " >>>>", and it was exactly what we needed. Commonest case.
1803					DEBUG > 4 and print " And that's exactly what we needed to close complex.\n";
1804					} elsif($stack[-1] < length($4)) {
1805					# We saw " >>>>" but needed only " >>". Back pos up.
1806					DEBUG > 4 and print " And that's more than we needed to close complex.\n";
1807					pos($para) = pos($para) - length($4) + $stack[-1];
1808					} else {
1809					# We saw " >>>>" but needed " >>>>>>". So this is all just stuff!
1810					DEBUG > 4 and print " But it's really just stuff, because we needed more.\n";
1811					push @{ $lineage[-1] }, $3, $4;
1812					next;
1813					}
1814					#print "\nHOOBOY ", scalar(@{$lineage[-1]}), "!!!\n";
1815
1816					push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };
1817					# Keep the element from being childless
1818
1819					pop @stack;
1820					pop @lineage;
1821
1822					unless (@stack) { # not in an L if there are no open fcodes
1823					$inL = 0;
1824					if (ref $lineage[-1][-1] && $lineage[-1][-1][0] eq 'L') {
1825					$lineage[-1][-1][1]{'raw'} = $raw
1826					}
1827					}
1828					$raw .= $3.$4 if $inL;
1829
1830					} elsif(defined $5) {
1831					DEBUG > 3 and print "Found apparent simple end-text code \"$5\"\n";
1832
1833					if(@stack and ! $stack[-1]) {
1834					# We're indeed expecting a simple end-code
1835					DEBUG > 4 and print " It's indeed an end-code.\n";
1836
1837					if(length($5) == 2) { # There was a space there: " >"
1838					push @{ $lineage[-1] }, ' ';
1839					} elsif( 2 == @{ $lineage[-1] } ) { # Closing a childless element
1840					push @{ $lineage[-1] }, ''; # keep it from being really childless
1841					}
1842
1843					pop @stack;
1844					pop @lineage;
1845					} else {
1846					DEBUG > 4 and print " It's just stuff.\n";
1847					push @{ $lineage[-1] }, $5;
1848					}
1849
1850					unless (@stack) { # not in an L if there are no open fcodes
1851					$inL = 0;
1852					if (ref $lineage[-1][-1] && $lineage[-1][-1][0] eq 'L') {
1853					$lineage[-1][-1][1]{'raw'} = $raw
1854					}
1855					}
1856					$raw .= $5 if $inL;
1857
1858					} elsif(defined $6) {
1859					DEBUG > 3 and print "Found stuff \"$6\"\n";
1860					push @{ $lineage[-1] }, $6;
1861					$raw .= $6 if $inL;
1862					# XXX does not capture multiplace whitespaces -- 'raw' ends up with
1863					# at most 1 leading/trailing whitespace, why not all of it?
1864
1865					} else {
1866					# should never ever ever ever happen
1867					DEBUG and print "AYYAYAAAAA at line ", __LINE__, "\n";
1868					die "SPORK 512512!";
1869					}
1870					}
1871
1872					if(@stack) { # Uhoh, some sequences weren't closed.
1873					my $x= "...";
1874					while(@stack) {
1875					push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };
1876					# Hmmmmm!
1877
1878					my $code = (pop @lineage)->[0];
1879					my $ender_length = pop @stack;
1880					if($ender_length) {
1881					--$ender_length;
1882					$x = $code . ("<" x $ender_length) . " $x " . (">" x $ender_length);
1883					} else {
1884					$x = $code . "<$x>";
1885					}
1886					}
1887					DEBUG > 1 and print "Unterminated $x sequence\n";
1888					$self->whine($start_line,
1889					"Unterminated $x sequence",
1890					);
1891					}
1892
1893					return $treelet;
1894					}
1895
1896					#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1897
1898					sub text_content_of_treelet { # method: $parser->text_content_of_treelet($lol)
1899					return stringify_lol($_[1]);
1900					}
1901
1902					sub stringify_lol { # function: stringify_lol($lol)
1903					my $string_form = '';
1904					_stringify_lol( $_[0] => \$string_form );
1905					return $string_form;
1906					}
1907
1908					sub _stringify_lol { # the real recursor
1909					my($lol, $to) = @_;
1910					for(my $i = 2; $i < @$lol; ++$i) {
1911					if( ref($lol->[$i] \|\| '') and UNIVERSAL::isa($lol->[$i], 'ARRAY') ) {
1912					_stringify_lol( $lol->[$i], $to); # recurse!
1913					} else {
1914					$$to .= $lol->[$i];
1915					}
1916					}
1917					return;
1918					}
1919
1920					#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1921
1922					sub _dump_curr_open { # return a string representation of the stack
1923					my $curr_open = $_[0]{'curr_open'};
1924
1925					return '[empty]' unless @$curr_open;
1926					return join '; ',
1927					map {;
1928					($_->[0] eq '=for')
1929					? ( ($_->[1]{'~really'} \|\| '=over')
1930					. ' ' . $_->[1]{'target'})
1931					: $_->[0]
1932					}
1933					@$curr_open
1934					;
1935					}
1936
1937					###########################################################################
1938	1	7µs			my %pretty_form = (
1939					"\a" => '\a', # ding!
1940					"\b" => '\b', # BS
1941					"\e" => '\e', # ESC
1942					"\f" => '\f', # FF
1943					"\t" => '\t', # tab
1944					"\cm" => '\cm',
1945					"\cj" => '\cj',
1946					"\n" => '\n', # probably overrides one of either \cm or \cj
1947					'"' => '\"',
1948					'\\' => '\\\\',
1949					'$' => '\\$',
1950					'@' => '\\@',
1951					'%' => '\\%',
1952					'#' => '\\#',
1953					);
1954
1955					sub pretty { # adopted from Class::Classless
1956					# Not the most brilliant routine, but passable.
1957					# Don't give it a cyclic data structure!
1958					my @stuff = @_; # copy
1959					my $x;
1960					my $out =
1961					# join ",\n" .
1962					join ", ",
1963					map {;
1964					if(!defined($_)) {
1965					"undef";
1966					} elsif(ref($_) eq 'ARRAY' or ref($_) eq 'Pod::Simple::LinkSection') {
1967					$x = "[ " . pretty(@$_) . " ]" ;
1968					$x;
1969					} elsif(ref($_) eq 'SCALAR') {
1970					$x = "\\" . pretty($$_) ;
1971					$x;
1972					} elsif(ref($_) eq 'HASH') {
1973					my $hr = $_;
1974					$x = "{" . join(", ",
1975					map(pretty($_) . '=>' . pretty($hr->{$_}),
1976					sort keys %$hr ) ) . "}" ;
1977					$x;
1978					} elsif(!length($_)) { q{''} # empty string
1979					} elsif(
1980					$_ eq '0' # very common case
1981					or(
1982					m/^-?(?:[123456789]\d*\|0)(?:\.\d+)?$/s
1983					and $_ ne '-0' # the strange case that that RE lets thru
1984					)
1985					) { $_;
1986					} else {
1987					if( chr(65) eq 'A' ) {
1988					s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])>
1989					#<$pretty_form{$1} \|\| '\\x'.(unpack("H2",$1))>eg;
1990					<$pretty_form{$1} \|\| '\\x{'.sprintf("%x", ord($1)).'}'>eg;
1991					} else {
1992					# We're in some crazy non-ASCII world!
1993					s<([^abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])>
1994					#<$pretty_form{$1} \|\| '\\x'.(unpack("H2",$1))>eg;
1995					<$pretty_form{$1} \|\| '\\x{'.sprintf("%x", ord($1)).'}'>eg;
1996					}
1997					qq{"$_"};
1998					}
1999					} @stuff;
2000					# $out =~ s/\n */ /g if length($out) < 75;
2001					return $out;
2002					}
2003
2004					#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2005
2006					# A rather unsubtle method of blowing away all the state information
2007					# from a parser object so it can be reused. Provided as a utility for
2008					# backward compatibility in Pod::Man, etc. but not recommended for
2009					# general use.
2010
2011					sub reinit {
2012					my $self = shift;
2013					foreach (qw(source_dead source_filename doc_has_started
2014					start_of_pod_block content_seen last_was_blank paras curr_open
2015					line_count pod_para_count in_pod ~tried_gen_errata errata errors_seen
2016					Title)) {
2017
2018					delete $self->{$_};
2019					}
2020					}
2021
2022					#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2023	1	7µs			1;
2024