Filename | /Users/timbo/perl5/perlbrew/perls/perl-5.18.2/lib/5.18.2/Pod/Parser.pm |
Statements | Executed 15 statements in 2.96ms |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 1.72ms | 1.80ms | BEGIN@209 | Pod::Parser::
1 | 1 | 1 | 12µs | 48µs | BEGIN@210 | Pod::Parser::
1 | 1 | 1 | 11µs | 23µs | BEGIN@11 | Pod::Parser::
1 | 1 | 1 | 8µs | 21µs | BEGIN@211 | Pod::Parser::
1 | 1 | 1 | 6µs | 65µs | BEGIN@14 | Pod::Parser::
1 | 1 | 1 | 4µs | 4µs | BEGIN@212 | Pod::Parser::
0 | 0 | 0 | 0s | 0s | __ANON__[:763] | Pod::Parser::
0 | 0 | 0 | 0s | 0s | __ANON__[:765] | Pod::Parser::
0 | 0 | 0 | 0s | 0s | __ANON__[:766] | Pod::Parser::
0 | 0 | 0 | 0s | 0s | __ANON__[:767] | Pod::Parser::
0 | 0 | 0 | 0s | 0s | _pop_input_stream | Pod::Parser::
0 | 0 | 0 | 0s | 0s | _push_input_stream | Pod::Parser::
0 | 0 | 0 | 0s | 0s | begin_input | Pod::Parser::
0 | 0 | 0 | 0s | 0s | begin_pod | Pod::Parser::
0 | 0 | 0 | 0s | 0s | command | Pod::Parser::
0 | 0 | 0 | 0s | 0s | cutting | Pod::Parser::
0 | 0 | 0 | 0s | 0s | end_input | Pod::Parser::
0 | 0 | 0 | 0s | 0s | end_pod | Pod::Parser::
0 | 0 | 0 | 0s | 0s | errorsub | Pod::Parser::
0 | 0 | 0 | 0s | 0s | initialize | Pod::Parser::
0 | 0 | 0 | 0s | 0s | input_file | Pod::Parser::
0 | 0 | 0 | 0s | 0s | input_handle | Pod::Parser::
0 | 0 | 0 | 0s | 0s | input_streams | Pod::Parser::
0 | 0 | 0 | 0s | 0s | interior_sequence | Pod::Parser::
0 | 0 | 0 | 0s | 0s | interpolate | Pod::Parser::
0 | 0 | 0 | 0s | 0s | new | Pod::Parser::
0 | 0 | 0 | 0s | 0s | output_file | Pod::Parser::
0 | 0 | 0 | 0s | 0s | output_handle | Pod::Parser::
0 | 0 | 0 | 0s | 0s | parse_from_file | Pod::Parser::
0 | 0 | 0 | 0s | 0s | parse_from_filehandle | Pod::Parser::
0 | 0 | 0 | 0s | 0s | parse_paragraph | Pod::Parser::
0 | 0 | 0 | 0s | 0s | parse_text | Pod::Parser::
0 | 0 | 0 | 0s | 0s | parseopts | Pod::Parser::
0 | 0 | 0 | 0s | 0s | preprocess_line | Pod::Parser::
0 | 0 | 0 | 0s | 0s | preprocess_paragraph | Pod::Parser::
0 | 0 | 0 | 0s | 0s | textblock | Pod::Parser::
0 | 0 | 0 | 0s | 0s | top_stream | Pod::Parser::
0 | 0 | 0 | 0s | 0s | verbatim | Pod::Parser::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | ############################################################################# | ||||
2 | # Pod/Parser.pm -- package which defines a base class for parsing POD docs. | ||||
3 | # | ||||
4 | # Copyright (C) 1996-2000 by Bradford Appleton. All rights reserved. | ||||
5 | # This file is part of "PodParser". PodParser is free software; | ||||
6 | # you can redistribute it and/or modify it under the same terms | ||||
7 | # as Perl itself. | ||||
8 | ############################################################################# | ||||
9 | |||||
10 | package Pod::Parser; | ||||
11 | 2 | 26µs | 2 | 35µs | # spent 23µs (11+12) within Pod::Parser::BEGIN@11 which was called:
# once (11µs+12µs) by Pod::Select::BEGIN@242 at line 11 # spent 23µs making 1 call to Pod::Parser::BEGIN@11
# spent 12µs making 1 call to strict::import |
12 | |||||
13 | ## These "variables" are used as local "glob aliases" for performance | ||||
14 | 2 | 86µs | 2 | 125µs | # spent 65µs (6+59) within Pod::Parser::BEGIN@14 which was called:
# once (6µs+59µs) by Pod::Select::BEGIN@242 at line 14 # spent 65µs making 1 call to Pod::Parser::BEGIN@14
# spent 59µs making 1 call to vars::import |
15 | 1 | 700ns | $VERSION = '1.60'; ## Current version of this package | ||
16 | 1 | 9µs | require 5.005; ## requires this Perl version or later | ||
17 | |||||
18 | ############################################################################# | ||||
19 | |||||
20 | =head1 NAME | ||||
21 | |||||
22 | Pod::Parser - base class for creating POD filters and translators | ||||
23 | |||||
24 | =head1 SYNOPSIS | ||||
25 | |||||
26 | use Pod::Parser; | ||||
27 | |||||
28 | package MyParser; | ||||
29 | @ISA = qw(Pod::Parser); | ||||
30 | |||||
31 | sub command { | ||||
32 | my ($parser, $command, $paragraph, $line_num) = @_; | ||||
33 | ## Interpret the command and its text; sample actions might be: | ||||
34 | if ($command eq 'head1') { ... } | ||||
35 | elsif ($command eq 'head2') { ... } | ||||
36 | ## ... other commands and their actions | ||||
37 | my $out_fh = $parser->output_handle(); | ||||
38 | my $expansion = $parser->interpolate($paragraph, $line_num); | ||||
39 | print $out_fh $expansion; | ||||
40 | } | ||||
41 | |||||
42 | sub verbatim { | ||||
43 | my ($parser, $paragraph, $line_num) = @_; | ||||
44 | ## Format verbatim paragraph; sample actions might be: | ||||
45 | my $out_fh = $parser->output_handle(); | ||||
46 | print $out_fh $paragraph; | ||||
47 | } | ||||
48 | |||||
49 | sub textblock { | ||||
50 | my ($parser, $paragraph, $line_num) = @_; | ||||
51 | ## Translate/Format this block of text; sample actions might be: | ||||
52 | my $out_fh = $parser->output_handle(); | ||||
53 | my $expansion = $parser->interpolate($paragraph, $line_num); | ||||
54 | print $out_fh $expansion; | ||||
55 | } | ||||
56 | |||||
57 | sub interior_sequence { | ||||
58 | my ($parser, $seq_command, $seq_argument) = @_; | ||||
59 | ## Expand an interior sequence; sample actions might be: | ||||
60 | return "*$seq_argument*" if ($seq_command eq 'B'); | ||||
61 | return "`$seq_argument'" if ($seq_command eq 'C'); | ||||
62 | return "_${seq_argument}_'" if ($seq_command eq 'I'); | ||||
63 | ## ... other sequence commands and their resulting text | ||||
64 | } | ||||
65 | |||||
66 | package main; | ||||
67 | |||||
68 | ## Create a parser object and have it parse file whose name was | ||||
69 | ## given on the command-line (use STDIN if no files were given). | ||||
70 | $parser = new MyParser(); | ||||
71 | $parser->parse_from_filehandle(\*STDIN) if (@ARGV == 0); | ||||
72 | for (@ARGV) { $parser->parse_from_file($_); } | ||||
73 | |||||
74 | =head1 REQUIRES | ||||
75 | |||||
76 | perl5.005, Pod::InputObjects, Exporter, Symbol, Carp | ||||
77 | |||||
78 | =head1 EXPORTS | ||||
79 | |||||
80 | Nothing. | ||||
81 | |||||
82 | =head1 DESCRIPTION | ||||
83 | |||||
84 | B<Pod::Parser> is a base class for creating POD filters and translators. | ||||
85 | It handles most of the effort involved with parsing the POD sections | ||||
86 | from an input stream, leaving subclasses free to be concerned only with | ||||
87 | performing the actual translation of text. | ||||
88 | |||||
89 | B<Pod::Parser> parses PODs, and makes method calls to handle the various | ||||
90 | components of the POD. Subclasses of B<Pod::Parser> override these methods | ||||
91 | to translate the POD into whatever output format they desire. | ||||
92 | |||||
93 | Note: This module is considered as legacy; modern Perl releases (5.18 and | ||||
94 | higher) are going to remove Pod::Parser from core and use L<Pod::Simple> | ||||
95 | for all things POD. | ||||
96 | |||||
97 | =head1 QUICK OVERVIEW | ||||
98 | |||||
99 | To create a POD filter for translating POD documentation into some other | ||||
100 | format, you create a subclass of B<Pod::Parser> which typically overrides | ||||
101 | just the base class implementation for the following methods: | ||||
102 | |||||
103 | =over 2 | ||||
104 | |||||
105 | =item * | ||||
106 | |||||
107 | B<command()> | ||||
108 | |||||
109 | =item * | ||||
110 | |||||
111 | B<verbatim()> | ||||
112 | |||||
113 | =item * | ||||
114 | |||||
115 | B<textblock()> | ||||
116 | |||||
117 | =item * | ||||
118 | |||||
119 | B<interior_sequence()> | ||||
120 | |||||
121 | =back | ||||
122 | |||||
123 | You may also want to override the B<begin_input()> and B<end_input()> | ||||
124 | methods for your subclass (to perform any needed per-file and/or | ||||
125 | per-document initialization or cleanup). | ||||
126 | |||||
127 | If you need to perform any preprocessing of input before it is parsed | ||||
128 | you may want to override one or more of B<preprocess_line()> and/or | ||||
129 | B<preprocess_paragraph()>. | ||||
130 | |||||
131 | Sometimes it may be necessary to make more than one pass over the input | ||||
132 | files. If this is the case you have several options. You can make the | ||||
133 | first pass using B<Pod::Parser> and override your methods to store the | ||||
134 | intermediate results in memory somewhere for the B<end_pod()> method to | ||||
135 | process. You could use B<Pod::Parser> for several passes with an | ||||
136 | appropriate state variable to control the operation for each pass. If | ||||
137 | your input source can't be reset to start at the beginning, you can | ||||
138 | store it in some other structure as a string or an array and have that | ||||
139 | structure implement a B<getline()> method (which is all that | ||||
140 | B<parse_from_filehandle()> uses to read input). | ||||
141 | |||||
142 | Feel free to add any member data fields you need to keep track of things | ||||
143 | like current font, indentation, horizontal or vertical position, or | ||||
144 | whatever else you like. Be sure to read L<"PRIVATE METHODS AND DATA"> | ||||
145 | to avoid name collisions. | ||||
146 | |||||
147 | For the most part, the B<Pod::Parser> base class should be able to | ||||
148 | do most of the input parsing for you and leave you free to worry about | ||||
149 | how to interpret the commands and translate the result. | ||||
150 | |||||
151 | Note that all we have described here in this quick overview is the | ||||
152 | simplest most straightforward use of B<Pod::Parser> to do stream-based | ||||
153 | parsing. It is also possible to use the B<Pod::Parser::parse_text> function | ||||
154 | to do more sophisticated tree-based parsing. See L<"TREE-BASED PARSING">. | ||||
155 | |||||
156 | =head1 PARSING OPTIONS | ||||
157 | |||||
158 | A I<parse-option> is simply a named option of B<Pod::Parser> with a | ||||
159 | value that corresponds to a certain specified behavior. These various | ||||
160 | behaviors of B<Pod::Parser> may be enabled/disabled by setting | ||||
161 | or unsetting one or more I<parse-options> using the B<parseopts()> method. | ||||
162 | The set of currently accepted parse-options is as follows: | ||||
163 | |||||
164 | =over 3 | ||||
165 | |||||
166 | =item B<-want_nonPODs> (default: unset) | ||||
167 | |||||
168 | Normally (by default) B<Pod::Parser> will only provide access to | ||||
169 | the POD sections of the input. Input paragraphs that are not part | ||||
170 | of the POD-format documentation are not made available to the caller | ||||
171 | (not even using B<preprocess_paragraph()>). Setting this option to a | ||||
172 | non-empty, non-zero value will allow B<preprocess_paragraph()> to see | ||||
173 | non-POD sections of the input as well as POD sections. The B<cutting()> | ||||
174 | method can be used to determine if the corresponding paragraph is a POD | ||||
175 | paragraph, or some other input paragraph. | ||||
176 | |||||
177 | =item B<-process_cut_cmd> (default: unset) | ||||
178 | |||||
179 | Normally (by default) B<Pod::Parser> handles the C<=cut> POD directive | ||||
180 | by itself and does not pass it on to the caller for processing. Setting | ||||
181 | this option to a non-empty, non-zero value will cause B<Pod::Parser> to | ||||
182 | pass the C<=cut> directive to the caller just like any other POD command | ||||
183 | (and hence it may be processed by the B<command()> method). | ||||
184 | |||||
185 | B<Pod::Parser> will still interpret the C<=cut> directive to mean that | ||||
186 | "cutting mode" has been (re)entered, but the caller will get a chance | ||||
187 | to capture the actual C<=cut> paragraph itself for whatever purpose | ||||
188 | it desires. | ||||
189 | |||||
190 | =item B<-warnings> (default: unset) | ||||
191 | |||||
192 | Normally (by default) B<Pod::Parser> recognizes a bare minimum of | ||||
193 | pod syntax errors and warnings and issues diagnostic messages | ||||
194 | for errors, but not for warnings. (Use B<Pod::Checker> to do more | ||||
195 | thorough checking of POD syntax.) Setting this option to a non-empty, | ||||
196 | non-zero value will cause B<Pod::Parser> to issue diagnostics for | ||||
197 | the few warnings it recognizes as well as the errors. | ||||
198 | |||||
199 | =back | ||||
200 | |||||
201 | Please see L<"parseopts()"> for a complete description of the interface | ||||
202 | for the setting and unsetting of parse-options. | ||||
203 | |||||
204 | =cut | ||||
205 | |||||
206 | ############################################################################# | ||||
207 | |||||
208 | #use diagnostics; | ||||
209 | 2 | 95µs | 1 | 1.80ms | # spent 1.80ms (1.72+71µs) within Pod::Parser::BEGIN@209 which was called:
# once (1.72ms+71µs) by Pod::Select::BEGIN@242 at line 209 # spent 1.80ms making 1 call to Pod::Parser::BEGIN@209 |
210 | 2 | 22µs | 2 | 85µs | # spent 48µs (12+36) within Pod::Parser::BEGIN@210 which was called:
# once (12µs+36µs) by Pod::Select::BEGIN@242 at line 210 # spent 48µs making 1 call to Pod::Parser::BEGIN@210
# spent 36µs making 1 call to Exporter::import |
211 | 2 | 34µs | 2 | 34µs | # spent 21µs (8+13) within Pod::Parser::BEGIN@211 which was called:
# once (8µs+13µs) by Pod::Select::BEGIN@242 at line 211 # spent 21µs making 1 call to Pod::Parser::BEGIN@211
# spent 13µs making 1 call to Exporter::import |
212 | # spent 4µs within Pod::Parser::BEGIN@212 which was called:
# once (4µs+0s) by Pod::Select::BEGIN@242 at line 217 | ||||
213 | 1 | 4µs | if ($] < 5.006) { | ||
214 | require Symbol; | ||||
215 | import Symbol; | ||||
216 | } | ||||
217 | 1 | 2.68ms | 1 | 4µs | }
# spent 4µs making 1 call to Pod::Parser::BEGIN@212 |
218 | 1 | 5µs | @ISA = qw(Exporter); | ||
219 | |||||
220 | ############################################################################# | ||||
221 | |||||
222 | =head1 RECOMMENDED SUBROUTINE/METHOD OVERRIDES | ||||
223 | |||||
224 | B<Pod::Parser> provides several methods which most subclasses will probably | ||||
225 | want to override. These methods are as follows: | ||||
226 | |||||
227 | =cut | ||||
228 | |||||
229 | ##--------------------------------------------------------------------------- | ||||
230 | |||||
231 | =head1 B<command()> | ||||
232 | |||||
233 | $parser->command($cmd,$text,$line_num,$pod_para); | ||||
234 | |||||
235 | This method should be overridden by subclasses to take the appropriate | ||||
236 | action when a POD command paragraph (denoted by a line beginning with | ||||
237 | "=") is encountered. When such a POD directive is seen in the input, | ||||
238 | this method is called and is passed: | ||||
239 | |||||
240 | =over 3 | ||||
241 | |||||
242 | =item C<$cmd> | ||||
243 | |||||
244 | the name of the command for this POD paragraph | ||||
245 | |||||
246 | =item C<$text> | ||||
247 | |||||
248 | the paragraph text for the given POD paragraph command. | ||||
249 | |||||
250 | =item C<$line_num> | ||||
251 | |||||
252 | the line-number of the beginning of the paragraph | ||||
253 | |||||
254 | =item C<$pod_para> | ||||
255 | |||||
256 | a reference to a C<Pod::Paragraph> object which contains further | ||||
257 | information about the paragraph command (see L<Pod::InputObjects> | ||||
258 | for details). | ||||
259 | |||||
260 | =back | ||||
261 | |||||
262 | B<Note> that this method I<is> called for C<=pod> paragraphs. | ||||
263 | |||||
264 | The base class implementation of this method simply treats the raw POD | ||||
265 | command as normal block of paragraph text (invoking the B<textblock()> | ||||
266 | method with the command paragraph). | ||||
267 | |||||
268 | =cut | ||||
269 | |||||
270 | sub command { | ||||
271 | my ($self, $cmd, $text, $line_num, $pod_para) = @_; | ||||
272 | ## Just treat this like a textblock | ||||
273 | $self->textblock($pod_para->raw_text(), $line_num, $pod_para); | ||||
274 | } | ||||
275 | |||||
276 | ##--------------------------------------------------------------------------- | ||||
277 | |||||
278 | =head1 B<verbatim()> | ||||
279 | |||||
280 | $parser->verbatim($text,$line_num,$pod_para); | ||||
281 | |||||
282 | This method may be overridden by subclasses to take the appropriate | ||||
283 | action when a block of verbatim text is encountered. It is passed the | ||||
284 | following parameters: | ||||
285 | |||||
286 | =over 3 | ||||
287 | |||||
288 | =item C<$text> | ||||
289 | |||||
290 | the block of text for the verbatim paragraph | ||||
291 | |||||
292 | =item C<$line_num> | ||||
293 | |||||
294 | the line-number of the beginning of the paragraph | ||||
295 | |||||
296 | =item C<$pod_para> | ||||
297 | |||||
298 | a reference to a C<Pod::Paragraph> object which contains further | ||||
299 | information about the paragraph (see L<Pod::InputObjects> | ||||
300 | for details). | ||||
301 | |||||
302 | =back | ||||
303 | |||||
304 | The base class implementation of this method simply prints the textblock | ||||
305 | (unmodified) to the output filehandle. | ||||
306 | |||||
307 | =cut | ||||
308 | |||||
309 | sub verbatim { | ||||
310 | my ($self, $text, $line_num, $pod_para) = @_; | ||||
311 | my $out_fh = $self->{_OUTPUT}; | ||||
312 | print $out_fh $text; | ||||
313 | } | ||||
314 | |||||
315 | ##--------------------------------------------------------------------------- | ||||
316 | |||||
317 | =head1 B<textblock()> | ||||
318 | |||||
319 | $parser->textblock($text,$line_num,$pod_para); | ||||
320 | |||||
321 | This method may be overridden by subclasses to take the appropriate | ||||
322 | action when a normal block of POD text is encountered (although the base | ||||
323 | class method will usually do what you want). It is passed the following | ||||
324 | parameters: | ||||
325 | |||||
326 | =over 3 | ||||
327 | |||||
328 | =item C<$text> | ||||
329 | |||||
330 | the block of text for the a POD paragraph | ||||
331 | |||||
332 | =item C<$line_num> | ||||
333 | |||||
334 | the line-number of the beginning of the paragraph | ||||
335 | |||||
336 | =item C<$pod_para> | ||||
337 | |||||
338 | a reference to a C<Pod::Paragraph> object which contains further | ||||
339 | information about the paragraph (see L<Pod::InputObjects> | ||||
340 | for details). | ||||
341 | |||||
342 | =back | ||||
343 | |||||
344 | In order to process interior sequences, subclasses implementations of | ||||
345 | this method will probably want to invoke either B<interpolate()> or | ||||
346 | B<parse_text()>, passing it the text block C<$text>, and the corresponding | ||||
347 | line number in C<$line_num>, and then perform any desired processing upon | ||||
348 | the returned result. | ||||
349 | |||||
350 | The base class implementation of this method simply prints the text block | ||||
351 | as it occurred in the input stream). | ||||
352 | |||||
353 | =cut | ||||
354 | |||||
355 | sub textblock { | ||||
356 | my ($self, $text, $line_num, $pod_para) = @_; | ||||
357 | my $out_fh = $self->{_OUTPUT}; | ||||
358 | print $out_fh $self->interpolate($text, $line_num); | ||||
359 | } | ||||
360 | |||||
361 | ##--------------------------------------------------------------------------- | ||||
362 | |||||
363 | =head1 B<interior_sequence()> | ||||
364 | |||||
365 | $parser->interior_sequence($seq_cmd,$seq_arg,$pod_seq); | ||||
366 | |||||
367 | This method should be overridden by subclasses to take the appropriate | ||||
368 | action when an interior sequence is encountered. An interior sequence is | ||||
369 | an embedded command within a block of text which appears as a command | ||||
370 | name (usually a single uppercase character) followed immediately by a | ||||
371 | string of text which is enclosed in angle brackets. This method is | ||||
372 | passed the sequence command C<$seq_cmd> and the corresponding text | ||||
373 | C<$seq_arg>. It is invoked by the B<interpolate()> method for each interior | ||||
374 | sequence that occurs in the string that it is passed. It should return | ||||
375 | the desired text string to be used in place of the interior sequence. | ||||
376 | The C<$pod_seq> argument is a reference to a C<Pod::InteriorSequence> | ||||
377 | object which contains further information about the interior sequence. | ||||
378 | Please see L<Pod::InputObjects> for details if you need to access this | ||||
379 | additional information. | ||||
380 | |||||
381 | Subclass implementations of this method may wish to invoke the | ||||
382 | B<nested()> method of C<$pod_seq> to see if it is nested inside | ||||
383 | some other interior-sequence (and if so, which kind). | ||||
384 | |||||
385 | The base class implementation of the B<interior_sequence()> method | ||||
386 | simply returns the raw text of the interior sequence (as it occurred | ||||
387 | in the input) to the caller. | ||||
388 | |||||
389 | =cut | ||||
390 | |||||
391 | sub interior_sequence { | ||||
392 | my ($self, $seq_cmd, $seq_arg, $pod_seq) = @_; | ||||
393 | ## Just return the raw text of the interior sequence | ||||
394 | return $pod_seq->raw_text(); | ||||
395 | } | ||||
396 | |||||
397 | ############################################################################# | ||||
398 | |||||
399 | =head1 OPTIONAL SUBROUTINE/METHOD OVERRIDES | ||||
400 | |||||
401 | B<Pod::Parser> provides several methods which subclasses may want to override | ||||
402 | to perform any special pre/post-processing. These methods do I<not> have to | ||||
403 | be overridden, but it may be useful for subclasses to take advantage of them. | ||||
404 | |||||
405 | =cut | ||||
406 | |||||
407 | ##--------------------------------------------------------------------------- | ||||
408 | |||||
409 | =head1 B<new()> | ||||
410 | |||||
411 | my $parser = Pod::Parser->new(); | ||||
412 | |||||
413 | This is the constructor for B<Pod::Parser> and its subclasses. You | ||||
414 | I<do not> need to override this method! It is capable of constructing | ||||
415 | subclass objects as well as base class objects, provided you use | ||||
416 | any of the following constructor invocation styles: | ||||
417 | |||||
418 | my $parser1 = MyParser->new(); | ||||
419 | my $parser2 = new MyParser(); | ||||
420 | my $parser3 = $parser2->new(); | ||||
421 | |||||
422 | where C<MyParser> is some subclass of B<Pod::Parser>. | ||||
423 | |||||
424 | Using the syntax C<MyParser::new()> to invoke the constructor is I<not> | ||||
425 | recommended, but if you insist on being able to do this, then the | ||||
426 | subclass I<will> need to override the B<new()> constructor method. If | ||||
427 | you do override the constructor, you I<must> be sure to invoke the | ||||
428 | B<initialize()> method of the newly blessed object. | ||||
429 | |||||
430 | Using any of the above invocations, the first argument to the | ||||
431 | constructor is always the corresponding package name (or object | ||||
432 | reference). No other arguments are required, but if desired, an | ||||
433 | associative array (or hash-table) my be passed to the B<new()> | ||||
434 | constructor, as in: | ||||
435 | |||||
436 | my $parser1 = MyParser->new( MYDATA => $value1, MOREDATA => $value2 ); | ||||
437 | my $parser2 = new MyParser( -myflag => 1 ); | ||||
438 | |||||
439 | All arguments passed to the B<new()> constructor will be treated as | ||||
440 | key/value pairs in a hash-table. The newly constructed object will be | ||||
441 | initialized by copying the contents of the given hash-table (which may | ||||
442 | have been empty). The B<new()> constructor for this class and all of its | ||||
443 | subclasses returns a blessed reference to the initialized object (hash-table). | ||||
444 | |||||
445 | =cut | ||||
446 | |||||
447 | sub new { | ||||
448 | ## Determine if we were called via an object-ref or a classname | ||||
449 | my ($this,%params) = @_; | ||||
450 | my $class = ref($this) || $this; | ||||
451 | ## Any remaining arguments are treated as initial values for the | ||||
452 | ## hash that is used to represent this object. | ||||
453 | my $self = { %params }; | ||||
454 | ## Bless ourselves into the desired class and perform any initialization | ||||
455 | bless $self, $class; | ||||
456 | $self->initialize(); | ||||
457 | return $self; | ||||
458 | } | ||||
459 | |||||
460 | ##--------------------------------------------------------------------------- | ||||
461 | |||||
462 | =head1 B<initialize()> | ||||
463 | |||||
464 | $parser->initialize(); | ||||
465 | |||||
466 | This method performs any necessary object initialization. It takes no | ||||
467 | arguments (other than the object instance of course, which is typically | ||||
468 | copied to a local variable named C<$self>). If subclasses override this | ||||
469 | method then they I<must> be sure to invoke C<$self-E<gt>SUPER::initialize()>. | ||||
470 | |||||
471 | =cut | ||||
472 | |||||
473 | sub initialize { | ||||
474 | #my $self = shift; | ||||
475 | #return; | ||||
476 | } | ||||
477 | |||||
478 | ##--------------------------------------------------------------------------- | ||||
479 | |||||
480 | =head1 B<begin_pod()> | ||||
481 | |||||
482 | $parser->begin_pod(); | ||||
483 | |||||
484 | This method is invoked at the beginning of processing for each POD | ||||
485 | document that is encountered in the input. Subclasses should override | ||||
486 | this method to perform any per-document initialization. | ||||
487 | |||||
488 | =cut | ||||
489 | |||||
490 | sub begin_pod { | ||||
491 | #my $self = shift; | ||||
492 | #return; | ||||
493 | } | ||||
494 | |||||
495 | ##--------------------------------------------------------------------------- | ||||
496 | |||||
497 | =head1 B<begin_input()> | ||||
498 | |||||
499 | $parser->begin_input(); | ||||
500 | |||||
501 | This method is invoked by B<parse_from_filehandle()> immediately I<before> | ||||
502 | processing input from a filehandle. The base class implementation does | ||||
503 | nothing, however, subclasses may override it to perform any per-file | ||||
504 | initializations. | ||||
505 | |||||
506 | Note that if multiple files are parsed for a single POD document | ||||
507 | (perhaps the result of some future C<=include> directive) this method | ||||
508 | is invoked for every file that is parsed. If you wish to perform certain | ||||
509 | initializations once per document, then you should use B<begin_pod()>. | ||||
510 | |||||
511 | =cut | ||||
512 | |||||
513 | sub begin_input { | ||||
514 | #my $self = shift; | ||||
515 | #return; | ||||
516 | } | ||||
517 | |||||
518 | ##--------------------------------------------------------------------------- | ||||
519 | |||||
520 | =head1 B<end_input()> | ||||
521 | |||||
522 | $parser->end_input(); | ||||
523 | |||||
524 | This method is invoked by B<parse_from_filehandle()> immediately I<after> | ||||
525 | processing input from a filehandle. The base class implementation does | ||||
526 | nothing, however, subclasses may override it to perform any per-file | ||||
527 | cleanup actions. | ||||
528 | |||||
529 | Please note that if multiple files are parsed for a single POD document | ||||
530 | (perhaps the result of some kind of C<=include> directive) this method | ||||
531 | is invoked for every file that is parsed. If you wish to perform certain | ||||
532 | cleanup actions once per document, then you should use B<end_pod()>. | ||||
533 | |||||
534 | =cut | ||||
535 | |||||
536 | sub end_input { | ||||
537 | #my $self = shift; | ||||
538 | #return; | ||||
539 | } | ||||
540 | |||||
541 | ##--------------------------------------------------------------------------- | ||||
542 | |||||
543 | =head1 B<end_pod()> | ||||
544 | |||||
545 | $parser->end_pod(); | ||||
546 | |||||
547 | This method is invoked at the end of processing for each POD document | ||||
548 | that is encountered in the input. Subclasses should override this method | ||||
549 | to perform any per-document finalization. | ||||
550 | |||||
551 | =cut | ||||
552 | |||||
553 | sub end_pod { | ||||
554 | #my $self = shift; | ||||
555 | #return; | ||||
556 | } | ||||
557 | |||||
558 | ##--------------------------------------------------------------------------- | ||||
559 | |||||
560 | =head1 B<preprocess_line()> | ||||
561 | |||||
562 | $textline = $parser->preprocess_line($text, $line_num); | ||||
563 | |||||
564 | This method should be overridden by subclasses that wish to perform | ||||
565 | any kind of preprocessing for each I<line> of input (I<before> it has | ||||
566 | been determined whether or not it is part of a POD paragraph). The | ||||
567 | parameter C<$text> is the input line; and the parameter C<$line_num> is | ||||
568 | the line number of the corresponding text line. | ||||
569 | |||||
570 | The value returned should correspond to the new text to use in its | ||||
571 | place. If the empty string or an undefined value is returned then no | ||||
572 | further processing will be performed for this line. | ||||
573 | |||||
574 | Please note that the B<preprocess_line()> method is invoked I<before> | ||||
575 | the B<preprocess_paragraph()> method. After all (possibly preprocessed) | ||||
576 | lines in a paragraph have been assembled together and it has been | ||||
577 | determined that the paragraph is part of the POD documentation from one | ||||
578 | of the selected sections, then B<preprocess_paragraph()> is invoked. | ||||
579 | |||||
580 | The base class implementation of this method returns the given text. | ||||
581 | |||||
582 | =cut | ||||
583 | |||||
584 | sub preprocess_line { | ||||
585 | my ($self, $text, $line_num) = @_; | ||||
586 | return $text; | ||||
587 | } | ||||
588 | |||||
589 | ##--------------------------------------------------------------------------- | ||||
590 | |||||
591 | =head1 B<preprocess_paragraph()> | ||||
592 | |||||
593 | $textblock = $parser->preprocess_paragraph($text, $line_num); | ||||
594 | |||||
595 | This method should be overridden by subclasses that wish to perform any | ||||
596 | kind of preprocessing for each block (paragraph) of POD documentation | ||||
597 | that appears in the input stream. The parameter C<$text> is the POD | ||||
598 | paragraph from the input file; and the parameter C<$line_num> is the | ||||
599 | line number for the beginning of the corresponding paragraph. | ||||
600 | |||||
601 | The value returned should correspond to the new text to use in its | ||||
602 | place If the empty string is returned or an undefined value is | ||||
603 | returned, then the given C<$text> is ignored (not processed). | ||||
604 | |||||
605 | This method is invoked after gathering up all the lines in a paragraph | ||||
606 | and after determining the cutting state of the paragraph, | ||||
607 | but before trying to further parse or interpret them. After | ||||
608 | B<preprocess_paragraph()> returns, the current cutting state (which | ||||
609 | is returned by C<$self-E<gt>cutting()>) is examined. If it evaluates | ||||
610 | to true then input text (including the given C<$text>) is cut (not | ||||
611 | processed) until the next POD directive is encountered. | ||||
612 | |||||
613 | Please note that the B<preprocess_line()> method is invoked I<before> | ||||
614 | the B<preprocess_paragraph()> method. After all (possibly preprocessed) | ||||
615 | lines in a paragraph have been assembled together and either it has been | ||||
616 | determined that the paragraph is part of the POD documentation from one | ||||
617 | of the selected sections or the C<-want_nonPODs> option is true, | ||||
618 | then B<preprocess_paragraph()> is invoked. | ||||
619 | |||||
620 | The base class implementation of this method returns the given text. | ||||
621 | |||||
622 | =cut | ||||
623 | |||||
624 | sub preprocess_paragraph { | ||||
625 | my ($self, $text, $line_num) = @_; | ||||
626 | return $text; | ||||
627 | } | ||||
628 | |||||
629 | ############################################################################# | ||||
630 | |||||
631 | =head1 METHODS FOR PARSING AND PROCESSING | ||||
632 | |||||
633 | B<Pod::Parser> provides several methods to process input text. These | ||||
634 | methods typically won't need to be overridden (and in some cases they | ||||
635 | can't be overridden), but subclasses may want to invoke them to exploit | ||||
636 | their functionality. | ||||
637 | |||||
638 | =cut | ||||
639 | |||||
640 | ##--------------------------------------------------------------------------- | ||||
641 | |||||
642 | =head1 B<parse_text()> | ||||
643 | |||||
644 | $ptree1 = $parser->parse_text($text, $line_num); | ||||
645 | $ptree2 = $parser->parse_text({%opts}, $text, $line_num); | ||||
646 | $ptree3 = $parser->parse_text(\%opts, $text, $line_num); | ||||
647 | |||||
648 | This method is useful if you need to perform your own interpolation | ||||
649 | of interior sequences and can't rely upon B<interpolate> to expand | ||||
650 | them in simple bottom-up order. | ||||
651 | |||||
652 | The parameter C<$text> is a string or block of text to be parsed | ||||
653 | for interior sequences; and the parameter C<$line_num> is the | ||||
654 | line number corresponding to the beginning of C<$text>. | ||||
655 | |||||
656 | B<parse_text()> will parse the given text into a parse-tree of "nodes." | ||||
657 | and interior-sequences. Each "node" in the parse tree is either a | ||||
658 | text-string, or a B<Pod::InteriorSequence>. The result returned is a | ||||
659 | parse-tree of type B<Pod::ParseTree>. Please see L<Pod::InputObjects> | ||||
660 | for more information about B<Pod::InteriorSequence> and B<Pod::ParseTree>. | ||||
661 | |||||
662 | If desired, an optional hash-ref may be specified as the first argument | ||||
663 | to customize certain aspects of the parse-tree that is created and | ||||
664 | returned. The set of recognized option keywords are: | ||||
665 | |||||
666 | =over 3 | ||||
667 | |||||
668 | =item B<-expand_seq> =E<gt> I<code-ref>|I<method-name> | ||||
669 | |||||
670 | Normally, the parse-tree returned by B<parse_text()> will contain an | ||||
671 | unexpanded C<Pod::InteriorSequence> object for each interior-sequence | ||||
672 | encountered. Specifying B<-expand_seq> tells B<parse_text()> to "expand" | ||||
673 | every interior-sequence it sees by invoking the referenced function | ||||
674 | (or named method of the parser object) and using the return value as the | ||||
675 | expanded result. | ||||
676 | |||||
677 | If a subroutine reference was given, it is invoked as: | ||||
678 | |||||
679 | &$code_ref( $parser, $sequence ) | ||||
680 | |||||
681 | and if a method-name was given, it is invoked as: | ||||
682 | |||||
683 | $parser->method_name( $sequence ) | ||||
684 | |||||
685 | where C<$parser> is a reference to the parser object, and C<$sequence> | ||||
686 | is a reference to the interior-sequence object. | ||||
687 | [I<NOTE>: If the B<interior_sequence()> method is specified, then it is | ||||
688 | invoked according to the interface specified in L<"interior_sequence()">]. | ||||
689 | |||||
690 | =item B<-expand_text> =E<gt> I<code-ref>|I<method-name> | ||||
691 | |||||
692 | Normally, the parse-tree returned by B<parse_text()> will contain a | ||||
693 | text-string for each contiguous sequence of characters outside of an | ||||
694 | interior-sequence. Specifying B<-expand_text> tells B<parse_text()> to | ||||
695 | "preprocess" every such text-string it sees by invoking the referenced | ||||
696 | function (or named method of the parser object) and using the return value | ||||
697 | as the preprocessed (or "expanded") result. [Note that if the result is | ||||
698 | an interior-sequence, then it will I<not> be expanded as specified by the | ||||
699 | B<-expand_seq> option; Any such recursive expansion needs to be handled by | ||||
700 | the specified callback routine.] | ||||
701 | |||||
702 | If a subroutine reference was given, it is invoked as: | ||||
703 | |||||
704 | &$code_ref( $parser, $text, $ptree_node ) | ||||
705 | |||||
706 | and if a method-name was given, it is invoked as: | ||||
707 | |||||
708 | $parser->method_name( $text, $ptree_node ) | ||||
709 | |||||
710 | where C<$parser> is a reference to the parser object, C<$text> is the | ||||
711 | text-string encountered, and C<$ptree_node> is a reference to the current | ||||
712 | node in the parse-tree (usually an interior-sequence object or else the | ||||
713 | top-level node of the parse-tree). | ||||
714 | |||||
715 | =item B<-expand_ptree> =E<gt> I<code-ref>|I<method-name> | ||||
716 | |||||
717 | Rather than returning a C<Pod::ParseTree>, pass the parse-tree as an | ||||
718 | argument to the referenced subroutine (or named method of the parser | ||||
719 | object) and return the result instead of the parse-tree object. | ||||
720 | |||||
721 | If a subroutine reference was given, it is invoked as: | ||||
722 | |||||
723 | &$code_ref( $parser, $ptree ) | ||||
724 | |||||
725 | and if a method-name was given, it is invoked as: | ||||
726 | |||||
727 | $parser->method_name( $ptree ) | ||||
728 | |||||
729 | where C<$parser> is a reference to the parser object, and C<$ptree> | ||||
730 | is a reference to the parse-tree object. | ||||
731 | |||||
732 | =back | ||||
733 | |||||
734 | =cut | ||||
735 | |||||
736 | sub parse_text { | ||||
737 | my $self = shift; | ||||
738 | local $_ = ''; | ||||
739 | |||||
740 | ## Get options and set any defaults | ||||
741 | my %opts = (ref $_[0]) ? %{ shift() } : (); | ||||
742 | my $expand_seq = $opts{'-expand_seq'} || undef; | ||||
743 | my $expand_text = $opts{'-expand_text'} || undef; | ||||
744 | my $expand_ptree = $opts{'-expand_ptree'} || undef; | ||||
745 | |||||
746 | my $text = shift; | ||||
747 | my $line = shift; | ||||
748 | my $file = $self->input_file(); | ||||
749 | my $cmd = ""; | ||||
750 | |||||
751 | ## Convert method calls into closures, for our convenience | ||||
752 | my $xseq_sub = $expand_seq; | ||||
753 | my $xtext_sub = $expand_text; | ||||
754 | my $xptree_sub = $expand_ptree; | ||||
755 | if (defined $expand_seq and $expand_seq eq 'interior_sequence') { | ||||
756 | ## If 'interior_sequence' is the method to use, we have to pass | ||||
757 | ## more than just the sequence object, we also need to pass the | ||||
758 | ## sequence name and text. | ||||
759 | $xseq_sub = sub { | ||||
760 | my ($sself, $iseq) = @_; | ||||
761 | my $args = join('', $iseq->parse_tree->children); | ||||
762 | return $sself->interior_sequence($iseq->name, $args, $iseq); | ||||
763 | }; | ||||
764 | } | ||||
765 | ref $xseq_sub or $xseq_sub = sub { shift()->$expand_seq(@_) }; | ||||
766 | ref $xtext_sub or $xtext_sub = sub { shift()->$expand_text(@_) }; | ||||
767 | ref $xptree_sub or $xptree_sub = sub { shift()->$expand_ptree(@_) }; | ||||
768 | |||||
769 | ## Keep track of the "current" interior sequence, and maintain a stack | ||||
770 | ## of "in progress" sequences. | ||||
771 | ## | ||||
772 | ## NOTE that we push our own "accumulator" at the very beginning of the | ||||
773 | ## stack. It's really a parse-tree, not a sequence; but it implements | ||||
774 | ## the methods we need so we can use it to gather-up all the sequences | ||||
775 | ## and strings we parse. Thus, by the end of our parsing, it should be | ||||
776 | ## the only thing left on our stack and all we have to do is return it! | ||||
777 | ## | ||||
778 | my $seq = Pod::ParseTree->new(); | ||||
779 | my @seq_stack = ($seq); | ||||
780 | my ($ldelim, $rdelim) = ('', ''); | ||||
781 | |||||
782 | ## Iterate over all sequence starts text (NOTE: split with | ||||
783 | ## capturing parens keeps the delimiters) | ||||
784 | $_ = $text; | ||||
785 | my @tokens = split /([A-Z]<(?:<+(?:\r?\n|[ \t]))?)/; | ||||
786 | while ( @tokens ) { | ||||
787 | $_ = shift @tokens; | ||||
788 | ## Look for the beginning of a sequence | ||||
789 | if ( /^([A-Z])(<(?:<+(?:\r?\n|[ \t]))?)$/ ) { | ||||
790 | ## Push a new sequence onto the stack of those "in-progress" | ||||
791 | my $ldelim_orig; | ||||
792 | ($cmd, $ldelim_orig) = ($1, $2); | ||||
793 | ($ldelim = $ldelim_orig) =~ s/\s+$//; | ||||
794 | ($rdelim = $ldelim) =~ tr/</>/; | ||||
795 | $seq = Pod::InteriorSequence->new( | ||||
796 | -name => $cmd, | ||||
797 | -ldelim => $ldelim_orig, -rdelim => $rdelim, | ||||
798 | -file => $file, -line => $line | ||||
799 | ); | ||||
800 | (@seq_stack > 1) and $seq->nested($seq_stack[-1]); | ||||
801 | push @seq_stack, $seq; | ||||
802 | } | ||||
803 | ## Look for sequence ending | ||||
804 | elsif ( @seq_stack > 1 ) { | ||||
805 | ## Make sure we match the right kind of closing delimiter | ||||
806 | my ($seq_end, $post_seq) = ('', ''); | ||||
807 | if ( ($ldelim eq '<' and /\A(.*?)(>)/s) | ||||
808 | or /\A(.*?)(\s+$rdelim)/s ) | ||||
809 | { | ||||
810 | ## Found end-of-sequence, capture the interior and the | ||||
811 | ## closing the delimiter, and put the rest back on the | ||||
812 | ## token-list | ||||
813 | $post_seq = substr($_, length($1) + length($2)); | ||||
814 | ($_, $seq_end) = ($1, $2); | ||||
815 | (length $post_seq) and unshift @tokens, $post_seq; | ||||
816 | } | ||||
817 | if (length) { | ||||
818 | ## In the middle of a sequence, append this text to it, and | ||||
819 | ## dont forget to "expand" it if that's what the caller wanted | ||||
820 | $seq->append($expand_text ? &$xtext_sub($self,$_,$seq) : $_); | ||||
821 | $_ .= $seq_end; | ||||
822 | } | ||||
823 | if (length $seq_end) { | ||||
824 | ## End of current sequence, record terminating delimiter | ||||
825 | $seq->rdelim($seq_end); | ||||
826 | ## Pop it off the stack of "in progress" sequences | ||||
827 | pop @seq_stack; | ||||
828 | ## Append result to its parent in current parse tree | ||||
829 | $seq_stack[-1]->append($expand_seq ? &$xseq_sub($self,$seq) | ||||
830 | : $seq); | ||||
831 | ## Remember the current cmd-name and left-delimiter | ||||
832 | if(@seq_stack > 1) { | ||||
833 | $cmd = $seq_stack[-1]->name; | ||||
834 | $ldelim = $seq_stack[-1]->ldelim; | ||||
835 | $rdelim = $seq_stack[-1]->rdelim; | ||||
836 | } else { | ||||
837 | $cmd = $ldelim = $rdelim = ''; | ||||
838 | } | ||||
839 | } | ||||
840 | } | ||||
841 | elsif (length) { | ||||
842 | ## In the middle of a sequence, append this text to it, and | ||||
843 | ## dont forget to "expand" it if that's what the caller wanted | ||||
844 | $seq->append($expand_text ? &$xtext_sub($self,$_,$seq) : $_); | ||||
845 | } | ||||
846 | ## Keep track of line count | ||||
847 | $line += /\n/; | ||||
848 | ## Remember the "current" sequence | ||||
849 | $seq = $seq_stack[-1]; | ||||
850 | } | ||||
851 | |||||
852 | ## Handle unterminated sequences | ||||
853 | my $errorsub = (@seq_stack > 1) ? $self->errorsub() : undef; | ||||
854 | while (@seq_stack > 1) { | ||||
855 | ($cmd, $file, $line) = ($seq->name, $seq->file_line); | ||||
856 | $ldelim = $seq->ldelim; | ||||
857 | ($rdelim = $ldelim) =~ tr/</>/; | ||||
858 | $rdelim =~ s/^(\S+)(\s*)$/$2$1/; | ||||
859 | pop @seq_stack; | ||||
860 | my $errmsg = "*** ERROR: unterminated ${cmd}${ldelim}...${rdelim}". | ||||
861 | " at line $line in file $file\n"; | ||||
862 | (ref $errorsub) and &{$errorsub}($errmsg) | ||||
863 | or (defined $errorsub) and $self->$errorsub($errmsg) | ||||
864 | or carp($errmsg); | ||||
865 | $seq_stack[-1]->append($expand_seq ? &$xseq_sub($self,$seq) : $seq); | ||||
866 | $seq = $seq_stack[-1]; | ||||
867 | } | ||||
868 | |||||
869 | ## Return the resulting parse-tree | ||||
870 | my $ptree = (pop @seq_stack)->parse_tree; | ||||
871 | return $expand_ptree ? &$xptree_sub($self, $ptree) : $ptree; | ||||
872 | } | ||||
873 | |||||
874 | ##--------------------------------------------------------------------------- | ||||
875 | |||||
876 | =head1 B<interpolate()> | ||||
877 | |||||
878 | $textblock = $parser->interpolate($text, $line_num); | ||||
879 | |||||
880 | This method translates all text (including any embedded interior sequences) | ||||
881 | in the given text string C<$text> and returns the interpolated result. The | ||||
882 | parameter C<$line_num> is the line number corresponding to the beginning | ||||
883 | of C<$text>. | ||||
884 | |||||
885 | B<interpolate()> merely invokes a private method to recursively expand | ||||
886 | nested interior sequences in bottom-up order (innermost sequences are | ||||
887 | expanded first). If there is a need to expand nested sequences in | ||||
888 | some alternate order, use B<parse_text> instead. | ||||
889 | |||||
890 | =cut | ||||
891 | |||||
892 | sub interpolate { | ||||
893 | my($self, $text, $line_num) = @_; | ||||
894 | my %parse_opts = ( -expand_seq => 'interior_sequence' ); | ||||
895 | my $ptree = $self->parse_text( \%parse_opts, $text, $line_num ); | ||||
896 | return join '', $ptree->children(); | ||||
897 | } | ||||
898 | |||||
899 | ##--------------------------------------------------------------------------- | ||||
900 | |||||
901 | =begin __PRIVATE__ | ||||
902 | |||||
903 | =head1 B<parse_paragraph()> | ||||
904 | |||||
905 | $parser->parse_paragraph($text, $line_num); | ||||
906 | |||||
907 | This method takes the text of a POD paragraph to be processed, along | ||||
908 | with its corresponding line number, and invokes the appropriate method | ||||
909 | (one of B<command()>, B<verbatim()>, or B<textblock()>). | ||||
910 | |||||
911 | For performance reasons, this method is invoked directly without any | ||||
912 | dynamic lookup; Hence subclasses may I<not> override it! | ||||
913 | |||||
914 | =end __PRIVATE__ | ||||
915 | |||||
916 | =cut | ||||
917 | |||||
918 | sub parse_paragraph { | ||||
919 | my ($self, $text, $line_num) = @_; | ||||
920 | local *myData = $self; ## alias to avoid deref-ing overhead | ||||
921 | local *myOpts = ($myData{_PARSEOPTS} ||= {}); ## get parse-options | ||||
922 | local $_; | ||||
923 | |||||
924 | ## See if we want to preprocess nonPOD paragraphs as well as POD ones. | ||||
925 | my $wantNonPods = $myOpts{'-want_nonPODs'}; | ||||
926 | |||||
927 | ## Update cutting status | ||||
928 | $myData{_CUTTING} = 0 if $text =~ /^={1,2}\S/; | ||||
929 | |||||
930 | ## Perform any desired preprocessing if we wanted it this early | ||||
931 | $wantNonPods and $text = $self->preprocess_paragraph($text, $line_num); | ||||
932 | |||||
933 | ## Ignore up until next POD directive if we are cutting | ||||
934 | return if $myData{_CUTTING}; | ||||
935 | |||||
936 | ## Now we know this is block of text in a POD section! | ||||
937 | |||||
938 | ##----------------------------------------------------------------- | ||||
939 | ## This is a hook (hack ;-) for Pod::Select to do its thing without | ||||
940 | ## having to override methods, but also without Pod::Parser assuming | ||||
941 | ## $self is an instance of Pod::Select (if the _SELECTED_SECTIONS | ||||
942 | ## field exists then we assume there is an is_selected() method for | ||||
943 | ## us to invoke (calling $self->can('is_selected') could verify this | ||||
944 | ## but that is more overhead than I want to incur) | ||||
945 | ##----------------------------------------------------------------- | ||||
946 | |||||
947 | ## Ignore this block if it isnt in one of the selected sections | ||||
948 | if (exists $myData{_SELECTED_SECTIONS}) { | ||||
949 | $self->is_selected($text) or return ($myData{_CUTTING} = 1); | ||||
950 | } | ||||
951 | |||||
952 | ## If we havent already, perform any desired preprocessing and | ||||
953 | ## then re-check the "cutting" state | ||||
954 | unless ($wantNonPods) { | ||||
955 | $text = $self->preprocess_paragraph($text, $line_num); | ||||
956 | return 1 unless ((defined $text) and (length $text)); | ||||
957 | return 1 if ($myData{_CUTTING}); | ||||
958 | } | ||||
959 | |||||
960 | ## Look for one of the three types of paragraphs | ||||
961 | my ($pfx, $cmd, $arg, $sep) = ('', '', '', ''); | ||||
962 | my $pod_para = undef; | ||||
963 | if ($text =~ /^(={1,2})(?=\S)/) { | ||||
964 | ## Looks like a command paragraph. Capture the command prefix used | ||||
965 | ## ("=" or "=="), as well as the command-name, its paragraph text, | ||||
966 | ## and whatever sequence of characters was used to separate them | ||||
967 | $pfx = $1; | ||||
968 | $_ = substr($text, length $pfx); | ||||
969 | ($cmd, $sep, $text) = split /(\s+)/, $_, 2; | ||||
970 | $sep = '' unless defined $sep; | ||||
971 | $text = '' unless defined $text; | ||||
972 | ## If this is a "cut" directive then we dont need to do anything | ||||
973 | ## except return to "cutting" mode. | ||||
974 | if ($cmd eq 'cut') { | ||||
975 | $myData{_CUTTING} = 1; | ||||
976 | return unless $myOpts{'-process_cut_cmd'}; | ||||
977 | } | ||||
978 | } | ||||
979 | ## Save the attributes indicating how the command was specified. | ||||
980 | $pod_para = new Pod::Paragraph( | ||||
981 | -name => $cmd, | ||||
982 | -text => $text, | ||||
983 | -prefix => $pfx, | ||||
984 | -separator => $sep, | ||||
985 | -file => $myData{_INFILE}, | ||||
986 | -line => $line_num | ||||
987 | ); | ||||
988 | # ## Invoke appropriate callbacks | ||||
989 | # if (exists $myData{_CALLBACKS}) { | ||||
990 | # ## Look through the callback list, invoke callbacks, | ||||
991 | # ## then see if we need to do the default actions | ||||
992 | # ## (invoke_callbacks will return true if we do). | ||||
993 | # return 1 unless $self->invoke_callbacks($cmd, $text, $line_num, $pod_para); | ||||
994 | # } | ||||
995 | |||||
996 | # If the last paragraph ended in whitespace, and we're not between verbatim blocks, carp | ||||
997 | if ($myData{_WHITESPACE} and $myOpts{'-warnings'} | ||||
998 | and not ($text =~ /^\s+/ and ($myData{_PREVIOUS}||"") eq "verbatim")) { | ||||
999 | my $errorsub = $self->errorsub(); | ||||
1000 | my $line = $line_num - 1; | ||||
1001 | my $errmsg = "*** WARNING: line containing nothing but whitespace". | ||||
1002 | " in paragraph at line $line in file $myData{_INFILE}\n"; | ||||
1003 | (ref $errorsub) and &{$errorsub}($errmsg) | ||||
1004 | or (defined $errorsub) and $self->$errorsub($errmsg) | ||||
1005 | or carp($errmsg); | ||||
1006 | } | ||||
1007 | |||||
1008 | if (length $cmd) { | ||||
1009 | ## A command paragraph | ||||
1010 | $self->command($cmd, $text, $line_num, $pod_para); | ||||
1011 | $myData{_PREVIOUS} = $cmd; | ||||
1012 | } | ||||
1013 | elsif ($text =~ /^\s+/) { | ||||
1014 | ## Indented text - must be a verbatim paragraph | ||||
1015 | $self->verbatim($text, $line_num, $pod_para); | ||||
1016 | $myData{_PREVIOUS} = "verbatim"; | ||||
1017 | } | ||||
1018 | else { | ||||
1019 | ## Looks like an ordinary block of text | ||||
1020 | $self->textblock($text, $line_num, $pod_para); | ||||
1021 | $myData{_PREVIOUS} = "textblock"; | ||||
1022 | } | ||||
1023 | |||||
1024 | # Update the whitespace for the next time around | ||||
1025 | #$myData{_WHITESPACE} = $text =~ /^[^\S\r\n]+\Z/m ? 1 : 0; | ||||
1026 | $myData{_WHITESPACE} = $text =~ /^[^\S\r\n]+\r*\Z/m ? 1 : 0; | ||||
1027 | |||||
1028 | return 1; | ||||
1029 | } | ||||
1030 | |||||
1031 | ##--------------------------------------------------------------------------- | ||||
1032 | |||||
1033 | =head1 B<parse_from_filehandle()> | ||||
1034 | |||||
1035 | $parser->parse_from_filehandle($in_fh,$out_fh); | ||||
1036 | |||||
1037 | This method takes an input filehandle (which is assumed to already be | ||||
1038 | opened for reading) and reads the entire input stream looking for blocks | ||||
1039 | (paragraphs) of POD documentation to be processed. If no first argument | ||||
1040 | is given the default input filehandle C<STDIN> is used. | ||||
1041 | |||||
1042 | The C<$in_fh> parameter may be any object that provides a B<getline()> | ||||
1043 | method to retrieve a single line of input text (hence, an appropriate | ||||
1044 | wrapper object could be used to parse PODs from a single string or an | ||||
1045 | array of strings). | ||||
1046 | |||||
1047 | Using C<$in_fh-E<gt>getline()>, input is read line-by-line and assembled | ||||
1048 | into paragraphs or "blocks" (which are separated by lines containing | ||||
1049 | nothing but whitespace). For each block of POD documentation | ||||
1050 | encountered it will invoke a method to parse the given paragraph. | ||||
1051 | |||||
1052 | If a second argument is given then it should correspond to a filehandle where | ||||
1053 | output should be sent (otherwise the default output filehandle is | ||||
1054 | C<STDOUT> if no output filehandle is currently in use). | ||||
1055 | |||||
1056 | B<NOTE:> For performance reasons, this method caches the input stream at | ||||
1057 | the top of the stack in a local variable. Any attempts by clients to | ||||
1058 | change the stack contents during processing when in the midst executing | ||||
1059 | of this method I<will not affect> the input stream used by the current | ||||
1060 | invocation of this method. | ||||
1061 | |||||
1062 | This method does I<not> usually need to be overridden by subclasses. | ||||
1063 | |||||
1064 | =cut | ||||
1065 | |||||
1066 | sub parse_from_filehandle { | ||||
1067 | my $self = shift; | ||||
1068 | my %opts = (ref $_[0] eq 'HASH') ? %{ shift() } : (); | ||||
1069 | my ($in_fh, $out_fh) = @_; | ||||
1070 | $in_fh = \*STDIN unless ($in_fh); | ||||
1071 | local *myData = $self; ## alias to avoid deref-ing overhead | ||||
1072 | local *myOpts = ($myData{_PARSEOPTS} ||= {}); ## get parse-options | ||||
1073 | local $_; | ||||
1074 | |||||
1075 | ## Put this stream at the top of the stack and do beginning-of-input | ||||
1076 | ## processing. NOTE that $in_fh might be reset during this process. | ||||
1077 | my $topstream = $self->_push_input_stream($in_fh, $out_fh); | ||||
1078 | (exists $opts{-cutting}) and $self->cutting( $opts{-cutting} ); | ||||
1079 | |||||
1080 | ## Initialize line/paragraph | ||||
1081 | my ($textline, $paragraph) = ('', ''); | ||||
1082 | my ($nlines, $plines) = (0, 0); | ||||
1083 | |||||
1084 | ## Use <$fh> instead of $fh->getline where possible (for speed) | ||||
1085 | $_ = ref $in_fh; | ||||
1086 | my $tied_fh = (/^(?:GLOB|FileHandle|IO::\w+)$/ or tied $in_fh); | ||||
1087 | |||||
1088 | ## Read paragraphs line-by-line | ||||
1089 | while (defined ($textline = $tied_fh ? <$in_fh> : $in_fh->getline)) { | ||||
1090 | $textline = $self->preprocess_line($textline, ++$nlines); | ||||
1091 | next unless ((defined $textline) && (length $textline)); | ||||
1092 | |||||
1093 | if ((! length $paragraph) && ($textline =~ /^==/)) { | ||||
1094 | ## '==' denotes a one-line command paragraph | ||||
1095 | $paragraph = $textline; | ||||
1096 | $plines = 1; | ||||
1097 | $textline = ''; | ||||
1098 | } else { | ||||
1099 | ## Append this line to the current paragraph | ||||
1100 | $paragraph .= $textline; | ||||
1101 | ++$plines; | ||||
1102 | } | ||||
1103 | |||||
1104 | ## See if this line is blank and ends the current paragraph. | ||||
1105 | ## If it isnt, then keep iterating until it is. | ||||
1106 | next unless (($textline =~ /^[^\S\r\n]*[\r\n]*$/) | ||||
1107 | && (length $paragraph)); | ||||
1108 | |||||
1109 | ## Now process the paragraph | ||||
1110 | parse_paragraph($self, $paragraph, ($nlines - $plines) + 1); | ||||
1111 | $paragraph = ''; | ||||
1112 | $plines = 0; | ||||
1113 | } | ||||
1114 | ## Dont forget about the last paragraph in the file | ||||
1115 | if (length $paragraph) { | ||||
1116 | parse_paragraph($self, $paragraph, ($nlines - $plines) + 1) | ||||
1117 | } | ||||
1118 | |||||
1119 | ## Now pop the input stream off the top of the input stack. | ||||
1120 | $self->_pop_input_stream(); | ||||
1121 | } | ||||
1122 | |||||
1123 | ##--------------------------------------------------------------------------- | ||||
1124 | |||||
1125 | =head1 B<parse_from_file()> | ||||
1126 | |||||
1127 | $parser->parse_from_file($filename,$outfile); | ||||
1128 | |||||
1129 | This method takes a filename and does the following: | ||||
1130 | |||||
1131 | =over 2 | ||||
1132 | |||||
1133 | =item * | ||||
1134 | |||||
1135 | opens the input and output files for reading | ||||
1136 | (creating the appropriate filehandles) | ||||
1137 | |||||
1138 | =item * | ||||
1139 | |||||
1140 | invokes the B<parse_from_filehandle()> method passing it the | ||||
1141 | corresponding input and output filehandles. | ||||
1142 | |||||
1143 | =item * | ||||
1144 | |||||
1145 | closes the input and output files. | ||||
1146 | |||||
1147 | =back | ||||
1148 | |||||
1149 | If the special input filename "-" or "<&STDIN" is given then the STDIN | ||||
1150 | filehandle is used for input (and no open or close is performed). If no | ||||
1151 | input filename is specified then "-" is implied. Filehandle references, | ||||
1152 | or objects that support the regular IO operations (like C<E<lt>$fhE<gt>> | ||||
1153 | or C<$fh-<Egt>getline>) are also accepted; the handles must already be | ||||
1154 | opened. | ||||
1155 | |||||
1156 | If a second argument is given then it should be the name of the desired | ||||
1157 | output file. If the special output filename "-" or ">&STDOUT" is given | ||||
1158 | then the STDOUT filehandle is used for output (and no open or close is | ||||
1159 | performed). If the special output filename ">&STDERR" is given then the | ||||
1160 | STDERR filehandle is used for output (and no open or close is | ||||
1161 | performed). If no output filehandle is currently in use and no output | ||||
1162 | filename is specified, then "-" is implied. | ||||
1163 | Alternatively, filehandle references or objects that support the regular | ||||
1164 | IO operations (like C<print>, e.g. L<IO::String>) are also accepted; | ||||
1165 | the object must already be opened. | ||||
1166 | |||||
1167 | This method does I<not> usually need to be overridden by subclasses. | ||||
1168 | |||||
1169 | =cut | ||||
1170 | |||||
1171 | sub parse_from_file { | ||||
1172 | my $self = shift; | ||||
1173 | my %opts = (ref $_[0] eq 'HASH') ? %{ shift() } : (); | ||||
1174 | my ($infile, $outfile) = @_; | ||||
1175 | my ($in_fh, $out_fh); | ||||
1176 | if ($] < 5.006) { | ||||
1177 | ($in_fh, $out_fh) = (gensym(), gensym()); | ||||
1178 | } | ||||
1179 | my ($close_input, $close_output) = (0, 0); | ||||
1180 | local *myData = $self; | ||||
1181 | local *_; | ||||
1182 | |||||
1183 | ## Is $infile a filename or a (possibly implied) filehandle | ||||
1184 | if (defined $infile && ref $infile) { | ||||
1185 | if (ref($infile) =~ /^(SCALAR|ARRAY|HASH|CODE|REF)$/) { | ||||
1186 | croak "Input from $1 reference not supported!\n"; | ||||
1187 | } | ||||
1188 | ## Must be a filehandle-ref (or else assume its a ref to an object | ||||
1189 | ## that supports the common IO read operations). | ||||
1190 | $myData{_INFILE} = ${$infile}; | ||||
1191 | $in_fh = $infile; | ||||
1192 | } | ||||
1193 | elsif (!defined($infile) || !length($infile) || ($infile eq '-') | ||||
1194 | || ($infile =~ /^<&(?:STDIN|0)$/i)) | ||||
1195 | { | ||||
1196 | ## Not a filename, just a string implying STDIN | ||||
1197 | $infile ||= '-'; | ||||
1198 | $myData{_INFILE} = '<standard input>'; | ||||
1199 | $in_fh = \*STDIN; | ||||
1200 | } | ||||
1201 | else { | ||||
1202 | ## We have a filename, open it for reading | ||||
1203 | $myData{_INFILE} = $infile; | ||||
1204 | open($in_fh, "< $infile") or | ||||
1205 | croak "Can't open $infile for reading: $!\n"; | ||||
1206 | $close_input = 1; | ||||
1207 | } | ||||
1208 | |||||
1209 | ## NOTE: we need to be *very* careful when "defaulting" the output | ||||
1210 | ## file. We only want to use a default if this is the beginning of | ||||
1211 | ## the entire document (but *not* if this is an included file). We | ||||
1212 | ## determine this by seeing if the input stream stack has been set-up | ||||
1213 | ## already | ||||
1214 | |||||
1215 | ## Is $outfile a filename, a (possibly implied) filehandle, maybe a ref? | ||||
1216 | if (ref $outfile) { | ||||
1217 | ## we need to check for ref() first, as other checks involve reading | ||||
1218 | if (ref($outfile) =~ /^(ARRAY|HASH|CODE)$/) { | ||||
1219 | croak "Output to $1 reference not supported!\n"; | ||||
1220 | } | ||||
1221 | elsif (ref($outfile) eq 'SCALAR') { | ||||
1222 | # # NOTE: IO::String isn't a part of the perl distribution, | ||||
1223 | # # so probably we shouldn't support this case... | ||||
1224 | # require IO::String; | ||||
1225 | # $myData{_OUTFILE} = "$outfile"; | ||||
1226 | # $out_fh = IO::String->new($outfile); | ||||
1227 | croak "Output to SCALAR reference not supported!\n"; | ||||
1228 | } | ||||
1229 | else { | ||||
1230 | ## Must be a filehandle-ref (or else assume its a ref to an | ||||
1231 | ## object that supports the common IO write operations). | ||||
1232 | $myData{_OUTFILE} = ${$outfile}; | ||||
1233 | $out_fh = $outfile; | ||||
1234 | } | ||||
1235 | } | ||||
1236 | elsif (!defined($outfile) || !length($outfile) || ($outfile eq '-') | ||||
1237 | || ($outfile =~ /^>&?(?:STDOUT|1)$/i)) | ||||
1238 | { | ||||
1239 | if (defined $myData{_TOP_STREAM}) { | ||||
1240 | $out_fh = $myData{_OUTPUT}; | ||||
1241 | } | ||||
1242 | else { | ||||
1243 | ## Not a filename, just a string implying STDOUT | ||||
1244 | $outfile ||= '-'; | ||||
1245 | $myData{_OUTFILE} = '<standard output>'; | ||||
1246 | $out_fh = \*STDOUT; | ||||
1247 | } | ||||
1248 | } | ||||
1249 | elsif ($outfile =~ /^>&(STDERR|2)$/i) { | ||||
1250 | ## Not a filename, just a string implying STDERR | ||||
1251 | $myData{_OUTFILE} = '<standard error>'; | ||||
1252 | $out_fh = \*STDERR; | ||||
1253 | } | ||||
1254 | else { | ||||
1255 | ## We have a filename, open it for writing | ||||
1256 | $myData{_OUTFILE} = $outfile; | ||||
1257 | (-d $outfile) and croak "$outfile is a directory, not POD input!\n"; | ||||
1258 | open($out_fh, "> $outfile") or | ||||
1259 | croak "Can't open $outfile for writing: $!\n"; | ||||
1260 | $close_output = 1; | ||||
1261 | } | ||||
1262 | |||||
1263 | ## Whew! That was a lot of work to set up reasonably/robust behavior | ||||
1264 | ## in the case of a non-filename for reading and writing. Now we just | ||||
1265 | ## have to parse the input and close the handles when we're finished. | ||||
1266 | $self->parse_from_filehandle(\%opts, $in_fh, $out_fh); | ||||
1267 | |||||
1268 | $close_input and | ||||
1269 | close($in_fh) || croak "Can't close $infile after reading: $!\n"; | ||||
1270 | $close_output and | ||||
1271 | close($out_fh) || croak "Can't close $outfile after writing: $!\n"; | ||||
1272 | } | ||||
1273 | |||||
1274 | ############################################################################# | ||||
1275 | |||||
1276 | =head1 ACCESSOR METHODS | ||||
1277 | |||||
1278 | Clients of B<Pod::Parser> should use the following methods to access | ||||
1279 | instance data fields: | ||||
1280 | |||||
1281 | =cut | ||||
1282 | |||||
1283 | ##--------------------------------------------------------------------------- | ||||
1284 | |||||
1285 | =head1 B<errorsub()> | ||||
1286 | |||||
1287 | $parser->errorsub("method_name"); | ||||
1288 | $parser->errorsub(\&warn_user); | ||||
1289 | $parser->errorsub(sub { print STDERR, @_ }); | ||||
1290 | |||||
1291 | Specifies the method or subroutine to use when printing error messages | ||||
1292 | about POD syntax. The supplied method/subroutine I<must> return TRUE upon | ||||
1293 | successful printing of the message. If C<undef> is given, then the B<carp> | ||||
1294 | builtin is used to issue error messages (this is the default behavior). | ||||
1295 | |||||
1296 | my $errorsub = $parser->errorsub() | ||||
1297 | my $errmsg = "This is an error message!\n" | ||||
1298 | (ref $errorsub) and &{$errorsub}($errmsg) | ||||
1299 | or (defined $errorsub) and $parser->$errorsub($errmsg) | ||||
1300 | or carp($errmsg); | ||||
1301 | |||||
1302 | Returns a method name, or else a reference to the user-supplied subroutine | ||||
1303 | used to print error messages. Returns C<undef> if the B<carp> builtin | ||||
1304 | is used to issue error messages (this is the default behavior). | ||||
1305 | |||||
1306 | =cut | ||||
1307 | |||||
1308 | sub errorsub { | ||||
1309 | return (@_ > 1) ? ($_[0]->{_ERRORSUB} = $_[1]) : $_[0]->{_ERRORSUB}; | ||||
1310 | } | ||||
1311 | |||||
1312 | ##--------------------------------------------------------------------------- | ||||
1313 | |||||
1314 | =head1 B<cutting()> | ||||
1315 | |||||
1316 | $boolean = $parser->cutting(); | ||||
1317 | |||||
1318 | Returns the current C<cutting> state: a boolean-valued scalar which | ||||
1319 | evaluates to true if text from the input file is currently being "cut" | ||||
1320 | (meaning it is I<not> considered part of the POD document). | ||||
1321 | |||||
1322 | $parser->cutting($boolean); | ||||
1323 | |||||
1324 | Sets the current C<cutting> state to the given value and returns the | ||||
1325 | result. | ||||
1326 | |||||
1327 | =cut | ||||
1328 | |||||
1329 | sub cutting { | ||||
1330 | return (@_ > 1) ? ($_[0]->{_CUTTING} = $_[1]) : $_[0]->{_CUTTING}; | ||||
1331 | } | ||||
1332 | |||||
1333 | ##--------------------------------------------------------------------------- | ||||
1334 | |||||
1335 | ##--------------------------------------------------------------------------- | ||||
1336 | |||||
1337 | =head1 B<parseopts()> | ||||
1338 | |||||
1339 | When invoked with no additional arguments, B<parseopts> returns a hashtable | ||||
1340 | of all the current parsing options. | ||||
1341 | |||||
1342 | ## See if we are parsing non-POD sections as well as POD ones | ||||
1343 | my %opts = $parser->parseopts(); | ||||
1344 | $opts{'-want_nonPODs}' and print "-want_nonPODs\n"; | ||||
1345 | |||||
1346 | When invoked using a single string, B<parseopts> treats the string as the | ||||
1347 | name of a parse-option and returns its corresponding value if it exists | ||||
1348 | (returns C<undef> if it doesn't). | ||||
1349 | |||||
1350 | ## Did we ask to see '=cut' paragraphs? | ||||
1351 | my $want_cut = $parser->parseopts('-process_cut_cmd'); | ||||
1352 | $want_cut and print "-process_cut_cmd\n"; | ||||
1353 | |||||
1354 | When invoked with multiple arguments, B<parseopts> treats them as | ||||
1355 | key/value pairs and the specified parse-option names are set to the | ||||
1356 | given values. Any unspecified parse-options are unaffected. | ||||
1357 | |||||
1358 | ## Set them back to the default | ||||
1359 | $parser->parseopts(-warnings => 0); | ||||
1360 | |||||
1361 | When passed a single hash-ref, B<parseopts> uses that hash to completely | ||||
1362 | reset the existing parse-options, all previous parse-option values | ||||
1363 | are lost. | ||||
1364 | |||||
1365 | ## Reset all options to default | ||||
1366 | $parser->parseopts( { } ); | ||||
1367 | |||||
1368 | See L<"PARSING OPTIONS"> for more information on the name and meaning of each | ||||
1369 | parse-option currently recognized. | ||||
1370 | |||||
1371 | =cut | ||||
1372 | |||||
1373 | sub parseopts { | ||||
1374 | local *myData = shift; | ||||
1375 | local *myOpts = ($myData{_PARSEOPTS} ||= {}); | ||||
1376 | return %myOpts if (@_ == 0); | ||||
1377 | if (@_ == 1) { | ||||
1378 | local $_ = shift; | ||||
1379 | return ref($_) ? $myData{_PARSEOPTS} = $_ : $myOpts{$_}; | ||||
1380 | } | ||||
1381 | my @newOpts = (%myOpts, @_); | ||||
1382 | $myData{_PARSEOPTS} = { @newOpts }; | ||||
1383 | } | ||||
1384 | |||||
1385 | ##--------------------------------------------------------------------------- | ||||
1386 | |||||
1387 | =head1 B<output_file()> | ||||
1388 | |||||
1389 | $fname = $parser->output_file(); | ||||
1390 | |||||
1391 | Returns the name of the output file being written. | ||||
1392 | |||||
1393 | =cut | ||||
1394 | |||||
1395 | sub output_file { | ||||
1396 | return $_[0]->{_OUTFILE}; | ||||
1397 | } | ||||
1398 | |||||
1399 | ##--------------------------------------------------------------------------- | ||||
1400 | |||||
1401 | =head1 B<output_handle()> | ||||
1402 | |||||
1403 | $fhandle = $parser->output_handle(); | ||||
1404 | |||||
1405 | Returns the output filehandle object. | ||||
1406 | |||||
1407 | =cut | ||||
1408 | |||||
1409 | sub output_handle { | ||||
1410 | return $_[0]->{_OUTPUT}; | ||||
1411 | } | ||||
1412 | |||||
1413 | ##--------------------------------------------------------------------------- | ||||
1414 | |||||
1415 | =head1 B<input_file()> | ||||
1416 | |||||
1417 | $fname = $parser->input_file(); | ||||
1418 | |||||
1419 | Returns the name of the input file being read. | ||||
1420 | |||||
1421 | =cut | ||||
1422 | |||||
1423 | sub input_file { | ||||
1424 | return $_[0]->{_INFILE}; | ||||
1425 | } | ||||
1426 | |||||
1427 | ##--------------------------------------------------------------------------- | ||||
1428 | |||||
1429 | =head1 B<input_handle()> | ||||
1430 | |||||
1431 | $fhandle = $parser->input_handle(); | ||||
1432 | |||||
1433 | Returns the current input filehandle object. | ||||
1434 | |||||
1435 | =cut | ||||
1436 | |||||
1437 | sub input_handle { | ||||
1438 | return $_[0]->{_INPUT}; | ||||
1439 | } | ||||
1440 | |||||
1441 | ##--------------------------------------------------------------------------- | ||||
1442 | |||||
1443 | =begin __PRIVATE__ | ||||
1444 | |||||
1445 | =head1 B<input_streams()> | ||||
1446 | |||||
1447 | $listref = $parser->input_streams(); | ||||
1448 | |||||
1449 | Returns a reference to an array which corresponds to the stack of all | ||||
1450 | the input streams that are currently in the middle of being parsed. | ||||
1451 | |||||
1452 | While parsing an input stream, it is possible to invoke | ||||
1453 | B<parse_from_file()> or B<parse_from_filehandle()> to parse a new input | ||||
1454 | stream and then return to parsing the previous input stream. Each input | ||||
1455 | stream to be parsed is pushed onto the end of this input stack | ||||
1456 | before any of its input is read. The input stream that is currently | ||||
1457 | being parsed is always at the end (or top) of the input stack. When an | ||||
1458 | input stream has been exhausted, it is popped off the end of the | ||||
1459 | input stack. | ||||
1460 | |||||
1461 | Each element on this input stack is a reference to C<Pod::InputSource> | ||||
1462 | object. Please see L<Pod::InputObjects> for more details. | ||||
1463 | |||||
1464 | This method might be invoked when printing diagnostic messages, for example, | ||||
1465 | to obtain the name and line number of the all input files that are currently | ||||
1466 | being processed. | ||||
1467 | |||||
1468 | =end __PRIVATE__ | ||||
1469 | |||||
1470 | =cut | ||||
1471 | |||||
1472 | sub input_streams { | ||||
1473 | return $_[0]->{_INPUT_STREAMS}; | ||||
1474 | } | ||||
1475 | |||||
1476 | ##--------------------------------------------------------------------------- | ||||
1477 | |||||
1478 | =begin __PRIVATE__ | ||||
1479 | |||||
1480 | =head1 B<top_stream()> | ||||
1481 | |||||
1482 | $hashref = $parser->top_stream(); | ||||
1483 | |||||
1484 | Returns a reference to the hash-table that represents the element | ||||
1485 | that is currently at the top (end) of the input stream stack | ||||
1486 | (see L<"input_streams()">). The return value will be the C<undef> | ||||
1487 | if the input stack is empty. | ||||
1488 | |||||
1489 | This method might be used when printing diagnostic messages, for example, | ||||
1490 | to obtain the name and line number of the current input file. | ||||
1491 | |||||
1492 | =end __PRIVATE__ | ||||
1493 | |||||
1494 | =cut | ||||
1495 | |||||
1496 | sub top_stream { | ||||
1497 | return $_[0]->{_TOP_STREAM} || undef; | ||||
1498 | } | ||||
1499 | |||||
1500 | ############################################################################# | ||||
1501 | |||||
1502 | =head1 PRIVATE METHODS AND DATA | ||||
1503 | |||||
1504 | B<Pod::Parser> makes use of several internal methods and data fields | ||||
1505 | which clients should not need to see or use. For the sake of avoiding | ||||
1506 | name collisions for client data and methods, these methods and fields | ||||
1507 | are briefly discussed here. Determined hackers may obtain further | ||||
1508 | information about them by reading the B<Pod::Parser> source code. | ||||
1509 | |||||
1510 | Private data fields are stored in the hash-object whose reference is | ||||
1511 | returned by the B<new()> constructor for this class. The names of all | ||||
1512 | private methods and data-fields used by B<Pod::Parser> begin with a | ||||
1513 | prefix of "_" and match the regular expression C</^_\w+$/>. | ||||
1514 | |||||
1515 | =cut | ||||
1516 | |||||
1517 | ##--------------------------------------------------------------------------- | ||||
1518 | |||||
1519 | =begin _PRIVATE_ | ||||
1520 | |||||
1521 | =head1 B<_push_input_stream()> | ||||
1522 | |||||
1523 | $hashref = $parser->_push_input_stream($in_fh,$out_fh); | ||||
1524 | |||||
1525 | This method will push the given input stream on the input stack and | ||||
1526 | perform any necessary beginning-of-document or beginning-of-file | ||||
1527 | processing. The argument C<$in_fh> is the input stream filehandle to | ||||
1528 | push, and C<$out_fh> is the corresponding output filehandle to use (if | ||||
1529 | it is not given or is undefined, then the current output stream is used, | ||||
1530 | which defaults to standard output if it doesnt exist yet). | ||||
1531 | |||||
1532 | The value returned will be reference to the hash-table that represents | ||||
1533 | the new top of the input stream stack. I<Please Note> that it is | ||||
1534 | possible for this method to use default values for the input and output | ||||
1535 | file handles. If this happens, you will need to look at the C<INPUT> | ||||
1536 | and C<OUTPUT> instance data members to determine their new values. | ||||
1537 | |||||
1538 | =end _PRIVATE_ | ||||
1539 | |||||
1540 | =cut | ||||
1541 | |||||
1542 | sub _push_input_stream { | ||||
1543 | my ($self, $in_fh, $out_fh) = @_; | ||||
1544 | local *myData = $self; | ||||
1545 | |||||
1546 | ## Initialize stuff for the entire document if this is *not* | ||||
1547 | ## an included file. | ||||
1548 | ## | ||||
1549 | ## NOTE: we need to be *very* careful when "defaulting" the output | ||||
1550 | ## filehandle. We only want to use a default value if this is the | ||||
1551 | ## beginning of the entire document (but *not* if this is an included | ||||
1552 | ## file). | ||||
1553 | unless (defined $myData{_TOP_STREAM}) { | ||||
1554 | $out_fh = \*STDOUT unless (defined $out_fh); | ||||
1555 | $myData{_CUTTING} = 1; ## current "cutting" state | ||||
1556 | $myData{_INPUT_STREAMS} = []; ## stack of all input streams | ||||
1557 | } | ||||
1558 | |||||
1559 | ## Initialize input indicators | ||||
1560 | $myData{_OUTFILE} = '(unknown)' unless (defined $myData{_OUTFILE}); | ||||
1561 | $myData{_OUTPUT} = $out_fh if (defined $out_fh); | ||||
1562 | $in_fh = \*STDIN unless (defined $in_fh); | ||||
1563 | $myData{_INFILE} = '(unknown)' unless (defined $myData{_INFILE}); | ||||
1564 | $myData{_INPUT} = $in_fh; | ||||
1565 | my $input_top = $myData{_TOP_STREAM} | ||||
1566 | = new Pod::InputSource( | ||||
1567 | -name => $myData{_INFILE}, | ||||
1568 | -handle => $in_fh, | ||||
1569 | -was_cutting => $myData{_CUTTING} | ||||
1570 | ); | ||||
1571 | local *input_stack = $myData{_INPUT_STREAMS}; | ||||
1572 | push(@input_stack, $input_top); | ||||
1573 | |||||
1574 | ## Perform beginning-of-document and/or beginning-of-input processing | ||||
1575 | $self->begin_pod() if (@input_stack == 1); | ||||
1576 | $self->begin_input(); | ||||
1577 | |||||
1578 | return $input_top; | ||||
1579 | } | ||||
1580 | |||||
1581 | ##--------------------------------------------------------------------------- | ||||
1582 | |||||
1583 | =begin _PRIVATE_ | ||||
1584 | |||||
1585 | =head1 B<_pop_input_stream()> | ||||
1586 | |||||
1587 | $hashref = $parser->_pop_input_stream(); | ||||
1588 | |||||
1589 | This takes no arguments. It will perform any necessary end-of-file or | ||||
1590 | end-of-document processing and then pop the current input stream from | ||||
1591 | the top of the input stack. | ||||
1592 | |||||
1593 | The value returned will be reference to the hash-table that represents | ||||
1594 | the new top of the input stream stack. | ||||
1595 | |||||
1596 | =end _PRIVATE_ | ||||
1597 | |||||
1598 | =cut | ||||
1599 | |||||
1600 | sub _pop_input_stream { | ||||
1601 | my ($self) = @_; | ||||
1602 | local *myData = $self; | ||||
1603 | local *input_stack = $myData{_INPUT_STREAMS}; | ||||
1604 | |||||
1605 | ## Perform end-of-input and/or end-of-document processing | ||||
1606 | $self->end_input() if (@input_stack > 0); | ||||
1607 | $self->end_pod() if (@input_stack == 1); | ||||
1608 | |||||
1609 | ## Restore cutting state to whatever it was before we started | ||||
1610 | ## parsing this file. | ||||
1611 | my $old_top = pop(@input_stack); | ||||
1612 | $myData{_CUTTING} = $old_top->was_cutting(); | ||||
1613 | |||||
1614 | ## Dont forget to reset the input indicators | ||||
1615 | my $input_top = undef; | ||||
1616 | if (@input_stack > 0) { | ||||
1617 | $input_top = $myData{_TOP_STREAM} = $input_stack[-1]; | ||||
1618 | $myData{_INFILE} = $input_top->name(); | ||||
1619 | $myData{_INPUT} = $input_top->handle(); | ||||
1620 | } else { | ||||
1621 | delete $myData{_TOP_STREAM}; | ||||
1622 | delete $myData{_INPUT_STREAMS}; | ||||
1623 | } | ||||
1624 | |||||
1625 | return $input_top; | ||||
1626 | } | ||||
1627 | |||||
1628 | ############################################################################# | ||||
1629 | |||||
1630 | =head1 TREE-BASED PARSING | ||||
1631 | |||||
1632 | If straightforward stream-based parsing wont meet your needs (as is | ||||
1633 | likely the case for tasks such as translating PODs into structured | ||||
1634 | markup languages like HTML and XML) then you may need to take the | ||||
1635 | tree-based approach. Rather than doing everything in one pass and | ||||
1636 | calling the B<interpolate()> method to expand sequences into text, it | ||||
1637 | may be desirable to instead create a parse-tree using the B<parse_text()> | ||||
1638 | method to return a tree-like structure which may contain an ordered | ||||
1639 | list of children (each of which may be a text-string, or a similar | ||||
1640 | tree-like structure). | ||||
1641 | |||||
1642 | Pay special attention to L<"METHODS FOR PARSING AND PROCESSING"> and | ||||
1643 | to the objects described in L<Pod::InputObjects>. The former describes | ||||
1644 | the gory details and parameters for how to customize and extend the | ||||
1645 | parsing behavior of B<Pod::Parser>. B<Pod::InputObjects> provides | ||||
1646 | several objects that may all be used interchangeably as parse-trees. The | ||||
1647 | most obvious one is the B<Pod::ParseTree> object. It defines the basic | ||||
1648 | interface and functionality that all things trying to be a POD parse-tree | ||||
1649 | should do. A B<Pod::ParseTree> is defined such that each "node" may be a | ||||
1650 | text-string, or a reference to another parse-tree. Each B<Pod::Paragraph> | ||||
1651 | object and each B<Pod::InteriorSequence> object also supports the basic | ||||
1652 | parse-tree interface. | ||||
1653 | |||||
1654 | The B<parse_text()> method takes a given paragraph of text, and | ||||
1655 | returns a parse-tree that contains one or more children, each of which | ||||
1656 | may be a text-string, or an InteriorSequence object. There are also | ||||
1657 | callback-options that may be passed to B<parse_text()> to customize | ||||
1658 | the way it expands or transforms interior-sequences, as well as the | ||||
1659 | returned result. These callbacks can be used to create a parse-tree | ||||
1660 | with custom-made objects (which may or may not support the parse-tree | ||||
1661 | interface, depending on how you choose to do it). | ||||
1662 | |||||
1663 | If you wish to turn an entire POD document into a parse-tree, that process | ||||
1664 | is fairly straightforward. The B<parse_text()> method is the key to doing | ||||
1665 | this successfully. Every paragraph-callback (i.e. the polymorphic methods | ||||
1666 | for B<command()>, B<verbatim()>, and B<textblock()> paragraphs) takes | ||||
1667 | a B<Pod::Paragraph> object as an argument. Each paragraph object has a | ||||
1668 | B<parse_tree()> method that can be used to get or set a corresponding | ||||
1669 | parse-tree. So for each of those paragraph-callback methods, simply call | ||||
1670 | B<parse_text()> with the options you desire, and then use the returned | ||||
1671 | parse-tree to assign to the given paragraph object. | ||||
1672 | |||||
1673 | That gives you a parse-tree for each paragraph - so now all you need is | ||||
1674 | an ordered list of paragraphs. You can maintain that yourself as a data | ||||
1675 | element in the object/hash. The most straightforward way would be simply | ||||
1676 | to use an array-ref, with the desired set of custom "options" for each | ||||
1677 | invocation of B<parse_text>. Let's assume the desired option-set is | ||||
1678 | given by the hash C<%options>. Then we might do something like the | ||||
1679 | following: | ||||
1680 | |||||
1681 | package MyPodParserTree; | ||||
1682 | |||||
1683 | @ISA = qw( Pod::Parser ); | ||||
1684 | |||||
1685 | ... | ||||
1686 | |||||
1687 | sub begin_pod { | ||||
1688 | my $self = shift; | ||||
1689 | $self->{'-paragraphs'} = []; ## initialize paragraph list | ||||
1690 | } | ||||
1691 | |||||
1692 | sub command { | ||||
1693 | my ($parser, $command, $paragraph, $line_num, $pod_para) = @_; | ||||
1694 | my $ptree = $parser->parse_text({%options}, $paragraph, ...); | ||||
1695 | $pod_para->parse_tree( $ptree ); | ||||
1696 | push @{ $self->{'-paragraphs'} }, $pod_para; | ||||
1697 | } | ||||
1698 | |||||
1699 | sub verbatim { | ||||
1700 | my ($parser, $paragraph, $line_num, $pod_para) = @_; | ||||
1701 | push @{ $self->{'-paragraphs'} }, $pod_para; | ||||
1702 | } | ||||
1703 | |||||
1704 | sub textblock { | ||||
1705 | my ($parser, $paragraph, $line_num, $pod_para) = @_; | ||||
1706 | my $ptree = $parser->parse_text({%options}, $paragraph, ...); | ||||
1707 | $pod_para->parse_tree( $ptree ); | ||||
1708 | push @{ $self->{'-paragraphs'} }, $pod_para; | ||||
1709 | } | ||||
1710 | |||||
1711 | ... | ||||
1712 | |||||
1713 | package main; | ||||
1714 | ... | ||||
1715 | my $parser = new MyPodParserTree(...); | ||||
1716 | $parser->parse_from_file(...); | ||||
1717 | my $paragraphs_ref = $parser->{'-paragraphs'}; | ||||
1718 | |||||
1719 | Of course, in this module-author's humble opinion, I'd be more inclined to | ||||
1720 | use the existing B<Pod::ParseTree> object than a simple array. That way | ||||
1721 | everything in it, paragraphs and sequences, all respond to the same core | ||||
1722 | interface for all parse-tree nodes. The result would look something like: | ||||
1723 | |||||
1724 | package MyPodParserTree2; | ||||
1725 | |||||
1726 | ... | ||||
1727 | |||||
1728 | sub begin_pod { | ||||
1729 | my $self = shift; | ||||
1730 | $self->{'-ptree'} = new Pod::ParseTree; ## initialize parse-tree | ||||
1731 | } | ||||
1732 | |||||
1733 | sub parse_tree { | ||||
1734 | ## convenience method to get/set the parse-tree for the entire POD | ||||
1735 | (@_ > 1) and $_[0]->{'-ptree'} = $_[1]; | ||||
1736 | return $_[0]->{'-ptree'}; | ||||
1737 | } | ||||
1738 | |||||
1739 | sub command { | ||||
1740 | my ($parser, $command, $paragraph, $line_num, $pod_para) = @_; | ||||
1741 | my $ptree = $parser->parse_text({<<options>>}, $paragraph, ...); | ||||
1742 | $pod_para->parse_tree( $ptree ); | ||||
1743 | $parser->parse_tree()->append( $pod_para ); | ||||
1744 | } | ||||
1745 | |||||
1746 | sub verbatim { | ||||
1747 | my ($parser, $paragraph, $line_num, $pod_para) = @_; | ||||
1748 | $parser->parse_tree()->append( $pod_para ); | ||||
1749 | } | ||||
1750 | |||||
1751 | sub textblock { | ||||
1752 | my ($parser, $paragraph, $line_num, $pod_para) = @_; | ||||
1753 | my $ptree = $parser->parse_text({<<options>>}, $paragraph, ...); | ||||
1754 | $pod_para->parse_tree( $ptree ); | ||||
1755 | $parser->parse_tree()->append( $pod_para ); | ||||
1756 | } | ||||
1757 | |||||
1758 | ... | ||||
1759 | |||||
1760 | package main; | ||||
1761 | ... | ||||
1762 | my $parser = new MyPodParserTree2(...); | ||||
1763 | $parser->parse_from_file(...); | ||||
1764 | my $ptree = $parser->parse_tree; | ||||
1765 | ... | ||||
1766 | |||||
1767 | Now you have the entire POD document as one great big parse-tree. You | ||||
1768 | can even use the B<-expand_seq> option to B<parse_text> to insert | ||||
1769 | whole different kinds of objects. Just don't expect B<Pod::Parser> | ||||
1770 | to know what to do with them after that. That will need to be in your | ||||
1771 | code. Or, alternatively, you can insert any object you like so long as | ||||
1772 | it conforms to the B<Pod::ParseTree> interface. | ||||
1773 | |||||
1774 | One could use this to create subclasses of B<Pod::Paragraphs> and | ||||
1775 | B<Pod::InteriorSequences> for specific commands (or to create your own | ||||
1776 | custom node-types in the parse-tree) and add some kind of B<emit()> | ||||
1777 | method to each custom node/subclass object in the tree. Then all you'd | ||||
1778 | need to do is recursively walk the tree in the desired order, processing | ||||
1779 | the children (most likely from left to right) by formatting them if | ||||
1780 | they are text-strings, or by calling their B<emit()> method if they | ||||
1781 | are objects/references. | ||||
1782 | |||||
1783 | =head1 CAVEATS | ||||
1784 | |||||
1785 | Please note that POD has the notion of "paragraphs": this is something | ||||
1786 | starting I<after> a blank (read: empty) line, with the single exception | ||||
1787 | of the file start, which is also starting a paragraph. That means that | ||||
1788 | especially a command (e.g. C<=head1>) I<must> be preceded with a blank | ||||
1789 | line; C<__END__> is I<not> a blank line. | ||||
1790 | |||||
1791 | =head1 SEE ALSO | ||||
1792 | |||||
1793 | L<Pod::InputObjects>, L<Pod::Select> | ||||
1794 | |||||
1795 | B<Pod::InputObjects> defines POD input objects corresponding to | ||||
1796 | command paragraphs, parse-trees, and interior-sequences. | ||||
1797 | |||||
1798 | B<Pod::Select> is a subclass of B<Pod::Parser> which provides the ability | ||||
1799 | to selectively include and/or exclude sections of a POD document from being | ||||
1800 | translated based upon the current heading, subheading, subsubheading, etc. | ||||
1801 | |||||
1802 | =for __PRIVATE__ | ||||
1803 | B<Pod::Callbacks> is a subclass of B<Pod::Parser> which gives its users | ||||
1804 | the ability the employ I<callback functions> instead of, or in addition | ||||
1805 | to, overriding methods of the base class. | ||||
1806 | |||||
1807 | =for __PRIVATE__ | ||||
1808 | B<Pod::Select> and B<Pod::Callbacks> do not override any | ||||
1809 | methods nor do they define any new methods with the same name. Because | ||||
1810 | of this, they may I<both> be used (in combination) as a base class of | ||||
1811 | the same subclass in order to combine their functionality without | ||||
1812 | causing any namespace clashes due to multiple inheritance. | ||||
1813 | |||||
1814 | =head1 AUTHOR | ||||
1815 | |||||
1816 | Please report bugs using L<http://rt.cpan.org>. | ||||
1817 | |||||
1818 | Brad Appleton E<lt>bradapp@enteract.comE<gt> | ||||
1819 | |||||
1820 | Based on code for B<Pod::Text> written by | ||||
1821 | Tom Christiansen E<lt>tchrist@mox.perl.comE<gt> | ||||
1822 | |||||
1823 | =head1 LICENSE | ||||
1824 | |||||
1825 | Pod-Parser is free software; you can redistribute it and/or modify it | ||||
1826 | under the terms of the Artistic License distributed with Perl version | ||||
1827 | 5.000 or (at your option) any later version. Please refer to the | ||||
1828 | Artistic License that came with your Perl distribution for more | ||||
1829 | details. If your version of Perl was not distributed under the | ||||
1830 | terms of the Artistic License, than you may distribute PodParser | ||||
1831 | under the same terms as Perl itself. | ||||
1832 | |||||
1833 | =cut | ||||
1834 | |||||
1835 | 1 | 3µs | 1; | ||
1836 | # vim: ts=4 sw=4 et |