Filename | /Users/timbo/perl5/perlbrew/perls/perl-5.18.2/lib/site_perl/5.18.2/Perl/Tidy.pm |
Statements | Executed 772 statements in 93.6ms |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 1.84ms | 1.96ms | BEGIN@78 | Perl::Tidy::
1 | 1 | 1 | 628µs | 1.00ms | BEGIN@76 | Perl::Tidy::
1 | 1 | 1 | 484µs | 484µs | BEGIN@29596 | Perl::Tidy::Tokenizer::
1 | 1 | 1 | 93µs | 93µs | BEGIN@6198 | Perl::Tidy::Formatter::
1 | 1 | 1 | 90µs | 90µs | BEGIN@8401 | Perl::Tidy::Formatter::
1 | 1 | 1 | 86µs | 86µs | BEGIN@13836 | Perl::Tidy::Formatter::
1 | 1 | 1 | 50µs | 50µs | BEGIN@4762 | Perl::Tidy::HtmlWriter::
1 | 1 | 1 | 30µs | 30µs | BEGIN@13541 | Perl::Tidy::Formatter::
1 | 1 | 1 | 28µs | 28µs | BEGIN@8312 | Perl::Tidy::Formatter::
1 | 1 | 1 | 22µs | 22µs | BEGIN@215 | Perl::Tidy::
1 | 1 | 1 | 17µs | 48µs | BEGIN@5950 | Perl::Tidy::Formatter::
1 | 1 | 1 | 16µs | 23µs | BEGIN@81 | Perl::Tidy::
1 | 1 | 1 | 16µs | 86µs | BEGIN@27475 | Perl::Tidy::Tokenizer::
1 | 1 | 1 | 15µs | 15µs | BEGIN@17043 | Perl::Tidy::Formatter::
1 | 1 | 1 | 15µs | 44µs | BEGIN@22672 | Perl::Tidy::Tokenizer::
1 | 1 | 1 | 14µs | 14µs | BEGIN@56 | Perl::Tidy::
1 | 1 | 1 | 14µs | 14µs | BEGIN@14914 | Perl::Tidy::Formatter::
1 | 1 | 1 | 14µs | 14µs | BEGIN@20752 | Perl::Tidy::VerticalAligner::
1 | 1 | 1 | 12µs | 12µs | BEGIN@15938 | Perl::Tidy::Formatter::
1 | 1 | 1 | 12µs | 69µs | BEGIN@22177 | Perl::Tidy::FileWriter::
1 | 1 | 1 | 12µs | 12µs | BEGIN@11610 | Perl::Tidy::Formatter::
1 | 1 | 1 | 11µs | 11µs | BEGIN@8152 | Perl::Tidy::Formatter::
1 | 1 | 1 | 11µs | 53µs | BEGIN@22598 | Perl::Tidy::Tokenizer::
1 | 1 | 1 | 11µs | 44µs | BEGIN@22666 | Perl::Tidy::Tokenizer::
1 | 1 | 1 | 11µs | 46µs | BEGIN@19322 | Perl::Tidy::VerticalAligner::Line::
1 | 1 | 1 | 11µs | 110µs | BEGIN@2125 | Perl::Tidy::
1 | 1 | 1 | 11µs | 42µs | BEGIN@75 | Perl::Tidy::
1 | 1 | 1 | 11µs | 38µs | BEGIN@77 | Perl::Tidy::
1 | 1 | 1 | 11µs | 58µs | BEGIN@22580 | Perl::Tidy::Tokenizer::
1 | 1 | 1 | 11µs | 29µs | BEGIN@19319 | Perl::Tidy::VerticalAligner::Line::
1 | 1 | 1 | 11µs | 72µs | BEGIN@19077 | Perl::Tidy::IndentationItem::
1 | 1 | 1 | 10µs | 45µs | BEGIN@4522 | Perl::Tidy::HtmlWriter::
1 | 1 | 1 | 10µs | 22µs | BEGIN@19496 | Perl::Tidy::VerticalAligner::Alignment::
1 | 1 | 1 | 10µs | 61µs | BEGIN@19320 | Perl::Tidy::VerticalAligner::Line::
1 | 1 | 1 | 10µs | 48µs | BEGIN@3659 | Perl::Tidy::IOScalar::
1 | 1 | 1 | 10µs | 63µs | BEGIN@4303 | Perl::Tidy::Logger::
1 | 1 | 1 | 10µs | 22µs | BEGIN@59 | Perl::Tidy::
1 | 1 | 1 | 10µs | 10µs | BEGIN@12316 | Perl::Tidy::Formatter::
1 | 1 | 1 | 10µs | 10µs | BEGIN@12798 | Perl::Tidy::Formatter::
1 | 1 | 1 | 10µs | 40µs | BEGIN@5952 | Perl::Tidy::Formatter::
1 | 1 | 1 | 10µs | 51µs | BEGIN@5944 | Perl::Tidy::Formatter::
1 | 1 | 1 | 10µs | 2.07ms | BEGIN@5980 | Perl::Tidy::Formatter::
1 | 1 | 1 | 9µs | 72µs | BEGIN@79 | Perl::Tidy::
1 | 1 | 1 | 9µs | 39µs | BEGIN@3743 | Perl::Tidy::IOScalarArray::
1 | 1 | 1 | 9µs | 38µs | BEGIN@19334 | Perl::Tidy::VerticalAligner::Line::
1 | 1 | 1 | 9µs | 4.38ms | BEGIN@22601 | Perl::Tidy::Tokenizer::
1 | 1 | 1 | 9µs | 110µs | BEGIN@4525 | Perl::Tidy::HtmlWriter::
1 | 1 | 1 | 9µs | 50µs | BEGIN@22581 | Perl::Tidy::Tokenizer::
1 | 1 | 1 | 9µs | 41µs | BEGIN@5979 | Perl::Tidy::Formatter::
1 | 1 | 1 | 9µs | 38µs | BEGIN@5956 | Perl::Tidy::Formatter::
1 | 1 | 1 | 9µs | 45µs | BEGIN@23592 | Perl::Tidy::Tokenizer::
1 | 1 | 1 | 8µs | 8µs | BEGIN@10806 | Perl::Tidy::Formatter::
1 | 1 | 1 | 8µs | 42µs | BEGIN@5946 | Perl::Tidy::Formatter::
1 | 1 | 1 | 8µs | 38µs | BEGIN@61 | Perl::Tidy::
1 | 1 | 1 | 8µs | 44µs | BEGIN@22582 | Perl::Tidy::Tokenizer::
1 | 1 | 1 | 8µs | 201µs | BEGIN@22644 | Perl::Tidy::Tokenizer::
1 | 1 | 1 | 8µs | 43µs | BEGIN@4955 | Perl::Tidy::HtmlWriter::
1 | 1 | 1 | 8µs | 8µs | BEGIN@22575 | Perl::Tidy::Tokenizer::
1 | 1 | 1 | 8µs | 580µs | BEGIN@19616 | Perl::Tidy::VerticalAligner::
1 | 1 | 1 | 8µs | 72µs | BEGIN@64 | Perl::Tidy::
1 | 1 | 1 | 8µs | 43µs | BEGIN@5945 | Perl::Tidy::Formatter::
1 | 1 | 1 | 8µs | 45µs | BEGIN@4339 | Perl::Tidy::Logger::
1 | 1 | 1 | 8µs | 41µs | BEGIN@19600 | Perl::Tidy::VerticalAligner::
1 | 1 | 1 | 8µs | 24µs | BEGIN@60 | Perl::Tidy::
1 | 1 | 1 | 8µs | 40µs | BEGIN@5948 | Perl::Tidy::Formatter::
1 | 1 | 1 | 8µs | 45µs | BEGIN@6282 | Perl::Tidy::Formatter::
1 | 1 | 1 | 8µs | 53µs | BEGIN@22583 | Perl::Tidy::Tokenizer::
1 | 1 | 1 | 8µs | 45µs | BEGIN@5949 | Perl::Tidy::Formatter::
1 | 1 | 1 | 8µs | 43µs | BEGIN@22584 | Perl::Tidy::Tokenizer::
1 | 1 | 1 | 8µs | 19µs | BEGIN@19380 | Perl::Tidy::VerticalAligner::Line::
1 | 1 | 1 | 8µs | 8µs | BEGIN@5940 | Perl::Tidy::Formatter::
1 | 1 | 1 | 8µs | 42µs | BEGIN@4956 | Perl::Tidy::HtmlWriter::
1 | 1 | 1 | 8µs | 37µs | BEGIN@19506 | Perl::Tidy::VerticalAligner::Alignment::
1 | 1 | 1 | 7µs | 45µs | BEGIN@5947 | Perl::Tidy::Formatter::
1 | 1 | 1 | 7µs | 42µs | BEGIN@6298 | Perl::Tidy::Formatter::
1 | 1 | 1 | 7µs | 41µs | BEGIN@6302 | Perl::Tidy::Formatter::
1 | 1 | 1 | 7µs | 43µs | BEGIN@23593 | Perl::Tidy::Tokenizer::
1 | 1 | 1 | 7µs | 38µs | BEGIN@5951 | Perl::Tidy::Formatter::
1 | 1 | 1 | 7µs | 40µs | BEGIN@19501 | Perl::Tidy::VerticalAligner::Alignment::
1 | 1 | 1 | 7µs | 36µs | BEGIN@6292 | Perl::Tidy::Formatter::
1 | 1 | 1 | 7µs | 40µs | BEGIN@19503 | Perl::Tidy::VerticalAligner::Alignment::
1 | 1 | 1 | 7µs | 37µs | BEGIN@6288 | Perl::Tidy::Formatter::
1 | 1 | 1 | 7µs | 36µs | BEGIN@22671 | Perl::Tidy::Tokenizer::
1 | 1 | 1 | 7µs | 16µs | BEGIN@19542 | Perl::Tidy::VerticalAligner::Alignment::
1 | 1 | 1 | 7µs | 35µs | BEGIN@4961 | Perl::Tidy::HtmlWriter::
1 | 1 | 1 | 7µs | 40µs | BEGIN@22665 | Perl::Tidy::Tokenizer::
1 | 1 | 1 | 7µs | 36µs | BEGIN@19505 | Perl::Tidy::VerticalAligner::Alignment::
1 | 1 | 1 | 7µs | 39µs | BEGIN@19324 | Perl::Tidy::VerticalAligner::Line::
1 | 1 | 1 | 7µs | 49µs | BEGIN@6295 | Perl::Tidy::Formatter::
1 | 1 | 1 | 7µs | 37µs | BEGIN@19079 | Perl::Tidy::IndentationItem::
1 | 1 | 1 | 7µs | 40µs | BEGIN@19323 | Perl::Tidy::VerticalAligner::Line::
1 | 1 | 1 | 7µs | 38µs | BEGIN@19326 | Perl::Tidy::VerticalAligner::Line::
1 | 1 | 1 | 7µs | 7µs | BEGIN@11861 | Perl::Tidy::Formatter::
1 | 1 | 1 | 7µs | 38µs | BEGIN@6283 | Perl::Tidy::Formatter::
1 | 1 | 1 | 7µs | 36µs | BEGIN@6289 | Perl::Tidy::Formatter::
1 | 1 | 1 | 7µs | 39µs | BEGIN@19078 | Perl::Tidy::IndentationItem::
1 | 1 | 1 | 7µs | 36µs | BEGIN@19507 | Perl::Tidy::VerticalAligner::Alignment::
1 | 1 | 1 | 6µs | 36µs | BEGIN@5954 | Perl::Tidy::Formatter::
1 | 1 | 1 | 6µs | 35µs | BEGIN@6291 | Perl::Tidy::Formatter::
1 | 1 | 1 | 6µs | 36µs | BEGIN@4957 | Perl::Tidy::HtmlWriter::
1 | 1 | 1 | 6µs | 38µs | BEGIN@4963 | Perl::Tidy::HtmlWriter::
1 | 1 | 1 | 6µs | 40µs | BEGIN@19080 | Perl::Tidy::IndentationItem::
1 | 1 | 1 | 6µs | 36µs | BEGIN@19083 | Perl::Tidy::IndentationItem::
1 | 1 | 1 | 6µs | 36µs | BEGIN@19090 | Perl::Tidy::IndentationItem::
1 | 1 | 1 | 6µs | 36µs | BEGIN@19502 | Perl::Tidy::VerticalAligner::Alignment::
1 | 1 | 1 | 6µs | 39µs | BEGIN@19325 | Perl::Tidy::VerticalAligner::Line::
1 | 1 | 1 | 6µs | 46µs | BEGIN@19327 | Perl::Tidy::VerticalAligner::Line::
1 | 1 | 1 | 6µs | 34µs | BEGIN@19330 | Perl::Tidy::VerticalAligner::Line::
1 | 1 | 1 | 6µs | 37µs | BEGIN@5957 | Perl::Tidy::Formatter::
1 | 1 | 1 | 6µs | 36µs | BEGIN@6284 | Perl::Tidy::Formatter::
1 | 1 | 1 | 6µs | 36µs | BEGIN@6287 | Perl::Tidy::Formatter::
1 | 1 | 1 | 6µs | 35µs | BEGIN@4958 | Perl::Tidy::HtmlWriter::
1 | 1 | 1 | 6µs | 35µs | BEGIN@4959 | Perl::Tidy::HtmlWriter::
1 | 1 | 1 | 6µs | 36µs | BEGIN@19084 | Perl::Tidy::IndentationItem::
1 | 1 | 1 | 6µs | 37µs | BEGIN@5953 | Perl::Tidy::Formatter::
1 | 1 | 1 | 6µs | 36µs | BEGIN@5955 | Perl::Tidy::Formatter::
1 | 1 | 1 | 6µs | 35µs | BEGIN@19086 | Perl::Tidy::IndentationItem::
1 | 1 | 1 | 6µs | 48µs | BEGIN@19087 | Perl::Tidy::IndentationItem::
1 | 1 | 1 | 6µs | 35µs | BEGIN@22667 | Perl::Tidy::Tokenizer::
1 | 1 | 1 | 6µs | 40µs | BEGIN@22670 | Perl::Tidy::Tokenizer::
1 | 1 | 1 | 6µs | 36µs | BEGIN@23594 | Perl::Tidy::Tokenizer::
1 | 1 | 1 | 6µs | 36µs | BEGIN@19504 | Perl::Tidy::VerticalAligner::Alignment::
1 | 1 | 1 | 6µs | 6µs | BEGIN@19595 | Perl::Tidy::VerticalAligner::
1 | 1 | 1 | 6µs | 35µs | BEGIN@19329 | Perl::Tidy::VerticalAligner::Line::
1 | 1 | 1 | 6µs | 35µs | BEGIN@6290 | Perl::Tidy::Formatter::
1 | 1 | 1 | 6µs | 35µs | BEGIN@19095 | Perl::Tidy::IndentationItem::
1 | 1 | 1 | 6µs | 36µs | BEGIN@23595 | Perl::Tidy::Tokenizer::
1 | 1 | 1 | 6µs | 36µs | BEGIN@19601 | Perl::Tidy::VerticalAligner::
1 | 1 | 1 | 6µs | 34µs | BEGIN@4962 | Perl::Tidy::HtmlWriter::
1 | 1 | 1 | 6µs | 34µs | BEGIN@19093 | Perl::Tidy::IndentationItem::
1 | 1 | 1 | 6µs | 35µs | BEGIN@19331 | Perl::Tidy::VerticalAligner::Line::
1 | 1 | 1 | 6µs | 34µs | BEGIN@19085 | Perl::Tidy::IndentationItem::
1 | 1 | 1 | 6µs | 36µs | BEGIN@19602 | Perl::Tidy::VerticalAligner::
1 | 1 | 1 | 6µs | 34µs | BEGIN@19332 | Perl::Tidy::VerticalAligner::Line::
1 | 1 | 1 | 6µs | 39µs | BEGIN@19333 | Perl::Tidy::VerticalAligner::Line::
1 | 1 | 1 | 6µs | 34µs | BEGIN@4960 | Perl::Tidy::HtmlWriter::
1 | 1 | 1 | 6µs | 35µs | BEGIN@19082 | Perl::Tidy::IndentationItem::
1 | 1 | 1 | 6µs | 35µs | BEGIN@19092 | Perl::Tidy::IndentationItem::
1 | 1 | 1 | 6µs | 35µs | BEGIN@22675 | Perl::Tidy::Tokenizer::
1 | 1 | 1 | 6µs | 34µs | BEGIN@19094 | Perl::Tidy::IndentationItem::
1 | 1 | 1 | 6µs | 34µs | BEGIN@19603 | Perl::Tidy::VerticalAligner::
1 | 1 | 1 | 6µs | 35µs | BEGIN@19328 | Perl::Tidy::VerticalAligner::Line::
1 | 1 | 1 | 6µs | 6µs | CORE:subst (opcode) | Perl::Tidy::
1 | 1 | 1 | 3µs | 3µs | BEGIN@57 | Perl::Tidy::
2 | 1 | 1 | 1µs | 1µs | CORE:substcont (opcode) | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | close_debug_file | Perl::Tidy::Debugger::
0 | 0 | 0 | 0s | 0s | new | Perl::Tidy::Debugger::
0 | 0 | 0 | 0s | 0s | really_open_debug_file | Perl::Tidy::Debugger::
0 | 0 | 0 | 0s | 0s | write_debug_entry | Perl::Tidy::Debugger::
0 | 0 | 0 | 0s | 0s | close | Perl::Tidy::DevNull::
0 | 0 | 0 | 0s | 0s | new | Perl::Tidy::DevNull::
0 | 0 | 0 | 0s | 0s | |
0 | 0 | 0 | 0s | 0s | new | Perl::Tidy::Diagnostics::
0 | 0 | 0 | 0s | 0s | set_input_file | Perl::Tidy::Diagnostics::
0 | 0 | 0 | 0s | 0s | write_diagnostics | Perl::Tidy::Diagnostics::
0 | 0 | 0 | 0s | 0s | Die | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | Exit | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | decrement_output_line_number | Perl::Tidy::FileWriter::
0 | 0 | 0 | 0s | 0s | get_consecutive_nonblank_lines | Perl::Tidy::FileWriter::
0 | 0 | 0 | 0s | 0s | get_output_line_number | Perl::Tidy::FileWriter::
0 | 0 | 0 | 0s | 0s | new | Perl::Tidy::FileWriter::
0 | 0 | 0 | 0s | 0s | report_line_length_errors | Perl::Tidy::FileWriter::
0 | 0 | 0 | 0s | 0s | require_blank_code_lines | Perl::Tidy::FileWriter::
0 | 0 | 0 | 0s | 0s | reset_consecutive_blank_lines | Perl::Tidy::FileWriter::
0 | 0 | 0 | 0s | 0s | tee_off | Perl::Tidy::FileWriter::
0 | 0 | 0 | 0s | 0s | tee_on | Perl::Tidy::FileWriter::
0 | 0 | 0 | 0s | 0s | want_blank_line | Perl::Tidy::FileWriter::
0 | 0 | 0 | 0s | 0s | write_blank_code_line | Perl::Tidy::FileWriter::
0 | 0 | 0 | 0s | 0s | write_code_line | Perl::Tidy::FileWriter::
0 | 0 | 0 | 0s | 0s | write_line | Perl::Tidy::FileWriter::
0 | 0 | 0 | 0s | 0s | write_logfile_entry | Perl::Tidy::FileWriter::
0 | 0 | 0 | 0s | 0s | DESTROY | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | __ANON__[:5961] | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | __ANON__[:7683] | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | __ANON__[:7694] | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | _decrement_count | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | _increment_count | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | accumulate_block_text | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | accumulate_csc_text | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | add_closing_side_comment | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | balance_csc_text | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | bias_table_key | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | black_box | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | break_all_chain_tokens | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | break_equals | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | check_for_long_gnu_style_lines | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | check_for_new_minimum_depth | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | check_options | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | clear_breakpoint_undo_stack | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | comma_arrow_count | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | compactify_table | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | compare_indentation_levels | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | complain | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | copy_old_breakpoints | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | correct_lp_indentation | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | create_one_line_block | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | destroy_one_line_block | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | do_uncontained_comma_breaks | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | dump_want_left_space | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | dump_want_right_space | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | excess_line_length | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | extract_token | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | find_token_starting_list | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | finish_formatting | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | finish_lp_batch | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | flush | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | get_AVAILABLE_SPACES_to_go | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | get_RECOVERABLE_SPACES | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | get_SPACES | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | get_added_semicolon_count | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | get_count | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | get_maximum_fields_wanted | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | get_opening_indentation | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | get_saw_brace_error | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | get_seqno | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | in_same_container | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | insert_additional_breaks | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | insert_final_breaks | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | insert_new_token_to_go | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | is_essential_whitespace | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | is_unbalanced_batch | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | is_unbreakable_container | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | leading_spaces_to_go | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | lookup_opening_indentation | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | make_alignment_patterns | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | make_bli_pattern | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | make_block_brace_vertical_tightness_pattern | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | make_block_pattern | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | make_closing_side_comment_list_pattern | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | make_closing_side_comment_prefix | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | make_else_csc_text | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | make_format_skipping_pattern | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | make_static_block_comment_pattern | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | make_static_side_comment_pattern | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | match_opening_and_closing_tokens | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | max | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | maximum_line_length | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | maximum_line_length_for_level | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | maximum_number_of_fields | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | min | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | new | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | new_lp_indentation_item | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | note_added_semicolon | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | note_deleted_semicolon | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | note_embedded_tab | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | output_line_to_go | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | pad_array_to_go | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | pad_token | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | prepare_for_new_input_lines | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | previous_nonblank_token | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | print_line_of_tokens | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | recombine_breakpoints | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | reduce_lp_indentation | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | report_definite_bug | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | reset_block_text_accumulator | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | restore_current_token | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | rtoken_length | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | save_current_token | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | save_opening_indentation | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | scan_list | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | secret_operator_whitespace | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | send_lines_to_vertical_aligner | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | set_adjusted_indentation | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | set_block_text_accumulator | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | set_bond_strengths | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | set_closing_breakpoint | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | set_comma_breakpoints | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | set_comma_breakpoints_do | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | set_continuation_breaks | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | set_fake_breakpoint | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | set_for_semicolon_breakpoints | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | set_forced_breakpoint | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | set_leading_whitespace | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | set_logical_breakpoints | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | set_logical_padding | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | set_nobreaks | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | set_non_alignment_flags | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | set_ragged_breakpoints | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | set_vertical_alignment_markers | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | set_vertical_tightness_flags | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | set_white_space_flag | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | split_words | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | starting_one_line_block | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | store_token_to_go | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | study_list_complexity | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | table_columns_available | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | terminal_type | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | token_length | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | token_sequence_length | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | total_line_length | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | trim | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | undo_ci | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | undo_forced_breakpoint_stack | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | undo_lp_ci | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | unstore_token_to_go | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | want_blank_line | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | warning | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | we_are_at_the_last_line | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | write_diagnostics | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | write_line | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | write_logfile_entry | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | write_unindented_line | Perl::Tidy::Formatter::
0 | 0 | 0 | 0s | 0s | __ANON__[:4680] | Perl::Tidy::HtmlWriter::
0 | 0 | 0 | 0s | 0s | __ANON__[:4690] | Perl::Tidy::HtmlWriter::
0 | 0 | 0 | 0s | 0s | __ANON__[:5185] | Perl::Tidy::HtmlWriter::
0 | 0 | 0 | 0s | 0s | __ANON__[:5210] | Perl::Tidy::HtmlWriter::
0 | 0 | 0 | 0s | 0s | add_toc_item | Perl::Tidy::HtmlWriter::
0 | 0 | 0 | 0s | 0s | change_anchor_names | Perl::Tidy::HtmlWriter::
0 | 0 | 0 | 0s | 0s | check_RGB | Perl::Tidy::HtmlWriter::
0 | 0 | 0 | 0s | 0s | check_options | Perl::Tidy::HtmlWriter::
0 | 0 | 0 | 0s | 0s | close_html_file | Perl::Tidy::HtmlWriter::
0 | 0 | 0 | 0s | 0s | escape_html | Perl::Tidy::HtmlWriter::
0 | 0 | 0 | 0s | 0s | finish_formatting | Perl::Tidy::HtmlWriter::
0 | 0 | 0 | 0s | 0s | make_abbreviated_names | Perl::Tidy::HtmlWriter::
0 | 0 | 0 | 0s | 0s | make_frame | Perl::Tidy::HtmlWriter::
0 | 0 | 0 | 0s | 0s | make_getopt_long_names | Perl::Tidy::HtmlWriter::
0 | 0 | 0 | 0s | 0s | markup_html_element | Perl::Tidy::HtmlWriter::
0 | 0 | 0 | 0s | 0s | markup_tokens | Perl::Tidy::HtmlWriter::
0 | 0 | 0 | 0s | 0s | new | Perl::Tidy::HtmlWriter::
0 | 0 | 0 | 0s | 0s | pod_to_html | Perl::Tidy::HtmlWriter::
0 | 0 | 0 | 0s | 0s | set_default_color | Perl::Tidy::HtmlWriter::
0 | 0 | 0 | 0s | 0s | set_default_properties | Perl::Tidy::HtmlWriter::
0 | 0 | 0 | 0s | 0s | write_frame_html | Perl::Tidy::HtmlWriter::
0 | 0 | 0 | 0s | 0s | write_line | Perl::Tidy::HtmlWriter::
0 | 0 | 0 | 0s | 0s | write_style_sheet_data | Perl::Tidy::HtmlWriter::
0 | 0 | 0 | 0s | 0s | write_style_sheet_file | Perl::Tidy::HtmlWriter::
0 | 0 | 0 | 0s | 0s | write_toc_html | Perl::Tidy::HtmlWriter::
0 | 0 | 0 | 0s | 0s | close | Perl::Tidy::IOScalar::
0 | 0 | 0 | 0s | 0s | getline | Perl::Tidy::IOScalar::
0 | 0 | 0 | 0s | 0s | new | Perl::Tidy::IOScalar::
0 | 0 | 0 | 0s | 0s | |
0 | 0 | 0 | 0s | 0s | close | Perl::Tidy::IOScalarArray::
0 | 0 | 0 | 0s | 0s | getline | Perl::Tidy::IOScalarArray::
0 | 0 | 0 | 0s | 0s | new | Perl::Tidy::IOScalarArray::
0 | 0 | 0 | 0s | 0s | |
0 | 0 | 0 | 0s | 0s | decrease_AVAILABLE_SPACES | Perl::Tidy::IndentationItem::
0 | 0 | 0 | 0s | 0s | decrease_SPACES | Perl::Tidy::IndentationItem::
0 | 0 | 0 | 0s | 0s | get_ALIGN_PAREN | Perl::Tidy::IndentationItem::
0 | 0 | 0 | 0s | 0s | get_ARROW_COUNT | Perl::Tidy::IndentationItem::
0 | 0 | 0 | 0s | 0s | get_AVAILABLE_SPACES | Perl::Tidy::IndentationItem::
0 | 0 | 0 | 0s | 0s | get_CI_LEVEL | Perl::Tidy::IndentationItem::
0 | 0 | 0 | 0s | 0s | get_CLOSED | Perl::Tidy::IndentationItem::
0 | 0 | 0 | 0s | 0s | get_COMMA_COUNT | Perl::Tidy::IndentationItem::
0 | 0 | 0 | 0s | 0s | get_HAVE_CHILD | Perl::Tidy::IndentationItem::
0 | 0 | 0 | 0s | 0s | get_INDEX | Perl::Tidy::IndentationItem::
0 | 0 | 0 | 0s | 0s | get_LEVEL | Perl::Tidy::IndentationItem::
0 | 0 | 0 | 0s | 0s | get_MARKED | Perl::Tidy::IndentationItem::
0 | 0 | 0 | 0s | 0s | get_RECOVERABLE_SPACES | Perl::Tidy::IndentationItem::
0 | 0 | 0 | 0s | 0s | get_SEQUENCE_NUMBER | Perl::Tidy::IndentationItem::
0 | 0 | 0 | 0s | 0s | get_SPACES | Perl::Tidy::IndentationItem::
0 | 0 | 0 | 0s | 0s | get_STACK_DEPTH | Perl::Tidy::IndentationItem::
0 | 0 | 0 | 0s | 0s | get_STARTING_INDEX | Perl::Tidy::IndentationItem::
0 | 0 | 0 | 0s | 0s | increase_RECOVERABLE_SPACES | Perl::Tidy::IndentationItem::
0 | 0 | 0 | 0s | 0s | new | Perl::Tidy::IndentationItem::
0 | 0 | 0 | 0s | 0s | permanently_decrease_AVAILABLE_SPACES | Perl::Tidy::IndentationItem::
0 | 0 | 0 | 0s | 0s | set_ARROW_COUNT | Perl::Tidy::IndentationItem::
0 | 0 | 0 | 0s | 0s | set_CLOSED | Perl::Tidy::IndentationItem::
0 | 0 | 0 | 0s | 0s | set_COMMA_COUNT | Perl::Tidy::IndentationItem::
0 | 0 | 0 | 0s | 0s | set_HAVE_CHILD | Perl::Tidy::IndentationItem::
0 | 0 | 0 | 0s | 0s | set_MARKED | Perl::Tidy::IndentationItem::
0 | 0 | 0 | 0s | 0s | set_RECOVERABLE_SPACES | Perl::Tidy::IndentationItem::
0 | 0 | 0 | 0s | 0s | tentatively_decrease_AVAILABLE_SPACES | Perl::Tidy::IndentationItem::
0 | 0 | 0 | 0s | 0s | get_line | Perl::Tidy::LineBuffer::
0 | 0 | 0 | 0s | 0s | new | Perl::Tidy::LineBuffer::
0 | 0 | 0 | 0s | 0s | peek_ahead | Perl::Tidy::LineBuffer::
0 | 0 | 0 | 0s | 0s | close_output_file | Perl::Tidy::LineSink::
0 | 0 | 0 | 0s | 0s | close_tee_file | Perl::Tidy::LineSink::
0 | 0 | 0 | 0s | 0s | new | Perl::Tidy::LineSink::
0 | 0 | 0 | 0s | 0s | really_open_tee_file | Perl::Tidy::LineSink::
0 | 0 | 0 | 0s | 0s | tee_off | Perl::Tidy::LineSink::
0 | 0 | 0 | 0s | 0s | tee_on | Perl::Tidy::LineSink::
0 | 0 | 0 | 0s | 0s | write_line | Perl::Tidy::LineSink::
0 | 0 | 0 | 0s | 0s | close_input_file | Perl::Tidy::LineSource::
0 | 0 | 0 | 0s | 0s | get_line | Perl::Tidy::LineSource::
0 | 0 | 0 | 0s | 0s | new | Perl::Tidy::LineSource::
0 | 0 | 0 | 0s | 0s | ask_user_for_bug_report | Perl::Tidy::Logger::
0 | 0 | 0 | 0s | 0s | black_box | Perl::Tidy::Logger::
0 | 0 | 0 | 0s | 0s | block_log_output | Perl::Tidy::Logger::
0 | 0 | 0 | 0s | 0s | brace_warning | Perl::Tidy::Logger::
0 | 0 | 0 | 0s | 0s | complain | Perl::Tidy::Logger::
0 | 0 | 0 | 0s | 0s | finish | Perl::Tidy::Logger::
0 | 0 | 0 | 0s | 0s | get_saw_brace_error | Perl::Tidy::Logger::
0 | 0 | 0 | 0s | 0s | get_use_prefix | Perl::Tidy::Logger::
0 | 0 | 0 | 0s | 0s | get_warning_count | Perl::Tidy::Logger::
0 | 0 | 0 | 0s | 0s | increment_brace_error | Perl::Tidy::Logger::
0 | 0 | 0 | 0s | 0s | interrupt_logfile | Perl::Tidy::Logger::
0 | 0 | 0 | 0s | 0s | logfile_output | Perl::Tidy::Logger::
0 | 0 | 0 | 0s | 0s | make_line_information_string | Perl::Tidy::Logger::
0 | 0 | 0 | 0s | 0s | new | Perl::Tidy::Logger::
0 | 0 | 0 | 0s | 0s | report_definite_bug | Perl::Tidy::Logger::
0 | 0 | 0 | 0s | 0s | report_possible_bug | Perl::Tidy::Logger::
0 | 0 | 0 | 0s | 0s | resume_logfile | Perl::Tidy::Logger::
0 | 0 | 0 | 0s | 0s | unblock_log_output | Perl::Tidy::Logger::
0 | 0 | 0 | 0s | 0s | warning | Perl::Tidy::Logger::
0 | 0 | 0 | 0s | 0s | we_are_at_the_last_line | Perl::Tidy::Logger::
0 | 0 | 0 | 0s | 0s | write_column_headings | Perl::Tidy::Logger::
0 | 0 | 0 | 0s | 0s | write_logfile_entry | Perl::Tidy::Logger::
0 | 0 | 0 | 0s | 0s | DESTROY | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | __ANON__[:22588] | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | __ANON__[:24021] | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | __ANON__[:24025] | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | __ANON__[:24050] | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | __ANON__[:24164] | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | __ANON__[:24184] | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | __ANON__[:24196] | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | __ANON__[:24221] | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | __ANON__[:24228] | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | __ANON__[:24235] | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | __ANON__[:24242] | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | __ANON__[:24278] | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | __ANON__[:24400] | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | __ANON__[:24429] | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | __ANON__[:24448] | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | __ANON__[:24465] | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | __ANON__[:24491] | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | __ANON__[:24515] | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | __ANON__[:24527] | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | __ANON__[:24561] | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | __ANON__[:24575] | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | __ANON__[:24581] | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | __ANON__[:24591] | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | __ANON__[:24604] | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | __ANON__[:24622] | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | __ANON__[:24658] | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | __ANON__[:24689] | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | __ANON__[:24693] | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | __ANON__[:24734] | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | __ANON__[:24740] | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | __ANON__[:24750] | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | __ANON__[:24760] | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | __ANON__[:24771] | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | __ANON__[:24776] | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | __ANON__[:24781] | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | __ANON__[:24786] | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | _decrement_count | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | _increment_count | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | brace_warning | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | check_final_nesting_depths | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | check_prototype | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | code_block_type | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | complain | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | decide_if_code_block | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | decrease_nesting_depth | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | do_quote | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | do_scan_package | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | do_scan_sub | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | dump_functions | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | dump_token_types | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | error_if_expecting_OPERATOR | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | error_if_expecting_TERM | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | find_angle_operator_termination | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | find_here_doc | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | find_next_nonblank_token | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | find_next_nonblank_token_on_this_line | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | find_starting_indentation_level | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | follow_quoted_string | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | get_count | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | get_indentation_level | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | get_input_line_number | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | get_line | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | get_saw_brace_error | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | guess_if_here_doc | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | guess_if_pattern_or_conditional | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | guess_if_pattern_or_division | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | guess_old_indentation_level | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | increase_nesting_depth | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | increment_brace_error | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | indicate_error | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | initialize_tokenizer_state | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | interrupt_logfile | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | inverse_pretoken_map | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | is_non_structural_brace | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | label_ok | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | make_numbered_line | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | matching_end_token | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | new | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | new_statement_ok | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | numerator_expected | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | ones_count | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | operator_expected | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | pattern_expected | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | peek_ahead_for_n_nonblank_pre_tokens | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | peek_ahead_for_nonblank_token | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | peeked_ahead | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | pre_tokenize | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | prepare_for_a_new_file | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | report_definite_bug | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | report_tokenization_errors | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | report_v_string | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | reset_indentation_level | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | restore_tokenizer_state | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | resume_logfile | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | save_tokenizer_state | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | scan_bare_identifier | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | scan_bare_identifier_do | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | scan_id | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | scan_id_do | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | scan_identifier | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | scan_identifier_do | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | scan_number | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | scan_number_do | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | scan_replacement_text | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | show_tokens | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | tokenize_this_line | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | unexpected | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | warning | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | write_diagnostics | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | write_error_indicator_pair | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | write_logfile_entry | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | write_on_underline | Perl::Tidy::Tokenizer::
0 | 0 | 0 | 0s | 0s | DESTROY | Perl::Tidy::VerticalAligner::Alignment::
0 | 0 | 0 | 0s | 0s | _decrement_count | Perl::Tidy::VerticalAligner::Alignment::
0 | 0 | 0 | 0s | 0s | _increment_count | Perl::Tidy::VerticalAligner::Alignment::
0 | 0 | 0 | 0s | 0s | get_column | Perl::Tidy::VerticalAligner::Alignment::
0 | 0 | 0 | 0s | 0s | get_count | Perl::Tidy::VerticalAligner::Alignment::
0 | 0 | 0 | 0s | 0s | get_ending_line | Perl::Tidy::VerticalAligner::Alignment::
0 | 0 | 0 | 0s | 0s | get_matching_token | Perl::Tidy::VerticalAligner::Alignment::
0 | 0 | 0 | 0s | 0s | get_serial_number | Perl::Tidy::VerticalAligner::Alignment::
0 | 0 | 0 | 0s | 0s | get_starting_column | Perl::Tidy::VerticalAligner::Alignment::
0 | 0 | 0 | 0s | 0s | get_starting_line | Perl::Tidy::VerticalAligner::Alignment::
0 | 0 | 0 | 0s | 0s | increment_column | Perl::Tidy::VerticalAligner::Alignment::
0 | 0 | 0 | 0s | 0s | new | Perl::Tidy::VerticalAligner::Alignment::
0 | 0 | 0 | 0s | 0s | restore_column | Perl::Tidy::VerticalAligner::Alignment::
0 | 0 | 0 | 0s | 0s | save_column | Perl::Tidy::VerticalAligner::Alignment::
0 | 0 | 0 | 0s | 0s | set_column | Perl::Tidy::VerticalAligner::Alignment::
0 | 0 | 0 | 0s | 0s | set_ending_line | Perl::Tidy::VerticalAligner::Alignment::
0 | 0 | 0 | 0s | 0s | set_matching_token | Perl::Tidy::VerticalAligner::Alignment::
0 | 0 | 0 | 0s | 0s | set_starting_column | Perl::Tidy::VerticalAligner::Alignment::
0 | 0 | 0 | 0s | 0s | set_starting_line | Perl::Tidy::VerticalAligner::Alignment::
0 | 0 | 0 | 0s | 0s | DESTROY | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | _decrement_count | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | _increment_count | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | current_field_width | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | field_width_growth | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | get_alignment | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | get_alignments | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | get_available_space_on_right | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | get_column | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | get_count | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | get_indentation | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | get_is_hanging_side_comment | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | get_jmax | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | get_jmax_original_line | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | get_leading_space_count | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | get_list_type | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | get_outdent_long_lines | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | get_rfields | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | get_rpatterns | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | get_rtokens | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | get_rvertical_tightness_flags | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | get_starting_column | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | increase_field_width | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | increment_column | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | new | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | set_alignment | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | set_alignments | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | set_column | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | set_indentation | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | set_is_hanging_side_comment | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | set_jmax | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | set_jmax_original_line | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | set_leading_space_count | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | set_list_type | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | set_outdent_long_lines | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | set_rfields | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | set_rpatterns | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | set_rtokens | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | starting_field_width | Perl::Tidy::VerticalAligner::Line::
0 | 0 | 0 | 0s | 0s | __ANON__[:19607] | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | add_to_group | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | adjust_side_comment | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | check_fit | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | check_match | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | combine_fields | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | decide_if_aligned | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | decide_if_list | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | dump_alignments | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | dump_array | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | dump_valign_buffer | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | eliminate_new_fields | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | eliminate_old_fields | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | fix_terminal_else | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | fix_terminal_ternary | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | flush | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | forget_side_comment | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | get_RECOVERABLE_SPACES | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | get_SPACES | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | get_STACK_DEPTH | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | get_extra_leading_spaces | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | get_leading_string | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | get_output_line_number | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | improve_continuation_indentation | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | initialize | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | initialize_for_new_group | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | join_hanging_comment | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | level_change | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | make_alignment | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | make_side_comment | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | maximum_line_length_for_level | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | my_flush | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | reduce_valign_buffer_indentation | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | report_anything_unusual | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | report_definite_bug | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | restore_alignment_columns | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | save_alignment_columns | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | valign_input | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | valign_output_step_A | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | valign_output_step_B | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | valign_output_step_C | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | valign_output_step_D | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | warning | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | write_diagnostics | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | write_logfile_entry | Perl::Tidy::VerticalAligner::
0 | 0 | 0 | 0s | 0s | Warn | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | Win_Config_Locs | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | Win_OS_Type | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | __ANON__[:111] | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | __ANON__[:114] | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | __ANON__[:125] | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | __ANON__[:128] | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | __ANON__[:142] | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | __ANON__[:145] | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | __ANON__[:1507] | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | __ANON__[:160] | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | __ANON__[:163] | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | __ANON__[:2378] | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | __ANON__[:2857] | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | __ANON__[:331] | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | _process_command_line | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | catfile | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | check_options | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | check_syntax | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | check_vms_filename | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | do_syntax_check | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | dump_config_file | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | dump_defaults | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | dump_long_names | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | dump_short_names | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | expand_command_abbreviations | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | fileglob_to_re | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | find_config_file | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | find_file_upwards | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | find_input_line_ending | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | generate_options | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | get_stream_as_named_file | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | is_unix | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | look_for_Windows | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | make_extension | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | parse_args | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | perltidy | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | process_command_line | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | process_this_file | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | read_config_file | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | readable_options | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | show_version | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | streamhandle | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | strip_comment | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | usage | Perl::Tidy::
0 | 0 | 0 | 0s | 0s | write_logfile_header | Perl::Tidy::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | # | ||||
2 | ############################################################ | ||||
3 | # | ||||
4 | # perltidy - a perl script indenter and formatter | ||||
5 | # | ||||
6 | # Copyright (c) 2000-2014 by Steve Hancock | ||||
7 | # Distributed under the GPL license agreement; see file COPYING | ||||
8 | # | ||||
9 | # This program is free software; you can redistribute it and/or modify | ||||
10 | # it under the terms of the GNU General Public License as published by | ||||
11 | # the Free Software Foundation; either version 2 of the License, or | ||||
12 | # (at your option) any later version. | ||||
13 | # | ||||
14 | # This program is distributed in the hope that it will be useful, | ||||
15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
17 | # GNU General Public License for more details. | ||||
18 | # | ||||
19 | # You should have received a copy of the GNU General Public License along | ||||
20 | # with this program; if not, write to the Free Software Foundation, Inc., | ||||
21 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
22 | # | ||||
23 | # For brief instructions, try 'perltidy -h'. | ||||
24 | # For more complete documentation, try 'man perltidy' | ||||
25 | # or visit http://perltidy.sourceforge.net | ||||
26 | # | ||||
27 | # This script is an example of the default style. It was formatted with: | ||||
28 | # | ||||
29 | # perltidy Tidy.pm | ||||
30 | # | ||||
31 | # Code Contributions: See ChangeLog.html for a complete history. | ||||
32 | # Michael Cartmell supplied code for adaptation to VMS and helped with | ||||
33 | # v-strings. | ||||
34 | # Hugh S. Myers supplied sub streamhandle and the supporting code to | ||||
35 | # create a Perl::Tidy module which can operate on strings, arrays, etc. | ||||
36 | # Yves Orton supplied coding to help detect Windows versions. | ||||
37 | # Axel Rose supplied a patch for MacPerl. | ||||
38 | # Sebastien Aperghis-Tramoni supplied a patch for the defined or operator. | ||||
39 | # Dan Tyrell contributed a patch for binary I/O. | ||||
40 | # Ueli Hugenschmidt contributed a patch for -fpsc | ||||
41 | # Sam Kington supplied a patch to identify the initial indentation of | ||||
42 | # entabbed code. | ||||
43 | # jonathan swartz supplied patches for: | ||||
44 | # * .../ pattern, which looks upwards from directory | ||||
45 | # * --notidy, to be used in directories where we want to avoid | ||||
46 | # accidentally tidying | ||||
47 | # * prefilter and postfilter | ||||
48 | # * iterations option | ||||
49 | # | ||||
50 | # Many others have supplied key ideas, suggestions, and bug reports; | ||||
51 | # see the CHANGES file. | ||||
52 | # | ||||
53 | ############################################################ | ||||
54 | |||||
55 | package Perl::Tidy; | ||||
56 | 2 | 40µs | 1 | 14µs | # spent 14µs within Perl::Tidy::BEGIN@56 which was called:
# once (14µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 56 # spent 14µs making 1 call to Perl::Tidy::BEGIN@56 |
57 | 1 | 18µs | 1 | 3µs | # spent 3µs within Perl::Tidy::BEGIN@57 which was called:
# once (3µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 57 # spent 3µs making 1 call to Perl::Tidy::BEGIN@57 |
58 | |||||
59 | 2 | 23µs | 2 | 33µs | # spent 22µs (10+11) within Perl::Tidy::BEGIN@59 which was called:
# once (10µs+11µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 59 # spent 22µs making 1 call to Perl::Tidy::BEGIN@59
# spent 12µs making 1 call to strict::import |
60 | 2 | 19µs | 2 | 40µs | # spent 24µs (8+16) within Perl::Tidy::BEGIN@60 which was called:
# once (8µs+16µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 60 # spent 24µs making 1 call to Perl::Tidy::BEGIN@60
# spent 16µs making 1 call to Exporter::import |
61 | 2 | 28µs | 2 | 67µs | # spent 38µs (8+30) within Perl::Tidy::BEGIN@61 which was called:
# once (8µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 61 # spent 38µs making 1 call to Perl::Tidy::BEGIN@61
# spent 30µs making 1 call to Exporter::import |
62 | 1 | 2µs | $|++; | ||
63 | |||||
64 | 1 | 600ns | # spent 72µs (8+64) within Perl::Tidy::BEGIN@64 which was called:
# once (8µs+64µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 70 | ||
65 | $VERSION | ||||
66 | @ISA | ||||
67 | @EXPORT | ||||
68 | $missing_file_spec | ||||
69 | $fh_stderr | ||||
70 | 1 | 32µs | 2 | 136µs | }; # spent 72µs making 1 call to Perl::Tidy::BEGIN@64
# spent 64µs making 1 call to vars::import |
71 | |||||
72 | 1 | 11µs | @ISA = qw( Exporter ); | ||
73 | 1 | 800ns | @EXPORT = qw( &perltidy ); | ||
74 | |||||
75 | 2 | 34µs | 2 | 73µs | # spent 42µs (11+31) within Perl::Tidy::BEGIN@75 which was called:
# once (11µs+31µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 75 # spent 42µs making 1 call to Perl::Tidy::BEGIN@75
# spent 31µs making 1 call to Exporter::import |
76 | 2 | 133µs | 2 | 1.10ms | # spent 1.00ms (628µs+377µs) within Perl::Tidy::BEGIN@76 which was called:
# once (628µs+377µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 76 # spent 1.00ms making 1 call to Perl::Tidy::BEGIN@76
# spent 100µs making 1 call to Exporter::import |
77 | 2 | 21µs | 2 | 66µs | # spent 38µs (11+28) within Perl::Tidy::BEGIN@77 which was called:
# once (11µs+28µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 77 # spent 38µs making 1 call to Perl::Tidy::BEGIN@77
# spent 28µs making 1 call to Exporter::import |
78 | 2 | 109µs | 2 | 1.98ms | # spent 1.96ms (1.84+119µs) within Perl::Tidy::BEGIN@78 which was called:
# once (1.84ms+119µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 78 # spent 1.96ms making 1 call to Perl::Tidy::BEGIN@78
# spent 27µs making 1 call to Exporter::import |
79 | 2 | 53µs | 2 | 135µs | # spent 72µs (9+63) within Perl::Tidy::BEGIN@79 which was called:
# once (9µs+63µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 79 # spent 72µs making 1 call to Perl::Tidy::BEGIN@79
# spent 63µs making 1 call to Exporter::import |
80 | |||||
81 | # spent 23µs (16+7) within Perl::Tidy::BEGIN@81 which was called:
# once (16µs+7µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 83 | ||||
82 | 1 | 22µs | 3 | 7µs | ( $VERSION = q($Id: Tidy.pm,v 1.74 2014/03/28 13:56:49 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker # spent 6µs making 1 call to Perl::Tidy::CORE:subst
# spent 1µs making 2 calls to Perl::Tidy::CORE:substcont, avg 650ns/call |
83 | 1 | 454µs | 1 | 23µs | } # spent 23µs making 1 call to Perl::Tidy::BEGIN@81 |
84 | |||||
85 | sub streamhandle { | ||||
86 | |||||
87 | # given filename and mode (r or w), create an object which: | ||||
88 | # has a 'getline' method if mode='r', and | ||||
89 | # has a 'print' method if mode='w'. | ||||
90 | # The objects also need a 'close' method. | ||||
91 | # | ||||
92 | # How the object is made: | ||||
93 | # | ||||
94 | # if $filename is: Make object using: | ||||
95 | # ---------------- ----------------- | ||||
96 | # '-' (STDIN if mode = 'r', STDOUT if mode='w') | ||||
97 | # string IO::File | ||||
98 | # ARRAY ref Perl::Tidy::IOScalarArray (formerly IO::ScalarArray) | ||||
99 | # STRING ref Perl::Tidy::IOScalar (formerly IO::Scalar) | ||||
100 | # object object | ||||
101 | # (check for 'print' method for 'w' mode) | ||||
102 | # (check for 'getline' method for 'r' mode) | ||||
103 | my $ref = ref( my $filename = shift ); | ||||
104 | my $mode = shift; | ||||
105 | my $New; | ||||
106 | my $fh; | ||||
107 | |||||
108 | # handle a reference | ||||
109 | if ($ref) { | ||||
110 | if ( $ref eq 'ARRAY' ) { | ||||
111 | $New = sub { Perl::Tidy::IOScalarArray->new(@_) }; | ||||
112 | } | ||||
113 | elsif ( $ref eq 'SCALAR' ) { | ||||
114 | $New = sub { Perl::Tidy::IOScalar->new(@_) }; | ||||
115 | } | ||||
116 | else { | ||||
117 | |||||
118 | # Accept an object with a getline method for reading. Note: | ||||
119 | # IO::File is built-in and does not respond to the defined | ||||
120 | # operator. If this causes trouble, the check can be | ||||
121 | # skipped and we can just let it crash if there is no | ||||
122 | # getline. | ||||
123 | if ( $mode =~ /[rR]/ ) { | ||||
124 | if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) { | ||||
125 | $New = sub { $filename }; | ||||
126 | } | ||||
127 | else { | ||||
128 | $New = sub { undef }; | ||||
129 | confess <<EOM; | ||||
130 | ------------------------------------------------------------------------ | ||||
131 | No 'getline' method is defined for object of class $ref | ||||
132 | Please check your call to Perl::Tidy::perltidy. Trace follows. | ||||
133 | ------------------------------------------------------------------------ | ||||
134 | EOM | ||||
135 | } | ||||
136 | } | ||||
137 | |||||
138 | # Accept an object with a print method for writing. | ||||
139 | # See note above about IO::File | ||||
140 | if ( $mode =~ /[wW]/ ) { | ||||
141 | if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) { | ||||
142 | $New = sub { $filename }; | ||||
143 | } | ||||
144 | else { | ||||
145 | $New = sub { undef }; | ||||
146 | confess <<EOM; | ||||
147 | ------------------------------------------------------------------------ | ||||
148 | No 'print' method is defined for object of class $ref | ||||
149 | Please check your call to Perl::Tidy::perltidy. Trace follows. | ||||
150 | ------------------------------------------------------------------------ | ||||
151 | EOM | ||||
152 | } | ||||
153 | } | ||||
154 | } | ||||
155 | } | ||||
156 | |||||
157 | # handle a string | ||||
158 | else { | ||||
159 | if ( $filename eq '-' ) { | ||||
160 | $New = sub { $mode eq 'w' ? *STDOUT : *STDIN } | ||||
161 | } | ||||
162 | else { | ||||
163 | $New = sub { IO::File->new(@_) }; | ||||
164 | } | ||||
165 | } | ||||
166 | $fh = $New->( $filename, $mode ) | ||||
167 | or Warn("Couldn't open file:$filename in mode:$mode : $!\n"); | ||||
168 | return $fh, ( $ref or $filename ); | ||||
169 | } | ||||
170 | |||||
171 | sub find_input_line_ending { | ||||
172 | |||||
173 | # Peek at a file and return first line ending character. | ||||
174 | # Quietly return undef in case of any trouble. | ||||
175 | my ($input_file) = @_; | ||||
176 | my $ending; | ||||
177 | |||||
178 | # silently ignore input from object or stdin | ||||
179 | if ( ref($input_file) || $input_file eq '-' ) { | ||||
180 | return $ending; | ||||
181 | } | ||||
182 | open( INFILE, $input_file ) || return $ending; | ||||
183 | |||||
184 | binmode INFILE; | ||||
185 | my $buf; | ||||
186 | read( INFILE, $buf, 1024 ); | ||||
187 | close INFILE; | ||||
188 | if ( $buf && $buf =~ /([\012\015]+)/ ) { | ||||
189 | my $test = $1; | ||||
190 | |||||
191 | # dos | ||||
192 | if ( $test =~ /^(\015\012)+$/ ) { $ending = "\015\012" } | ||||
193 | |||||
194 | # mac | ||||
195 | elsif ( $test =~ /^\015+$/ ) { $ending = "\015" } | ||||
196 | |||||
197 | # unix | ||||
198 | elsif ( $test =~ /^\012+$/ ) { $ending = "\012" } | ||||
199 | |||||
200 | # unknown | ||||
201 | else { } | ||||
202 | } | ||||
203 | |||||
204 | # no ending seen | ||||
205 | else { } | ||||
206 | |||||
207 | return $ending; | ||||
208 | } | ||||
209 | |||||
210 | sub catfile { | ||||
211 | |||||
212 | # concatenate a path and file basename | ||||
213 | # returns undef in case of error | ||||
214 | |||||
215 | 2 | 5.10ms | 1 | 22µs | # spent 22µs within Perl::Tidy::BEGIN@215 which was called:
# once (22µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 215 # spent 22µs making 1 call to Perl::Tidy::BEGIN@215 # spent 2µs executing statements in string eval |
216 | |||||
217 | # use File::Spec if we can | ||||
218 | unless ($missing_file_spec) { | ||||
219 | return File::Spec->catfile(@_); | ||||
220 | } | ||||
221 | |||||
222 | # Perl 5.004 systems may not have File::Spec so we'll make | ||||
223 | # a simple try. We assume File::Basename is available. | ||||
224 | # return undef if not successful. | ||||
225 | my $name = pop @_; | ||||
226 | my $path = join '/', @_; | ||||
227 | my $test_file = $path . $name; | ||||
228 | my ( $test_name, $test_path ) = fileparse($test_file); | ||||
229 | return $test_file if ( $test_name eq $name ); | ||||
230 | return undef if ( $^O eq 'VMS' ); | ||||
231 | |||||
232 | # this should work at least for Windows and Unix: | ||||
233 | $test_file = $path . '/' . $name; | ||||
234 | ( $test_name, $test_path ) = fileparse($test_file); | ||||
235 | return $test_file if ( $test_name eq $name ); | ||||
236 | return undef; | ||||
237 | } | ||||
238 | |||||
239 | # Here is a map of the flow of data from the input source to the output | ||||
240 | # line sink: | ||||
241 | # | ||||
242 | # LineSource-->Tokenizer-->Formatter-->VerticalAligner-->FileWriter--> | ||||
243 | # input groups output | ||||
244 | # lines tokens lines of lines lines | ||||
245 | # lines | ||||
246 | # | ||||
247 | # The names correspond to the package names responsible for the unit processes. | ||||
248 | # | ||||
249 | # The overall process is controlled by the "main" package. | ||||
250 | # | ||||
251 | # LineSource is the stream of input lines | ||||
252 | # | ||||
253 | # Tokenizer analyzes a line and breaks it into tokens, peeking ahead | ||||
254 | # if necessary. A token is any section of the input line which should be | ||||
255 | # manipulated as a single entity during formatting. For example, a single | ||||
256 | # ',' character is a token, and so is an entire side comment. It handles | ||||
257 | # the complexities of Perl syntax, such as distinguishing between '<<' as | ||||
258 | # a shift operator and as a here-document, or distinguishing between '/' | ||||
259 | # as a divide symbol and as a pattern delimiter. | ||||
260 | # | ||||
261 | # Formatter inserts and deletes whitespace between tokens, and breaks | ||||
262 | # sequences of tokens at appropriate points as output lines. It bases its | ||||
263 | # decisions on the default rules as modified by any command-line options. | ||||
264 | # | ||||
265 | # VerticalAligner collects groups of lines together and tries to line up | ||||
266 | # certain tokens, such as '=>', '#', and '=' by adding whitespace. | ||||
267 | # | ||||
268 | # FileWriter simply writes lines to the output stream. | ||||
269 | # | ||||
270 | # The Logger package, not shown, records significant events and warning | ||||
271 | # messages. It writes a .LOG file, which may be saved with a | ||||
272 | # '-log' or a '-g' flag. | ||||
273 | |||||
274 | sub perltidy { | ||||
275 | |||||
276 | my %defaults = ( | ||||
277 | argv => undef, | ||||
278 | destination => undef, | ||||
279 | formatter => undef, | ||||
280 | logfile => undef, | ||||
281 | errorfile => undef, | ||||
282 | perltidyrc => undef, | ||||
283 | source => undef, | ||||
284 | stderr => undef, | ||||
285 | dump_options => undef, | ||||
286 | dump_options_type => undef, | ||||
287 | dump_getopt_flags => undef, | ||||
288 | dump_options_category => undef, | ||||
289 | dump_options_range => undef, | ||||
290 | dump_abbreviations => undef, | ||||
291 | prefilter => undef, | ||||
292 | postfilter => undef, | ||||
293 | ); | ||||
294 | |||||
295 | # don't overwrite callers ARGV | ||||
296 | local @ARGV = @ARGV; | ||||
297 | local *STDERR = *STDERR; | ||||
298 | |||||
299 | my %input_hash = @_; | ||||
300 | |||||
301 | if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) { | ||||
302 | local $" = ')('; | ||||
303 | my @good_keys = sort keys %defaults; | ||||
304 | @bad_keys = sort @bad_keys; | ||||
305 | confess <<EOM; | ||||
306 | ------------------------------------------------------------------------ | ||||
307 | Unknown perltidy parameter : (@bad_keys) | ||||
308 | perltidy only understands : (@good_keys) | ||||
309 | ------------------------------------------------------------------------ | ||||
310 | |||||
311 | EOM | ||||
312 | } | ||||
313 | |||||
314 | my $get_hash_ref = sub { | ||||
315 | my ($key) = @_; | ||||
316 | my $hash_ref = $input_hash{$key}; | ||||
317 | if ( defined($hash_ref) ) { | ||||
318 | unless ( ref($hash_ref) eq 'HASH' ) { | ||||
319 | my $what = ref($hash_ref); | ||||
320 | my $but_is = | ||||
321 | $what ? "but is ref to $what" : "but is not a reference"; | ||||
322 | croak <<EOM; | ||||
323 | ------------------------------------------------------------------------ | ||||
324 | error in call to perltidy: | ||||
325 | -$key must be reference to HASH $but_is | ||||
326 | ------------------------------------------------------------------------ | ||||
327 | EOM | ||||
328 | } | ||||
329 | } | ||||
330 | return $hash_ref; | ||||
331 | }; | ||||
332 | |||||
333 | %input_hash = ( %defaults, %input_hash ); | ||||
334 | my $argv = $input_hash{'argv'}; | ||||
335 | my $destination_stream = $input_hash{'destination'}; | ||||
336 | my $errorfile_stream = $input_hash{'errorfile'}; | ||||
337 | my $logfile_stream = $input_hash{'logfile'}; | ||||
338 | my $perltidyrc_stream = $input_hash{'perltidyrc'}; | ||||
339 | my $source_stream = $input_hash{'source'}; | ||||
340 | my $stderr_stream = $input_hash{'stderr'}; | ||||
341 | my $user_formatter = $input_hash{'formatter'}; | ||||
342 | my $prefilter = $input_hash{'prefilter'}; | ||||
343 | my $postfilter = $input_hash{'postfilter'}; | ||||
344 | |||||
345 | if ($stderr_stream) { | ||||
346 | ( $fh_stderr, my $stderr_file ) = | ||||
347 | Perl::Tidy::streamhandle( $stderr_stream, 'w' ); | ||||
348 | if ( !$fh_stderr ) { | ||||
349 | croak <<EOM; | ||||
350 | ------------------------------------------------------------------------ | ||||
351 | Unable to redirect STDERR to $stderr_stream | ||||
352 | Please check value of -stderr in call to perltidy | ||||
353 | ------------------------------------------------------------------------ | ||||
354 | EOM | ||||
355 | } | ||||
356 | } | ||||
357 | else { | ||||
358 | $fh_stderr = *STDERR; | ||||
359 | } | ||||
360 | |||||
361 | sub Warn ($) { $fh_stderr->print( $_[0] ); } | ||||
362 | |||||
363 | sub Exit ($) { | ||||
364 | if ( $_[0] ) { goto ERROR_EXIT } | ||||
365 | else { goto NORMAL_EXIT } | ||||
366 | } | ||||
367 | |||||
368 | sub Die ($) { Warn $_[0]; Exit(1); } | ||||
369 | |||||
370 | # extract various dump parameters | ||||
371 | my $dump_options_type = $input_hash{'dump_options_type'}; | ||||
372 | my $dump_options = $get_hash_ref->('dump_options'); | ||||
373 | my $dump_getopt_flags = $get_hash_ref->('dump_getopt_flags'); | ||||
374 | my $dump_options_category = $get_hash_ref->('dump_options_category'); | ||||
375 | my $dump_abbreviations = $get_hash_ref->('dump_abbreviations'); | ||||
376 | my $dump_options_range = $get_hash_ref->('dump_options_range'); | ||||
377 | |||||
378 | # validate dump_options_type | ||||
379 | if ( defined($dump_options) ) { | ||||
380 | unless ( defined($dump_options_type) ) { | ||||
381 | $dump_options_type = 'perltidyrc'; | ||||
382 | } | ||||
383 | unless ( $dump_options_type =~ /^(perltidyrc|full)$/ ) { | ||||
384 | croak <<EOM; | ||||
385 | ------------------------------------------------------------------------ | ||||
386 | Please check value of -dump_options_type in call to perltidy; | ||||
387 | saw: '$dump_options_type' | ||||
388 | expecting: 'perltidyrc' or 'full' | ||||
389 | ------------------------------------------------------------------------ | ||||
390 | EOM | ||||
391 | |||||
392 | } | ||||
393 | } | ||||
394 | else { | ||||
395 | $dump_options_type = ""; | ||||
396 | } | ||||
397 | |||||
398 | if ($user_formatter) { | ||||
399 | |||||
400 | # if the user defines a formatter, there is no output stream, | ||||
401 | # but we need a null stream to keep coding simple | ||||
402 | $destination_stream = Perl::Tidy::DevNull->new(); | ||||
403 | } | ||||
404 | |||||
405 | # see if ARGV is overridden | ||||
406 | if ( defined($argv) ) { | ||||
407 | |||||
408 | my $rargv = ref $argv; | ||||
409 | if ( $rargv eq 'SCALAR' ) { $argv = $$argv; $rargv = undef } | ||||
410 | |||||
411 | # ref to ARRAY | ||||
412 | if ($rargv) { | ||||
413 | if ( $rargv eq 'ARRAY' ) { | ||||
414 | @ARGV = @$argv; | ||||
415 | } | ||||
416 | else { | ||||
417 | croak <<EOM; | ||||
418 | ------------------------------------------------------------------------ | ||||
419 | Please check value of -argv in call to perltidy; | ||||
420 | it must be a string or ref to ARRAY but is: $rargv | ||||
421 | ------------------------------------------------------------------------ | ||||
422 | EOM | ||||
423 | } | ||||
424 | } | ||||
425 | |||||
426 | # string | ||||
427 | else { | ||||
428 | my ( $rargv, $msg ) = parse_args($argv); | ||||
429 | if ($msg) { | ||||
430 | Die <<EOM; | ||||
431 | Error parsing this string passed to to perltidy with 'argv': | ||||
432 | $msg | ||||
433 | EOM | ||||
434 | } | ||||
435 | @ARGV = @{$rargv}; | ||||
436 | } | ||||
437 | } | ||||
438 | |||||
439 | my $rpending_complaint; | ||||
440 | $$rpending_complaint = ""; | ||||
441 | my $rpending_logfile_message; | ||||
442 | $$rpending_logfile_message = ""; | ||||
443 | |||||
444 | my ( $is_Windows, $Windows_type ) = look_for_Windows($rpending_complaint); | ||||
445 | |||||
446 | # VMS file names are restricted to a 40.40 format, so we append _tdy | ||||
447 | # instead of .tdy, etc. (but see also sub check_vms_filename) | ||||
448 | my $dot; | ||||
449 | my $dot_pattern; | ||||
450 | if ( $^O eq 'VMS' ) { | ||||
451 | $dot = '_'; | ||||
452 | $dot_pattern = '_'; | ||||
453 | } | ||||
454 | else { | ||||
455 | $dot = '.'; | ||||
456 | $dot_pattern = '\.'; # must escape for use in regex | ||||
457 | } | ||||
458 | |||||
459 | #--------------------------------------------------------------- | ||||
460 | # get command line options | ||||
461 | #--------------------------------------------------------------- | ||||
462 | my ( | ||||
463 | $rOpts, $config_file, $rraw_options, | ||||
464 | $saw_extrude, $saw_pbp, $roption_string, | ||||
465 | $rexpansion, $roption_category, $roption_range | ||||
466 | ) | ||||
467 | = process_command_line( | ||||
468 | $perltidyrc_stream, $is_Windows, $Windows_type, | ||||
469 | $rpending_complaint, $dump_options_type, | ||||
470 | ); | ||||
471 | |||||
472 | #--------------------------------------------------------------- | ||||
473 | # Handle requests to dump information | ||||
474 | #--------------------------------------------------------------- | ||||
475 | |||||
476 | # return or exit immediately after all dumps | ||||
477 | my $quit_now = 0; | ||||
478 | |||||
479 | # Getopt parameters and their flags | ||||
480 | if ( defined($dump_getopt_flags) ) { | ||||
481 | $quit_now = 1; | ||||
482 | foreach my $op ( @{$roption_string} ) { | ||||
483 | my $opt = $op; | ||||
484 | my $flag = ""; | ||||
485 | |||||
486 | # Examples: | ||||
487 | # some-option=s | ||||
488 | # some-option=i | ||||
489 | # some-option:i | ||||
490 | # some-option! | ||||
491 | if ( $opt =~ /(.*)(!|=.*|:.*)$/ ) { | ||||
492 | $opt = $1; | ||||
493 | $flag = $2; | ||||
494 | } | ||||
495 | $dump_getopt_flags->{$opt} = $flag; | ||||
496 | } | ||||
497 | } | ||||
498 | |||||
499 | if ( defined($dump_options_category) ) { | ||||
500 | $quit_now = 1; | ||||
501 | %{$dump_options_category} = %{$roption_category}; | ||||
502 | } | ||||
503 | |||||
504 | if ( defined($dump_options_range) ) { | ||||
505 | $quit_now = 1; | ||||
506 | %{$dump_options_range} = %{$roption_range}; | ||||
507 | } | ||||
508 | |||||
509 | if ( defined($dump_abbreviations) ) { | ||||
510 | $quit_now = 1; | ||||
511 | %{$dump_abbreviations} = %{$rexpansion}; | ||||
512 | } | ||||
513 | |||||
514 | if ( defined($dump_options) ) { | ||||
515 | $quit_now = 1; | ||||
516 | %{$dump_options} = %{$rOpts}; | ||||
517 | } | ||||
518 | |||||
519 | Exit 0 if ($quit_now); | ||||
520 | |||||
521 | # make printable string of options for this run as possible diagnostic | ||||
522 | my $readable_options = readable_options( $rOpts, $roption_string ); | ||||
523 | |||||
524 | # dump from command line | ||||
525 | if ( $rOpts->{'dump-options'} ) { | ||||
526 | print STDOUT $readable_options; | ||||
527 | Exit 0; | ||||
528 | } | ||||
529 | |||||
530 | #--------------------------------------------------------------- | ||||
531 | # check parameters and their interactions | ||||
532 | #--------------------------------------------------------------- | ||||
533 | my $tabsize = | ||||
534 | check_options( $rOpts, $is_Windows, $Windows_type, $rpending_complaint ); | ||||
535 | |||||
536 | if ($user_formatter) { | ||||
537 | $rOpts->{'format'} = 'user'; | ||||
538 | } | ||||
539 | |||||
540 | # there must be one entry here for every possible format | ||||
541 | my %default_file_extension = ( | ||||
542 | tidy => 'tdy', | ||||
543 | html => 'html', | ||||
544 | user => '', | ||||
545 | ); | ||||
546 | |||||
547 | # be sure we have a valid output format | ||||
548 | unless ( exists $default_file_extension{ $rOpts->{'format'} } ) { | ||||
549 | my $formats = join ' ', | ||||
550 | sort map { "'" . $_ . "'" } keys %default_file_extension; | ||||
551 | my $fmt = $rOpts->{'format'}; | ||||
552 | Die "-format='$fmt' but must be one of: $formats\n"; | ||||
553 | } | ||||
554 | |||||
555 | my $output_extension = make_extension( $rOpts->{'output-file-extension'}, | ||||
556 | $default_file_extension{ $rOpts->{'format'} }, $dot ); | ||||
557 | |||||
558 | # If the backup extension contains a / character then the backup should | ||||
559 | # be deleted when the -b option is used. On older versions of | ||||
560 | # perltidy this will generate an error message due to an illegal | ||||
561 | # file name. | ||||
562 | # | ||||
563 | # A backup file will still be generated but will be deleted | ||||
564 | # at the end. If -bext='/' then this extension will be | ||||
565 | # the default 'bak'. Otherwise it will be whatever characters | ||||
566 | # remains after all '/' characters are removed. For example: | ||||
567 | # -bext extension slashes | ||||
568 | # '/' bak 1 | ||||
569 | # '/delete' delete 1 | ||||
570 | # 'delete/' delete 1 | ||||
571 | # '/dev/null' devnull 2 (Currently not allowed) | ||||
572 | my $bext = $rOpts->{'backup-file-extension'}; | ||||
573 | my $delete_backup = ( $rOpts->{'backup-file-extension'} =~ s/\///g ); | ||||
574 | |||||
575 | # At present only one forward slash is allowed. In the future multiple | ||||
576 | # slashes may be allowed to allow for other options | ||||
577 | if ( $delete_backup > 1 ) { | ||||
578 | Die "-bext=$bext contains more than one '/'\n"; | ||||
579 | } | ||||
580 | |||||
581 | my $backup_extension = | ||||
582 | make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot ); | ||||
583 | |||||
584 | my $html_toc_extension = | ||||
585 | make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot ); | ||||
586 | |||||
587 | my $html_src_extension = | ||||
588 | make_extension( $rOpts->{'html-src-extension'}, 'src', $dot ); | ||||
589 | |||||
590 | # check for -b option; | ||||
591 | # silently ignore unless beautify mode | ||||
592 | my $in_place_modify = $rOpts->{'backup-and-modify-in-place'} | ||||
593 | && $rOpts->{'format'} eq 'tidy'; | ||||
594 | |||||
595 | # Turn off -b with warnings in case of conflicts with other options. | ||||
596 | # NOTE: Do this silently, without warnings, if there is a source or | ||||
597 | # destination stream, or standard output is used. This is because the -b | ||||
598 | # flag may have been in a .perltidyrc file and warnings break | ||||
599 | # Test::NoWarnings. See email discussion with Merijn Brand 26 Feb 2014. | ||||
600 | if ($in_place_modify) { | ||||
601 | if ( $rOpts->{'standard-output'} ) { | ||||
602 | ## my $msg = "Ignoring -b; you may not use -b and -st together"; | ||||
603 | ## $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp); | ||||
604 | ## Warn "$msg\n"; | ||||
605 | $in_place_modify = 0; | ||||
606 | } | ||||
607 | if ($destination_stream) { | ||||
608 | ##Warn "Ignoring -b; you may not specify a destination stream and -b together\n"; | ||||
609 | $in_place_modify = 0; | ||||
610 | } | ||||
611 | if ( ref($source_stream) ) { | ||||
612 | ##Warn "Ignoring -b; you may not specify a source array and -b together\n"; | ||||
613 | $in_place_modify = 0; | ||||
614 | } | ||||
615 | if ( $rOpts->{'outfile'} ) { | ||||
616 | ##Warn "Ignoring -b; you may not use -b and -o together\n"; | ||||
617 | $in_place_modify = 0; | ||||
618 | } | ||||
619 | if ( defined( $rOpts->{'output-path'} ) ) { | ||||
620 | ##Warn "Ignoring -b; you may not use -b and -opath together\n"; | ||||
621 | $in_place_modify = 0; | ||||
622 | } | ||||
623 | } | ||||
624 | |||||
625 | Perl::Tidy::Formatter::check_options($rOpts); | ||||
626 | if ( $rOpts->{'format'} eq 'html' ) { | ||||
627 | Perl::Tidy::HtmlWriter->check_options($rOpts); | ||||
628 | } | ||||
629 | |||||
630 | # make the pattern of file extensions that we shouldn't touch | ||||
631 | my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)"; | ||||
632 | if ($output_extension) { | ||||
633 | my $ext = quotemeta($output_extension); | ||||
634 | $forbidden_file_extensions .= "|$ext"; | ||||
635 | } | ||||
636 | if ( $in_place_modify && $backup_extension ) { | ||||
637 | my $ext = quotemeta($backup_extension); | ||||
638 | $forbidden_file_extensions .= "|$ext"; | ||||
639 | } | ||||
640 | $forbidden_file_extensions .= ')$'; | ||||
641 | |||||
642 | # Create a diagnostics object if requested; | ||||
643 | # This is only useful for code development | ||||
644 | my $diagnostics_object = undef; | ||||
645 | if ( $rOpts->{'DIAGNOSTICS'} ) { | ||||
646 | $diagnostics_object = Perl::Tidy::Diagnostics->new(); | ||||
647 | } | ||||
648 | |||||
649 | # no filenames should be given if input is from an array | ||||
650 | if ($source_stream) { | ||||
651 | if ( @ARGV > 0 ) { | ||||
652 | Die | ||||
653 | "You may not specify any filenames when a source array is given\n"; | ||||
654 | } | ||||
655 | |||||
656 | # we'll stuff the source array into ARGV | ||||
657 | unshift( @ARGV, $source_stream ); | ||||
658 | |||||
659 | # No special treatment for source stream which is a filename. | ||||
660 | # This will enable checks for binary files and other bad stuff. | ||||
661 | $source_stream = undef unless ref($source_stream); | ||||
662 | } | ||||
663 | |||||
664 | # use stdin by default if no source array and no args | ||||
665 | else { | ||||
666 | unshift( @ARGV, '-' ) unless @ARGV; | ||||
667 | } | ||||
668 | |||||
669 | #--------------------------------------------------------------- | ||||
670 | # Ready to go... | ||||
671 | # main loop to process all files in argument list | ||||
672 | #--------------------------------------------------------------- | ||||
673 | my $number_of_files = @ARGV; | ||||
674 | my $formatter = undef; | ||||
675 | my $tokenizer = undef; | ||||
676 | while ( my $input_file = shift @ARGV ) { | ||||
677 | my $fileroot; | ||||
678 | my $input_file_permissions; | ||||
679 | |||||
680 | #--------------------------------------------------------------- | ||||
681 | # prepare this input stream | ||||
682 | #--------------------------------------------------------------- | ||||
683 | if ($source_stream) { | ||||
684 | $fileroot = "perltidy"; | ||||
685 | |||||
686 | # If the source is from an array or string, then .LOG output | ||||
687 | # is only possible if a logfile stream is specified. This prevents | ||||
688 | # unexpected perltidy.LOG files. | ||||
689 | if ( !defined($logfile_stream) ) { | ||||
690 | $logfile_stream = Perl::Tidy::DevNull->new(); | ||||
691 | } | ||||
692 | } | ||||
693 | elsif ( $input_file eq '-' ) { # '-' indicates input from STDIN | ||||
694 | $fileroot = "perltidy"; # root name to use for .ERR, .LOG, etc | ||||
695 | $in_place_modify = 0; | ||||
696 | } | ||||
697 | else { | ||||
698 | $fileroot = $input_file; | ||||
699 | unless ( -e $input_file ) { | ||||
700 | |||||
701 | # file doesn't exist - check for a file glob | ||||
702 | if ( $input_file =~ /([\?\*\[\{])/ ) { | ||||
703 | |||||
704 | # Windows shell may not remove quotes, so do it | ||||
705 | my $input_file = $input_file; | ||||
706 | if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 } | ||||
707 | if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 } | ||||
708 | my $pattern = fileglob_to_re($input_file); | ||||
709 | ##eval "/$pattern/"; | ||||
710 | if ( !$@ && opendir( DIR, './' ) ) { | ||||
711 | my @files = | ||||
712 | grep { /$pattern/ && !-d $_ } readdir(DIR); | ||||
713 | closedir(DIR); | ||||
714 | if (@files) { | ||||
715 | unshift @ARGV, @files; | ||||
716 | next; | ||||
717 | } | ||||
718 | } | ||||
719 | } | ||||
720 | Warn "skipping file: '$input_file': no matches found\n"; | ||||
721 | next; | ||||
722 | } | ||||
723 | |||||
724 | unless ( -f $input_file ) { | ||||
725 | Warn "skipping file: $input_file: not a regular file\n"; | ||||
726 | next; | ||||
727 | } | ||||
728 | |||||
729 | # As a safety precaution, skip zero length files. | ||||
730 | # If for example a source file got clobbered somehow, | ||||
731 | # the old .tdy or .bak files might still exist so we | ||||
732 | # shouldn't overwrite them with zero length files. | ||||
733 | unless ( -s $input_file ) { | ||||
734 | Warn "skipping file: $input_file: Zero size\n"; | ||||
735 | next; | ||||
736 | } | ||||
737 | |||||
738 | unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) { | ||||
739 | Warn | ||||
740 | "skipping file: $input_file: Non-text (override with -f)\n"; | ||||
741 | next; | ||||
742 | } | ||||
743 | |||||
744 | # we should have a valid filename now | ||||
745 | $fileroot = $input_file; | ||||
746 | $input_file_permissions = ( stat $input_file )[2] & 07777; | ||||
747 | |||||
748 | if ( $^O eq 'VMS' ) { | ||||
749 | ( $fileroot, $dot ) = check_vms_filename($fileroot); | ||||
750 | } | ||||
751 | |||||
752 | # add option to change path here | ||||
753 | if ( defined( $rOpts->{'output-path'} ) ) { | ||||
754 | |||||
755 | my ( $base, $old_path ) = fileparse($fileroot); | ||||
756 | my $new_path = $rOpts->{'output-path'}; | ||||
757 | unless ( -d $new_path ) { | ||||
758 | unless ( mkdir $new_path, 0777 ) { | ||||
759 | Die "unable to create directory $new_path: $!\n"; | ||||
760 | } | ||||
761 | } | ||||
762 | my $path = $new_path; | ||||
763 | $fileroot = catfile( $path, $base ); | ||||
764 | unless ($fileroot) { | ||||
765 | Die <<EOM; | ||||
766 | ------------------------------------------------------------------------ | ||||
767 | Problem combining $new_path and $base to make a filename; check -opath | ||||
768 | ------------------------------------------------------------------------ | ||||
769 | EOM | ||||
770 | } | ||||
771 | } | ||||
772 | } | ||||
773 | |||||
774 | # Skip files with same extension as the output files because | ||||
775 | # this can lead to a messy situation with files like | ||||
776 | # script.tdy.tdy.tdy ... or worse problems ... when you | ||||
777 | # rerun perltidy over and over with wildcard input. | ||||
778 | if ( | ||||
779 | !$source_stream | ||||
780 | && ( $input_file =~ /$forbidden_file_extensions/o | ||||
781 | || $input_file eq 'DIAGNOSTICS' ) | ||||
782 | ) | ||||
783 | { | ||||
784 | Warn "skipping file: $input_file: wrong extension\n"; | ||||
785 | next; | ||||
786 | } | ||||
787 | |||||
788 | # the 'source_object' supplies a method to read the input file | ||||
789 | my $source_object = | ||||
790 | Perl::Tidy::LineSource->new( $input_file, $rOpts, | ||||
791 | $rpending_logfile_message ); | ||||
792 | next unless ($source_object); | ||||
793 | |||||
794 | # Prefilters and postfilters: The prefilter is a code reference | ||||
795 | # that will be applied to the source before tidying, and the | ||||
796 | # postfilter is a code reference to the result before outputting. | ||||
797 | if ($prefilter) { | ||||
798 | my $buf = ''; | ||||
799 | while ( my $line = $source_object->get_line() ) { | ||||
800 | $buf .= $line; | ||||
801 | } | ||||
802 | $buf = $prefilter->($buf); | ||||
803 | |||||
804 | $source_object = Perl::Tidy::LineSource->new( \$buf, $rOpts, | ||||
805 | $rpending_logfile_message ); | ||||
806 | } | ||||
807 | |||||
808 | # register this file name with the Diagnostics package | ||||
809 | $diagnostics_object->set_input_file($input_file) | ||||
810 | if $diagnostics_object; | ||||
811 | |||||
812 | #--------------------------------------------------------------- | ||||
813 | # prepare the output stream | ||||
814 | #--------------------------------------------------------------- | ||||
815 | my $output_file = undef; | ||||
816 | my $actual_output_extension; | ||||
817 | |||||
818 | if ( $rOpts->{'outfile'} ) { | ||||
819 | |||||
820 | if ( $number_of_files <= 1 ) { | ||||
821 | |||||
822 | if ( $rOpts->{'standard-output'} ) { | ||||
823 | my $msg = "You may not use -o and -st together"; | ||||
824 | $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp); | ||||
825 | Die "$msg\n"; | ||||
826 | } | ||||
827 | elsif ($destination_stream) { | ||||
828 | Die | ||||
829 | "You may not specify a destination array and -o together\n"; | ||||
830 | } | ||||
831 | elsif ( defined( $rOpts->{'output-path'} ) ) { | ||||
832 | Die "You may not specify -o and -opath together\n"; | ||||
833 | } | ||||
834 | elsif ( defined( $rOpts->{'output-file-extension'} ) ) { | ||||
835 | Die "You may not specify -o and -oext together\n"; | ||||
836 | } | ||||
837 | $output_file = $rOpts->{outfile}; | ||||
838 | |||||
839 | # make sure user gives a file name after -o | ||||
840 | if ( $output_file =~ /^-/ ) { | ||||
841 | Die "You must specify a valid filename after -o\n"; | ||||
842 | } | ||||
843 | |||||
844 | # do not overwrite input file with -o | ||||
845 | if ( defined($input_file_permissions) | ||||
846 | && ( $output_file eq $input_file ) ) | ||||
847 | { | ||||
848 | Die "Use 'perltidy -b $input_file' to modify in-place\n"; | ||||
849 | } | ||||
850 | } | ||||
851 | else { | ||||
852 | Die "You may not use -o with more than one input file\n"; | ||||
853 | } | ||||
854 | } | ||||
855 | elsif ( $rOpts->{'standard-output'} ) { | ||||
856 | if ($destination_stream) { | ||||
857 | my $msg = | ||||
858 | "You may not specify a destination array and -st together\n"; | ||||
859 | $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp); | ||||
860 | Die "$msg\n"; | ||||
861 | } | ||||
862 | $output_file = '-'; | ||||
863 | |||||
864 | if ( $number_of_files <= 1 ) { | ||||
865 | } | ||||
866 | else { | ||||
867 | Die "You may not use -st with more than one input file\n"; | ||||
868 | } | ||||
869 | } | ||||
870 | elsif ($destination_stream) { | ||||
871 | $output_file = $destination_stream; | ||||
872 | } | ||||
873 | elsif ($source_stream) { # source but no destination goes to stdout | ||||
874 | $output_file = '-'; | ||||
875 | } | ||||
876 | elsif ( $input_file eq '-' ) { | ||||
877 | $output_file = '-'; | ||||
878 | } | ||||
879 | else { | ||||
880 | if ($in_place_modify) { | ||||
881 | $output_file = IO::File->new_tmpfile() | ||||
882 | or Die "cannot open temp file for -b option: $!\n"; | ||||
883 | } | ||||
884 | else { | ||||
885 | $actual_output_extension = $output_extension; | ||||
886 | $output_file = $fileroot . $output_extension; | ||||
887 | } | ||||
888 | } | ||||
889 | |||||
890 | # the 'sink_object' knows how to write the output file | ||||
891 | my $tee_file = $fileroot . $dot . "TEE"; | ||||
892 | |||||
893 | my $line_separator = $rOpts->{'output-line-ending'}; | ||||
894 | if ( $rOpts->{'preserve-line-endings'} ) { | ||||
895 | $line_separator = find_input_line_ending($input_file); | ||||
896 | } | ||||
897 | |||||
898 | # Eventually all I/O may be done with binmode, but for now it is | ||||
899 | # only done when a user requests a particular line separator | ||||
900 | # through the -ple or -ole flags | ||||
901 | my $binmode = 0; | ||||
902 | if ( defined($line_separator) ) { $binmode = 1 } | ||||
903 | else { $line_separator = "\n" } | ||||
904 | |||||
905 | my ( $sink_object, $postfilter_buffer ); | ||||
906 | if ($postfilter) { | ||||
907 | $sink_object = | ||||
908 | Perl::Tidy::LineSink->new( \$postfilter_buffer, $tee_file, | ||||
909 | $line_separator, $rOpts, $rpending_logfile_message, $binmode ); | ||||
910 | } | ||||
911 | else { | ||||
912 | $sink_object = | ||||
913 | Perl::Tidy::LineSink->new( $output_file, $tee_file, | ||||
914 | $line_separator, $rOpts, $rpending_logfile_message, $binmode ); | ||||
915 | } | ||||
916 | |||||
917 | #--------------------------------------------------------------- | ||||
918 | # initialize the error logger for this file | ||||
919 | #--------------------------------------------------------------- | ||||
920 | my $warning_file = $fileroot . $dot . "ERR"; | ||||
921 | if ($errorfile_stream) { $warning_file = $errorfile_stream } | ||||
922 | my $log_file = $fileroot . $dot . "LOG"; | ||||
923 | if ($logfile_stream) { $log_file = $logfile_stream } | ||||
924 | |||||
925 | my $logger_object = | ||||
926 | Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file, | ||||
927 | $fh_stderr, $saw_extrude ); | ||||
928 | write_logfile_header( | ||||
929 | $rOpts, $logger_object, $config_file, | ||||
930 | $rraw_options, $Windows_type, $readable_options, | ||||
931 | ); | ||||
932 | if ($$rpending_logfile_message) { | ||||
933 | $logger_object->write_logfile_entry($$rpending_logfile_message); | ||||
934 | } | ||||
935 | if ($$rpending_complaint) { | ||||
936 | $logger_object->complain($$rpending_complaint); | ||||
937 | } | ||||
938 | |||||
939 | #--------------------------------------------------------------- | ||||
940 | # initialize the debug object, if any | ||||
941 | #--------------------------------------------------------------- | ||||
942 | my $debugger_object = undef; | ||||
943 | if ( $rOpts->{DEBUG} ) { | ||||
944 | $debugger_object = | ||||
945 | Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" ); | ||||
946 | } | ||||
947 | |||||
948 | #--------------------------------------------------------------- | ||||
949 | # loop over iterations for one source stream | ||||
950 | #--------------------------------------------------------------- | ||||
951 | |||||
952 | # We will do a convergence test if 3 or more iterations are allowed. | ||||
953 | # It would be pointless for fewer because we have to make at least | ||||
954 | # two passes before we can see if we are converged, and the test | ||||
955 | # would just slow things down. | ||||
956 | my $max_iterations = $rOpts->{'iterations'}; | ||||
957 | my $convergence_log_message; | ||||
958 | my %saw_md5; | ||||
959 | my $do_convergence_test = $max_iterations > 2; | ||||
960 | if ($do_convergence_test) { | ||||
961 | eval "use Digest::MD5 qw(md5_hex)"; | ||||
962 | $do_convergence_test = !$@; | ||||
963 | |||||
964 | # Trying to avoid problems with ancient versions of perl because | ||||
965 | # I don't know in which version number utf8::encode was introduced. | ||||
966 | eval { my $string = "perltidy"; utf8::encode($string) }; | ||||
967 | $do_convergence_test = $do_convergence_test && !$@; | ||||
968 | } | ||||
969 | |||||
970 | # save objects to allow redirecting output during iterations | ||||
971 | my $sink_object_final = $sink_object; | ||||
972 | my $debugger_object_final = $debugger_object; | ||||
973 | my $logger_object_final = $logger_object; | ||||
974 | |||||
975 | for ( my $iter = 1 ; $iter <= $max_iterations ; $iter++ ) { | ||||
976 | |||||
977 | # send output stream to temp buffers until last iteration | ||||
978 | my $sink_buffer; | ||||
979 | if ( $iter < $max_iterations ) { | ||||
980 | $sink_object = | ||||
981 | Perl::Tidy::LineSink->new( \$sink_buffer, $tee_file, | ||||
982 | $line_separator, $rOpts, $rpending_logfile_message, | ||||
983 | $binmode ); | ||||
984 | } | ||||
985 | else { | ||||
986 | $sink_object = $sink_object_final; | ||||
987 | } | ||||
988 | |||||
989 | # Save logger, debugger output only on pass 1 because: | ||||
990 | # (1) line number references must be to the starting | ||||
991 | # source, not an intermediate result, and | ||||
992 | # (2) we need to know if there are errors so we can stop the | ||||
993 | # iterations early if necessary. | ||||
994 | if ( $iter > 1 ) { | ||||
995 | $debugger_object = undef; | ||||
996 | $logger_object = undef; | ||||
997 | } | ||||
998 | |||||
999 | #------------------------------------------------------------ | ||||
1000 | # create a formatter for this file : html writer or | ||||
1001 | # pretty printer | ||||
1002 | #------------------------------------------------------------ | ||||
1003 | |||||
1004 | # we have to delete any old formatter because, for safety, | ||||
1005 | # the formatter will check to see that there is only one. | ||||
1006 | $formatter = undef; | ||||
1007 | |||||
1008 | if ($user_formatter) { | ||||
1009 | $formatter = $user_formatter; | ||||
1010 | } | ||||
1011 | elsif ( $rOpts->{'format'} eq 'html' ) { | ||||
1012 | $formatter = | ||||
1013 | Perl::Tidy::HtmlWriter->new( $fileroot, $output_file, | ||||
1014 | $actual_output_extension, $html_toc_extension, | ||||
1015 | $html_src_extension ); | ||||
1016 | } | ||||
1017 | elsif ( $rOpts->{'format'} eq 'tidy' ) { | ||||
1018 | $formatter = Perl::Tidy::Formatter->new( | ||||
1019 | logger_object => $logger_object, | ||||
1020 | diagnostics_object => $diagnostics_object, | ||||
1021 | sink_object => $sink_object, | ||||
1022 | ); | ||||
1023 | } | ||||
1024 | else { | ||||
1025 | Die "I don't know how to do -format=$rOpts->{'format'}\n"; | ||||
1026 | } | ||||
1027 | |||||
1028 | unless ($formatter) { | ||||
1029 | Die "Unable to continue with $rOpts->{'format'} formatting\n"; | ||||
1030 | } | ||||
1031 | |||||
1032 | #--------------------------------------------------------------- | ||||
1033 | # create the tokenizer for this file | ||||
1034 | #--------------------------------------------------------------- | ||||
1035 | $tokenizer = undef; # must destroy old tokenizer | ||||
1036 | $tokenizer = Perl::Tidy::Tokenizer->new( | ||||
1037 | source_object => $source_object, | ||||
1038 | logger_object => $logger_object, | ||||
1039 | debugger_object => $debugger_object, | ||||
1040 | diagnostics_object => $diagnostics_object, | ||||
1041 | tabsize => $tabsize, | ||||
1042 | |||||
1043 | starting_level => $rOpts->{'starting-indentation-level'}, | ||||
1044 | indent_columns => $rOpts->{'indent-columns'}, | ||||
1045 | look_for_hash_bang => $rOpts->{'look-for-hash-bang'}, | ||||
1046 | look_for_autoloader => $rOpts->{'look-for-autoloader'}, | ||||
1047 | look_for_selfloader => $rOpts->{'look-for-selfloader'}, | ||||
1048 | trim_qw => $rOpts->{'trim-qw'}, | ||||
1049 | |||||
1050 | continuation_indentation => | ||||
1051 | $rOpts->{'continuation-indentation'}, | ||||
1052 | outdent_labels => $rOpts->{'outdent-labels'}, | ||||
1053 | ); | ||||
1054 | |||||
1055 | #--------------------------------------------------------------- | ||||
1056 | # now we can do it | ||||
1057 | #--------------------------------------------------------------- | ||||
1058 | process_this_file( $tokenizer, $formatter ); | ||||
1059 | |||||
1060 | #--------------------------------------------------------------- | ||||
1061 | # close the input source and report errors | ||||
1062 | #--------------------------------------------------------------- | ||||
1063 | $source_object->close_input_file(); | ||||
1064 | |||||
1065 | # line source for next iteration (if any) comes from the current | ||||
1066 | # temporary output buffer | ||||
1067 | if ( $iter < $max_iterations ) { | ||||
1068 | |||||
1069 | $sink_object->close_output_file(); | ||||
1070 | $source_object = | ||||
1071 | Perl::Tidy::LineSource->new( \$sink_buffer, $rOpts, | ||||
1072 | $rpending_logfile_message ); | ||||
1073 | |||||
1074 | # stop iterations if errors or converged | ||||
1075 | my $stop_now = $logger_object->{_warning_count}; | ||||
1076 | if ($stop_now) { | ||||
1077 | $convergence_log_message = <<EOM; | ||||
1078 | Stopping iterations because of errors. | ||||
1079 | EOM | ||||
1080 | } | ||||
1081 | elsif ($do_convergence_test) { | ||||
1082 | |||||
1083 | # Patch for [rt.cpan.org #88020] | ||||
1084 | # Use utf8::encode since md5_hex() only operates on bytes. | ||||
1085 | my $digest = md5_hex( utf8::encode($sink_buffer) ); | ||||
1086 | if ( !$saw_md5{$digest} ) { | ||||
1087 | $saw_md5{$digest} = $iter; | ||||
1088 | } | ||||
1089 | else { | ||||
1090 | |||||
1091 | # Deja vu, stop iterating | ||||
1092 | $stop_now = 1; | ||||
1093 | my $iterm = $iter - 1; | ||||
1094 | if ( $saw_md5{$digest} != $iterm ) { | ||||
1095 | |||||
1096 | # Blinking (oscillating) between two stable | ||||
1097 | # end states. This has happened in the past | ||||
1098 | # but at present there are no known instances. | ||||
1099 | $convergence_log_message = <<EOM; | ||||
1100 | Blinking. Output for iteration $iter same as for $saw_md5{$digest}. | ||||
1101 | EOM | ||||
1102 | $diagnostics_object->write_diagnostics( | ||||
1103 | $convergence_log_message) | ||||
1104 | if $diagnostics_object; | ||||
1105 | } | ||||
1106 | else { | ||||
1107 | $convergence_log_message = <<EOM; | ||||
1108 | Converged. Output for iteration $iter same as for iter $iterm. | ||||
1109 | EOM | ||||
1110 | $diagnostics_object->write_diagnostics( | ||||
1111 | $convergence_log_message) | ||||
1112 | if $diagnostics_object && $iterm > 2; | ||||
1113 | } | ||||
1114 | } | ||||
1115 | } ## end if ($do_convergence_test) | ||||
1116 | |||||
1117 | if ($stop_now) { | ||||
1118 | |||||
1119 | # we are stopping the iterations early; | ||||
1120 | # copy the output stream to its final destination | ||||
1121 | $sink_object = $sink_object_final; | ||||
1122 | while ( my $line = $source_object->get_line() ) { | ||||
1123 | $sink_object->write_line($line); | ||||
1124 | } | ||||
1125 | $source_object->close_input_file(); | ||||
1126 | last; | ||||
1127 | } | ||||
1128 | } ## end if ( $iter < $max_iterations) | ||||
1129 | } # end loop over iterations for one source file | ||||
1130 | |||||
1131 | # restore objects which have been temporarily undefined | ||||
1132 | # for second and higher iterations | ||||
1133 | $debugger_object = $debugger_object_final; | ||||
1134 | $logger_object = $logger_object_final; | ||||
1135 | |||||
1136 | $logger_object->write_logfile_entry($convergence_log_message) | ||||
1137 | if $convergence_log_message; | ||||
1138 | |||||
1139 | #--------------------------------------------------------------- | ||||
1140 | # Perform any postfilter operation | ||||
1141 | #--------------------------------------------------------------- | ||||
1142 | if ($postfilter) { | ||||
1143 | $sink_object->close_output_file(); | ||||
1144 | $sink_object = | ||||
1145 | Perl::Tidy::LineSink->new( $output_file, $tee_file, | ||||
1146 | $line_separator, $rOpts, $rpending_logfile_message, $binmode ); | ||||
1147 | my $buf = $postfilter->($postfilter_buffer); | ||||
1148 | $source_object = | ||||
1149 | Perl::Tidy::LineSource->new( \$buf, $rOpts, | ||||
1150 | $rpending_logfile_message ); | ||||
1151 | while ( my $line = $source_object->get_line() ) { | ||||
1152 | $sink_object->write_line($line); | ||||
1153 | } | ||||
1154 | $source_object->close_input_file(); | ||||
1155 | } | ||||
1156 | |||||
1157 | # Save names of the input and output files for syntax check | ||||
1158 | my $ifname = $input_file; | ||||
1159 | my $ofname = $output_file; | ||||
1160 | |||||
1161 | #--------------------------------------------------------------- | ||||
1162 | # handle the -b option (backup and modify in-place) | ||||
1163 | #--------------------------------------------------------------- | ||||
1164 | if ($in_place_modify) { | ||||
1165 | unless ( -f $input_file ) { | ||||
1166 | |||||
1167 | # oh, oh, no real file to backup .. | ||||
1168 | # shouldn't happen because of numerous preliminary checks | ||||
1169 | Die | ||||
1170 | "problem with -b backing up input file '$input_file': not a file\n"; | ||||
1171 | } | ||||
1172 | my $backup_name = $input_file . $backup_extension; | ||||
1173 | if ( -f $backup_name ) { | ||||
1174 | unlink($backup_name) | ||||
1175 | or Die | ||||
1176 | "unable to remove previous '$backup_name' for -b option; check permissions: $!\n"; | ||||
1177 | } | ||||
1178 | |||||
1179 | # backup the input file | ||||
1180 | # we use copy for symlinks, move for regular files | ||||
1181 | if ( -l $input_file ) { | ||||
1182 | File::Copy::copy( $input_file, $backup_name ) | ||||
1183 | or Die "File::Copy failed trying to backup source: $!"; | ||||
1184 | } | ||||
1185 | else { | ||||
1186 | rename( $input_file, $backup_name ) | ||||
1187 | or Die | ||||
1188 | "problem renaming $input_file to $backup_name for -b option: $!\n"; | ||||
1189 | } | ||||
1190 | $ifname = $backup_name; | ||||
1191 | |||||
1192 | # copy the output to the original input file | ||||
1193 | # NOTE: it would be nice to just close $output_file and use | ||||
1194 | # File::Copy::copy here, but in this case $output_file is the | ||||
1195 | # handle of an open nameless temporary file so we would lose | ||||
1196 | # everything if we closed it. | ||||
1197 | seek( $output_file, 0, 0 ) | ||||
1198 | or Die "unable to rewind a temporary file for -b option: $!\n"; | ||||
1199 | my $fout = IO::File->new("> $input_file") | ||||
1200 | or Die | ||||
1201 | "problem re-opening $input_file for write for -b option; check file and directory permissions: $!\n"; | ||||
1202 | binmode $fout; | ||||
1203 | my $line; | ||||
1204 | while ( $line = $output_file->getline() ) { | ||||
1205 | $fout->print($line); | ||||
1206 | } | ||||
1207 | $fout->close(); | ||||
1208 | $output_file = $input_file; | ||||
1209 | $ofname = $input_file; | ||||
1210 | } | ||||
1211 | |||||
1212 | #--------------------------------------------------------------- | ||||
1213 | # clean up and report errors | ||||
1214 | #--------------------------------------------------------------- | ||||
1215 | $sink_object->close_output_file() if $sink_object; | ||||
1216 | $debugger_object->close_debug_file() if $debugger_object; | ||||
1217 | |||||
1218 | # set output file permissions | ||||
1219 | if ( $output_file && -f $output_file && !-l $output_file ) { | ||||
1220 | if ($input_file_permissions) { | ||||
1221 | |||||
1222 | # give output script same permissions as input script, but | ||||
1223 | # make it user-writable or else we can't run perltidy again. | ||||
1224 | # Thus we retain whatever executable flags were set. | ||||
1225 | if ( $rOpts->{'format'} eq 'tidy' ) { | ||||
1226 | chmod( $input_file_permissions | 0600, $output_file ); | ||||
1227 | } | ||||
1228 | |||||
1229 | # else use default permissions for html and any other format | ||||
1230 | } | ||||
1231 | } | ||||
1232 | |||||
1233 | #--------------------------------------------------------------- | ||||
1234 | # Do syntax check if requested and possible | ||||
1235 | #--------------------------------------------------------------- | ||||
1236 | my $infile_syntax_ok = 0; # -1 no 0=don't know 1 yes | ||||
1237 | if ( $logger_object | ||||
1238 | && $rOpts->{'check-syntax'} | ||||
1239 | && $ifname | ||||
1240 | && $ofname ) | ||||
1241 | { | ||||
1242 | $infile_syntax_ok = | ||||
1243 | check_syntax( $ifname, $ofname, $logger_object, $rOpts ); | ||||
1244 | } | ||||
1245 | |||||
1246 | #--------------------------------------------------------------- | ||||
1247 | # remove the original file for in-place modify as follows: | ||||
1248 | # $delete_backup=0 never | ||||
1249 | # $delete_backup=1 only if no errors | ||||
1250 | # $delete_backup>1 always : NOT ALLOWED, too risky, see above | ||||
1251 | #--------------------------------------------------------------- | ||||
1252 | if ( $in_place_modify | ||||
1253 | && $delete_backup | ||||
1254 | && -f $ifname | ||||
1255 | && ( $delete_backup > 1 || !$logger_object->{_warning_count} ) ) | ||||
1256 | { | ||||
1257 | |||||
1258 | # As an added safety precaution, do not delete the source file | ||||
1259 | # if its size has dropped from positive to zero, since this | ||||
1260 | # could indicate a disaster of some kind, including a hardware | ||||
1261 | # failure. Actually, this could happen if you had a file of | ||||
1262 | # all comments (or pod) and deleted everything with -dac (-dap) | ||||
1263 | # for some reason. | ||||
1264 | if ( !-s $output_file && -s $ifname && $delete_backup == 1 ) { | ||||
1265 | Warn( | ||||
1266 | "output file '$output_file' missing or zero length; original '$ifname' not deleted\n" | ||||
1267 | ); | ||||
1268 | } | ||||
1269 | else { | ||||
1270 | unlink($ifname) | ||||
1271 | or Die | ||||
1272 | "unable to remove previous '$ifname' for -b option; check permissions: $!\n"; | ||||
1273 | } | ||||
1274 | } | ||||
1275 | |||||
1276 | $logger_object->finish( $infile_syntax_ok, $formatter ) | ||||
1277 | if $logger_object; | ||||
1278 | } # end of main loop to process all files | ||||
1279 | |||||
1280 | NORMAL_EXIT: | ||||
1281 | return 0; | ||||
1282 | |||||
1283 | ERROR_EXIT: | ||||
1284 | return 1; | ||||
1285 | } # end of main program perltidy | ||||
1286 | |||||
1287 | sub get_stream_as_named_file { | ||||
1288 | |||||
1289 | # Return the name of a file containing a stream of data, creating | ||||
1290 | # a temporary file if necessary. | ||||
1291 | # Given: | ||||
1292 | # $stream - the name of a file or stream | ||||
1293 | # Returns: | ||||
1294 | # $fname = name of file if possible, or undef | ||||
1295 | # $if_tmpfile = true if temp file, undef if not temp file | ||||
1296 | # | ||||
1297 | # This routine is needed for passing actual files to Perl for | ||||
1298 | # a syntax check. | ||||
1299 | my ($stream) = @_; | ||||
1300 | my $is_tmpfile; | ||||
1301 | my $fname; | ||||
1302 | if ($stream) { | ||||
1303 | if ( ref($stream) ) { | ||||
1304 | my ( $fh_stream, $fh_name ) = | ||||
1305 | Perl::Tidy::streamhandle( $stream, 'r' ); | ||||
1306 | if ($fh_stream) { | ||||
1307 | my ( $fout, $tmpnam ) = tempfile(); | ||||
1308 | if ($fout) { | ||||
1309 | $fname = $tmpnam; | ||||
1310 | $is_tmpfile = 1; | ||||
1311 | binmode $fout; | ||||
1312 | while ( my $line = $fh_stream->getline() ) { | ||||
1313 | $fout->print($line); | ||||
1314 | } | ||||
1315 | $fout->close(); | ||||
1316 | } | ||||
1317 | $fh_stream->close(); | ||||
1318 | } | ||||
1319 | } | ||||
1320 | elsif ( $stream ne '-' && -f $stream ) { | ||||
1321 | $fname = $stream; | ||||
1322 | } | ||||
1323 | } | ||||
1324 | return ( $fname, $is_tmpfile ); | ||||
1325 | } | ||||
1326 | |||||
1327 | sub fileglob_to_re { | ||||
1328 | |||||
1329 | # modified (corrected) from version in find2perl | ||||
1330 | my $x = shift; | ||||
1331 | $x =~ s#([./^\$()])#\\$1#g; # escape special characters | ||||
1332 | $x =~ s#\*#.*#g; # '*' -> '.*' | ||||
1333 | $x =~ s#\?#.#g; # '?' -> '.' | ||||
1334 | "^$x\\z"; # match whole word | ||||
1335 | } | ||||
1336 | |||||
1337 | sub make_extension { | ||||
1338 | |||||
1339 | # Make a file extension, including any leading '.' if necessary | ||||
1340 | # The '.' may actually be an '_' under VMS | ||||
1341 | my ( $extension, $default, $dot ) = @_; | ||||
1342 | |||||
1343 | # Use the default if none specified | ||||
1344 | $extension = $default unless ($extension); | ||||
1345 | |||||
1346 | # Only extensions with these leading characters get a '.' | ||||
1347 | # This rule gives the user some freedom | ||||
1348 | if ( $extension =~ /^[a-zA-Z0-9]/ ) { | ||||
1349 | $extension = $dot . $extension; | ||||
1350 | } | ||||
1351 | return $extension; | ||||
1352 | } | ||||
1353 | |||||
1354 | sub write_logfile_header { | ||||
1355 | my ( | ||||
1356 | $rOpts, $logger_object, $config_file, | ||||
1357 | $rraw_options, $Windows_type, $readable_options | ||||
1358 | ) = @_; | ||||
1359 | $logger_object->write_logfile_entry( | ||||
1360 | "perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n" | ||||
1361 | ); | ||||
1362 | if ($Windows_type) { | ||||
1363 | $logger_object->write_logfile_entry("Windows type is $Windows_type\n"); | ||||
1364 | } | ||||
1365 | my $options_string = join( ' ', @$rraw_options ); | ||||
1366 | |||||
1367 | if ($config_file) { | ||||
1368 | $logger_object->write_logfile_entry( | ||||
1369 | "Found Configuration File >>> $config_file \n"); | ||||
1370 | } | ||||
1371 | $logger_object->write_logfile_entry( | ||||
1372 | "Configuration and command line parameters for this run:\n"); | ||||
1373 | $logger_object->write_logfile_entry("$options_string\n"); | ||||
1374 | |||||
1375 | if ( $rOpts->{'DEBUG'} || $rOpts->{'show-options'} ) { | ||||
1376 | $rOpts->{'logfile'} = 1; # force logfile to be saved | ||||
1377 | $logger_object->write_logfile_entry( | ||||
1378 | "Final parameter set for this run\n"); | ||||
1379 | $logger_object->write_logfile_entry( | ||||
1380 | "------------------------------------\n"); | ||||
1381 | |||||
1382 | $logger_object->write_logfile_entry($readable_options); | ||||
1383 | |||||
1384 | $logger_object->write_logfile_entry( | ||||
1385 | "------------------------------------\n"); | ||||
1386 | } | ||||
1387 | $logger_object->write_logfile_entry( | ||||
1388 | "To find error messages search for 'WARNING' with your editor\n"); | ||||
1389 | } | ||||
1390 | |||||
1391 | sub generate_options { | ||||
1392 | |||||
1393 | ###################################################################### | ||||
1394 | # Generate and return references to: | ||||
1395 | # @option_string - the list of options to be passed to Getopt::Long | ||||
1396 | # @defaults - the list of default options | ||||
1397 | # %expansion - a hash showing how all abbreviations are expanded | ||||
1398 | # %category - a hash giving the general category of each option | ||||
1399 | # %option_range - a hash giving the valid ranges of certain options | ||||
1400 | |||||
1401 | # Note: a few options are not documented in the man page and usage | ||||
1402 | # message. This is because these are experimental or debug options and | ||||
1403 | # may or may not be retained in future versions. | ||||
1404 | # | ||||
1405 | # Here are the undocumented flags as far as I know. Any of them | ||||
1406 | # may disappear at any time. They are mainly for fine-tuning | ||||
1407 | # and debugging. | ||||
1408 | # | ||||
1409 | # fll --> fuzzy-line-length # a trivial parameter which gets | ||||
1410 | # turned off for the extrude option | ||||
1411 | # which is mainly for debugging | ||||
1412 | # scl --> short-concatenation-item-length # helps break at '.' | ||||
1413 | # recombine # for debugging line breaks | ||||
1414 | # valign # for debugging vertical alignment | ||||
1415 | # I --> DIAGNOSTICS # for debugging | ||||
1416 | ###################################################################### | ||||
1417 | |||||
1418 | # here is a summary of the Getopt codes: | ||||
1419 | # <none> does not take an argument | ||||
1420 | # =s takes a mandatory string | ||||
1421 | # :s takes an optional string (DO NOT USE - filenames will get eaten up) | ||||
1422 | # =i takes a mandatory integer | ||||
1423 | # :i takes an optional integer (NOT RECOMMENDED - can cause trouble) | ||||
1424 | # ! does not take an argument and may be negated | ||||
1425 | # i.e., -foo and -nofoo are allowed | ||||
1426 | # a double dash signals the end of the options list | ||||
1427 | # | ||||
1428 | #--------------------------------------------------------------- | ||||
1429 | # Define the option string passed to GetOptions. | ||||
1430 | #--------------------------------------------------------------- | ||||
1431 | |||||
1432 | my @option_string = (); | ||||
1433 | my %expansion = (); | ||||
1434 | my %option_category = (); | ||||
1435 | my %option_range = (); | ||||
1436 | my $rexpansion = \%expansion; | ||||
1437 | |||||
1438 | # names of categories in manual | ||||
1439 | # leading integers will allow sorting | ||||
1440 | my @category_name = ( | ||||
1441 | '0. I/O control', | ||||
1442 | '1. Basic formatting options', | ||||
1443 | '2. Code indentation control', | ||||
1444 | '3. Whitespace control', | ||||
1445 | '4. Comment controls', | ||||
1446 | '5. Linebreak controls', | ||||
1447 | '6. Controlling list formatting', | ||||
1448 | '7. Retaining or ignoring existing line breaks', | ||||
1449 | '8. Blank line control', | ||||
1450 | '9. Other controls', | ||||
1451 | '10. HTML options', | ||||
1452 | '11. pod2html options', | ||||
1453 | '12. Controlling HTML properties', | ||||
1454 | '13. Debugging', | ||||
1455 | ); | ||||
1456 | |||||
1457 | # These options are parsed directly by perltidy: | ||||
1458 | # help h | ||||
1459 | # version v | ||||
1460 | # However, they are included in the option set so that they will | ||||
1461 | # be seen in the options dump. | ||||
1462 | |||||
1463 | # These long option names have no abbreviations or are treated specially | ||||
1464 | @option_string = qw( | ||||
1465 | html! | ||||
1466 | noprofile | ||||
1467 | no-profile | ||||
1468 | npro | ||||
1469 | recombine! | ||||
1470 | valign! | ||||
1471 | notidy | ||||
1472 | ); | ||||
1473 | |||||
1474 | my $category = 13; # Debugging | ||||
1475 | foreach (@option_string) { | ||||
1476 | my $opt = $_; # must avoid changing the actual flag | ||||
1477 | $opt =~ s/!$//; | ||||
1478 | $option_category{$opt} = $category_name[$category]; | ||||
1479 | } | ||||
1480 | |||||
1481 | $category = 11; # HTML | ||||
1482 | $option_category{html} = $category_name[$category]; | ||||
1483 | |||||
1484 | # routine to install and check options | ||||
1485 | my $add_option = sub { | ||||
1486 | my ( $long_name, $short_name, $flag ) = @_; | ||||
1487 | push @option_string, $long_name . $flag; | ||||
1488 | $option_category{$long_name} = $category_name[$category]; | ||||
1489 | if ($short_name) { | ||||
1490 | if ( $expansion{$short_name} ) { | ||||
1491 | my $existing_name = $expansion{$short_name}[0]; | ||||
1492 | Die | ||||
1493 | "redefining abbreviation $short_name for $long_name; already used for $existing_name\n"; | ||||
1494 | } | ||||
1495 | $expansion{$short_name} = [$long_name]; | ||||
1496 | if ( $flag eq '!' ) { | ||||
1497 | my $nshort_name = 'n' . $short_name; | ||||
1498 | my $nolong_name = 'no' . $long_name; | ||||
1499 | if ( $expansion{$nshort_name} ) { | ||||
1500 | my $existing_name = $expansion{$nshort_name}[0]; | ||||
1501 | Die | ||||
1502 | "attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n"; | ||||
1503 | } | ||||
1504 | $expansion{$nshort_name} = [$nolong_name]; | ||||
1505 | } | ||||
1506 | } | ||||
1507 | }; | ||||
1508 | |||||
1509 | # Install long option names which have a simple abbreviation. | ||||
1510 | # Options with code '!' get standard negation ('no' for long names, | ||||
1511 | # 'n' for abbreviations). Categories follow the manual. | ||||
1512 | |||||
1513 | ########################### | ||||
1514 | $category = 0; # I/O_Control | ||||
1515 | ########################### | ||||
1516 | $add_option->( 'backup-and-modify-in-place', 'b', '!' ); | ||||
1517 | $add_option->( 'backup-file-extension', 'bext', '=s' ); | ||||
1518 | $add_option->( 'force-read-binary', 'f', '!' ); | ||||
1519 | $add_option->( 'format', 'fmt', '=s' ); | ||||
1520 | $add_option->( 'iterations', 'it', '=i' ); | ||||
1521 | $add_option->( 'logfile', 'log', '!' ); | ||||
1522 | $add_option->( 'logfile-gap', 'g', ':i' ); | ||||
1523 | $add_option->( 'outfile', 'o', '=s' ); | ||||
1524 | $add_option->( 'output-file-extension', 'oext', '=s' ); | ||||
1525 | $add_option->( 'output-path', 'opath', '=s' ); | ||||
1526 | $add_option->( 'profile', 'pro', '=s' ); | ||||
1527 | $add_option->( 'quiet', 'q', '!' ); | ||||
1528 | $add_option->( 'standard-error-output', 'se', '!' ); | ||||
1529 | $add_option->( 'standard-output', 'st', '!' ); | ||||
1530 | $add_option->( 'warning-output', 'w', '!' ); | ||||
1531 | |||||
1532 | # options which are both toggle switches and values moved here | ||||
1533 | # to hide from tidyview (which does not show category 0 flags): | ||||
1534 | # -ole moved here from category 1 | ||||
1535 | # -sil moved here from category 2 | ||||
1536 | $add_option->( 'output-line-ending', 'ole', '=s' ); | ||||
1537 | $add_option->( 'starting-indentation-level', 'sil', '=i' ); | ||||
1538 | |||||
1539 | ######################################## | ||||
1540 | $category = 1; # Basic formatting options | ||||
1541 | ######################################## | ||||
1542 | $add_option->( 'check-syntax', 'syn', '!' ); | ||||
1543 | $add_option->( 'entab-leading-whitespace', 'et', '=i' ); | ||||
1544 | $add_option->( 'indent-columns', 'i', '=i' ); | ||||
1545 | $add_option->( 'maximum-line-length', 'l', '=i' ); | ||||
1546 | $add_option->( 'variable-maximum-line-length', 'vmll', '!' ); | ||||
1547 | $add_option->( 'whitespace-cycle', 'wc', '=i' ); | ||||
1548 | $add_option->( 'perl-syntax-check-flags', 'pscf', '=s' ); | ||||
1549 | $add_option->( 'preserve-line-endings', 'ple', '!' ); | ||||
1550 | $add_option->( 'tabs', 't', '!' ); | ||||
1551 | $add_option->( 'default-tabsize', 'dt', '=i' ); | ||||
1552 | |||||
1553 | ######################################## | ||||
1554 | $category = 2; # Code indentation control | ||||
1555 | ######################################## | ||||
1556 | $add_option->( 'continuation-indentation', 'ci', '=i' ); | ||||
1557 | $add_option->( 'line-up-parentheses', 'lp', '!' ); | ||||
1558 | $add_option->( 'outdent-keyword-list', 'okwl', '=s' ); | ||||
1559 | $add_option->( 'outdent-keywords', 'okw', '!' ); | ||||
1560 | $add_option->( 'outdent-labels', 'ola', '!' ); | ||||
1561 | $add_option->( 'outdent-long-quotes', 'olq', '!' ); | ||||
1562 | $add_option->( 'indent-closing-brace', 'icb', '!' ); | ||||
1563 | $add_option->( 'closing-token-indentation', 'cti', '=i' ); | ||||
1564 | $add_option->( 'closing-paren-indentation', 'cpi', '=i' ); | ||||
1565 | $add_option->( 'closing-brace-indentation', 'cbi', '=i' ); | ||||
1566 | $add_option->( 'closing-square-bracket-indentation', 'csbi', '=i' ); | ||||
1567 | $add_option->( 'brace-left-and-indent', 'bli', '!' ); | ||||
1568 | $add_option->( 'brace-left-and-indent-list', 'blil', '=s' ); | ||||
1569 | |||||
1570 | ######################################## | ||||
1571 | $category = 3; # Whitespace control | ||||
1572 | ######################################## | ||||
1573 | $add_option->( 'add-semicolons', 'asc', '!' ); | ||||
1574 | $add_option->( 'add-whitespace', 'aws', '!' ); | ||||
1575 | $add_option->( 'block-brace-tightness', 'bbt', '=i' ); | ||||
1576 | $add_option->( 'brace-tightness', 'bt', '=i' ); | ||||
1577 | $add_option->( 'delete-old-whitespace', 'dws', '!' ); | ||||
1578 | $add_option->( 'delete-semicolons', 'dsm', '!' ); | ||||
1579 | $add_option->( 'nospace-after-keyword', 'nsak', '=s' ); | ||||
1580 | $add_option->( 'nowant-left-space', 'nwls', '=s' ); | ||||
1581 | $add_option->( 'nowant-right-space', 'nwrs', '=s' ); | ||||
1582 | $add_option->( 'paren-tightness', 'pt', '=i' ); | ||||
1583 | $add_option->( 'space-after-keyword', 'sak', '=s' ); | ||||
1584 | $add_option->( 'space-for-semicolon', 'sfs', '!' ); | ||||
1585 | $add_option->( 'space-function-paren', 'sfp', '!' ); | ||||
1586 | $add_option->( 'space-keyword-paren', 'skp', '!' ); | ||||
1587 | $add_option->( 'space-terminal-semicolon', 'sts', '!' ); | ||||
1588 | $add_option->( 'square-bracket-tightness', 'sbt', '=i' ); | ||||
1589 | $add_option->( 'square-bracket-vertical-tightness', 'sbvt', '=i' ); | ||||
1590 | $add_option->( 'square-bracket-vertical-tightness-closing', 'sbvtc', '=i' ); | ||||
1591 | $add_option->( 'tight-secret-operators', 'tso', '!' ); | ||||
1592 | $add_option->( 'trim-qw', 'tqw', '!' ); | ||||
1593 | $add_option->( 'trim-pod', 'trp', '!' ); | ||||
1594 | $add_option->( 'want-left-space', 'wls', '=s' ); | ||||
1595 | $add_option->( 'want-right-space', 'wrs', '=s' ); | ||||
1596 | |||||
1597 | ######################################## | ||||
1598 | $category = 4; # Comment controls | ||||
1599 | ######################################## | ||||
1600 | $add_option->( 'closing-side-comment-else-flag', 'csce', '=i' ); | ||||
1601 | $add_option->( 'closing-side-comment-interval', 'csci', '=i' ); | ||||
1602 | $add_option->( 'closing-side-comment-list', 'cscl', '=s' ); | ||||
1603 | $add_option->( 'closing-side-comment-maximum-text', 'csct', '=i' ); | ||||
1604 | $add_option->( 'closing-side-comment-prefix', 'cscp', '=s' ); | ||||
1605 | $add_option->( 'closing-side-comment-warnings', 'cscw', '!' ); | ||||
1606 | $add_option->( 'closing-side-comments', 'csc', '!' ); | ||||
1607 | $add_option->( 'closing-side-comments-balanced', 'cscb', '!' ); | ||||
1608 | $add_option->( 'format-skipping', 'fs', '!' ); | ||||
1609 | $add_option->( 'format-skipping-begin', 'fsb', '=s' ); | ||||
1610 | $add_option->( 'format-skipping-end', 'fse', '=s' ); | ||||
1611 | $add_option->( 'hanging-side-comments', 'hsc', '!' ); | ||||
1612 | $add_option->( 'indent-block-comments', 'ibc', '!' ); | ||||
1613 | $add_option->( 'indent-spaced-block-comments', 'isbc', '!' ); | ||||
1614 | $add_option->( 'fixed-position-side-comment', 'fpsc', '=i' ); | ||||
1615 | $add_option->( 'minimum-space-to-comment', 'msc', '=i' ); | ||||
1616 | $add_option->( 'outdent-long-comments', 'olc', '!' ); | ||||
1617 | $add_option->( 'outdent-static-block-comments', 'osbc', '!' ); | ||||
1618 | $add_option->( 'static-block-comment-prefix', 'sbcp', '=s' ); | ||||
1619 | $add_option->( 'static-block-comments', 'sbc', '!' ); | ||||
1620 | $add_option->( 'static-side-comment-prefix', 'sscp', '=s' ); | ||||
1621 | $add_option->( 'static-side-comments', 'ssc', '!' ); | ||||
1622 | $add_option->( 'ignore-side-comment-lengths', 'iscl', '!' ); | ||||
1623 | |||||
1624 | ######################################## | ||||
1625 | $category = 5; # Linebreak controls | ||||
1626 | ######################################## | ||||
1627 | $add_option->( 'add-newlines', 'anl', '!' ); | ||||
1628 | $add_option->( 'block-brace-vertical-tightness', 'bbvt', '=i' ); | ||||
1629 | $add_option->( 'block-brace-vertical-tightness-list', 'bbvtl', '=s' ); | ||||
1630 | $add_option->( 'brace-vertical-tightness', 'bvt', '=i' ); | ||||
1631 | $add_option->( 'brace-vertical-tightness-closing', 'bvtc', '=i' ); | ||||
1632 | $add_option->( 'cuddled-else', 'ce', '!' ); | ||||
1633 | $add_option->( 'delete-old-newlines', 'dnl', '!' ); | ||||
1634 | $add_option->( 'opening-brace-always-on-right', 'bar', '!' ); | ||||
1635 | $add_option->( 'opening-brace-on-new-line', 'bl', '!' ); | ||||
1636 | $add_option->( 'opening-hash-brace-right', 'ohbr', '!' ); | ||||
1637 | $add_option->( 'opening-paren-right', 'opr', '!' ); | ||||
1638 | $add_option->( 'opening-square-bracket-right', 'osbr', '!' ); | ||||
1639 | $add_option->( 'opening-anonymous-sub-brace-on-new-line', 'asbl', '!' ); | ||||
1640 | $add_option->( 'opening-sub-brace-on-new-line', 'sbl', '!' ); | ||||
1641 | $add_option->( 'paren-vertical-tightness', 'pvt', '=i' ); | ||||
1642 | $add_option->( 'paren-vertical-tightness-closing', 'pvtc', '=i' ); | ||||
1643 | $add_option->( 'stack-closing-block-brace', 'scbb', '!' ); | ||||
1644 | $add_option->( 'stack-closing-hash-brace', 'schb', '!' ); | ||||
1645 | $add_option->( 'stack-closing-paren', 'scp', '!' ); | ||||
1646 | $add_option->( 'stack-closing-square-bracket', 'scsb', '!' ); | ||||
1647 | $add_option->( 'stack-opening-block-brace', 'sobb', '!' ); | ||||
1648 | $add_option->( 'stack-opening-hash-brace', 'sohb', '!' ); | ||||
1649 | $add_option->( 'stack-opening-paren', 'sop', '!' ); | ||||
1650 | $add_option->( 'stack-opening-square-bracket', 'sosb', '!' ); | ||||
1651 | $add_option->( 'vertical-tightness', 'vt', '=i' ); | ||||
1652 | $add_option->( 'vertical-tightness-closing', 'vtc', '=i' ); | ||||
1653 | $add_option->( 'want-break-after', 'wba', '=s' ); | ||||
1654 | $add_option->( 'want-break-before', 'wbb', '=s' ); | ||||
1655 | $add_option->( 'break-after-all-operators', 'baao', '!' ); | ||||
1656 | $add_option->( 'break-before-all-operators', 'bbao', '!' ); | ||||
1657 | $add_option->( 'keep-interior-semicolons', 'kis', '!' ); | ||||
1658 | |||||
1659 | ######################################## | ||||
1660 | $category = 6; # Controlling list formatting | ||||
1661 | ######################################## | ||||
1662 | $add_option->( 'break-at-old-comma-breakpoints', 'boc', '!' ); | ||||
1663 | $add_option->( 'comma-arrow-breakpoints', 'cab', '=i' ); | ||||
1664 | $add_option->( 'maximum-fields-per-table', 'mft', '=i' ); | ||||
1665 | |||||
1666 | ######################################## | ||||
1667 | $category = 7; # Retaining or ignoring existing line breaks | ||||
1668 | ######################################## | ||||
1669 | $add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' ); | ||||
1670 | $add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' ); | ||||
1671 | $add_option->( 'break-at-old-ternary-breakpoints', 'bot', '!' ); | ||||
1672 | $add_option->( 'break-at-old-attribute-breakpoints', 'boa', '!' ); | ||||
1673 | $add_option->( 'ignore-old-breakpoints', 'iob', '!' ); | ||||
1674 | |||||
1675 | ######################################## | ||||
1676 | $category = 8; # Blank line control | ||||
1677 | ######################################## | ||||
1678 | $add_option->( 'blanks-before-blocks', 'bbb', '!' ); | ||||
1679 | $add_option->( 'blanks-before-comments', 'bbc', '!' ); | ||||
1680 | $add_option->( 'blank-lines-before-subs', 'blbs', '=i' ); | ||||
1681 | $add_option->( 'blank-lines-before-packages', 'blbp', '=i' ); | ||||
1682 | $add_option->( 'long-block-line-count', 'lbl', '=i' ); | ||||
1683 | $add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' ); | ||||
1684 | $add_option->( 'keep-old-blank-lines', 'kbl', '=i' ); | ||||
1685 | |||||
1686 | ######################################## | ||||
1687 | $category = 9; # Other controls | ||||
1688 | ######################################## | ||||
1689 | $add_option->( 'delete-block-comments', 'dbc', '!' ); | ||||
1690 | $add_option->( 'delete-closing-side-comments', 'dcsc', '!' ); | ||||
1691 | $add_option->( 'delete-pod', 'dp', '!' ); | ||||
1692 | $add_option->( 'delete-side-comments', 'dsc', '!' ); | ||||
1693 | $add_option->( 'tee-block-comments', 'tbc', '!' ); | ||||
1694 | $add_option->( 'tee-pod', 'tp', '!' ); | ||||
1695 | $add_option->( 'tee-side-comments', 'tsc', '!' ); | ||||
1696 | $add_option->( 'look-for-autoloader', 'lal', '!' ); | ||||
1697 | $add_option->( 'look-for-hash-bang', 'x', '!' ); | ||||
1698 | $add_option->( 'look-for-selfloader', 'lsl', '!' ); | ||||
1699 | $add_option->( 'pass-version-line', 'pvl', '!' ); | ||||
1700 | |||||
1701 | ######################################## | ||||
1702 | $category = 13; # Debugging | ||||
1703 | ######################################## | ||||
1704 | $add_option->( 'DEBUG', 'D', '!' ); | ||||
1705 | $add_option->( 'DIAGNOSTICS', 'I', '!' ); | ||||
1706 | $add_option->( 'dump-defaults', 'ddf', '!' ); | ||||
1707 | $add_option->( 'dump-long-names', 'dln', '!' ); | ||||
1708 | $add_option->( 'dump-options', 'dop', '!' ); | ||||
1709 | $add_option->( 'dump-profile', 'dpro', '!' ); | ||||
1710 | $add_option->( 'dump-short-names', 'dsn', '!' ); | ||||
1711 | $add_option->( 'dump-token-types', 'dtt', '!' ); | ||||
1712 | $add_option->( 'dump-want-left-space', 'dwls', '!' ); | ||||
1713 | $add_option->( 'dump-want-right-space', 'dwrs', '!' ); | ||||
1714 | $add_option->( 'fuzzy-line-length', 'fll', '!' ); | ||||
1715 | $add_option->( 'help', 'h', '' ); | ||||
1716 | $add_option->( 'short-concatenation-item-length', 'scl', '=i' ); | ||||
1717 | $add_option->( 'show-options', 'opt', '!' ); | ||||
1718 | $add_option->( 'version', 'v', '' ); | ||||
1719 | $add_option->( 'memoize', 'mem', '!' ); | ||||
1720 | |||||
1721 | #--------------------------------------------------------------------- | ||||
1722 | |||||
1723 | # The Perl::Tidy::HtmlWriter will add its own options to the string | ||||
1724 | Perl::Tidy::HtmlWriter->make_getopt_long_names( \@option_string ); | ||||
1725 | |||||
1726 | ######################################## | ||||
1727 | # Set categories 10, 11, 12 | ||||
1728 | ######################################## | ||||
1729 | # Based on their known order | ||||
1730 | $category = 12; # HTML properties | ||||
1731 | foreach my $opt (@option_string) { | ||||
1732 | my $long_name = $opt; | ||||
1733 | $long_name =~ s/(!|=.*|:.*)$//; | ||||
1734 | unless ( defined( $option_category{$long_name} ) ) { | ||||
1735 | if ( $long_name =~ /^html-linked/ ) { | ||||
1736 | $category = 10; # HTML options | ||||
1737 | } | ||||
1738 | elsif ( $long_name =~ /^pod2html/ ) { | ||||
1739 | $category = 11; # Pod2html | ||||
1740 | } | ||||
1741 | $option_category{$long_name} = $category_name[$category]; | ||||
1742 | } | ||||
1743 | } | ||||
1744 | |||||
1745 | #--------------------------------------------------------------- | ||||
1746 | # Assign valid ranges to certain options | ||||
1747 | #--------------------------------------------------------------- | ||||
1748 | # In the future, these may be used to make preliminary checks | ||||
1749 | # hash keys are long names | ||||
1750 | # If key or value is undefined: | ||||
1751 | # strings may have any value | ||||
1752 | # integer ranges are >=0 | ||||
1753 | # If value is defined: | ||||
1754 | # value is [qw(any valid words)] for strings | ||||
1755 | # value is [min, max] for integers | ||||
1756 | # if min is undefined, there is no lower limit | ||||
1757 | # if max is undefined, there is no upper limit | ||||
1758 | # Parameters not listed here have defaults | ||||
1759 | %option_range = ( | ||||
1760 | 'format' => [ 'tidy', 'html', 'user' ], | ||||
1761 | 'output-line-ending' => [ 'dos', 'win', 'mac', 'unix' ], | ||||
1762 | |||||
1763 | 'block-brace-tightness' => [ 0, 2 ], | ||||
1764 | 'brace-tightness' => [ 0, 2 ], | ||||
1765 | 'paren-tightness' => [ 0, 2 ], | ||||
1766 | 'square-bracket-tightness' => [ 0, 2 ], | ||||
1767 | |||||
1768 | 'block-brace-vertical-tightness' => [ 0, 2 ], | ||||
1769 | 'brace-vertical-tightness' => [ 0, 2 ], | ||||
1770 | 'brace-vertical-tightness-closing' => [ 0, 2 ], | ||||
1771 | 'paren-vertical-tightness' => [ 0, 2 ], | ||||
1772 | 'paren-vertical-tightness-closing' => [ 0, 2 ], | ||||
1773 | 'square-bracket-vertical-tightness' => [ 0, 2 ], | ||||
1774 | 'square-bracket-vertical-tightness-closing' => [ 0, 2 ], | ||||
1775 | 'vertical-tightness' => [ 0, 2 ], | ||||
1776 | 'vertical-tightness-closing' => [ 0, 2 ], | ||||
1777 | |||||
1778 | 'closing-brace-indentation' => [ 0, 3 ], | ||||
1779 | 'closing-paren-indentation' => [ 0, 3 ], | ||||
1780 | 'closing-square-bracket-indentation' => [ 0, 3 ], | ||||
1781 | 'closing-token-indentation' => [ 0, 3 ], | ||||
1782 | |||||
1783 | 'closing-side-comment-else-flag' => [ 0, 2 ], | ||||
1784 | 'comma-arrow-breakpoints' => [ 0, 5 ], | ||||
1785 | ); | ||||
1786 | |||||
1787 | # Note: we could actually allow negative ci if someone really wants it: | ||||
1788 | # $option_range{'continuation-indentation'} = [ undef, undef ]; | ||||
1789 | |||||
1790 | #--------------------------------------------------------------- | ||||
1791 | # Assign default values to the above options here, except | ||||
1792 | # for 'outfile' and 'help'. | ||||
1793 | # These settings should approximate the perlstyle(1) suggestions. | ||||
1794 | #--------------------------------------------------------------- | ||||
1795 | my @defaults = qw( | ||||
1796 | add-newlines | ||||
1797 | add-semicolons | ||||
1798 | add-whitespace | ||||
1799 | blanks-before-blocks | ||||
1800 | blanks-before-comments | ||||
1801 | blank-lines-before-subs=1 | ||||
1802 | blank-lines-before-packages=1 | ||||
1803 | block-brace-tightness=0 | ||||
1804 | block-brace-vertical-tightness=0 | ||||
1805 | brace-tightness=1 | ||||
1806 | brace-vertical-tightness-closing=0 | ||||
1807 | brace-vertical-tightness=0 | ||||
1808 | break-at-old-logical-breakpoints | ||||
1809 | break-at-old-ternary-breakpoints | ||||
1810 | break-at-old-attribute-breakpoints | ||||
1811 | break-at-old-keyword-breakpoints | ||||
1812 | comma-arrow-breakpoints=5 | ||||
1813 | nocheck-syntax | ||||
1814 | closing-side-comment-interval=6 | ||||
1815 | closing-side-comment-maximum-text=20 | ||||
1816 | closing-side-comment-else-flag=0 | ||||
1817 | closing-side-comments-balanced | ||||
1818 | closing-paren-indentation=0 | ||||
1819 | closing-brace-indentation=0 | ||||
1820 | closing-square-bracket-indentation=0 | ||||
1821 | continuation-indentation=2 | ||||
1822 | delete-old-newlines | ||||
1823 | delete-semicolons | ||||
1824 | fuzzy-line-length | ||||
1825 | hanging-side-comments | ||||
1826 | indent-block-comments | ||||
1827 | indent-columns=4 | ||||
1828 | iterations=1 | ||||
1829 | keep-old-blank-lines=1 | ||||
1830 | long-block-line-count=8 | ||||
1831 | look-for-autoloader | ||||
1832 | look-for-selfloader | ||||
1833 | maximum-consecutive-blank-lines=1 | ||||
1834 | maximum-fields-per-table=0 | ||||
1835 | maximum-line-length=80 | ||||
1836 | memoize | ||||
1837 | minimum-space-to-comment=4 | ||||
1838 | nobrace-left-and-indent | ||||
1839 | nocuddled-else | ||||
1840 | nodelete-old-whitespace | ||||
1841 | nohtml | ||||
1842 | nologfile | ||||
1843 | noquiet | ||||
1844 | noshow-options | ||||
1845 | nostatic-side-comments | ||||
1846 | notabs | ||||
1847 | nowarning-output | ||||
1848 | outdent-labels | ||||
1849 | outdent-long-quotes | ||||
1850 | outdent-long-comments | ||||
1851 | paren-tightness=1 | ||||
1852 | paren-vertical-tightness-closing=0 | ||||
1853 | paren-vertical-tightness=0 | ||||
1854 | pass-version-line | ||||
1855 | recombine | ||||
1856 | valign | ||||
1857 | short-concatenation-item-length=8 | ||||
1858 | space-for-semicolon | ||||
1859 | square-bracket-tightness=1 | ||||
1860 | square-bracket-vertical-tightness-closing=0 | ||||
1861 | square-bracket-vertical-tightness=0 | ||||
1862 | static-block-comments | ||||
1863 | trim-qw | ||||
1864 | format=tidy | ||||
1865 | backup-file-extension=bak | ||||
1866 | format-skipping | ||||
1867 | default-tabsize=8 | ||||
1868 | |||||
1869 | pod2html | ||||
1870 | html-table-of-contents | ||||
1871 | html-entities | ||||
1872 | ); | ||||
1873 | |||||
1874 | push @defaults, "perl-syntax-check-flags=-c -T"; | ||||
1875 | |||||
1876 | #--------------------------------------------------------------- | ||||
1877 | # Define abbreviations which will be expanded into the above primitives. | ||||
1878 | # These may be defined recursively. | ||||
1879 | #--------------------------------------------------------------- | ||||
1880 | %expansion = ( | ||||
1881 | %expansion, | ||||
1882 | 'freeze-newlines' => [qw(noadd-newlines nodelete-old-newlines)], | ||||
1883 | 'fnl' => [qw(freeze-newlines)], | ||||
1884 | 'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)], | ||||
1885 | 'fws' => [qw(freeze-whitespace)], | ||||
1886 | 'freeze-blank-lines' => | ||||
1887 | [qw(maximum-consecutive-blank-lines=0 keep-old-blank-lines=2)], | ||||
1888 | 'fbl' => [qw(freeze-blank-lines)], | ||||
1889 | 'indent-only' => [qw(freeze-newlines freeze-whitespace)], | ||||
1890 | 'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)], | ||||
1891 | 'nooutdent-long-lines' => | ||||
1892 | [qw(nooutdent-long-quotes nooutdent-long-comments)], | ||||
1893 | 'noll' => [qw(nooutdent-long-lines)], | ||||
1894 | 'io' => [qw(indent-only)], | ||||
1895 | 'delete-all-comments' => | ||||
1896 | [qw(delete-block-comments delete-side-comments delete-pod)], | ||||
1897 | 'nodelete-all-comments' => | ||||
1898 | [qw(nodelete-block-comments nodelete-side-comments nodelete-pod)], | ||||
1899 | 'dac' => [qw(delete-all-comments)], | ||||
1900 | 'ndac' => [qw(nodelete-all-comments)], | ||||
1901 | 'gnu' => [qw(gnu-style)], | ||||
1902 | 'pbp' => [qw(perl-best-practices)], | ||||
1903 | 'tee-all-comments' => | ||||
1904 | [qw(tee-block-comments tee-side-comments tee-pod)], | ||||
1905 | 'notee-all-comments' => | ||||
1906 | [qw(notee-block-comments notee-side-comments notee-pod)], | ||||
1907 | 'tac' => [qw(tee-all-comments)], | ||||
1908 | 'ntac' => [qw(notee-all-comments)], | ||||
1909 | 'html' => [qw(format=html)], | ||||
1910 | 'nhtml' => [qw(format=tidy)], | ||||
1911 | 'tidy' => [qw(format=tidy)], | ||||
1912 | |||||
1913 | 'swallow-optional-blank-lines' => [qw(kbl=0)], | ||||
1914 | 'noswallow-optional-blank-lines' => [qw(kbl=1)], | ||||
1915 | 'sob' => [qw(kbl=0)], | ||||
1916 | 'nsob' => [qw(kbl=1)], | ||||
1917 | |||||
1918 | 'break-after-comma-arrows' => [qw(cab=0)], | ||||
1919 | 'nobreak-after-comma-arrows' => [qw(cab=1)], | ||||
1920 | 'baa' => [qw(cab=0)], | ||||
1921 | 'nbaa' => [qw(cab=1)], | ||||
1922 | |||||
1923 | 'blanks-before-subs' => [qw(blbs=1 blbp=1)], | ||||
1924 | 'bbs' => [qw(blbs=1 blbp=1)], | ||||
1925 | 'noblanks-before-subs' => [qw(blbs=0 blbp=0)], | ||||
1926 | 'nbbs' => [qw(blbs=0 blbp=0)], | ||||
1927 | |||||
1928 | 'break-at-old-trinary-breakpoints' => [qw(bot)], | ||||
1929 | |||||
1930 | 'cti=0' => [qw(cpi=0 cbi=0 csbi=0)], | ||||
1931 | 'cti=1' => [qw(cpi=1 cbi=1 csbi=1)], | ||||
1932 | 'cti=2' => [qw(cpi=2 cbi=2 csbi=2)], | ||||
1933 | 'icp' => [qw(cpi=2 cbi=2 csbi=2)], | ||||
1934 | 'nicp' => [qw(cpi=0 cbi=0 csbi=0)], | ||||
1935 | |||||
1936 | 'closing-token-indentation=0' => [qw(cpi=0 cbi=0 csbi=0)], | ||||
1937 | 'closing-token-indentation=1' => [qw(cpi=1 cbi=1 csbi=1)], | ||||
1938 | 'closing-token-indentation=2' => [qw(cpi=2 cbi=2 csbi=2)], | ||||
1939 | 'indent-closing-paren' => [qw(cpi=2 cbi=2 csbi=2)], | ||||
1940 | 'noindent-closing-paren' => [qw(cpi=0 cbi=0 csbi=0)], | ||||
1941 | |||||
1942 | 'vt=0' => [qw(pvt=0 bvt=0 sbvt=0)], | ||||
1943 | 'vt=1' => [qw(pvt=1 bvt=1 sbvt=1)], | ||||
1944 | 'vt=2' => [qw(pvt=2 bvt=2 sbvt=2)], | ||||
1945 | |||||
1946 | 'vertical-tightness=0' => [qw(pvt=0 bvt=0 sbvt=0)], | ||||
1947 | 'vertical-tightness=1' => [qw(pvt=1 bvt=1 sbvt=1)], | ||||
1948 | 'vertical-tightness=2' => [qw(pvt=2 bvt=2 sbvt=2)], | ||||
1949 | |||||
1950 | 'vtc=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)], | ||||
1951 | 'vtc=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)], | ||||
1952 | 'vtc=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)], | ||||
1953 | |||||
1954 | 'vertical-tightness-closing=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)], | ||||
1955 | 'vertical-tightness-closing=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)], | ||||
1956 | 'vertical-tightness-closing=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)], | ||||
1957 | |||||
1958 | 'otr' => [qw(opr ohbr osbr)], | ||||
1959 | 'opening-token-right' => [qw(opr ohbr osbr)], | ||||
1960 | 'notr' => [qw(nopr nohbr nosbr)], | ||||
1961 | 'noopening-token-right' => [qw(nopr nohbr nosbr)], | ||||
1962 | |||||
1963 | 'sot' => [qw(sop sohb sosb)], | ||||
1964 | 'nsot' => [qw(nsop nsohb nsosb)], | ||||
1965 | 'stack-opening-tokens' => [qw(sop sohb sosb)], | ||||
1966 | 'nostack-opening-tokens' => [qw(nsop nsohb nsosb)], | ||||
1967 | |||||
1968 | 'sct' => [qw(scp schb scsb)], | ||||
1969 | 'stack-closing-tokens' => => [qw(scp schb scsb)], | ||||
1970 | 'nsct' => [qw(nscp nschb nscsb)], | ||||
1971 | 'nostack-opening-tokens' => [qw(nscp nschb nscsb)], | ||||
1972 | |||||
1973 | 'sac' => [qw(sot sct)], | ||||
1974 | 'nsac' => [qw(nsot nsct)], | ||||
1975 | 'stack-all-containers' => [qw(sot sct)], | ||||
1976 | 'nostack-all-containers' => [qw(nsot nsct)], | ||||
1977 | |||||
1978 | 'act=0' => [qw(pt=0 sbt=0 bt=0 bbt=0)], | ||||
1979 | 'act=1' => [qw(pt=1 sbt=1 bt=1 bbt=1)], | ||||
1980 | 'act=2' => [qw(pt=2 sbt=2 bt=2 bbt=2)], | ||||
1981 | 'all-containers-tightness=0' => [qw(pt=0 sbt=0 bt=0 bbt=0)], | ||||
1982 | 'all-containers-tightness=1' => [qw(pt=1 sbt=1 bt=1 bbt=1)], | ||||
1983 | 'all-containers-tightness=2' => [qw(pt=2 sbt=2 bt=2 bbt=2)], | ||||
1984 | |||||
1985 | 'stack-opening-block-brace' => [qw(bbvt=2 bbvtl=*)], | ||||
1986 | 'sobb' => [qw(bbvt=2 bbvtl=*)], | ||||
1987 | 'nostack-opening-block-brace' => [qw(bbvt=0)], | ||||
1988 | 'nsobb' => [qw(bbvt=0)], | ||||
1989 | |||||
1990 | 'converge' => [qw(it=4)], | ||||
1991 | 'noconverge' => [qw(it=1)], | ||||
1992 | 'conv' => [qw(it=4)], | ||||
1993 | 'nconv' => [qw(it=1)], | ||||
1994 | |||||
1995 | # 'mangle' originally deleted pod and comments, but to keep it | ||||
1996 | # reversible, it no longer does. But if you really want to | ||||
1997 | # delete them, just use: | ||||
1998 | # -mangle -dac | ||||
1999 | |||||
2000 | # An interesting use for 'mangle' is to do this: | ||||
2001 | # perltidy -mangle myfile.pl -st | perltidy -o myfile.pl.new | ||||
2002 | # which will form as many one-line blocks as possible | ||||
2003 | |||||
2004 | 'mangle' => [ | ||||
2005 | qw( | ||||
2006 | check-syntax | ||||
2007 | keep-old-blank-lines=0 | ||||
2008 | delete-old-newlines | ||||
2009 | delete-old-whitespace | ||||
2010 | delete-semicolons | ||||
2011 | indent-columns=0 | ||||
2012 | maximum-consecutive-blank-lines=0 | ||||
2013 | maximum-line-length=100000 | ||||
2014 | noadd-newlines | ||||
2015 | noadd-semicolons | ||||
2016 | noadd-whitespace | ||||
2017 | noblanks-before-blocks | ||||
2018 | blank-lines-before-subs=0 | ||||
2019 | blank-lines-before-packages=0 | ||||
2020 | notabs | ||||
2021 | ) | ||||
2022 | ], | ||||
2023 | |||||
2024 | # 'extrude' originally deleted pod and comments, but to keep it | ||||
2025 | # reversible, it no longer does. But if you really want to | ||||
2026 | # delete them, just use | ||||
2027 | # extrude -dac | ||||
2028 | # | ||||
2029 | # An interesting use for 'extrude' is to do this: | ||||
2030 | # perltidy -extrude myfile.pl -st | perltidy -o myfile.pl.new | ||||
2031 | # which will break up all one-line blocks. | ||||
2032 | |||||
2033 | 'extrude' => [ | ||||
2034 | qw( | ||||
2035 | check-syntax | ||||
2036 | ci=0 | ||||
2037 | delete-old-newlines | ||||
2038 | delete-old-whitespace | ||||
2039 | delete-semicolons | ||||
2040 | indent-columns=0 | ||||
2041 | maximum-consecutive-blank-lines=0 | ||||
2042 | maximum-line-length=1 | ||||
2043 | noadd-semicolons | ||||
2044 | noadd-whitespace | ||||
2045 | noblanks-before-blocks | ||||
2046 | blank-lines-before-subs=0 | ||||
2047 | blank-lines-before-packages=0 | ||||
2048 | nofuzzy-line-length | ||||
2049 | notabs | ||||
2050 | norecombine | ||||
2051 | ) | ||||
2052 | ], | ||||
2053 | |||||
2054 | # this style tries to follow the GNU Coding Standards (which do | ||||
2055 | # not really apply to perl but which are followed by some perl | ||||
2056 | # programmers). | ||||
2057 | 'gnu-style' => [ | ||||
2058 | qw( | ||||
2059 | lp bl noll pt=2 bt=2 sbt=2 cpi=1 csbi=1 cbi=1 | ||||
2060 | ) | ||||
2061 | ], | ||||
2062 | |||||
2063 | # Style suggested in Damian Conway's Perl Best Practices | ||||
2064 | 'perl-best-practices' => [ | ||||
2065 | qw(l=78 i=4 ci=4 st se vt=2 cti=0 pt=1 bt=1 sbt=1 bbt=1 nsfs nolq), | ||||
2066 | q(wbb=% + - * / x != == >= <= =~ !~ < > | & = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=) | ||||
2067 | ], | ||||
2068 | |||||
2069 | # Additional styles can be added here | ||||
2070 | ); | ||||
2071 | |||||
2072 | Perl::Tidy::HtmlWriter->make_abbreviated_names( \%expansion ); | ||||
2073 | |||||
2074 | # Uncomment next line to dump all expansions for debugging: | ||||
2075 | # dump_short_names(\%expansion); | ||||
2076 | return ( | ||||
2077 | \@option_string, \@defaults, \%expansion, | ||||
2078 | \%option_category, \%option_range | ||||
2079 | ); | ||||
2080 | |||||
2081 | } # end of generate_options | ||||
2082 | |||||
2083 | # Memoize process_command_line. Given same @ARGV passed in, return same | ||||
2084 | # values and same @ARGV back. | ||||
2085 | # This patch was supplied by Jonathan Swartz Nov 2012 and significantly speeds | ||||
2086 | # up masontidy (https://metacpan.org/module/masontidy) | ||||
2087 | |||||
2088 | 1 | 100ns | my %process_command_line_cache; | ||
2089 | |||||
2090 | sub process_command_line { | ||||
2091 | |||||
2092 | my ( | ||||
2093 | $perltidyrc_stream, $is_Windows, $Windows_type, | ||||
2094 | $rpending_complaint, $dump_options_type | ||||
2095 | ) = @_; | ||||
2096 | |||||
2097 | my $use_cache = !defined($perltidyrc_stream) && !$dump_options_type; | ||||
2098 | if ($use_cache) { | ||||
2099 | my $cache_key = join( chr(28), @ARGV ); | ||||
2100 | if ( my $result = $process_command_line_cache{$cache_key} ) { | ||||
2101 | my ( $argv, @retvals ) = @$result; | ||||
2102 | @ARGV = @$argv; | ||||
2103 | return @retvals; | ||||
2104 | } | ||||
2105 | else { | ||||
2106 | my @retvals = _process_command_line(@_); | ||||
2107 | $process_command_line_cache{$cache_key} = [ \@ARGV, @retvals ] | ||||
2108 | if $retvals[0]->{'memoize'}; | ||||
2109 | return @retvals; | ||||
2110 | } | ||||
2111 | } | ||||
2112 | else { | ||||
2113 | return _process_command_line(@_); | ||||
2114 | } | ||||
2115 | } | ||||
2116 | |||||
2117 | # (note the underscore here) | ||||
2118 | sub _process_command_line { | ||||
2119 | |||||
2120 | my ( | ||||
2121 | $perltidyrc_stream, $is_Windows, $Windows_type, | ||||
2122 | $rpending_complaint, $dump_options_type | ||||
2123 | ) = @_; | ||||
2124 | |||||
2125 | 2 | 4.15ms | 2 | 209µs | # spent 110µs (11+99) within Perl::Tidy::BEGIN@2125 which was called:
# once (11µs+99µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 2125 # spent 110µs making 1 call to Perl::Tidy::BEGIN@2125
# spent 99µs making 1 call to Getopt::Long::import |
2126 | |||||
2127 | my ( | ||||
2128 | $roption_string, $rdefaults, $rexpansion, | ||||
2129 | $roption_category, $roption_range | ||||
2130 | ) = generate_options(); | ||||
2131 | |||||
2132 | #--------------------------------------------------------------- | ||||
2133 | # set the defaults by passing the above list through GetOptions | ||||
2134 | #--------------------------------------------------------------- | ||||
2135 | my %Opts = (); | ||||
2136 | { | ||||
2137 | local @ARGV; | ||||
2138 | my $i; | ||||
2139 | |||||
2140 | # do not load the defaults if we are just dumping perltidyrc | ||||
2141 | unless ( $dump_options_type eq 'perltidyrc' ) { | ||||
2142 | for $i (@$rdefaults) { push @ARGV, "--" . $i } | ||||
2143 | } | ||||
2144 | |||||
2145 | # Patch to save users Getopt::Long configuration | ||||
2146 | # and set to Getopt::Long defaults. Use eval to avoid | ||||
2147 | # breaking old versions of Perl without these routines. | ||||
2148 | my $glc; | ||||
2149 | eval { $glc = Getopt::Long::Configure() }; | ||||
2150 | unless ($@) { | ||||
2151 | eval { Getopt::Long::ConfigDefaults() }; | ||||
2152 | } | ||||
2153 | else { $glc = undef } | ||||
2154 | |||||
2155 | if ( !GetOptions( \%Opts, @$roption_string ) ) { | ||||
2156 | Die "Programming Bug: error in setting default options"; | ||||
2157 | } | ||||
2158 | |||||
2159 | # Patch to put the previous Getopt::Long configuration back | ||||
2160 | eval { Getopt::Long::Configure($glc) } if defined $glc; | ||||
2161 | } | ||||
2162 | |||||
2163 | my $word; | ||||
2164 | my @raw_options = (); | ||||
2165 | my $config_file = ""; | ||||
2166 | my $saw_ignore_profile = 0; | ||||
2167 | my $saw_extrude = 0; | ||||
2168 | my $saw_pbp = 0; | ||||
2169 | my $saw_dump_profile = 0; | ||||
2170 | my $i; | ||||
2171 | |||||
2172 | #--------------------------------------------------------------- | ||||
2173 | # Take a first look at the command-line parameters. Do as many | ||||
2174 | # immediate dumps as possible, which can avoid confusion if the | ||||
2175 | # perltidyrc file has an error. | ||||
2176 | #--------------------------------------------------------------- | ||||
2177 | foreach $i (@ARGV) { | ||||
2178 | |||||
2179 | $i =~ s/^--/-/; | ||||
2180 | if ( $i =~ /^-(npro|noprofile|no-profile)$/ ) { | ||||
2181 | $saw_ignore_profile = 1; | ||||
2182 | } | ||||
2183 | |||||
2184 | # note: this must come before -pro and -profile, below: | ||||
2185 | elsif ( $i =~ /^-(dump-profile|dpro)$/ ) { | ||||
2186 | $saw_dump_profile = 1; | ||||
2187 | } | ||||
2188 | elsif ( $i =~ /^-(pro|profile)=(.+)/ ) { | ||||
2189 | if ($config_file) { | ||||
2190 | Warn | ||||
2191 | "Only one -pro=filename allowed, using '$2' instead of '$config_file'\n"; | ||||
2192 | } | ||||
2193 | $config_file = $2; | ||||
2194 | |||||
2195 | # resolve <dir>/.../<file>, meaning look upwards from directory | ||||
2196 | if ( defined($config_file) ) { | ||||
2197 | if ( my ( $start_dir, $search_file ) = | ||||
2198 | ( $config_file =~ m{^(.*)\.\.\./(.*)$} ) ) | ||||
2199 | { | ||||
2200 | $start_dir = '.' if !$start_dir; | ||||
2201 | $start_dir = Cwd::realpath($start_dir); | ||||
2202 | if ( my $found_file = | ||||
2203 | find_file_upwards( $start_dir, $search_file ) ) | ||||
2204 | { | ||||
2205 | $config_file = $found_file; | ||||
2206 | } | ||||
2207 | } | ||||
2208 | } | ||||
2209 | unless ( -e $config_file ) { | ||||
2210 | Warn "cannot find file given with -pro=$config_file: $!\n"; | ||||
2211 | $config_file = ""; | ||||
2212 | } | ||||
2213 | } | ||||
2214 | elsif ( $i =~ /^-(pro|profile)=?$/ ) { | ||||
2215 | Die "usage: -pro=filename or --profile=filename, no spaces\n"; | ||||
2216 | } | ||||
2217 | elsif ( $i =~ /^-extrude$/ ) { | ||||
2218 | $saw_extrude = 1; | ||||
2219 | } | ||||
2220 | elsif ( $i =~ /^-(pbp|perl-best-practices)$/ ) { | ||||
2221 | $saw_pbp = 1; | ||||
2222 | } | ||||
2223 | elsif ( $i =~ /^-(help|h|HELP|H|\?)$/ ) { | ||||
2224 | usage(); | ||||
2225 | Exit 0; | ||||
2226 | } | ||||
2227 | elsif ( $i =~ /^-(version|v)$/ ) { | ||||
2228 | show_version(); | ||||
2229 | Exit 0; | ||||
2230 | } | ||||
2231 | elsif ( $i =~ /^-(dump-defaults|ddf)$/ ) { | ||||
2232 | dump_defaults(@$rdefaults); | ||||
2233 | Exit 0; | ||||
2234 | } | ||||
2235 | elsif ( $i =~ /^-(dump-long-names|dln)$/ ) { | ||||
2236 | dump_long_names(@$roption_string); | ||||
2237 | Exit 0; | ||||
2238 | } | ||||
2239 | elsif ( $i =~ /^-(dump-short-names|dsn)$/ ) { | ||||
2240 | dump_short_names($rexpansion); | ||||
2241 | Exit 0; | ||||
2242 | } | ||||
2243 | elsif ( $i =~ /^-(dump-token-types|dtt)$/ ) { | ||||
2244 | Perl::Tidy::Tokenizer->dump_token_types(*STDOUT); | ||||
2245 | Exit 0; | ||||
2246 | } | ||||
2247 | } | ||||
2248 | |||||
2249 | if ( $saw_dump_profile && $saw_ignore_profile ) { | ||||
2250 | Warn "No profile to dump because of -npro\n"; | ||||
2251 | Exit 1; | ||||
2252 | } | ||||
2253 | |||||
2254 | #--------------------------------------------------------------- | ||||
2255 | # read any .perltidyrc configuration file | ||||
2256 | #--------------------------------------------------------------- | ||||
2257 | unless ($saw_ignore_profile) { | ||||
2258 | |||||
2259 | # resolve possible conflict between $perltidyrc_stream passed | ||||
2260 | # as call parameter to perltidy and -pro=filename on command | ||||
2261 | # line. | ||||
2262 | if ($perltidyrc_stream) { | ||||
2263 | if ($config_file) { | ||||
2264 | Warn <<EOM; | ||||
2265 | Conflict: a perltidyrc configuration file was specified both as this | ||||
2266 | perltidy call parameter: $perltidyrc_stream | ||||
2267 | and with this -profile=$config_file. | ||||
2268 | Using -profile=$config_file. | ||||
2269 | EOM | ||||
2270 | } | ||||
2271 | else { | ||||
2272 | $config_file = $perltidyrc_stream; | ||||
2273 | } | ||||
2274 | } | ||||
2275 | |||||
2276 | # look for a config file if we don't have one yet | ||||
2277 | my $rconfig_file_chatter; | ||||
2278 | $$rconfig_file_chatter = ""; | ||||
2279 | $config_file = | ||||
2280 | find_config_file( $is_Windows, $Windows_type, $rconfig_file_chatter, | ||||
2281 | $rpending_complaint ) | ||||
2282 | unless $config_file; | ||||
2283 | |||||
2284 | # open any config file | ||||
2285 | my $fh_config; | ||||
2286 | if ($config_file) { | ||||
2287 | ( $fh_config, $config_file ) = | ||||
2288 | Perl::Tidy::streamhandle( $config_file, 'r' ); | ||||
2289 | unless ($fh_config) { | ||||
2290 | $$rconfig_file_chatter .= | ||||
2291 | "# $config_file exists but cannot be opened\n"; | ||||
2292 | } | ||||
2293 | } | ||||
2294 | |||||
2295 | if ($saw_dump_profile) { | ||||
2296 | dump_config_file( $fh_config, $config_file, $rconfig_file_chatter ); | ||||
2297 | Exit 0; | ||||
2298 | } | ||||
2299 | |||||
2300 | if ($fh_config) { | ||||
2301 | |||||
2302 | my ( $rconfig_list, $death_message, $_saw_pbp ) = | ||||
2303 | read_config_file( $fh_config, $config_file, $rexpansion ); | ||||
2304 | Die $death_message if ($death_message); | ||||
2305 | $saw_pbp ||= $_saw_pbp; | ||||
2306 | |||||
2307 | # process any .perltidyrc parameters right now so we can | ||||
2308 | # localize errors | ||||
2309 | if (@$rconfig_list) { | ||||
2310 | local @ARGV = @$rconfig_list; | ||||
2311 | |||||
2312 | expand_command_abbreviations( $rexpansion, \@raw_options, | ||||
2313 | $config_file ); | ||||
2314 | |||||
2315 | if ( !GetOptions( \%Opts, @$roption_string ) ) { | ||||
2316 | Die | ||||
2317 | "Error in this config file: $config_file \nUse -npro to ignore this file, -h for help'\n"; | ||||
2318 | } | ||||
2319 | |||||
2320 | # Anything left in this local @ARGV is an error and must be | ||||
2321 | # invalid bare words from the configuration file. We cannot | ||||
2322 | # check this earlier because bare words may have been valid | ||||
2323 | # values for parameters. We had to wait for GetOptions to have | ||||
2324 | # a look at @ARGV. | ||||
2325 | if (@ARGV) { | ||||
2326 | my $count = @ARGV; | ||||
2327 | my $str = "\'" . pop(@ARGV) . "\'"; | ||||
2328 | while ( my $param = pop(@ARGV) ) { | ||||
2329 | if ( length($str) < 70 ) { | ||||
2330 | $str .= ", '$param'"; | ||||
2331 | } | ||||
2332 | else { | ||||
2333 | $str .= ", ..."; | ||||
2334 | last; | ||||
2335 | } | ||||
2336 | } | ||||
2337 | Die <<EOM; | ||||
2338 | There are $count unrecognized values in the configuration file '$config_file': | ||||
2339 | $str | ||||
2340 | Use leading dashes for parameters. Use -npro to ignore this file. | ||||
2341 | EOM | ||||
2342 | } | ||||
2343 | |||||
2344 | # Undo any options which cause premature exit. They are not | ||||
2345 | # appropriate for a config file, and it could be hard to | ||||
2346 | # diagnose the cause of the premature exit. | ||||
2347 | foreach ( | ||||
2348 | qw{ | ||||
2349 | dump-defaults | ||||
2350 | dump-long-names | ||||
2351 | dump-options | ||||
2352 | dump-profile | ||||
2353 | dump-short-names | ||||
2354 | dump-token-types | ||||
2355 | dump-want-left-space | ||||
2356 | dump-want-right-space | ||||
2357 | help | ||||
2358 | stylesheet | ||||
2359 | version | ||||
2360 | } | ||||
2361 | ) | ||||
2362 | { | ||||
2363 | |||||
2364 | if ( defined( $Opts{$_} ) ) { | ||||
2365 | delete $Opts{$_}; | ||||
2366 | Warn "ignoring --$_ in config file: $config_file\n"; | ||||
2367 | } | ||||
2368 | } | ||||
2369 | } | ||||
2370 | } | ||||
2371 | } | ||||
2372 | |||||
2373 | #--------------------------------------------------------------- | ||||
2374 | # now process the command line parameters | ||||
2375 | #--------------------------------------------------------------- | ||||
2376 | expand_command_abbreviations( $rexpansion, \@raw_options, $config_file ); | ||||
2377 | |||||
2378 | local $SIG{'__WARN__'} = sub { Warn $_[0] }; | ||||
2379 | if ( !GetOptions( \%Opts, @$roption_string ) ) { | ||||
2380 | Die "Error on command line; for help try 'perltidy -h'\n"; | ||||
2381 | } | ||||
2382 | |||||
2383 | return ( | ||||
2384 | \%Opts, $config_file, \@raw_options, | ||||
2385 | $saw_extrude, $saw_pbp, $roption_string, | ||||
2386 | $rexpansion, $roption_category, $roption_range | ||||
2387 | ); | ||||
2388 | } # end of process_command_line | ||||
2389 | |||||
2390 | sub check_options { | ||||
2391 | |||||
2392 | my ( $rOpts, $is_Windows, $Windows_type, $rpending_complaint ) = @_; | ||||
2393 | |||||
2394 | #--------------------------------------------------------------- | ||||
2395 | # check and handle any interactions among the basic options.. | ||||
2396 | #--------------------------------------------------------------- | ||||
2397 | |||||
2398 | # Since -vt, -vtc, and -cti are abbreviations, but under | ||||
2399 | # msdos, an unquoted input parameter like vtc=1 will be | ||||
2400 | # seen as 2 parameters, vtc and 1, so the abbreviations | ||||
2401 | # won't be seen. Therefore, we will catch them here if | ||||
2402 | # they get through. | ||||
2403 | |||||
2404 | if ( defined $rOpts->{'vertical-tightness'} ) { | ||||
2405 | my $vt = $rOpts->{'vertical-tightness'}; | ||||
2406 | $rOpts->{'paren-vertical-tightness'} = $vt; | ||||
2407 | $rOpts->{'square-bracket-vertical-tightness'} = $vt; | ||||
2408 | $rOpts->{'brace-vertical-tightness'} = $vt; | ||||
2409 | } | ||||
2410 | |||||
2411 | if ( defined $rOpts->{'vertical-tightness-closing'} ) { | ||||
2412 | my $vtc = $rOpts->{'vertical-tightness-closing'}; | ||||
2413 | $rOpts->{'paren-vertical-tightness-closing'} = $vtc; | ||||
2414 | $rOpts->{'square-bracket-vertical-tightness-closing'} = $vtc; | ||||
2415 | $rOpts->{'brace-vertical-tightness-closing'} = $vtc; | ||||
2416 | } | ||||
2417 | |||||
2418 | if ( defined $rOpts->{'closing-token-indentation'} ) { | ||||
2419 | my $cti = $rOpts->{'closing-token-indentation'}; | ||||
2420 | $rOpts->{'closing-square-bracket-indentation'} = $cti; | ||||
2421 | $rOpts->{'closing-brace-indentation'} = $cti; | ||||
2422 | $rOpts->{'closing-paren-indentation'} = $cti; | ||||
2423 | } | ||||
2424 | |||||
2425 | # In quiet mode, there is no log file and hence no way to report | ||||
2426 | # results of syntax check, so don't do it. | ||||
2427 | if ( $rOpts->{'quiet'} ) { | ||||
2428 | $rOpts->{'check-syntax'} = 0; | ||||
2429 | } | ||||
2430 | |||||
2431 | # can't check syntax if no output | ||||
2432 | if ( $rOpts->{'format'} ne 'tidy' ) { | ||||
2433 | $rOpts->{'check-syntax'} = 0; | ||||
2434 | } | ||||
2435 | |||||
2436 | # Never let Windows 9x/Me systems run syntax check -- this will prevent a | ||||
2437 | # wide variety of nasty problems on these systems, because they cannot | ||||
2438 | # reliably run backticks. Don't even think about changing this! | ||||
2439 | if ( $rOpts->{'check-syntax'} | ||||
2440 | && $is_Windows | ||||
2441 | && ( !$Windows_type || $Windows_type =~ /^(9|Me)/ ) ) | ||||
2442 | { | ||||
2443 | $rOpts->{'check-syntax'} = 0; | ||||
2444 | } | ||||
2445 | |||||
2446 | # It's really a bad idea to check syntax as root unless you wrote | ||||
2447 | # the script yourself. FIXME: not sure if this works with VMS | ||||
2448 | unless ($is_Windows) { | ||||
2449 | |||||
2450 | if ( $< == 0 && $rOpts->{'check-syntax'} ) { | ||||
2451 | $rOpts->{'check-syntax'} = 0; | ||||
2452 | $$rpending_complaint .= | ||||
2453 | "Syntax check deactivated for safety; you shouldn't run this as root\n"; | ||||
2454 | } | ||||
2455 | } | ||||
2456 | |||||
2457 | # check iteration count and quietly fix if necessary: | ||||
2458 | # - iterations option only applies to code beautification mode | ||||
2459 | # - the convergence check should stop most runs on iteration 2, and | ||||
2460 | # virtually all on iteration 3. But we'll allow up to 6. | ||||
2461 | if ( $rOpts->{'format'} ne 'tidy' ) { | ||||
2462 | $rOpts->{'iterations'} = 1; | ||||
2463 | } | ||||
2464 | elsif ( defined( $rOpts->{'iterations'} ) ) { | ||||
2465 | if ( $rOpts->{'iterations'} <= 0 ) { $rOpts->{'iterations'} = 1 } | ||||
2466 | elsif ( $rOpts->{'iterations'} > 6 ) { $rOpts->{'iterations'} = 6 } | ||||
2467 | } | ||||
2468 | else { | ||||
2469 | $rOpts->{'iterations'} = 1; | ||||
2470 | } | ||||
2471 | |||||
2472 | # check for reasonable number of blank lines and fix to avoid problems | ||||
2473 | if ( $rOpts->{'blank-lines-before-subs'} ) { | ||||
2474 | if ( $rOpts->{'blank-lines-before-subs'} < 0 ) { | ||||
2475 | $rOpts->{'blank-lines-before-subs'} = 0; | ||||
2476 | Warn "negative value of -blbs, setting 0\n"; | ||||
2477 | } | ||||
2478 | if ( $rOpts->{'blank-lines-before-subs'} > 100 ) { | ||||
2479 | Warn "unreasonably large value of -blbs, reducing\n"; | ||||
2480 | $rOpts->{'blank-lines-before-subs'} = 100; | ||||
2481 | } | ||||
2482 | } | ||||
2483 | if ( $rOpts->{'blank-lines-before-packages'} ) { | ||||
2484 | if ( $rOpts->{'blank-lines-before-packages'} < 0 ) { | ||||
2485 | Warn "negative value of -blbp, setting 0\n"; | ||||
2486 | $rOpts->{'blank-lines-before-packages'} = 0; | ||||
2487 | } | ||||
2488 | if ( $rOpts->{'blank-lines-before-packages'} > 100 ) { | ||||
2489 | Warn "unreasonably large value of -blbp, reducing\n"; | ||||
2490 | $rOpts->{'blank-lines-before-packages'} = 100; | ||||
2491 | } | ||||
2492 | } | ||||
2493 | |||||
2494 | # setting a non-negative logfile gap causes logfile to be saved | ||||
2495 | if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) { | ||||
2496 | $rOpts->{'logfile'} = 1; | ||||
2497 | } | ||||
2498 | |||||
2499 | # set short-cut flag when only indentation is to be done. | ||||
2500 | # Note that the user may or may not have already set the | ||||
2501 | # indent-only flag. | ||||
2502 | if ( !$rOpts->{'add-whitespace'} | ||||
2503 | && !$rOpts->{'delete-old-whitespace'} | ||||
2504 | && !$rOpts->{'add-newlines'} | ||||
2505 | && !$rOpts->{'delete-old-newlines'} ) | ||||
2506 | { | ||||
2507 | $rOpts->{'indent-only'} = 1; | ||||
2508 | } | ||||
2509 | |||||
2510 | # -isbc implies -ibc | ||||
2511 | if ( $rOpts->{'indent-spaced-block-comments'} ) { | ||||
2512 | $rOpts->{'indent-block-comments'} = 1; | ||||
2513 | } | ||||
2514 | |||||
2515 | # -bli flag implies -bl | ||||
2516 | if ( $rOpts->{'brace-left-and-indent'} ) { | ||||
2517 | $rOpts->{'opening-brace-on-new-line'} = 1; | ||||
2518 | } | ||||
2519 | |||||
2520 | if ( $rOpts->{'opening-brace-always-on-right'} | ||||
2521 | && $rOpts->{'opening-brace-on-new-line'} ) | ||||
2522 | { | ||||
2523 | Warn <<EOM; | ||||
2524 | Conflict: you specified both 'opening-brace-always-on-right' (-bar) and | ||||
2525 | 'opening-brace-on-new-line' (-bl). Ignoring -bl. | ||||
2526 | EOM | ||||
2527 | $rOpts->{'opening-brace-on-new-line'} = 0; | ||||
2528 | } | ||||
2529 | |||||
2530 | # it simplifies things if -bl is 0 rather than undefined | ||||
2531 | if ( !defined( $rOpts->{'opening-brace-on-new-line'} ) ) { | ||||
2532 | $rOpts->{'opening-brace-on-new-line'} = 0; | ||||
2533 | } | ||||
2534 | |||||
2535 | # -sbl defaults to -bl if not defined | ||||
2536 | if ( !defined( $rOpts->{'opening-sub-brace-on-new-line'} ) ) { | ||||
2537 | $rOpts->{'opening-sub-brace-on-new-line'} = | ||||
2538 | $rOpts->{'opening-brace-on-new-line'}; | ||||
2539 | } | ||||
2540 | |||||
2541 | if ( $rOpts->{'entab-leading-whitespace'} ) { | ||||
2542 | if ( $rOpts->{'entab-leading-whitespace'} < 0 ) { | ||||
2543 | Warn "-et=n must use a positive integer; ignoring -et\n"; | ||||
2544 | $rOpts->{'entab-leading-whitespace'} = undef; | ||||
2545 | } | ||||
2546 | |||||
2547 | # entab leading whitespace has priority over the older 'tabs' option | ||||
2548 | if ( $rOpts->{'tabs'} ) { $rOpts->{'tabs'} = 0; } | ||||
2549 | } | ||||
2550 | |||||
2551 | # set a default tabsize to be used in guessing the starting indentation | ||||
2552 | # level if and only if this run does not use tabs and the old code does | ||||
2553 | # use tabs | ||||
2554 | if ( $rOpts->{'default-tabsize'} ) { | ||||
2555 | if ( $rOpts->{'default-tabsize'} < 0 ) { | ||||
2556 | Warn "negative value of -dt, setting 0\n"; | ||||
2557 | $rOpts->{'default-tabsize'} = 0; | ||||
2558 | } | ||||
2559 | if ( $rOpts->{'default-tabsize'} > 20 ) { | ||||
2560 | Warn "unreasonably large value of -dt, reducing\n"; | ||||
2561 | $rOpts->{'default-tabsize'} = 20; | ||||
2562 | } | ||||
2563 | } | ||||
2564 | else { | ||||
2565 | $rOpts->{'default-tabsize'} = 8; | ||||
2566 | } | ||||
2567 | |||||
2568 | # Define $tabsize, the number of spaces per tab for use in | ||||
2569 | # guessing the indentation of source lines with leading tabs. | ||||
2570 | # Assume same as for this run if tabs are used , otherwise assume | ||||
2571 | # a default value, typically 8 | ||||
2572 | my $tabsize = | ||||
2573 | $rOpts->{'entab-leading-whitespace'} | ||||
2574 | ? $rOpts->{'entab-leading-whitespace'} | ||||
2575 | : $rOpts->{'tabs'} ? $rOpts->{'indent-columns'} | ||||
2576 | : $rOpts->{'default-tabsize'}; | ||||
2577 | return $tabsize; | ||||
2578 | } | ||||
2579 | |||||
2580 | sub find_file_upwards { | ||||
2581 | my ( $search_dir, $search_file ) = @_; | ||||
2582 | |||||
2583 | $search_dir =~ s{/+$}{}; | ||||
2584 | $search_file =~ s{^/+}{}; | ||||
2585 | |||||
2586 | while (1) { | ||||
2587 | my $try_path = "$search_dir/$search_file"; | ||||
2588 | if ( -f $try_path ) { | ||||
2589 | return $try_path; | ||||
2590 | } | ||||
2591 | elsif ( $search_dir eq '/' ) { | ||||
2592 | return undef; | ||||
2593 | } | ||||
2594 | else { | ||||
2595 | $search_dir = dirname($search_dir); | ||||
2596 | } | ||||
2597 | } | ||||
2598 | } | ||||
2599 | |||||
2600 | sub expand_command_abbreviations { | ||||
2601 | |||||
2602 | # go through @ARGV and expand any abbreviations | ||||
2603 | |||||
2604 | my ( $rexpansion, $rraw_options, $config_file ) = @_; | ||||
2605 | my ($word); | ||||
2606 | |||||
2607 | # set a pass limit to prevent an infinite loop; | ||||
2608 | # 10 should be plenty, but it may be increased to allow deeply | ||||
2609 | # nested expansions. | ||||
2610 | my $max_passes = 10; | ||||
2611 | my @new_argv = (); | ||||
2612 | |||||
2613 | # keep looping until all expansions have been converted into actual | ||||
2614 | # dash parameters.. | ||||
2615 | for ( my $pass_count = 0 ; $pass_count <= $max_passes ; $pass_count++ ) { | ||||
2616 | my @new_argv = (); | ||||
2617 | my $abbrev_count = 0; | ||||
2618 | |||||
2619 | # loop over each item in @ARGV.. | ||||
2620 | foreach $word (@ARGV) { | ||||
2621 | |||||
2622 | # convert any leading 'no-' to just 'no' | ||||
2623 | if ( $word =~ /^(-[-]?no)-(.*)/ ) { $word = $1 . $2 } | ||||
2624 | |||||
2625 | # if it is a dash flag (instead of a file name).. | ||||
2626 | if ( $word =~ /^-[-]?([\w\-]+)(.*)/ ) { | ||||
2627 | |||||
2628 | my $abr = $1; | ||||
2629 | my $flags = $2; | ||||
2630 | |||||
2631 | # save the raw input for debug output in case of circular refs | ||||
2632 | if ( $pass_count == 0 ) { | ||||
2633 | push( @$rraw_options, $word ); | ||||
2634 | } | ||||
2635 | |||||
2636 | # recombine abbreviation and flag, if necessary, | ||||
2637 | # to allow abbreviations with arguments such as '-vt=1' | ||||
2638 | if ( $rexpansion->{ $abr . $flags } ) { | ||||
2639 | $abr = $abr . $flags; | ||||
2640 | $flags = ""; | ||||
2641 | } | ||||
2642 | |||||
2643 | # if we see this dash item in the expansion hash.. | ||||
2644 | if ( $rexpansion->{$abr} ) { | ||||
2645 | $abbrev_count++; | ||||
2646 | |||||
2647 | # stuff all of the words that it expands to into the | ||||
2648 | # new arg list for the next pass | ||||
2649 | foreach my $abbrev ( @{ $rexpansion->{$abr} } ) { | ||||
2650 | next unless $abbrev; # for safety; shouldn't happen | ||||
2651 | push( @new_argv, '--' . $abbrev . $flags ); | ||||
2652 | } | ||||
2653 | } | ||||
2654 | |||||
2655 | # not in expansion hash, must be actual long name | ||||
2656 | else { | ||||
2657 | push( @new_argv, $word ); | ||||
2658 | } | ||||
2659 | } | ||||
2660 | |||||
2661 | # not a dash item, so just save it for the next pass | ||||
2662 | else { | ||||
2663 | push( @new_argv, $word ); | ||||
2664 | } | ||||
2665 | } # end of this pass | ||||
2666 | |||||
2667 | # update parameter list @ARGV to the new one | ||||
2668 | @ARGV = @new_argv; | ||||
2669 | last unless ( $abbrev_count > 0 ); | ||||
2670 | |||||
2671 | # make sure we are not in an infinite loop | ||||
2672 | if ( $pass_count == $max_passes ) { | ||||
2673 | local $" = ')('; | ||||
2674 | Warn <<EOM; | ||||
2675 | I'm tired. We seem to be in an infinite loop trying to expand aliases. | ||||
2676 | Here are the raw options; | ||||
2677 | (rraw_options) | ||||
2678 | EOM | ||||
2679 | my $num = @new_argv; | ||||
2680 | if ( $num < 50 ) { | ||||
2681 | Warn <<EOM; | ||||
2682 | After $max_passes passes here is ARGV | ||||
2683 | (@new_argv) | ||||
2684 | EOM | ||||
2685 | } | ||||
2686 | else { | ||||
2687 | Warn <<EOM; | ||||
2688 | After $max_passes passes ARGV has $num entries | ||||
2689 | EOM | ||||
2690 | } | ||||
2691 | |||||
2692 | if ($config_file) { | ||||
2693 | Die <<"DIE"; | ||||
2694 | Please check your configuration file $config_file for circular-references. | ||||
2695 | To deactivate it, use -npro. | ||||
2696 | DIE | ||||
2697 | } | ||||
2698 | else { | ||||
2699 | Die <<'DIE'; | ||||
2700 | Program bug - circular-references in the %expansion hash, probably due to | ||||
2701 | a recent program change. | ||||
2702 | DIE | ||||
2703 | } | ||||
2704 | } # end of check for circular references | ||||
2705 | } # end of loop over all passes | ||||
2706 | } | ||||
2707 | |||||
2708 | # Debug routine -- this will dump the expansion hash | ||||
2709 | sub dump_short_names { | ||||
2710 | my $rexpansion = shift; | ||||
2711 | print STDOUT <<EOM; | ||||
2712 | List of short names. This list shows how all abbreviations are | ||||
2713 | translated into other abbreviations and, eventually, into long names. | ||||
2714 | New abbreviations may be defined in a .perltidyrc file. | ||||
2715 | For a list of all long names, use perltidy --dump-long-names (-dln). | ||||
2716 | -------------------------------------------------------------------------- | ||||
2717 | EOM | ||||
2718 | foreach my $abbrev ( sort keys %$rexpansion ) { | ||||
2719 | my @list = @{ $$rexpansion{$abbrev} }; | ||||
2720 | print STDOUT "$abbrev --> @list\n"; | ||||
2721 | } | ||||
2722 | } | ||||
2723 | |||||
2724 | sub check_vms_filename { | ||||
2725 | |||||
2726 | # given a valid filename (the perltidy input file) | ||||
2727 | # create a modified filename and separator character | ||||
2728 | # suitable for VMS. | ||||
2729 | # | ||||
2730 | # Contributed by Michael Cartmell | ||||
2731 | # | ||||
2732 | my ( $base, $path ) = fileparse( $_[0] ); | ||||
2733 | |||||
2734 | # remove explicit ; version | ||||
2735 | $base =~ s/;-?\d*$// | ||||
2736 | |||||
2737 | # remove explicit . version ie two dots in filename NB ^ escapes a dot | ||||
2738 | or $base =~ s/( # begin capture $1 | ||||
2739 | (?:^|[^^])\. # match a dot not preceded by a caret | ||||
2740 | (?: # followed by nothing | ||||
2741 | | # or | ||||
2742 | .*[^^] # anything ending in a non caret | ||||
2743 | ) | ||||
2744 | ) # end capture $1 | ||||
2745 | \.-?\d*$ # match . version number | ||||
2746 | /$1/x; | ||||
2747 | |||||
2748 | # normalise filename, if there are no unescaped dots then append one | ||||
2749 | $base .= '.' unless $base =~ /(?:^|[^^])\./; | ||||
2750 | |||||
2751 | # if we don't already have an extension then we just append the extension | ||||
2752 | my $separator = ( $base =~ /\.$/ ) ? "" : "_"; | ||||
2753 | return ( $path . $base, $separator ); | ||||
2754 | } | ||||
2755 | |||||
2756 | sub Win_OS_Type { | ||||
2757 | |||||
2758 | # TODO: are these more standard names? | ||||
2759 | # Win32s Win95 Win98 WinMe WinNT3.51 WinNT4 Win2000 WinXP/.Net Win2003 | ||||
2760 | |||||
2761 | # Returns a string that determines what MS OS we are on. | ||||
2762 | # Returns win32s,95,98,Me,NT3.51,NT4,2000,XP/.Net,Win2003 | ||||
2763 | # Returns blank string if not an MS system. | ||||
2764 | # Original code contributed by: Yves Orton | ||||
2765 | # We need to know this to decide where to look for config files | ||||
2766 | |||||
2767 | my $rpending_complaint = shift; | ||||
2768 | my $os = ""; | ||||
2769 | return $os unless $^O =~ /win32|dos/i; # is it a MS box? | ||||
2770 | |||||
2771 | # Systems built from Perl source may not have Win32.pm | ||||
2772 | # But probably have Win32::GetOSVersion() anyway so the | ||||
2773 | # following line is not 'required': | ||||
2774 | # return $os unless eval('require Win32'); | ||||
2775 | |||||
2776 | # Use the standard API call to determine the version | ||||
2777 | my ( $undef, $major, $minor, $build, $id ); | ||||
2778 | eval { ( $undef, $major, $minor, $build, $id ) = Win32::GetOSVersion() }; | ||||
2779 | |||||
2780 | # | ||||
2781 | # NAME ID MAJOR MINOR | ||||
2782 | # Windows NT 4 2 4 0 | ||||
2783 | # Windows 2000 2 5 0 | ||||
2784 | # Windows XP 2 5 1 | ||||
2785 | # Windows Server 2003 2 5 2 | ||||
2786 | |||||
2787 | return "win32s" unless $id; # If id==0 then its a win32s box. | ||||
2788 | $os = { # Magic numbers from MSDN | ||||
2789 | # documentation of GetOSVersion | ||||
2790 | 1 => { | ||||
2791 | 0 => "95", | ||||
2792 | 10 => "98", | ||||
2793 | 90 => "Me" | ||||
2794 | }, | ||||
2795 | 2 => { | ||||
2796 | 0 => "2000", # or NT 4, see below | ||||
2797 | 1 => "XP/.Net", | ||||
2798 | 2 => "Win2003", | ||||
2799 | 51 => "NT3.51" | ||||
2800 | } | ||||
2801 | }->{$id}->{$minor}; | ||||
2802 | |||||
2803 | # If $os is undefined, the above code is out of date. Suggested updates | ||||
2804 | # are welcome. | ||||
2805 | unless ( defined $os ) { | ||||
2806 | $os = ""; | ||||
2807 | $$rpending_complaint .= <<EOS; | ||||
2808 | Error trying to discover Win_OS_Type: $id:$major:$minor Has no name of record! | ||||
2809 | We won't be able to look for a system-wide config file. | ||||
2810 | EOS | ||||
2811 | } | ||||
2812 | |||||
2813 | # Unfortunately the logic used for the various versions isn't so clever.. | ||||
2814 | # so we have to handle an outside case. | ||||
2815 | return ( $os eq "2000" && $major != 5 ) ? "NT4" : $os; | ||||
2816 | } | ||||
2817 | |||||
2818 | sub is_unix { | ||||
2819 | return | ||||
2820 | ( $^O !~ /win32|dos/i ) | ||||
2821 | && ( $^O ne 'VMS' ) | ||||
2822 | && ( $^O ne 'OS2' ) | ||||
2823 | && ( $^O ne 'MacOS' ); | ||||
2824 | } | ||||
2825 | |||||
2826 | sub look_for_Windows { | ||||
2827 | |||||
2828 | # determine Windows sub-type and location of | ||||
2829 | # system-wide configuration files | ||||
2830 | my $rpending_complaint = shift; | ||||
2831 | my $is_Windows = ( $^O =~ /win32|dos/i ); | ||||
2832 | my $Windows_type = Win_OS_Type($rpending_complaint) if $is_Windows; | ||||
2833 | return ( $is_Windows, $Windows_type ); | ||||
2834 | } | ||||
2835 | |||||
2836 | sub find_config_file { | ||||
2837 | |||||
2838 | # look for a .perltidyrc configuration file | ||||
2839 | # For Windows also look for a file named perltidy.ini | ||||
2840 | my ( $is_Windows, $Windows_type, $rconfig_file_chatter, | ||||
2841 | $rpending_complaint ) = @_; | ||||
2842 | |||||
2843 | $$rconfig_file_chatter .= "# Config file search...system reported as:"; | ||||
2844 | if ($is_Windows) { | ||||
2845 | $$rconfig_file_chatter .= "Windows $Windows_type\n"; | ||||
2846 | } | ||||
2847 | else { | ||||
2848 | $$rconfig_file_chatter .= " $^O\n"; | ||||
2849 | } | ||||
2850 | |||||
2851 | # sub to check file existence and record all tests | ||||
2852 | my $exists_config_file = sub { | ||||
2853 | my $config_file = shift; | ||||
2854 | return 0 unless $config_file; | ||||
2855 | $$rconfig_file_chatter .= "# Testing: $config_file\n"; | ||||
2856 | return -f $config_file; | ||||
2857 | }; | ||||
2858 | |||||
2859 | my $config_file; | ||||
2860 | |||||
2861 | # look in current directory first | ||||
2862 | $config_file = ".perltidyrc"; | ||||
2863 | return $config_file if $exists_config_file->($config_file); | ||||
2864 | if ($is_Windows) { | ||||
2865 | $config_file = "perltidy.ini"; | ||||
2866 | return $config_file if $exists_config_file->($config_file); | ||||
2867 | } | ||||
2868 | |||||
2869 | # Default environment vars. | ||||
2870 | my @envs = qw(PERLTIDY HOME); | ||||
2871 | |||||
2872 | # Check the NT/2k/XP locations, first a local machine def, then a | ||||
2873 | # network def | ||||
2874 | push @envs, qw(USERPROFILE HOMESHARE) if $^O =~ /win32/i; | ||||
2875 | |||||
2876 | # Now go through the environment ... | ||||
2877 | foreach my $var (@envs) { | ||||
2878 | $$rconfig_file_chatter .= "# Examining: \$ENV{$var}"; | ||||
2879 | if ( defined( $ENV{$var} ) ) { | ||||
2880 | $$rconfig_file_chatter .= " = $ENV{$var}\n"; | ||||
2881 | |||||
2882 | # test ENV{ PERLTIDY } as file: | ||||
2883 | if ( $var eq 'PERLTIDY' ) { | ||||
2884 | $config_file = "$ENV{$var}"; | ||||
2885 | return $config_file if $exists_config_file->($config_file); | ||||
2886 | } | ||||
2887 | |||||
2888 | # test ENV as directory: | ||||
2889 | $config_file = catfile( $ENV{$var}, ".perltidyrc" ); | ||||
2890 | return $config_file if $exists_config_file->($config_file); | ||||
2891 | |||||
2892 | if ($is_Windows) { | ||||
2893 | $config_file = catfile( $ENV{$var}, "perltidy.ini" ); | ||||
2894 | return $config_file if $exists_config_file->($config_file); | ||||
2895 | } | ||||
2896 | } | ||||
2897 | else { | ||||
2898 | $$rconfig_file_chatter .= "\n"; | ||||
2899 | } | ||||
2900 | } | ||||
2901 | |||||
2902 | # then look for a system-wide definition | ||||
2903 | # where to look varies with OS | ||||
2904 | if ($is_Windows) { | ||||
2905 | |||||
2906 | if ($Windows_type) { | ||||
2907 | my ( $os, $system, $allusers ) = | ||||
2908 | Win_Config_Locs( $rpending_complaint, $Windows_type ); | ||||
2909 | |||||
2910 | # Check All Users directory, if there is one. | ||||
2911 | # i.e. C:\Documents and Settings\User\perltidy.ini | ||||
2912 | if ($allusers) { | ||||
2913 | |||||
2914 | $config_file = catfile( $allusers, ".perltidyrc" ); | ||||
2915 | return $config_file if $exists_config_file->($config_file); | ||||
2916 | |||||
2917 | $config_file = catfile( $allusers, "perltidy.ini" ); | ||||
2918 | return $config_file if $exists_config_file->($config_file); | ||||
2919 | } | ||||
2920 | |||||
2921 | # Check system directory. | ||||
2922 | # retain old code in case someone has been able to create | ||||
2923 | # a file with a leading period. | ||||
2924 | $config_file = catfile( $system, ".perltidyrc" ); | ||||
2925 | return $config_file if $exists_config_file->($config_file); | ||||
2926 | |||||
2927 | $config_file = catfile( $system, "perltidy.ini" ); | ||||
2928 | return $config_file if $exists_config_file->($config_file); | ||||
2929 | } | ||||
2930 | } | ||||
2931 | |||||
2932 | # Place to add customization code for other systems | ||||
2933 | elsif ( $^O eq 'OS2' ) { | ||||
2934 | } | ||||
2935 | elsif ( $^O eq 'MacOS' ) { | ||||
2936 | } | ||||
2937 | elsif ( $^O eq 'VMS' ) { | ||||
2938 | } | ||||
2939 | |||||
2940 | # Assume some kind of Unix | ||||
2941 | else { | ||||
2942 | |||||
2943 | $config_file = "/usr/local/etc/perltidyrc"; | ||||
2944 | return $config_file if $exists_config_file->($config_file); | ||||
2945 | |||||
2946 | $config_file = "/etc/perltidyrc"; | ||||
2947 | return $config_file if $exists_config_file->($config_file); | ||||
2948 | } | ||||
2949 | |||||
2950 | # Couldn't find a config file | ||||
2951 | return; | ||||
2952 | } | ||||
2953 | |||||
2954 | sub Win_Config_Locs { | ||||
2955 | |||||
2956 | # In scalar context returns the OS name (95 98 ME NT3.51 NT4 2000 XP), | ||||
2957 | # or undef if its not a win32 OS. In list context returns OS, System | ||||
2958 | # Directory, and All Users Directory. All Users will be empty on a | ||||
2959 | # 9x/Me box. Contributed by: Yves Orton. | ||||
2960 | |||||
2961 | my $rpending_complaint = shift; | ||||
2962 | my $os = (@_) ? shift : Win_OS_Type(); | ||||
2963 | return unless $os; | ||||
2964 | |||||
2965 | my $system = ""; | ||||
2966 | my $allusers = ""; | ||||
2967 | |||||
2968 | if ( $os =~ /9[58]|Me/ ) { | ||||
2969 | $system = "C:/Windows"; | ||||
2970 | } | ||||
2971 | elsif ( $os =~ /NT|XP|200?/ ) { | ||||
2972 | $system = ( $os =~ /XP/ ) ? "C:/Windows/" : "C:/WinNT/"; | ||||
2973 | $allusers = | ||||
2974 | ( $os =~ /NT/ ) | ||||
2975 | ? "C:/WinNT/profiles/All Users/" | ||||
2976 | : "C:/Documents and Settings/All Users/"; | ||||
2977 | } | ||||
2978 | else { | ||||
2979 | |||||
2980 | # This currently would only happen on a win32s computer. I don't have | ||||
2981 | # one to test, so I am unsure how to proceed. Suggestions welcome! | ||||
2982 | $$rpending_complaint .= | ||||
2983 | "I dont know a sensible place to look for config files on an $os system.\n"; | ||||
2984 | return; | ||||
2985 | } | ||||
2986 | return wantarray ? ( $os, $system, $allusers ) : $os; | ||||
2987 | } | ||||
2988 | |||||
2989 | sub dump_config_file { | ||||
2990 | my $fh = shift; | ||||
2991 | my $config_file = shift; | ||||
2992 | my $rconfig_file_chatter = shift; | ||||
2993 | print STDOUT "$$rconfig_file_chatter"; | ||||
2994 | if ($fh) { | ||||
2995 | print STDOUT "# Dump of file: '$config_file'\n"; | ||||
2996 | while ( my $line = $fh->getline() ) { print STDOUT $line } | ||||
2997 | eval { $fh->close() }; | ||||
2998 | } | ||||
2999 | else { | ||||
3000 | print STDOUT "# ...no config file found\n"; | ||||
3001 | } | ||||
3002 | } | ||||
3003 | |||||
3004 | sub read_config_file { | ||||
3005 | |||||
3006 | my ( $fh, $config_file, $rexpansion ) = @_; | ||||
3007 | my @config_list = (); | ||||
3008 | my $saw_pbp; | ||||
3009 | |||||
3010 | # file is bad if non-empty $death_message is returned | ||||
3011 | my $death_message = ""; | ||||
3012 | |||||
3013 | my $name = undef; | ||||
3014 | my $line_no; | ||||
3015 | while ( my $line = $fh->getline() ) { | ||||
3016 | $line_no++; | ||||
3017 | chomp $line; | ||||
3018 | ( $line, $death_message ) = | ||||
3019 | strip_comment( $line, $config_file, $line_no ); | ||||
3020 | last if ($death_message); | ||||
3021 | next unless $line; | ||||
3022 | $line =~ s/^\s*(.*?)\s*$/$1/; # trim both ends | ||||
3023 | next unless $line; | ||||
3024 | |||||
3025 | # look for something of the general form | ||||
3026 | # newname { body } | ||||
3027 | # or just | ||||
3028 | # body | ||||
3029 | |||||
3030 | my $body = $line; | ||||
3031 | my ($newname); | ||||
3032 | if ( $line =~ /^((\w+)\s*\{)(.*)\}$/ ) { | ||||
3033 | ( $newname, $body ) = ( $2, $3, ); | ||||
3034 | } | ||||
3035 | if ($body) { | ||||
3036 | |||||
3037 | if ( !$saw_pbp && $body =~ /-(pbp|perl-best-practices)/ ) { | ||||
3038 | $saw_pbp = 1; | ||||
3039 | } | ||||
3040 | |||||
3041 | # handle a new alias definition | ||||
3042 | if ($newname) { | ||||
3043 | if ($name) { | ||||
3044 | $death_message = | ||||
3045 | "No '}' seen after $name and before $newname in config file $config_file line $.\n"; | ||||
3046 | last; | ||||
3047 | } | ||||
3048 | $name = $newname; | ||||
3049 | |||||
3050 | if ( ${$rexpansion}{$name} ) { | ||||
3051 | local $" = ')('; | ||||
3052 | my @names = sort keys %$rexpansion; | ||||
3053 | $death_message = | ||||
3054 | "Here is a list of all installed aliases\n(@names)\n" | ||||
3055 | . "Attempting to redefine alias ($name) in config file $config_file line $.\n"; | ||||
3056 | last; | ||||
3057 | } | ||||
3058 | ${$rexpansion}{$name} = []; | ||||
3059 | } | ||||
3060 | |||||
3061 | # now do the body | ||||
3062 | if ($body) { | ||||
3063 | |||||
3064 | my ( $rbody_parts, $msg ) = parse_args($body); | ||||
3065 | if ($msg) { | ||||
3066 | $death_message = <<EOM; | ||||
3067 | Error reading file '$config_file' at line number $line_no. | ||||
3068 | $msg | ||||
3069 | Please fix this line or use -npro to avoid reading this file | ||||
3070 | EOM | ||||
3071 | last; | ||||
3072 | } | ||||
3073 | |||||
3074 | if ($name) { | ||||
3075 | |||||
3076 | # remove leading dashes if this is an alias | ||||
3077 | foreach (@$rbody_parts) { s/^\-+//; } | ||||
3078 | push @{ ${$rexpansion}{$name} }, @$rbody_parts; | ||||
3079 | } | ||||
3080 | else { | ||||
3081 | push( @config_list, @$rbody_parts ); | ||||
3082 | } | ||||
3083 | } | ||||
3084 | } | ||||
3085 | } | ||||
3086 | eval { $fh->close() }; | ||||
3087 | return ( \@config_list, $death_message, $saw_pbp ); | ||||
3088 | } | ||||
3089 | |||||
3090 | sub strip_comment { | ||||
3091 | |||||
3092 | # Strip any comment from a command line | ||||
3093 | my ( $instr, $config_file, $line_no ) = @_; | ||||
3094 | my $msg = ""; | ||||
3095 | |||||
3096 | # check for full-line comment | ||||
3097 | if ( $instr =~ /^\s*#/ ) { | ||||
3098 | return ( "", $msg ); | ||||
3099 | } | ||||
3100 | |||||
3101 | # nothing to do if no comments | ||||
3102 | if ( $instr !~ /#/ ) { | ||||
3103 | return ( $instr, $msg ); | ||||
3104 | } | ||||
3105 | |||||
3106 | # handle case of no quotes | ||||
3107 | elsif ( $instr !~ /['"]/ ) { | ||||
3108 | |||||
3109 | # We now require a space before the # of a side comment | ||||
3110 | # this allows something like: | ||||
3111 | # -sbcp=# | ||||
3112 | # Otherwise, it would have to be quoted: | ||||
3113 | # -sbcp='#' | ||||
3114 | $instr =~ s/\s+\#.*$//; | ||||
3115 | return ( $instr, $msg ); | ||||
3116 | } | ||||
3117 | |||||
3118 | # handle comments and quotes | ||||
3119 | my $outstr = ""; | ||||
3120 | my $quote_char = ""; | ||||
3121 | while (1) { | ||||
3122 | |||||
3123 | # looking for ending quote character | ||||
3124 | if ($quote_char) { | ||||
3125 | if ( $instr =~ /\G($quote_char)/gc ) { | ||||
3126 | $quote_char = ""; | ||||
3127 | $outstr .= $1; | ||||
3128 | } | ||||
3129 | elsif ( $instr =~ /\G(.)/gc ) { | ||||
3130 | $outstr .= $1; | ||||
3131 | } | ||||
3132 | |||||
3133 | # error..we reached the end without seeing the ending quote char | ||||
3134 | else { | ||||
3135 | $msg = <<EOM; | ||||
3136 | Error reading file $config_file at line number $line_no. | ||||
3137 | Did not see ending quote character <$quote_char> in this text: | ||||
3138 | $instr | ||||
3139 | Please fix this line or use -npro to avoid reading this file | ||||
3140 | EOM | ||||
3141 | last; | ||||
3142 | } | ||||
3143 | } | ||||
3144 | |||||
3145 | # accumulating characters and looking for start of a quoted string | ||||
3146 | else { | ||||
3147 | if ( $instr =~ /\G([\"\'])/gc ) { | ||||
3148 | $outstr .= $1; | ||||
3149 | $quote_char = $1; | ||||
3150 | } | ||||
3151 | |||||
3152 | # Note: not yet enforcing the space-before-hash rule for side | ||||
3153 | # comments if the parameter is quoted. | ||||
3154 | elsif ( $instr =~ /\G#/gc ) { | ||||
3155 | last; | ||||
3156 | } | ||||
3157 | elsif ( $instr =~ /\G(.)/gc ) { | ||||
3158 | $outstr .= $1; | ||||
3159 | } | ||||
3160 | else { | ||||
3161 | last; | ||||
3162 | } | ||||
3163 | } | ||||
3164 | } | ||||
3165 | return ( $outstr, $msg ); | ||||
3166 | } | ||||
3167 | |||||
3168 | sub parse_args { | ||||
3169 | |||||
3170 | # Parse a command string containing multiple string with possible | ||||
3171 | # quotes, into individual commands. It might look like this, for example: | ||||
3172 | # | ||||
3173 | # -wba=" + - " -some-thing -wbb='. && ||' | ||||
3174 | # | ||||
3175 | # There is no need, at present, to handle escaped quote characters. | ||||
3176 | # (They are not perltidy tokens, so needn't be in strings). | ||||
3177 | |||||
3178 | my ($body) = @_; | ||||
3179 | my @body_parts = (); | ||||
3180 | my $quote_char = ""; | ||||
3181 | my $part = ""; | ||||
3182 | my $msg = ""; | ||||
3183 | while (1) { | ||||
3184 | |||||
3185 | # looking for ending quote character | ||||
3186 | if ($quote_char) { | ||||
3187 | if ( $body =~ /\G($quote_char)/gc ) { | ||||
3188 | $quote_char = ""; | ||||
3189 | } | ||||
3190 | elsif ( $body =~ /\G(.)/gc ) { | ||||
3191 | $part .= $1; | ||||
3192 | } | ||||
3193 | |||||
3194 | # error..we reached the end without seeing the ending quote char | ||||
3195 | else { | ||||
3196 | if ( length($part) ) { push @body_parts, $part; } | ||||
3197 | $msg = <<EOM; | ||||
3198 | Did not see ending quote character <$quote_char> in this text: | ||||
3199 | $body | ||||
3200 | EOM | ||||
3201 | last; | ||||
3202 | } | ||||
3203 | } | ||||
3204 | |||||
3205 | # accumulating characters and looking for start of a quoted string | ||||
3206 | else { | ||||
3207 | if ( $body =~ /\G([\"\'])/gc ) { | ||||
3208 | $quote_char = $1; | ||||
3209 | } | ||||
3210 | elsif ( $body =~ /\G(\s+)/gc ) { | ||||
3211 | if ( length($part) ) { push @body_parts, $part; } | ||||
3212 | $part = ""; | ||||
3213 | } | ||||
3214 | elsif ( $body =~ /\G(.)/gc ) { | ||||
3215 | $part .= $1; | ||||
3216 | } | ||||
3217 | else { | ||||
3218 | if ( length($part) ) { push @body_parts, $part; } | ||||
3219 | last; | ||||
3220 | } | ||||
3221 | } | ||||
3222 | } | ||||
3223 | return ( \@body_parts, $msg ); | ||||
3224 | } | ||||
3225 | |||||
3226 | sub dump_long_names { | ||||
3227 | |||||
3228 | my @names = sort @_; | ||||
3229 | print STDOUT <<EOM; | ||||
3230 | # Command line long names (passed to GetOptions) | ||||
3231 | #--------------------------------------------------------------- | ||||
3232 | # here is a summary of the Getopt codes: | ||||
3233 | # <none> does not take an argument | ||||
3234 | # =s takes a mandatory string | ||||
3235 | # :s takes an optional string | ||||
3236 | # =i takes a mandatory integer | ||||
3237 | # :i takes an optional integer | ||||
3238 | # ! does not take an argument and may be negated | ||||
3239 | # i.e., -foo and -nofoo are allowed | ||||
3240 | # a double dash signals the end of the options list | ||||
3241 | # | ||||
3242 | #--------------------------------------------------------------- | ||||
3243 | EOM | ||||
3244 | |||||
3245 | foreach (@names) { print STDOUT "$_\n" } | ||||
3246 | } | ||||
3247 | |||||
3248 | sub dump_defaults { | ||||
3249 | my @defaults = sort @_; | ||||
3250 | print STDOUT "Default command line options:\n"; | ||||
3251 | foreach (@_) { print STDOUT "$_\n" } | ||||
3252 | } | ||||
3253 | |||||
3254 | sub readable_options { | ||||
3255 | |||||
3256 | # return options for this run as a string which could be | ||||
3257 | # put in a perltidyrc file | ||||
3258 | my ( $rOpts, $roption_string ) = @_; | ||||
3259 | my %Getopt_flags; | ||||
3260 | my $rGetopt_flags = \%Getopt_flags; | ||||
3261 | my $readable_options = "# Final parameter set for this run.\n"; | ||||
3262 | $readable_options .= | ||||
3263 | "# See utility 'perltidyrc_dump.pl' for nicer formatting.\n"; | ||||
3264 | foreach my $opt ( @{$roption_string} ) { | ||||
3265 | my $flag = ""; | ||||
3266 | if ( $opt =~ /(.*)(!|=.*)$/ ) { | ||||
3267 | $opt = $1; | ||||
3268 | $flag = $2; | ||||
3269 | } | ||||
3270 | if ( defined( $rOpts->{$opt} ) ) { | ||||
3271 | $rGetopt_flags->{$opt} = $flag; | ||||
3272 | } | ||||
3273 | } | ||||
3274 | foreach my $key ( sort keys %{$rOpts} ) { | ||||
3275 | my $flag = $rGetopt_flags->{$key}; | ||||
3276 | my $value = $rOpts->{$key}; | ||||
3277 | my $prefix = '--'; | ||||
3278 | my $suffix = ""; | ||||
3279 | if ($flag) { | ||||
3280 | if ( $flag =~ /^=/ ) { | ||||
3281 | if ( $value !~ /^\d+$/ ) { $value = '"' . $value . '"' } | ||||
3282 | $suffix = "=" . $value; | ||||
3283 | } | ||||
3284 | elsif ( $flag =~ /^!/ ) { | ||||
3285 | $prefix .= "no" unless ($value); | ||||
3286 | } | ||||
3287 | else { | ||||
3288 | |||||
3289 | # shouldn't happen | ||||
3290 | $readable_options .= | ||||
3291 | "# ERROR in dump_options: unrecognized flag $flag for $key\n"; | ||||
3292 | } | ||||
3293 | } | ||||
3294 | $readable_options .= $prefix . $key . $suffix . "\n"; | ||||
3295 | } | ||||
3296 | return $readable_options; | ||||
3297 | } | ||||
3298 | |||||
3299 | sub show_version { | ||||
3300 | print STDOUT <<"EOM"; | ||||
3301 | This is perltidy, v$VERSION | ||||
3302 | |||||
3303 | Copyright 2000-2014, Steve Hancock | ||||
3304 | |||||
3305 | Perltidy is free software and may be copied under the terms of the GNU | ||||
3306 | General Public License, which is included in the distribution files. | ||||
3307 | |||||
3308 | Complete documentation for perltidy can be found using 'man perltidy' | ||||
3309 | or on the internet at http://perltidy.sourceforge.net. | ||||
3310 | EOM | ||||
3311 | } | ||||
3312 | |||||
3313 | sub usage { | ||||
3314 | |||||
3315 | print STDOUT <<EOF; | ||||
3316 | This is perltidy version $VERSION, a perl script indenter. Usage: | ||||
3317 | |||||
3318 | perltidy [ options ] file1 file2 file3 ... | ||||
3319 | (output goes to file1.tdy, file2.tdy, file3.tdy, ...) | ||||
3320 | perltidy [ options ] file1 -o outfile | ||||
3321 | perltidy [ options ] file1 -st >outfile | ||||
3322 | perltidy [ options ] <infile >outfile | ||||
3323 | |||||
3324 | Options have short and long forms. Short forms are shown; see | ||||
3325 | man pages for long forms. Note: '=s' indicates a required string, | ||||
3326 | and '=n' indicates a required integer. | ||||
3327 | |||||
3328 | I/O control | ||||
3329 | -h show this help | ||||
3330 | -o=file name of the output file (only if single input file) | ||||
3331 | -oext=s change output extension from 'tdy' to s | ||||
3332 | -opath=path change path to be 'path' for output files | ||||
3333 | -b backup original to .bak and modify file in-place | ||||
3334 | -bext=s change default backup extension from 'bak' to s | ||||
3335 | -q deactivate error messages (for running under editor) | ||||
3336 | -w include non-critical warning messages in the .ERR error output | ||||
3337 | -syn run perl -c to check syntax (default under unix systems) | ||||
3338 | -log save .LOG file, which has useful diagnostics | ||||
3339 | -f force perltidy to read a binary file | ||||
3340 | -g like -log but writes more detailed .LOG file, for debugging scripts | ||||
3341 | -opt write the set of options actually used to a .LOG file | ||||
3342 | -npro ignore .perltidyrc configuration command file | ||||
3343 | -pro=file read configuration commands from file instead of .perltidyrc | ||||
3344 | -st send output to standard output, STDOUT | ||||
3345 | -se send all error output to standard error output, STDERR | ||||
3346 | -v display version number to standard output and quit | ||||
3347 | |||||
3348 | Basic Options: | ||||
3349 | -i=n use n columns per indentation level (default n=4) | ||||
3350 | -t tabs: use one tab character per indentation level, not recommeded | ||||
3351 | -nt no tabs: use n spaces per indentation level (default) | ||||
3352 | -et=n entab leading whitespace n spaces per tab; not recommended | ||||
3353 | -io "indent only": just do indentation, no other formatting. | ||||
3354 | -sil=n set starting indentation level to n; use if auto detection fails | ||||
3355 | -ole=s specify output line ending (s=dos or win, mac, unix) | ||||
3356 | -ple keep output line endings same as input (input must be filename) | ||||
3357 | |||||
3358 | Whitespace Control | ||||
3359 | -fws freeze whitespace; this disables all whitespace changes | ||||
3360 | and disables the following switches: | ||||
3361 | -bt=n sets brace tightness, n= (0 = loose, 1=default, 2 = tight) | ||||
3362 | -bbt same as -bt but for code block braces; same as -bt if not given | ||||
3363 | -bbvt block braces vertically tight; use with -bl or -bli | ||||
3364 | -bbvtl=s make -bbvt to apply to selected list of block types | ||||
3365 | -pt=n paren tightness (n=0, 1 or 2) | ||||
3366 | -sbt=n square bracket tightness (n=0, 1, or 2) | ||||
3367 | -bvt=n brace vertical tightness, | ||||
3368 | n=(0=open, 1=close unless multiple steps on a line, 2=always close) | ||||
3369 | -pvt=n paren vertical tightness (see -bvt for n) | ||||
3370 | -sbvt=n square bracket vertical tightness (see -bvt for n) | ||||
3371 | -bvtc=n closing brace vertical tightness: | ||||
3372 | n=(0=open, 1=sometimes close, 2=always close) | ||||
3373 | -pvtc=n closing paren vertical tightness, see -bvtc for n. | ||||
3374 | -sbvtc=n closing square bracket vertical tightness, see -bvtc for n. | ||||
3375 | -ci=n sets continuation indentation=n, default is n=2 spaces | ||||
3376 | -lp line up parentheses, brackets, and non-BLOCK braces | ||||
3377 | -sfs add space before semicolon in for( ; ; ) | ||||
3378 | -aws allow perltidy to add whitespace (default) | ||||
3379 | -dws delete all old non-essential whitespace | ||||
3380 | -icb indent closing brace of a code block | ||||
3381 | -cti=n closing indentation of paren, square bracket, or non-block brace: | ||||
3382 | n=0 none, =1 align with opening, =2 one full indentation level | ||||
3383 | -icp equivalent to -cti=2 | ||||
3384 | -wls=s want space left of tokens in string; i.e. -nwls='+ - * /' | ||||
3385 | -wrs=s want space right of tokens in string; | ||||
3386 | -sts put space before terminal semicolon of a statement | ||||
3387 | -sak=s put space between keywords given in s and '('; | ||||
3388 | -nsak=s no space between keywords in s and '('; i.e. -nsak='my our local' | ||||
3389 | |||||
3390 | Line Break Control | ||||
3391 | -fnl freeze newlines; this disables all line break changes | ||||
3392 | and disables the following switches: | ||||
3393 | -anl add newlines; ok to introduce new line breaks | ||||
3394 | -bbs add blank line before subs and packages | ||||
3395 | -bbc add blank line before block comments | ||||
3396 | -bbb add blank line between major blocks | ||||
3397 | -kbl=n keep old blank lines? 0=no, 1=some, 2=all | ||||
3398 | -mbl=n maximum consecutive blank lines to output (default=1) | ||||
3399 | -ce cuddled else; use this style: '} else {' | ||||
3400 | -dnl delete old newlines (default) | ||||
3401 | -l=n maximum line length; default n=80 | ||||
3402 | -bl opening brace on new line | ||||
3403 | -sbl opening sub brace on new line. value of -bl is used if not given. | ||||
3404 | -bli opening brace on new line and indented | ||||
3405 | -bar opening brace always on right, even for long clauses | ||||
3406 | -vt=n vertical tightness (requires -lp); n controls break after opening | ||||
3407 | token: 0=never 1=no break if next line balanced 2=no break | ||||
3408 | -vtc=n vertical tightness of closing container; n controls if closing | ||||
3409 | token starts new line: 0=always 1=not unless list 1=never | ||||
3410 | -wba=s want break after tokens in string; i.e. wba=': .' | ||||
3411 | -wbb=s want break before tokens in string | ||||
3412 | |||||
3413 | Following Old Breakpoints | ||||
3414 | -kis keep interior semicolons. Allows multiple statements per line. | ||||
3415 | -boc break at old comma breaks: turns off all automatic list formatting | ||||
3416 | -bol break at old logical breakpoints: or, and, ||, && (default) | ||||
3417 | -bok break at old list keyword breakpoints such as map, sort (default) | ||||
3418 | -bot break at old conditional (ternary ?:) operator breakpoints (default) | ||||
3419 | -boa break at old attribute breakpoints | ||||
3420 | -cab=n break at commas after a comma-arrow (=>): | ||||
3421 | n=0 break at all commas after => | ||||
3422 | n=1 stable: break unless this breaks an existing one-line container | ||||
3423 | n=2 break only if a one-line container cannot be formed | ||||
3424 | n=3 do not treat commas after => specially at all | ||||
3425 | |||||
3426 | Comment controls | ||||
3427 | -ibc indent block comments (default) | ||||
3428 | -isbc indent spaced block comments; may indent unless no leading space | ||||
3429 | -msc=n minimum desired spaces to side comment, default 4 | ||||
3430 | -fpsc=n fix position for side comments; default 0; | ||||
3431 | -csc add or update closing side comments after closing BLOCK brace | ||||
3432 | -dcsc delete closing side comments created by a -csc command | ||||
3433 | -cscp=s change closing side comment prefix to be other than '## end' | ||||
3434 | -cscl=s change closing side comment to apply to selected list of blocks | ||||
3435 | -csci=n minimum number of lines needed to apply a -csc tag, default n=6 | ||||
3436 | -csct=n maximum number of columns of appended text, default n=20 | ||||
3437 | -cscw causes warning if old side comment is overwritten with -csc | ||||
3438 | |||||
3439 | -sbc use 'static block comments' identified by leading '##' (default) | ||||
3440 | -sbcp=s change static block comment identifier to be other than '##' | ||||
3441 | -osbc outdent static block comments | ||||
3442 | |||||
3443 | -ssc use 'static side comments' identified by leading '##' (default) | ||||
3444 | -sscp=s change static side comment identifier to be other than '##' | ||||
3445 | |||||
3446 | Delete selected text | ||||
3447 | -dac delete all comments AND pod | ||||
3448 | -dbc delete block comments | ||||
3449 | -dsc delete side comments | ||||
3450 | -dp delete pod | ||||
3451 | |||||
3452 | Send selected text to a '.TEE' file | ||||
3453 | -tac tee all comments AND pod | ||||
3454 | -tbc tee block comments | ||||
3455 | -tsc tee side comments | ||||
3456 | -tp tee pod | ||||
3457 | |||||
3458 | Outdenting | ||||
3459 | -olq outdent long quoted strings (default) | ||||
3460 | -olc outdent a long block comment line | ||||
3461 | -ola outdent statement labels | ||||
3462 | -okw outdent control keywords (redo, next, last, goto, return) | ||||
3463 | -okwl=s specify alternative keywords for -okw command | ||||
3464 | |||||
3465 | Other controls | ||||
3466 | -mft=n maximum fields per table; default n=40 | ||||
3467 | -x do not format lines before hash-bang line (i.e., for VMS) | ||||
3468 | -asc allows perltidy to add a ';' when missing (default) | ||||
3469 | -dsm allows perltidy to delete an unnecessary ';' (default) | ||||
3470 | |||||
3471 | Combinations of other parameters | ||||
3472 | -gnu attempt to follow GNU Coding Standards as applied to perl | ||||
3473 | -mangle remove as many newlines as possible (but keep comments and pods) | ||||
3474 | -extrude insert as many newlines as possible | ||||
3475 | |||||
3476 | Dump and die, debugging | ||||
3477 | -dop dump options used in this run to standard output and quit | ||||
3478 | -ddf dump default options to standard output and quit | ||||
3479 | -dsn dump all option short names to standard output and quit | ||||
3480 | -dln dump option long names to standard output and quit | ||||
3481 | -dpro dump whatever configuration file is in effect to standard output | ||||
3482 | -dtt dump all token types to standard output and quit | ||||
3483 | |||||
3484 | HTML | ||||
3485 | -html write an html file (see 'man perl2web' for many options) | ||||
3486 | Note: when -html is used, no indentation or formatting are done. | ||||
3487 | Hint: try perltidy -html -css=mystyle.css filename.pl | ||||
3488 | and edit mystyle.css to change the appearance of filename.html. | ||||
3489 | -nnn gives line numbers | ||||
3490 | -pre only writes out <pre>..</pre> code section | ||||
3491 | -toc places a table of contents to subs at the top (default) | ||||
3492 | -pod passes pod text through pod2html (default) | ||||
3493 | -frm write html as a frame (3 files) | ||||
3494 | -text=s extra extension for table of contents if -frm, default='toc' | ||||
3495 | -sext=s extra extension for file content if -frm, default='src' | ||||
3496 | |||||
3497 | A prefix of "n" negates short form toggle switches, and a prefix of "no" | ||||
3498 | negates the long forms. For example, -nasc means don't add missing | ||||
3499 | semicolons. | ||||
3500 | |||||
3501 | If you are unable to see this entire text, try "perltidy -h | more" | ||||
3502 | For more detailed information, and additional options, try "man perltidy", | ||||
3503 | or go to the perltidy home page at http://perltidy.sourceforge.net | ||||
3504 | EOF | ||||
3505 | |||||
3506 | } | ||||
3507 | |||||
3508 | sub process_this_file { | ||||
3509 | |||||
3510 | my ( $truth, $beauty ) = @_; | ||||
3511 | |||||
3512 | # loop to process each line of this file | ||||
3513 | while ( my $line_of_tokens = $truth->get_line() ) { | ||||
3514 | $beauty->write_line($line_of_tokens); | ||||
3515 | } | ||||
3516 | |||||
3517 | # finish up | ||||
3518 | eval { $beauty->finish_formatting() }; | ||||
3519 | $truth->report_tokenization_errors(); | ||||
3520 | } | ||||
3521 | |||||
3522 | sub check_syntax { | ||||
3523 | |||||
3524 | # Use 'perl -c' to make sure that we did not create bad syntax | ||||
3525 | # This is a very good independent check for programming errors | ||||
3526 | # | ||||
3527 | # Given names of the input and output files, ($istream, $ostream), | ||||
3528 | # we do the following: | ||||
3529 | # - check syntax of the input file | ||||
3530 | # - if bad, all done (could be an incomplete code snippet) | ||||
3531 | # - if infile syntax ok, then check syntax of the output file; | ||||
3532 | # - if outfile syntax bad, issue warning; this implies a code bug! | ||||
3533 | # - set and return flag "infile_syntax_ok" : =-1 bad 0 unknown 1 good | ||||
3534 | |||||
3535 | my ( $istream, $ostream, $logger_object, $rOpts ) = @_; | ||||
3536 | my $infile_syntax_ok = 0; | ||||
3537 | my $line_of_dashes = '-' x 42 . "\n"; | ||||
3538 | |||||
3539 | my $flags = $rOpts->{'perl-syntax-check-flags'}; | ||||
3540 | |||||
3541 | # be sure we invoke perl with -c | ||||
3542 | # note: perl will accept repeated flags like '-c -c'. It is safest | ||||
3543 | # to append another -c than try to find an interior bundled c, as | ||||
3544 | # in -Tc, because such a 'c' might be in a quoted string, for example. | ||||
3545 | if ( $flags !~ /(^-c|\s+-c)/ ) { $flags .= " -c" } | ||||
3546 | |||||
3547 | # be sure we invoke perl with -x if requested | ||||
3548 | # same comments about repeated parameters applies | ||||
3549 | if ( $rOpts->{'look-for-hash-bang'} ) { | ||||
3550 | if ( $flags !~ /(^-x|\s+-x)/ ) { $flags .= " -x" } | ||||
3551 | } | ||||
3552 | |||||
3553 | # this shouldn't happen unless a temporary file couldn't be made | ||||
3554 | if ( $istream eq '-' ) { | ||||
3555 | $logger_object->write_logfile_entry( | ||||
3556 | "Cannot run perl -c on STDIN and STDOUT\n"); | ||||
3557 | return $infile_syntax_ok; | ||||
3558 | } | ||||
3559 | |||||
3560 | $logger_object->write_logfile_entry( | ||||
3561 | "checking input file syntax with perl $flags\n"); | ||||
3562 | |||||
3563 | # Not all operating systems/shells support redirection of the standard | ||||
3564 | # error output. | ||||
3565 | my $error_redirection = ( $^O eq 'VMS' ) ? "" : '2>&1'; | ||||
3566 | |||||
3567 | my ( $istream_filename, $perl_output ) = | ||||
3568 | do_syntax_check( $istream, $flags, $error_redirection ); | ||||
3569 | $logger_object->write_logfile_entry( | ||||
3570 | "Input stream passed to Perl as file $istream_filename\n"); | ||||
3571 | $logger_object->write_logfile_entry($line_of_dashes); | ||||
3572 | $logger_object->write_logfile_entry("$perl_output\n"); | ||||
3573 | |||||
3574 | if ( $perl_output =~ /syntax\s*OK/ ) { | ||||
3575 | $infile_syntax_ok = 1; | ||||
3576 | $logger_object->write_logfile_entry($line_of_dashes); | ||||
3577 | $logger_object->write_logfile_entry( | ||||
3578 | "checking output file syntax with perl $flags ...\n"); | ||||
3579 | my ( $ostream_filename, $perl_output ) = | ||||
3580 | do_syntax_check( $ostream, $flags, $error_redirection ); | ||||
3581 | $logger_object->write_logfile_entry( | ||||
3582 | "Output stream passed to Perl as file $ostream_filename\n"); | ||||
3583 | $logger_object->write_logfile_entry($line_of_dashes); | ||||
3584 | $logger_object->write_logfile_entry("$perl_output\n"); | ||||
3585 | |||||
3586 | unless ( $perl_output =~ /syntax\s*OK/ ) { | ||||
3587 | $logger_object->write_logfile_entry($line_of_dashes); | ||||
3588 | $logger_object->warning( | ||||
3589 | "The output file has a syntax error when tested with perl $flags $ostream !\n" | ||||
3590 | ); | ||||
3591 | $logger_object->warning( | ||||
3592 | "This implies an error in perltidy; the file $ostream is bad\n" | ||||
3593 | ); | ||||
3594 | $logger_object->report_definite_bug(); | ||||
3595 | |||||
3596 | # the perl version number will be helpful for diagnosing the problem | ||||
3597 | $logger_object->write_logfile_entry( | ||||
3598 | qx/perl -v $error_redirection/ . "\n" ); | ||||
3599 | } | ||||
3600 | } | ||||
3601 | else { | ||||
3602 | |||||
3603 | # Only warn of perl -c syntax errors. Other messages, | ||||
3604 | # such as missing modules, are too common. They can be | ||||
3605 | # seen by running with perltidy -w | ||||
3606 | $logger_object->complain("A syntax check using perl $flags\n"); | ||||
3607 | $logger_object->complain( | ||||
3608 | "for the output in file $istream_filename gives:\n"); | ||||
3609 | $logger_object->complain($line_of_dashes); | ||||
3610 | $logger_object->complain("$perl_output\n"); | ||||
3611 | $logger_object->complain($line_of_dashes); | ||||
3612 | $infile_syntax_ok = -1; | ||||
3613 | $logger_object->write_logfile_entry($line_of_dashes); | ||||
3614 | $logger_object->write_logfile_entry( | ||||
3615 | "The output file will not be checked because of input file problems\n" | ||||
3616 | ); | ||||
3617 | } | ||||
3618 | return $infile_syntax_ok; | ||||
3619 | } | ||||
3620 | |||||
3621 | sub do_syntax_check { | ||||
3622 | my ( $stream, $flags, $error_redirection ) = @_; | ||||
3623 | |||||
3624 | # We need a named input file for executing perl | ||||
3625 | my ( $stream_filename, $is_tmpfile ) = get_stream_as_named_file($stream); | ||||
3626 | |||||
3627 | # TODO: Need to add name of file to log somewhere | ||||
3628 | # otherwise Perl output is hard to read | ||||
3629 | if ( !$stream_filename ) { return $stream_filename, "" } | ||||
3630 | |||||
3631 | # We have to quote the filename in case it has unusual characters | ||||
3632 | # or spaces. Example: this filename #CM11.pm# gives trouble. | ||||
3633 | my $quoted_stream_filename = '"' . $stream_filename . '"'; | ||||
3634 | |||||
3635 | # Under VMS something like -T will become -t (and an error) so we | ||||
3636 | # will put quotes around the flags. Double quotes seem to work on | ||||
3637 | # Unix/Windows/VMS, but this may not work on all systems. (Single | ||||
3638 | # quotes do not work under Windows). It could become necessary to | ||||
3639 | # put double quotes around each flag, such as: -"c" -"T" | ||||
3640 | # We may eventually need some system-dependent coding here. | ||||
3641 | $flags = '"' . $flags . '"'; | ||||
3642 | |||||
3643 | # now wish for luck... | ||||
3644 | my $msg = qx/perl $flags $quoted_stream_filename $error_redirection/; | ||||
3645 | |||||
3646 | unlink $stream_filename if ($is_tmpfile); | ||||
3647 | return $stream_filename, $msg; | ||||
3648 | } | ||||
3649 | |||||
3650 | ##################################################################### | ||||
3651 | # | ||||
3652 | # This is a stripped down version of IO::Scalar | ||||
3653 | # Given a reference to a scalar, it supplies either: | ||||
3654 | # a getline method which reads lines (mode='r'), or | ||||
3655 | # a print method which reads lines (mode='w') | ||||
3656 | # | ||||
3657 | ##################################################################### | ||||
3658 | package Perl::Tidy::IOScalar; | ||||
3659 | 2 | 285µs | 2 | 85µs | # spent 48µs (10+37) within Perl::Tidy::IOScalar::BEGIN@3659 which was called:
# once (10µs+37µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 3659 # spent 48µs making 1 call to Perl::Tidy::IOScalar::BEGIN@3659
# spent 37µs making 1 call to Exporter::import |
3660 | |||||
3661 | sub new { | ||||
3662 | my ( $package, $rscalar, $mode ) = @_; | ||||
3663 | my $ref = ref $rscalar; | ||||
3664 | if ( $ref ne 'SCALAR' ) { | ||||
3665 | confess <<EOM; | ||||
3666 | ------------------------------------------------------------------------ | ||||
3667 | expecting ref to SCALAR but got ref to ($ref); trace follows: | ||||
3668 | ------------------------------------------------------------------------ | ||||
3669 | EOM | ||||
3670 | |||||
3671 | } | ||||
3672 | if ( $mode eq 'w' ) { | ||||
3673 | $$rscalar = ""; | ||||
3674 | return bless [ $rscalar, $mode ], $package; | ||||
3675 | } | ||||
3676 | elsif ( $mode eq 'r' ) { | ||||
3677 | |||||
3678 | # Convert a scalar to an array. | ||||
3679 | # This avoids looking for "\n" on each call to getline | ||||
3680 | # | ||||
3681 | # NOTES: The -1 count is needed to avoid loss of trailing blank lines | ||||
3682 | # (which might be important in a DATA section). | ||||
3683 | my @array; | ||||
3684 | if ( $rscalar && ${$rscalar} ) { | ||||
3685 | @array = map { $_ .= "\n" } split /\n/, ${$rscalar}, -1; | ||||
3686 | |||||
3687 | # remove possible extra blank line introduced with split | ||||
3688 | if ( @array && $array[-1] eq "\n" ) { pop @array } | ||||
3689 | } | ||||
3690 | my $i_next = 0; | ||||
3691 | return bless [ \@array, $mode, $i_next ], $package; | ||||
3692 | } | ||||
3693 | else { | ||||
3694 | confess <<EOM; | ||||
3695 | ------------------------------------------------------------------------ | ||||
3696 | expecting mode = 'r' or 'w' but got mode ($mode); trace follows: | ||||
3697 | ------------------------------------------------------------------------ | ||||
3698 | EOM | ||||
3699 | } | ||||
3700 | } | ||||
3701 | |||||
3702 | sub getline { | ||||
3703 | my $self = shift; | ||||
3704 | my $mode = $self->[1]; | ||||
3705 | if ( $mode ne 'r' ) { | ||||
3706 | confess <<EOM; | ||||
3707 | ------------------------------------------------------------------------ | ||||
3708 | getline call requires mode = 'r' but mode = ($mode); trace follows: | ||||
3709 | ------------------------------------------------------------------------ | ||||
3710 | EOM | ||||
3711 | } | ||||
3712 | my $i = $self->[2]++; | ||||
3713 | return $self->[0]->[$i]; | ||||
3714 | } | ||||
3715 | |||||
3716 | sub print { | ||||
3717 | my $self = shift; | ||||
3718 | my $mode = $self->[1]; | ||||
3719 | if ( $mode ne 'w' ) { | ||||
3720 | confess <<EOM; | ||||
3721 | ------------------------------------------------------------------------ | ||||
3722 | print call requires mode = 'w' but mode = ($mode); trace follows: | ||||
3723 | ------------------------------------------------------------------------ | ||||
3724 | EOM | ||||
3725 | } | ||||
3726 | ${ $self->[0] } .= $_[0]; | ||||
3727 | } | ||||
3728 | sub close { return } | ||||
3729 | |||||
3730 | ##################################################################### | ||||
3731 | # | ||||
3732 | # This is a stripped down version of IO::ScalarArray | ||||
3733 | # Given a reference to an array, it supplies either: | ||||
3734 | # a getline method which reads lines (mode='r'), or | ||||
3735 | # a print method which reads lines (mode='w') | ||||
3736 | # | ||||
3737 | # NOTE: this routine assumes that there aren't any embedded | ||||
3738 | # newlines within any of the array elements. There are no checks | ||||
3739 | # for that. | ||||
3740 | # | ||||
3741 | ##################################################################### | ||||
3742 | package Perl::Tidy::IOScalarArray; | ||||
3743 | 2 | 1.74ms | 2 | 69µs | # spent 39µs (9+30) within Perl::Tidy::IOScalarArray::BEGIN@3743 which was called:
# once (9µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 3743 # spent 39µs making 1 call to Perl::Tidy::IOScalarArray::BEGIN@3743
# spent 30µs making 1 call to Exporter::import |
3744 | |||||
3745 | sub new { | ||||
3746 | my ( $package, $rarray, $mode ) = @_; | ||||
3747 | my $ref = ref $rarray; | ||||
3748 | if ( $ref ne 'ARRAY' ) { | ||||
3749 | confess <<EOM; | ||||
3750 | ------------------------------------------------------------------------ | ||||
3751 | expecting ref to ARRAY but got ref to ($ref); trace follows: | ||||
3752 | ------------------------------------------------------------------------ | ||||
3753 | EOM | ||||
3754 | |||||
3755 | } | ||||
3756 | if ( $mode eq 'w' ) { | ||||
3757 | @$rarray = (); | ||||
3758 | return bless [ $rarray, $mode ], $package; | ||||
3759 | } | ||||
3760 | elsif ( $mode eq 'r' ) { | ||||
3761 | my $i_next = 0; | ||||
3762 | return bless [ $rarray, $mode, $i_next ], $package; | ||||
3763 | } | ||||
3764 | else { | ||||
3765 | confess <<EOM; | ||||
3766 | ------------------------------------------------------------------------ | ||||
3767 | expecting mode = 'r' or 'w' but got mode ($mode); trace follows: | ||||
3768 | ------------------------------------------------------------------------ | ||||
3769 | EOM | ||||
3770 | } | ||||
3771 | } | ||||
3772 | |||||
3773 | sub getline { | ||||
3774 | my $self = shift; | ||||
3775 | my $mode = $self->[1]; | ||||
3776 | if ( $mode ne 'r' ) { | ||||
3777 | confess <<EOM; | ||||
3778 | ------------------------------------------------------------------------ | ||||
3779 | getline requires mode = 'r' but mode = ($mode); trace follows: | ||||
3780 | ------------------------------------------------------------------------ | ||||
3781 | EOM | ||||
3782 | } | ||||
3783 | my $i = $self->[2]++; | ||||
3784 | return $self->[0]->[$i]; | ||||
3785 | } | ||||
3786 | |||||
3787 | sub print { | ||||
3788 | my $self = shift; | ||||
3789 | my $mode = $self->[1]; | ||||
3790 | if ( $mode ne 'w' ) { | ||||
3791 | confess <<EOM; | ||||
3792 | ------------------------------------------------------------------------ | ||||
3793 | print requires mode = 'w' but mode = ($mode); trace follows: | ||||
3794 | ------------------------------------------------------------------------ | ||||
3795 | EOM | ||||
3796 | } | ||||
3797 | push @{ $self->[0] }, $_[0]; | ||||
3798 | } | ||||
3799 | sub close { return } | ||||
3800 | |||||
3801 | ##################################################################### | ||||
3802 | # | ||||
3803 | # the Perl::Tidy::LineSource class supplies an object with a 'get_line()' method | ||||
3804 | # which returns the next line to be parsed | ||||
3805 | # | ||||
3806 | ##################################################################### | ||||
3807 | |||||
3808 | package Perl::Tidy::LineSource; | ||||
3809 | |||||
3810 | sub new { | ||||
3811 | |||||
3812 | my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_; | ||||
3813 | |||||
3814 | my $input_line_ending; | ||||
3815 | if ( $rOpts->{'preserve-line-endings'} ) { | ||||
3816 | $input_line_ending = Perl::Tidy::find_input_line_ending($input_file); | ||||
3817 | } | ||||
3818 | |||||
3819 | ( my $fh, $input_file ) = Perl::Tidy::streamhandle( $input_file, 'r' ); | ||||
3820 | return undef unless $fh; | ||||
3821 | |||||
3822 | # in order to check output syntax when standard output is used, | ||||
3823 | # or when it is an object, we have to make a copy of the file | ||||
3824 | if ( ( $input_file eq '-' || ref $input_file ) && $rOpts->{'check-syntax'} ) | ||||
3825 | { | ||||
3826 | |||||
3827 | # Turning off syntax check when input output is used. | ||||
3828 | # The reason is that temporary files cause problems on | ||||
3829 | # on many systems. | ||||
3830 | $rOpts->{'check-syntax'} = 0; | ||||
3831 | |||||
3832 | $$rpending_logfile_message .= <<EOM; | ||||
3833 | Note: --syntax check will be skipped because standard input is used | ||||
3834 | EOM | ||||
3835 | |||||
3836 | } | ||||
3837 | |||||
3838 | return bless { | ||||
3839 | _fh => $fh, | ||||
3840 | _filename => $input_file, | ||||
3841 | _input_line_ending => $input_line_ending, | ||||
3842 | _rinput_buffer => [], | ||||
3843 | _started => 0, | ||||
3844 | }, $class; | ||||
3845 | } | ||||
3846 | |||||
3847 | sub close_input_file { | ||||
3848 | my $self = shift; | ||||
3849 | |||||
3850 | # Only close physical files, not STDIN and other objects | ||||
3851 | my $filename = $self->{_filename}; | ||||
3852 | if ( $filename ne '-' && !ref $filename ) { | ||||
3853 | eval { $self->{_fh}->close() }; | ||||
3854 | } | ||||
3855 | } | ||||
3856 | |||||
3857 | sub get_line { | ||||
3858 | my $self = shift; | ||||
3859 | my $line = undef; | ||||
3860 | my $fh = $self->{_fh}; | ||||
3861 | my $rinput_buffer = $self->{_rinput_buffer}; | ||||
3862 | |||||
3863 | if ( scalar(@$rinput_buffer) ) { | ||||
3864 | $line = shift @$rinput_buffer; | ||||
3865 | } | ||||
3866 | else { | ||||
3867 | $line = $fh->getline(); | ||||
3868 | |||||
3869 | # patch to read raw mac files under unix, dos | ||||
3870 | # see if the first line has embedded \r's | ||||
3871 | if ( $line && !$self->{_started} ) { | ||||
3872 | if ( $line =~ /[\015][^\015\012]/ ) { | ||||
3873 | |||||
3874 | # found one -- break the line up and store in a buffer | ||||
3875 | @$rinput_buffer = map { $_ . "\n" } split /\015/, $line; | ||||
3876 | my $count = @$rinput_buffer; | ||||
3877 | $line = shift @$rinput_buffer; | ||||
3878 | } | ||||
3879 | $self->{_started}++; | ||||
3880 | } | ||||
3881 | } | ||||
3882 | return $line; | ||||
3883 | } | ||||
3884 | |||||
3885 | ##################################################################### | ||||
3886 | # | ||||
3887 | # the Perl::Tidy::LineSink class supplies a write_line method for | ||||
3888 | # actual file writing | ||||
3889 | # | ||||
3890 | ##################################################################### | ||||
3891 | |||||
3892 | package Perl::Tidy::LineSink; | ||||
3893 | |||||
3894 | sub new { | ||||
3895 | |||||
3896 | my ( $class, $output_file, $tee_file, $line_separator, $rOpts, | ||||
3897 | $rpending_logfile_message, $binmode ) | ||||
3898 | = @_; | ||||
3899 | my $fh = undef; | ||||
3900 | my $fh_tee = undef; | ||||
3901 | |||||
3902 | my $output_file_open = 0; | ||||
3903 | |||||
3904 | if ( $rOpts->{'format'} eq 'tidy' ) { | ||||
3905 | ( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' ); | ||||
3906 | unless ($fh) { Perl::Tidy::Die "Cannot write to output stream\n"; } | ||||
3907 | $output_file_open = 1; | ||||
3908 | if ($binmode) { | ||||
3909 | if ( ref($fh) eq 'IO::File' ) { | ||||
3910 | binmode $fh; | ||||
3911 | } | ||||
3912 | if ( $output_file eq '-' ) { binmode STDOUT } | ||||
3913 | } | ||||
3914 | } | ||||
3915 | |||||
3916 | # in order to check output syntax when standard output is used, | ||||
3917 | # or when it is an object, we have to make a copy of the file | ||||
3918 | if ( $output_file eq '-' || ref $output_file ) { | ||||
3919 | if ( $rOpts->{'check-syntax'} ) { | ||||
3920 | |||||
3921 | # Turning off syntax check when standard output is used. | ||||
3922 | # The reason is that temporary files cause problems on | ||||
3923 | # on many systems. | ||||
3924 | $rOpts->{'check-syntax'} = 0; | ||||
3925 | $$rpending_logfile_message .= <<EOM; | ||||
3926 | Note: --syntax check will be skipped because standard output is used | ||||
3927 | EOM | ||||
3928 | |||||
3929 | } | ||||
3930 | } | ||||
3931 | |||||
3932 | bless { | ||||
3933 | _fh => $fh, | ||||
3934 | _fh_tee => $fh_tee, | ||||
3935 | _output_file => $output_file, | ||||
3936 | _output_file_open => $output_file_open, | ||||
3937 | _tee_flag => 0, | ||||
3938 | _tee_file => $tee_file, | ||||
3939 | _tee_file_opened => 0, | ||||
3940 | _line_separator => $line_separator, | ||||
3941 | _binmode => $binmode, | ||||
3942 | }, $class; | ||||
3943 | } | ||||
3944 | |||||
3945 | sub write_line { | ||||
3946 | |||||
3947 | my $self = shift; | ||||
3948 | my $fh = $self->{_fh}; | ||||
3949 | |||||
3950 | my $output_file_open = $self->{_output_file_open}; | ||||
3951 | chomp $_[0]; | ||||
3952 | $_[0] .= $self->{_line_separator}; | ||||
3953 | |||||
3954 | $fh->print( $_[0] ) if ( $self->{_output_file_open} ); | ||||
3955 | |||||
3956 | if ( $self->{_tee_flag} ) { | ||||
3957 | unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() } | ||||
3958 | my $fh_tee = $self->{_fh_tee}; | ||||
3959 | print $fh_tee $_[0]; | ||||
3960 | } | ||||
3961 | } | ||||
3962 | |||||
3963 | sub tee_on { | ||||
3964 | my $self = shift; | ||||
3965 | $self->{_tee_flag} = 1; | ||||
3966 | } | ||||
3967 | |||||
3968 | sub tee_off { | ||||
3969 | my $self = shift; | ||||
3970 | $self->{_tee_flag} = 0; | ||||
3971 | } | ||||
3972 | |||||
3973 | sub really_open_tee_file { | ||||
3974 | my $self = shift; | ||||
3975 | my $tee_file = $self->{_tee_file}; | ||||
3976 | my $fh_tee; | ||||
3977 | $fh_tee = IO::File->new(">$tee_file") | ||||
3978 | or Perl::Tidy::Die("couldn't open TEE file $tee_file: $!\n"); | ||||
3979 | binmode $fh_tee if $self->{_binmode}; | ||||
3980 | $self->{_tee_file_opened} = 1; | ||||
3981 | $self->{_fh_tee} = $fh_tee; | ||||
3982 | } | ||||
3983 | |||||
3984 | sub close_output_file { | ||||
3985 | my $self = shift; | ||||
3986 | |||||
3987 | # Only close physical files, not STDOUT and other objects | ||||
3988 | my $output_file = $self->{_output_file}; | ||||
3989 | if ( $output_file ne '-' && !ref $output_file ) { | ||||
3990 | eval { $self->{_fh}->close() } if $self->{_output_file_open}; | ||||
3991 | } | ||||
3992 | $self->close_tee_file(); | ||||
3993 | } | ||||
3994 | |||||
3995 | sub close_tee_file { | ||||
3996 | my $self = shift; | ||||
3997 | |||||
3998 | # Only close physical files, not STDOUT and other objects | ||||
3999 | if ( $self->{_tee_file_opened} ) { | ||||
4000 | my $tee_file = $self->{_tee_file}; | ||||
4001 | if ( $tee_file ne '-' && !ref $tee_file ) { | ||||
4002 | eval { $self->{_fh_tee}->close() }; | ||||
4003 | $self->{_tee_file_opened} = 0; | ||||
4004 | } | ||||
4005 | } | ||||
4006 | } | ||||
4007 | |||||
4008 | ##################################################################### | ||||
4009 | # | ||||
4010 | # The Perl::Tidy::Diagnostics class writes the DIAGNOSTICS file, which is | ||||
4011 | # useful for program development. | ||||
4012 | # | ||||
4013 | # Only one such file is created regardless of the number of input | ||||
4014 | # files processed. This allows the results of processing many files | ||||
4015 | # to be summarized in a single file. | ||||
4016 | # | ||||
4017 | ##################################################################### | ||||
4018 | |||||
4019 | package Perl::Tidy::Diagnostics; | ||||
4020 | |||||
4021 | sub new { | ||||
4022 | |||||
4023 | my $class = shift; | ||||
4024 | bless { | ||||
4025 | _write_diagnostics_count => 0, | ||||
4026 | _last_diagnostic_file => "", | ||||
4027 | _input_file => "", | ||||
4028 | _fh => undef, | ||||
4029 | }, $class; | ||||
4030 | } | ||||
4031 | |||||
4032 | sub set_input_file { | ||||
4033 | my $self = shift; | ||||
4034 | $self->{_input_file} = $_[0]; | ||||
4035 | } | ||||
4036 | |||||
4037 | # This is a diagnostic routine which is useful for program development. | ||||
4038 | # Output from debug messages go to a file named DIAGNOSTICS, where | ||||
4039 | # they are labeled by file and line. This allows many files to be | ||||
4040 | # scanned at once for some particular condition of interest. | ||||
4041 | sub write_diagnostics { | ||||
4042 | my $self = shift; | ||||
4043 | |||||
4044 | unless ( $self->{_write_diagnostics_count} ) { | ||||
4045 | open DIAGNOSTICS, ">DIAGNOSTICS" | ||||
4046 | or death("couldn't open DIAGNOSTICS: $!\n"); | ||||
4047 | } | ||||
4048 | |||||
4049 | my $last_diagnostic_file = $self->{_last_diagnostic_file}; | ||||
4050 | my $input_file = $self->{_input_file}; | ||||
4051 | if ( $last_diagnostic_file ne $input_file ) { | ||||
4052 | print DIAGNOSTICS "\nFILE:$input_file\n"; | ||||
4053 | } | ||||
4054 | $self->{_last_diagnostic_file} = $input_file; | ||||
4055 | my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number(); | ||||
4056 | print DIAGNOSTICS "$input_line_number:\t@_"; | ||||
4057 | $self->{_write_diagnostics_count}++; | ||||
4058 | } | ||||
4059 | |||||
4060 | ##################################################################### | ||||
4061 | # | ||||
4062 | # The Perl::Tidy::Logger class writes the .LOG and .ERR files | ||||
4063 | # | ||||
4064 | ##################################################################### | ||||
4065 | |||||
4066 | package Perl::Tidy::Logger; | ||||
4067 | |||||
4068 | sub new { | ||||
4069 | my $class = shift; | ||||
4070 | my $fh; | ||||
4071 | my ( $rOpts, $log_file, $warning_file, $fh_stderr, $saw_extrude, ) = @_; | ||||
4072 | |||||
4073 | my $fh_warnings = $rOpts->{'standard-error-output'} ? $fh_stderr : undef; | ||||
4074 | |||||
4075 | # remove any old error output file if we might write a new one | ||||
4076 | unless ( $fh_warnings || ref($warning_file) ) { | ||||
4077 | if ( -e $warning_file ) { unlink($warning_file) } | ||||
4078 | } | ||||
4079 | |||||
4080 | my $logfile_gap = | ||||
4081 | defined( $rOpts->{'logfile-gap'} ) | ||||
4082 | ? $rOpts->{'logfile-gap'} | ||||
4083 | : 50; | ||||
4084 | if ( $logfile_gap == 0 ) { $logfile_gap = 1 } | ||||
4085 | |||||
4086 | bless { | ||||
4087 | _log_file => $log_file, | ||||
4088 | _logfile_gap => $logfile_gap, | ||||
4089 | _rOpts => $rOpts, | ||||
4090 | _fh_warnings => $fh_warnings, | ||||
4091 | _last_input_line_written => 0, | ||||
4092 | _at_end_of_file => 0, | ||||
4093 | _use_prefix => 1, | ||||
4094 | _block_log_output => 0, | ||||
4095 | _line_of_tokens => undef, | ||||
4096 | _output_line_number => undef, | ||||
4097 | _wrote_line_information_string => 0, | ||||
4098 | _wrote_column_headings => 0, | ||||
4099 | _warning_file => $warning_file, | ||||
4100 | _warning_count => 0, | ||||
4101 | _complaint_count => 0, | ||||
4102 | _saw_code_bug => -1, # -1=no 0=maybe 1=for sure | ||||
4103 | _saw_brace_error => 0, | ||||
4104 | _saw_extrude => $saw_extrude, | ||||
4105 | _output_array => [], | ||||
4106 | }, $class; | ||||
4107 | } | ||||
4108 | |||||
4109 | sub get_warning_count { | ||||
4110 | my $self = shift; | ||||
4111 | return $self->{_warning_count}; | ||||
4112 | } | ||||
4113 | |||||
4114 | sub get_use_prefix { | ||||
4115 | my $self = shift; | ||||
4116 | return $self->{_use_prefix}; | ||||
4117 | } | ||||
4118 | |||||
4119 | sub block_log_output { | ||||
4120 | my $self = shift; | ||||
4121 | $self->{_block_log_output} = 1; | ||||
4122 | } | ||||
4123 | |||||
4124 | sub unblock_log_output { | ||||
4125 | my $self = shift; | ||||
4126 | $self->{_block_log_output} = 0; | ||||
4127 | } | ||||
4128 | |||||
4129 | sub interrupt_logfile { | ||||
4130 | my $self = shift; | ||||
4131 | $self->{_use_prefix} = 0; | ||||
4132 | $self->warning("\n"); | ||||
4133 | $self->write_logfile_entry( '#' x 24 . " WARNING " . '#' x 25 . "\n" ); | ||||
4134 | } | ||||
4135 | |||||
4136 | sub resume_logfile { | ||||
4137 | my $self = shift; | ||||
4138 | $self->write_logfile_entry( '#' x 60 . "\n" ); | ||||
4139 | $self->{_use_prefix} = 1; | ||||
4140 | } | ||||
4141 | |||||
4142 | sub we_are_at_the_last_line { | ||||
4143 | my $self = shift; | ||||
4144 | unless ( $self->{_wrote_line_information_string} ) { | ||||
4145 | $self->write_logfile_entry("Last line\n\n"); | ||||
4146 | } | ||||
4147 | $self->{_at_end_of_file} = 1; | ||||
4148 | } | ||||
4149 | |||||
4150 | # record some stuff in case we go down in flames | ||||
4151 | sub black_box { | ||||
4152 | my $self = shift; | ||||
4153 | my ( $line_of_tokens, $output_line_number ) = @_; | ||||
4154 | my $input_line = $line_of_tokens->{_line_text}; | ||||
4155 | my $input_line_number = $line_of_tokens->{_line_number}; | ||||
4156 | |||||
4157 | # save line information in case we have to write a logfile message | ||||
4158 | $self->{_line_of_tokens} = $line_of_tokens; | ||||
4159 | $self->{_output_line_number} = $output_line_number; | ||||
4160 | $self->{_wrote_line_information_string} = 0; | ||||
4161 | |||||
4162 | my $last_input_line_written = $self->{_last_input_line_written}; | ||||
4163 | my $rOpts = $self->{_rOpts}; | ||||
4164 | if ( | ||||
4165 | ( | ||||
4166 | ( $input_line_number - $last_input_line_written ) >= | ||||
4167 | $self->{_logfile_gap} | ||||
4168 | ) | ||||
4169 | || ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) | ||||
4170 | ) | ||||
4171 | { | ||||
4172 | my $rlevels = $line_of_tokens->{_rlevels}; | ||||
4173 | my $structural_indentation_level = $$rlevels[0]; | ||||
4174 | $self->{_last_input_line_written} = $input_line_number; | ||||
4175 | ( my $out_str = $input_line ) =~ s/^\s*//; | ||||
4176 | chomp $out_str; | ||||
4177 | |||||
4178 | $out_str = ( '.' x $structural_indentation_level ) . $out_str; | ||||
4179 | |||||
4180 | if ( length($out_str) > 35 ) { | ||||
4181 | $out_str = substr( $out_str, 0, 35 ) . " ...."; | ||||
4182 | } | ||||
4183 | $self->logfile_output( "", "$out_str\n" ); | ||||
4184 | } | ||||
4185 | } | ||||
4186 | |||||
4187 | sub write_logfile_entry { | ||||
4188 | my $self = shift; | ||||
4189 | |||||
4190 | # add leading >>> to avoid confusing error messages and code | ||||
4191 | $self->logfile_output( ">>>", "@_" ); | ||||
4192 | } | ||||
4193 | |||||
4194 | sub write_column_headings { | ||||
4195 | my $self = shift; | ||||
4196 | |||||
4197 | $self->{_wrote_column_headings} = 1; | ||||
4198 | my $routput_array = $self->{_output_array}; | ||||
4199 | push @{$routput_array}, <<EOM; | ||||
4200 | The nesting depths in the table below are at the start of the lines. | ||||
4201 | The indicated output line numbers are not always exact. | ||||
4202 | ci = levels of continuation indentation; bk = 1 if in BLOCK, 0 if not. | ||||
4203 | |||||
4204 | in:out indent c b nesting code + messages; (messages begin with >>>) | ||||
4205 | lines levels i k (code begins with one '.' per indent level) | ||||
4206 | ------ ----- - - -------- ------------------------------------------- | ||||
4207 | EOM | ||||
4208 | } | ||||
4209 | |||||
4210 | sub make_line_information_string { | ||||
4211 | |||||
4212 | # make columns of information when a logfile message needs to go out | ||||
4213 | my $self = shift; | ||||
4214 | my $line_of_tokens = $self->{_line_of_tokens}; | ||||
4215 | my $input_line_number = $line_of_tokens->{_line_number}; | ||||
4216 | my $line_information_string = ""; | ||||
4217 | if ($input_line_number) { | ||||
4218 | |||||
4219 | my $output_line_number = $self->{_output_line_number}; | ||||
4220 | my $brace_depth = $line_of_tokens->{_curly_brace_depth}; | ||||
4221 | my $paren_depth = $line_of_tokens->{_paren_depth}; | ||||
4222 | my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth}; | ||||
4223 | my $guessed_indentation_level = | ||||
4224 | $line_of_tokens->{_guessed_indentation_level}; | ||||
4225 | my $rlevels = $line_of_tokens->{_rlevels}; | ||||
4226 | my $rnesting_tokens = $line_of_tokens->{_rnesting_tokens}; | ||||
4227 | my $rci_levels = $line_of_tokens->{_rci_levels}; | ||||
4228 | my $rnesting_blocks = $line_of_tokens->{_rnesting_blocks}; | ||||
4229 | |||||
4230 | my $structural_indentation_level = $$rlevels[0]; | ||||
4231 | |||||
4232 | $self->write_column_headings() unless $self->{_wrote_column_headings}; | ||||
4233 | |||||
4234 | # keep logfile columns aligned for scripts up to 999 lines; | ||||
4235 | # for longer scripts it doesn't really matter | ||||
4236 | my $extra_space = ""; | ||||
4237 | $extra_space .= | ||||
4238 | ( $input_line_number < 10 ) ? " " | ||||
4239 | : ( $input_line_number < 100 ) ? " " | ||||
4240 | : ""; | ||||
4241 | $extra_space .= | ||||
4242 | ( $output_line_number < 10 ) ? " " | ||||
4243 | : ( $output_line_number < 100 ) ? " " | ||||
4244 | : ""; | ||||
4245 | |||||
4246 | # there are 2 possible nesting strings: | ||||
4247 | # the original which looks like this: (0 [1 {2 | ||||
4248 | # the new one, which looks like this: {{[ | ||||
4249 | # the new one is easier to read, and shows the order, but | ||||
4250 | # could be arbitrarily long, so we use it unless it is too long | ||||
4251 | my $nesting_string = | ||||
4252 | "($paren_depth [$square_bracket_depth {$brace_depth"; | ||||
4253 | my $nesting_string_new = $$rnesting_tokens[0]; | ||||
4254 | |||||
4255 | my $ci_level = $$rci_levels[0]; | ||||
4256 | if ( $ci_level > 9 ) { $ci_level = '*' } | ||||
4257 | my $bk = ( $$rnesting_blocks[0] =~ /1$/ ) ? '1' : '0'; | ||||
4258 | |||||
4259 | if ( length($nesting_string_new) <= 8 ) { | ||||
4260 | $nesting_string = | ||||
4261 | $nesting_string_new . " " x ( 8 - length($nesting_string_new) ); | ||||
4262 | } | ||||
4263 | $line_information_string = | ||||
4264 | "L$input_line_number:$output_line_number$extra_space i$guessed_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string"; | ||||
4265 | } | ||||
4266 | return $line_information_string; | ||||
4267 | } | ||||
4268 | |||||
4269 | sub logfile_output { | ||||
4270 | my $self = shift; | ||||
4271 | my ( $prompt, $msg ) = @_; | ||||
4272 | return if ( $self->{_block_log_output} ); | ||||
4273 | |||||
4274 | my $routput_array = $self->{_output_array}; | ||||
4275 | if ( $self->{_at_end_of_file} || !$self->{_use_prefix} ) { | ||||
4276 | push @{$routput_array}, "$msg"; | ||||
4277 | } | ||||
4278 | else { | ||||
4279 | my $line_information_string = $self->make_line_information_string(); | ||||
4280 | $self->{_wrote_line_information_string} = 1; | ||||
4281 | |||||
4282 | if ($line_information_string) { | ||||
4283 | push @{$routput_array}, "$line_information_string $prompt$msg"; | ||||
4284 | } | ||||
4285 | else { | ||||
4286 | push @{$routput_array}, "$msg"; | ||||
4287 | } | ||||
4288 | } | ||||
4289 | } | ||||
4290 | |||||
4291 | sub get_saw_brace_error { | ||||
4292 | my $self = shift; | ||||
4293 | return $self->{_saw_brace_error}; | ||||
4294 | } | ||||
4295 | |||||
4296 | sub increment_brace_error { | ||||
4297 | my $self = shift; | ||||
4298 | $self->{_saw_brace_error}++; | ||||
4299 | } | ||||
4300 | |||||
4301 | sub brace_warning { | ||||
4302 | my $self = shift; | ||||
4303 | 2 | 128µs | 2 | 115µs | # spent 63µs (10+52) within Perl::Tidy::Logger::BEGIN@4303 which was called:
# once (10µs+52µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 4303 # spent 63µs making 1 call to Perl::Tidy::Logger::BEGIN@4303
# spent 52µs making 1 call to constant::import |
4304 | my $saw_brace_error = $self->{_saw_brace_error}; | ||||
4305 | |||||
4306 | if ( $saw_brace_error < BRACE_WARNING_LIMIT ) { | ||||
4307 | $self->warning(@_); | ||||
4308 | } | ||||
4309 | $saw_brace_error++; | ||||
4310 | $self->{_saw_brace_error} = $saw_brace_error; | ||||
4311 | |||||
4312 | if ( $saw_brace_error == BRACE_WARNING_LIMIT ) { | ||||
4313 | $self->warning("No further warnings of this type will be given\n"); | ||||
4314 | } | ||||
4315 | } | ||||
4316 | |||||
4317 | sub complain { | ||||
4318 | |||||
4319 | # handle non-critical warning messages based on input flag | ||||
4320 | my $self = shift; | ||||
4321 | my $rOpts = $self->{_rOpts}; | ||||
4322 | |||||
4323 | # these appear in .ERR output only if -w flag is used | ||||
4324 | if ( $rOpts->{'warning-output'} ) { | ||||
4325 | $self->warning(@_); | ||||
4326 | } | ||||
4327 | |||||
4328 | # otherwise, they go to the .LOG file | ||||
4329 | else { | ||||
4330 | $self->{_complaint_count}++; | ||||
4331 | $self->write_logfile_entry(@_); | ||||
4332 | } | ||||
4333 | } | ||||
4334 | |||||
4335 | sub warning { | ||||
4336 | |||||
4337 | # report errors to .ERR file (or stdout) | ||||
4338 | my $self = shift; | ||||
4339 | 2 | 578µs | 2 | 83µs | # spent 45µs (8+38) within Perl::Tidy::Logger::BEGIN@4339 which was called:
# once (8µs+38µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 4339 # spent 45µs making 1 call to Perl::Tidy::Logger::BEGIN@4339
# spent 38µs making 1 call to constant::import |
4340 | |||||
4341 | my $rOpts = $self->{_rOpts}; | ||||
4342 | unless ( $rOpts->{'quiet'} ) { | ||||
4343 | |||||
4344 | my $warning_count = $self->{_warning_count}; | ||||
4345 | my $fh_warnings = $self->{_fh_warnings}; | ||||
4346 | if ( !$fh_warnings ) { | ||||
4347 | my $warning_file = $self->{_warning_file}; | ||||
4348 | ( $fh_warnings, my $filename ) = | ||||
4349 | Perl::Tidy::streamhandle( $warning_file, 'w' ); | ||||
4350 | $fh_warnings or Perl::Tidy::Die("couldn't open $filename $!\n"); | ||||
4351 | Perl::Tidy::Warn "## Please see file $filename\n" | ||||
4352 | unless ref($warning_file); | ||||
4353 | $self->{_fh_warnings} = $fh_warnings; | ||||
4354 | } | ||||
4355 | |||||
4356 | if ( $warning_count < WARNING_LIMIT ) { | ||||
4357 | if ( $self->get_use_prefix() > 0 ) { | ||||
4358 | my $input_line_number = | ||||
4359 | Perl::Tidy::Tokenizer::get_input_line_number(); | ||||
4360 | if ( !defined($input_line_number) ) { $input_line_number = -1 } | ||||
4361 | $fh_warnings->print("$input_line_number:\t@_"); | ||||
4362 | $self->write_logfile_entry("WARNING: @_"); | ||||
4363 | } | ||||
4364 | else { | ||||
4365 | $fh_warnings->print(@_); | ||||
4366 | $self->write_logfile_entry(@_); | ||||
4367 | } | ||||
4368 | } | ||||
4369 | $warning_count++; | ||||
4370 | $self->{_warning_count} = $warning_count; | ||||
4371 | |||||
4372 | if ( $warning_count == WARNING_LIMIT ) { | ||||
4373 | $fh_warnings->print("No further warnings will be given\n"); | ||||
4374 | } | ||||
4375 | } | ||||
4376 | } | ||||
4377 | |||||
4378 | # programming bug codes: | ||||
4379 | # -1 = no bug | ||||
4380 | # 0 = maybe, not sure. | ||||
4381 | # 1 = definitely | ||||
4382 | sub report_possible_bug { | ||||
4383 | my $self = shift; | ||||
4384 | my $saw_code_bug = $self->{_saw_code_bug}; | ||||
4385 | $self->{_saw_code_bug} = ( $saw_code_bug < 0 ) ? 0 : $saw_code_bug; | ||||
4386 | } | ||||
4387 | |||||
4388 | sub report_definite_bug { | ||||
4389 | my $self = shift; | ||||
4390 | $self->{_saw_code_bug} = 1; | ||||
4391 | } | ||||
4392 | |||||
4393 | sub ask_user_for_bug_report { | ||||
4394 | my $self = shift; | ||||
4395 | |||||
4396 | my ( $infile_syntax_ok, $formatter ) = @_; | ||||
4397 | my $saw_code_bug = $self->{_saw_code_bug}; | ||||
4398 | if ( ( $saw_code_bug == 0 ) && ( $infile_syntax_ok == 1 ) ) { | ||||
4399 | $self->warning(<<EOM); | ||||
4400 | |||||
4401 | You may have encountered a code bug in perltidy. If you think so, and | ||||
4402 | the problem is not listed in the BUGS file at | ||||
4403 | http://perltidy.sourceforge.net, please report it so that it can be | ||||
4404 | corrected. Include the smallest possible script which has the problem, | ||||
4405 | along with the .LOG file. See the manual pages for contact information. | ||||
4406 | Thank you! | ||||
4407 | EOM | ||||
4408 | |||||
4409 | } | ||||
4410 | elsif ( $saw_code_bug == 1 ) { | ||||
4411 | if ( $self->{_saw_extrude} ) { | ||||
4412 | $self->warning(<<EOM); | ||||
4413 | |||||
4414 | You may have encountered a bug in perltidy. However, since you are using the | ||||
4415 | -extrude option, the problem may be with perl or one of its modules, which have | ||||
4416 | occasional problems with this type of file. If you believe that the | ||||
4417 | problem is with perltidy, and the problem is not listed in the BUGS file at | ||||
4418 | http://perltidy.sourceforge.net, please report it so that it can be corrected. | ||||
4419 | Include the smallest possible script which has the problem, along with the .LOG | ||||
4420 | file. See the manual pages for contact information. | ||||
4421 | Thank you! | ||||
4422 | EOM | ||||
4423 | } | ||||
4424 | else { | ||||
4425 | $self->warning(<<EOM); | ||||
4426 | |||||
4427 | Oops, you seem to have encountered a bug in perltidy. Please check the | ||||
4428 | BUGS file at http://perltidy.sourceforge.net. If the problem is not | ||||
4429 | listed there, please report it so that it can be corrected. Include the | ||||
4430 | smallest possible script which produces this message, along with the | ||||
4431 | .LOG file if appropriate. See the manual pages for contact information. | ||||
4432 | Your efforts are appreciated. | ||||
4433 | Thank you! | ||||
4434 | EOM | ||||
4435 | my $added_semicolon_count = 0; | ||||
4436 | eval { | ||||
4437 | $added_semicolon_count = | ||||
4438 | $formatter->get_added_semicolon_count(); | ||||
4439 | }; | ||||
4440 | if ( $added_semicolon_count > 0 ) { | ||||
4441 | $self->warning(<<EOM); | ||||
4442 | |||||
4443 | The log file shows that perltidy added $added_semicolon_count semicolons. | ||||
4444 | Please rerun with -nasc to see if that is the cause of the syntax error. Even | ||||
4445 | if that is the problem, please report it so that it can be fixed. | ||||
4446 | EOM | ||||
4447 | |||||
4448 | } | ||||
4449 | } | ||||
4450 | } | ||||
4451 | } | ||||
4452 | |||||
4453 | sub finish { | ||||
4454 | |||||
4455 | # called after all formatting to summarize errors | ||||
4456 | my $self = shift; | ||||
4457 | my ( $infile_syntax_ok, $formatter ) = @_; | ||||
4458 | |||||
4459 | my $rOpts = $self->{_rOpts}; | ||||
4460 | my $warning_count = $self->{_warning_count}; | ||||
4461 | my $saw_code_bug = $self->{_saw_code_bug}; | ||||
4462 | |||||
4463 | my $save_logfile = | ||||
4464 | ( $saw_code_bug == 0 && $infile_syntax_ok == 1 ) | ||||
4465 | || $saw_code_bug == 1 | ||||
4466 | || $rOpts->{'logfile'}; | ||||
4467 | my $log_file = $self->{_log_file}; | ||||
4468 | if ($warning_count) { | ||||
4469 | if ($save_logfile) { | ||||
4470 | $self->block_log_output(); # avoid echoing this to the logfile | ||||
4471 | $self->warning( | ||||
4472 | "The logfile $log_file may contain useful information\n"); | ||||
4473 | $self->unblock_log_output(); | ||||
4474 | } | ||||
4475 | |||||
4476 | if ( $self->{_complaint_count} > 0 ) { | ||||
4477 | $self->warning( | ||||
4478 | "To see $self->{_complaint_count} non-critical warnings rerun with -w\n" | ||||
4479 | ); | ||||
4480 | } | ||||
4481 | |||||
4482 | if ( $self->{_saw_brace_error} | ||||
4483 | && ( $self->{_logfile_gap} > 1 || !$save_logfile ) ) | ||||
4484 | { | ||||
4485 | $self->warning("To save a full .LOG file rerun with -g\n"); | ||||
4486 | } | ||||
4487 | } | ||||
4488 | $self->ask_user_for_bug_report( $infile_syntax_ok, $formatter ); | ||||
4489 | |||||
4490 | if ($save_logfile) { | ||||
4491 | my $log_file = $self->{_log_file}; | ||||
4492 | my ( $fh, $filename ) = Perl::Tidy::streamhandle( $log_file, 'w' ); | ||||
4493 | if ($fh) { | ||||
4494 | my $routput_array = $self->{_output_array}; | ||||
4495 | foreach ( @{$routput_array} ) { $fh->print($_) } | ||||
4496 | if ( $log_file ne '-' && !ref $log_file ) { | ||||
4497 | eval { $fh->close() }; | ||||
4498 | } | ||||
4499 | } | ||||
4500 | } | ||||
4501 | } | ||||
4502 | |||||
4503 | ##################################################################### | ||||
4504 | # | ||||
4505 | # The Perl::Tidy::DevNull class supplies a dummy print method | ||||
4506 | # | ||||
4507 | ##################################################################### | ||||
4508 | |||||
4509 | package Perl::Tidy::DevNull; | ||||
4510 | sub new { return bless {}, $_[0] } | ||||
4511 | sub print { return } | ||||
4512 | sub close { return } | ||||
4513 | |||||
4514 | ##################################################################### | ||||
4515 | # | ||||
4516 | # The Perl::Tidy::HtmlWriter class writes a copy of the input stream in html | ||||
4517 | # | ||||
4518 | ##################################################################### | ||||
4519 | |||||
4520 | package Perl::Tidy::HtmlWriter; | ||||
4521 | |||||
4522 | 2 | 38µs | 2 | 80µs | # spent 45µs (10+35) within Perl::Tidy::HtmlWriter::BEGIN@4522 which was called:
# once (10µs+35µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 4522 # spent 45µs making 1 call to Perl::Tidy::HtmlWriter::BEGIN@4522
# spent 35µs making 1 call to Exporter::import |
4523 | |||||
4524 | # class variables | ||||
4525 | 1 | 600ns | # spent 110µs (9+101) within Perl::Tidy::HtmlWriter::BEGIN@4525 which was called:
# once (9µs+101µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 4535 | ||
4526 | %html_color | ||||
4527 | %html_bold | ||||
4528 | %html_italic | ||||
4529 | %token_short_names | ||||
4530 | %short_to_long_names | ||||
4531 | $rOpts | ||||
4532 | $css_filename | ||||
4533 | $css_linkname | ||||
4534 | $missing_html_entities | ||||
4535 | 1 | 765µs | 2 | 211µs | }; # spent 110µs making 1 call to Perl::Tidy::HtmlWriter::BEGIN@4525
# spent 101µs making 1 call to vars::import |
4536 | |||||
4537 | # replace unsafe characters with HTML entity representation if HTML::Entities | ||||
4538 | # is available | ||||
4539 | 3 | 34µs | { eval "use HTML::Entities"; $missing_html_entities = $@; } # spent 125µs executing statements in string eval # includes 3.00ms spent executing 1 call to 1 sub defined therein. | ||
4540 | |||||
4541 | sub new { | ||||
4542 | |||||
4543 | my ( $class, $input_file, $html_file, $extension, $html_toc_extension, | ||||
4544 | $html_src_extension ) | ||||
4545 | = @_; | ||||
4546 | |||||
4547 | my $html_file_opened = 0; | ||||
4548 | my $html_fh; | ||||
4549 | ( $html_fh, my $html_filename ) = | ||||
4550 | Perl::Tidy::streamhandle( $html_file, 'w' ); | ||||
4551 | unless ($html_fh) { | ||||
4552 | Perl::Tidy::Warn("can't open $html_file: $!\n"); | ||||
4553 | return undef; | ||||
4554 | } | ||||
4555 | $html_file_opened = 1; | ||||
4556 | |||||
4557 | if ( !$input_file || $input_file eq '-' || ref($input_file) ) { | ||||
4558 | $input_file = "NONAME"; | ||||
4559 | } | ||||
4560 | |||||
4561 | # write the table of contents to a string | ||||
4562 | my $toc_string; | ||||
4563 | my $html_toc_fh = Perl::Tidy::IOScalar->new( \$toc_string, 'w' ); | ||||
4564 | |||||
4565 | my $html_pre_fh; | ||||
4566 | my @pre_string_stack; | ||||
4567 | if ( $rOpts->{'html-pre-only'} ) { | ||||
4568 | |||||
4569 | # pre section goes directly to the output stream | ||||
4570 | $html_pre_fh = $html_fh; | ||||
4571 | $html_pre_fh->print( <<"PRE_END"); | ||||
4572 | <pre> | ||||
4573 | PRE_END | ||||
4574 | } | ||||
4575 | else { | ||||
4576 | |||||
4577 | # pre section go out to a temporary string | ||||
4578 | my $pre_string; | ||||
4579 | $html_pre_fh = Perl::Tidy::IOScalar->new( \$pre_string, 'w' ); | ||||
4580 | push @pre_string_stack, \$pre_string; | ||||
4581 | } | ||||
4582 | |||||
4583 | # pod text gets diverted if the 'pod2html' is used | ||||
4584 | my $html_pod_fh; | ||||
4585 | my $pod_string; | ||||
4586 | if ( $rOpts->{'pod2html'} ) { | ||||
4587 | if ( $rOpts->{'html-pre-only'} ) { | ||||
4588 | undef $rOpts->{'pod2html'}; | ||||
4589 | } | ||||
4590 | else { | ||||
4591 | eval "use Pod::Html"; | ||||
4592 | if ($@) { | ||||
4593 | Perl::Tidy::Warn | ||||
4594 | "unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n"; | ||||
4595 | undef $rOpts->{'pod2html'}; | ||||
4596 | } | ||||
4597 | else { | ||||
4598 | $html_pod_fh = Perl::Tidy::IOScalar->new( \$pod_string, 'w' ); | ||||
4599 | } | ||||
4600 | } | ||||
4601 | } | ||||
4602 | |||||
4603 | my $toc_filename; | ||||
4604 | my $src_filename; | ||||
4605 | if ( $rOpts->{'frames'} ) { | ||||
4606 | unless ($extension) { | ||||
4607 | Perl::Tidy::Warn | ||||
4608 | "cannot use frames without a specified output extension; ignoring -frm\n"; | ||||
4609 | undef $rOpts->{'frames'}; | ||||
4610 | } | ||||
4611 | else { | ||||
4612 | $toc_filename = $input_file . $html_toc_extension . $extension; | ||||
4613 | $src_filename = $input_file . $html_src_extension . $extension; | ||||
4614 | } | ||||
4615 | } | ||||
4616 | |||||
4617 | # ---------------------------------------------------------- | ||||
4618 | # Output is now directed as follows: | ||||
4619 | # html_toc_fh <-- table of contents items | ||||
4620 | # html_pre_fh <-- the <pre> section of formatted code, except: | ||||
4621 | # html_pod_fh <-- pod goes here with the pod2html option | ||||
4622 | # ---------------------------------------------------------- | ||||
4623 | |||||
4624 | my $title = $rOpts->{'title'}; | ||||
4625 | unless ($title) { | ||||
4626 | ( $title, my $path ) = fileparse($input_file); | ||||
4627 | } | ||||
4628 | my $toc_item_count = 0; | ||||
4629 | my $in_toc_package = ""; | ||||
4630 | my $last_level = 0; | ||||
4631 | bless { | ||||
4632 | _input_file => $input_file, # name of input file | ||||
4633 | _title => $title, # title, unescaped | ||||
4634 | _html_file => $html_file, # name of .html output file | ||||
4635 | _toc_filename => $toc_filename, # for frames option | ||||
4636 | _src_filename => $src_filename, # for frames option | ||||
4637 | _html_file_opened => $html_file_opened, # a flag | ||||
4638 | _html_fh => $html_fh, # the output stream | ||||
4639 | _html_pre_fh => $html_pre_fh, # pre section goes here | ||||
4640 | _rpre_string_stack => \@pre_string_stack, # stack of pre sections | ||||
4641 | _html_pod_fh => $html_pod_fh, # pod goes here if pod2html | ||||
4642 | _rpod_string => \$pod_string, # string holding pod | ||||
4643 | _pod_cut_count => 0, # how many =cut's? | ||||
4644 | _html_toc_fh => $html_toc_fh, # fh for table of contents | ||||
4645 | _rtoc_string => \$toc_string, # string holding toc | ||||
4646 | _rtoc_item_count => \$toc_item_count, # how many toc items | ||||
4647 | _rin_toc_package => \$in_toc_package, # package name | ||||
4648 | _rtoc_name_count => {}, # hash to track unique names | ||||
4649 | _rpackage_stack => [], # stack to check for package | ||||
4650 | # name changes | ||||
4651 | _rlast_level => \$last_level, # brace indentation level | ||||
4652 | }, $class; | ||||
4653 | } | ||||
4654 | |||||
4655 | sub add_toc_item { | ||||
4656 | |||||
4657 | # Add an item to the html table of contents. | ||||
4658 | # This is called even if no table of contents is written, | ||||
4659 | # because we still want to put the anchors in the <pre> text. | ||||
4660 | # We are given an anchor name and its type; types are: | ||||
4661 | # 'package', 'sub', '__END__', '__DATA__', 'EOF' | ||||
4662 | # There must be an 'EOF' call at the end to wrap things up. | ||||
4663 | my $self = shift; | ||||
4664 | my ( $name, $type ) = @_; | ||||
4665 | my $html_toc_fh = $self->{_html_toc_fh}; | ||||
4666 | my $html_pre_fh = $self->{_html_pre_fh}; | ||||
4667 | my $rtoc_name_count = $self->{_rtoc_name_count}; | ||||
4668 | my $rtoc_item_count = $self->{_rtoc_item_count}; | ||||
4669 | my $rlast_level = $self->{_rlast_level}; | ||||
4670 | my $rin_toc_package = $self->{_rin_toc_package}; | ||||
4671 | my $rpackage_stack = $self->{_rpackage_stack}; | ||||
4672 | |||||
4673 | # packages contain sublists of subs, so to avoid errors all package | ||||
4674 | # items are written and finished with the following routines | ||||
4675 | my $end_package_list = sub { | ||||
4676 | if ($$rin_toc_package) { | ||||
4677 | $html_toc_fh->print("</ul>\n</li>\n"); | ||||
4678 | $$rin_toc_package = ""; | ||||
4679 | } | ||||
4680 | }; | ||||
4681 | |||||
4682 | my $start_package_list = sub { | ||||
4683 | my ( $unique_name, $package ) = @_; | ||||
4684 | if ($$rin_toc_package) { $end_package_list->() } | ||||
4685 | $html_toc_fh->print(<<EOM); | ||||
4686 | <li><a href=\"#$unique_name\">package $package</a> | ||||
4687 | <ul> | ||||
4688 | EOM | ||||
4689 | $$rin_toc_package = $package; | ||||
4690 | }; | ||||
4691 | |||||
4692 | # start the table of contents on the first item | ||||
4693 | unless ($$rtoc_item_count) { | ||||
4694 | |||||
4695 | # but just quit if we hit EOF without any other entries | ||||
4696 | # in this case, there will be no toc | ||||
4697 | return if ( $type eq 'EOF' ); | ||||
4698 | $html_toc_fh->print( <<"TOC_END"); | ||||
4699 | <!-- BEGIN CODE INDEX --><a name="code-index"></a> | ||||
4700 | <ul> | ||||
4701 | TOC_END | ||||
4702 | } | ||||
4703 | $$rtoc_item_count++; | ||||
4704 | |||||
4705 | # make a unique anchor name for this location: | ||||
4706 | # - packages get a 'package-' prefix | ||||
4707 | # - subs use their names | ||||
4708 | my $unique_name = $name; | ||||
4709 | if ( $type eq 'package' ) { $unique_name = "package-$name" } | ||||
4710 | |||||
4711 | # append '-1', '-2', etc if necessary to make unique; this will | ||||
4712 | # be unique because subs and packages cannot have a '-' | ||||
4713 | if ( my $count = $rtoc_name_count->{ lc $unique_name }++ ) { | ||||
4714 | $unique_name .= "-$count"; | ||||
4715 | } | ||||
4716 | |||||
4717 | # - all names get terminal '-' if pod2html is used, to avoid | ||||
4718 | # conflicts with anchor names created by pod2html | ||||
4719 | if ( $rOpts->{'pod2html'} ) { $unique_name .= '-' } | ||||
4720 | |||||
4721 | # start/stop lists of subs | ||||
4722 | if ( $type eq 'sub' ) { | ||||
4723 | my $package = $rpackage_stack->[$$rlast_level]; | ||||
4724 | unless ($package) { $package = 'main' } | ||||
4725 | |||||
4726 | # if we're already in a package/sub list, be sure its the right | ||||
4727 | # package or else close it | ||||
4728 | if ( $$rin_toc_package && $$rin_toc_package ne $package ) { | ||||
4729 | $end_package_list->(); | ||||
4730 | } | ||||
4731 | |||||
4732 | # start a package/sub list if necessary | ||||
4733 | unless ($$rin_toc_package) { | ||||
4734 | $start_package_list->( $unique_name, $package ); | ||||
4735 | } | ||||
4736 | } | ||||
4737 | |||||
4738 | # now write an entry in the toc for this item | ||||
4739 | if ( $type eq 'package' ) { | ||||
4740 | $start_package_list->( $unique_name, $name ); | ||||
4741 | } | ||||
4742 | elsif ( $type eq 'sub' ) { | ||||
4743 | $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n"); | ||||
4744 | } | ||||
4745 | else { | ||||
4746 | $end_package_list->(); | ||||
4747 | $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n"); | ||||
4748 | } | ||||
4749 | |||||
4750 | # write the anchor in the <pre> section | ||||
4751 | $html_pre_fh->print("<a name=\"$unique_name\"></a>"); | ||||
4752 | |||||
4753 | # end the table of contents, if any, on the end of file | ||||
4754 | if ( $type eq 'EOF' ) { | ||||
4755 | $html_toc_fh->print( <<"TOC_END"); | ||||
4756 | </ul> | ||||
4757 | <!-- END CODE INDEX --> | ||||
4758 | TOC_END | ||||
4759 | } | ||||
4760 | } | ||||
4761 | |||||
4762 | # spent 50µs within Perl::Tidy::HtmlWriter::BEGIN@4762 which was called:
# once (50µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 4867 | ||||
4763 | |||||
4764 | # This is the official list of tokens which may be identified by the | ||||
4765 | # user. Long names are used as getopt keys. Short names are | ||||
4766 | # convenient short abbreviations for specifying input. Short names | ||||
4767 | # somewhat resemble token type characters, but are often different | ||||
4768 | # because they may only be alphanumeric, to allow command line | ||||
4769 | # input. Also, note that because of case insensitivity of html, | ||||
4770 | # this table must be in a single case only (I've chosen to use all | ||||
4771 | # lower case). | ||||
4772 | # When adding NEW_TOKENS: update this hash table | ||||
4773 | # short names => long names | ||||
4774 | 1 | 25µs | %short_to_long_names = ( | ||
4775 | 'n' => 'numeric', | ||||
4776 | 'p' => 'paren', | ||||
4777 | 'q' => 'quote', | ||||
4778 | 's' => 'structure', | ||||
4779 | 'c' => 'comment', | ||||
4780 | 'v' => 'v-string', | ||||
4781 | 'cm' => 'comma', | ||||
4782 | 'w' => 'bareword', | ||||
4783 | 'co' => 'colon', | ||||
4784 | 'pu' => 'punctuation', | ||||
4785 | 'i' => 'identifier', | ||||
4786 | 'j' => 'label', | ||||
4787 | 'h' => 'here-doc-target', | ||||
4788 | 'hh' => 'here-doc-text', | ||||
4789 | 'k' => 'keyword', | ||||
4790 | 'sc' => 'semicolon', | ||||
4791 | 'm' => 'subroutine', | ||||
4792 | 'pd' => 'pod-text', | ||||
4793 | ); | ||||
4794 | |||||
4795 | # Now we have to map actual token types into one of the above short | ||||
4796 | # names; any token types not mapped will get 'punctuation' | ||||
4797 | # properties. | ||||
4798 | |||||
4799 | # The values of this hash table correspond to the keys of the | ||||
4800 | # previous hash table. | ||||
4801 | # The keys of this hash table are token types and can be seen | ||||
4802 | # by running with --dump-token-types (-dtt). | ||||
4803 | |||||
4804 | # When adding NEW_TOKENS: update this hash table | ||||
4805 | # $type => $short_name | ||||
4806 | 1 | 8µs | %token_short_names = ( | ||
4807 | '#' => 'c', | ||||
4808 | 'n' => 'n', | ||||
4809 | 'v' => 'v', | ||||
4810 | 'k' => 'k', | ||||
4811 | 'F' => 'k', | ||||
4812 | 'Q' => 'q', | ||||
4813 | 'q' => 'q', | ||||
4814 | 'J' => 'j', | ||||
4815 | 'j' => 'j', | ||||
4816 | 'h' => 'h', | ||||
4817 | 'H' => 'hh', | ||||
4818 | 'w' => 'w', | ||||
4819 | ',' => 'cm', | ||||
4820 | '=>' => 'cm', | ||||
4821 | ';' => 'sc', | ||||
4822 | ':' => 'co', | ||||
4823 | 'f' => 'sc', | ||||
4824 | '(' => 'p', | ||||
4825 | ')' => 'p', | ||||
4826 | 'M' => 'm', | ||||
4827 | 'P' => 'pd', | ||||
4828 | 'A' => 'co', | ||||
4829 | ); | ||||
4830 | |||||
4831 | # These token types will all be called identifiers for now | ||||
4832 | # FIXME: could separate user defined modules as separate type | ||||
4833 | 1 | 2µs | my @identifier = qw" i t U C Y Z G :: CORE::"; | ||
4834 | 1 | 7µs | @token_short_names{@identifier} = ('i') x scalar(@identifier); | ||
4835 | |||||
4836 | # These token types will be called 'structure' | ||||
4837 | 1 | 500ns | my @structure = qw" { } "; | ||
4838 | 1 | 9µs | @token_short_names{@structure} = ('s') x scalar(@structure); | ||
4839 | |||||
4840 | # OLD NOTES: save for reference | ||||
4841 | # Any of these could be added later if it would be useful. | ||||
4842 | # For now, they will by default become punctuation | ||||
4843 | # my @list = qw" L R [ ] "; | ||||
4844 | # @token_long_names{@list} = ('non-structure') x scalar(@list); | ||||
4845 | # | ||||
4846 | # my @list = qw" | ||||
4847 | # / /= * *= ** **= + += - -= % %= = ++ -- << <<= >> >>= pp p m mm | ||||
4848 | # "; | ||||
4849 | # @token_long_names{@list} = ('math') x scalar(@list); | ||||
4850 | # | ||||
4851 | # my @list = qw" & &= ~ ~= ^ ^= | |= "; | ||||
4852 | # @token_long_names{@list} = ('bit') x scalar(@list); | ||||
4853 | # | ||||
4854 | # my @list = qw" == != < > <= <=> "; | ||||
4855 | # @token_long_names{@list} = ('numerical-comparison') x scalar(@list); | ||||
4856 | # | ||||
4857 | # my @list = qw" && || ! &&= ||= //= "; | ||||
4858 | # @token_long_names{@list} = ('logical') x scalar(@list); | ||||
4859 | # | ||||
4860 | # my @list = qw" . .= =~ !~ x x= "; | ||||
4861 | # @token_long_names{@list} = ('string-operators') x scalar(@list); | ||||
4862 | # | ||||
4863 | # # Incomplete.. | ||||
4864 | # my @list = qw" .. -> <> ... \ ? "; | ||||
4865 | # @token_long_names{@list} = ('misc-operators') x scalar(@list); | ||||
4866 | |||||
4867 | 1 | 384µs | 1 | 50µs | } # spent 50µs making 1 call to Perl::Tidy::HtmlWriter::BEGIN@4762 |
4868 | |||||
4869 | sub make_getopt_long_names { | ||||
4870 | my $class = shift; | ||||
4871 | my ($rgetopt_names) = @_; | ||||
4872 | while ( my ( $short_name, $name ) = each %short_to_long_names ) { | ||||
4873 | push @$rgetopt_names, "html-color-$name=s"; | ||||
4874 | push @$rgetopt_names, "html-italic-$name!"; | ||||
4875 | push @$rgetopt_names, "html-bold-$name!"; | ||||
4876 | } | ||||
4877 | push @$rgetopt_names, "html-color-background=s"; | ||||
4878 | push @$rgetopt_names, "html-linked-style-sheet=s"; | ||||
4879 | push @$rgetopt_names, "nohtml-style-sheets"; | ||||
4880 | push @$rgetopt_names, "html-pre-only"; | ||||
4881 | push @$rgetopt_names, "html-line-numbers"; | ||||
4882 | push @$rgetopt_names, "html-entities!"; | ||||
4883 | push @$rgetopt_names, "stylesheet"; | ||||
4884 | push @$rgetopt_names, "html-table-of-contents!"; | ||||
4885 | push @$rgetopt_names, "pod2html!"; | ||||
4886 | push @$rgetopt_names, "frames!"; | ||||
4887 | push @$rgetopt_names, "html-toc-extension=s"; | ||||
4888 | push @$rgetopt_names, "html-src-extension=s"; | ||||
4889 | |||||
4890 | # Pod::Html parameters: | ||||
4891 | push @$rgetopt_names, "backlink=s"; | ||||
4892 | push @$rgetopt_names, "cachedir=s"; | ||||
4893 | push @$rgetopt_names, "htmlroot=s"; | ||||
4894 | push @$rgetopt_names, "libpods=s"; | ||||
4895 | push @$rgetopt_names, "podpath=s"; | ||||
4896 | push @$rgetopt_names, "podroot=s"; | ||||
4897 | push @$rgetopt_names, "title=s"; | ||||
4898 | |||||
4899 | # Pod::Html parameters with leading 'pod' which will be removed | ||||
4900 | # before the call to Pod::Html | ||||
4901 | push @$rgetopt_names, "podquiet!"; | ||||
4902 | push @$rgetopt_names, "podverbose!"; | ||||
4903 | push @$rgetopt_names, "podrecurse!"; | ||||
4904 | push @$rgetopt_names, "podflush"; | ||||
4905 | push @$rgetopt_names, "podheader!"; | ||||
4906 | push @$rgetopt_names, "podindex!"; | ||||
4907 | } | ||||
4908 | |||||
4909 | sub make_abbreviated_names { | ||||
4910 | |||||
4911 | # We're appending things like this to the expansion list: | ||||
4912 | # 'hcc' => [qw(html-color-comment)], | ||||
4913 | # 'hck' => [qw(html-color-keyword)], | ||||
4914 | # etc | ||||
4915 | my $class = shift; | ||||
4916 | my ($rexpansion) = @_; | ||||
4917 | |||||
4918 | # abbreviations for color/bold/italic properties | ||||
4919 | while ( my ( $short_name, $long_name ) = each %short_to_long_names ) { | ||||
4920 | ${$rexpansion}{"hc$short_name"} = ["html-color-$long_name"]; | ||||
4921 | ${$rexpansion}{"hb$short_name"} = ["html-bold-$long_name"]; | ||||
4922 | ${$rexpansion}{"hi$short_name"} = ["html-italic-$long_name"]; | ||||
4923 | ${$rexpansion}{"nhb$short_name"} = ["nohtml-bold-$long_name"]; | ||||
4924 | ${$rexpansion}{"nhi$short_name"} = ["nohtml-italic-$long_name"]; | ||||
4925 | } | ||||
4926 | |||||
4927 | # abbreviations for all other html options | ||||
4928 | ${$rexpansion}{"hcbg"} = ["html-color-background"]; | ||||
4929 | ${$rexpansion}{"pre"} = ["html-pre-only"]; | ||||
4930 | ${$rexpansion}{"toc"} = ["html-table-of-contents"]; | ||||
4931 | ${$rexpansion}{"ntoc"} = ["nohtml-table-of-contents"]; | ||||
4932 | ${$rexpansion}{"nnn"} = ["html-line-numbers"]; | ||||
4933 | ${$rexpansion}{"hent"} = ["html-entities"]; | ||||
4934 | ${$rexpansion}{"nhent"} = ["nohtml-entities"]; | ||||
4935 | ${$rexpansion}{"css"} = ["html-linked-style-sheet"]; | ||||
4936 | ${$rexpansion}{"nss"} = ["nohtml-style-sheets"]; | ||||
4937 | ${$rexpansion}{"ss"} = ["stylesheet"]; | ||||
4938 | ${$rexpansion}{"pod"} = ["pod2html"]; | ||||
4939 | ${$rexpansion}{"npod"} = ["nopod2html"]; | ||||
4940 | ${$rexpansion}{"frm"} = ["frames"]; | ||||
4941 | ${$rexpansion}{"nfrm"} = ["noframes"]; | ||||
4942 | ${$rexpansion}{"text"} = ["html-toc-extension"]; | ||||
4943 | ${$rexpansion}{"sext"} = ["html-src-extension"]; | ||||
4944 | } | ||||
4945 | |||||
4946 | sub check_options { | ||||
4947 | |||||
4948 | # This will be called once after options have been parsed | ||||
4949 | my $class = shift; | ||||
4950 | $rOpts = shift; | ||||
4951 | |||||
4952 | # X11 color names for default settings that seemed to look ok | ||||
4953 | # (these color names are only used for programming clarity; the hex | ||||
4954 | # numbers are actually written) | ||||
4955 | 2 | 24µs | 2 | 79µs | # spent 43µs (8+35) within Perl::Tidy::HtmlWriter::BEGIN@4955 which was called:
# once (8µs+35µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 4955 # spent 43µs making 1 call to Perl::Tidy::HtmlWriter::BEGIN@4955
# spent 36µs making 1 call to constant::import |
4956 | 2 | 22µs | 2 | 76µs | # spent 42µs (8+34) within Perl::Tidy::HtmlWriter::BEGIN@4956 which was called:
# once (8µs+34µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 4956 # spent 42µs making 1 call to Perl::Tidy::HtmlWriter::BEGIN@4956
# spent 34µs making 1 call to constant::import |
4957 | 2 | 21µs | 2 | 66µs | # spent 36µs (6+30) within Perl::Tidy::HtmlWriter::BEGIN@4957 which was called:
# once (6µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 4957 # spent 36µs making 1 call to Perl::Tidy::HtmlWriter::BEGIN@4957
# spent 30µs making 1 call to constant::import |
4958 | 2 | 20µs | 2 | 63µs | # spent 35µs (6+28) within Perl::Tidy::HtmlWriter::BEGIN@4958 which was called:
# once (6µs+28µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 4958 # spent 35µs making 1 call to Perl::Tidy::HtmlWriter::BEGIN@4958
# spent 28µs making 1 call to constant::import |
4959 | 2 | 20µs | 2 | 63µs | # spent 35µs (6+28) within Perl::Tidy::HtmlWriter::BEGIN@4959 which was called:
# once (6µs+28µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 4959 # spent 35µs making 1 call to Perl::Tidy::HtmlWriter::BEGIN@4959
# spent 28µs making 1 call to constant::import |
4960 | 2 | 24µs | 2 | 62µs | # spent 34µs (6+28) within Perl::Tidy::HtmlWriter::BEGIN@4960 which was called:
# once (6µs+28µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 4960 # spent 34µs making 1 call to Perl::Tidy::HtmlWriter::BEGIN@4960
# spent 28µs making 1 call to constant::import |
4961 | 2 | 24µs | 2 | 63µs | # spent 35µs (7+28) within Perl::Tidy::HtmlWriter::BEGIN@4961 which was called:
# once (7µs+28µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 4961 # spent 35µs making 1 call to Perl::Tidy::HtmlWriter::BEGIN@4961
# spent 28µs making 1 call to constant::import |
4962 | 2 | 20µs | 2 | 62µs | # spent 34µs (6+28) within Perl::Tidy::HtmlWriter::BEGIN@4962 which was called:
# once (6µs+28µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 4962 # spent 34µs making 1 call to Perl::Tidy::HtmlWriter::BEGIN@4962
# spent 28µs making 1 call to constant::import |
4963 | 2 | 3.63ms | 2 | 69µs | # spent 38µs (6+31) within Perl::Tidy::HtmlWriter::BEGIN@4963 which was called:
# once (6µs+31µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 4963 # spent 38µs making 1 call to Perl::Tidy::HtmlWriter::BEGIN@4963
# spent 31µs making 1 call to constant::import |
4964 | |||||
4965 | # set default color, bold, italic properties | ||||
4966 | # anything not listed here will be given the default (punctuation) color -- | ||||
4967 | # these types currently not listed and get default: ws pu s sc cm co p | ||||
4968 | # When adding NEW_TOKENS: add an entry here if you don't want defaults | ||||
4969 | |||||
4970 | # set_default_properties( $short_name, default_color, bold?, italic? ); | ||||
4971 | set_default_properties( 'c', ForestGreen, 0, 0 ); | ||||
4972 | set_default_properties( 'pd', ForestGreen, 0, 1 ); | ||||
4973 | set_default_properties( 'k', magenta4, 1, 0 ); # was SaddleBrown | ||||
4974 | set_default_properties( 'q', IndianRed3, 0, 0 ); | ||||
4975 | set_default_properties( 'hh', IndianRed3, 0, 1 ); | ||||
4976 | set_default_properties( 'h', IndianRed3, 1, 0 ); | ||||
4977 | set_default_properties( 'i', DeepSkyBlue4, 0, 0 ); | ||||
4978 | set_default_properties( 'w', black, 0, 0 ); | ||||
4979 | set_default_properties( 'n', MediumOrchid3, 0, 0 ); | ||||
4980 | set_default_properties( 'v', MediumOrchid3, 0, 0 ); | ||||
4981 | set_default_properties( 'j', IndianRed3, 1, 0 ); | ||||
4982 | set_default_properties( 'm', red, 1, 0 ); | ||||
4983 | |||||
4984 | set_default_color( 'html-color-background', white ); | ||||
4985 | set_default_color( 'html-color-punctuation', black ); | ||||
4986 | |||||
4987 | # setup property lookup tables for tokens based on their short names | ||||
4988 | # every token type has a short name, and will use these tables | ||||
4989 | # to do the html markup | ||||
4990 | while ( my ( $short_name, $long_name ) = each %short_to_long_names ) { | ||||
4991 | $html_color{$short_name} = $rOpts->{"html-color-$long_name"}; | ||||
4992 | $html_bold{$short_name} = $rOpts->{"html-bold-$long_name"}; | ||||
4993 | $html_italic{$short_name} = $rOpts->{"html-italic-$long_name"}; | ||||
4994 | } | ||||
4995 | |||||
4996 | # write style sheet to STDOUT and die if requested | ||||
4997 | if ( defined( $rOpts->{'stylesheet'} ) ) { | ||||
4998 | write_style_sheet_file('-'); | ||||
4999 | Perl::Tidy::Exit 0; | ||||
5000 | } | ||||
5001 | |||||
5002 | # make sure user gives a file name after -css | ||||
5003 | if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) { | ||||
5004 | $css_linkname = $rOpts->{'html-linked-style-sheet'}; | ||||
5005 | if ( $css_linkname =~ /^-/ ) { | ||||
5006 | Perl::Tidy::Die "You must specify a valid filename after -css\n"; | ||||
5007 | } | ||||
5008 | } | ||||
5009 | |||||
5010 | # check for conflict | ||||
5011 | if ( $css_linkname && $rOpts->{'nohtml-style-sheets'} ) { | ||||
5012 | $rOpts->{'nohtml-style-sheets'} = 0; | ||||
5013 | warning("You can't specify both -css and -nss; -nss ignored\n"); | ||||
5014 | } | ||||
5015 | |||||
5016 | # write a style sheet file if necessary | ||||
5017 | if ($css_linkname) { | ||||
5018 | |||||
5019 | # if the selected filename exists, don't write, because user may | ||||
5020 | # have done some work by hand to create it; use backup name instead | ||||
5021 | # Also, this will avoid a potential disaster in which the user | ||||
5022 | # forgets to specify the style sheet, like this: | ||||
5023 | # perltidy -html -css myfile1.pl myfile2.pl | ||||
5024 | # This would cause myfile1.pl to parsed as the style sheet by GetOpts | ||||
5025 | my $css_filename = $css_linkname; | ||||
5026 | unless ( -e $css_filename ) { | ||||
5027 | write_style_sheet_file($css_filename); | ||||
5028 | } | ||||
5029 | } | ||||
5030 | $missing_html_entities = 1 unless $rOpts->{'html-entities'}; | ||||
5031 | } | ||||
5032 | |||||
5033 | sub write_style_sheet_file { | ||||
5034 | |||||
5035 | my $css_filename = shift; | ||||
5036 | my $fh; | ||||
5037 | unless ( $fh = IO::File->new("> $css_filename") ) { | ||||
5038 | Perl::Tidy::Die "can't open $css_filename: $!\n"; | ||||
5039 | } | ||||
5040 | write_style_sheet_data($fh); | ||||
5041 | eval { $fh->close }; | ||||
5042 | } | ||||
5043 | |||||
5044 | sub write_style_sheet_data { | ||||
5045 | |||||
5046 | # write the style sheet data to an open file handle | ||||
5047 | my $fh = shift; | ||||
5048 | |||||
5049 | my $bg_color = $rOpts->{'html-color-background'}; | ||||
5050 | my $text_color = $rOpts->{'html-color-punctuation'}; | ||||
5051 | |||||
5052 | # pre-bgcolor is new, and may not be defined | ||||
5053 | my $pre_bg_color = $rOpts->{'html-pre-color-background'}; | ||||
5054 | $pre_bg_color = $bg_color unless $pre_bg_color; | ||||
5055 | |||||
5056 | $fh->print(<<"EOM"); | ||||
5057 | /* default style sheet generated by perltidy */ | ||||
5058 | body {background: $bg_color; color: $text_color} | ||||
5059 | pre { color: $text_color; | ||||
5060 | background: $pre_bg_color; | ||||
5061 | font-family: courier; | ||||
5062 | } | ||||
5063 | |||||
5064 | EOM | ||||
5065 | |||||
5066 | foreach my $short_name ( sort keys %short_to_long_names ) { | ||||
5067 | my $long_name = $short_to_long_names{$short_name}; | ||||
5068 | |||||
5069 | my $abbrev = '.' . $short_name; | ||||
5070 | if ( length($short_name) == 1 ) { $abbrev .= ' ' } # for alignment | ||||
5071 | my $color = $html_color{$short_name}; | ||||
5072 | if ( !defined($color) ) { $color = $text_color } | ||||
5073 | $fh->print("$abbrev \{ color: $color;"); | ||||
5074 | |||||
5075 | if ( $html_bold{$short_name} ) { | ||||
5076 | $fh->print(" font-weight:bold;"); | ||||
5077 | } | ||||
5078 | |||||
5079 | if ( $html_italic{$short_name} ) { | ||||
5080 | $fh->print(" font-style:italic;"); | ||||
5081 | } | ||||
5082 | $fh->print("} /* $long_name */\n"); | ||||
5083 | } | ||||
5084 | } | ||||
5085 | |||||
5086 | sub set_default_color { | ||||
5087 | |||||
5088 | # make sure that options hash $rOpts->{$key} contains a valid color | ||||
5089 | my ( $key, $color ) = @_; | ||||
5090 | if ( $rOpts->{$key} ) { $color = $rOpts->{$key} } | ||||
5091 | $rOpts->{$key} = check_RGB($color); | ||||
5092 | } | ||||
5093 | |||||
5094 | sub check_RGB { | ||||
5095 | |||||
5096 | # if color is a 6 digit hex RGB value, prepend a #, otherwise | ||||
5097 | # assume that it is a valid ascii color name | ||||
5098 | my ($color) = @_; | ||||
5099 | if ( $color =~ /^[0-9a-fA-F]{6,6}$/ ) { $color = "#$color" } | ||||
5100 | return $color; | ||||
5101 | } | ||||
5102 | |||||
5103 | sub set_default_properties { | ||||
5104 | my ( $short_name, $color, $bold, $italic ) = @_; | ||||
5105 | |||||
5106 | set_default_color( "html-color-$short_to_long_names{$short_name}", $color ); | ||||
5107 | my $key; | ||||
5108 | $key = "html-bold-$short_to_long_names{$short_name}"; | ||||
5109 | $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $bold; | ||||
5110 | $key = "html-italic-$short_to_long_names{$short_name}"; | ||||
5111 | $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $italic; | ||||
5112 | } | ||||
5113 | |||||
5114 | sub pod_to_html { | ||||
5115 | |||||
5116 | # Use Pod::Html to process the pod and make the page | ||||
5117 | # then merge the perltidy code sections into it. | ||||
5118 | # return 1 if success, 0 otherwise | ||||
5119 | my $self = shift; | ||||
5120 | my ( $pod_string, $css_string, $toc_string, $rpre_string_stack ) = @_; | ||||
5121 | my $input_file = $self->{_input_file}; | ||||
5122 | my $title = $self->{_title}; | ||||
5123 | my $success_flag = 0; | ||||
5124 | |||||
5125 | # don't try to use pod2html if no pod | ||||
5126 | unless ($pod_string) { | ||||
5127 | return $success_flag; | ||||
5128 | } | ||||
5129 | |||||
5130 | # Pod::Html requires a real temporary filename | ||||
5131 | my ( $fh_tmp, $tmpfile ) = tempfile(); | ||||
5132 | unless ($fh_tmp) { | ||||
5133 | Perl::Tidy::Warn | ||||
5134 | "unable to open temporary file $tmpfile; cannot use pod2html\n"; | ||||
5135 | return $success_flag; | ||||
5136 | } | ||||
5137 | |||||
5138 | #------------------------------------------------------------------ | ||||
5139 | # Warning: a temporary file is open; we have to clean up if | ||||
5140 | # things go bad. From here on all returns should be by going to | ||||
5141 | # RETURN so that the temporary file gets unlinked. | ||||
5142 | #------------------------------------------------------------------ | ||||
5143 | |||||
5144 | # write the pod text to the temporary file | ||||
5145 | $fh_tmp->print($pod_string); | ||||
5146 | $fh_tmp->close(); | ||||
5147 | |||||
5148 | # Hand off the pod to pod2html. | ||||
5149 | # Note that we can use the same temporary filename for input and output | ||||
5150 | # because of the way pod2html works. | ||||
5151 | { | ||||
5152 | |||||
5153 | my @args; | ||||
5154 | push @args, "--infile=$tmpfile", "--outfile=$tmpfile", "--title=$title"; | ||||
5155 | my $kw; | ||||
5156 | |||||
5157 | # Flags with string args: | ||||
5158 | # "backlink=s", "cachedir=s", "htmlroot=s", "libpods=s", | ||||
5159 | # "podpath=s", "podroot=s" | ||||
5160 | # Note: -css=s is handled by perltidy itself | ||||
5161 | foreach $kw (qw(backlink cachedir htmlroot libpods podpath podroot)) { | ||||
5162 | if ( $rOpts->{$kw} ) { push @args, "--$kw=$rOpts->{$kw}" } | ||||
5163 | } | ||||
5164 | |||||
5165 | # Toggle switches; these have extra leading 'pod' | ||||
5166 | # "header!", "index!", "recurse!", "quiet!", "verbose!" | ||||
5167 | foreach $kw (qw(podheader podindex podrecurse podquiet podverbose)) { | ||||
5168 | my $kwd = $kw; # allows us to strip 'pod' | ||||
5169 | if ( $rOpts->{$kw} ) { $kwd =~ s/^pod//; push @args, "--$kwd" } | ||||
5170 | elsif ( defined( $rOpts->{$kw} ) ) { | ||||
5171 | $kwd =~ s/^pod//; | ||||
5172 | push @args, "--no$kwd"; | ||||
5173 | } | ||||
5174 | } | ||||
5175 | |||||
5176 | # "flush", | ||||
5177 | $kw = 'podflush'; | ||||
5178 | if ( $rOpts->{$kw} ) { $kw =~ s/^pod//; push @args, "--$kw" } | ||||
5179 | |||||
5180 | # Must clean up if pod2html dies (it can); | ||||
5181 | # Be careful not to overwrite callers __DIE__ routine | ||||
5182 | local $SIG{__DIE__} = sub { | ||||
5183 | unlink $tmpfile if -e $tmpfile; | ||||
5184 | Perl::Tidy::Die $_[0]; | ||||
5185 | }; | ||||
5186 | |||||
5187 | pod2html(@args); | ||||
5188 | } | ||||
5189 | $fh_tmp = IO::File->new( $tmpfile, 'r' ); | ||||
5190 | unless ($fh_tmp) { | ||||
5191 | |||||
5192 | # this error shouldn't happen ... we just used this filename | ||||
5193 | Perl::Tidy::Warn | ||||
5194 | "unable to open temporary file $tmpfile; cannot use pod2html\n"; | ||||
5195 | goto RETURN; | ||||
5196 | } | ||||
5197 | |||||
5198 | my $html_fh = $self->{_html_fh}; | ||||
5199 | my @toc; | ||||
5200 | my $in_toc; | ||||
5201 | my $ul_level = 0; | ||||
5202 | my $no_print; | ||||
5203 | |||||
5204 | # This routine will write the html selectively and store the toc | ||||
5205 | my $html_print = sub { | ||||
5206 | foreach (@_) { | ||||
5207 | $html_fh->print($_) unless ($no_print); | ||||
5208 | if ($in_toc) { push @toc, $_ } | ||||
5209 | } | ||||
5210 | }; | ||||
5211 | |||||
5212 | # loop over lines of html output from pod2html and merge in | ||||
5213 | # the necessary perltidy html sections | ||||
5214 | my ( $saw_body, $saw_index, $saw_body_end ); | ||||
5215 | while ( my $line = $fh_tmp->getline() ) { | ||||
5216 | |||||
5217 | if ( $line =~ /^\s*<html>\s*$/i ) { | ||||
5218 | my $date = localtime; | ||||
5219 | $html_print->("<!-- Generated by perltidy on $date -->\n"); | ||||
5220 | $html_print->($line); | ||||
5221 | } | ||||
5222 | |||||
5223 | # Copy the perltidy css, if any, after <body> tag | ||||
5224 | elsif ( $line =~ /^\s*<body.*>\s*$/i ) { | ||||
5225 | $saw_body = 1; | ||||
5226 | $html_print->($css_string) if $css_string; | ||||
5227 | $html_print->($line); | ||||
5228 | |||||
5229 | # add a top anchor and heading | ||||
5230 | $html_print->("<a name=\"-top-\"></a>\n"); | ||||
5231 | $title = escape_html($title); | ||||
5232 | $html_print->("<h1>$title</h1>\n"); | ||||
5233 | } | ||||
5234 | |||||
5235 | # check for start of index, old pod2html | ||||
5236 | # before Pod::Html VERSION 1.15_02 it is delimited by comments as: | ||||
5237 | # <!-- INDEX BEGIN --> | ||||
5238 | # <ul> | ||||
5239 | # ... | ||||
5240 | # </ul> | ||||
5241 | # <!-- INDEX END --> | ||||
5242 | # | ||||
5243 | elsif ( $line =~ /^\s*<!-- INDEX BEGIN -->\s*$/i ) { | ||||
5244 | $in_toc = 'INDEX'; | ||||
5245 | |||||
5246 | # when frames are used, an extra table of contents in the | ||||
5247 | # contents panel is confusing, so don't print it | ||||
5248 | $no_print = $rOpts->{'frames'} | ||||
5249 | || !$rOpts->{'html-table-of-contents'}; | ||||
5250 | $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'}; | ||||
5251 | $html_print->($line); | ||||
5252 | } | ||||
5253 | |||||
5254 | # check for start of index, new pod2html | ||||
5255 | # After Pod::Html VERSION 1.15_02 it is delimited as: | ||||
5256 | # <ul id="index"> | ||||
5257 | # ... | ||||
5258 | # </ul> | ||||
5259 | elsif ( $line =~ /^\s*<ul\s+id="index">/i ) { | ||||
5260 | $in_toc = 'UL'; | ||||
5261 | $ul_level = 1; | ||||
5262 | |||||
5263 | # when frames are used, an extra table of contents in the | ||||
5264 | # contents panel is confusing, so don't print it | ||||
5265 | $no_print = $rOpts->{'frames'} | ||||
5266 | || !$rOpts->{'html-table-of-contents'}; | ||||
5267 | $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'}; | ||||
5268 | $html_print->($line); | ||||
5269 | } | ||||
5270 | |||||
5271 | # Check for end of index, old pod2html | ||||
5272 | elsif ( $line =~ /^\s*<!-- INDEX END -->\s*$/i ) { | ||||
5273 | $saw_index = 1; | ||||
5274 | $html_print->($line); | ||||
5275 | |||||
5276 | # Copy the perltidy toc, if any, after the Pod::Html toc | ||||
5277 | if ($toc_string) { | ||||
5278 | $html_print->("<hr />\n") if $rOpts->{'frames'}; | ||||
5279 | $html_print->("<h2>Code Index:</h2>\n"); | ||||
5280 | my @toc = map { $_ .= "\n" } split /\n/, $toc_string; | ||||
5281 | $html_print->(@toc); | ||||
5282 | } | ||||
5283 | $in_toc = ""; | ||||
5284 | $no_print = 0; | ||||
5285 | } | ||||
5286 | |||||
5287 | # must track <ul> depth level for new pod2html | ||||
5288 | elsif ( $line =~ /\s*<ul>\s*$/i && $in_toc eq 'UL' ) { | ||||
5289 | $ul_level++; | ||||
5290 | $html_print->($line); | ||||
5291 | } | ||||
5292 | |||||
5293 | # Check for end of index, for new pod2html | ||||
5294 | elsif ( $line =~ /\s*<\/ul>/i && $in_toc eq 'UL' ) { | ||||
5295 | $ul_level--; | ||||
5296 | $html_print->($line); | ||||
5297 | |||||
5298 | # Copy the perltidy toc, if any, after the Pod::Html toc | ||||
5299 | if ( $ul_level <= 0 ) { | ||||
5300 | $saw_index = 1; | ||||
5301 | if ($toc_string) { | ||||
5302 | $html_print->("<hr />\n") if $rOpts->{'frames'}; | ||||
5303 | $html_print->("<h2>Code Index:</h2>\n"); | ||||
5304 | my @toc = map { $_ .= "\n" } split /\n/, $toc_string; | ||||
5305 | $html_print->(@toc); | ||||
5306 | } | ||||
5307 | $in_toc = ""; | ||||
5308 | $ul_level = 0; | ||||
5309 | $no_print = 0; | ||||
5310 | } | ||||
5311 | } | ||||
5312 | |||||
5313 | # Copy one perltidy section after each marker | ||||
5314 | elsif ( $line =~ /^(.*)<!-- pERLTIDY sECTION -->(.*)$/ ) { | ||||
5315 | $line = $2; | ||||
5316 | $html_print->($1) if $1; | ||||
5317 | |||||
5318 | # Intermingle code and pod sections if we saw multiple =cut's. | ||||
5319 | if ( $self->{_pod_cut_count} > 1 ) { | ||||
5320 | my $rpre_string = shift(@$rpre_string_stack); | ||||
5321 | if ($$rpre_string) { | ||||
5322 | $html_print->('<pre>'); | ||||
5323 | $html_print->($$rpre_string); | ||||
5324 | $html_print->('</pre>'); | ||||
5325 | } | ||||
5326 | else { | ||||
5327 | |||||
5328 | # shouldn't happen: we stored a string before writing | ||||
5329 | # each marker. | ||||
5330 | Perl::Tidy::Warn | ||||
5331 | "Problem merging html stream with pod2html; order may be wrong\n"; | ||||
5332 | } | ||||
5333 | $html_print->($line); | ||||
5334 | } | ||||
5335 | |||||
5336 | # If didn't see multiple =cut lines, we'll put the pod out first | ||||
5337 | # and then the code, because it's less confusing. | ||||
5338 | else { | ||||
5339 | |||||
5340 | # since we are not intermixing code and pod, we don't need | ||||
5341 | # or want any <hr> lines which separated pod and code | ||||
5342 | $html_print->($line) unless ( $line =~ /^\s*<hr>\s*$/i ); | ||||
5343 | } | ||||
5344 | } | ||||
5345 | |||||
5346 | # Copy any remaining code section before the </body> tag | ||||
5347 | elsif ( $line =~ /^\s*<\/body>\s*$/i ) { | ||||
5348 | $saw_body_end = 1; | ||||
5349 | if (@$rpre_string_stack) { | ||||
5350 | unless ( $self->{_pod_cut_count} > 1 ) { | ||||
5351 | $html_print->('<hr />'); | ||||
5352 | } | ||||
5353 | while ( my $rpre_string = shift(@$rpre_string_stack) ) { | ||||
5354 | $html_print->('<pre>'); | ||||
5355 | $html_print->($$rpre_string); | ||||
5356 | $html_print->('</pre>'); | ||||
5357 | } | ||||
5358 | } | ||||
5359 | $html_print->($line); | ||||
5360 | } | ||||
5361 | else { | ||||
5362 | $html_print->($line); | ||||
5363 | } | ||||
5364 | } | ||||
5365 | |||||
5366 | $success_flag = 1; | ||||
5367 | unless ($saw_body) { | ||||
5368 | Perl::Tidy::Warn "Did not see <body> in pod2html output\n"; | ||||
5369 | $success_flag = 0; | ||||
5370 | } | ||||
5371 | unless ($saw_body_end) { | ||||
5372 | Perl::Tidy::Warn "Did not see </body> in pod2html output\n"; | ||||
5373 | $success_flag = 0; | ||||
5374 | } | ||||
5375 | unless ($saw_index) { | ||||
5376 | Perl::Tidy::Warn "Did not find INDEX END in pod2html output\n"; | ||||
5377 | $success_flag = 0; | ||||
5378 | } | ||||
5379 | |||||
5380 | RETURN: | ||||
5381 | eval { $html_fh->close() }; | ||||
5382 | |||||
5383 | # note that we have to unlink tmpfile before making frames | ||||
5384 | # because the tmpfile may be one of the names used for frames | ||||
5385 | unlink $tmpfile if -e $tmpfile; | ||||
5386 | if ( $success_flag && $rOpts->{'frames'} ) { | ||||
5387 | $self->make_frame( \@toc ); | ||||
5388 | } | ||||
5389 | return $success_flag; | ||||
5390 | } | ||||
5391 | |||||
5392 | sub make_frame { | ||||
5393 | |||||
5394 | # Make a frame with table of contents in the left panel | ||||
5395 | # and the text in the right panel. | ||||
5396 | # On entry: | ||||
5397 | # $html_filename contains the no-frames html output | ||||
5398 | # $rtoc is a reference to an array with the table of contents | ||||
5399 | my $self = shift; | ||||
5400 | my ($rtoc) = @_; | ||||
5401 | my $input_file = $self->{_input_file}; | ||||
5402 | my $html_filename = $self->{_html_file}; | ||||
5403 | my $toc_filename = $self->{_toc_filename}; | ||||
5404 | my $src_filename = $self->{_src_filename}; | ||||
5405 | my $title = $self->{_title}; | ||||
5406 | $title = escape_html($title); | ||||
5407 | |||||
5408 | # FUTURE input parameter: | ||||
5409 | my $top_basename = ""; | ||||
5410 | |||||
5411 | # We need to produce 3 html files: | ||||
5412 | # 1. - the table of contents | ||||
5413 | # 2. - the contents (source code) itself | ||||
5414 | # 3. - the frame which contains them | ||||
5415 | |||||
5416 | # get basenames for relative links | ||||
5417 | my ( $toc_basename, $toc_path ) = fileparse($toc_filename); | ||||
5418 | my ( $src_basename, $src_path ) = fileparse($src_filename); | ||||
5419 | |||||
5420 | # 1. Make the table of contents panel, with appropriate changes | ||||
5421 | # to the anchor names | ||||
5422 | my $src_frame_name = 'SRC'; | ||||
5423 | my $first_anchor = | ||||
5424 | write_toc_html( $title, $toc_filename, $src_basename, $rtoc, | ||||
5425 | $src_frame_name ); | ||||
5426 | |||||
5427 | # 2. The current .html filename is renamed to be the contents panel | ||||
5428 | rename( $html_filename, $src_filename ) | ||||
5429 | or Perl::Tidy::Die "Cannot rename $html_filename to $src_filename:$!\n"; | ||||
5430 | |||||
5431 | # 3. Then use the original html filename for the frame | ||||
5432 | write_frame_html( | ||||
5433 | $title, $html_filename, $top_basename, | ||||
5434 | $toc_basename, $src_basename, $src_frame_name | ||||
5435 | ); | ||||
5436 | } | ||||
5437 | |||||
5438 | sub write_toc_html { | ||||
5439 | |||||
5440 | # write a separate html table of contents file for frames | ||||
5441 | my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_; | ||||
5442 | my $fh = IO::File->new( $toc_filename, 'w' ) | ||||
5443 | or Perl::Tidy::Die "Cannot open $toc_filename:$!\n"; | ||||
5444 | $fh->print(<<EOM); | ||||
5445 | <html> | ||||
5446 | <head> | ||||
5447 | <title>$title</title> | ||||
5448 | </head> | ||||
5449 | <body> | ||||
5450 | <h1><a href=\"$src_basename#-top-" target="$src_frame_name">$title</a></h1> | ||||
5451 | EOM | ||||
5452 | |||||
5453 | my $first_anchor = | ||||
5454 | change_anchor_names( $rtoc, $src_basename, "$src_frame_name" ); | ||||
5455 | $fh->print( join "", @$rtoc ); | ||||
5456 | |||||
5457 | $fh->print(<<EOM); | ||||
5458 | </body> | ||||
5459 | </html> | ||||
5460 | EOM | ||||
5461 | |||||
5462 | } | ||||
5463 | |||||
5464 | sub write_frame_html { | ||||
5465 | |||||
5466 | # write an html file to be the table of contents frame | ||||
5467 | my ( | ||||
5468 | $title, $frame_filename, $top_basename, | ||||
5469 | $toc_basename, $src_basename, $src_frame_name | ||||
5470 | ) = @_; | ||||
5471 | |||||
5472 | my $fh = IO::File->new( $frame_filename, 'w' ) | ||||
5473 | or Perl::Tidy::Die "Cannot open $toc_basename:$!\n"; | ||||
5474 | |||||
5475 | $fh->print(<<EOM); | ||||
5476 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN" | ||||
5477 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"> | ||||
5478 | <?xml version="1.0" encoding="iso-8859-1" ?> | ||||
5479 | <html xmlns="http://www.w3.org/1999/xhtml"> | ||||
5480 | <head> | ||||
5481 | <title>$title</title> | ||||
5482 | </head> | ||||
5483 | EOM | ||||
5484 | |||||
5485 | # two left panels, one right, if master index file | ||||
5486 | if ($top_basename) { | ||||
5487 | $fh->print(<<EOM); | ||||
5488 | <frameset cols="20%,80%"> | ||||
5489 | <frameset rows="30%,70%"> | ||||
5490 | <frame src = "$top_basename" /> | ||||
5491 | <frame src = "$toc_basename" /> | ||||
5492 | </frameset> | ||||
5493 | EOM | ||||
5494 | } | ||||
5495 | |||||
5496 | # one left panels, one right, if no master index file | ||||
5497 | else { | ||||
5498 | $fh->print(<<EOM); | ||||
5499 | <frameset cols="20%,*"> | ||||
5500 | <frame src = "$toc_basename" /> | ||||
5501 | EOM | ||||
5502 | } | ||||
5503 | $fh->print(<<EOM); | ||||
5504 | <frame src = "$src_basename" name = "$src_frame_name" /> | ||||
5505 | <noframes> | ||||
5506 | <body> | ||||
5507 | <p>If you see this message, you are using a non-frame-capable web client.</p> | ||||
5508 | <p>This document contains:</p> | ||||
5509 | <ul> | ||||
5510 | <li><a href="$toc_basename">A table of contents</a></li> | ||||
5511 | <li><a href="$src_basename">The source code</a></li> | ||||
5512 | </ul> | ||||
5513 | </body> | ||||
5514 | </noframes> | ||||
5515 | </frameset> | ||||
5516 | </html> | ||||
5517 | EOM | ||||
5518 | } | ||||
5519 | |||||
5520 | sub change_anchor_names { | ||||
5521 | |||||
5522 | # add a filename and target to anchors | ||||
5523 | # also return the first anchor | ||||
5524 | my ( $rlines, $filename, $target ) = @_; | ||||
5525 | my $first_anchor; | ||||
5526 | foreach my $line (@$rlines) { | ||||
5527 | |||||
5528 | # We're looking for lines like this: | ||||
5529 | # <LI><A HREF="#synopsis">SYNOPSIS</A></LI> | ||||
5530 | # ---- - -------- ----------------- | ||||
5531 | # $1 $4 $5 | ||||
5532 | if ( $line =~ /^(.*)<a(.*)href\s*=\s*"([^#]*)#([^"]+)"[^>]*>(.*)$/i ) { | ||||
5533 | my $pre = $1; | ||||
5534 | my $name = $4; | ||||
5535 | my $post = $5; | ||||
5536 | my $href = "$filename#$name"; | ||||
5537 | $line = "$pre<a href=\"$href\" target=\"$target\">$post\n"; | ||||
5538 | unless ($first_anchor) { $first_anchor = $href } | ||||
5539 | } | ||||
5540 | } | ||||
5541 | return $first_anchor; | ||||
5542 | } | ||||
5543 | |||||
5544 | sub close_html_file { | ||||
5545 | my $self = shift; | ||||
5546 | return unless $self->{_html_file_opened}; | ||||
5547 | |||||
5548 | my $html_fh = $self->{_html_fh}; | ||||
5549 | my $rtoc_string = $self->{_rtoc_string}; | ||||
5550 | |||||
5551 | # There are 3 basic paths to html output... | ||||
5552 | |||||
5553 | # --------------------------------- | ||||
5554 | # Path 1: finish up if in -pre mode | ||||
5555 | # --------------------------------- | ||||
5556 | if ( $rOpts->{'html-pre-only'} ) { | ||||
5557 | $html_fh->print( <<"PRE_END"); | ||||
5558 | </pre> | ||||
5559 | PRE_END | ||||
5560 | eval { $html_fh->close() }; | ||||
5561 | return; | ||||
5562 | } | ||||
5563 | |||||
5564 | # Finish the index | ||||
5565 | $self->add_toc_item( 'EOF', 'EOF' ); | ||||
5566 | |||||
5567 | my $rpre_string_stack = $self->{_rpre_string_stack}; | ||||
5568 | |||||
5569 | # Patch to darken the <pre> background color in case of pod2html and | ||||
5570 | # interleaved code/documentation. Otherwise, the distinction | ||||
5571 | # between code and documentation is blurred. | ||||
5572 | if ( $rOpts->{pod2html} | ||||
5573 | && $self->{_pod_cut_count} >= 1 | ||||
5574 | && $rOpts->{'html-color-background'} eq '#FFFFFF' ) | ||||
5575 | { | ||||
5576 | $rOpts->{'html-pre-color-background'} = '#F0F0F0'; | ||||
5577 | } | ||||
5578 | |||||
5579 | # put the css or its link into a string, if used | ||||
5580 | my $css_string; | ||||
5581 | my $fh_css = Perl::Tidy::IOScalar->new( \$css_string, 'w' ); | ||||
5582 | |||||
5583 | # use css linked to another file | ||||
5584 | if ( $rOpts->{'html-linked-style-sheet'} ) { | ||||
5585 | $fh_css->print( | ||||
5586 | qq(<link rel="stylesheet" href="$css_linkname" type="text/css" />) | ||||
5587 | ); | ||||
5588 | } | ||||
5589 | |||||
5590 | # use css embedded in this file | ||||
5591 | elsif ( !$rOpts->{'nohtml-style-sheets'} ) { | ||||
5592 | $fh_css->print( <<'ENDCSS'); | ||||
5593 | <style type="text/css"> | ||||
5594 | <!-- | ||||
5595 | ENDCSS | ||||
5596 | write_style_sheet_data($fh_css); | ||||
5597 | $fh_css->print( <<"ENDCSS"); | ||||
5598 | --> | ||||
5599 | </style> | ||||
5600 | ENDCSS | ||||
5601 | } | ||||
5602 | |||||
5603 | # ----------------------------------------------------------- | ||||
5604 | # path 2: use pod2html if requested | ||||
5605 | # If we fail for some reason, continue on to path 3 | ||||
5606 | # ----------------------------------------------------------- | ||||
5607 | if ( $rOpts->{'pod2html'} ) { | ||||
5608 | my $rpod_string = $self->{_rpod_string}; | ||||
5609 | $self->pod_to_html( $$rpod_string, $css_string, $$rtoc_string, | ||||
5610 | $rpre_string_stack ) | ||||
5611 | && return; | ||||
5612 | } | ||||
5613 | |||||
5614 | # -------------------------------------------------- | ||||
5615 | # path 3: write code in html, with pod only in italics | ||||
5616 | # -------------------------------------------------- | ||||
5617 | my $input_file = $self->{_input_file}; | ||||
5618 | my $title = escape_html($input_file); | ||||
5619 | my $date = localtime; | ||||
5620 | $html_fh->print( <<"HTML_START"); | ||||
5621 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" | ||||
5622 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> | ||||
5623 | <!-- Generated by perltidy on $date --> | ||||
5624 | <html xmlns="http://www.w3.org/1999/xhtml"> | ||||
5625 | <head> | ||||
5626 | <title>$title</title> | ||||
5627 | HTML_START | ||||
5628 | |||||
5629 | # output the css, if used | ||||
5630 | if ($css_string) { | ||||
5631 | $html_fh->print($css_string); | ||||
5632 | $html_fh->print( <<"ENDCSS"); | ||||
5633 | </head> | ||||
5634 | <body> | ||||
5635 | ENDCSS | ||||
5636 | } | ||||
5637 | else { | ||||
5638 | |||||
5639 | $html_fh->print( <<"HTML_START"); | ||||
5640 | </head> | ||||
5641 | <body bgcolor=\"$rOpts->{'html-color-background'}\" text=\"$rOpts->{'html-color-punctuation'}\"> | ||||
5642 | HTML_START | ||||
5643 | } | ||||
5644 | |||||
5645 | $html_fh->print("<a name=\"-top-\"></a>\n"); | ||||
5646 | $html_fh->print( <<"EOM"); | ||||
5647 | <h1>$title</h1> | ||||
5648 | EOM | ||||
5649 | |||||
5650 | # copy the table of contents | ||||
5651 | if ( $$rtoc_string | ||||
5652 | && !$rOpts->{'frames'} | ||||
5653 | && $rOpts->{'html-table-of-contents'} ) | ||||
5654 | { | ||||
5655 | $html_fh->print($$rtoc_string); | ||||
5656 | } | ||||
5657 | |||||
5658 | # copy the pre section(s) | ||||
5659 | my $fname_comment = $input_file; | ||||
5660 | $fname_comment =~ s/--+/-/g; # protect HTML comment tags | ||||
5661 | $html_fh->print( <<"END_PRE"); | ||||
5662 | <hr /> | ||||
5663 | <!-- contents of filename: $fname_comment --> | ||||
5664 | <pre> | ||||
5665 | END_PRE | ||||
5666 | |||||
5667 | foreach my $rpre_string (@$rpre_string_stack) { | ||||
5668 | $html_fh->print($$rpre_string); | ||||
5669 | } | ||||
5670 | |||||
5671 | # and finish the html page | ||||
5672 | $html_fh->print( <<"HTML_END"); | ||||
5673 | </pre> | ||||
5674 | </body> | ||||
5675 | </html> | ||||
5676 | HTML_END | ||||
5677 | eval { $html_fh->close() }; # could be object without close method | ||||
5678 | |||||
5679 | if ( $rOpts->{'frames'} ) { | ||||
5680 | my @toc = map { $_ .= "\n" } split /\n/, $$rtoc_string; | ||||
5681 | $self->make_frame( \@toc ); | ||||
5682 | } | ||||
5683 | } | ||||
5684 | |||||
5685 | sub markup_tokens { | ||||
5686 | my $self = shift; | ||||
5687 | my ( $rtokens, $rtoken_type, $rlevels ) = @_; | ||||
5688 | my ( @colored_tokens, $j, $string, $type, $token, $level ); | ||||
5689 | my $rlast_level = $self->{_rlast_level}; | ||||
5690 | my $rpackage_stack = $self->{_rpackage_stack}; | ||||
5691 | |||||
5692 | for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) { | ||||
5693 | $type = $$rtoken_type[$j]; | ||||
5694 | $token = $$rtokens[$j]; | ||||
5695 | $level = $$rlevels[$j]; | ||||
5696 | $level = 0 if ( $level < 0 ); | ||||
5697 | |||||
5698 | #------------------------------------------------------- | ||||
5699 | # Update the package stack. The package stack is needed to keep | ||||
5700 | # the toc correct because some packages may be declared within | ||||
5701 | # blocks and go out of scope when we leave the block. | ||||
5702 | #------------------------------------------------------- | ||||
5703 | if ( $level > $$rlast_level ) { | ||||
5704 | unless ( $rpackage_stack->[ $level - 1 ] ) { | ||||
5705 | $rpackage_stack->[ $level - 1 ] = 'main'; | ||||
5706 | } | ||||
5707 | $rpackage_stack->[$level] = $rpackage_stack->[ $level - 1 ]; | ||||
5708 | } | ||||
5709 | elsif ( $level < $$rlast_level ) { | ||||
5710 | my $package = $rpackage_stack->[$level]; | ||||
5711 | unless ($package) { $package = 'main' } | ||||
5712 | |||||
5713 | # if we change packages due to a nesting change, we | ||||
5714 | # have to make an entry in the toc | ||||
5715 | if ( $package ne $rpackage_stack->[ $level + 1 ] ) { | ||||
5716 | $self->add_toc_item( $package, 'package' ); | ||||
5717 | } | ||||
5718 | } | ||||
5719 | $$rlast_level = $level; | ||||
5720 | |||||
5721 | #------------------------------------------------------- | ||||
5722 | # Intercept a sub name here; split it | ||||
5723 | # into keyword 'sub' and sub name; and add an | ||||
5724 | # entry in the toc | ||||
5725 | #------------------------------------------------------- | ||||
5726 | if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) { | ||||
5727 | $token = $self->markup_html_element( $1, 'k' ); | ||||
5728 | push @colored_tokens, $token; | ||||
5729 | $token = $2; | ||||
5730 | $type = 'M'; | ||||
5731 | |||||
5732 | # but don't include sub declarations in the toc; | ||||
5733 | # these wlll have leading token types 'i;' | ||||
5734 | my $signature = join "", @$rtoken_type; | ||||
5735 | unless ( $signature =~ /^i;/ ) { | ||||
5736 | my $subname = $token; | ||||
5737 | $subname =~ s/[\s\(].*$//; # remove any attributes and prototype | ||||
5738 | $self->add_toc_item( $subname, 'sub' ); | ||||
5739 | } | ||||
5740 | } | ||||
5741 | |||||
5742 | #------------------------------------------------------- | ||||
5743 | # Intercept a package name here; split it | ||||
5744 | # into keyword 'package' and name; add to the toc, | ||||
5745 | # and update the package stack | ||||
5746 | #------------------------------------------------------- | ||||
5747 | if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) { | ||||
5748 | $token = $self->markup_html_element( $1, 'k' ); | ||||
5749 | push @colored_tokens, $token; | ||||
5750 | $token = $2; | ||||
5751 | $type = 'i'; | ||||
5752 | $self->add_toc_item( "$token", 'package' ); | ||||
5753 | $rpackage_stack->[$level] = $token; | ||||
5754 | } | ||||
5755 | |||||
5756 | $token = $self->markup_html_element( $token, $type ); | ||||
5757 | push @colored_tokens, $token; | ||||
5758 | } | ||||
5759 | return ( \@colored_tokens ); | ||||
5760 | } | ||||
5761 | |||||
5762 | sub markup_html_element { | ||||
5763 | my $self = shift; | ||||
5764 | my ( $token, $type ) = @_; | ||||
5765 | |||||
5766 | return $token if ( $type eq 'b' ); # skip a blank token | ||||
5767 | return $token if ( $token =~ /^\s*$/ ); # skip a blank line | ||||
5768 | $token = escape_html($token); | ||||
5769 | |||||
5770 | # get the short abbreviation for this token type | ||||
5771 | my $short_name = $token_short_names{$type}; | ||||
5772 | if ( !defined($short_name) ) { | ||||
5773 | $short_name = "pu"; # punctuation is default | ||||
5774 | } | ||||
5775 | |||||
5776 | # handle style sheets.. | ||||
5777 | if ( !$rOpts->{'nohtml-style-sheets'} ) { | ||||
5778 | if ( $short_name ne 'pu' ) { | ||||
5779 | $token = qq(<span class="$short_name">) . $token . "</span>"; | ||||
5780 | } | ||||
5781 | } | ||||
5782 | |||||
5783 | # handle no style sheets.. | ||||
5784 | else { | ||||
5785 | my $color = $html_color{$short_name}; | ||||
5786 | |||||
5787 | if ( $color && ( $color ne $rOpts->{'html-color-punctuation'} ) ) { | ||||
5788 | $token = qq(<font color="$color">) . $token . "</font>"; | ||||
5789 | } | ||||
5790 | if ( $html_italic{$short_name} ) { $token = "<i>$token</i>" } | ||||
5791 | if ( $html_bold{$short_name} ) { $token = "<b>$token</b>" } | ||||
5792 | } | ||||
5793 | return $token; | ||||
5794 | } | ||||
5795 | |||||
5796 | sub escape_html { | ||||
5797 | |||||
5798 | my $token = shift; | ||||
5799 | if ($missing_html_entities) { | ||||
5800 | $token =~ s/\&/&/g; | ||||
5801 | $token =~ s/\</</g; | ||||
5802 | $token =~ s/\>/>/g; | ||||
5803 | $token =~ s/\"/"/g; | ||||
5804 | } | ||||
5805 | else { | ||||
5806 | HTML::Entities::encode_entities($token); | ||||
5807 | } | ||||
5808 | return $token; | ||||
5809 | } | ||||
5810 | |||||
5811 | sub finish_formatting { | ||||
5812 | |||||
5813 | # called after last line | ||||
5814 | my $self = shift; | ||||
5815 | $self->close_html_file(); | ||||
5816 | return; | ||||
5817 | } | ||||
5818 | |||||
5819 | sub write_line { | ||||
5820 | |||||
5821 | my $self = shift; | ||||
5822 | return unless $self->{_html_file_opened}; | ||||
5823 | my $html_pre_fh = $self->{_html_pre_fh}; | ||||
5824 | my ($line_of_tokens) = @_; | ||||
5825 | my $line_type = $line_of_tokens->{_line_type}; | ||||
5826 | my $input_line = $line_of_tokens->{_line_text}; | ||||
5827 | my $line_number = $line_of_tokens->{_line_number}; | ||||
5828 | chomp $input_line; | ||||
5829 | |||||
5830 | # markup line of code.. | ||||
5831 | my $html_line; | ||||
5832 | if ( $line_type eq 'CODE' ) { | ||||
5833 | my $rtoken_type = $line_of_tokens->{_rtoken_type}; | ||||
5834 | my $rtokens = $line_of_tokens->{_rtokens}; | ||||
5835 | my $rlevels = $line_of_tokens->{_rlevels}; | ||||
5836 | |||||
5837 | if ( $input_line =~ /(^\s*)/ ) { | ||||
5838 | $html_line = $1; | ||||
5839 | } | ||||
5840 | else { | ||||
5841 | $html_line = ""; | ||||
5842 | } | ||||
5843 | my ($rcolored_tokens) = | ||||
5844 | $self->markup_tokens( $rtokens, $rtoken_type, $rlevels ); | ||||
5845 | $html_line .= join '', @$rcolored_tokens; | ||||
5846 | } | ||||
5847 | |||||
5848 | # markup line of non-code.. | ||||
5849 | else { | ||||
5850 | my $line_character; | ||||
5851 | if ( $line_type eq 'HERE' ) { $line_character = 'H' } | ||||
5852 | elsif ( $line_type eq 'HERE_END' ) { $line_character = 'h' } | ||||
5853 | elsif ( $line_type eq 'FORMAT' ) { $line_character = 'H' } | ||||
5854 | elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' } | ||||
5855 | elsif ( $line_type eq 'SYSTEM' ) { $line_character = 'c' } | ||||
5856 | elsif ( $line_type eq 'END_START' ) { | ||||
5857 | $line_character = 'k'; | ||||
5858 | $self->add_toc_item( '__END__', '__END__' ); | ||||
5859 | } | ||||
5860 | elsif ( $line_type eq 'DATA_START' ) { | ||||
5861 | $line_character = 'k'; | ||||
5862 | $self->add_toc_item( '__DATA__', '__DATA__' ); | ||||
5863 | } | ||||
5864 | elsif ( $line_type =~ /^POD/ ) { | ||||
5865 | $line_character = 'P'; | ||||
5866 | if ( $rOpts->{'pod2html'} ) { | ||||
5867 | my $html_pod_fh = $self->{_html_pod_fh}; | ||||
5868 | if ( $line_type eq 'POD_START' ) { | ||||
5869 | |||||
5870 | my $rpre_string_stack = $self->{_rpre_string_stack}; | ||||
5871 | my $rpre_string = $rpre_string_stack->[-1]; | ||||
5872 | |||||
5873 | # if we have written any non-blank lines to the | ||||
5874 | # current pre section, start writing to a new output | ||||
5875 | # string | ||||
5876 | if ( $$rpre_string =~ /\S/ ) { | ||||
5877 | my $pre_string; | ||||
5878 | $html_pre_fh = | ||||
5879 | Perl::Tidy::IOScalar->new( \$pre_string, 'w' ); | ||||
5880 | $self->{_html_pre_fh} = $html_pre_fh; | ||||
5881 | push @$rpre_string_stack, \$pre_string; | ||||
5882 | |||||
5883 | # leave a marker in the pod stream so we know | ||||
5884 | # where to put the pre section we just | ||||
5885 | # finished. | ||||
5886 | my $for_html = '=for html'; # don't confuse pod utils | ||||
5887 | $html_pod_fh->print(<<EOM); | ||||
5888 | |||||
5889 | $for_html | ||||
5890 | <!-- pERLTIDY sECTION --> | ||||
5891 | |||||
5892 | EOM | ||||
5893 | } | ||||
5894 | |||||
5895 | # otherwise, just clear the current string and start | ||||
5896 | # over | ||||
5897 | else { | ||||
5898 | $$rpre_string = ""; | ||||
5899 | $html_pod_fh->print("\n"); | ||||
5900 | } | ||||
5901 | } | ||||
5902 | $html_pod_fh->print( $input_line . "\n" ); | ||||
5903 | if ( $line_type eq 'POD_END' ) { | ||||
5904 | $self->{_pod_cut_count}++; | ||||
5905 | $html_pod_fh->print("\n"); | ||||
5906 | } | ||||
5907 | return; | ||||
5908 | } | ||||
5909 | } | ||||
5910 | else { $line_character = 'Q' } | ||||
5911 | $html_line = $self->markup_html_element( $input_line, $line_character ); | ||||
5912 | } | ||||
5913 | |||||
5914 | # add the line number if requested | ||||
5915 | if ( $rOpts->{'html-line-numbers'} ) { | ||||
5916 | my $extra_space .= | ||||
5917 | ( $line_number < 10 ) ? " " | ||||
5918 | : ( $line_number < 100 ) ? " " | ||||
5919 | : ( $line_number < 1000 ) ? " " | ||||
5920 | : ""; | ||||
5921 | $html_line = $extra_space . $line_number . " " . $html_line; | ||||
5922 | } | ||||
5923 | |||||
5924 | # write the line | ||||
5925 | $html_pre_fh->print("$html_line\n"); | ||||
5926 | } | ||||
5927 | |||||
5928 | ##################################################################### | ||||
5929 | # | ||||
5930 | # The Perl::Tidy::Formatter package adds indentation, whitespace, and | ||||
5931 | # line breaks to the token stream | ||||
5932 | # | ||||
5933 | # WARNING: This is not a real class for speed reasons. Only one | ||||
5934 | # Formatter may be used. | ||||
5935 | # | ||||
5936 | ##################################################################### | ||||
5937 | |||||
5938 | package Perl::Tidy::Formatter; | ||||
5939 | |||||
5940 | # spent 8µs within Perl::Tidy::Formatter::BEGIN@5940 which was called:
# once (8µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 5977 | ||||
5941 | |||||
5942 | # Caution: these debug flags produce a lot of output | ||||
5943 | # They should all be 0 except when debugging small scripts | ||||
5944 | 2 | 26µs | 2 | 93µs | # spent 51µs (10+42) within Perl::Tidy::Formatter::BEGIN@5944 which was called:
# once (10µs+42µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 5944 # spent 51µs making 1 call to Perl::Tidy::Formatter::BEGIN@5944
# spent 42µs making 1 call to constant::import |
5945 | 2 | 29µs | 2 | 78µs | # spent 43µs (8+35) within Perl::Tidy::Formatter::BEGIN@5945 which was called:
# once (8µs+35µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 5945 # spent 43µs making 1 call to Perl::Tidy::Formatter::BEGIN@5945
# spent 35µs making 1 call to constant::import |
5946 | 2 | 23µs | 2 | 77µs | # spent 42µs (8+34) within Perl::Tidy::Formatter::BEGIN@5946 which was called:
# once (8µs+34µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 5946 # spent 42µs making 1 call to Perl::Tidy::Formatter::BEGIN@5946
# spent 34µs making 1 call to constant::import |
5947 | 2 | 23µs | 2 | 82µs | # spent 45µs (7+37) within Perl::Tidy::Formatter::BEGIN@5947 which was called:
# once (7µs+37µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 5947 # spent 45µs making 1 call to Perl::Tidy::Formatter::BEGIN@5947
# spent 37µs making 1 call to constant::import |
5948 | 2 | 29µs | 2 | 73µs | # spent 40µs (8+33) within Perl::Tidy::Formatter::BEGIN@5948 which was called:
# once (8µs+33µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 5948 # spent 40µs making 1 call to Perl::Tidy::Formatter::BEGIN@5948
# spent 32µs making 1 call to constant::import |
5949 | 2 | 32µs | 2 | 83µs | # spent 45µs (8+38) within Perl::Tidy::Formatter::BEGIN@5949 which was called:
# once (8µs+38µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 5949 # spent 45µs making 1 call to Perl::Tidy::Formatter::BEGIN@5949
# spent 38µs making 1 call to constant::import |
5950 | 2 | 30µs | 2 | 78µs | # spent 48µs (17+31) within Perl::Tidy::Formatter::BEGIN@5950 which was called:
# once (17µs+31µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 5950 # spent 48µs making 1 call to Perl::Tidy::Formatter::BEGIN@5950
# spent 31µs making 1 call to constant::import |
5951 | 2 | 20µs | 2 | 69µs | # spent 38µs (7+31) within Perl::Tidy::Formatter::BEGIN@5951 which was called:
# once (7µs+31µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 5951 # spent 38µs making 1 call to Perl::Tidy::Formatter::BEGIN@5951
# spent 31µs making 1 call to constant::import |
5952 | 2 | 21µs | 2 | 70µs | # spent 40µs (10+30) within Perl::Tidy::Formatter::BEGIN@5952 which was called:
# once (10µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 5952 # spent 40µs making 1 call to Perl::Tidy::Formatter::BEGIN@5952
# spent 30µs making 1 call to constant::import |
5953 | 2 | 23µs | 2 | 67µs | # spent 37µs (6+30) within Perl::Tidy::Formatter::BEGIN@5953 which was called:
# once (6µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 5953 # spent 37µs making 1 call to Perl::Tidy::Formatter::BEGIN@5953
# spent 30µs making 1 call to constant::import |
5954 | 2 | 19µs | 2 | 66µs | # spent 36µs (6+30) within Perl::Tidy::Formatter::BEGIN@5954 which was called:
# once (6µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 5954 # spent 36µs making 1 call to Perl::Tidy::Formatter::BEGIN@5954
# spent 30µs making 1 call to constant::import |
5955 | 2 | 18µs | 2 | 65µs | # spent 36µs (6+29) within Perl::Tidy::Formatter::BEGIN@5955 which was called:
# once (6µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 5955 # spent 36µs making 1 call to Perl::Tidy::Formatter::BEGIN@5955
# spent 29µs making 1 call to constant::import |
5956 | 2 | 19µs | 2 | 67µs | # spent 38µs (9+29) within Perl::Tidy::Formatter::BEGIN@5956 which was called:
# once (9µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 5956 # spent 38µs making 1 call to Perl::Tidy::Formatter::BEGIN@5956
# spent 29µs making 1 call to constant::import |
5957 | 2 | 157µs | 2 | 68µs | # spent 37µs (6+31) within Perl::Tidy::Formatter::BEGIN@5957 which was called:
# once (6µs+31µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 5957 # spent 37µs making 1 call to Perl::Tidy::Formatter::BEGIN@5957
# spent 31µs making 1 call to constant::import |
5958 | |||||
5959 | my $debug_warning = sub { | ||||
5960 | print STDOUT "FORMATTER_DEBUGGING with key $_[0]\n"; | ||||
5961 | 1 | 2µs | }; | ||
5962 | |||||
5963 | FORMATTER_DEBUG_FLAG_RECOMBINE && $debug_warning->('RECOMBINE'); | ||||
5964 | 1 | 0s | FORMATTER_DEBUG_FLAG_BOND_TABLES && $debug_warning->('BOND_TABLES'); | ||
5965 | FORMATTER_DEBUG_FLAG_BOND && $debug_warning->('BOND'); | ||||
5966 | 1 | 0s | FORMATTER_DEBUG_FLAG_BREAK && $debug_warning->('BREAK'); | ||
5967 | FORMATTER_DEBUG_FLAG_CI && $debug_warning->('CI'); | ||||
5968 | 1 | 0s | FORMATTER_DEBUG_FLAG_FLUSH && $debug_warning->('FLUSH'); | ||
5969 | FORMATTER_DEBUG_FLAG_FORCE && $debug_warning->('FORCE'); | ||||
5970 | 1 | 0s | FORMATTER_DEBUG_FLAG_LIST && $debug_warning->('LIST'); | ||
5971 | FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK'); | ||||
5972 | 1 | 0s | FORMATTER_DEBUG_FLAG_OUTPUT && $debug_warning->('OUTPUT'); | ||
5973 | FORMATTER_DEBUG_FLAG_SPARSE && $debug_warning->('SPARSE'); | ||||
5974 | 1 | 0s | FORMATTER_DEBUG_FLAG_STORE && $debug_warning->('STORE'); | ||
5975 | FORMATTER_DEBUG_FLAG_UNDOBP && $debug_warning->('UNDOBP'); | ||||
5976 | 1 | 6µs | FORMATTER_DEBUG_FLAG_WHITE && $debug_warning->('WHITE'); | ||
5977 | 1 | 18µs | 1 | 8µs | } # spent 8µs making 1 call to Perl::Tidy::Formatter::BEGIN@5940 |
5978 | |||||
5979 | 2 | 165µs | 2 | 74µs | # spent 41µs (9+33) within Perl::Tidy::Formatter::BEGIN@5979 which was called:
# once (9µs+33µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 5979 # spent 41µs making 1 call to Perl::Tidy::Formatter::BEGIN@5979
# spent 33µs making 1 call to Exporter::import |
5980 | 1 | 500ns | # spent 2.07ms (10µs+2.06) within Perl::Tidy::Formatter::BEGIN@5980 which was called:
# once (10µs+2.06ms) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 6196 | ||
5981 | |||||
5982 | @gnu_stack | ||||
5983 | $max_gnu_stack_index | ||||
5984 | $gnu_position_predictor | ||||
5985 | $line_start_index_to_go | ||||
5986 | $last_indentation_written | ||||
5987 | $last_unadjusted_indentation | ||||
5988 | $last_leading_token | ||||
5989 | $last_output_short_opening_token | ||||
5990 | |||||
5991 | $saw_VERSION_in_this_file | ||||
5992 | $saw_END_or_DATA_ | ||||
5993 | |||||
5994 | @gnu_item_list | ||||
5995 | $max_gnu_item_index | ||||
5996 | $gnu_sequence_number | ||||
5997 | $last_output_indentation | ||||
5998 | %last_gnu_equals | ||||
5999 | %gnu_comma_count | ||||
6000 | %gnu_arrow_count | ||||
6001 | |||||
6002 | @block_type_to_go | ||||
6003 | @type_sequence_to_go | ||||
6004 | @container_environment_to_go | ||||
6005 | @bond_strength_to_go | ||||
6006 | @forced_breakpoint_to_go | ||||
6007 | @token_lengths_to_go | ||||
6008 | @summed_lengths_to_go | ||||
6009 | @levels_to_go | ||||
6010 | @leading_spaces_to_go | ||||
6011 | @reduced_spaces_to_go | ||||
6012 | @matching_token_to_go | ||||
6013 | @mate_index_to_go | ||||
6014 | @nesting_blocks_to_go | ||||
6015 | @ci_levels_to_go | ||||
6016 | @nesting_depth_to_go | ||||
6017 | @nobreak_to_go | ||||
6018 | @old_breakpoint_to_go | ||||
6019 | @tokens_to_go | ||||
6020 | @types_to_go | ||||
6021 | @inext_to_go | ||||
6022 | @iprev_to_go | ||||
6023 | |||||
6024 | %saved_opening_indentation | ||||
6025 | |||||
6026 | $max_index_to_go | ||||
6027 | $comma_count_in_batch | ||||
6028 | $old_line_count_in_batch | ||||
6029 | $last_nonblank_index_to_go | ||||
6030 | $last_nonblank_type_to_go | ||||
6031 | $last_nonblank_token_to_go | ||||
6032 | $last_last_nonblank_index_to_go | ||||
6033 | $last_last_nonblank_type_to_go | ||||
6034 | $last_last_nonblank_token_to_go | ||||
6035 | @nonblank_lines_at_depth | ||||
6036 | $starting_in_quote | ||||
6037 | $ending_in_quote | ||||
6038 | @whitespace_level_stack | ||||
6039 | $whitespace_last_level | ||||
6040 | |||||
6041 | $in_format_skipping_section | ||||
6042 | $format_skipping_pattern_begin | ||||
6043 | $format_skipping_pattern_end | ||||
6044 | |||||
6045 | $forced_breakpoint_count | ||||
6046 | $forced_breakpoint_undo_count | ||||
6047 | @forced_breakpoint_undo_stack | ||||
6048 | %postponed_breakpoint | ||||
6049 | |||||
6050 | $tabbing | ||||
6051 | $embedded_tab_count | ||||
6052 | $first_embedded_tab_at | ||||
6053 | $last_embedded_tab_at | ||||
6054 | $deleted_semicolon_count | ||||
6055 | $first_deleted_semicolon_at | ||||
6056 | $last_deleted_semicolon_at | ||||
6057 | $added_semicolon_count | ||||
6058 | $first_added_semicolon_at | ||||
6059 | $last_added_semicolon_at | ||||
6060 | $first_tabbing_disagreement | ||||
6061 | $last_tabbing_disagreement | ||||
6062 | $in_tabbing_disagreement | ||||
6063 | $tabbing_disagreement_count | ||||
6064 | $input_line_tabbing | ||||
6065 | |||||
6066 | $last_line_type | ||||
6067 | $last_line_leading_type | ||||
6068 | $last_line_leading_level | ||||
6069 | $last_last_line_leading_level | ||||
6070 | |||||
6071 | %block_leading_text | ||||
6072 | %block_opening_line_number | ||||
6073 | $csc_new_statement_ok | ||||
6074 | $csc_last_label | ||||
6075 | %csc_block_label | ||||
6076 | $accumulating_text_for_block | ||||
6077 | $leading_block_text | ||||
6078 | $rleading_block_if_elsif_text | ||||
6079 | $leading_block_text_level | ||||
6080 | $leading_block_text_length_exceeded | ||||
6081 | $leading_block_text_line_length | ||||
6082 | $leading_block_text_line_number | ||||
6083 | $closing_side_comment_prefix_pattern | ||||
6084 | $closing_side_comment_list_pattern | ||||
6085 | |||||
6086 | $last_nonblank_token | ||||
6087 | $last_nonblank_type | ||||
6088 | $last_last_nonblank_token | ||||
6089 | $last_last_nonblank_type | ||||
6090 | $last_nonblank_block_type | ||||
6091 | $last_output_level | ||||
6092 | %is_do_follower | ||||
6093 | %is_if_brace_follower | ||||
6094 | %space_after_keyword | ||||
6095 | $rbrace_follower | ||||
6096 | $looking_for_else | ||||
6097 | %is_last_next_redo_return | ||||
6098 | %is_other_brace_follower | ||||
6099 | %is_else_brace_follower | ||||
6100 | %is_anon_sub_brace_follower | ||||
6101 | %is_anon_sub_1_brace_follower | ||||
6102 | %is_sort_map_grep | ||||
6103 | %is_sort_map_grep_eval | ||||
6104 | %is_sort_map_grep_eval_do | ||||
6105 | %is_block_without_semicolon | ||||
6106 | %is_if_unless | ||||
6107 | %is_and_or | ||||
6108 | %is_assignment | ||||
6109 | %is_chain_operator | ||||
6110 | %is_if_unless_and_or_last_next_redo_return | ||||
6111 | |||||
6112 | @has_broken_sublist | ||||
6113 | @dont_align | ||||
6114 | @want_comma_break | ||||
6115 | |||||
6116 | $is_static_block_comment | ||||
6117 | $index_start_one_line_block | ||||
6118 | $semicolons_before_block_self_destruct | ||||
6119 | $index_max_forced_break | ||||
6120 | $input_line_number | ||||
6121 | $diagnostics_object | ||||
6122 | $vertical_aligner_object | ||||
6123 | $logger_object | ||||
6124 | $file_writer_object | ||||
6125 | $formatter_self | ||||
6126 | @ci_stack | ||||
6127 | $last_line_had_side_comment | ||||
6128 | %want_break_before | ||||
6129 | %outdent_keyword | ||||
6130 | $static_block_comment_pattern | ||||
6131 | $static_side_comment_pattern | ||||
6132 | %opening_vertical_tightness | ||||
6133 | %closing_vertical_tightness | ||||
6134 | %closing_token_indentation | ||||
6135 | $some_closing_token_indentation | ||||
6136 | |||||
6137 | %opening_token_right | ||||
6138 | %stack_opening_token | ||||
6139 | %stack_closing_token | ||||
6140 | |||||
6141 | $block_brace_vertical_tightness_pattern | ||||
6142 | |||||
6143 | $rOpts_add_newlines | ||||
6144 | $rOpts_add_whitespace | ||||
6145 | $rOpts_block_brace_tightness | ||||
6146 | $rOpts_block_brace_vertical_tightness | ||||
6147 | $rOpts_brace_left_and_indent | ||||
6148 | $rOpts_comma_arrow_breakpoints | ||||
6149 | $rOpts_break_at_old_keyword_breakpoints | ||||
6150 | $rOpts_break_at_old_comma_breakpoints | ||||
6151 | $rOpts_break_at_old_logical_breakpoints | ||||
6152 | $rOpts_break_at_old_ternary_breakpoints | ||||
6153 | $rOpts_break_at_old_attribute_breakpoints | ||||
6154 | $rOpts_closing_side_comment_else_flag | ||||
6155 | $rOpts_closing_side_comment_maximum_text | ||||
6156 | $rOpts_continuation_indentation | ||||
6157 | $rOpts_cuddled_else | ||||
6158 | $rOpts_delete_old_whitespace | ||||
6159 | $rOpts_fuzzy_line_length | ||||
6160 | $rOpts_indent_columns | ||||
6161 | $rOpts_line_up_parentheses | ||||
6162 | $rOpts_maximum_fields_per_table | ||||
6163 | $rOpts_maximum_line_length | ||||
6164 | $rOpts_variable_maximum_line_length | ||||
6165 | $rOpts_short_concatenation_item_length | ||||
6166 | $rOpts_keep_old_blank_lines | ||||
6167 | $rOpts_ignore_old_breakpoints | ||||
6168 | $rOpts_format_skipping | ||||
6169 | $rOpts_space_function_paren | ||||
6170 | $rOpts_space_keyword_paren | ||||
6171 | $rOpts_keep_interior_semicolons | ||||
6172 | $rOpts_ignore_side_comment_lengths | ||||
6173 | $rOpts_stack_closing_block_brace | ||||
6174 | $rOpts_whitespace_cycle | ||||
6175 | $rOpts_tight_secret_operators | ||||
6176 | |||||
6177 | %is_opening_type | ||||
6178 | %is_closing_type | ||||
6179 | %is_keyword_returning_list | ||||
6180 | %tightness | ||||
6181 | %matching_token | ||||
6182 | $rOpts | ||||
6183 | %right_bond_strength | ||||
6184 | %left_bond_strength | ||||
6185 | %binary_ws_rules | ||||
6186 | %want_left_space | ||||
6187 | %want_right_space | ||||
6188 | %is_digraph | ||||
6189 | %is_trigraph | ||||
6190 | $bli_pattern | ||||
6191 | $bli_list_string | ||||
6192 | %is_closing_type | ||||
6193 | %is_opening_type | ||||
6194 | %is_closing_token | ||||
6195 | %is_opening_token | ||||
6196 | 1 | 284µs | 2 | 4.13ms | }; # spent 2.07ms making 1 call to Perl::Tidy::Formatter::BEGIN@5980
# spent 2.06ms making 1 call to vars::import |
6197 | |||||
6198 | # spent 93µs within Perl::Tidy::Formatter::BEGIN@6198 which was called:
# once (93µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 6279 | ||||
6199 | |||||
6200 | # default list of block types for which -bli would apply | ||||
6201 | 1 | 500ns | $bli_list_string = 'if else elsif unless while for foreach do : sub'; | ||
6202 | |||||
6203 | 1 | 4µs | @_ = qw( | ||
6204 | .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <> | ||||
6205 | <= >= == =~ !~ != ++ -- /= x= | ||||
6206 | ); | ||||
6207 | 1 | 11µs | @is_digraph{@_} = (1) x scalar(@_); | ||
6208 | |||||
6209 | 1 | 5µs | @_ = qw( ... **= <<= >>= &&= ||= //= <=> ); | ||
6210 | 1 | 3µs | @is_trigraph{@_} = (1) x scalar(@_); | ||
6211 | |||||
6212 | 1 | 4µs | @_ = qw( | ||
6213 | = **= += *= &= <<= &&= | ||||
6214 | -= /= |= >>= ||= //= | ||||
6215 | .= %= ^= | ||||
6216 | x= | ||||
6217 | ); | ||||
6218 | 1 | 4µs | @is_assignment{@_} = (1) x scalar(@_); | ||
6219 | |||||
6220 | 1 | 3µs | @_ = qw( | ||
6221 | grep | ||||
6222 | keys | ||||
6223 | map | ||||
6224 | reverse | ||||
6225 | sort | ||||
6226 | split | ||||
6227 | ); | ||||
6228 | 1 | 2µs | @is_keyword_returning_list{@_} = (1) x scalar(@_); | ||
6229 | |||||
6230 | 1 | 3µs | @_ = qw(is if unless and or err last next redo return); | ||
6231 | 1 | 7µs | @is_if_unless_and_or_last_next_redo_return{@_} = (1) x scalar(@_); | ||
6232 | |||||
6233 | 1 | 2µs | @_ = qw(last next redo return); | ||
6234 | 1 | 1µs | @is_last_next_redo_return{@_} = (1) x scalar(@_); | ||
6235 | |||||
6236 | 1 | 1µs | @_ = qw(sort map grep); | ||
6237 | 1 | 900ns | @is_sort_map_grep{@_} = (1) x scalar(@_); | ||
6238 | |||||
6239 | 1 | 1µs | @_ = qw(sort map grep eval); | ||
6240 | 1 | 1µs | @is_sort_map_grep_eval{@_} = (1) x scalar(@_); | ||
6241 | |||||
6242 | 1 | 1µs | @_ = qw(sort map grep eval do); | ||
6243 | 1 | 1µs | @is_sort_map_grep_eval_do{@_} = (1) x scalar(@_); | ||
6244 | |||||
6245 | 1 | 900ns | @_ = qw(if unless); | ||
6246 | 1 | 700ns | @is_if_unless{@_} = (1) x scalar(@_); | ||
6247 | |||||
6248 | 1 | 800ns | @_ = qw(and or err); | ||
6249 | 1 | 700ns | @is_and_or{@_} = (1) x scalar(@_); | ||
6250 | |||||
6251 | # Identify certain operators which often occur in chains. | ||||
6252 | # Note: the minus (-) causes a side effect of padding of the first line in | ||||
6253 | # something like this (by sub set_logical_padding): | ||||
6254 | # Checkbutton => 'Transmission checked', | ||||
6255 | # -variable => \$TRANS | ||||
6256 | # This usually improves appearance so it seems ok. | ||||
6257 | 1 | 2µs | @_ = qw(&& || and or : ? . + - * /); | ||
6258 | 1 | 3µs | @is_chain_operator{@_} = (1) x scalar(@_); | ||
6259 | |||||
6260 | # We can remove semicolons after blocks preceded by these keywords | ||||
6261 | 1 | 4µs | @_ = | ||
6262 | qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else | ||||
6263 | unless while until for foreach given when default); | ||||
6264 | 1 | 6µs | @is_block_without_semicolon{@_} = (1) x scalar(@_); | ||
6265 | |||||
6266 | # 'L' is token for opening { at hash key | ||||
6267 | 1 | 2µs | @_ = qw" L { ( [ "; | ||
6268 | 1 | 1µs | @is_opening_type{@_} = (1) x scalar(@_); | ||
6269 | |||||
6270 | # 'R' is token for closing } at hash key | ||||
6271 | 1 | 1µs | @_ = qw" R } ) ] "; | ||
6272 | 1 | 1µs | @is_closing_type{@_} = (1) x scalar(@_); | ||
6273 | |||||
6274 | 1 | 1µs | @_ = qw" { ( [ "; | ||
6275 | 1 | 700ns | @is_opening_token{@_} = (1) x scalar(@_); | ||
6276 | |||||
6277 | 1 | 1µs | @_ = qw" } ) ] "; | ||
6278 | 1 | 6µs | @is_closing_token{@_} = (1) x scalar(@_); | ||
6279 | 1 | 44µs | 1 | 93µs | } # spent 93µs making 1 call to Perl::Tidy::Formatter::BEGIN@6198 |
6280 | |||||
6281 | # whitespace codes | ||||
6282 | 2 | 23µs | 2 | 82µs | # spent 45µs (8+37) within Perl::Tidy::Formatter::BEGIN@6282 which was called:
# once (8µs+37µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 6282 # spent 45µs making 1 call to Perl::Tidy::Formatter::BEGIN@6282
# spent 37µs making 1 call to constant::import |
6283 | 2 | 23µs | 2 | 69µs | # spent 38µs (7+31) within Perl::Tidy::Formatter::BEGIN@6283 which was called:
# once (7µs+31µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 6283 # spent 38µs making 1 call to Perl::Tidy::Formatter::BEGIN@6283
# spent 31µs making 1 call to constant::import |
6284 | 2 | 22µs | 2 | 67µs | # spent 36µs (6+30) within Perl::Tidy::Formatter::BEGIN@6284 which was called:
# once (6µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 6284 # spent 36µs making 1 call to Perl::Tidy::Formatter::BEGIN@6284
# spent 30µs making 1 call to constant::import |
6285 | |||||
6286 | # Token bond strengths. | ||||
6287 | 2 | 20µs | 2 | 65µs | # spent 36µs (6+30) within Perl::Tidy::Formatter::BEGIN@6287 which was called:
# once (6µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 6287 # spent 36µs making 1 call to Perl::Tidy::Formatter::BEGIN@6287
# spent 30µs making 1 call to constant::import |
6288 | 2 | 20µs | 2 | 67µs | # spent 37µs (7+30) within Perl::Tidy::Formatter::BEGIN@6288 which was called:
# once (7µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 6288 # spent 37µs making 1 call to Perl::Tidy::Formatter::BEGIN@6288
# spent 30µs making 1 call to constant::import |
6289 | 2 | 20µs | 2 | 66µs | # spent 36µs (7+30) within Perl::Tidy::Formatter::BEGIN@6289 which was called:
# once (7µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 6289 # spent 36µs making 1 call to Perl::Tidy::Formatter::BEGIN@6289
# spent 30µs making 1 call to constant::import |
6290 | 2 | 20µs | 2 | 64µs | # spent 35µs (6+29) within Perl::Tidy::Formatter::BEGIN@6290 which was called:
# once (6µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 6290 # spent 35µs making 1 call to Perl::Tidy::Formatter::BEGIN@6290
# spent 29µs making 1 call to constant::import |
6291 | 2 | 20µs | 2 | 64µs | # spent 35µs (6+29) within Perl::Tidy::Formatter::BEGIN@6291 which was called:
# once (6µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 6291 # spent 35µs making 1 call to Perl::Tidy::Formatter::BEGIN@6291
# spent 29µs making 1 call to constant::import |
6292 | 2 | 21µs | 2 | 66µs | # spent 36µs (7+30) within Perl::Tidy::Formatter::BEGIN@6292 which was called:
# once (7µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 6292 # spent 36µs making 1 call to Perl::Tidy::Formatter::BEGIN@6292
# spent 30µs making 1 call to constant::import |
6293 | |||||
6294 | # values for testing indexes in output array | ||||
6295 | 2 | 28µs | 2 | 92µs | # spent 49µs (7+42) within Perl::Tidy::Formatter::BEGIN@6295 which was called:
# once (7µs+42µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 6295 # spent 49µs making 1 call to Perl::Tidy::Formatter::BEGIN@6295
# spent 42µs making 1 call to constant::import |
6296 | |||||
6297 | # Maximum number of little messages; probably need not be changed. | ||||
6298 | 2 | 24µs | 2 | 77µs | # spent 42µs (7+35) within Perl::Tidy::Formatter::BEGIN@6298 which was called:
# once (7µs+35µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 6298 # spent 42µs making 1 call to Perl::Tidy::Formatter::BEGIN@6298
# spent 35µs making 1 call to constant::import |
6299 | |||||
6300 | # increment between sequence numbers for each type | ||||
6301 | # For example, ?: pairs might have numbers 7,11,15,... | ||||
6302 | 2 | 5.86ms | 2 | 75µs | # spent 41µs (7+34) within Perl::Tidy::Formatter::BEGIN@6302 which was called:
# once (7µs+34µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 6302 # spent 41µs making 1 call to Perl::Tidy::Formatter::BEGIN@6302
# spent 34µs making 1 call to constant::import |
6303 | |||||
6304 | { | ||||
6305 | |||||
6306 | # methods to count instances | ||||
6307 | 2 | 600ns | my $_count = 0; | ||
6308 | sub get_count { $_count; } | ||||
6309 | sub _increment_count { ++$_count } | ||||
6310 | sub _decrement_count { --$_count } | ||||
6311 | } | ||||
6312 | |||||
6313 | sub trim { | ||||
6314 | |||||
6315 | # trim leading and trailing whitespace from a string | ||||
6316 | $_[0] =~ s/\s+$//; | ||||
6317 | $_[0] =~ s/^\s+//; | ||||
6318 | return $_[0]; | ||||
6319 | } | ||||
6320 | |||||
6321 | sub max { | ||||
6322 | my $max = shift; | ||||
6323 | foreach (@_) { | ||||
6324 | $max = ( $max < $_ ) ? $_ : $max; | ||||
6325 | } | ||||
6326 | return $max; | ||||
6327 | } | ||||
6328 | |||||
6329 | sub min { | ||||
6330 | my $min = shift; | ||||
6331 | foreach (@_) { | ||||
6332 | $min = ( $min > $_ ) ? $_ : $min; | ||||
6333 | } | ||||
6334 | return $min; | ||||
6335 | } | ||||
6336 | |||||
6337 | sub split_words { | ||||
6338 | |||||
6339 | # given a string containing words separated by whitespace, | ||||
6340 | # return the list of words | ||||
6341 | my ($str) = @_; | ||||
6342 | return unless $str; | ||||
6343 | $str =~ s/\s+$//; | ||||
6344 | $str =~ s/^\s+//; | ||||
6345 | return split( /\s+/, $str ); | ||||
6346 | } | ||||
6347 | |||||
6348 | # interface to Perl::Tidy::Logger routines | ||||
6349 | sub warning { | ||||
6350 | if ($logger_object) { | ||||
6351 | $logger_object->warning(@_); | ||||
6352 | } | ||||
6353 | } | ||||
6354 | |||||
6355 | sub complain { | ||||
6356 | if ($logger_object) { | ||||
6357 | $logger_object->complain(@_); | ||||
6358 | } | ||||
6359 | } | ||||
6360 | |||||
6361 | sub write_logfile_entry { | ||||
6362 | if ($logger_object) { | ||||
6363 | $logger_object->write_logfile_entry(@_); | ||||
6364 | } | ||||
6365 | } | ||||
6366 | |||||
6367 | sub black_box { | ||||
6368 | if ($logger_object) { | ||||
6369 | $logger_object->black_box(@_); | ||||
6370 | } | ||||
6371 | } | ||||
6372 | |||||
6373 | sub report_definite_bug { | ||||
6374 | if ($logger_object) { | ||||
6375 | $logger_object->report_definite_bug(); | ||||
6376 | } | ||||
6377 | } | ||||
6378 | |||||
6379 | sub get_saw_brace_error { | ||||
6380 | if ($logger_object) { | ||||
6381 | $logger_object->get_saw_brace_error(); | ||||
6382 | } | ||||
6383 | } | ||||
6384 | |||||
6385 | sub we_are_at_the_last_line { | ||||
6386 | if ($logger_object) { | ||||
6387 | $logger_object->we_are_at_the_last_line(); | ||||
6388 | } | ||||
6389 | } | ||||
6390 | |||||
6391 | # interface to Perl::Tidy::Diagnostics routine | ||||
6392 | sub write_diagnostics { | ||||
6393 | |||||
6394 | if ($diagnostics_object) { | ||||
6395 | $diagnostics_object->write_diagnostics(@_); | ||||
6396 | } | ||||
6397 | } | ||||
6398 | |||||
6399 | sub get_added_semicolon_count { | ||||
6400 | my $self = shift; | ||||
6401 | return $added_semicolon_count; | ||||
6402 | } | ||||
6403 | |||||
6404 | sub DESTROY { | ||||
6405 | $_[0]->_decrement_count(); | ||||
6406 | } | ||||
6407 | |||||
6408 | sub new { | ||||
6409 | |||||
6410 | my $class = shift; | ||||
6411 | |||||
6412 | # we are given an object with a write_line() method to take lines | ||||
6413 | my %defaults = ( | ||||
6414 | sink_object => undef, | ||||
6415 | diagnostics_object => undef, | ||||
6416 | logger_object => undef, | ||||
6417 | ); | ||||
6418 | my %args = ( %defaults, @_ ); | ||||
6419 | |||||
6420 | $logger_object = $args{logger_object}; | ||||
6421 | $diagnostics_object = $args{diagnostics_object}; | ||||
6422 | |||||
6423 | # we create another object with a get_line() and peek_ahead() method | ||||
6424 | my $sink_object = $args{sink_object}; | ||||
6425 | $file_writer_object = | ||||
6426 | Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object ); | ||||
6427 | |||||
6428 | # initialize the leading whitespace stack to negative levels | ||||
6429 | # so that we can never run off the end of the stack | ||||
6430 | $gnu_position_predictor = 0; # where the current token is predicted to be | ||||
6431 | $max_gnu_stack_index = 0; | ||||
6432 | $max_gnu_item_index = -1; | ||||
6433 | $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 ); | ||||
6434 | @gnu_item_list = (); | ||||
6435 | $last_output_indentation = 0; | ||||
6436 | $last_indentation_written = 0; | ||||
6437 | $last_unadjusted_indentation = 0; | ||||
6438 | $last_leading_token = ""; | ||||
6439 | $last_output_short_opening_token = 0; | ||||
6440 | |||||
6441 | $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'}; | ||||
6442 | $saw_END_or_DATA_ = 0; | ||||
6443 | |||||
6444 | @block_type_to_go = (); | ||||
6445 | @type_sequence_to_go = (); | ||||
6446 | @container_environment_to_go = (); | ||||
6447 | @bond_strength_to_go = (); | ||||
6448 | @forced_breakpoint_to_go = (); | ||||
6449 | @summed_lengths_to_go = (); # line length to start of ith token | ||||
6450 | @token_lengths_to_go = (); | ||||
6451 | @levels_to_go = (); | ||||
6452 | @matching_token_to_go = (); | ||||
6453 | @mate_index_to_go = (); | ||||
6454 | @nesting_blocks_to_go = (); | ||||
6455 | @ci_levels_to_go = (); | ||||
6456 | @nesting_depth_to_go = (0); | ||||
6457 | @nobreak_to_go = (); | ||||
6458 | @old_breakpoint_to_go = (); | ||||
6459 | @tokens_to_go = (); | ||||
6460 | @types_to_go = (); | ||||
6461 | @leading_spaces_to_go = (); | ||||
6462 | @reduced_spaces_to_go = (); | ||||
6463 | @inext_to_go = (); | ||||
6464 | @iprev_to_go = (); | ||||
6465 | |||||
6466 | @whitespace_level_stack = (); | ||||
6467 | $whitespace_last_level = -1; | ||||
6468 | |||||
6469 | @dont_align = (); | ||||
6470 | @has_broken_sublist = (); | ||||
6471 | @want_comma_break = (); | ||||
6472 | |||||
6473 | @ci_stack = (""); | ||||
6474 | $first_tabbing_disagreement = 0; | ||||
6475 | $last_tabbing_disagreement = 0; | ||||
6476 | $tabbing_disagreement_count = 0; | ||||
6477 | $in_tabbing_disagreement = 0; | ||||
6478 | $input_line_tabbing = undef; | ||||
6479 | |||||
6480 | $last_line_type = ""; | ||||
6481 | $last_last_line_leading_level = 0; | ||||
6482 | $last_line_leading_level = 0; | ||||
6483 | $last_line_leading_type = '#'; | ||||
6484 | |||||
6485 | $last_nonblank_token = ';'; | ||||
6486 | $last_nonblank_type = ';'; | ||||
6487 | $last_last_nonblank_token = ';'; | ||||
6488 | $last_last_nonblank_type = ';'; | ||||
6489 | $last_nonblank_block_type = ""; | ||||
6490 | $last_output_level = 0; | ||||
6491 | $looking_for_else = 0; | ||||
6492 | $embedded_tab_count = 0; | ||||
6493 | $first_embedded_tab_at = 0; | ||||
6494 | $last_embedded_tab_at = 0; | ||||
6495 | $deleted_semicolon_count = 0; | ||||
6496 | $first_deleted_semicolon_at = 0; | ||||
6497 | $last_deleted_semicolon_at = 0; | ||||
6498 | $added_semicolon_count = 0; | ||||
6499 | $first_added_semicolon_at = 0; | ||||
6500 | $last_added_semicolon_at = 0; | ||||
6501 | $last_line_had_side_comment = 0; | ||||
6502 | $is_static_block_comment = 0; | ||||
6503 | %postponed_breakpoint = (); | ||||
6504 | |||||
6505 | # variables for adding side comments | ||||
6506 | %block_leading_text = (); | ||||
6507 | %block_opening_line_number = (); | ||||
6508 | $csc_new_statement_ok = 1; | ||||
6509 | %csc_block_label = (); | ||||
6510 | |||||
6511 | %saved_opening_indentation = (); | ||||
6512 | $in_format_skipping_section = 0; | ||||
6513 | |||||
6514 | reset_block_text_accumulator(); | ||||
6515 | |||||
6516 | prepare_for_new_input_lines(); | ||||
6517 | |||||
6518 | $vertical_aligner_object = | ||||
6519 | Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object, | ||||
6520 | $logger_object, $diagnostics_object ); | ||||
6521 | |||||
6522 | if ( $rOpts->{'entab-leading-whitespace'} ) { | ||||
6523 | write_logfile_entry( | ||||
6524 | "Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n" | ||||
6525 | ); | ||||
6526 | } | ||||
6527 | elsif ( $rOpts->{'tabs'} ) { | ||||
6528 | write_logfile_entry("Indentation will be with a tab character\n"); | ||||
6529 | } | ||||
6530 | else { | ||||
6531 | write_logfile_entry( | ||||
6532 | "Indentation will be with $rOpts->{'indent-columns'} spaces\n"); | ||||
6533 | } | ||||
6534 | |||||
6535 | # This was the start of a formatter referent, but object-oriented | ||||
6536 | # coding has turned out to be too slow here. | ||||
6537 | $formatter_self = {}; | ||||
6538 | |||||
6539 | bless $formatter_self, $class; | ||||
6540 | |||||
6541 | # Safety check..this is not a class yet | ||||
6542 | if ( _increment_count() > 1 ) { | ||||
6543 | confess | ||||
6544 | "Attempt to create more than 1 object in $class, which is not a true class yet\n"; | ||||
6545 | } | ||||
6546 | return $formatter_self; | ||||
6547 | } | ||||
6548 | |||||
6549 | sub prepare_for_new_input_lines { | ||||
6550 | |||||
6551 | $gnu_sequence_number++; # increment output batch counter | ||||
6552 | %last_gnu_equals = (); | ||||
6553 | %gnu_comma_count = (); | ||||
6554 | %gnu_arrow_count = (); | ||||
6555 | $line_start_index_to_go = 0; | ||||
6556 | $max_gnu_item_index = UNDEFINED_INDEX; | ||||
6557 | $index_max_forced_break = UNDEFINED_INDEX; | ||||
6558 | $max_index_to_go = UNDEFINED_INDEX; | ||||
6559 | $last_nonblank_index_to_go = UNDEFINED_INDEX; | ||||
6560 | $last_nonblank_type_to_go = ''; | ||||
6561 | $last_nonblank_token_to_go = ''; | ||||
6562 | $last_last_nonblank_index_to_go = UNDEFINED_INDEX; | ||||
6563 | $last_last_nonblank_type_to_go = ''; | ||||
6564 | $last_last_nonblank_token_to_go = ''; | ||||
6565 | $forced_breakpoint_count = 0; | ||||
6566 | $forced_breakpoint_undo_count = 0; | ||||
6567 | $rbrace_follower = undef; | ||||
6568 | $summed_lengths_to_go[0] = 0; | ||||
6569 | $old_line_count_in_batch = 1; | ||||
6570 | $comma_count_in_batch = 0; | ||||
6571 | $starting_in_quote = 0; | ||||
6572 | |||||
6573 | destroy_one_line_block(); | ||||
6574 | } | ||||
6575 | |||||
6576 | sub write_line { | ||||
6577 | |||||
6578 | my $self = shift; | ||||
6579 | my ($line_of_tokens) = @_; | ||||
6580 | |||||
6581 | my $line_type = $line_of_tokens->{_line_type}; | ||||
6582 | my $input_line = $line_of_tokens->{_line_text}; | ||||
6583 | |||||
6584 | if ( $rOpts->{notidy} ) { | ||||
6585 | write_unindented_line($input_line); | ||||
6586 | $last_line_type = $line_type; | ||||
6587 | return; | ||||
6588 | } | ||||
6589 | |||||
6590 | # _line_type codes are: | ||||
6591 | # SYSTEM - system-specific code before hash-bang line | ||||
6592 | # CODE - line of perl code (including comments) | ||||
6593 | # POD_START - line starting pod, such as '=head' | ||||
6594 | # POD - pod documentation text | ||||
6595 | # POD_END - last line of pod section, '=cut' | ||||
6596 | # HERE - text of here-document | ||||
6597 | # HERE_END - last line of here-doc (target word) | ||||
6598 | # FORMAT - format section | ||||
6599 | # FORMAT_END - last line of format section, '.' | ||||
6600 | # DATA_START - __DATA__ line | ||||
6601 | # DATA - unidentified text following __DATA__ | ||||
6602 | # END_START - __END__ line | ||||
6603 | # END - unidentified text following __END__ | ||||
6604 | # ERROR - we are in big trouble, probably not a perl script | ||||
6605 | |||||
6606 | # put a blank line after an =cut which comes before __END__ and __DATA__ | ||||
6607 | # (required by podchecker) | ||||
6608 | if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) { | ||||
6609 | $file_writer_object->reset_consecutive_blank_lines(); | ||||
6610 | if ( $input_line !~ /^\s*$/ ) { want_blank_line() } | ||||
6611 | } | ||||
6612 | |||||
6613 | # handle line of code.. | ||||
6614 | if ( $line_type eq 'CODE' ) { | ||||
6615 | |||||
6616 | # let logger see all non-blank lines of code | ||||
6617 | if ( $input_line !~ /^\s*$/ ) { | ||||
6618 | my $output_line_number = | ||||
6619 | $vertical_aligner_object->get_output_line_number(); | ||||
6620 | black_box( $line_of_tokens, $output_line_number ); | ||||
6621 | } | ||||
6622 | print_line_of_tokens($line_of_tokens); | ||||
6623 | } | ||||
6624 | |||||
6625 | # handle line of non-code.. | ||||
6626 | else { | ||||
6627 | |||||
6628 | # set special flags | ||||
6629 | my $skip_line = 0; | ||||
6630 | my $tee_line = 0; | ||||
6631 | if ( $line_type =~ /^POD/ ) { | ||||
6632 | |||||
6633 | # Pod docs should have a preceding blank line. But stay | ||||
6634 | # out of __END__ and __DATA__ sections, because | ||||
6635 | # the user may be using this section for any purpose whatsoever | ||||
6636 | if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; } | ||||
6637 | if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; } | ||||
6638 | if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// } | ||||
6639 | if ( !$skip_line | ||||
6640 | && $line_type eq 'POD_START' | ||||
6641 | && !$saw_END_or_DATA_ ) | ||||
6642 | { | ||||
6643 | want_blank_line(); | ||||
6644 | } | ||||
6645 | } | ||||
6646 | |||||
6647 | # leave the blank counters in a predictable state | ||||
6648 | # after __END__ or __DATA__ | ||||
6649 | elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) { | ||||
6650 | $file_writer_object->reset_consecutive_blank_lines(); | ||||
6651 | $saw_END_or_DATA_ = 1; | ||||
6652 | } | ||||
6653 | |||||
6654 | # write unindented non-code line | ||||
6655 | if ( !$skip_line ) { | ||||
6656 | if ($tee_line) { $file_writer_object->tee_on() } | ||||
6657 | write_unindented_line($input_line); | ||||
6658 | if ($tee_line) { $file_writer_object->tee_off() } | ||||
6659 | } | ||||
6660 | } | ||||
6661 | $last_line_type = $line_type; | ||||
6662 | } | ||||
6663 | |||||
6664 | sub create_one_line_block { | ||||
6665 | $index_start_one_line_block = $_[0]; | ||||
6666 | $semicolons_before_block_self_destruct = $_[1]; | ||||
6667 | } | ||||
6668 | |||||
6669 | sub destroy_one_line_block { | ||||
6670 | $index_start_one_line_block = UNDEFINED_INDEX; | ||||
6671 | $semicolons_before_block_self_destruct = 0; | ||||
6672 | } | ||||
6673 | |||||
6674 | sub leading_spaces_to_go { | ||||
6675 | |||||
6676 | # return the number of indentation spaces for a token in the output stream; | ||||
6677 | # these were previously stored by 'set_leading_whitespace'. | ||||
6678 | |||||
6679 | my $ii = shift; | ||||
6680 | if ( $ii < 0 ) { $ii = 0 } | ||||
6681 | return get_SPACES( $leading_spaces_to_go[$ii] ); | ||||
6682 | |||||
6683 | } | ||||
6684 | |||||
6685 | sub get_SPACES { | ||||
6686 | |||||
6687 | # return the number of leading spaces associated with an indentation | ||||
6688 | # variable $indentation is either a constant number of spaces or an object | ||||
6689 | # with a get_SPACES method. | ||||
6690 | my $indentation = shift; | ||||
6691 | return ref($indentation) ? $indentation->get_SPACES() : $indentation; | ||||
6692 | } | ||||
6693 | |||||
6694 | sub get_RECOVERABLE_SPACES { | ||||
6695 | |||||
6696 | # return the number of spaces (+ means shift right, - means shift left) | ||||
6697 | # that we would like to shift a group of lines with the same indentation | ||||
6698 | # to get them to line up with their opening parens | ||||
6699 | my $indentation = shift; | ||||
6700 | return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0; | ||||
6701 | } | ||||
6702 | |||||
6703 | sub get_AVAILABLE_SPACES_to_go { | ||||
6704 | |||||
6705 | my $item = $leading_spaces_to_go[ $_[0] ]; | ||||
6706 | |||||
6707 | # return the number of available leading spaces associated with an | ||||
6708 | # indentation variable. $indentation is either a constant number of | ||||
6709 | # spaces or an object with a get_AVAILABLE_SPACES method. | ||||
6710 | return ref($item) ? $item->get_AVAILABLE_SPACES() : 0; | ||||
6711 | } | ||||
6712 | |||||
6713 | sub new_lp_indentation_item { | ||||
6714 | |||||
6715 | # this is an interface to the IndentationItem class | ||||
6716 | my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_; | ||||
6717 | |||||
6718 | # A negative level implies not to store the item in the item_list | ||||
6719 | my $index = 0; | ||||
6720 | if ( $level >= 0 ) { $index = ++$max_gnu_item_index; } | ||||
6721 | |||||
6722 | my $item = Perl::Tidy::IndentationItem->new( | ||||
6723 | $spaces, $level, | ||||
6724 | $ci_level, $available_spaces, | ||||
6725 | $index, $gnu_sequence_number, | ||||
6726 | $align_paren, $max_gnu_stack_index, | ||||
6727 | $line_start_index_to_go, | ||||
6728 | ); | ||||
6729 | |||||
6730 | if ( $level >= 0 ) { | ||||
6731 | $gnu_item_list[$max_gnu_item_index] = $item; | ||||
6732 | } | ||||
6733 | |||||
6734 | return $item; | ||||
6735 | } | ||||
6736 | |||||
6737 | sub set_leading_whitespace { | ||||
6738 | |||||
6739 | # This routine defines leading whitespace | ||||
6740 | # given: the level and continuation_level of a token, | ||||
6741 | # define: space count of leading string which would apply if it | ||||
6742 | # were the first token of a new line. | ||||
6743 | |||||
6744 | my ( $level_abs, $ci_level, $in_continued_quote ) = @_; | ||||
6745 | |||||
6746 | # Adjust levels if necessary to recycle whitespace: | ||||
6747 | # given $level_abs, the absolute level | ||||
6748 | # define $level, a possibly reduced level for whitespace | ||||
6749 | my $level = $level_abs; | ||||
6750 | if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle > 0 ) { | ||||
6751 | if ( $level_abs < $whitespace_last_level ) { | ||||
6752 | pop(@whitespace_level_stack); | ||||
6753 | } | ||||
6754 | if ( !@whitespace_level_stack ) { | ||||
6755 | push @whitespace_level_stack, $level_abs; | ||||
6756 | } | ||||
6757 | elsif ( $level_abs > $whitespace_last_level ) { | ||||
6758 | $level = $whitespace_level_stack[-1] + | ||||
6759 | ( $level_abs - $whitespace_last_level ); | ||||
6760 | |||||
6761 | if ( | ||||
6762 | # 1 Try to break at a block brace | ||||
6763 | ( | ||||
6764 | $level > $rOpts_whitespace_cycle | ||||
6765 | && $last_nonblank_type eq '{' | ||||
6766 | && $last_nonblank_token eq '{' | ||||
6767 | ) | ||||
6768 | |||||
6769 | # 2 Then either a brace or bracket | ||||
6770 | || ( $level > $rOpts_whitespace_cycle + 1 | ||||
6771 | && $last_nonblank_token =~ /^[\{\[]$/ ) | ||||
6772 | |||||
6773 | # 3 Then a paren too | ||||
6774 | || $level > $rOpts_whitespace_cycle + 2 | ||||
6775 | ) | ||||
6776 | { | ||||
6777 | $level = 1; | ||||
6778 | } | ||||
6779 | push @whitespace_level_stack, $level; | ||||
6780 | } | ||||
6781 | $level = $whitespace_level_stack[-1]; | ||||
6782 | } | ||||
6783 | $whitespace_last_level = $level_abs; | ||||
6784 | |||||
6785 | # modify for -bli, which adds one continuation indentation for | ||||
6786 | # opening braces | ||||
6787 | if ( $rOpts_brace_left_and_indent | ||||
6788 | && $max_index_to_go == 0 | ||||
6789 | && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o ) | ||||
6790 | { | ||||
6791 | $ci_level++; | ||||
6792 | } | ||||
6793 | |||||
6794 | # patch to avoid trouble when input file has negative indentation. | ||||
6795 | # other logic should catch this error. | ||||
6796 | if ( $level < 0 ) { $level = 0 } | ||||
6797 | |||||
6798 | #------------------------------------------- | ||||
6799 | # handle the standard indentation scheme | ||||
6800 | #------------------------------------------- | ||||
6801 | unless ($rOpts_line_up_parentheses) { | ||||
6802 | my $space_count = | ||||
6803 | $ci_level * $rOpts_continuation_indentation + | ||||
6804 | $level * $rOpts_indent_columns; | ||||
6805 | my $ci_spaces = | ||||
6806 | ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation; | ||||
6807 | |||||
6808 | if ($in_continued_quote) { | ||||
6809 | $space_count = 0; | ||||
6810 | $ci_spaces = 0; | ||||
6811 | } | ||||
6812 | $leading_spaces_to_go[$max_index_to_go] = $space_count; | ||||
6813 | $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces; | ||||
6814 | return; | ||||
6815 | } | ||||
6816 | |||||
6817 | #------------------------------------------------------------- | ||||
6818 | # handle case of -lp indentation.. | ||||
6819 | #------------------------------------------------------------- | ||||
6820 | |||||
6821 | # The continued_quote flag means that this is the first token of a | ||||
6822 | # line, and it is the continuation of some kind of multi-line quote | ||||
6823 | # or pattern. It requires special treatment because it must have no | ||||
6824 | # added leading whitespace. So we create a special indentation item | ||||
6825 | # which is not in the stack. | ||||
6826 | if ($in_continued_quote) { | ||||
6827 | my $space_count = 0; | ||||
6828 | my $available_space = 0; | ||||
6829 | $level = -1; # flag to prevent storing in item_list | ||||
6830 | $leading_spaces_to_go[$max_index_to_go] = | ||||
6831 | $reduced_spaces_to_go[$max_index_to_go] = | ||||
6832 | new_lp_indentation_item( $space_count, $level, $ci_level, | ||||
6833 | $available_space, 0 ); | ||||
6834 | return; | ||||
6835 | } | ||||
6836 | |||||
6837 | # get the top state from the stack | ||||
6838 | my $space_count = $gnu_stack[$max_gnu_stack_index]->get_SPACES(); | ||||
6839 | my $current_level = $gnu_stack[$max_gnu_stack_index]->get_LEVEL(); | ||||
6840 | my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL(); | ||||
6841 | |||||
6842 | my $type = $types_to_go[$max_index_to_go]; | ||||
6843 | my $token = $tokens_to_go[$max_index_to_go]; | ||||
6844 | my $total_depth = $nesting_depth_to_go[$max_index_to_go]; | ||||
6845 | |||||
6846 | if ( $type eq '{' || $type eq '(' ) { | ||||
6847 | |||||
6848 | $gnu_comma_count{ $total_depth + 1 } = 0; | ||||
6849 | $gnu_arrow_count{ $total_depth + 1 } = 0; | ||||
6850 | |||||
6851 | # If we come to an opening token after an '=' token of some type, | ||||
6852 | # see if it would be helpful to 'break' after the '=' to save space | ||||
6853 | my $last_equals = $last_gnu_equals{$total_depth}; | ||||
6854 | if ( $last_equals && $last_equals > $line_start_index_to_go ) { | ||||
6855 | |||||
6856 | # find the position if we break at the '=' | ||||
6857 | my $i_test = $last_equals; | ||||
6858 | if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ } | ||||
6859 | |||||
6860 | # TESTING | ||||
6861 | ##my $too_close = ($i_test==$max_index_to_go-1); | ||||
6862 | |||||
6863 | my $test_position = total_line_length( $i_test, $max_index_to_go ); | ||||
6864 | my $mll = maximum_line_length($i_test); | ||||
6865 | |||||
6866 | if ( | ||||
6867 | |||||
6868 | # the equals is not just before an open paren (testing) | ||||
6869 | ##!$too_close && | ||||
6870 | |||||
6871 | # if we are beyond the midpoint | ||||
6872 | $gnu_position_predictor > $mll - $rOpts_maximum_line_length / 2 | ||||
6873 | |||||
6874 | # or we are beyond the 1/4 point and there was an old | ||||
6875 | # break at the equals | ||||
6876 | || ( | ||||
6877 | $gnu_position_predictor > | ||||
6878 | $mll - $rOpts_maximum_line_length * 3 / 4 | ||||
6879 | && ( | ||||
6880 | $old_breakpoint_to_go[$last_equals] | ||||
6881 | || ( $last_equals > 0 | ||||
6882 | && $old_breakpoint_to_go[ $last_equals - 1 ] ) | ||||
6883 | || ( $last_equals > 1 | ||||
6884 | && $types_to_go[ $last_equals - 1 ] eq 'b' | ||||
6885 | && $old_breakpoint_to_go[ $last_equals - 2 ] ) | ||||
6886 | ) | ||||
6887 | ) | ||||
6888 | ) | ||||
6889 | { | ||||
6890 | |||||
6891 | # then make the switch -- note that we do not set a real | ||||
6892 | # breakpoint here because we may not really need one; sub | ||||
6893 | # scan_list will do that if necessary | ||||
6894 | $line_start_index_to_go = $i_test + 1; | ||||
6895 | $gnu_position_predictor = $test_position; | ||||
6896 | } | ||||
6897 | } | ||||
6898 | } | ||||
6899 | |||||
6900 | my $halfway = | ||||
6901 | maximum_line_length_for_level($level) - $rOpts_maximum_line_length / 2; | ||||
6902 | |||||
6903 | # Check for decreasing depth .. | ||||
6904 | # Note that one token may have both decreasing and then increasing | ||||
6905 | # depth. For example, (level, ci) can go from (1,1) to (2,0). So, | ||||
6906 | # in this example we would first go back to (1,0) then up to (2,0) | ||||
6907 | # in a single call. | ||||
6908 | if ( $level < $current_level || $ci_level < $current_ci_level ) { | ||||
6909 | |||||
6910 | # loop to find the first entry at or completely below this level | ||||
6911 | my ( $lev, $ci_lev ); | ||||
6912 | while (1) { | ||||
6913 | if ($max_gnu_stack_index) { | ||||
6914 | |||||
6915 | # save index of token which closes this level | ||||
6916 | $gnu_stack[$max_gnu_stack_index]->set_CLOSED($max_index_to_go); | ||||
6917 | |||||
6918 | # Undo any extra indentation if we saw no commas | ||||
6919 | my $available_spaces = | ||||
6920 | $gnu_stack[$max_gnu_stack_index]->get_AVAILABLE_SPACES(); | ||||
6921 | |||||
6922 | my $comma_count = 0; | ||||
6923 | my $arrow_count = 0; | ||||
6924 | if ( $type eq '}' || $type eq ')' ) { | ||||
6925 | $comma_count = $gnu_comma_count{$total_depth}; | ||||
6926 | $arrow_count = $gnu_arrow_count{$total_depth}; | ||||
6927 | $comma_count = 0 unless $comma_count; | ||||
6928 | $arrow_count = 0 unless $arrow_count; | ||||
6929 | } | ||||
6930 | $gnu_stack[$max_gnu_stack_index]->set_COMMA_COUNT($comma_count); | ||||
6931 | $gnu_stack[$max_gnu_stack_index]->set_ARROW_COUNT($arrow_count); | ||||
6932 | |||||
6933 | if ( $available_spaces > 0 ) { | ||||
6934 | |||||
6935 | if ( $comma_count <= 0 || $arrow_count > 0 ) { | ||||
6936 | |||||
6937 | my $i = $gnu_stack[$max_gnu_stack_index]->get_INDEX(); | ||||
6938 | my $seqno = | ||||
6939 | $gnu_stack[$max_gnu_stack_index] | ||||
6940 | ->get_SEQUENCE_NUMBER(); | ||||
6941 | |||||
6942 | # Be sure this item was created in this batch. This | ||||
6943 | # should be true because we delete any available | ||||
6944 | # space from open items at the end of each batch. | ||||
6945 | if ( $gnu_sequence_number != $seqno | ||||
6946 | || $i > $max_gnu_item_index ) | ||||
6947 | { | ||||
6948 | warning( | ||||
6949 | "Program bug with -lp. seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n" | ||||
6950 | ); | ||||
6951 | report_definite_bug(); | ||||
6952 | } | ||||
6953 | |||||
6954 | else { | ||||
6955 | if ( $arrow_count == 0 ) { | ||||
6956 | $gnu_item_list[$i] | ||||
6957 | ->permanently_decrease_AVAILABLE_SPACES( | ||||
6958 | $available_spaces); | ||||
6959 | } | ||||
6960 | else { | ||||
6961 | $gnu_item_list[$i] | ||||
6962 | ->tentatively_decrease_AVAILABLE_SPACES( | ||||
6963 | $available_spaces); | ||||
6964 | } | ||||
6965 | |||||
6966 | my $j; | ||||
6967 | for ( | ||||
6968 | $j = $i + 1 ; | ||||
6969 | $j <= $max_gnu_item_index ; | ||||
6970 | $j++ | ||||
6971 | ) | ||||
6972 | { | ||||
6973 | $gnu_item_list[$j] | ||||
6974 | ->decrease_SPACES($available_spaces); | ||||
6975 | } | ||||
6976 | } | ||||
6977 | } | ||||
6978 | } | ||||
6979 | |||||
6980 | # go down one level | ||||
6981 | --$max_gnu_stack_index; | ||||
6982 | $lev = $gnu_stack[$max_gnu_stack_index]->get_LEVEL(); | ||||
6983 | $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL(); | ||||
6984 | |||||
6985 | # stop when we reach a level at or below the current level | ||||
6986 | if ( $lev <= $level && $ci_lev <= $ci_level ) { | ||||
6987 | $space_count = | ||||
6988 | $gnu_stack[$max_gnu_stack_index]->get_SPACES(); | ||||
6989 | $current_level = $lev; | ||||
6990 | $current_ci_level = $ci_lev; | ||||
6991 | last; | ||||
6992 | } | ||||
6993 | } | ||||
6994 | |||||
6995 | # reached bottom of stack .. should never happen because | ||||
6996 | # only negative levels can get here, and $level was forced | ||||
6997 | # to be positive above. | ||||
6998 | else { | ||||
6999 | warning( | ||||
7000 | "program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n" | ||||
7001 | ); | ||||
7002 | report_definite_bug(); | ||||
7003 | last; | ||||
7004 | } | ||||
7005 | } | ||||
7006 | } | ||||
7007 | |||||
7008 | # handle increasing depth | ||||
7009 | if ( $level > $current_level || $ci_level > $current_ci_level ) { | ||||
7010 | |||||
7011 | # Compute the standard incremental whitespace. This will be | ||||
7012 | # the minimum incremental whitespace that will be used. This | ||||
7013 | # choice results in a smooth transition between the gnu-style | ||||
7014 | # and the standard style. | ||||
7015 | my $standard_increment = | ||||
7016 | ( $level - $current_level ) * $rOpts_indent_columns + | ||||
7017 | ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation; | ||||
7018 | |||||
7019 | # Now we have to define how much extra incremental space | ||||
7020 | # ("$available_space") we want. This extra space will be | ||||
7021 | # reduced as necessary when long lines are encountered or when | ||||
7022 | # it becomes clear that we do not have a good list. | ||||
7023 | my $available_space = 0; | ||||
7024 | my $align_paren = 0; | ||||
7025 | my $excess = 0; | ||||
7026 | |||||
7027 | # initialization on empty stack.. | ||||
7028 | if ( $max_gnu_stack_index == 0 ) { | ||||
7029 | $space_count = $level * $rOpts_indent_columns; | ||||
7030 | } | ||||
7031 | |||||
7032 | # if this is a BLOCK, add the standard increment | ||||
7033 | elsif ($last_nonblank_block_type) { | ||||
7034 | $space_count += $standard_increment; | ||||
7035 | } | ||||
7036 | |||||
7037 | # if last nonblank token was not structural indentation, | ||||
7038 | # just use standard increment | ||||
7039 | elsif ( $last_nonblank_type ne '{' ) { | ||||
7040 | $space_count += $standard_increment; | ||||
7041 | } | ||||
7042 | |||||
7043 | # otherwise use the space to the first non-blank level change token | ||||
7044 | else { | ||||
7045 | |||||
7046 | $space_count = $gnu_position_predictor; | ||||
7047 | |||||
7048 | my $min_gnu_indentation = | ||||
7049 | $gnu_stack[$max_gnu_stack_index]->get_SPACES(); | ||||
7050 | |||||
7051 | $available_space = $space_count - $min_gnu_indentation; | ||||
7052 | if ( $available_space >= $standard_increment ) { | ||||
7053 | $min_gnu_indentation += $standard_increment; | ||||
7054 | } | ||||
7055 | elsif ( $available_space > 1 ) { | ||||
7056 | $min_gnu_indentation += $available_space + 1; | ||||
7057 | } | ||||
7058 | elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) { | ||||
7059 | if ( ( $tightness{$last_nonblank_token} < 2 ) ) { | ||||
7060 | $min_gnu_indentation += 2; | ||||
7061 | } | ||||
7062 | else { | ||||
7063 | $min_gnu_indentation += 1; | ||||
7064 | } | ||||
7065 | } | ||||
7066 | else { | ||||
7067 | $min_gnu_indentation += $standard_increment; | ||||
7068 | } | ||||
7069 | $available_space = $space_count - $min_gnu_indentation; | ||||
7070 | |||||
7071 | if ( $available_space < 0 ) { | ||||
7072 | $space_count = $min_gnu_indentation; | ||||
7073 | $available_space = 0; | ||||
7074 | } | ||||
7075 | $align_paren = 1; | ||||
7076 | } | ||||
7077 | |||||
7078 | # update state, but not on a blank token | ||||
7079 | if ( $types_to_go[$max_index_to_go] ne 'b' ) { | ||||
7080 | |||||
7081 | $gnu_stack[$max_gnu_stack_index]->set_HAVE_CHILD(1); | ||||
7082 | |||||
7083 | ++$max_gnu_stack_index; | ||||
7084 | $gnu_stack[$max_gnu_stack_index] = | ||||
7085 | new_lp_indentation_item( $space_count, $level, $ci_level, | ||||
7086 | $available_space, $align_paren ); | ||||
7087 | |||||
7088 | # If the opening paren is beyond the half-line length, then | ||||
7089 | # we will use the minimum (standard) indentation. This will | ||||
7090 | # help avoid problems associated with running out of space | ||||
7091 | # near the end of a line. As a result, in deeply nested | ||||
7092 | # lists, there will be some indentations which are limited | ||||
7093 | # to this minimum standard indentation. But the most deeply | ||||
7094 | # nested container will still probably be able to shift its | ||||
7095 | # parameters to the right for proper alignment, so in most | ||||
7096 | # cases this will not be noticeable. | ||||
7097 | if ( $available_space > 0 && $space_count > $halfway ) { | ||||
7098 | $gnu_stack[$max_gnu_stack_index] | ||||
7099 | ->tentatively_decrease_AVAILABLE_SPACES($available_space); | ||||
7100 | } | ||||
7101 | } | ||||
7102 | } | ||||
7103 | |||||
7104 | # Count commas and look for non-list characters. Once we see a | ||||
7105 | # non-list character, we give up and don't look for any more commas. | ||||
7106 | if ( $type eq '=>' ) { | ||||
7107 | $gnu_arrow_count{$total_depth}++; | ||||
7108 | |||||
7109 | # tentatively treating '=>' like '=' for estimating breaks | ||||
7110 | # TODO: this could use some experimentation | ||||
7111 | $last_gnu_equals{$total_depth} = $max_index_to_go; | ||||
7112 | } | ||||
7113 | |||||
7114 | elsif ( $type eq ',' ) { | ||||
7115 | $gnu_comma_count{$total_depth}++; | ||||
7116 | } | ||||
7117 | |||||
7118 | elsif ( $is_assignment{$type} ) { | ||||
7119 | $last_gnu_equals{$total_depth} = $max_index_to_go; | ||||
7120 | } | ||||
7121 | |||||
7122 | # this token might start a new line | ||||
7123 | # if this is a non-blank.. | ||||
7124 | if ( $type ne 'b' ) { | ||||
7125 | |||||
7126 | # and if .. | ||||
7127 | if ( | ||||
7128 | |||||
7129 | # this is the first nonblank token of the line | ||||
7130 | $max_index_to_go == 1 && $types_to_go[0] eq 'b' | ||||
7131 | |||||
7132 | # or previous character was one of these: | ||||
7133 | || $last_nonblank_type_to_go =~ /^([\:\?\,f])$/ | ||||
7134 | |||||
7135 | # or previous character was opening and this does not close it | ||||
7136 | || ( $last_nonblank_type_to_go eq '{' && $type ne '}' ) | ||||
7137 | || ( $last_nonblank_type_to_go eq '(' and $type ne ')' ) | ||||
7138 | |||||
7139 | # or this token is one of these: | ||||
7140 | || $type =~ /^([\.]|\|\||\&\&)$/ | ||||
7141 | |||||
7142 | # or this is a closing structure | ||||
7143 | || ( $last_nonblank_type_to_go eq '}' | ||||
7144 | && $last_nonblank_token_to_go eq $last_nonblank_type_to_go ) | ||||
7145 | |||||
7146 | # or previous token was keyword 'return' | ||||
7147 | || ( $last_nonblank_type_to_go eq 'k' | ||||
7148 | && ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) ) | ||||
7149 | |||||
7150 | # or starting a new line at certain keywords is fine | ||||
7151 | || ( $type eq 'k' | ||||
7152 | && $is_if_unless_and_or_last_next_redo_return{$token} ) | ||||
7153 | |||||
7154 | # or this is after an assignment after a closing structure | ||||
7155 | || ( | ||||
7156 | $is_assignment{$last_nonblank_type_to_go} | ||||
7157 | && ( | ||||
7158 | $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/ | ||||
7159 | |||||
7160 | # and it is significantly to the right | ||||
7161 | || $gnu_position_predictor > $halfway | ||||
7162 | ) | ||||
7163 | ) | ||||
7164 | ) | ||||
7165 | { | ||||
7166 | check_for_long_gnu_style_lines(); | ||||
7167 | $line_start_index_to_go = $max_index_to_go; | ||||
7168 | |||||
7169 | # back up 1 token if we want to break before that type | ||||
7170 | # otherwise, we may strand tokens like '?' or ':' on a line | ||||
7171 | if ( $line_start_index_to_go > 0 ) { | ||||
7172 | if ( $last_nonblank_type_to_go eq 'k' ) { | ||||
7173 | |||||
7174 | if ( $want_break_before{$last_nonblank_token_to_go} ) { | ||||
7175 | $line_start_index_to_go--; | ||||
7176 | } | ||||
7177 | } | ||||
7178 | elsif ( $want_break_before{$last_nonblank_type_to_go} ) { | ||||
7179 | $line_start_index_to_go--; | ||||
7180 | } | ||||
7181 | } | ||||
7182 | } | ||||
7183 | } | ||||
7184 | |||||
7185 | # remember the predicted position of this token on the output line | ||||
7186 | if ( $max_index_to_go > $line_start_index_to_go ) { | ||||
7187 | $gnu_position_predictor = | ||||
7188 | total_line_length( $line_start_index_to_go, $max_index_to_go ); | ||||
7189 | } | ||||
7190 | else { | ||||
7191 | $gnu_position_predictor = | ||||
7192 | $space_count + $token_lengths_to_go[$max_index_to_go]; | ||||
7193 | } | ||||
7194 | |||||
7195 | # store the indentation object for this token | ||||
7196 | # this allows us to manipulate the leading whitespace | ||||
7197 | # (in case we have to reduce indentation to fit a line) without | ||||
7198 | # having to change any token values | ||||
7199 | $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index]; | ||||
7200 | $reduced_spaces_to_go[$max_index_to_go] = | ||||
7201 | ( $max_gnu_stack_index > 0 && $ci_level ) | ||||
7202 | ? $gnu_stack[ $max_gnu_stack_index - 1 ] | ||||
7203 | : $gnu_stack[$max_gnu_stack_index]; | ||||
7204 | return; | ||||
7205 | } | ||||
7206 | |||||
7207 | sub check_for_long_gnu_style_lines { | ||||
7208 | |||||
7209 | # look at the current estimated maximum line length, and | ||||
7210 | # remove some whitespace if it exceeds the desired maximum | ||||
7211 | |||||
7212 | # this is only for the '-lp' style | ||||
7213 | return unless ($rOpts_line_up_parentheses); | ||||
7214 | |||||
7215 | # nothing can be done if no stack items defined for this line | ||||
7216 | return if ( $max_gnu_item_index == UNDEFINED_INDEX ); | ||||
7217 | |||||
7218 | # see if we have exceeded the maximum desired line length | ||||
7219 | # keep 2 extra free because they are needed in some cases | ||||
7220 | # (result of trial-and-error testing) | ||||
7221 | my $spaces_needed = | ||||
7222 | $gnu_position_predictor - maximum_line_length($max_index_to_go) + 2; | ||||
7223 | |||||
7224 | return if ( $spaces_needed <= 0 ); | ||||
7225 | |||||
7226 | # We are over the limit, so try to remove a requested number of | ||||
7227 | # spaces from leading whitespace. We are only allowed to remove | ||||
7228 | # from whitespace items created on this batch, since others have | ||||
7229 | # already been used and cannot be undone. | ||||
7230 | my @candidates = (); | ||||
7231 | my $i; | ||||
7232 | |||||
7233 | # loop over all whitespace items created for the current batch | ||||
7234 | for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) { | ||||
7235 | my $item = $gnu_item_list[$i]; | ||||
7236 | |||||
7237 | # item must still be open to be a candidate (otherwise it | ||||
7238 | # cannot influence the current token) | ||||
7239 | next if ( $item->get_CLOSED() >= 0 ); | ||||
7240 | |||||
7241 | my $available_spaces = $item->get_AVAILABLE_SPACES(); | ||||
7242 | |||||
7243 | if ( $available_spaces > 0 ) { | ||||
7244 | push( @candidates, [ $i, $available_spaces ] ); | ||||
7245 | } | ||||
7246 | } | ||||
7247 | |||||
7248 | return unless (@candidates); | ||||
7249 | |||||
7250 | # sort by available whitespace so that we can remove whitespace | ||||
7251 | # from the maximum available first | ||||
7252 | @candidates = sort { $b->[1] <=> $a->[1] } @candidates; | ||||
7253 | |||||
7254 | # keep removing whitespace until we are done or have no more | ||||
7255 | my $candidate; | ||||
7256 | foreach $candidate (@candidates) { | ||||
7257 | my ( $i, $available_spaces ) = @{$candidate}; | ||||
7258 | my $deleted_spaces = | ||||
7259 | ( $available_spaces > $spaces_needed ) | ||||
7260 | ? $spaces_needed | ||||
7261 | : $available_spaces; | ||||
7262 | |||||
7263 | # remove the incremental space from this item | ||||
7264 | $gnu_item_list[$i]->decrease_AVAILABLE_SPACES($deleted_spaces); | ||||
7265 | |||||
7266 | my $i_debug = $i; | ||||
7267 | |||||
7268 | # update the leading whitespace of this item and all items | ||||
7269 | # that came after it | ||||
7270 | for ( ; $i <= $max_gnu_item_index ; $i++ ) { | ||||
7271 | |||||
7272 | my $old_spaces = $gnu_item_list[$i]->get_SPACES(); | ||||
7273 | if ( $old_spaces >= $deleted_spaces ) { | ||||
7274 | $gnu_item_list[$i]->decrease_SPACES($deleted_spaces); | ||||
7275 | } | ||||
7276 | |||||
7277 | # shouldn't happen except for code bug: | ||||
7278 | else { | ||||
7279 | my $level = $gnu_item_list[$i_debug]->get_LEVEL(); | ||||
7280 | my $ci_level = $gnu_item_list[$i_debug]->get_CI_LEVEL(); | ||||
7281 | my $old_level = $gnu_item_list[$i]->get_LEVEL(); | ||||
7282 | my $old_ci_level = $gnu_item_list[$i]->get_CI_LEVEL(); | ||||
7283 | warning( | ||||
7284 | "program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level\n" | ||||
7285 | ); | ||||
7286 | report_definite_bug(); | ||||
7287 | } | ||||
7288 | } | ||||
7289 | $gnu_position_predictor -= $deleted_spaces; | ||||
7290 | $spaces_needed -= $deleted_spaces; | ||||
7291 | last unless ( $spaces_needed > 0 ); | ||||
7292 | } | ||||
7293 | } | ||||
7294 | |||||
7295 | sub finish_lp_batch { | ||||
7296 | |||||
7297 | # This routine is called once after each output stream batch is | ||||
7298 | # finished to undo indentation for all incomplete -lp | ||||
7299 | # indentation levels. It is too risky to leave a level open, | ||||
7300 | # because then we can't backtrack in case of a long line to follow. | ||||
7301 | # This means that comments and blank lines will disrupt this | ||||
7302 | # indentation style. But the vertical aligner may be able to | ||||
7303 | # get the space back if there are side comments. | ||||
7304 | |||||
7305 | # this is only for the 'lp' style | ||||
7306 | return unless ($rOpts_line_up_parentheses); | ||||
7307 | |||||
7308 | # nothing can be done if no stack items defined for this line | ||||
7309 | return if ( $max_gnu_item_index == UNDEFINED_INDEX ); | ||||
7310 | |||||
7311 | # loop over all whitespace items created for the current batch | ||||
7312 | my $i; | ||||
7313 | for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) { | ||||
7314 | my $item = $gnu_item_list[$i]; | ||||
7315 | |||||
7316 | # only look for open items | ||||
7317 | next if ( $item->get_CLOSED() >= 0 ); | ||||
7318 | |||||
7319 | # Tentatively remove all of the available space | ||||
7320 | # (The vertical aligner will try to get it back later) | ||||
7321 | my $available_spaces = $item->get_AVAILABLE_SPACES(); | ||||
7322 | if ( $available_spaces > 0 ) { | ||||
7323 | |||||
7324 | # delete incremental space for this item | ||||
7325 | $gnu_item_list[$i] | ||||
7326 | ->tentatively_decrease_AVAILABLE_SPACES($available_spaces); | ||||
7327 | |||||
7328 | # Reduce the total indentation space of any nodes that follow | ||||
7329 | # Note that any such nodes must necessarily be dependents | ||||
7330 | # of this node. | ||||
7331 | foreach ( $i + 1 .. $max_gnu_item_index ) { | ||||
7332 | $gnu_item_list[$_]->decrease_SPACES($available_spaces); | ||||
7333 | } | ||||
7334 | } | ||||
7335 | } | ||||
7336 | return; | ||||
7337 | } | ||||
7338 | |||||
7339 | sub reduce_lp_indentation { | ||||
7340 | |||||
7341 | # reduce the leading whitespace at token $i if possible by $spaces_needed | ||||
7342 | # (a large value of $spaces_needed will remove all excess space) | ||||
7343 | # NOTE: to be called from scan_list only for a sequence of tokens | ||||
7344 | # contained between opening and closing parens/braces/brackets | ||||
7345 | |||||
7346 | my ( $i, $spaces_wanted ) = @_; | ||||
7347 | my $deleted_spaces = 0; | ||||
7348 | |||||
7349 | my $item = $leading_spaces_to_go[$i]; | ||||
7350 | my $available_spaces = $item->get_AVAILABLE_SPACES(); | ||||
7351 | |||||
7352 | if ( | ||||
7353 | $available_spaces > 0 | ||||
7354 | && ( ( $spaces_wanted <= $available_spaces ) | ||||
7355 | || !$item->get_HAVE_CHILD() ) | ||||
7356 | ) | ||||
7357 | { | ||||
7358 | |||||
7359 | # we'll remove these spaces, but mark them as recoverable | ||||
7360 | $deleted_spaces = | ||||
7361 | $item->tentatively_decrease_AVAILABLE_SPACES($spaces_wanted); | ||||
7362 | } | ||||
7363 | |||||
7364 | return $deleted_spaces; | ||||
7365 | } | ||||
7366 | |||||
7367 | sub token_sequence_length { | ||||
7368 | |||||
7369 | # return length of tokens ($ibeg .. $iend) including $ibeg & $iend | ||||
7370 | # returns 0 if $ibeg > $iend (shouldn't happen) | ||||
7371 | my ( $ibeg, $iend ) = @_; | ||||
7372 | return 0 if ( $iend < 0 || $ibeg > $iend ); | ||||
7373 | return $summed_lengths_to_go[ $iend + 1 ] if ( $ibeg < 0 ); | ||||
7374 | return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg]; | ||||
7375 | } | ||||
7376 | |||||
7377 | sub total_line_length { | ||||
7378 | |||||
7379 | # return length of a line of tokens ($ibeg .. $iend) | ||||
7380 | my ( $ibeg, $iend ) = @_; | ||||
7381 | return leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend ); | ||||
7382 | } | ||||
7383 | |||||
7384 | sub maximum_line_length_for_level { | ||||
7385 | |||||
7386 | # return maximum line length for line starting with a given level | ||||
7387 | my $maximum_line_length = $rOpts_maximum_line_length; | ||||
7388 | |||||
7389 | # Modify if -vmll option is selected | ||||
7390 | if ($rOpts_variable_maximum_line_length) { | ||||
7391 | my $level = shift; | ||||
7392 | if ( $level < 0 ) { $level = 0 } | ||||
7393 | $maximum_line_length += $level * $rOpts_indent_columns; | ||||
7394 | } | ||||
7395 | return $maximum_line_length; | ||||
7396 | } | ||||
7397 | |||||
7398 | sub maximum_line_length { | ||||
7399 | |||||
7400 | # return maximum line length for line starting with the token at given index | ||||
7401 | return maximum_line_length_for_level( $levels_to_go[ $_[0] ] ); | ||||
7402 | |||||
7403 | } | ||||
7404 | |||||
7405 | sub excess_line_length { | ||||
7406 | |||||
7407 | # return number of characters by which a line of tokens ($ibeg..$iend) | ||||
7408 | # exceeds the allowable line length. | ||||
7409 | my ( $ibeg, $iend ) = @_; | ||||
7410 | return total_line_length( $ibeg, $iend ) - maximum_line_length($ibeg); | ||||
7411 | } | ||||
7412 | |||||
7413 | sub finish_formatting { | ||||
7414 | |||||
7415 | # flush buffer and write any informative messages | ||||
7416 | my $self = shift; | ||||
7417 | |||||
7418 | flush(); | ||||
7419 | $file_writer_object->decrement_output_line_number() | ||||
7420 | ; # fix up line number since it was incremented | ||||
7421 | we_are_at_the_last_line(); | ||||
7422 | if ( $added_semicolon_count > 0 ) { | ||||
7423 | my $first = ( $added_semicolon_count > 1 ) ? "First" : ""; | ||||
7424 | my $what = | ||||
7425 | ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was"; | ||||
7426 | write_logfile_entry("$added_semicolon_count $what added:\n"); | ||||
7427 | write_logfile_entry( | ||||
7428 | " $first at input line $first_added_semicolon_at\n"); | ||||
7429 | |||||
7430 | if ( $added_semicolon_count > 1 ) { | ||||
7431 | write_logfile_entry( | ||||
7432 | " Last at input line $last_added_semicolon_at\n"); | ||||
7433 | } | ||||
7434 | write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n"); | ||||
7435 | write_logfile_entry("\n"); | ||||
7436 | } | ||||
7437 | |||||
7438 | if ( $deleted_semicolon_count > 0 ) { | ||||
7439 | my $first = ( $deleted_semicolon_count > 1 ) ? "First" : ""; | ||||
7440 | my $what = | ||||
7441 | ( $deleted_semicolon_count > 1 ) | ||||
7442 | ? "semicolons were" | ||||
7443 | : "semicolon was"; | ||||
7444 | write_logfile_entry( | ||||
7445 | "$deleted_semicolon_count unnecessary $what deleted:\n"); | ||||
7446 | write_logfile_entry( | ||||
7447 | " $first at input line $first_deleted_semicolon_at\n"); | ||||
7448 | |||||
7449 | if ( $deleted_semicolon_count > 1 ) { | ||||
7450 | write_logfile_entry( | ||||
7451 | " Last at input line $last_deleted_semicolon_at\n"); | ||||
7452 | } | ||||
7453 | write_logfile_entry(" (Use -ndsc to prevent semicolon deletion)\n"); | ||||
7454 | write_logfile_entry("\n"); | ||||
7455 | } | ||||
7456 | |||||
7457 | if ( $embedded_tab_count > 0 ) { | ||||
7458 | my $first = ( $embedded_tab_count > 1 ) ? "First" : ""; | ||||
7459 | my $what = | ||||
7460 | ( $embedded_tab_count > 1 ) | ||||
7461 | ? "quotes or patterns" | ||||
7462 | : "quote or pattern"; | ||||
7463 | write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n"); | ||||
7464 | write_logfile_entry( | ||||
7465 | "This means the display of this script could vary with device or software\n" | ||||
7466 | ); | ||||
7467 | write_logfile_entry(" $first at input line $first_embedded_tab_at\n"); | ||||
7468 | |||||
7469 | if ( $embedded_tab_count > 1 ) { | ||||
7470 | write_logfile_entry( | ||||
7471 | " Last at input line $last_embedded_tab_at\n"); | ||||
7472 | } | ||||
7473 | write_logfile_entry("\n"); | ||||
7474 | } | ||||
7475 | |||||
7476 | if ($first_tabbing_disagreement) { | ||||
7477 | write_logfile_entry( | ||||
7478 | "First indentation disagreement seen at input line $first_tabbing_disagreement\n" | ||||
7479 | ); | ||||
7480 | } | ||||
7481 | |||||
7482 | if ($in_tabbing_disagreement) { | ||||
7483 | write_logfile_entry( | ||||
7484 | "Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n" | ||||
7485 | ); | ||||
7486 | } | ||||
7487 | else { | ||||
7488 | |||||
7489 | if ($last_tabbing_disagreement) { | ||||
7490 | |||||
7491 | write_logfile_entry( | ||||
7492 | "Last indentation disagreement seen at input line $last_tabbing_disagreement\n" | ||||
7493 | ); | ||||
7494 | } | ||||
7495 | else { | ||||
7496 | write_logfile_entry("No indentation disagreement seen\n"); | ||||
7497 | } | ||||
7498 | } | ||||
7499 | if ($first_tabbing_disagreement) { | ||||
7500 | write_logfile_entry( | ||||
7501 | "Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n" | ||||
7502 | ); | ||||
7503 | } | ||||
7504 | write_logfile_entry("\n"); | ||||
7505 | |||||
7506 | $vertical_aligner_object->report_anything_unusual(); | ||||
7507 | |||||
7508 | $file_writer_object->report_line_length_errors(); | ||||
7509 | } | ||||
7510 | |||||
7511 | sub check_options { | ||||
7512 | |||||
7513 | # This routine is called to check the Opts hash after it is defined | ||||
7514 | |||||
7515 | ($rOpts) = @_; | ||||
7516 | |||||
7517 | make_static_block_comment_pattern(); | ||||
7518 | make_static_side_comment_pattern(); | ||||
7519 | make_closing_side_comment_prefix(); | ||||
7520 | make_closing_side_comment_list_pattern(); | ||||
7521 | $format_skipping_pattern_begin = | ||||
7522 | make_format_skipping_pattern( 'format-skipping-begin', '#<<<' ); | ||||
7523 | $format_skipping_pattern_end = | ||||
7524 | make_format_skipping_pattern( 'format-skipping-end', '#>>>' ); | ||||
7525 | |||||
7526 | # If closing side comments ARE selected, then we can safely | ||||
7527 | # delete old closing side comments unless closing side comment | ||||
7528 | # warnings are requested. This is a good idea because it will | ||||
7529 | # eliminate any old csc's which fall below the line count threshold. | ||||
7530 | # We cannot do this if warnings are turned on, though, because we | ||||
7531 | # might delete some text which has been added. So that must | ||||
7532 | # be handled when comments are created. | ||||
7533 | if ( $rOpts->{'closing-side-comments'} ) { | ||||
7534 | if ( !$rOpts->{'closing-side-comment-warnings'} ) { | ||||
7535 | $rOpts->{'delete-closing-side-comments'} = 1; | ||||
7536 | } | ||||
7537 | } | ||||
7538 | |||||
7539 | # If closing side comments ARE NOT selected, but warnings ARE | ||||
7540 | # selected and we ARE DELETING csc's, then we will pretend to be | ||||
7541 | # adding with a huge interval. This will force the comments to be | ||||
7542 | # generated for comparison with the old comments, but not added. | ||||
7543 | elsif ( $rOpts->{'closing-side-comment-warnings'} ) { | ||||
7544 | if ( $rOpts->{'delete-closing-side-comments'} ) { | ||||
7545 | $rOpts->{'delete-closing-side-comments'} = 0; | ||||
7546 | $rOpts->{'closing-side-comments'} = 1; | ||||
7547 | $rOpts->{'closing-side-comment-interval'} = 100000000; | ||||
7548 | } | ||||
7549 | } | ||||
7550 | |||||
7551 | make_bli_pattern(); | ||||
7552 | make_block_brace_vertical_tightness_pattern(); | ||||
7553 | |||||
7554 | if ( $rOpts->{'line-up-parentheses'} ) { | ||||
7555 | |||||
7556 | if ( $rOpts->{'indent-only'} | ||||
7557 | || !$rOpts->{'add-newlines'} | ||||
7558 | || !$rOpts->{'delete-old-newlines'} ) | ||||
7559 | { | ||||
7560 | Perl::Tidy::Warn <<EOM; | ||||
7561 | ----------------------------------------------------------------------- | ||||
7562 | Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp | ||||
7563 | |||||
7564 | The -lp indentation logic requires that perltidy be able to coordinate | ||||
7565 | arbitrarily large numbers of line breakpoints. This isn't possible | ||||
7566 | with these flags. Sometimes an acceptable workaround is to use -wocb=3 | ||||
7567 | ----------------------------------------------------------------------- | ||||
7568 | EOM | ||||
7569 | $rOpts->{'line-up-parentheses'} = 0; | ||||
7570 | } | ||||
7571 | } | ||||
7572 | |||||
7573 | # At present, tabs are not compatible with the line-up-parentheses style | ||||
7574 | # (it would be possible to entab the total leading whitespace | ||||
7575 | # just prior to writing the line, if desired). | ||||
7576 | if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) { | ||||
7577 | Perl::Tidy::Warn <<EOM; | ||||
7578 | Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et. | ||||
7579 | EOM | ||||
7580 | $rOpts->{'tabs'} = 0; | ||||
7581 | } | ||||
7582 | |||||
7583 | # Likewise, tabs are not compatible with outdenting.. | ||||
7584 | if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) { | ||||
7585 | Perl::Tidy::Warn <<EOM; | ||||
7586 | Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et. | ||||
7587 | EOM | ||||
7588 | $rOpts->{'tabs'} = 0; | ||||
7589 | } | ||||
7590 | |||||
7591 | if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) { | ||||
7592 | Perl::Tidy::Warn <<EOM; | ||||
7593 | Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et. | ||||
7594 | EOM | ||||
7595 | $rOpts->{'tabs'} = 0; | ||||
7596 | } | ||||
7597 | |||||
7598 | if ( !$rOpts->{'space-for-semicolon'} ) { | ||||
7599 | $want_left_space{'f'} = -1; | ||||
7600 | } | ||||
7601 | |||||
7602 | if ( $rOpts->{'space-terminal-semicolon'} ) { | ||||
7603 | $want_left_space{';'} = 1; | ||||
7604 | } | ||||
7605 | |||||
7606 | # implement outdenting preferences for keywords | ||||
7607 | %outdent_keyword = (); | ||||
7608 | unless ( @_ = split_words( $rOpts->{'outdent-keyword-okl'} ) ) { | ||||
7609 | @_ = qw(next last redo goto return); # defaults | ||||
7610 | } | ||||
7611 | |||||
7612 | # FUTURE: if not a keyword, assume that it is an identifier | ||||
7613 | foreach (@_) { | ||||
7614 | if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) { | ||||
7615 | $outdent_keyword{$_} = 1; | ||||
7616 | } | ||||
7617 | else { | ||||
7618 | Perl::Tidy::Warn "ignoring '$_' in -okwl list; not a perl keyword"; | ||||
7619 | } | ||||
7620 | } | ||||
7621 | |||||
7622 | # implement user whitespace preferences | ||||
7623 | if ( @_ = split_words( $rOpts->{'want-left-space'} ) ) { | ||||
7624 | @want_left_space{@_} = (1) x scalar(@_); | ||||
7625 | } | ||||
7626 | |||||
7627 | if ( @_ = split_words( $rOpts->{'want-right-space'} ) ) { | ||||
7628 | @want_right_space{@_} = (1) x scalar(@_); | ||||
7629 | } | ||||
7630 | |||||
7631 | if ( @_ = split_words( $rOpts->{'nowant-left-space'} ) ) { | ||||
7632 | @want_left_space{@_} = (-1) x scalar(@_); | ||||
7633 | } | ||||
7634 | |||||
7635 | if ( @_ = split_words( $rOpts->{'nowant-right-space'} ) ) { | ||||
7636 | @want_right_space{@_} = (-1) x scalar(@_); | ||||
7637 | } | ||||
7638 | if ( $rOpts->{'dump-want-left-space'} ) { | ||||
7639 | dump_want_left_space(*STDOUT); | ||||
7640 | Perl::Tidy::Exit 0; | ||||
7641 | } | ||||
7642 | |||||
7643 | if ( $rOpts->{'dump-want-right-space'} ) { | ||||
7644 | dump_want_right_space(*STDOUT); | ||||
7645 | Perl::Tidy::Exit 0; | ||||
7646 | } | ||||
7647 | |||||
7648 | # default keywords for which space is introduced before an opening paren | ||||
7649 | # (at present, including them messes up vertical alignment) | ||||
7650 | @_ = qw(my local our and or err eq ne if else elsif until | ||||
7651 | unless while for foreach return switch case given when); | ||||
7652 | @space_after_keyword{@_} = (1) x scalar(@_); | ||||
7653 | |||||
7654 | # first remove any or all of these if desired | ||||
7655 | if ( @_ = split_words( $rOpts->{'nospace-after-keyword'} ) ) { | ||||
7656 | |||||
7657 | # -nsak='*' selects all the above keywords | ||||
7658 | if ( @_ == 1 && $_[0] eq '*' ) { @_ = keys(%space_after_keyword) } | ||||
7659 | @space_after_keyword{@_} = (0) x scalar(@_); | ||||
7660 | } | ||||
7661 | |||||
7662 | # then allow user to add to these defaults | ||||
7663 | if ( @_ = split_words( $rOpts->{'space-after-keyword'} ) ) { | ||||
7664 | @space_after_keyword{@_} = (1) x scalar(@_); | ||||
7665 | } | ||||
7666 | |||||
7667 | # implement user break preferences | ||||
7668 | my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | & | ||||
7669 | = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x= | ||||
7670 | . : ? && || and or err xor | ||||
7671 | ); | ||||
7672 | |||||
7673 | my $break_after = sub { | ||||
7674 | foreach my $tok (@_) { | ||||
7675 | if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/: | ||||
7676 | my $lbs = $left_bond_strength{$tok}; | ||||
7677 | my $rbs = $right_bond_strength{$tok}; | ||||
7678 | if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) { | ||||
7679 | ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) = | ||||
7680 | ( $lbs, $rbs ); | ||||
7681 | } | ||||
7682 | } | ||||
7683 | }; | ||||
7684 | |||||
7685 | my $break_before = sub { | ||||
7686 | foreach my $tok (@_) { | ||||
7687 | my $lbs = $left_bond_strength{$tok}; | ||||
7688 | my $rbs = $right_bond_strength{$tok}; | ||||
7689 | if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) { | ||||
7690 | ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) = | ||||
7691 | ( $lbs, $rbs ); | ||||
7692 | } | ||||
7693 | } | ||||
7694 | }; | ||||
7695 | |||||
7696 | $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} ); | ||||
7697 | $break_before->(@all_operators) | ||||
7698 | if ( $rOpts->{'break-before-all-operators'} ); | ||||
7699 | |||||
7700 | $break_after->( split_words( $rOpts->{'want-break-after'} ) ); | ||||
7701 | $break_before->( split_words( $rOpts->{'want-break-before'} ) ); | ||||
7702 | |||||
7703 | # make note if breaks are before certain key types | ||||
7704 | %want_break_before = (); | ||||
7705 | foreach my $tok ( @all_operators, ',' ) { | ||||
7706 | $want_break_before{$tok} = | ||||
7707 | $left_bond_strength{$tok} < $right_bond_strength{$tok}; | ||||
7708 | } | ||||
7709 | |||||
7710 | # Coordinate ?/: breaks, which must be similar | ||||
7711 | if ( !$want_break_before{':'} ) { | ||||
7712 | $want_break_before{'?'} = $want_break_before{':'}; | ||||
7713 | $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01; | ||||
7714 | $left_bond_strength{'?'} = NO_BREAK; | ||||
7715 | } | ||||
7716 | |||||
7717 | # Define here tokens which may follow the closing brace of a do statement | ||||
7718 | # on the same line, as in: | ||||
7719 | # } while ( $something); | ||||
7720 | @_ = qw(until while unless if ; : ); | ||||
7721 | push @_, ','; | ||||
7722 | @is_do_follower{@_} = (1) x scalar(@_); | ||||
7723 | |||||
7724 | # These tokens may follow the closing brace of an if or elsif block. | ||||
7725 | # In other words, for cuddled else we want code to look like: | ||||
7726 | # } elsif ( $something) { | ||||
7727 | # } else { | ||||
7728 | if ( $rOpts->{'cuddled-else'} ) { | ||||
7729 | @_ = qw(else elsif); | ||||
7730 | @is_if_brace_follower{@_} = (1) x scalar(@_); | ||||
7731 | } | ||||
7732 | else { | ||||
7733 | %is_if_brace_follower = (); | ||||
7734 | } | ||||
7735 | |||||
7736 | # nothing can follow the closing curly of an else { } block: | ||||
7737 | %is_else_brace_follower = (); | ||||
7738 | |||||
7739 | # what can follow a multi-line anonymous sub definition closing curly: | ||||
7740 | @_ = qw# ; : => or and && || ~~ !~~ ) #; | ||||
7741 | push @_, ','; | ||||
7742 | @is_anon_sub_brace_follower{@_} = (1) x scalar(@_); | ||||
7743 | |||||
7744 | # what can follow a one-line anonymous sub closing curly: | ||||
7745 | # one-line anonymous subs also have ']' here... | ||||
7746 | # see tk3.t and PP.pm | ||||
7747 | @_ = qw# ; : => or and && || ) ] ~~ !~~ #; | ||||
7748 | push @_, ','; | ||||
7749 | @is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_); | ||||
7750 | |||||
7751 | # What can follow a closing curly of a block | ||||
7752 | # which is not an if/elsif/else/do/sort/map/grep/eval/sub | ||||
7753 | # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl' | ||||
7754 | @_ = qw# ; : => or and && || ) #; | ||||
7755 | push @_, ','; | ||||
7756 | |||||
7757 | # allow cuddled continue if cuddled else is specified | ||||
7758 | if ( $rOpts->{'cuddled-else'} ) { push @_, 'continue'; } | ||||
7759 | |||||
7760 | @is_other_brace_follower{@_} = (1) x scalar(@_); | ||||
7761 | |||||
7762 | $right_bond_strength{'{'} = WEAK; | ||||
7763 | $left_bond_strength{'{'} = VERY_STRONG; | ||||
7764 | |||||
7765 | # make -l=0 equal to -l=infinite | ||||
7766 | if ( !$rOpts->{'maximum-line-length'} ) { | ||||
7767 | $rOpts->{'maximum-line-length'} = 1000000; | ||||
7768 | } | ||||
7769 | |||||
7770 | # make -lbl=0 equal to -lbl=infinite | ||||
7771 | if ( !$rOpts->{'long-block-line-count'} ) { | ||||
7772 | $rOpts->{'long-block-line-count'} = 1000000; | ||||
7773 | } | ||||
7774 | |||||
7775 | my $ole = $rOpts->{'output-line-ending'}; | ||||
7776 | if ($ole) { | ||||
7777 | my %endings = ( | ||||
7778 | dos => "\015\012", | ||||
7779 | win => "\015\012", | ||||
7780 | mac => "\015", | ||||
7781 | unix => "\012", | ||||
7782 | ); | ||||
7783 | $ole = lc $ole; | ||||
7784 | unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) { | ||||
7785 | my $str = join " ", keys %endings; | ||||
7786 | Perl::Tidy::Die <<EOM; | ||||
7787 | Unrecognized line ending '$ole'; expecting one of: $str | ||||
7788 | EOM | ||||
7789 | } | ||||
7790 | if ( $rOpts->{'preserve-line-endings'} ) { | ||||
7791 | Perl::Tidy::Warn "Ignoring -ple; conflicts with -ole\n"; | ||||
7792 | $rOpts->{'preserve-line-endings'} = undef; | ||||
7793 | } | ||||
7794 | } | ||||
7795 | |||||
7796 | # hashes used to simplify setting whitespace | ||||
7797 | %tightness = ( | ||||
7798 | '{' => $rOpts->{'brace-tightness'}, | ||||
7799 | '}' => $rOpts->{'brace-tightness'}, | ||||
7800 | '(' => $rOpts->{'paren-tightness'}, | ||||
7801 | ')' => $rOpts->{'paren-tightness'}, | ||||
7802 | '[' => $rOpts->{'square-bracket-tightness'}, | ||||
7803 | ']' => $rOpts->{'square-bracket-tightness'}, | ||||
7804 | ); | ||||
7805 | %matching_token = ( | ||||
7806 | '{' => '}', | ||||
7807 | '(' => ')', | ||||
7808 | '[' => ']', | ||||
7809 | '?' => ':', | ||||
7810 | ); | ||||
7811 | |||||
7812 | # frequently used parameters | ||||
7813 | $rOpts_add_newlines = $rOpts->{'add-newlines'}; | ||||
7814 | $rOpts_add_whitespace = $rOpts->{'add-whitespace'}; | ||||
7815 | $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'}; | ||||
7816 | $rOpts_block_brace_vertical_tightness = | ||||
7817 | $rOpts->{'block-brace-vertical-tightness'}; | ||||
7818 | $rOpts_brace_left_and_indent = $rOpts->{'brace-left-and-indent'}; | ||||
7819 | $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'}; | ||||
7820 | $rOpts_break_at_old_ternary_breakpoints = | ||||
7821 | $rOpts->{'break-at-old-ternary-breakpoints'}; | ||||
7822 | $rOpts_break_at_old_attribute_breakpoints = | ||||
7823 | $rOpts->{'break-at-old-attribute-breakpoints'}; | ||||
7824 | $rOpts_break_at_old_comma_breakpoints = | ||||
7825 | $rOpts->{'break-at-old-comma-breakpoints'}; | ||||
7826 | $rOpts_break_at_old_keyword_breakpoints = | ||||
7827 | $rOpts->{'break-at-old-keyword-breakpoints'}; | ||||
7828 | $rOpts_break_at_old_logical_breakpoints = | ||||
7829 | $rOpts->{'break-at-old-logical-breakpoints'}; | ||||
7830 | $rOpts_closing_side_comment_else_flag = | ||||
7831 | $rOpts->{'closing-side-comment-else-flag'}; | ||||
7832 | $rOpts_closing_side_comment_maximum_text = | ||||
7833 | $rOpts->{'closing-side-comment-maximum-text'}; | ||||
7834 | $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'}; | ||||
7835 | $rOpts_cuddled_else = $rOpts->{'cuddled-else'}; | ||||
7836 | $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'}; | ||||
7837 | $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'}; | ||||
7838 | $rOpts_indent_columns = $rOpts->{'indent-columns'}; | ||||
7839 | $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'}; | ||||
7840 | $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'}; | ||||
7841 | $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'}; | ||||
7842 | $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'}; | ||||
7843 | |||||
7844 | $rOpts_variable_maximum_line_length = | ||||
7845 | $rOpts->{'variable-maximum-line-length'}; | ||||
7846 | $rOpts_short_concatenation_item_length = | ||||
7847 | $rOpts->{'short-concatenation-item-length'}; | ||||
7848 | |||||
7849 | $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'}; | ||||
7850 | $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'}; | ||||
7851 | $rOpts_format_skipping = $rOpts->{'format-skipping'}; | ||||
7852 | $rOpts_space_function_paren = $rOpts->{'space-function-paren'}; | ||||
7853 | $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'}; | ||||
7854 | $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'}; | ||||
7855 | $rOpts_ignore_side_comment_lengths = | ||||
7856 | $rOpts->{'ignore-side-comment-lengths'}; | ||||
7857 | |||||
7858 | # Note that both opening and closing tokens can access the opening | ||||
7859 | # and closing flags of their container types. | ||||
7860 | %opening_vertical_tightness = ( | ||||
7861 | '(' => $rOpts->{'paren-vertical-tightness'}, | ||||
7862 | '{' => $rOpts->{'brace-vertical-tightness'}, | ||||
7863 | '[' => $rOpts->{'square-bracket-vertical-tightness'}, | ||||
7864 | ')' => $rOpts->{'paren-vertical-tightness'}, | ||||
7865 | '}' => $rOpts->{'brace-vertical-tightness'}, | ||||
7866 | ']' => $rOpts->{'square-bracket-vertical-tightness'}, | ||||
7867 | ); | ||||
7868 | |||||
7869 | %closing_vertical_tightness = ( | ||||
7870 | '(' => $rOpts->{'paren-vertical-tightness-closing'}, | ||||
7871 | '{' => $rOpts->{'brace-vertical-tightness-closing'}, | ||||
7872 | '[' => $rOpts->{'square-bracket-vertical-tightness-closing'}, | ||||
7873 | ')' => $rOpts->{'paren-vertical-tightness-closing'}, | ||||
7874 | '}' => $rOpts->{'brace-vertical-tightness-closing'}, | ||||
7875 | ']' => $rOpts->{'square-bracket-vertical-tightness-closing'}, | ||||
7876 | ); | ||||
7877 | |||||
7878 | $rOpts_tight_secret_operators = $rOpts->{'tight-secret-operators'}; | ||||
7879 | |||||
7880 | # assume flag for '>' same as ')' for closing qw quotes | ||||
7881 | %closing_token_indentation = ( | ||||
7882 | ')' => $rOpts->{'closing-paren-indentation'}, | ||||
7883 | '}' => $rOpts->{'closing-brace-indentation'}, | ||||
7884 | ']' => $rOpts->{'closing-square-bracket-indentation'}, | ||||
7885 | '>' => $rOpts->{'closing-paren-indentation'}, | ||||
7886 | ); | ||||
7887 | |||||
7888 | # flag indicating if any closing tokens are indented | ||||
7889 | $some_closing_token_indentation = | ||||
7890 | $rOpts->{'closing-paren-indentation'} | ||||
7891 | || $rOpts->{'closing-brace-indentation'} | ||||
7892 | || $rOpts->{'closing-square-bracket-indentation'} | ||||
7893 | || $rOpts->{'indent-closing-brace'}; | ||||
7894 | |||||
7895 | %opening_token_right = ( | ||||
7896 | '(' => $rOpts->{'opening-paren-right'}, | ||||
7897 | '{' => $rOpts->{'opening-hash-brace-right'}, | ||||
7898 | '[' => $rOpts->{'opening-square-bracket-right'}, | ||||
7899 | ); | ||||
7900 | |||||
7901 | %stack_opening_token = ( | ||||
7902 | '(' => $rOpts->{'stack-opening-paren'}, | ||||
7903 | '{' => $rOpts->{'stack-opening-hash-brace'}, | ||||
7904 | '[' => $rOpts->{'stack-opening-square-bracket'}, | ||||
7905 | ); | ||||
7906 | |||||
7907 | %stack_closing_token = ( | ||||
7908 | ')' => $rOpts->{'stack-closing-paren'}, | ||||
7909 | '}' => $rOpts->{'stack-closing-hash-brace'}, | ||||
7910 | ']' => $rOpts->{'stack-closing-square-bracket'}, | ||||
7911 | ); | ||||
7912 | $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'}; | ||||
7913 | } | ||||
7914 | |||||
7915 | sub make_static_block_comment_pattern { | ||||
7916 | |||||
7917 | # create the pattern used to identify static block comments | ||||
7918 | $static_block_comment_pattern = '^\s*##'; | ||||
7919 | |||||
7920 | # allow the user to change it | ||||
7921 | if ( $rOpts->{'static-block-comment-prefix'} ) { | ||||
7922 | my $prefix = $rOpts->{'static-block-comment-prefix'}; | ||||
7923 | $prefix =~ s/^\s*//; | ||||
7924 | my $pattern = $prefix; | ||||
7925 | |||||
7926 | # user may give leading caret to force matching left comments only | ||||
7927 | if ( $prefix !~ /^\^#/ ) { | ||||
7928 | if ( $prefix !~ /^#/ ) { | ||||
7929 | Perl::Tidy::Die | ||||
7930 | "ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n"; | ||||
7931 | } | ||||
7932 | $pattern = '^\s*' . $prefix; | ||||
7933 | } | ||||
7934 | eval "'##'=~/$pattern/"; | ||||
7935 | if ($@) { | ||||
7936 | Perl::Tidy::Die | ||||
7937 | "ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n"; | ||||
7938 | } | ||||
7939 | $static_block_comment_pattern = $pattern; | ||||
7940 | } | ||||
7941 | } | ||||
7942 | |||||
7943 | sub make_format_skipping_pattern { | ||||
7944 | my ( $opt_name, $default ) = @_; | ||||
7945 | my $param = $rOpts->{$opt_name}; | ||||
7946 | unless ($param) { $param = $default } | ||||
7947 | $param =~ s/^\s*//; | ||||
7948 | if ( $param !~ /^#/ ) { | ||||
7949 | Perl::Tidy::Die | ||||
7950 | "ERROR: the $opt_name parameter '$param' must begin with '#'\n"; | ||||
7951 | } | ||||
7952 | my $pattern = '^' . $param . '\s'; | ||||
7953 | eval "'#'=~/$pattern/"; | ||||
7954 | if ($@) { | ||||
7955 | Perl::Tidy::Die | ||||
7956 | "ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"; | ||||
7957 | } | ||||
7958 | return $pattern; | ||||
7959 | } | ||||
7960 | |||||
7961 | sub make_closing_side_comment_list_pattern { | ||||
7962 | |||||
7963 | # turn any input list into a regex for recognizing selected block types | ||||
7964 | $closing_side_comment_list_pattern = '^\w+'; | ||||
7965 | if ( defined( $rOpts->{'closing-side-comment-list'} ) | ||||
7966 | && $rOpts->{'closing-side-comment-list'} ) | ||||
7967 | { | ||||
7968 | $closing_side_comment_list_pattern = | ||||
7969 | make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} ); | ||||
7970 | } | ||||
7971 | } | ||||
7972 | |||||
7973 | sub make_bli_pattern { | ||||
7974 | |||||
7975 | if ( defined( $rOpts->{'brace-left-and-indent-list'} ) | ||||
7976 | && $rOpts->{'brace-left-and-indent-list'} ) | ||||
7977 | { | ||||
7978 | $bli_list_string = $rOpts->{'brace-left-and-indent-list'}; | ||||
7979 | } | ||||
7980 | |||||
7981 | $bli_pattern = make_block_pattern( '-blil', $bli_list_string ); | ||||
7982 | } | ||||
7983 | |||||
7984 | sub make_block_brace_vertical_tightness_pattern { | ||||
7985 | |||||
7986 | # turn any input list into a regex for recognizing selected block types | ||||
7987 | $block_brace_vertical_tightness_pattern = | ||||
7988 | '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)'; | ||||
7989 | if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} ) | ||||
7990 | && $rOpts->{'block-brace-vertical-tightness-list'} ) | ||||
7991 | { | ||||
7992 | $block_brace_vertical_tightness_pattern = | ||||
7993 | make_block_pattern( '-bbvtl', | ||||
7994 | $rOpts->{'block-brace-vertical-tightness-list'} ); | ||||
7995 | } | ||||
7996 | } | ||||
7997 | |||||
7998 | sub make_block_pattern { | ||||
7999 | |||||
8000 | # given a string of block-type keywords, return a regex to match them | ||||
8001 | # The only tricky part is that labels are indicated with a single ':' | ||||
8002 | # and the 'sub' token text may have additional text after it (name of | ||||
8003 | # sub). | ||||
8004 | # | ||||
8005 | # Example: | ||||
8006 | # | ||||
8007 | # input string: "if else elsif unless while for foreach do : sub"; | ||||
8008 | # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)'; | ||||
8009 | |||||
8010 | my ( $abbrev, $string ) = @_; | ||||
8011 | my @list = split_words($string); | ||||
8012 | my @words = (); | ||||
8013 | my %seen; | ||||
8014 | for my $i (@list) { | ||||
8015 | if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern } | ||||
8016 | next if $seen{$i}; | ||||
8017 | $seen{$i} = 1; | ||||
8018 | if ( $i eq 'sub' ) { | ||||
8019 | } | ||||
8020 | elsif ( $i eq ';' ) { | ||||
8021 | push @words, ';'; | ||||
8022 | } | ||||
8023 | elsif ( $i eq '{' ) { | ||||
8024 | push @words, '\{'; | ||||
8025 | } | ||||
8026 | elsif ( $i eq ':' ) { | ||||
8027 | push @words, '\w+:'; | ||||
8028 | } | ||||
8029 | elsif ( $i =~ /^\w/ ) { | ||||
8030 | push @words, $i; | ||||
8031 | } | ||||
8032 | else { | ||||
8033 | Perl::Tidy::Warn | ||||
8034 | "unrecognized block type $i after $abbrev, ignoring\n"; | ||||
8035 | } | ||||
8036 | } | ||||
8037 | my $pattern = '(' . join( '|', @words ) . ')$'; | ||||
8038 | if ( $seen{'sub'} ) { | ||||
8039 | $pattern = '(' . $pattern . '|sub)'; | ||||
8040 | } | ||||
8041 | $pattern = '^' . $pattern; | ||||
8042 | return $pattern; | ||||
8043 | } | ||||
8044 | |||||
8045 | sub make_static_side_comment_pattern { | ||||
8046 | |||||
8047 | # create the pattern used to identify static side comments | ||||
8048 | $static_side_comment_pattern = '^##'; | ||||
8049 | |||||
8050 | # allow the user to change it | ||||
8051 | if ( $rOpts->{'static-side-comment-prefix'} ) { | ||||
8052 | my $prefix = $rOpts->{'static-side-comment-prefix'}; | ||||
8053 | $prefix =~ s/^\s*//; | ||||
8054 | my $pattern = '^' . $prefix; | ||||
8055 | eval "'##'=~/$pattern/"; | ||||
8056 | if ($@) { | ||||
8057 | Perl::Tidy::Die | ||||
8058 | "ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n"; | ||||
8059 | } | ||||
8060 | $static_side_comment_pattern = $pattern; | ||||
8061 | } | ||||
8062 | } | ||||
8063 | |||||
8064 | sub make_closing_side_comment_prefix { | ||||
8065 | |||||
8066 | # Be sure we have a valid closing side comment prefix | ||||
8067 | my $csc_prefix = $rOpts->{'closing-side-comment-prefix'}; | ||||
8068 | my $csc_prefix_pattern; | ||||
8069 | if ( !defined($csc_prefix) ) { | ||||
8070 | $csc_prefix = '## end'; | ||||
8071 | $csc_prefix_pattern = '^##\s+end'; | ||||
8072 | } | ||||
8073 | else { | ||||
8074 | my $test_csc_prefix = $csc_prefix; | ||||
8075 | if ( $test_csc_prefix !~ /^#/ ) { | ||||
8076 | $test_csc_prefix = '#' . $test_csc_prefix; | ||||
8077 | } | ||||
8078 | |||||
8079 | # make a regex to recognize the prefix | ||||
8080 | my $test_csc_prefix_pattern = $test_csc_prefix; | ||||
8081 | |||||
8082 | # escape any special characters | ||||
8083 | $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g; | ||||
8084 | |||||
8085 | $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern; | ||||
8086 | |||||
8087 | # allow exact number of intermediate spaces to vary | ||||
8088 | $test_csc_prefix_pattern =~ s/\s+/\\s\+/g; | ||||
8089 | |||||
8090 | # make sure we have a good pattern | ||||
8091 | # if we fail this we probably have an error in escaping | ||||
8092 | # characters. | ||||
8093 | eval "'##'=~/$test_csc_prefix_pattern/"; | ||||
8094 | if ($@) { | ||||
8095 | |||||
8096 | # shouldn't happen..must have screwed up escaping, above | ||||
8097 | report_definite_bug(); | ||||
8098 | Perl::Tidy::Warn | ||||
8099 | "Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n"; | ||||
8100 | |||||
8101 | # just warn and keep going with defaults | ||||
8102 | Perl::Tidy::Warn "Please consider using a simpler -cscp prefix\n"; | ||||
8103 | Perl::Tidy::Warn | ||||
8104 | "Using default -cscp instead; please check output\n"; | ||||
8105 | } | ||||
8106 | else { | ||||
8107 | $csc_prefix = $test_csc_prefix; | ||||
8108 | $csc_prefix_pattern = $test_csc_prefix_pattern; | ||||
8109 | } | ||||
8110 | } | ||||
8111 | $rOpts->{'closing-side-comment-prefix'} = $csc_prefix; | ||||
8112 | $closing_side_comment_prefix_pattern = $csc_prefix_pattern; | ||||
8113 | } | ||||
8114 | |||||
8115 | sub dump_want_left_space { | ||||
8116 | my $fh = shift; | ||||
8117 | local $" = "\n"; | ||||
8118 | print $fh <<EOM; | ||||
8119 | These values are the main control of whitespace to the left of a token type; | ||||
8120 | They may be altered with the -wls parameter. | ||||
8121 | For a list of token types, use perltidy --dump-token-types (-dtt) | ||||
8122 | 1 means the token wants a space to its left | ||||
8123 | -1 means the token does not want a space to its left | ||||
8124 | ------------------------------------------------------------------------ | ||||
8125 | EOM | ||||
8126 | foreach ( sort keys %want_left_space ) { | ||||
8127 | print $fh "$_\t$want_left_space{$_}\n"; | ||||
8128 | } | ||||
8129 | } | ||||
8130 | |||||
8131 | sub dump_want_right_space { | ||||
8132 | my $fh = shift; | ||||
8133 | local $" = "\n"; | ||||
8134 | print $fh <<EOM; | ||||
8135 | These values are the main control of whitespace to the right of a token type; | ||||
8136 | They may be altered with the -wrs parameter. | ||||
8137 | For a list of token types, use perltidy --dump-token-types (-dtt) | ||||
8138 | 1 means the token wants a space to its right | ||||
8139 | -1 means the token does not want a space to its right | ||||
8140 | ------------------------------------------------------------------------ | ||||
8141 | EOM | ||||
8142 | foreach ( sort keys %want_right_space ) { | ||||
8143 | print $fh "$_\t$want_right_space{$_}\n"; | ||||
8144 | } | ||||
8145 | } | ||||
8146 | |||||
8147 | { # begin is_essential_whitespace | ||||
8148 | |||||
8149 | 2 | 300ns | my %is_sort_grep_map; | ||
8150 | 1 | 200ns | my %is_for_foreach; | ||
8151 | |||||
8152 | # spent 11µs within Perl::Tidy::Formatter::BEGIN@8152 which was called:
# once (11µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 8160 | ||||
8153 | |||||
8154 | 1 | 2µs | @_ = qw(sort grep map); | ||
8155 | 1 | 3µs | @is_sort_grep_map{@_} = (1) x scalar(@_); | ||
8156 | |||||
8157 | 1 | 1µs | @_ = qw(for foreach); | ||
8158 | 1 | 10µs | @is_for_foreach{@_} = (1) x scalar(@_); | ||
8159 | |||||
8160 | 1 | 458µs | 1 | 11µs | } # spent 11µs making 1 call to Perl::Tidy::Formatter::BEGIN@8152 |
8161 | |||||
8162 | sub is_essential_whitespace { | ||||
8163 | |||||
8164 | # Essential whitespace means whitespace which cannot be safely deleted | ||||
8165 | # without risking the introduction of a syntax error. | ||||
8166 | # We are given three tokens and their types: | ||||
8167 | # ($tokenl, $typel) is the token to the left of the space in question | ||||
8168 | # ($tokenr, $typer) is the token to the right of the space in question | ||||
8169 | # ($tokenll, $typell) is previous nonblank token to the left of $tokenl | ||||
8170 | # | ||||
8171 | # This is a slow routine but is not needed too often except when -mangle | ||||
8172 | # is used. | ||||
8173 | # | ||||
8174 | # Note: This routine should almost never need to be changed. It is | ||||
8175 | # for avoiding syntax problems rather than for formatting. | ||||
8176 | my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_; | ||||
8177 | |||||
8178 | my $result = | ||||
8179 | |||||
8180 | # never combine two bare words or numbers | ||||
8181 | # examples: and ::ok(1) | ||||
8182 | # return ::spw(...) | ||||
8183 | # for bla::bla:: abc | ||||
8184 | # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl | ||||
8185 | # $input eq"quit" to make $inputeq"quit" | ||||
8186 | # my $size=-s::SINK if $file; <==OK but we won't do it | ||||
8187 | # don't join something like: for bla::bla:: abc | ||||
8188 | # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl | ||||
8189 | ( ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' ) | ||||
8190 | && ( $tokenr =~ /^([\'\w]|\:\:)/ ) ) | ||||
8191 | |||||
8192 | # do not combine a number with a concatenation dot | ||||
8193 | # example: pom.caputo: | ||||
8194 | # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n"); | ||||
8195 | || ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) ) | ||||
8196 | || ( ( $typer eq 'n' ) && ( $tokenl eq '.' ) ) | ||||
8197 | |||||
8198 | # do not join a minus with a bare word, because you might form | ||||
8199 | # a file test operator. Example from Complex.pm: | ||||
8200 | # if (CORE::abs($z - i) < $eps); "z-i" would be taken as a file test. | ||||
8201 | || ( ( $tokenl eq '-' ) && ( $tokenr =~ /^[_A-Za-z]$/ ) ) | ||||
8202 | |||||
8203 | # and something like this could become ambiguous without space | ||||
8204 | # after the '-': | ||||
8205 | # use constant III=>1; | ||||
8206 | # $a = $b - III; | ||||
8207 | # and even this: | ||||
8208 | # $a = - III; | ||||
8209 | || ( ( $tokenl eq '-' ) | ||||
8210 | && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) ) | ||||
8211 | |||||
8212 | # '= -' should not become =- or you will get a warning | ||||
8213 | # about reversed -= | ||||
8214 | # || ($tokenr eq '-') | ||||
8215 | |||||
8216 | # keep a space between a quote and a bareword to prevent the | ||||
8217 | # bareword from becoming a quote modifier. | ||||
8218 | || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) ) | ||||
8219 | |||||
8220 | # keep a space between a token ending in '$' and any word; | ||||
8221 | # this caused trouble: "die @$ if $@" | ||||
8222 | || ( ( $typel eq 'i' && $tokenl =~ /\$$/ ) | ||||
8223 | && ( $tokenr =~ /^[a-zA-Z_]/ ) ) | ||||
8224 | |||||
8225 | # perl is very fussy about spaces before << | ||||
8226 | || ( $tokenr =~ /^\<\</ ) | ||||
8227 | |||||
8228 | # avoid combining tokens to create new meanings. Example: | ||||
8229 | # $a+ +$b must not become $a++$b | ||||
8230 | || ( $is_digraph{ $tokenl . $tokenr } ) | ||||
8231 | || ( $is_trigraph{ $tokenl . $tokenr } ) | ||||
8232 | |||||
8233 | # another example: do not combine these two &'s: | ||||
8234 | # allow_options & &OPT_EXECCGI | ||||
8235 | || ( $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } ) | ||||
8236 | |||||
8237 | # don't combine $$ or $# with any alphanumeric | ||||
8238 | # (testfile mangle.t with --mangle) | ||||
8239 | || ( ( $tokenl =~ /^\$[\$\#]$/ ) && ( $tokenr =~ /^\w/ ) ) | ||||
8240 | |||||
8241 | # retain any space after possible filehandle | ||||
8242 | # (testfiles prnterr1.t with --extrude and mangle.t with --mangle) | ||||
8243 | || ( $typel eq 'Z' ) | ||||
8244 | |||||
8245 | # Perl is sensitive to whitespace after the + here: | ||||
8246 | # $b = xvals $a + 0.1 * yvals $a; | ||||
8247 | || ( $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/ ) | ||||
8248 | |||||
8249 | # keep paren separate in 'use Foo::Bar ()' | ||||
8250 | || ( $tokenr eq '(' | ||||
8251 | && $typel eq 'w' | ||||
8252 | && $typell eq 'k' | ||||
8253 | && $tokenll eq 'use' ) | ||||
8254 | |||||
8255 | # keep any space between filehandle and paren: | ||||
8256 | # file mangle.t with --mangle: | ||||
8257 | || ( $typel eq 'Y' && $tokenr eq '(' ) | ||||
8258 | |||||
8259 | # retain any space after here doc operator ( hereerr.t) | ||||
8260 | || ( $typel eq 'h' ) | ||||
8261 | |||||
8262 | # be careful with a space around ++ and --, to avoid ambiguity as to | ||||
8263 | # which token it applies | ||||
8264 | || ( ( $typer =~ /^(pp|mm)$/ ) && ( $tokenl !~ /^[\;\{\(\[]/ ) ) | ||||
8265 | || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) ) | ||||
8266 | |||||
8267 | # need space after foreach my; for example, this will fail in | ||||
8268 | # older versions of Perl: | ||||
8269 | # foreach my$ft(@filetypes)... | ||||
8270 | || ( | ||||
8271 | $tokenl eq 'my' | ||||
8272 | |||||
8273 | # /^(for|foreach)$/ | ||||
8274 | && $is_for_foreach{$tokenll} | ||||
8275 | && $tokenr =~ /^\$/ | ||||
8276 | ) | ||||
8277 | |||||
8278 | # must have space between grep and left paren; "grep(" will fail | ||||
8279 | || ( $tokenr eq '(' && $is_sort_grep_map{$tokenl} ) | ||||
8280 | |||||
8281 | # don't stick numbers next to left parens, as in: | ||||
8282 | #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm) | ||||
8283 | || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) ) | ||||
8284 | |||||
8285 | # We must be sure that a space between a ? and a quoted string | ||||
8286 | # remains if the space before the ? remains. [Loca.pm, lockarea] | ||||
8287 | # ie, | ||||
8288 | # $b=join $comma ? ',' : ':', @_; # ok | ||||
8289 | # $b=join $comma?',' : ':', @_; # ok! | ||||
8290 | # $b=join $comma ?',' : ':', @_; # error! | ||||
8291 | # Not really required: | ||||
8292 | ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) ) | ||||
8293 | |||||
8294 | # do not remove space between an '&' and a bare word because | ||||
8295 | # it may turn into a function evaluation, like here | ||||
8296 | # between '&' and 'O_ACCMODE', producing a syntax error [File.pm] | ||||
8297 | # $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY); | ||||
8298 | || ( ( $typel eq '&' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) ) | ||||
8299 | |||||
8300 | # space stacked labels (TODO: check if really necessary) | ||||
8301 | || ( $typel eq 'J' && $typer eq 'J' ) | ||||
8302 | |||||
8303 | ; # the value of this long logic sequence is the result we want | ||||
8304 | return $result; | ||||
8305 | } | ||||
8306 | } | ||||
8307 | |||||
8308 | { | ||||
8309 | 2 | 0s | my %secret_operators; | ||
8310 | 1 | 2µs | my %is_leading_secret_token; | ||
8311 | |||||
8312 | # spent 28µs within Perl::Tidy::Formatter::BEGIN@8312 which was called:
# once (28µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 8337 | ||||
8313 | |||||
8314 | # token lists for perl secret operators as compiled by Philippe Bruhat | ||||
8315 | # at: https://metacpan.org/module/perlsecret | ||||
8316 | 1 | 14µs | %secret_operators = ( | ||
8317 | 'Goatse' => [qw#= ( ) =#], #=( )= | ||||
8318 | 'Venus1' => [qw#0 +#], # 0+ | ||||
8319 | 'Venus2' => [qw#+ 0#], # +0 | ||||
8320 | 'Enterprise' => [qw#) x ! !#], # ()x!! | ||||
8321 | 'Kite1' => [qw#~ ~ <>#], # ~~<> | ||||
8322 | 'Kite2' => [qw#~~ <>#], # ~~<> | ||||
8323 | 'Winking Fat Comma' => [ ( ',', '=>' ) ], # ,=> | ||||
8324 | ); | ||||
8325 | |||||
8326 | # The following operators and constants are not included because they | ||||
8327 | # are normally kept tight by perltidy: | ||||
8328 | # !! ~~ <~> | ||||
8329 | # | ||||
8330 | |||||
8331 | # Make a lookup table indexed by the first token of each operator: | ||||
8332 | # first token => [list, list, ...] | ||||
8333 | 1 | 9µs | foreach my $value ( values(%secret_operators) ) { | ||
8334 | 7 | 1µs | my $tok = $value->[0]; | ||
8335 | 7 | 5µs | push @{ $is_leading_secret_token{$tok} }, $value; | ||
8336 | } | ||||
8337 | 1 | 656µs | 1 | 28µs | } # spent 28µs making 1 call to Perl::Tidy::Formatter::BEGIN@8312 |
8338 | |||||
8339 | sub secret_operator_whitespace { | ||||
8340 | |||||
8341 | my ( $jmax, $rtokens, $rtoken_type, $rwhite_space_flag ) = @_; | ||||
8342 | |||||
8343 | # Loop over all tokens in this line | ||||
8344 | my ( $j, $token, $type ); | ||||
8345 | for ( $j = 0 ; $j <= $jmax ; $j++ ) { | ||||
8346 | |||||
8347 | $token = $$rtokens[$j]; | ||||
8348 | $type = $$rtoken_type[$j]; | ||||
8349 | |||||
8350 | # Skip unless this token might start a secret operator | ||||
8351 | next if ( $type eq 'b' ); | ||||
8352 | next unless ( $is_leading_secret_token{$token} ); | ||||
8353 | |||||
8354 | # Loop over all secret operators with this leading token | ||||
8355 | foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) { | ||||
8356 | my $jend = $j - 1; | ||||
8357 | foreach my $tok ( @{$rpattern} ) { | ||||
8358 | $jend++; | ||||
8359 | $jend++ | ||||
8360 | |||||
8361 | if ( $jend <= $jmax && $$rtoken_type[$jend] eq 'b' ); | ||||
8362 | if ( $jend > $jmax || $tok ne $$rtokens[$jend] ) { | ||||
8363 | $jend = undef; | ||||
8364 | last; | ||||
8365 | } | ||||
8366 | } | ||||
8367 | |||||
8368 | if ($jend) { | ||||
8369 | |||||
8370 | # set flags to prevent spaces within this operator | ||||
8371 | for ( my $jj = $j + 1 ; $jj <= $jend ; $jj++ ) { | ||||
8372 | $rwhite_space_flag->[$jj] = WS_NO; | ||||
8373 | } | ||||
8374 | $j = $jend; | ||||
8375 | last; | ||||
8376 | } | ||||
8377 | } ## End Loop over all operators | ||||
8378 | } ## End loop over all tokens | ||||
8379 | } # End sub | ||||
8380 | } | ||||
8381 | |||||
8382 | sub set_white_space_flag { | ||||
8383 | |||||
8384 | # This routine examines each pair of nonblank tokens and | ||||
8385 | # sets values for array @white_space_flag. | ||||
8386 | # | ||||
8387 | # $white_space_flag[$j] is a flag indicating whether a white space | ||||
8388 | # BEFORE token $j is needed, with the following values: | ||||
8389 | # | ||||
8390 | # WS_NO = -1 do not want a space before token $j | ||||
8391 | # WS_OPTIONAL= 0 optional space or $j is a whitespace | ||||
8392 | # WS_YES = 1 want a space before token $j | ||||
8393 | # | ||||
8394 | # | ||||
8395 | # The values for the first token will be defined based | ||||
8396 | # upon the contents of the "to_go" output array. | ||||
8397 | # | ||||
8398 | # Note: retain debug print statements because they are usually | ||||
8399 | # required after adding new token types. | ||||
8400 | |||||
8401 | # spent 90µs within Perl::Tidy::Formatter::BEGIN@8401 which was called:
# once (90µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 8522 | ||||
8402 | |||||
8403 | # initialize these global hashes, which control the use of | ||||
8404 | # whitespace around tokens: | ||||
8405 | # | ||||
8406 | # %binary_ws_rules | ||||
8407 | # %want_left_space | ||||
8408 | # %want_right_space | ||||
8409 | # %space_after_keyword | ||||
8410 | # | ||||
8411 | # Many token types are identical to the tokens themselves. | ||||
8412 | # See the tokenizer for a complete list. Here are some special types: | ||||
8413 | # k = perl keyword | ||||
8414 | # f = semicolon in for statement | ||||
8415 | # m = unary minus | ||||
8416 | # p = unary plus | ||||
8417 | # Note that :: is excluded since it should be contained in an identifier | ||||
8418 | # Note that '->' is excluded because it never gets space | ||||
8419 | # parentheses and brackets are excluded since they are handled specially | ||||
8420 | # curly braces are included but may be overridden by logic, such as | ||||
8421 | # newline logic. | ||||
8422 | |||||
8423 | # NEW_TOKENS: create a whitespace rule here. This can be as | ||||
8424 | # simple as adding your new letter to @spaces_both_sides, for | ||||
8425 | # example. | ||||
8426 | |||||
8427 | 1 | 2µs | @_ = qw" L { ( [ "; | ||
8428 | 1 | 2µs | @is_opening_type{@_} = (1) x scalar(@_); | ||
8429 | |||||
8430 | 1 | 2µs | @_ = qw" R } ) ] "; | ||
8431 | 1 | 800ns | @is_closing_type{@_} = (1) x scalar(@_); | ||
8432 | |||||
8433 | 1 | 13µs | my @spaces_both_sides = qw" | ||
8434 | + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -= | ||||
8435 | .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~ | ||||
8436 | &&= ||= //= <=> A k f w F n C Y U G v | ||||
8437 | "; | ||||
8438 | |||||
8439 | 1 | 2µs | my @spaces_left_side = qw" | ||
8440 | t ! ~ m p { \ h pp mm Z j | ||||
8441 | "; | ||||
8442 | 1 | 600ns | push( @spaces_left_side, '#' ); # avoids warning message | ||
8443 | |||||
8444 | 1 | 1µs | my @spaces_right_side = qw" | ||
8445 | ; } ) ] R J ++ -- **= | ||||
8446 | "; | ||||
8447 | 1 | 200ns | push( @spaces_right_side, ',' ); # avoids warning message | ||
8448 | |||||
8449 | # Note that we are in a BEGIN block here. Later in processing | ||||
8450 | # the values of %want_left_space and %want_right_space | ||||
8451 | # may be overridden by any user settings specified by the | ||||
8452 | # -wls and -wrs parameters. However the binary_whitespace_rules | ||||
8453 | # are hardwired and have priority. | ||||
8454 | 1 | 21µs | @want_left_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides); | ||
8455 | 1 | 9µs | @want_right_space{@spaces_both_sides} = | ||
8456 | (1) x scalar(@spaces_both_sides); | ||||
8457 | 1 | 5µs | @want_left_space{@spaces_left_side} = (1) x scalar(@spaces_left_side); | ||
8458 | 1 | 3µs | @want_right_space{@spaces_left_side} = (-1) x scalar(@spaces_left_side); | ||
8459 | 1 | 2µs | @want_left_space{@spaces_right_side} = | ||
8460 | (-1) x scalar(@spaces_right_side); | ||||
8461 | 1 | 1µs | @want_right_space{@spaces_right_side} = | ||
8462 | (1) x scalar(@spaces_right_side); | ||||
8463 | 1 | 300ns | $want_left_space{'->'} = WS_NO; | ||
8464 | 1 | 100ns | $want_right_space{'->'} = WS_NO; | ||
8465 | 1 | 100ns | $want_left_space{'**'} = WS_NO; | ||
8466 | 1 | 100ns | $want_right_space{'**'} = WS_NO; | ||
8467 | 1 | 100ns | $want_right_space{'CORE::'} = WS_NO; | ||
8468 | |||||
8469 | # These binary_ws_rules are hardwired and have priority over the above | ||||
8470 | # settings. It would be nice to allow adjustment by the user, | ||||
8471 | # but it would be complicated to specify. | ||||
8472 | # | ||||
8473 | # hash type information must stay tightly bound | ||||
8474 | # as in : ${xxxx} | ||||
8475 | 1 | 3µs | $binary_ws_rules{'i'}{'L'} = WS_NO; | ||
8476 | 1 | 300ns | $binary_ws_rules{'i'}{'{'} = WS_YES; | ||
8477 | 1 | 400ns | $binary_ws_rules{'k'}{'{'} = WS_YES; | ||
8478 | 1 | 300ns | $binary_ws_rules{'U'}{'{'} = WS_YES; | ||
8479 | 1 | 200ns | $binary_ws_rules{'i'}{'['} = WS_NO; | ||
8480 | 1 | 300ns | $binary_ws_rules{'R'}{'L'} = WS_NO; | ||
8481 | 1 | 200ns | $binary_ws_rules{'R'}{'{'} = WS_NO; | ||
8482 | 1 | 300ns | $binary_ws_rules{'t'}{'L'} = WS_NO; | ||
8483 | 1 | 100ns | $binary_ws_rules{'t'}{'{'} = WS_NO; | ||
8484 | 1 | 200ns | $binary_ws_rules{'}'}{'L'} = WS_NO; | ||
8485 | 1 | 200ns | $binary_ws_rules{'}'}{'{'} = WS_NO; | ||
8486 | 1 | 300ns | $binary_ws_rules{'$'}{'L'} = WS_NO; | ||
8487 | 1 | 200ns | $binary_ws_rules{'$'}{'{'} = WS_NO; | ||
8488 | 1 | 700ns | $binary_ws_rules{'@'}{'L'} = WS_NO; | ||
8489 | 1 | 100ns | $binary_ws_rules{'@'}{'{'} = WS_NO; | ||
8490 | 1 | 300ns | $binary_ws_rules{'='}{'L'} = WS_YES; | ||
8491 | 1 | 300ns | $binary_ws_rules{'J'}{'J'} = WS_YES; | ||
8492 | |||||
8493 | # the following includes ') {' | ||||
8494 | # as in : if ( xxx ) { yyy } | ||||
8495 | 1 | 300ns | $binary_ws_rules{']'}{'L'} = WS_NO; | ||
8496 | 1 | 100ns | $binary_ws_rules{']'}{'{'} = WS_NO; | ||
8497 | 1 | 300ns | $binary_ws_rules{')'}{'{'} = WS_YES; | ||
8498 | 1 | 100ns | $binary_ws_rules{')'}{'['} = WS_NO; | ||
8499 | 1 | 100ns | $binary_ws_rules{']'}{'['} = WS_NO; | ||
8500 | 1 | 100ns | $binary_ws_rules{']'}{'{'} = WS_NO; | ||
8501 | 1 | 100ns | $binary_ws_rules{'}'}{'['} = WS_NO; | ||
8502 | 1 | 100ns | $binary_ws_rules{'R'}{'['} = WS_NO; | ||
8503 | |||||
8504 | 1 | 100ns | $binary_ws_rules{']'}{'++'} = WS_NO; | ||
8505 | 1 | 100ns | $binary_ws_rules{']'}{'--'} = WS_NO; | ||
8506 | 1 | 100ns | $binary_ws_rules{')'}{'++'} = WS_NO; | ||
8507 | 1 | 100ns | $binary_ws_rules{')'}{'--'} = WS_NO; | ||
8508 | |||||
8509 | 1 | 100ns | $binary_ws_rules{'R'}{'++'} = WS_NO; | ||
8510 | 1 | 100ns | $binary_ws_rules{'R'}{'--'} = WS_NO; | ||
8511 | |||||
8512 | 1 | 200ns | $binary_ws_rules{'i'}{'Q'} = WS_YES; | ||
8513 | 1 | 300ns | $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()' | ||
8514 | |||||
8515 | # FIXME: we could to split 'i' into variables and functions | ||||
8516 | # and have no space for functions but space for variables. For now, | ||||
8517 | # I have a special patch in the special rules below | ||||
8518 | 1 | 100ns | $binary_ws_rules{'i'}{'('} = WS_NO; | ||
8519 | |||||
8520 | 1 | 300ns | $binary_ws_rules{'w'}{'('} = WS_NO; | ||
8521 | 1 | 14µs | $binary_ws_rules{'w'}{'{'} = WS_YES; | ||
8522 | 1 | 5.94ms | 1 | 90µs | } ## end BEGIN block # spent 90µs making 1 call to Perl::Tidy::Formatter::BEGIN@8401 |
8523 | |||||
8524 | my ( $jmax, $rtokens, $rtoken_type, $rblock_type ) = @_; | ||||
8525 | my ( $last_token, $last_type, $last_block_type, $token, $type, | ||||
8526 | $block_type ); | ||||
8527 | my (@white_space_flag); | ||||
8528 | my $j_tight_closing_paren = -1; | ||||
8529 | |||||
8530 | if ( $max_index_to_go >= 0 ) { | ||||
8531 | $token = $tokens_to_go[$max_index_to_go]; | ||||
8532 | $type = $types_to_go[$max_index_to_go]; | ||||
8533 | $block_type = $block_type_to_go[$max_index_to_go]; | ||||
8534 | |||||
8535 | #--------------------------------------------------------------- | ||||
8536 | # Patch due to splitting of tokens with leading -> | ||||
8537 | #--------------------------------------------------------------- | ||||
8538 | # | ||||
8539 | # This routine is dealing with the raw tokens from the tokenizer, | ||||
8540 | # but to get started it needs the previous token, which will | ||||
8541 | # have been stored in the '_to_go' arrays. | ||||
8542 | # | ||||
8543 | # This patch avoids requiring two iterations to | ||||
8544 | # converge for cases such as the following, where a paren | ||||
8545 | # comes in on a line following a variable with leading arrow: | ||||
8546 | # $self->{main}->add_content_defer_opening | ||||
8547 | # ($name, $wmkf, $self->{attrs}, $self); | ||||
8548 | # In this case when we see the opening paren on line 2 we need | ||||
8549 | # to know if the last token on the previous line had an arrow, | ||||
8550 | # but it has already been split off so we have to add it back | ||||
8551 | # in to avoid getting an unwanted space before the paren. | ||||
8552 | if ( $type =~ /^[wi]$/ ) { | ||||
8553 | my $im = $iprev_to_go[$max_index_to_go]; | ||||
8554 | my $tm = ( $im >= 0 ) ? $types_to_go[$im] : ""; | ||||
8555 | if ( $tm eq '->' ) { $token = $tm . $token } | ||||
8556 | } | ||||
8557 | |||||
8558 | #--------------------------------------------------------------- | ||||
8559 | # End patch due to splitting of tokens with leading -> | ||||
8560 | #--------------------------------------------------------------- | ||||
8561 | } | ||||
8562 | else { | ||||
8563 | $token = ' '; | ||||
8564 | $type = 'b'; | ||||
8565 | $block_type = ''; | ||||
8566 | } | ||||
8567 | |||||
8568 | my ( $j, $ws ); | ||||
8569 | |||||
8570 | # main loop over all tokens to define the whitespace flags | ||||
8571 | for ( $j = 0 ; $j <= $jmax ; $j++ ) { | ||||
8572 | |||||
8573 | if ( $$rtoken_type[$j] eq 'b' ) { | ||||
8574 | $white_space_flag[$j] = WS_OPTIONAL; | ||||
8575 | next; | ||||
8576 | } | ||||
8577 | |||||
8578 | # set a default value, to be changed as needed | ||||
8579 | $ws = undef; | ||||
8580 | $last_token = $token; | ||||
8581 | $last_type = $type; | ||||
8582 | $last_block_type = $block_type; | ||||
8583 | $token = $$rtokens[$j]; | ||||
8584 | $type = $$rtoken_type[$j]; | ||||
8585 | $block_type = $$rblock_type[$j]; | ||||
8586 | |||||
8587 | #--------------------------------------------------------------- | ||||
8588 | # Whitespace Rules Section 1: | ||||
8589 | # Handle space on the inside of opening braces. | ||||
8590 | #--------------------------------------------------------------- | ||||
8591 | |||||
8592 | # /^[L\{\(\[]$/ | ||||
8593 | if ( $is_opening_type{$last_type} ) { | ||||
8594 | |||||
8595 | $j_tight_closing_paren = -1; | ||||
8596 | |||||
8597 | # let's keep empty matched braces together: () {} [] | ||||
8598 | # except for BLOCKS | ||||
8599 | if ( $token eq $matching_token{$last_token} ) { | ||||
8600 | if ($block_type) { | ||||
8601 | $ws = WS_YES; | ||||
8602 | } | ||||
8603 | else { | ||||
8604 | $ws = WS_NO; | ||||
8605 | } | ||||
8606 | } | ||||
8607 | else { | ||||
8608 | |||||
8609 | # we're considering the right of an opening brace | ||||
8610 | # tightness = 0 means always pad inside with space | ||||
8611 | # tightness = 1 means pad inside if "complex" | ||||
8612 | # tightness = 2 means never pad inside with space | ||||
8613 | |||||
8614 | my $tightness; | ||||
8615 | if ( $last_type eq '{' | ||||
8616 | && $last_token eq '{' | ||||
8617 | && $last_block_type ) | ||||
8618 | { | ||||
8619 | $tightness = $rOpts_block_brace_tightness; | ||||
8620 | } | ||||
8621 | else { $tightness = $tightness{$last_token} } | ||||
8622 | |||||
8623 | #============================================================= | ||||
8624 | # Patch for test problem fabrice_bug.pl | ||||
8625 | # We must always avoid spaces around a bare word beginning | ||||
8626 | # with ^ as in: | ||||
8627 | # my $before = ${^PREMATCH}; | ||||
8628 | # Because all of the following cause an error in perl: | ||||
8629 | # my $before = ${ ^PREMATCH }; | ||||
8630 | # my $before = ${ ^PREMATCH}; | ||||
8631 | # my $before = ${^PREMATCH }; | ||||
8632 | # So if brace tightness flag is -bt=0 we must temporarily reset | ||||
8633 | # to bt=1. Note that here we must set tightness=1 and not 2 so | ||||
8634 | # that the closing space | ||||
8635 | # is also avoided (via the $j_tight_closing_paren flag in coding) | ||||
8636 | if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 } | ||||
8637 | |||||
8638 | #============================================================= | ||||
8639 | |||||
8640 | if ( $tightness <= 0 ) { | ||||
8641 | $ws = WS_YES; | ||||
8642 | } | ||||
8643 | elsif ( $tightness > 1 ) { | ||||
8644 | $ws = WS_NO; | ||||
8645 | } | ||||
8646 | else { | ||||
8647 | |||||
8648 | # Patch to count '-foo' as single token so that | ||||
8649 | # each of $a{-foo} and $a{foo} and $a{'foo'} do | ||||
8650 | # not get spaces with default formatting. | ||||
8651 | my $j_here = $j; | ||||
8652 | ++$j_here | ||||
8653 | if ( $token eq '-' | ||||
8654 | && $last_token eq '{' | ||||
8655 | && $$rtoken_type[ $j + 1 ] eq 'w' ); | ||||
8656 | |||||
8657 | # $j_next is where a closing token should be if | ||||
8658 | # the container has a single token | ||||
8659 | my $j_next = | ||||
8660 | ( $$rtoken_type[ $j_here + 1 ] eq 'b' ) | ||||
8661 | ? $j_here + 2 | ||||
8662 | : $j_here + 1; | ||||
8663 | my $tok_next = $$rtokens[$j_next]; | ||||
8664 | my $type_next = $$rtoken_type[$j_next]; | ||||
8665 | |||||
8666 | # for tightness = 1, if there is just one token | ||||
8667 | # within the matching pair, we will keep it tight | ||||
8668 | if ( | ||||
8669 | $tok_next eq $matching_token{$last_token} | ||||
8670 | |||||
8671 | # but watch out for this: [ [ ] (misc.t) | ||||
8672 | && $last_token ne $token | ||||
8673 | ) | ||||
8674 | { | ||||
8675 | |||||
8676 | # remember where to put the space for the closing paren | ||||
8677 | $j_tight_closing_paren = $j_next; | ||||
8678 | $ws = WS_NO; | ||||
8679 | } | ||||
8680 | else { | ||||
8681 | $ws = WS_YES; | ||||
8682 | } | ||||
8683 | } | ||||
8684 | } | ||||
8685 | } # end setting space flag inside opening tokens | ||||
8686 | my $ws_1 = $ws | ||||
8687 | if FORMATTER_DEBUG_FLAG_WHITE; | ||||
8688 | |||||
8689 | #--------------------------------------------------------------- | ||||
8690 | # Whitespace Rules Section 2: | ||||
8691 | # Handle space on inside of closing brace pairs. | ||||
8692 | #--------------------------------------------------------------- | ||||
8693 | |||||
8694 | # /[\}\)\]R]/ | ||||
8695 | if ( $is_closing_type{$type} ) { | ||||
8696 | |||||
8697 | if ( $j == $j_tight_closing_paren ) { | ||||
8698 | |||||
8699 | $j_tight_closing_paren = -1; | ||||
8700 | $ws = WS_NO; | ||||
8701 | } | ||||
8702 | else { | ||||
8703 | |||||
8704 | if ( !defined($ws) ) { | ||||
8705 | |||||
8706 | my $tightness; | ||||
8707 | if ( $type eq '}' && $token eq '}' && $block_type ) { | ||||
8708 | $tightness = $rOpts_block_brace_tightness; | ||||
8709 | } | ||||
8710 | else { $tightness = $tightness{$token} } | ||||
8711 | |||||
8712 | $ws = ( $tightness > 1 ) ? WS_NO : WS_YES; | ||||
8713 | } | ||||
8714 | } | ||||
8715 | } # end setting space flag inside closing tokens | ||||
8716 | |||||
8717 | my $ws_2 = $ws | ||||
8718 | if FORMATTER_DEBUG_FLAG_WHITE; | ||||
8719 | |||||
8720 | #--------------------------------------------------------------- | ||||
8721 | # Whitespace Rules Section 3: | ||||
8722 | # Use the binary rule table. | ||||
8723 | #--------------------------------------------------------------- | ||||
8724 | if ( !defined($ws) ) { | ||||
8725 | $ws = $binary_ws_rules{$last_type}{$type}; | ||||
8726 | } | ||||
8727 | my $ws_3 = $ws | ||||
8728 | if FORMATTER_DEBUG_FLAG_WHITE; | ||||
8729 | |||||
8730 | #--------------------------------------------------------------- | ||||
8731 | # Whitespace Rules Section 4: | ||||
8732 | # Handle some special cases. | ||||
8733 | #--------------------------------------------------------------- | ||||
8734 | if ( $token eq '(' ) { | ||||
8735 | |||||
8736 | # This will have to be tweaked as tokenization changes. | ||||
8737 | # We usually want a space at '} (', for example: | ||||
8738 | # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s ); | ||||
8739 | # | ||||
8740 | # But not others: | ||||
8741 | # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } ); | ||||
8742 | # At present, the above & block is marked as type L/R so this case | ||||
8743 | # won't go through here. | ||||
8744 | if ( $last_type eq '}' ) { $ws = WS_YES } | ||||
8745 | |||||
8746 | # NOTE: some older versions of Perl had occasional problems if | ||||
8747 | # spaces are introduced between keywords or functions and opening | ||||
8748 | # parens. So the default is not to do this except is certain | ||||
8749 | # cases. The current Perl seems to tolerate spaces. | ||||
8750 | |||||
8751 | # Space between keyword and '(' | ||||
8752 | elsif ( $last_type eq 'k' ) { | ||||
8753 | $ws = WS_NO | ||||
8754 | unless ( $rOpts_space_keyword_paren | ||||
8755 | || $space_after_keyword{$last_token} ); | ||||
8756 | } | ||||
8757 | |||||
8758 | # Space between function and '(' | ||||
8759 | # ----------------------------------------------------- | ||||
8760 | # 'w' and 'i' checks for something like: | ||||
8761 | # myfun( &myfun( ->myfun( | ||||
8762 | # ----------------------------------------------------- | ||||
8763 | elsif (( $last_type =~ /^[wUG]$/ ) | ||||
8764 | || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) ) | ||||
8765 | { | ||||
8766 | $ws = WS_NO unless ($rOpts_space_function_paren); | ||||
8767 | } | ||||
8768 | |||||
8769 | # space between something like $i and ( in | ||||
8770 | # for $i ( 0 .. 20 ) { | ||||
8771 | # FIXME: eventually, type 'i' needs to be split into multiple | ||||
8772 | # token types so this can be a hardwired rule. | ||||
8773 | elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) { | ||||
8774 | $ws = WS_YES; | ||||
8775 | } | ||||
8776 | |||||
8777 | # allow constant function followed by '()' to retain no space | ||||
8778 | elsif ( $last_type eq 'C' && $$rtokens[ $j + 1 ] eq ')' ) { | ||||
8779 | $ws = WS_NO; | ||||
8780 | } | ||||
8781 | } | ||||
8782 | |||||
8783 | # patch for SWITCH/CASE: make space at ']{' optional | ||||
8784 | # since the '{' might begin a case or when block | ||||
8785 | elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) { | ||||
8786 | $ws = WS_OPTIONAL; | ||||
8787 | } | ||||
8788 | |||||
8789 | # keep space between 'sub' and '{' for anonymous sub definition | ||||
8790 | if ( $type eq '{' ) { | ||||
8791 | if ( $last_token eq 'sub' ) { | ||||
8792 | $ws = WS_YES; | ||||
8793 | } | ||||
8794 | |||||
8795 | # this is needed to avoid no space in '){' | ||||
8796 | if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES } | ||||
8797 | |||||
8798 | # avoid any space before the brace or bracket in something like | ||||
8799 | # @opts{'a','b',...} | ||||
8800 | if ( $last_type eq 'i' && $last_token =~ /^\@/ ) { | ||||
8801 | $ws = WS_NO; | ||||
8802 | } | ||||
8803 | } | ||||
8804 | |||||
8805 | elsif ( $type eq 'i' ) { | ||||
8806 | |||||
8807 | # never a space before -> | ||||
8808 | if ( $token =~ /^\-\>/ ) { | ||||
8809 | $ws = WS_NO; | ||||
8810 | } | ||||
8811 | } | ||||
8812 | |||||
8813 | # retain any space between '-' and bare word | ||||
8814 | elsif ( $type eq 'w' || $type eq 'C' ) { | ||||
8815 | $ws = WS_OPTIONAL if $last_type eq '-'; | ||||
8816 | |||||
8817 | # never a space before -> | ||||
8818 | if ( $token =~ /^\-\>/ ) { | ||||
8819 | $ws = WS_NO; | ||||
8820 | } | ||||
8821 | } | ||||
8822 | |||||
8823 | # retain any space between '-' and bare word | ||||
8824 | # example: avoid space between 'USER' and '-' here: | ||||
8825 | # $myhash{USER-NAME}='steve'; | ||||
8826 | elsif ( $type eq 'm' || $type eq '-' ) { | ||||
8827 | $ws = WS_OPTIONAL if ( $last_type eq 'w' ); | ||||
8828 | } | ||||
8829 | |||||
8830 | # always space before side comment | ||||
8831 | elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 } | ||||
8832 | |||||
8833 | # always preserver whatever space was used after a possible | ||||
8834 | # filehandle (except _) or here doc operator | ||||
8835 | if ( | ||||
8836 | $type ne '#' | ||||
8837 | && ( ( $last_type eq 'Z' && $last_token ne '_' ) | ||||
8838 | || $last_type eq 'h' ) | ||||
8839 | ) | ||||
8840 | { | ||||
8841 | $ws = WS_OPTIONAL; | ||||
8842 | } | ||||
8843 | |||||
8844 | my $ws_4 = $ws | ||||
8845 | if FORMATTER_DEBUG_FLAG_WHITE; | ||||
8846 | |||||
8847 | #--------------------------------------------------------------- | ||||
8848 | # Whitespace Rules Section 5: | ||||
8849 | # Apply default rules not covered above. | ||||
8850 | #--------------------------------------------------------------- | ||||
8851 | |||||
8852 | # If we fall through to here, look at the pre-defined hash tables for | ||||
8853 | # the two tokens, and: | ||||
8854 | # if (they are equal) use the common value | ||||
8855 | # if (either is zero or undef) use the other | ||||
8856 | # if (either is -1) use it | ||||
8857 | # That is, | ||||
8858 | # left vs right | ||||
8859 | # 1 vs 1 --> 1 | ||||
8860 | # 0 vs 0 --> 0 | ||||
8861 | # -1 vs -1 --> -1 | ||||
8862 | # | ||||
8863 | # 0 vs -1 --> -1 | ||||
8864 | # 0 vs 1 --> 1 | ||||
8865 | # 1 vs 0 --> 1 | ||||
8866 | # -1 vs 0 --> -1 | ||||
8867 | # | ||||
8868 | # -1 vs 1 --> -1 | ||||
8869 | # 1 vs -1 --> -1 | ||||
8870 | if ( !defined($ws) ) { | ||||
8871 | my $wl = $want_left_space{$type}; | ||||
8872 | my $wr = $want_right_space{$last_type}; | ||||
8873 | if ( !defined($wl) ) { $wl = 0 } | ||||
8874 | if ( !defined($wr) ) { $wr = 0 } | ||||
8875 | $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr; | ||||
8876 | } | ||||
8877 | |||||
8878 | if ( !defined($ws) ) { | ||||
8879 | $ws = 0; | ||||
8880 | write_diagnostics( | ||||
8881 | "WS flag is undefined for tokens $last_token $token\n"); | ||||
8882 | } | ||||
8883 | |||||
8884 | # Treat newline as a whitespace. Otherwise, we might combine | ||||
8885 | # 'Send' and '-recipients' here according to the above rules: | ||||
8886 | # my $msg = new Fax::Send | ||||
8887 | # -recipients => $to, | ||||
8888 | # -data => $data; | ||||
8889 | if ( $ws == 0 && $j == 0 ) { $ws = 1 } | ||||
8890 | |||||
8891 | if ( ( $ws == 0 ) | ||||
8892 | && $j > 0 | ||||
8893 | && $j < $jmax | ||||
8894 | && ( $last_type !~ /^[Zh]$/ ) ) | ||||
8895 | { | ||||
8896 | |||||
8897 | # If this happens, we have a non-fatal but undesirable | ||||
8898 | # hole in the above rules which should be patched. | ||||
8899 | write_diagnostics( | ||||
8900 | "WS flag is zero for tokens $last_token $token\n"); | ||||
8901 | } | ||||
8902 | $white_space_flag[$j] = $ws; | ||||
8903 | |||||
8904 | FORMATTER_DEBUG_FLAG_WHITE && do { | ||||
8905 | my $str = substr( $last_token, 0, 15 ); | ||||
8906 | $str .= ' ' x ( 16 - length($str) ); | ||||
8907 | if ( !defined($ws_1) ) { $ws_1 = "*" } | ||||
8908 | if ( !defined($ws_2) ) { $ws_2 = "*" } | ||||
8909 | if ( !defined($ws_3) ) { $ws_3 = "*" } | ||||
8910 | if ( !defined($ws_4) ) { $ws_4 = "*" } | ||||
8911 | print STDOUT | ||||
8912 | "WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n"; | ||||
8913 | }; | ||||
8914 | } ## end main loop | ||||
8915 | |||||
8916 | if ($rOpts_tight_secret_operators) { | ||||
8917 | secret_operator_whitespace( $jmax, $rtokens, $rtoken_type, | ||||
8918 | \@white_space_flag ); | ||||
8919 | } | ||||
8920 | |||||
8921 | return \@white_space_flag; | ||||
8922 | } ## end sub set_white_space_flag | ||||
8923 | |||||
8924 | { # begin print_line_of_tokens | ||||
8925 | |||||
8926 | 2 | 0s | my $rtoken_type; | ||
8927 | 1 | 0s | my $rtokens; | ||
8928 | 1 | 100ns | my $rlevels; | ||
8929 | 1 | 0s | my $rslevels; | ||
8930 | 1 | 100ns | my $rblock_type; | ||
8931 | 1 | 0s | my $rcontainer_type; | ||
8932 | 1 | 0s | my $rcontainer_environment; | ||
8933 | 1 | 0s | my $rtype_sequence; | ||
8934 | 1 | 0s | my $input_line; | ||
8935 | 1 | 100ns | my $rnesting_tokens; | ||
8936 | 1 | 100ns | my $rci_levels; | ||
8937 | 1 | 100ns | my $rnesting_blocks; | ||
8938 | |||||
8939 | 1 | 100ns | my $in_quote; | ||
8940 | 1 | 0s | my $guessed_indentation_level; | ||
8941 | |||||
8942 | # These local token variables are stored by store_token_to_go: | ||||
8943 | 1 | 0s | my $block_type; | ||
8944 | 1 | 0s | my $ci_level; | ||
8945 | 1 | 0s | my $container_environment; | ||
8946 | 1 | 0s | my $container_type; | ||
8947 | 1 | 0s | my $in_continued_quote; | ||
8948 | 1 | 0s | my $level; | ||
8949 | 1 | 0s | my $nesting_blocks; | ||
8950 | 1 | 0s | my $no_internal_newlines; | ||
8951 | 1 | 0s | my $slevel; | ||
8952 | 1 | 0s | my $token; | ||
8953 | 1 | 0s | my $type; | ||
8954 | 1 | 0s | my $type_sequence; | ||
8955 | |||||
8956 | # routine to pull the jth token from the line of tokens | ||||
8957 | sub extract_token { | ||||
8958 | my $j = shift; | ||||
8959 | $token = $$rtokens[$j]; | ||||
8960 | $type = $$rtoken_type[$j]; | ||||
8961 | $block_type = $$rblock_type[$j]; | ||||
8962 | $container_type = $$rcontainer_type[$j]; | ||||
8963 | $container_environment = $$rcontainer_environment[$j]; | ||||
8964 | $type_sequence = $$rtype_sequence[$j]; | ||||
8965 | $level = $$rlevels[$j]; | ||||
8966 | $slevel = $$rslevels[$j]; | ||||
8967 | $nesting_blocks = $$rnesting_blocks[$j]; | ||||
8968 | $ci_level = $$rci_levels[$j]; | ||||
8969 | } | ||||
8970 | |||||
8971 | { | ||||
8972 | 2 | 600ns | my @saved_token; | ||
8973 | |||||
8974 | sub save_current_token { | ||||
8975 | |||||
8976 | @saved_token = ( | ||||
8977 | $block_type, $ci_level, | ||||
8978 | $container_environment, $container_type, | ||||
8979 | $in_continued_quote, $level, | ||||
8980 | $nesting_blocks, $no_internal_newlines, | ||||
8981 | $slevel, $token, | ||||
8982 | $type, $type_sequence, | ||||
8983 | ); | ||||
8984 | } | ||||
8985 | |||||
8986 | sub restore_current_token { | ||||
8987 | ( | ||||
8988 | $block_type, $ci_level, | ||||
8989 | $container_environment, $container_type, | ||||
8990 | $in_continued_quote, $level, | ||||
8991 | $nesting_blocks, $no_internal_newlines, | ||||
8992 | $slevel, $token, | ||||
8993 | $type, $type_sequence, | ||||
8994 | ) = @saved_token; | ||||
8995 | } | ||||
8996 | } | ||||
8997 | |||||
8998 | sub token_length { | ||||
8999 | |||||
9000 | # Returns the length of a token, given: | ||||
9001 | # $token=text of the token | ||||
9002 | # $type = type | ||||
9003 | # $not_first_token = should be TRUE if this is not the first token of | ||||
9004 | # the line. It might the index of this token in an array. It is | ||||
9005 | # used to test for a side comment vs a block comment. | ||||
9006 | # Note: Eventually this should be the only routine determining the | ||||
9007 | # length of a token in this package. | ||||
9008 | my ( $token, $type, $not_first_token ) = @_; | ||||
9009 | my $token_length = length($token); | ||||
9010 | |||||
9011 | # We mark lengths of side comments as just 1 if we are | ||||
9012 | # ignoring their lengths when setting line breaks. | ||||
9013 | $token_length = 1 | ||||
9014 | if ( $rOpts_ignore_side_comment_lengths | ||||
9015 | && $not_first_token | ||||
9016 | && $type eq '#' ); | ||||
9017 | return $token_length; | ||||
9018 | } | ||||
9019 | |||||
9020 | sub rtoken_length { | ||||
9021 | |||||
9022 | # return length of ith token in @{$rtokens} | ||||
9023 | my ($i) = @_; | ||||
9024 | return token_length( $$rtokens[$i], $$rtoken_type[$i], $i ); | ||||
9025 | } | ||||
9026 | |||||
9027 | # Routine to place the current token into the output stream. | ||||
9028 | # Called once per output token. | ||||
9029 | sub store_token_to_go { | ||||
9030 | |||||
9031 | my $flag = $no_internal_newlines; | ||||
9032 | if ( $_[0] ) { $flag = 1 } | ||||
9033 | |||||
9034 | $tokens_to_go[ ++$max_index_to_go ] = $token; | ||||
9035 | $types_to_go[$max_index_to_go] = $type; | ||||
9036 | $nobreak_to_go[$max_index_to_go] = $flag; | ||||
9037 | $old_breakpoint_to_go[$max_index_to_go] = 0; | ||||
9038 | $forced_breakpoint_to_go[$max_index_to_go] = 0; | ||||
9039 | $block_type_to_go[$max_index_to_go] = $block_type; | ||||
9040 | $type_sequence_to_go[$max_index_to_go] = $type_sequence; | ||||
9041 | $container_environment_to_go[$max_index_to_go] = $container_environment; | ||||
9042 | $nesting_blocks_to_go[$max_index_to_go] = $nesting_blocks; | ||||
9043 | $ci_levels_to_go[$max_index_to_go] = $ci_level; | ||||
9044 | $mate_index_to_go[$max_index_to_go] = -1; | ||||
9045 | $matching_token_to_go[$max_index_to_go] = ''; | ||||
9046 | $bond_strength_to_go[$max_index_to_go] = 0; | ||||
9047 | |||||
9048 | # Note: negative levels are currently retained as a diagnostic so that | ||||
9049 | # the 'final indentation level' is correctly reported for bad scripts. | ||||
9050 | # But this means that every use of $level as an index must be checked. | ||||
9051 | # If this becomes too much of a problem, we might give up and just clip | ||||
9052 | # them at zero. | ||||
9053 | ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0; | ||||
9054 | $levels_to_go[$max_index_to_go] = $level; | ||||
9055 | $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0; | ||||
9056 | |||||
9057 | # link the non-blank tokens | ||||
9058 | my $iprev = $max_index_to_go - 1; | ||||
9059 | $iprev-- if ( $iprev >= 0 && $types_to_go[$iprev] eq 'b' ); | ||||
9060 | $iprev_to_go[$max_index_to_go] = $iprev; | ||||
9061 | $inext_to_go[$iprev] = $max_index_to_go | ||||
9062 | if ( $iprev >= 0 && $type ne 'b' ); | ||||
9063 | $inext_to_go[$max_index_to_go] = $max_index_to_go + 1; | ||||
9064 | |||||
9065 | $token_lengths_to_go[$max_index_to_go] = | ||||
9066 | token_length( $token, $type, $max_index_to_go ); | ||||
9067 | |||||
9068 | # We keep a running sum of token lengths from the start of this batch: | ||||
9069 | # summed_lengths_to_go[$i] = total length to just before token $i | ||||
9070 | # summed_lengths_to_go[$i+1] = total length to just after token $i | ||||
9071 | $summed_lengths_to_go[ $max_index_to_go + 1 ] = | ||||
9072 | $summed_lengths_to_go[$max_index_to_go] + | ||||
9073 | $token_lengths_to_go[$max_index_to_go]; | ||||
9074 | |||||
9075 | # Define the indentation that this token would have if it started | ||||
9076 | # a new line. We have to do this now because we need to know this | ||||
9077 | # when considering one-line blocks. | ||||
9078 | set_leading_whitespace( $level, $ci_level, $in_continued_quote ); | ||||
9079 | |||||
9080 | # remember previous nonblank tokens seen | ||||
9081 | if ( $type ne 'b' ) { | ||||
9082 | $last_last_nonblank_index_to_go = $last_nonblank_index_to_go; | ||||
9083 | $last_last_nonblank_type_to_go = $last_nonblank_type_to_go; | ||||
9084 | $last_last_nonblank_token_to_go = $last_nonblank_token_to_go; | ||||
9085 | $last_nonblank_index_to_go = $max_index_to_go; | ||||
9086 | $last_nonblank_type_to_go = $type; | ||||
9087 | $last_nonblank_token_to_go = $token; | ||||
9088 | if ( $type eq ',' ) { | ||||
9089 | $comma_count_in_batch++; | ||||
9090 | } | ||||
9091 | } | ||||
9092 | |||||
9093 | FORMATTER_DEBUG_FLAG_STORE && do { | ||||
9094 | my ( $a, $b, $c ) = caller(); | ||||
9095 | print STDOUT | ||||
9096 | "STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n"; | ||||
9097 | }; | ||||
9098 | } | ||||
9099 | |||||
9100 | sub insert_new_token_to_go { | ||||
9101 | |||||
9102 | # insert a new token into the output stream. use same level as | ||||
9103 | # previous token; assumes a character at max_index_to_go. | ||||
9104 | save_current_token(); | ||||
9105 | ( $token, $type, $slevel, $no_internal_newlines ) = @_; | ||||
9106 | |||||
9107 | if ( $max_index_to_go == UNDEFINED_INDEX ) { | ||||
9108 | warning("code bug: bad call to insert_new_token_to_go\n"); | ||||
9109 | } | ||||
9110 | $level = $levels_to_go[$max_index_to_go]; | ||||
9111 | |||||
9112 | # FIXME: it seems to be necessary to use the next, rather than | ||||
9113 | # previous, value of this variable when creating a new blank (align.t) | ||||
9114 | #my $slevel = $nesting_depth_to_go[$max_index_to_go]; | ||||
9115 | $nesting_blocks = $nesting_blocks_to_go[$max_index_to_go]; | ||||
9116 | $ci_level = $ci_levels_to_go[$max_index_to_go]; | ||||
9117 | $container_environment = $container_environment_to_go[$max_index_to_go]; | ||||
9118 | $in_continued_quote = 0; | ||||
9119 | $block_type = ""; | ||||
9120 | $type_sequence = ""; | ||||
9121 | store_token_to_go(); | ||||
9122 | restore_current_token(); | ||||
9123 | return; | ||||
9124 | } | ||||
9125 | |||||
9126 | sub print_line_of_tokens { | ||||
9127 | |||||
9128 | my $line_of_tokens = shift; | ||||
9129 | |||||
9130 | # This routine is called once per input line to process all of | ||||
9131 | # the tokens on that line. This is the first stage of | ||||
9132 | # beautification. | ||||
9133 | # | ||||
9134 | # Full-line comments and blank lines may be processed immediately. | ||||
9135 | # | ||||
9136 | # For normal lines of code, the tokens are stored one-by-one, | ||||
9137 | # via calls to 'sub store_token_to_go', until a known line break | ||||
9138 | # point is reached. Then, the batch of collected tokens is | ||||
9139 | # passed along to 'sub output_line_to_go' for further | ||||
9140 | # processing. This routine decides if there should be | ||||
9141 | # whitespace between each pair of non-white tokens, so later | ||||
9142 | # routines only need to decide on any additional line breaks. | ||||
9143 | # Any whitespace is initially a single space character. Later, | ||||
9144 | # the vertical aligner may expand that to be multiple space | ||||
9145 | # characters if necessary for alignment. | ||||
9146 | |||||
9147 | # extract input line number for error messages | ||||
9148 | $input_line_number = $line_of_tokens->{_line_number}; | ||||
9149 | |||||
9150 | $rtoken_type = $line_of_tokens->{_rtoken_type}; | ||||
9151 | $rtokens = $line_of_tokens->{_rtokens}; | ||||
9152 | $rlevels = $line_of_tokens->{_rlevels}; | ||||
9153 | $rslevels = $line_of_tokens->{_rslevels}; | ||||
9154 | $rblock_type = $line_of_tokens->{_rblock_type}; | ||||
9155 | $rcontainer_type = $line_of_tokens->{_rcontainer_type}; | ||||
9156 | $rcontainer_environment = $line_of_tokens->{_rcontainer_environment}; | ||||
9157 | $rtype_sequence = $line_of_tokens->{_rtype_sequence}; | ||||
9158 | $input_line = $line_of_tokens->{_line_text}; | ||||
9159 | $rnesting_tokens = $line_of_tokens->{_rnesting_tokens}; | ||||
9160 | $rci_levels = $line_of_tokens->{_rci_levels}; | ||||
9161 | $rnesting_blocks = $line_of_tokens->{_rnesting_blocks}; | ||||
9162 | |||||
9163 | $in_continued_quote = $starting_in_quote = | ||||
9164 | $line_of_tokens->{_starting_in_quote}; | ||||
9165 | $in_quote = $line_of_tokens->{_ending_in_quote}; | ||||
9166 | $ending_in_quote = $in_quote; | ||||
9167 | $guessed_indentation_level = | ||||
9168 | $line_of_tokens->{_guessed_indentation_level}; | ||||
9169 | |||||
9170 | my $j; | ||||
9171 | my $j_next; | ||||
9172 | my $jmax; | ||||
9173 | my $next_nonblank_token; | ||||
9174 | my $next_nonblank_token_type; | ||||
9175 | my $rwhite_space_flag; | ||||
9176 | |||||
9177 | $jmax = @$rtokens - 1; | ||||
9178 | $block_type = ""; | ||||
9179 | $container_type = ""; | ||||
9180 | $container_environment = ""; | ||||
9181 | $type_sequence = ""; | ||||
9182 | $no_internal_newlines = 1 - $rOpts_add_newlines; | ||||
9183 | $is_static_block_comment = 0; | ||||
9184 | |||||
9185 | # Handle a continued quote.. | ||||
9186 | if ($in_continued_quote) { | ||||
9187 | |||||
9188 | # A line which is entirely a quote or pattern must go out | ||||
9189 | # verbatim. Note: the \n is contained in $input_line. | ||||
9190 | if ( $jmax <= 0 ) { | ||||
9191 | if ( ( $input_line =~ "\t" ) ) { | ||||
9192 | note_embedded_tab(); | ||||
9193 | } | ||||
9194 | write_unindented_line("$input_line"); | ||||
9195 | $last_line_had_side_comment = 0; | ||||
9196 | return; | ||||
9197 | } | ||||
9198 | } | ||||
9199 | |||||
9200 | # Write line verbatim if we are in a formatting skip section | ||||
9201 | if ($in_format_skipping_section) { | ||||
9202 | write_unindented_line("$input_line"); | ||||
9203 | $last_line_had_side_comment = 0; | ||||
9204 | |||||
9205 | # Note: extra space appended to comment simplifies pattern matching | ||||
9206 | if ( $jmax == 0 | ||||
9207 | && $$rtoken_type[0] eq '#' | ||||
9208 | && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_end/o ) | ||||
9209 | { | ||||
9210 | $in_format_skipping_section = 0; | ||||
9211 | write_logfile_entry("Exiting formatting skip section\n"); | ||||
9212 | $file_writer_object->reset_consecutive_blank_lines(); | ||||
9213 | } | ||||
9214 | return; | ||||
9215 | } | ||||
9216 | |||||
9217 | # See if we are entering a formatting skip section | ||||
9218 | if ( $rOpts_format_skipping | ||||
9219 | && $jmax == 0 | ||||
9220 | && $$rtoken_type[0] eq '#' | ||||
9221 | && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_begin/o ) | ||||
9222 | { | ||||
9223 | flush(); | ||||
9224 | $in_format_skipping_section = 1; | ||||
9225 | write_logfile_entry("Entering formatting skip section\n"); | ||||
9226 | write_unindented_line("$input_line"); | ||||
9227 | $last_line_had_side_comment = 0; | ||||
9228 | return; | ||||
9229 | } | ||||
9230 | |||||
9231 | # delete trailing blank tokens | ||||
9232 | if ( $jmax > 0 && $$rtoken_type[$jmax] eq 'b' ) { $jmax-- } | ||||
9233 | |||||
9234 | # Handle a blank line.. | ||||
9235 | if ( $jmax < 0 ) { | ||||
9236 | |||||
9237 | # If keep-old-blank-lines is zero, we delete all | ||||
9238 | # old blank lines and let the blank line rules generate any | ||||
9239 | # needed blanks. | ||||
9240 | if ($rOpts_keep_old_blank_lines) { | ||||
9241 | flush(); | ||||
9242 | $file_writer_object->write_blank_code_line( | ||||
9243 | $rOpts_keep_old_blank_lines == 2 ); | ||||
9244 | $last_line_leading_type = 'b'; | ||||
9245 | } | ||||
9246 | $last_line_had_side_comment = 0; | ||||
9247 | return; | ||||
9248 | } | ||||
9249 | |||||
9250 | # see if this is a static block comment (starts with ## by default) | ||||
9251 | my $is_static_block_comment_without_leading_space = 0; | ||||
9252 | if ( $jmax == 0 | ||||
9253 | && $$rtoken_type[0] eq '#' | ||||
9254 | && $rOpts->{'static-block-comments'} | ||||
9255 | && $input_line =~ /$static_block_comment_pattern/o ) | ||||
9256 | { | ||||
9257 | $is_static_block_comment = 1; | ||||
9258 | $is_static_block_comment_without_leading_space = | ||||
9259 | substr( $input_line, 0, 1 ) eq '#'; | ||||
9260 | } | ||||
9261 | |||||
9262 | # Check for comments which are line directives | ||||
9263 | # Treat exactly as static block comments without leading space | ||||
9264 | # reference: perlsyn, near end, section Plain Old Comments (Not!) | ||||
9265 | # example: '# line 42 "new_filename.plx"' | ||||
9266 | if ( | ||||
9267 | $jmax == 0 | ||||
9268 | && $$rtoken_type[0] eq '#' | ||||
9269 | && $input_line =~ /^\# \s* | ||||
9270 | line \s+ (\d+) \s* | ||||
9271 | (?:\s("?)([^"]+)\2)? \s* | ||||
9272 | $/x | ||||
9273 | ) | ||||
9274 | { | ||||
9275 | $is_static_block_comment = 1; | ||||
9276 | $is_static_block_comment_without_leading_space = 1; | ||||
9277 | } | ||||
9278 | |||||
9279 | # create a hanging side comment if appropriate | ||||
9280 | my $is_hanging_side_comment; | ||||
9281 | if ( | ||||
9282 | $jmax == 0 | ||||
9283 | && $$rtoken_type[0] eq '#' # only token is a comment | ||||
9284 | && $last_line_had_side_comment # last line had side comment | ||||
9285 | && $input_line =~ /^\s/ # there is some leading space | ||||
9286 | && !$is_static_block_comment # do not make static comment hanging | ||||
9287 | && $rOpts->{'hanging-side-comments'} # user is allowing | ||||
9288 | # hanging side comments | ||||
9289 | # like this | ||||
9290 | ) | ||||
9291 | { | ||||
9292 | |||||
9293 | # We will insert an empty qw string at the start of the token list | ||||
9294 | # to force this comment to be a side comment. The vertical aligner | ||||
9295 | # should then line it up with the previous side comment. | ||||
9296 | $is_hanging_side_comment = 1; | ||||
9297 | unshift @$rtoken_type, 'q'; | ||||
9298 | unshift @$rtokens, ''; | ||||
9299 | unshift @$rlevels, $$rlevels[0]; | ||||
9300 | unshift @$rslevels, $$rslevels[0]; | ||||
9301 | unshift @$rblock_type, ''; | ||||
9302 | unshift @$rcontainer_type, ''; | ||||
9303 | unshift @$rcontainer_environment, ''; | ||||
9304 | unshift @$rtype_sequence, ''; | ||||
9305 | unshift @$rnesting_tokens, $$rnesting_tokens[0]; | ||||
9306 | unshift @$rci_levels, $$rci_levels[0]; | ||||
9307 | unshift @$rnesting_blocks, $$rnesting_blocks[0]; | ||||
9308 | $jmax = 1; | ||||
9309 | } | ||||
9310 | |||||
9311 | # remember if this line has a side comment | ||||
9312 | $last_line_had_side_comment = | ||||
9313 | ( $jmax > 0 && $$rtoken_type[$jmax] eq '#' ); | ||||
9314 | |||||
9315 | # Handle a block (full-line) comment.. | ||||
9316 | if ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq '#' ) ) { | ||||
9317 | |||||
9318 | if ( $rOpts->{'delete-block-comments'} ) { return } | ||||
9319 | |||||
9320 | if ( $rOpts->{'tee-block-comments'} ) { | ||||
9321 | $file_writer_object->tee_on(); | ||||
9322 | } | ||||
9323 | |||||
9324 | destroy_one_line_block(); | ||||
9325 | output_line_to_go(); | ||||
9326 | |||||
9327 | # output a blank line before block comments | ||||
9328 | if ( | ||||
9329 | # unless we follow a blank or comment line | ||||
9330 | $last_line_leading_type !~ /^[#b]$/ | ||||
9331 | |||||
9332 | # only if allowed | ||||
9333 | && $rOpts->{'blanks-before-comments'} | ||||
9334 | |||||
9335 | # not if this is an empty comment line | ||||
9336 | && $$rtokens[0] ne '#' | ||||
9337 | |||||
9338 | # not after a short line ending in an opening token | ||||
9339 | # because we already have space above this comment. | ||||
9340 | # Note that the first comment in this if block, after | ||||
9341 | # the 'if (', does not get a blank line because of this. | ||||
9342 | && !$last_output_short_opening_token | ||||
9343 | |||||
9344 | # never before static block comments | ||||
9345 | && !$is_static_block_comment | ||||
9346 | ) | ||||
9347 | { | ||||
9348 | flush(); # switching to new output stream | ||||
9349 | $file_writer_object->write_blank_code_line(); | ||||
9350 | $last_line_leading_type = 'b'; | ||||
9351 | } | ||||
9352 | |||||
9353 | # TRIM COMMENTS -- This could be turned off as a option | ||||
9354 | $$rtokens[0] =~ s/\s*$//; # trim right end | ||||
9355 | |||||
9356 | if ( | ||||
9357 | $rOpts->{'indent-block-comments'} | ||||
9358 | && ( !$rOpts->{'indent-spaced-block-comments'} | ||||
9359 | || $input_line =~ /^\s+/ ) | ||||
9360 | && !$is_static_block_comment_without_leading_space | ||||
9361 | ) | ||||
9362 | { | ||||
9363 | extract_token(0); | ||||
9364 | store_token_to_go(); | ||||
9365 | output_line_to_go(); | ||||
9366 | } | ||||
9367 | else { | ||||
9368 | flush(); # switching to new output stream | ||||
9369 | $file_writer_object->write_code_line( $$rtokens[0] . "\n" ); | ||||
9370 | $last_line_leading_type = '#'; | ||||
9371 | } | ||||
9372 | if ( $rOpts->{'tee-block-comments'} ) { | ||||
9373 | $file_writer_object->tee_off(); | ||||
9374 | } | ||||
9375 | return; | ||||
9376 | } | ||||
9377 | |||||
9378 | # compare input/output indentation except for continuation lines | ||||
9379 | # (because they have an unknown amount of initial blank space) | ||||
9380 | # and lines which are quotes (because they may have been outdented) | ||||
9381 | # Note: this test is placed here because we know the continuation flag | ||||
9382 | # at this point, which allows us to avoid non-meaningful checks. | ||||
9383 | my $structural_indentation_level = $$rlevels[0]; | ||||
9384 | compare_indentation_levels( $guessed_indentation_level, | ||||
9385 | $structural_indentation_level ) | ||||
9386 | unless ( $is_hanging_side_comment | ||||
9387 | || $$rci_levels[0] > 0 | ||||
9388 | || $guessed_indentation_level == 0 && $$rtoken_type[0] eq 'Q' ); | ||||
9389 | |||||
9390 | # Patch needed for MakeMaker. Do not break a statement | ||||
9391 | # in which $VERSION may be calculated. See MakeMaker.pm; | ||||
9392 | # this is based on the coding in it. | ||||
9393 | # The first line of a file that matches this will be eval'd: | ||||
9394 | # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ | ||||
9395 | # Examples: | ||||
9396 | # *VERSION = \'1.01'; | ||||
9397 | # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/; | ||||
9398 | # We will pass such a line straight through without breaking | ||||
9399 | # it unless -npvl is used | ||||
9400 | |||||
9401 | my $is_VERSION_statement = 0; | ||||
9402 | |||||
9403 | if ( | ||||
9404 | !$saw_VERSION_in_this_file | ||||
9405 | && $input_line =~ /VERSION/ # quick check to reject most lines | ||||
9406 | && $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ | ||||
9407 | ) | ||||
9408 | { | ||||
9409 | $saw_VERSION_in_this_file = 1; | ||||
9410 | $is_VERSION_statement = 1; | ||||
9411 | write_logfile_entry("passing VERSION line; -npvl deactivates\n"); | ||||
9412 | $no_internal_newlines = 1; | ||||
9413 | } | ||||
9414 | |||||
9415 | # take care of indentation-only | ||||
9416 | # NOTE: In previous versions we sent all qw lines out immediately here. | ||||
9417 | # No longer doing this: also write a line which is entirely a 'qw' list | ||||
9418 | # to allow stacking of opening and closing tokens. Note that interior | ||||
9419 | # qw lines will still go out at the end of this routine. | ||||
9420 | if ( $rOpts->{'indent-only'} ) { | ||||
9421 | flush(); | ||||
9422 | trim($input_line); | ||||
9423 | |||||
9424 | extract_token(0); | ||||
9425 | $token = $input_line; | ||||
9426 | $type = 'q'; | ||||
9427 | $block_type = ""; | ||||
9428 | $container_type = ""; | ||||
9429 | $container_environment = ""; | ||||
9430 | $type_sequence = ""; | ||||
9431 | store_token_to_go(); | ||||
9432 | output_line_to_go(); | ||||
9433 | return; | ||||
9434 | } | ||||
9435 | |||||
9436 | push( @$rtokens, ' ', ' ' ); # making $j+2 valid simplifies coding | ||||
9437 | push( @$rtoken_type, 'b', 'b' ); | ||||
9438 | ($rwhite_space_flag) = | ||||
9439 | set_white_space_flag( $jmax, $rtokens, $rtoken_type, $rblock_type ); | ||||
9440 | |||||
9441 | # if the buffer hasn't been flushed, add a leading space if | ||||
9442 | # necessary to keep essential whitespace. This is really only | ||||
9443 | # necessary if we are squeezing out all ws. | ||||
9444 | if ( $max_index_to_go >= 0 ) { | ||||
9445 | |||||
9446 | $old_line_count_in_batch++; | ||||
9447 | |||||
9448 | if ( | ||||
9449 | is_essential_whitespace( | ||||
9450 | $last_last_nonblank_token, | ||||
9451 | $last_last_nonblank_type, | ||||
9452 | $tokens_to_go[$max_index_to_go], | ||||
9453 | $types_to_go[$max_index_to_go], | ||||
9454 | $$rtokens[0], | ||||
9455 | $$rtoken_type[0] | ||||
9456 | ) | ||||
9457 | ) | ||||
9458 | { | ||||
9459 | my $slevel = $$rslevels[0]; | ||||
9460 | insert_new_token_to_go( ' ', 'b', $slevel, | ||||
9461 | $no_internal_newlines ); | ||||
9462 | } | ||||
9463 | } | ||||
9464 | |||||
9465 | # If we just saw the end of an elsif block, write nag message | ||||
9466 | # if we do not see another elseif or an else. | ||||
9467 | if ($looking_for_else) { | ||||
9468 | |||||
9469 | unless ( $$rtokens[0] =~ /^(elsif|else)$/ ) { | ||||
9470 | write_logfile_entry("(No else block)\n"); | ||||
9471 | } | ||||
9472 | $looking_for_else = 0; | ||||
9473 | } | ||||
9474 | |||||
9475 | # This is a good place to kill incomplete one-line blocks | ||||
9476 | if ( ( $semicolons_before_block_self_destruct == 0 ) | ||||
9477 | && ( $max_index_to_go >= 0 ) | ||||
9478 | && ( $types_to_go[$max_index_to_go] eq ';' ) | ||||
9479 | && ( $$rtokens[0] ne '}' ) ) | ||||
9480 | { | ||||
9481 | destroy_one_line_block(); | ||||
9482 | output_line_to_go(); | ||||
9483 | } | ||||
9484 | |||||
9485 | # loop to process the tokens one-by-one | ||||
9486 | $type = 'b'; | ||||
9487 | $token = ""; | ||||
9488 | |||||
9489 | foreach $j ( 0 .. $jmax ) { | ||||
9490 | |||||
9491 | # pull out the local values for this token | ||||
9492 | extract_token($j); | ||||
9493 | |||||
9494 | if ( $type eq '#' ) { | ||||
9495 | |||||
9496 | # trim trailing whitespace | ||||
9497 | # (there is no option at present to prevent this) | ||||
9498 | $token =~ s/\s*$//; | ||||
9499 | |||||
9500 | if ( | ||||
9501 | $rOpts->{'delete-side-comments'} | ||||
9502 | |||||
9503 | # delete closing side comments if necessary | ||||
9504 | || ( $rOpts->{'delete-closing-side-comments'} | ||||
9505 | && $token =~ /$closing_side_comment_prefix_pattern/o | ||||
9506 | && $last_nonblank_block_type =~ | ||||
9507 | /$closing_side_comment_list_pattern/o ) | ||||
9508 | ) | ||||
9509 | { | ||||
9510 | if ( $types_to_go[$max_index_to_go] eq 'b' ) { | ||||
9511 | unstore_token_to_go(); | ||||
9512 | } | ||||
9513 | last; | ||||
9514 | } | ||||
9515 | } | ||||
9516 | |||||
9517 | # If we are continuing after seeing a right curly brace, flush | ||||
9518 | # buffer unless we see what we are looking for, as in | ||||
9519 | # } else ... | ||||
9520 | if ( $rbrace_follower && $type ne 'b' ) { | ||||
9521 | |||||
9522 | unless ( $rbrace_follower->{$token} ) { | ||||
9523 | output_line_to_go(); | ||||
9524 | } | ||||
9525 | $rbrace_follower = undef; | ||||
9526 | } | ||||
9527 | |||||
9528 | $j_next = ( $$rtoken_type[ $j + 1 ] eq 'b' ) ? $j + 2 : $j + 1; | ||||
9529 | $next_nonblank_token = $$rtokens[$j_next]; | ||||
9530 | $next_nonblank_token_type = $$rtoken_type[$j_next]; | ||||
9531 | |||||
9532 | #-------------------------------------------------------- | ||||
9533 | # Start of section to patch token text | ||||
9534 | #-------------------------------------------------------- | ||||
9535 | |||||
9536 | # Modify certain tokens here for whitespace | ||||
9537 | # The following is not yet done, but could be: | ||||
9538 | # sub (x x x) | ||||
9539 | if ( $type =~ /^[wit]$/ ) { | ||||
9540 | |||||
9541 | # Examples: | ||||
9542 | # change '$ var' to '$var' etc | ||||
9543 | # '-> new' to '->new' | ||||
9544 | if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) { | ||||
9545 | $token =~ s/\s*//g; | ||||
9546 | } | ||||
9547 | |||||
9548 | # Split identifiers with leading arrows, inserting blanks if | ||||
9549 | # necessary. It is easier and safer here than in the | ||||
9550 | # tokenizer. For example '->new' becomes two tokens, '->' and | ||||
9551 | # 'new' with a possible blank between. | ||||
9552 | # | ||||
9553 | # Note: there is a related patch in sub set_white_space_flag | ||||
9554 | if ( $token =~ /^\-\>(.*)$/ && $1 ) { | ||||
9555 | my $token_save = $1; | ||||
9556 | my $type_save = $type; | ||||
9557 | |||||
9558 | # store a blank to left of arrow if necessary | ||||
9559 | if ( $max_index_to_go >= 0 | ||||
9560 | && $types_to_go[$max_index_to_go] ne 'b' | ||||
9561 | && $want_left_space{'->'} == WS_YES ) | ||||
9562 | { | ||||
9563 | insert_new_token_to_go( ' ', 'b', $slevel, | ||||
9564 | $no_internal_newlines ); | ||||
9565 | } | ||||
9566 | |||||
9567 | # then store the arrow | ||||
9568 | $token = '->'; | ||||
9569 | $type = $token; | ||||
9570 | store_token_to_go(); | ||||
9571 | |||||
9572 | # then reset the current token to be the remainder, | ||||
9573 | # and reset the whitespace flag according to the arrow | ||||
9574 | $$rwhite_space_flag[$j] = $want_right_space{'->'}; | ||||
9575 | $token = $token_save; | ||||
9576 | $type = $type_save; | ||||
9577 | } | ||||
9578 | |||||
9579 | if ( $token =~ /^sub/ ) { $token =~ s/\s+/ /g } | ||||
9580 | |||||
9581 | # trim identifiers of trailing blanks which can occur | ||||
9582 | # under some unusual circumstances, such as if the | ||||
9583 | # identifier 'witch' has trailing blanks on input here: | ||||
9584 | # | ||||
9585 | # sub | ||||
9586 | # witch | ||||
9587 | # () # prototype may be on new line ... | ||||
9588 | # ... | ||||
9589 | if ( $type eq 'i' ) { $token =~ s/\s+$//g } | ||||
9590 | } | ||||
9591 | |||||
9592 | # change 'LABEL :' to 'LABEL:' | ||||
9593 | elsif ( $type eq 'J' ) { $token =~ s/\s+//g } | ||||
9594 | |||||
9595 | # patch to add space to something like "x10" | ||||
9596 | # This avoids having to split this token in the pre-tokenizer | ||||
9597 | elsif ( $type eq 'n' ) { | ||||
9598 | if ( $token =~ /^x\d+/ ) { $token =~ s/x/x / } | ||||
9599 | } | ||||
9600 | |||||
9601 | elsif ( $type eq 'Q' ) { | ||||
9602 | note_embedded_tab() if ( $token =~ "\t" ); | ||||
9603 | |||||
9604 | # make note of something like '$var = s/xxx/yyy/;' | ||||
9605 | # in case it should have been '$var =~ s/xxx/yyy/;' | ||||
9606 | if ( | ||||
9607 | $token =~ /^(s|tr|y|m|\/)/ | ||||
9608 | && $last_nonblank_token =~ /^(=|==|!=)$/ | ||||
9609 | |||||
9610 | # preceded by simple scalar | ||||
9611 | && $last_last_nonblank_type eq 'i' | ||||
9612 | && $last_last_nonblank_token =~ /^\$/ | ||||
9613 | |||||
9614 | # followed by some kind of termination | ||||
9615 | # (but give complaint if we can's see far enough ahead) | ||||
9616 | && $next_nonblank_token =~ /^[; \)\}]$/ | ||||
9617 | |||||
9618 | # scalar is not declared | ||||
9619 | && !( | ||||
9620 | $types_to_go[0] eq 'k' | ||||
9621 | && $tokens_to_go[0] =~ /^(my|our|local)$/ | ||||
9622 | ) | ||||
9623 | ) | ||||
9624 | { | ||||
9625 | my $guess = substr( $last_nonblank_token, 0, 1 ) . '~'; | ||||
9626 | complain( | ||||
9627 | "Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n" | ||||
9628 | ); | ||||
9629 | } | ||||
9630 | } | ||||
9631 | |||||
9632 | # trim blanks from right of qw quotes | ||||
9633 | # (To avoid trimming qw quotes use -ntqw; the tokenizer handles this) | ||||
9634 | elsif ( $type eq 'q' ) { | ||||
9635 | $token =~ s/\s*$//; | ||||
9636 | note_embedded_tab() if ( $token =~ "\t" ); | ||||
9637 | } | ||||
9638 | |||||
9639 | #-------------------------------------------------------- | ||||
9640 | # End of section to patch token text | ||||
9641 | #-------------------------------------------------------- | ||||
9642 | |||||
9643 | # insert any needed whitespace | ||||
9644 | if ( ( $type ne 'b' ) | ||||
9645 | && ( $max_index_to_go >= 0 ) | ||||
9646 | && ( $types_to_go[$max_index_to_go] ne 'b' ) | ||||
9647 | && $rOpts_add_whitespace ) | ||||
9648 | { | ||||
9649 | my $ws = $$rwhite_space_flag[$j]; | ||||
9650 | |||||
9651 | if ( $ws == 1 ) { | ||||
9652 | insert_new_token_to_go( ' ', 'b', $slevel, | ||||
9653 | $no_internal_newlines ); | ||||
9654 | } | ||||
9655 | } | ||||
9656 | |||||
9657 | # Do not allow breaks which would promote a side comment to a | ||||
9658 | # block comment. In order to allow a break before an opening | ||||
9659 | # or closing BLOCK, followed by a side comment, those sections | ||||
9660 | # of code will handle this flag separately. | ||||
9661 | my $side_comment_follows = ( $next_nonblank_token_type eq '#' ); | ||||
9662 | my $is_opening_BLOCK = | ||||
9663 | ( $type eq '{' | ||||
9664 | && $token eq '{' | ||||
9665 | && $block_type | ||||
9666 | && $block_type ne 't' ); | ||||
9667 | my $is_closing_BLOCK = | ||||
9668 | ( $type eq '}' | ||||
9669 | && $token eq '}' | ||||
9670 | && $block_type | ||||
9671 | && $block_type ne 't' ); | ||||
9672 | |||||
9673 | if ( $side_comment_follows | ||||
9674 | && !$is_opening_BLOCK | ||||
9675 | && !$is_closing_BLOCK ) | ||||
9676 | { | ||||
9677 | $no_internal_newlines = 1; | ||||
9678 | } | ||||
9679 | |||||
9680 | # We're only going to handle breaking for code BLOCKS at this | ||||
9681 | # (top) level. Other indentation breaks will be handled by | ||||
9682 | # sub scan_list, which is better suited to dealing with them. | ||||
9683 | if ($is_opening_BLOCK) { | ||||
9684 | |||||
9685 | # Tentatively output this token. This is required before | ||||
9686 | # calling starting_one_line_block. We may have to unstore | ||||
9687 | # it, though, if we have to break before it. | ||||
9688 | store_token_to_go($side_comment_follows); | ||||
9689 | |||||
9690 | # Look ahead to see if we might form a one-line block | ||||
9691 | my $too_long = | ||||
9692 | starting_one_line_block( $j, $jmax, $level, $slevel, | ||||
9693 | $ci_level, $rtokens, $rtoken_type, $rblock_type ); | ||||
9694 | clear_breakpoint_undo_stack(); | ||||
9695 | |||||
9696 | # to simplify the logic below, set a flag to indicate if | ||||
9697 | # this opening brace is far from the keyword which introduces it | ||||
9698 | my $keyword_on_same_line = 1; | ||||
9699 | if ( ( $max_index_to_go >= 0 ) | ||||
9700 | && ( $last_nonblank_type eq ')' ) ) | ||||
9701 | { | ||||
9702 | if ( $block_type =~ /^(if|else|elsif)$/ | ||||
9703 | && ( $tokens_to_go[0] eq '}' ) | ||||
9704 | && $rOpts_cuddled_else ) | ||||
9705 | { | ||||
9706 | $keyword_on_same_line = 1; | ||||
9707 | } | ||||
9708 | elsif ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long ) | ||||
9709 | { | ||||
9710 | $keyword_on_same_line = 0; | ||||
9711 | } | ||||
9712 | } | ||||
9713 | |||||
9714 | # decide if user requested break before '{' | ||||
9715 | my $want_break = | ||||
9716 | |||||
9717 | # use -bl flag if not a sub block of any type | ||||
9718 | $block_type !~ /^sub/ | ||||
9719 | ? $rOpts->{'opening-brace-on-new-line'} | ||||
9720 | |||||
9721 | # use -sbl flag for a named sub block | ||||
9722 | : $block_type !~ /^sub\W*$/ | ||||
9723 | ? $rOpts->{'opening-sub-brace-on-new-line'} | ||||
9724 | |||||
9725 | # use -asbl flag for an anonymous sub block | ||||
9726 | : $rOpts->{'opening-anonymous-sub-brace-on-new-line'}; | ||||
9727 | |||||
9728 | # Break before an opening '{' ... | ||||
9729 | if ( | ||||
9730 | |||||
9731 | # if requested | ||||
9732 | $want_break | ||||
9733 | |||||
9734 | # and we were unable to start looking for a block, | ||||
9735 | && $index_start_one_line_block == UNDEFINED_INDEX | ||||
9736 | |||||
9737 | # or if it will not be on same line as its keyword, so that | ||||
9738 | # it will be outdented (eval.t, overload.t), and the user | ||||
9739 | # has not insisted on keeping it on the right | ||||
9740 | || ( !$keyword_on_same_line | ||||
9741 | && !$rOpts->{'opening-brace-always-on-right'} ) | ||||
9742 | |||||
9743 | ) | ||||
9744 | { | ||||
9745 | |||||
9746 | # but only if allowed | ||||
9747 | unless ($no_internal_newlines) { | ||||
9748 | |||||
9749 | # since we already stored this token, we must unstore it | ||||
9750 | unstore_token_to_go(); | ||||
9751 | |||||
9752 | # then output the line | ||||
9753 | output_line_to_go(); | ||||
9754 | |||||
9755 | # and now store this token at the start of a new line | ||||
9756 | store_token_to_go($side_comment_follows); | ||||
9757 | } | ||||
9758 | } | ||||
9759 | |||||
9760 | # Now update for side comment | ||||
9761 | if ($side_comment_follows) { $no_internal_newlines = 1 } | ||||
9762 | |||||
9763 | # now output this line | ||||
9764 | unless ($no_internal_newlines) { | ||||
9765 | output_line_to_go(); | ||||
9766 | } | ||||
9767 | } | ||||
9768 | |||||
9769 | elsif ($is_closing_BLOCK) { | ||||
9770 | |||||
9771 | # If there is a pending one-line block .. | ||||
9772 | if ( $index_start_one_line_block != UNDEFINED_INDEX ) { | ||||
9773 | |||||
9774 | # we have to terminate it if.. | ||||
9775 | if ( | ||||
9776 | |||||
9777 | # it is too long (final length may be different from | ||||
9778 | # initial estimate). note: must allow 1 space for this token | ||||
9779 | excess_line_length( $index_start_one_line_block, | ||||
9780 | $max_index_to_go ) >= 0 | ||||
9781 | |||||
9782 | # or if it has too many semicolons | ||||
9783 | || ( $semicolons_before_block_self_destruct == 0 | ||||
9784 | && $last_nonblank_type ne ';' ) | ||||
9785 | ) | ||||
9786 | { | ||||
9787 | destroy_one_line_block(); | ||||
9788 | } | ||||
9789 | } | ||||
9790 | |||||
9791 | # put a break before this closing curly brace if appropriate | ||||
9792 | unless ( $no_internal_newlines | ||||
9793 | || $index_start_one_line_block != UNDEFINED_INDEX ) | ||||
9794 | { | ||||
9795 | |||||
9796 | # add missing semicolon if ... | ||||
9797 | # there are some tokens | ||||
9798 | if ( | ||||
9799 | ( $max_index_to_go > 0 ) | ||||
9800 | |||||
9801 | # and we don't have one | ||||
9802 | && ( $last_nonblank_type ne ';' ) | ||||
9803 | |||||
9804 | # patch until some block type issues are fixed: | ||||
9805 | # Do not add semi-colon for block types '{', | ||||
9806 | # '}', and ';' because we cannot be sure yet | ||||
9807 | # that this is a block and not an anonymous | ||||
9808 | # hash (blktype.t, blktype1.t) | ||||
9809 | && ( $block_type !~ /^[\{\};]$/ ) | ||||
9810 | |||||
9811 | # patch: and do not add semi-colons for recently | ||||
9812 | # added block types (see tmp/semicolon.t) | ||||
9813 | && ( $block_type !~ | ||||
9814 | /^(switch|case|given|when|default)$/ ) | ||||
9815 | |||||
9816 | # it seems best not to add semicolons in these | ||||
9817 | # special block types: sort|map|grep | ||||
9818 | && ( !$is_sort_map_grep{$block_type} ) | ||||
9819 | |||||
9820 | # and we are allowed to do so. | ||||
9821 | && $rOpts->{'add-semicolons'} | ||||
9822 | ) | ||||
9823 | { | ||||
9824 | |||||
9825 | save_current_token(); | ||||
9826 | $token = ';'; | ||||
9827 | $type = ';'; | ||||
9828 | $level = $levels_to_go[$max_index_to_go]; | ||||
9829 | $slevel = $nesting_depth_to_go[$max_index_to_go]; | ||||
9830 | $nesting_blocks = | ||||
9831 | $nesting_blocks_to_go[$max_index_to_go]; | ||||
9832 | $ci_level = $ci_levels_to_go[$max_index_to_go]; | ||||
9833 | $block_type = ""; | ||||
9834 | $container_type = ""; | ||||
9835 | $container_environment = ""; | ||||
9836 | $type_sequence = ""; | ||||
9837 | |||||
9838 | # Note - we remove any blank AFTER extracting its | ||||
9839 | # parameters such as level, etc, above | ||||
9840 | if ( $types_to_go[$max_index_to_go] eq 'b' ) { | ||||
9841 | unstore_token_to_go(); | ||||
9842 | } | ||||
9843 | store_token_to_go(); | ||||
9844 | |||||
9845 | note_added_semicolon(); | ||||
9846 | restore_current_token(); | ||||
9847 | } | ||||
9848 | |||||
9849 | # then write out everything before this closing curly brace | ||||
9850 | output_line_to_go(); | ||||
9851 | |||||
9852 | } | ||||
9853 | |||||
9854 | # Now update for side comment | ||||
9855 | if ($side_comment_follows) { $no_internal_newlines = 1 } | ||||
9856 | |||||
9857 | # store the closing curly brace | ||||
9858 | store_token_to_go(); | ||||
9859 | |||||
9860 | # ok, we just stored a closing curly brace. Often, but | ||||
9861 | # not always, we want to end the line immediately. | ||||
9862 | # So now we have to check for special cases. | ||||
9863 | |||||
9864 | # if this '}' successfully ends a one-line block.. | ||||
9865 | my $is_one_line_block = 0; | ||||
9866 | my $keep_going = 0; | ||||
9867 | if ( $index_start_one_line_block != UNDEFINED_INDEX ) { | ||||
9868 | |||||
9869 | # Remember the type of token just before the | ||||
9870 | # opening brace. It would be more general to use | ||||
9871 | # a stack, but this will work for one-line blocks. | ||||
9872 | $is_one_line_block = | ||||
9873 | $types_to_go[$index_start_one_line_block]; | ||||
9874 | |||||
9875 | # we have to actually make it by removing tentative | ||||
9876 | # breaks that were set within it | ||||
9877 | undo_forced_breakpoint_stack(0); | ||||
9878 | set_nobreaks( $index_start_one_line_block, | ||||
9879 | $max_index_to_go - 1 ); | ||||
9880 | |||||
9881 | # then re-initialize for the next one-line block | ||||
9882 | destroy_one_line_block(); | ||||
9883 | |||||
9884 | # then decide if we want to break after the '}' .. | ||||
9885 | # We will keep going to allow certain brace followers as in: | ||||
9886 | # do { $ifclosed = 1; last } unless $losing; | ||||
9887 | # | ||||
9888 | # But make a line break if the curly ends a | ||||
9889 | # significant block: | ||||
9890 | if ( | ||||
9891 | $is_block_without_semicolon{$block_type} | ||||
9892 | |||||
9893 | # if needless semicolon follows we handle it later | ||||
9894 | && $next_nonblank_token ne ';' | ||||
9895 | ) | ||||
9896 | { | ||||
9897 | output_line_to_go() unless ($no_internal_newlines); | ||||
9898 | } | ||||
9899 | } | ||||
9900 | |||||
9901 | # set string indicating what we need to look for brace follower | ||||
9902 | # tokens | ||||
9903 | if ( $block_type eq 'do' ) { | ||||
9904 | $rbrace_follower = \%is_do_follower; | ||||
9905 | } | ||||
9906 | elsif ( $block_type =~ /^(if|elsif|unless)$/ ) { | ||||
9907 | $rbrace_follower = \%is_if_brace_follower; | ||||
9908 | } | ||||
9909 | elsif ( $block_type eq 'else' ) { | ||||
9910 | $rbrace_follower = \%is_else_brace_follower; | ||||
9911 | } | ||||
9912 | |||||
9913 | # added eval for borris.t | ||||
9914 | elsif ($is_sort_map_grep_eval{$block_type} | ||||
9915 | || $is_one_line_block eq 'G' ) | ||||
9916 | { | ||||
9917 | $rbrace_follower = undef; | ||||
9918 | $keep_going = 1; | ||||
9919 | } | ||||
9920 | |||||
9921 | # anonymous sub | ||||
9922 | elsif ( $block_type =~ /^sub\W*$/ ) { | ||||
9923 | |||||
9924 | if ($is_one_line_block) { | ||||
9925 | $rbrace_follower = \%is_anon_sub_1_brace_follower; | ||||
9926 | } | ||||
9927 | else { | ||||
9928 | $rbrace_follower = \%is_anon_sub_brace_follower; | ||||
9929 | } | ||||
9930 | } | ||||
9931 | |||||
9932 | # None of the above: specify what can follow a closing | ||||
9933 | # brace of a block which is not an | ||||
9934 | # if/elsif/else/do/sort/map/grep/eval | ||||
9935 | # Testfiles: | ||||
9936 | # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t | ||||
9937 | else { | ||||
9938 | $rbrace_follower = \%is_other_brace_follower; | ||||
9939 | } | ||||
9940 | |||||
9941 | # See if an elsif block is followed by another elsif or else; | ||||
9942 | # complain if not. | ||||
9943 | if ( $block_type eq 'elsif' ) { | ||||
9944 | |||||
9945 | if ( $next_nonblank_token_type eq 'b' ) { # end of line? | ||||
9946 | $looking_for_else = 1; # ok, check on next line | ||||
9947 | } | ||||
9948 | else { | ||||
9949 | |||||
9950 | unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) { | ||||
9951 | write_logfile_entry("No else block :(\n"); | ||||
9952 | } | ||||
9953 | } | ||||
9954 | } | ||||
9955 | |||||
9956 | # keep going after certain block types (map,sort,grep,eval) | ||||
9957 | # added eval for borris.t | ||||
9958 | if ($keep_going) { | ||||
9959 | |||||
9960 | # keep going | ||||
9961 | } | ||||
9962 | |||||
9963 | # if no more tokens, postpone decision until re-entring | ||||
9964 | elsif ( ( $next_nonblank_token_type eq 'b' ) | ||||
9965 | && $rOpts_add_newlines ) | ||||
9966 | { | ||||
9967 | unless ($rbrace_follower) { | ||||
9968 | output_line_to_go() unless ($no_internal_newlines); | ||||
9969 | } | ||||
9970 | } | ||||
9971 | |||||
9972 | elsif ($rbrace_follower) { | ||||
9973 | |||||
9974 | unless ( $rbrace_follower->{$next_nonblank_token} ) { | ||||
9975 | output_line_to_go() unless ($no_internal_newlines); | ||||
9976 | } | ||||
9977 | $rbrace_follower = undef; | ||||
9978 | } | ||||
9979 | |||||
9980 | else { | ||||
9981 | output_line_to_go() unless ($no_internal_newlines); | ||||
9982 | } | ||||
9983 | |||||
9984 | } # end treatment of closing block token | ||||
9985 | |||||
9986 | # handle semicolon | ||||
9987 | elsif ( $type eq ';' ) { | ||||
9988 | |||||
9989 | # kill one-line blocks with too many semicolons | ||||
9990 | $semicolons_before_block_self_destruct--; | ||||
9991 | if ( | ||||
9992 | ( $semicolons_before_block_self_destruct < 0 ) | ||||
9993 | || ( $semicolons_before_block_self_destruct == 0 | ||||
9994 | && $next_nonblank_token_type !~ /^[b\}]$/ ) | ||||
9995 | ) | ||||
9996 | { | ||||
9997 | destroy_one_line_block(); | ||||
9998 | } | ||||
9999 | |||||
10000 | # Remove unnecessary semicolons, but not after bare | ||||
10001 | # blocks, where it could be unsafe if the brace is | ||||
10002 | # mistokenized. | ||||
10003 | if ( | ||||
10004 | ( | ||||
10005 | $last_nonblank_token eq '}' | ||||
10006 | && ( | ||||
10007 | $is_block_without_semicolon{ | ||||
10008 | $last_nonblank_block_type} | ||||
10009 | || $last_nonblank_block_type =~ /^sub\s+\w/ | ||||
10010 | || $last_nonblank_block_type =~ /^\w+:$/ ) | ||||
10011 | ) | ||||
10012 | || $last_nonblank_type eq ';' | ||||
10013 | ) | ||||
10014 | { | ||||
10015 | |||||
10016 | if ( | ||||
10017 | $rOpts->{'delete-semicolons'} | ||||
10018 | |||||
10019 | # don't delete ; before a # because it would promote it | ||||
10020 | # to a block comment | ||||
10021 | && ( $next_nonblank_token_type ne '#' ) | ||||
10022 | ) | ||||
10023 | { | ||||
10024 | note_deleted_semicolon(); | ||||
10025 | output_line_to_go() | ||||
10026 | unless ( $no_internal_newlines | ||||
10027 | || $index_start_one_line_block != UNDEFINED_INDEX ); | ||||
10028 | next; | ||||
10029 | } | ||||
10030 | else { | ||||
10031 | write_logfile_entry("Extra ';'\n"); | ||||
10032 | } | ||||
10033 | } | ||||
10034 | store_token_to_go(); | ||||
10035 | |||||
10036 | output_line_to_go() | ||||
10037 | unless ( $no_internal_newlines | ||||
10038 | || ( $rOpts_keep_interior_semicolons && $j < $jmax ) | ||||
10039 | || ( $next_nonblank_token eq '}' ) ); | ||||
10040 | |||||
10041 | } | ||||
10042 | |||||
10043 | # handle here_doc target string | ||||
10044 | elsif ( $type eq 'h' ) { | ||||
10045 | $no_internal_newlines = | ||||
10046 | 1; # no newlines after seeing here-target | ||||
10047 | destroy_one_line_block(); | ||||
10048 | store_token_to_go(); | ||||
10049 | } | ||||
10050 | |||||
10051 | # handle all other token types | ||||
10052 | else { | ||||
10053 | |||||
10054 | # if this is a blank... | ||||
10055 | if ( $type eq 'b' ) { | ||||
10056 | |||||
10057 | # make it just one character | ||||
10058 | $token = ' ' if $rOpts_add_whitespace; | ||||
10059 | |||||
10060 | # delete it if unwanted by whitespace rules | ||||
10061 | # or we are deleting all whitespace | ||||
10062 | my $ws = $$rwhite_space_flag[ $j + 1 ]; | ||||
10063 | if ( ( defined($ws) && $ws == -1 ) | ||||
10064 | || $rOpts_delete_old_whitespace ) | ||||
10065 | { | ||||
10066 | |||||
10067 | # unless it might make a syntax error | ||||
10068 | next | ||||
10069 | unless is_essential_whitespace( | ||||
10070 | $last_last_nonblank_token, | ||||
10071 | $last_last_nonblank_type, | ||||
10072 | $tokens_to_go[$max_index_to_go], | ||||
10073 | $types_to_go[$max_index_to_go], | ||||
10074 | $$rtokens[ $j + 1 ], | ||||
10075 | $$rtoken_type[ $j + 1 ] | ||||
10076 | ); | ||||
10077 | } | ||||
10078 | } | ||||
10079 | store_token_to_go(); | ||||
10080 | } | ||||
10081 | |||||
10082 | # remember two previous nonblank OUTPUT tokens | ||||
10083 | if ( $type ne '#' && $type ne 'b' ) { | ||||
10084 | $last_last_nonblank_token = $last_nonblank_token; | ||||
10085 | $last_last_nonblank_type = $last_nonblank_type; | ||||
10086 | $last_nonblank_token = $token; | ||||
10087 | $last_nonblank_type = $type; | ||||
10088 | $last_nonblank_block_type = $block_type; | ||||
10089 | } | ||||
10090 | |||||
10091 | # unset the continued-quote flag since it only applies to the | ||||
10092 | # first token, and we want to resume normal formatting if | ||||
10093 | # there are additional tokens on the line | ||||
10094 | $in_continued_quote = 0; | ||||
10095 | |||||
10096 | } # end of loop over all tokens in this 'line_of_tokens' | ||||
10097 | |||||
10098 | # we have to flush .. | ||||
10099 | if ( | ||||
10100 | |||||
10101 | # if there is a side comment | ||||
10102 | ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} ) | ||||
10103 | |||||
10104 | # if this line ends in a quote | ||||
10105 | # NOTE: This is critically important for insuring that quoted lines | ||||
10106 | # do not get processed by things like -sot and -sct | ||||
10107 | || $in_quote | ||||
10108 | |||||
10109 | # if this is a VERSION statement | ||||
10110 | || $is_VERSION_statement | ||||
10111 | |||||
10112 | # to keep a label at the end of a line | ||||
10113 | || $type eq 'J' | ||||
10114 | |||||
10115 | # if we are instructed to keep all old line breaks | ||||
10116 | || !$rOpts->{'delete-old-newlines'} | ||||
10117 | ) | ||||
10118 | { | ||||
10119 | destroy_one_line_block(); | ||||
10120 | output_line_to_go(); | ||||
10121 | } | ||||
10122 | |||||
10123 | # mark old line breakpoints in current output stream | ||||
10124 | if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) { | ||||
10125 | $old_breakpoint_to_go[$max_index_to_go] = 1; | ||||
10126 | } | ||||
10127 | } ## end sub print_line_of_tokens | ||||
10128 | } ## end block print_line_of_tokens | ||||
10129 | |||||
10130 | # sub output_line_to_go sends one logical line of tokens on down the | ||||
10131 | # pipeline to the VerticalAligner package, breaking the line into continuation | ||||
10132 | # lines as necessary. The line of tokens is ready to go in the "to_go" | ||||
10133 | # arrays. | ||||
10134 | sub output_line_to_go { | ||||
10135 | |||||
10136 | # debug stuff; this routine can be called from many points | ||||
10137 | FORMATTER_DEBUG_FLAG_OUTPUT && do { | ||||
10138 | my ( $a, $b, $c ) = caller; | ||||
10139 | write_diagnostics( | ||||
10140 | "OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n" | ||||
10141 | ); | ||||
10142 | my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ]; | ||||
10143 | write_diagnostics("$output_str\n"); | ||||
10144 | }; | ||||
10145 | |||||
10146 | # just set a tentative breakpoint if we might be in a one-line block | ||||
10147 | if ( $index_start_one_line_block != UNDEFINED_INDEX ) { | ||||
10148 | set_forced_breakpoint($max_index_to_go); | ||||
10149 | return; | ||||
10150 | } | ||||
10151 | |||||
10152 | my $cscw_block_comment; | ||||
10153 | $cscw_block_comment = add_closing_side_comment() | ||||
10154 | if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 ); | ||||
10155 | |||||
10156 | my $comma_arrow_count_contained = match_opening_and_closing_tokens(); | ||||
10157 | |||||
10158 | # tell the -lp option we are outputting a batch so it can close | ||||
10159 | # any unfinished items in its stack | ||||
10160 | finish_lp_batch(); | ||||
10161 | |||||
10162 | # If this line ends in a code block brace, set breaks at any | ||||
10163 | # previous closing code block braces to breakup a chain of code | ||||
10164 | # blocks on one line. This is very rare but can happen for | ||||
10165 | # user-defined subs. For example we might be looking at this: | ||||
10166 | # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR { | ||||
10167 | my $saw_good_break = 0; # flag to force breaks even if short line | ||||
10168 | if ( | ||||
10169 | |||||
10170 | # looking for opening or closing block brace | ||||
10171 | $block_type_to_go[$max_index_to_go] | ||||
10172 | |||||
10173 | # but not one of these which are never duplicated on a line: | ||||
10174 | # until|while|for|if|elsif|else | ||||
10175 | && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] } | ||||
10176 | ) | ||||
10177 | { | ||||
10178 | my $lev = $nesting_depth_to_go[$max_index_to_go]; | ||||
10179 | |||||
10180 | # Walk backwards from the end and | ||||
10181 | # set break at any closing block braces at the same level. | ||||
10182 | # But quit if we are not in a chain of blocks. | ||||
10183 | for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) { | ||||
10184 | last if ( $levels_to_go[$i] < $lev ); # stop at a lower level | ||||
10185 | next if ( $levels_to_go[$i] > $lev ); # skip past higher level | ||||
10186 | |||||
10187 | if ( $block_type_to_go[$i] ) { | ||||
10188 | if ( $tokens_to_go[$i] eq '}' ) { | ||||
10189 | set_forced_breakpoint($i); | ||||
10190 | $saw_good_break = 1; | ||||
10191 | } | ||||
10192 | } | ||||
10193 | |||||
10194 | # quit if we see anything besides words, function, blanks | ||||
10195 | # at this level | ||||
10196 | elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last } | ||||
10197 | } | ||||
10198 | } | ||||
10199 | |||||
10200 | my $imin = 0; | ||||
10201 | my $imax = $max_index_to_go; | ||||
10202 | |||||
10203 | # trim any blank tokens | ||||
10204 | if ( $max_index_to_go >= 0 ) { | ||||
10205 | if ( $types_to_go[$imin] eq 'b' ) { $imin++ } | ||||
10206 | if ( $types_to_go[$imax] eq 'b' ) { $imax-- } | ||||
10207 | } | ||||
10208 | |||||
10209 | # anything left to write? | ||||
10210 | if ( $imin <= $imax ) { | ||||
10211 | |||||
10212 | # add a blank line before certain key types but not after a comment | ||||
10213 | if ( $last_line_leading_type !~ /^[#]/ ) { | ||||
10214 | my $want_blank = 0; | ||||
10215 | my $leading_token = $tokens_to_go[$imin]; | ||||
10216 | my $leading_type = $types_to_go[$imin]; | ||||
10217 | |||||
10218 | # blank lines before subs except declarations and one-liners | ||||
10219 | # MCONVERSION LOCATION - for sub tokenization change | ||||
10220 | if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) { | ||||
10221 | $want_blank = $rOpts->{'blank-lines-before-subs'} | ||||
10222 | if ( | ||||
10223 | terminal_type( \@types_to_go, \@block_type_to_go, $imin, | ||||
10224 | $imax ) !~ /^[\;\}]$/ | ||||
10225 | ); | ||||
10226 | } | ||||
10227 | |||||
10228 | # break before all package declarations | ||||
10229 | # MCONVERSION LOCATION - for tokenizaton change | ||||
10230 | elsif ($leading_token =~ /^(package\s)/ | ||||
10231 | && $leading_type eq 'i' ) | ||||
10232 | { | ||||
10233 | $want_blank = $rOpts->{'blank-lines-before-packages'}; | ||||
10234 | } | ||||
10235 | |||||
10236 | # break before certain key blocks except one-liners | ||||
10237 | if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) { | ||||
10238 | $want_blank = $rOpts->{'blank-lines-before-subs'} | ||||
10239 | if ( | ||||
10240 | terminal_type( \@types_to_go, \@block_type_to_go, $imin, | ||||
10241 | $imax ) ne '}' | ||||
10242 | ); | ||||
10243 | } | ||||
10244 | |||||
10245 | # Break before certain block types if we haven't had a | ||||
10246 | # break at this level for a while. This is the | ||||
10247 | # difficult decision.. | ||||
10248 | elsif ($leading_type eq 'k' | ||||
10249 | && $last_line_leading_type ne 'b' | ||||
10250 | && $leading_token =~ /^(unless|if|while|until|for|foreach)$/ ) | ||||
10251 | { | ||||
10252 | my $lc = $nonblank_lines_at_depth[$last_line_leading_level]; | ||||
10253 | if ( !defined($lc) ) { $lc = 0 } | ||||
10254 | |||||
10255 | $want_blank = | ||||
10256 | $rOpts->{'blanks-before-blocks'} | ||||
10257 | && $lc >= $rOpts->{'long-block-line-count'} | ||||
10258 | && $file_writer_object->get_consecutive_nonblank_lines() >= | ||||
10259 | $rOpts->{'long-block-line-count'} | ||||
10260 | && ( | ||||
10261 | terminal_type( \@types_to_go, \@block_type_to_go, $imin, | ||||
10262 | $imax ) ne '}' | ||||
10263 | ); | ||||
10264 | } | ||||
10265 | |||||
10266 | if ($want_blank) { | ||||
10267 | |||||
10268 | # future: send blank line down normal path to VerticalAligner | ||||
10269 | Perl::Tidy::VerticalAligner::flush(); | ||||
10270 | $file_writer_object->require_blank_code_lines($want_blank); | ||||
10271 | } | ||||
10272 | } | ||||
10273 | |||||
10274 | # update blank line variables and count number of consecutive | ||||
10275 | # non-blank, non-comment lines at this level | ||||
10276 | $last_last_line_leading_level = $last_line_leading_level; | ||||
10277 | $last_line_leading_level = $levels_to_go[$imin]; | ||||
10278 | if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 } | ||||
10279 | $last_line_leading_type = $types_to_go[$imin]; | ||||
10280 | if ( $last_line_leading_level == $last_last_line_leading_level | ||||
10281 | && $last_line_leading_type ne 'b' | ||||
10282 | && $last_line_leading_type ne '#' | ||||
10283 | && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) ) | ||||
10284 | { | ||||
10285 | $nonblank_lines_at_depth[$last_line_leading_level]++; | ||||
10286 | } | ||||
10287 | else { | ||||
10288 | $nonblank_lines_at_depth[$last_line_leading_level] = 1; | ||||
10289 | } | ||||
10290 | |||||
10291 | FORMATTER_DEBUG_FLAG_FLUSH && do { | ||||
10292 | my ( $package, $file, $line ) = caller; | ||||
10293 | print STDOUT | ||||
10294 | "FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n"; | ||||
10295 | }; | ||||
10296 | |||||
10297 | # add a couple of extra terminal blank tokens | ||||
10298 | pad_array_to_go(); | ||||
10299 | |||||
10300 | # set all forced breakpoints for good list formatting | ||||
10301 | my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0; | ||||
10302 | |||||
10303 | if ( | ||||
10304 | $is_long_line | ||||
10305 | || $old_line_count_in_batch > 1 | ||||
10306 | |||||
10307 | # must always call scan_list() with unbalanced batches because it | ||||
10308 | # is maintaining some stacks | ||||
10309 | || is_unbalanced_batch() | ||||
10310 | |||||
10311 | # call scan_list if we might want to break at commas | ||||
10312 | || ( | ||||
10313 | $comma_count_in_batch | ||||
10314 | && ( $rOpts_maximum_fields_per_table > 0 | ||||
10315 | || $rOpts_comma_arrow_breakpoints == 0 ) | ||||
10316 | ) | ||||
10317 | |||||
10318 | # call scan_list if user may want to break open some one-line | ||||
10319 | # hash references | ||||
10320 | || ( $comma_arrow_count_contained | ||||
10321 | && $rOpts_comma_arrow_breakpoints != 3 ) | ||||
10322 | ) | ||||
10323 | { | ||||
10324 | ## This caused problems in one version of perl for unknown reasons: | ||||
10325 | ## $saw_good_break ||= scan_list(); | ||||
10326 | my $sgb = scan_list(); | ||||
10327 | $saw_good_break ||= $sgb; | ||||
10328 | } | ||||
10329 | |||||
10330 | # let $ri_first and $ri_last be references to lists of | ||||
10331 | # first and last tokens of line fragments to output.. | ||||
10332 | my ( $ri_first, $ri_last ); | ||||
10333 | |||||
10334 | # write a single line if.. | ||||
10335 | if ( | ||||
10336 | |||||
10337 | # we aren't allowed to add any newlines | ||||
10338 | !$rOpts_add_newlines | ||||
10339 | |||||
10340 | # or, we don't already have an interior breakpoint | ||||
10341 | # and we didn't see a good breakpoint | ||||
10342 | || ( | ||||
10343 | !$forced_breakpoint_count | ||||
10344 | && !$saw_good_break | ||||
10345 | |||||
10346 | # and this line is 'short' | ||||
10347 | && !$is_long_line | ||||
10348 | ) | ||||
10349 | ) | ||||
10350 | { | ||||
10351 | @$ri_first = ($imin); | ||||
10352 | @$ri_last = ($imax); | ||||
10353 | } | ||||
10354 | |||||
10355 | # otherwise use multiple lines | ||||
10356 | else { | ||||
10357 | |||||
10358 | ( $ri_first, $ri_last, my $colon_count ) = | ||||
10359 | set_continuation_breaks($saw_good_break); | ||||
10360 | |||||
10361 | break_all_chain_tokens( $ri_first, $ri_last ); | ||||
10362 | |||||
10363 | break_equals( $ri_first, $ri_last ); | ||||
10364 | |||||
10365 | # now we do a correction step to clean this up a bit | ||||
10366 | # (The only time we would not do this is for debugging) | ||||
10367 | if ( $rOpts->{'recombine'} ) { | ||||
10368 | ( $ri_first, $ri_last ) = | ||||
10369 | recombine_breakpoints( $ri_first, $ri_last ); | ||||
10370 | } | ||||
10371 | |||||
10372 | insert_final_breaks( $ri_first, $ri_last ) if $colon_count; | ||||
10373 | } | ||||
10374 | |||||
10375 | # do corrector step if -lp option is used | ||||
10376 | my $do_not_pad = 0; | ||||
10377 | if ($rOpts_line_up_parentheses) { | ||||
10378 | $do_not_pad = correct_lp_indentation( $ri_first, $ri_last ); | ||||
10379 | } | ||||
10380 | send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad ); | ||||
10381 | } | ||||
10382 | prepare_for_new_input_lines(); | ||||
10383 | |||||
10384 | # output any new -cscw block comment | ||||
10385 | if ($cscw_block_comment) { | ||||
10386 | flush(); | ||||
10387 | $file_writer_object->write_code_line( $cscw_block_comment . "\n" ); | ||||
10388 | } | ||||
10389 | } | ||||
10390 | |||||
10391 | sub note_added_semicolon { | ||||
10392 | $last_added_semicolon_at = $input_line_number; | ||||
10393 | if ( $added_semicolon_count == 0 ) { | ||||
10394 | $first_added_semicolon_at = $last_added_semicolon_at; | ||||
10395 | } | ||||
10396 | $added_semicolon_count++; | ||||
10397 | write_logfile_entry("Added ';' here\n"); | ||||
10398 | } | ||||
10399 | |||||
10400 | sub note_deleted_semicolon { | ||||
10401 | $last_deleted_semicolon_at = $input_line_number; | ||||
10402 | if ( $deleted_semicolon_count == 0 ) { | ||||
10403 | $first_deleted_semicolon_at = $last_deleted_semicolon_at; | ||||
10404 | } | ||||
10405 | $deleted_semicolon_count++; | ||||
10406 | write_logfile_entry("Deleted unnecessary ';'\n"); # i hope ;) | ||||
10407 | } | ||||
10408 | |||||
10409 | sub note_embedded_tab { | ||||
10410 | $embedded_tab_count++; | ||||
10411 | $last_embedded_tab_at = $input_line_number; | ||||
10412 | if ( !$first_embedded_tab_at ) { | ||||
10413 | $first_embedded_tab_at = $last_embedded_tab_at; | ||||
10414 | } | ||||
10415 | |||||
10416 | if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) { | ||||
10417 | write_logfile_entry("Embedded tabs in quote or pattern\n"); | ||||
10418 | } | ||||
10419 | } | ||||
10420 | |||||
10421 | sub starting_one_line_block { | ||||
10422 | |||||
10423 | # after seeing an opening curly brace, look for the closing brace | ||||
10424 | # and see if the entire block will fit on a line. This routine is | ||||
10425 | # not always right because it uses the old whitespace, so a check | ||||
10426 | # is made later (at the closing brace) to make sure we really | ||||
10427 | # have a one-line block. We have to do this preliminary check, | ||||
10428 | # though, because otherwise we would always break at a semicolon | ||||
10429 | # within a one-line block if the block contains multiple statements. | ||||
10430 | |||||
10431 | my ( $j, $jmax, $level, $slevel, $ci_level, $rtokens, $rtoken_type, | ||||
10432 | $rblock_type ) | ||||
10433 | = @_; | ||||
10434 | |||||
10435 | # kill any current block - we can only go 1 deep | ||||
10436 | destroy_one_line_block(); | ||||
10437 | |||||
10438 | # return value: | ||||
10439 | # 1=distance from start of block to opening brace exceeds line length | ||||
10440 | # 0=otherwise | ||||
10441 | |||||
10442 | my $i_start = 0; | ||||
10443 | |||||
10444 | # shouldn't happen: there must have been a prior call to | ||||
10445 | # store_token_to_go to put the opening brace in the output stream | ||||
10446 | if ( $max_index_to_go < 0 ) { | ||||
10447 | warning("program bug: store_token_to_go called incorrectly\n"); | ||||
10448 | report_definite_bug(); | ||||
10449 | } | ||||
10450 | else { | ||||
10451 | |||||
10452 | # cannot use one-line blocks with cuddled else/elsif lines | ||||
10453 | if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) { | ||||
10454 | return 0; | ||||
10455 | } | ||||
10456 | } | ||||
10457 | |||||
10458 | my $block_type = $$rblock_type[$j]; | ||||
10459 | |||||
10460 | # find the starting keyword for this block (such as 'if', 'else', ...) | ||||
10461 | |||||
10462 | if ( $block_type =~ /^[\{\}\;\:]$/ || $block_type =~ /^package/ ) { | ||||
10463 | $i_start = $max_index_to_go; | ||||
10464 | } | ||||
10465 | |||||
10466 | elsif ( $last_last_nonblank_token_to_go eq ')' ) { | ||||
10467 | |||||
10468 | # For something like "if (xxx) {", the keyword "if" will be | ||||
10469 | # just after the most recent break. This will be 0 unless | ||||
10470 | # we have just killed a one-line block and are starting another. | ||||
10471 | # (doif.t) | ||||
10472 | # Note: cannot use inext_index_to_go[] here because that array | ||||
10473 | # is still being constructed. | ||||
10474 | $i_start = $index_max_forced_break + 1; | ||||
10475 | if ( $types_to_go[$i_start] eq 'b' ) { | ||||
10476 | $i_start++; | ||||
10477 | } | ||||
10478 | |||||
10479 | unless ( $tokens_to_go[$i_start] eq $block_type ) { | ||||
10480 | return 0; | ||||
10481 | } | ||||
10482 | } | ||||
10483 | |||||
10484 | # the previous nonblank token should start these block types | ||||
10485 | elsif (( $last_last_nonblank_token_to_go eq $block_type ) | ||||
10486 | || ( $block_type =~ /^sub/ ) ) | ||||
10487 | { | ||||
10488 | $i_start = $last_last_nonblank_index_to_go; | ||||
10489 | } | ||||
10490 | |||||
10491 | # patch for SWITCH/CASE to retain one-line case/when blocks | ||||
10492 | elsif ( $block_type eq 'case' || $block_type eq 'when' ) { | ||||
10493 | |||||
10494 | # Note: cannot use inext_index_to_go[] here because that array | ||||
10495 | # is still being constructed. | ||||
10496 | $i_start = $index_max_forced_break + 1; | ||||
10497 | if ( $types_to_go[$i_start] eq 'b' ) { | ||||
10498 | $i_start++; | ||||
10499 | } | ||||
10500 | unless ( $tokens_to_go[$i_start] eq $block_type ) { | ||||
10501 | return 0; | ||||
10502 | } | ||||
10503 | } | ||||
10504 | |||||
10505 | else { | ||||
10506 | return 1; | ||||
10507 | } | ||||
10508 | |||||
10509 | my $pos = total_line_length( $i_start, $max_index_to_go ) - 1; | ||||
10510 | |||||
10511 | my $i; | ||||
10512 | |||||
10513 | # see if length is too long to even start | ||||
10514 | if ( $pos > maximum_line_length($i_start) ) { | ||||
10515 | return 1; | ||||
10516 | } | ||||
10517 | |||||
10518 | for ( $i = $j + 1 ; $i <= $jmax ; $i++ ) { | ||||
10519 | |||||
10520 | # old whitespace could be arbitrarily large, so don't use it | ||||
10521 | if ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 } | ||||
10522 | else { $pos += rtoken_length($i) } | ||||
10523 | |||||
10524 | # Return false result if we exceed the maximum line length, | ||||
10525 | if ( $pos > maximum_line_length($i_start) ) { | ||||
10526 | return 0; | ||||
10527 | } | ||||
10528 | |||||
10529 | # or encounter another opening brace before finding the closing brace. | ||||
10530 | elsif ($$rtokens[$i] eq '{' | ||||
10531 | && $$rtoken_type[$i] eq '{' | ||||
10532 | && $$rblock_type[$i] ) | ||||
10533 | { | ||||
10534 | return 0; | ||||
10535 | } | ||||
10536 | |||||
10537 | # if we find our closing brace.. | ||||
10538 | elsif ($$rtokens[$i] eq '}' | ||||
10539 | && $$rtoken_type[$i] eq '}' | ||||
10540 | && $$rblock_type[$i] ) | ||||
10541 | { | ||||
10542 | |||||
10543 | # be sure any trailing comment also fits on the line | ||||
10544 | my $i_nonblank = | ||||
10545 | ( $$rtoken_type[ $i + 1 ] eq 'b' ) ? $i + 2 : $i + 1; | ||||
10546 | |||||
10547 | # Patch for one-line sort/map/grep/eval blocks with side comments: | ||||
10548 | # We will ignore the side comment length for sort/map/grep/eval | ||||
10549 | # because this can lead to statements which change every time | ||||
10550 | # perltidy is run. Here is an example from Denis Moskowitz which | ||||
10551 | # oscillates between these two states without this patch: | ||||
10552 | |||||
10553 | ## -------- | ||||
10554 | ## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf | ||||
10555 | ## @baz; | ||||
10556 | ## | ||||
10557 | ## grep { | ||||
10558 | ## $_->foo ne 'bar' | ||||
10559 | ## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf | ||||
10560 | ## @baz; | ||||
10561 | ## -------- | ||||
10562 | |||||
10563 | # When the first line is input it gets broken apart by the main | ||||
10564 | # line break logic in sub print_line_of_tokens. | ||||
10565 | # When the second line is input it gets recombined by | ||||
10566 | # print_line_of_tokens and passed to the output routines. The | ||||
10567 | # output routines (set_continuation_breaks) do not break it apart | ||||
10568 | # because the bond strengths are set to the highest possible value | ||||
10569 | # for grep/map/eval/sort blocks, so the first version gets output. | ||||
10570 | # It would be possible to fix this by changing bond strengths, | ||||
10571 | # but they are high to prevent errors in older versions of perl. | ||||
10572 | |||||
10573 | if ( $$rtoken_type[$i_nonblank] eq '#' | ||||
10574 | && !$is_sort_map_grep{$block_type} ) | ||||
10575 | { | ||||
10576 | |||||
10577 | $pos += rtoken_length($i_nonblank); | ||||
10578 | |||||
10579 | if ( $i_nonblank > $i + 1 ) { | ||||
10580 | |||||
10581 | # source whitespace could be anything, assume | ||||
10582 | # at least one space before the hash on output | ||||
10583 | if ( $$rtoken_type[ $i + 1 ] eq 'b' ) { $pos += 1 } | ||||
10584 | else { $pos += rtoken_length( $i + 1 ) } | ||||
10585 | } | ||||
10586 | |||||
10587 | if ( $pos >= maximum_line_length($i_start) ) { | ||||
10588 | return 0; | ||||
10589 | } | ||||
10590 | } | ||||
10591 | |||||
10592 | # ok, it's a one-line block | ||||
10593 | create_one_line_block( $i_start, 20 ); | ||||
10594 | return 0; | ||||
10595 | } | ||||
10596 | |||||
10597 | # just keep going for other characters | ||||
10598 | else { | ||||
10599 | } | ||||
10600 | } | ||||
10601 | |||||
10602 | # Allow certain types of new one-line blocks to form by joining | ||||
10603 | # input lines. These can be safely done, but for other block types, | ||||
10604 | # we keep old one-line blocks but do not form new ones. It is not | ||||
10605 | # always a good idea to make as many one-line blocks as possible, | ||||
10606 | # so other types are not done. The user can always use -mangle. | ||||
10607 | if ( $is_sort_map_grep_eval{$block_type} ) { | ||||
10608 | create_one_line_block( $i_start, 1 ); | ||||
10609 | } | ||||
10610 | |||||
10611 | return 0; | ||||
10612 | } | ||||
10613 | |||||
10614 | sub unstore_token_to_go { | ||||
10615 | |||||
10616 | # remove most recent token from output stream | ||||
10617 | if ( $max_index_to_go > 0 ) { | ||||
10618 | $max_index_to_go--; | ||||
10619 | } | ||||
10620 | else { | ||||
10621 | $max_index_to_go = UNDEFINED_INDEX; | ||||
10622 | } | ||||
10623 | |||||
10624 | } | ||||
10625 | |||||
10626 | sub want_blank_line { | ||||
10627 | flush(); | ||||
10628 | $file_writer_object->want_blank_line(); | ||||
10629 | } | ||||
10630 | |||||
10631 | sub write_unindented_line { | ||||
10632 | flush(); | ||||
10633 | $file_writer_object->write_line( $_[0] ); | ||||
10634 | } | ||||
10635 | |||||
10636 | sub undo_ci { | ||||
10637 | |||||
10638 | # Undo continuation indentation in certain sequences | ||||
10639 | # For example, we can undo continuation indentation in sort/map/grep chains | ||||
10640 | # my $dat1 = pack( "n*", | ||||
10641 | # map { $_, $lookup->{$_} } | ||||
10642 | # sort { $a <=> $b } | ||||
10643 | # grep { $lookup->{$_} ne $default } keys %$lookup ); | ||||
10644 | # To align the map/sort/grep keywords like this: | ||||
10645 | # my $dat1 = pack( "n*", | ||||
10646 | # map { $_, $lookup->{$_} } | ||||
10647 | # sort { $a <=> $b } | ||||
10648 | # grep { $lookup->{$_} ne $default } keys %$lookup ); | ||||
10649 | my ( $ri_first, $ri_last ) = @_; | ||||
10650 | my ( $line_1, $line_2, $lev_last ); | ||||
10651 | my $this_line_is_semicolon_terminated; | ||||
10652 | my $max_line = @$ri_first - 1; | ||||
10653 | |||||
10654 | # looking at each line of this batch.. | ||||
10655 | # We are looking at leading tokens and looking for a sequence | ||||
10656 | # all at the same level and higher level than enclosing lines. | ||||
10657 | foreach my $line ( 0 .. $max_line ) { | ||||
10658 | |||||
10659 | my $ibeg = $$ri_first[$line]; | ||||
10660 | my $lev = $levels_to_go[$ibeg]; | ||||
10661 | if ( $line > 0 ) { | ||||
10662 | |||||
10663 | # if we have started a chain.. | ||||
10664 | if ($line_1) { | ||||
10665 | |||||
10666 | # see if it continues.. | ||||
10667 | if ( $lev == $lev_last ) { | ||||
10668 | if ( $types_to_go[$ibeg] eq 'k' | ||||
10669 | && $is_sort_map_grep{ $tokens_to_go[$ibeg] } ) | ||||
10670 | { | ||||
10671 | |||||
10672 | # chain continues... | ||||
10673 | # check for chain ending at end of a statement | ||||
10674 | if ( $line == $max_line ) { | ||||
10675 | |||||
10676 | # see of this line ends a statement | ||||
10677 | my $iend = $$ri_last[$line]; | ||||
10678 | $this_line_is_semicolon_terminated = | ||||
10679 | $types_to_go[$iend] eq ';' | ||||
10680 | |||||
10681 | # with possible side comment | ||||
10682 | || ( $types_to_go[$iend] eq '#' | ||||
10683 | && $iend - $ibeg >= 2 | ||||
10684 | && $types_to_go[ $iend - 2 ] eq ';' | ||||
10685 | && $types_to_go[ $iend - 1 ] eq 'b' ); | ||||
10686 | } | ||||
10687 | $line_2 = $line if ($this_line_is_semicolon_terminated); | ||||
10688 | } | ||||
10689 | else { | ||||
10690 | |||||
10691 | # kill chain | ||||
10692 | $line_1 = undef; | ||||
10693 | } | ||||
10694 | } | ||||
10695 | elsif ( $lev < $lev_last ) { | ||||
10696 | |||||
10697 | # chain ends with previous line | ||||
10698 | $line_2 = $line - 1; | ||||
10699 | } | ||||
10700 | elsif ( $lev > $lev_last ) { | ||||
10701 | |||||
10702 | # kill chain | ||||
10703 | $line_1 = undef; | ||||
10704 | } | ||||
10705 | |||||
10706 | # undo the continuation indentation if a chain ends | ||||
10707 | if ( defined($line_2) && defined($line_1) ) { | ||||
10708 | my $continuation_line_count = $line_2 - $line_1 + 1; | ||||
10709 | @ci_levels_to_go[ @$ri_first[ $line_1 .. $line_2 ] ] = | ||||
10710 | (0) x ($continuation_line_count); | ||||
10711 | @leading_spaces_to_go[ @$ri_first[ $line_1 .. $line_2 ] ] = | ||||
10712 | @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $line_2 ] ]; | ||||
10713 | $line_1 = undef; | ||||
10714 | } | ||||
10715 | } | ||||
10716 | |||||
10717 | # not in a chain yet.. | ||||
10718 | else { | ||||
10719 | |||||
10720 | # look for start of a new sort/map/grep chain | ||||
10721 | if ( $lev > $lev_last ) { | ||||
10722 | if ( $types_to_go[$ibeg] eq 'k' | ||||
10723 | && $is_sort_map_grep{ $tokens_to_go[$ibeg] } ) | ||||
10724 | { | ||||
10725 | $line_1 = $line; | ||||
10726 | } | ||||
10727 | } | ||||
10728 | } | ||||
10729 | } | ||||
10730 | $lev_last = $lev; | ||||
10731 | } | ||||
10732 | } | ||||
10733 | |||||
10734 | sub undo_lp_ci { | ||||
10735 | |||||
10736 | # If there is a single, long parameter within parens, like this: | ||||
10737 | # | ||||
10738 | # $self->command( "/msg " | ||||
10739 | # . $infoline->chan | ||||
10740 | # . " You said $1, but did you know that it's square was " | ||||
10741 | # . $1 * $1 . " ?" ); | ||||
10742 | # | ||||
10743 | # we can remove the continuation indentation of the 2nd and higher lines | ||||
10744 | # to achieve this effect, which is more pleasing: | ||||
10745 | # | ||||
10746 | # $self->command("/msg " | ||||
10747 | # . $infoline->chan | ||||
10748 | # . " You said $1, but did you know that it's square was " | ||||
10749 | # . $1 * $1 . " ?"); | ||||
10750 | |||||
10751 | my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_; | ||||
10752 | my $max_line = @$ri_first - 1; | ||||
10753 | |||||
10754 | # must be multiple lines | ||||
10755 | return unless $max_line > $line_open; | ||||
10756 | |||||
10757 | my $lev_start = $levels_to_go[$i_start]; | ||||
10758 | my $ci_start_plus = 1 + $ci_levels_to_go[$i_start]; | ||||
10759 | |||||
10760 | # see if all additional lines in this container have continuation | ||||
10761 | # indentation | ||||
10762 | my $n; | ||||
10763 | my $line_1 = 1 + $line_open; | ||||
10764 | for ( $n = $line_1 ; $n <= $max_line ; ++$n ) { | ||||
10765 | my $ibeg = $$ri_first[$n]; | ||||
10766 | my $iend = $$ri_last[$n]; | ||||
10767 | if ( $ibeg eq $closing_index ) { $n--; last } | ||||
10768 | return if ( $lev_start != $levels_to_go[$ibeg] ); | ||||
10769 | return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] ); | ||||
10770 | last if ( $closing_index <= $iend ); | ||||
10771 | } | ||||
10772 | |||||
10773 | # we can reduce the indentation of all continuation lines | ||||
10774 | my $continuation_line_count = $n - $line_open; | ||||
10775 | @ci_levels_to_go[ @$ri_first[ $line_1 .. $n ] ] = | ||||
10776 | (0) x ($continuation_line_count); | ||||
10777 | @leading_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ] = | ||||
10778 | @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ]; | ||||
10779 | } | ||||
10780 | |||||
10781 | sub pad_token { | ||||
10782 | |||||
10783 | # insert $pad_spaces before token number $ipad | ||||
10784 | my ( $ipad, $pad_spaces ) = @_; | ||||
10785 | if ( $pad_spaces > 0 ) { | ||||
10786 | $tokens_to_go[$ipad] = ' ' x $pad_spaces . $tokens_to_go[$ipad]; | ||||
10787 | } | ||||
10788 | elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq ' ' ) { | ||||
10789 | $tokens_to_go[$ipad] = ""; | ||||
10790 | } | ||||
10791 | else { | ||||
10792 | |||||
10793 | # shouldn't happen | ||||
10794 | return; | ||||
10795 | } | ||||
10796 | |||||
10797 | $token_lengths_to_go[$ipad] += $pad_spaces; | ||||
10798 | for ( my $i = $ipad ; $i <= $max_index_to_go ; $i++ ) { | ||||
10799 | $summed_lengths_to_go[ $i + 1 ] += $pad_spaces; | ||||
10800 | } | ||||
10801 | } | ||||
10802 | |||||
10803 | { | ||||
10804 | 2 | 700ns | my %is_math_op; | ||
10805 | |||||
10806 | # spent 8µs within Perl::Tidy::Formatter::BEGIN@10806 which was called:
# once (8µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 10810 | ||||
10807 | |||||
10808 | 1 | 1µs | @_ = qw( + - * / ); | ||
10809 | 1 | 8µs | @is_math_op{@_} = (1) x scalar(@_); | ||
10810 | 1 | 1.57ms | 1 | 8µs | } # spent 8µs making 1 call to Perl::Tidy::Formatter::BEGIN@10806 |
10811 | |||||
10812 | sub set_logical_padding { | ||||
10813 | |||||
10814 | # Look at a batch of lines and see if extra padding can improve the | ||||
10815 | # alignment when there are certain leading operators. Here is an | ||||
10816 | # example, in which some extra space is introduced before | ||||
10817 | # '( $year' to make it line up with the subsequent lines: | ||||
10818 | # | ||||
10819 | # if ( ( $Year < 1601 ) | ||||
10820 | # || ( $Year > 2899 ) | ||||
10821 | # || ( $EndYear < 1601 ) | ||||
10822 | # || ( $EndYear > 2899 ) ) | ||||
10823 | # { | ||||
10824 | # &Error_OutOfRange; | ||||
10825 | # } | ||||
10826 | # | ||||
10827 | my ( $ri_first, $ri_last ) = @_; | ||||
10828 | my $max_line = @$ri_first - 1; | ||||
10829 | |||||
10830 | my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $line, | ||||
10831 | $pad_spaces, | ||||
10832 | $tok_next, $type_next, $has_leading_op_next, $has_leading_op ); | ||||
10833 | |||||
10834 | # looking at each line of this batch.. | ||||
10835 | foreach $line ( 0 .. $max_line - 1 ) { | ||||
10836 | |||||
10837 | # see if the next line begins with a logical operator | ||||
10838 | $ibeg = $$ri_first[$line]; | ||||
10839 | $iend = $$ri_last[$line]; | ||||
10840 | $ibeg_next = $$ri_first[ $line + 1 ]; | ||||
10841 | $tok_next = $tokens_to_go[$ibeg_next]; | ||||
10842 | $type_next = $types_to_go[$ibeg_next]; | ||||
10843 | |||||
10844 | $has_leading_op_next = ( $tok_next =~ /^\w/ ) | ||||
10845 | ? $is_chain_operator{$tok_next} # + - * / : ? && || | ||||
10846 | : $is_chain_operator{$type_next}; # and, or | ||||
10847 | |||||
10848 | next unless ($has_leading_op_next); | ||||
10849 | |||||
10850 | # next line must not be at lesser depth | ||||
10851 | next | ||||
10852 | if ( $nesting_depth_to_go[$ibeg] > | ||||
10853 | $nesting_depth_to_go[$ibeg_next] ); | ||||
10854 | |||||
10855 | # identify the token in this line to be padded on the left | ||||
10856 | $ipad = undef; | ||||
10857 | |||||
10858 | # handle lines at same depth... | ||||
10859 | if ( $nesting_depth_to_go[$ibeg] == | ||||
10860 | $nesting_depth_to_go[$ibeg_next] ) | ||||
10861 | { | ||||
10862 | |||||
10863 | # if this is not first line of the batch ... | ||||
10864 | if ( $line > 0 ) { | ||||
10865 | |||||
10866 | # and we have leading operator.. | ||||
10867 | next if $has_leading_op; | ||||
10868 | |||||
10869 | # Introduce padding if.. | ||||
10870 | # 1. the previous line is at lesser depth, or | ||||
10871 | # 2. the previous line ends in an assignment | ||||
10872 | # 3. the previous line ends in a 'return' | ||||
10873 | # 4. the previous line ends in a comma | ||||
10874 | # Example 1: previous line at lesser depth | ||||
10875 | # if ( ( $Year < 1601 ) # <- we are here but | ||||
10876 | # || ( $Year > 2899 ) # list has not yet | ||||
10877 | # || ( $EndYear < 1601 ) # collapsed vertically | ||||
10878 | # || ( $EndYear > 2899 ) ) | ||||
10879 | # { | ||||
10880 | # | ||||
10881 | # Example 2: previous line ending in assignment: | ||||
10882 | # $leapyear = | ||||
10883 | # $year % 4 ? 0 # <- We are here | ||||
10884 | # : $year % 100 ? 1 | ||||
10885 | # : $year % 400 ? 0 | ||||
10886 | # : 1; | ||||
10887 | # | ||||
10888 | # Example 3: previous line ending in comma: | ||||
10889 | # push @expr, | ||||
10890 | # /test/ ? undef | ||||
10891 | # : eval($_) ? 1 | ||||
10892 | # : eval($_) ? 1 | ||||
10893 | # : 0; | ||||
10894 | |||||
10895 | # be sure levels agree (do not indent after an indented 'if') | ||||
10896 | next | ||||
10897 | if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] ); | ||||
10898 | |||||
10899 | # allow padding on first line after a comma but only if: | ||||
10900 | # (1) this is line 2 and | ||||
10901 | # (2) there are at more than three lines and | ||||
10902 | # (3) lines 3 and 4 have the same leading operator | ||||
10903 | # These rules try to prevent padding within a long | ||||
10904 | # comma-separated list. | ||||
10905 | my $ok_comma; | ||||
10906 | if ( $types_to_go[$iendm] eq ',' | ||||
10907 | && $line == 1 | ||||
10908 | && $max_line > 2 ) | ||||
10909 | { | ||||
10910 | my $ibeg_next_next = $$ri_first[ $line + 2 ]; | ||||
10911 | my $tok_next_next = $tokens_to_go[$ibeg_next_next]; | ||||
10912 | $ok_comma = $tok_next_next eq $tok_next; | ||||
10913 | } | ||||
10914 | |||||
10915 | next | ||||
10916 | unless ( | ||||
10917 | $is_assignment{ $types_to_go[$iendm] } | ||||
10918 | || $ok_comma | ||||
10919 | || ( $nesting_depth_to_go[$ibegm] < | ||||
10920 | $nesting_depth_to_go[$ibeg] ) | ||||
10921 | || ( $types_to_go[$iendm] eq 'k' | ||||
10922 | && $tokens_to_go[$iendm] eq 'return' ) | ||||
10923 | ); | ||||
10924 | |||||
10925 | # we will add padding before the first token | ||||
10926 | $ipad = $ibeg; | ||||
10927 | } | ||||
10928 | |||||
10929 | # for first line of the batch.. | ||||
10930 | else { | ||||
10931 | |||||
10932 | # WARNING: Never indent if first line is starting in a | ||||
10933 | # continued quote, which would change the quote. | ||||
10934 | next if $starting_in_quote; | ||||
10935 | |||||
10936 | # if this is text after closing '}' | ||||
10937 | # then look for an interior token to pad | ||||
10938 | if ( $types_to_go[$ibeg] eq '}' ) { | ||||
10939 | |||||
10940 | } | ||||
10941 | |||||
10942 | # otherwise, we might pad if it looks really good | ||||
10943 | else { | ||||
10944 | |||||
10945 | # we might pad token $ibeg, so be sure that it | ||||
10946 | # is at the same depth as the next line. | ||||
10947 | next | ||||
10948 | if ( $nesting_depth_to_go[$ibeg] != | ||||
10949 | $nesting_depth_to_go[$ibeg_next] ); | ||||
10950 | |||||
10951 | # We can pad on line 1 of a statement if at least 3 | ||||
10952 | # lines will be aligned. Otherwise, it | ||||
10953 | # can look very confusing. | ||||
10954 | |||||
10955 | # We have to be careful not to pad if there are too few | ||||
10956 | # lines. The current rule is: | ||||
10957 | # (1) in general we require at least 3 consecutive lines | ||||
10958 | # with the same leading chain operator token, | ||||
10959 | # (2) but an exception is that we only require two lines | ||||
10960 | # with leading colons if there are no more lines. For example, | ||||
10961 | # the first $i in the following snippet would get padding | ||||
10962 | # by the second rule: | ||||
10963 | # | ||||
10964 | # $i == 1 ? ( "First", "Color" ) | ||||
10965 | # : $i == 2 ? ( "Then", "Rarity" ) | ||||
10966 | # : ( "Then", "Name" ); | ||||
10967 | |||||
10968 | if ( $max_line > 1 ) { | ||||
10969 | my $leading_token = $tokens_to_go[$ibeg_next]; | ||||
10970 | my $tokens_differ; | ||||
10971 | |||||
10972 | # never indent line 1 of a '.' series because | ||||
10973 | # previous line is most likely at same level. | ||||
10974 | # TODO: we should also look at the leasing_spaces | ||||
10975 | # of the last output line and skip if it is same | ||||
10976 | # as this line. | ||||
10977 | next if ( $leading_token eq '.' ); | ||||
10978 | |||||
10979 | my $count = 1; | ||||
10980 | foreach my $l ( 2 .. 3 ) { | ||||
10981 | last if ( $line + $l > $max_line ); | ||||
10982 | my $ibeg_next_next = $$ri_first[ $line + $l ]; | ||||
10983 | if ( $tokens_to_go[$ibeg_next_next] ne | ||||
10984 | $leading_token ) | ||||
10985 | { | ||||
10986 | $tokens_differ = 1; | ||||
10987 | last; | ||||
10988 | } | ||||
10989 | $count++; | ||||
10990 | } | ||||
10991 | next if ($tokens_differ); | ||||
10992 | next if ( $count < 3 && $leading_token ne ':' ); | ||||
10993 | $ipad = $ibeg; | ||||
10994 | } | ||||
10995 | else { | ||||
10996 | next; | ||||
10997 | } | ||||
10998 | } | ||||
10999 | } | ||||
11000 | } | ||||
11001 | |||||
11002 | # find interior token to pad if necessary | ||||
11003 | if ( !defined($ipad) ) { | ||||
11004 | |||||
11005 | for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) { | ||||
11006 | |||||
11007 | # find any unclosed container | ||||
11008 | next | ||||
11009 | unless ( $type_sequence_to_go[$i] | ||||
11010 | && $mate_index_to_go[$i] > $iend ); | ||||
11011 | |||||
11012 | # find next nonblank token to pad | ||||
11013 | $ipad = $inext_to_go[$i]; | ||||
11014 | last if ( $ipad > $iend ); | ||||
11015 | } | ||||
11016 | last unless $ipad; | ||||
11017 | } | ||||
11018 | |||||
11019 | # We cannot pad a leading token at the lowest level because | ||||
11020 | # it could cause a bug in which the starting indentation | ||||
11021 | # level is guessed incorrectly each time the code is run | ||||
11022 | # though perltidy, thus causing the code to march off to | ||||
11023 | # the right. For example, the following snippet would have | ||||
11024 | # this problem: | ||||
11025 | |||||
11026 | ## ov_method mycan( $package, '(""' ), $package | ||||
11027 | ## or ov_method mycan( $package, '(0+' ), $package | ||||
11028 | ## or ov_method mycan( $package, '(bool' ), $package | ||||
11029 | ## or ov_method mycan( $package, '(nomethod' ), $package; | ||||
11030 | |||||
11031 | # If this snippet is within a block this won't happen | ||||
11032 | # unless the user just processes the snippet alone within | ||||
11033 | # an editor. In that case either the user will see and | ||||
11034 | # fix the problem or it will be corrected next time the | ||||
11035 | # entire file is processed with perltidy. | ||||
11036 | next if ( $ipad == 0 && $levels_to_go[$ipad] == 0 ); | ||||
11037 | |||||
11038 | ## THIS PATCH REMOVES THE FOLLOWING POOR PADDING (math.t) with -pbp, BUT | ||||
11039 | ## IT DID MORE HARM THAN GOOD | ||||
11040 | ## ceil( | ||||
11041 | ## $font->{'loca'}->{'glyphs'}[$x]->read->{'xMin'} * 1000 | ||||
11042 | ## / $upem | ||||
11043 | ## ), | ||||
11044 | ##? # do not put leading padding for just 2 lines of math | ||||
11045 | ##? if ( $ipad == $ibeg | ||||
11046 | ##? && $line > 0 | ||||
11047 | ##? && $levels_to_go[$ipad] > $levels_to_go[ $ipad - 1 ] | ||||
11048 | ##? && $is_math_op{$type_next} | ||||
11049 | ##? && $line + 2 <= $max_line ) | ||||
11050 | ##? { | ||||
11051 | ##? my $ibeg_next_next = $$ri_first[ $line + 2 ]; | ||||
11052 | ##? my $type_next_next = $types_to_go[$ibeg_next_next]; | ||||
11053 | ##? next if !$is_math_op{$type_next_next}; | ||||
11054 | ##? } | ||||
11055 | |||||
11056 | # next line must not be at greater depth | ||||
11057 | my $iend_next = $$ri_last[ $line + 1 ]; | ||||
11058 | next | ||||
11059 | if ( $nesting_depth_to_go[ $iend_next + 1 ] > | ||||
11060 | $nesting_depth_to_go[$ipad] ); | ||||
11061 | |||||
11062 | # lines must be somewhat similar to be padded.. | ||||
11063 | my $inext_next = $inext_to_go[$ibeg_next]; | ||||
11064 | my $type = $types_to_go[$ipad]; | ||||
11065 | my $type_next = $types_to_go[ $ipad + 1 ]; | ||||
11066 | |||||
11067 | # see if there are multiple continuation lines | ||||
11068 | my $logical_continuation_lines = 1; | ||||
11069 | if ( $line + 2 <= $max_line ) { | ||||
11070 | my $leading_token = $tokens_to_go[$ibeg_next]; | ||||
11071 | my $ibeg_next_next = $$ri_first[ $line + 2 ]; | ||||
11072 | if ( $tokens_to_go[$ibeg_next_next] eq $leading_token | ||||
11073 | && $nesting_depth_to_go[$ibeg_next] eq | ||||
11074 | $nesting_depth_to_go[$ibeg_next_next] ) | ||||
11075 | { | ||||
11076 | $logical_continuation_lines++; | ||||
11077 | } | ||||
11078 | } | ||||
11079 | |||||
11080 | # see if leading types match | ||||
11081 | my $types_match = $types_to_go[$inext_next] eq $type; | ||||
11082 | my $matches_without_bang; | ||||
11083 | |||||
11084 | # if first line has leading ! then compare the following token | ||||
11085 | if ( !$types_match && $type eq '!' ) { | ||||
11086 | $types_match = $matches_without_bang = | ||||
11087 | $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ]; | ||||
11088 | } | ||||
11089 | |||||
11090 | if ( | ||||
11091 | |||||
11092 | # either we have multiple continuation lines to follow | ||||
11093 | # and we are not padding the first token | ||||
11094 | ( $logical_continuation_lines > 1 && $ipad > 0 ) | ||||
11095 | |||||
11096 | # or.. | ||||
11097 | || ( | ||||
11098 | |||||
11099 | # types must match | ||||
11100 | $types_match | ||||
11101 | |||||
11102 | # and keywords must match if keyword | ||||
11103 | && !( | ||||
11104 | $type eq 'k' | ||||
11105 | && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next] | ||||
11106 | ) | ||||
11107 | ) | ||||
11108 | ) | ||||
11109 | { | ||||
11110 | |||||
11111 | #----------------------begin special checks-------------- | ||||
11112 | # | ||||
11113 | # SPECIAL CHECK 1: | ||||
11114 | # A check is needed before we can make the pad. | ||||
11115 | # If we are in a list with some long items, we want each | ||||
11116 | # item to stand out. So in the following example, the | ||||
11117 | # first line beginning with '$casefold->' would look good | ||||
11118 | # padded to align with the next line, but then it | ||||
11119 | # would be indented more than the last line, so we | ||||
11120 | # won't do it. | ||||
11121 | # | ||||
11122 | # ok( | ||||
11123 | # $casefold->{code} eq '0041' | ||||
11124 | # && $casefold->{status} eq 'C' | ||||
11125 | # && $casefold->{mapping} eq '0061', | ||||
11126 | # 'casefold 0x41' | ||||
11127 | # ); | ||||
11128 | # | ||||
11129 | # Note: | ||||
11130 | # It would be faster, and almost as good, to use a comma | ||||
11131 | # count, and not pad if comma_count > 1 and the previous | ||||
11132 | # line did not end with a comma. | ||||
11133 | # | ||||
11134 | my $ok_to_pad = 1; | ||||
11135 | |||||
11136 | my $ibg = $$ri_first[ $line + 1 ]; | ||||
11137 | my $depth = $nesting_depth_to_go[ $ibg + 1 ]; | ||||
11138 | |||||
11139 | # just use simplified formula for leading spaces to avoid | ||||
11140 | # needless sub calls | ||||
11141 | my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg]; | ||||
11142 | |||||
11143 | # look at each line beyond the next .. | ||||
11144 | my $l = $line + 1; | ||||
11145 | foreach $l ( $line + 2 .. $max_line ) { | ||||
11146 | my $ibg = $$ri_first[$l]; | ||||
11147 | |||||
11148 | # quit looking at the end of this container | ||||
11149 | last | ||||
11150 | if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth ) | ||||
11151 | || ( $nesting_depth_to_go[$ibg] < $depth ); | ||||
11152 | |||||
11153 | # cannot do the pad if a later line would be | ||||
11154 | # outdented more | ||||
11155 | if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) { | ||||
11156 | $ok_to_pad = 0; | ||||
11157 | last; | ||||
11158 | } | ||||
11159 | } | ||||
11160 | |||||
11161 | # don't pad if we end in a broken list | ||||
11162 | if ( $l == $max_line ) { | ||||
11163 | my $i2 = $$ri_last[$l]; | ||||
11164 | if ( $types_to_go[$i2] eq '#' ) { | ||||
11165 | my $i1 = $$ri_first[$l]; | ||||
11166 | next | ||||
11167 | if ( | ||||
11168 | terminal_type( \@types_to_go, \@block_type_to_go, | ||||
11169 | $i1, $i2 ) eq ',' | ||||
11170 | ); | ||||
11171 | } | ||||
11172 | } | ||||
11173 | |||||
11174 | # SPECIAL CHECK 2: | ||||
11175 | # a minus may introduce a quoted variable, and we will | ||||
11176 | # add the pad only if this line begins with a bare word, | ||||
11177 | # such as for the word 'Button' here: | ||||
11178 | # [ | ||||
11179 | # Button => "Print letter \"~$_\"", | ||||
11180 | # -command => [ sub { print "$_[0]\n" }, $_ ], | ||||
11181 | # -accelerator => "Meta+$_" | ||||
11182 | # ]; | ||||
11183 | # | ||||
11184 | # On the other hand, if 'Button' is quoted, it looks best | ||||
11185 | # not to pad: | ||||
11186 | # [ | ||||
11187 | # 'Button' => "Print letter \"~$_\"", | ||||
11188 | # -command => [ sub { print "$_[0]\n" }, $_ ], | ||||
11189 | # -accelerator => "Meta+$_" | ||||
11190 | # ]; | ||||
11191 | if ( $types_to_go[$ibeg_next] eq 'm' ) { | ||||
11192 | $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q'; | ||||
11193 | } | ||||
11194 | |||||
11195 | next unless $ok_to_pad; | ||||
11196 | |||||
11197 | #----------------------end special check--------------- | ||||
11198 | |||||
11199 | my $length_1 = total_line_length( $ibeg, $ipad - 1 ); | ||||
11200 | my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 ); | ||||
11201 | $pad_spaces = $length_2 - $length_1; | ||||
11202 | |||||
11203 | # If the first line has a leading ! and the second does | ||||
11204 | # not, then remove one space to try to align the next | ||||
11205 | # leading characters, which are often the same. For example: | ||||
11206 | # if ( !$ts | ||||
11207 | # || $ts == $self->Holder | ||||
11208 | # || $self->Holder->Type eq "Arena" ) | ||||
11209 | # | ||||
11210 | # This usually helps readability, but if there are subsequent | ||||
11211 | # ! operators things will still get messed up. For example: | ||||
11212 | # | ||||
11213 | # if ( !exists $Net::DNS::typesbyname{$qtype} | ||||
11214 | # && exists $Net::DNS::classesbyname{$qtype} | ||||
11215 | # && !exists $Net::DNS::classesbyname{$qclass} | ||||
11216 | # && exists $Net::DNS::typesbyname{$qclass} ) | ||||
11217 | # We can't fix that. | ||||
11218 | if ($matches_without_bang) { $pad_spaces-- } | ||||
11219 | |||||
11220 | # make sure this won't change if -lp is used | ||||
11221 | my $indentation_1 = $leading_spaces_to_go[$ibeg]; | ||||
11222 | if ( ref($indentation_1) ) { | ||||
11223 | if ( $indentation_1->get_RECOVERABLE_SPACES() == 0 ) { | ||||
11224 | my $indentation_2 = $leading_spaces_to_go[$ibeg_next]; | ||||
11225 | unless ( $indentation_2->get_RECOVERABLE_SPACES() == 0 ) | ||||
11226 | { | ||||
11227 | $pad_spaces = 0; | ||||
11228 | } | ||||
11229 | } | ||||
11230 | } | ||||
11231 | |||||
11232 | # we might be able to handle a pad of -1 by removing a blank | ||||
11233 | # token | ||||
11234 | if ( $pad_spaces < 0 ) { | ||||
11235 | |||||
11236 | if ( $pad_spaces == -1 ) { | ||||
11237 | if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' ) | ||||
11238 | { | ||||
11239 | pad_token( $ipad - 1, $pad_spaces ); | ||||
11240 | } | ||||
11241 | } | ||||
11242 | $pad_spaces = 0; | ||||
11243 | } | ||||
11244 | |||||
11245 | # now apply any padding for alignment | ||||
11246 | if ( $ipad >= 0 && $pad_spaces ) { | ||||
11247 | |||||
11248 | my $length_t = total_line_length( $ibeg, $iend ); | ||||
11249 | if ( $pad_spaces + $length_t <= maximum_line_length($ibeg) ) | ||||
11250 | { | ||||
11251 | pad_token( $ipad, $pad_spaces ); | ||||
11252 | } | ||||
11253 | } | ||||
11254 | } | ||||
11255 | } | ||||
11256 | continue { | ||||
11257 | $iendm = $iend; | ||||
11258 | $ibegm = $ibeg; | ||||
11259 | $has_leading_op = $has_leading_op_next; | ||||
11260 | } # end of loop over lines | ||||
11261 | return; | ||||
11262 | } | ||||
11263 | } | ||||
11264 | |||||
11265 | sub correct_lp_indentation { | ||||
11266 | |||||
11267 | # When the -lp option is used, we need to make a last pass through | ||||
11268 | # each line to correct the indentation positions in case they differ | ||||
11269 | # from the predictions. This is necessary because perltidy uses a | ||||
11270 | # predictor/corrector method for aligning with opening parens. The | ||||
11271 | # predictor is usually good, but sometimes stumbles. The corrector | ||||
11272 | # tries to patch things up once the actual opening paren locations | ||||
11273 | # are known. | ||||
11274 | my ( $ri_first, $ri_last ) = @_; | ||||
11275 | my $do_not_pad = 0; | ||||
11276 | |||||
11277 | # Note on flag '$do_not_pad': | ||||
11278 | # We want to avoid a situation like this, where the aligner inserts | ||||
11279 | # whitespace before the '=' to align it with a previous '=', because | ||||
11280 | # otherwise the parens might become mis-aligned in a situation like | ||||
11281 | # this, where the '=' has become aligned with the previous line, | ||||
11282 | # pushing the opening '(' forward beyond where we want it. | ||||
11283 | # | ||||
11284 | # $mkFloor::currentRoom = ''; | ||||
11285 | # $mkFloor::c_entry = $c->Entry( | ||||
11286 | # -width => '10', | ||||
11287 | # -relief => 'sunken', | ||||
11288 | # ... | ||||
11289 | # ); | ||||
11290 | # | ||||
11291 | # We leave it to the aligner to decide how to do this. | ||||
11292 | |||||
11293 | # first remove continuation indentation if appropriate | ||||
11294 | my $max_line = @$ri_first - 1; | ||||
11295 | |||||
11296 | # looking at each line of this batch.. | ||||
11297 | my ( $ibeg, $iend ); | ||||
11298 | my $line; | ||||
11299 | foreach $line ( 0 .. $max_line ) { | ||||
11300 | $ibeg = $$ri_first[$line]; | ||||
11301 | $iend = $$ri_last[$line]; | ||||
11302 | |||||
11303 | # looking at each token in this output line.. | ||||
11304 | my $i; | ||||
11305 | foreach $i ( $ibeg .. $iend ) { | ||||
11306 | |||||
11307 | # How many space characters to place before this token | ||||
11308 | # for special alignment. Actual padding is done in the | ||||
11309 | # continue block. | ||||
11310 | |||||
11311 | # looking for next unvisited indentation item | ||||
11312 | my $indentation = $leading_spaces_to_go[$i]; | ||||
11313 | if ( !$indentation->get_MARKED() ) { | ||||
11314 | $indentation->set_MARKED(1); | ||||
11315 | |||||
11316 | # looking for indentation item for which we are aligning | ||||
11317 | # with parens, braces, and brackets | ||||
11318 | next unless ( $indentation->get_ALIGN_PAREN() ); | ||||
11319 | |||||
11320 | # skip closed container on this line | ||||
11321 | if ( $i > $ibeg ) { | ||||
11322 | my $im = max( $ibeg, $iprev_to_go[$i] ); | ||||
11323 | if ( $type_sequence_to_go[$im] | ||||
11324 | && $mate_index_to_go[$im] <= $iend ) | ||||
11325 | { | ||||
11326 | next; | ||||
11327 | } | ||||
11328 | } | ||||
11329 | |||||
11330 | if ( $line == 1 && $i == $ibeg ) { | ||||
11331 | $do_not_pad = 1; | ||||
11332 | } | ||||
11333 | |||||
11334 | # Ok, let's see what the error is and try to fix it | ||||
11335 | my $actual_pos; | ||||
11336 | my $predicted_pos = $indentation->get_SPACES(); | ||||
11337 | if ( $i > $ibeg ) { | ||||
11338 | |||||
11339 | # token is mid-line - use length to previous token | ||||
11340 | $actual_pos = total_line_length( $ibeg, $i - 1 ); | ||||
11341 | |||||
11342 | # for mid-line token, we must check to see if all | ||||
11343 | # additional lines have continuation indentation, | ||||
11344 | # and remove it if so. Otherwise, we do not get | ||||
11345 | # good alignment. | ||||
11346 | my $closing_index = $indentation->get_CLOSED(); | ||||
11347 | if ( $closing_index > $iend ) { | ||||
11348 | my $ibeg_next = $$ri_first[ $line + 1 ]; | ||||
11349 | if ( $ci_levels_to_go[$ibeg_next] > 0 ) { | ||||
11350 | undo_lp_ci( $line, $i, $closing_index, $ri_first, | ||||
11351 | $ri_last ); | ||||
11352 | } | ||||
11353 | } | ||||
11354 | } | ||||
11355 | elsif ( $line > 0 ) { | ||||
11356 | |||||
11357 | # handle case where token starts a new line; | ||||
11358 | # use length of previous line | ||||
11359 | my $ibegm = $$ri_first[ $line - 1 ]; | ||||
11360 | my $iendm = $$ri_last[ $line - 1 ]; | ||||
11361 | $actual_pos = total_line_length( $ibegm, $iendm ); | ||||
11362 | |||||
11363 | # follow -pt style | ||||
11364 | ++$actual_pos | ||||
11365 | if ( $types_to_go[ $iendm + 1 ] eq 'b' ); | ||||
11366 | } | ||||
11367 | else { | ||||
11368 | |||||
11369 | # token is first character of first line of batch | ||||
11370 | $actual_pos = $predicted_pos; | ||||
11371 | } | ||||
11372 | |||||
11373 | my $move_right = $actual_pos - $predicted_pos; | ||||
11374 | |||||
11375 | # done if no error to correct (gnu2.t) | ||||
11376 | if ( $move_right == 0 ) { | ||||
11377 | $indentation->set_RECOVERABLE_SPACES($move_right); | ||||
11378 | next; | ||||
11379 | } | ||||
11380 | |||||
11381 | # if we have not seen closure for this indentation in | ||||
11382 | # this batch, we can only pass on a request to the | ||||
11383 | # vertical aligner | ||||
11384 | my $closing_index = $indentation->get_CLOSED(); | ||||
11385 | |||||
11386 | if ( $closing_index < 0 ) { | ||||
11387 | $indentation->set_RECOVERABLE_SPACES($move_right); | ||||
11388 | next; | ||||
11389 | } | ||||
11390 | |||||
11391 | # If necessary, look ahead to see if there is really any | ||||
11392 | # leading whitespace dependent on this whitespace, and | ||||
11393 | # also find the longest line using this whitespace. | ||||
11394 | # Since it is always safe to move left if there are no | ||||
11395 | # dependents, we only need to do this if we may have | ||||
11396 | # dependent nodes or need to move right. | ||||
11397 | |||||
11398 | my $right_margin = 0; | ||||
11399 | my $have_child = $indentation->get_HAVE_CHILD(); | ||||
11400 | |||||
11401 | my %saw_indentation; | ||||
11402 | my $line_count = 1; | ||||
11403 | $saw_indentation{$indentation} = $indentation; | ||||
11404 | |||||
11405 | if ( $have_child || $move_right > 0 ) { | ||||
11406 | $have_child = 0; | ||||
11407 | my $max_length = 0; | ||||
11408 | if ( $i == $ibeg ) { | ||||
11409 | $max_length = total_line_length( $ibeg, $iend ); | ||||
11410 | } | ||||
11411 | |||||
11412 | # look ahead at the rest of the lines of this batch.. | ||||
11413 | my $line_t; | ||||
11414 | foreach $line_t ( $line + 1 .. $max_line ) { | ||||
11415 | my $ibeg_t = $$ri_first[$line_t]; | ||||
11416 | my $iend_t = $$ri_last[$line_t]; | ||||
11417 | last if ( $closing_index <= $ibeg_t ); | ||||
11418 | |||||
11419 | # remember all different indentation objects | ||||
11420 | my $indentation_t = $leading_spaces_to_go[$ibeg_t]; | ||||
11421 | $saw_indentation{$indentation_t} = $indentation_t; | ||||
11422 | $line_count++; | ||||
11423 | |||||
11424 | # remember longest line in the group | ||||
11425 | my $length_t = total_line_length( $ibeg_t, $iend_t ); | ||||
11426 | if ( $length_t > $max_length ) { | ||||
11427 | $max_length = $length_t; | ||||
11428 | } | ||||
11429 | } | ||||
11430 | $right_margin = maximum_line_length($ibeg) - $max_length; | ||||
11431 | if ( $right_margin < 0 ) { $right_margin = 0 } | ||||
11432 | } | ||||
11433 | |||||
11434 | my $first_line_comma_count = | ||||
11435 | grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ]; | ||||
11436 | my $comma_count = $indentation->get_COMMA_COUNT(); | ||||
11437 | my $arrow_count = $indentation->get_ARROW_COUNT(); | ||||
11438 | |||||
11439 | # This is a simple approximate test for vertical alignment: | ||||
11440 | # if we broke just after an opening paren, brace, bracket, | ||||
11441 | # and there are 2 or more commas in the first line, | ||||
11442 | # and there are no '=>'s, | ||||
11443 | # then we are probably vertically aligned. We could set | ||||
11444 | # an exact flag in sub scan_list, but this is good | ||||
11445 | # enough. | ||||
11446 | my $indentation_count = keys %saw_indentation; | ||||
11447 | my $is_vertically_aligned = | ||||
11448 | ( $i == $ibeg | ||||
11449 | && $first_line_comma_count > 1 | ||||
11450 | && $indentation_count == 1 | ||||
11451 | && ( $arrow_count == 0 || $arrow_count == $line_count ) ); | ||||
11452 | |||||
11453 | # Make the move if possible .. | ||||
11454 | if ( | ||||
11455 | |||||
11456 | # we can always move left | ||||
11457 | $move_right < 0 | ||||
11458 | |||||
11459 | # but we should only move right if we are sure it will | ||||
11460 | # not spoil vertical alignment | ||||
11461 | || ( $comma_count == 0 ) | ||||
11462 | || ( $comma_count > 0 && !$is_vertically_aligned ) | ||||
11463 | ) | ||||
11464 | { | ||||
11465 | my $move = | ||||
11466 | ( $move_right <= $right_margin ) | ||||
11467 | ? $move_right | ||||
11468 | : $right_margin; | ||||
11469 | |||||
11470 | foreach ( keys %saw_indentation ) { | ||||
11471 | $saw_indentation{$_} | ||||
11472 | ->permanently_decrease_AVAILABLE_SPACES( -$move ); | ||||
11473 | } | ||||
11474 | } | ||||
11475 | |||||
11476 | # Otherwise, record what we want and the vertical aligner | ||||
11477 | # will try to recover it. | ||||
11478 | else { | ||||
11479 | $indentation->set_RECOVERABLE_SPACES($move_right); | ||||
11480 | } | ||||
11481 | } | ||||
11482 | } | ||||
11483 | } | ||||
11484 | return $do_not_pad; | ||||
11485 | } | ||||
11486 | |||||
11487 | # flush is called to output any tokens in the pipeline, so that | ||||
11488 | # an alternate source of lines can be written in the correct order | ||||
11489 | |||||
11490 | sub flush { | ||||
11491 | destroy_one_line_block(); | ||||
11492 | output_line_to_go(); | ||||
11493 | Perl::Tidy::VerticalAligner::flush(); | ||||
11494 | } | ||||
11495 | |||||
11496 | sub reset_block_text_accumulator { | ||||
11497 | |||||
11498 | # save text after 'if' and 'elsif' to append after 'else' | ||||
11499 | if ($accumulating_text_for_block) { | ||||
11500 | |||||
11501 | if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) { | ||||
11502 | push @{$rleading_block_if_elsif_text}, $leading_block_text; | ||||
11503 | } | ||||
11504 | } | ||||
11505 | $accumulating_text_for_block = ""; | ||||
11506 | $leading_block_text = ""; | ||||
11507 | $leading_block_text_level = 0; | ||||
11508 | $leading_block_text_length_exceeded = 0; | ||||
11509 | $leading_block_text_line_number = 0; | ||||
11510 | $leading_block_text_line_length = 0; | ||||
11511 | } | ||||
11512 | |||||
11513 | sub set_block_text_accumulator { | ||||
11514 | my $i = shift; | ||||
11515 | $accumulating_text_for_block = $tokens_to_go[$i]; | ||||
11516 | if ( $accumulating_text_for_block !~ /^els/ ) { | ||||
11517 | $rleading_block_if_elsif_text = []; | ||||
11518 | } | ||||
11519 | $leading_block_text = ""; | ||||
11520 | $leading_block_text_level = $levels_to_go[$i]; | ||||
11521 | $leading_block_text_line_number = | ||||
11522 | $vertical_aligner_object->get_output_line_number(); | ||||
11523 | $leading_block_text_length_exceeded = 0; | ||||
11524 | |||||
11525 | # this will contain the column number of the last character | ||||
11526 | # of the closing side comment | ||||
11527 | $leading_block_text_line_length = | ||||
11528 | length($csc_last_label) + | ||||
11529 | length($accumulating_text_for_block) + | ||||
11530 | length( $rOpts->{'closing-side-comment-prefix'} ) + | ||||
11531 | $leading_block_text_level * $rOpts_indent_columns + 3; | ||||
11532 | } | ||||
11533 | |||||
11534 | sub accumulate_block_text { | ||||
11535 | my $i = shift; | ||||
11536 | |||||
11537 | # accumulate leading text for -csc, ignoring any side comments | ||||
11538 | if ( $accumulating_text_for_block | ||||
11539 | && !$leading_block_text_length_exceeded | ||||
11540 | && $types_to_go[$i] ne '#' ) | ||||
11541 | { | ||||
11542 | |||||
11543 | my $added_length = $token_lengths_to_go[$i]; | ||||
11544 | $added_length += 1 if $i == 0; | ||||
11545 | my $new_line_length = $leading_block_text_line_length + $added_length; | ||||
11546 | |||||
11547 | # we can add this text if we don't exceed some limits.. | ||||
11548 | if ( | ||||
11549 | |||||
11550 | # we must not have already exceeded the text length limit | ||||
11551 | length($leading_block_text) < | ||||
11552 | $rOpts_closing_side_comment_maximum_text | ||||
11553 | |||||
11554 | # and either: | ||||
11555 | # the new total line length must be below the line length limit | ||||
11556 | # or the new length must be below the text length limit | ||||
11557 | # (ie, we may allow one token to exceed the text length limit) | ||||
11558 | && ( | ||||
11559 | $new_line_length < | ||||
11560 | maximum_line_length_for_level($leading_block_text_level) | ||||
11561 | |||||
11562 | || length($leading_block_text) + $added_length < | ||||
11563 | $rOpts_closing_side_comment_maximum_text | ||||
11564 | ) | ||||
11565 | |||||
11566 | # UNLESS: we are adding a closing paren before the brace we seek. | ||||
11567 | # This is an attempt to avoid situations where the ... to be | ||||
11568 | # added are longer than the omitted right paren, as in: | ||||
11569 | |||||
11570 | # foreach my $item (@a_rather_long_variable_name_here) { | ||||
11571 | # &whatever; | ||||
11572 | # } ## end foreach my $item (@a_rather_long_variable_name_here... | ||||
11573 | |||||
11574 | || ( | ||||
11575 | $tokens_to_go[$i] eq ')' | ||||
11576 | && ( | ||||
11577 | ( | ||||
11578 | $i + 1 <= $max_index_to_go | ||||
11579 | && $block_type_to_go[ $i + 1 ] eq | ||||
11580 | $accumulating_text_for_block | ||||
11581 | ) | ||||
11582 | || ( $i + 2 <= $max_index_to_go | ||||
11583 | && $block_type_to_go[ $i + 2 ] eq | ||||
11584 | $accumulating_text_for_block ) | ||||
11585 | ) | ||||
11586 | ) | ||||
11587 | ) | ||||
11588 | { | ||||
11589 | |||||
11590 | # add an extra space at each newline | ||||
11591 | if ( $i == 0 ) { $leading_block_text .= ' ' } | ||||
11592 | |||||
11593 | # add the token text | ||||
11594 | $leading_block_text .= $tokens_to_go[$i]; | ||||
11595 | $leading_block_text_line_length = $new_line_length; | ||||
11596 | } | ||||
11597 | |||||
11598 | # show that text was truncated if necessary | ||||
11599 | elsif ( $types_to_go[$i] ne 'b' ) { | ||||
11600 | $leading_block_text_length_exceeded = 1; | ||||
11601 | ## Please see file perltidy.ERR | ||||
11602 | $leading_block_text .= '...'; | ||||
11603 | } | ||||
11604 | } | ||||
11605 | } | ||||
11606 | |||||
11607 | { | ||||
11608 | 2 | 200ns | my %is_if_elsif_else_unless_while_until_for_foreach; | ||
11609 | |||||
11610 | # spent 12µs within Perl::Tidy::Formatter::BEGIN@11610 which was called:
# once (12µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 11619 | ||||
11611 | |||||
11612 | # These block types may have text between the keyword and opening | ||||
11613 | # curly. Note: 'else' does not, but must be included to allow trailing | ||||
11614 | # if/elsif text to be appended. | ||||
11615 | # patch for SWITCH/CASE: added 'case' and 'when' | ||||
11616 | 1 | 2µs | @_ = qw(if elsif else unless while until for foreach case when); | ||
11617 | 1 | 11µs | @is_if_elsif_else_unless_while_until_for_foreach{@_} = | ||
11618 | (1) x scalar(@_); | ||||
11619 | 1 | 554µs | 1 | 12µs | } # spent 12µs making 1 call to Perl::Tidy::Formatter::BEGIN@11610 |
11620 | |||||
11621 | sub accumulate_csc_text { | ||||
11622 | |||||
11623 | # called once per output buffer when -csc is used. Accumulates | ||||
11624 | # the text placed after certain closing block braces. | ||||
11625 | # Defines and returns the following for this buffer: | ||||
11626 | |||||
11627 | my $block_leading_text = ""; # the leading text of the last '}' | ||||
11628 | my $rblock_leading_if_elsif_text; | ||||
11629 | my $i_block_leading_text = | ||||
11630 | -1; # index of token owning block_leading_text | ||||
11631 | my $block_line_count = 100; # how many lines the block spans | ||||
11632 | my $terminal_type = 'b'; # type of last nonblank token | ||||
11633 | my $i_terminal = 0; # index of last nonblank token | ||||
11634 | my $terminal_block_type = ""; | ||||
11635 | |||||
11636 | # update most recent statement label | ||||
11637 | $csc_last_label = "" unless ($csc_last_label); | ||||
11638 | if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] } | ||||
11639 | my $block_label = $csc_last_label; | ||||
11640 | |||||
11641 | # Loop over all tokens of this batch | ||||
11642 | for my $i ( 0 .. $max_index_to_go ) { | ||||
11643 | my $type = $types_to_go[$i]; | ||||
11644 | my $block_type = $block_type_to_go[$i]; | ||||
11645 | my $token = $tokens_to_go[$i]; | ||||
11646 | |||||
11647 | # remember last nonblank token type | ||||
11648 | if ( $type ne '#' && $type ne 'b' ) { | ||||
11649 | $terminal_type = $type; | ||||
11650 | $terminal_block_type = $block_type; | ||||
11651 | $i_terminal = $i; | ||||
11652 | } | ||||
11653 | |||||
11654 | my $type_sequence = $type_sequence_to_go[$i]; | ||||
11655 | if ( $block_type && $type_sequence ) { | ||||
11656 | |||||
11657 | if ( $token eq '}' ) { | ||||
11658 | |||||
11659 | # restore any leading text saved when we entered this block | ||||
11660 | if ( defined( $block_leading_text{$type_sequence} ) ) { | ||||
11661 | ( $block_leading_text, $rblock_leading_if_elsif_text ) | ||||
11662 | = @{ $block_leading_text{$type_sequence} }; | ||||
11663 | $i_block_leading_text = $i; | ||||
11664 | delete $block_leading_text{$type_sequence}; | ||||
11665 | $rleading_block_if_elsif_text = | ||||
11666 | $rblock_leading_if_elsif_text; | ||||
11667 | } | ||||
11668 | |||||
11669 | if ( defined( $csc_block_label{$type_sequence} ) ) { | ||||
11670 | $block_label = $csc_block_label{$type_sequence}; | ||||
11671 | delete $csc_block_label{$type_sequence}; | ||||
11672 | } | ||||
11673 | |||||
11674 | # if we run into a '}' then we probably started accumulating | ||||
11675 | # at something like a trailing 'if' clause..no harm done. | ||||
11676 | if ( $accumulating_text_for_block | ||||
11677 | && $levels_to_go[$i] <= $leading_block_text_level ) | ||||
11678 | { | ||||
11679 | my $lev = $levels_to_go[$i]; | ||||
11680 | reset_block_text_accumulator(); | ||||
11681 | } | ||||
11682 | |||||
11683 | if ( defined( $block_opening_line_number{$type_sequence} ) ) | ||||
11684 | { | ||||
11685 | my $output_line_number = | ||||
11686 | $vertical_aligner_object->get_output_line_number(); | ||||
11687 | $block_line_count = | ||||
11688 | $output_line_number - | ||||
11689 | $block_opening_line_number{$type_sequence} + 1; | ||||
11690 | delete $block_opening_line_number{$type_sequence}; | ||||
11691 | } | ||||
11692 | else { | ||||
11693 | |||||
11694 | # Error: block opening line undefined for this line.. | ||||
11695 | # This shouldn't be possible, but it is not a | ||||
11696 | # significant problem. | ||||
11697 | } | ||||
11698 | } | ||||
11699 | |||||
11700 | elsif ( $token eq '{' ) { | ||||
11701 | |||||
11702 | my $line_number = | ||||
11703 | $vertical_aligner_object->get_output_line_number(); | ||||
11704 | $block_opening_line_number{$type_sequence} = $line_number; | ||||
11705 | |||||
11706 | # set a label for this block, except for | ||||
11707 | # a bare block which already has the label | ||||
11708 | # A label can only be used on the next { | ||||
11709 | if ( $block_type =~ /:$/ ) { $csc_last_label = "" } | ||||
11710 | $csc_block_label{$type_sequence} = $csc_last_label; | ||||
11711 | $csc_last_label = ""; | ||||
11712 | |||||
11713 | if ( $accumulating_text_for_block | ||||
11714 | && $levels_to_go[$i] == $leading_block_text_level ) | ||||
11715 | { | ||||
11716 | |||||
11717 | if ( $accumulating_text_for_block eq $block_type ) { | ||||
11718 | |||||
11719 | # save any leading text before we enter this block | ||||
11720 | $block_leading_text{$type_sequence} = [ | ||||
11721 | $leading_block_text, | ||||
11722 | $rleading_block_if_elsif_text | ||||
11723 | ]; | ||||
11724 | $block_opening_line_number{$type_sequence} = | ||||
11725 | $leading_block_text_line_number; | ||||
11726 | reset_block_text_accumulator(); | ||||
11727 | } | ||||
11728 | else { | ||||
11729 | |||||
11730 | # shouldn't happen, but not a serious error. | ||||
11731 | # We were accumulating -csc text for block type | ||||
11732 | # $accumulating_text_for_block and unexpectedly | ||||
11733 | # encountered a '{' for block type $block_type. | ||||
11734 | } | ||||
11735 | } | ||||
11736 | } | ||||
11737 | } | ||||
11738 | |||||
11739 | if ( $type eq 'k' | ||||
11740 | && $csc_new_statement_ok | ||||
11741 | && $is_if_elsif_else_unless_while_until_for_foreach{$token} | ||||
11742 | && $token =~ /$closing_side_comment_list_pattern/o ) | ||||
11743 | { | ||||
11744 | set_block_text_accumulator($i); | ||||
11745 | } | ||||
11746 | else { | ||||
11747 | |||||
11748 | # note: ignoring type 'q' because of tricks being played | ||||
11749 | # with 'q' for hanging side comments | ||||
11750 | if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) { | ||||
11751 | $csc_new_statement_ok = | ||||
11752 | ( $block_type || $type eq 'J' || $type eq ';' ); | ||||
11753 | } | ||||
11754 | if ( $type eq ';' | ||||
11755 | && $accumulating_text_for_block | ||||
11756 | && $levels_to_go[$i] == $leading_block_text_level ) | ||||
11757 | { | ||||
11758 | reset_block_text_accumulator(); | ||||
11759 | } | ||||
11760 | else { | ||||
11761 | accumulate_block_text($i); | ||||
11762 | } | ||||
11763 | } | ||||
11764 | } | ||||
11765 | |||||
11766 | # Treat an 'else' block specially by adding preceding 'if' and | ||||
11767 | # 'elsif' text. Otherwise, the 'end else' is not helpful, | ||||
11768 | # especially for cuddled-else formatting. | ||||
11769 | if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) { | ||||
11770 | $block_leading_text = | ||||
11771 | make_else_csc_text( $i_terminal, $terminal_block_type, | ||||
11772 | $block_leading_text, $rblock_leading_if_elsif_text ); | ||||
11773 | } | ||||
11774 | |||||
11775 | # if this line ends in a label then remember it for the next pass | ||||
11776 | $csc_last_label = ""; | ||||
11777 | if ( $terminal_type eq 'J' ) { | ||||
11778 | $csc_last_label = $tokens_to_go[$i_terminal]; | ||||
11779 | } | ||||
11780 | |||||
11781 | return ( $terminal_type, $i_terminal, $i_block_leading_text, | ||||
11782 | $block_leading_text, $block_line_count, $block_label ); | ||||
11783 | } | ||||
11784 | } | ||||
11785 | |||||
11786 | sub make_else_csc_text { | ||||
11787 | |||||
11788 | # create additional -csc text for an 'else' and optionally 'elsif', | ||||
11789 | # depending on the value of switch | ||||
11790 | # $rOpts_closing_side_comment_else_flag: | ||||
11791 | # | ||||
11792 | # = 0 add 'if' text to trailing else | ||||
11793 | # = 1 same as 0 plus: | ||||
11794 | # add 'if' to 'elsif's if can fit in line length | ||||
11795 | # add last 'elsif' to trailing else if can fit in one line | ||||
11796 | # = 2 same as 1 but do not check if exceed line length | ||||
11797 | # | ||||
11798 | # $rif_elsif_text = a reference to a list of all previous closing | ||||
11799 | # side comments created for this if block | ||||
11800 | # | ||||
11801 | my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_; | ||||
11802 | my $csc_text = $block_leading_text; | ||||
11803 | |||||
11804 | if ( $block_type eq 'elsif' | ||||
11805 | && $rOpts_closing_side_comment_else_flag == 0 ) | ||||
11806 | { | ||||
11807 | return $csc_text; | ||||
11808 | } | ||||
11809 | |||||
11810 | my $count = @{$rif_elsif_text}; | ||||
11811 | return $csc_text unless ($count); | ||||
11812 | |||||
11813 | my $if_text = '[ if' . $rif_elsif_text->[0]; | ||||
11814 | |||||
11815 | # always show the leading 'if' text on 'else' | ||||
11816 | if ( $block_type eq 'else' ) { | ||||
11817 | $csc_text .= $if_text; | ||||
11818 | } | ||||
11819 | |||||
11820 | # see if that's all | ||||
11821 | if ( $rOpts_closing_side_comment_else_flag == 0 ) { | ||||
11822 | return $csc_text; | ||||
11823 | } | ||||
11824 | |||||
11825 | my $last_elsif_text = ""; | ||||
11826 | if ( $count > 1 ) { | ||||
11827 | $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ]; | ||||
11828 | if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; } | ||||
11829 | } | ||||
11830 | |||||
11831 | # tentatively append one more item | ||||
11832 | my $saved_text = $csc_text; | ||||
11833 | if ( $block_type eq 'else' ) { | ||||
11834 | $csc_text .= $last_elsif_text; | ||||
11835 | } | ||||
11836 | else { | ||||
11837 | $csc_text .= ' ' . $if_text; | ||||
11838 | } | ||||
11839 | |||||
11840 | # all done if no length checks requested | ||||
11841 | if ( $rOpts_closing_side_comment_else_flag == 2 ) { | ||||
11842 | return $csc_text; | ||||
11843 | } | ||||
11844 | |||||
11845 | # undo it if line length exceeded | ||||
11846 | my $length = | ||||
11847 | length($csc_text) + | ||||
11848 | length($block_type) + | ||||
11849 | length( $rOpts->{'closing-side-comment-prefix'} ) + | ||||
11850 | $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3; | ||||
11851 | if ( $length > maximum_line_length_for_level($leading_block_text_level) ) { | ||||
11852 | $csc_text = $saved_text; | ||||
11853 | } | ||||
11854 | return $csc_text; | ||||
11855 | } | ||||
11856 | |||||
11857 | { # sub balance_csc_text | ||||
11858 | |||||
11859 | 2 | 100ns | my %matching_char; | ||
11860 | |||||
11861 | # spent 7µs within Perl::Tidy::Formatter::BEGIN@11861 which was called:
# once (7µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 11870 | ||||
11862 | 1 | 7µs | %matching_char = ( | ||
11863 | '{' => '}', | ||||
11864 | '(' => ')', | ||||
11865 | '[' => ']', | ||||
11866 | '}' => '{', | ||||
11867 | ')' => '(', | ||||
11868 | ']' => '[', | ||||
11869 | ); | ||||
11870 | 1 | 1.17ms | 1 | 7µs | } # spent 7µs making 1 call to Perl::Tidy::Formatter::BEGIN@11861 |
11871 | |||||
11872 | sub balance_csc_text { | ||||
11873 | |||||
11874 | # Append characters to balance a closing side comment so that editors | ||||
11875 | # such as vim can correctly jump through code. | ||||
11876 | # Simple Example: | ||||
11877 | # input = ## end foreach my $foo ( sort { $b ... | ||||
11878 | # output = ## end foreach my $foo ( sort { $b ...}) | ||||
11879 | |||||
11880 | # NOTE: This routine does not currently filter out structures within | ||||
11881 | # quoted text because the bounce algorithms in text editors do not | ||||
11882 | # necessarily do this either (a version of vim was checked and | ||||
11883 | # did not do this). | ||||
11884 | |||||
11885 | # Some complex examples which will cause trouble for some editors: | ||||
11886 | # while ( $mask_string =~ /\{[^{]*?\}/g ) { | ||||
11887 | # if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) { | ||||
11888 | # if ( $1 eq '{' ) { | ||||
11889 | # test file test1/braces.pl has many such examples. | ||||
11890 | |||||
11891 | my ($csc) = @_; | ||||
11892 | |||||
11893 | # loop to examine characters one-by-one, RIGHT to LEFT and | ||||
11894 | # build a balancing ending, LEFT to RIGHT. | ||||
11895 | for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) { | ||||
11896 | |||||
11897 | my $char = substr( $csc, $pos, 1 ); | ||||
11898 | |||||
11899 | # ignore everything except structural characters | ||||
11900 | next unless ( $matching_char{$char} ); | ||||
11901 | |||||
11902 | # pop most recently appended character | ||||
11903 | my $top = chop($csc); | ||||
11904 | |||||
11905 | # push it back plus the mate to the newest character | ||||
11906 | # unless they balance each other. | ||||
11907 | $csc = $csc . $top . $matching_char{$char} unless $top eq $char; | ||||
11908 | } | ||||
11909 | |||||
11910 | # return the balanced string | ||||
11911 | return $csc; | ||||
11912 | } | ||||
11913 | } | ||||
11914 | |||||
11915 | sub add_closing_side_comment { | ||||
11916 | |||||
11917 | # add closing side comments after closing block braces if -csc used | ||||
11918 | my $cscw_block_comment; | ||||
11919 | |||||
11920 | #--------------------------------------------------------------- | ||||
11921 | # Step 1: loop through all tokens of this line to accumulate | ||||
11922 | # the text needed to create the closing side comments. Also see | ||||
11923 | # how the line ends. | ||||
11924 | #--------------------------------------------------------------- | ||||
11925 | |||||
11926 | my ( $terminal_type, $i_terminal, $i_block_leading_text, | ||||
11927 | $block_leading_text, $block_line_count, $block_label ) | ||||
11928 | = accumulate_csc_text(); | ||||
11929 | |||||
11930 | #--------------------------------------------------------------- | ||||
11931 | # Step 2: make the closing side comment if this ends a block | ||||
11932 | #--------------------------------------------------------------- | ||||
11933 | my $have_side_comment = $i_terminal != $max_index_to_go; | ||||
11934 | |||||
11935 | # if this line might end in a block closure.. | ||||
11936 | if ( | ||||
11937 | $terminal_type eq '}' | ||||
11938 | |||||
11939 | # ..and either | ||||
11940 | && ( | ||||
11941 | |||||
11942 | # the block is long enough | ||||
11943 | ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} ) | ||||
11944 | |||||
11945 | # or there is an existing comment to check | ||||
11946 | || ( $have_side_comment | ||||
11947 | && $rOpts->{'closing-side-comment-warnings'} ) | ||||
11948 | ) | ||||
11949 | |||||
11950 | # .. and if this is one of the types of interest | ||||
11951 | && $block_type_to_go[$i_terminal] =~ | ||||
11952 | /$closing_side_comment_list_pattern/o | ||||
11953 | |||||
11954 | # .. but not an anonymous sub | ||||
11955 | # These are not normally of interest, and their closing braces are | ||||
11956 | # often followed by commas or semicolons anyway. This also avoids | ||||
11957 | # possible erratic output due to line numbering inconsistencies | ||||
11958 | # in the cases where their closing braces terminate a line. | ||||
11959 | && $block_type_to_go[$i_terminal] ne 'sub' | ||||
11960 | |||||
11961 | # ..and the corresponding opening brace must is not in this batch | ||||
11962 | # (because we do not need to tag one-line blocks, although this | ||||
11963 | # should also be caught with a positive -csci value) | ||||
11964 | && $mate_index_to_go[$i_terminal] < 0 | ||||
11965 | |||||
11966 | # ..and either | ||||
11967 | && ( | ||||
11968 | |||||
11969 | # this is the last token (line doesn't have a side comment) | ||||
11970 | !$have_side_comment | ||||
11971 | |||||
11972 | # or the old side comment is a closing side comment | ||||
11973 | || $tokens_to_go[$max_index_to_go] =~ | ||||
11974 | /$closing_side_comment_prefix_pattern/o | ||||
11975 | ) | ||||
11976 | ) | ||||
11977 | { | ||||
11978 | |||||
11979 | # then make the closing side comment text | ||||
11980 | if ($block_label) { $block_label .= " " } | ||||
11981 | my $token = | ||||
11982 | "$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]"; | ||||
11983 | |||||
11984 | # append any extra descriptive text collected above | ||||
11985 | if ( $i_block_leading_text == $i_terminal ) { | ||||
11986 | $token .= $block_leading_text; | ||||
11987 | } | ||||
11988 | |||||
11989 | $token = balance_csc_text($token) | ||||
11990 | if $rOpts->{'closing-side-comments-balanced'}; | ||||
11991 | |||||
11992 | $token =~ s/\s*$//; # trim any trailing whitespace | ||||
11993 | |||||
11994 | # handle case of existing closing side comment | ||||
11995 | if ($have_side_comment) { | ||||
11996 | |||||
11997 | # warn if requested and tokens differ significantly | ||||
11998 | if ( $rOpts->{'closing-side-comment-warnings'} ) { | ||||
11999 | my $old_csc = $tokens_to_go[$max_index_to_go]; | ||||
12000 | my $new_csc = $token; | ||||
12001 | $new_csc =~ s/\s+//g; # trim all whitespace | ||||
12002 | $old_csc =~ s/\s+//g; # trim all whitespace | ||||
12003 | $new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures | ||||
12004 | $old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures | ||||
12005 | $new_csc =~ s/(\.\.\.)$//; # trim trailing '...' | ||||
12006 | my $new_trailing_dots = $1; | ||||
12007 | $old_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...' | ||||
12008 | |||||
12009 | # Patch to handle multiple closing side comments at | ||||
12010 | # else and elsif's. These have become too complicated | ||||
12011 | # to check, so if we see an indication of | ||||
12012 | # '[ if' or '[ # elsif', then assume they were made | ||||
12013 | # by perltidy. | ||||
12014 | if ( $block_type_to_go[$i_terminal] eq 'else' ) { | ||||
12015 | if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc } | ||||
12016 | } | ||||
12017 | elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) { | ||||
12018 | if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc } | ||||
12019 | } | ||||
12020 | |||||
12021 | # if old comment is contained in new comment, | ||||
12022 | # only compare the common part. | ||||
12023 | if ( length($new_csc) > length($old_csc) ) { | ||||
12024 | $new_csc = substr( $new_csc, 0, length($old_csc) ); | ||||
12025 | } | ||||
12026 | |||||
12027 | # if the new comment is shorter and has been limited, | ||||
12028 | # only compare the common part. | ||||
12029 | if ( length($new_csc) < length($old_csc) | ||||
12030 | && $new_trailing_dots ) | ||||
12031 | { | ||||
12032 | $old_csc = substr( $old_csc, 0, length($new_csc) ); | ||||
12033 | } | ||||
12034 | |||||
12035 | # any remaining difference? | ||||
12036 | if ( $new_csc ne $old_csc ) { | ||||
12037 | |||||
12038 | # just leave the old comment if we are below the threshold | ||||
12039 | # for creating side comments | ||||
12040 | if ( $block_line_count < | ||||
12041 | $rOpts->{'closing-side-comment-interval'} ) | ||||
12042 | { | ||||
12043 | $token = undef; | ||||
12044 | } | ||||
12045 | |||||
12046 | # otherwise we'll make a note of it | ||||
12047 | else { | ||||
12048 | |||||
12049 | warning( | ||||
12050 | "perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n" | ||||
12051 | ); | ||||
12052 | |||||
12053 | # save the old side comment in a new trailing block comment | ||||
12054 | my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ]; | ||||
12055 | $year += 1900; | ||||
12056 | $month += 1; | ||||
12057 | $cscw_block_comment = | ||||
12058 | "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]"; | ||||
12059 | } | ||||
12060 | } | ||||
12061 | else { | ||||
12062 | |||||
12063 | # No differences.. we can safely delete old comment if we | ||||
12064 | # are below the threshold | ||||
12065 | if ( $block_line_count < | ||||
12066 | $rOpts->{'closing-side-comment-interval'} ) | ||||
12067 | { | ||||
12068 | $token = undef; | ||||
12069 | unstore_token_to_go() | ||||
12070 | if ( $types_to_go[$max_index_to_go] eq '#' ); | ||||
12071 | unstore_token_to_go() | ||||
12072 | if ( $types_to_go[$max_index_to_go] eq 'b' ); | ||||
12073 | } | ||||
12074 | } | ||||
12075 | } | ||||
12076 | |||||
12077 | # switch to the new csc (unless we deleted it!) | ||||
12078 | $tokens_to_go[$max_index_to_go] = $token if $token; | ||||
12079 | } | ||||
12080 | |||||
12081 | # handle case of NO existing closing side comment | ||||
12082 | else { | ||||
12083 | |||||
12084 | # insert the new side comment into the output token stream | ||||
12085 | my $type = '#'; | ||||
12086 | my $block_type = ''; | ||||
12087 | my $type_sequence = ''; | ||||
12088 | my $container_environment = | ||||
12089 | $container_environment_to_go[$max_index_to_go]; | ||||
12090 | my $level = $levels_to_go[$max_index_to_go]; | ||||
12091 | my $slevel = $nesting_depth_to_go[$max_index_to_go]; | ||||
12092 | my $no_internal_newlines = 0; | ||||
12093 | |||||
12094 | my $nesting_blocks = $nesting_blocks_to_go[$max_index_to_go]; | ||||
12095 | my $ci_level = $ci_levels_to_go[$max_index_to_go]; | ||||
12096 | my $in_continued_quote = 0; | ||||
12097 | |||||
12098 | # first insert a blank token | ||||
12099 | insert_new_token_to_go( ' ', 'b', $slevel, $no_internal_newlines ); | ||||
12100 | |||||
12101 | # then the side comment | ||||
12102 | insert_new_token_to_go( $token, $type, $slevel, | ||||
12103 | $no_internal_newlines ); | ||||
12104 | } | ||||
12105 | } | ||||
12106 | return $cscw_block_comment; | ||||
12107 | } | ||||
12108 | |||||
12109 | sub previous_nonblank_token { | ||||
12110 | my ($i) = @_; | ||||
12111 | my $name = ""; | ||||
12112 | my $im = $i - 1; | ||||
12113 | return "" if ( $im < 0 ); | ||||
12114 | if ( $types_to_go[$im] eq 'b' ) { $im--; } | ||||
12115 | return "" if ( $im < 0 ); | ||||
12116 | $name = $tokens_to_go[$im]; | ||||
12117 | |||||
12118 | # prepend any sub name to an isolated -> to avoid unwanted alignments | ||||
12119 | # [test case is test8/penco.pl] | ||||
12120 | if ( $name eq '->' ) { | ||||
12121 | $im--; | ||||
12122 | if ( $im >= 0 && $types_to_go[$im] ne 'b' ) { | ||||
12123 | $name = $tokens_to_go[$im] . $name; | ||||
12124 | } | ||||
12125 | } | ||||
12126 | return $name; | ||||
12127 | } | ||||
12128 | |||||
12129 | sub send_lines_to_vertical_aligner { | ||||
12130 | |||||
12131 | my ( $ri_first, $ri_last, $do_not_pad ) = @_; | ||||
12132 | |||||
12133 | my $rindentation_list = [0]; # ref to indentations for each line | ||||
12134 | |||||
12135 | # define the array @matching_token_to_go for the output tokens | ||||
12136 | # which will be non-blank for each special token (such as =>) | ||||
12137 | # for which alignment is required. | ||||
12138 | set_vertical_alignment_markers( $ri_first, $ri_last ); | ||||
12139 | |||||
12140 | # flush if necessary to avoid unwanted alignment | ||||
12141 | my $must_flush = 0; | ||||
12142 | if ( @$ri_first > 1 ) { | ||||
12143 | |||||
12144 | # flush before a long if statement | ||||
12145 | if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) { | ||||
12146 | $must_flush = 1; | ||||
12147 | } | ||||
12148 | } | ||||
12149 | if ($must_flush) { | ||||
12150 | Perl::Tidy::VerticalAligner::flush(); | ||||
12151 | } | ||||
12152 | |||||
12153 | undo_ci( $ri_first, $ri_last ); | ||||
12154 | |||||
12155 | set_logical_padding( $ri_first, $ri_last ); | ||||
12156 | |||||
12157 | # loop to prepare each line for shipment | ||||
12158 | my $n_last_line = @$ri_first - 1; | ||||
12159 | my $in_comma_list; | ||||
12160 | for my $n ( 0 .. $n_last_line ) { | ||||
12161 | my $ibeg = $$ri_first[$n]; | ||||
12162 | my $iend = $$ri_last[$n]; | ||||
12163 | |||||
12164 | my ( $rtokens, $rfields, $rpatterns ) = | ||||
12165 | make_alignment_patterns( $ibeg, $iend ); | ||||
12166 | |||||
12167 | # Set flag to show how much level changes between this line | ||||
12168 | # and the next line, if we have it. | ||||
12169 | my $ljump = 0; | ||||
12170 | if ( $n < $n_last_line ) { | ||||
12171 | my $ibegp = $$ri_first[ $n + 1 ]; | ||||
12172 | $ljump = $levels_to_go[$ibegp] - $levels_to_go[$iend]; | ||||
12173 | } | ||||
12174 | |||||
12175 | my ( $indentation, $lev, $level_end, $terminal_type, | ||||
12176 | $is_semicolon_terminated, $is_outdented_line ) | ||||
12177 | = set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns, | ||||
12178 | $ri_first, $ri_last, $rindentation_list, $ljump ); | ||||
12179 | |||||
12180 | # we will allow outdenting of long lines.. | ||||
12181 | my $outdent_long_lines = ( | ||||
12182 | |||||
12183 | # which are long quotes, if allowed | ||||
12184 | ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} ) | ||||
12185 | |||||
12186 | # which are long block comments, if allowed | ||||
12187 | || ( | ||||
12188 | $types_to_go[$ibeg] eq '#' | ||||
12189 | && $rOpts->{'outdent-long-comments'} | ||||
12190 | |||||
12191 | # but not if this is a static block comment | ||||
12192 | && !$is_static_block_comment | ||||
12193 | ) | ||||
12194 | ); | ||||
12195 | |||||
12196 | my $level_jump = | ||||
12197 | $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg]; | ||||
12198 | |||||
12199 | my $rvertical_tightness_flags = | ||||
12200 | set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend, | ||||
12201 | $ri_first, $ri_last ); | ||||
12202 | |||||
12203 | # flush an outdented line to avoid any unwanted vertical alignment | ||||
12204 | Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line); | ||||
12205 | |||||
12206 | # Set a flag at the final ':' of a ternary chain to request | ||||
12207 | # vertical alignment of the final term. Here is a | ||||
12208 | # slightly complex example: | ||||
12209 | # | ||||
12210 | # $self->{_text} = ( | ||||
12211 | # !$section ? '' | ||||
12212 | # : $type eq 'item' ? "the $section entry" | ||||
12213 | # : "the section on $section" | ||||
12214 | # ) | ||||
12215 | # . ( | ||||
12216 | # $page | ||||
12217 | # ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage" | ||||
12218 | # : ' elsewhere in this document' | ||||
12219 | # ); | ||||
12220 | # | ||||
12221 | my $is_terminal_ternary = 0; | ||||
12222 | if ( $tokens_to_go[$ibeg] eq ':' | ||||
12223 | || $n > 0 && $tokens_to_go[ $$ri_last[ $n - 1 ] ] eq ':' ) | ||||
12224 | { | ||||
12225 | my $last_leading_type = ":"; | ||||
12226 | if ( $n > 0 ) { | ||||
12227 | my $iprev = $$ri_first[ $n - 1 ]; | ||||
12228 | $last_leading_type = $types_to_go[$iprev]; | ||||
12229 | } | ||||
12230 | if ( $terminal_type ne ';' | ||||
12231 | && $n_last_line > $n | ||||
12232 | && $level_end == $lev ) | ||||
12233 | { | ||||
12234 | my $inext = $$ri_first[ $n + 1 ]; | ||||
12235 | $level_end = $levels_to_go[$inext]; | ||||
12236 | $terminal_type = $types_to_go[$inext]; | ||||
12237 | } | ||||
12238 | |||||
12239 | $is_terminal_ternary = $last_leading_type eq ':' | ||||
12240 | && ( ( $terminal_type eq ';' && $level_end <= $lev ) | ||||
12241 | || ( $terminal_type ne ':' && $level_end < $lev ) ) | ||||
12242 | |||||
12243 | # the terminal term must not contain any ternary terms, as in | ||||
12244 | # my $ECHO = ( | ||||
12245 | # $Is_MSWin32 ? ".\\echo$$" | ||||
12246 | # : $Is_MacOS ? ":echo$$" | ||||
12247 | # : ( $Is_NetWare ? "echo$$" : "./echo$$" ) | ||||
12248 | # ); | ||||
12249 | && !grep /^[\?\:]$/, @types_to_go[ $ibeg + 1 .. $iend ]; | ||||
12250 | } | ||||
12251 | |||||
12252 | # send this new line down the pipe | ||||
12253 | my $forced_breakpoint = $forced_breakpoint_to_go[$iend]; | ||||
12254 | Perl::Tidy::VerticalAligner::valign_input( | ||||
12255 | $lev, | ||||
12256 | $level_end, | ||||
12257 | $indentation, | ||||
12258 | $rfields, | ||||
12259 | $rtokens, | ||||
12260 | $rpatterns, | ||||
12261 | $forced_breakpoint_to_go[$iend] || $in_comma_list, | ||||
12262 | $outdent_long_lines, | ||||
12263 | $is_terminal_ternary, | ||||
12264 | $is_semicolon_terminated, | ||||
12265 | $do_not_pad, | ||||
12266 | $rvertical_tightness_flags, | ||||
12267 | $level_jump, | ||||
12268 | ); | ||||
12269 | $in_comma_list = | ||||
12270 | $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend]; | ||||
12271 | |||||
12272 | # flush an outdented line to avoid any unwanted vertical alignment | ||||
12273 | Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line); | ||||
12274 | |||||
12275 | $do_not_pad = 0; | ||||
12276 | |||||
12277 | # Set flag indicating if this line ends in an opening | ||||
12278 | # token and is very short, so that a blank line is not | ||||
12279 | # needed if the subsequent line is a comment. | ||||
12280 | # Examples of what we are looking for: | ||||
12281 | # { | ||||
12282 | # && ( | ||||
12283 | # BEGIN { | ||||
12284 | # default { | ||||
12285 | # sub { | ||||
12286 | $last_output_short_opening_token | ||||
12287 | |||||
12288 | # line ends in opening token | ||||
12289 | = $types_to_go[$iend] =~ /^[\{\(\[L]$/ | ||||
12290 | |||||
12291 | # and either | ||||
12292 | && ( | ||||
12293 | # line has either single opening token | ||||
12294 | $iend == $ibeg | ||||
12295 | |||||
12296 | # or is a single token followed by opening token. | ||||
12297 | # Note that sub identifiers have blanks like 'sub doit' | ||||
12298 | || ( $iend - $ibeg <= 2 && $tokens_to_go[$ibeg] !~ /\s+/ ) | ||||
12299 | ) | ||||
12300 | |||||
12301 | # and limit total to 10 character widths | ||||
12302 | && token_sequence_length( $ibeg, $iend ) <= 10; | ||||
12303 | |||||
12304 | } # end of loop to output each line | ||||
12305 | |||||
12306 | # remember indentation of lines containing opening containers for | ||||
12307 | # later use by sub set_adjusted_indentation | ||||
12308 | save_opening_indentation( $ri_first, $ri_last, $rindentation_list ); | ||||
12309 | } | ||||
12310 | |||||
12311 | { # begin make_alignment_patterns | ||||
12312 | |||||
12313 | 2 | 0s | my %block_type_map; | ||
12314 | 1 | 200ns | my %keyword_map; | ||
12315 | |||||
12316 | # spent 10µs within Perl::Tidy::Formatter::BEGIN@12316 which was called:
# once (10µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 12344 | ||||
12317 | |||||
12318 | # map related block names into a common name to | ||||
12319 | # allow alignment | ||||
12320 | 1 | 3µs | %block_type_map = ( | ||
12321 | 'unless' => 'if', | ||||
12322 | 'else' => 'if', | ||||
12323 | 'elsif' => 'if', | ||||
12324 | 'when' => 'if', | ||||
12325 | 'default' => 'if', | ||||
12326 | 'case' => 'if', | ||||
12327 | 'sort' => 'map', | ||||
12328 | 'grep' => 'map', | ||||
12329 | ); | ||||
12330 | |||||
12331 | # map certain keywords to the same 'if' class to align | ||||
12332 | # long if/elsif sequences. [elsif.pl] | ||||
12333 | 1 | 9µs | %keyword_map = ( | ||
12334 | 'unless' => 'if', | ||||
12335 | 'else' => 'if', | ||||
12336 | 'elsif' => 'if', | ||||
12337 | 'when' => 'given', | ||||
12338 | 'default' => 'given', | ||||
12339 | 'case' => 'switch', | ||||
12340 | |||||
12341 | # treat an 'undef' similar to numbers and quotes | ||||
12342 | 'undef' => 'Q', | ||||
12343 | ); | ||||
12344 | 1 | 1.02ms | 1 | 10µs | } # spent 10µs making 1 call to Perl::Tidy::Formatter::BEGIN@12316 |
12345 | |||||
12346 | sub make_alignment_patterns { | ||||
12347 | |||||
12348 | # Here we do some important preliminary work for the | ||||
12349 | # vertical aligner. We create three arrays for one | ||||
12350 | # output line. These arrays contain strings that can | ||||
12351 | # be tested by the vertical aligner to see if | ||||
12352 | # consecutive lines can be aligned vertically. | ||||
12353 | # | ||||
12354 | # The three arrays are indexed on the vertical | ||||
12355 | # alignment fields and are: | ||||
12356 | # @tokens - a list of any vertical alignment tokens for this line. | ||||
12357 | # These are tokens, such as '=' '&&' '#' etc which | ||||
12358 | # we want to might align vertically. These are | ||||
12359 | # decorated with various information such as | ||||
12360 | # nesting depth to prevent unwanted vertical | ||||
12361 | # alignment matches. | ||||
12362 | # @fields - the actual text of the line between the vertical alignment | ||||
12363 | # tokens. | ||||
12364 | # @patterns - a modified list of token types, one for each alignment | ||||
12365 | # field. These should normally each match before alignment is | ||||
12366 | # allowed, even when the alignment tokens match. | ||||
12367 | my ( $ibeg, $iend ) = @_; | ||||
12368 | my @tokens = (); | ||||
12369 | my @fields = (); | ||||
12370 | my @patterns = (); | ||||
12371 | my $i_start = $ibeg; | ||||
12372 | my $i; | ||||
12373 | |||||
12374 | my $depth = 0; | ||||
12375 | my @container_name = (""); | ||||
12376 | my @multiple_comma_arrows = (undef); | ||||
12377 | |||||
12378 | my $j = 0; # field index | ||||
12379 | |||||
12380 | $patterns[0] = ""; | ||||
12381 | for $i ( $ibeg .. $iend ) { | ||||
12382 | |||||
12383 | # Keep track of containers balanced on this line only. | ||||
12384 | # These are used below to prevent unwanted cross-line alignments. | ||||
12385 | # Unbalanced containers already avoid aligning across | ||||
12386 | # container boundaries. | ||||
12387 | if ( $tokens_to_go[$i] eq '(' ) { | ||||
12388 | |||||
12389 | # if container is balanced on this line... | ||||
12390 | my $i_mate = $mate_index_to_go[$i]; | ||||
12391 | if ( $i_mate > $i && $i_mate <= $iend ) { | ||||
12392 | $depth++; | ||||
12393 | my $seqno = $type_sequence_to_go[$i]; | ||||
12394 | my $count = comma_arrow_count($seqno); | ||||
12395 | $multiple_comma_arrows[$depth] = $count && $count > 1; | ||||
12396 | |||||
12397 | # Append the previous token name to make the container name | ||||
12398 | # more unique. This name will also be given to any commas | ||||
12399 | # within this container, and it helps avoid undesirable | ||||
12400 | # alignments of different types of containers. | ||||
12401 | my $name = previous_nonblank_token($i); | ||||
12402 | $name =~ s/^->//; | ||||
12403 | $container_name[$depth] = "+" . $name; | ||||
12404 | |||||
12405 | # Make the container name even more unique if necessary. | ||||
12406 | # If we are not vertically aligning this opening paren, | ||||
12407 | # append a character count to avoid bad alignment because | ||||
12408 | # it usually looks bad to align commas within containers | ||||
12409 | # for which the opening parens do not align. Here | ||||
12410 | # is an example very BAD alignment of commas (because | ||||
12411 | # the atan2 functions are not all aligned): | ||||
12412 | # $XY = | ||||
12413 | # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) + | ||||
12414 | # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) - | ||||
12415 | # $X * atan2( $X, 1 ) - | ||||
12416 | # $Y * atan2( $Y, 1 ); | ||||
12417 | # | ||||
12418 | # On the other hand, it is usually okay to align commas if | ||||
12419 | # opening parens align, such as: | ||||
12420 | # glVertex3d( $cx + $s * $xs, $cy, $z ); | ||||
12421 | # glVertex3d( $cx, $cy + $s * $ys, $z ); | ||||
12422 | # glVertex3d( $cx - $s * $xs, $cy, $z ); | ||||
12423 | # glVertex3d( $cx, $cy - $s * $ys, $z ); | ||||
12424 | # | ||||
12425 | # To distinguish between these situations, we will | ||||
12426 | # append the length of the line from the previous matching | ||||
12427 | # token, or beginning of line, to the function name. This | ||||
12428 | # will allow the vertical aligner to reject undesirable | ||||
12429 | # matches. | ||||
12430 | |||||
12431 | # if we are not aligning on this paren... | ||||
12432 | if ( $matching_token_to_go[$i] eq '' ) { | ||||
12433 | |||||
12434 | # Sum length from previous alignment, or start of line. | ||||
12435 | my $len = | ||||
12436 | ( $i_start == $ibeg ) | ||||
12437 | ? total_line_length( $i_start, $i - 1 ) | ||||
12438 | : token_sequence_length( $i_start, $i - 1 ); | ||||
12439 | |||||
12440 | # tack length onto the container name to make unique | ||||
12441 | $container_name[$depth] .= "-" . $len; | ||||
12442 | } | ||||
12443 | } | ||||
12444 | } | ||||
12445 | elsif ( $tokens_to_go[$i] eq ')' ) { | ||||
12446 | $depth-- if $depth > 0; | ||||
12447 | } | ||||
12448 | |||||
12449 | # if we find a new synchronization token, we are done with | ||||
12450 | # a field | ||||
12451 | if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) { | ||||
12452 | |||||
12453 | my $tok = my $raw_tok = $matching_token_to_go[$i]; | ||||
12454 | |||||
12455 | # make separators in different nesting depths unique | ||||
12456 | # by appending the nesting depth digit. | ||||
12457 | if ( $raw_tok ne '#' ) { | ||||
12458 | $tok .= "$nesting_depth_to_go[$i]"; | ||||
12459 | } | ||||
12460 | |||||
12461 | # also decorate commas with any container name to avoid | ||||
12462 | # unwanted cross-line alignments. | ||||
12463 | if ( $raw_tok eq ',' || $raw_tok eq '=>' ) { | ||||
12464 | if ( $container_name[$depth] ) { | ||||
12465 | $tok .= $container_name[$depth]; | ||||
12466 | } | ||||
12467 | } | ||||
12468 | |||||
12469 | # Patch to avoid aligning leading and trailing if, unless. | ||||
12470 | # Mark trailing if, unless statements with container names. | ||||
12471 | # This makes them different from leading if, unless which | ||||
12472 | # are not so marked at present. If we ever need to name | ||||
12473 | # them too, we could use ci to distinguish them. | ||||
12474 | # Example problem to avoid: | ||||
12475 | # return ( 2, "DBERROR" ) | ||||
12476 | # if ( $retval == 2 ); | ||||
12477 | # if ( scalar @_ ) { | ||||
12478 | # my ( $a, $b, $c, $d, $e, $f ) = @_; | ||||
12479 | # } | ||||
12480 | if ( $raw_tok eq '(' ) { | ||||
12481 | my $ci = $ci_levels_to_go[$ibeg]; | ||||
12482 | if ( $container_name[$depth] =~ /^\+(if|unless)/ | ||||
12483 | && $ci ) | ||||
12484 | { | ||||
12485 | $tok .= $container_name[$depth]; | ||||
12486 | } | ||||
12487 | } | ||||
12488 | |||||
12489 | # Decorate block braces with block types to avoid | ||||
12490 | # unwanted alignments such as the following: | ||||
12491 | # foreach ( @{$routput_array} ) { $fh->print($_) } | ||||
12492 | # eval { $fh->close() }; | ||||
12493 | if ( $raw_tok eq '{' && $block_type_to_go[$i] ) { | ||||
12494 | my $block_type = $block_type_to_go[$i]; | ||||
12495 | |||||
12496 | # map certain related block types to allow | ||||
12497 | # else blocks to align | ||||
12498 | $block_type = $block_type_map{$block_type} | ||||
12499 | if ( defined( $block_type_map{$block_type} ) ); | ||||
12500 | |||||
12501 | # remove sub names to allow one-line sub braces to align | ||||
12502 | # regardless of name | ||||
12503 | if ( $block_type =~ /^sub / ) { $block_type = 'sub' } | ||||
12504 | |||||
12505 | # allow all control-type blocks to align | ||||
12506 | if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' } | ||||
12507 | |||||
12508 | $tok .= $block_type; | ||||
12509 | } | ||||
12510 | |||||
12511 | # concatenate the text of the consecutive tokens to form | ||||
12512 | # the field | ||||
12513 | push( @fields, | ||||
12514 | join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) ); | ||||
12515 | |||||
12516 | # store the alignment token for this field | ||||
12517 | push( @tokens, $tok ); | ||||
12518 | |||||
12519 | # get ready for the next batch | ||||
12520 | $i_start = $i; | ||||
12521 | $j++; | ||||
12522 | $patterns[$j] = ""; | ||||
12523 | } | ||||
12524 | |||||
12525 | # continue accumulating tokens | ||||
12526 | # handle non-keywords.. | ||||
12527 | if ( $types_to_go[$i] ne 'k' ) { | ||||
12528 | my $type = $types_to_go[$i]; | ||||
12529 | |||||
12530 | # Mark most things before arrows as a quote to | ||||
12531 | # get them to line up. Testfile: mixed.pl. | ||||
12532 | if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) { | ||||
12533 | my $next_type = $types_to_go[ $i + 1 ]; | ||||
12534 | my $i_next_nonblank = | ||||
12535 | ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 ); | ||||
12536 | |||||
12537 | if ( $types_to_go[$i_next_nonblank] eq '=>' ) { | ||||
12538 | $type = 'Q'; | ||||
12539 | |||||
12540 | # Patch to ignore leading minus before words, | ||||
12541 | # by changing pattern 'mQ' into just 'Q', | ||||
12542 | # so that we can align things like this: | ||||
12543 | # Button => "Print letter \"~$_\"", | ||||
12544 | # -command => [ sub { print "$_[0]\n" }, $_ ], | ||||
12545 | if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" } | ||||
12546 | } | ||||
12547 | } | ||||
12548 | |||||
12549 | # patch to make numbers and quotes align | ||||
12550 | if ( $type eq 'n' ) { $type = 'Q' } | ||||
12551 | |||||
12552 | # patch to ignore any ! in patterns | ||||
12553 | if ( $type eq '!' ) { $type = '' } | ||||
12554 | |||||
12555 | $patterns[$j] .= $type; | ||||
12556 | } | ||||
12557 | |||||
12558 | # for keywords we have to use the actual text | ||||
12559 | else { | ||||
12560 | |||||
12561 | my $tok = $tokens_to_go[$i]; | ||||
12562 | |||||
12563 | # but map certain keywords to a common string to allow | ||||
12564 | # alignment. | ||||
12565 | $tok = $keyword_map{$tok} | ||||
12566 | if ( defined( $keyword_map{$tok} ) ); | ||||
12567 | $patterns[$j] .= $tok; | ||||
12568 | } | ||||
12569 | } | ||||
12570 | |||||
12571 | # done with this line .. join text of tokens to make the last field | ||||
12572 | push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) ); | ||||
12573 | return ( \@tokens, \@fields, \@patterns ); | ||||
12574 | } | ||||
12575 | |||||
12576 | } # end make_alignment_patterns | ||||
12577 | |||||
12578 | { # begin unmatched_indexes | ||||
12579 | |||||
12580 | # closure to keep track of unbalanced containers. | ||||
12581 | # arrays shared by the routines in this block: | ||||
12582 | 2 | 0s | my @unmatched_opening_indexes_in_this_batch; | ||
12583 | 1 | 0s | my @unmatched_closing_indexes_in_this_batch; | ||
12584 | 1 | 200ns | my %comma_arrow_count; | ||
12585 | |||||
12586 | sub is_unbalanced_batch { | ||||
12587 | @unmatched_opening_indexes_in_this_batch + | ||||
12588 | @unmatched_closing_indexes_in_this_batch; | ||||
12589 | } | ||||
12590 | |||||
12591 | sub comma_arrow_count { | ||||
12592 | my $seqno = $_[0]; | ||||
12593 | return $comma_arrow_count{$seqno}; | ||||
12594 | } | ||||
12595 | |||||
12596 | sub match_opening_and_closing_tokens { | ||||
12597 | |||||
12598 | # Match up indexes of opening and closing braces, etc, in this batch. | ||||
12599 | # This has to be done after all tokens are stored because unstoring | ||||
12600 | # of tokens would otherwise cause trouble. | ||||
12601 | |||||
12602 | @unmatched_opening_indexes_in_this_batch = (); | ||||
12603 | @unmatched_closing_indexes_in_this_batch = (); | ||||
12604 | %comma_arrow_count = (); | ||||
12605 | my $comma_arrow_count_contained = 0; | ||||
12606 | |||||
12607 | my ( $i, $i_mate, $token ); | ||||
12608 | foreach $i ( 0 .. $max_index_to_go ) { | ||||
12609 | if ( $type_sequence_to_go[$i] ) { | ||||
12610 | $token = $tokens_to_go[$i]; | ||||
12611 | if ( $token =~ /^[\(\[\{\?]$/ ) { | ||||
12612 | push @unmatched_opening_indexes_in_this_batch, $i; | ||||
12613 | } | ||||
12614 | elsif ( $token =~ /^[\)\]\}\:]$/ ) { | ||||
12615 | |||||
12616 | $i_mate = pop @unmatched_opening_indexes_in_this_batch; | ||||
12617 | if ( defined($i_mate) && $i_mate >= 0 ) { | ||||
12618 | if ( $type_sequence_to_go[$i_mate] == | ||||
12619 | $type_sequence_to_go[$i] ) | ||||
12620 | { | ||||
12621 | $mate_index_to_go[$i] = $i_mate; | ||||
12622 | $mate_index_to_go[$i_mate] = $i; | ||||
12623 | my $seqno = $type_sequence_to_go[$i]; | ||||
12624 | if ( $comma_arrow_count{$seqno} ) { | ||||
12625 | $comma_arrow_count_contained += | ||||
12626 | $comma_arrow_count{$seqno}; | ||||
12627 | } | ||||
12628 | } | ||||
12629 | else { | ||||
12630 | push @unmatched_opening_indexes_in_this_batch, | ||||
12631 | $i_mate; | ||||
12632 | push @unmatched_closing_indexes_in_this_batch, $i; | ||||
12633 | } | ||||
12634 | } | ||||
12635 | else { | ||||
12636 | push @unmatched_closing_indexes_in_this_batch, $i; | ||||
12637 | } | ||||
12638 | } | ||||
12639 | } | ||||
12640 | elsif ( $tokens_to_go[$i] eq '=>' ) { | ||||
12641 | if (@unmatched_opening_indexes_in_this_batch) { | ||||
12642 | my $j = $unmatched_opening_indexes_in_this_batch[-1]; | ||||
12643 | my $seqno = $type_sequence_to_go[$j]; | ||||
12644 | $comma_arrow_count{$seqno}++; | ||||
12645 | } | ||||
12646 | } | ||||
12647 | } | ||||
12648 | return $comma_arrow_count_contained; | ||||
12649 | } | ||||
12650 | |||||
12651 | sub save_opening_indentation { | ||||
12652 | |||||
12653 | # This should be called after each batch of tokens is output. It | ||||
12654 | # saves indentations of lines of all unmatched opening tokens. | ||||
12655 | # These will be used by sub get_opening_indentation. | ||||
12656 | |||||
12657 | my ( $ri_first, $ri_last, $rindentation_list ) = @_; | ||||
12658 | |||||
12659 | # we no longer need indentations of any saved indentations which | ||||
12660 | # are unmatched closing tokens in this batch, because we will | ||||
12661 | # never encounter them again. So we can delete them to keep | ||||
12662 | # the hash size down. | ||||
12663 | foreach (@unmatched_closing_indexes_in_this_batch) { | ||||
12664 | my $seqno = $type_sequence_to_go[$_]; | ||||
12665 | delete $saved_opening_indentation{$seqno}; | ||||
12666 | } | ||||
12667 | |||||
12668 | # we need to save indentations of any unmatched opening tokens | ||||
12669 | # in this batch because we may need them in a subsequent batch. | ||||
12670 | foreach (@unmatched_opening_indexes_in_this_batch) { | ||||
12671 | my $seqno = $type_sequence_to_go[$_]; | ||||
12672 | $saved_opening_indentation{$seqno} = [ | ||||
12673 | lookup_opening_indentation( | ||||
12674 | $_, $ri_first, $ri_last, $rindentation_list | ||||
12675 | ) | ||||
12676 | ]; | ||||
12677 | } | ||||
12678 | } | ||||
12679 | } # end unmatched_indexes | ||||
12680 | |||||
12681 | sub get_opening_indentation { | ||||
12682 | |||||
12683 | # get the indentation of the line which output the opening token | ||||
12684 | # corresponding to a given closing token in the current output batch. | ||||
12685 | # | ||||
12686 | # given: | ||||
12687 | # $i_closing - index in this line of a closing token ')' '}' or ']' | ||||
12688 | # | ||||
12689 | # $ri_first - reference to list of the first index $i for each output | ||||
12690 | # line in this batch | ||||
12691 | # $ri_last - reference to list of the last index $i for each output line | ||||
12692 | # in this batch | ||||
12693 | # $rindentation_list - reference to a list containing the indentation | ||||
12694 | # used for each line. | ||||
12695 | # | ||||
12696 | # return: | ||||
12697 | # -the indentation of the line which contained the opening token | ||||
12698 | # which matches the token at index $i_opening | ||||
12699 | # -and its offset (number of columns) from the start of the line | ||||
12700 | # | ||||
12701 | my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_; | ||||
12702 | |||||
12703 | # first, see if the opening token is in the current batch | ||||
12704 | my $i_opening = $mate_index_to_go[$i_closing]; | ||||
12705 | my ( $indent, $offset, $is_leading, $exists ); | ||||
12706 | $exists = 1; | ||||
12707 | if ( $i_opening >= 0 ) { | ||||
12708 | |||||
12709 | # it is..look up the indentation | ||||
12710 | ( $indent, $offset, $is_leading ) = | ||||
12711 | lookup_opening_indentation( $i_opening, $ri_first, $ri_last, | ||||
12712 | $rindentation_list ); | ||||
12713 | } | ||||
12714 | |||||
12715 | # if not, it should have been stored in the hash by a previous batch | ||||
12716 | else { | ||||
12717 | my $seqno = $type_sequence_to_go[$i_closing]; | ||||
12718 | if ($seqno) { | ||||
12719 | if ( $saved_opening_indentation{$seqno} ) { | ||||
12720 | ( $indent, $offset, $is_leading ) = | ||||
12721 | @{ $saved_opening_indentation{$seqno} }; | ||||
12722 | } | ||||
12723 | |||||
12724 | # some kind of serious error | ||||
12725 | # (example is badfile.t) | ||||
12726 | else { | ||||
12727 | $indent = 0; | ||||
12728 | $offset = 0; | ||||
12729 | $is_leading = 0; | ||||
12730 | $exists = 0; | ||||
12731 | } | ||||
12732 | } | ||||
12733 | |||||
12734 | # if no sequence number it must be an unbalanced container | ||||
12735 | else { | ||||
12736 | $indent = 0; | ||||
12737 | $offset = 0; | ||||
12738 | $is_leading = 0; | ||||
12739 | $exists = 0; | ||||
12740 | } | ||||
12741 | } | ||||
12742 | return ( $indent, $offset, $is_leading, $exists ); | ||||
12743 | } | ||||
12744 | |||||
12745 | sub lookup_opening_indentation { | ||||
12746 | |||||
12747 | # get the indentation of the line in the current output batch | ||||
12748 | # which output a selected opening token | ||||
12749 | # | ||||
12750 | # given: | ||||
12751 | # $i_opening - index of an opening token in the current output batch | ||||
12752 | # whose line indentation we need | ||||
12753 | # $ri_first - reference to list of the first index $i for each output | ||||
12754 | # line in this batch | ||||
12755 | # $ri_last - reference to list of the last index $i for each output line | ||||
12756 | # in this batch | ||||
12757 | # $rindentation_list - reference to a list containing the indentation | ||||
12758 | # used for each line. (NOTE: the first slot in | ||||
12759 | # this list is the last returned line number, and this is | ||||
12760 | # followed by the list of indentations). | ||||
12761 | # | ||||
12762 | # return | ||||
12763 | # -the indentation of the line which contained token $i_opening | ||||
12764 | # -and its offset (number of columns) from the start of the line | ||||
12765 | |||||
12766 | my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_; | ||||
12767 | |||||
12768 | my $nline = $rindentation_list->[0]; # line number of previous lookup | ||||
12769 | |||||
12770 | # reset line location if necessary | ||||
12771 | $nline = 0 if ( $i_opening < $ri_start->[$nline] ); | ||||
12772 | |||||
12773 | # find the correct line | ||||
12774 | unless ( $i_opening > $ri_last->[-1] ) { | ||||
12775 | while ( $i_opening > $ri_last->[$nline] ) { $nline++; } | ||||
12776 | } | ||||
12777 | |||||
12778 | # error - token index is out of bounds - shouldn't happen | ||||
12779 | else { | ||||
12780 | warning( | ||||
12781 | "non-fatal program bug in lookup_opening_indentation - index out of range\n" | ||||
12782 | ); | ||||
12783 | report_definite_bug(); | ||||
12784 | $nline = $#{$ri_last}; | ||||
12785 | } | ||||
12786 | |||||
12787 | $rindentation_list->[0] = | ||||
12788 | $nline; # save line number to start looking next call | ||||
12789 | my $ibeg = $ri_start->[$nline]; | ||||
12790 | my $offset = token_sequence_length( $ibeg, $i_opening ) - 1; | ||||
12791 | my $is_leading = ( $ibeg == $i_opening ); | ||||
12792 | return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading ); | ||||
12793 | } | ||||
12794 | |||||
12795 | { | ||||
12796 | 2 | 100ns | my %is_if_elsif_else_unless_while_until_for_foreach; | ||
12797 | |||||
12798 | # spent 10µs within Perl::Tidy::Formatter::BEGIN@12798 which was called:
# once (10µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 12807 | ||||
12799 | |||||
12800 | # These block types may have text between the keyword and opening | ||||
12801 | # curly. Note: 'else' does not, but must be included to allow trailing | ||||
12802 | # if/elsif text to be appended. | ||||
12803 | # patch for SWITCH/CASE: added 'case' and 'when' | ||||
12804 | 1 | 2µs | @_ = qw(if elsif else unless while until for foreach case when); | ||
12805 | 1 | 10µs | @is_if_elsif_else_unless_while_until_for_foreach{@_} = | ||
12806 | (1) x scalar(@_); | ||||
12807 | 1 | 1.53ms | 1 | 10µs | } # spent 10µs making 1 call to Perl::Tidy::Formatter::BEGIN@12798 |
12808 | |||||
12809 | sub set_adjusted_indentation { | ||||
12810 | |||||
12811 | # This routine has the final say regarding the actual indentation of | ||||
12812 | # a line. It starts with the basic indentation which has been | ||||
12813 | # defined for the leading token, and then takes into account any | ||||
12814 | # options that the user has set regarding special indenting and | ||||
12815 | # outdenting. | ||||
12816 | |||||
12817 | my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last, | ||||
12818 | $rindentation_list, $level_jump ) | ||||
12819 | = @_; | ||||
12820 | |||||
12821 | # we need to know the last token of this line | ||||
12822 | my ( $terminal_type, $i_terminal ) = | ||||
12823 | terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend ); | ||||
12824 | |||||
12825 | my $is_outdented_line = 0; | ||||
12826 | |||||
12827 | my $is_semicolon_terminated = $terminal_type eq ';' | ||||
12828 | && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg]; | ||||
12829 | |||||
12830 | ########################################################## | ||||
12831 | # Section 1: set a flag and a default indentation | ||||
12832 | # | ||||
12833 | # Most lines are indented according to the initial token. | ||||
12834 | # But it is common to outdent to the level just after the | ||||
12835 | # terminal token in certain cases... | ||||
12836 | # adjust_indentation flag: | ||||
12837 | # 0 - do not adjust | ||||
12838 | # 1 - outdent | ||||
12839 | # 2 - vertically align with opening token | ||||
12840 | # 3 - indent | ||||
12841 | ########################################################## | ||||
12842 | my $adjust_indentation = 0; | ||||
12843 | my $default_adjust_indentation = $adjust_indentation; | ||||
12844 | |||||
12845 | my ( | ||||
12846 | $opening_indentation, $opening_offset, | ||||
12847 | $is_leading, $opening_exists | ||||
12848 | ); | ||||
12849 | |||||
12850 | # if we are at a closing token of some type.. | ||||
12851 | if ( $types_to_go[$ibeg] =~ /^[\)\}\]R]$/ ) { | ||||
12852 | |||||
12853 | # get the indentation of the line containing the corresponding | ||||
12854 | # opening token | ||||
12855 | ( | ||||
12856 | $opening_indentation, $opening_offset, | ||||
12857 | $is_leading, $opening_exists | ||||
12858 | ) | ||||
12859 | = get_opening_indentation( $ibeg, $ri_first, $ri_last, | ||||
12860 | $rindentation_list ); | ||||
12861 | |||||
12862 | # First set the default behavior: | ||||
12863 | if ( | ||||
12864 | |||||
12865 | # default behavior is to outdent closing lines | ||||
12866 | # of the form: "); }; ]; )->xxx;" | ||||
12867 | $is_semicolon_terminated | ||||
12868 | |||||
12869 | # and 'cuddled parens' of the form: ")->pack(" | ||||
12870 | || ( | ||||
12871 | $terminal_type eq '(' | ||||
12872 | && $types_to_go[$ibeg] eq ')' | ||||
12873 | && ( $nesting_depth_to_go[$iend] + 1 == | ||||
12874 | $nesting_depth_to_go[$ibeg] ) | ||||
12875 | ) | ||||
12876 | |||||
12877 | # and when the next line is at a lower indentation level | ||||
12878 | # PATCH: and only if the style allows undoing continuation | ||||
12879 | # for all closing token types. We should really wait until | ||||
12880 | # the indentation of the next line is known and then make | ||||
12881 | # a decision, but that would require another pass. | ||||
12882 | || ( $level_jump < 0 && !$some_closing_token_indentation ) | ||||
12883 | ) | ||||
12884 | { | ||||
12885 | $adjust_indentation = 1; | ||||
12886 | } | ||||
12887 | |||||
12888 | # outdent something like '),' | ||||
12889 | if ( | ||||
12890 | $terminal_type eq ',' | ||||
12891 | |||||
12892 | # allow just one character before the comma | ||||
12893 | && $i_terminal == $ibeg + 1 | ||||
12894 | |||||
12895 | # require LIST environment; otherwise, we may outdent too much - | ||||
12896 | # this can happen in calls without parentheses (overload.t); | ||||
12897 | && $container_environment_to_go[$i_terminal] eq 'LIST' | ||||
12898 | ) | ||||
12899 | { | ||||
12900 | $adjust_indentation = 1; | ||||
12901 | } | ||||
12902 | |||||
12903 | # undo continuation indentation of a terminal closing token if | ||||
12904 | # it is the last token before a level decrease. This will allow | ||||
12905 | # a closing token to line up with its opening counterpart, and | ||||
12906 | # avoids a indentation jump larger than 1 level. | ||||
12907 | if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/ | ||||
12908 | && $i_terminal == $ibeg ) | ||||
12909 | { | ||||
12910 | my $ci = $ci_levels_to_go[$ibeg]; | ||||
12911 | my $lev = $levels_to_go[$ibeg]; | ||||
12912 | my $next_type = $types_to_go[ $ibeg + 1 ]; | ||||
12913 | my $i_next_nonblank = | ||||
12914 | ( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 ); | ||||
12915 | if ( $i_next_nonblank <= $max_index_to_go | ||||
12916 | && $levels_to_go[$i_next_nonblank] < $lev ) | ||||
12917 | { | ||||
12918 | $adjust_indentation = 1; | ||||
12919 | } | ||||
12920 | } | ||||
12921 | |||||
12922 | # YVES patch 1 of 2: | ||||
12923 | # Undo ci of line with leading closing eval brace, | ||||
12924 | # but not beyond the indention of the line with | ||||
12925 | # the opening brace. | ||||
12926 | if ( $block_type_to_go[$ibeg] eq 'eval' | ||||
12927 | && !$rOpts->{'line-up-parentheses'} | ||||
12928 | && !$rOpts->{'indent-closing-brace'} ) | ||||
12929 | { | ||||
12930 | ( | ||||
12931 | $opening_indentation, $opening_offset, | ||||
12932 | $is_leading, $opening_exists | ||||
12933 | ) | ||||
12934 | = get_opening_indentation( $ibeg, $ri_first, $ri_last, | ||||
12935 | $rindentation_list ); | ||||
12936 | my $indentation = $leading_spaces_to_go[$ibeg]; | ||||
12937 | if ( defined($opening_indentation) | ||||
12938 | && $indentation > $opening_indentation ) | ||||
12939 | { | ||||
12940 | $adjust_indentation = 1; | ||||
12941 | } | ||||
12942 | } | ||||
12943 | |||||
12944 | $default_adjust_indentation = $adjust_indentation; | ||||
12945 | |||||
12946 | # Now modify default behavior according to user request: | ||||
12947 | # handle option to indent non-blocks of the form ); }; ]; | ||||
12948 | # But don't do special indentation to something like ')->pack(' | ||||
12949 | if ( !$block_type_to_go[$ibeg] ) { | ||||
12950 | my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] }; | ||||
12951 | if ( $cti == 1 ) { | ||||
12952 | if ( $i_terminal <= $ibeg + 1 | ||||
12953 | || $is_semicolon_terminated ) | ||||
12954 | { | ||||
12955 | $adjust_indentation = 2; | ||||
12956 | } | ||||
12957 | else { | ||||
12958 | $adjust_indentation = 0; | ||||
12959 | } | ||||
12960 | } | ||||
12961 | elsif ( $cti == 2 ) { | ||||
12962 | if ($is_semicolon_terminated) { | ||||
12963 | $adjust_indentation = 3; | ||||
12964 | } | ||||
12965 | else { | ||||
12966 | $adjust_indentation = 0; | ||||
12967 | } | ||||
12968 | } | ||||
12969 | elsif ( $cti == 3 ) { | ||||
12970 | $adjust_indentation = 3; | ||||
12971 | } | ||||
12972 | } | ||||
12973 | |||||
12974 | # handle option to indent blocks | ||||
12975 | else { | ||||
12976 | if ( | ||||
12977 | $rOpts->{'indent-closing-brace'} | ||||
12978 | && ( | ||||
12979 | $i_terminal == $ibeg # isolated terminal '}' | ||||
12980 | || $is_semicolon_terminated | ||||
12981 | ) | ||||
12982 | ) # } xxxx ; | ||||
12983 | { | ||||
12984 | $adjust_indentation = 3; | ||||
12985 | } | ||||
12986 | } | ||||
12987 | } | ||||
12988 | |||||
12989 | # if at ');', '};', '>;', and '];' of a terminal qw quote | ||||
12990 | elsif ($$rpatterns[0] =~ /^qb*;$/ | ||||
12991 | && $$rfields[0] =~ /^([\)\}\]\>]);$/ ) | ||||
12992 | { | ||||
12993 | if ( $closing_token_indentation{$1} == 0 ) { | ||||
12994 | $adjust_indentation = 1; | ||||
12995 | } | ||||
12996 | else { | ||||
12997 | $adjust_indentation = 3; | ||||
12998 | } | ||||
12999 | } | ||||
13000 | |||||
13001 | # if line begins with a ':', align it with any | ||||
13002 | # previous line leading with corresponding ? | ||||
13003 | elsif ( $types_to_go[$ibeg] eq ':' ) { | ||||
13004 | ( | ||||
13005 | $opening_indentation, $opening_offset, | ||||
13006 | $is_leading, $opening_exists | ||||
13007 | ) | ||||
13008 | = get_opening_indentation( $ibeg, $ri_first, $ri_last, | ||||
13009 | $rindentation_list ); | ||||
13010 | if ($is_leading) { $adjust_indentation = 2; } | ||||
13011 | } | ||||
13012 | |||||
13013 | ########################################################## | ||||
13014 | # Section 2: set indentation according to flag set above | ||||
13015 | # | ||||
13016 | # Select the indentation object to define leading | ||||
13017 | # whitespace. If we are outdenting something like '} } );' | ||||
13018 | # then we want to use one level below the last token | ||||
13019 | # ($i_terminal) in order to get it to fully outdent through | ||||
13020 | # all levels. | ||||
13021 | ########################################################## | ||||
13022 | my $indentation; | ||||
13023 | my $lev; | ||||
13024 | my $level_end = $levels_to_go[$iend]; | ||||
13025 | |||||
13026 | if ( $adjust_indentation == 0 ) { | ||||
13027 | $indentation = $leading_spaces_to_go[$ibeg]; | ||||
13028 | $lev = $levels_to_go[$ibeg]; | ||||
13029 | } | ||||
13030 | elsif ( $adjust_indentation == 1 ) { | ||||
13031 | $indentation = $reduced_spaces_to_go[$i_terminal]; | ||||
13032 | $lev = $levels_to_go[$i_terminal]; | ||||
13033 | } | ||||
13034 | |||||
13035 | # handle indented closing token which aligns with opening token | ||||
13036 | elsif ( $adjust_indentation == 2 ) { | ||||
13037 | |||||
13038 | # handle option to align closing token with opening token | ||||
13039 | $lev = $levels_to_go[$ibeg]; | ||||
13040 | |||||
13041 | # calculate spaces needed to align with opening token | ||||
13042 | my $space_count = | ||||
13043 | get_SPACES($opening_indentation) + $opening_offset; | ||||
13044 | |||||
13045 | # Indent less than the previous line. | ||||
13046 | # | ||||
13047 | # Problem: For -lp we don't exactly know what it was if there | ||||
13048 | # were recoverable spaces sent to the aligner. A good solution | ||||
13049 | # would be to force a flush of the vertical alignment buffer, so | ||||
13050 | # that we would know. For now, this rule is used for -lp: | ||||
13051 | # | ||||
13052 | # When the last line did not start with a closing token we will | ||||
13053 | # be optimistic that the aligner will recover everything wanted. | ||||
13054 | # | ||||
13055 | # This rule will prevent us from breaking a hierarchy of closing | ||||
13056 | # tokens, and in a worst case will leave a closing paren too far | ||||
13057 | # indented, but this is better than frequently leaving it not | ||||
13058 | # indented enough. | ||||
13059 | my $last_spaces = get_SPACES($last_indentation_written); | ||||
13060 | if ( $last_leading_token !~ /^[\}\]\)]$/ ) { | ||||
13061 | $last_spaces += | ||||
13062 | get_RECOVERABLE_SPACES($last_indentation_written); | ||||
13063 | } | ||||
13064 | |||||
13065 | # reset the indentation to the new space count if it works | ||||
13066 | # only options are all or none: nothing in-between looks good | ||||
13067 | $lev = $levels_to_go[$ibeg]; | ||||
13068 | if ( $space_count < $last_spaces ) { | ||||
13069 | if ($rOpts_line_up_parentheses) { | ||||
13070 | my $lev = $levels_to_go[$ibeg]; | ||||
13071 | $indentation = | ||||
13072 | new_lp_indentation_item( $space_count, $lev, 0, 0, 0 ); | ||||
13073 | } | ||||
13074 | else { | ||||
13075 | $indentation = $space_count; | ||||
13076 | } | ||||
13077 | } | ||||
13078 | |||||
13079 | # revert to default if it doesn't work | ||||
13080 | else { | ||||
13081 | $space_count = leading_spaces_to_go($ibeg); | ||||
13082 | if ( $default_adjust_indentation == 0 ) { | ||||
13083 | $indentation = $leading_spaces_to_go[$ibeg]; | ||||
13084 | } | ||||
13085 | elsif ( $default_adjust_indentation == 1 ) { | ||||
13086 | $indentation = $reduced_spaces_to_go[$i_terminal]; | ||||
13087 | $lev = $levels_to_go[$i_terminal]; | ||||
13088 | } | ||||
13089 | } | ||||
13090 | } | ||||
13091 | |||||
13092 | # Full indentaion of closing tokens (-icb and -icp or -cti=2) | ||||
13093 | else { | ||||
13094 | |||||
13095 | # handle -icb (indented closing code block braces) | ||||
13096 | # Updated method for indented block braces: indent one full level if | ||||
13097 | # there is no continuation indentation. This will occur for major | ||||
13098 | # structures such as sub, if, else, but not for things like map | ||||
13099 | # blocks. | ||||
13100 | # | ||||
13101 | # Note: only code blocks without continuation indentation are | ||||
13102 | # handled here (if, else, unless, ..). In the following snippet, | ||||
13103 | # the terminal brace of the sort block will have continuation | ||||
13104 | # indentation as shown so it will not be handled by the coding | ||||
13105 | # here. We would have to undo the continuation indentation to do | ||||
13106 | # this, but it probably looks ok as is. This is a possible future | ||||
13107 | # update for semicolon terminated lines. | ||||
13108 | # | ||||
13109 | # if ($sortby eq 'date' or $sortby eq 'size') { | ||||
13110 | # @files = sort { | ||||
13111 | # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby} | ||||
13112 | # or $a cmp $b | ||||
13113 | # } @files; | ||||
13114 | # } | ||||
13115 | # | ||||
13116 | if ( $block_type_to_go[$ibeg] | ||||
13117 | && $ci_levels_to_go[$i_terminal] == 0 ) | ||||
13118 | { | ||||
13119 | my $spaces = get_SPACES( $leading_spaces_to_go[$i_terminal] ); | ||||
13120 | $indentation = $spaces + $rOpts_indent_columns; | ||||
13121 | |||||
13122 | # NOTE: for -lp we could create a new indentation object, but | ||||
13123 | # there is probably no need to do it | ||||
13124 | } | ||||
13125 | |||||
13126 | # handle -icp and any -icb block braces which fall through above | ||||
13127 | # test such as the 'sort' block mentioned above. | ||||
13128 | else { | ||||
13129 | |||||
13130 | # There are currently two ways to handle -icp... | ||||
13131 | # One way is to use the indentation of the previous line: | ||||
13132 | # $indentation = $last_indentation_written; | ||||
13133 | |||||
13134 | # The other way is to use the indentation that the previous line | ||||
13135 | # would have had if it hadn't been adjusted: | ||||
13136 | $indentation = $last_unadjusted_indentation; | ||||
13137 | |||||
13138 | # Current method: use the minimum of the two. This avoids | ||||
13139 | # inconsistent indentation. | ||||
13140 | if ( get_SPACES($last_indentation_written) < | ||||
13141 | get_SPACES($indentation) ) | ||||
13142 | { | ||||
13143 | $indentation = $last_indentation_written; | ||||
13144 | } | ||||
13145 | } | ||||
13146 | |||||
13147 | # use previous indentation but use own level | ||||
13148 | # to cause list to be flushed properly | ||||
13149 | $lev = $levels_to_go[$ibeg]; | ||||
13150 | } | ||||
13151 | |||||
13152 | # remember indentation except for multi-line quotes, which get | ||||
13153 | # no indentation | ||||
13154 | unless ( $ibeg == 0 && $starting_in_quote ) { | ||||
13155 | $last_indentation_written = $indentation; | ||||
13156 | $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg]; | ||||
13157 | $last_leading_token = $tokens_to_go[$ibeg]; | ||||
13158 | } | ||||
13159 | |||||
13160 | # be sure lines with leading closing tokens are not outdented more | ||||
13161 | # than the line which contained the corresponding opening token. | ||||
13162 | |||||
13163 | ############################################################# | ||||
13164 | # updated per bug report in alex_bug.pl: we must not | ||||
13165 | # mess with the indentation of closing logical braces so | ||||
13166 | # we must treat something like '} else {' as if it were | ||||
13167 | # an isolated brace my $is_isolated_block_brace = ( | ||||
13168 | # $iend == $ibeg ) && $block_type_to_go[$ibeg]; | ||||
13169 | ############################################################# | ||||
13170 | my $is_isolated_block_brace = $block_type_to_go[$ibeg] | ||||
13171 | && ( $iend == $ibeg | ||||
13172 | || $is_if_elsif_else_unless_while_until_for_foreach{ | ||||
13173 | $block_type_to_go[$ibeg] | ||||
13174 | } ); | ||||
13175 | |||||
13176 | # only do this for a ':; which is aligned with its leading '?' | ||||
13177 | my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading; | ||||
13178 | if ( defined($opening_indentation) | ||||
13179 | && !$is_isolated_block_brace | ||||
13180 | && !$is_unaligned_colon ) | ||||
13181 | { | ||||
13182 | if ( get_SPACES($opening_indentation) > get_SPACES($indentation) ) { | ||||
13183 | $indentation = $opening_indentation; | ||||
13184 | } | ||||
13185 | } | ||||
13186 | |||||
13187 | # remember the indentation of each line of this batch | ||||
13188 | push @{$rindentation_list}, $indentation; | ||||
13189 | |||||
13190 | # outdent lines with certain leading tokens... | ||||
13191 | if ( | ||||
13192 | |||||
13193 | # must be first word of this batch | ||||
13194 | $ibeg == 0 | ||||
13195 | |||||
13196 | # and ... | ||||
13197 | && ( | ||||
13198 | |||||
13199 | # certain leading keywords if requested | ||||
13200 | ( | ||||
13201 | $rOpts->{'outdent-keywords'} | ||||
13202 | && $types_to_go[$ibeg] eq 'k' | ||||
13203 | && $outdent_keyword{ $tokens_to_go[$ibeg] } | ||||
13204 | ) | ||||
13205 | |||||
13206 | # or labels if requested | ||||
13207 | || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' ) | ||||
13208 | |||||
13209 | # or static block comments if requested | ||||
13210 | || ( $types_to_go[$ibeg] eq '#' | ||||
13211 | && $rOpts->{'outdent-static-block-comments'} | ||||
13212 | && $is_static_block_comment ) | ||||
13213 | ) | ||||
13214 | ) | ||||
13215 | |||||
13216 | { | ||||
13217 | my $space_count = leading_spaces_to_go($ibeg); | ||||
13218 | if ( $space_count > 0 ) { | ||||
13219 | $space_count -= $rOpts_continuation_indentation; | ||||
13220 | $is_outdented_line = 1; | ||||
13221 | if ( $space_count < 0 ) { $space_count = 0 } | ||||
13222 | |||||
13223 | # do not promote a spaced static block comment to non-spaced; | ||||
13224 | # this is not normally necessary but could be for some | ||||
13225 | # unusual user inputs (such as -ci = -i) | ||||
13226 | if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) { | ||||
13227 | $space_count = 1; | ||||
13228 | } | ||||
13229 | |||||
13230 | if ($rOpts_line_up_parentheses) { | ||||
13231 | $indentation = | ||||
13232 | new_lp_indentation_item( $space_count, $lev, 0, 0, 0 ); | ||||
13233 | } | ||||
13234 | else { | ||||
13235 | $indentation = $space_count; | ||||
13236 | } | ||||
13237 | } | ||||
13238 | } | ||||
13239 | |||||
13240 | return ( $indentation, $lev, $level_end, $terminal_type, | ||||
13241 | $is_semicolon_terminated, $is_outdented_line ); | ||||
13242 | } | ||||
13243 | } | ||||
13244 | |||||
13245 | sub set_vertical_tightness_flags { | ||||
13246 | |||||
13247 | my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_; | ||||
13248 | |||||
13249 | # Define vertical tightness controls for the nth line of a batch. | ||||
13250 | # We create an array of parameters which tell the vertical aligner | ||||
13251 | # if we should combine this line with the next line to achieve the | ||||
13252 | # desired vertical tightness. The array of parameters contains: | ||||
13253 | # | ||||
13254 | # [0] type: 1=opening non-block 2=closing non-block | ||||
13255 | # 3=opening block brace 4=closing block brace | ||||
13256 | # | ||||
13257 | # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok | ||||
13258 | # if closing: spaces of padding to use | ||||
13259 | # [2] sequence number of container | ||||
13260 | # [3] valid flag: do not append if this flag is false. Will be | ||||
13261 | # true if appropriate -vt flag is set. Otherwise, Will be | ||||
13262 | # made true only for 2 line container in parens with -lp | ||||
13263 | # | ||||
13264 | # These flags are used by sub set_leading_whitespace in | ||||
13265 | # the vertical aligner | ||||
13266 | |||||
13267 | my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ]; | ||||
13268 | |||||
13269 | #-------------------------------------------------------------- | ||||
13270 | # Vertical Tightness Flags Section 1: | ||||
13271 | # Handle Lines 1 .. n-1 but not the last line | ||||
13272 | # For non-BLOCK tokens, we will need to examine the next line | ||||
13273 | # too, so we won't consider the last line. | ||||
13274 | #-------------------------------------------------------------- | ||||
13275 | if ( $n < $n_last_line ) { | ||||
13276 | |||||
13277 | #-------------------------------------------------------------- | ||||
13278 | # Vertical Tightness Flags Section 1a: | ||||
13279 | # Look for Type 1, last token of this line is a non-block opening token | ||||
13280 | #-------------------------------------------------------------- | ||||
13281 | my $ibeg_next = $$ri_first[ $n + 1 ]; | ||||
13282 | my $token_end = $tokens_to_go[$iend]; | ||||
13283 | my $iend_next = $$ri_last[ $n + 1 ]; | ||||
13284 | if ( | ||||
13285 | $type_sequence_to_go[$iend] | ||||
13286 | && !$block_type_to_go[$iend] | ||||
13287 | && $is_opening_token{$token_end} | ||||
13288 | && ( | ||||
13289 | $opening_vertical_tightness{$token_end} > 0 | ||||
13290 | |||||
13291 | # allow 2-line method call to be closed up | ||||
13292 | || ( $rOpts_line_up_parentheses | ||||
13293 | && $token_end eq '(' | ||||
13294 | && $iend > $ibeg | ||||
13295 | && $types_to_go[ $iend - 1 ] ne 'b' ) | ||||
13296 | ) | ||||
13297 | ) | ||||
13298 | { | ||||
13299 | |||||
13300 | # avoid multiple jumps in nesting depth in one line if | ||||
13301 | # requested | ||||
13302 | my $ovt = $opening_vertical_tightness{$token_end}; | ||||
13303 | my $iend_next = $$ri_last[ $n + 1 ]; | ||||
13304 | unless ( | ||||
13305 | $ovt < 2 | ||||
13306 | && ( $nesting_depth_to_go[ $iend_next + 1 ] != | ||||
13307 | $nesting_depth_to_go[$ibeg_next] ) | ||||
13308 | ) | ||||
13309 | { | ||||
13310 | |||||
13311 | # If -vt flag has not been set, mark this as invalid | ||||
13312 | # and aligner will validate it if it sees the closing paren | ||||
13313 | # within 2 lines. | ||||
13314 | my $valid_flag = $ovt; | ||||
13315 | @{$rvertical_tightness_flags} = | ||||
13316 | ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag ); | ||||
13317 | } | ||||
13318 | } | ||||
13319 | |||||
13320 | #-------------------------------------------------------------- | ||||
13321 | # Vertical Tightness Flags Section 1b: | ||||
13322 | # Look for Type 2, first token of next line is a non-block closing | ||||
13323 | # token .. and be sure this line does not have a side comment | ||||
13324 | #-------------------------------------------------------------- | ||||
13325 | my $token_next = $tokens_to_go[$ibeg_next]; | ||||
13326 | if ( $type_sequence_to_go[$ibeg_next] | ||||
13327 | && !$block_type_to_go[$ibeg_next] | ||||
13328 | && $is_closing_token{$token_next} | ||||
13329 | && $types_to_go[$iend] !~ '#' ) # for safety, shouldn't happen! | ||||
13330 | { | ||||
13331 | my $ovt = $opening_vertical_tightness{$token_next}; | ||||
13332 | my $cvt = $closing_vertical_tightness{$token_next}; | ||||
13333 | if ( | ||||
13334 | |||||
13335 | # never append a trailing line like )->pack( | ||||
13336 | # because it will throw off later alignment | ||||
13337 | ( | ||||
13338 | $nesting_depth_to_go[$ibeg_next] == | ||||
13339 | $nesting_depth_to_go[ $iend_next + 1 ] + 1 | ||||
13340 | ) | ||||
13341 | && ( | ||||
13342 | $cvt == 2 | ||||
13343 | || ( | ||||
13344 | $container_environment_to_go[$ibeg_next] ne 'LIST' | ||||
13345 | && ( | ||||
13346 | $cvt == 1 | ||||
13347 | |||||
13348 | # allow closing up 2-line method calls | ||||
13349 | || ( $rOpts_line_up_parentheses | ||||
13350 | && $token_next eq ')' ) | ||||
13351 | ) | ||||
13352 | ) | ||||
13353 | ) | ||||
13354 | ) | ||||
13355 | { | ||||
13356 | |||||
13357 | # decide which trailing closing tokens to append.. | ||||
13358 | my $ok = 0; | ||||
13359 | if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 } | ||||
13360 | else { | ||||
13361 | my $str = join( '', | ||||
13362 | @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] ); | ||||
13363 | |||||
13364 | # append closing token if followed by comment or ';' | ||||
13365 | if ( $str =~ /^b?[#;]/ ) { $ok = 1 } | ||||
13366 | } | ||||
13367 | |||||
13368 | if ($ok) { | ||||
13369 | my $valid_flag = $cvt; | ||||
13370 | @{$rvertical_tightness_flags} = ( | ||||
13371 | 2, | ||||
13372 | $tightness{$token_next} == 2 ? 0 : 1, | ||||
13373 | $type_sequence_to_go[$ibeg_next], $valid_flag, | ||||
13374 | ); | ||||
13375 | } | ||||
13376 | } | ||||
13377 | } | ||||
13378 | |||||
13379 | #-------------------------------------------------------------- | ||||
13380 | # Vertical Tightness Flags Section 1c: | ||||
13381 | # Implement the Opening Token Right flag (Type 2).. | ||||
13382 | # If requested, move an isolated trailing opening token to the end of | ||||
13383 | # the previous line which ended in a comma. We could do this | ||||
13384 | # in sub recombine_breakpoints but that would cause problems | ||||
13385 | # with -lp formatting. The problem is that indentation will | ||||
13386 | # quickly move far to the right in nested expressions. By | ||||
13387 | # doing it after indentation has been set, we avoid changes | ||||
13388 | # to the indentation. Actual movement of the token takes place | ||||
13389 | # in sub valign_output_step_B. | ||||
13390 | #-------------------------------------------------------------- | ||||
13391 | if ( | ||||
13392 | $opening_token_right{ $tokens_to_go[$ibeg_next] } | ||||
13393 | |||||
13394 | # previous line is not opening | ||||
13395 | # (use -sot to combine with it) | ||||
13396 | && !$is_opening_token{$token_end} | ||||
13397 | |||||
13398 | # previous line ended in one of these | ||||
13399 | # (add other cases if necessary; '=>' and '.' are not necessary | ||||
13400 | && !$block_type_to_go[$ibeg_next] | ||||
13401 | |||||
13402 | # this is a line with just an opening token | ||||
13403 | && ( $iend_next == $ibeg_next | ||||
13404 | || $iend_next == $ibeg_next + 2 | ||||
13405 | && $types_to_go[$iend_next] eq '#' ) | ||||
13406 | |||||
13407 | # looks bad if we align vertically with the wrong container | ||||
13408 | && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next] | ||||
13409 | ) | ||||
13410 | { | ||||
13411 | my $valid_flag = 1; | ||||
13412 | my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0; | ||||
13413 | @{$rvertical_tightness_flags} = | ||||
13414 | ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, ); | ||||
13415 | } | ||||
13416 | |||||
13417 | #-------------------------------------------------------------- | ||||
13418 | # Vertical Tightness Flags Section 1d: | ||||
13419 | # Stacking of opening and closing tokens (Type 2) | ||||
13420 | #-------------------------------------------------------------- | ||||
13421 | my $stackable; | ||||
13422 | my $token_beg_next = $tokens_to_go[$ibeg_next]; | ||||
13423 | |||||
13424 | # patch to make something like 'qw(' behave like an opening paren | ||||
13425 | # (aran.t) | ||||
13426 | if ( $types_to_go[$ibeg_next] eq 'q' ) { | ||||
13427 | if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) { | ||||
13428 | $token_beg_next = $1; | ||||
13429 | } | ||||
13430 | } | ||||
13431 | |||||
13432 | if ( $is_closing_token{$token_end} | ||||
13433 | && $is_closing_token{$token_beg_next} ) | ||||
13434 | { | ||||
13435 | $stackable = $stack_closing_token{$token_beg_next} | ||||
13436 | unless ( $block_type_to_go[$ibeg_next] ) | ||||
13437 | ; # shouldn't happen; just checking | ||||
13438 | } | ||||
13439 | elsif ($is_opening_token{$token_end} | ||||
13440 | && $is_opening_token{$token_beg_next} ) | ||||
13441 | { | ||||
13442 | $stackable = $stack_opening_token{$token_beg_next} | ||||
13443 | unless ( $block_type_to_go[$ibeg_next] ) | ||||
13444 | ; # shouldn't happen; just checking | ||||
13445 | } | ||||
13446 | |||||
13447 | if ($stackable) { | ||||
13448 | |||||
13449 | my $is_semicolon_terminated; | ||||
13450 | if ( $n + 1 == $n_last_line ) { | ||||
13451 | my ( $terminal_type, $i_terminal ) = terminal_type( | ||||
13452 | \@types_to_go, \@block_type_to_go, | ||||
13453 | $ibeg_next, $iend_next | ||||
13454 | ); | ||||
13455 | $is_semicolon_terminated = $terminal_type eq ';' | ||||
13456 | && $nesting_depth_to_go[$iend_next] < | ||||
13457 | $nesting_depth_to_go[$ibeg_next]; | ||||
13458 | } | ||||
13459 | |||||
13460 | # this must be a line with just an opening token | ||||
13461 | # or end in a semicolon | ||||
13462 | if ( | ||||
13463 | $is_semicolon_terminated | ||||
13464 | || ( $iend_next == $ibeg_next | ||||
13465 | || $iend_next == $ibeg_next + 2 | ||||
13466 | && $types_to_go[$iend_next] eq '#' ) | ||||
13467 | ) | ||||
13468 | { | ||||
13469 | my $valid_flag = 1; | ||||
13470 | my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0; | ||||
13471 | @{$rvertical_tightness_flags} = | ||||
13472 | ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, | ||||
13473 | ); | ||||
13474 | } | ||||
13475 | } | ||||
13476 | } | ||||
13477 | |||||
13478 | #-------------------------------------------------------------- | ||||
13479 | # Vertical Tightness Flags Section 2: | ||||
13480 | # Handle type 3, opening block braces on last line of the batch | ||||
13481 | # Check for a last line with isolated opening BLOCK curly | ||||
13482 | #-------------------------------------------------------------- | ||||
13483 | elsif ($rOpts_block_brace_vertical_tightness | ||||
13484 | && $ibeg eq $iend | ||||
13485 | && $types_to_go[$iend] eq '{' | ||||
13486 | && $block_type_to_go[$iend] =~ | ||||
13487 | /$block_brace_vertical_tightness_pattern/o ) | ||||
13488 | { | ||||
13489 | @{$rvertical_tightness_flags} = | ||||
13490 | ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 ); | ||||
13491 | } | ||||
13492 | |||||
13493 | #-------------------------------------------------------------- | ||||
13494 | # Vertical Tightness Flags Section 3: | ||||
13495 | # Handle type 4, a closing block brace on the last line of the batch Check | ||||
13496 | # for a last line with isolated closing BLOCK curly | ||||
13497 | #-------------------------------------------------------------- | ||||
13498 | elsif ($rOpts_stack_closing_block_brace | ||||
13499 | && $ibeg eq $iend | ||||
13500 | && $block_type_to_go[$iend] | ||||
13501 | && $types_to_go[$iend] eq '}' ) | ||||
13502 | { | ||||
13503 | my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1; | ||||
13504 | @{$rvertical_tightness_flags} = | ||||
13505 | ( 4, $spaces, $type_sequence_to_go[$iend], 1 ); | ||||
13506 | } | ||||
13507 | |||||
13508 | # pack in the sequence numbers of the ends of this line | ||||
13509 | $rvertical_tightness_flags->[4] = get_seqno($ibeg); | ||||
13510 | $rvertical_tightness_flags->[5] = get_seqno($iend); | ||||
13511 | return $rvertical_tightness_flags; | ||||
13512 | } | ||||
13513 | |||||
13514 | sub get_seqno { | ||||
13515 | |||||
13516 | # get opening and closing sequence numbers of a token for the vertical | ||||
13517 | # aligner. Assign qw quotes a value to allow qw opening and closing tokens | ||||
13518 | # to be treated somewhat like opening and closing tokens for stacking | ||||
13519 | # tokens by the vertical aligner. | ||||
13520 | my ($ii) = @_; | ||||
13521 | my $seqno = $type_sequence_to_go[$ii]; | ||||
13522 | if ( $types_to_go[$ii] eq 'q' ) { | ||||
13523 | my $SEQ_QW = -1; | ||||
13524 | if ( $ii > 0 ) { | ||||
13525 | $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/ ); | ||||
13526 | } | ||||
13527 | else { | ||||
13528 | if ( !$ending_in_quote ) { | ||||
13529 | $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /[\)\}\]]$/ ); | ||||
13530 | } | ||||
13531 | } | ||||
13532 | } | ||||
13533 | return ($seqno); | ||||
13534 | } | ||||
13535 | |||||
13536 | { | ||||
13537 | 2 | 0s | my %is_vertical_alignment_type; | ||
13538 | 1 | 0s | my %is_vertical_alignment_keyword; | ||
13539 | 1 | 300ns | my %is_terminal_alignment_type; | ||
13540 | |||||
13541 | # spent 30µs within Perl::Tidy::Formatter::BEGIN@13541 which was called:
# once (30µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 13557 | ||||
13542 | |||||
13543 | # Removed =~ from list to improve chances of alignment | ||||
13544 | 1 | 4µs | @_ = qw# | ||
13545 | = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x= | ||||
13546 | { ? : => && || // ~~ !~~ | ||||
13547 | #; | ||||
13548 | 1 | 12µs | @is_vertical_alignment_type{@_} = (1) x scalar(@_); | ||
13549 | |||||
13550 | # only align these at end of line | ||||
13551 | 1 | 3µs | @_ = qw(&& ||); | ||
13552 | 1 | 900ns | @is_terminal_alignment_type{@_} = (1) x scalar(@_); | ||
13553 | |||||
13554 | # eq and ne were removed from this list to improve alignment chances | ||||
13555 | 1 | 2µs | @_ = qw(if unless and or err for foreach while until); | ||
13556 | 1 | 9µs | @is_vertical_alignment_keyword{@_} = (1) x scalar(@_); | ||
13557 | 1 | 1.92ms | 1 | 30µs | } # spent 30µs making 1 call to Perl::Tidy::Formatter::BEGIN@13541 |
13558 | |||||
13559 | sub set_vertical_alignment_markers { | ||||
13560 | |||||
13561 | # This routine takes the first step toward vertical alignment of the | ||||
13562 | # lines of output text. It looks for certain tokens which can serve as | ||||
13563 | # vertical alignment markers (such as an '='). | ||||
13564 | # | ||||
13565 | # Method: We look at each token $i in this output batch and set | ||||
13566 | # $matching_token_to_go[$i] equal to those tokens at which we would | ||||
13567 | # accept vertical alignment. | ||||
13568 | |||||
13569 | # nothing to do if we aren't allowed to change whitespace | ||||
13570 | if ( !$rOpts_add_whitespace ) { | ||||
13571 | for my $i ( 0 .. $max_index_to_go ) { | ||||
13572 | $matching_token_to_go[$i] = ''; | ||||
13573 | } | ||||
13574 | return; | ||||
13575 | } | ||||
13576 | |||||
13577 | my ( $ri_first, $ri_last ) = @_; | ||||
13578 | |||||
13579 | # remember the index of last nonblank token before any sidecomment | ||||
13580 | my $i_terminal = $max_index_to_go; | ||||
13581 | if ( $types_to_go[$i_terminal] eq '#' ) { | ||||
13582 | if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) { | ||||
13583 | if ( $i_terminal > 0 ) { --$i_terminal } | ||||
13584 | } | ||||
13585 | } | ||||
13586 | |||||
13587 | # look at each line of this batch.. | ||||
13588 | my $last_vertical_alignment_before_index; | ||||
13589 | my $vert_last_nonblank_type; | ||||
13590 | my $vert_last_nonblank_token; | ||||
13591 | my $vert_last_nonblank_block_type; | ||||
13592 | my $max_line = @$ri_first - 1; | ||||
13593 | my ( $i, $type, $token, $block_type, $alignment_type ); | ||||
13594 | my ( $ibeg, $iend, $line ); | ||||
13595 | |||||
13596 | foreach $line ( 0 .. $max_line ) { | ||||
13597 | $ibeg = $$ri_first[$line]; | ||||
13598 | $iend = $$ri_last[$line]; | ||||
13599 | $last_vertical_alignment_before_index = -1; | ||||
13600 | $vert_last_nonblank_type = ''; | ||||
13601 | $vert_last_nonblank_token = ''; | ||||
13602 | $vert_last_nonblank_block_type = ''; | ||||
13603 | |||||
13604 | # look at each token in this output line.. | ||||
13605 | foreach $i ( $ibeg .. $iend ) { | ||||
13606 | $alignment_type = ''; | ||||
13607 | $type = $types_to_go[$i]; | ||||
13608 | $block_type = $block_type_to_go[$i]; | ||||
13609 | $token = $tokens_to_go[$i]; | ||||
13610 | |||||
13611 | # check for flag indicating that we should not align | ||||
13612 | # this token | ||||
13613 | if ( $matching_token_to_go[$i] ) { | ||||
13614 | $matching_token_to_go[$i] = ''; | ||||
13615 | next; | ||||
13616 | } | ||||
13617 | |||||
13618 | #-------------------------------------------------------- | ||||
13619 | # First see if we want to align BEFORE this token | ||||
13620 | #-------------------------------------------------------- | ||||
13621 | |||||
13622 | # The first possible token that we can align before | ||||
13623 | # is index 2 because: 1) it doesn't normally make sense to | ||||
13624 | # align before the first token and 2) the second | ||||
13625 | # token must be a blank if we are to align before | ||||
13626 | # the third | ||||
13627 | if ( $i < $ibeg + 2 ) { } | ||||
13628 | |||||
13629 | # must follow a blank token | ||||
13630 | elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { } | ||||
13631 | |||||
13632 | # align a side comment -- | ||||
13633 | elsif ( $type eq '#' ) { | ||||
13634 | |||||
13635 | unless ( | ||||
13636 | |||||
13637 | # it is a static side comment | ||||
13638 | ( | ||||
13639 | $rOpts->{'static-side-comments'} | ||||
13640 | && $token =~ /$static_side_comment_pattern/o | ||||
13641 | ) | ||||
13642 | |||||
13643 | # or a closing side comment | ||||
13644 | || ( $vert_last_nonblank_block_type | ||||
13645 | && $token =~ | ||||
13646 | /$closing_side_comment_prefix_pattern/o ) | ||||
13647 | ) | ||||
13648 | { | ||||
13649 | $alignment_type = $type; | ||||
13650 | } ## Example of a static side comment | ||||
13651 | } | ||||
13652 | |||||
13653 | # otherwise, do not align two in a row to create a | ||||
13654 | # blank field | ||||
13655 | elsif ( $last_vertical_alignment_before_index == $i - 2 ) { } | ||||
13656 | |||||
13657 | # align before one of these keywords | ||||
13658 | # (within a line, since $i>1) | ||||
13659 | elsif ( $type eq 'k' ) { | ||||
13660 | |||||
13661 | # /^(if|unless|and|or|eq|ne)$/ | ||||
13662 | if ( $is_vertical_alignment_keyword{$token} ) { | ||||
13663 | $alignment_type = $token; | ||||
13664 | } | ||||
13665 | } | ||||
13666 | |||||
13667 | # align before one of these types.. | ||||
13668 | # Note: add '.' after new vertical aligner is operational | ||||
13669 | elsif ( $is_vertical_alignment_type{$type} ) { | ||||
13670 | $alignment_type = $token; | ||||
13671 | |||||
13672 | # Do not align a terminal token. Although it might | ||||
13673 | # occasionally look ok to do this, this has been found to be | ||||
13674 | # a good general rule. The main problems are: | ||||
13675 | # (1) that the terminal token (such as an = or :) might get | ||||
13676 | # moved far to the right where it is hard to see because | ||||
13677 | # nothing follows it, and | ||||
13678 | # (2) doing so may prevent other good alignments. | ||||
13679 | # Current exceptions are && and || | ||||
13680 | if ( $i == $iend || $i >= $i_terminal ) { | ||||
13681 | $alignment_type = "" | ||||
13682 | unless ( $is_terminal_alignment_type{$type} ); | ||||
13683 | } | ||||
13684 | |||||
13685 | # Do not align leading ': (' or '. ('. This would prevent | ||||
13686 | # alignment in something like the following: | ||||
13687 | # $extra_space .= | ||||
13688 | # ( $input_line_number < 10 ) ? " " | ||||
13689 | # : ( $input_line_number < 100 ) ? " " | ||||
13690 | # : ""; | ||||
13691 | # or | ||||
13692 | # $code = | ||||
13693 | # ( $case_matters ? $accessor : " lc($accessor) " ) | ||||
13694 | # . ( $yesno ? " eq " : " ne " ) | ||||
13695 | if ( $i == $ibeg + 2 | ||||
13696 | && $types_to_go[$ibeg] =~ /^[\.\:]$/ | ||||
13697 | && $types_to_go[ $i - 1 ] eq 'b' ) | ||||
13698 | { | ||||
13699 | $alignment_type = ""; | ||||
13700 | } | ||||
13701 | |||||
13702 | # For a paren after keyword, only align something like this: | ||||
13703 | # if ( $a ) { &a } | ||||
13704 | # elsif ( $b ) { &b } | ||||
13705 | if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) { | ||||
13706 | $alignment_type = "" | ||||
13707 | unless $vert_last_nonblank_token =~ | ||||
13708 | /^(if|unless|elsif)$/; | ||||
13709 | } | ||||
13710 | |||||
13711 | # be sure the alignment tokens are unique | ||||
13712 | # This didn't work well: reason not determined | ||||
13713 | # if ($token ne $type) {$alignment_type .= $type} | ||||
13714 | } | ||||
13715 | |||||
13716 | # NOTE: This is deactivated because it causes the previous | ||||
13717 | # if/elsif alignment to fail | ||||
13718 | #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i]) | ||||
13719 | #{ $alignment_type = $type; } | ||||
13720 | |||||
13721 | if ($alignment_type) { | ||||
13722 | $last_vertical_alignment_before_index = $i; | ||||
13723 | } | ||||
13724 | |||||
13725 | #-------------------------------------------------------- | ||||
13726 | # Next see if we want to align AFTER the previous nonblank | ||||
13727 | #-------------------------------------------------------- | ||||
13728 | |||||
13729 | # We want to line up ',' and interior ';' tokens, with the added | ||||
13730 | # space AFTER these tokens. (Note: interior ';' is included | ||||
13731 | # because it may occur in short blocks). | ||||
13732 | if ( | ||||
13733 | |||||
13734 | # we haven't already set it | ||||
13735 | !$alignment_type | ||||
13736 | |||||
13737 | # and its not the first token of the line | ||||
13738 | && ( $i > $ibeg ) | ||||
13739 | |||||
13740 | # and it follows a blank | ||||
13741 | && $types_to_go[ $i - 1 ] eq 'b' | ||||
13742 | |||||
13743 | # and previous token IS one of these: | ||||
13744 | && ( $vert_last_nonblank_type =~ /^[\,\;]$/ ) | ||||
13745 | |||||
13746 | # and it's NOT one of these | ||||
13747 | && ( $type !~ /^[b\#\)\]\}]$/ ) | ||||
13748 | |||||
13749 | # then go ahead and align | ||||
13750 | ) | ||||
13751 | |||||
13752 | { | ||||
13753 | $alignment_type = $vert_last_nonblank_type; | ||||
13754 | } | ||||
13755 | |||||
13756 | #-------------------------------------------------------- | ||||
13757 | # then store the value | ||||
13758 | #-------------------------------------------------------- | ||||
13759 | $matching_token_to_go[$i] = $alignment_type; | ||||
13760 | if ( $type ne 'b' ) { | ||||
13761 | $vert_last_nonblank_type = $type; | ||||
13762 | $vert_last_nonblank_token = $token; | ||||
13763 | $vert_last_nonblank_block_type = $block_type; | ||||
13764 | } | ||||
13765 | } | ||||
13766 | } | ||||
13767 | } | ||||
13768 | } | ||||
13769 | |||||
13770 | sub terminal_type { | ||||
13771 | |||||
13772 | # returns type of last token on this line (terminal token), as follows: | ||||
13773 | # returns # for a full-line comment | ||||
13774 | # returns ' ' for a blank line | ||||
13775 | # otherwise returns final token type | ||||
13776 | |||||
13777 | my ( $rtype, $rblock_type, $ibeg, $iend ) = @_; | ||||
13778 | |||||
13779 | # check for full-line comment.. | ||||
13780 | if ( $$rtype[$ibeg] eq '#' ) { | ||||
13781 | return wantarray ? ( $$rtype[$ibeg], $ibeg ) : $$rtype[$ibeg]; | ||||
13782 | } | ||||
13783 | else { | ||||
13784 | |||||
13785 | # start at end and walk backwards.. | ||||
13786 | for ( my $i = $iend ; $i >= $ibeg ; $i-- ) { | ||||
13787 | |||||
13788 | # skip past any side comment and blanks | ||||
13789 | next if ( $$rtype[$i] eq 'b' ); | ||||
13790 | next if ( $$rtype[$i] eq '#' ); | ||||
13791 | |||||
13792 | # found it..make sure it is a BLOCK termination, | ||||
13793 | # but hide a terminal } after sort/grep/map because it is not | ||||
13794 | # necessarily the end of the line. (terminal.t) | ||||
13795 | my $terminal_type = $$rtype[$i]; | ||||
13796 | if ( | ||||
13797 | $terminal_type eq '}' | ||||
13798 | && ( !$$rblock_type[$i] | ||||
13799 | || ( $is_sort_map_grep_eval_do{ $$rblock_type[$i] } ) ) | ||||
13800 | ) | ||||
13801 | { | ||||
13802 | $terminal_type = 'b'; | ||||
13803 | } | ||||
13804 | return wantarray ? ( $terminal_type, $i ) : $terminal_type; | ||||
13805 | } | ||||
13806 | |||||
13807 | # empty line | ||||
13808 | return wantarray ? ( ' ', $ibeg ) : ' '; | ||||
13809 | } | ||||
13810 | } | ||||
13811 | |||||
13812 | { # set_bond_strengths | ||||
13813 | |||||
13814 | 2 | 100ns | my %is_good_keyword_breakpoint; | ||
13815 | 1 | 0s | my %is_lt_gt_le_ge; | ||
13816 | |||||
13817 | 1 | 0s | my %binary_bond_strength; | ||
13818 | 1 | 0s | my %nobreak_lhs; | ||
13819 | 1 | 0s | my %nobreak_rhs; | ||
13820 | |||||
13821 | 1 | 0s | my @bias_tokens; | ||
13822 | 1 | 400ns | my $delta_bias; | ||
13823 | |||||
13824 | sub bias_table_key { | ||||
13825 | my ( $type, $token ) = @_; | ||||
13826 | my $bias_table_key = $type; | ||||
13827 | if ( $type eq 'k' ) { | ||||
13828 | $bias_table_key = $token; | ||||
13829 | if ( $token eq 'err' ) { $bias_table_key = 'or' } | ||||
13830 | } | ||||
13831 | return $bias_table_key; | ||||
13832 | } | ||||
13833 | |||||
13834 | sub set_bond_strengths { | ||||
13835 | |||||
13836 | # spent 86µs within Perl::Tidy::Formatter::BEGIN@13836 which was called:
# once (86µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 14245 | ||||
13837 | |||||
13838 | 1 | 2µs | @_ = qw(if unless while until for foreach); | ||
13839 | 1 | 2µs | @is_good_keyword_breakpoint{@_} = (1) x scalar(@_); | ||
13840 | |||||
13841 | 1 | 2µs | @_ = qw(lt gt le ge); | ||
13842 | 1 | 2µs | @is_lt_gt_le_ge{@_} = (1) x scalar(@_); | ||
13843 | # | ||||
13844 | # The decision about where to break a line depends upon a "bond | ||||
13845 | # strength" between tokens. The LOWER the bond strength, the MORE | ||||
13846 | # likely a break. A bond strength may be any value but to simplify | ||||
13847 | # things there are several pre-defined strength levels: | ||||
13848 | |||||
13849 | # NO_BREAK => 10000; | ||||
13850 | # VERY_STRONG => 100; | ||||
13851 | # STRONG => 2.1; | ||||
13852 | # NOMINAL => 1.1; | ||||
13853 | # WEAK => 0.8; | ||||
13854 | # VERY_WEAK => 0.55; | ||||
13855 | |||||
13856 | # The strength values are based on trial-and-error, and need to be | ||||
13857 | # tweaked occasionally to get desired results. Some comments: | ||||
13858 | # | ||||
13859 | # 1. Only relative strengths are important. small differences | ||||
13860 | # in strengths can make big formatting differences. | ||||
13861 | # 2. Each indentation level adds one unit of bond strength. | ||||
13862 | # 3. A value of NO_BREAK makes an unbreakable bond | ||||
13863 | # 4. A value of VERY_WEAK is the strength of a ',' | ||||
13864 | # 5. Values below NOMINAL are considered ok break points. | ||||
13865 | # 6. Values above NOMINAL are considered poor break points. | ||||
13866 | # | ||||
13867 | # The bond strengths should roughly follow precedence order where | ||||
13868 | # possible. If you make changes, please check the results very | ||||
13869 | # carefully on a variety of scripts. Testing with the -extrude | ||||
13870 | # options is particularly helpful in exercising all of the rules. | ||||
13871 | |||||
13872 | # Wherever possible, bond strengths are defined in the following | ||||
13873 | # tables. There are two main stages to setting bond strengths and | ||||
13874 | # two types of tables: | ||||
13875 | # | ||||
13876 | # The first stage involves looking at each token individually and | ||||
13877 | # defining left and right bond strengths, according to if we want | ||||
13878 | # to break to the left or right side, and how good a break point it | ||||
13879 | # is. For example tokens like =, ||, && make good break points and | ||||
13880 | # will have low strengths, but one might want to break on either | ||||
13881 | # side to put them at the end of one line or beginning of the next. | ||||
13882 | # | ||||
13883 | # The second stage involves looking at certain pairs of tokens and | ||||
13884 | # defining a bond strength for that particular pair. This second | ||||
13885 | # stage has priority. | ||||
13886 | |||||
13887 | #--------------------------------------------------------------- | ||||
13888 | # Bond Strength BEGIN Section 1. | ||||
13889 | # Set left and right bond strengths of individual tokens. | ||||
13890 | #--------------------------------------------------------------- | ||||
13891 | |||||
13892 | # NOTE: NO_BREAK's set in this section first are HINTS which will | ||||
13893 | # probably not be honored. Essential NO_BREAKS's should be set in | ||||
13894 | # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end | ||||
13895 | # of this subroutine. | ||||
13896 | |||||
13897 | # Note that we are setting defaults in this section. The user | ||||
13898 | # cannot change bond strengths but can cause the left and right | ||||
13899 | # bond strengths of any token type to be swapped through the use of | ||||
13900 | # the -wba and -wbb flags. In this way the user can determine if a | ||||
13901 | # breakpoint token should appear at the end of one line or the | ||||
13902 | # beginning of the next line. | ||||
13903 | |||||
13904 | # The hash keys in this section are token types, plus the text of | ||||
13905 | # certain keywords like 'or', 'and'. | ||||
13906 | |||||
13907 | # no break around possible filehandle | ||||
13908 | 1 | 500ns | $left_bond_strength{'Z'} = NO_BREAK; | ||
13909 | 1 | 200ns | $right_bond_strength{'Z'} = NO_BREAK; | ||
13910 | |||||
13911 | # never put a bare word on a new line: | ||||
13912 | # example print (STDERR, "bla"); will fail with break after ( | ||||
13913 | 1 | 100ns | $left_bond_strength{'w'} = NO_BREAK; | ||
13914 | |||||
13915 | # blanks always have infinite strength to force breaks after | ||||
13916 | # real tokens | ||||
13917 | 1 | 200ns | $right_bond_strength{'b'} = NO_BREAK; | ||
13918 | |||||
13919 | # try not to break on exponentation | ||||
13920 | 1 | 1µs | @_ = qw" ** .. ... <=> "; | ||
13921 | 1 | 2µs | @left_bond_strength{@_} = (STRONG) x scalar(@_); | ||
13922 | 1 | 1µs | @right_bond_strength{@_} = (STRONG) x scalar(@_); | ||
13923 | |||||
13924 | # The comma-arrow has very low precedence but not a good break point | ||||
13925 | 1 | 200ns | $left_bond_strength{'=>'} = NO_BREAK; | ||
13926 | 1 | 200ns | $right_bond_strength{'=>'} = NOMINAL; | ||
13927 | |||||
13928 | # ok to break after label | ||||
13929 | 1 | 700ns | $left_bond_strength{'J'} = NO_BREAK; | ||
13930 | 1 | 500ns | $right_bond_strength{'J'} = NOMINAL; | ||
13931 | 1 | 100ns | $left_bond_strength{'j'} = STRONG; | ||
13932 | 1 | 100ns | $right_bond_strength{'j'} = STRONG; | ||
13933 | 1 | 100ns | $left_bond_strength{'A'} = STRONG; | ||
13934 | 1 | 100ns | $right_bond_strength{'A'} = STRONG; | ||
13935 | |||||
13936 | 1 | 100ns | $left_bond_strength{'->'} = STRONG; | ||
13937 | 1 | 100ns | $right_bond_strength{'->'} = VERY_STRONG; | ||
13938 | |||||
13939 | 1 | 200ns | $left_bond_strength{'CORE::'} = NOMINAL; | ||
13940 | 1 | 100ns | $right_bond_strength{'CORE::'} = NO_BREAK; | ||
13941 | |||||
13942 | # breaking AFTER modulus operator is ok: | ||||
13943 | 1 | 900ns | @_ = qw" % "; | ||
13944 | 1 | 800ns | @left_bond_strength{@_} = (STRONG) x scalar(@_); | ||
13945 | 1 | 400ns | @right_bond_strength{@_} = | ||
13946 | ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@_); | ||||
13947 | |||||
13948 | # Break AFTER math operators * and / | ||||
13949 | 1 | 800ns | @_ = qw" * / x "; | ||
13950 | 1 | 2µs | @left_bond_strength{@_} = (STRONG) x scalar(@_); | ||
13951 | 1 | 4µs | @right_bond_strength{@_} = (NOMINAL) x scalar(@_); | ||
13952 | |||||
13953 | # Break AFTER weakest math operators + and - | ||||
13954 | # Make them weaker than * but a bit stronger than '.' | ||||
13955 | 1 | 1µs | @_ = qw" + - "; | ||
13956 | 1 | 700ns | @left_bond_strength{@_} = (STRONG) x scalar(@_); | ||
13957 | 1 | 500ns | @right_bond_strength{@_} = | ||
13958 | ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@_); | ||||
13959 | |||||
13960 | # breaking BEFORE these is just ok: | ||||
13961 | 1 | 700ns | @_ = qw" >> << "; | ||
13962 | 1 | 700ns | @right_bond_strength{@_} = (STRONG) x scalar(@_); | ||
13963 | 1 | 500ns | @left_bond_strength{@_} = (NOMINAL) x scalar(@_); | ||
13964 | |||||
13965 | # breaking before the string concatenation operator seems best | ||||
13966 | # because it can be hard to see at the end of a line | ||||
13967 | 1 | 200ns | $right_bond_strength{'.'} = STRONG; | ||
13968 | 1 | 200ns | $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK; | ||
13969 | |||||
13970 | 1 | 1µs | @_ = qw"} ] ) R"; | ||
13971 | 1 | 1µs | @left_bond_strength{@_} = (STRONG) x scalar(@_); | ||
13972 | 1 | 700ns | @right_bond_strength{@_} = (NOMINAL) x scalar(@_); | ||
13973 | |||||
13974 | # make these a little weaker than nominal so that they get | ||||
13975 | # favored for end-of-line characters | ||||
13976 | 1 | 2µs | @_ = qw"!= == =~ !~ ~~ !~~"; | ||
13977 | 1 | 2µs | @left_bond_strength{@_} = (STRONG) x scalar(@_); | ||
13978 | 1 | 1µs | @right_bond_strength{@_} = | ||
13979 | ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@_); | ||||
13980 | |||||
13981 | # break AFTER these | ||||
13982 | 1 | 2µs | @_ = qw" < > | & >= <="; | ||
13983 | 1 | 3µs | @left_bond_strength{@_} = (VERY_STRONG) x scalar(@_); | ||
13984 | 1 | 2µs | @right_bond_strength{@_} = | ||
13985 | ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@_); | ||||
13986 | |||||
13987 | # breaking either before or after a quote is ok | ||||
13988 | # but bias for breaking before a quote | ||||
13989 | 1 | 200ns | $left_bond_strength{'Q'} = NOMINAL; | ||
13990 | 1 | 100ns | $right_bond_strength{'Q'} = NOMINAL + 0.02; | ||
13991 | 1 | 200ns | $left_bond_strength{'q'} = NOMINAL; | ||
13992 | 1 | 100ns | $right_bond_strength{'q'} = NOMINAL; | ||
13993 | |||||
13994 | # starting a line with a keyword is usually ok | ||||
13995 | 1 | 100ns | $left_bond_strength{'k'} = NOMINAL; | ||
13996 | |||||
13997 | # we usually want to bond a keyword strongly to what immediately | ||||
13998 | # follows, rather than leaving it stranded at the end of a line | ||||
13999 | 1 | 100ns | $right_bond_strength{'k'} = STRONG; | ||
14000 | |||||
14001 | 1 | 100ns | $left_bond_strength{'G'} = NOMINAL; | ||
14002 | 1 | 100ns | $right_bond_strength{'G'} = STRONG; | ||
14003 | |||||
14004 | # assignment operators | ||||
14005 | 1 | 4µs | @_ = qw( | ||
14006 | = **= += *= &= <<= &&= | ||||
14007 | -= /= |= >>= ||= //= | ||||
14008 | .= %= ^= | ||||
14009 | x= | ||||
14010 | ); | ||||
14011 | |||||
14012 | # Default is to break AFTER various assignment operators | ||||
14013 | 1 | 4µs | @left_bond_strength{@_} = (STRONG) x scalar(@_); | ||
14014 | 1 | 2µs | @right_bond_strength{@_} = | ||
14015 | ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@_); | ||||
14016 | |||||
14017 | # Default is to break BEFORE '&&' and '||' and '//' | ||||
14018 | # set strength of '||' to same as '=' so that chains like | ||||
14019 | # $a = $b || $c || $d will break before the first '||' | ||||
14020 | 1 | 100ns | $right_bond_strength{'||'} = NOMINAL; | ||
14021 | 1 | 300ns | $left_bond_strength{'||'} = $right_bond_strength{'='}; | ||
14022 | |||||
14023 | # same thing for '//' | ||||
14024 | 1 | 200ns | $right_bond_strength{'//'} = NOMINAL; | ||
14025 | 1 | 200ns | $left_bond_strength{'//'} = $right_bond_strength{'='}; | ||
14026 | |||||
14027 | # set strength of && a little higher than || | ||||
14028 | 1 | 200ns | $right_bond_strength{'&&'} = NOMINAL; | ||
14029 | 1 | 500ns | $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1; | ||
14030 | |||||
14031 | 1 | 200ns | $left_bond_strength{';'} = VERY_STRONG; | ||
14032 | 1 | 200ns | $right_bond_strength{';'} = VERY_WEAK; | ||
14033 | 1 | 100ns | $left_bond_strength{'f'} = VERY_STRONG; | ||
14034 | |||||
14035 | # make right strength of for ';' a little less than '=' | ||||
14036 | # to make for contents break after the ';' to avoid this: | ||||
14037 | # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j += | ||||
14038 | # $number_of_fields ) | ||||
14039 | # and make it weaker than ',' and 'and' too | ||||
14040 | 1 | 100ns | $right_bond_strength{'f'} = VERY_WEAK - 0.03; | ||
14041 | |||||
14042 | # The strengths of ?/: should be somewhere between | ||||
14043 | # an '=' and a quote (NOMINAL), | ||||
14044 | # make strength of ':' slightly less than '?' to help | ||||
14045 | # break long chains of ? : after the colons | ||||
14046 | 1 | 2µs | $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL; | ||
14047 | 1 | 1µs | $right_bond_strength{':'} = NO_BREAK; | ||
14048 | 1 | 400ns | $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01; | ||
14049 | 1 | 200ns | $right_bond_strength{'?'} = NO_BREAK; | ||
14050 | |||||
14051 | 1 | 100ns | $left_bond_strength{','} = VERY_STRONG; | ||
14052 | 1 | 100ns | $right_bond_strength{','} = VERY_WEAK; | ||
14053 | |||||
14054 | # remaining digraphs and trigraphs not defined above | ||||
14055 | 1 | 2µs | @_ = qw( :: <> ++ --); | ||
14056 | 1 | 1µs | @left_bond_strength{@_} = (WEAK) x scalar(@_); | ||
14057 | 1 | 1µs | @right_bond_strength{@_} = (STRONG) x scalar(@_); | ||
14058 | |||||
14059 | # Set bond strengths of certain keywords | ||||
14060 | # make 'or', 'err', 'and' slightly weaker than a ',' | ||||
14061 | 1 | 200ns | $left_bond_strength{'and'} = VERY_WEAK - 0.01; | ||
14062 | 1 | 100ns | $left_bond_strength{'or'} = VERY_WEAK - 0.02; | ||
14063 | 1 | 100ns | $left_bond_strength{'err'} = VERY_WEAK - 0.02; | ||
14064 | 1 | 200ns | $left_bond_strength{'xor'} = NOMINAL; | ||
14065 | 1 | 100ns | $right_bond_strength{'and'} = NOMINAL; | ||
14066 | 1 | 100ns | $right_bond_strength{'or'} = NOMINAL; | ||
14067 | 1 | 100ns | $right_bond_strength{'err'} = NOMINAL; | ||
14068 | 1 | 200ns | $right_bond_strength{'xor'} = STRONG; | ||
14069 | |||||
14070 | #--------------------------------------------------------------- | ||||
14071 | # Bond Strength BEGIN Section 2. | ||||
14072 | # Set binary rules for bond strengths between certain token types. | ||||
14073 | #--------------------------------------------------------------- | ||||
14074 | |||||
14075 | # We have a little problem making tables which apply to the | ||||
14076 | # container tokens. Here is a list of container tokens and | ||||
14077 | # their types: | ||||
14078 | # | ||||
14079 | # type tokens // meaning | ||||
14080 | # { {, [, ( // indent | ||||
14081 | # } }, ], ) // outdent | ||||
14082 | # [ [ // left non-structural [ (enclosing an array index) | ||||
14083 | # ] ] // right non-structural square bracket | ||||
14084 | # ( ( // left non-structural paren | ||||
14085 | # ) ) // right non-structural paren | ||||
14086 | # L { // left non-structural curly brace (enclosing a key) | ||||
14087 | # R } // right non-structural curly brace | ||||
14088 | # | ||||
14089 | # Some rules apply to token types and some to just the token | ||||
14090 | # itself. We solve the problem by combining type and token into a | ||||
14091 | # new hash key for the container types. | ||||
14092 | # | ||||
14093 | # If a rule applies to a token 'type' then we need to make rules | ||||
14094 | # for each of these 'type.token' combinations: | ||||
14095 | # Type Type.Token | ||||
14096 | # { {{, {[, {( | ||||
14097 | # [ [[ | ||||
14098 | # ( (( | ||||
14099 | # L L{ | ||||
14100 | # } }}, }], }) | ||||
14101 | # ] ]] | ||||
14102 | # ) )) | ||||
14103 | # R R} | ||||
14104 | # | ||||
14105 | # If a rule applies to a token then we need to make rules for | ||||
14106 | # these 'type.token' combinations: | ||||
14107 | # Token Type.Token | ||||
14108 | # { {{, L{ | ||||
14109 | # [ {[, [[ | ||||
14110 | # ( {(, (( | ||||
14111 | # } }}, R} | ||||
14112 | # ] }], ]] | ||||
14113 | # ) }), )) | ||||
14114 | |||||
14115 | # allow long lines before final { in an if statement, as in: | ||||
14116 | # if (.......... | ||||
14117 | # ..........) | ||||
14118 | # { | ||||
14119 | # | ||||
14120 | # Otherwise, the line before the { tends to be too short. | ||||
14121 | |||||
14122 | 1 | 700ns | $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03; | ||
14123 | 1 | 400ns | $binary_bond_strength{'(('}{'{{'} = NOMINAL; | ||
14124 | |||||
14125 | # break on something like '} (', but keep this stronger than a ',' | ||||
14126 | # example is in 'howe.pl' | ||||
14127 | 1 | 300ns | $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK; | ||
14128 | 1 | 300ns | $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK; | ||
14129 | |||||
14130 | # keep matrix and hash indices together | ||||
14131 | # but make them a little below STRONG to allow breaking open | ||||
14132 | # something like {'some-word'}{'some-very-long-word'} at the }{ | ||||
14133 | # (bracebrk.t) | ||||
14134 | 1 | 300ns | $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL; | ||
14135 | 1 | 200ns | $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL; | ||
14136 | 1 | 200ns | $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL; | ||
14137 | 1 | 100ns | $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL; | ||
14138 | |||||
14139 | # increase strength to the point where a break in the following | ||||
14140 | # will be after the opening paren rather than at the arrow: | ||||
14141 | # $a->$b($c); | ||||
14142 | 1 | 300ns | $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG; | ||
14143 | |||||
14144 | 1 | 100ns | $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; | ||
14145 | 1 | 100ns | $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; | ||
14146 | 1 | 300ns | $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; | ||
14147 | 1 | 700ns | $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; | ||
14148 | 1 | 100ns | $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; | ||
14149 | 1 | 100ns | $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; | ||
14150 | |||||
14151 | 1 | 100ns | $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL; | ||
14152 | 1 | 100ns | $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL; | ||
14153 | 1 | 200ns | $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL; | ||
14154 | 1 | 100ns | $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL; | ||
14155 | |||||
14156 | #--------------------------------------------------------------- | ||||
14157 | # Binary NO_BREAK rules | ||||
14158 | #--------------------------------------------------------------- | ||||
14159 | |||||
14160 | # use strict requires that bare word and => not be separated | ||||
14161 | 1 | 400ns | $binary_bond_strength{'C'}{'=>'} = NO_BREAK; | ||
14162 | 1 | 300ns | $binary_bond_strength{'U'}{'=>'} = NO_BREAK; | ||
14163 | |||||
14164 | # Never break between a bareword and a following paren because | ||||
14165 | # perl may give an error. For example, if a break is placed | ||||
14166 | # between 'to_filehandle' and its '(' the following line will | ||||
14167 | # give a syntax error [Carp.pm]: my( $no) =fileno( | ||||
14168 | # to_filehandle( $in)) ; | ||||
14169 | 1 | 200ns | $binary_bond_strength{'C'}{'(('} = NO_BREAK; | ||
14170 | 1 | 100ns | $binary_bond_strength{'C'}{'{('} = NO_BREAK; | ||
14171 | 1 | 200ns | $binary_bond_strength{'U'}{'(('} = NO_BREAK; | ||
14172 | 1 | 100ns | $binary_bond_strength{'U'}{'{('} = NO_BREAK; | ||
14173 | |||||
14174 | # use strict requires that bare word within braces not start new | ||||
14175 | # line | ||||
14176 | 1 | 300ns | $binary_bond_strength{'L{'}{'w'} = NO_BREAK; | ||
14177 | |||||
14178 | 1 | 200ns | $binary_bond_strength{'w'}{'R}'} = NO_BREAK; | ||
14179 | |||||
14180 | # use strict requires that bare word and => not be separated | ||||
14181 | 1 | 200ns | $binary_bond_strength{'w'}{'=>'} = NO_BREAK; | ||
14182 | |||||
14183 | # use strict does not allow separating type info from trailing { } | ||||
14184 | # testfile is readmail.pl | ||||
14185 | 1 | 300ns | $binary_bond_strength{'t'}{'L{'} = NO_BREAK; | ||
14186 | 1 | 100ns | $binary_bond_strength{'i'}{'L{'} = NO_BREAK; | ||
14187 | |||||
14188 | # As a defensive measure, do not break between a '(' and a | ||||
14189 | # filehandle. In some cases, this can cause an error. For | ||||
14190 | # example, the following program works: | ||||
14191 | # my $msg="hi!\n"; | ||||
14192 | |||||
14193 | # ( STDOUT | ||||
14194 | # $msg | ||||
14195 | # ); | ||||
14196 | # | ||||
14197 | # But this program fails: | ||||
14198 | # my $msg="hi!\n"; | ||||
14199 | |||||
14200 | # ( | ||||
14201 | # STDOUT | ||||
14202 | # $msg | ||||
14203 | # ); | ||||
14204 | # | ||||
14205 | # This is normally only a problem with the 'extrude' option | ||||
14206 | 1 | 200ns | $binary_bond_strength{'(('}{'Y'} = NO_BREAK; | ||
14207 | 1 | 200ns | $binary_bond_strength{'{('}{'Y'} = NO_BREAK; | ||
14208 | |||||
14209 | # never break between sub name and opening paren | ||||
14210 | 1 | 100ns | $binary_bond_strength{'w'}{'(('} = NO_BREAK; | ||
14211 | 1 | 100ns | $binary_bond_strength{'w'}{'{('} = NO_BREAK; | ||
14212 | |||||
14213 | # keep '}' together with ';' | ||||
14214 | 1 | 200ns | $binary_bond_strength{'}}'}{';'} = NO_BREAK; | ||
14215 | |||||
14216 | # Breaking before a ++ can cause perl to guess wrong. For | ||||
14217 | # example the following line will cause a syntax error | ||||
14218 | # with -extrude if we break between '$i' and '++' [fixstyle2] | ||||
14219 | # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) ); | ||||
14220 | 1 | 200ns | $nobreak_lhs{'++'} = NO_BREAK; | ||
14221 | |||||
14222 | # Do not break before a possible file handle | ||||
14223 | 1 | 100ns | $nobreak_lhs{'Z'} = NO_BREAK; | ||
14224 | |||||
14225 | # use strict hates bare words on any new line. For | ||||
14226 | # example, a break before the underscore here provokes the | ||||
14227 | # wrath of use strict: | ||||
14228 | # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) { | ||||
14229 | 1 | 200ns | $nobreak_rhs{'F'} = NO_BREAK; | ||
14230 | 1 | 100ns | $nobreak_rhs{'CORE::'} = NO_BREAK; | ||
14231 | |||||
14232 | #--------------------------------------------------------------- | ||||
14233 | # Bond Strength BEGIN Section 3. | ||||
14234 | # Define tables and values for applying a small bias to the above | ||||
14235 | # values. | ||||
14236 | #--------------------------------------------------------------- | ||||
14237 | # Adding a small 'bias' to strengths is a simple way to make a line | ||||
14238 | # break at the first of a sequence of identical terms. For | ||||
14239 | # example, to force long string of conditional operators to break | ||||
14240 | # with each line ending in a ':', we can add a small number to the | ||||
14241 | # bond strength of each ':' (colon.t) | ||||
14242 | 1 | 1µs | @bias_tokens = qw( : && || f and or . ); # tokens which get bias | ||
14243 | 1 | 6µs | $delta_bias = 0.0001; # a very small strength level | ||
14244 | |||||
14245 | 1 | 1.56ms | 1 | 86µs | } ## end BEGIN # spent 86µs making 1 call to Perl::Tidy::Formatter::BEGIN@13836 |
14246 | |||||
14247 | # patch-its always ok to break at end of line | ||||
14248 | $nobreak_to_go[$max_index_to_go] = 0; | ||||
14249 | |||||
14250 | # we start a new set of bias values for each line | ||||
14251 | my %bias; | ||||
14252 | @bias{@bias_tokens} = (0) x scalar(@bias_tokens); | ||||
14253 | my $code_bias = -.01; # bias for closing block braces | ||||
14254 | |||||
14255 | my $type = 'b'; | ||||
14256 | my $token = ' '; | ||||
14257 | my $last_type; | ||||
14258 | my $last_nonblank_type = $type; | ||||
14259 | my $last_nonblank_token = $token; | ||||
14260 | my $list_str = $left_bond_strength{'?'}; | ||||
14261 | |||||
14262 | my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token, | ||||
14263 | $next_nonblank_type, $next_token, $next_type, $total_nesting_depth, | ||||
14264 | ); | ||||
14265 | |||||
14266 | # main loop to compute bond strengths between each pair of tokens | ||||
14267 | for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) { | ||||
14268 | $last_type = $type; | ||||
14269 | if ( $type ne 'b' ) { | ||||
14270 | $last_nonblank_type = $type; | ||||
14271 | $last_nonblank_token = $token; | ||||
14272 | } | ||||
14273 | $type = $types_to_go[$i]; | ||||
14274 | |||||
14275 | # strength on both sides of a blank is the same | ||||
14276 | if ( $type eq 'b' && $last_type ne 'b' ) { | ||||
14277 | $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ]; | ||||
14278 | next; | ||||
14279 | } | ||||
14280 | |||||
14281 | $token = $tokens_to_go[$i]; | ||||
14282 | $block_type = $block_type_to_go[$i]; | ||||
14283 | $i_next = $i + 1; | ||||
14284 | $next_type = $types_to_go[$i_next]; | ||||
14285 | $next_token = $tokens_to_go[$i_next]; | ||||
14286 | $total_nesting_depth = $nesting_depth_to_go[$i_next]; | ||||
14287 | $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 ); | ||||
14288 | $next_nonblank_type = $types_to_go[$i_next_nonblank]; | ||||
14289 | $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; | ||||
14290 | |||||
14291 | # We are computing the strength of the bond between the current | ||||
14292 | # token and the NEXT token. | ||||
14293 | |||||
14294 | #--------------------------------------------------------------- | ||||
14295 | # Bond Strength Section 1: | ||||
14296 | # First Approximation. | ||||
14297 | # Use minimum of individual left and right tabulated bond | ||||
14298 | # strengths. | ||||
14299 | #--------------------------------------------------------------- | ||||
14300 | my $bsr = $right_bond_strength{$type}; | ||||
14301 | my $bsl = $left_bond_strength{$next_nonblank_type}; | ||||
14302 | |||||
14303 | # define right bond strengths of certain keywords | ||||
14304 | if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) { | ||||
14305 | $bsr = $right_bond_strength{$token}; | ||||
14306 | } | ||||
14307 | elsif ( $token eq 'ne' or $token eq 'eq' ) { | ||||
14308 | $bsr = NOMINAL; | ||||
14309 | } | ||||
14310 | |||||
14311 | # set terminal bond strength to the nominal value | ||||
14312 | # this will cause good preceding breaks to be retained | ||||
14313 | if ( $i_next_nonblank > $max_index_to_go ) { | ||||
14314 | $bsl = NOMINAL; | ||||
14315 | } | ||||
14316 | |||||
14317 | # define right bond strengths of certain keywords | ||||
14318 | if ( $next_nonblank_type eq 'k' | ||||
14319 | && defined( $left_bond_strength{$next_nonblank_token} ) ) | ||||
14320 | { | ||||
14321 | $bsl = $left_bond_strength{$next_nonblank_token}; | ||||
14322 | } | ||||
14323 | elsif ($next_nonblank_token eq 'ne' | ||||
14324 | or $next_nonblank_token eq 'eq' ) | ||||
14325 | { | ||||
14326 | $bsl = NOMINAL; | ||||
14327 | } | ||||
14328 | elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) { | ||||
14329 | $bsl = 0.9 * NOMINAL + 0.1 * STRONG; | ||||
14330 | } | ||||
14331 | |||||
14332 | # Use the minimum of the left and right strengths. Note: it might | ||||
14333 | # seem that we would want to keep a NO_BREAK if either token has | ||||
14334 | # this value. This didn't work, for example because in an arrow | ||||
14335 | # list, it prevents the comma from separating from the following | ||||
14336 | # bare word (which is probably quoted by its arrow). So necessary | ||||
14337 | # NO_BREAK's have to be handled as special cases in the final | ||||
14338 | # section. | ||||
14339 | if ( !defined($bsr) ) { $bsr = VERY_STRONG } | ||||
14340 | if ( !defined($bsl) ) { $bsl = VERY_STRONG } | ||||
14341 | my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl; | ||||
14342 | my $bond_str_1 = $bond_str; | ||||
14343 | |||||
14344 | #--------------------------------------------------------------- | ||||
14345 | # Bond Strength Section 2: | ||||
14346 | # Apply hardwired rules.. | ||||
14347 | #--------------------------------------------------------------- | ||||
14348 | |||||
14349 | # Patch to put terminal or clauses on a new line: Weaken the bond | ||||
14350 | # at an || followed by die or similar keyword to make the terminal | ||||
14351 | # or clause fall on a new line, like this: | ||||
14352 | # | ||||
14353 | # my $class = shift | ||||
14354 | # || die "Cannot add broadcast: No class identifier found"; | ||||
14355 | # | ||||
14356 | # Otherwise the break will be at the previous '=' since the || and | ||||
14357 | # = have the same starting strength and the or is biased, like | ||||
14358 | # this: | ||||
14359 | # | ||||
14360 | # my $class = | ||||
14361 | # shift || die "Cannot add broadcast: No class identifier found"; | ||||
14362 | # | ||||
14363 | # In any case if the user places a break at either the = or the || | ||||
14364 | # it should remain there. | ||||
14365 | if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) { | ||||
14366 | if ( $next_nonblank_token =~ /^(die|confess|croak|warn)$/ ) { | ||||
14367 | if ( $want_break_before{$token} && $i > 0 ) { | ||||
14368 | $bond_strength_to_go[ $i - 1 ] -= $delta_bias; | ||||
14369 | } | ||||
14370 | else { | ||||
14371 | $bond_str -= $delta_bias; | ||||
14372 | } | ||||
14373 | } | ||||
14374 | } | ||||
14375 | |||||
14376 | # good to break after end of code blocks | ||||
14377 | if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) { | ||||
14378 | |||||
14379 | $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias; | ||||
14380 | $code_bias += $delta_bias; | ||||
14381 | } | ||||
14382 | |||||
14383 | if ( $type eq 'k' ) { | ||||
14384 | |||||
14385 | # allow certain control keywords to stand out | ||||
14386 | if ( $next_nonblank_type eq 'k' | ||||
14387 | && $is_last_next_redo_return{$token} ) | ||||
14388 | { | ||||
14389 | $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK; | ||||
14390 | } | ||||
14391 | |||||
14392 | # Don't break after keyword my. This is a quick fix for a | ||||
14393 | # rare problem with perl. An example is this line from file | ||||
14394 | # Container.pm: | ||||
14395 | |||||
14396 | # foreach my $question( Debian::DebConf::ConfigDb::gettree( | ||||
14397 | # $this->{'question'} ) ) | ||||
14398 | |||||
14399 | if ( $token eq 'my' ) { | ||||
14400 | $bond_str = NO_BREAK; | ||||
14401 | } | ||||
14402 | |||||
14403 | } | ||||
14404 | |||||
14405 | # good to break before 'if', 'unless', etc | ||||
14406 | if ( $is_if_brace_follower{$next_nonblank_token} ) { | ||||
14407 | $bond_str = VERY_WEAK; | ||||
14408 | } | ||||
14409 | |||||
14410 | if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) { | ||||
14411 | |||||
14412 | # FIXME: needs more testing | ||||
14413 | if ( $is_keyword_returning_list{$next_nonblank_token} ) { | ||||
14414 | $bond_str = $list_str if ( $bond_str > $list_str ); | ||||
14415 | } | ||||
14416 | |||||
14417 | # keywords like 'unless', 'if', etc, within statements | ||||
14418 | # make good breaks | ||||
14419 | if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) { | ||||
14420 | $bond_str = VERY_WEAK / 1.05; | ||||
14421 | } | ||||
14422 | } | ||||
14423 | |||||
14424 | # try not to break before a comma-arrow | ||||
14425 | elsif ( $next_nonblank_type eq '=>' ) { | ||||
14426 | if ( $bond_str < STRONG ) { $bond_str = STRONG } | ||||
14427 | } | ||||
14428 | |||||
14429 | #--------------------------------------------------------------- | ||||
14430 | # Additional hardwired NOBREAK rules | ||||
14431 | #--------------------------------------------------------------- | ||||
14432 | |||||
14433 | # map1.t -- correct for a quirk in perl | ||||
14434 | if ( $token eq '(' | ||||
14435 | && $next_nonblank_type eq 'i' | ||||
14436 | && $last_nonblank_type eq 'k' | ||||
14437 | && $is_sort_map_grep{$last_nonblank_token} ) | ||||
14438 | |||||
14439 | # /^(sort|map|grep)$/ ) | ||||
14440 | { | ||||
14441 | $bond_str = NO_BREAK; | ||||
14442 | } | ||||
14443 | |||||
14444 | # extrude.t: do not break before paren at: | ||||
14445 | # -l pid_filename( | ||||
14446 | if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) { | ||||
14447 | $bond_str = NO_BREAK; | ||||
14448 | } | ||||
14449 | |||||
14450 | # in older version of perl, use strict can cause problems with | ||||
14451 | # breaks before bare words following opening parens. For example, | ||||
14452 | # this will fail under older versions if a break is made between | ||||
14453 | # '(' and 'MAIL': use strict; open( MAIL, "a long filename or | ||||
14454 | # command"); close MAIL; | ||||
14455 | if ( $type eq '{' ) { | ||||
14456 | |||||
14457 | if ( $token eq '(' && $next_nonblank_type eq 'w' ) { | ||||
14458 | |||||
14459 | # but it's fine to break if the word is followed by a '=>' | ||||
14460 | # or if it is obviously a sub call | ||||
14461 | my $i_next_next_nonblank = $i_next_nonblank + 1; | ||||
14462 | my $next_next_type = $types_to_go[$i_next_next_nonblank]; | ||||
14463 | if ( $next_next_type eq 'b' | ||||
14464 | && $i_next_nonblank < $max_index_to_go ) | ||||
14465 | { | ||||
14466 | $i_next_next_nonblank++; | ||||
14467 | $next_next_type = $types_to_go[$i_next_next_nonblank]; | ||||
14468 | } | ||||
14469 | |||||
14470 | # We'll check for an old breakpoint and keep a leading | ||||
14471 | # bareword if it was that way in the input file. | ||||
14472 | # Presumably it was ok that way. For example, the | ||||
14473 | # following would remain unchanged: | ||||
14474 | # | ||||
14475 | # @months = ( | ||||
14476 | # January, February, March, April, | ||||
14477 | # May, June, July, August, | ||||
14478 | # September, October, November, December, | ||||
14479 | # ); | ||||
14480 | # | ||||
14481 | # This should be sufficient: | ||||
14482 | if ( | ||||
14483 | !$old_breakpoint_to_go[$i] | ||||
14484 | && ( $next_next_type eq ',' | ||||
14485 | || $next_next_type eq '}' ) | ||||
14486 | ) | ||||
14487 | { | ||||
14488 | $bond_str = NO_BREAK; | ||||
14489 | } | ||||
14490 | } | ||||
14491 | } | ||||
14492 | |||||
14493 | # Do not break between a possible filehandle and a ? or / and do | ||||
14494 | # not introduce a break after it if there is no blank | ||||
14495 | # (extrude.t) | ||||
14496 | elsif ( $type eq 'Z' ) { | ||||
14497 | |||||
14498 | # don't break.. | ||||
14499 | if ( | ||||
14500 | |||||
14501 | # if there is no blank and we do not want one. Examples: | ||||
14502 | # print $x++ # do not break after $x | ||||
14503 | # print HTML"HELLO" # break ok after HTML | ||||
14504 | ( | ||||
14505 | $next_type ne 'b' | ||||
14506 | && defined( $want_left_space{$next_type} ) | ||||
14507 | && $want_left_space{$next_type} == WS_NO | ||||
14508 | ) | ||||
14509 | |||||
14510 | # or we might be followed by the start of a quote | ||||
14511 | || $next_nonblank_type =~ /^[\/\?]$/ | ||||
14512 | ) | ||||
14513 | { | ||||
14514 | $bond_str = NO_BREAK; | ||||
14515 | } | ||||
14516 | } | ||||
14517 | |||||
14518 | # Breaking before a ? before a quote can cause trouble if | ||||
14519 | # they are not separated by a blank. | ||||
14520 | # Example: a syntax error occurs if you break before the ? here | ||||
14521 | # my$logic=join$all?' && ':' || ',@regexps; | ||||
14522 | # From: Professional_Perl_Programming_Code/multifind.pl | ||||
14523 | if ( $next_nonblank_type eq '?' ) { | ||||
14524 | $bond_str = NO_BREAK | ||||
14525 | if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' ); | ||||
14526 | } | ||||
14527 | |||||
14528 | # Breaking before a . followed by a number | ||||
14529 | # can cause trouble if there is no intervening space | ||||
14530 | # Example: a syntax error occurs if you break before the .2 here | ||||
14531 | # $str .= pack($endian.2, ensurrogate($ord)); | ||||
14532 | # From: perl58/Unicode.pm | ||||
14533 | elsif ( $next_nonblank_type eq '.' ) { | ||||
14534 | $bond_str = NO_BREAK | ||||
14535 | if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' ); | ||||
14536 | } | ||||
14537 | |||||
14538 | # patch to put cuddled elses back together when on multiple | ||||
14539 | # lines, as in: } \n else \n { \n | ||||
14540 | if ($rOpts_cuddled_else) { | ||||
14541 | |||||
14542 | if ( ( $token eq 'else' ) && ( $next_nonblank_type eq '{' ) | ||||
14543 | || ( $type eq '}' ) && ( $next_nonblank_token eq 'else' ) ) | ||||
14544 | { | ||||
14545 | $bond_str = NO_BREAK; | ||||
14546 | } | ||||
14547 | } | ||||
14548 | my $bond_str_2 = $bond_str; | ||||
14549 | |||||
14550 | #--------------------------------------------------------------- | ||||
14551 | # End of hardwired rules | ||||
14552 | #--------------------------------------------------------------- | ||||
14553 | |||||
14554 | #--------------------------------------------------------------- | ||||
14555 | # Bond Strength Section 3: | ||||
14556 | # Apply table rules. These have priority over the above | ||||
14557 | # hardwired rules. | ||||
14558 | #--------------------------------------------------------------- | ||||
14559 | |||||
14560 | my $tabulated_bond_str; | ||||
14561 | my $ltype = $type; | ||||
14562 | my $rtype = $next_nonblank_type; | ||||
14563 | if ( $token =~ /^[\(\[\{\)\]\}]/ ) { $ltype = $type . $token } | ||||
14564 | if ( $next_nonblank_token =~ /^[\(\[\{\)\]\}]/ ) { | ||||
14565 | $rtype = $next_nonblank_type . $next_nonblank_token; | ||||
14566 | } | ||||
14567 | |||||
14568 | if ( $binary_bond_strength{$ltype}{$rtype} ) { | ||||
14569 | $bond_str = $binary_bond_strength{$ltype}{$rtype}; | ||||
14570 | $tabulated_bond_str = $bond_str; | ||||
14571 | } | ||||
14572 | |||||
14573 | if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) { | ||||
14574 | $bond_str = NO_BREAK; | ||||
14575 | $tabulated_bond_str = $bond_str; | ||||
14576 | } | ||||
14577 | my $bond_str_3 = $bond_str; | ||||
14578 | |||||
14579 | # If the hardwired rules conflict with the tabulated bond | ||||
14580 | # strength then there is an inconsistency that should be fixed | ||||
14581 | FORMATTER_DEBUG_FLAG_BOND_TABLES | ||||
14582 | && $tabulated_bond_str | ||||
14583 | && $bond_str_1 | ||||
14584 | && $bond_str_1 != $bond_str_2 | ||||
14585 | && $bond_str_2 != $tabulated_bond_str | ||||
14586 | && do { | ||||
14587 | print STDERR | ||||
14588 | "BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n"; | ||||
14589 | }; | ||||
14590 | |||||
14591 | #----------------------------------------------------------------- | ||||
14592 | # Bond Strength Section 4: | ||||
14593 | # Modify strengths of certain tokens which often occur in sequence | ||||
14594 | # by adding a small bias to each one in turn so that the breaks | ||||
14595 | # occur from left to right. | ||||
14596 | # | ||||
14597 | # Note that we only changing strengths by small amounts here, | ||||
14598 | # and usually increasing, so we should not be altering any NO_BREAKs. | ||||
14599 | # Other routines which check for NO_BREAKs will use a tolerance | ||||
14600 | # of one to avoid any problem. | ||||
14601 | #----------------------------------------------------------------- | ||||
14602 | |||||
14603 | # The bias tables use special keys | ||||
14604 | my $left_key = bias_table_key( $type, $token ); | ||||
14605 | my $right_key = | ||||
14606 | bias_table_key( $next_nonblank_type, $next_nonblank_token ); | ||||
14607 | |||||
14608 | # add any bias set by sub scan_list at old comma break points. | ||||
14609 | if ( $type eq ',' ) { $bond_str += $bond_strength_to_go[$i] } | ||||
14610 | |||||
14611 | # bias left token | ||||
14612 | elsif ( defined( $bias{$left_key} ) ) { | ||||
14613 | if ( !$want_break_before{$left_key} ) { | ||||
14614 | $bias{$left_key} += $delta_bias; | ||||
14615 | $bond_str += $bias{$left_key}; | ||||
14616 | } | ||||
14617 | } | ||||
14618 | |||||
14619 | # bias right token | ||||
14620 | if ( defined( $bias{$right_key} ) ) { | ||||
14621 | if ( $want_break_before{$right_key} ) { | ||||
14622 | |||||
14623 | # for leading '.' align all but 'short' quotes; the idea | ||||
14624 | # is to not place something like "\n" on a single line. | ||||
14625 | if ( $right_key eq '.' ) { | ||||
14626 | unless ( | ||||
14627 | $last_nonblank_type eq '.' | ||||
14628 | && ( | ||||
14629 | length($token) <= | ||||
14630 | $rOpts_short_concatenation_item_length ) | ||||
14631 | && ( $token !~ /^[\)\]\}]$/ ) | ||||
14632 | ) | ||||
14633 | { | ||||
14634 | $bias{$right_key} += $delta_bias; | ||||
14635 | } | ||||
14636 | } | ||||
14637 | else { | ||||
14638 | $bias{$right_key} += $delta_bias; | ||||
14639 | } | ||||
14640 | $bond_str += $bias{$right_key}; | ||||
14641 | } | ||||
14642 | } | ||||
14643 | my $bond_str_4 = $bond_str; | ||||
14644 | |||||
14645 | #--------------------------------------------------------------- | ||||
14646 | # Bond Strength Section 5: | ||||
14647 | # Fifth Approximation. | ||||
14648 | # Take nesting depth into account by adding the nesting depth | ||||
14649 | # to the bond strength. | ||||
14650 | #--------------------------------------------------------------- | ||||
14651 | my $strength; | ||||
14652 | |||||
14653 | if ( defined($bond_str) && !$nobreak_to_go[$i] ) { | ||||
14654 | if ( $total_nesting_depth > 0 ) { | ||||
14655 | $strength = $bond_str + $total_nesting_depth; | ||||
14656 | } | ||||
14657 | else { | ||||
14658 | $strength = $bond_str; | ||||
14659 | } | ||||
14660 | } | ||||
14661 | else { | ||||
14662 | $strength = NO_BREAK; | ||||
14663 | } | ||||
14664 | |||||
14665 | # always break after side comment | ||||
14666 | if ( $type eq '#' ) { $strength = 0 } | ||||
14667 | |||||
14668 | $bond_strength_to_go[$i] = $strength; | ||||
14669 | |||||
14670 | FORMATTER_DEBUG_FLAG_BOND && do { | ||||
14671 | my $str = substr( $token, 0, 15 ); | ||||
14672 | $str .= ' ' x ( 16 - length($str) ); | ||||
14673 | print STDOUT | ||||
14674 | "BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n"; | ||||
14675 | }; | ||||
14676 | } ## end main loop | ||||
14677 | } ## end sub set_bond_strengths | ||||
14678 | } | ||||
14679 | |||||
14680 | sub pad_array_to_go { | ||||
14681 | |||||
14682 | # to simplify coding in scan_list and set_bond_strengths, it helps | ||||
14683 | # to create some extra blank tokens at the end of the arrays | ||||
14684 | $tokens_to_go[ $max_index_to_go + 1 ] = ''; | ||||
14685 | $tokens_to_go[ $max_index_to_go + 2 ] = ''; | ||||
14686 | $types_to_go[ $max_index_to_go + 1 ] = 'b'; | ||||
14687 | $types_to_go[ $max_index_to_go + 2 ] = 'b'; | ||||
14688 | $nesting_depth_to_go[ $max_index_to_go + 1 ] = | ||||
14689 | $nesting_depth_to_go[$max_index_to_go]; | ||||
14690 | |||||
14691 | # /^[R\}\)\]]$/ | ||||
14692 | if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) { | ||||
14693 | if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) { | ||||
14694 | |||||
14695 | # shouldn't happen: | ||||
14696 | unless ( get_saw_brace_error() ) { | ||||
14697 | warning( | ||||
14698 | "Program bug in scan_list: hit nesting error which should have been caught\n" | ||||
14699 | ); | ||||
14700 | report_definite_bug(); | ||||
14701 | } | ||||
14702 | } | ||||
14703 | else { | ||||
14704 | $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1; | ||||
14705 | } | ||||
14706 | } | ||||
14707 | |||||
14708 | # /^[L\{\(\[]$/ | ||||
14709 | elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) { | ||||
14710 | $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1; | ||||
14711 | } | ||||
14712 | } | ||||
14713 | |||||
14714 | { # begin scan_list | ||||
14715 | |||||
14716 | 1 | 100ns | my ( | ||
14717 | 1 | 200ns | $block_type, $current_depth, | ||
14718 | $depth, $i, | ||||
14719 | $i_last_nonblank_token, $last_colon_sequence_number, | ||||
14720 | $last_nonblank_token, $last_nonblank_type, | ||||
14721 | $last_nonblank_block_type, $last_old_breakpoint_count, | ||||
14722 | $minimum_depth, $next_nonblank_block_type, | ||||
14723 | $next_nonblank_token, $next_nonblank_type, | ||||
14724 | $old_breakpoint_count, $starting_breakpoint_count, | ||||
14725 | $starting_depth, $token, | ||||
14726 | $type, $type_sequence, | ||||
14727 | ); | ||||
14728 | |||||
14729 | my ( | ||||
14730 | @breakpoint_stack, @breakpoint_undo_stack, | ||||
14731 | @comma_index, @container_type, | ||||
14732 | @identifier_count_stack, @index_before_arrow, | ||||
14733 | @interrupted_list, @item_count_stack, | ||||
14734 | @last_comma_index, @last_dot_index, | ||||
14735 | @last_nonblank_type, @old_breakpoint_count_stack, | ||||
14736 | @opening_structure_index_stack, @rfor_semicolon_list, | ||||
14737 | @has_old_logical_breakpoints, @rand_or_list, | ||||
14738 | @i_equals, | ||||
14739 | ); | ||||
14740 | |||||
14741 | # routine to define essential variables when we go 'up' to | ||||
14742 | # a new depth | ||||
14743 | sub check_for_new_minimum_depth { | ||||
14744 | my $depth = shift; | ||||
14745 | if ( $depth < $minimum_depth ) { | ||||
14746 | |||||
14747 | $minimum_depth = $depth; | ||||
14748 | |||||
14749 | # these arrays need not retain values between calls | ||||
14750 | $breakpoint_stack[$depth] = $starting_breakpoint_count; | ||||
14751 | $container_type[$depth] = ""; | ||||
14752 | $identifier_count_stack[$depth] = 0; | ||||
14753 | $index_before_arrow[$depth] = -1; | ||||
14754 | $interrupted_list[$depth] = 1; | ||||
14755 | $item_count_stack[$depth] = 0; | ||||
14756 | $last_nonblank_type[$depth] = ""; | ||||
14757 | $opening_structure_index_stack[$depth] = -1; | ||||
14758 | |||||
14759 | $breakpoint_undo_stack[$depth] = undef; | ||||
14760 | $comma_index[$depth] = undef; | ||||
14761 | $last_comma_index[$depth] = undef; | ||||
14762 | $last_dot_index[$depth] = undef; | ||||
14763 | $old_breakpoint_count_stack[$depth] = undef; | ||||
14764 | $has_old_logical_breakpoints[$depth] = 0; | ||||
14765 | $rand_or_list[$depth] = []; | ||||
14766 | $rfor_semicolon_list[$depth] = []; | ||||
14767 | $i_equals[$depth] = -1; | ||||
14768 | |||||
14769 | # these arrays must retain values between calls | ||||
14770 | if ( !defined( $has_broken_sublist[$depth] ) ) { | ||||
14771 | $dont_align[$depth] = 0; | ||||
14772 | $has_broken_sublist[$depth] = 0; | ||||
14773 | $want_comma_break[$depth] = 0; | ||||
14774 | } | ||||
14775 | } | ||||
14776 | } | ||||
14777 | |||||
14778 | # routine to decide which commas to break at within a container; | ||||
14779 | # returns: | ||||
14780 | # $bp_count = number of comma breakpoints set | ||||
14781 | # $do_not_break_apart = a flag indicating if container need not | ||||
14782 | # be broken open | ||||
14783 | sub set_comma_breakpoints { | ||||
14784 | |||||
14785 | my $dd = shift; | ||||
14786 | my $bp_count = 0; | ||||
14787 | my $do_not_break_apart = 0; | ||||
14788 | |||||
14789 | # anything to do? | ||||
14790 | if ( $item_count_stack[$dd] ) { | ||||
14791 | |||||
14792 | # handle commas not in containers... | ||||
14793 | if ( $dont_align[$dd] ) { | ||||
14794 | do_uncontained_comma_breaks($dd); | ||||
14795 | } | ||||
14796 | |||||
14797 | # handle commas within containers... | ||||
14798 | else { | ||||
14799 | my $fbc = $forced_breakpoint_count; | ||||
14800 | |||||
14801 | # always open comma lists not preceded by keywords, | ||||
14802 | # barewords, identifiers (that is, anything that doesn't | ||||
14803 | # look like a function call) | ||||
14804 | my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/; | ||||
14805 | |||||
14806 | set_comma_breakpoints_do( | ||||
14807 | $dd, | ||||
14808 | $opening_structure_index_stack[$dd], | ||||
14809 | $i, | ||||
14810 | $item_count_stack[$dd], | ||||
14811 | $identifier_count_stack[$dd], | ||||
14812 | $comma_index[$dd], | ||||
14813 | $next_nonblank_type, | ||||
14814 | $container_type[$dd], | ||||
14815 | $interrupted_list[$dd], | ||||
14816 | \$do_not_break_apart, | ||||
14817 | $must_break_open, | ||||
14818 | ); | ||||
14819 | $bp_count = $forced_breakpoint_count - $fbc; | ||||
14820 | $do_not_break_apart = 0 if $must_break_open; | ||||
14821 | } | ||||
14822 | } | ||||
14823 | return ( $bp_count, $do_not_break_apart ); | ||||
14824 | } | ||||
14825 | |||||
14826 | sub do_uncontained_comma_breaks { | ||||
14827 | |||||
14828 | # Handle commas not in containers... | ||||
14829 | # This is a catch-all routine for commas that we | ||||
14830 | # don't know what to do with because the don't fall | ||||
14831 | # within containers. We will bias the bond strength | ||||
14832 | # to break at commas which ended lines in the input | ||||
14833 | # file. This usually works better than just trying | ||||
14834 | # to put as many items on a line as possible. A | ||||
14835 | # downside is that if the input file is garbage it | ||||
14836 | # won't work very well. However, the user can always | ||||
14837 | # prevent following the old breakpoints with the | ||||
14838 | # -iob flag. | ||||
14839 | my $dd = shift; | ||||
14840 | my $bias = -.01; | ||||
14841 | my $old_comma_break_count = 0; | ||||
14842 | foreach my $ii ( @{ $comma_index[$dd] } ) { | ||||
14843 | if ( $old_breakpoint_to_go[$ii] ) { | ||||
14844 | $old_comma_break_count++; | ||||
14845 | $bond_strength_to_go[$ii] = $bias; | ||||
14846 | |||||
14847 | # reduce bias magnitude to force breaks in order | ||||
14848 | $bias *= 0.99; | ||||
14849 | } | ||||
14850 | } | ||||
14851 | |||||
14852 | # Also put a break before the first comma if | ||||
14853 | # (1) there was a break there in the input, and | ||||
14854 | # (2) there was exactly one old break before the first comma break | ||||
14855 | # (3) OLD: there are multiple old comma breaks | ||||
14856 | # (3) NEW: there are one or more old comma breaks (see return example) | ||||
14857 | # | ||||
14858 | # For example, we will follow the user and break after | ||||
14859 | # 'print' in this snippet: | ||||
14860 | |||||
14861 | # "conformability (Not the same dimension)\n", | ||||
14862 | # "\t", $have, " is ", text_unit($hu), "\n", | ||||
14863 | # "\t", $want, " is ", text_unit($wu), "\n", | ||||
14864 | # ; | ||||
14865 | # | ||||
14866 | # Another example, just one comma, where we will break after | ||||
14867 | # the return: | ||||
14868 | # return | ||||
14869 | # $x * cos($a) - $y * sin($a), | ||||
14870 | # $x * sin($a) + $y * cos($a); | ||||
14871 | |||||
14872 | # Breaking a print statement: | ||||
14873 | # print SAVEOUT | ||||
14874 | # ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "", | ||||
14875 | # ( $? & 128 ) ? " -- core dumped" : "", "\n"; | ||||
14876 | # | ||||
14877 | # But we will not force a break after the opening paren here | ||||
14878 | # (causes a blinker): | ||||
14879 | # $heap->{stream}->set_output_filter( | ||||
14880 | # poe::filter::reference->new('myotherfreezer') ), | ||||
14881 | # ; | ||||
14882 | # | ||||
14883 | my $i_first_comma = $comma_index[$dd]->[0]; | ||||
14884 | if ( $old_breakpoint_to_go[$i_first_comma] ) { | ||||
14885 | my $level_comma = $levels_to_go[$i_first_comma]; | ||||
14886 | my $ibreak = -1; | ||||
14887 | my $obp_count = 0; | ||||
14888 | for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) { | ||||
14889 | if ( $old_breakpoint_to_go[$ii] ) { | ||||
14890 | $obp_count++; | ||||
14891 | last if ( $obp_count > 1 ); | ||||
14892 | $ibreak = $ii | ||||
14893 | if ( $levels_to_go[$ii] == $level_comma ); | ||||
14894 | } | ||||
14895 | } | ||||
14896 | |||||
14897 | # Changed rule from multiple old commas to just one here: | ||||
14898 | if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 ) | ||||
14899 | { | ||||
14900 | # Do not to break before an opening token because | ||||
14901 | # it can lead to "blinkers". | ||||
14902 | my $ibreakm = $ibreak; | ||||
14903 | $ibreakm-- if ( $types_to_go[$ibreakm] eq 'b' ); | ||||
14904 | if ( $ibreakm >= 0 && $types_to_go[$ibreakm] !~ /^[\(\{\[L]$/ ) | ||||
14905 | { | ||||
14906 | set_forced_breakpoint($ibreak); | ||||
14907 | } | ||||
14908 | } | ||||
14909 | } | ||||
14910 | } | ||||
14911 | |||||
14912 | 1 | 900ns | my %is_logical_container; | ||
14913 | |||||
14914 | # spent 14µs within Perl::Tidy::Formatter::BEGIN@14914 which was called:
# once (14µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 14917 | ||||
14915 | 1 | 5µs | @_ = qw# if elsif unless while and or err not && | || ? : ! #; | ||
14916 | 1 | 12µs | @is_logical_container{@_} = (1) x scalar(@_); | ||
14917 | 1 | 2.48ms | 1 | 14µs | } # spent 14µs making 1 call to Perl::Tidy::Formatter::BEGIN@14914 |
14918 | |||||
14919 | sub set_for_semicolon_breakpoints { | ||||
14920 | my $dd = shift; | ||||
14921 | foreach ( @{ $rfor_semicolon_list[$dd] } ) { | ||||
14922 | set_forced_breakpoint($_); | ||||
14923 | } | ||||
14924 | } | ||||
14925 | |||||
14926 | sub set_logical_breakpoints { | ||||
14927 | my $dd = shift; | ||||
14928 | if ( | ||||
14929 | $item_count_stack[$dd] == 0 | ||||
14930 | && $is_logical_container{ $container_type[$dd] } | ||||
14931 | |||||
14932 | || $has_old_logical_breakpoints[$dd] | ||||
14933 | ) | ||||
14934 | { | ||||
14935 | |||||
14936 | # Look for breaks in this order: | ||||
14937 | # 0 1 2 3 | ||||
14938 | # or and || && | ||||
14939 | foreach my $i ( 0 .. 3 ) { | ||||
14940 | if ( $rand_or_list[$dd][$i] ) { | ||||
14941 | foreach ( @{ $rand_or_list[$dd][$i] } ) { | ||||
14942 | set_forced_breakpoint($_); | ||||
14943 | } | ||||
14944 | |||||
14945 | # break at any 'if' and 'unless' too | ||||
14946 | foreach ( @{ $rand_or_list[$dd][4] } ) { | ||||
14947 | set_forced_breakpoint($_); | ||||
14948 | } | ||||
14949 | $rand_or_list[$dd] = []; | ||||
14950 | last; | ||||
14951 | } | ||||
14952 | } | ||||
14953 | } | ||||
14954 | } | ||||
14955 | |||||
14956 | sub is_unbreakable_container { | ||||
14957 | |||||
14958 | # never break a container of one of these types | ||||
14959 | # because bad things can happen (map1.t) | ||||
14960 | my $dd = shift; | ||||
14961 | $is_sort_map_grep{ $container_type[$dd] }; | ||||
14962 | } | ||||
14963 | |||||
14964 | sub scan_list { | ||||
14965 | |||||
14966 | # This routine is responsible for setting line breaks for all lists, | ||||
14967 | # so that hierarchical structure can be displayed and so that list | ||||
14968 | # items can be vertically aligned. The output of this routine is | ||||
14969 | # stored in the array @forced_breakpoint_to_go, which is used to set | ||||
14970 | # final breakpoints. | ||||
14971 | |||||
14972 | $starting_depth = $nesting_depth_to_go[0]; | ||||
14973 | |||||
14974 | $block_type = ' '; | ||||
14975 | $current_depth = $starting_depth; | ||||
14976 | $i = -1; | ||||
14977 | $last_colon_sequence_number = -1; | ||||
14978 | $last_nonblank_token = ';'; | ||||
14979 | $last_nonblank_type = ';'; | ||||
14980 | $last_nonblank_block_type = ' '; | ||||
14981 | $last_old_breakpoint_count = 0; | ||||
14982 | $minimum_depth = $current_depth + 1; # forces update in check below | ||||
14983 | $old_breakpoint_count = 0; | ||||
14984 | $starting_breakpoint_count = $forced_breakpoint_count; | ||||
14985 | $token = ';'; | ||||
14986 | $type = ';'; | ||||
14987 | $type_sequence = ''; | ||||
14988 | |||||
14989 | my $total_depth_variation = 0; | ||||
14990 | my $i_old_assignment_break; | ||||
14991 | my $depth_last = $starting_depth; | ||||
14992 | |||||
14993 | check_for_new_minimum_depth($current_depth); | ||||
14994 | |||||
14995 | my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0; | ||||
14996 | my $want_previous_breakpoint = -1; | ||||
14997 | |||||
14998 | my $saw_good_breakpoint; | ||||
14999 | my $i_line_end = -1; | ||||
15000 | my $i_line_start = -1; | ||||
15001 | |||||
15002 | # loop over all tokens in this batch | ||||
15003 | while ( ++$i <= $max_index_to_go ) { | ||||
15004 | if ( $type ne 'b' ) { | ||||
15005 | $i_last_nonblank_token = $i - 1; | ||||
15006 | $last_nonblank_type = $type; | ||||
15007 | $last_nonblank_token = $token; | ||||
15008 | $last_nonblank_block_type = $block_type; | ||||
15009 | } ## end if ( $type ne 'b' ) | ||||
15010 | $type = $types_to_go[$i]; | ||||
15011 | $block_type = $block_type_to_go[$i]; | ||||
15012 | $token = $tokens_to_go[$i]; | ||||
15013 | $type_sequence = $type_sequence_to_go[$i]; | ||||
15014 | my $next_type = $types_to_go[ $i + 1 ]; | ||||
15015 | my $next_token = $tokens_to_go[ $i + 1 ]; | ||||
15016 | my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 ); | ||||
15017 | $next_nonblank_type = $types_to_go[$i_next_nonblank]; | ||||
15018 | $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; | ||||
15019 | $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank]; | ||||
15020 | |||||
15021 | # set break if flag was set | ||||
15022 | if ( $want_previous_breakpoint >= 0 ) { | ||||
15023 | set_forced_breakpoint($want_previous_breakpoint); | ||||
15024 | $want_previous_breakpoint = -1; | ||||
15025 | } | ||||
15026 | |||||
15027 | $last_old_breakpoint_count = $old_breakpoint_count; | ||||
15028 | if ( $old_breakpoint_to_go[$i] ) { | ||||
15029 | $i_line_end = $i; | ||||
15030 | $i_line_start = $i_next_nonblank; | ||||
15031 | |||||
15032 | $old_breakpoint_count++; | ||||
15033 | |||||
15034 | # Break before certain keywords if user broke there and | ||||
15035 | # this is a 'safe' break point. The idea is to retain | ||||
15036 | # any preferred breaks for sequential list operations, | ||||
15037 | # like a schwartzian transform. | ||||
15038 | if ($rOpts_break_at_old_keyword_breakpoints) { | ||||
15039 | if ( | ||||
15040 | $next_nonblank_type eq 'k' | ||||
15041 | && $is_keyword_returning_list{$next_nonblank_token} | ||||
15042 | && ( $type =~ /^[=\)\]\}Riw]$/ | ||||
15043 | || $type eq 'k' | ||||
15044 | && $is_keyword_returning_list{$token} ) | ||||
15045 | ) | ||||
15046 | { | ||||
15047 | |||||
15048 | # we actually have to set this break next time through | ||||
15049 | # the loop because if we are at a closing token (such | ||||
15050 | # as '}') which forms a one-line block, this break might | ||||
15051 | # get undone. | ||||
15052 | $want_previous_breakpoint = $i; | ||||
15053 | } ## end if ( $next_nonblank_type...) | ||||
15054 | } ## end if ($rOpts_break_at_old_keyword_breakpoints) | ||||
15055 | |||||
15056 | # Break before attributes if user broke there | ||||
15057 | if ($rOpts_break_at_old_attribute_breakpoints) { | ||||
15058 | if ( $next_nonblank_type eq 'A' ) { | ||||
15059 | $want_previous_breakpoint = $i; | ||||
15060 | } | ||||
15061 | } | ||||
15062 | |||||
15063 | # remember an = break as possible good break point | ||||
15064 | if ( $is_assignment{$type} ) { | ||||
15065 | $i_old_assignment_break = $i; | ||||
15066 | } | ||||
15067 | elsif ( $is_assignment{$next_nonblank_type} ) { | ||||
15068 | $i_old_assignment_break = $i_next_nonblank; | ||||
15069 | } | ||||
15070 | } ## end if ( $old_breakpoint_to_go...) | ||||
15071 | next if ( $type eq 'b' ); | ||||
15072 | $depth = $nesting_depth_to_go[ $i + 1 ]; | ||||
15073 | |||||
15074 | $total_depth_variation += abs( $depth - $depth_last ); | ||||
15075 | $depth_last = $depth; | ||||
15076 | |||||
15077 | # safety check - be sure we always break after a comment | ||||
15078 | # Shouldn't happen .. an error here probably means that the | ||||
15079 | # nobreak flag did not get turned off correctly during | ||||
15080 | # formatting. | ||||
15081 | if ( $type eq '#' ) { | ||||
15082 | if ( $i != $max_index_to_go ) { | ||||
15083 | warning( | ||||
15084 | "Non-fatal program bug: backup logic needed to break after a comment\n" | ||||
15085 | ); | ||||
15086 | report_definite_bug(); | ||||
15087 | $nobreak_to_go[$i] = 0; | ||||
15088 | set_forced_breakpoint($i); | ||||
15089 | } ## end if ( $i != $max_index_to_go) | ||||
15090 | } ## end if ( $type eq '#' ) | ||||
15091 | |||||
15092 | # Force breakpoints at certain tokens in long lines. | ||||
15093 | # Note that such breakpoints will be undone later if these tokens | ||||
15094 | # are fully contained within parens on a line. | ||||
15095 | if ( | ||||
15096 | |||||
15097 | # break before a keyword within a line | ||||
15098 | $type eq 'k' | ||||
15099 | && $i > 0 | ||||
15100 | |||||
15101 | # if one of these keywords: | ||||
15102 | && $token =~ /^(if|unless|while|until|for)$/ | ||||
15103 | |||||
15104 | # but do not break at something like '1 while' | ||||
15105 | && ( $last_nonblank_type ne 'n' || $i > 2 ) | ||||
15106 | |||||
15107 | # and let keywords follow a closing 'do' brace | ||||
15108 | && $last_nonblank_block_type ne 'do' | ||||
15109 | |||||
15110 | && ( | ||||
15111 | $is_long_line | ||||
15112 | |||||
15113 | # or container is broken (by side-comment, etc) | ||||
15114 | || ( $next_nonblank_token eq '(' | ||||
15115 | && $mate_index_to_go[$i_next_nonblank] < $i ) | ||||
15116 | ) | ||||
15117 | ) | ||||
15118 | { | ||||
15119 | set_forced_breakpoint( $i - 1 ); | ||||
15120 | } ## end if ( $type eq 'k' && $i...) | ||||
15121 | |||||
15122 | # remember locations of '||' and '&&' for possible breaks if we | ||||
15123 | # decide this is a long logical expression. | ||||
15124 | if ( $type eq '||' ) { | ||||
15125 | push @{ $rand_or_list[$depth][2] }, $i; | ||||
15126 | ++$has_old_logical_breakpoints[$depth] | ||||
15127 | if ( ( $i == $i_line_start || $i == $i_line_end ) | ||||
15128 | && $rOpts_break_at_old_logical_breakpoints ); | ||||
15129 | } ## end if ( $type eq '||' ) | ||||
15130 | elsif ( $type eq '&&' ) { | ||||
15131 | push @{ $rand_or_list[$depth][3] }, $i; | ||||
15132 | ++$has_old_logical_breakpoints[$depth] | ||||
15133 | if ( ( $i == $i_line_start || $i == $i_line_end ) | ||||
15134 | && $rOpts_break_at_old_logical_breakpoints ); | ||||
15135 | } ## end elsif ( $type eq '&&' ) | ||||
15136 | elsif ( $type eq 'f' ) { | ||||
15137 | push @{ $rfor_semicolon_list[$depth] }, $i; | ||||
15138 | } | ||||
15139 | elsif ( $type eq 'k' ) { | ||||
15140 | if ( $token eq 'and' ) { | ||||
15141 | push @{ $rand_or_list[$depth][1] }, $i; | ||||
15142 | ++$has_old_logical_breakpoints[$depth] | ||||
15143 | if ( ( $i == $i_line_start || $i == $i_line_end ) | ||||
15144 | && $rOpts_break_at_old_logical_breakpoints ); | ||||
15145 | } ## end if ( $token eq 'and' ) | ||||
15146 | |||||
15147 | # break immediately at 'or's which are probably not in a logical | ||||
15148 | # block -- but we will break in logical breaks below so that | ||||
15149 | # they do not add to the forced_breakpoint_count | ||||
15150 | elsif ( $token eq 'or' ) { | ||||
15151 | push @{ $rand_or_list[$depth][0] }, $i; | ||||
15152 | ++$has_old_logical_breakpoints[$depth] | ||||
15153 | if ( ( $i == $i_line_start || $i == $i_line_end ) | ||||
15154 | && $rOpts_break_at_old_logical_breakpoints ); | ||||
15155 | if ( $is_logical_container{ $container_type[$depth] } ) { | ||||
15156 | } | ||||
15157 | else { | ||||
15158 | if ($is_long_line) { set_forced_breakpoint($i) } | ||||
15159 | elsif ( ( $i == $i_line_start || $i == $i_line_end ) | ||||
15160 | && $rOpts_break_at_old_logical_breakpoints ) | ||||
15161 | { | ||||
15162 | $saw_good_breakpoint = 1; | ||||
15163 | } | ||||
15164 | } ## end else [ if ( $is_logical_container...)] | ||||
15165 | } ## end elsif ( $token eq 'or' ) | ||||
15166 | elsif ( $token eq 'if' || $token eq 'unless' ) { | ||||
15167 | push @{ $rand_or_list[$depth][4] }, $i; | ||||
15168 | if ( ( $i == $i_line_start || $i == $i_line_end ) | ||||
15169 | && $rOpts_break_at_old_logical_breakpoints ) | ||||
15170 | { | ||||
15171 | set_forced_breakpoint($i); | ||||
15172 | } | ||||
15173 | } ## end elsif ( $token eq 'if' ||...) | ||||
15174 | } ## end elsif ( $type eq 'k' ) | ||||
15175 | elsif ( $is_assignment{$type} ) { | ||||
15176 | $i_equals[$depth] = $i; | ||||
15177 | } | ||||
15178 | |||||
15179 | if ($type_sequence) { | ||||
15180 | |||||
15181 | # handle any postponed closing breakpoints | ||||
15182 | if ( $token =~ /^[\)\]\}\:]$/ ) { | ||||
15183 | if ( $type eq ':' ) { | ||||
15184 | $last_colon_sequence_number = $type_sequence; | ||||
15185 | |||||
15186 | # retain break at a ':' line break | ||||
15187 | if ( ( $i == $i_line_start || $i == $i_line_end ) | ||||
15188 | && $rOpts_break_at_old_ternary_breakpoints ) | ||||
15189 | { | ||||
15190 | |||||
15191 | set_forced_breakpoint($i); | ||||
15192 | |||||
15193 | # break at previous '=' | ||||
15194 | if ( $i_equals[$depth] > 0 ) { | ||||
15195 | set_forced_breakpoint( $i_equals[$depth] ); | ||||
15196 | $i_equals[$depth] = -1; | ||||
15197 | } | ||||
15198 | } ## end if ( ( $i == $i_line_start...)) | ||||
15199 | } ## end if ( $type eq ':' ) | ||||
15200 | if ( defined( $postponed_breakpoint{$type_sequence} ) ) { | ||||
15201 | my $inc = ( $type eq ':' ) ? 0 : 1; | ||||
15202 | set_forced_breakpoint( $i - $inc ); | ||||
15203 | delete $postponed_breakpoint{$type_sequence}; | ||||
15204 | } | ||||
15205 | } ## end if ( $token =~ /^[\)\]\}\:]$/[{[(]) | ||||
15206 | |||||
15207 | # set breaks at ?/: if they will get separated (and are | ||||
15208 | # not a ?/: chain), or if the '?' is at the end of the | ||||
15209 | # line | ||||
15210 | elsif ( $token eq '?' ) { | ||||
15211 | my $i_colon = $mate_index_to_go[$i]; | ||||
15212 | if ( | ||||
15213 | $i_colon <= 0 # the ':' is not in this batch | ||||
15214 | || $i == 0 # this '?' is the first token of the line | ||||
15215 | || $i == | ||||
15216 | $max_index_to_go # or this '?' is the last token | ||||
15217 | ) | ||||
15218 | { | ||||
15219 | |||||
15220 | # don't break at a '?' if preceded by ':' on | ||||
15221 | # this line of previous ?/: pair on this line. | ||||
15222 | # This is an attempt to preserve a chain of ?/: | ||||
15223 | # expressions (elsif2.t). And don't break if | ||||
15224 | # this has a side comment. | ||||
15225 | set_forced_breakpoint($i) | ||||
15226 | unless ( | ||||
15227 | $type_sequence == ( | ||||
15228 | $last_colon_sequence_number + | ||||
15229 | TYPE_SEQUENCE_INCREMENT | ||||
15230 | ) | ||||
15231 | || $tokens_to_go[$max_index_to_go] eq '#' | ||||
15232 | ); | ||||
15233 | set_closing_breakpoint($i); | ||||
15234 | } ## end if ( $i_colon <= 0 ||...) | ||||
15235 | } ## end elsif ( $token eq '?' ) | ||||
15236 | } ## end if ($type_sequence) | ||||
15237 | |||||
15238 | #print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n"; | ||||
15239 | |||||
15240 | #------------------------------------------------------------ | ||||
15241 | # Handle Increasing Depth.. | ||||
15242 | # | ||||
15243 | # prepare for a new list when depth increases | ||||
15244 | # token $i is a '(','{', or '[' | ||||
15245 | #------------------------------------------------------------ | ||||
15246 | if ( $depth > $current_depth ) { | ||||
15247 | |||||
15248 | $breakpoint_stack[$depth] = $forced_breakpoint_count; | ||||
15249 | $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count; | ||||
15250 | $has_broken_sublist[$depth] = 0; | ||||
15251 | $identifier_count_stack[$depth] = 0; | ||||
15252 | $index_before_arrow[$depth] = -1; | ||||
15253 | $interrupted_list[$depth] = 0; | ||||
15254 | $item_count_stack[$depth] = 0; | ||||
15255 | $last_comma_index[$depth] = undef; | ||||
15256 | $last_dot_index[$depth] = undef; | ||||
15257 | $last_nonblank_type[$depth] = $last_nonblank_type; | ||||
15258 | $old_breakpoint_count_stack[$depth] = $old_breakpoint_count; | ||||
15259 | $opening_structure_index_stack[$depth] = $i; | ||||
15260 | $rand_or_list[$depth] = []; | ||||
15261 | $rfor_semicolon_list[$depth] = []; | ||||
15262 | $i_equals[$depth] = -1; | ||||
15263 | $want_comma_break[$depth] = 0; | ||||
15264 | $container_type[$depth] = | ||||
15265 | ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ ) | ||||
15266 | ? $last_nonblank_token | ||||
15267 | : ""; | ||||
15268 | $has_old_logical_breakpoints[$depth] = 0; | ||||
15269 | |||||
15270 | # if line ends here then signal closing token to break | ||||
15271 | if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' ) | ||||
15272 | { | ||||
15273 | set_closing_breakpoint($i); | ||||
15274 | } | ||||
15275 | |||||
15276 | # Not all lists of values should be vertically aligned.. | ||||
15277 | $dont_align[$depth] = | ||||
15278 | |||||
15279 | # code BLOCKS are handled at a higher level | ||||
15280 | ( $block_type ne "" ) | ||||
15281 | |||||
15282 | # certain paren lists | ||||
15283 | || ( $type eq '(' ) && ( | ||||
15284 | |||||
15285 | # it does not usually look good to align a list of | ||||
15286 | # identifiers in a parameter list, as in: | ||||
15287 | # my($var1, $var2, ...) | ||||
15288 | # (This test should probably be refined, for now I'm just | ||||
15289 | # testing for any keyword) | ||||
15290 | ( $last_nonblank_type eq 'k' ) | ||||
15291 | |||||
15292 | # a trailing '(' usually indicates a non-list | ||||
15293 | || ( $next_nonblank_type eq '(' ) | ||||
15294 | ); | ||||
15295 | |||||
15296 | # patch to outdent opening brace of long if/for/.. | ||||
15297 | # statements (like this one). See similar coding in | ||||
15298 | # set_continuation breaks. We have also catch it here for | ||||
15299 | # short line fragments which otherwise will not go through | ||||
15300 | # set_continuation_breaks. | ||||
15301 | if ( | ||||
15302 | $block_type | ||||
15303 | |||||
15304 | # if we have the ')' but not its '(' in this batch.. | ||||
15305 | && ( $last_nonblank_token eq ')' ) | ||||
15306 | && $mate_index_to_go[$i_last_nonblank_token] < 0 | ||||
15307 | |||||
15308 | # and user wants brace to left | ||||
15309 | && !$rOpts->{'opening-brace-always-on-right'} | ||||
15310 | |||||
15311 | && ( $type eq '{' ) # should be true | ||||
15312 | && ( $token eq '{' ) # should be true | ||||
15313 | ) | ||||
15314 | { | ||||
15315 | set_forced_breakpoint( $i - 1 ); | ||||
15316 | } ## end if ( $block_type && ( ...)) | ||||
15317 | } ## end if ( $depth > $current_depth) | ||||
15318 | |||||
15319 | #------------------------------------------------------------ | ||||
15320 | # Handle Decreasing Depth.. | ||||
15321 | # | ||||
15322 | # finish off any old list when depth decreases | ||||
15323 | # token $i is a ')','}', or ']' | ||||
15324 | #------------------------------------------------------------ | ||||
15325 | elsif ( $depth < $current_depth ) { | ||||
15326 | |||||
15327 | check_for_new_minimum_depth($depth); | ||||
15328 | |||||
15329 | # force all outer logical containers to break after we see on | ||||
15330 | # old breakpoint | ||||
15331 | $has_old_logical_breakpoints[$depth] ||= | ||||
15332 | $has_old_logical_breakpoints[$current_depth]; | ||||
15333 | |||||
15334 | # Patch to break between ') {' if the paren list is broken. | ||||
15335 | # There is similar logic in set_continuation_breaks for | ||||
15336 | # non-broken lists. | ||||
15337 | if ( $token eq ')' | ||||
15338 | && $next_nonblank_block_type | ||||
15339 | && $interrupted_list[$current_depth] | ||||
15340 | && $next_nonblank_type eq '{' | ||||
15341 | && !$rOpts->{'opening-brace-always-on-right'} ) | ||||
15342 | { | ||||
15343 | set_forced_breakpoint($i); | ||||
15344 | } ## end if ( $token eq ')' && ... | ||||
15345 | |||||
15346 | #print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n"; | ||||
15347 | |||||
15348 | # set breaks at commas if necessary | ||||
15349 | my ( $bp_count, $do_not_break_apart ) = | ||||
15350 | set_comma_breakpoints($current_depth); | ||||
15351 | |||||
15352 | my $i_opening = $opening_structure_index_stack[$current_depth]; | ||||
15353 | my $saw_opening_structure = ( $i_opening >= 0 ); | ||||
15354 | |||||
15355 | # this term is long if we had to break at interior commas.. | ||||
15356 | my $is_long_term = $bp_count > 0; | ||||
15357 | |||||
15358 | # If this is a short container with one or more comma arrows, | ||||
15359 | # then we will mark it as a long term to open it if requested. | ||||
15360 | # $rOpts_comma_arrow_breakpoints = | ||||
15361 | # 0 - open only if comma precedes closing brace | ||||
15362 | # 1 - stable: except for one line blocks | ||||
15363 | # 2 - try to form 1 line blocks | ||||
15364 | # 3 - ignore => | ||||
15365 | # 4 - always open up if vt=0 | ||||
15366 | # 5 - stable: even for one line blocks if vt=0 | ||||
15367 | if ( !$is_long_term | ||||
15368 | && $tokens_to_go[$i_opening] =~ /^[\(\{\[]$/ | ||||
15369 | && $index_before_arrow[ $depth + 1 ] > 0 | ||||
15370 | && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] } | ||||
15371 | ) | ||||
15372 | { | ||||
15373 | $is_long_term = $rOpts_comma_arrow_breakpoints == 4 | ||||
15374 | || ( $rOpts_comma_arrow_breakpoints == 0 | ||||
15375 | && $last_nonblank_token eq ',' ) | ||||
15376 | || ( $rOpts_comma_arrow_breakpoints == 5 | ||||
15377 | && $old_breakpoint_to_go[$i_opening] ); | ||||
15378 | } ## end if ( !$is_long_term &&...) | ||||
15379 | |||||
15380 | # mark term as long if the length between opening and closing | ||||
15381 | # parens exceeds allowed line length | ||||
15382 | if ( !$is_long_term && $saw_opening_structure ) { | ||||
15383 | my $i_opening_minus = find_token_starting_list($i_opening); | ||||
15384 | |||||
15385 | # Note: we have to allow for one extra space after a | ||||
15386 | # closing token so that we do not strand a comma or | ||||
15387 | # semicolon, hence the '>=' here (oneline.t) | ||||
15388 | $is_long_term = | ||||
15389 | excess_line_length( $i_opening_minus, $i ) >= 0; | ||||
15390 | } ## end if ( !$is_long_term &&...) | ||||
15391 | |||||
15392 | # We've set breaks after all comma-arrows. Now we have to | ||||
15393 | # undo them if this can be a one-line block | ||||
15394 | # (the only breakpoints set will be due to comma-arrows) | ||||
15395 | if ( | ||||
15396 | |||||
15397 | # user doesn't require breaking after all comma-arrows | ||||
15398 | ( $rOpts_comma_arrow_breakpoints != 0 ) | ||||
15399 | && ( $rOpts_comma_arrow_breakpoints != 4 ) | ||||
15400 | |||||
15401 | # and if the opening structure is in this batch | ||||
15402 | && $saw_opening_structure | ||||
15403 | |||||
15404 | # and either on the same old line | ||||
15405 | && ( | ||||
15406 | $old_breakpoint_count_stack[$current_depth] == | ||||
15407 | $last_old_breakpoint_count | ||||
15408 | |||||
15409 | # or user wants to form long blocks with arrows | ||||
15410 | || $rOpts_comma_arrow_breakpoints == 2 | ||||
15411 | ) | ||||
15412 | |||||
15413 | # and we made some breakpoints between the opening and closing | ||||
15414 | && ( $breakpoint_undo_stack[$current_depth] < | ||||
15415 | $forced_breakpoint_undo_count ) | ||||
15416 | |||||
15417 | # and this block is short enough to fit on one line | ||||
15418 | # Note: use < because need 1 more space for possible comma | ||||
15419 | && !$is_long_term | ||||
15420 | |||||
15421 | ) | ||||
15422 | { | ||||
15423 | undo_forced_breakpoint_stack( | ||||
15424 | $breakpoint_undo_stack[$current_depth] ); | ||||
15425 | } ## end if ( ( $rOpts_comma_arrow_breakpoints...)) | ||||
15426 | |||||
15427 | # now see if we have any comma breakpoints left | ||||
15428 | my $has_comma_breakpoints = | ||||
15429 | ( $breakpoint_stack[$current_depth] != | ||||
15430 | $forced_breakpoint_count ); | ||||
15431 | |||||
15432 | # update broken-sublist flag of the outer container | ||||
15433 | $has_broken_sublist[$depth] = | ||||
15434 | $has_broken_sublist[$depth] | ||||
15435 | || $has_broken_sublist[$current_depth] | ||||
15436 | || $is_long_term | ||||
15437 | || $has_comma_breakpoints; | ||||
15438 | |||||
15439 | # Having come to the closing ')', '}', or ']', now we have to decide if we | ||||
15440 | # should 'open up' the structure by placing breaks at the opening and | ||||
15441 | # closing containers. This is a tricky decision. Here are some of the | ||||
15442 | # basic considerations: | ||||
15443 | # | ||||
15444 | # -If this is a BLOCK container, then any breakpoints will have already | ||||
15445 | # been set (and according to user preferences), so we need do nothing here. | ||||
15446 | # | ||||
15447 | # -If we have a comma-separated list for which we can align the list items, | ||||
15448 | # then we need to do so because otherwise the vertical aligner cannot | ||||
15449 | # currently do the alignment. | ||||
15450 | # | ||||
15451 | # -If this container does itself contain a container which has been broken | ||||
15452 | # open, then it should be broken open to properly show the structure. | ||||
15453 | # | ||||
15454 | # -If there is nothing to align, and no other reason to break apart, | ||||
15455 | # then do not do it. | ||||
15456 | # | ||||
15457 | # We will not break open the parens of a long but 'simple' logical expression. | ||||
15458 | # For example: | ||||
15459 | # | ||||
15460 | # This is an example of a simple logical expression and its formatting: | ||||
15461 | # | ||||
15462 | # if ( $bigwasteofspace1 && $bigwasteofspace2 | ||||
15463 | # || $bigwasteofspace3 && $bigwasteofspace4 ) | ||||
15464 | # | ||||
15465 | # Most people would prefer this than the 'spacey' version: | ||||
15466 | # | ||||
15467 | # if ( | ||||
15468 | # $bigwasteofspace1 && $bigwasteofspace2 | ||||
15469 | # || $bigwasteofspace3 && $bigwasteofspace4 | ||||
15470 | # ) | ||||
15471 | # | ||||
15472 | # To illustrate the rules for breaking logical expressions, consider: | ||||
15473 | # | ||||
15474 | # FULLY DENSE: | ||||
15475 | # if ( $opt_excl | ||||
15476 | # and ( exists $ids_excl_uc{$id_uc} | ||||
15477 | # or grep $id_uc =~ /$_/, @ids_excl_uc )) | ||||
15478 | # | ||||
15479 | # This is on the verge of being difficult to read. The current default is to | ||||
15480 | # open it up like this: | ||||
15481 | # | ||||
15482 | # DEFAULT: | ||||
15483 | # if ( | ||||
15484 | # $opt_excl | ||||
15485 | # and ( exists $ids_excl_uc{$id_uc} | ||||
15486 | # or grep $id_uc =~ /$_/, @ids_excl_uc ) | ||||
15487 | # ) | ||||
15488 | # | ||||
15489 | # This is a compromise which tries to avoid being too dense and to spacey. | ||||
15490 | # A more spaced version would be: | ||||
15491 | # | ||||
15492 | # SPACEY: | ||||
15493 | # if ( | ||||
15494 | # $opt_excl | ||||
15495 | # and ( | ||||
15496 | # exists $ids_excl_uc{$id_uc} | ||||
15497 | # or grep $id_uc =~ /$_/, @ids_excl_uc | ||||
15498 | # ) | ||||
15499 | # ) | ||||
15500 | # | ||||
15501 | # Some people might prefer the spacey version -- an option could be added. The | ||||
15502 | # innermost expression contains a long block '( exists $ids_... ')'. | ||||
15503 | # | ||||
15504 | # Here is how the logic goes: We will force a break at the 'or' that the | ||||
15505 | # innermost expression contains, but we will not break apart its opening and | ||||
15506 | # closing containers because (1) it contains no multi-line sub-containers itself, | ||||
15507 | # and (2) there is no alignment to be gained by breaking it open like this | ||||
15508 | # | ||||
15509 | # and ( | ||||
15510 | # exists $ids_excl_uc{$id_uc} | ||||
15511 | # or grep $id_uc =~ /$_/, @ids_excl_uc | ||||
15512 | # ) | ||||
15513 | # | ||||
15514 | # (although this looks perfectly ok and might be good for long expressions). The | ||||
15515 | # outer 'if' container, though, contains a broken sub-container, so it will be | ||||
15516 | # broken open to avoid too much density. Also, since it contains no 'or's, there | ||||
15517 | # will be a forced break at its 'and'. | ||||
15518 | |||||
15519 | # set some flags telling something about this container.. | ||||
15520 | my $is_simple_logical_expression = 0; | ||||
15521 | if ( $item_count_stack[$current_depth] == 0 | ||||
15522 | && $saw_opening_structure | ||||
15523 | && $tokens_to_go[$i_opening] eq '(' | ||||
15524 | && $is_logical_container{ $container_type[$current_depth] } | ||||
15525 | ) | ||||
15526 | { | ||||
15527 | |||||
15528 | # This seems to be a simple logical expression with | ||||
15529 | # no existing breakpoints. Set a flag to prevent | ||||
15530 | # opening it up. | ||||
15531 | if ( !$has_comma_breakpoints ) { | ||||
15532 | $is_simple_logical_expression = 1; | ||||
15533 | } | ||||
15534 | |||||
15535 | # This seems to be a simple logical expression with | ||||
15536 | # breakpoints (broken sublists, for example). Break | ||||
15537 | # at all 'or's and '||'s. | ||||
15538 | else { | ||||
15539 | set_logical_breakpoints($current_depth); | ||||
15540 | } | ||||
15541 | } ## end if ( $item_count_stack...) | ||||
15542 | |||||
15543 | if ( $is_long_term | ||||
15544 | && @{ $rfor_semicolon_list[$current_depth] } ) | ||||
15545 | { | ||||
15546 | set_for_semicolon_breakpoints($current_depth); | ||||
15547 | |||||
15548 | # open up a long 'for' or 'foreach' container to allow | ||||
15549 | # leading term alignment unless -lp is used. | ||||
15550 | $has_comma_breakpoints = 1 | ||||
15551 | unless $rOpts_line_up_parentheses; | ||||
15552 | } ## end if ( $is_long_term && ...) | ||||
15553 | |||||
15554 | if ( | ||||
15555 | |||||
15556 | # breaks for code BLOCKS are handled at a higher level | ||||
15557 | !$block_type | ||||
15558 | |||||
15559 | # we do not need to break at the top level of an 'if' | ||||
15560 | # type expression | ||||
15561 | && !$is_simple_logical_expression | ||||
15562 | |||||
15563 | ## modification to keep ': (' containers vertically tight; | ||||
15564 | ## but probably better to let user set -vt=1 to avoid | ||||
15565 | ## inconsistency with other paren types | ||||
15566 | ## && ($container_type[$current_depth] ne ':') | ||||
15567 | |||||
15568 | # otherwise, we require one of these reasons for breaking: | ||||
15569 | && ( | ||||
15570 | |||||
15571 | # - this term has forced line breaks | ||||
15572 | $has_comma_breakpoints | ||||
15573 | |||||
15574 | # - the opening container is separated from this batch | ||||
15575 | # for some reason (comment, blank line, code block) | ||||
15576 | # - this is a non-paren container spanning multiple lines | ||||
15577 | || !$saw_opening_structure | ||||
15578 | |||||
15579 | # - this is a long block contained in another breakable | ||||
15580 | # container | ||||
15581 | || ( $is_long_term | ||||
15582 | && $container_environment_to_go[$i_opening] ne | ||||
15583 | 'BLOCK' ) | ||||
15584 | ) | ||||
15585 | ) | ||||
15586 | { | ||||
15587 | |||||
15588 | # For -lp option, we must put a breakpoint before | ||||
15589 | # the token which has been identified as starting | ||||
15590 | # this indentation level. This is necessary for | ||||
15591 | # proper alignment. | ||||
15592 | if ( $rOpts_line_up_parentheses && $saw_opening_structure ) | ||||
15593 | { | ||||
15594 | my $item = $leading_spaces_to_go[ $i_opening + 1 ]; | ||||
15595 | if ( $i_opening + 1 < $max_index_to_go | ||||
15596 | && $types_to_go[ $i_opening + 1 ] eq 'b' ) | ||||
15597 | { | ||||
15598 | $item = $leading_spaces_to_go[ $i_opening + 2 ]; | ||||
15599 | } | ||||
15600 | if ( defined($item) ) { | ||||
15601 | my $i_start_2 = $item->get_STARTING_INDEX(); | ||||
15602 | if ( | ||||
15603 | defined($i_start_2) | ||||
15604 | |||||
15605 | # we are breaking after an opening brace, paren, | ||||
15606 | # so don't break before it too | ||||
15607 | && $i_start_2 ne $i_opening | ||||
15608 | ) | ||||
15609 | { | ||||
15610 | |||||
15611 | # Only break for breakpoints at the same | ||||
15612 | # indentation level as the opening paren | ||||
15613 | my $test1 = $nesting_depth_to_go[$i_opening]; | ||||
15614 | my $test2 = $nesting_depth_to_go[$i_start_2]; | ||||
15615 | if ( $test2 == $test1 ) { | ||||
15616 | set_forced_breakpoint( $i_start_2 - 1 ); | ||||
15617 | } | ||||
15618 | } ## end if ( defined($i_start_2...)) | ||||
15619 | } ## end if ( defined($item) ) | ||||
15620 | } ## end if ( $rOpts_line_up_parentheses...) | ||||
15621 | |||||
15622 | # break after opening structure. | ||||
15623 | # note: break before closing structure will be automatic | ||||
15624 | if ( $minimum_depth <= $current_depth ) { | ||||
15625 | |||||
15626 | set_forced_breakpoint($i_opening) | ||||
15627 | unless ( $do_not_break_apart | ||||
15628 | || is_unbreakable_container($current_depth) ); | ||||
15629 | |||||
15630 | # break at ',' of lower depth level before opening token | ||||
15631 | if ( $last_comma_index[$depth] ) { | ||||
15632 | set_forced_breakpoint( $last_comma_index[$depth] ); | ||||
15633 | } | ||||
15634 | |||||
15635 | # break at '.' of lower depth level before opening token | ||||
15636 | if ( $last_dot_index[$depth] ) { | ||||
15637 | set_forced_breakpoint( $last_dot_index[$depth] ); | ||||
15638 | } | ||||
15639 | |||||
15640 | # break before opening structure if preceded by another | ||||
15641 | # closing structure and a comma. This is normally | ||||
15642 | # done by the previous closing brace, but not | ||||
15643 | # if it was a one-line block. | ||||
15644 | if ( $i_opening > 2 ) { | ||||
15645 | my $i_prev = | ||||
15646 | ( $types_to_go[ $i_opening - 1 ] eq 'b' ) | ||||
15647 | ? $i_opening - 2 | ||||
15648 | : $i_opening - 1; | ||||
15649 | |||||
15650 | if ( $types_to_go[$i_prev] eq ',' | ||||
15651 | && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ ) | ||||
15652 | { | ||||
15653 | set_forced_breakpoint($i_prev); | ||||
15654 | } | ||||
15655 | |||||
15656 | # also break before something like ':(' or '?(' | ||||
15657 | # if appropriate. | ||||
15658 | elsif ( | ||||
15659 | $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ ) | ||||
15660 | { | ||||
15661 | my $token_prev = $tokens_to_go[$i_prev]; | ||||
15662 | if ( $want_break_before{$token_prev} ) { | ||||
15663 | set_forced_breakpoint($i_prev); | ||||
15664 | } | ||||
15665 | } ## end elsif ( $types_to_go[$i_prev...]) | ||||
15666 | } ## end if ( $i_opening > 2 ) | ||||
15667 | } ## end if ( $minimum_depth <=...) | ||||
15668 | |||||
15669 | # break after comma following closing structure | ||||
15670 | if ( $next_type eq ',' ) { | ||||
15671 | set_forced_breakpoint( $i + 1 ); | ||||
15672 | } | ||||
15673 | |||||
15674 | # break before an '=' following closing structure | ||||
15675 | if ( | ||||
15676 | $is_assignment{$next_nonblank_type} | ||||
15677 | && ( $breakpoint_stack[$current_depth] != | ||||
15678 | $forced_breakpoint_count ) | ||||
15679 | ) | ||||
15680 | { | ||||
15681 | set_forced_breakpoint($i); | ||||
15682 | } ## end if ( $is_assignment{$next_nonblank_type...}) | ||||
15683 | |||||
15684 | # break at any comma before the opening structure Added | ||||
15685 | # for -lp, but seems to be good in general. It isn't | ||||
15686 | # obvious how far back to look; the '5' below seems to | ||||
15687 | # work well and will catch the comma in something like | ||||
15688 | # push @list, myfunc( $param, $param, .. | ||||
15689 | |||||
15690 | my $icomma = $last_comma_index[$depth]; | ||||
15691 | if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) { | ||||
15692 | unless ( $forced_breakpoint_to_go[$icomma] ) { | ||||
15693 | set_forced_breakpoint($icomma); | ||||
15694 | } | ||||
15695 | } | ||||
15696 | } # end logic to open up a container | ||||
15697 | |||||
15698 | # Break open a logical container open if it was already open | ||||
15699 | elsif ($is_simple_logical_expression | ||||
15700 | && $has_old_logical_breakpoints[$current_depth] ) | ||||
15701 | { | ||||
15702 | set_logical_breakpoints($current_depth); | ||||
15703 | } | ||||
15704 | |||||
15705 | # Handle long container which does not get opened up | ||||
15706 | elsif ($is_long_term) { | ||||
15707 | |||||
15708 | # must set fake breakpoint to alert outer containers that | ||||
15709 | # they are complex | ||||
15710 | set_fake_breakpoint(); | ||||
15711 | } ## end elsif ($is_long_term) | ||||
15712 | |||||
15713 | } ## end elsif ( $depth < $current_depth) | ||||
15714 | |||||
15715 | #------------------------------------------------------------ | ||||
15716 | # Handle this token | ||||
15717 | #------------------------------------------------------------ | ||||
15718 | |||||
15719 | $current_depth = $depth; | ||||
15720 | |||||
15721 | # handle comma-arrow | ||||
15722 | if ( $type eq '=>' ) { | ||||
15723 | next if ( $last_nonblank_type eq '=>' ); | ||||
15724 | next if $rOpts_break_at_old_comma_breakpoints; | ||||
15725 | next if $rOpts_comma_arrow_breakpoints == 3; | ||||
15726 | $want_comma_break[$depth] = 1; | ||||
15727 | $index_before_arrow[$depth] = $i_last_nonblank_token; | ||||
15728 | next; | ||||
15729 | } ## end if ( $type eq '=>' ) | ||||
15730 | |||||
15731 | elsif ( $type eq '.' ) { | ||||
15732 | $last_dot_index[$depth] = $i; | ||||
15733 | } | ||||
15734 | |||||
15735 | # Turn off alignment if we are sure that this is not a list | ||||
15736 | # environment. To be safe, we will do this if we see certain | ||||
15737 | # non-list tokens, such as ';', and also the environment is | ||||
15738 | # not a list. Note that '=' could be in any of the = operators | ||||
15739 | # (lextest.t). We can't just use the reported environment | ||||
15740 | # because it can be incorrect in some cases. | ||||
15741 | elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} ) | ||||
15742 | && $container_environment_to_go[$i] ne 'LIST' ) | ||||
15743 | { | ||||
15744 | $dont_align[$depth] = 1; | ||||
15745 | $want_comma_break[$depth] = 0; | ||||
15746 | $index_before_arrow[$depth] = -1; | ||||
15747 | } ## end elsif ( ( $type =~ /^[\;\<\>\~]$/...)) | ||||
15748 | |||||
15749 | # now just handle any commas | ||||
15750 | next unless ( $type eq ',' ); | ||||
15751 | |||||
15752 | $last_dot_index[$depth] = undef; | ||||
15753 | $last_comma_index[$depth] = $i; | ||||
15754 | |||||
15755 | # break here if this comma follows a '=>' | ||||
15756 | # but not if there is a side comment after the comma | ||||
15757 | if ( $want_comma_break[$depth] ) { | ||||
15758 | |||||
15759 | if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) { | ||||
15760 | if ($rOpts_comma_arrow_breakpoints) { | ||||
15761 | $want_comma_break[$depth] = 0; | ||||
15762 | ##$index_before_arrow[$depth] = -1; | ||||
15763 | next; | ||||
15764 | } | ||||
15765 | } | ||||
15766 | |||||
15767 | set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' ); | ||||
15768 | |||||
15769 | # break before the previous token if it looks safe | ||||
15770 | # Example of something that we will not try to break before: | ||||
15771 | # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt}, | ||||
15772 | # Also we don't want to break at a binary operator (like +): | ||||
15773 | # $c->createOval( | ||||
15774 | # $x + $R, $y + | ||||
15775 | # $R => $x - $R, | ||||
15776 | # $y - $R, -fill => 'black', | ||||
15777 | # ); | ||||
15778 | my $ibreak = $index_before_arrow[$depth] - 1; | ||||
15779 | if ( $ibreak > 0 | ||||
15780 | && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ ) | ||||
15781 | { | ||||
15782 | if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- } | ||||
15783 | if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- } | ||||
15784 | if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) { | ||||
15785 | |||||
15786 | # don't break pointer calls, such as the following: | ||||
15787 | # File::Spec->curdir => 1, | ||||
15788 | # (This is tokenized as adjacent 'w' tokens) | ||||
15789 | ##if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) { | ||||
15790 | |||||
15791 | # And don't break before a comma, as in the following: | ||||
15792 | # ( LONGER_THAN,=> 1, | ||||
15793 | # EIGHTY_CHARACTERS,=> 2, | ||||
15794 | # CAUSES_FORMATTING,=> 3, | ||||
15795 | # LIKE_THIS,=> 4, | ||||
15796 | # ); | ||||
15797 | # This example is for -tso but should be general rule | ||||
15798 | if ( $tokens_to_go[ $ibreak + 1 ] ne '->' | ||||
15799 | && $tokens_to_go[ $ibreak + 1 ] ne ',' ) | ||||
15800 | { | ||||
15801 | set_forced_breakpoint($ibreak); | ||||
15802 | } | ||||
15803 | } ## end if ( $types_to_go[$ibreak...]) | ||||
15804 | } ## end if ( $ibreak > 0 && $tokens_to_go...) | ||||
15805 | |||||
15806 | $want_comma_break[$depth] = 0; | ||||
15807 | $index_before_arrow[$depth] = -1; | ||||
15808 | |||||
15809 | # handle list which mixes '=>'s and ','s: | ||||
15810 | # treat any list items so far as an interrupted list | ||||
15811 | $interrupted_list[$depth] = 1; | ||||
15812 | next; | ||||
15813 | } ## end if ( $want_comma_break...) | ||||
15814 | |||||
15815 | # break after all commas above starting depth | ||||
15816 | if ( $depth < $starting_depth && !$dont_align[$depth] ) { | ||||
15817 | set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' ); | ||||
15818 | next; | ||||
15819 | } | ||||
15820 | |||||
15821 | # add this comma to the list.. | ||||
15822 | my $item_count = $item_count_stack[$depth]; | ||||
15823 | if ( $item_count == 0 ) { | ||||
15824 | |||||
15825 | # but do not form a list with no opening structure | ||||
15826 | # for example: | ||||
15827 | |||||
15828 | # open INFILE_COPY, ">$input_file_copy" | ||||
15829 | # or die ("very long message"); | ||||
15830 | |||||
15831 | if ( ( $opening_structure_index_stack[$depth] < 0 ) | ||||
15832 | && $container_environment_to_go[$i] eq 'BLOCK' ) | ||||
15833 | { | ||||
15834 | $dont_align[$depth] = 1; | ||||
15835 | } | ||||
15836 | } ## end if ( $item_count == 0 ) | ||||
15837 | |||||
15838 | $comma_index[$depth][$item_count] = $i; | ||||
15839 | ++$item_count_stack[$depth]; | ||||
15840 | if ( $last_nonblank_type =~ /^[iR\]]$/ ) { | ||||
15841 | $identifier_count_stack[$depth]++; | ||||
15842 | } | ||||
15843 | } ## end while ( ++$i <= $max_index_to_go) | ||||
15844 | |||||
15845 | #------------------------------------------- | ||||
15846 | # end of loop over all tokens in this batch | ||||
15847 | #------------------------------------------- | ||||
15848 | |||||
15849 | # set breaks for any unfinished lists .. | ||||
15850 | for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) { | ||||
15851 | |||||
15852 | $interrupted_list[$dd] = 1; | ||||
15853 | $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth ); | ||||
15854 | set_comma_breakpoints($dd); | ||||
15855 | set_logical_breakpoints($dd) | ||||
15856 | if ( $has_old_logical_breakpoints[$dd] ); | ||||
15857 | set_for_semicolon_breakpoints($dd); | ||||
15858 | |||||
15859 | # break open container... | ||||
15860 | my $i_opening = $opening_structure_index_stack[$dd]; | ||||
15861 | set_forced_breakpoint($i_opening) | ||||
15862 | unless ( | ||||
15863 | is_unbreakable_container($dd) | ||||
15864 | |||||
15865 | # Avoid a break which would place an isolated ' or " | ||||
15866 | # on a line | ||||
15867 | || ( $type eq 'Q' | ||||
15868 | && $i_opening >= $max_index_to_go - 2 | ||||
15869 | && $token =~ /^['"]$/ ) | ||||
15870 | ); | ||||
15871 | } ## end for ( my $dd = $current_depth...) | ||||
15872 | |||||
15873 | # Return a flag indicating if the input file had some good breakpoints. | ||||
15874 | # This flag will be used to force a break in a line shorter than the | ||||
15875 | # allowed line length. | ||||
15876 | if ( $has_old_logical_breakpoints[$current_depth] ) { | ||||
15877 | $saw_good_breakpoint = 1; | ||||
15878 | } | ||||
15879 | |||||
15880 | # A complex line with one break at an = has a good breakpoint. | ||||
15881 | # This is not complex ($total_depth_variation=0): | ||||
15882 | # $res1 | ||||
15883 | # = 10; | ||||
15884 | # | ||||
15885 | # This is complex ($total_depth_variation=6): | ||||
15886 | # $res2 = | ||||
15887 | # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert')); | ||||
15888 | elsif ($i_old_assignment_break | ||||
15889 | && $total_depth_variation > 4 | ||||
15890 | && $old_breakpoint_count == 1 ) | ||||
15891 | { | ||||
15892 | $saw_good_breakpoint = 1; | ||||
15893 | } ## end elsif ( $i_old_assignment_break...) | ||||
15894 | |||||
15895 | return $saw_good_breakpoint; | ||||
15896 | } ## end sub scan_list | ||||
15897 | } # end scan_list | ||||
15898 | |||||
15899 | sub find_token_starting_list { | ||||
15900 | |||||
15901 | # When testing to see if a block will fit on one line, some | ||||
15902 | # previous token(s) may also need to be on the line; particularly | ||||
15903 | # if this is a sub call. So we will look back at least one | ||||
15904 | # token. NOTE: This isn't perfect, but not critical, because | ||||
15905 | # if we mis-identify a block, it will be wrapped and therefore | ||||
15906 | # fixed the next time it is formatted. | ||||
15907 | my $i_opening_paren = shift; | ||||
15908 | my $i_opening_minus = $i_opening_paren; | ||||
15909 | my $im1 = $i_opening_paren - 1; | ||||
15910 | my $im2 = $i_opening_paren - 2; | ||||
15911 | my $im3 = $i_opening_paren - 3; | ||||
15912 | my $typem1 = $types_to_go[$im1]; | ||||
15913 | my $typem2 = $im2 >= 0 ? $types_to_go[$im2] : 'b'; | ||||
15914 | if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) { | ||||
15915 | $i_opening_minus = $i_opening_paren; | ||||
15916 | } | ||||
15917 | elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) { | ||||
15918 | $i_opening_minus = $im1 if $im1 >= 0; | ||||
15919 | |||||
15920 | # walk back to improve length estimate | ||||
15921 | for ( my $j = $im1 ; $j >= 0 ; $j-- ) { | ||||
15922 | last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ ); | ||||
15923 | $i_opening_minus = $j; | ||||
15924 | } | ||||
15925 | if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ } | ||||
15926 | } | ||||
15927 | elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 } | ||||
15928 | elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) { | ||||
15929 | $i_opening_minus = $im2; | ||||
15930 | } | ||||
15931 | return $i_opening_minus; | ||||
15932 | } | ||||
15933 | |||||
15934 | { # begin set_comma_breakpoints_do | ||||
15935 | |||||
15936 | 2 | 100ns | my %is_keyword_with_special_leading_term; | ||
15937 | |||||
15938 | # spent 12µs within Perl::Tidy::Formatter::BEGIN@15938 which was called:
# once (12µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 15945 | ||||
15939 | |||||
15940 | # These keywords have prototypes which allow a special leading item | ||||
15941 | # followed by a list | ||||
15942 | 1 | 2µs | @_ = | ||
15943 | qw(formline grep kill map printf sprintf push chmod join pack unshift); | ||||
15944 | 1 | 12µs | @is_keyword_with_special_leading_term{@_} = (1) x scalar(@_); | ||
15945 | 1 | 2.92ms | 1 | 12µs | } # spent 12µs making 1 call to Perl::Tidy::Formatter::BEGIN@15938 |
15946 | |||||
15947 | sub set_comma_breakpoints_do { | ||||
15948 | |||||
15949 | # Given a list with some commas, set breakpoints at some of the | ||||
15950 | # commas, if necessary, to make it easy to read. This list is | ||||
15951 | # an example: | ||||
15952 | my ( | ||||
15953 | $depth, $i_opening_paren, $i_closing_paren, | ||||
15954 | $item_count, $identifier_count, $rcomma_index, | ||||
15955 | $next_nonblank_type, $list_type, $interrupted, | ||||
15956 | $rdo_not_break_apart, $must_break_open, | ||||
15957 | ) = @_; | ||||
15958 | |||||
15959 | # nothing to do if no commas seen | ||||
15960 | return if ( $item_count < 1 ); | ||||
15961 | my $i_first_comma = $$rcomma_index[0]; | ||||
15962 | my $i_true_last_comma = $$rcomma_index[ $item_count - 1 ]; | ||||
15963 | my $i_last_comma = $i_true_last_comma; | ||||
15964 | if ( $i_last_comma >= $max_index_to_go ) { | ||||
15965 | $i_last_comma = $$rcomma_index[ --$item_count - 1 ]; | ||||
15966 | return if ( $item_count < 1 ); | ||||
15967 | } | ||||
15968 | |||||
15969 | #--------------------------------------------------------------- | ||||
15970 | # find lengths of all items in the list to calculate page layout | ||||
15971 | #--------------------------------------------------------------- | ||||
15972 | my $comma_count = $item_count; | ||||
15973 | my @item_lengths; | ||||
15974 | my @i_term_begin; | ||||
15975 | my @i_term_end; | ||||
15976 | my @i_term_comma; | ||||
15977 | my $i_prev_plus; | ||||
15978 | my @max_length = ( 0, 0 ); | ||||
15979 | my $first_term_length; | ||||
15980 | my $i = $i_opening_paren; | ||||
15981 | my $is_odd = 1; | ||||
15982 | |||||
15983 | for ( my $j = 0 ; $j < $comma_count ; $j++ ) { | ||||
15984 | $is_odd = 1 - $is_odd; | ||||
15985 | $i_prev_plus = $i + 1; | ||||
15986 | $i = $$rcomma_index[$j]; | ||||
15987 | |||||
15988 | my $i_term_end = | ||||
15989 | ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1; | ||||
15990 | my $i_term_begin = | ||||
15991 | ( $types_to_go[$i_prev_plus] eq 'b' ) | ||||
15992 | ? $i_prev_plus + 1 | ||||
15993 | : $i_prev_plus; | ||||
15994 | push @i_term_begin, $i_term_begin; | ||||
15995 | push @i_term_end, $i_term_end; | ||||
15996 | push @i_term_comma, $i; | ||||
15997 | |||||
15998 | # note: currently adding 2 to all lengths (for comma and space) | ||||
15999 | my $length = | ||||
16000 | 2 + token_sequence_length( $i_term_begin, $i_term_end ); | ||||
16001 | push @item_lengths, $length; | ||||
16002 | |||||
16003 | if ( $j == 0 ) { | ||||
16004 | $first_term_length = $length; | ||||
16005 | } | ||||
16006 | else { | ||||
16007 | |||||
16008 | if ( $length > $max_length[$is_odd] ) { | ||||
16009 | $max_length[$is_odd] = $length; | ||||
16010 | } | ||||
16011 | } | ||||
16012 | } | ||||
16013 | |||||
16014 | # now we have to make a distinction between the comma count and item | ||||
16015 | # count, because the item count will be one greater than the comma | ||||
16016 | # count if the last item is not terminated with a comma | ||||
16017 | my $i_b = | ||||
16018 | ( $types_to_go[ $i_last_comma + 1 ] eq 'b' ) | ||||
16019 | ? $i_last_comma + 1 | ||||
16020 | : $i_last_comma; | ||||
16021 | my $i_e = | ||||
16022 | ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' ) | ||||
16023 | ? $i_closing_paren - 2 | ||||
16024 | : $i_closing_paren - 1; | ||||
16025 | my $i_effective_last_comma = $i_last_comma; | ||||
16026 | |||||
16027 | my $last_item_length = token_sequence_length( $i_b + 1, $i_e ); | ||||
16028 | |||||
16029 | if ( $last_item_length > 0 ) { | ||||
16030 | |||||
16031 | # add 2 to length because other lengths include a comma and a blank | ||||
16032 | $last_item_length += 2; | ||||
16033 | push @item_lengths, $last_item_length; | ||||
16034 | push @i_term_begin, $i_b + 1; | ||||
16035 | push @i_term_end, $i_e; | ||||
16036 | push @i_term_comma, undef; | ||||
16037 | |||||
16038 | my $i_odd = $item_count % 2; | ||||
16039 | |||||
16040 | if ( $last_item_length > $max_length[$i_odd] ) { | ||||
16041 | $max_length[$i_odd] = $last_item_length; | ||||
16042 | } | ||||
16043 | |||||
16044 | $item_count++; | ||||
16045 | $i_effective_last_comma = $i_e + 1; | ||||
16046 | |||||
16047 | if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) { | ||||
16048 | $identifier_count++; | ||||
16049 | } | ||||
16050 | } | ||||
16051 | |||||
16052 | #--------------------------------------------------------------- | ||||
16053 | # End of length calculations | ||||
16054 | #--------------------------------------------------------------- | ||||
16055 | |||||
16056 | #--------------------------------------------------------------- | ||||
16057 | # Compound List Rule 1: | ||||
16058 | # Break at (almost) every comma for a list containing a broken | ||||
16059 | # sublist. This has higher priority than the Interrupted List | ||||
16060 | # Rule. | ||||
16061 | #--------------------------------------------------------------- | ||||
16062 | if ( $has_broken_sublist[$depth] ) { | ||||
16063 | |||||
16064 | # Break at every comma except for a comma between two | ||||
16065 | # simple, small terms. This prevents long vertical | ||||
16066 | # columns of, say, just 0's. | ||||
16067 | my $small_length = 10; # 2 + actual maximum length wanted | ||||
16068 | |||||
16069 | # We'll insert a break in long runs of small terms to | ||||
16070 | # allow alignment in uniform tables. | ||||
16071 | my $skipped_count = 0; | ||||
16072 | my $columns = table_columns_available($i_first_comma); | ||||
16073 | my $fields = int( $columns / $small_length ); | ||||
16074 | if ( $rOpts_maximum_fields_per_table | ||||
16075 | && $fields > $rOpts_maximum_fields_per_table ) | ||||
16076 | { | ||||
16077 | $fields = $rOpts_maximum_fields_per_table; | ||||
16078 | } | ||||
16079 | my $max_skipped_count = $fields - 1; | ||||
16080 | |||||
16081 | my $is_simple_last_term = 0; | ||||
16082 | my $is_simple_next_term = 0; | ||||
16083 | foreach my $j ( 0 .. $item_count ) { | ||||
16084 | $is_simple_last_term = $is_simple_next_term; | ||||
16085 | $is_simple_next_term = 0; | ||||
16086 | if ( $j < $item_count | ||||
16087 | && $i_term_end[$j] == $i_term_begin[$j] | ||||
16088 | && $item_lengths[$j] <= $small_length ) | ||||
16089 | { | ||||
16090 | $is_simple_next_term = 1; | ||||
16091 | } | ||||
16092 | next if $j == 0; | ||||
16093 | if ( $is_simple_last_term | ||||
16094 | && $is_simple_next_term | ||||
16095 | && $skipped_count < $max_skipped_count ) | ||||
16096 | { | ||||
16097 | $skipped_count++; | ||||
16098 | } | ||||
16099 | else { | ||||
16100 | $skipped_count = 0; | ||||
16101 | my $i = $i_term_comma[ $j - 1 ]; | ||||
16102 | last unless defined $i; | ||||
16103 | set_forced_breakpoint($i); | ||||
16104 | } | ||||
16105 | } | ||||
16106 | |||||
16107 | # always break at the last comma if this list is | ||||
16108 | # interrupted; we wouldn't want to leave a terminal '{', for | ||||
16109 | # example. | ||||
16110 | if ($interrupted) { set_forced_breakpoint($i_true_last_comma) } | ||||
16111 | return; | ||||
16112 | } | ||||
16113 | |||||
16114 | #my ( $a, $b, $c ) = caller(); | ||||
16115 | #print "LISTX: in set_list $a $c interrupt=$interrupted count=$item_count | ||||
16116 | #i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n"; | ||||
16117 | #print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n"; | ||||
16118 | |||||
16119 | #--------------------------------------------------------------- | ||||
16120 | # Interrupted List Rule: | ||||
16121 | # A list is forced to use old breakpoints if it was interrupted | ||||
16122 | # by side comments or blank lines, or requested by user. | ||||
16123 | #--------------------------------------------------------------- | ||||
16124 | if ( $rOpts_break_at_old_comma_breakpoints | ||||
16125 | || $interrupted | ||||
16126 | || $i_opening_paren < 0 ) | ||||
16127 | { | ||||
16128 | copy_old_breakpoints( $i_first_comma, $i_true_last_comma ); | ||||
16129 | return; | ||||
16130 | } | ||||
16131 | |||||
16132 | #--------------------------------------------------------------- | ||||
16133 | # Looks like a list of items. We have to look at it and size it up. | ||||
16134 | #--------------------------------------------------------------- | ||||
16135 | |||||
16136 | my $opening_token = $tokens_to_go[$i_opening_paren]; | ||||
16137 | my $opening_environment = | ||||
16138 | $container_environment_to_go[$i_opening_paren]; | ||||
16139 | |||||
16140 | #------------------------------------------------------------------- | ||||
16141 | # Return if this will fit on one line | ||||
16142 | #------------------------------------------------------------------- | ||||
16143 | |||||
16144 | my $i_opening_minus = find_token_starting_list($i_opening_paren); | ||||
16145 | return | ||||
16146 | unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0; | ||||
16147 | |||||
16148 | #------------------------------------------------------------------- | ||||
16149 | # Now we know that this block spans multiple lines; we have to set | ||||
16150 | # at least one breakpoint -- real or fake -- as a signal to break | ||||
16151 | # open any outer containers. | ||||
16152 | #------------------------------------------------------------------- | ||||
16153 | set_fake_breakpoint(); | ||||
16154 | |||||
16155 | # be sure we do not extend beyond the current list length | ||||
16156 | if ( $i_effective_last_comma >= $max_index_to_go ) { | ||||
16157 | $i_effective_last_comma = $max_index_to_go - 1; | ||||
16158 | } | ||||
16159 | |||||
16160 | # Set a flag indicating if we need to break open to keep -lp | ||||
16161 | # items aligned. This is necessary if any of the list terms | ||||
16162 | # exceeds the available space after the '('. | ||||
16163 | my $need_lp_break_open = $must_break_open; | ||||
16164 | if ( $rOpts_line_up_parentheses && !$must_break_open ) { | ||||
16165 | my $columns_if_unbroken = | ||||
16166 | maximum_line_length($i_opening_minus) - | ||||
16167 | total_line_length( $i_opening_minus, $i_opening_paren ); | ||||
16168 | $need_lp_break_open = | ||||
16169 | ( $max_length[0] > $columns_if_unbroken ) | ||||
16170 | || ( $max_length[1] > $columns_if_unbroken ) | ||||
16171 | || ( $first_term_length > $columns_if_unbroken ); | ||||
16172 | } | ||||
16173 | |||||
16174 | # Specify if the list must have an even number of fields or not. | ||||
16175 | # It is generally safest to assume an even number, because the | ||||
16176 | # list items might be a hash list. But if we can be sure that | ||||
16177 | # it is not a hash, then we can allow an odd number for more | ||||
16178 | # flexibility. | ||||
16179 | my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count | ||||
16180 | |||||
16181 | if ( $identifier_count >= $item_count - 1 | ||||
16182 | || $is_assignment{$next_nonblank_type} | ||||
16183 | || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ ) | ||||
16184 | ) | ||||
16185 | { | ||||
16186 | $odd_or_even = 1; | ||||
16187 | } | ||||
16188 | |||||
16189 | # do we have a long first term which should be | ||||
16190 | # left on a line by itself? | ||||
16191 | my $use_separate_first_term = ( | ||||
16192 | $odd_or_even == 1 # only if we can use 1 field/line | ||||
16193 | && $item_count > 3 # need several items | ||||
16194 | && $first_term_length > | ||||
16195 | 2 * $max_length[0] - 2 # need long first term | ||||
16196 | && $first_term_length > | ||||
16197 | 2 * $max_length[1] - 2 # need long first term | ||||
16198 | ); | ||||
16199 | |||||
16200 | # or do we know from the type of list that the first term should | ||||
16201 | # be placed alone? | ||||
16202 | if ( !$use_separate_first_term ) { | ||||
16203 | if ( $is_keyword_with_special_leading_term{$list_type} ) { | ||||
16204 | $use_separate_first_term = 1; | ||||
16205 | |||||
16206 | # should the container be broken open? | ||||
16207 | if ( $item_count < 3 ) { | ||||
16208 | if ( $i_first_comma - $i_opening_paren < 4 ) { | ||||
16209 | $$rdo_not_break_apart = 1; | ||||
16210 | } | ||||
16211 | } | ||||
16212 | elsif ($first_term_length < 20 | ||||
16213 | && $i_first_comma - $i_opening_paren < 4 ) | ||||
16214 | { | ||||
16215 | my $columns = table_columns_available($i_first_comma); | ||||
16216 | if ( $first_term_length < $columns ) { | ||||
16217 | $$rdo_not_break_apart = 1; | ||||
16218 | } | ||||
16219 | } | ||||
16220 | } | ||||
16221 | } | ||||
16222 | |||||
16223 | # if so, | ||||
16224 | if ($use_separate_first_term) { | ||||
16225 | |||||
16226 | # ..set a break and update starting values | ||||
16227 | $use_separate_first_term = 1; | ||||
16228 | set_forced_breakpoint($i_first_comma); | ||||
16229 | $i_opening_paren = $i_first_comma; | ||||
16230 | $i_first_comma = $$rcomma_index[1]; | ||||
16231 | $item_count--; | ||||
16232 | return if $comma_count == 1; | ||||
16233 | shift @item_lengths; | ||||
16234 | shift @i_term_begin; | ||||
16235 | shift @i_term_end; | ||||
16236 | shift @i_term_comma; | ||||
16237 | } | ||||
16238 | |||||
16239 | # if not, update the metrics to include the first term | ||||
16240 | else { | ||||
16241 | if ( $first_term_length > $max_length[0] ) { | ||||
16242 | $max_length[0] = $first_term_length; | ||||
16243 | } | ||||
16244 | } | ||||
16245 | |||||
16246 | # Field width parameters | ||||
16247 | my $pair_width = ( $max_length[0] + $max_length[1] ); | ||||
16248 | my $max_width = | ||||
16249 | ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1]; | ||||
16250 | |||||
16251 | # Number of free columns across the page width for laying out tables | ||||
16252 | my $columns = table_columns_available($i_first_comma); | ||||
16253 | |||||
16254 | # Estimated maximum number of fields which fit this space | ||||
16255 | # This will be our first guess | ||||
16256 | my $number_of_fields_max = | ||||
16257 | maximum_number_of_fields( $columns, $odd_or_even, $max_width, | ||||
16258 | $pair_width ); | ||||
16259 | my $number_of_fields = $number_of_fields_max; | ||||
16260 | |||||
16261 | # Find the best-looking number of fields | ||||
16262 | # and make this our second guess if possible | ||||
16263 | my ( $number_of_fields_best, $ri_ragged_break_list, | ||||
16264 | $new_identifier_count ) | ||||
16265 | = study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths, | ||||
16266 | $max_width ); | ||||
16267 | |||||
16268 | if ( $number_of_fields_best != 0 | ||||
16269 | && $number_of_fields_best < $number_of_fields_max ) | ||||
16270 | { | ||||
16271 | $number_of_fields = $number_of_fields_best; | ||||
16272 | } | ||||
16273 | |||||
16274 | # ---------------------------------------------------------------------- | ||||
16275 | # If we are crowded and the -lp option is being used, try to | ||||
16276 | # undo some indentation | ||||
16277 | # ---------------------------------------------------------------------- | ||||
16278 | if ( | ||||
16279 | $rOpts_line_up_parentheses | ||||
16280 | && ( | ||||
16281 | $number_of_fields == 0 | ||||
16282 | || ( $number_of_fields == 1 | ||||
16283 | && $number_of_fields != $number_of_fields_best ) | ||||
16284 | ) | ||||
16285 | ) | ||||
16286 | { | ||||
16287 | my $available_spaces = get_AVAILABLE_SPACES_to_go($i_first_comma); | ||||
16288 | if ( $available_spaces > 0 ) { | ||||
16289 | |||||
16290 | my $spaces_wanted = $max_width - $columns; # for 1 field | ||||
16291 | |||||
16292 | if ( $number_of_fields_best == 0 ) { | ||||
16293 | $number_of_fields_best = | ||||
16294 | get_maximum_fields_wanted( \@item_lengths ); | ||||
16295 | } | ||||
16296 | |||||
16297 | if ( $number_of_fields_best != 1 ) { | ||||
16298 | my $spaces_wanted_2 = | ||||
16299 | 1 + $pair_width - $columns; # for 2 fields | ||||
16300 | if ( $available_spaces > $spaces_wanted_2 ) { | ||||
16301 | $spaces_wanted = $spaces_wanted_2; | ||||
16302 | } | ||||
16303 | } | ||||
16304 | |||||
16305 | if ( $spaces_wanted > 0 ) { | ||||
16306 | my $deleted_spaces = | ||||
16307 | reduce_lp_indentation( $i_first_comma, $spaces_wanted ); | ||||
16308 | |||||
16309 | # redo the math | ||||
16310 | if ( $deleted_spaces > 0 ) { | ||||
16311 | $columns = table_columns_available($i_first_comma); | ||||
16312 | $number_of_fields_max = | ||||
16313 | maximum_number_of_fields( $columns, $odd_or_even, | ||||
16314 | $max_width, $pair_width ); | ||||
16315 | $number_of_fields = $number_of_fields_max; | ||||
16316 | |||||
16317 | if ( $number_of_fields_best == 1 | ||||
16318 | && $number_of_fields >= 1 ) | ||||
16319 | { | ||||
16320 | $number_of_fields = $number_of_fields_best; | ||||
16321 | } | ||||
16322 | } | ||||
16323 | } | ||||
16324 | } | ||||
16325 | } | ||||
16326 | |||||
16327 | # try for one column if two won't work | ||||
16328 | if ( $number_of_fields <= 0 ) { | ||||
16329 | $number_of_fields = int( $columns / $max_width ); | ||||
16330 | } | ||||
16331 | |||||
16332 | # The user can place an upper bound on the number of fields, | ||||
16333 | # which can be useful for doing maintenance on tables | ||||
16334 | if ( $rOpts_maximum_fields_per_table | ||||
16335 | && $number_of_fields > $rOpts_maximum_fields_per_table ) | ||||
16336 | { | ||||
16337 | $number_of_fields = $rOpts_maximum_fields_per_table; | ||||
16338 | } | ||||
16339 | |||||
16340 | # How many columns (characters) and lines would this container take | ||||
16341 | # if no additional whitespace were added? | ||||
16342 | my $packed_columns = token_sequence_length( $i_opening_paren + 1, | ||||
16343 | $i_effective_last_comma + 1 ); | ||||
16344 | if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero | ||||
16345 | my $packed_lines = 1 + int( $packed_columns / $columns ); | ||||
16346 | |||||
16347 | # are we an item contained in an outer list? | ||||
16348 | my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/; | ||||
16349 | |||||
16350 | if ( $number_of_fields <= 0 ) { | ||||
16351 | |||||
16352 | # #--------------------------------------------------------------- | ||||
16353 | # # We're in trouble. We can't find a single field width that works. | ||||
16354 | # # There is no simple answer here; we may have a single long list | ||||
16355 | # # item, or many. | ||||
16356 | # #--------------------------------------------------------------- | ||||
16357 | # | ||||
16358 | # In many cases, it may be best to not force a break if there is just one | ||||
16359 | # comma, because the standard continuation break logic will do a better | ||||
16360 | # job without it. | ||||
16361 | # | ||||
16362 | # In the common case that all but one of the terms can fit | ||||
16363 | # on a single line, it may look better not to break open the | ||||
16364 | # containing parens. Consider, for example | ||||
16365 | # | ||||
16366 | # $color = | ||||
16367 | # join ( '/', | ||||
16368 | # sort { $color_value{$::a} <=> $color_value{$::b}; } | ||||
16369 | # keys %colors ); | ||||
16370 | # | ||||
16371 | # which will look like this with the container broken: | ||||
16372 | # | ||||
16373 | # $color = join ( | ||||
16374 | # '/', | ||||
16375 | # sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors | ||||
16376 | # ); | ||||
16377 | # | ||||
16378 | # Here is an example of this rule for a long last term: | ||||
16379 | # | ||||
16380 | # log_message( 0, 256, 128, | ||||
16381 | # "Number of routes in adj-RIB-in to be considered: $peercount" ); | ||||
16382 | # | ||||
16383 | # And here is an example with a long first term: | ||||
16384 | # | ||||
16385 | # $s = sprintf( | ||||
16386 | # "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)", | ||||
16387 | # $r, $pu, $ps, $cu, $cs, $tt | ||||
16388 | # ) | ||||
16389 | # if $style eq 'all'; | ||||
16390 | |||||
16391 | my $i_last_comma = $$rcomma_index[ $comma_count - 1 ]; | ||||
16392 | my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0; | ||||
16393 | my $long_first_term = | ||||
16394 | excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0; | ||||
16395 | |||||
16396 | # break at every comma ... | ||||
16397 | if ( | ||||
16398 | |||||
16399 | # if requested by user or is best looking | ||||
16400 | $number_of_fields_best == 1 | ||||
16401 | |||||
16402 | # or if this is a sublist of a larger list | ||||
16403 | || $in_hierarchical_list | ||||
16404 | |||||
16405 | # or if multiple commas and we don't have a long first or last | ||||
16406 | # term | ||||
16407 | || ( $comma_count > 1 | ||||
16408 | && !( $long_last_term || $long_first_term ) ) | ||||
16409 | ) | ||||
16410 | { | ||||
16411 | foreach ( 0 .. $comma_count - 1 ) { | ||||
16412 | set_forced_breakpoint( $$rcomma_index[$_] ); | ||||
16413 | } | ||||
16414 | } | ||||
16415 | elsif ($long_last_term) { | ||||
16416 | |||||
16417 | set_forced_breakpoint($i_last_comma); | ||||
16418 | $$rdo_not_break_apart = 1 unless $must_break_open; | ||||
16419 | } | ||||
16420 | elsif ($long_first_term) { | ||||
16421 | |||||
16422 | set_forced_breakpoint($i_first_comma); | ||||
16423 | } | ||||
16424 | else { | ||||
16425 | |||||
16426 | # let breaks be defined by default bond strength logic | ||||
16427 | } | ||||
16428 | return; | ||||
16429 | } | ||||
16430 | |||||
16431 | # -------------------------------------------------------- | ||||
16432 | # We have a tentative field count that seems to work. | ||||
16433 | # How many lines will this require? | ||||
16434 | # -------------------------------------------------------- | ||||
16435 | my $formatted_lines = $item_count / ($number_of_fields); | ||||
16436 | if ( $formatted_lines != int $formatted_lines ) { | ||||
16437 | $formatted_lines = 1 + int $formatted_lines; | ||||
16438 | } | ||||
16439 | |||||
16440 | # So far we've been trying to fill out to the right margin. But | ||||
16441 | # compact tables are easier to read, so let's see if we can use fewer | ||||
16442 | # fields without increasing the number of lines. | ||||
16443 | $number_of_fields = | ||||
16444 | compactify_table( $item_count, $number_of_fields, $formatted_lines, | ||||
16445 | $odd_or_even ); | ||||
16446 | |||||
16447 | # How many spaces across the page will we fill? | ||||
16448 | my $columns_per_line = | ||||
16449 | ( int $number_of_fields / 2 ) * $pair_width + | ||||
16450 | ( $number_of_fields % 2 ) * $max_width; | ||||
16451 | |||||
16452 | my $formatted_columns; | ||||
16453 | |||||
16454 | if ( $number_of_fields > 1 ) { | ||||
16455 | $formatted_columns = | ||||
16456 | ( $pair_width * ( int( $item_count / 2 ) ) + | ||||
16457 | ( $item_count % 2 ) * $max_width ); | ||||
16458 | } | ||||
16459 | else { | ||||
16460 | $formatted_columns = $max_width * $item_count; | ||||
16461 | } | ||||
16462 | if ( $formatted_columns < $packed_columns ) { | ||||
16463 | $formatted_columns = $packed_columns; | ||||
16464 | } | ||||
16465 | |||||
16466 | my $unused_columns = $formatted_columns - $packed_columns; | ||||
16467 | |||||
16468 | # set some empirical parameters to help decide if we should try to | ||||
16469 | # align; high sparsity does not look good, especially with few lines | ||||
16470 | my $sparsity = ($unused_columns) / ($formatted_columns); | ||||
16471 | my $max_allowed_sparsity = | ||||
16472 | ( $item_count < 3 ) ? 0.1 | ||||
16473 | : ( $packed_lines == 1 ) ? 0.15 | ||||
16474 | : ( $packed_lines == 2 ) ? 0.4 | ||||
16475 | : 0.7; | ||||
16476 | |||||
16477 | # Begin check for shortcut methods, which avoid treating a list | ||||
16478 | # as a table for relatively small parenthesized lists. These | ||||
16479 | # are usually easier to read if not formatted as tables. | ||||
16480 | if ( | ||||
16481 | $packed_lines <= 2 # probably can fit in 2 lines | ||||
16482 | && $item_count < 9 # doesn't have too many items | ||||
16483 | && $opening_environment eq 'BLOCK' # not a sub-container | ||||
16484 | && $opening_token eq '(' # is paren list | ||||
16485 | ) | ||||
16486 | { | ||||
16487 | |||||
16488 | # Shortcut method 1: for -lp and just one comma: | ||||
16489 | # This is a no-brainer, just break at the comma. | ||||
16490 | if ( | ||||
16491 | $rOpts_line_up_parentheses # -lp | ||||
16492 | && $item_count == 2 # two items, one comma | ||||
16493 | && !$must_break_open | ||||
16494 | ) | ||||
16495 | { | ||||
16496 | my $i_break = $$rcomma_index[0]; | ||||
16497 | set_forced_breakpoint($i_break); | ||||
16498 | $$rdo_not_break_apart = 1; | ||||
16499 | set_non_alignment_flags( $comma_count, $rcomma_index ); | ||||
16500 | return; | ||||
16501 | |||||
16502 | } | ||||
16503 | |||||
16504 | # method 2 is for most small ragged lists which might look | ||||
16505 | # best if not displayed as a table. | ||||
16506 | if ( | ||||
16507 | ( $number_of_fields == 2 && $item_count == 3 ) | ||||
16508 | || ( | ||||
16509 | $new_identifier_count > 0 # isn't all quotes | ||||
16510 | && $sparsity > 0.15 | ||||
16511 | ) # would be fairly spaced gaps if aligned | ||||
16512 | ) | ||||
16513 | { | ||||
16514 | |||||
16515 | my $break_count = set_ragged_breakpoints( \@i_term_comma, | ||||
16516 | $ri_ragged_break_list ); | ||||
16517 | ++$break_count if ($use_separate_first_term); | ||||
16518 | |||||
16519 | # NOTE: we should really use the true break count here, | ||||
16520 | # which can be greater if there are large terms and | ||||
16521 | # little space, but usually this will work well enough. | ||||
16522 | unless ($must_break_open) { | ||||
16523 | |||||
16524 | if ( $break_count <= 1 ) { | ||||
16525 | $$rdo_not_break_apart = 1; | ||||
16526 | } | ||||
16527 | elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open ) | ||||
16528 | { | ||||
16529 | $$rdo_not_break_apart = 1; | ||||
16530 | } | ||||
16531 | } | ||||
16532 | set_non_alignment_flags( $comma_count, $rcomma_index ); | ||||
16533 | return; | ||||
16534 | } | ||||
16535 | |||||
16536 | } # end shortcut methods | ||||
16537 | |||||
16538 | # debug stuff | ||||
16539 | |||||
16540 | FORMATTER_DEBUG_FLAG_SPARSE && do { | ||||
16541 | print STDOUT | ||||
16542 | "SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n"; | ||||
16543 | |||||
16544 | }; | ||||
16545 | |||||
16546 | #--------------------------------------------------------------- | ||||
16547 | # Compound List Rule 2: | ||||
16548 | # If this list is too long for one line, and it is an item of a | ||||
16549 | # larger list, then we must format it, regardless of sparsity | ||||
16550 | # (ian.t). One reason that we have to do this is to trigger | ||||
16551 | # Compound List Rule 1, above, which causes breaks at all commas of | ||||
16552 | # all outer lists. In this way, the structure will be properly | ||||
16553 | # displayed. | ||||
16554 | #--------------------------------------------------------------- | ||||
16555 | |||||
16556 | # Decide if this list is too long for one line unless broken | ||||
16557 | my $total_columns = table_columns_available($i_opening_paren); | ||||
16558 | my $too_long = $packed_columns > $total_columns; | ||||
16559 | |||||
16560 | # For a paren list, include the length of the token just before the | ||||
16561 | # '(' because this is likely a sub call, and we would have to | ||||
16562 | # include the sub name on the same line as the list. This is still | ||||
16563 | # imprecise, but not too bad. (steve.t) | ||||
16564 | if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) { | ||||
16565 | |||||
16566 | $too_long = excess_line_length( $i_opening_minus, | ||||
16567 | $i_effective_last_comma + 1 ) > 0; | ||||
16568 | } | ||||
16569 | |||||
16570 | # FIXME: For an item after a '=>', try to include the length of the | ||||
16571 | # thing before the '=>'. This is crude and should be improved by | ||||
16572 | # actually looking back token by token. | ||||
16573 | if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) { | ||||
16574 | my $i_opening_minus = $i_opening_paren - 4; | ||||
16575 | if ( $i_opening_minus >= 0 ) { | ||||
16576 | $too_long = excess_line_length( $i_opening_minus, | ||||
16577 | $i_effective_last_comma + 1 ) > 0; | ||||
16578 | } | ||||
16579 | } | ||||
16580 | |||||
16581 | # Always break lists contained in '[' and '{' if too long for 1 line, | ||||
16582 | # and always break lists which are too long and part of a more complex | ||||
16583 | # structure. | ||||
16584 | my $must_break_open_container = $must_break_open | ||||
16585 | || ( $too_long | ||||
16586 | && ( $in_hierarchical_list || $opening_token ne '(' ) ); | ||||
16587 | |||||
16588 | #print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n"; | ||||
16589 | |||||
16590 | #--------------------------------------------------------------- | ||||
16591 | # The main decision: | ||||
16592 | # Now decide if we will align the data into aligned columns. Do not | ||||
16593 | # attempt to align columns if this is a tiny table or it would be | ||||
16594 | # too spaced. It seems that the more packed lines we have, the | ||||
16595 | # sparser the list that can be allowed and still look ok. | ||||
16596 | #--------------------------------------------------------------- | ||||
16597 | |||||
16598 | if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines ) | ||||
16599 | || ( $formatted_lines < 2 ) | ||||
16600 | || ( $unused_columns > $max_allowed_sparsity * $formatted_columns ) | ||||
16601 | ) | ||||
16602 | { | ||||
16603 | |||||
16604 | #--------------------------------------------------------------- | ||||
16605 | # too sparse: would look ugly if aligned in a table; | ||||
16606 | #--------------------------------------------------------------- | ||||
16607 | |||||
16608 | # use old breakpoints if this is a 'big' list | ||||
16609 | # FIXME: goal is to improve set_ragged_breakpoints so that | ||||
16610 | # this is not necessary. | ||||
16611 | if ( $packed_lines > 2 && $item_count > 10 ) { | ||||
16612 | write_logfile_entry("List sparse: using old breakpoints\n"); | ||||
16613 | copy_old_breakpoints( $i_first_comma, $i_last_comma ); | ||||
16614 | } | ||||
16615 | |||||
16616 | # let the continuation logic handle it if 2 lines | ||||
16617 | else { | ||||
16618 | |||||
16619 | my $break_count = set_ragged_breakpoints( \@i_term_comma, | ||||
16620 | $ri_ragged_break_list ); | ||||
16621 | ++$break_count if ($use_separate_first_term); | ||||
16622 | |||||
16623 | unless ($must_break_open_container) { | ||||
16624 | if ( $break_count <= 1 ) { | ||||
16625 | $$rdo_not_break_apart = 1; | ||||
16626 | } | ||||
16627 | elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open ) | ||||
16628 | { | ||||
16629 | $$rdo_not_break_apart = 1; | ||||
16630 | } | ||||
16631 | } | ||||
16632 | set_non_alignment_flags( $comma_count, $rcomma_index ); | ||||
16633 | } | ||||
16634 | return; | ||||
16635 | } | ||||
16636 | |||||
16637 | #--------------------------------------------------------------- | ||||
16638 | # go ahead and format as a table | ||||
16639 | #--------------------------------------------------------------- | ||||
16640 | write_logfile_entry( | ||||
16641 | "List: auto formatting with $number_of_fields fields/row\n"); | ||||
16642 | |||||
16643 | my $j_first_break = | ||||
16644 | $use_separate_first_term ? $number_of_fields : $number_of_fields - 1; | ||||
16645 | |||||
16646 | for ( | ||||
16647 | my $j = $j_first_break ; | ||||
16648 | $j < $comma_count ; | ||||
16649 | $j += $number_of_fields | ||||
16650 | ) | ||||
16651 | { | ||||
16652 | my $i = $$rcomma_index[$j]; | ||||
16653 | set_forced_breakpoint($i); | ||||
16654 | } | ||||
16655 | return; | ||||
16656 | } | ||||
16657 | } | ||||
16658 | |||||
16659 | sub set_non_alignment_flags { | ||||
16660 | |||||
16661 | # set flag which indicates that these commas should not be | ||||
16662 | # aligned | ||||
16663 | my ( $comma_count, $rcomma_index ) = @_; | ||||
16664 | foreach ( 0 .. $comma_count - 1 ) { | ||||
16665 | $matching_token_to_go[ $$rcomma_index[$_] ] = 1; | ||||
16666 | } | ||||
16667 | } | ||||
16668 | |||||
16669 | sub study_list_complexity { | ||||
16670 | |||||
16671 | # Look for complex tables which should be formatted with one term per line. | ||||
16672 | # Returns the following: | ||||
16673 | # | ||||
16674 | # \@i_ragged_break_list = list of good breakpoints to avoid lines | ||||
16675 | # which are hard to read | ||||
16676 | # $number_of_fields_best = suggested number of fields based on | ||||
16677 | # complexity; = 0 if any number may be used. | ||||
16678 | # | ||||
16679 | my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_; | ||||
16680 | my $item_count = @{$ri_term_begin}; | ||||
16681 | my $complex_item_count = 0; | ||||
16682 | my $number_of_fields_best = $rOpts_maximum_fields_per_table; | ||||
16683 | my $i_max = @{$ritem_lengths} - 1; | ||||
16684 | ##my @item_complexity; | ||||
16685 | |||||
16686 | my $i_last_last_break = -3; | ||||
16687 | my $i_last_break = -2; | ||||
16688 | my @i_ragged_break_list; | ||||
16689 | |||||
16690 | my $definitely_complex = 30; | ||||
16691 | my $definitely_simple = 12; | ||||
16692 | my $quote_count = 0; | ||||
16693 | |||||
16694 | for my $i ( 0 .. $i_max ) { | ||||
16695 | my $ib = $ri_term_begin->[$i]; | ||||
16696 | my $ie = $ri_term_end->[$i]; | ||||
16697 | |||||
16698 | # define complexity: start with the actual term length | ||||
16699 | my $weighted_length = ( $ritem_lengths->[$i] - 2 ); | ||||
16700 | |||||
16701 | ##TBD: join types here and check for variations | ||||
16702 | ##my $str=join "", @tokens_to_go[$ib..$ie]; | ||||
16703 | |||||
16704 | my $is_quote = 0; | ||||
16705 | if ( $types_to_go[$ib] =~ /^[qQ]$/ ) { | ||||
16706 | $is_quote = 1; | ||||
16707 | $quote_count++; | ||||
16708 | } | ||||
16709 | elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) { | ||||
16710 | $quote_count++; | ||||
16711 | } | ||||
16712 | |||||
16713 | if ( $ib eq $ie ) { | ||||
16714 | if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) { | ||||
16715 | $complex_item_count++; | ||||
16716 | $weighted_length *= 2; | ||||
16717 | } | ||||
16718 | else { | ||||
16719 | } | ||||
16720 | } | ||||
16721 | else { | ||||
16722 | if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) { | ||||
16723 | $complex_item_count++; | ||||
16724 | $weighted_length *= 2; | ||||
16725 | } | ||||
16726 | if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) { | ||||
16727 | $weighted_length += 4; | ||||
16728 | } | ||||
16729 | } | ||||
16730 | |||||
16731 | # add weight for extra tokens. | ||||
16732 | $weighted_length += 2 * ( $ie - $ib ); | ||||
16733 | |||||
16734 | ## my $BUB = join '', @tokens_to_go[$ib..$ie]; | ||||
16735 | ## print "# COMPLEXITY:$weighted_length $BUB\n"; | ||||
16736 | |||||
16737 | ##push @item_complexity, $weighted_length; | ||||
16738 | |||||
16739 | # now mark a ragged break after this item it if it is 'long and | ||||
16740 | # complex': | ||||
16741 | if ( $weighted_length >= $definitely_complex ) { | ||||
16742 | |||||
16743 | # if we broke after the previous term | ||||
16744 | # then break before it too | ||||
16745 | if ( $i_last_break == $i - 1 | ||||
16746 | && $i > 1 | ||||
16747 | && $i_last_last_break != $i - 2 ) | ||||
16748 | { | ||||
16749 | |||||
16750 | ## FIXME: don't strand a small term | ||||
16751 | pop @i_ragged_break_list; | ||||
16752 | push @i_ragged_break_list, $i - 2; | ||||
16753 | push @i_ragged_break_list, $i - 1; | ||||
16754 | } | ||||
16755 | |||||
16756 | push @i_ragged_break_list, $i; | ||||
16757 | $i_last_last_break = $i_last_break; | ||||
16758 | $i_last_break = $i; | ||||
16759 | } | ||||
16760 | |||||
16761 | # don't break before a small last term -- it will | ||||
16762 | # not look good on a line by itself. | ||||
16763 | elsif ($i == $i_max | ||||
16764 | && $i_last_break == $i - 1 | ||||
16765 | && $weighted_length <= $definitely_simple ) | ||||
16766 | { | ||||
16767 | pop @i_ragged_break_list; | ||||
16768 | } | ||||
16769 | } | ||||
16770 | |||||
16771 | my $identifier_count = $i_max + 1 - $quote_count; | ||||
16772 | |||||
16773 | # Need more tuning here.. | ||||
16774 | if ( $max_width > 12 | ||||
16775 | && $complex_item_count > $item_count / 2 | ||||
16776 | && $number_of_fields_best != 2 ) | ||||
16777 | { | ||||
16778 | $number_of_fields_best = 1; | ||||
16779 | } | ||||
16780 | |||||
16781 | return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count ); | ||||
16782 | } | ||||
16783 | |||||
16784 | sub get_maximum_fields_wanted { | ||||
16785 | |||||
16786 | # Not all tables look good with more than one field of items. | ||||
16787 | # This routine looks at a table and decides if it should be | ||||
16788 | # formatted with just one field or not. | ||||
16789 | # This coding is still under development. | ||||
16790 | my ($ritem_lengths) = @_; | ||||
16791 | |||||
16792 | my $number_of_fields_best = 0; | ||||
16793 | |||||
16794 | # For just a few items, we tentatively assume just 1 field. | ||||
16795 | my $item_count = @{$ritem_lengths}; | ||||
16796 | if ( $item_count <= 5 ) { | ||||
16797 | $number_of_fields_best = 1; | ||||
16798 | } | ||||
16799 | |||||
16800 | # For larger tables, look at it both ways and see what looks best | ||||
16801 | else { | ||||
16802 | |||||
16803 | my $is_odd = 1; | ||||
16804 | my @max_length = ( 0, 0 ); | ||||
16805 | my @last_length_2 = ( undef, undef ); | ||||
16806 | my @first_length_2 = ( undef, undef ); | ||||
16807 | my $last_length = undef; | ||||
16808 | my $total_variation_1 = 0; | ||||
16809 | my $total_variation_2 = 0; | ||||
16810 | my @total_variation_2 = ( 0, 0 ); | ||||
16811 | for ( my $j = 0 ; $j < $item_count ; $j++ ) { | ||||
16812 | |||||
16813 | $is_odd = 1 - $is_odd; | ||||
16814 | my $length = $ritem_lengths->[$j]; | ||||
16815 | if ( $length > $max_length[$is_odd] ) { | ||||
16816 | $max_length[$is_odd] = $length; | ||||
16817 | } | ||||
16818 | |||||
16819 | if ( defined($last_length) ) { | ||||
16820 | my $dl = abs( $length - $last_length ); | ||||
16821 | $total_variation_1 += $dl; | ||||
16822 | } | ||||
16823 | $last_length = $length; | ||||
16824 | |||||
16825 | my $ll = $last_length_2[$is_odd]; | ||||
16826 | if ( defined($ll) ) { | ||||
16827 | my $dl = abs( $length - $ll ); | ||||
16828 | $total_variation_2[$is_odd] += $dl; | ||||
16829 | } | ||||
16830 | else { | ||||
16831 | $first_length_2[$is_odd] = $length; | ||||
16832 | } | ||||
16833 | $last_length_2[$is_odd] = $length; | ||||
16834 | } | ||||
16835 | $total_variation_2 = $total_variation_2[0] + $total_variation_2[1]; | ||||
16836 | |||||
16837 | my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0; | ||||
16838 | unless ( $total_variation_2 < $factor * $total_variation_1 ) { | ||||
16839 | $number_of_fields_best = 1; | ||||
16840 | } | ||||
16841 | } | ||||
16842 | return ($number_of_fields_best); | ||||
16843 | } | ||||
16844 | |||||
16845 | sub table_columns_available { | ||||
16846 | my $i_first_comma = shift; | ||||
16847 | my $columns = | ||||
16848 | maximum_line_length($i_first_comma) - | ||||
16849 | leading_spaces_to_go($i_first_comma); | ||||
16850 | |||||
16851 | # Patch: the vertical formatter does not line up lines whose lengths | ||||
16852 | # exactly equal the available line length because of allowances | ||||
16853 | # that must be made for side comments. Therefore, the number of | ||||
16854 | # available columns is reduced by 1 character. | ||||
16855 | $columns -= 1; | ||||
16856 | return $columns; | ||||
16857 | } | ||||
16858 | |||||
16859 | sub maximum_number_of_fields { | ||||
16860 | |||||
16861 | # how many fields will fit in the available space? | ||||
16862 | my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_; | ||||
16863 | my $max_pairs = int( $columns / $pair_width ); | ||||
16864 | my $number_of_fields = $max_pairs * 2; | ||||
16865 | if ( $odd_or_even == 1 | ||||
16866 | && $max_pairs * $pair_width + $max_width <= $columns ) | ||||
16867 | { | ||||
16868 | $number_of_fields++; | ||||
16869 | } | ||||
16870 | return $number_of_fields; | ||||
16871 | } | ||||
16872 | |||||
16873 | sub compactify_table { | ||||
16874 | |||||
16875 | # given a table with a certain number of fields and a certain number | ||||
16876 | # of lines, see if reducing the number of fields will make it look | ||||
16877 | # better. | ||||
16878 | my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_; | ||||
16879 | if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) { | ||||
16880 | my $min_fields; | ||||
16881 | |||||
16882 | for ( | ||||
16883 | $min_fields = $number_of_fields ; | ||||
16884 | $min_fields >= $odd_or_even | ||||
16885 | && $min_fields * $formatted_lines >= $item_count ; | ||||
16886 | $min_fields -= $odd_or_even | ||||
16887 | ) | ||||
16888 | { | ||||
16889 | $number_of_fields = $min_fields; | ||||
16890 | } | ||||
16891 | } | ||||
16892 | return $number_of_fields; | ||||
16893 | } | ||||
16894 | |||||
16895 | sub set_ragged_breakpoints { | ||||
16896 | |||||
16897 | # Set breakpoints in a list that cannot be formatted nicely as a | ||||
16898 | # table. | ||||
16899 | my ( $ri_term_comma, $ri_ragged_break_list ) = @_; | ||||
16900 | |||||
16901 | my $break_count = 0; | ||||
16902 | foreach (@$ri_ragged_break_list) { | ||||
16903 | my $j = $ri_term_comma->[$_]; | ||||
16904 | if ($j) { | ||||
16905 | set_forced_breakpoint($j); | ||||
16906 | $break_count++; | ||||
16907 | } | ||||
16908 | } | ||||
16909 | return $break_count; | ||||
16910 | } | ||||
16911 | |||||
16912 | sub copy_old_breakpoints { | ||||
16913 | my ( $i_first_comma, $i_last_comma ) = @_; | ||||
16914 | for my $i ( $i_first_comma .. $i_last_comma ) { | ||||
16915 | if ( $old_breakpoint_to_go[$i] ) { | ||||
16916 | set_forced_breakpoint($i); | ||||
16917 | } | ||||
16918 | } | ||||
16919 | } | ||||
16920 | |||||
16921 | sub set_nobreaks { | ||||
16922 | my ( $i, $j ) = @_; | ||||
16923 | if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) { | ||||
16924 | |||||
16925 | FORMATTER_DEBUG_FLAG_NOBREAK && do { | ||||
16926 | my ( $a, $b, $c ) = caller(); | ||||
16927 | print STDOUT | ||||
16928 | "NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n"; | ||||
16929 | }; | ||||
16930 | |||||
16931 | @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 ); | ||||
16932 | } | ||||
16933 | |||||
16934 | # shouldn't happen; non-critical error | ||||
16935 | else { | ||||
16936 | FORMATTER_DEBUG_FLAG_NOBREAK && do { | ||||
16937 | my ( $a, $b, $c ) = caller(); | ||||
16938 | print STDOUT | ||||
16939 | "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n"; | ||||
16940 | }; | ||||
16941 | } | ||||
16942 | } | ||||
16943 | |||||
16944 | sub set_fake_breakpoint { | ||||
16945 | |||||
16946 | # Just bump up the breakpoint count as a signal that there are breaks. | ||||
16947 | # This is useful if we have breaks but may want to postpone deciding where | ||||
16948 | # to make them. | ||||
16949 | $forced_breakpoint_count++; | ||||
16950 | } | ||||
16951 | |||||
16952 | sub set_forced_breakpoint { | ||||
16953 | my $i = shift; | ||||
16954 | |||||
16955 | return unless defined $i && $i >= 0; | ||||
16956 | |||||
16957 | # when called with certain tokens, use bond strengths to decide | ||||
16958 | # if we break before or after it | ||||
16959 | my $token = $tokens_to_go[$i]; | ||||
16960 | |||||
16961 | if ( $token =~ /^([\=\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) { | ||||
16962 | if ( $want_break_before{$token} && $i >= 0 ) { $i-- } | ||||
16963 | } | ||||
16964 | |||||
16965 | # breaks are forced before 'if' and 'unless' | ||||
16966 | elsif ( $is_if_unless{$token} ) { $i-- } | ||||
16967 | |||||
16968 | if ( $i >= 0 && $i <= $max_index_to_go ) { | ||||
16969 | my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1; | ||||
16970 | |||||
16971 | FORMATTER_DEBUG_FLAG_FORCE && do { | ||||
16972 | my ( $a, $b, $c ) = caller(); | ||||
16973 | print STDOUT | ||||
16974 | "FORCE $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n"; | ||||
16975 | }; | ||||
16976 | |||||
16977 | if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) { | ||||
16978 | $forced_breakpoint_to_go[$i_nonblank] = 1; | ||||
16979 | |||||
16980 | if ( $i_nonblank > $index_max_forced_break ) { | ||||
16981 | $index_max_forced_break = $i_nonblank; | ||||
16982 | } | ||||
16983 | $forced_breakpoint_count++; | ||||
16984 | $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] = | ||||
16985 | $i_nonblank; | ||||
16986 | |||||
16987 | # if we break at an opening container..break at the closing | ||||
16988 | if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) { | ||||
16989 | set_closing_breakpoint($i_nonblank); | ||||
16990 | } | ||||
16991 | } | ||||
16992 | } | ||||
16993 | } | ||||
16994 | |||||
16995 | sub clear_breakpoint_undo_stack { | ||||
16996 | $forced_breakpoint_undo_count = 0; | ||||
16997 | } | ||||
16998 | |||||
16999 | sub undo_forced_breakpoint_stack { | ||||
17000 | |||||
17001 | my $i_start = shift; | ||||
17002 | if ( $i_start < 0 ) { | ||||
17003 | $i_start = 0; | ||||
17004 | my ( $a, $b, $c ) = caller(); | ||||
17005 | warning( | ||||
17006 | "Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start " | ||||
17007 | ); | ||||
17008 | } | ||||
17009 | |||||
17010 | while ( $forced_breakpoint_undo_count > $i_start ) { | ||||
17011 | my $i = | ||||
17012 | $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ]; | ||||
17013 | if ( $i >= 0 && $i <= $max_index_to_go ) { | ||||
17014 | $forced_breakpoint_to_go[$i] = 0; | ||||
17015 | $forced_breakpoint_count--; | ||||
17016 | |||||
17017 | FORMATTER_DEBUG_FLAG_UNDOBP && do { | ||||
17018 | my ( $a, $b, $c ) = caller(); | ||||
17019 | print STDOUT | ||||
17020 | "UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n"; | ||||
17021 | }; | ||||
17022 | } | ||||
17023 | |||||
17024 | # shouldn't happen, but not a critical error | ||||
17025 | else { | ||||
17026 | FORMATTER_DEBUG_FLAG_UNDOBP && do { | ||||
17027 | my ( $a, $b, $c ) = caller(); | ||||
17028 | print STDOUT | ||||
17029 | "Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go"; | ||||
17030 | }; | ||||
17031 | } | ||||
17032 | } | ||||
17033 | } | ||||
17034 | |||||
17035 | { # begin recombine_breakpoints | ||||
17036 | |||||
17037 | 2 | 100ns | my %is_amp_amp; | ||
17038 | 1 | 0s | my %is_ternary; | ||
17039 | 1 | 0s | my %is_math_op; | ||
17040 | 1 | 0s | my %is_plus_minus; | ||
17041 | 1 | 300ns | my %is_mult_div; | ||
17042 | |||||
17043 | # spent 15µs within Perl::Tidy::Formatter::BEGIN@17043 which was called:
# once (15µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 17059 | ||||
17044 | |||||
17045 | 1 | 1µs | @_ = qw( && || ); | ||
17046 | 1 | 2µs | @is_amp_amp{@_} = (1) x scalar(@_); | ||
17047 | |||||
17048 | 1 | 1µs | @_ = qw( ? : ); | ||
17049 | 1 | 900ns | @is_ternary{@_} = (1) x scalar(@_); | ||
17050 | |||||
17051 | 1 | 1µs | @_ = qw( + - * / ); | ||
17052 | 1 | 1µs | @is_math_op{@_} = (1) x scalar(@_); | ||
17053 | |||||
17054 | 1 | 900ns | @_ = qw( + - ); | ||
17055 | 1 | 700ns | @is_plus_minus{@_} = (1) x scalar(@_); | ||
17056 | |||||
17057 | 1 | 600ns | @_ = qw( * / ); | ||
17058 | 1 | 7µs | @is_mult_div{@_} = (1) x scalar(@_); | ||
17059 | 1 | 4.57ms | 1 | 15µs | } # spent 15µs making 1 call to Perl::Tidy::Formatter::BEGIN@17043 |
17060 | |||||
17061 | sub recombine_breakpoints { | ||||
17062 | |||||
17063 | # sub set_continuation_breaks is very liberal in setting line breaks | ||||
17064 | # for long lines, always setting breaks at good breakpoints, even | ||||
17065 | # when that creates small lines. Sometimes small line fragments | ||||
17066 | # are produced which would look better if they were combined. | ||||
17067 | # That's the task of this routine. | ||||
17068 | # | ||||
17069 | # We are given indexes to the current lines: | ||||
17070 | # $ri_beg = ref to array of BEGinning indexes of each line | ||||
17071 | # $ri_end = ref to array of ENDing indexes of each line | ||||
17072 | my ( $ri_beg, $ri_end ) = @_; | ||||
17073 | |||||
17074 | # Make a list of all good joining tokens between the lines | ||||
17075 | # n-1 and n. | ||||
17076 | my @joint; | ||||
17077 | my $nmax = @$ri_end - 1; | ||||
17078 | for my $n ( 1 .. $nmax ) { | ||||
17079 | my $ibeg_1 = $$ri_beg[ $n - 1 ]; | ||||
17080 | my $iend_1 = $$ri_end[ $n - 1 ]; | ||||
17081 | my $iend_2 = $$ri_end[$n]; | ||||
17082 | my $ibeg_2 = $$ri_beg[$n]; | ||||
17083 | |||||
17084 | my ( $itok, $itokp, $itokm ); | ||||
17085 | |||||
17086 | foreach my $itest ( $iend_1, $ibeg_2 ) { | ||||
17087 | my $type = $types_to_go[$itest]; | ||||
17088 | if ( $is_math_op{$type} | ||||
17089 | || $is_amp_amp{$type} | ||||
17090 | || $is_assignment{$type} | ||||
17091 | || $type eq ':' ) | ||||
17092 | { | ||||
17093 | $itok = $itest; | ||||
17094 | } | ||||
17095 | } | ||||
17096 | $joint[$n] = [$itok]; | ||||
17097 | } | ||||
17098 | |||||
17099 | my $more_to_do = 1; | ||||
17100 | |||||
17101 | # We keep looping over all of the lines of this batch | ||||
17102 | # until there are no more possible recombinations | ||||
17103 | my $nmax_last = @$ri_end; | ||||
17104 | while ($more_to_do) { | ||||
17105 | my $n_best = 0; | ||||
17106 | my $bs_best; | ||||
17107 | my $n; | ||||
17108 | my $nmax = @$ri_end - 1; | ||||
17109 | |||||
17110 | # Safety check for infinite loop | ||||
17111 | unless ( $nmax < $nmax_last ) { | ||||
17112 | |||||
17113 | # Shouldn't happen because splice below decreases nmax on each | ||||
17114 | # pass. | ||||
17115 | Perl::Tidy::Die | ||||
17116 | "Program bug-infinite loop in recombine breakpoints\n"; | ||||
17117 | } | ||||
17118 | $nmax_last = $nmax; | ||||
17119 | $more_to_do = 0; | ||||
17120 | my $previous_outdentable_closing_paren; | ||||
17121 | my $leading_amp_count = 0; | ||||
17122 | my $this_line_is_semicolon_terminated; | ||||
17123 | |||||
17124 | # loop over all remaining lines in this batch | ||||
17125 | for $n ( 1 .. $nmax ) { | ||||
17126 | |||||
17127 | #---------------------------------------------------------- | ||||
17128 | # If we join the current pair of lines, | ||||
17129 | # line $n-1 will become the left part of the joined line | ||||
17130 | # line $n will become the right part of the joined line | ||||
17131 | # | ||||
17132 | # Here are Indexes of the endpoint tokens of the two lines: | ||||
17133 | # | ||||
17134 | # -----line $n-1--- | -----line $n----- | ||||
17135 | # $ibeg_1 $iend_1 | $ibeg_2 $iend_2 | ||||
17136 | # ^ | ||||
17137 | # | | ||||
17138 | # We want to decide if we should remove the line break | ||||
17139 | # between the tokens at $iend_1 and $ibeg_2 | ||||
17140 | # | ||||
17141 | # We will apply a number of ad-hoc tests to see if joining | ||||
17142 | # here will look ok. The code will just issue a 'next' | ||||
17143 | # command if the join doesn't look good. If we get through | ||||
17144 | # the gauntlet of tests, the lines will be recombined. | ||||
17145 | #---------------------------------------------------------- | ||||
17146 | # | ||||
17147 | # beginning and ending tokens of the lines we are working on | ||||
17148 | my $ibeg_1 = $$ri_beg[ $n - 1 ]; | ||||
17149 | my $iend_1 = $$ri_end[ $n - 1 ]; | ||||
17150 | my $iend_2 = $$ri_end[$n]; | ||||
17151 | my $ibeg_2 = $$ri_beg[$n]; | ||||
17152 | my $ibeg_nmax = $$ri_beg[$nmax]; | ||||
17153 | |||||
17154 | my $type_iend_1 = $types_to_go[$iend_1]; | ||||
17155 | my $type_iend_2 = $types_to_go[$iend_2]; | ||||
17156 | my $type_ibeg_1 = $types_to_go[$ibeg_1]; | ||||
17157 | my $type_ibeg_2 = $types_to_go[$ibeg_2]; | ||||
17158 | |||||
17159 | # some beginning indexes of other lines, which may not exist | ||||
17160 | my $ibeg_0 = $n > 1 ? $$ri_beg[ $n - 2 ] : -1; | ||||
17161 | my $ibeg_3 = $n < $nmax ? $$ri_beg[ $n + 1 ] : -1; | ||||
17162 | my $ibeg_4 = $n + 2 <= $nmax ? $$ri_beg[ $n + 2 ] : -1; | ||||
17163 | |||||
17164 | my $bs_tweak = 0; | ||||
17165 | |||||
17166 | #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] - | ||||
17167 | # $nesting_depth_to_go[$ibeg_1] ); | ||||
17168 | |||||
17169 | FORMATTER_DEBUG_FLAG_RECOMBINE && do { | ||||
17170 | print STDERR | ||||
17171 | "RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n"; | ||||
17172 | }; | ||||
17173 | |||||
17174 | # If line $n is the last line, we set some flags and | ||||
17175 | # do any special checks for it | ||||
17176 | if ( $n == $nmax ) { | ||||
17177 | |||||
17178 | # a terminal '{' should stay where it is | ||||
17179 | next if $type_ibeg_2 eq '{'; | ||||
17180 | |||||
17181 | # set flag if statement $n ends in ';' | ||||
17182 | $this_line_is_semicolon_terminated = $type_iend_2 eq ';' | ||||
17183 | |||||
17184 | # with possible side comment | ||||
17185 | || ( $type_iend_2 eq '#' | ||||
17186 | && $iend_2 - $ibeg_2 >= 2 | ||||
17187 | && $types_to_go[ $iend_2 - 2 ] eq ';' | ||||
17188 | && $types_to_go[ $iend_2 - 1 ] eq 'b' ); | ||||
17189 | } | ||||
17190 | |||||
17191 | #---------------------------------------------------------- | ||||
17192 | # Recombine Section 1: | ||||
17193 | # Examine the special token joining this line pair, if any. | ||||
17194 | # Put as many tests in this section to avoid duplicate code and | ||||
17195 | # to make formatting independent of whether breaks are to the | ||||
17196 | # left or right of an operator. | ||||
17197 | #---------------------------------------------------------- | ||||
17198 | |||||
17199 | my ($itok) = @{ $joint[$n] }; | ||||
17200 | if ($itok) { | ||||
17201 | |||||
17202 | # FIXME: Patch - may not be necessary | ||||
17203 | my $iend_1 = | ||||
17204 | $type_iend_1 eq 'b' | ||||
17205 | ? $iend_1 - 1 | ||||
17206 | : $iend_1; | ||||
17207 | |||||
17208 | my $iend_2 = | ||||
17209 | $type_iend_2 eq 'b' | ||||
17210 | ? $iend_2 - 1 | ||||
17211 | : $iend_2; | ||||
17212 | ## END PATCH | ||||
17213 | |||||
17214 | my $type = $types_to_go[$itok]; | ||||
17215 | |||||
17216 | if ( $type eq ':' ) { | ||||
17217 | |||||
17218 | # do not join at a colon unless it disobeys the break request | ||||
17219 | if ( $itok eq $iend_1 ) { | ||||
17220 | next unless $want_break_before{$type}; | ||||
17221 | } | ||||
17222 | else { | ||||
17223 | $leading_amp_count++; | ||||
17224 | next if $want_break_before{$type}; | ||||
17225 | } | ||||
17226 | } ## end if ':' | ||||
17227 | |||||
17228 | # handle math operators + - * / | ||||
17229 | elsif ( $is_math_op{$type} ) { | ||||
17230 | |||||
17231 | # Combine these lines if this line is a single | ||||
17232 | # number, or if it is a short term with same | ||||
17233 | # operator as the previous line. For example, in | ||||
17234 | # the following code we will combine all of the | ||||
17235 | # short terms $A, $B, $C, $D, $E, $F, together | ||||
17236 | # instead of leaving them one per line: | ||||
17237 | # my $time = | ||||
17238 | # $A * $B * $C * $D * $E * $F * | ||||
17239 | # ( 2. * $eps * $sigma * $area ) * | ||||
17240 | # ( 1. / $tcold**3 - 1. / $thot**3 ); | ||||
17241 | |||||
17242 | # This can be important in math-intensive code. | ||||
17243 | |||||
17244 | my $good_combo; | ||||
17245 | |||||
17246 | my $itokp = min( $inext_to_go[$itok], $iend_2 ); | ||||
17247 | my $itokpp = min( $inext_to_go[$itokp], $iend_2 ); | ||||
17248 | my $itokm = max( $iprev_to_go[$itok], $ibeg_1 ); | ||||
17249 | my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 ); | ||||
17250 | |||||
17251 | # check for a number on the right | ||||
17252 | if ( $types_to_go[$itokp] eq 'n' ) { | ||||
17253 | |||||
17254 | # ok if nothing else on right | ||||
17255 | if ( $itokp == $iend_2 ) { | ||||
17256 | $good_combo = 1; | ||||
17257 | } | ||||
17258 | else { | ||||
17259 | |||||
17260 | # look one more token to right.. | ||||
17261 | # okay if math operator or some termination | ||||
17262 | $good_combo = | ||||
17263 | ( ( $itokpp == $iend_2 ) | ||||
17264 | && $is_math_op{ $types_to_go[$itokpp] } ) | ||||
17265 | || $types_to_go[$itokpp] =~ /^[#,;]$/; | ||||
17266 | } | ||||
17267 | } | ||||
17268 | |||||
17269 | # check for a number on the left | ||||
17270 | if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) { | ||||
17271 | |||||
17272 | # okay if nothing else to left | ||||
17273 | if ( $itokm == $ibeg_1 ) { | ||||
17274 | $good_combo = 1; | ||||
17275 | } | ||||
17276 | |||||
17277 | # otherwise look one more token to left | ||||
17278 | else { | ||||
17279 | |||||
17280 | # okay if math operator, comma, or assignment | ||||
17281 | $good_combo = ( $itokmm == $ibeg_1 ) | ||||
17282 | && ( $is_math_op{ $types_to_go[$itokmm] } | ||||
17283 | || $types_to_go[$itokmm] =~ /^[,]$/ | ||||
17284 | || $is_assignment{ $types_to_go[$itokmm] } | ||||
17285 | ); | ||||
17286 | } | ||||
17287 | } | ||||
17288 | |||||
17289 | # look for a single short token either side of the | ||||
17290 | # operator | ||||
17291 | if ( !$good_combo ) { | ||||
17292 | |||||
17293 | # Slight adjustment factor to make results | ||||
17294 | # independent of break before or after operator in | ||||
17295 | # long summed lists. (An operator and a space make | ||||
17296 | # two spaces). | ||||
17297 | my $two = ( $itok eq $iend_1 ) ? 2 : 0; | ||||
17298 | |||||
17299 | $good_combo = | ||||
17300 | |||||
17301 | # numbers or id's on both sides of this joint | ||||
17302 | $types_to_go[$itokp] =~ /^[in]$/ | ||||
17303 | && $types_to_go[$itokm] =~ /^[in]$/ | ||||
17304 | |||||
17305 | # one of the two lines must be short: | ||||
17306 | && ( | ||||
17307 | ( | ||||
17308 | # no more than 2 nonblank tokens right of | ||||
17309 | # joint | ||||
17310 | $itokpp == $iend_2 | ||||
17311 | |||||
17312 | # short | ||||
17313 | && token_sequence_length( $itokp, $iend_2 ) | ||||
17314 | < $two + | ||||
17315 | $rOpts_short_concatenation_item_length | ||||
17316 | ) | ||||
17317 | || ( | ||||
17318 | # no more than 2 nonblank tokens left of | ||||
17319 | # joint | ||||
17320 | $itokmm == $ibeg_1 | ||||
17321 | |||||
17322 | # short | ||||
17323 | && token_sequence_length( $ibeg_1, $itokm ) | ||||
17324 | < 2 - $two + | ||||
17325 | $rOpts_short_concatenation_item_length | ||||
17326 | ) | ||||
17327 | |||||
17328 | ) | ||||
17329 | |||||
17330 | # keep pure terms; don't mix +- with */ | ||||
17331 | && !( | ||||
17332 | $is_plus_minus{$type} | ||||
17333 | && ( $is_mult_div{ $types_to_go[$itokmm] } | ||||
17334 | || $is_mult_div{ $types_to_go[$itokpp] } ) | ||||
17335 | ) | ||||
17336 | && !( | ||||
17337 | $is_mult_div{$type} | ||||
17338 | && ( $is_plus_minus{ $types_to_go[$itokmm] } | ||||
17339 | || $is_plus_minus{ $types_to_go[$itokpp] } ) | ||||
17340 | ) | ||||
17341 | |||||
17342 | ; | ||||
17343 | } | ||||
17344 | |||||
17345 | # it is also good to combine if we can reduce to 2 lines | ||||
17346 | if ( !$good_combo ) { | ||||
17347 | |||||
17348 | # index on other line where same token would be in a | ||||
17349 | # long chain. | ||||
17350 | my $iother = | ||||
17351 | ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1; | ||||
17352 | |||||
17353 | $good_combo = | ||||
17354 | $n == 2 | ||||
17355 | && $n == $nmax | ||||
17356 | && $types_to_go[$iother] ne $type; | ||||
17357 | } | ||||
17358 | |||||
17359 | next unless ($good_combo); | ||||
17360 | |||||
17361 | } ## end math | ||||
17362 | |||||
17363 | elsif ( $is_amp_amp{$type} ) { | ||||
17364 | ##TBD | ||||
17365 | } ## end &&, || | ||||
17366 | |||||
17367 | elsif ( $is_assignment{$type} ) { | ||||
17368 | ##TBD | ||||
17369 | } ## end assignment | ||||
17370 | } | ||||
17371 | |||||
17372 | #---------------------------------------------------------- | ||||
17373 | # Recombine Section 2: | ||||
17374 | # Examine token at $iend_1 (right end of first line of pair) | ||||
17375 | #---------------------------------------------------------- | ||||
17376 | |||||
17377 | # an isolated '}' may join with a ';' terminated segment | ||||
17378 | if ( $type_iend_1 eq '}' ) { | ||||
17379 | |||||
17380 | # Check for cases where combining a semicolon terminated | ||||
17381 | # statement with a previous isolated closing paren will | ||||
17382 | # allow the combined line to be outdented. This is | ||||
17383 | # generally a good move. For example, we can join up | ||||
17384 | # the last two lines here: | ||||
17385 | # ( | ||||
17386 | # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, | ||||
17387 | # $size, $atime, $mtime, $ctime, $blksize, $blocks | ||||
17388 | # ) | ||||
17389 | # = stat($file); | ||||
17390 | # | ||||
17391 | # to get: | ||||
17392 | # ( | ||||
17393 | # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, | ||||
17394 | # $size, $atime, $mtime, $ctime, $blksize, $blocks | ||||
17395 | # ) = stat($file); | ||||
17396 | # | ||||
17397 | # which makes the parens line up. | ||||
17398 | # | ||||
17399 | # Another example, from Joe Matarazzo, probably looks best | ||||
17400 | # with the 'or' clause appended to the trailing paren: | ||||
17401 | # $self->some_method( | ||||
17402 | # PARAM1 => 'foo', | ||||
17403 | # PARAM2 => 'bar' | ||||
17404 | # ) or die "Some_method didn't work"; | ||||
17405 | # | ||||
17406 | # But we do not want to do this for something like the -lp | ||||
17407 | # option where the paren is not outdentable because the | ||||
17408 | # trailing clause will be far to the right. | ||||
17409 | # | ||||
17410 | # The logic here is synchronized with the logic in sub | ||||
17411 | # sub set_adjusted_indentation, which actually does | ||||
17412 | # the outdenting. | ||||
17413 | # | ||||
17414 | $previous_outdentable_closing_paren = | ||||
17415 | $this_line_is_semicolon_terminated | ||||
17416 | |||||
17417 | # only one token on last line | ||||
17418 | && $ibeg_1 == $iend_1 | ||||
17419 | |||||
17420 | # must be structural paren | ||||
17421 | && $tokens_to_go[$iend_1] eq ')' | ||||
17422 | |||||
17423 | # style must allow outdenting, | ||||
17424 | && !$closing_token_indentation{')'} | ||||
17425 | |||||
17426 | # only leading '&&', '||', and ':' if no others seen | ||||
17427 | # (but note: our count made below could be wrong | ||||
17428 | # due to intervening comments) | ||||
17429 | && ( $leading_amp_count == 0 | ||||
17430 | || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ ) | ||||
17431 | |||||
17432 | # but leading colons probably line up with a | ||||
17433 | # previous colon or question (count could be wrong). | ||||
17434 | && $type_ibeg_2 ne ':' | ||||
17435 | |||||
17436 | # only one step in depth allowed. this line must not | ||||
17437 | # begin with a ')' itself. | ||||
17438 | && ( $nesting_depth_to_go[$iend_1] == | ||||
17439 | $nesting_depth_to_go[$iend_2] + 1 ); | ||||
17440 | |||||
17441 | # YVES patch 2 of 2: | ||||
17442 | # Allow cuddled eval chains, like this: | ||||
17443 | # eval { | ||||
17444 | # #STUFF; | ||||
17445 | # 1; # return true | ||||
17446 | # } or do { | ||||
17447 | # #handle error | ||||
17448 | # }; | ||||
17449 | # This patch works together with a patch in | ||||
17450 | # setting adjusted indentation (where the closing eval | ||||
17451 | # brace is outdented if possible). | ||||
17452 | # The problem is that an 'eval' block has continuation | ||||
17453 | # indentation and it looks better to undo it in some | ||||
17454 | # cases. If we do not use this patch we would get: | ||||
17455 | # eval { | ||||
17456 | # #STUFF; | ||||
17457 | # 1; # return true | ||||
17458 | # } | ||||
17459 | # or do { | ||||
17460 | # #handle error | ||||
17461 | # }; | ||||
17462 | # The alternative, for uncuddled style, is to create | ||||
17463 | # a patch in set_adjusted_indentation which undoes | ||||
17464 | # the indentation of a leading line like 'or do {'. | ||||
17465 | # This doesn't work well with -icb through | ||||
17466 | if ( | ||||
17467 | $block_type_to_go[$iend_1] eq 'eval' | ||||
17468 | && !$rOpts->{'line-up-parentheses'} | ||||
17469 | && !$rOpts->{'indent-closing-brace'} | ||||
17470 | && $tokens_to_go[$iend_2] eq '{' | ||||
17471 | && ( | ||||
17472 | ( $type_ibeg_2 =~ /^(|\&\&|\|\|)$/ ) | ||||
17473 | || ( $type_ibeg_2 eq 'k' | ||||
17474 | && $is_and_or{ $tokens_to_go[$ibeg_2] } ) | ||||
17475 | || $is_if_unless{ $tokens_to_go[$ibeg_2] } | ||||
17476 | ) | ||||
17477 | ) | ||||
17478 | { | ||||
17479 | $previous_outdentable_closing_paren ||= 1; | ||||
17480 | } | ||||
17481 | |||||
17482 | next | ||||
17483 | unless ( | ||||
17484 | $previous_outdentable_closing_paren | ||||
17485 | |||||
17486 | # handle '.' and '?' specially below | ||||
17487 | || ( $type_ibeg_2 =~ /^[\.\?]$/ ) | ||||
17488 | ); | ||||
17489 | } | ||||
17490 | |||||
17491 | # YVES | ||||
17492 | # honor breaks at opening brace | ||||
17493 | # Added to prevent recombining something like this: | ||||
17494 | # } || eval { package main; | ||||
17495 | elsif ( $type_iend_1 eq '{' ) { | ||||
17496 | next if $forced_breakpoint_to_go[$iend_1]; | ||||
17497 | } | ||||
17498 | |||||
17499 | # do not recombine lines with ending &&, ||, | ||||
17500 | elsif ( $is_amp_amp{$type_iend_1} ) { | ||||
17501 | next unless $want_break_before{$type_iend_1}; | ||||
17502 | } | ||||
17503 | |||||
17504 | # Identify and recombine a broken ?/: chain | ||||
17505 | elsif ( $type_iend_1 eq '?' ) { | ||||
17506 | |||||
17507 | # Do not recombine different levels | ||||
17508 | next | ||||
17509 | if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] ); | ||||
17510 | |||||
17511 | # do not recombine unless next line ends in : | ||||
17512 | next unless $type_iend_2 eq ':'; | ||||
17513 | } | ||||
17514 | |||||
17515 | # for lines ending in a comma... | ||||
17516 | elsif ( $type_iend_1 eq ',' ) { | ||||
17517 | |||||
17518 | # Do not recombine at comma which is following the | ||||
17519 | # input bias. | ||||
17520 | # TODO: might be best to make a special flag | ||||
17521 | next if ( $old_breakpoint_to_go[$iend_1] ); | ||||
17522 | |||||
17523 | # an isolated '},' may join with an identifier + ';' | ||||
17524 | # this is useful for the class of a 'bless' statement (bless.t) | ||||
17525 | if ( $type_ibeg_1 eq '}' | ||||
17526 | && $type_ibeg_2 eq 'i' ) | ||||
17527 | { | ||||
17528 | next | ||||
17529 | unless ( ( $ibeg_1 == ( $iend_1 - 1 ) ) | ||||
17530 | && ( $iend_2 == ( $ibeg_2 + 1 ) ) | ||||
17531 | && $this_line_is_semicolon_terminated ); | ||||
17532 | |||||
17533 | # override breakpoint | ||||
17534 | $forced_breakpoint_to_go[$iend_1] = 0; | ||||
17535 | } | ||||
17536 | |||||
17537 | # but otherwise .. | ||||
17538 | else { | ||||
17539 | |||||
17540 | # do not recombine after a comma unless this will leave | ||||
17541 | # just 1 more line | ||||
17542 | next unless ( $n + 1 >= $nmax ); | ||||
17543 | |||||
17544 | # do not recombine if there is a change in indentation depth | ||||
17545 | next | ||||
17546 | if ( | ||||
17547 | $levels_to_go[$iend_1] != $levels_to_go[$iend_2] ); | ||||
17548 | |||||
17549 | # do not recombine a "complex expression" after a | ||||
17550 | # comma. "complex" means no parens. | ||||
17551 | my $saw_paren; | ||||
17552 | foreach my $ii ( $ibeg_2 .. $iend_2 ) { | ||||
17553 | if ( $tokens_to_go[$ii] eq '(' ) { | ||||
17554 | $saw_paren = 1; | ||||
17555 | last; | ||||
17556 | } | ||||
17557 | } | ||||
17558 | next if $saw_paren; | ||||
17559 | } | ||||
17560 | } | ||||
17561 | |||||
17562 | # opening paren.. | ||||
17563 | elsif ( $type_iend_1 eq '(' ) { | ||||
17564 | |||||
17565 | # No longer doing this | ||||
17566 | } | ||||
17567 | |||||
17568 | elsif ( $type_iend_1 eq ')' ) { | ||||
17569 | |||||
17570 | # No longer doing this | ||||
17571 | } | ||||
17572 | |||||
17573 | # keep a terminal for-semicolon | ||||
17574 | elsif ( $type_iend_1 eq 'f' ) { | ||||
17575 | next; | ||||
17576 | } | ||||
17577 | |||||
17578 | # if '=' at end of line ... | ||||
17579 | elsif ( $is_assignment{$type_iend_1} ) { | ||||
17580 | |||||
17581 | # keep break after = if it was in input stream | ||||
17582 | # this helps prevent 'blinkers' | ||||
17583 | next if $old_breakpoint_to_go[$iend_1] | ||||
17584 | |||||
17585 | # don't strand an isolated '=' | ||||
17586 | && $iend_1 != $ibeg_1; | ||||
17587 | |||||
17588 | my $is_short_quote = | ||||
17589 | ( $type_ibeg_2 eq 'Q' | ||||
17590 | && $ibeg_2 == $iend_2 | ||||
17591 | && token_sequence_length( $ibeg_2, $ibeg_2 ) < | ||||
17592 | $rOpts_short_concatenation_item_length ); | ||||
17593 | my $is_ternary = | ||||
17594 | ( $type_ibeg_1 eq '?' | ||||
17595 | && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) ); | ||||
17596 | |||||
17597 | # always join an isolated '=', a short quote, or if this | ||||
17598 | # will put ?/: at start of adjacent lines | ||||
17599 | if ( $ibeg_1 != $iend_1 | ||||
17600 | && !$is_short_quote | ||||
17601 | && !$is_ternary ) | ||||
17602 | { | ||||
17603 | next | ||||
17604 | unless ( | ||||
17605 | ( | ||||
17606 | |||||
17607 | # unless we can reduce this to two lines | ||||
17608 | $nmax < $n + 2 | ||||
17609 | |||||
17610 | # or three lines, the last with a leading semicolon | ||||
17611 | || ( $nmax == $n + 2 | ||||
17612 | && $types_to_go[$ibeg_nmax] eq ';' ) | ||||
17613 | |||||
17614 | # or the next line ends with a here doc | ||||
17615 | || $type_iend_2 eq 'h' | ||||
17616 | |||||
17617 | # or the next line ends in an open paren or brace | ||||
17618 | # and the break hasn't been forced [dima.t] | ||||
17619 | || ( !$forced_breakpoint_to_go[$iend_1] | ||||
17620 | && $type_iend_2 eq '{' ) | ||||
17621 | ) | ||||
17622 | |||||
17623 | # do not recombine if the two lines might align well | ||||
17624 | # this is a very approximate test for this | ||||
17625 | && ( $ibeg_3 >= 0 | ||||
17626 | && $type_ibeg_2 ne $types_to_go[$ibeg_3] ) | ||||
17627 | ); | ||||
17628 | |||||
17629 | if ( | ||||
17630 | |||||
17631 | # Recombine if we can make two lines | ||||
17632 | $nmax >= $n + 2 | ||||
17633 | |||||
17634 | # -lp users often prefer this: | ||||
17635 | # my $title = function($env, $env, $sysarea, | ||||
17636 | # "bubba Borrower Entry"); | ||||
17637 | # so we will recombine if -lp is used we have | ||||
17638 | # ending comma | ||||
17639 | && ( !$rOpts_line_up_parentheses | ||||
17640 | || $type_iend_2 ne ',' ) | ||||
17641 | ) | ||||
17642 | { | ||||
17643 | |||||
17644 | # otherwise, scan the rhs line up to last token for | ||||
17645 | # complexity. Note that we are not counting the last | ||||
17646 | # token in case it is an opening paren. | ||||
17647 | my $tv = 0; | ||||
17648 | my $depth = $nesting_depth_to_go[$ibeg_2]; | ||||
17649 | for ( my $i = $ibeg_2 + 1 ; $i < $iend_2 ; $i++ ) { | ||||
17650 | if ( $nesting_depth_to_go[$i] != $depth ) { | ||||
17651 | $tv++; | ||||
17652 | last if ( $tv > 1 ); | ||||
17653 | } | ||||
17654 | $depth = $nesting_depth_to_go[$i]; | ||||
17655 | } | ||||
17656 | |||||
17657 | # ok to recombine if no level changes before last token | ||||
17658 | if ( $tv > 0 ) { | ||||
17659 | |||||
17660 | # otherwise, do not recombine if more than two | ||||
17661 | # level changes. | ||||
17662 | next if ( $tv > 1 ); | ||||
17663 | |||||
17664 | # check total complexity of the two adjacent lines | ||||
17665 | # that will occur if we do this join | ||||
17666 | my $istop = | ||||
17667 | ( $n < $nmax ) ? $$ri_end[ $n + 1 ] : $iend_2; | ||||
17668 | for ( my $i = $iend_2 ; $i <= $istop ; $i++ ) { | ||||
17669 | if ( $nesting_depth_to_go[$i] != $depth ) { | ||||
17670 | $tv++; | ||||
17671 | last if ( $tv > 2 ); | ||||
17672 | } | ||||
17673 | $depth = $nesting_depth_to_go[$i]; | ||||
17674 | } | ||||
17675 | |||||
17676 | # do not recombine if total is more than 2 level changes | ||||
17677 | next if ( $tv > 2 ); | ||||
17678 | } | ||||
17679 | } | ||||
17680 | } | ||||
17681 | |||||
17682 | unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) { | ||||
17683 | $forced_breakpoint_to_go[$iend_1] = 0; | ||||
17684 | } | ||||
17685 | } | ||||
17686 | |||||
17687 | # for keywords.. | ||||
17688 | elsif ( $type_iend_1 eq 'k' ) { | ||||
17689 | |||||
17690 | # make major control keywords stand out | ||||
17691 | # (recombine.t) | ||||
17692 | next | ||||
17693 | if ( | ||||
17694 | |||||
17695 | #/^(last|next|redo|return)$/ | ||||
17696 | $is_last_next_redo_return{ $tokens_to_go[$iend_1] } | ||||
17697 | |||||
17698 | # but only if followed by multiple lines | ||||
17699 | && $n < $nmax | ||||
17700 | ); | ||||
17701 | |||||
17702 | if ( $is_and_or{ $tokens_to_go[$iend_1] } ) { | ||||
17703 | next | ||||
17704 | unless $want_break_before{ $tokens_to_go[$iend_1] }; | ||||
17705 | } | ||||
17706 | } | ||||
17707 | |||||
17708 | #---------------------------------------------------------- | ||||
17709 | # Recombine Section 3: | ||||
17710 | # Examine token at $ibeg_2 (left end of second line of pair) | ||||
17711 | #---------------------------------------------------------- | ||||
17712 | |||||
17713 | # join lines identified above as capable of | ||||
17714 | # causing an outdented line with leading closing paren | ||||
17715 | # Note that we are skipping the rest of this section | ||||
17716 | if ($previous_outdentable_closing_paren) { | ||||
17717 | $forced_breakpoint_to_go[$iend_1] = 0; | ||||
17718 | } | ||||
17719 | |||||
17720 | # handle lines with leading &&, || | ||||
17721 | elsif ( $is_amp_amp{$type_ibeg_2} ) { | ||||
17722 | |||||
17723 | $leading_amp_count++; | ||||
17724 | |||||
17725 | # ok to recombine if it follows a ? or : | ||||
17726 | # and is followed by an open paren.. | ||||
17727 | my $ok = | ||||
17728 | ( $is_ternary{$type_ibeg_1} | ||||
17729 | && $tokens_to_go[$iend_2] eq '(' ) | ||||
17730 | |||||
17731 | # or is followed by a ? or : at same depth | ||||
17732 | # | ||||
17733 | # We are looking for something like this. We can | ||||
17734 | # recombine the && line with the line above to make the | ||||
17735 | # structure more clear: | ||||
17736 | # return | ||||
17737 | # exists $G->{Attr}->{V} | ||||
17738 | # && exists $G->{Attr}->{V}->{$u} | ||||
17739 | # ? %{ $G->{Attr}->{V}->{$u} } | ||||
17740 | # : (); | ||||
17741 | # | ||||
17742 | # We should probably leave something like this alone: | ||||
17743 | # return | ||||
17744 | # exists $G->{Attr}->{E} | ||||
17745 | # && exists $G->{Attr}->{E}->{$u} | ||||
17746 | # && exists $G->{Attr}->{E}->{$u}->{$v} | ||||
17747 | # ? %{ $G->{Attr}->{E}->{$u}->{$v} } | ||||
17748 | # : (); | ||||
17749 | # so that we either have all of the &&'s (or ||'s) | ||||
17750 | # on one line, as in the first example, or break at | ||||
17751 | # each one as in the second example. However, it | ||||
17752 | # sometimes makes things worse to check for this because | ||||
17753 | # it prevents multiple recombinations. So this is not done. | ||||
17754 | || ( $ibeg_3 >= 0 | ||||
17755 | && $is_ternary{ $types_to_go[$ibeg_3] } | ||||
17756 | && $nesting_depth_to_go[$ibeg_3] == | ||||
17757 | $nesting_depth_to_go[$ibeg_2] ); | ||||
17758 | |||||
17759 | next if !$ok && $want_break_before{$type_ibeg_2}; | ||||
17760 | $forced_breakpoint_to_go[$iend_1] = 0; | ||||
17761 | |||||
17762 | # tweak the bond strength to give this joint priority | ||||
17763 | # over ? and : | ||||
17764 | $bs_tweak = 0.25; | ||||
17765 | } | ||||
17766 | |||||
17767 | # Identify and recombine a broken ?/: chain | ||||
17768 | elsif ( $type_ibeg_2 eq '?' ) { | ||||
17769 | |||||
17770 | # Do not recombine different levels | ||||
17771 | my $lev = $levels_to_go[$ibeg_2]; | ||||
17772 | next if ( $lev ne $levels_to_go[$ibeg_1] ); | ||||
17773 | |||||
17774 | # Do not recombine a '?' if either next line or | ||||
17775 | # previous line does not start with a ':'. The reasons | ||||
17776 | # are that (1) no alignment of the ? will be possible | ||||
17777 | # and (2) the expression is somewhat complex, so the | ||||
17778 | # '?' is harder to see in the interior of the line. | ||||
17779 | my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':'; | ||||
17780 | my $precedes_colon = | ||||
17781 | $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':'; | ||||
17782 | next unless ( $follows_colon || $precedes_colon ); | ||||
17783 | |||||
17784 | # we will always combining a ? line following a : line | ||||
17785 | if ( !$follows_colon ) { | ||||
17786 | |||||
17787 | # ...otherwise recombine only if it looks like a chain. | ||||
17788 | # we will just look at a few nearby lines to see if | ||||
17789 | # this looks like a chain. | ||||
17790 | my $local_count = 0; | ||||
17791 | foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) { | ||||
17792 | $local_count++ | ||||
17793 | if $ii >= 0 | ||||
17794 | && $types_to_go[$ii] eq ':' | ||||
17795 | && $levels_to_go[$ii] == $lev; | ||||
17796 | } | ||||
17797 | next unless ( $local_count > 1 ); | ||||
17798 | } | ||||
17799 | $forced_breakpoint_to_go[$iend_1] = 0; | ||||
17800 | } | ||||
17801 | |||||
17802 | # do not recombine lines with leading '.' | ||||
17803 | elsif ( $type_ibeg_2 eq '.' ) { | ||||
17804 | my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 ); | ||||
17805 | next | ||||
17806 | unless ( | ||||
17807 | |||||
17808 | # ... unless there is just one and we can reduce | ||||
17809 | # this to two lines if we do. For example, this | ||||
17810 | # | ||||
17811 | # | ||||
17812 | # $bodyA .= | ||||
17813 | # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;' | ||||
17814 | # | ||||
17815 | # looks better than this: | ||||
17816 | # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;' | ||||
17817 | # . '$args .= $pat;' | ||||
17818 | |||||
17819 | ( | ||||
17820 | $n == 2 | ||||
17821 | && $n == $nmax | ||||
17822 | && $type_ibeg_1 ne $type_ibeg_2 | ||||
17823 | ) | ||||
17824 | |||||
17825 | # ... or this would strand a short quote , like this | ||||
17826 | # . "some long quote" | ||||
17827 | # . "\n"; | ||||
17828 | |||||
17829 | || ( $types_to_go[$i_next_nonblank] eq 'Q' | ||||
17830 | && $i_next_nonblank >= $iend_2 - 1 | ||||
17831 | && $token_lengths_to_go[$i_next_nonblank] < | ||||
17832 | $rOpts_short_concatenation_item_length ) | ||||
17833 | ); | ||||
17834 | } | ||||
17835 | |||||
17836 | # handle leading keyword.. | ||||
17837 | elsif ( $type_ibeg_2 eq 'k' ) { | ||||
17838 | |||||
17839 | # handle leading "or" | ||||
17840 | if ( $tokens_to_go[$ibeg_2] eq 'or' ) { | ||||
17841 | next | ||||
17842 | unless ( | ||||
17843 | $this_line_is_semicolon_terminated | ||||
17844 | && ( | ||||
17845 | |||||
17846 | # following 'if' or 'unless' or 'or' | ||||
17847 | $type_ibeg_1 eq 'k' | ||||
17848 | && $is_if_unless{ $tokens_to_go[$ibeg_1] } | ||||
17849 | |||||
17850 | # important: only combine a very simple or | ||||
17851 | # statement because the step below may have | ||||
17852 | # combined a trailing 'and' with this or, | ||||
17853 | # and we do not want to then combine | ||||
17854 | # everything together | ||||
17855 | && ( $iend_2 - $ibeg_2 <= 7 ) | ||||
17856 | ) | ||||
17857 | ); | ||||
17858 | ##X: RT #81854 | ||||
17859 | $forced_breakpoint_to_go[$iend_1] = 0 | ||||
17860 | unless $old_breakpoint_to_go[$iend_1]; | ||||
17861 | } | ||||
17862 | |||||
17863 | # handle leading 'and' | ||||
17864 | elsif ( $tokens_to_go[$ibeg_2] eq 'and' ) { | ||||
17865 | |||||
17866 | # Decide if we will combine a single terminal 'and' | ||||
17867 | # after an 'if' or 'unless'. | ||||
17868 | |||||
17869 | # This looks best with the 'and' on the same | ||||
17870 | # line as the 'if': | ||||
17871 | # | ||||
17872 | # $a = 1 | ||||
17873 | # if $seconds and $nu < 2; | ||||
17874 | # | ||||
17875 | # But this looks better as shown: | ||||
17876 | # | ||||
17877 | # $a = 1 | ||||
17878 | # if !$this->{Parents}{$_} | ||||
17879 | # or $this->{Parents}{$_} eq $_; | ||||
17880 | # | ||||
17881 | next | ||||
17882 | unless ( | ||||
17883 | $this_line_is_semicolon_terminated | ||||
17884 | && ( | ||||
17885 | |||||
17886 | # following 'if' or 'unless' or 'or' | ||||
17887 | $type_ibeg_1 eq 'k' | ||||
17888 | && ( $is_if_unless{ $tokens_to_go[$ibeg_1] } | ||||
17889 | || $tokens_to_go[$ibeg_1] eq 'or' ) | ||||
17890 | ) | ||||
17891 | ); | ||||
17892 | } | ||||
17893 | |||||
17894 | # handle leading "if" and "unless" | ||||
17895 | elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) { | ||||
17896 | |||||
17897 | # FIXME: This is still experimental..may not be too useful | ||||
17898 | next | ||||
17899 | unless ( | ||||
17900 | $this_line_is_semicolon_terminated | ||||
17901 | |||||
17902 | # previous line begins with 'and' or 'or' | ||||
17903 | && $type_ibeg_1 eq 'k' | ||||
17904 | && $is_and_or{ $tokens_to_go[$ibeg_1] } | ||||
17905 | |||||
17906 | ); | ||||
17907 | } | ||||
17908 | |||||
17909 | # handle all other leading keywords | ||||
17910 | else { | ||||
17911 | |||||
17912 | # keywords look best at start of lines, | ||||
17913 | # but combine things like "1 while" | ||||
17914 | unless ( $is_assignment{$type_iend_1} ) { | ||||
17915 | next | ||||
17916 | if ( ( $type_iend_1 ne 'k' ) | ||||
17917 | && ( $tokens_to_go[$ibeg_2] ne 'while' ) ); | ||||
17918 | } | ||||
17919 | } | ||||
17920 | } | ||||
17921 | |||||
17922 | # similar treatment of && and || as above for 'and' and 'or': | ||||
17923 | # NOTE: This block of code is currently bypassed because | ||||
17924 | # of a previous block but is retained for possible future use. | ||||
17925 | elsif ( $is_amp_amp{$type_ibeg_2} ) { | ||||
17926 | |||||
17927 | # maybe looking at something like: | ||||
17928 | # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i; | ||||
17929 | |||||
17930 | next | ||||
17931 | unless ( | ||||
17932 | $this_line_is_semicolon_terminated | ||||
17933 | |||||
17934 | # previous line begins with an 'if' or 'unless' keyword | ||||
17935 | && $type_ibeg_1 eq 'k' | ||||
17936 | && $is_if_unless{ $tokens_to_go[$ibeg_1] } | ||||
17937 | |||||
17938 | ); | ||||
17939 | } | ||||
17940 | |||||
17941 | # handle line with leading = or similar | ||||
17942 | elsif ( $is_assignment{$type_ibeg_2} ) { | ||||
17943 | next unless ( $n == 1 || $n == $nmax ); | ||||
17944 | next if $old_breakpoint_to_go[$iend_1]; | ||||
17945 | next | ||||
17946 | unless ( | ||||
17947 | |||||
17948 | # unless we can reduce this to two lines | ||||
17949 | $nmax == 2 | ||||
17950 | |||||
17951 | # or three lines, the last with a leading semicolon | ||||
17952 | || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' ) | ||||
17953 | |||||
17954 | # or the next line ends with a here doc | ||||
17955 | || $type_iend_2 eq 'h' | ||||
17956 | |||||
17957 | # or this is a short line ending in ; | ||||
17958 | || ( $n == $nmax && $this_line_is_semicolon_terminated ) | ||||
17959 | ); | ||||
17960 | $forced_breakpoint_to_go[$iend_1] = 0; | ||||
17961 | } | ||||
17962 | |||||
17963 | #---------------------------------------------------------- | ||||
17964 | # Recombine Section 4: | ||||
17965 | # Combine the lines if we arrive here and it is possible | ||||
17966 | #---------------------------------------------------------- | ||||
17967 | |||||
17968 | # honor hard breakpoints | ||||
17969 | next if ( $forced_breakpoint_to_go[$iend_1] > 0 ); | ||||
17970 | |||||
17971 | my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak; | ||||
17972 | |||||
17973 | # combined line cannot be too long | ||||
17974 | my $excess = excess_line_length( $ibeg_1, $iend_2 ); | ||||
17975 | next if ( $excess > 0 ); | ||||
17976 | |||||
17977 | # Require a few extra spaces before recombining lines if we are | ||||
17978 | # at an old breakpoint unless this is a simple list or terminal | ||||
17979 | # line. The goal is to avoid oscillating between two | ||||
17980 | # quasi-stable end states. For example this snippet caused | ||||
17981 | # problems: | ||||
17982 | ## my $this = | ||||
17983 | ## bless { | ||||
17984 | ## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]" | ||||
17985 | ## }, | ||||
17986 | ## $type; | ||||
17987 | next | ||||
17988 | if ( $old_breakpoint_to_go[$iend_1] | ||||
17989 | && !$this_line_is_semicolon_terminated | ||||
17990 | && $n < $nmax | ||||
17991 | && $excess + 4 > 0 | ||||
17992 | && $type_iend_2 ne ',' ); | ||||
17993 | |||||
17994 | # do not recombine if we would skip in indentation levels | ||||
17995 | if ( $n < $nmax ) { | ||||
17996 | my $if_next = $$ri_beg[ $n + 1 ]; | ||||
17997 | next | ||||
17998 | if ( | ||||
17999 | $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2] | ||||
18000 | && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next] | ||||
18001 | |||||
18002 | # but an isolated 'if (' is undesirable | ||||
18003 | && !( | ||||
18004 | $n == 1 | ||||
18005 | && $iend_1 - $ibeg_1 <= 2 | ||||
18006 | && $type_ibeg_1 eq 'k' | ||||
18007 | && $tokens_to_go[$ibeg_1] eq 'if' | ||||
18008 | && $tokens_to_go[$iend_1] ne '(' | ||||
18009 | ) | ||||
18010 | ); | ||||
18011 | } | ||||
18012 | |||||
18013 | # honor no-break's | ||||
18014 | next if ( $bs >= NO_BREAK - 1 ); | ||||
18015 | |||||
18016 | # remember the pair with the greatest bond strength | ||||
18017 | if ( !$n_best ) { | ||||
18018 | $n_best = $n; | ||||
18019 | $bs_best = $bs; | ||||
18020 | } | ||||
18021 | else { | ||||
18022 | |||||
18023 | if ( $bs > $bs_best ) { | ||||
18024 | $n_best = $n; | ||||
18025 | $bs_best = $bs; | ||||
18026 | } | ||||
18027 | } | ||||
18028 | } | ||||
18029 | |||||
18030 | # recombine the pair with the greatest bond strength | ||||
18031 | if ($n_best) { | ||||
18032 | splice @$ri_beg, $n_best, 1; | ||||
18033 | splice @$ri_end, $n_best - 1, 1; | ||||
18034 | splice @joint, $n_best, 1; | ||||
18035 | |||||
18036 | # keep going if we are still making progress | ||||
18037 | $more_to_do++; | ||||
18038 | } | ||||
18039 | } | ||||
18040 | return ( $ri_beg, $ri_end ); | ||||
18041 | } | ||||
18042 | } # end recombine_breakpoints | ||||
18043 | |||||
18044 | sub break_all_chain_tokens { | ||||
18045 | |||||
18046 | # scan the current breakpoints looking for breaks at certain "chain | ||||
18047 | # operators" (. : && || + etc) which often occur repeatedly in a long | ||||
18048 | # statement. If we see a break at any one, break at all similar tokens | ||||
18049 | # within the same container. | ||||
18050 | # | ||||
18051 | my ( $ri_left, $ri_right ) = @_; | ||||
18052 | |||||
18053 | my %saw_chain_type; | ||||
18054 | my %left_chain_type; | ||||
18055 | my %right_chain_type; | ||||
18056 | my %interior_chain_type; | ||||
18057 | my $nmax = @$ri_right - 1; | ||||
18058 | |||||
18059 | # scan the left and right end tokens of all lines | ||||
18060 | my $count = 0; | ||||
18061 | for my $n ( 0 .. $nmax ) { | ||||
18062 | my $il = $$ri_left[$n]; | ||||
18063 | my $ir = $$ri_right[$n]; | ||||
18064 | my $typel = $types_to_go[$il]; | ||||
18065 | my $typer = $types_to_go[$ir]; | ||||
18066 | $typel = '+' if ( $typel eq '-' ); # treat + and - the same | ||||
18067 | $typer = '+' if ( $typer eq '-' ); | ||||
18068 | $typel = '*' if ( $typel eq '/' ); # treat * and / the same | ||||
18069 | $typer = '*' if ( $typer eq '/' ); | ||||
18070 | my $tokenl = $tokens_to_go[$il]; | ||||
18071 | my $tokenr = $tokens_to_go[$ir]; | ||||
18072 | |||||
18073 | if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) { | ||||
18074 | next if ( $typel eq '?' ); | ||||
18075 | push @{ $left_chain_type{$typel} }, $il; | ||||
18076 | $saw_chain_type{$typel} = 1; | ||||
18077 | $count++; | ||||
18078 | } | ||||
18079 | if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) { | ||||
18080 | next if ( $typer eq '?' ); | ||||
18081 | push @{ $right_chain_type{$typer} }, $ir; | ||||
18082 | $saw_chain_type{$typer} = 1; | ||||
18083 | $count++; | ||||
18084 | } | ||||
18085 | } | ||||
18086 | return unless $count; | ||||
18087 | |||||
18088 | # now look for any interior tokens of the same types | ||||
18089 | $count = 0; | ||||
18090 | for my $n ( 0 .. $nmax ) { | ||||
18091 | my $il = $$ri_left[$n]; | ||||
18092 | my $ir = $$ri_right[$n]; | ||||
18093 | for ( my $i = $il + 1 ; $i < $ir ; $i++ ) { | ||||
18094 | my $type = $types_to_go[$i]; | ||||
18095 | $type = '+' if ( $type eq '-' ); | ||||
18096 | $type = '*' if ( $type eq '/' ); | ||||
18097 | if ( $saw_chain_type{$type} ) { | ||||
18098 | push @{ $interior_chain_type{$type} }, $i; | ||||
18099 | $count++; | ||||
18100 | } | ||||
18101 | } | ||||
18102 | } | ||||
18103 | return unless $count; | ||||
18104 | |||||
18105 | # now make a list of all new break points | ||||
18106 | my @insert_list; | ||||
18107 | |||||
18108 | # loop over all chain types | ||||
18109 | foreach my $type ( keys %saw_chain_type ) { | ||||
18110 | |||||
18111 | # quit if just ONE continuation line with leading . For example-- | ||||
18112 | # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{' | ||||
18113 | # . $contents; | ||||
18114 | last if ( $nmax == 1 && $type =~ /^[\.\+]$/ ); | ||||
18115 | |||||
18116 | # loop over all interior chain tokens | ||||
18117 | foreach my $itest ( @{ $interior_chain_type{$type} } ) { | ||||
18118 | |||||
18119 | # loop over all left end tokens of same type | ||||
18120 | if ( $left_chain_type{$type} ) { | ||||
18121 | next if $nobreak_to_go[ $itest - 1 ]; | ||||
18122 | foreach my $i ( @{ $left_chain_type{$type} } ) { | ||||
18123 | next unless in_same_container( $i, $itest ); | ||||
18124 | push @insert_list, $itest - 1; | ||||
18125 | |||||
18126 | # Break at matching ? if this : is at a different level. | ||||
18127 | # For example, the ? before $THRf_DEAD in the following | ||||
18128 | # should get a break if its : gets a break. | ||||
18129 | # | ||||
18130 | # my $flags = | ||||
18131 | # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE | ||||
18132 | # : ( $_ & 4 ) ? $THRf_R_DETACHED | ||||
18133 | # : $THRf_R_JOINABLE; | ||||
18134 | if ( $type eq ':' | ||||
18135 | && $levels_to_go[$i] != $levels_to_go[$itest] ) | ||||
18136 | { | ||||
18137 | my $i_question = $mate_index_to_go[$itest]; | ||||
18138 | if ( $i_question > 0 ) { | ||||
18139 | push @insert_list, $i_question - 1; | ||||
18140 | } | ||||
18141 | } | ||||
18142 | last; | ||||
18143 | } | ||||
18144 | } | ||||
18145 | |||||
18146 | # loop over all right end tokens of same type | ||||
18147 | if ( $right_chain_type{$type} ) { | ||||
18148 | next if $nobreak_to_go[$itest]; | ||||
18149 | foreach my $i ( @{ $right_chain_type{$type} } ) { | ||||
18150 | next unless in_same_container( $i, $itest ); | ||||
18151 | push @insert_list, $itest; | ||||
18152 | |||||
18153 | # break at matching ? if this : is at a different level | ||||
18154 | if ( $type eq ':' | ||||
18155 | && $levels_to_go[$i] != $levels_to_go[$itest] ) | ||||
18156 | { | ||||
18157 | my $i_question = $mate_index_to_go[$itest]; | ||||
18158 | if ( $i_question >= 0 ) { | ||||
18159 | push @insert_list, $i_question; | ||||
18160 | } | ||||
18161 | } | ||||
18162 | last; | ||||
18163 | } | ||||
18164 | } | ||||
18165 | } | ||||
18166 | } | ||||
18167 | |||||
18168 | # insert any new break points | ||||
18169 | if (@insert_list) { | ||||
18170 | insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); | ||||
18171 | } | ||||
18172 | } | ||||
18173 | |||||
18174 | sub break_equals { | ||||
18175 | |||||
18176 | # Look for assignment operators that could use a breakpoint. | ||||
18177 | # For example, in the following snippet | ||||
18178 | # | ||||
18179 | # $HOME = $ENV{HOME} | ||||
18180 | # || $ENV{LOGDIR} | ||||
18181 | # || $pw[7] | ||||
18182 | # || die "no home directory for user $<"; | ||||
18183 | # | ||||
18184 | # we could break at the = to get this, which is a little nicer: | ||||
18185 | # $HOME = | ||||
18186 | # $ENV{HOME} | ||||
18187 | # || $ENV{LOGDIR} | ||||
18188 | # || $pw[7] | ||||
18189 | # || die "no home directory for user $<"; | ||||
18190 | # | ||||
18191 | # The logic here follows the logic in set_logical_padding, which | ||||
18192 | # will add the padding in the second line to improve alignment. | ||||
18193 | # | ||||
18194 | my ( $ri_left, $ri_right ) = @_; | ||||
18195 | my $nmax = @$ri_right - 1; | ||||
18196 | return unless ( $nmax >= 2 ); | ||||
18197 | |||||
18198 | # scan the left ends of first two lines | ||||
18199 | my $tokbeg = ""; | ||||
18200 | my $depth_beg; | ||||
18201 | for my $n ( 1 .. 2 ) { | ||||
18202 | my $il = $$ri_left[$n]; | ||||
18203 | my $typel = $types_to_go[$il]; | ||||
18204 | my $tokenl = $tokens_to_go[$il]; | ||||
18205 | |||||
18206 | my $has_leading_op = ( $tokenl =~ /^\w/ ) | ||||
18207 | ? $is_chain_operator{$tokenl} # + - * / : ? && || | ||||
18208 | : $is_chain_operator{$typel}; # and, or | ||||
18209 | return unless ($has_leading_op); | ||||
18210 | if ( $n > 1 ) { | ||||
18211 | return | ||||
18212 | unless ( $tokenl eq $tokbeg | ||||
18213 | && $nesting_depth_to_go[$il] eq $depth_beg ); | ||||
18214 | } | ||||
18215 | $tokbeg = $tokenl; | ||||
18216 | $depth_beg = $nesting_depth_to_go[$il]; | ||||
18217 | } | ||||
18218 | |||||
18219 | # now look for any interior tokens of the same types | ||||
18220 | my $il = $$ri_left[0]; | ||||
18221 | my $ir = $$ri_right[0]; | ||||
18222 | |||||
18223 | # now make a list of all new break points | ||||
18224 | my @insert_list; | ||||
18225 | for ( my $i = $ir - 1 ; $i > $il ; $i-- ) { | ||||
18226 | my $type = $types_to_go[$i]; | ||||
18227 | if ( $is_assignment{$type} | ||||
18228 | && $nesting_depth_to_go[$i] eq $depth_beg ) | ||||
18229 | { | ||||
18230 | if ( $want_break_before{$type} ) { | ||||
18231 | push @insert_list, $i - 1; | ||||
18232 | } | ||||
18233 | else { | ||||
18234 | push @insert_list, $i; | ||||
18235 | } | ||||
18236 | } | ||||
18237 | } | ||||
18238 | |||||
18239 | # Break after a 'return' followed by a chain of operators | ||||
18240 | # return ( $^O !~ /win32|dos/i ) | ||||
18241 | # && ( $^O ne 'VMS' ) | ||||
18242 | # && ( $^O ne 'OS2' ) | ||||
18243 | # && ( $^O ne 'MacOS' ); | ||||
18244 | # To give: | ||||
18245 | # return | ||||
18246 | # ( $^O !~ /win32|dos/i ) | ||||
18247 | # && ( $^O ne 'VMS' ) | ||||
18248 | # && ( $^O ne 'OS2' ) | ||||
18249 | # && ( $^O ne 'MacOS' ); | ||||
18250 | my $i = 0; | ||||
18251 | if ( $types_to_go[$i] eq 'k' | ||||
18252 | && $tokens_to_go[$i] eq 'return' | ||||
18253 | && $ir > $il | ||||
18254 | && $nesting_depth_to_go[$i] eq $depth_beg ) | ||||
18255 | { | ||||
18256 | push @insert_list, $i; | ||||
18257 | } | ||||
18258 | |||||
18259 | return unless (@insert_list); | ||||
18260 | |||||
18261 | # One final check... | ||||
18262 | # scan second and third lines and be sure there are no assignments | ||||
18263 | # we want to avoid breaking at an = to make something like this: | ||||
18264 | # unless ( $icon = | ||||
18265 | # $html_icons{"$type-$state"} | ||||
18266 | # or $icon = $html_icons{$type} | ||||
18267 | # or $icon = $html_icons{$state} ) | ||||
18268 | for my $n ( 1 .. 2 ) { | ||||
18269 | my $il = $$ri_left[$n]; | ||||
18270 | my $ir = $$ri_right[$n]; | ||||
18271 | for ( my $i = $il + 1 ; $i <= $ir ; $i++ ) { | ||||
18272 | my $type = $types_to_go[$i]; | ||||
18273 | return | ||||
18274 | if ( $is_assignment{$type} | ||||
18275 | && $nesting_depth_to_go[$i] eq $depth_beg ); | ||||
18276 | } | ||||
18277 | } | ||||
18278 | |||||
18279 | # ok, insert any new break point | ||||
18280 | if (@insert_list) { | ||||
18281 | insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); | ||||
18282 | } | ||||
18283 | } | ||||
18284 | |||||
18285 | sub insert_final_breaks { | ||||
18286 | |||||
18287 | my ( $ri_left, $ri_right ) = @_; | ||||
18288 | |||||
18289 | my $nmax = @$ri_right - 1; | ||||
18290 | |||||
18291 | # scan the left and right end tokens of all lines | ||||
18292 | my $count = 0; | ||||
18293 | my $i_first_colon = -1; | ||||
18294 | for my $n ( 0 .. $nmax ) { | ||||
18295 | my $il = $$ri_left[$n]; | ||||
18296 | my $ir = $$ri_right[$n]; | ||||
18297 | my $typel = $types_to_go[$il]; | ||||
18298 | my $typer = $types_to_go[$ir]; | ||||
18299 | return if ( $typel eq '?' ); | ||||
18300 | return if ( $typer eq '?' ); | ||||
18301 | if ( $typel eq ':' ) { $i_first_colon = $il; last; } | ||||
18302 | elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; } | ||||
18303 | } | ||||
18304 | |||||
18305 | # For long ternary chains, | ||||
18306 | # if the first : we see has its # ? is in the interior | ||||
18307 | # of a preceding line, then see if there are any good | ||||
18308 | # breakpoints before the ?. | ||||
18309 | if ( $i_first_colon > 0 ) { | ||||
18310 | my $i_question = $mate_index_to_go[$i_first_colon]; | ||||
18311 | if ( $i_question > 0 ) { | ||||
18312 | my @insert_list; | ||||
18313 | for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) { | ||||
18314 | my $token = $tokens_to_go[$ii]; | ||||
18315 | my $type = $types_to_go[$ii]; | ||||
18316 | |||||
18317 | # For now, a good break is either a comma or a 'return'. | ||||
18318 | if ( ( $type eq ',' || $type eq 'k' && $token eq 'return' ) | ||||
18319 | && in_same_container( $ii, $i_question ) ) | ||||
18320 | { | ||||
18321 | push @insert_list, $ii; | ||||
18322 | last; | ||||
18323 | } | ||||
18324 | } | ||||
18325 | |||||
18326 | # insert any new break points | ||||
18327 | if (@insert_list) { | ||||
18328 | insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); | ||||
18329 | } | ||||
18330 | } | ||||
18331 | } | ||||
18332 | } | ||||
18333 | |||||
18334 | sub in_same_container { | ||||
18335 | |||||
18336 | # check to see if tokens at i1 and i2 are in the | ||||
18337 | # same container, and not separated by a comma, ? or : | ||||
18338 | my ( $i1, $i2 ) = @_; | ||||
18339 | my $type = $types_to_go[$i1]; | ||||
18340 | my $depth = $nesting_depth_to_go[$i1]; | ||||
18341 | return unless ( $nesting_depth_to_go[$i2] == $depth ); | ||||
18342 | if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) } | ||||
18343 | |||||
18344 | ########################################################### | ||||
18345 | # This is potentially a very slow routine and not critical. | ||||
18346 | # For safety just give up for large differences. | ||||
18347 | # See test file 'infinite_loop.txt' | ||||
18348 | # TODO: replace this loop with a data structure | ||||
18349 | ########################################################### | ||||
18350 | return if ( $i2 - $i1 > 200 ); | ||||
18351 | |||||
18352 | for ( my $i = $i1 + 1 ; $i < $i2 ; $i++ ) { | ||||
18353 | next if ( $nesting_depth_to_go[$i] > $depth ); | ||||
18354 | return if ( $nesting_depth_to_go[$i] < $depth ); | ||||
18355 | |||||
18356 | my $tok = $tokens_to_go[$i]; | ||||
18357 | $tok = ',' if $tok eq '=>'; # treat => same as , | ||||
18358 | |||||
18359 | # Example: we would not want to break at any of these .'s | ||||
18360 | # : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>" | ||||
18361 | if ( $type ne ':' ) { | ||||
18362 | return if ( $tok =~ /^[\,\:\?]$/ ) || $tok eq '||' || $tok eq 'or'; | ||||
18363 | } | ||||
18364 | else { | ||||
18365 | return if ( $tok =~ /^[\,]$/ ); | ||||
18366 | } | ||||
18367 | } | ||||
18368 | return 1; | ||||
18369 | } | ||||
18370 | |||||
18371 | sub set_continuation_breaks { | ||||
18372 | |||||
18373 | # Define an array of indexes for inserting newline characters to | ||||
18374 | # keep the line lengths below the maximum desired length. There is | ||||
18375 | # an implied break after the last token, so it need not be included. | ||||
18376 | |||||
18377 | # Method: | ||||
18378 | # This routine is part of series of routines which adjust line | ||||
18379 | # lengths. It is only called if a statement is longer than the | ||||
18380 | # maximum line length, or if a preliminary scanning located | ||||
18381 | # desirable break points. Sub scan_list has already looked at | ||||
18382 | # these tokens and set breakpoints (in array | ||||
18383 | # $forced_breakpoint_to_go[$i]) where it wants breaks (for example | ||||
18384 | # after commas, after opening parens, and before closing parens). | ||||
18385 | # This routine will honor these breakpoints and also add additional | ||||
18386 | # breakpoints as necessary to keep the line length below the maximum | ||||
18387 | # requested. It bases its decision on where the 'bond strength' is | ||||
18388 | # lowest. | ||||
18389 | |||||
18390 | # Output: returns references to the arrays: | ||||
18391 | # @i_first | ||||
18392 | # @i_last | ||||
18393 | # which contain the indexes $i of the first and last tokens on each | ||||
18394 | # line. | ||||
18395 | |||||
18396 | # In addition, the array: | ||||
18397 | # $forced_breakpoint_to_go[$i] | ||||
18398 | # may be updated to be =1 for any index $i after which there must be | ||||
18399 | # a break. This signals later routines not to undo the breakpoint. | ||||
18400 | |||||
18401 | my $saw_good_break = shift; | ||||
18402 | my @i_first = (); # the first index to output | ||||
18403 | my @i_last = (); # the last index to output | ||||
18404 | my @i_colon_breaks = (); # needed to decide if we have to break at ?'s | ||||
18405 | if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 } | ||||
18406 | |||||
18407 | set_bond_strengths(); | ||||
18408 | |||||
18409 | my $imin = 0; | ||||
18410 | my $imax = $max_index_to_go; | ||||
18411 | if ( $types_to_go[$imin] eq 'b' ) { $imin++ } | ||||
18412 | if ( $types_to_go[$imax] eq 'b' ) { $imax-- } | ||||
18413 | my $i_begin = $imin; # index for starting next iteration | ||||
18414 | |||||
18415 | my $leading_spaces = leading_spaces_to_go($imin); | ||||
18416 | my $line_count = 0; | ||||
18417 | my $last_break_strength = NO_BREAK; | ||||
18418 | my $i_last_break = -1; | ||||
18419 | my $max_bias = 0.001; | ||||
18420 | my $tiny_bias = 0.0001; | ||||
18421 | my $leading_alignment_token = ""; | ||||
18422 | my $leading_alignment_type = ""; | ||||
18423 | |||||
18424 | # see if any ?/:'s are in order | ||||
18425 | my $colons_in_order = 1; | ||||
18426 | my $last_tok = ""; | ||||
18427 | my @colon_list = grep /^[\?\:]$/, @types_to_go[ 0 .. $max_index_to_go ]; | ||||
18428 | my $colon_count = @colon_list; | ||||
18429 | foreach (@colon_list) { | ||||
18430 | if ( $_ eq $last_tok ) { $colons_in_order = 0; last } | ||||
18431 | $last_tok = $_; | ||||
18432 | } | ||||
18433 | |||||
18434 | # This is a sufficient but not necessary condition for colon chain | ||||
18435 | my $is_colon_chain = ( $colons_in_order && @colon_list > 2 ); | ||||
18436 | |||||
18437 | #------------------------------------------------------- | ||||
18438 | # BEGINNING of main loop to set continuation breakpoints | ||||
18439 | # Keep iterating until we reach the end | ||||
18440 | #------------------------------------------------------- | ||||
18441 | while ( $i_begin <= $imax ) { | ||||
18442 | my $lowest_strength = NO_BREAK; | ||||
18443 | my $starting_sum = $summed_lengths_to_go[$i_begin]; | ||||
18444 | my $i_lowest = -1; | ||||
18445 | my $i_test = -1; | ||||
18446 | my $lowest_next_token = ''; | ||||
18447 | my $lowest_next_type = 'b'; | ||||
18448 | my $i_lowest_next_nonblank = -1; | ||||
18449 | |||||
18450 | #------------------------------------------------------- | ||||
18451 | # BEGINNING of inner loop to find the best next breakpoint | ||||
18452 | #------------------------------------------------------- | ||||
18453 | for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) { | ||||
18454 | my $type = $types_to_go[$i_test]; | ||||
18455 | my $token = $tokens_to_go[$i_test]; | ||||
18456 | my $next_type = $types_to_go[ $i_test + 1 ]; | ||||
18457 | my $next_token = $tokens_to_go[ $i_test + 1 ]; | ||||
18458 | my $i_next_nonblank = $inext_to_go[$i_test]; | ||||
18459 | my $next_nonblank_type = $types_to_go[$i_next_nonblank]; | ||||
18460 | my $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; | ||||
18461 | my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank]; | ||||
18462 | my $strength = $bond_strength_to_go[$i_test]; | ||||
18463 | my $maximum_line_length = maximum_line_length($i_begin); | ||||
18464 | |||||
18465 | # use old breaks as a tie-breaker. For example to | ||||
18466 | # prevent blinkers with -pbp in this code: | ||||
18467 | |||||
18468 | ##@keywords{ | ||||
18469 | ## qw/ARG OUTPUT PROTO CONSTRUCTOR RETURNS DESC PARAMS SEEALSO EXAMPLE/} | ||||
18470 | ## = (); | ||||
18471 | |||||
18472 | # At the same time try to prevent a leading * in this code | ||||
18473 | # with the default formatting: | ||||
18474 | # | ||||
18475 | ## return | ||||
18476 | ## factorial( $a + $b - 1 ) / factorial( $a - 1 ) / factorial( $b - 1 ) | ||||
18477 | ## * ( $x**( $a - 1 ) ) | ||||
18478 | ## * ( ( 1 - $x )**( $b - 1 ) ); | ||||
18479 | |||||
18480 | # reduce strength a bit to break ties at an old breakpoint ... | ||||
18481 | if ( | ||||
18482 | $old_breakpoint_to_go[$i_test] | ||||
18483 | |||||
18484 | # which is a 'good' breakpoint, meaning ... | ||||
18485 | # we don't want to break before it | ||||
18486 | && !$want_break_before{$type} | ||||
18487 | |||||
18488 | # and either we want to break before the next token | ||||
18489 | # or the next token is not short (i.e. not a '*', '/' etc.) | ||||
18490 | && $i_next_nonblank <= $imax | ||||
18491 | && ( $want_break_before{$next_nonblank_type} | ||||
18492 | || $token_lengths_to_go[$i_next_nonblank] > 2 | ||||
18493 | || $next_nonblank_type =~ /^[\,\(\[\{L]$/ ) | ||||
18494 | ) | ||||
18495 | { | ||||
18496 | $strength -= $tiny_bias; | ||||
18497 | } | ||||
18498 | |||||
18499 | # otherwise increase strength a bit if this token would be at the | ||||
18500 | # maximum line length. This is necessary to avoid blinking | ||||
18501 | # in the above example when the -iob flag is added. | ||||
18502 | else { | ||||
18503 | my $len = | ||||
18504 | $leading_spaces + | ||||
18505 | $summed_lengths_to_go[ $i_test + 1 ] - | ||||
18506 | $starting_sum; | ||||
18507 | if ( $len >= $maximum_line_length ) { | ||||
18508 | $strength += $tiny_bias; | ||||
18509 | } | ||||
18510 | } | ||||
18511 | |||||
18512 | my $must_break = 0; | ||||
18513 | |||||
18514 | # Force an immediate break at certain operators | ||||
18515 | # with lower level than the start of the line, | ||||
18516 | # unless we've already seen a better break. | ||||
18517 | # | ||||
18518 | ############################################## | ||||
18519 | # Note on an issue with a preceding ? | ||||
18520 | ############################################## | ||||
18521 | # We don't include a ? in the above list, but there may | ||||
18522 | # be a break at a previous ? if the line is long. | ||||
18523 | # Because of this we do not want to force a break if | ||||
18524 | # there is a previous ? on this line. For now the best way | ||||
18525 | # to do this is to not break if we have seen a lower strength | ||||
18526 | # point, which is probably a ?. | ||||
18527 | # | ||||
18528 | # Example of unwanted breaks we are avoiding at a '.' following a ? | ||||
18529 | # from pod2html using perltidy -gnu: | ||||
18530 | # ) | ||||
18531 | # ? "\n<A NAME=\"" | ||||
18532 | # . $value | ||||
18533 | # . "\">\n$text</A>\n" | ||||
18534 | # : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n"; | ||||
18535 | if ( | ||||
18536 | ( | ||||
18537 | $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/ | ||||
18538 | || ( $next_nonblank_type eq 'k' | ||||
18539 | && $next_nonblank_token =~ /^(and|or)$/ ) | ||||
18540 | ) | ||||
18541 | && ( $nesting_depth_to_go[$i_begin] > | ||||
18542 | $nesting_depth_to_go[$i_next_nonblank] ) | ||||
18543 | && ( $strength <= $lowest_strength ) | ||||
18544 | ) | ||||
18545 | { | ||||
18546 | set_forced_breakpoint($i_next_nonblank); | ||||
18547 | } | ||||
18548 | |||||
18549 | if ( | ||||
18550 | |||||
18551 | # Try to put a break where requested by scan_list | ||||
18552 | $forced_breakpoint_to_go[$i_test] | ||||
18553 | |||||
18554 | # break between ) { in a continued line so that the '{' can | ||||
18555 | # be outdented | ||||
18556 | # See similar logic in scan_list which catches instances | ||||
18557 | # where a line is just something like ') {'. We have to | ||||
18558 | # be careful because the corresponding block keyword might | ||||
18559 | # not be on the first line, such as 'for' here: | ||||
18560 | # | ||||
18561 | # eval { | ||||
18562 | # for ("a") { | ||||
18563 | # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ } | ||||
18564 | # } | ||||
18565 | # }; | ||||
18566 | # | ||||
18567 | || ( $line_count | ||||
18568 | && ( $token eq ')' ) | ||||
18569 | && ( $next_nonblank_type eq '{' ) | ||||
18570 | && ($next_nonblank_block_type) | ||||
18571 | && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] ) | ||||
18572 | && !$rOpts->{'opening-brace-always-on-right'} ) | ||||
18573 | |||||
18574 | # There is an implied forced break at a terminal opening brace | ||||
18575 | || ( ( $type eq '{' ) && ( $i_test == $imax ) ) | ||||
18576 | ) | ||||
18577 | { | ||||
18578 | |||||
18579 | # Forced breakpoints must sometimes be overridden, for example | ||||
18580 | # because of a side comment causing a NO_BREAK. It is easier | ||||
18581 | # to catch this here than when they are set. | ||||
18582 | if ( $strength < NO_BREAK - 1 ) { | ||||
18583 | $strength = $lowest_strength - $tiny_bias; | ||||
18584 | $must_break = 1; | ||||
18585 | } | ||||
18586 | } | ||||
18587 | |||||
18588 | # quit if a break here would put a good terminal token on | ||||
18589 | # the next line and we already have a possible break | ||||
18590 | if ( | ||||
18591 | !$must_break | ||||
18592 | && ( $next_nonblank_type =~ /^[\;\,]$/ ) | ||||
18593 | && ( | ||||
18594 | ( | ||||
18595 | $leading_spaces + | ||||
18596 | $summed_lengths_to_go[ $i_next_nonblank + 1 ] - | ||||
18597 | $starting_sum | ||||
18598 | ) > $maximum_line_length | ||||
18599 | ) | ||||
18600 | ) | ||||
18601 | { | ||||
18602 | last if ( $i_lowest >= 0 ); | ||||
18603 | } | ||||
18604 | |||||
18605 | # Avoid a break which would strand a single punctuation | ||||
18606 | # token. For example, we do not want to strand a leading | ||||
18607 | # '.' which is followed by a long quoted string. | ||||
18608 | # But note that we do want to do this with -extrude (l=1) | ||||
18609 | # so please test any changes to this code on -extrude. | ||||
18610 | if ( | ||||
18611 | !$must_break | ||||
18612 | && ( $i_test == $i_begin ) | ||||
18613 | && ( $i_test < $imax ) | ||||
18614 | && ( $token eq $type ) | ||||
18615 | && ( | ||||
18616 | ( | ||||
18617 | $leading_spaces + | ||||
18618 | $summed_lengths_to_go[ $i_test + 1 ] - | ||||
18619 | $starting_sum | ||||
18620 | ) < $maximum_line_length | ||||
18621 | ) | ||||
18622 | ) | ||||
18623 | { | ||||
18624 | $i_test = min( $imax, $inext_to_go[$i_test] ); | ||||
18625 | redo; | ||||
18626 | } | ||||
18627 | |||||
18628 | if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) ) | ||||
18629 | { | ||||
18630 | |||||
18631 | # break at previous best break if it would have produced | ||||
18632 | # a leading alignment of certain common tokens, and it | ||||
18633 | # is different from the latest candidate break | ||||
18634 | last | ||||
18635 | if ($leading_alignment_type); | ||||
18636 | |||||
18637 | # Force at least one breakpoint if old code had good | ||||
18638 | # break It is only called if a breakpoint is required or | ||||
18639 | # desired. This will probably need some adjustments | ||||
18640 | # over time. A goal is to try to be sure that, if a new | ||||
18641 | # side comment is introduced into formatted text, then | ||||
18642 | # the same breakpoints will occur. scbreak.t | ||||
18643 | last | ||||
18644 | if ( | ||||
18645 | $i_test == $imax # we are at the end | ||||
18646 | && !$forced_breakpoint_count # | ||||
18647 | && $saw_good_break # old line had good break | ||||
18648 | && $type =~ /^[#;\{]$/ # and this line ends in | ||||
18649 | # ';' or side comment | ||||
18650 | && $i_last_break < 0 # and we haven't made a break | ||||
18651 | && $i_lowest >= 0 # and we saw a possible break | ||||
18652 | && $i_lowest < $imax - 1 # (but not just before this ;) | ||||
18653 | && $strength - $lowest_strength < 0.5 * WEAK # and it's good | ||||
18654 | ); | ||||
18655 | |||||
18656 | # Do not skip past an important break point in a short final | ||||
18657 | # segment. For example, without this check we would miss the | ||||
18658 | # break at the final / in the following code: | ||||
18659 | # | ||||
18660 | # $depth_stop = | ||||
18661 | # ( $tau * $mass_pellet * $q_0 * | ||||
18662 | # ( 1. - exp( -$t_stop / $tau ) ) - | ||||
18663 | # 4. * $pi * $factor * $k_ice * | ||||
18664 | # ( $t_melt - $t_ice ) * | ||||
18665 | # $r_pellet * | ||||
18666 | # $t_stop ) / | ||||
18667 | # ( $rho_ice * $Qs * $pi * $r_pellet**2 ); | ||||
18668 | # | ||||
18669 | if ( $line_count > 2 | ||||
18670 | && $i_lowest < $i_test | ||||
18671 | && $i_test > $imax - 2 | ||||
18672 | && $nesting_depth_to_go[$i_begin] > | ||||
18673 | $nesting_depth_to_go[$i_lowest] | ||||
18674 | && $lowest_strength < $last_break_strength - .5 * WEAK ) | ||||
18675 | { | ||||
18676 | # Make this break for math operators for now | ||||
18677 | my $ir = $inext_to_go[$i_lowest]; | ||||
18678 | my $il = $iprev_to_go[$ir]; | ||||
18679 | last | ||||
18680 | if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/ | ||||
18681 | || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ ); | ||||
18682 | } | ||||
18683 | |||||
18684 | # Update the minimum bond strength location | ||||
18685 | $lowest_strength = $strength; | ||||
18686 | $i_lowest = $i_test; | ||||
18687 | $lowest_next_token = $next_nonblank_token; | ||||
18688 | $lowest_next_type = $next_nonblank_type; | ||||
18689 | $i_lowest_next_nonblank = $i_next_nonblank; | ||||
18690 | last if $must_break; | ||||
18691 | |||||
18692 | # set flags to remember if a break here will produce a | ||||
18693 | # leading alignment of certain common tokens | ||||
18694 | if ( $line_count > 0 | ||||
18695 | && $i_test < $imax | ||||
18696 | && ( $lowest_strength - $last_break_strength <= $max_bias ) | ||||
18697 | ) | ||||
18698 | { | ||||
18699 | my $i_last_end = $iprev_to_go[$i_begin]; | ||||
18700 | my $tok_beg = $tokens_to_go[$i_begin]; | ||||
18701 | my $type_beg = $types_to_go[$i_begin]; | ||||
18702 | if ( | ||||
18703 | |||||
18704 | # check for leading alignment of certain tokens | ||||
18705 | ( | ||||
18706 | $tok_beg eq $next_nonblank_token | ||||
18707 | && $is_chain_operator{$tok_beg} | ||||
18708 | && ( $type_beg eq 'k' | ||||
18709 | || $type_beg eq $tok_beg ) | ||||
18710 | && $nesting_depth_to_go[$i_begin] >= | ||||
18711 | $nesting_depth_to_go[$i_next_nonblank] | ||||
18712 | ) | ||||
18713 | |||||
18714 | || ( $tokens_to_go[$i_last_end] eq $token | ||||
18715 | && $is_chain_operator{$token} | ||||
18716 | && ( $type eq 'k' || $type eq $token ) | ||||
18717 | && $nesting_depth_to_go[$i_last_end] >= | ||||
18718 | $nesting_depth_to_go[$i_test] ) | ||||
18719 | ) | ||||
18720 | { | ||||
18721 | $leading_alignment_token = $next_nonblank_token; | ||||
18722 | $leading_alignment_type = $next_nonblank_type; | ||||
18723 | } | ||||
18724 | } | ||||
18725 | } | ||||
18726 | |||||
18727 | my $too_long = ( $i_test >= $imax ); | ||||
18728 | if ( !$too_long ) { | ||||
18729 | my $next_length = | ||||
18730 | $leading_spaces + | ||||
18731 | $summed_lengths_to_go[ $i_test + 2 ] - | ||||
18732 | $starting_sum; | ||||
18733 | $too_long = $next_length > $maximum_line_length; | ||||
18734 | |||||
18735 | # To prevent blinkers we will avoid leaving a token exactly at | ||||
18736 | # the line length limit unless it is the last token or one of | ||||
18737 | # several "good" types. | ||||
18738 | # | ||||
18739 | # The following code was a blinker with -pbp before this | ||||
18740 | # modification: | ||||
18741 | ## $last_nonblank_token eq '(' | ||||
18742 | ## && $is_indirect_object_taker{ $paren_type | ||||
18743 | ## [$paren_depth] } | ||||
18744 | # The issue causing the problem is that if the | ||||
18745 | # term [$paren_depth] gets broken across a line then | ||||
18746 | # the whitespace routine doesn't see both opening and closing | ||||
18747 | # brackets and will format like '[ $paren_depth ]'. This | ||||
18748 | # leads to an oscillation in length depending if we break | ||||
18749 | # before the closing bracket or not. | ||||
18750 | if ( !$too_long | ||||
18751 | && $i_test + 1 < $imax | ||||
18752 | && $next_nonblank_type !~ /^[,\}\]\)R]$/ ) | ||||
18753 | { | ||||
18754 | $too_long = $next_length >= $maximum_line_length; | ||||
18755 | } | ||||
18756 | } | ||||
18757 | |||||
18758 | FORMATTER_DEBUG_FLAG_BREAK | ||||
18759 | && do { | ||||
18760 | my $ltok = $token; | ||||
18761 | my $rtok = $next_nonblank_token ? $next_nonblank_token : ""; | ||||
18762 | my $i_testp2 = $i_test + 2; | ||||
18763 | if ( $i_testp2 > $max_index_to_go + 1 ) { | ||||
18764 | $i_testp2 = $max_index_to_go + 1; | ||||
18765 | } | ||||
18766 | if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) } | ||||
18767 | if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) } | ||||
18768 | print STDOUT | ||||
18769 | "BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength $ltok $rtok\n"; | ||||
18770 | }; | ||||
18771 | |||||
18772 | # allow one extra terminal token after exceeding line length | ||||
18773 | # if it would strand this token. | ||||
18774 | if ( $rOpts_fuzzy_line_length | ||||
18775 | && $too_long | ||||
18776 | && $i_lowest == $i_test | ||||
18777 | && $token_lengths_to_go[$i_test] > 1 | ||||
18778 | && $next_nonblank_type =~ /^[\;\,]$/ ) | ||||
18779 | { | ||||
18780 | $too_long = 0; | ||||
18781 | } | ||||
18782 | |||||
18783 | last | ||||
18784 | if ( | ||||
18785 | ( $i_test == $imax ) # we're done if no more tokens, | ||||
18786 | || ( | ||||
18787 | ( $i_lowest >= 0 ) # or no more space and we have a break | ||||
18788 | && $too_long | ||||
18789 | ) | ||||
18790 | ); | ||||
18791 | } | ||||
18792 | |||||
18793 | #------------------------------------------------------- | ||||
18794 | # END of inner loop to find the best next breakpoint | ||||
18795 | # Now decide exactly where to put the breakpoint | ||||
18796 | #------------------------------------------------------- | ||||
18797 | |||||
18798 | # it's always ok to break at imax if no other break was found | ||||
18799 | if ( $i_lowest < 0 ) { $i_lowest = $imax } | ||||
18800 | |||||
18801 | # semi-final index calculation | ||||
18802 | my $i_next_nonblank = $inext_to_go[$i_lowest]; | ||||
18803 | my $next_nonblank_type = $types_to_go[$i_next_nonblank]; | ||||
18804 | my $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; | ||||
18805 | |||||
18806 | #------------------------------------------------------- | ||||
18807 | # ?/: rule 1 : if a break here will separate a '?' on this | ||||
18808 | # line from its closing ':', then break at the '?' instead. | ||||
18809 | #------------------------------------------------------- | ||||
18810 | my $i; | ||||
18811 | foreach $i ( $i_begin + 1 .. $i_lowest - 1 ) { | ||||
18812 | next unless ( $tokens_to_go[$i] eq '?' ); | ||||
18813 | |||||
18814 | # do not break if probable sequence of ?/: statements | ||||
18815 | next if ($is_colon_chain); | ||||
18816 | |||||
18817 | # do not break if statement is broken by side comment | ||||
18818 | next | ||||
18819 | if ( | ||||
18820 | $tokens_to_go[$max_index_to_go] eq '#' | ||||
18821 | && terminal_type( \@types_to_go, \@block_type_to_go, 0, | ||||
18822 | $max_index_to_go ) !~ /^[\;\}]$/ | ||||
18823 | ); | ||||
18824 | |||||
18825 | # no break needed if matching : is also on the line | ||||
18826 | next | ||||
18827 | if ( $mate_index_to_go[$i] >= 0 | ||||
18828 | && $mate_index_to_go[$i] <= $i_next_nonblank ); | ||||
18829 | |||||
18830 | $i_lowest = $i; | ||||
18831 | if ( $want_break_before{'?'} ) { $i_lowest-- } | ||||
18832 | last; | ||||
18833 | } | ||||
18834 | |||||
18835 | #------------------------------------------------------- | ||||
18836 | # END of inner loop to find the best next breakpoint: | ||||
18837 | # Break the line after the token with index i=$i_lowest | ||||
18838 | #------------------------------------------------------- | ||||
18839 | |||||
18840 | # final index calculation | ||||
18841 | $i_next_nonblank = $inext_to_go[$i_lowest]; | ||||
18842 | $next_nonblank_type = $types_to_go[$i_next_nonblank]; | ||||
18843 | $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; | ||||
18844 | |||||
18845 | FORMATTER_DEBUG_FLAG_BREAK | ||||
18846 | && print STDOUT | ||||
18847 | "BREAK: best is i = $i_lowest strength = $lowest_strength\n"; | ||||
18848 | |||||
18849 | #------------------------------------------------------- | ||||
18850 | # ?/: rule 2 : if we break at a '?', then break at its ':' | ||||
18851 | # | ||||
18852 | # Note: this rule is also in sub scan_list to handle a break | ||||
18853 | # at the start and end of a line (in case breaks are dictated | ||||
18854 | # by side comments). | ||||
18855 | #------------------------------------------------------- | ||||
18856 | if ( $next_nonblank_type eq '?' ) { | ||||
18857 | set_closing_breakpoint($i_next_nonblank); | ||||
18858 | } | ||||
18859 | elsif ( $types_to_go[$i_lowest] eq '?' ) { | ||||
18860 | set_closing_breakpoint($i_lowest); | ||||
18861 | } | ||||
18862 | |||||
18863 | #------------------------------------------------------- | ||||
18864 | # ?/: rule 3 : if we break at a ':' then we save | ||||
18865 | # its location for further work below. We may need to go | ||||
18866 | # back and break at its '?'. | ||||
18867 | #------------------------------------------------------- | ||||
18868 | if ( $next_nonblank_type eq ':' ) { | ||||
18869 | push @i_colon_breaks, $i_next_nonblank; | ||||
18870 | } | ||||
18871 | elsif ( $types_to_go[$i_lowest] eq ':' ) { | ||||
18872 | push @i_colon_breaks, $i_lowest; | ||||
18873 | } | ||||
18874 | |||||
18875 | # here we should set breaks for all '?'/':' pairs which are | ||||
18876 | # separated by this line | ||||
18877 | |||||
18878 | $line_count++; | ||||
18879 | |||||
18880 | # save this line segment, after trimming blanks at the ends | ||||
18881 | push( @i_first, | ||||
18882 | ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin ); | ||||
18883 | push( @i_last, | ||||
18884 | ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest ); | ||||
18885 | |||||
18886 | # set a forced breakpoint at a container opening, if necessary, to | ||||
18887 | # signal a break at a closing container. Excepting '(' for now. | ||||
18888 | if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/ | ||||
18889 | && !$forced_breakpoint_to_go[$i_lowest] ) | ||||
18890 | { | ||||
18891 | set_closing_breakpoint($i_lowest); | ||||
18892 | } | ||||
18893 | |||||
18894 | # get ready to go again | ||||
18895 | $i_begin = $i_lowest + 1; | ||||
18896 | $last_break_strength = $lowest_strength; | ||||
18897 | $i_last_break = $i_lowest; | ||||
18898 | $leading_alignment_token = ""; | ||||
18899 | $leading_alignment_type = ""; | ||||
18900 | $lowest_next_token = ''; | ||||
18901 | $lowest_next_type = 'b'; | ||||
18902 | |||||
18903 | if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) { | ||||
18904 | $i_begin++; | ||||
18905 | } | ||||
18906 | |||||
18907 | # update indentation size | ||||
18908 | if ( $i_begin <= $imax ) { | ||||
18909 | $leading_spaces = leading_spaces_to_go($i_begin); | ||||
18910 | } | ||||
18911 | } | ||||
18912 | |||||
18913 | #------------------------------------------------------- | ||||
18914 | # END of main loop to set continuation breakpoints | ||||
18915 | # Now go back and make any necessary corrections | ||||
18916 | #------------------------------------------------------- | ||||
18917 | |||||
18918 | #------------------------------------------------------- | ||||
18919 | # ?/: rule 4 -- if we broke at a ':', then break at | ||||
18920 | # corresponding '?' unless this is a chain of ?: expressions | ||||
18921 | #------------------------------------------------------- | ||||
18922 | if (@i_colon_breaks) { | ||||
18923 | |||||
18924 | # using a simple method for deciding if we are in a ?/: chain -- | ||||
18925 | # this is a chain if it has multiple ?/: pairs all in order; | ||||
18926 | # otherwise not. | ||||
18927 | # Note that if line starts in a ':' we count that above as a break | ||||
18928 | my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 ); | ||||
18929 | |||||
18930 | unless ($is_chain) { | ||||
18931 | my @insert_list = (); | ||||
18932 | foreach (@i_colon_breaks) { | ||||
18933 | my $i_question = $mate_index_to_go[$_]; | ||||
18934 | if ( $i_question >= 0 ) { | ||||
18935 | if ( $want_break_before{'?'} ) { | ||||
18936 | $i_question = $iprev_to_go[$i_question]; | ||||
18937 | } | ||||
18938 | |||||
18939 | if ( $i_question >= 0 ) { | ||||
18940 | push @insert_list, $i_question; | ||||
18941 | } | ||||
18942 | } | ||||
18943 | insert_additional_breaks( \@insert_list, \@i_first, \@i_last ); | ||||
18944 | } | ||||
18945 | } | ||||
18946 | } | ||||
18947 | return ( \@i_first, \@i_last, $colon_count ); | ||||
18948 | } | ||||
18949 | |||||
18950 | sub insert_additional_breaks { | ||||
18951 | |||||
18952 | # this routine will add line breaks at requested locations after | ||||
18953 | # sub set_continuation_breaks has made preliminary breaks. | ||||
18954 | |||||
18955 | my ( $ri_break_list, $ri_first, $ri_last ) = @_; | ||||
18956 | my $i_f; | ||||
18957 | my $i_l; | ||||
18958 | my $line_number = 0; | ||||
18959 | my $i_break_left; | ||||
18960 | foreach $i_break_left ( sort { $a <=> $b } @$ri_break_list ) { | ||||
18961 | |||||
18962 | $i_f = $$ri_first[$line_number]; | ||||
18963 | $i_l = $$ri_last[$line_number]; | ||||
18964 | while ( $i_break_left >= $i_l ) { | ||||
18965 | $line_number++; | ||||
18966 | |||||
18967 | # shouldn't happen unless caller passes bad indexes | ||||
18968 | if ( $line_number >= @$ri_last ) { | ||||
18969 | warning( | ||||
18970 | "Non-fatal program bug: couldn't set break at $i_break_left\n" | ||||
18971 | ); | ||||
18972 | report_definite_bug(); | ||||
18973 | return; | ||||
18974 | } | ||||
18975 | $i_f = $$ri_first[$line_number]; | ||||
18976 | $i_l = $$ri_last[$line_number]; | ||||
18977 | } | ||||
18978 | |||||
18979 | # Do not leave a blank at the end of a line; back up if necessary | ||||
18980 | if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- } | ||||
18981 | |||||
18982 | my $i_break_right = $inext_to_go[$i_break_left]; | ||||
18983 | if ( $i_break_left >= $i_f | ||||
18984 | && $i_break_left < $i_l | ||||
18985 | && $i_break_right > $i_f | ||||
18986 | && $i_break_right <= $i_l ) | ||||
18987 | { | ||||
18988 | splice( @$ri_first, $line_number, 1, ( $i_f, $i_break_right ) ); | ||||
18989 | splice( @$ri_last, $line_number, 1, ( $i_break_left, $i_l ) ); | ||||
18990 | } | ||||
18991 | } | ||||
18992 | } | ||||
18993 | |||||
18994 | sub set_closing_breakpoint { | ||||
18995 | |||||
18996 | # set a breakpoint at a matching closing token | ||||
18997 | # at present, this is only used to break at a ':' which matches a '?' | ||||
18998 | my $i_break = shift; | ||||
18999 | |||||
19000 | if ( $mate_index_to_go[$i_break] >= 0 ) { | ||||
19001 | |||||
19002 | # CAUTION: infinite recursion possible here: | ||||
19003 | # set_closing_breakpoint calls set_forced_breakpoint, and | ||||
19004 | # set_forced_breakpoint call set_closing_breakpoint | ||||
19005 | # ( test files attrib.t, BasicLyx.pm.html). | ||||
19006 | # Don't reduce the '2' in the statement below | ||||
19007 | if ( $mate_index_to_go[$i_break] > $i_break + 2 ) { | ||||
19008 | |||||
19009 | # break before } ] and ), but sub set_forced_breakpoint will decide | ||||
19010 | # to break before or after a ? and : | ||||
19011 | my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1; | ||||
19012 | set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc ); | ||||
19013 | } | ||||
19014 | } | ||||
19015 | else { | ||||
19016 | my $type_sequence = $type_sequence_to_go[$i_break]; | ||||
19017 | if ($type_sequence) { | ||||
19018 | my $closing_token = $matching_token{ $tokens_to_go[$i_break] }; | ||||
19019 | $postponed_breakpoint{$type_sequence} = 1; | ||||
19020 | } | ||||
19021 | } | ||||
19022 | } | ||||
19023 | |||||
19024 | sub compare_indentation_levels { | ||||
19025 | |||||
19026 | # check to see if output line tabbing agrees with input line | ||||
19027 | # this can be very useful for debugging a script which has an extra | ||||
19028 | # or missing brace | ||||
19029 | my ( $guessed_indentation_level, $structural_indentation_level ) = @_; | ||||
19030 | if ( $guessed_indentation_level ne $structural_indentation_level ) { | ||||
19031 | $last_tabbing_disagreement = $input_line_number; | ||||
19032 | |||||
19033 | if ($in_tabbing_disagreement) { | ||||
19034 | } | ||||
19035 | else { | ||||
19036 | $tabbing_disagreement_count++; | ||||
19037 | |||||
19038 | if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) { | ||||
19039 | write_logfile_entry( | ||||
19040 | "Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n" | ||||
19041 | ); | ||||
19042 | } | ||||
19043 | $in_tabbing_disagreement = $input_line_number; | ||||
19044 | $first_tabbing_disagreement = $in_tabbing_disagreement | ||||
19045 | unless ($first_tabbing_disagreement); | ||||
19046 | } | ||||
19047 | } | ||||
19048 | else { | ||||
19049 | |||||
19050 | if ($in_tabbing_disagreement) { | ||||
19051 | |||||
19052 | if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) { | ||||
19053 | write_logfile_entry( | ||||
19054 | "End indentation disagreement from input line $in_tabbing_disagreement\n" | ||||
19055 | ); | ||||
19056 | |||||
19057 | if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) { | ||||
19058 | write_logfile_entry( | ||||
19059 | "No further tabbing disagreements will be noted\n"); | ||||
19060 | } | ||||
19061 | } | ||||
19062 | $in_tabbing_disagreement = 0; | ||||
19063 | } | ||||
19064 | } | ||||
19065 | } | ||||
19066 | |||||
19067 | ##################################################################### | ||||
19068 | # | ||||
19069 | # the Perl::Tidy::IndentationItem class supplies items which contain | ||||
19070 | # how much whitespace should be used at the start of a line | ||||
19071 | # | ||||
19072 | ##################################################################### | ||||
19073 | |||||
19074 | package Perl::Tidy::IndentationItem; | ||||
19075 | |||||
19076 | # Indexes for indentation items | ||||
19077 | 2 | 28µs | 2 | 133µs | # spent 72µs (11+61) within Perl::Tidy::IndentationItem::BEGIN@19077 which was called:
# once (11µs+61µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19077 # spent 72µs making 1 call to Perl::Tidy::IndentationItem::BEGIN@19077
# spent 61µs making 1 call to constant::import |
19078 | 2 | 23µs | 2 | 72µs | # spent 39µs (7+33) within Perl::Tidy::IndentationItem::BEGIN@19078 which was called:
# once (7µs+33µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19078 # spent 39µs making 1 call to Perl::Tidy::IndentationItem::BEGIN@19078
# spent 32µs making 1 call to constant::import |
19079 | 2 | 22µs | 2 | 67µs | # spent 37µs (7+30) within Perl::Tidy::IndentationItem::BEGIN@19079 which was called:
# once (7µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19079 # spent 37µs making 1 call to Perl::Tidy::IndentationItem::BEGIN@19079
# spent 30µs making 1 call to constant::import |
19080 | 2 | 21µs | 2 | 74µs | # spent 40µs (6+34) within Perl::Tidy::IndentationItem::BEGIN@19080 which was called:
# once (6µs+34µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19080 # spent 40µs making 1 call to Perl::Tidy::IndentationItem::BEGIN@19080
# spent 34µs making 1 call to constant::import |
19081 | # for this level | ||||
19082 | 2 | 24µs | 2 | 64µs | # spent 35µs (6+29) within Perl::Tidy::IndentationItem::BEGIN@19082 which was called:
# once (6µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19082 # spent 35µs making 1 call to Perl::Tidy::IndentationItem::BEGIN@19082
# spent 29µs making 1 call to constant::import |
19083 | 2 | 21µs | 2 | 65µs | # spent 36µs (6+29) within Perl::Tidy::IndentationItem::BEGIN@19083 which was called:
# once (6µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19083 # spent 36µs making 1 call to Perl::Tidy::IndentationItem::BEGIN@19083
# spent 29µs making 1 call to constant::import |
19084 | 2 | 20µs | 2 | 65µs | # spent 36µs (6+29) within Perl::Tidy::IndentationItem::BEGIN@19084 which was called:
# once (6µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19084 # spent 36µs making 1 call to Perl::Tidy::IndentationItem::BEGIN@19084
# spent 29µs making 1 call to constant::import |
19085 | 2 | 20µs | 2 | 62µs | # spent 34µs (6+28) within Perl::Tidy::IndentationItem::BEGIN@19085 which was called:
# once (6µs+28µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19085 # spent 34µs making 1 call to Perl::Tidy::IndentationItem::BEGIN@19085
# spent 28µs making 1 call to constant::import |
19086 | 2 | 20µs | 2 | 63µs | # spent 35µs (6+28) within Perl::Tidy::IndentationItem::BEGIN@19086 which was called:
# once (6µs+28µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19086 # spent 35µs making 1 call to Perl::Tidy::IndentationItem::BEGIN@19086
# spent 28µs making 1 call to constant::import |
19087 | 2 | 35µs | 2 | 90µs | # spent 48µs (6+42) within Perl::Tidy::IndentationItem::BEGIN@19087 which was called:
# once (6µs+42µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19087 # spent 48µs making 1 call to Perl::Tidy::IndentationItem::BEGIN@19087
# spent 42µs making 1 call to constant::import |
19088 | # we would like to move to get | ||||
19089 | # alignment (negative if left) | ||||
19090 | 2 | 31µs | 2 | 66µs | # spent 36µs (6+30) within Perl::Tidy::IndentationItem::BEGIN@19090 which was called:
# once (6µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19090 # spent 36µs making 1 call to Perl::Tidy::IndentationItem::BEGIN@19090
# spent 30µs making 1 call to constant::import |
19091 | # with an opening structure? | ||||
19092 | 2 | 29µs | 2 | 64µs | # spent 35µs (6+29) within Perl::Tidy::IndentationItem::BEGIN@19092 which was called:
# once (6µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19092 # spent 35µs making 1 call to Perl::Tidy::IndentationItem::BEGIN@19092
# spent 29µs making 1 call to constant::import |
19093 | 2 | 19µs | 2 | 62µs | # spent 34µs (6+28) within Perl::Tidy::IndentationItem::BEGIN@19093 which was called:
# once (6µs+28µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19093 # spent 34µs making 1 call to Perl::Tidy::IndentationItem::BEGIN@19093
# spent 28µs making 1 call to constant::import |
19094 | 2 | 23µs | 2 | 62µs | # spent 34µs (6+28) within Perl::Tidy::IndentationItem::BEGIN@19094 which was called:
# once (6µs+28µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19094 # spent 34µs making 1 call to Perl::Tidy::IndentationItem::BEGIN@19094
# spent 28µs making 1 call to constant::import |
19095 | 2 | 926µs | 2 | 64µs | # spent 35µs (6+29) within Perl::Tidy::IndentationItem::BEGIN@19095 which was called:
# once (6µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19095 # spent 35µs making 1 call to Perl::Tidy::IndentationItem::BEGIN@19095
# spent 29µs making 1 call to constant::import |
19096 | |||||
19097 | sub new { | ||||
19098 | |||||
19099 | # Create an 'indentation_item' which describes one level of leading | ||||
19100 | # whitespace when the '-lp' indentation is used. We return | ||||
19101 | # a reference to an anonymous array of associated variables. | ||||
19102 | # See above constants for storage scheme. | ||||
19103 | my ( | ||||
19104 | $class, $spaces, $level, | ||||
19105 | $ci_level, $available_spaces, $index, | ||||
19106 | $gnu_sequence_number, $align_paren, $stack_depth, | ||||
19107 | $starting_index, | ||||
19108 | ) = @_; | ||||
19109 | my $closed = -1; | ||||
19110 | my $arrow_count = 0; | ||||
19111 | my $comma_count = 0; | ||||
19112 | my $have_child = 0; | ||||
19113 | my $want_right_spaces = 0; | ||||
19114 | my $marked = 0; | ||||
19115 | bless [ | ||||
19116 | $spaces, $level, $ci_level, | ||||
19117 | $available_spaces, $closed, $comma_count, | ||||
19118 | $gnu_sequence_number, $index, $have_child, | ||||
19119 | $want_right_spaces, $align_paren, $marked, | ||||
19120 | $stack_depth, $starting_index, $arrow_count, | ||||
19121 | ], $class; | ||||
19122 | } | ||||
19123 | |||||
19124 | sub permanently_decrease_AVAILABLE_SPACES { | ||||
19125 | |||||
19126 | # make a permanent reduction in the available indentation spaces | ||||
19127 | # at one indentation item. NOTE: if there are child nodes, their | ||||
19128 | # total SPACES must be reduced by the caller. | ||||
19129 | |||||
19130 | my ( $item, $spaces_needed ) = @_; | ||||
19131 | my $available_spaces = $item->get_AVAILABLE_SPACES(); | ||||
19132 | my $deleted_spaces = | ||||
19133 | ( $available_spaces > $spaces_needed ) | ||||
19134 | ? $spaces_needed | ||||
19135 | : $available_spaces; | ||||
19136 | $item->decrease_AVAILABLE_SPACES($deleted_spaces); | ||||
19137 | $item->decrease_SPACES($deleted_spaces); | ||||
19138 | $item->set_RECOVERABLE_SPACES(0); | ||||
19139 | |||||
19140 | return $deleted_spaces; | ||||
19141 | } | ||||
19142 | |||||
19143 | sub tentatively_decrease_AVAILABLE_SPACES { | ||||
19144 | |||||
19145 | # We are asked to tentatively delete $spaces_needed of indentation | ||||
19146 | # for a indentation item. We may want to undo this later. NOTE: if | ||||
19147 | # there are child nodes, their total SPACES must be reduced by the | ||||
19148 | # caller. | ||||
19149 | my ( $item, $spaces_needed ) = @_; | ||||
19150 | my $available_spaces = $item->get_AVAILABLE_SPACES(); | ||||
19151 | my $deleted_spaces = | ||||
19152 | ( $available_spaces > $spaces_needed ) | ||||
19153 | ? $spaces_needed | ||||
19154 | : $available_spaces; | ||||
19155 | $item->decrease_AVAILABLE_SPACES($deleted_spaces); | ||||
19156 | $item->decrease_SPACES($deleted_spaces); | ||||
19157 | $item->increase_RECOVERABLE_SPACES($deleted_spaces); | ||||
19158 | return $deleted_spaces; | ||||
19159 | } | ||||
19160 | |||||
19161 | sub get_STACK_DEPTH { | ||||
19162 | my $self = shift; | ||||
19163 | return $self->[STACK_DEPTH]; | ||||
19164 | } | ||||
19165 | |||||
19166 | sub get_SPACES { | ||||
19167 | my $self = shift; | ||||
19168 | return $self->[SPACES]; | ||||
19169 | } | ||||
19170 | |||||
19171 | sub get_MARKED { | ||||
19172 | my $self = shift; | ||||
19173 | return $self->[MARKED]; | ||||
19174 | } | ||||
19175 | |||||
19176 | sub set_MARKED { | ||||
19177 | my ( $self, $value ) = @_; | ||||
19178 | if ( defined($value) ) { | ||||
19179 | $self->[MARKED] = $value; | ||||
19180 | } | ||||
19181 | return $self->[MARKED]; | ||||
19182 | } | ||||
19183 | |||||
19184 | sub get_AVAILABLE_SPACES { | ||||
19185 | my $self = shift; | ||||
19186 | return $self->[AVAILABLE_SPACES]; | ||||
19187 | } | ||||
19188 | |||||
19189 | sub decrease_SPACES { | ||||
19190 | my ( $self, $value ) = @_; | ||||
19191 | if ( defined($value) ) { | ||||
19192 | $self->[SPACES] -= $value; | ||||
19193 | } | ||||
19194 | return $self->[SPACES]; | ||||
19195 | } | ||||
19196 | |||||
19197 | sub decrease_AVAILABLE_SPACES { | ||||
19198 | my ( $self, $value ) = @_; | ||||
19199 | if ( defined($value) ) { | ||||
19200 | $self->[AVAILABLE_SPACES] -= $value; | ||||
19201 | } | ||||
19202 | return $self->[AVAILABLE_SPACES]; | ||||
19203 | } | ||||
19204 | |||||
19205 | sub get_ALIGN_PAREN { | ||||
19206 | my $self = shift; | ||||
19207 | return $self->[ALIGN_PAREN]; | ||||
19208 | } | ||||
19209 | |||||
19210 | sub get_RECOVERABLE_SPACES { | ||||
19211 | my $self = shift; | ||||
19212 | return $self->[RECOVERABLE_SPACES]; | ||||
19213 | } | ||||
19214 | |||||
19215 | sub set_RECOVERABLE_SPACES { | ||||
19216 | my ( $self, $value ) = @_; | ||||
19217 | if ( defined($value) ) { | ||||
19218 | $self->[RECOVERABLE_SPACES] = $value; | ||||
19219 | } | ||||
19220 | return $self->[RECOVERABLE_SPACES]; | ||||
19221 | } | ||||
19222 | |||||
19223 | sub increase_RECOVERABLE_SPACES { | ||||
19224 | my ( $self, $value ) = @_; | ||||
19225 | if ( defined($value) ) { | ||||
19226 | $self->[RECOVERABLE_SPACES] += $value; | ||||
19227 | } | ||||
19228 | return $self->[RECOVERABLE_SPACES]; | ||||
19229 | } | ||||
19230 | |||||
19231 | sub get_CI_LEVEL { | ||||
19232 | my $self = shift; | ||||
19233 | return $self->[CI_LEVEL]; | ||||
19234 | } | ||||
19235 | |||||
19236 | sub get_LEVEL { | ||||
19237 | my $self = shift; | ||||
19238 | return $self->[LEVEL]; | ||||
19239 | } | ||||
19240 | |||||
19241 | sub get_SEQUENCE_NUMBER { | ||||
19242 | my $self = shift; | ||||
19243 | return $self->[SEQUENCE_NUMBER]; | ||||
19244 | } | ||||
19245 | |||||
19246 | sub get_INDEX { | ||||
19247 | my $self = shift; | ||||
19248 | return $self->[INDEX]; | ||||
19249 | } | ||||
19250 | |||||
19251 | sub get_STARTING_INDEX { | ||||
19252 | my $self = shift; | ||||
19253 | return $self->[STARTING_INDEX]; | ||||
19254 | } | ||||
19255 | |||||
19256 | sub set_HAVE_CHILD { | ||||
19257 | my ( $self, $value ) = @_; | ||||
19258 | if ( defined($value) ) { | ||||
19259 | $self->[HAVE_CHILD] = $value; | ||||
19260 | } | ||||
19261 | return $self->[HAVE_CHILD]; | ||||
19262 | } | ||||
19263 | |||||
19264 | sub get_HAVE_CHILD { | ||||
19265 | my $self = shift; | ||||
19266 | return $self->[HAVE_CHILD]; | ||||
19267 | } | ||||
19268 | |||||
19269 | sub set_ARROW_COUNT { | ||||
19270 | my ( $self, $value ) = @_; | ||||
19271 | if ( defined($value) ) { | ||||
19272 | $self->[ARROW_COUNT] = $value; | ||||
19273 | } | ||||
19274 | return $self->[ARROW_COUNT]; | ||||
19275 | } | ||||
19276 | |||||
19277 | sub get_ARROW_COUNT { | ||||
19278 | my $self = shift; | ||||
19279 | return $self->[ARROW_COUNT]; | ||||
19280 | } | ||||
19281 | |||||
19282 | sub set_COMMA_COUNT { | ||||
19283 | my ( $self, $value ) = @_; | ||||
19284 | if ( defined($value) ) { | ||||
19285 | $self->[COMMA_COUNT] = $value; | ||||
19286 | } | ||||
19287 | return $self->[COMMA_COUNT]; | ||||
19288 | } | ||||
19289 | |||||
19290 | sub get_COMMA_COUNT { | ||||
19291 | my $self = shift; | ||||
19292 | return $self->[COMMA_COUNT]; | ||||
19293 | } | ||||
19294 | |||||
19295 | sub set_CLOSED { | ||||
19296 | my ( $self, $value ) = @_; | ||||
19297 | if ( defined($value) ) { | ||||
19298 | $self->[CLOSED] = $value; | ||||
19299 | } | ||||
19300 | return $self->[CLOSED]; | ||||
19301 | } | ||||
19302 | |||||
19303 | sub get_CLOSED { | ||||
19304 | my $self = shift; | ||||
19305 | return $self->[CLOSED]; | ||||
19306 | } | ||||
19307 | |||||
19308 | ##################################################################### | ||||
19309 | # | ||||
19310 | # the Perl::Tidy::VerticalAligner::Line class supplies an object to | ||||
19311 | # contain a single output line | ||||
19312 | # | ||||
19313 | ##################################################################### | ||||
19314 | |||||
19315 | package Perl::Tidy::VerticalAligner::Line; | ||||
19316 | |||||
19317 | { | ||||
19318 | |||||
19319 | 3 | 25µs | 2 | 48µs | # spent 29µs (11+19) within Perl::Tidy::VerticalAligner::Line::BEGIN@19319 which was called:
# once (11µs+19µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19319 # spent 29µs making 1 call to Perl::Tidy::VerticalAligner::Line::BEGIN@19319
# spent 19µs making 1 call to strict::import |
19320 | 2 | 26µs | 2 | 111µs | # spent 61µs (10+50) within Perl::Tidy::VerticalAligner::Line::BEGIN@19320 which was called:
# once (10µs+50µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19320 # spent 61µs making 1 call to Perl::Tidy::VerticalAligner::Line::BEGIN@19320
# spent 50µs making 1 call to Exporter::import |
19321 | |||||
19322 | 2 | 24µs | 2 | 82µs | # spent 46µs (11+35) within Perl::Tidy::VerticalAligner::Line::BEGIN@19322 which was called:
# once (11µs+35µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19322 # spent 46µs making 1 call to Perl::Tidy::VerticalAligner::Line::BEGIN@19322
# spent 35µs making 1 call to constant::import |
19323 | 2 | 29µs | 2 | 74µs | # spent 40µs (7+33) within Perl::Tidy::VerticalAligner::Line::BEGIN@19323 which was called:
# once (7µs+33µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19323 # spent 40µs making 1 call to Perl::Tidy::VerticalAligner::Line::BEGIN@19323
# spent 33µs making 1 call to constant::import |
19324 | 2 | 22µs | 2 | 71µs | # spent 39µs (7+32) within Perl::Tidy::VerticalAligner::Line::BEGIN@19324 which was called:
# once (7µs+32µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19324 # spent 39µs making 1 call to Perl::Tidy::VerticalAligner::Line::BEGIN@19324
# spent 32µs making 1 call to constant::import |
19325 | 2 | 25µs | 2 | 71µs | # spent 39µs (6+32) within Perl::Tidy::VerticalAligner::Line::BEGIN@19325 which was called:
# once (6µs+32µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19325 # spent 39µs making 1 call to Perl::Tidy::VerticalAligner::Line::BEGIN@19325
# spent 32µs making 1 call to constant::import |
19326 | 2 | 21µs | 2 | 70µs | # spent 38µs (7+32) within Perl::Tidy::VerticalAligner::Line::BEGIN@19326 which was called:
# once (7µs+32µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19326 # spent 38µs making 1 call to Perl::Tidy::VerticalAligner::Line::BEGIN@19326
# spent 32µs making 1 call to constant::import |
19327 | 2 | 20µs | 2 | 84µs | # spent 46µs (6+39) within Perl::Tidy::VerticalAligner::Line::BEGIN@19327 which was called:
# once (6µs+39µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19327 # spent 46µs making 1 call to Perl::Tidy::VerticalAligner::Line::BEGIN@19327
# spent 39µs making 1 call to constant::import |
19328 | 2 | 26µs | 2 | 64µs | # spent 35µs (6+29) within Perl::Tidy::VerticalAligner::Line::BEGIN@19328 which was called:
# once (6µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19328 # spent 35µs making 1 call to Perl::Tidy::VerticalAligner::Line::BEGIN@19328
# spent 29µs making 1 call to constant::import |
19329 | 2 | 20µs | 2 | 64µs | # spent 35µs (6+29) within Perl::Tidy::VerticalAligner::Line::BEGIN@19329 which was called:
# once (6µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19329 # spent 35µs making 1 call to Perl::Tidy::VerticalAligner::Line::BEGIN@19329
# spent 29µs making 1 call to constant::import |
19330 | 2 | 20µs | 2 | 62µs | # spent 34µs (6+28) within Perl::Tidy::VerticalAligner::Line::BEGIN@19330 which was called:
# once (6µs+28µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19330 # spent 34µs making 1 call to Perl::Tidy::VerticalAligner::Line::BEGIN@19330
# spent 28µs making 1 call to constant::import |
19331 | 2 | 19µs | 2 | 63µs | # spent 35µs (6+29) within Perl::Tidy::VerticalAligner::Line::BEGIN@19331 which was called:
# once (6µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19331 # spent 35µs making 1 call to Perl::Tidy::VerticalAligner::Line::BEGIN@19331
# spent 29µs making 1 call to constant::import |
19332 | 2 | 19µs | 2 | 61µs | # spent 34µs (6+28) within Perl::Tidy::VerticalAligner::Line::BEGIN@19332 which was called:
# once (6µs+28µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19332 # spent 34µs making 1 call to Perl::Tidy::VerticalAligner::Line::BEGIN@19332
# spent 28µs making 1 call to constant::import |
19333 | 2 | 20µs | 2 | 71µs | # spent 39µs (6+33) within Perl::Tidy::VerticalAligner::Line::BEGIN@19333 which was called:
# once (6µs+33µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19333 # spent 39µs making 1 call to Perl::Tidy::VerticalAligner::Line::BEGIN@19333
# spent 33µs making 1 call to constant::import |
19334 | 2 | 271µs | 2 | 67µs | # spent 38µs (9+29) within Perl::Tidy::VerticalAligner::Line::BEGIN@19334 which was called:
# once (9µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19334 # spent 38µs making 1 call to Perl::Tidy::VerticalAligner::Line::BEGIN@19334
# spent 29µs making 1 call to constant::import |
19335 | |||||
19336 | 1 | 0s | my %_index_map; | ||
19337 | 1 | 600ns | $_index_map{jmax} = JMAX; | ||
19338 | 1 | 200ns | $_index_map{jmax_original_line} = JMAX_ORIGINAL_LINE; | ||
19339 | 1 | 200ns | $_index_map{rtokens} = RTOKENS; | ||
19340 | 1 | 200ns | $_index_map{rfields} = RFIELDS; | ||
19341 | 1 | 200ns | $_index_map{rpatterns} = RPATTERNS; | ||
19342 | 1 | 100ns | $_index_map{indentation} = INDENTATION; | ||
19343 | 1 | 200ns | $_index_map{leading_space_count} = LEADING_SPACE_COUNT; | ||
19344 | 1 | 500ns | $_index_map{outdent_long_lines} = OUTDENT_LONG_LINES; | ||
19345 | 1 | 200ns | $_index_map{list_type} = LIST_TYPE; | ||
19346 | 1 | 100ns | $_index_map{is_hanging_side_comment} = IS_HANGING_SIDE_COMMENT; | ||
19347 | 1 | 100ns | $_index_map{ralignments} = RALIGNMENTS; | ||
19348 | 1 | 100ns | $_index_map{maximum_line_length} = MAXIMUM_LINE_LENGTH; | ||
19349 | 1 | 100ns | $_index_map{rvertical_tightness_flags} = RVERTICAL_TIGHTNESS_FLAGS; | ||
19350 | |||||
19351 | 1 | 400ns | my @_default_data = (); | ||
19352 | 1 | 300ns | $_default_data[JMAX] = undef; | ||
19353 | 1 | 0s | $_default_data[JMAX_ORIGINAL_LINE] = undef; | ||
19354 | 1 | 0s | $_default_data[RTOKENS] = undef; | ||
19355 | 1 | 0s | $_default_data[RFIELDS] = undef; | ||
19356 | 1 | 400ns | $_default_data[RPATTERNS] = undef; | ||
19357 | 1 | 100ns | $_default_data[INDENTATION] = undef; | ||
19358 | 1 | 200ns | $_default_data[LEADING_SPACE_COUNT] = undef; | ||
19359 | 1 | 0s | $_default_data[OUTDENT_LONG_LINES] = undef; | ||
19360 | 1 | 300ns | $_default_data[LIST_TYPE] = undef; | ||
19361 | 1 | 100ns | $_default_data[IS_HANGING_SIDE_COMMENT] = undef; | ||
19362 | 1 | 500ns | $_default_data[RALIGNMENTS] = []; | ||
19363 | 1 | 100ns | $_default_data[MAXIMUM_LINE_LENGTH] = undef; | ||
19364 | 1 | 200ns | $_default_data[RVERTICAL_TIGHTNESS_FLAGS] = undef; | ||
19365 | |||||
19366 | { | ||||
19367 | |||||
19368 | # methods to count object population | ||||
19369 | 2 | 400ns | my $_count = 0; | ||
19370 | sub get_count { $_count; } | ||||
19371 | sub _increment_count { ++$_count } | ||||
19372 | sub _decrement_count { --$_count } | ||||
19373 | } | ||||
19374 | |||||
19375 | # Constructor may be called as a class method | ||||
19376 | sub new { | ||||
19377 | my ( $caller, %arg ) = @_; | ||||
19378 | my $caller_is_obj = ref($caller); | ||||
19379 | my $class = $caller_is_obj || $caller; | ||||
19380 | 2 | 885µs | 2 | 30µs | # spent 19µs (8+11) within Perl::Tidy::VerticalAligner::Line::BEGIN@19380 which was called:
# once (8µs+11µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19380 # spent 19µs making 1 call to Perl::Tidy::VerticalAligner::Line::BEGIN@19380
# spent 11µs making 1 call to strict::unimport |
19381 | my $self = bless [], $class; | ||||
19382 | |||||
19383 | $self->[RALIGNMENTS] = []; | ||||
19384 | |||||
19385 | my $index; | ||||
19386 | foreach ( keys %_index_map ) { | ||||
19387 | $index = $_index_map{$_}; | ||||
19388 | if ( exists $arg{$_} ) { $self->[$index] = $arg{$_} } | ||||
19389 | elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] } | ||||
19390 | else { $self->[$index] = $_default_data[$index] } | ||||
19391 | } | ||||
19392 | |||||
19393 | $self->_increment_count(); | ||||
19394 | return $self; | ||||
19395 | } | ||||
19396 | |||||
19397 | sub DESTROY { | ||||
19398 | $_[0]->_decrement_count(); | ||||
19399 | } | ||||
19400 | |||||
19401 | sub get_jmax { $_[0]->[JMAX] } | ||||
19402 | sub get_jmax_original_line { $_[0]->[JMAX_ORIGINAL_LINE] } | ||||
19403 | sub get_rtokens { $_[0]->[RTOKENS] } | ||||
19404 | sub get_rfields { $_[0]->[RFIELDS] } | ||||
19405 | sub get_rpatterns { $_[0]->[RPATTERNS] } | ||||
19406 | sub get_indentation { $_[0]->[INDENTATION] } | ||||
19407 | sub get_leading_space_count { $_[0]->[LEADING_SPACE_COUNT] } | ||||
19408 | sub get_outdent_long_lines { $_[0]->[OUTDENT_LONG_LINES] } | ||||
19409 | sub get_list_type { $_[0]->[LIST_TYPE] } | ||||
19410 | sub get_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] } | ||||
19411 | sub get_rvertical_tightness_flags { $_[0]->[RVERTICAL_TIGHTNESS_FLAGS] } | ||||
19412 | |||||
19413 | sub set_column { $_[0]->[RALIGNMENTS]->[ $_[1] ]->set_column( $_[2] ) } | ||||
19414 | sub get_alignment { $_[0]->[RALIGNMENTS]->[ $_[1] ] } | ||||
19415 | sub get_alignments { @{ $_[0]->[RALIGNMENTS] } } | ||||
19416 | sub get_column { $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_column() } | ||||
19417 | |||||
19418 | sub get_starting_column { | ||||
19419 | $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_starting_column(); | ||||
19420 | } | ||||
19421 | |||||
19422 | sub increment_column { | ||||
19423 | $_[0]->[RALIGNMENTS]->[ $_[1] ]->increment_column( $_[2] ); | ||||
19424 | } | ||||
19425 | sub set_alignments { my $self = shift; @{ $self->[RALIGNMENTS] } = @_; } | ||||
19426 | |||||
19427 | sub current_field_width { | ||||
19428 | my $self = shift; | ||||
19429 | my ($j) = @_; | ||||
19430 | if ( $j == 0 ) { | ||||
19431 | return $self->get_column($j); | ||||
19432 | } | ||||
19433 | else { | ||||
19434 | return $self->get_column($j) - $self->get_column( $j - 1 ); | ||||
19435 | } | ||||
19436 | } | ||||
19437 | |||||
19438 | sub field_width_growth { | ||||
19439 | my $self = shift; | ||||
19440 | my $j = shift; | ||||
19441 | return $self->get_column($j) - $self->get_starting_column($j); | ||||
19442 | } | ||||
19443 | |||||
19444 | sub starting_field_width { | ||||
19445 | my $self = shift; | ||||
19446 | my $j = shift; | ||||
19447 | if ( $j == 0 ) { | ||||
19448 | return $self->get_starting_column($j); | ||||
19449 | } | ||||
19450 | else { | ||||
19451 | return $self->get_starting_column($j) - | ||||
19452 | $self->get_starting_column( $j - 1 ); | ||||
19453 | } | ||||
19454 | } | ||||
19455 | |||||
19456 | sub increase_field_width { | ||||
19457 | |||||
19458 | my $self = shift; | ||||
19459 | my ( $j, $pad ) = @_; | ||||
19460 | my $jmax = $self->get_jmax(); | ||||
19461 | for my $k ( $j .. $jmax ) { | ||||
19462 | $self->increment_column( $k, $pad ); | ||||
19463 | } | ||||
19464 | } | ||||
19465 | |||||
19466 | sub get_available_space_on_right { | ||||
19467 | my $self = shift; | ||||
19468 | my $jmax = $self->get_jmax(); | ||||
19469 | return $self->[MAXIMUM_LINE_LENGTH] - $self->get_column($jmax); | ||||
19470 | } | ||||
19471 | |||||
19472 | sub set_jmax { $_[0]->[JMAX] = $_[1] } | ||||
19473 | sub set_jmax_original_line { $_[0]->[JMAX_ORIGINAL_LINE] = $_[1] } | ||||
19474 | sub set_rtokens { $_[0]->[RTOKENS] = $_[1] } | ||||
19475 | sub set_rfields { $_[0]->[RFIELDS] = $_[1] } | ||||
19476 | sub set_rpatterns { $_[0]->[RPATTERNS] = $_[1] } | ||||
19477 | sub set_indentation { $_[0]->[INDENTATION] = $_[1] } | ||||
19478 | sub set_leading_space_count { $_[0]->[LEADING_SPACE_COUNT] = $_[1] } | ||||
19479 | sub set_outdent_long_lines { $_[0]->[OUTDENT_LONG_LINES] = $_[1] } | ||||
19480 | sub set_list_type { $_[0]->[LIST_TYPE] = $_[1] } | ||||
19481 | sub set_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] = $_[1] } | ||||
19482 | sub set_alignment { $_[0]->[RALIGNMENTS]->[ $_[1] ] = $_[2] } | ||||
19483 | |||||
19484 | } | ||||
19485 | |||||
19486 | ##################################################################### | ||||
19487 | # | ||||
19488 | # the Perl::Tidy::VerticalAligner::Alignment class holds information | ||||
19489 | # on a single column being aligned | ||||
19490 | # | ||||
19491 | ##################################################################### | ||||
19492 | package Perl::Tidy::VerticalAligner::Alignment; | ||||
19493 | |||||
19494 | { | ||||
19495 | |||||
19496 | 3 | 24µs | 2 | 34µs | # spent 22µs (10+12) within Perl::Tidy::VerticalAligner::Alignment::BEGIN@19496 which was called:
# once (10µs+12µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19496 # spent 22µs making 1 call to Perl::Tidy::VerticalAligner::Alignment::BEGIN@19496
# spent 12µs making 1 call to strict::import |
19497 | |||||
19498 | #use Carp; | ||||
19499 | |||||
19500 | # Symbolic array indexes | ||||
19501 | 2 | 22µs | 2 | 72µs | # spent 40µs (7+33) within Perl::Tidy::VerticalAligner::Alignment::BEGIN@19501 which was called:
# once (7µs+33µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19501 # spent 40µs making 1 call to Perl::Tidy::VerticalAligner::Alignment::BEGIN@19501
# spent 33µs making 1 call to constant::import |
19502 | 2 | 22µs | 2 | 66µs | # spent 36µs (6+30) within Perl::Tidy::VerticalAligner::Alignment::BEGIN@19502 which was called:
# once (6µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19502 # spent 36µs making 1 call to Perl::Tidy::VerticalAligner::Alignment::BEGIN@19502
# spent 30µs making 1 call to constant::import |
19503 | 2 | 21µs | 2 | 73µs | # spent 40µs (7+33) within Perl::Tidy::VerticalAligner::Alignment::BEGIN@19503 which was called:
# once (7µs+33µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19503 # spent 40µs making 1 call to Perl::Tidy::VerticalAligner::Alignment::BEGIN@19503
# spent 33µs making 1 call to constant::import |
19504 | 2 | 31µs | 2 | 65µs | # spent 36µs (6+30) within Perl::Tidy::VerticalAligner::Alignment::BEGIN@19504 which was called:
# once (6µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19504 # spent 36µs making 1 call to Perl::Tidy::VerticalAligner::Alignment::BEGIN@19504
# spent 30µs making 1 call to constant::import |
19505 | 2 | 30µs | 2 | 66µs | # spent 36µs (7+29) within Perl::Tidy::VerticalAligner::Alignment::BEGIN@19505 which was called:
# once (7µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19505 # spent 36µs making 1 call to Perl::Tidy::VerticalAligner::Alignment::BEGIN@19505
# spent 29µs making 1 call to constant::import |
19506 | 2 | 21µs | 2 | 67µs | # spent 37µs (8+30) within Perl::Tidy::VerticalAligner::Alignment::BEGIN@19506 which was called:
# once (8µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19506 # spent 37µs making 1 call to Perl::Tidy::VerticalAligner::Alignment::BEGIN@19506
# spent 30µs making 1 call to constant::import |
19507 | 2 | 183µs | 2 | 65µs | # spent 36µs (7+29) within Perl::Tidy::VerticalAligner::Alignment::BEGIN@19507 which was called:
# once (7µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19507 # spent 36µs making 1 call to Perl::Tidy::VerticalAligner::Alignment::BEGIN@19507
# spent 29µs making 1 call to constant::import |
19508 | # (just its index in an array) | ||||
19509 | |||||
19510 | # Correspondence between variables and array indexes | ||||
19511 | 1 | 0s | my %_index_map; | ||
19512 | 1 | 400ns | $_index_map{column} = COLUMN; | ||
19513 | 1 | 200ns | $_index_map{starting_column} = STARTING_COLUMN; | ||
19514 | 1 | 300ns | $_index_map{matching_token} = MATCHING_TOKEN; | ||
19515 | 1 | 100ns | $_index_map{starting_line} = STARTING_LINE; | ||
19516 | 1 | 100ns | $_index_map{ending_line} = ENDING_LINE; | ||
19517 | 1 | 100ns | $_index_map{saved_column} = SAVED_COLUMN; | ||
19518 | 1 | 100ns | $_index_map{serial_number} = SERIAL_NUMBER; | ||
19519 | |||||
19520 | 1 | 200ns | my @_default_data = (); | ||
19521 | 1 | 200ns | $_default_data[COLUMN] = undef; | ||
19522 | 1 | 100ns | $_default_data[STARTING_COLUMN] = undef; | ||
19523 | 1 | 100ns | $_default_data[MATCHING_TOKEN] = undef; | ||
19524 | 1 | 100ns | $_default_data[STARTING_LINE] = undef; | ||
19525 | 1 | 200ns | $_default_data[ENDING_LINE] = undef; | ||
19526 | 1 | 100ns | $_default_data[SAVED_COLUMN] = undef; | ||
19527 | 1 | 200ns | $_default_data[SERIAL_NUMBER] = undef; | ||
19528 | |||||
19529 | # class population count | ||||
19530 | { | ||||
19531 | 2 | 200ns | my $_count = 0; | ||
19532 | sub get_count { $_count; } | ||||
19533 | sub _increment_count { ++$_count } | ||||
19534 | sub _decrement_count { --$_count } | ||||
19535 | } | ||||
19536 | |||||
19537 | # constructor | ||||
19538 | sub new { | ||||
19539 | my ( $caller, %arg ) = @_; | ||||
19540 | my $caller_is_obj = ref($caller); | ||||
19541 | my $class = $caller_is_obj || $caller; | ||||
19542 | 2 | 405µs | 2 | 26µs | # spent 16µs (7+10) within Perl::Tidy::VerticalAligner::Alignment::BEGIN@19542 which was called:
# once (7µs+10µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19542 # spent 16µs making 1 call to Perl::Tidy::VerticalAligner::Alignment::BEGIN@19542
# spent 10µs making 1 call to strict::unimport |
19543 | my $self = bless [], $class; | ||||
19544 | |||||
19545 | foreach ( keys %_index_map ) { | ||||
19546 | my $index = $_index_map{$_}; | ||||
19547 | if ( exists $arg{$_} ) { $self->[$index] = $arg{$_} } | ||||
19548 | elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] } | ||||
19549 | else { $self->[$index] = $_default_data[$index] } | ||||
19550 | } | ||||
19551 | $self->_increment_count(); | ||||
19552 | return $self; | ||||
19553 | } | ||||
19554 | |||||
19555 | sub DESTROY { | ||||
19556 | $_[0]->_decrement_count(); | ||||
19557 | } | ||||
19558 | |||||
19559 | sub get_column { return $_[0]->[COLUMN] } | ||||
19560 | sub get_starting_column { return $_[0]->[STARTING_COLUMN] } | ||||
19561 | sub get_matching_token { return $_[0]->[MATCHING_TOKEN] } | ||||
19562 | sub get_starting_line { return $_[0]->[STARTING_LINE] } | ||||
19563 | sub get_ending_line { return $_[0]->[ENDING_LINE] } | ||||
19564 | sub get_serial_number { return $_[0]->[SERIAL_NUMBER] } | ||||
19565 | |||||
19566 | sub set_column { $_[0]->[COLUMN] = $_[1] } | ||||
19567 | sub set_starting_column { $_[0]->[STARTING_COLUMN] = $_[1] } | ||||
19568 | sub set_matching_token { $_[0]->[MATCHING_TOKEN] = $_[1] } | ||||
19569 | sub set_starting_line { $_[0]->[STARTING_LINE] = $_[1] } | ||||
19570 | sub set_ending_line { $_[0]->[ENDING_LINE] = $_[1] } | ||||
19571 | sub increment_column { $_[0]->[COLUMN] += $_[1] } | ||||
19572 | |||||
19573 | sub save_column { $_[0]->[SAVED_COLUMN] = $_[0]->[COLUMN] } | ||||
19574 | sub restore_column { $_[0]->[COLUMN] = $_[0]->[SAVED_COLUMN] } | ||||
19575 | |||||
19576 | } | ||||
19577 | |||||
19578 | package Perl::Tidy::VerticalAligner; | ||||
19579 | |||||
19580 | # The Perl::Tidy::VerticalAligner package collects output lines and | ||||
19581 | # attempts to line up certain common tokens, such as => and #, which are | ||||
19582 | # identified by the calling routine. | ||||
19583 | # | ||||
19584 | # There are two main routines: valign_input and flush. Append acts as a | ||||
19585 | # storage buffer, collecting lines into a group which can be vertically | ||||
19586 | # aligned. When alignment is no longer possible or desirable, it dumps | ||||
19587 | # the group to flush. | ||||
19588 | # | ||||
19589 | # valign_input -----> flush | ||||
19590 | # | ||||
19591 | # collects writes | ||||
19592 | # vertical one | ||||
19593 | # groups group | ||||
19594 | |||||
19595 | # spent 6µs within Perl::Tidy::VerticalAligner::BEGIN@19595 which was called:
# once (6µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19614 | ||||
19596 | |||||
19597 | # Caution: these debug flags produce a lot of output | ||||
19598 | # They should all be 0 except when debugging small scripts | ||||
19599 | |||||
19600 | 2 | 22µs | 2 | 74µs | # spent 41µs (8+33) within Perl::Tidy::VerticalAligner::BEGIN@19600 which was called:
# once (8µs+33µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19600 # spent 41µs making 1 call to Perl::Tidy::VerticalAligner::BEGIN@19600
# spent 33µs making 1 call to constant::import |
19601 | 2 | 24µs | 2 | 66µs | # spent 36µs (6+30) within Perl::Tidy::VerticalAligner::BEGIN@19601 which was called:
# once (6µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19601 # spent 36µs making 1 call to Perl::Tidy::VerticalAligner::BEGIN@19601
# spent 30µs making 1 call to constant::import |
19602 | 2 | 20µs | 2 | 67µs | # spent 36µs (6+30) within Perl::Tidy::VerticalAligner::BEGIN@19602 which was called:
# once (6µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19602 # spent 36µs making 1 call to Perl::Tidy::VerticalAligner::BEGIN@19602
# spent 30µs making 1 call to constant::import |
19603 | 2 | 85µs | 2 | 63µs | # spent 34µs (6+29) within Perl::Tidy::VerticalAligner::BEGIN@19603 which was called:
# once (6µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19603 # spent 34µs making 1 call to Perl::Tidy::VerticalAligner::BEGIN@19603
# spent 29µs making 1 call to constant::import |
19604 | |||||
19605 | my $debug_warning = sub { | ||||
19606 | print STDOUT "VALIGN_DEBUGGING with key $_[0]\n"; | ||||
19607 | 1 | 2µs | }; | ||
19608 | |||||
19609 | VALIGN_DEBUG_FLAG_APPEND && $debug_warning->('APPEND'); | ||||
19610 | 1 | 0s | VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0'); | ||
19611 | VALIGN_DEBUG_FLAG_TERNARY && $debug_warning->('TERNARY'); | ||||
19612 | 1 | 6µs | VALIGN_DEBUG_FLAG_TABS && $debug_warning->('TABS'); | ||
19613 | |||||
19614 | 1 | 80µs | 1 | 6µs | } # spent 6µs making 1 call to Perl::Tidy::VerticalAligner::BEGIN@19595 |
19615 | |||||
19616 | 1 | 600ns | # spent 580µs (8+572) within Perl::Tidy::VerticalAligner::BEGIN@19616 which was called:
# once (8µs+572µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19677 | ||
19617 | $vertical_aligner_self | ||||
19618 | $current_line | ||||
19619 | $maximum_alignment_index | ||||
19620 | $ralignment_list | ||||
19621 | $maximum_jmax_seen | ||||
19622 | $minimum_jmax_seen | ||||
19623 | $previous_minimum_jmax_seen | ||||
19624 | $previous_maximum_jmax_seen | ||||
19625 | $maximum_line_index | ||||
19626 | $group_level | ||||
19627 | $group_type | ||||
19628 | $group_maximum_gap | ||||
19629 | $marginal_match | ||||
19630 | $last_level_written | ||||
19631 | $last_leading_space_count | ||||
19632 | $extra_indent_ok | ||||
19633 | $zero_count | ||||
19634 | @group_lines | ||||
19635 | $last_comment_column | ||||
19636 | $last_side_comment_line_number | ||||
19637 | $last_side_comment_length | ||||
19638 | $last_side_comment_level | ||||
19639 | $outdented_line_count | ||||
19640 | $first_outdented_line_at | ||||
19641 | $last_outdented_line_at | ||||
19642 | $diagnostics_object | ||||
19643 | $logger_object | ||||
19644 | $file_writer_object | ||||
19645 | @side_comment_history | ||||
19646 | $comment_leading_space_count | ||||
19647 | $is_matching_terminal_line | ||||
19648 | $consecutive_block_comments | ||||
19649 | |||||
19650 | $cached_line_text | ||||
19651 | $cached_line_type | ||||
19652 | $cached_line_flag | ||||
19653 | $cached_seqno | ||||
19654 | $cached_line_valid | ||||
19655 | $cached_line_leading_space_count | ||||
19656 | $cached_seqno_string | ||||
19657 | |||||
19658 | $valign_buffer_filling | ||||
19659 | @valign_buffer | ||||
19660 | |||||
19661 | $seqno_string | ||||
19662 | $last_nonblank_seqno_string | ||||
19663 | |||||
19664 | $rOpts | ||||
19665 | |||||
19666 | $rOpts_maximum_line_length | ||||
19667 | $rOpts_variable_maximum_line_length | ||||
19668 | $rOpts_continuation_indentation | ||||
19669 | $rOpts_indent_columns | ||||
19670 | $rOpts_tabs | ||||
19671 | $rOpts_entab_leading_whitespace | ||||
19672 | $rOpts_valign | ||||
19673 | |||||
19674 | $rOpts_fixed_position_side_comment | ||||
19675 | $rOpts_minimum_space_to_comment | ||||
19676 | |||||
19677 | 1 | 4.90ms | 2 | 1.15ms | ); # spent 580µs making 1 call to Perl::Tidy::VerticalAligner::BEGIN@19616
# spent 572µs making 1 call to vars::import |
19678 | |||||
19679 | sub initialize { | ||||
19680 | |||||
19681 | my $class; | ||||
19682 | |||||
19683 | ( $class, $rOpts, $file_writer_object, $logger_object, $diagnostics_object ) | ||||
19684 | = @_; | ||||
19685 | |||||
19686 | # variables describing the entire space group: | ||||
19687 | $ralignment_list = []; | ||||
19688 | $group_level = 0; | ||||
19689 | $last_level_written = -1; | ||||
19690 | $extra_indent_ok = 0; # can we move all lines to the right? | ||||
19691 | $last_side_comment_length = 0; | ||||
19692 | $maximum_jmax_seen = 0; | ||||
19693 | $minimum_jmax_seen = 0; | ||||
19694 | $previous_minimum_jmax_seen = 0; | ||||
19695 | $previous_maximum_jmax_seen = 0; | ||||
19696 | |||||
19697 | # variables describing each line of the group | ||||
19698 | @group_lines = (); # list of all lines in group | ||||
19699 | |||||
19700 | $outdented_line_count = 0; | ||||
19701 | $first_outdented_line_at = 0; | ||||
19702 | $last_outdented_line_at = 0; | ||||
19703 | $last_side_comment_line_number = 0; | ||||
19704 | $last_side_comment_level = -1; | ||||
19705 | $is_matching_terminal_line = 0; | ||||
19706 | |||||
19707 | # most recent 3 side comments; [ line number, column ] | ||||
19708 | $side_comment_history[0] = [ -300, 0 ]; | ||||
19709 | $side_comment_history[1] = [ -200, 0 ]; | ||||
19710 | $side_comment_history[2] = [ -100, 0 ]; | ||||
19711 | |||||
19712 | # valign_output_step_B cache: | ||||
19713 | $cached_line_text = ""; | ||||
19714 | $cached_line_type = 0; | ||||
19715 | $cached_line_flag = 0; | ||||
19716 | $cached_seqno = 0; | ||||
19717 | $cached_line_valid = 0; | ||||
19718 | $cached_line_leading_space_count = 0; | ||||
19719 | $cached_seqno_string = ""; | ||||
19720 | |||||
19721 | # string of sequence numbers joined together | ||||
19722 | $seqno_string = ""; | ||||
19723 | $last_nonblank_seqno_string = ""; | ||||
19724 | |||||
19725 | # frequently used parameters | ||||
19726 | $rOpts_indent_columns = $rOpts->{'indent-columns'}; | ||||
19727 | $rOpts_tabs = $rOpts->{'tabs'}; | ||||
19728 | $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'}; | ||||
19729 | $rOpts_fixed_position_side_comment = | ||||
19730 | $rOpts->{'fixed-position-side-comment'}; | ||||
19731 | $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'}; | ||||
19732 | $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'}; | ||||
19733 | $rOpts_variable_maximum_line_length = | ||||
19734 | $rOpts->{'variable-maximum-line-length'}; | ||||
19735 | $rOpts_valign = $rOpts->{'valign'}; | ||||
19736 | |||||
19737 | $consecutive_block_comments = 0; | ||||
19738 | forget_side_comment(); | ||||
19739 | |||||
19740 | initialize_for_new_group(); | ||||
19741 | |||||
19742 | $vertical_aligner_self = {}; | ||||
19743 | bless $vertical_aligner_self, $class; | ||||
19744 | return $vertical_aligner_self; | ||||
19745 | } | ||||
19746 | |||||
19747 | sub initialize_for_new_group { | ||||
19748 | $maximum_line_index = -1; # lines in the current group | ||||
19749 | $maximum_alignment_index = -1; # alignments in current group | ||||
19750 | $zero_count = 0; # count consecutive lines without tokens | ||||
19751 | $current_line = undef; # line being matched for alignment | ||||
19752 | $group_maximum_gap = 0; # largest gap introduced | ||||
19753 | $group_type = ""; | ||||
19754 | $marginal_match = 0; | ||||
19755 | $comment_leading_space_count = 0; | ||||
19756 | $last_leading_space_count = 0; | ||||
19757 | } | ||||
19758 | |||||
19759 | # interface to Perl::Tidy::Diagnostics routines | ||||
19760 | sub write_diagnostics { | ||||
19761 | if ($diagnostics_object) { | ||||
19762 | $diagnostics_object->write_diagnostics(@_); | ||||
19763 | } | ||||
19764 | } | ||||
19765 | |||||
19766 | # interface to Perl::Tidy::Logger routines | ||||
19767 | sub warning { | ||||
19768 | if ($logger_object) { | ||||
19769 | $logger_object->warning(@_); | ||||
19770 | } | ||||
19771 | } | ||||
19772 | |||||
19773 | sub write_logfile_entry { | ||||
19774 | if ($logger_object) { | ||||
19775 | $logger_object->write_logfile_entry(@_); | ||||
19776 | } | ||||
19777 | } | ||||
19778 | |||||
19779 | sub report_definite_bug { | ||||
19780 | if ($logger_object) { | ||||
19781 | $logger_object->report_definite_bug(); | ||||
19782 | } | ||||
19783 | } | ||||
19784 | |||||
19785 | sub get_SPACES { | ||||
19786 | |||||
19787 | # return the number of leading spaces associated with an indentation | ||||
19788 | # variable $indentation is either a constant number of spaces or an | ||||
19789 | # object with a get_SPACES method. | ||||
19790 | my $indentation = shift; | ||||
19791 | return ref($indentation) ? $indentation->get_SPACES() : $indentation; | ||||
19792 | } | ||||
19793 | |||||
19794 | sub get_RECOVERABLE_SPACES { | ||||
19795 | |||||
19796 | # return the number of spaces (+ means shift right, - means shift left) | ||||
19797 | # that we would like to shift a group of lines with the same indentation | ||||
19798 | # to get them to line up with their opening parens | ||||
19799 | my $indentation = shift; | ||||
19800 | return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0; | ||||
19801 | } | ||||
19802 | |||||
19803 | sub get_STACK_DEPTH { | ||||
19804 | |||||
19805 | my $indentation = shift; | ||||
19806 | return ref($indentation) ? $indentation->get_STACK_DEPTH() : 0; | ||||
19807 | } | ||||
19808 | |||||
19809 | sub make_alignment { | ||||
19810 | my ( $col, $token ) = @_; | ||||
19811 | |||||
19812 | # make one new alignment at column $col which aligns token $token | ||||
19813 | ++$maximum_alignment_index; | ||||
19814 | my $alignment = new Perl::Tidy::VerticalAligner::Alignment( | ||||
19815 | column => $col, | ||||
19816 | starting_column => $col, | ||||
19817 | matching_token => $token, | ||||
19818 | starting_line => $maximum_line_index, | ||||
19819 | ending_line => $maximum_line_index, | ||||
19820 | serial_number => $maximum_alignment_index, | ||||
19821 | ); | ||||
19822 | $ralignment_list->[$maximum_alignment_index] = $alignment; | ||||
19823 | return $alignment; | ||||
19824 | } | ||||
19825 | |||||
19826 | sub dump_alignments { | ||||
19827 | print STDOUT | ||||
19828 | "Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n"; | ||||
19829 | for my $i ( 0 .. $maximum_alignment_index ) { | ||||
19830 | my $column = $ralignment_list->[$i]->get_column(); | ||||
19831 | my $starting_column = $ralignment_list->[$i]->get_starting_column(); | ||||
19832 | my $matching_token = $ralignment_list->[$i]->get_matching_token(); | ||||
19833 | my $starting_line = $ralignment_list->[$i]->get_starting_line(); | ||||
19834 | my $ending_line = $ralignment_list->[$i]->get_ending_line(); | ||||
19835 | print STDOUT | ||||
19836 | "$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n"; | ||||
19837 | } | ||||
19838 | } | ||||
19839 | |||||
19840 | sub save_alignment_columns { | ||||
19841 | for my $i ( 0 .. $maximum_alignment_index ) { | ||||
19842 | $ralignment_list->[$i]->save_column(); | ||||
19843 | } | ||||
19844 | } | ||||
19845 | |||||
19846 | sub restore_alignment_columns { | ||||
19847 | for my $i ( 0 .. $maximum_alignment_index ) { | ||||
19848 | $ralignment_list->[$i]->restore_column(); | ||||
19849 | } | ||||
19850 | } | ||||
19851 | |||||
19852 | sub forget_side_comment { | ||||
19853 | $last_comment_column = 0; | ||||
19854 | } | ||||
19855 | |||||
19856 | sub maximum_line_length_for_level { | ||||
19857 | |||||
19858 | # return maximum line length for line starting with a given level | ||||
19859 | my $maximum_line_length = $rOpts_maximum_line_length; | ||||
19860 | if ($rOpts_variable_maximum_line_length) { | ||||
19861 | my $level = shift; | ||||
19862 | if ( $level < 0 ) { $level = 0 } | ||||
19863 | $maximum_line_length += $level * $rOpts_indent_columns; | ||||
19864 | } | ||||
19865 | return $maximum_line_length; | ||||
19866 | } | ||||
19867 | |||||
19868 | sub valign_input { | ||||
19869 | |||||
19870 | # Place one line in the current vertical group. | ||||
19871 | # | ||||
19872 | # The input parameters are: | ||||
19873 | # $level = indentation level of this line | ||||
19874 | # $rfields = reference to array of fields | ||||
19875 | # $rpatterns = reference to array of patterns, one per field | ||||
19876 | # $rtokens = reference to array of tokens starting fields 1,2,.. | ||||
19877 | # | ||||
19878 | # Here is an example of what this package does. In this example, | ||||
19879 | # we are trying to line up both the '=>' and the '#'. | ||||
19880 | # | ||||
19881 | # '18' => 'grave', # \` | ||||
19882 | # '19' => 'acute', # `' | ||||
19883 | # '20' => 'caron', # \v | ||||
19884 | # <-tabs-><f1-><--field 2 ---><-f3-> | ||||
19885 | # | | | | | ||||
19886 | # | | | | | ||||
19887 | # col1 col2 col3 col4 | ||||
19888 | # | ||||
19889 | # The calling routine has already broken the entire line into 3 fields as | ||||
19890 | # indicated. (So the work of identifying promising common tokens has | ||||
19891 | # already been done). | ||||
19892 | # | ||||
19893 | # In this example, there will be 2 tokens being matched: '=>' and '#'. | ||||
19894 | # They are the leading parts of fields 2 and 3, but we do need to know | ||||
19895 | # what they are so that we can dump a group of lines when these tokens | ||||
19896 | # change. | ||||
19897 | # | ||||
19898 | # The fields contain the actual characters of each field. The patterns | ||||
19899 | # are like the fields, but they contain mainly token types instead | ||||
19900 | # of tokens, so they have fewer characters. They are used to be | ||||
19901 | # sure we are matching fields of similar type. | ||||
19902 | # | ||||
19903 | # In this example, there will be 4 column indexes being adjusted. The | ||||
19904 | # first one is always at zero. The interior columns are at the start of | ||||
19905 | # the matching tokens, and the last one tracks the maximum line length. | ||||
19906 | # | ||||
19907 | # Each time a new line comes in, it joins the current vertical | ||||
19908 | # group if possible. Otherwise it causes the current group to be dumped | ||||
19909 | # and a new group is started. | ||||
19910 | # | ||||
19911 | # For each new group member, the column locations are increased, as | ||||
19912 | # necessary, to make room for the new fields. When the group is finally | ||||
19913 | # output, these column numbers are used to compute the amount of spaces of | ||||
19914 | # padding needed for each field. | ||||
19915 | # | ||||
19916 | # Programming note: the fields are assumed not to have any tab characters. | ||||
19917 | # Tabs have been previously removed except for tabs in quoted strings and | ||||
19918 | # side comments. Tabs in these fields can mess up the column counting. | ||||
19919 | # The log file warns the user if there are any such tabs. | ||||
19920 | |||||
19921 | my ( | ||||
19922 | $level, $level_end, | ||||
19923 | $indentation, $rfields, | ||||
19924 | $rtokens, $rpatterns, | ||||
19925 | $is_forced_break, $outdent_long_lines, | ||||
19926 | $is_terminal_ternary, $is_terminal_statement, | ||||
19927 | $do_not_pad, $rvertical_tightness_flags, | ||||
19928 | $level_jump, | ||||
19929 | ) = @_; | ||||
19930 | |||||
19931 | # number of fields is $jmax | ||||
19932 | # number of tokens between fields is $jmax-1 | ||||
19933 | my $jmax = $#{$rfields}; | ||||
19934 | |||||
19935 | my $leading_space_count = get_SPACES($indentation); | ||||
19936 | |||||
19937 | # set outdented flag to be sure we either align within statements or | ||||
19938 | # across statement boundaries, but not both. | ||||
19939 | my $is_outdented = $last_leading_space_count > $leading_space_count; | ||||
19940 | $last_leading_space_count = $leading_space_count; | ||||
19941 | |||||
19942 | # Patch: undo for hanging side comment | ||||
19943 | my $is_hanging_side_comment = | ||||
19944 | ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ ); | ||||
19945 | $is_outdented = 0 if $is_hanging_side_comment; | ||||
19946 | |||||
19947 | # Forget side comment alignment after seeing 2 or more block comments | ||||
19948 | my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ ); | ||||
19949 | if ($is_block_comment) { | ||||
19950 | $consecutive_block_comments++; | ||||
19951 | } | ||||
19952 | else { | ||||
19953 | if ( $consecutive_block_comments > 1 ) { forget_side_comment() } | ||||
19954 | $consecutive_block_comments = 0; | ||||
19955 | } | ||||
19956 | |||||
19957 | VALIGN_DEBUG_FLAG_APPEND0 && do { | ||||
19958 | print STDOUT | ||||
19959 | "APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n"; | ||||
19960 | }; | ||||
19961 | |||||
19962 | # Validate cached line if necessary: If we can produce a container | ||||
19963 | # with just 2 lines total by combining an existing cached opening | ||||
19964 | # token with the closing token to follow, then we will mark both | ||||
19965 | # cached flags as valid. | ||||
19966 | if ($rvertical_tightness_flags) { | ||||
19967 | if ( $maximum_line_index <= 0 | ||||
19968 | && $cached_line_type | ||||
19969 | && $cached_seqno | ||||
19970 | && $rvertical_tightness_flags->[2] | ||||
19971 | && $rvertical_tightness_flags->[2] == $cached_seqno ) | ||||
19972 | { | ||||
19973 | $rvertical_tightness_flags->[3] ||= 1; | ||||
19974 | $cached_line_valid ||= 1; | ||||
19975 | } | ||||
19976 | } | ||||
19977 | |||||
19978 | # do not join an opening block brace with an unbalanced line | ||||
19979 | # unless requested with a flag value of 2 | ||||
19980 | if ( $cached_line_type == 3 | ||||
19981 | && $maximum_line_index < 0 | ||||
19982 | && $cached_line_flag < 2 | ||||
19983 | && $level_jump != 0 ) | ||||
19984 | { | ||||
19985 | $cached_line_valid = 0; | ||||
19986 | } | ||||
19987 | |||||
19988 | # patch until new aligner is finished | ||||
19989 | if ($do_not_pad) { my_flush() } | ||||
19990 | |||||
19991 | # shouldn't happen: | ||||
19992 | if ( $level < 0 ) { $level = 0 } | ||||
19993 | |||||
19994 | # do not align code across indentation level changes | ||||
19995 | # or if vertical alignment is turned off for debugging | ||||
19996 | if ( $level != $group_level || $is_outdented || !$rOpts_valign ) { | ||||
19997 | |||||
19998 | # we are allowed to shift a group of lines to the right if its | ||||
19999 | # level is greater than the previous and next group | ||||
20000 | $extra_indent_ok = | ||||
20001 | ( $level < $group_level && $last_level_written < $group_level ); | ||||
20002 | |||||
20003 | my_flush(); | ||||
20004 | |||||
20005 | # If we know that this line will get flushed out by itself because | ||||
20006 | # of level changes, we can leave the extra_indent_ok flag set. | ||||
20007 | # That way, if we get an external flush call, we will still be | ||||
20008 | # able to do some -lp alignment if necessary. | ||||
20009 | $extra_indent_ok = ( $is_terminal_statement && $level > $group_level ); | ||||
20010 | |||||
20011 | $group_level = $level; | ||||
20012 | |||||
20013 | # wait until after the above flush to get the leading space | ||||
20014 | # count because it may have been changed if the -icp flag is in | ||||
20015 | # effect | ||||
20016 | $leading_space_count = get_SPACES($indentation); | ||||
20017 | |||||
20018 | } | ||||
20019 | |||||
20020 | # -------------------------------------------------------------------- | ||||
20021 | # Patch to collect outdentable block COMMENTS | ||||
20022 | # -------------------------------------------------------------------- | ||||
20023 | my $is_blank_line = ""; | ||||
20024 | if ( $group_type eq 'COMMENT' ) { | ||||
20025 | if ( | ||||
20026 | ( | ||||
20027 | $is_block_comment | ||||
20028 | && $outdent_long_lines | ||||
20029 | && $leading_space_count == $comment_leading_space_count | ||||
20030 | ) | ||||
20031 | || $is_blank_line | ||||
20032 | ) | ||||
20033 | { | ||||
20034 | $group_lines[ ++$maximum_line_index ] = $rfields->[0]; | ||||
20035 | return; | ||||
20036 | } | ||||
20037 | else { | ||||
20038 | my_flush(); | ||||
20039 | } | ||||
20040 | } | ||||
20041 | |||||
20042 | # -------------------------------------------------------------------- | ||||
20043 | # add dummy fields for terminal ternary | ||||
20044 | # -------------------------------------------------------------------- | ||||
20045 | my $j_terminal_match; | ||||
20046 | if ( $is_terminal_ternary && $current_line ) { | ||||
20047 | $j_terminal_match = | ||||
20048 | fix_terminal_ternary( $rfields, $rtokens, $rpatterns ); | ||||
20049 | $jmax = @{$rfields} - 1; | ||||
20050 | } | ||||
20051 | |||||
20052 | # -------------------------------------------------------------------- | ||||
20053 | # add dummy fields for else statement | ||||
20054 | # -------------------------------------------------------------------- | ||||
20055 | if ( $rfields->[0] =~ /^else\s*$/ | ||||
20056 | && $current_line | ||||
20057 | && $level_jump == 0 ) | ||||
20058 | { | ||||
20059 | $j_terminal_match = fix_terminal_else( $rfields, $rtokens, $rpatterns ); | ||||
20060 | $jmax = @{$rfields} - 1; | ||||
20061 | } | ||||
20062 | |||||
20063 | # -------------------------------------------------------------------- | ||||
20064 | # Step 1. Handle simple line of code with no fields to match. | ||||
20065 | # -------------------------------------------------------------------- | ||||
20066 | if ( $jmax <= 0 ) { | ||||
20067 | $zero_count++; | ||||
20068 | |||||
20069 | if ( $maximum_line_index >= 0 | ||||
20070 | && !get_RECOVERABLE_SPACES( $group_lines[0]->get_indentation() ) ) | ||||
20071 | { | ||||
20072 | |||||
20073 | # flush the current group if it has some aligned columns.. | ||||
20074 | if ( $group_lines[0]->get_jmax() > 1 ) { my_flush() } | ||||
20075 | |||||
20076 | # flush current group if we are just collecting side comments.. | ||||
20077 | elsif ( | ||||
20078 | |||||
20079 | # ...and we haven't seen a comment lately | ||||
20080 | ( $zero_count > 3 ) | ||||
20081 | |||||
20082 | # ..or if this new line doesn't fit to the left of the comments | ||||
20083 | || ( ( $leading_space_count + length( $$rfields[0] ) ) > | ||||
20084 | $group_lines[0]->get_column(0) ) | ||||
20085 | ) | ||||
20086 | { | ||||
20087 | my_flush(); | ||||
20088 | } | ||||
20089 | } | ||||
20090 | |||||
20091 | # patch to start new COMMENT group if this comment may be outdented | ||||
20092 | if ( $is_block_comment | ||||
20093 | && $outdent_long_lines | ||||
20094 | && $maximum_line_index < 0 ) | ||||
20095 | { | ||||
20096 | $group_type = 'COMMENT'; | ||||
20097 | $comment_leading_space_count = $leading_space_count; | ||||
20098 | $group_lines[ ++$maximum_line_index ] = $rfields->[0]; | ||||
20099 | return; | ||||
20100 | } | ||||
20101 | |||||
20102 | # just write this line directly if no current group, no side comment, | ||||
20103 | # and no space recovery is needed. | ||||
20104 | if ( $maximum_line_index < 0 && !get_RECOVERABLE_SPACES($indentation) ) | ||||
20105 | { | ||||
20106 | valign_output_step_B( $leading_space_count, $$rfields[0], 0, | ||||
20107 | $outdent_long_lines, $rvertical_tightness_flags, $level ); | ||||
20108 | return; | ||||
20109 | } | ||||
20110 | } | ||||
20111 | else { | ||||
20112 | $zero_count = 0; | ||||
20113 | } | ||||
20114 | |||||
20115 | # programming check: (shouldn't happen) | ||||
20116 | # an error here implies an incorrect call was made | ||||
20117 | if ( $jmax > 0 && ( $#{$rtokens} != ( $jmax - 1 ) ) ) { | ||||
20118 | warning( | ||||
20119 | "Program bug in Perl::Tidy::VerticalAligner - number of tokens = $#{$rtokens} should be one less than number of fields: $#{$rfields})\n" | ||||
20120 | ); | ||||
20121 | report_definite_bug(); | ||||
20122 | } | ||||
20123 | |||||
20124 | # -------------------------------------------------------------------- | ||||
20125 | # create an object to hold this line | ||||
20126 | # -------------------------------------------------------------------- | ||||
20127 | my $new_line = new Perl::Tidy::VerticalAligner::Line( | ||||
20128 | jmax => $jmax, | ||||
20129 | jmax_original_line => $jmax, | ||||
20130 | rtokens => $rtokens, | ||||
20131 | rfields => $rfields, | ||||
20132 | rpatterns => $rpatterns, | ||||
20133 | indentation => $indentation, | ||||
20134 | leading_space_count => $leading_space_count, | ||||
20135 | outdent_long_lines => $outdent_long_lines, | ||||
20136 | list_type => "", | ||||
20137 | is_hanging_side_comment => $is_hanging_side_comment, | ||||
20138 | maximum_line_length => maximum_line_length_for_level($level), | ||||
20139 | rvertical_tightness_flags => $rvertical_tightness_flags, | ||||
20140 | ); | ||||
20141 | |||||
20142 | # Initialize a global flag saying if the last line of the group should | ||||
20143 | # match end of group and also terminate the group. There should be no | ||||
20144 | # returns between here and where the flag is handled at the bottom. | ||||
20145 | my $col_matching_terminal = 0; | ||||
20146 | if ( defined($j_terminal_match) ) { | ||||
20147 | |||||
20148 | # remember the column of the terminal ? or { to match with | ||||
20149 | $col_matching_terminal = $current_line->get_column($j_terminal_match); | ||||
20150 | |||||
20151 | # set global flag for sub decide_if_aligned | ||||
20152 | $is_matching_terminal_line = 1; | ||||
20153 | } | ||||
20154 | |||||
20155 | # -------------------------------------------------------------------- | ||||
20156 | # It simplifies things to create a zero length side comment | ||||
20157 | # if none exists. | ||||
20158 | # -------------------------------------------------------------------- | ||||
20159 | make_side_comment( $new_line, $level_end ); | ||||
20160 | |||||
20161 | # -------------------------------------------------------------------- | ||||
20162 | # Decide if this is a simple list of items. | ||||
20163 | # There are 3 list types: none, comma, comma-arrow. | ||||
20164 | # We use this below to be less restrictive in deciding what to align. | ||||
20165 | # -------------------------------------------------------------------- | ||||
20166 | if ($is_forced_break) { | ||||
20167 | decide_if_list($new_line); | ||||
20168 | } | ||||
20169 | |||||
20170 | if ($current_line) { | ||||
20171 | |||||
20172 | # -------------------------------------------------------------------- | ||||
20173 | # Allow hanging side comment to join current group, if any | ||||
20174 | # This will help keep side comments aligned, because otherwise we | ||||
20175 | # will have to start a new group, making alignment less likely. | ||||
20176 | # -------------------------------------------------------------------- | ||||
20177 | join_hanging_comment( $new_line, $current_line ) | ||||
20178 | if $is_hanging_side_comment; | ||||
20179 | |||||
20180 | # -------------------------------------------------------------------- | ||||
20181 | # If there is just one previous line, and it has more fields | ||||
20182 | # than the new line, try to join fields together to get a match with | ||||
20183 | # the new line. At the present time, only a single leading '=' is | ||||
20184 | # allowed to be compressed out. This is useful in rare cases where | ||||
20185 | # a table is forced to use old breakpoints because of side comments, | ||||
20186 | # and the table starts out something like this: | ||||
20187 | # my %MonthChars = ('0', 'Jan', # side comment | ||||
20188 | # '1', 'Feb', | ||||
20189 | # '2', 'Mar', | ||||
20190 | # Eliminating the '=' field will allow the remaining fields to line up. | ||||
20191 | # This situation does not occur if there are no side comments | ||||
20192 | # because scan_list would put a break after the opening '('. | ||||
20193 | # -------------------------------------------------------------------- | ||||
20194 | eliminate_old_fields( $new_line, $current_line ); | ||||
20195 | |||||
20196 | # -------------------------------------------------------------------- | ||||
20197 | # If the new line has more fields than the current group, | ||||
20198 | # see if we can match the first fields and combine the remaining | ||||
20199 | # fields of the new line. | ||||
20200 | # -------------------------------------------------------------------- | ||||
20201 | eliminate_new_fields( $new_line, $current_line ); | ||||
20202 | |||||
20203 | # -------------------------------------------------------------------- | ||||
20204 | # Flush previous group unless all common tokens and patterns match.. | ||||
20205 | # -------------------------------------------------------------------- | ||||
20206 | check_match( $new_line, $current_line ); | ||||
20207 | |||||
20208 | # -------------------------------------------------------------------- | ||||
20209 | # See if there is space for this line in the current group (if any) | ||||
20210 | # -------------------------------------------------------------------- | ||||
20211 | if ($current_line) { | ||||
20212 | check_fit( $new_line, $current_line ); | ||||
20213 | } | ||||
20214 | } | ||||
20215 | |||||
20216 | # -------------------------------------------------------------------- | ||||
20217 | # Append this line to the current group (or start new group) | ||||
20218 | # -------------------------------------------------------------------- | ||||
20219 | add_to_group($new_line); | ||||
20220 | |||||
20221 | # Future update to allow this to vary: | ||||
20222 | $current_line = $new_line if ( $maximum_line_index == 0 ); | ||||
20223 | |||||
20224 | # output this group if it ends in a terminal else or ternary line | ||||
20225 | if ( defined($j_terminal_match) ) { | ||||
20226 | |||||
20227 | # if there is only one line in the group (maybe due to failure to match | ||||
20228 | # perfectly with previous lines), then align the ? or { of this | ||||
20229 | # terminal line with the previous one unless that would make the line | ||||
20230 | # too long | ||||
20231 | if ( $maximum_line_index == 0 ) { | ||||
20232 | my $col_now = $current_line->get_column($j_terminal_match); | ||||
20233 | my $pad = $col_matching_terminal - $col_now; | ||||
20234 | my $padding_available = | ||||
20235 | $current_line->get_available_space_on_right(); | ||||
20236 | if ( $pad > 0 && $pad <= $padding_available ) { | ||||
20237 | $current_line->increase_field_width( $j_terminal_match, $pad ); | ||||
20238 | } | ||||
20239 | } | ||||
20240 | my_flush(); | ||||
20241 | $is_matching_terminal_line = 0; | ||||
20242 | } | ||||
20243 | |||||
20244 | # -------------------------------------------------------------------- | ||||
20245 | # Step 8. Some old debugging stuff | ||||
20246 | # -------------------------------------------------------------------- | ||||
20247 | VALIGN_DEBUG_FLAG_APPEND && do { | ||||
20248 | print STDOUT "APPEND fields:"; | ||||
20249 | dump_array(@$rfields); | ||||
20250 | print STDOUT "APPEND tokens:"; | ||||
20251 | dump_array(@$rtokens); | ||||
20252 | print STDOUT "APPEND patterns:"; | ||||
20253 | dump_array(@$rpatterns); | ||||
20254 | dump_alignments(); | ||||
20255 | }; | ||||
20256 | |||||
20257 | return; | ||||
20258 | } | ||||
20259 | |||||
20260 | sub join_hanging_comment { | ||||
20261 | |||||
20262 | my $line = shift; | ||||
20263 | my $jmax = $line->get_jmax(); | ||||
20264 | return 0 unless $jmax == 1; # must be 2 fields | ||||
20265 | my $rtokens = $line->get_rtokens(); | ||||
20266 | return 0 unless $$rtokens[0] eq '#'; # the second field is a comment.. | ||||
20267 | my $rfields = $line->get_rfields(); | ||||
20268 | return 0 unless $$rfields[0] =~ /^\s*$/; # the first field is empty... | ||||
20269 | my $old_line = shift; | ||||
20270 | my $maximum_field_index = $old_line->get_jmax(); | ||||
20271 | return 0 | ||||
20272 | unless $maximum_field_index > $jmax; # the current line has more fields | ||||
20273 | my $rpatterns = $line->get_rpatterns(); | ||||
20274 | |||||
20275 | $line->set_is_hanging_side_comment(1); | ||||
20276 | $jmax = $maximum_field_index; | ||||
20277 | $line->set_jmax($jmax); | ||||
20278 | $$rfields[$jmax] = $$rfields[1]; | ||||
20279 | $$rtokens[ $jmax - 1 ] = $$rtokens[0]; | ||||
20280 | $$rpatterns[ $jmax - 1 ] = $$rpatterns[0]; | ||||
20281 | for ( my $j = 1 ; $j < $jmax ; $j++ ) { | ||||
20282 | $$rfields[$j] = " "; # NOTE: caused glitch unless 1 blank, why? | ||||
20283 | $$rtokens[ $j - 1 ] = ""; | ||||
20284 | $$rpatterns[ $j - 1 ] = ""; | ||||
20285 | } | ||||
20286 | return 1; | ||||
20287 | } | ||||
20288 | |||||
20289 | sub eliminate_old_fields { | ||||
20290 | |||||
20291 | my $new_line = shift; | ||||
20292 | my $jmax = $new_line->get_jmax(); | ||||
20293 | if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax } | ||||
20294 | if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax } | ||||
20295 | |||||
20296 | # there must be one previous line | ||||
20297 | return unless ( $maximum_line_index == 0 ); | ||||
20298 | |||||
20299 | my $old_line = shift; | ||||
20300 | my $maximum_field_index = $old_line->get_jmax(); | ||||
20301 | |||||
20302 | ############################################### | ||||
20303 | # this line must have fewer fields | ||||
20304 | return unless $maximum_field_index > $jmax; | ||||
20305 | ############################################### | ||||
20306 | |||||
20307 | # Identify specific cases where field elimination is allowed: | ||||
20308 | # case=1: both lines have comma-separated lists, and the first | ||||
20309 | # line has an equals | ||||
20310 | # case=2: both lines have leading equals | ||||
20311 | |||||
20312 | # case 1 is the default | ||||
20313 | my $case = 1; | ||||
20314 | |||||
20315 | # See if case 2: both lines have leading '=' | ||||
20316 | # We'll require similar leading patterns in this case | ||||
20317 | my $old_rtokens = $old_line->get_rtokens(); | ||||
20318 | my $rtokens = $new_line->get_rtokens(); | ||||
20319 | my $rpatterns = $new_line->get_rpatterns(); | ||||
20320 | my $old_rpatterns = $old_line->get_rpatterns(); | ||||
20321 | if ( $rtokens->[0] =~ /^=\d*$/ | ||||
20322 | && $old_rtokens->[0] eq $rtokens->[0] | ||||
20323 | && $old_rpatterns->[0] eq $rpatterns->[0] ) | ||||
20324 | { | ||||
20325 | $case = 2; | ||||
20326 | } | ||||
20327 | |||||
20328 | # not too many fewer fields in new line for case 1 | ||||
20329 | return unless ( $case != 1 || $maximum_field_index - 2 <= $jmax ); | ||||
20330 | |||||
20331 | # case 1 must have side comment | ||||
20332 | my $old_rfields = $old_line->get_rfields(); | ||||
20333 | return | ||||
20334 | if ( $case == 1 | ||||
20335 | && length( $$old_rfields[$maximum_field_index] ) == 0 ); | ||||
20336 | |||||
20337 | my $rfields = $new_line->get_rfields(); | ||||
20338 | |||||
20339 | my $hid_equals = 0; | ||||
20340 | |||||
20341 | my @new_alignments = (); | ||||
20342 | my @new_fields = (); | ||||
20343 | my @new_matching_patterns = (); | ||||
20344 | my @new_matching_tokens = (); | ||||
20345 | |||||
20346 | my $j = 0; | ||||
20347 | my $k; | ||||
20348 | my $current_field = ''; | ||||
20349 | my $current_pattern = ''; | ||||
20350 | |||||
20351 | # loop over all old tokens | ||||
20352 | my $in_match = 0; | ||||
20353 | for ( $k = 0 ; $k < $maximum_field_index ; $k++ ) { | ||||
20354 | $current_field .= $$old_rfields[$k]; | ||||
20355 | $current_pattern .= $$old_rpatterns[$k]; | ||||
20356 | last if ( $j > $jmax - 1 ); | ||||
20357 | |||||
20358 | if ( $$old_rtokens[$k] eq $$rtokens[$j] ) { | ||||
20359 | $in_match = 1; | ||||
20360 | $new_fields[$j] = $current_field; | ||||
20361 | $new_matching_patterns[$j] = $current_pattern; | ||||
20362 | $current_field = ''; | ||||
20363 | $current_pattern = ''; | ||||
20364 | $new_matching_tokens[$j] = $$old_rtokens[$k]; | ||||
20365 | $new_alignments[$j] = $old_line->get_alignment($k); | ||||
20366 | $j++; | ||||
20367 | } | ||||
20368 | else { | ||||
20369 | |||||
20370 | if ( $$old_rtokens[$k] =~ /^\=\d*$/ ) { | ||||
20371 | last if ( $case == 2 ); # avoid problems with stuff | ||||
20372 | # like: $a=$b=$c=$d; | ||||
20373 | $hid_equals = 1; | ||||
20374 | } | ||||
20375 | last | ||||
20376 | if ( $in_match && $case == 1 ) | ||||
20377 | ; # disallow gaps in matching field types in case 1 | ||||
20378 | } | ||||
20379 | } | ||||
20380 | |||||
20381 | # Modify the current state if we are successful. | ||||
20382 | # We must exactly reach the ends of both lists for success. | ||||
20383 | if ( ( $j == $jmax ) | ||||
20384 | && ( $current_field eq '' ) | ||||
20385 | && ( $case != 1 || $hid_equals ) ) | ||||
20386 | { | ||||
20387 | $k = $maximum_field_index; | ||||
20388 | $current_field .= $$old_rfields[$k]; | ||||
20389 | $current_pattern .= $$old_rpatterns[$k]; | ||||
20390 | $new_fields[$j] = $current_field; | ||||
20391 | $new_matching_patterns[$j] = $current_pattern; | ||||
20392 | |||||
20393 | $new_alignments[$j] = $old_line->get_alignment($k); | ||||
20394 | $maximum_field_index = $j; | ||||
20395 | |||||
20396 | $old_line->set_alignments(@new_alignments); | ||||
20397 | $old_line->set_jmax($jmax); | ||||
20398 | $old_line->set_rtokens( \@new_matching_tokens ); | ||||
20399 | $old_line->set_rfields( \@new_fields ); | ||||
20400 | $old_line->set_rpatterns( \@$rpatterns ); | ||||
20401 | } | ||||
20402 | } | ||||
20403 | |||||
20404 | # create an empty side comment if none exists | ||||
20405 | sub make_side_comment { | ||||
20406 | my $new_line = shift; | ||||
20407 | my $level_end = shift; | ||||
20408 | my $jmax = $new_line->get_jmax(); | ||||
20409 | my $rtokens = $new_line->get_rtokens(); | ||||
20410 | |||||
20411 | # if line does not have a side comment... | ||||
20412 | if ( ( $jmax == 0 ) || ( $$rtokens[ $jmax - 1 ] ne '#' ) ) { | ||||
20413 | my $rfields = $new_line->get_rfields(); | ||||
20414 | my $rpatterns = $new_line->get_rpatterns(); | ||||
20415 | $$rtokens[$jmax] = '#'; | ||||
20416 | $$rfields[ ++$jmax ] = ''; | ||||
20417 | $$rpatterns[$jmax] = '#'; | ||||
20418 | $new_line->set_jmax($jmax); | ||||
20419 | $new_line->set_jmax_original_line($jmax); | ||||
20420 | } | ||||
20421 | |||||
20422 | # line has a side comment.. | ||||
20423 | else { | ||||
20424 | |||||
20425 | # don't remember old side comment location for very long | ||||
20426 | my $line_number = $vertical_aligner_self->get_output_line_number(); | ||||
20427 | my $rfields = $new_line->get_rfields(); | ||||
20428 | if ( | ||||
20429 | $line_number - $last_side_comment_line_number > 12 | ||||
20430 | |||||
20431 | # and don't remember comment location across block level changes | ||||
20432 | || ( $level_end < $last_side_comment_level && $$rfields[0] =~ /^}/ ) | ||||
20433 | ) | ||||
20434 | { | ||||
20435 | forget_side_comment(); | ||||
20436 | } | ||||
20437 | $last_side_comment_line_number = $line_number; | ||||
20438 | $last_side_comment_level = $level_end; | ||||
20439 | } | ||||
20440 | } | ||||
20441 | |||||
20442 | sub decide_if_list { | ||||
20443 | |||||
20444 | my $line = shift; | ||||
20445 | |||||
20446 | # A list will be taken to be a line with a forced break in which all | ||||
20447 | # of the field separators are commas or comma-arrows (except for the | ||||
20448 | # trailing #) | ||||
20449 | |||||
20450 | # List separator tokens are things like ',3' or '=>2', | ||||
20451 | # where the trailing digit is the nesting depth. Allow braces | ||||
20452 | # to allow nested list items. | ||||
20453 | my $rtokens = $line->get_rtokens(); | ||||
20454 | my $test_token = $$rtokens[0]; | ||||
20455 | if ( $test_token =~ /^(\,|=>)/ ) { | ||||
20456 | my $list_type = $test_token; | ||||
20457 | my $jmax = $line->get_jmax(); | ||||
20458 | |||||
20459 | foreach ( 1 .. $jmax - 2 ) { | ||||
20460 | if ( $$rtokens[$_] !~ /^(\,|=>|\{)/ ) { | ||||
20461 | $list_type = ""; | ||||
20462 | last; | ||||
20463 | } | ||||
20464 | } | ||||
20465 | $line->set_list_type($list_type); | ||||
20466 | } | ||||
20467 | } | ||||
20468 | |||||
20469 | sub eliminate_new_fields { | ||||
20470 | |||||
20471 | return unless ( $maximum_line_index >= 0 ); | ||||
20472 | my ( $new_line, $old_line ) = @_; | ||||
20473 | my $jmax = $new_line->get_jmax(); | ||||
20474 | |||||
20475 | my $old_rtokens = $old_line->get_rtokens(); | ||||
20476 | my $rtokens = $new_line->get_rtokens(); | ||||
20477 | my $is_assignment = | ||||
20478 | ( $rtokens->[0] =~ /^=\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) ); | ||||
20479 | |||||
20480 | # must be monotonic variation | ||||
20481 | return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax ); | ||||
20482 | |||||
20483 | # must be more fields in the new line | ||||
20484 | my $maximum_field_index = $old_line->get_jmax(); | ||||
20485 | return unless ( $maximum_field_index < $jmax ); | ||||
20486 | |||||
20487 | unless ($is_assignment) { | ||||
20488 | return | ||||
20489 | unless ( $old_line->get_jmax_original_line() == $minimum_jmax_seen ) | ||||
20490 | ; # only if monotonic | ||||
20491 | |||||
20492 | # never combine fields of a comma list | ||||
20493 | return | ||||
20494 | unless ( $maximum_field_index > 1 ) | ||||
20495 | && ( $new_line->get_list_type() !~ /^,/ ); | ||||
20496 | } | ||||
20497 | |||||
20498 | my $rfields = $new_line->get_rfields(); | ||||
20499 | my $rpatterns = $new_line->get_rpatterns(); | ||||
20500 | my $old_rpatterns = $old_line->get_rpatterns(); | ||||
20501 | |||||
20502 | # loop over all OLD tokens except comment and check match | ||||
20503 | my $match = 1; | ||||
20504 | my $k; | ||||
20505 | for ( $k = 0 ; $k < $maximum_field_index - 1 ; $k++ ) { | ||||
20506 | if ( ( $$old_rtokens[$k] ne $$rtokens[$k] ) | ||||
20507 | || ( $$old_rpatterns[$k] ne $$rpatterns[$k] ) ) | ||||
20508 | { | ||||
20509 | $match = 0; | ||||
20510 | last; | ||||
20511 | } | ||||
20512 | } | ||||
20513 | |||||
20514 | # first tokens agree, so combine extra new tokens | ||||
20515 | if ($match) { | ||||
20516 | for $k ( $maximum_field_index .. $jmax - 1 ) { | ||||
20517 | |||||
20518 | $$rfields[ $maximum_field_index - 1 ] .= $$rfields[$k]; | ||||
20519 | $$rfields[$k] = ""; | ||||
20520 | $$rpatterns[ $maximum_field_index - 1 ] .= $$rpatterns[$k]; | ||||
20521 | $$rpatterns[$k] = ""; | ||||
20522 | } | ||||
20523 | |||||
20524 | $$rtokens[ $maximum_field_index - 1 ] = '#'; | ||||
20525 | $$rfields[$maximum_field_index] = $$rfields[$jmax]; | ||||
20526 | $$rpatterns[$maximum_field_index] = $$rpatterns[$jmax]; | ||||
20527 | $jmax = $maximum_field_index; | ||||
20528 | } | ||||
20529 | $new_line->set_jmax($jmax); | ||||
20530 | } | ||||
20531 | |||||
20532 | sub fix_terminal_ternary { | ||||
20533 | |||||
20534 | # Add empty fields as necessary to align a ternary term | ||||
20535 | # like this: | ||||
20536 | # | ||||
20537 | # my $leapyear = | ||||
20538 | # $year % 4 ? 0 | ||||
20539 | # : $year % 100 ? 1 | ||||
20540 | # : $year % 400 ? 0 | ||||
20541 | # : 1; | ||||
20542 | # | ||||
20543 | # returns 1 if the terminal item should be indented | ||||
20544 | |||||
20545 | my ( $rfields, $rtokens, $rpatterns ) = @_; | ||||
20546 | |||||
20547 | my $jmax = @{$rfields} - 1; | ||||
20548 | my $old_line = $group_lines[$maximum_line_index]; | ||||
20549 | my $rfields_old = $old_line->get_rfields(); | ||||
20550 | |||||
20551 | my $rpatterns_old = $old_line->get_rpatterns(); | ||||
20552 | my $rtokens_old = $old_line->get_rtokens(); | ||||
20553 | my $maximum_field_index = $old_line->get_jmax(); | ||||
20554 | |||||
20555 | # look for the question mark after the : | ||||
20556 | my ($jquestion); | ||||
20557 | my $depth_question; | ||||
20558 | my $pad = ""; | ||||
20559 | for ( my $j = 0 ; $j < $maximum_field_index ; $j++ ) { | ||||
20560 | my $tok = $rtokens_old->[$j]; | ||||
20561 | if ( $tok =~ /^\?(\d+)$/ ) { | ||||
20562 | $depth_question = $1; | ||||
20563 | |||||
20564 | # depth must be correct | ||||
20565 | next unless ( $depth_question eq $group_level ); | ||||
20566 | |||||
20567 | $jquestion = $j; | ||||
20568 | if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) { | ||||
20569 | $pad = " " x length($1); | ||||
20570 | } | ||||
20571 | else { | ||||
20572 | return; # shouldn't happen | ||||
20573 | } | ||||
20574 | last; | ||||
20575 | } | ||||
20576 | } | ||||
20577 | return unless ( defined($jquestion) ); # shouldn't happen | ||||
20578 | |||||
20579 | # Now splice the tokens and patterns of the previous line | ||||
20580 | # into the else line to insure a match. Add empty fields | ||||
20581 | # as necessary. | ||||
20582 | my $jadd = $jquestion; | ||||
20583 | |||||
20584 | # Work on copies of the actual arrays in case we have | ||||
20585 | # to return due to an error | ||||
20586 | my @fields = @{$rfields}; | ||||
20587 | my @patterns = @{$rpatterns}; | ||||
20588 | my @tokens = @{$rtokens}; | ||||
20589 | |||||
20590 | VALIGN_DEBUG_FLAG_TERNARY && do { | ||||
20591 | local $" = '><'; | ||||
20592 | print STDOUT "CURRENT FIELDS=<@{$rfields_old}>\n"; | ||||
20593 | print STDOUT "CURRENT TOKENS=<@{$rtokens_old}>\n"; | ||||
20594 | print STDOUT "CURRENT PATTERNS=<@{$rpatterns_old}>\n"; | ||||
20595 | print STDOUT "UNMODIFIED FIELDS=<@{$rfields}>\n"; | ||||
20596 | print STDOUT "UNMODIFIED TOKENS=<@{$rtokens}>\n"; | ||||
20597 | print STDOUT "UNMODIFIED PATTERNS=<@{$rpatterns}>\n"; | ||||
20598 | }; | ||||
20599 | |||||
20600 | # handle cases of leading colon on this line | ||||
20601 | if ( $fields[0] =~ /^(:\s*)(.*)$/ ) { | ||||
20602 | |||||
20603 | my ( $colon, $therest ) = ( $1, $2 ); | ||||
20604 | |||||
20605 | # Handle sub-case of first field with leading colon plus additional code | ||||
20606 | # This is the usual situation as at the '1' below: | ||||
20607 | # ... | ||||
20608 | # : $year % 400 ? 0 | ||||
20609 | # : 1; | ||||
20610 | if ($therest) { | ||||
20611 | |||||
20612 | # Split the first field after the leading colon and insert padding. | ||||
20613 | # Note that this padding will remain even if the terminal value goes | ||||
20614 | # out on a separate line. This does not seem to look to bad, so no | ||||
20615 | # mechanism has been included to undo it. | ||||
20616 | my $field1 = shift @fields; | ||||
20617 | unshift @fields, ( $colon, $pad . $therest ); | ||||
20618 | |||||
20619 | # change the leading pattern from : to ? | ||||
20620 | return unless ( $patterns[0] =~ s/^\:/?/ ); | ||||
20621 | |||||
20622 | # install leading tokens and patterns of existing line | ||||
20623 | unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] ); | ||||
20624 | unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] ); | ||||
20625 | |||||
20626 | # insert appropriate number of empty fields | ||||
20627 | splice( @fields, 1, 0, ('') x $jadd ) if $jadd; | ||||
20628 | } | ||||
20629 | |||||
20630 | # handle sub-case of first field just equal to leading colon. | ||||
20631 | # This can happen for example in the example below where | ||||
20632 | # the leading '(' would create a new alignment token | ||||
20633 | # : ( $name =~ /[]}]$/ ) ? ( $mname = $name ) | ||||
20634 | # : ( $mname = $name . '->' ); | ||||
20635 | else { | ||||
20636 | |||||
20637 | return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen | ||||
20638 | |||||
20639 | # prepend a leading ? onto the second pattern | ||||
20640 | $patterns[1] = "?b" . $patterns[1]; | ||||
20641 | |||||
20642 | # pad the second field | ||||
20643 | $fields[1] = $pad . $fields[1]; | ||||
20644 | |||||
20645 | # install leading tokens and patterns of existing line, replacing | ||||
20646 | # leading token and inserting appropriate number of empty fields | ||||
20647 | splice( @tokens, 0, 1, @{$rtokens_old}[ 0 .. $jquestion ] ); | ||||
20648 | splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] ); | ||||
20649 | splice( @fields, 1, 0, ('') x $jadd ) if $jadd; | ||||
20650 | } | ||||
20651 | } | ||||
20652 | |||||
20653 | # Handle case of no leading colon on this line. This will | ||||
20654 | # be the case when -wba=':' is used. For example, | ||||
20655 | # $year % 400 ? 0 : | ||||
20656 | # 1; | ||||
20657 | else { | ||||
20658 | |||||
20659 | # install leading tokens and patterns of existing line | ||||
20660 | $patterns[0] = '?' . 'b' . $patterns[0]; | ||||
20661 | unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] ); | ||||
20662 | unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] ); | ||||
20663 | |||||
20664 | # insert appropriate number of empty fields | ||||
20665 | $jadd = $jquestion + 1; | ||||
20666 | $fields[0] = $pad . $fields[0]; | ||||
20667 | splice( @fields, 0, 0, ('') x $jadd ) if $jadd; | ||||
20668 | } | ||||
20669 | |||||
20670 | VALIGN_DEBUG_FLAG_TERNARY && do { | ||||
20671 | local $" = '><'; | ||||
20672 | print STDOUT "MODIFIED TOKENS=<@tokens>\n"; | ||||
20673 | print STDOUT "MODIFIED PATTERNS=<@patterns>\n"; | ||||
20674 | print STDOUT "MODIFIED FIELDS=<@fields>\n"; | ||||
20675 | }; | ||||
20676 | |||||
20677 | # all ok .. update the arrays | ||||
20678 | @{$rfields} = @fields; | ||||
20679 | @{$rtokens} = @tokens; | ||||
20680 | @{$rpatterns} = @patterns; | ||||
20681 | |||||
20682 | # force a flush after this line | ||||
20683 | return $jquestion; | ||||
20684 | } | ||||
20685 | |||||
20686 | sub fix_terminal_else { | ||||
20687 | |||||
20688 | # Add empty fields as necessary to align a balanced terminal | ||||
20689 | # else block to a previous if/elsif/unless block, | ||||
20690 | # like this: | ||||
20691 | # | ||||
20692 | # if ( 1 || $x ) { print "ok 13\n"; } | ||||
20693 | # else { print "not ok 13\n"; } | ||||
20694 | # | ||||
20695 | # returns 1 if the else block should be indented | ||||
20696 | # | ||||
20697 | my ( $rfields, $rtokens, $rpatterns ) = @_; | ||||
20698 | my $jmax = @{$rfields} - 1; | ||||
20699 | return unless ( $jmax > 0 ); | ||||
20700 | |||||
20701 | # check for balanced else block following if/elsif/unless | ||||
20702 | my $rfields_old = $current_line->get_rfields(); | ||||
20703 | |||||
20704 | # TBD: add handling for 'case' | ||||
20705 | return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ ); | ||||
20706 | |||||
20707 | # look for the opening brace after the else, and extract the depth | ||||
20708 | my $tok_brace = $rtokens->[0]; | ||||
20709 | my $depth_brace; | ||||
20710 | if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; } | ||||
20711 | |||||
20712 | # probably: "else # side_comment" | ||||
20713 | else { return } | ||||
20714 | |||||
20715 | my $rpatterns_old = $current_line->get_rpatterns(); | ||||
20716 | my $rtokens_old = $current_line->get_rtokens(); | ||||
20717 | my $maximum_field_index = $current_line->get_jmax(); | ||||
20718 | |||||
20719 | # be sure the previous if/elsif is followed by an opening paren | ||||
20720 | my $jparen = 0; | ||||
20721 | my $tok_paren = '(' . $depth_brace; | ||||
20722 | my $tok_test = $rtokens_old->[$jparen]; | ||||
20723 | return unless ( $tok_test eq $tok_paren ); # shouldn't happen | ||||
20724 | |||||
20725 | # Now find the opening block brace | ||||
20726 | my ($jbrace); | ||||
20727 | for ( my $j = 1 ; $j < $maximum_field_index ; $j++ ) { | ||||
20728 | my $tok = $rtokens_old->[$j]; | ||||
20729 | if ( $tok eq $tok_brace ) { | ||||
20730 | $jbrace = $j; | ||||
20731 | last; | ||||
20732 | } | ||||
20733 | } | ||||
20734 | return unless ( defined($jbrace) ); # shouldn't happen | ||||
20735 | |||||
20736 | # Now splice the tokens and patterns of the previous line | ||||
20737 | # into the else line to insure a match. Add empty fields | ||||
20738 | # as necessary. | ||||
20739 | my $jadd = $jbrace - $jparen; | ||||
20740 | splice( @{$rtokens}, 0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] ); | ||||
20741 | splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] ); | ||||
20742 | splice( @{$rfields}, 1, 0, ('') x $jadd ); | ||||
20743 | |||||
20744 | # force a flush after this line if it does not follow a case | ||||
20745 | return $jbrace | ||||
20746 | unless ( $rfields_old->[0] =~ /^case\s*$/ ); | ||||
20747 | } | ||||
20748 | |||||
20749 | { # sub check_match | ||||
20750 | 2 | 200ns | my %is_good_alignment; | ||
20751 | |||||
20752 | # spent 14µs within Perl::Tidy::VerticalAligner::BEGIN@20752 which was called:
# once (14µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 20759 | ||||
20753 | |||||
20754 | # Vertically aligning on certain "good" tokens is usually okay | ||||
20755 | # so we can be less restrictive in marginal cases. | ||||
20756 | 1 | 2µs | @_ = qw( { ? => = ); | ||
20757 | 1 | 1µs | push @_, (','); | ||
20758 | 1 | 13µs | @is_good_alignment{@_} = (1) x scalar(@_); | ||
20759 | 1 | 4.90ms | 1 | 14µs | } # spent 14µs making 1 call to Perl::Tidy::VerticalAligner::BEGIN@20752 |
20760 | |||||
20761 | sub check_match { | ||||
20762 | |||||
20763 | # See if the current line matches the current vertical alignment group. | ||||
20764 | # If not, flush the current group. | ||||
20765 | my $new_line = shift; | ||||
20766 | my $old_line = shift; | ||||
20767 | |||||
20768 | # uses global variables: | ||||
20769 | # $previous_minimum_jmax_seen | ||||
20770 | # $maximum_jmax_seen | ||||
20771 | # $maximum_line_index | ||||
20772 | # $marginal_match | ||||
20773 | my $jmax = $new_line->get_jmax(); | ||||
20774 | my $maximum_field_index = $old_line->get_jmax(); | ||||
20775 | |||||
20776 | # flush if this line has too many fields | ||||
20777 | if ( $jmax > $maximum_field_index ) { goto NO_MATCH } | ||||
20778 | |||||
20779 | # flush if adding this line would make a non-monotonic field count | ||||
20780 | if ( | ||||
20781 | ( $maximum_field_index > $jmax ) # this has too few fields | ||||
20782 | && ( | ||||
20783 | ( $previous_minimum_jmax_seen < | ||||
20784 | $jmax ) # and wouldn't be monotonic | ||||
20785 | || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen ) | ||||
20786 | ) | ||||
20787 | ) | ||||
20788 | { | ||||
20789 | goto NO_MATCH; | ||||
20790 | } | ||||
20791 | |||||
20792 | # otherwise see if this line matches the current group | ||||
20793 | my $jmax_original_line = $new_line->get_jmax_original_line(); | ||||
20794 | my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment(); | ||||
20795 | my $rtokens = $new_line->get_rtokens(); | ||||
20796 | my $rfields = $new_line->get_rfields(); | ||||
20797 | my $rpatterns = $new_line->get_rpatterns(); | ||||
20798 | my $list_type = $new_line->get_list_type(); | ||||
20799 | |||||
20800 | my $group_list_type = $old_line->get_list_type(); | ||||
20801 | my $old_rpatterns = $old_line->get_rpatterns(); | ||||
20802 | my $old_rtokens = $old_line->get_rtokens(); | ||||
20803 | |||||
20804 | my $jlimit = $jmax - 1; | ||||
20805 | if ( $maximum_field_index > $jmax ) { | ||||
20806 | $jlimit = $jmax_original_line; | ||||
20807 | --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) ); | ||||
20808 | } | ||||
20809 | |||||
20810 | # handle comma-separated lists .. | ||||
20811 | if ( $group_list_type && ( $list_type eq $group_list_type ) ) { | ||||
20812 | for my $j ( 0 .. $jlimit ) { | ||||
20813 | my $old_tok = $$old_rtokens[$j]; | ||||
20814 | next unless $old_tok; | ||||
20815 | my $new_tok = $$rtokens[$j]; | ||||
20816 | next unless $new_tok; | ||||
20817 | |||||
20818 | # lists always match ... | ||||
20819 | # unless they would align any '=>'s with ','s | ||||
20820 | goto NO_MATCH | ||||
20821 | if ( $old_tok =~ /^=>/ && $new_tok =~ /^,/ | ||||
20822 | || $new_tok =~ /^=>/ && $old_tok =~ /^,/ ); | ||||
20823 | } | ||||
20824 | } | ||||
20825 | |||||
20826 | # do detailed check for everything else except hanging side comments | ||||
20827 | elsif ( !$is_hanging_side_comment ) { | ||||
20828 | |||||
20829 | my $leading_space_count = $new_line->get_leading_space_count(); | ||||
20830 | |||||
20831 | my $max_pad = 0; | ||||
20832 | my $min_pad = 0; | ||||
20833 | my $saw_good_alignment; | ||||
20834 | |||||
20835 | for my $j ( 0 .. $jlimit ) { | ||||
20836 | |||||
20837 | my $old_tok = $$old_rtokens[$j]; | ||||
20838 | my $new_tok = $$rtokens[$j]; | ||||
20839 | |||||
20840 | # Note on encoding used for alignment tokens: | ||||
20841 | # ------------------------------------------- | ||||
20842 | # Tokens are "decorated" with information which can help | ||||
20843 | # prevent unwanted alignments. Consider for example the | ||||
20844 | # following two lines: | ||||
20845 | # local ( $xn, $xd ) = split( '/', &'rnorm(@_) ); | ||||
20846 | # local ( $i, $f ) = &'bdiv( $xn, $xd ); | ||||
20847 | # There are three alignment tokens in each line, a comma, | ||||
20848 | # an =, and a comma. In the first line these three tokens | ||||
20849 | # are encoded as: | ||||
20850 | # ,4+local-18 =3 ,4+split-7 | ||||
20851 | # and in the second line they are encoded as | ||||
20852 | # ,4+local-18 =3 ,4+&'bdiv-8 | ||||
20853 | # Tokens always at least have token name and nesting | ||||
20854 | # depth. So in this example the ='s are at depth 3 and | ||||
20855 | # the ,'s are at depth 4. This prevents aligning tokens | ||||
20856 | # of different depths. Commas contain additional | ||||
20857 | # information, as follows: | ||||
20858 | # , {depth} + {container name} - {spaces to opening paren} | ||||
20859 | # This allows us to reject matching the rightmost commas | ||||
20860 | # in the above two lines, since they are for different | ||||
20861 | # function calls. This encoding is done in | ||||
20862 | # 'sub send_lines_to_vertical_aligner'. | ||||
20863 | |||||
20864 | # Pick off actual token. | ||||
20865 | # Everything up to the first digit is the actual token. | ||||
20866 | my $alignment_token = $new_tok; | ||||
20867 | if ( $alignment_token =~ /^([^\d]+)/ ) { $alignment_token = $1 } | ||||
20868 | |||||
20869 | # see if the decorated tokens match | ||||
20870 | my $tokens_match = $new_tok eq $old_tok | ||||
20871 | |||||
20872 | # Exception for matching terminal : of ternary statement.. | ||||
20873 | # consider containers prefixed by ? and : a match | ||||
20874 | || ( $new_tok =~ /^,\d*\+\:/ && $old_tok =~ /^,\d*\+\?/ ); | ||||
20875 | |||||
20876 | # No match if the alignment tokens differ... | ||||
20877 | if ( !$tokens_match ) { | ||||
20878 | |||||
20879 | # ...Unless this is a side comment | ||||
20880 | if ( | ||||
20881 | $j == $jlimit | ||||
20882 | |||||
20883 | # and there is either at least one alignment token | ||||
20884 | # or this is a single item following a list. This | ||||
20885 | # latter rule is required for 'December' to join | ||||
20886 | # the following list: | ||||
20887 | # my (@months) = ( | ||||
20888 | # '', 'January', 'February', 'March', | ||||
20889 | # 'April', 'May', 'June', 'July', | ||||
20890 | # 'August', 'September', 'October', 'November', | ||||
20891 | # 'December' | ||||
20892 | # ); | ||||
20893 | # If it doesn't then the -lp formatting will fail. | ||||
20894 | && ( $j > 0 || $old_tok =~ /^,/ ) | ||||
20895 | ) | ||||
20896 | { | ||||
20897 | $marginal_match = 1 | ||||
20898 | if ( $marginal_match == 0 | ||||
20899 | && $maximum_line_index == 0 ); | ||||
20900 | last; | ||||
20901 | } | ||||
20902 | |||||
20903 | goto NO_MATCH; | ||||
20904 | } | ||||
20905 | |||||
20906 | # Calculate amount of padding required to fit this in. | ||||
20907 | # $pad is the number of spaces by which we must increase | ||||
20908 | # the current field to squeeze in this field. | ||||
20909 | my $pad = | ||||
20910 | length( $$rfields[$j] ) - $old_line->current_field_width($j); | ||||
20911 | if ( $j == 0 ) { $pad += $leading_space_count; } | ||||
20912 | |||||
20913 | # remember max pads to limit marginal cases | ||||
20914 | if ( $alignment_token ne '#' ) { | ||||
20915 | if ( $pad > $max_pad ) { $max_pad = $pad } | ||||
20916 | if ( $pad < $min_pad ) { $min_pad = $pad } | ||||
20917 | } | ||||
20918 | if ( $is_good_alignment{$alignment_token} ) { | ||||
20919 | $saw_good_alignment = 1; | ||||
20920 | } | ||||
20921 | |||||
20922 | # If patterns don't match, we have to be careful... | ||||
20923 | if ( $$old_rpatterns[$j] ne $$rpatterns[$j] ) { | ||||
20924 | |||||
20925 | # flag this as a marginal match since patterns differ | ||||
20926 | $marginal_match = 1 | ||||
20927 | if ( $marginal_match == 0 && $maximum_line_index == 0 ); | ||||
20928 | |||||
20929 | # We have to be very careful about aligning commas | ||||
20930 | # when the pattern's don't match, because it can be | ||||
20931 | # worse to create an alignment where none is needed | ||||
20932 | # than to omit one. Here's an example where the ','s | ||||
20933 | # are not in named containers. The first line below | ||||
20934 | # should not match the next two: | ||||
20935 | # ( $a, $b ) = ( $b, $r ); | ||||
20936 | # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 ); | ||||
20937 | # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 ); | ||||
20938 | if ( $alignment_token eq ',' ) { | ||||
20939 | |||||
20940 | # do not align commas unless they are in named containers | ||||
20941 | goto NO_MATCH unless ( $new_tok =~ /[A-Za-z]/ ); | ||||
20942 | } | ||||
20943 | |||||
20944 | # do not align parens unless patterns match; | ||||
20945 | # large ugly spaces can occur in math expressions. | ||||
20946 | elsif ( $alignment_token eq '(' ) { | ||||
20947 | |||||
20948 | # But we can allow a match if the parens don't | ||||
20949 | # require any padding. | ||||
20950 | if ( $pad != 0 ) { goto NO_MATCH } | ||||
20951 | } | ||||
20952 | |||||
20953 | # Handle an '=' alignment with different patterns to | ||||
20954 | # the left. | ||||
20955 | elsif ( $alignment_token eq '=' ) { | ||||
20956 | |||||
20957 | # It is best to be a little restrictive when | ||||
20958 | # aligning '=' tokens. Here is an example of | ||||
20959 | # two lines that we will not align: | ||||
20960 | # my $variable=6; | ||||
20961 | # $bb=4; | ||||
20962 | # The problem is that one is a 'my' declaration, | ||||
20963 | # and the other isn't, so they're not very similar. | ||||
20964 | # We will filter these out by comparing the first | ||||
20965 | # letter of the pattern. This is crude, but works | ||||
20966 | # well enough. | ||||
20967 | if ( | ||||
20968 | substr( $$old_rpatterns[$j], 0, 1 ) ne | ||||
20969 | substr( $$rpatterns[$j], 0, 1 ) ) | ||||
20970 | { | ||||
20971 | goto NO_MATCH; | ||||
20972 | } | ||||
20973 | |||||
20974 | # If we pass that test, we'll call it a marginal match. | ||||
20975 | # Here is an example of a marginal match: | ||||
20976 | # $done{$$op} = 1; | ||||
20977 | # $op = compile_bblock($op); | ||||
20978 | # The left tokens are both identifiers, but | ||||
20979 | # one accesses a hash and the other doesn't. | ||||
20980 | # We'll let this be a tentative match and undo | ||||
20981 | # it later if we don't find more than 2 lines | ||||
20982 | # in the group. | ||||
20983 | elsif ( $maximum_line_index == 0 ) { | ||||
20984 | $marginal_match = | ||||
20985 | 2; # =2 prevents being undone below | ||||
20986 | } | ||||
20987 | } | ||||
20988 | } | ||||
20989 | |||||
20990 | # Don't let line with fewer fields increase column widths | ||||
20991 | # ( align3.t ) | ||||
20992 | if ( $maximum_field_index > $jmax ) { | ||||
20993 | |||||
20994 | # Exception: suspend this rule to allow last lines to join | ||||
20995 | if ( $pad > 0 ) { goto NO_MATCH; } | ||||
20996 | } | ||||
20997 | } ## end for my $j ( 0 .. $jlimit) | ||||
20998 | |||||
20999 | # Turn off the "marginal match" flag in some cases... | ||||
21000 | # A "marginal match" occurs when the alignment tokens agree | ||||
21001 | # but there are differences in the other tokens (patterns). | ||||
21002 | # If we leave the marginal match flag set, then the rule is that we | ||||
21003 | # will align only if there are more than two lines in the group. | ||||
21004 | # We will turn of the flag if we almost have a match | ||||
21005 | # and either we have seen a good alignment token or we | ||||
21006 | # just need a small pad (2 spaces) to fit. These rules are | ||||
21007 | # the result of experimentation. Tokens which misaligned by just | ||||
21008 | # one or two characters are annoying. On the other hand, | ||||
21009 | # large gaps to less important alignment tokens are also annoying. | ||||
21010 | if ( $marginal_match == 1 | ||||
21011 | && $jmax == $maximum_field_index | ||||
21012 | && ( $saw_good_alignment || ( $max_pad < 3 && $min_pad > -3 ) ) | ||||
21013 | ) | ||||
21014 | { | ||||
21015 | $marginal_match = 0; | ||||
21016 | } | ||||
21017 | ##print "marginal=$marginal_match saw=$saw_good_alignment jmax=$jmax max=$maximum_field_index maxpad=$max_pad minpad=$min_pad\n"; | ||||
21018 | } | ||||
21019 | |||||
21020 | # We have a match (even if marginal). | ||||
21021 | # If the current line has fewer fields than the current group | ||||
21022 | # but otherwise matches, copy the remaining group fields to | ||||
21023 | # make it a perfect match. | ||||
21024 | if ( $maximum_field_index > $jmax ) { | ||||
21025 | my $comment = $$rfields[$jmax]; | ||||
21026 | for $jmax ( $jlimit .. $maximum_field_index ) { | ||||
21027 | $$rtokens[$jmax] = $$old_rtokens[$jmax]; | ||||
21028 | $$rfields[ ++$jmax ] = ''; | ||||
21029 | $$rpatterns[$jmax] = $$old_rpatterns[$jmax]; | ||||
21030 | } | ||||
21031 | $$rfields[$jmax] = $comment; | ||||
21032 | $new_line->set_jmax($jmax); | ||||
21033 | } | ||||
21034 | return; | ||||
21035 | |||||
21036 | NO_MATCH: | ||||
21037 | ##print "BUBBA: no match jmax=$jmax max=$maximum_field_index $group_list_type lines=$maximum_line_index token=$$old_rtokens[0]\n"; | ||||
21038 | my_flush(); | ||||
21039 | return; | ||||
21040 | } | ||||
21041 | } | ||||
21042 | |||||
21043 | sub check_fit { | ||||
21044 | |||||
21045 | return unless ( $maximum_line_index >= 0 ); | ||||
21046 | my $new_line = shift; | ||||
21047 | my $old_line = shift; | ||||
21048 | |||||
21049 | my $jmax = $new_line->get_jmax(); | ||||
21050 | my $leading_space_count = $new_line->get_leading_space_count(); | ||||
21051 | my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment(); | ||||
21052 | my $rtokens = $new_line->get_rtokens(); | ||||
21053 | my $rfields = $new_line->get_rfields(); | ||||
21054 | my $rpatterns = $new_line->get_rpatterns(); | ||||
21055 | |||||
21056 | my $group_list_type = $group_lines[0]->get_list_type(); | ||||
21057 | |||||
21058 | my $padding_so_far = 0; | ||||
21059 | my $padding_available = $old_line->get_available_space_on_right(); | ||||
21060 | |||||
21061 | # save current columns in case this doesn't work | ||||
21062 | save_alignment_columns(); | ||||
21063 | |||||
21064 | my ( $j, $pad, $eight ); | ||||
21065 | my $maximum_field_index = $old_line->get_jmax(); | ||||
21066 | for $j ( 0 .. $jmax ) { | ||||
21067 | |||||
21068 | $pad = length( $$rfields[$j] ) - $old_line->current_field_width($j); | ||||
21069 | |||||
21070 | if ( $j == 0 ) { | ||||
21071 | $pad += $leading_space_count; | ||||
21072 | } | ||||
21073 | |||||
21074 | # remember largest gap of the group, excluding gap to side comment | ||||
21075 | if ( $pad < 0 | ||||
21076 | && $group_maximum_gap < -$pad | ||||
21077 | && $j > 0 | ||||
21078 | && $j < $jmax - 1 ) | ||||
21079 | { | ||||
21080 | $group_maximum_gap = -$pad; | ||||
21081 | } | ||||
21082 | |||||
21083 | next if $pad < 0; | ||||
21084 | |||||
21085 | ## This patch helps sometimes, but it doesn't check to see if | ||||
21086 | ## the line is too long even without the side comment. It needs | ||||
21087 | ## to be reworked. | ||||
21088 | ##don't let a long token with no trailing side comment push | ||||
21089 | ##side comments out, or end a group. (sidecmt1.t) | ||||
21090 | ##next if ($j==$jmax-1 && length($$rfields[$jmax])==0); | ||||
21091 | |||||
21092 | # This line will need space; lets see if we want to accept it.. | ||||
21093 | if ( | ||||
21094 | |||||
21095 | # not if this won't fit | ||||
21096 | ( $pad > $padding_available ) | ||||
21097 | |||||
21098 | # previously, there were upper bounds placed on padding here | ||||
21099 | # (maximum_whitespace_columns), but they were not really helpful | ||||
21100 | |||||
21101 | ) | ||||
21102 | { | ||||
21103 | |||||
21104 | # revert to starting state then flush; things didn't work out | ||||
21105 | restore_alignment_columns(); | ||||
21106 | my_flush(); | ||||
21107 | last; | ||||
21108 | } | ||||
21109 | |||||
21110 | # patch to avoid excessive gaps in previous lines, | ||||
21111 | # due to a line of fewer fields. | ||||
21112 | # return join( ".", | ||||
21113 | # $self->{"dfi"}, $self->{"aa"}, $self->rsvd, $self->{"rd"}, | ||||
21114 | # $self->{"area"}, $self->{"id"}, $self->{"sel"} ); | ||||
21115 | next if ( $jmax < $maximum_field_index && $j == $jmax - 1 ); | ||||
21116 | |||||
21117 | # looks ok, squeeze this field in | ||||
21118 | $old_line->increase_field_width( $j, $pad ); | ||||
21119 | $padding_available -= $pad; | ||||
21120 | |||||
21121 | # remember largest gap of the group, excluding gap to side comment | ||||
21122 | if ( $pad > $group_maximum_gap && $j > 0 && $j < $jmax - 1 ) { | ||||
21123 | $group_maximum_gap = $pad; | ||||
21124 | } | ||||
21125 | } | ||||
21126 | } | ||||
21127 | |||||
21128 | sub add_to_group { | ||||
21129 | |||||
21130 | # The current line either starts a new alignment group or is | ||||
21131 | # accepted into the current alignment group. | ||||
21132 | my $new_line = shift; | ||||
21133 | $group_lines[ ++$maximum_line_index ] = $new_line; | ||||
21134 | |||||
21135 | # initialize field lengths if starting new group | ||||
21136 | if ( $maximum_line_index == 0 ) { | ||||
21137 | |||||
21138 | my $jmax = $new_line->get_jmax(); | ||||
21139 | my $rfields = $new_line->get_rfields(); | ||||
21140 | my $rtokens = $new_line->get_rtokens(); | ||||
21141 | my $j; | ||||
21142 | my $col = $new_line->get_leading_space_count(); | ||||
21143 | |||||
21144 | for $j ( 0 .. $jmax ) { | ||||
21145 | $col += length( $$rfields[$j] ); | ||||
21146 | |||||
21147 | # create initial alignments for the new group | ||||
21148 | my $token = ""; | ||||
21149 | if ( $j < $jmax ) { $token = $$rtokens[$j] } | ||||
21150 | my $alignment = make_alignment( $col, $token ); | ||||
21151 | $new_line->set_alignment( $j, $alignment ); | ||||
21152 | } | ||||
21153 | |||||
21154 | $maximum_jmax_seen = $jmax; | ||||
21155 | $minimum_jmax_seen = $jmax; | ||||
21156 | } | ||||
21157 | |||||
21158 | # use previous alignments otherwise | ||||
21159 | else { | ||||
21160 | my @new_alignments = | ||||
21161 | $group_lines[ $maximum_line_index - 1 ]->get_alignments(); | ||||
21162 | $new_line->set_alignments(@new_alignments); | ||||
21163 | } | ||||
21164 | |||||
21165 | # remember group jmax extremes for next call to valign_input | ||||
21166 | $previous_minimum_jmax_seen = $minimum_jmax_seen; | ||||
21167 | $previous_maximum_jmax_seen = $maximum_jmax_seen; | ||||
21168 | } | ||||
21169 | |||||
21170 | sub dump_array { | ||||
21171 | |||||
21172 | # debug routine to dump array contents | ||||
21173 | local $" = ')('; | ||||
21174 | print STDOUT "(@_)\n"; | ||||
21175 | } | ||||
21176 | |||||
21177 | # flush() sends the current Perl::Tidy::VerticalAligner group down the | ||||
21178 | # pipeline to Perl::Tidy::FileWriter. | ||||
21179 | |||||
21180 | # This is the external flush, which also empties the buffer and cache | ||||
21181 | sub flush { | ||||
21182 | |||||
21183 | # the buffer must be emptied first, then any cached text | ||||
21184 | dump_valign_buffer(); | ||||
21185 | |||||
21186 | if ( $maximum_line_index < 0 ) { | ||||
21187 | if ($cached_line_type) { | ||||
21188 | $seqno_string = $cached_seqno_string; | ||||
21189 | valign_output_step_C( $cached_line_text, | ||||
21190 | $cached_line_leading_space_count, | ||||
21191 | $last_level_written ); | ||||
21192 | $cached_line_type = 0; | ||||
21193 | $cached_line_text = ""; | ||||
21194 | $cached_seqno_string = ""; | ||||
21195 | } | ||||
21196 | } | ||||
21197 | else { | ||||
21198 | my_flush(); | ||||
21199 | } | ||||
21200 | } | ||||
21201 | |||||
21202 | sub reduce_valign_buffer_indentation { | ||||
21203 | |||||
21204 | my ($diff) = @_; | ||||
21205 | if ( $valign_buffer_filling && $diff ) { | ||||
21206 | my $max_valign_buffer = @valign_buffer; | ||||
21207 | for ( my $i = 0 ; $i < $max_valign_buffer ; $i++ ) { | ||||
21208 | my ( $line, $leading_space_count, $level ) = | ||||
21209 | @{ $valign_buffer[$i] }; | ||||
21210 | my $ws = substr( $line, 0, $diff ); | ||||
21211 | if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) { | ||||
21212 | $line = substr( $line, $diff ); | ||||
21213 | } | ||||
21214 | if ( $leading_space_count >= $diff ) { | ||||
21215 | $leading_space_count -= $diff; | ||||
21216 | $level = level_change( $leading_space_count, $diff, $level ); | ||||
21217 | } | ||||
21218 | $valign_buffer[$i] = [ $line, $leading_space_count, $level ]; | ||||
21219 | } | ||||
21220 | } | ||||
21221 | } | ||||
21222 | |||||
21223 | sub level_change { | ||||
21224 | |||||
21225 | # compute decrease in level when we remove $diff spaces from the | ||||
21226 | # leading spaces | ||||
21227 | my ( $leading_space_count, $diff, $level ) = @_; | ||||
21228 | if ($rOpts_indent_columns) { | ||||
21229 | my $olev = | ||||
21230 | int( ( $leading_space_count + $diff ) / $rOpts_indent_columns ); | ||||
21231 | my $nlev = int( $leading_space_count / $rOpts_indent_columns ); | ||||
21232 | $level -= ( $olev - $nlev ); | ||||
21233 | if ( $level < 0 ) { $level = 0 } | ||||
21234 | } | ||||
21235 | return $level; | ||||
21236 | } | ||||
21237 | |||||
21238 | sub dump_valign_buffer { | ||||
21239 | if (@valign_buffer) { | ||||
21240 | foreach (@valign_buffer) { | ||||
21241 | valign_output_step_D( @{$_} ); | ||||
21242 | } | ||||
21243 | @valign_buffer = (); | ||||
21244 | } | ||||
21245 | $valign_buffer_filling = ""; | ||||
21246 | } | ||||
21247 | |||||
21248 | # This is the internal flush, which leaves the cache intact | ||||
21249 | sub my_flush { | ||||
21250 | |||||
21251 | return if ( $maximum_line_index < 0 ); | ||||
21252 | |||||
21253 | # handle a group of comment lines | ||||
21254 | if ( $group_type eq 'COMMENT' ) { | ||||
21255 | |||||
21256 | VALIGN_DEBUG_FLAG_APPEND0 && do { | ||||
21257 | my ( $a, $b, $c ) = caller(); | ||||
21258 | print STDOUT | ||||
21259 | "APPEND0: Flush called from $a $b $c for COMMENT group: lines=$maximum_line_index \n"; | ||||
21260 | |||||
21261 | }; | ||||
21262 | my $leading_space_count = $comment_leading_space_count; | ||||
21263 | my $leading_string = get_leading_string($leading_space_count); | ||||
21264 | |||||
21265 | # zero leading space count if any lines are too long | ||||
21266 | my $max_excess = 0; | ||||
21267 | for my $i ( 0 .. $maximum_line_index ) { | ||||
21268 | my $str = $group_lines[$i]; | ||||
21269 | my $excess = | ||||
21270 | length($str) + | ||||
21271 | $leading_space_count - | ||||
21272 | maximum_line_length_for_level($group_level); | ||||
21273 | if ( $excess > $max_excess ) { | ||||
21274 | $max_excess = $excess; | ||||
21275 | } | ||||
21276 | } | ||||
21277 | |||||
21278 | if ( $max_excess > 0 ) { | ||||
21279 | $leading_space_count -= $max_excess; | ||||
21280 | if ( $leading_space_count < 0 ) { $leading_space_count = 0 } | ||||
21281 | $last_outdented_line_at = | ||||
21282 | $file_writer_object->get_output_line_number(); | ||||
21283 | unless ($outdented_line_count) { | ||||
21284 | $first_outdented_line_at = $last_outdented_line_at; | ||||
21285 | } | ||||
21286 | $outdented_line_count += ( $maximum_line_index + 1 ); | ||||
21287 | } | ||||
21288 | |||||
21289 | # write the group of lines | ||||
21290 | my $outdent_long_lines = 0; | ||||
21291 | for my $i ( 0 .. $maximum_line_index ) { | ||||
21292 | valign_output_step_B( $leading_space_count, $group_lines[$i], 0, | ||||
21293 | $outdent_long_lines, "", $group_level ); | ||||
21294 | } | ||||
21295 | } | ||||
21296 | |||||
21297 | # handle a group of code lines | ||||
21298 | else { | ||||
21299 | |||||
21300 | VALIGN_DEBUG_FLAG_APPEND0 && do { | ||||
21301 | my $group_list_type = $group_lines[0]->get_list_type(); | ||||
21302 | my ( $a, $b, $c ) = caller(); | ||||
21303 | my $maximum_field_index = $group_lines[0]->get_jmax(); | ||||
21304 | print STDOUT | ||||
21305 | "APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n"; | ||||
21306 | |||||
21307 | }; | ||||
21308 | |||||
21309 | # some small groups are best left unaligned | ||||
21310 | my $do_not_align = decide_if_aligned(); | ||||
21311 | |||||
21312 | # optimize side comment location | ||||
21313 | $do_not_align = adjust_side_comment($do_not_align); | ||||
21314 | |||||
21315 | # recover spaces for -lp option if possible | ||||
21316 | my $extra_leading_spaces = get_extra_leading_spaces(); | ||||
21317 | |||||
21318 | # all lines of this group have the same basic leading spacing | ||||
21319 | my $group_leader_length = $group_lines[0]->get_leading_space_count(); | ||||
21320 | |||||
21321 | # add extra leading spaces if helpful | ||||
21322 | my $min_ci_gap = improve_continuation_indentation( $do_not_align, | ||||
21323 | $group_leader_length ); | ||||
21324 | |||||
21325 | # loop to output all lines | ||||
21326 | for my $i ( 0 .. $maximum_line_index ) { | ||||
21327 | my $line = $group_lines[$i]; | ||||
21328 | valign_output_step_A( $line, $min_ci_gap, $do_not_align, | ||||
21329 | $group_leader_length, $extra_leading_spaces ); | ||||
21330 | } | ||||
21331 | } | ||||
21332 | initialize_for_new_group(); | ||||
21333 | } | ||||
21334 | |||||
21335 | sub decide_if_aligned { | ||||
21336 | |||||
21337 | # Do not try to align two lines which are not really similar | ||||
21338 | return unless $maximum_line_index == 1; | ||||
21339 | return if ($is_matching_terminal_line); | ||||
21340 | |||||
21341 | my $group_list_type = $group_lines[0]->get_list_type(); | ||||
21342 | |||||
21343 | my $do_not_align = ( | ||||
21344 | |||||
21345 | # always align lists | ||||
21346 | !$group_list_type | ||||
21347 | |||||
21348 | && ( | ||||
21349 | |||||
21350 | # don't align if it was just a marginal match | ||||
21351 | $marginal_match | ||||
21352 | |||||
21353 | # don't align two lines with big gap | ||||
21354 | || $group_maximum_gap > 12 | ||||
21355 | |||||
21356 | # or lines with differing number of alignment tokens | ||||
21357 | # TODO: this could be improved. It occasionally rejects | ||||
21358 | # good matches. | ||||
21359 | || $previous_maximum_jmax_seen != $previous_minimum_jmax_seen | ||||
21360 | ) | ||||
21361 | ); | ||||
21362 | |||||
21363 | # But try to convert them into a simple comment group if the first line | ||||
21364 | # a has side comment | ||||
21365 | my $rfields = $group_lines[0]->get_rfields(); | ||||
21366 | my $maximum_field_index = $group_lines[0]->get_jmax(); | ||||
21367 | if ( $do_not_align | ||||
21368 | && ( $maximum_line_index > 0 ) | ||||
21369 | && ( length( $$rfields[$maximum_field_index] ) > 0 ) ) | ||||
21370 | { | ||||
21371 | combine_fields(); | ||||
21372 | $do_not_align = 0; | ||||
21373 | } | ||||
21374 | return $do_not_align; | ||||
21375 | } | ||||
21376 | |||||
21377 | sub adjust_side_comment { | ||||
21378 | |||||
21379 | my $do_not_align = shift; | ||||
21380 | |||||
21381 | # let's see if we can move the side comment field out a little | ||||
21382 | # to improve readability (the last field is always a side comment field) | ||||
21383 | my $have_side_comment = 0; | ||||
21384 | my $first_side_comment_line = -1; | ||||
21385 | my $maximum_field_index = $group_lines[0]->get_jmax(); | ||||
21386 | for my $i ( 0 .. $maximum_line_index ) { | ||||
21387 | my $line = $group_lines[$i]; | ||||
21388 | |||||
21389 | if ( length( $line->get_rfields()->[$maximum_field_index] ) ) { | ||||
21390 | $have_side_comment = 1; | ||||
21391 | $first_side_comment_line = $i; | ||||
21392 | last; | ||||
21393 | } | ||||
21394 | } | ||||
21395 | |||||
21396 | my $kmax = $maximum_field_index + 1; | ||||
21397 | |||||
21398 | if ($have_side_comment) { | ||||
21399 | |||||
21400 | my $line = $group_lines[0]; | ||||
21401 | |||||
21402 | # the maximum space without exceeding the line length: | ||||
21403 | my $avail = $line->get_available_space_on_right(); | ||||
21404 | |||||
21405 | # try to use the previous comment column | ||||
21406 | my $side_comment_column = $line->get_column( $kmax - 2 ); | ||||
21407 | my $move = $last_comment_column - $side_comment_column; | ||||
21408 | |||||
21409 | ## my $sc_line0 = $side_comment_history[0]->[0]; | ||||
21410 | ## my $sc_col0 = $side_comment_history[0]->[1]; | ||||
21411 | ## my $sc_line1 = $side_comment_history[1]->[0]; | ||||
21412 | ## my $sc_col1 = $side_comment_history[1]->[1]; | ||||
21413 | ## my $sc_line2 = $side_comment_history[2]->[0]; | ||||
21414 | ## my $sc_col2 = $side_comment_history[2]->[1]; | ||||
21415 | ## | ||||
21416 | ## # FUTURE UPDATES: | ||||
21417 | ## # Be sure to ignore 'do not align' and '} # end comments' | ||||
21418 | ## # Find first $move > 0 and $move <= $avail as follows: | ||||
21419 | ## # 1. try sc_col1 if sc_col1 == sc_col0 && (line-sc_line0) < 12 | ||||
21420 | ## # 2. try sc_col2 if (line-sc_line2) < 12 | ||||
21421 | ## # 3. try min possible space, plus up to 8, | ||||
21422 | ## # 4. try min possible space | ||||
21423 | |||||
21424 | if ( $kmax > 0 && !$do_not_align ) { | ||||
21425 | |||||
21426 | # but if this doesn't work, give up and use the minimum space | ||||
21427 | if ( $move > $avail ) { | ||||
21428 | $move = $rOpts_minimum_space_to_comment - 1; | ||||
21429 | } | ||||
21430 | |||||
21431 | # but we want some minimum space to the comment | ||||
21432 | my $min_move = $rOpts_minimum_space_to_comment - 1; | ||||
21433 | if ( $move >= 0 | ||||
21434 | && $last_side_comment_length > 0 | ||||
21435 | && ( $first_side_comment_line == 0 ) | ||||
21436 | && $group_level == $last_level_written ) | ||||
21437 | { | ||||
21438 | $min_move = 0; | ||||
21439 | } | ||||
21440 | |||||
21441 | if ( $move < $min_move ) { | ||||
21442 | $move = $min_move; | ||||
21443 | } | ||||
21444 | |||||
21445 | # previously, an upper bound was placed on $move here, | ||||
21446 | # (maximum_space_to_comment), but it was not helpful | ||||
21447 | |||||
21448 | # don't exceed the available space | ||||
21449 | if ( $move > $avail ) { $move = $avail } | ||||
21450 | |||||
21451 | # we can only increase space, never decrease | ||||
21452 | if ( $move > 0 ) { | ||||
21453 | $line->increase_field_width( $maximum_field_index - 1, $move ); | ||||
21454 | } | ||||
21455 | |||||
21456 | # remember this column for the next group | ||||
21457 | $last_comment_column = $line->get_column( $kmax - 2 ); | ||||
21458 | } | ||||
21459 | else { | ||||
21460 | |||||
21461 | # try to at least line up the existing side comment location | ||||
21462 | if ( $kmax > 0 && $move > 0 && $move < $avail ) { | ||||
21463 | $line->increase_field_width( $maximum_field_index - 1, $move ); | ||||
21464 | $do_not_align = 0; | ||||
21465 | } | ||||
21466 | |||||
21467 | # reset side comment column if we can't align | ||||
21468 | else { | ||||
21469 | forget_side_comment(); | ||||
21470 | } | ||||
21471 | } | ||||
21472 | } | ||||
21473 | return $do_not_align; | ||||
21474 | } | ||||
21475 | |||||
21476 | sub improve_continuation_indentation { | ||||
21477 | my ( $do_not_align, $group_leader_length ) = @_; | ||||
21478 | |||||
21479 | # See if we can increase the continuation indentation | ||||
21480 | # to move all continuation lines closer to the next field | ||||
21481 | # (unless it is a comment). | ||||
21482 | # | ||||
21483 | # '$min_ci_gap'is the extra indentation that we may need to introduce. | ||||
21484 | # We will only introduce this to fields which already have some ci. | ||||
21485 | # Without this variable, we would occasionally get something like this | ||||
21486 | # (Complex.pm): | ||||
21487 | # | ||||
21488 | # use overload '+' => \&plus, | ||||
21489 | # '-' => \&minus, | ||||
21490 | # '*' => \&multiply, | ||||
21491 | # ... | ||||
21492 | # 'tan' => \&tan, | ||||
21493 | # 'atan2' => \&atan2, | ||||
21494 | # | ||||
21495 | # Whereas with this variable, we can shift variables over to get this: | ||||
21496 | # | ||||
21497 | # use overload '+' => \&plus, | ||||
21498 | # '-' => \&minus, | ||||
21499 | # '*' => \&multiply, | ||||
21500 | # ... | ||||
21501 | # 'tan' => \&tan, | ||||
21502 | # 'atan2' => \&atan2, | ||||
21503 | |||||
21504 | ## Deactivated#################### | ||||
21505 | # The trouble with this patch is that it may, for example, | ||||
21506 | # move in some 'or's or ':'s, and leave some out, so that the | ||||
21507 | # left edge alignment suffers. | ||||
21508 | return 0; | ||||
21509 | ########################################### | ||||
21510 | |||||
21511 | my $maximum_field_index = $group_lines[0]->get_jmax(); | ||||
21512 | |||||
21513 | my $min_ci_gap = maximum_line_length_for_level($group_level); | ||||
21514 | if ( $maximum_field_index > 1 && !$do_not_align ) { | ||||
21515 | |||||
21516 | for my $i ( 0 .. $maximum_line_index ) { | ||||
21517 | my $line = $group_lines[$i]; | ||||
21518 | my $leading_space_count = $line->get_leading_space_count(); | ||||
21519 | my $rfields = $line->get_rfields(); | ||||
21520 | |||||
21521 | my $gap = | ||||
21522 | $line->get_column(0) - | ||||
21523 | $leading_space_count - | ||||
21524 | length( $$rfields[0] ); | ||||
21525 | |||||
21526 | if ( $leading_space_count > $group_leader_length ) { | ||||
21527 | if ( $gap < $min_ci_gap ) { $min_ci_gap = $gap } | ||||
21528 | } | ||||
21529 | } | ||||
21530 | |||||
21531 | if ( $min_ci_gap >= maximum_line_length_for_level($group_level) ) { | ||||
21532 | $min_ci_gap = 0; | ||||
21533 | } | ||||
21534 | } | ||||
21535 | else { | ||||
21536 | $min_ci_gap = 0; | ||||
21537 | } | ||||
21538 | return $min_ci_gap; | ||||
21539 | } | ||||
21540 | |||||
21541 | sub valign_output_step_A { | ||||
21542 | |||||
21543 | ############################################################### | ||||
21544 | # This is Step A in writing vertically aligned lines. | ||||
21545 | # The line is prepared according to the alignments which have | ||||
21546 | # been found and shipped to the next step. | ||||
21547 | ############################################################### | ||||
21548 | |||||
21549 | my ( $line, $min_ci_gap, $do_not_align, $group_leader_length, | ||||
21550 | $extra_leading_spaces ) | ||||
21551 | = @_; | ||||
21552 | my $rfields = $line->get_rfields(); | ||||
21553 | my $leading_space_count = $line->get_leading_space_count(); | ||||
21554 | my $outdent_long_lines = $line->get_outdent_long_lines(); | ||||
21555 | my $maximum_field_index = $line->get_jmax(); | ||||
21556 | my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags(); | ||||
21557 | |||||
21558 | # add any extra spaces | ||||
21559 | if ( $leading_space_count > $group_leader_length ) { | ||||
21560 | $leading_space_count += $min_ci_gap; | ||||
21561 | } | ||||
21562 | |||||
21563 | my $str = $$rfields[0]; | ||||
21564 | |||||
21565 | # loop to concatenate all fields of this line and needed padding | ||||
21566 | my $total_pad_count = 0; | ||||
21567 | my ( $j, $pad ); | ||||
21568 | for $j ( 1 .. $maximum_field_index ) { | ||||
21569 | |||||
21570 | # skip zero-length side comments | ||||
21571 | last | ||||
21572 | if ( ( $j == $maximum_field_index ) | ||||
21573 | && ( !defined( $$rfields[$j] ) || ( length( $$rfields[$j] ) == 0 ) ) | ||||
21574 | ); | ||||
21575 | |||||
21576 | # compute spaces of padding before this field | ||||
21577 | my $col = $line->get_column( $j - 1 ); | ||||
21578 | $pad = $col - ( length($str) + $leading_space_count ); | ||||
21579 | |||||
21580 | if ($do_not_align) { | ||||
21581 | $pad = | ||||
21582 | ( $j < $maximum_field_index ) | ||||
21583 | ? 0 | ||||
21584 | : $rOpts_minimum_space_to_comment - 1; | ||||
21585 | } | ||||
21586 | |||||
21587 | # if the -fpsc flag is set, move the side comment to the selected | ||||
21588 | # column if and only if it is possible, ignoring constraints on | ||||
21589 | # line length and minimum space to comment | ||||
21590 | if ( $rOpts_fixed_position_side_comment && $j == $maximum_field_index ) | ||||
21591 | { | ||||
21592 | my $newpad = $pad + $rOpts_fixed_position_side_comment - $col - 1; | ||||
21593 | if ( $newpad >= 0 ) { $pad = $newpad; } | ||||
21594 | } | ||||
21595 | |||||
21596 | # accumulate the padding | ||||
21597 | if ( $pad > 0 ) { $total_pad_count += $pad; } | ||||
21598 | |||||
21599 | # add this field | ||||
21600 | if ( !defined $$rfields[$j] ) { | ||||
21601 | write_diagnostics("UNDEFined field at j=$j\n"); | ||||
21602 | } | ||||
21603 | |||||
21604 | # only add padding when we have a finite field; | ||||
21605 | # this avoids extra terminal spaces if we have empty fields | ||||
21606 | if ( length( $$rfields[$j] ) > 0 ) { | ||||
21607 | $str .= ' ' x $total_pad_count; | ||||
21608 | $total_pad_count = 0; | ||||
21609 | $str .= $$rfields[$j]; | ||||
21610 | } | ||||
21611 | else { | ||||
21612 | $total_pad_count = 0; | ||||
21613 | } | ||||
21614 | |||||
21615 | # update side comment history buffer | ||||
21616 | if ( $j == $maximum_field_index ) { | ||||
21617 | my $lineno = $file_writer_object->get_output_line_number(); | ||||
21618 | shift @side_comment_history; | ||||
21619 | push @side_comment_history, [ $lineno, $col ]; | ||||
21620 | } | ||||
21621 | } | ||||
21622 | |||||
21623 | my $side_comment_length = ( length( $$rfields[$maximum_field_index] ) ); | ||||
21624 | |||||
21625 | # ship this line off | ||||
21626 | valign_output_step_B( $leading_space_count + $extra_leading_spaces, | ||||
21627 | $str, $side_comment_length, $outdent_long_lines, | ||||
21628 | $rvertical_tightness_flags, $group_level ); | ||||
21629 | } | ||||
21630 | |||||
21631 | sub get_extra_leading_spaces { | ||||
21632 | |||||
21633 | #---------------------------------------------------------- | ||||
21634 | # Define any extra indentation space (for the -lp option). | ||||
21635 | # Here is why: | ||||
21636 | # If a list has side comments, sub scan_list must dump the | ||||
21637 | # list before it sees everything. When this happens, it sets | ||||
21638 | # the indentation to the standard scheme, but notes how | ||||
21639 | # many spaces it would have liked to use. We may be able | ||||
21640 | # to recover that space here in the event that all of the | ||||
21641 | # lines of a list are back together again. | ||||
21642 | #---------------------------------------------------------- | ||||
21643 | |||||
21644 | my $extra_leading_spaces = 0; | ||||
21645 | if ($extra_indent_ok) { | ||||
21646 | my $object = $group_lines[0]->get_indentation(); | ||||
21647 | if ( ref($object) ) { | ||||
21648 | my $extra_indentation_spaces_wanted = | ||||
21649 | get_RECOVERABLE_SPACES($object); | ||||
21650 | |||||
21651 | # all indentation objects must be the same | ||||
21652 | my $i; | ||||
21653 | for $i ( 1 .. $maximum_line_index ) { | ||||
21654 | if ( $object != $group_lines[$i]->get_indentation() ) { | ||||
21655 | $extra_indentation_spaces_wanted = 0; | ||||
21656 | last; | ||||
21657 | } | ||||
21658 | } | ||||
21659 | |||||
21660 | if ($extra_indentation_spaces_wanted) { | ||||
21661 | |||||
21662 | # the maximum space without exceeding the line length: | ||||
21663 | my $avail = $group_lines[0]->get_available_space_on_right(); | ||||
21664 | $extra_leading_spaces = | ||||
21665 | ( $avail > $extra_indentation_spaces_wanted ) | ||||
21666 | ? $extra_indentation_spaces_wanted | ||||
21667 | : $avail; | ||||
21668 | |||||
21669 | # update the indentation object because with -icp the terminal | ||||
21670 | # ');' will use the same adjustment. | ||||
21671 | $object->permanently_decrease_AVAILABLE_SPACES( | ||||
21672 | -$extra_leading_spaces ); | ||||
21673 | } | ||||
21674 | } | ||||
21675 | } | ||||
21676 | return $extra_leading_spaces; | ||||
21677 | } | ||||
21678 | |||||
21679 | sub combine_fields { | ||||
21680 | |||||
21681 | # combine all fields except for the comment field ( sidecmt.t ) | ||||
21682 | # Uses global variables: | ||||
21683 | # @group_lines | ||||
21684 | # $maximum_line_index | ||||
21685 | my ( $j, $k ); | ||||
21686 | my $maximum_field_index = $group_lines[0]->get_jmax(); | ||||
21687 | for ( $j = 0 ; $j <= $maximum_line_index ; $j++ ) { | ||||
21688 | my $line = $group_lines[$j]; | ||||
21689 | my $rfields = $line->get_rfields(); | ||||
21690 | foreach ( 1 .. $maximum_field_index - 1 ) { | ||||
21691 | $$rfields[0] .= $$rfields[$_]; | ||||
21692 | } | ||||
21693 | $$rfields[1] = $$rfields[$maximum_field_index]; | ||||
21694 | |||||
21695 | $line->set_jmax(1); | ||||
21696 | $line->set_column( 0, 0 ); | ||||
21697 | $line->set_column( 1, 0 ); | ||||
21698 | |||||
21699 | } | ||||
21700 | $maximum_field_index = 1; | ||||
21701 | |||||
21702 | for $j ( 0 .. $maximum_line_index ) { | ||||
21703 | my $line = $group_lines[$j]; | ||||
21704 | my $rfields = $line->get_rfields(); | ||||
21705 | for $k ( 0 .. $maximum_field_index ) { | ||||
21706 | my $pad = length( $$rfields[$k] ) - $line->current_field_width($k); | ||||
21707 | if ( $k == 0 ) { | ||||
21708 | $pad += $group_lines[$j]->get_leading_space_count(); | ||||
21709 | } | ||||
21710 | |||||
21711 | if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) } | ||||
21712 | |||||
21713 | } | ||||
21714 | } | ||||
21715 | } | ||||
21716 | |||||
21717 | sub get_output_line_number { | ||||
21718 | |||||
21719 | # the output line number reported to a caller is the number of items | ||||
21720 | # written plus the number of items in the buffer | ||||
21721 | my $self = shift; | ||||
21722 | 1 + $maximum_line_index + $file_writer_object->get_output_line_number(); | ||||
21723 | } | ||||
21724 | |||||
21725 | sub valign_output_step_B { | ||||
21726 | |||||
21727 | ############################################################### | ||||
21728 | # This is Step B in writing vertically aligned lines. | ||||
21729 | # Vertical tightness is applied according to preset flags. | ||||
21730 | # In particular this routine handles stacking of opening | ||||
21731 | # and closing tokens. | ||||
21732 | ############################################################### | ||||
21733 | |||||
21734 | my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines, | ||||
21735 | $rvertical_tightness_flags, $level ) | ||||
21736 | = @_; | ||||
21737 | |||||
21738 | # handle outdenting of long lines: | ||||
21739 | if ($outdent_long_lines) { | ||||
21740 | my $excess = | ||||
21741 | length($str) - | ||||
21742 | $side_comment_length + | ||||
21743 | $leading_space_count - | ||||
21744 | maximum_line_length_for_level($level); | ||||
21745 | if ( $excess > 0 ) { | ||||
21746 | $leading_space_count = 0; | ||||
21747 | $last_outdented_line_at = | ||||
21748 | $file_writer_object->get_output_line_number(); | ||||
21749 | |||||
21750 | unless ($outdented_line_count) { | ||||
21751 | $first_outdented_line_at = $last_outdented_line_at; | ||||
21752 | } | ||||
21753 | $outdented_line_count++; | ||||
21754 | } | ||||
21755 | } | ||||
21756 | |||||
21757 | # Make preliminary leading whitespace. It could get changed | ||||
21758 | # later by entabbing, so we have to keep track of any changes | ||||
21759 | # to the leading_space_count from here on. | ||||
21760 | my $leading_string = | ||||
21761 | $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : ""; | ||||
21762 | |||||
21763 | # Unpack any recombination data; it was packed by | ||||
21764 | # sub send_lines_to_vertical_aligner. Contents: | ||||
21765 | # | ||||
21766 | # [0] type: 1=opening non-block 2=closing non-block | ||||
21767 | # 3=opening block brace 4=closing block brace | ||||
21768 | # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok | ||||
21769 | # if closing: spaces of padding to use | ||||
21770 | # [2] sequence number of container | ||||
21771 | # [3] valid flag: do not append if this flag is false | ||||
21772 | # | ||||
21773 | my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg, | ||||
21774 | $seqno_end ); | ||||
21775 | if ($rvertical_tightness_flags) { | ||||
21776 | ( | ||||
21777 | $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg, | ||||
21778 | $seqno_end | ||||
21779 | ) = @{$rvertical_tightness_flags}; | ||||
21780 | } | ||||
21781 | |||||
21782 | $seqno_string = $seqno_end; | ||||
21783 | |||||
21784 | # handle any cached line .. | ||||
21785 | # either append this line to it or write it out | ||||
21786 | if ( length($cached_line_text) ) { | ||||
21787 | |||||
21788 | # Dump an invalid cached line | ||||
21789 | if ( !$cached_line_valid ) { | ||||
21790 | valign_output_step_C( $cached_line_text, | ||||
21791 | $cached_line_leading_space_count, | ||||
21792 | $last_level_written ); | ||||
21793 | } | ||||
21794 | |||||
21795 | # Handle cached line ending in OPENING tokens | ||||
21796 | elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) { | ||||
21797 | |||||
21798 | my $gap = $leading_space_count - length($cached_line_text); | ||||
21799 | |||||
21800 | # handle option of just one tight opening per line: | ||||
21801 | if ( $cached_line_flag == 1 ) { | ||||
21802 | if ( defined($open_or_close) && $open_or_close == 1 ) { | ||||
21803 | $gap = -1; | ||||
21804 | } | ||||
21805 | } | ||||
21806 | |||||
21807 | if ( $gap >= 0 && defined($seqno_beg) ) { | ||||
21808 | $leading_string = $cached_line_text . ' ' x $gap; | ||||
21809 | $leading_space_count = $cached_line_leading_space_count; | ||||
21810 | $seqno_string = $cached_seqno_string . ':' . $seqno_beg; | ||||
21811 | $level = $last_level_written; | ||||
21812 | } | ||||
21813 | else { | ||||
21814 | valign_output_step_C( $cached_line_text, | ||||
21815 | $cached_line_leading_space_count, | ||||
21816 | $last_level_written ); | ||||
21817 | } | ||||
21818 | } | ||||
21819 | |||||
21820 | # Handle cached line ending in CLOSING tokens | ||||
21821 | else { | ||||
21822 | my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str; | ||||
21823 | if ( | ||||
21824 | |||||
21825 | # The new line must start with container | ||||
21826 | $seqno_beg | ||||
21827 | |||||
21828 | # The container combination must be okay.. | ||||
21829 | && ( | ||||
21830 | |||||
21831 | # okay to combine like types | ||||
21832 | ( $open_or_close == $cached_line_type ) | ||||
21833 | |||||
21834 | # closing block brace may append to non-block | ||||
21835 | || ( $cached_line_type == 2 && $open_or_close == 4 ) | ||||
21836 | |||||
21837 | # something like ');' | ||||
21838 | || ( !$open_or_close && $cached_line_type == 2 ) | ||||
21839 | |||||
21840 | ) | ||||
21841 | |||||
21842 | # The combined line must fit | ||||
21843 | && ( | ||||
21844 | length($test_line) <= | ||||
21845 | maximum_line_length_for_level($last_level_written) ) | ||||
21846 | ) | ||||
21847 | { | ||||
21848 | |||||
21849 | $seqno_string = $cached_seqno_string . ':' . $seqno_beg; | ||||
21850 | |||||
21851 | # Patch to outdent closing tokens ending # in ');' | ||||
21852 | # If we are joining a line like ');' to a previous stacked | ||||
21853 | # set of closing tokens, then decide if we may outdent the | ||||
21854 | # combined stack to the indentation of the ');'. Since we | ||||
21855 | # should not normally outdent any of the other tokens more than | ||||
21856 | # the indentation of the lines that contained them, we will | ||||
21857 | # only do this if all of the corresponding opening | ||||
21858 | # tokens were on the same line. This can happen with | ||||
21859 | # -sot and -sct. For example, it is ok here: | ||||
21860 | # __PACKAGE__->load_components( qw( | ||||
21861 | # PK::Auto | ||||
21862 | # Core | ||||
21863 | # )); | ||||
21864 | # | ||||
21865 | # But, for example, we do not outdent in this example because | ||||
21866 | # that would put the closing sub brace out farther than the | ||||
21867 | # opening sub brace: | ||||
21868 | # | ||||
21869 | # perltidy -sot -sct | ||||
21870 | # $c->Tk::bind( | ||||
21871 | # '<Control-f>' => sub { | ||||
21872 | # my ($c) = @_; | ||||
21873 | # my $e = $c->XEvent; | ||||
21874 | # itemsUnderArea $c; | ||||
21875 | # } ); | ||||
21876 | # | ||||
21877 | if ( $str =~ /^\);/ && $cached_line_text =~ /^[\)\}\]\s]*$/ ) { | ||||
21878 | |||||
21879 | # The way to tell this is if the stacked sequence numbers | ||||
21880 | # of this output line are the reverse of the stacked | ||||
21881 | # sequence numbers of the previous non-blank line of | ||||
21882 | # sequence numbers. So we can join if the previous | ||||
21883 | # nonblank string of tokens is the mirror image. For | ||||
21884 | # example if stack )}] is 13:8:6 then we are looking for a | ||||
21885 | # leading stack like [{( which is 6:8:13 We only need to | ||||
21886 | # check the two ends, because the intermediate tokens must | ||||
21887 | # fall in order. Note on speed: having to split on colons | ||||
21888 | # and eliminate multiple colons might appear to be slow, | ||||
21889 | # but it's not an issue because we almost never come | ||||
21890 | # through here. In a typical file we don't. | ||||
21891 | $seqno_string =~ s/^:+//; | ||||
21892 | $last_nonblank_seqno_string =~ s/^:+//; | ||||
21893 | $seqno_string =~ s/:+/:/g; | ||||
21894 | $last_nonblank_seqno_string =~ s/:+/:/g; | ||||
21895 | |||||
21896 | # how many spaces can we outdent? | ||||
21897 | my $diff = | ||||
21898 | $cached_line_leading_space_count - $leading_space_count; | ||||
21899 | if ( $diff > 0 | ||||
21900 | && length($seqno_string) | ||||
21901 | && length($last_nonblank_seqno_string) == | ||||
21902 | length($seqno_string) ) | ||||
21903 | { | ||||
21904 | my @seqno_last = | ||||
21905 | ( split ':', $last_nonblank_seqno_string ); | ||||
21906 | my @seqno_now = ( split ':', $seqno_string ); | ||||
21907 | if ( $seqno_now[-1] == $seqno_last[0] | ||||
21908 | && $seqno_now[0] == $seqno_last[-1] ) | ||||
21909 | { | ||||
21910 | |||||
21911 | # OK to outdent .. | ||||
21912 | # for absolute safety, be sure we only remove | ||||
21913 | # whitespace | ||||
21914 | my $ws = substr( $test_line, 0, $diff ); | ||||
21915 | if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) { | ||||
21916 | |||||
21917 | $test_line = substr( $test_line, $diff ); | ||||
21918 | $cached_line_leading_space_count -= $diff; | ||||
21919 | $last_level_written = | ||||
21920 | level_change( | ||||
21921 | $cached_line_leading_space_count, | ||||
21922 | $diff, $last_level_written ); | ||||
21923 | reduce_valign_buffer_indentation($diff); | ||||
21924 | } | ||||
21925 | |||||
21926 | # shouldn't happen, but not critical: | ||||
21927 | ##else { | ||||
21928 | ## ERROR transferring indentation here | ||||
21929 | ##} | ||||
21930 | } | ||||
21931 | } | ||||
21932 | } | ||||
21933 | |||||
21934 | $str = $test_line; | ||||
21935 | $leading_string = ""; | ||||
21936 | $leading_space_count = $cached_line_leading_space_count; | ||||
21937 | $level = $last_level_written; | ||||
21938 | } | ||||
21939 | else { | ||||
21940 | valign_output_step_C( $cached_line_text, | ||||
21941 | $cached_line_leading_space_count, | ||||
21942 | $last_level_written ); | ||||
21943 | } | ||||
21944 | } | ||||
21945 | } | ||||
21946 | $cached_line_type = 0; | ||||
21947 | $cached_line_text = ""; | ||||
21948 | |||||
21949 | # make the line to be written | ||||
21950 | my $line = $leading_string . $str; | ||||
21951 | |||||
21952 | # write or cache this line | ||||
21953 | if ( !$open_or_close || $side_comment_length > 0 ) { | ||||
21954 | valign_output_step_C( $line, $leading_space_count, $level ); | ||||
21955 | } | ||||
21956 | else { | ||||
21957 | $cached_line_text = $line; | ||||
21958 | $cached_line_type = $open_or_close; | ||||
21959 | $cached_line_flag = $tightness_flag; | ||||
21960 | $cached_seqno = $seqno; | ||||
21961 | $cached_line_valid = $valid; | ||||
21962 | $cached_line_leading_space_count = $leading_space_count; | ||||
21963 | $cached_seqno_string = $seqno_string; | ||||
21964 | } | ||||
21965 | |||||
21966 | $last_level_written = $level; | ||||
21967 | $last_side_comment_length = $side_comment_length; | ||||
21968 | $extra_indent_ok = 0; | ||||
21969 | } | ||||
21970 | |||||
21971 | sub valign_output_step_C { | ||||
21972 | |||||
21973 | ############################################################### | ||||
21974 | # This is Step C in writing vertically aligned lines. | ||||
21975 | # Lines are either stored in a buffer or passed along to the next step. | ||||
21976 | # The reason for storing lines is that we may later want to reduce their | ||||
21977 | # indentation when -sot and -sct are both used. | ||||
21978 | ############################################################### | ||||
21979 | my @args = @_; | ||||
21980 | |||||
21981 | # Dump any saved lines if we see a line with an unbalanced opening or | ||||
21982 | # closing token. | ||||
21983 | dump_valign_buffer() if ( $seqno_string && $valign_buffer_filling ); | ||||
21984 | |||||
21985 | # Either store or write this line | ||||
21986 | if ($valign_buffer_filling) { | ||||
21987 | push @valign_buffer, [@args]; | ||||
21988 | } | ||||
21989 | else { | ||||
21990 | valign_output_step_D(@args); | ||||
21991 | } | ||||
21992 | |||||
21993 | # For lines starting or ending with opening or closing tokens.. | ||||
21994 | if ($seqno_string) { | ||||
21995 | $last_nonblank_seqno_string = $seqno_string; | ||||
21996 | |||||
21997 | # Start storing lines when we see a line with multiple stacked opening | ||||
21998 | # tokens. | ||||
21999 | if ( $args[0] =~ /[\{\(\[]\s*[\{\(\[]$/ ) { | ||||
22000 | $valign_buffer_filling = $seqno_string; | ||||
22001 | } | ||||
22002 | } | ||||
22003 | } | ||||
22004 | |||||
22005 | sub valign_output_step_D { | ||||
22006 | |||||
22007 | ############################################################### | ||||
22008 | # This is Step D in writing vertically aligned lines. | ||||
22009 | # Write one vertically aligned line of code to the output object. | ||||
22010 | ############################################################### | ||||
22011 | |||||
22012 | my ( $line, $leading_space_count, $level ) = @_; | ||||
22013 | |||||
22014 | # The line is currently correct if there is no tabbing (recommended!) | ||||
22015 | # We may have to lop off some leading spaces and replace with tabs. | ||||
22016 | if ( $leading_space_count > 0 ) { | ||||
22017 | |||||
22018 | # Nothing to do if no tabs | ||||
22019 | if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace ) | ||||
22020 | || $rOpts_indent_columns <= 0 ) | ||||
22021 | { | ||||
22022 | |||||
22023 | # nothing to do | ||||
22024 | } | ||||
22025 | |||||
22026 | # Handle entab option | ||||
22027 | elsif ($rOpts_entab_leading_whitespace) { | ||||
22028 | my $space_count = | ||||
22029 | $leading_space_count % $rOpts_entab_leading_whitespace; | ||||
22030 | my $tab_count = | ||||
22031 | int( $leading_space_count / $rOpts_entab_leading_whitespace ); | ||||
22032 | my $leading_string = "\t" x $tab_count . ' ' x $space_count; | ||||
22033 | if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) { | ||||
22034 | substr( $line, 0, $leading_space_count ) = $leading_string; | ||||
22035 | } | ||||
22036 | else { | ||||
22037 | |||||
22038 | # shouldn't happen - program error counting whitespace | ||||
22039 | # - skip entabbing | ||||
22040 | VALIGN_DEBUG_FLAG_TABS | ||||
22041 | && warning( | ||||
22042 | "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n" | ||||
22043 | ); | ||||
22044 | } | ||||
22045 | } | ||||
22046 | |||||
22047 | # Handle option of one tab per level | ||||
22048 | else { | ||||
22049 | my $leading_string = ( "\t" x $level ); | ||||
22050 | my $space_count = | ||||
22051 | $leading_space_count - $level * $rOpts_indent_columns; | ||||
22052 | |||||
22053 | # shouldn't happen: | ||||
22054 | if ( $space_count < 0 ) { | ||||
22055 | |||||
22056 | # But it could be an outdented comment | ||||
22057 | if ( $line !~ /^\s*#/ ) { | ||||
22058 | VALIGN_DEBUG_FLAG_TABS | ||||
22059 | && warning( | ||||
22060 | "Error entabbing in valign_output_step_D: for level=$group_level count=$leading_space_count\n" | ||||
22061 | ); | ||||
22062 | } | ||||
22063 | $leading_string = ( ' ' x $leading_space_count ); | ||||
22064 | } | ||||
22065 | else { | ||||
22066 | $leading_string .= ( ' ' x $space_count ); | ||||
22067 | } | ||||
22068 | if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) { | ||||
22069 | substr( $line, 0, $leading_space_count ) = $leading_string; | ||||
22070 | } | ||||
22071 | else { | ||||
22072 | |||||
22073 | # shouldn't happen - program error counting whitespace | ||||
22074 | # we'll skip entabbing | ||||
22075 | VALIGN_DEBUG_FLAG_TABS | ||||
22076 | && warning( | ||||
22077 | "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n" | ||||
22078 | ); | ||||
22079 | } | ||||
22080 | } | ||||
22081 | } | ||||
22082 | $file_writer_object->write_code_line( $line . "\n" ); | ||||
22083 | } | ||||
22084 | |||||
22085 | { # begin get_leading_string | ||||
22086 | |||||
22087 | 2 | 100ns | my @leading_string_cache; | ||
22088 | |||||
22089 | sub get_leading_string { | ||||
22090 | |||||
22091 | # define the leading whitespace string for this line.. | ||||
22092 | my $leading_whitespace_count = shift; | ||||
22093 | |||||
22094 | # Handle case of zero whitespace, which includes multi-line quotes | ||||
22095 | # (which may have a finite level; this prevents tab problems) | ||||
22096 | if ( $leading_whitespace_count <= 0 ) { | ||||
22097 | return ""; | ||||
22098 | } | ||||
22099 | |||||
22100 | # look for previous result | ||||
22101 | elsif ( $leading_string_cache[$leading_whitespace_count] ) { | ||||
22102 | return $leading_string_cache[$leading_whitespace_count]; | ||||
22103 | } | ||||
22104 | |||||
22105 | # must compute a string for this number of spaces | ||||
22106 | my $leading_string; | ||||
22107 | |||||
22108 | # Handle simple case of no tabs | ||||
22109 | if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace ) | ||||
22110 | || $rOpts_indent_columns <= 0 ) | ||||
22111 | { | ||||
22112 | $leading_string = ( ' ' x $leading_whitespace_count ); | ||||
22113 | } | ||||
22114 | |||||
22115 | # Handle entab option | ||||
22116 | elsif ($rOpts_entab_leading_whitespace) { | ||||
22117 | my $space_count = | ||||
22118 | $leading_whitespace_count % $rOpts_entab_leading_whitespace; | ||||
22119 | my $tab_count = int( | ||||
22120 | $leading_whitespace_count / $rOpts_entab_leading_whitespace ); | ||||
22121 | $leading_string = "\t" x $tab_count . ' ' x $space_count; | ||||
22122 | } | ||||
22123 | |||||
22124 | # Handle option of one tab per level | ||||
22125 | else { | ||||
22126 | $leading_string = ( "\t" x $group_level ); | ||||
22127 | my $space_count = | ||||
22128 | $leading_whitespace_count - $group_level * $rOpts_indent_columns; | ||||
22129 | |||||
22130 | # shouldn't happen: | ||||
22131 | if ( $space_count < 0 ) { | ||||
22132 | VALIGN_DEBUG_FLAG_TABS | ||||
22133 | && warning( | ||||
22134 | "Error in get_leading_string: for level=$group_level count=$leading_whitespace_count\n" | ||||
22135 | ); | ||||
22136 | |||||
22137 | # -- skip entabbing | ||||
22138 | $leading_string = ( ' ' x $leading_whitespace_count ); | ||||
22139 | } | ||||
22140 | else { | ||||
22141 | $leading_string .= ( ' ' x $space_count ); | ||||
22142 | } | ||||
22143 | } | ||||
22144 | $leading_string_cache[$leading_whitespace_count] = $leading_string; | ||||
22145 | return $leading_string; | ||||
22146 | } | ||||
22147 | } # end get_leading_string | ||||
22148 | |||||
22149 | sub report_anything_unusual { | ||||
22150 | my $self = shift; | ||||
22151 | if ( $outdented_line_count > 0 ) { | ||||
22152 | write_logfile_entry( | ||||
22153 | "$outdented_line_count long lines were outdented:\n"); | ||||
22154 | write_logfile_entry( | ||||
22155 | " First at output line $first_outdented_line_at\n"); | ||||
22156 | |||||
22157 | if ( $outdented_line_count > 1 ) { | ||||
22158 | write_logfile_entry( | ||||
22159 | " Last at output line $last_outdented_line_at\n"); | ||||
22160 | } | ||||
22161 | write_logfile_entry( | ||||
22162 | " use -noll to prevent outdenting, -l=n to increase line length\n" | ||||
22163 | ); | ||||
22164 | write_logfile_entry("\n"); | ||||
22165 | } | ||||
22166 | } | ||||
22167 | |||||
22168 | ##################################################################### | ||||
22169 | # | ||||
22170 | # the Perl::Tidy::FileWriter class writes the output file | ||||
22171 | # | ||||
22172 | ##################################################################### | ||||
22173 | |||||
22174 | package Perl::Tidy::FileWriter; | ||||
22175 | |||||
22176 | # Maximum number of little messages; probably need not be changed. | ||||
22177 | 2 | 1.68ms | 2 | 126µs | # spent 69µs (12+57) within Perl::Tidy::FileWriter::BEGIN@22177 which was called:
# once (12µs+57µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 22177 # spent 69µs making 1 call to Perl::Tidy::FileWriter::BEGIN@22177
# spent 57µs making 1 call to constant::import |
22178 | |||||
22179 | sub write_logfile_entry { | ||||
22180 | my $self = shift; | ||||
22181 | my $logger_object = $self->{_logger_object}; | ||||
22182 | if ($logger_object) { | ||||
22183 | $logger_object->write_logfile_entry(@_); | ||||
22184 | } | ||||
22185 | } | ||||
22186 | |||||
22187 | sub new { | ||||
22188 | my $class = shift; | ||||
22189 | my ( $line_sink_object, $rOpts, $logger_object ) = @_; | ||||
22190 | |||||
22191 | bless { | ||||
22192 | _line_sink_object => $line_sink_object, | ||||
22193 | _logger_object => $logger_object, | ||||
22194 | _rOpts => $rOpts, | ||||
22195 | _output_line_number => 1, | ||||
22196 | _consecutive_blank_lines => 0, | ||||
22197 | _consecutive_nonblank_lines => 0, | ||||
22198 | _first_line_length_error => 0, | ||||
22199 | _max_line_length_error => 0, | ||||
22200 | _last_line_length_error => 0, | ||||
22201 | _first_line_length_error_at => 0, | ||||
22202 | _max_line_length_error_at => 0, | ||||
22203 | _last_line_length_error_at => 0, | ||||
22204 | _line_length_error_count => 0, | ||||
22205 | _max_output_line_length => 0, | ||||
22206 | _max_output_line_length_at => 0, | ||||
22207 | }, $class; | ||||
22208 | } | ||||
22209 | |||||
22210 | sub tee_on { | ||||
22211 | my $self = shift; | ||||
22212 | $self->{_line_sink_object}->tee_on(); | ||||
22213 | } | ||||
22214 | |||||
22215 | sub tee_off { | ||||
22216 | my $self = shift; | ||||
22217 | $self->{_line_sink_object}->tee_off(); | ||||
22218 | } | ||||
22219 | |||||
22220 | sub get_output_line_number { | ||||
22221 | my $self = shift; | ||||
22222 | return $self->{_output_line_number}; | ||||
22223 | } | ||||
22224 | |||||
22225 | sub decrement_output_line_number { | ||||
22226 | my $self = shift; | ||||
22227 | $self->{_output_line_number}--; | ||||
22228 | } | ||||
22229 | |||||
22230 | sub get_consecutive_nonblank_lines { | ||||
22231 | my $self = shift; | ||||
22232 | return $self->{_consecutive_nonblank_lines}; | ||||
22233 | } | ||||
22234 | |||||
22235 | sub reset_consecutive_blank_lines { | ||||
22236 | my $self = shift; | ||||
22237 | $self->{_consecutive_blank_lines} = 0; | ||||
22238 | } | ||||
22239 | |||||
22240 | sub want_blank_line { | ||||
22241 | my $self = shift; | ||||
22242 | unless ( $self->{_consecutive_blank_lines} ) { | ||||
22243 | $self->write_blank_code_line(); | ||||
22244 | } | ||||
22245 | } | ||||
22246 | |||||
22247 | sub require_blank_code_lines { | ||||
22248 | |||||
22249 | # write out the requested number of blanks regardless of the value of -mbl | ||||
22250 | # unless -mbl=0. This allows extra blank lines to be written for subs and | ||||
22251 | # packages even with the default -mbl=1 | ||||
22252 | my $self = shift; | ||||
22253 | my $count = shift; | ||||
22254 | my $need = $count - $self->{_consecutive_blank_lines}; | ||||
22255 | my $rOpts = $self->{_rOpts}; | ||||
22256 | my $forced = $rOpts->{'maximum-consecutive-blank-lines'} > 0; | ||||
22257 | for ( my $i = 0 ; $i < $need ; $i++ ) { | ||||
22258 | $self->write_blank_code_line($forced); | ||||
22259 | } | ||||
22260 | } | ||||
22261 | |||||
22262 | sub write_blank_code_line { | ||||
22263 | my $self = shift; | ||||
22264 | my $forced = shift; | ||||
22265 | my $rOpts = $self->{_rOpts}; | ||||
22266 | return | ||||
22267 | if (!$forced | ||||
22268 | && $self->{_consecutive_blank_lines} >= | ||||
22269 | $rOpts->{'maximum-consecutive-blank-lines'} ); | ||||
22270 | $self->{_consecutive_blank_lines}++; | ||||
22271 | $self->{_consecutive_nonblank_lines} = 0; | ||||
22272 | $self->write_line("\n"); | ||||
22273 | } | ||||
22274 | |||||
22275 | sub write_code_line { | ||||
22276 | my $self = shift; | ||||
22277 | my $a = shift; | ||||
22278 | |||||
22279 | if ( $a =~ /^\s*$/ ) { | ||||
22280 | my $rOpts = $self->{_rOpts}; | ||||
22281 | return | ||||
22282 | if ( $self->{_consecutive_blank_lines} >= | ||||
22283 | $rOpts->{'maximum-consecutive-blank-lines'} ); | ||||
22284 | $self->{_consecutive_blank_lines}++; | ||||
22285 | $self->{_consecutive_nonblank_lines} = 0; | ||||
22286 | } | ||||
22287 | else { | ||||
22288 | $self->{_consecutive_blank_lines} = 0; | ||||
22289 | $self->{_consecutive_nonblank_lines}++; | ||||
22290 | } | ||||
22291 | $self->write_line($a); | ||||
22292 | } | ||||
22293 | |||||
22294 | sub write_line { | ||||
22295 | my $self = shift; | ||||
22296 | my $a = shift; | ||||
22297 | |||||
22298 | # TODO: go through and see if the test is necessary here | ||||
22299 | if ( $a =~ /\n$/ ) { $self->{_output_line_number}++; } | ||||
22300 | |||||
22301 | $self->{_line_sink_object}->write_line($a); | ||||
22302 | |||||
22303 | # This calculation of excess line length ignores any internal tabs | ||||
22304 | my $rOpts = $self->{_rOpts}; | ||||
22305 | my $exceed = length($a) - $rOpts->{'maximum-line-length'} - 1; | ||||
22306 | if ( $a =~ /^\t+/g ) { | ||||
22307 | $exceed += pos($a) * ( $rOpts->{'indent-columns'} - 1 ); | ||||
22308 | } | ||||
22309 | |||||
22310 | # Note that we just incremented output line number to future value | ||||
22311 | # so we must subtract 1 for current line number | ||||
22312 | if ( length($a) > 1 + $self->{_max_output_line_length} ) { | ||||
22313 | $self->{_max_output_line_length} = length($a) - 1; | ||||
22314 | $self->{_max_output_line_length_at} = $self->{_output_line_number} - 1; | ||||
22315 | } | ||||
22316 | |||||
22317 | if ( $exceed > 0 ) { | ||||
22318 | my $output_line_number = $self->{_output_line_number}; | ||||
22319 | $self->{_last_line_length_error} = $exceed; | ||||
22320 | $self->{_last_line_length_error_at} = $output_line_number - 1; | ||||
22321 | if ( $self->{_line_length_error_count} == 0 ) { | ||||
22322 | $self->{_first_line_length_error} = $exceed; | ||||
22323 | $self->{_first_line_length_error_at} = $output_line_number - 1; | ||||
22324 | } | ||||
22325 | |||||
22326 | if ( | ||||
22327 | $self->{_last_line_length_error} > $self->{_max_line_length_error} ) | ||||
22328 | { | ||||
22329 | $self->{_max_line_length_error} = $exceed; | ||||
22330 | $self->{_max_line_length_error_at} = $output_line_number - 1; | ||||
22331 | } | ||||
22332 | |||||
22333 | if ( $self->{_line_length_error_count} < MAX_NAG_MESSAGES ) { | ||||
22334 | $self->write_logfile_entry( | ||||
22335 | "Line length exceeded by $exceed characters\n"); | ||||
22336 | } | ||||
22337 | $self->{_line_length_error_count}++; | ||||
22338 | } | ||||
22339 | |||||
22340 | } | ||||
22341 | |||||
22342 | sub report_line_length_errors { | ||||
22343 | my $self = shift; | ||||
22344 | my $rOpts = $self->{_rOpts}; | ||||
22345 | my $line_length_error_count = $self->{_line_length_error_count}; | ||||
22346 | if ( $line_length_error_count == 0 ) { | ||||
22347 | $self->write_logfile_entry( | ||||
22348 | "No lines exceeded $rOpts->{'maximum-line-length'} characters\n"); | ||||
22349 | my $max_output_line_length = $self->{_max_output_line_length}; | ||||
22350 | my $max_output_line_length_at = $self->{_max_output_line_length_at}; | ||||
22351 | $self->write_logfile_entry( | ||||
22352 | " Maximum output line length was $max_output_line_length at line $max_output_line_length_at\n" | ||||
22353 | ); | ||||
22354 | |||||
22355 | } | ||||
22356 | else { | ||||
22357 | |||||
22358 | my $word = ( $line_length_error_count > 1 ) ? "s" : ""; | ||||
22359 | $self->write_logfile_entry( | ||||
22360 | "$line_length_error_count output line$word exceeded $rOpts->{'maximum-line-length'} characters:\n" | ||||
22361 | ); | ||||
22362 | |||||
22363 | $word = ( $line_length_error_count > 1 ) ? "First" : ""; | ||||
22364 | my $first_line_length_error = $self->{_first_line_length_error}; | ||||
22365 | my $first_line_length_error_at = $self->{_first_line_length_error_at}; | ||||
22366 | $self->write_logfile_entry( | ||||
22367 | " $word at line $first_line_length_error_at by $first_line_length_error characters\n" | ||||
22368 | ); | ||||
22369 | |||||
22370 | if ( $line_length_error_count > 1 ) { | ||||
22371 | my $max_line_length_error = $self->{_max_line_length_error}; | ||||
22372 | my $max_line_length_error_at = $self->{_max_line_length_error_at}; | ||||
22373 | my $last_line_length_error = $self->{_last_line_length_error}; | ||||
22374 | my $last_line_length_error_at = $self->{_last_line_length_error_at}; | ||||
22375 | $self->write_logfile_entry( | ||||
22376 | " Maximum at line $max_line_length_error_at by $max_line_length_error characters\n" | ||||
22377 | ); | ||||
22378 | $self->write_logfile_entry( | ||||
22379 | " Last at line $last_line_length_error_at by $last_line_length_error characters\n" | ||||
22380 | ); | ||||
22381 | } | ||||
22382 | } | ||||
22383 | } | ||||
22384 | |||||
22385 | ##################################################################### | ||||
22386 | # | ||||
22387 | # The Perl::Tidy::Debugger class shows line tokenization | ||||
22388 | # | ||||
22389 | ##################################################################### | ||||
22390 | |||||
22391 | package Perl::Tidy::Debugger; | ||||
22392 | |||||
22393 | sub new { | ||||
22394 | |||||
22395 | my ( $class, $filename ) = @_; | ||||
22396 | |||||
22397 | bless { | ||||
22398 | _debug_file => $filename, | ||||
22399 | _debug_file_opened => 0, | ||||
22400 | _fh => undef, | ||||
22401 | }, $class; | ||||
22402 | } | ||||
22403 | |||||
22404 | sub really_open_debug_file { | ||||
22405 | |||||
22406 | my $self = shift; | ||||
22407 | my $debug_file = $self->{_debug_file}; | ||||
22408 | my $fh; | ||||
22409 | unless ( $fh = IO::File->new("> $debug_file") ) { | ||||
22410 | Perl::Tidy::Warn("can't open $debug_file: $!\n"); | ||||
22411 | } | ||||
22412 | $self->{_debug_file_opened} = 1; | ||||
22413 | $self->{_fh} = $fh; | ||||
22414 | print $fh | ||||
22415 | "Use -dump-token-types (-dtt) to get a list of token type codes\n"; | ||||
22416 | } | ||||
22417 | |||||
22418 | sub close_debug_file { | ||||
22419 | |||||
22420 | my $self = shift; | ||||
22421 | my $fh = $self->{_fh}; | ||||
22422 | if ( $self->{_debug_file_opened} ) { | ||||
22423 | |||||
22424 | eval { $self->{_fh}->close() }; | ||||
22425 | } | ||||
22426 | } | ||||
22427 | |||||
22428 | sub write_debug_entry { | ||||
22429 | |||||
22430 | # This is a debug dump routine which may be modified as necessary | ||||
22431 | # to dump tokens on a line-by-line basis. The output will be written | ||||
22432 | # to the .DEBUG file when the -D flag is entered. | ||||
22433 | my $self = shift; | ||||
22434 | my $line_of_tokens = shift; | ||||
22435 | |||||
22436 | my $input_line = $line_of_tokens->{_line_text}; | ||||
22437 | my $rtoken_type = $line_of_tokens->{_rtoken_type}; | ||||
22438 | my $rtokens = $line_of_tokens->{_rtokens}; | ||||
22439 | my $rlevels = $line_of_tokens->{_rlevels}; | ||||
22440 | my $rslevels = $line_of_tokens->{_rslevels}; | ||||
22441 | my $rblock_type = $line_of_tokens->{_rblock_type}; | ||||
22442 | my $input_line_number = $line_of_tokens->{_line_number}; | ||||
22443 | my $line_type = $line_of_tokens->{_line_type}; | ||||
22444 | |||||
22445 | my ( $j, $num ); | ||||
22446 | |||||
22447 | my $token_str = "$input_line_number: "; | ||||
22448 | my $reconstructed_original = "$input_line_number: "; | ||||
22449 | my $block_str = "$input_line_number: "; | ||||
22450 | |||||
22451 | #$token_str .= "$line_type: "; | ||||
22452 | #$reconstructed_original .= "$line_type: "; | ||||
22453 | |||||
22454 | my $pattern = ""; | ||||
22455 | my @next_char = ( '"', '"' ); | ||||
22456 | my $i_next = 0; | ||||
22457 | unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() } | ||||
22458 | my $fh = $self->{_fh}; | ||||
22459 | |||||
22460 | for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) { | ||||
22461 | |||||
22462 | # testing patterns | ||||
22463 | if ( $$rtoken_type[$j] eq 'k' ) { | ||||
22464 | $pattern .= $$rtokens[$j]; | ||||
22465 | } | ||||
22466 | else { | ||||
22467 | $pattern .= $$rtoken_type[$j]; | ||||
22468 | } | ||||
22469 | $reconstructed_original .= $$rtokens[$j]; | ||||
22470 | $block_str .= "($$rblock_type[$j])"; | ||||
22471 | $num = length( $$rtokens[$j] ); | ||||
22472 | my $type_str = $$rtoken_type[$j]; | ||||
22473 | |||||
22474 | # be sure there are no blank tokens (shouldn't happen) | ||||
22475 | # This can only happen if a programming error has been made | ||||
22476 | # because all valid tokens are non-blank | ||||
22477 | if ( $type_str eq ' ' ) { | ||||
22478 | print $fh "BLANK TOKEN on the next line\n"; | ||||
22479 | $type_str = $next_char[$i_next]; | ||||
22480 | $i_next = 1 - $i_next; | ||||
22481 | } | ||||
22482 | |||||
22483 | if ( length($type_str) == 1 ) { | ||||
22484 | $type_str = $type_str x $num; | ||||
22485 | } | ||||
22486 | $token_str .= $type_str; | ||||
22487 | } | ||||
22488 | |||||
22489 | # Write what you want here ... | ||||
22490 | # print $fh "$input_line\n"; | ||||
22491 | # print $fh "$pattern\n"; | ||||
22492 | print $fh "$reconstructed_original\n"; | ||||
22493 | print $fh "$token_str\n"; | ||||
22494 | |||||
22495 | #print $fh "$block_str\n"; | ||||
22496 | } | ||||
22497 | |||||
22498 | ##################################################################### | ||||
22499 | # | ||||
22500 | # The Perl::Tidy::LineBuffer class supplies a 'get_line()' | ||||
22501 | # method for returning the next line to be parsed, as well as a | ||||
22502 | # 'peek_ahead()' method | ||||
22503 | # | ||||
22504 | # The input parameter is an object with a 'get_line()' method | ||||
22505 | # which returns the next line to be parsed | ||||
22506 | # | ||||
22507 | ##################################################################### | ||||
22508 | |||||
22509 | package Perl::Tidy::LineBuffer; | ||||
22510 | |||||
22511 | sub new { | ||||
22512 | |||||
22513 | my $class = shift; | ||||
22514 | my $line_source_object = shift; | ||||
22515 | |||||
22516 | return bless { | ||||
22517 | _line_source_object => $line_source_object, | ||||
22518 | _rlookahead_buffer => [], | ||||
22519 | }, $class; | ||||
22520 | } | ||||
22521 | |||||
22522 | sub peek_ahead { | ||||
22523 | my $self = shift; | ||||
22524 | my $buffer_index = shift; | ||||
22525 | my $line = undef; | ||||
22526 | my $line_source_object = $self->{_line_source_object}; | ||||
22527 | my $rlookahead_buffer = $self->{_rlookahead_buffer}; | ||||
22528 | if ( $buffer_index < scalar(@$rlookahead_buffer) ) { | ||||
22529 | $line = $$rlookahead_buffer[$buffer_index]; | ||||
22530 | } | ||||
22531 | else { | ||||
22532 | $line = $line_source_object->get_line(); | ||||
22533 | push( @$rlookahead_buffer, $line ); | ||||
22534 | } | ||||
22535 | return $line; | ||||
22536 | } | ||||
22537 | |||||
22538 | sub get_line { | ||||
22539 | my $self = shift; | ||||
22540 | my $line = undef; | ||||
22541 | my $line_source_object = $self->{_line_source_object}; | ||||
22542 | my $rlookahead_buffer = $self->{_rlookahead_buffer}; | ||||
22543 | |||||
22544 | if ( scalar(@$rlookahead_buffer) ) { | ||||
22545 | $line = shift @$rlookahead_buffer; | ||||
22546 | } | ||||
22547 | else { | ||||
22548 | $line = $line_source_object->get_line(); | ||||
22549 | } | ||||
22550 | return $line; | ||||
22551 | } | ||||
22552 | |||||
22553 | ######################################################################## | ||||
22554 | # | ||||
22555 | # the Perl::Tidy::Tokenizer package is essentially a filter which | ||||
22556 | # reads lines of perl source code from a source object and provides | ||||
22557 | # corresponding tokenized lines through its get_line() method. Lines | ||||
22558 | # flow from the source_object to the caller like this: | ||||
22559 | # | ||||
22560 | # source_object --> LineBuffer_object --> Tokenizer --> calling routine | ||||
22561 | # get_line() get_line() get_line() line_of_tokens | ||||
22562 | # | ||||
22563 | # The source object can be any object with a get_line() method which | ||||
22564 | # supplies one line (a character string) perl call. | ||||
22565 | # The LineBuffer object is created by the Tokenizer. | ||||
22566 | # The Tokenizer returns a reference to a data structure 'line_of_tokens' | ||||
22567 | # containing one tokenized line for each call to its get_line() method. | ||||
22568 | # | ||||
22569 | # WARNING: This is not a real class yet. Only one tokenizer my be used. | ||||
22570 | # | ||||
22571 | ######################################################################## | ||||
22572 | |||||
22573 | package Perl::Tidy::Tokenizer; | ||||
22574 | |||||
22575 | # spent 8µs within Perl::Tidy::Tokenizer::BEGIN@22575 which was called:
# once (8µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 22596 | ||||
22576 | |||||
22577 | # Caution: these debug flags produce a lot of output | ||||
22578 | # They should all be 0 except when debugging small scripts | ||||
22579 | |||||
22580 | 2 | 30µs | 2 | 106µs | # spent 58µs (11+47) within Perl::Tidy::Tokenizer::BEGIN@22580 which was called:
# once (11µs+47µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 22580 # spent 58µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@22580
# spent 47µs making 1 call to constant::import |
22581 | 2 | 31µs | 2 | 92µs | # spent 50µs (9+41) within Perl::Tidy::Tokenizer::BEGIN@22581 which was called:
# once (9µs+41µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 22581 # spent 50µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@22581
# spent 41µs making 1 call to constant::import |
22582 | 2 | 26µs | 2 | 81µs | # spent 44µs (8+36) within Perl::Tidy::Tokenizer::BEGIN@22582 which was called:
# once (8µs+36µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 22582 # spent 44µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@22582
# spent 36µs making 1 call to constant::import |
22583 | 2 | 25µs | 2 | 98µs | # spent 53µs (8+45) within Perl::Tidy::Tokenizer::BEGIN@22583 which was called:
# once (8µs+45µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 22583 # spent 53µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@22583
# spent 45µs making 1 call to constant::import |
22584 | 2 | 113µs | 2 | 79µs | # spent 43µs (8+35) within Perl::Tidy::Tokenizer::BEGIN@22584 which was called:
# once (8µs+35µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 22584 # spent 43µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@22584
# spent 36µs making 1 call to constant::import |
22585 | |||||
22586 | my $debug_warning = sub { | ||||
22587 | print STDOUT "TOKENIZER_DEBUGGING with key $_[0]\n"; | ||||
22588 | 1 | 2µs | }; | ||
22589 | |||||
22590 | TOKENIZER_DEBUG_FLAG_EXPECT && $debug_warning->('EXPECT'); | ||||
22591 | 1 | 0s | TOKENIZER_DEBUG_FLAG_NSCAN && $debug_warning->('NSCAN'); | ||
22592 | TOKENIZER_DEBUG_FLAG_QUOTE && $debug_warning->('QUOTE'); | ||||
22593 | 1 | 0s | TOKENIZER_DEBUG_FLAG_SCAN_ID && $debug_warning->('SCAN_ID'); | ||
22594 | 1 | 10µs | TOKENIZER_DEBUG_FLAG_TOKENIZE && $debug_warning->('TOKENIZE'); | ||
22595 | |||||
22596 | 1 | 22µs | 1 | 8µs | } # spent 8µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@22575 |
22597 | |||||
22598 | 2 | 56µs | 2 | 96µs | # spent 53µs (11+42) within Perl::Tidy::Tokenizer::BEGIN@22598 which was called:
# once (11µs+42µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 22598 # spent 53µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@22598
# spent 42µs making 1 call to Exporter::import |
22599 | |||||
22600 | # PACKAGE VARIABLES for processing an entire FILE. | ||||
22601 | 1 | 400ns | # spent 4.38ms (9µs+4.37) within Perl::Tidy::Tokenizer::BEGIN@22601 which was called:
# once (9µs+4.37ms) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 22641 | ||
22602 | $tokenizer_self | ||||
22603 | |||||
22604 | $last_nonblank_token | ||||
22605 | $last_nonblank_type | ||||
22606 | $last_nonblank_block_type | ||||
22607 | $statement_type | ||||
22608 | $in_attribute_list | ||||
22609 | $current_package | ||||
22610 | $context | ||||
22611 | |||||
22612 | %is_constant | ||||
22613 | %is_user_function | ||||
22614 | %user_function_prototype | ||||
22615 | %is_block_function | ||||
22616 | %is_block_list_function | ||||
22617 | %saw_function_definition | ||||
22618 | |||||
22619 | $brace_depth | ||||
22620 | $paren_depth | ||||
22621 | $square_bracket_depth | ||||
22622 | |||||
22623 | @current_depth | ||||
22624 | @total_depth | ||||
22625 | $total_depth | ||||
22626 | @nesting_sequence_number | ||||
22627 | @current_sequence_number | ||||
22628 | @paren_type | ||||
22629 | @paren_semicolon_count | ||||
22630 | @paren_structural_type | ||||
22631 | @brace_type | ||||
22632 | @brace_structural_type | ||||
22633 | @brace_context | ||||
22634 | @brace_package | ||||
22635 | @square_bracket_type | ||||
22636 | @square_bracket_structural_type | ||||
22637 | @depth_array | ||||
22638 | @nested_ternary_flag | ||||
22639 | @nested_statement_type | ||||
22640 | @starting_line_of_current_depth | ||||
22641 | 1 | 45µs | 2 | 8.75ms | }; # spent 4.38ms making 1 call to Perl::Tidy::Tokenizer::BEGIN@22601
# spent 4.37ms making 1 call to vars::import |
22642 | |||||
22643 | # GLOBAL CONSTANTS for routines in this package | ||||
22644 | 1 | 400ns | # spent 201µs (8+193) within Perl::Tidy::Tokenizer::BEGIN@22644 which was called:
# once (8µs+193µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 22662 | ||
22645 | %is_indirect_object_taker | ||||
22646 | %is_block_operator | ||||
22647 | %expecting_operator_token | ||||
22648 | %expecting_operator_types | ||||
22649 | %expecting_term_types | ||||
22650 | %expecting_term_token | ||||
22651 | %is_digraph | ||||
22652 | %is_file_test_operator | ||||
22653 | %is_trigraph | ||||
22654 | %is_valid_token_type | ||||
22655 | %is_keyword | ||||
22656 | %is_code_block_token | ||||
22657 | %really_want_term | ||||
22658 | @opening_brace_names | ||||
22659 | @closing_brace_names | ||||
22660 | %is_keyword_taking_list | ||||
22661 | %is_q_qq_qw_qx_qr_s_y_tr_m | ||||
22662 | 1 | 28µs | 2 | 394µs | }; # spent 201µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@22644
# spent 193µs making 1 call to vars::import |
22663 | |||||
22664 | # possible values of operator_expected() | ||||
22665 | 2 | 26µs | 2 | 74µs | # spent 40µs (7+33) within Perl::Tidy::Tokenizer::BEGIN@22665 which was called:
# once (7µs+33µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 22665 # spent 40µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@22665
# spent 34µs making 1 call to constant::import |
22666 | 2 | 22µs | 2 | 77µs | # spent 44µs (11+33) within Perl::Tidy::Tokenizer::BEGIN@22666 which was called:
# once (11µs+33µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 22666 # spent 44µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@22666
# spent 33µs making 1 call to constant::import |
22667 | 2 | 22µs | 2 | 64µs | # spent 35µs (6+29) within Perl::Tidy::Tokenizer::BEGIN@22667 which was called:
# once (6µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 22667 # spent 35µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@22667
# spent 29µs making 1 call to constant::import |
22668 | |||||
22669 | # possible values of context | ||||
22670 | 2 | 21µs | 2 | 74µs | # spent 40µs (6+34) within Perl::Tidy::Tokenizer::BEGIN@22670 which was called:
# once (6µs+34µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 22670 # spent 40µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@22670
# spent 34µs making 1 call to constant::import |
22671 | 2 | 20µs | 2 | 65µs | # spent 36µs (7+29) within Perl::Tidy::Tokenizer::BEGIN@22671 which was called:
# once (7µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 22671 # spent 36µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@22671
# spent 29µs making 1 call to constant::import |
22672 | 2 | 21µs | 2 | 72µs | # spent 44µs (15+29) within Perl::Tidy::Tokenizer::BEGIN@22672 which was called:
# once (15µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 22672 # spent 44µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@22672
# spent 29µs making 1 call to constant::import |
22673 | |||||
22674 | # Maximum number of little messages; probably need not be changed. | ||||
22675 | 2 | 2.76ms | 2 | 64µs | # spent 35µs (6+29) within Perl::Tidy::Tokenizer::BEGIN@22675 which was called:
# once (6µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 22675 # spent 35µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@22675
# spent 29µs making 1 call to constant::import |
22676 | |||||
22677 | { | ||||
22678 | |||||
22679 | # methods to count instances | ||||
22680 | 2 | 200ns | my $_count = 0; | ||
22681 | sub get_count { $_count; } | ||||
22682 | sub _increment_count { ++$_count } | ||||
22683 | sub _decrement_count { --$_count } | ||||
22684 | } | ||||
22685 | |||||
22686 | sub DESTROY { | ||||
22687 | $_[0]->_decrement_count(); | ||||
22688 | } | ||||
22689 | |||||
22690 | sub new { | ||||
22691 | |||||
22692 | my $class = shift; | ||||
22693 | |||||
22694 | # Note: 'tabs' and 'indent_columns' are temporary and should be | ||||
22695 | # removed asap | ||||
22696 | my %defaults = ( | ||||
22697 | source_object => undef, | ||||
22698 | debugger_object => undef, | ||||
22699 | diagnostics_object => undef, | ||||
22700 | logger_object => undef, | ||||
22701 | starting_level => undef, | ||||
22702 | indent_columns => 4, | ||||
22703 | tabsize => 8, | ||||
22704 | look_for_hash_bang => 0, | ||||
22705 | trim_qw => 1, | ||||
22706 | look_for_autoloader => 1, | ||||
22707 | look_for_selfloader => 1, | ||||
22708 | starting_line_number => 1, | ||||
22709 | ); | ||||
22710 | my %args = ( %defaults, @_ ); | ||||
22711 | |||||
22712 | # we are given an object with a get_line() method to supply source lines | ||||
22713 | my $source_object = $args{source_object}; | ||||
22714 | |||||
22715 | # we create another object with a get_line() and peek_ahead() method | ||||
22716 | my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object); | ||||
22717 | |||||
22718 | # Tokenizer state data is as follows: | ||||
22719 | # _rhere_target_list reference to list of here-doc targets | ||||
22720 | # _here_doc_target the target string for a here document | ||||
22721 | # _here_quote_character the type of here-doc quoting (" ' ` or none) | ||||
22722 | # to determine if interpolation is done | ||||
22723 | # _quote_target character we seek if chasing a quote | ||||
22724 | # _line_start_quote line where we started looking for a long quote | ||||
22725 | # _in_here_doc flag indicating if we are in a here-doc | ||||
22726 | # _in_pod flag set if we are in pod documentation | ||||
22727 | # _in_error flag set if we saw severe error (binary in script) | ||||
22728 | # _in_data flag set if we are in __DATA__ section | ||||
22729 | # _in_end flag set if we are in __END__ section | ||||
22730 | # _in_format flag set if we are in a format description | ||||
22731 | # _in_attribute_list flag telling if we are looking for attributes | ||||
22732 | # _in_quote flag telling if we are chasing a quote | ||||
22733 | # _starting_level indentation level of first line | ||||
22734 | # _line_buffer_object object with get_line() method to supply source code | ||||
22735 | # _diagnostics_object place to write debugging information | ||||
22736 | # _unexpected_error_count error count used to limit output | ||||
22737 | # _lower_case_labels_at line numbers where lower case labels seen | ||||
22738 | $tokenizer_self = { | ||||
22739 | _rhere_target_list => [], | ||||
22740 | _in_here_doc => 0, | ||||
22741 | _here_doc_target => "", | ||||
22742 | _here_quote_character => "", | ||||
22743 | _in_data => 0, | ||||
22744 | _in_end => 0, | ||||
22745 | _in_format => 0, | ||||
22746 | _in_error => 0, | ||||
22747 | _in_pod => 0, | ||||
22748 | _in_attribute_list => 0, | ||||
22749 | _in_quote => 0, | ||||
22750 | _quote_target => "", | ||||
22751 | _line_start_quote => -1, | ||||
22752 | _starting_level => $args{starting_level}, | ||||
22753 | _know_starting_level => defined( $args{starting_level} ), | ||||
22754 | _tabsize => $args{tabsize}, | ||||
22755 | _indent_columns => $args{indent_columns}, | ||||
22756 | _look_for_hash_bang => $args{look_for_hash_bang}, | ||||
22757 | _trim_qw => $args{trim_qw}, | ||||
22758 | _continuation_indentation => $args{continuation_indentation}, | ||||
22759 | _outdent_labels => $args{outdent_labels}, | ||||
22760 | _last_line_number => $args{starting_line_number} - 1, | ||||
22761 | _saw_perl_dash_P => 0, | ||||
22762 | _saw_perl_dash_w => 0, | ||||
22763 | _saw_use_strict => 0, | ||||
22764 | _saw_v_string => 0, | ||||
22765 | _look_for_autoloader => $args{look_for_autoloader}, | ||||
22766 | _look_for_selfloader => $args{look_for_selfloader}, | ||||
22767 | _saw_autoloader => 0, | ||||
22768 | _saw_selfloader => 0, | ||||
22769 | _saw_hash_bang => 0, | ||||
22770 | _saw_end => 0, | ||||
22771 | _saw_data => 0, | ||||
22772 | _saw_negative_indentation => 0, | ||||
22773 | _started_tokenizing => 0, | ||||
22774 | _line_buffer_object => $line_buffer_object, | ||||
22775 | _debugger_object => $args{debugger_object}, | ||||
22776 | _diagnostics_object => $args{diagnostics_object}, | ||||
22777 | _logger_object => $args{logger_object}, | ||||
22778 | _unexpected_error_count => 0, | ||||
22779 | _started_looking_for_here_target_at => 0, | ||||
22780 | _nearly_matched_here_target_at => undef, | ||||
22781 | _line_text => "", | ||||
22782 | _rlower_case_labels_at => undef, | ||||
22783 | }; | ||||
22784 | |||||
22785 | prepare_for_a_new_file(); | ||||
22786 | find_starting_indentation_level(); | ||||
22787 | |||||
22788 | bless $tokenizer_self, $class; | ||||
22789 | |||||
22790 | # This is not a full class yet, so die if an attempt is made to | ||||
22791 | # create more than one object. | ||||
22792 | |||||
22793 | if ( _increment_count() > 1 ) { | ||||
22794 | confess | ||||
22795 | "Attempt to create more than 1 object in $class, which is not a true class yet\n"; | ||||
22796 | } | ||||
22797 | |||||
22798 | return $tokenizer_self; | ||||
22799 | |||||
22800 | } | ||||
22801 | |||||
22802 | # interface to Perl::Tidy::Logger routines | ||||
22803 | sub warning { | ||||
22804 | my $logger_object = $tokenizer_self->{_logger_object}; | ||||
22805 | if ($logger_object) { | ||||
22806 | $logger_object->warning(@_); | ||||
22807 | } | ||||
22808 | } | ||||
22809 | |||||
22810 | sub complain { | ||||
22811 | my $logger_object = $tokenizer_self->{_logger_object}; | ||||
22812 | if ($logger_object) { | ||||
22813 | $logger_object->complain(@_); | ||||
22814 | } | ||||
22815 | } | ||||
22816 | |||||
22817 | sub write_logfile_entry { | ||||
22818 | my $logger_object = $tokenizer_self->{_logger_object}; | ||||
22819 | if ($logger_object) { | ||||
22820 | $logger_object->write_logfile_entry(@_); | ||||
22821 | } | ||||
22822 | } | ||||
22823 | |||||
22824 | sub interrupt_logfile { | ||||
22825 | my $logger_object = $tokenizer_self->{_logger_object}; | ||||
22826 | if ($logger_object) { | ||||
22827 | $logger_object->interrupt_logfile(); | ||||
22828 | } | ||||
22829 | } | ||||
22830 | |||||
22831 | sub resume_logfile { | ||||
22832 | my $logger_object = $tokenizer_self->{_logger_object}; | ||||
22833 | if ($logger_object) { | ||||
22834 | $logger_object->resume_logfile(); | ||||
22835 | } | ||||
22836 | } | ||||
22837 | |||||
22838 | sub increment_brace_error { | ||||
22839 | my $logger_object = $tokenizer_self->{_logger_object}; | ||||
22840 | if ($logger_object) { | ||||
22841 | $logger_object->increment_brace_error(); | ||||
22842 | } | ||||
22843 | } | ||||
22844 | |||||
22845 | sub report_definite_bug { | ||||
22846 | my $logger_object = $tokenizer_self->{_logger_object}; | ||||
22847 | if ($logger_object) { | ||||
22848 | $logger_object->report_definite_bug(); | ||||
22849 | } | ||||
22850 | } | ||||
22851 | |||||
22852 | sub brace_warning { | ||||
22853 | my $logger_object = $tokenizer_self->{_logger_object}; | ||||
22854 | if ($logger_object) { | ||||
22855 | $logger_object->brace_warning(@_); | ||||
22856 | } | ||||
22857 | } | ||||
22858 | |||||
22859 | sub get_saw_brace_error { | ||||
22860 | my $logger_object = $tokenizer_self->{_logger_object}; | ||||
22861 | if ($logger_object) { | ||||
22862 | $logger_object->get_saw_brace_error(); | ||||
22863 | } | ||||
22864 | else { | ||||
22865 | 0; | ||||
22866 | } | ||||
22867 | } | ||||
22868 | |||||
22869 | # interface to Perl::Tidy::Diagnostics routines | ||||
22870 | sub write_diagnostics { | ||||
22871 | if ( $tokenizer_self->{_diagnostics_object} ) { | ||||
22872 | $tokenizer_self->{_diagnostics_object}->write_diagnostics(@_); | ||||
22873 | } | ||||
22874 | } | ||||
22875 | |||||
22876 | sub report_tokenization_errors { | ||||
22877 | |||||
22878 | my $self = shift; | ||||
22879 | |||||
22880 | my $level = get_indentation_level(); | ||||
22881 | if ( $level != $tokenizer_self->{_starting_level} ) { | ||||
22882 | warning("final indentation level: $level\n"); | ||||
22883 | } | ||||
22884 | |||||
22885 | check_final_nesting_depths(); | ||||
22886 | |||||
22887 | if ( $tokenizer_self->{_look_for_hash_bang} | ||||
22888 | && !$tokenizer_self->{_saw_hash_bang} ) | ||||
22889 | { | ||||
22890 | warning( | ||||
22891 | "hit EOF without seeing hash-bang line; maybe don't need -x?\n"); | ||||
22892 | } | ||||
22893 | |||||
22894 | if ( $tokenizer_self->{_in_format} ) { | ||||
22895 | warning("hit EOF while in format description\n"); | ||||
22896 | } | ||||
22897 | |||||
22898 | if ( $tokenizer_self->{_in_pod} ) { | ||||
22899 | |||||
22900 | # Just write log entry if this is after __END__ or __DATA__ | ||||
22901 | # because this happens to often, and it is not likely to be | ||||
22902 | # a parsing error. | ||||
22903 | if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) { | ||||
22904 | write_logfile_entry( | ||||
22905 | "hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n" | ||||
22906 | ); | ||||
22907 | } | ||||
22908 | |||||
22909 | else { | ||||
22910 | complain( | ||||
22911 | "hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n" | ||||
22912 | ); | ||||
22913 | } | ||||
22914 | |||||
22915 | } | ||||
22916 | |||||
22917 | if ( $tokenizer_self->{_in_here_doc} ) { | ||||
22918 | my $here_doc_target = $tokenizer_self->{_here_doc_target}; | ||||
22919 | my $started_looking_for_here_target_at = | ||||
22920 | $tokenizer_self->{_started_looking_for_here_target_at}; | ||||
22921 | if ($here_doc_target) { | ||||
22922 | warning( | ||||
22923 | "hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n" | ||||
22924 | ); | ||||
22925 | } | ||||
22926 | else { | ||||
22927 | warning( | ||||
22928 | "hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n" | ||||
22929 | ); | ||||
22930 | } | ||||
22931 | my $nearly_matched_here_target_at = | ||||
22932 | $tokenizer_self->{_nearly_matched_here_target_at}; | ||||
22933 | if ($nearly_matched_here_target_at) { | ||||
22934 | warning( | ||||
22935 | "NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n" | ||||
22936 | ); | ||||
22937 | } | ||||
22938 | } | ||||
22939 | |||||
22940 | if ( $tokenizer_self->{_in_quote} ) { | ||||
22941 | my $line_start_quote = $tokenizer_self->{_line_start_quote}; | ||||
22942 | my $quote_target = $tokenizer_self->{_quote_target}; | ||||
22943 | my $what = | ||||
22944 | ( $tokenizer_self->{_in_attribute_list} ) | ||||
22945 | ? "attribute list" | ||||
22946 | : "quote/pattern"; | ||||
22947 | warning( | ||||
22948 | "hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n" | ||||
22949 | ); | ||||
22950 | } | ||||
22951 | |||||
22952 | unless ( $tokenizer_self->{_saw_perl_dash_w} ) { | ||||
22953 | if ( $] < 5.006 ) { | ||||
22954 | write_logfile_entry("Suggest including '-w parameter'\n"); | ||||
22955 | } | ||||
22956 | else { | ||||
22957 | write_logfile_entry("Suggest including 'use warnings;'\n"); | ||||
22958 | } | ||||
22959 | } | ||||
22960 | |||||
22961 | if ( $tokenizer_self->{_saw_perl_dash_P} ) { | ||||
22962 | write_logfile_entry("Use of -P parameter for defines is discouraged\n"); | ||||
22963 | } | ||||
22964 | |||||
22965 | unless ( $tokenizer_self->{_saw_use_strict} ) { | ||||
22966 | write_logfile_entry("Suggest including 'use strict;'\n"); | ||||
22967 | } | ||||
22968 | |||||
22969 | # it is suggested that labels have at least one upper case character | ||||
22970 | # for legibility and to avoid code breakage as new keywords are introduced | ||||
22971 | if ( $tokenizer_self->{_rlower_case_labels_at} ) { | ||||
22972 | my @lower_case_labels_at = | ||||
22973 | @{ $tokenizer_self->{_rlower_case_labels_at} }; | ||||
22974 | write_logfile_entry( | ||||
22975 | "Suggest using upper case characters in label(s)\n"); | ||||
22976 | local $" = ')('; | ||||
22977 | write_logfile_entry(" defined at line(s): (@lower_case_labels_at)\n"); | ||||
22978 | } | ||||
22979 | } | ||||
22980 | |||||
22981 | sub report_v_string { | ||||
22982 | |||||
22983 | # warn if this version can't handle v-strings | ||||
22984 | my $tok = shift; | ||||
22985 | unless ( $tokenizer_self->{_saw_v_string} ) { | ||||
22986 | $tokenizer_self->{_saw_v_string} = $tokenizer_self->{_last_line_number}; | ||||
22987 | } | ||||
22988 | if ( $] < 5.006 ) { | ||||
22989 | warning( | ||||
22990 | "Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n" | ||||
22991 | ); | ||||
22992 | } | ||||
22993 | } | ||||
22994 | |||||
22995 | sub get_input_line_number { | ||||
22996 | return $tokenizer_self->{_last_line_number}; | ||||
22997 | } | ||||
22998 | |||||
22999 | # returns the next tokenized line | ||||
23000 | sub get_line { | ||||
23001 | |||||
23002 | my $self = shift; | ||||
23003 | |||||
23004 | # USES GLOBAL VARIABLES: $tokenizer_self, $brace_depth, | ||||
23005 | # $square_bracket_depth, $paren_depth | ||||
23006 | |||||
23007 | my $input_line = $tokenizer_self->{_line_buffer_object}->get_line(); | ||||
23008 | $tokenizer_self->{_line_text} = $input_line; | ||||
23009 | |||||
23010 | return undef unless ($input_line); | ||||
23011 | |||||
23012 | my $input_line_number = ++$tokenizer_self->{_last_line_number}; | ||||
23013 | |||||
23014 | # Find and remove what characters terminate this line, including any | ||||
23015 | # control r | ||||
23016 | my $input_line_separator = ""; | ||||
23017 | if ( chomp($input_line) ) { $input_line_separator = $/ } | ||||
23018 | |||||
23019 | # TODO: what other characters should be included here? | ||||
23020 | if ( $input_line =~ s/((\r|\035|\032)+)$// ) { | ||||
23021 | $input_line_separator = $2 . $input_line_separator; | ||||
23022 | } | ||||
23023 | |||||
23024 | # for backwards compatibility we keep the line text terminated with | ||||
23025 | # a newline character | ||||
23026 | $input_line .= "\n"; | ||||
23027 | $tokenizer_self->{_line_text} = $input_line; # update | ||||
23028 | |||||
23029 | # create a data structure describing this line which will be | ||||
23030 | # returned to the caller. | ||||
23031 | |||||
23032 | # _line_type codes are: | ||||
23033 | # SYSTEM - system-specific code before hash-bang line | ||||
23034 | # CODE - line of perl code (including comments) | ||||
23035 | # POD_START - line starting pod, such as '=head' | ||||
23036 | # POD - pod documentation text | ||||
23037 | # POD_END - last line of pod section, '=cut' | ||||
23038 | # HERE - text of here-document | ||||
23039 | # HERE_END - last line of here-doc (target word) | ||||
23040 | # FORMAT - format section | ||||
23041 | # FORMAT_END - last line of format section, '.' | ||||
23042 | # DATA_START - __DATA__ line | ||||
23043 | # DATA - unidentified text following __DATA__ | ||||
23044 | # END_START - __END__ line | ||||
23045 | # END - unidentified text following __END__ | ||||
23046 | # ERROR - we are in big trouble, probably not a perl script | ||||
23047 | |||||
23048 | # Other variables: | ||||
23049 | # _curly_brace_depth - depth of curly braces at start of line | ||||
23050 | # _square_bracket_depth - depth of square brackets at start of line | ||||
23051 | # _paren_depth - depth of parens at start of line | ||||
23052 | # _starting_in_quote - this line continues a multi-line quote | ||||
23053 | # (so don't trim leading blanks!) | ||||
23054 | # _ending_in_quote - this line ends in a multi-line quote | ||||
23055 | # (so don't trim trailing blanks!) | ||||
23056 | my $line_of_tokens = { | ||||
23057 | _line_type => 'EOF', | ||||
23058 | _line_text => $input_line, | ||||
23059 | _line_number => $input_line_number, | ||||
23060 | _rtoken_type => undef, | ||||
23061 | _rtokens => undef, | ||||
23062 | _rlevels => undef, | ||||
23063 | _rslevels => undef, | ||||
23064 | _rblock_type => undef, | ||||
23065 | _rcontainer_type => undef, | ||||
23066 | _rcontainer_environment => undef, | ||||
23067 | _rtype_sequence => undef, | ||||
23068 | _rnesting_tokens => undef, | ||||
23069 | _rci_levels => undef, | ||||
23070 | _rnesting_blocks => undef, | ||||
23071 | _guessed_indentation_level => 0, | ||||
23072 | _starting_in_quote => 0, # to be set by subroutine | ||||
23073 | _ending_in_quote => 0, | ||||
23074 | _curly_brace_depth => $brace_depth, | ||||
23075 | _square_bracket_depth => $square_bracket_depth, | ||||
23076 | _paren_depth => $paren_depth, | ||||
23077 | _quote_character => '', | ||||
23078 | }; | ||||
23079 | |||||
23080 | # must print line unchanged if we are in a here document | ||||
23081 | if ( $tokenizer_self->{_in_here_doc} ) { | ||||
23082 | |||||
23083 | $line_of_tokens->{_line_type} = 'HERE'; | ||||
23084 | my $here_doc_target = $tokenizer_self->{_here_doc_target}; | ||||
23085 | my $here_quote_character = $tokenizer_self->{_here_quote_character}; | ||||
23086 | my $candidate_target = $input_line; | ||||
23087 | chomp $candidate_target; | ||||
23088 | if ( $candidate_target eq $here_doc_target ) { | ||||
23089 | $tokenizer_self->{_nearly_matched_here_target_at} = undef; | ||||
23090 | $line_of_tokens->{_line_type} = 'HERE_END'; | ||||
23091 | write_logfile_entry("Exiting HERE document $here_doc_target\n"); | ||||
23092 | |||||
23093 | my $rhere_target_list = $tokenizer_self->{_rhere_target_list}; | ||||
23094 | if (@$rhere_target_list) { # there can be multiple here targets | ||||
23095 | ( $here_doc_target, $here_quote_character ) = | ||||
23096 | @{ shift @$rhere_target_list }; | ||||
23097 | $tokenizer_self->{_here_doc_target} = $here_doc_target; | ||||
23098 | $tokenizer_self->{_here_quote_character} = | ||||
23099 | $here_quote_character; | ||||
23100 | write_logfile_entry( | ||||
23101 | "Entering HERE document $here_doc_target\n"); | ||||
23102 | $tokenizer_self->{_nearly_matched_here_target_at} = undef; | ||||
23103 | $tokenizer_self->{_started_looking_for_here_target_at} = | ||||
23104 | $input_line_number; | ||||
23105 | } | ||||
23106 | else { | ||||
23107 | $tokenizer_self->{_in_here_doc} = 0; | ||||
23108 | $tokenizer_self->{_here_doc_target} = ""; | ||||
23109 | $tokenizer_self->{_here_quote_character} = ""; | ||||
23110 | } | ||||
23111 | } | ||||
23112 | |||||
23113 | # check for error of extra whitespace | ||||
23114 | # note for PERL6: leading whitespace is allowed | ||||
23115 | else { | ||||
23116 | $candidate_target =~ s/\s*$//; | ||||
23117 | $candidate_target =~ s/^\s*//; | ||||
23118 | if ( $candidate_target eq $here_doc_target ) { | ||||
23119 | $tokenizer_self->{_nearly_matched_here_target_at} = | ||||
23120 | $input_line_number; | ||||
23121 | } | ||||
23122 | } | ||||
23123 | return $line_of_tokens; | ||||
23124 | } | ||||
23125 | |||||
23126 | # must print line unchanged if we are in a format section | ||||
23127 | elsif ( $tokenizer_self->{_in_format} ) { | ||||
23128 | |||||
23129 | if ( $input_line =~ /^\.[\s#]*$/ ) { | ||||
23130 | write_logfile_entry("Exiting format section\n"); | ||||
23131 | $tokenizer_self->{_in_format} = 0; | ||||
23132 | $line_of_tokens->{_line_type} = 'FORMAT_END'; | ||||
23133 | } | ||||
23134 | else { | ||||
23135 | $line_of_tokens->{_line_type} = 'FORMAT'; | ||||
23136 | } | ||||
23137 | return $line_of_tokens; | ||||
23138 | } | ||||
23139 | |||||
23140 | # must print line unchanged if we are in pod documentation | ||||
23141 | elsif ( $tokenizer_self->{_in_pod} ) { | ||||
23142 | |||||
23143 | $line_of_tokens->{_line_type} = 'POD'; | ||||
23144 | if ( $input_line =~ /^=cut/ ) { | ||||
23145 | $line_of_tokens->{_line_type} = 'POD_END'; | ||||
23146 | write_logfile_entry("Exiting POD section\n"); | ||||
23147 | $tokenizer_self->{_in_pod} = 0; | ||||
23148 | } | ||||
23149 | if ( $input_line =~ /^\#\!.*perl\b/ ) { | ||||
23150 | warning( | ||||
23151 | "Hash-bang in pod can cause older versions of perl to fail! \n" | ||||
23152 | ); | ||||
23153 | } | ||||
23154 | |||||
23155 | return $line_of_tokens; | ||||
23156 | } | ||||
23157 | |||||
23158 | # must print line unchanged if we have seen a severe error (i.e., we | ||||
23159 | # are seeing illegal tokens and cannot continue. Syntax errors do | ||||
23160 | # not pass this route). Calling routine can decide what to do, but | ||||
23161 | # the default can be to just pass all lines as if they were after __END__ | ||||
23162 | elsif ( $tokenizer_self->{_in_error} ) { | ||||
23163 | $line_of_tokens->{_line_type} = 'ERROR'; | ||||
23164 | return $line_of_tokens; | ||||
23165 | } | ||||
23166 | |||||
23167 | # print line unchanged if we are __DATA__ section | ||||
23168 | elsif ( $tokenizer_self->{_in_data} ) { | ||||
23169 | |||||
23170 | # ...but look for POD | ||||
23171 | # Note that the _in_data and _in_end flags remain set | ||||
23172 | # so that we return to that state after seeing the | ||||
23173 | # end of a pod section | ||||
23174 | if ( $input_line =~ /^=(?!cut)/ ) { | ||||
23175 | $line_of_tokens->{_line_type} = 'POD_START'; | ||||
23176 | write_logfile_entry("Entering POD section\n"); | ||||
23177 | $tokenizer_self->{_in_pod} = 1; | ||||
23178 | return $line_of_tokens; | ||||
23179 | } | ||||
23180 | else { | ||||
23181 | $line_of_tokens->{_line_type} = 'DATA'; | ||||
23182 | return $line_of_tokens; | ||||
23183 | } | ||||
23184 | } | ||||
23185 | |||||
23186 | # print line unchanged if we are in __END__ section | ||||
23187 | elsif ( $tokenizer_self->{_in_end} ) { | ||||
23188 | |||||
23189 | # ...but look for POD | ||||
23190 | # Note that the _in_data and _in_end flags remain set | ||||
23191 | # so that we return to that state after seeing the | ||||
23192 | # end of a pod section | ||||
23193 | if ( $input_line =~ /^=(?!cut)/ ) { | ||||
23194 | $line_of_tokens->{_line_type} = 'POD_START'; | ||||
23195 | write_logfile_entry("Entering POD section\n"); | ||||
23196 | $tokenizer_self->{_in_pod} = 1; | ||||
23197 | return $line_of_tokens; | ||||
23198 | } | ||||
23199 | else { | ||||
23200 | $line_of_tokens->{_line_type} = 'END'; | ||||
23201 | return $line_of_tokens; | ||||
23202 | } | ||||
23203 | } | ||||
23204 | |||||
23205 | # check for a hash-bang line if we haven't seen one | ||||
23206 | if ( !$tokenizer_self->{_saw_hash_bang} ) { | ||||
23207 | if ( $input_line =~ /^\#\!.*perl\b/ ) { | ||||
23208 | $tokenizer_self->{_saw_hash_bang} = $input_line_number; | ||||
23209 | |||||
23210 | # check for -w and -P flags | ||||
23211 | if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) { | ||||
23212 | $tokenizer_self->{_saw_perl_dash_P} = 1; | ||||
23213 | } | ||||
23214 | |||||
23215 | if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) { | ||||
23216 | $tokenizer_self->{_saw_perl_dash_w} = 1; | ||||
23217 | } | ||||
23218 | |||||
23219 | if ( ( $input_line_number > 1 ) | ||||
23220 | && ( !$tokenizer_self->{_look_for_hash_bang} ) ) | ||||
23221 | { | ||||
23222 | |||||
23223 | # this is helpful for VMS systems; we may have accidentally | ||||
23224 | # tokenized some DCL commands | ||||
23225 | if ( $tokenizer_self->{_started_tokenizing} ) { | ||||
23226 | warning( | ||||
23227 | "There seems to be a hash-bang after line 1; do you need to run with -x ?\n" | ||||
23228 | ); | ||||
23229 | } | ||||
23230 | else { | ||||
23231 | complain("Useless hash-bang after line 1\n"); | ||||
23232 | } | ||||
23233 | } | ||||
23234 | |||||
23235 | # Report the leading hash-bang as a system line | ||||
23236 | # This will prevent -dac from deleting it | ||||
23237 | else { | ||||
23238 | $line_of_tokens->{_line_type} = 'SYSTEM'; | ||||
23239 | return $line_of_tokens; | ||||
23240 | } | ||||
23241 | } | ||||
23242 | } | ||||
23243 | |||||
23244 | # wait for a hash-bang before parsing if the user invoked us with -x | ||||
23245 | if ( $tokenizer_self->{_look_for_hash_bang} | ||||
23246 | && !$tokenizer_self->{_saw_hash_bang} ) | ||||
23247 | { | ||||
23248 | $line_of_tokens->{_line_type} = 'SYSTEM'; | ||||
23249 | return $line_of_tokens; | ||||
23250 | } | ||||
23251 | |||||
23252 | # a first line of the form ': #' will be marked as SYSTEM | ||||
23253 | # since lines of this form may be used by tcsh | ||||
23254 | if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) { | ||||
23255 | $line_of_tokens->{_line_type} = 'SYSTEM'; | ||||
23256 | return $line_of_tokens; | ||||
23257 | } | ||||
23258 | |||||
23259 | # now we know that it is ok to tokenize the line... | ||||
23260 | # the line tokenizer will modify any of these private variables: | ||||
23261 | # _rhere_target_list | ||||
23262 | # _in_data | ||||
23263 | # _in_end | ||||
23264 | # _in_format | ||||
23265 | # _in_error | ||||
23266 | # _in_pod | ||||
23267 | # _in_quote | ||||
23268 | my $ending_in_quote_last = $tokenizer_self->{_in_quote}; | ||||
23269 | tokenize_this_line($line_of_tokens); | ||||
23270 | |||||
23271 | # Now finish defining the return structure and return it | ||||
23272 | $line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote}; | ||||
23273 | |||||
23274 | # handle severe error (binary data in script) | ||||
23275 | if ( $tokenizer_self->{_in_error} ) { | ||||
23276 | $tokenizer_self->{_in_quote} = 0; # to avoid any more messages | ||||
23277 | warning("Giving up after error\n"); | ||||
23278 | $line_of_tokens->{_line_type} = 'ERROR'; | ||||
23279 | reset_indentation_level(0); # avoid error messages | ||||
23280 | return $line_of_tokens; | ||||
23281 | } | ||||
23282 | |||||
23283 | # handle start of pod documentation | ||||
23284 | if ( $tokenizer_self->{_in_pod} ) { | ||||
23285 | |||||
23286 | # This gets tricky..above a __DATA__ or __END__ section, perl | ||||
23287 | # accepts '=cut' as the start of pod section. But afterwards, | ||||
23288 | # only pod utilities see it and they may ignore an =cut without | ||||
23289 | # leading =head. In any case, this isn't good. | ||||
23290 | if ( $input_line =~ /^=cut\b/ ) { | ||||
23291 | if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) { | ||||
23292 | complain("=cut while not in pod ignored\n"); | ||||
23293 | $tokenizer_self->{_in_pod} = 0; | ||||
23294 | $line_of_tokens->{_line_type} = 'POD_END'; | ||||
23295 | } | ||||
23296 | else { | ||||
23297 | $line_of_tokens->{_line_type} = 'POD_START'; | ||||
23298 | complain( | ||||
23299 | "=cut starts a pod section .. this can fool pod utilities.\n" | ||||
23300 | ); | ||||
23301 | write_logfile_entry("Entering POD section\n"); | ||||
23302 | } | ||||
23303 | } | ||||
23304 | |||||
23305 | else { | ||||
23306 | $line_of_tokens->{_line_type} = 'POD_START'; | ||||
23307 | write_logfile_entry("Entering POD section\n"); | ||||
23308 | } | ||||
23309 | |||||
23310 | return $line_of_tokens; | ||||
23311 | } | ||||
23312 | |||||
23313 | # update indentation levels for log messages | ||||
23314 | if ( $input_line !~ /^\s*$/ ) { | ||||
23315 | my $rlevels = $line_of_tokens->{_rlevels}; | ||||
23316 | $line_of_tokens->{_guessed_indentation_level} = | ||||
23317 | guess_old_indentation_level($input_line); | ||||
23318 | } | ||||
23319 | |||||
23320 | # see if this line contains here doc targets | ||||
23321 | my $rhere_target_list = $tokenizer_self->{_rhere_target_list}; | ||||
23322 | if (@$rhere_target_list) { | ||||
23323 | |||||
23324 | my ( $here_doc_target, $here_quote_character ) = | ||||
23325 | @{ shift @$rhere_target_list }; | ||||
23326 | $tokenizer_self->{_in_here_doc} = 1; | ||||
23327 | $tokenizer_self->{_here_doc_target} = $here_doc_target; | ||||
23328 | $tokenizer_self->{_here_quote_character} = $here_quote_character; | ||||
23329 | write_logfile_entry("Entering HERE document $here_doc_target\n"); | ||||
23330 | $tokenizer_self->{_started_looking_for_here_target_at} = | ||||
23331 | $input_line_number; | ||||
23332 | } | ||||
23333 | |||||
23334 | # NOTE: __END__ and __DATA__ statements are written unformatted | ||||
23335 | # because they can theoretically contain additional characters | ||||
23336 | # which are not tokenized (and cannot be read with <DATA> either!). | ||||
23337 | if ( $tokenizer_self->{_in_data} ) { | ||||
23338 | $line_of_tokens->{_line_type} = 'DATA_START'; | ||||
23339 | write_logfile_entry("Starting __DATA__ section\n"); | ||||
23340 | $tokenizer_self->{_saw_data} = 1; | ||||
23341 | |||||
23342 | # keep parsing after __DATA__ if use SelfLoader was seen | ||||
23343 | if ( $tokenizer_self->{_saw_selfloader} ) { | ||||
23344 | $tokenizer_self->{_in_data} = 0; | ||||
23345 | write_logfile_entry( | ||||
23346 | "SelfLoader seen, continuing; -nlsl deactivates\n"); | ||||
23347 | } | ||||
23348 | |||||
23349 | return $line_of_tokens; | ||||
23350 | } | ||||
23351 | |||||
23352 | elsif ( $tokenizer_self->{_in_end} ) { | ||||
23353 | $line_of_tokens->{_line_type} = 'END_START'; | ||||
23354 | write_logfile_entry("Starting __END__ section\n"); | ||||
23355 | $tokenizer_self->{_saw_end} = 1; | ||||
23356 | |||||
23357 | # keep parsing after __END__ if use AutoLoader was seen | ||||
23358 | if ( $tokenizer_self->{_saw_autoloader} ) { | ||||
23359 | $tokenizer_self->{_in_end} = 0; | ||||
23360 | write_logfile_entry( | ||||
23361 | "AutoLoader seen, continuing; -nlal deactivates\n"); | ||||
23362 | } | ||||
23363 | return $line_of_tokens; | ||||
23364 | } | ||||
23365 | |||||
23366 | # now, finally, we know that this line is type 'CODE' | ||||
23367 | $line_of_tokens->{_line_type} = 'CODE'; | ||||
23368 | |||||
23369 | # remember if we have seen any real code | ||||
23370 | if ( !$tokenizer_self->{_started_tokenizing} | ||||
23371 | && $input_line !~ /^\s*$/ | ||||
23372 | && $input_line !~ /^\s*#/ ) | ||||
23373 | { | ||||
23374 | $tokenizer_self->{_started_tokenizing} = 1; | ||||
23375 | } | ||||
23376 | |||||
23377 | if ( $tokenizer_self->{_debugger_object} ) { | ||||
23378 | $tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens); | ||||
23379 | } | ||||
23380 | |||||
23381 | # Note: if keyword 'format' occurs in this line code, it is still CODE | ||||
23382 | # (keyword 'format' need not start a line) | ||||
23383 | if ( $tokenizer_self->{_in_format} ) { | ||||
23384 | write_logfile_entry("Entering format section\n"); | ||||
23385 | } | ||||
23386 | |||||
23387 | if ( $tokenizer_self->{_in_quote} | ||||
23388 | and ( $tokenizer_self->{_line_start_quote} < 0 ) ) | ||||
23389 | { | ||||
23390 | |||||
23391 | #if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) { | ||||
23392 | if ( | ||||
23393 | ( my $quote_target = $tokenizer_self->{_quote_target} ) !~ /^\s*$/ ) | ||||
23394 | { | ||||
23395 | $tokenizer_self->{_line_start_quote} = $input_line_number; | ||||
23396 | write_logfile_entry( | ||||
23397 | "Start multi-line quote or pattern ending in $quote_target\n"); | ||||
23398 | } | ||||
23399 | } | ||||
23400 | elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 ) | ||||
23401 | and !$tokenizer_self->{_in_quote} ) | ||||
23402 | { | ||||
23403 | $tokenizer_self->{_line_start_quote} = -1; | ||||
23404 | write_logfile_entry("End of multi-line quote or pattern\n"); | ||||
23405 | } | ||||
23406 | |||||
23407 | # we are returning a line of CODE | ||||
23408 | return $line_of_tokens; | ||||
23409 | } | ||||
23410 | |||||
23411 | sub find_starting_indentation_level { | ||||
23412 | |||||
23413 | # We need to find the indentation level of the first line of the | ||||
23414 | # script being formatted. Often it will be zero for an entire file, | ||||
23415 | # but if we are formatting a local block of code (within an editor for | ||||
23416 | # example) it may not be zero. The user may specify this with the | ||||
23417 | # -sil=n parameter but normally doesn't so we have to guess. | ||||
23418 | # | ||||
23419 | # USES GLOBAL VARIABLES: $tokenizer_self | ||||
23420 | my $starting_level = 0; | ||||
23421 | |||||
23422 | # use value if given as parameter | ||||
23423 | if ( $tokenizer_self->{_know_starting_level} ) { | ||||
23424 | $starting_level = $tokenizer_self->{_starting_level}; | ||||
23425 | } | ||||
23426 | |||||
23427 | # if we know there is a hash_bang line, the level must be zero | ||||
23428 | elsif ( $tokenizer_self->{_look_for_hash_bang} ) { | ||||
23429 | $tokenizer_self->{_know_starting_level} = 1; | ||||
23430 | } | ||||
23431 | |||||
23432 | # otherwise figure it out from the input file | ||||
23433 | else { | ||||
23434 | my $line; | ||||
23435 | my $i = 0; | ||||
23436 | |||||
23437 | # keep looking at lines until we find a hash bang or piece of code | ||||
23438 | my $msg = ""; | ||||
23439 | while ( $line = | ||||
23440 | $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) ) | ||||
23441 | { | ||||
23442 | |||||
23443 | # if first line is #! then assume starting level is zero | ||||
23444 | if ( $i == 1 && $line =~ /^\#\!/ ) { | ||||
23445 | $starting_level = 0; | ||||
23446 | last; | ||||
23447 | } | ||||
23448 | next if ( $line =~ /^\s*#/ ); # skip past comments | ||||
23449 | next if ( $line =~ /^\s*$/ ); # skip past blank lines | ||||
23450 | $starting_level = guess_old_indentation_level($line); | ||||
23451 | last; | ||||
23452 | } | ||||
23453 | $msg = "Line $i implies starting-indentation-level = $starting_level\n"; | ||||
23454 | write_logfile_entry("$msg"); | ||||
23455 | } | ||||
23456 | $tokenizer_self->{_starting_level} = $starting_level; | ||||
23457 | reset_indentation_level($starting_level); | ||||
23458 | } | ||||
23459 | |||||
23460 | sub guess_old_indentation_level { | ||||
23461 | my ($line) = @_; | ||||
23462 | |||||
23463 | # Guess the indentation level of an input line. | ||||
23464 | # | ||||
23465 | # For the first line of code this result will define the starting | ||||
23466 | # indentation level. It will mainly be non-zero when perltidy is applied | ||||
23467 | # within an editor to a local block of code. | ||||
23468 | # | ||||
23469 | # This is an impossible task in general because we can't know what tabs | ||||
23470 | # meant for the old script and how many spaces were used for one | ||||
23471 | # indentation level in the given input script. For example it may have | ||||
23472 | # been previously formatted with -i=7 -et=3. But we can at least try to | ||||
23473 | # make sure that perltidy guesses correctly if it is applied repeatedly to | ||||
23474 | # a block of code within an editor, so that the block stays at the same | ||||
23475 | # level when perltidy is applied repeatedly. | ||||
23476 | # | ||||
23477 | # USES GLOBAL VARIABLES: $tokenizer_self | ||||
23478 | my $level = 0; | ||||
23479 | |||||
23480 | # find leading tabs, spaces, and any statement label | ||||
23481 | my $spaces = 0; | ||||
23482 | if ( $line =~ /^(\t+)?(\s+)?(\w+:[^:])?/ ) { | ||||
23483 | |||||
23484 | # If there are leading tabs, we use the tab scheme for this run, if | ||||
23485 | # any, so that the code will remain stable when editing. | ||||
23486 | if ($1) { $spaces += length($1) * $tokenizer_self->{_tabsize} } | ||||
23487 | |||||
23488 | if ($2) { $spaces += length($2) } | ||||
23489 | |||||
23490 | # correct for outdented labels | ||||
23491 | if ( $3 && $tokenizer_self->{'_outdent_labels'} ) { | ||||
23492 | $spaces += $tokenizer_self->{_continuation_indentation}; | ||||
23493 | } | ||||
23494 | } | ||||
23495 | |||||
23496 | # compute indentation using the value of -i for this run. | ||||
23497 | # If -i=0 is used for this run (which is possible) it doesn't matter | ||||
23498 | # what we do here but we'll guess that the old run used 4 spaces per level. | ||||
23499 | my $indent_columns = $tokenizer_self->{_indent_columns}; | ||||
23500 | $indent_columns = 4 if ( !$indent_columns ); | ||||
23501 | $level = int( $spaces / $indent_columns ); | ||||
23502 | return ($level); | ||||
23503 | } | ||||
23504 | |||||
23505 | # This is a currently unused debug routine | ||||
23506 | sub dump_functions { | ||||
23507 | |||||
23508 | my $fh = *STDOUT; | ||||
23509 | my ( $pkg, $sub ); | ||||
23510 | foreach $pkg ( keys %is_user_function ) { | ||||
23511 | print $fh "\nnon-constant subs in package $pkg\n"; | ||||
23512 | |||||
23513 | foreach $sub ( keys %{ $is_user_function{$pkg} } ) { | ||||
23514 | my $msg = ""; | ||||
23515 | if ( $is_block_list_function{$pkg}{$sub} ) { | ||||
23516 | $msg = 'block_list'; | ||||
23517 | } | ||||
23518 | |||||
23519 | if ( $is_block_function{$pkg}{$sub} ) { | ||||
23520 | $msg = 'block'; | ||||
23521 | } | ||||
23522 | print $fh "$sub $msg\n"; | ||||
23523 | } | ||||
23524 | } | ||||
23525 | |||||
23526 | foreach $pkg ( keys %is_constant ) { | ||||
23527 | print $fh "\nconstants and constant subs in package $pkg\n"; | ||||
23528 | |||||
23529 | foreach $sub ( keys %{ $is_constant{$pkg} } ) { | ||||
23530 | print $fh "$sub\n"; | ||||
23531 | } | ||||
23532 | } | ||||
23533 | } | ||||
23534 | |||||
23535 | sub ones_count { | ||||
23536 | |||||
23537 | # count number of 1's in a string of 1's and 0's | ||||
23538 | # example: ones_count("010101010101") gives 6 | ||||
23539 | return ( my $cis = $_[0] ) =~ tr/1/0/; | ||||
23540 | } | ||||
23541 | |||||
23542 | sub prepare_for_a_new_file { | ||||
23543 | |||||
23544 | # previous tokens needed to determine what to expect next | ||||
23545 | $last_nonblank_token = ';'; # the only possible starting state which | ||||
23546 | $last_nonblank_type = ';'; # will make a leading brace a code block | ||||
23547 | $last_nonblank_block_type = ''; | ||||
23548 | |||||
23549 | # scalars for remembering statement types across multiple lines | ||||
23550 | $statement_type = ''; # '' or 'use' or 'sub..' or 'case..' | ||||
23551 | $in_attribute_list = 0; | ||||
23552 | |||||
23553 | # scalars for remembering where we are in the file | ||||
23554 | $current_package = "main"; | ||||
23555 | $context = UNKNOWN_CONTEXT; | ||||
23556 | |||||
23557 | # hashes used to remember function information | ||||
23558 | %is_constant = (); # user-defined constants | ||||
23559 | %is_user_function = (); # user-defined functions | ||||
23560 | %user_function_prototype = (); # their prototypes | ||||
23561 | %is_block_function = (); | ||||
23562 | %is_block_list_function = (); | ||||
23563 | %saw_function_definition = (); | ||||
23564 | |||||
23565 | # variables used to track depths of various containers | ||||
23566 | # and report nesting errors | ||||
23567 | $paren_depth = 0; | ||||
23568 | $brace_depth = 0; | ||||
23569 | $square_bracket_depth = 0; | ||||
23570 | @current_depth[ 0 .. $#closing_brace_names ] = | ||||
23571 | (0) x scalar @closing_brace_names; | ||||
23572 | $total_depth = 0; | ||||
23573 | @total_depth = (); | ||||
23574 | @nesting_sequence_number[ 0 .. $#closing_brace_names ] = | ||||
23575 | ( 0 .. $#closing_brace_names ); | ||||
23576 | @current_sequence_number = (); | ||||
23577 | $paren_type[$paren_depth] = ''; | ||||
23578 | $paren_semicolon_count[$paren_depth] = 0; | ||||
23579 | $paren_structural_type[$brace_depth] = ''; | ||||
23580 | $brace_type[$brace_depth] = ';'; # identify opening brace as code block | ||||
23581 | $brace_structural_type[$brace_depth] = ''; | ||||
23582 | $brace_context[$brace_depth] = UNKNOWN_CONTEXT; | ||||
23583 | $brace_package[$paren_depth] = $current_package; | ||||
23584 | $square_bracket_type[$square_bracket_depth] = ''; | ||||
23585 | $square_bracket_structural_type[$square_bracket_depth] = ''; | ||||
23586 | |||||
23587 | initialize_tokenizer_state(); | ||||
23588 | } | ||||
23589 | |||||
23590 | { # begin tokenize_this_line | ||||
23591 | |||||
23592 | 3 | 29µs | 2 | 81µs | # spent 45µs (9+36) within Perl::Tidy::Tokenizer::BEGIN@23592 which was called:
# once (9µs+36µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 23592 # spent 45µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@23592
# spent 36µs making 1 call to constant::import |
23593 | 2 | 26µs | 2 | 78µs | # spent 43µs (7+35) within Perl::Tidy::Tokenizer::BEGIN@23593 which was called:
# once (7µs+35µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 23593 # spent 43µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@23593
# spent 35µs making 1 call to constant::import |
23594 | 2 | 22µs | 2 | 66µs | # spent 36µs (6+30) within Perl::Tidy::Tokenizer::BEGIN@23594 which was called:
# once (6µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 23594 # spent 36µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@23594
# spent 30µs making 1 call to constant::import |
23595 | 2 | 14.5ms | 2 | 65µs | # spent 36µs (6+30) within Perl::Tidy::Tokenizer::BEGIN@23595 which was called:
# once (6µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 23595 # spent 36µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@23595
# spent 30µs making 1 call to constant::import |
23596 | |||||
23597 | # TV1: scalars for processing one LINE. | ||||
23598 | # Re-initialized on each entry to sub tokenize_this_line. | ||||
23599 | my ( | ||||
23600 | 1 | 100ns | $block_type, $container_type, $expecting, | ||
23601 | $i, $i_tok, $input_line, | ||||
23602 | $input_line_number, $last_nonblank_i, $max_token_index, | ||||
23603 | $next_tok, $next_type, $peeked_ahead, | ||||
23604 | $prototype, $rhere_target_list, $rtoken_map, | ||||
23605 | $rtoken_type, $rtokens, $tok, | ||||
23606 | $type, $type_sequence, $indent_flag, | ||||
23607 | ); | ||||
23608 | |||||
23609 | # TV2: refs to ARRAYS for processing one LINE | ||||
23610 | # Re-initialized on each call. | ||||
23611 | 1 | 200ns | my $routput_token_list = []; # stack of output token indexes | ||
23612 | 1 | 100ns | my $routput_token_type = []; # token types | ||
23613 | 1 | 100ns | my $routput_block_type = []; # types of code block | ||
23614 | 1 | 200ns | my $routput_container_type = []; # paren types, such as if, elsif, .. | ||
23615 | 1 | 100ns | my $routput_type_sequence = []; # nesting sequential number | ||
23616 | 1 | 100ns | my $routput_indent_flag = []; # | ||
23617 | |||||
23618 | # TV3: SCALARS for quote variables. These are initialized with a | ||||
23619 | # subroutine call and continually updated as lines are processed. | ||||
23620 | 1 | 100ns | my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth, | ||
23621 | $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, ); | ||||
23622 | |||||
23623 | # TV4: SCALARS for multi-line identifiers and | ||||
23624 | # statements. These are initialized with a subroutine call | ||||
23625 | # and continually updated as lines are processed. | ||||
23626 | my ( $id_scan_state, $identifier, $want_paren, $indented_if_level ); | ||||
23627 | |||||
23628 | # TV5: SCALARS for tracking indentation level. | ||||
23629 | # Initialized once and continually updated as lines are | ||||
23630 | # processed. | ||||
23631 | my ( | ||||
23632 | $nesting_token_string, $nesting_type_string, | ||||
23633 | $nesting_block_string, $nesting_block_flag, | ||||
23634 | $nesting_list_string, $nesting_list_flag, | ||||
23635 | $ci_string_in_tokenizer, $continuation_string_in_tokenizer, | ||||
23636 | $in_statement_continuation, $level_in_tokenizer, | ||||
23637 | $slevel_in_tokenizer, $rslevel_stack, | ||||
23638 | ); | ||||
23639 | |||||
23640 | # TV6: SCALARS for remembering several previous | ||||
23641 | # tokens. Initialized once and continually updated as | ||||
23642 | # lines are processed. | ||||
23643 | my ( | ||||
23644 | $last_nonblank_container_type, $last_nonblank_type_sequence, | ||||
23645 | $last_last_nonblank_token, $last_last_nonblank_type, | ||||
23646 | $last_last_nonblank_block_type, $last_last_nonblank_container_type, | ||||
23647 | $last_last_nonblank_type_sequence, $last_nonblank_prototype, | ||||
23648 | ); | ||||
23649 | |||||
23650 | # ---------------------------------------------------------------- | ||||
23651 | # beginning of tokenizer variable access and manipulation routines | ||||
23652 | # ---------------------------------------------------------------- | ||||
23653 | |||||
23654 | sub initialize_tokenizer_state { | ||||
23655 | |||||
23656 | # TV1: initialized on each call | ||||
23657 | # TV2: initialized on each call | ||||
23658 | # TV3: | ||||
23659 | $in_quote = 0; | ||||
23660 | $quote_type = 'Q'; | ||||
23661 | $quote_character = ""; | ||||
23662 | $quote_pos = 0; | ||||
23663 | $quote_depth = 0; | ||||
23664 | $quoted_string_1 = ""; | ||||
23665 | $quoted_string_2 = ""; | ||||
23666 | $allowed_quote_modifiers = ""; | ||||
23667 | |||||
23668 | # TV4: | ||||
23669 | $id_scan_state = ''; | ||||
23670 | $identifier = ''; | ||||
23671 | $want_paren = ""; | ||||
23672 | $indented_if_level = 0; | ||||
23673 | |||||
23674 | # TV5: | ||||
23675 | $nesting_token_string = ""; | ||||
23676 | $nesting_type_string = ""; | ||||
23677 | $nesting_block_string = '1'; # initially in a block | ||||
23678 | $nesting_block_flag = 1; | ||||
23679 | $nesting_list_string = '0'; # initially not in a list | ||||
23680 | $nesting_list_flag = 0; # initially not in a list | ||||
23681 | $ci_string_in_tokenizer = ""; | ||||
23682 | $continuation_string_in_tokenizer = "0"; | ||||
23683 | $in_statement_continuation = 0; | ||||
23684 | $level_in_tokenizer = 0; | ||||
23685 | $slevel_in_tokenizer = 0; | ||||
23686 | $rslevel_stack = []; | ||||
23687 | |||||
23688 | # TV6: | ||||
23689 | $last_nonblank_container_type = ''; | ||||
23690 | $last_nonblank_type_sequence = ''; | ||||
23691 | $last_last_nonblank_token = ';'; | ||||
23692 | $last_last_nonblank_type = ';'; | ||||
23693 | $last_last_nonblank_block_type = ''; | ||||
23694 | $last_last_nonblank_container_type = ''; | ||||
23695 | $last_last_nonblank_type_sequence = ''; | ||||
23696 | $last_nonblank_prototype = ""; | ||||
23697 | } | ||||
23698 | |||||
23699 | sub save_tokenizer_state { | ||||
23700 | |||||
23701 | my $rTV1 = [ | ||||
23702 | $block_type, $container_type, $expecting, | ||||
23703 | $i, $i_tok, $input_line, | ||||
23704 | $input_line_number, $last_nonblank_i, $max_token_index, | ||||
23705 | $next_tok, $next_type, $peeked_ahead, | ||||
23706 | $prototype, $rhere_target_list, $rtoken_map, | ||||
23707 | $rtoken_type, $rtokens, $tok, | ||||
23708 | $type, $type_sequence, $indent_flag, | ||||
23709 | ]; | ||||
23710 | |||||
23711 | my $rTV2 = [ | ||||
23712 | $routput_token_list, $routput_token_type, | ||||
23713 | $routput_block_type, $routput_container_type, | ||||
23714 | $routput_type_sequence, $routput_indent_flag, | ||||
23715 | ]; | ||||
23716 | |||||
23717 | my $rTV3 = [ | ||||
23718 | $in_quote, $quote_type, | ||||
23719 | $quote_character, $quote_pos, | ||||
23720 | $quote_depth, $quoted_string_1, | ||||
23721 | $quoted_string_2, $allowed_quote_modifiers, | ||||
23722 | ]; | ||||
23723 | |||||
23724 | my $rTV4 = | ||||
23725 | [ $id_scan_state, $identifier, $want_paren, $indented_if_level ]; | ||||
23726 | |||||
23727 | my $rTV5 = [ | ||||
23728 | $nesting_token_string, $nesting_type_string, | ||||
23729 | $nesting_block_string, $nesting_block_flag, | ||||
23730 | $nesting_list_string, $nesting_list_flag, | ||||
23731 | $ci_string_in_tokenizer, $continuation_string_in_tokenizer, | ||||
23732 | $in_statement_continuation, $level_in_tokenizer, | ||||
23733 | $slevel_in_tokenizer, $rslevel_stack, | ||||
23734 | ]; | ||||
23735 | |||||
23736 | my $rTV6 = [ | ||||
23737 | $last_nonblank_container_type, | ||||
23738 | $last_nonblank_type_sequence, | ||||
23739 | $last_last_nonblank_token, | ||||
23740 | $last_last_nonblank_type, | ||||
23741 | $last_last_nonblank_block_type, | ||||
23742 | $last_last_nonblank_container_type, | ||||
23743 | $last_last_nonblank_type_sequence, | ||||
23744 | $last_nonblank_prototype, | ||||
23745 | ]; | ||||
23746 | return [ $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ]; | ||||
23747 | } | ||||
23748 | |||||
23749 | sub restore_tokenizer_state { | ||||
23750 | my ($rstate) = @_; | ||||
23751 | my ( $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate}; | ||||
23752 | ( | ||||
23753 | $block_type, $container_type, $expecting, | ||||
23754 | $i, $i_tok, $input_line, | ||||
23755 | $input_line_number, $last_nonblank_i, $max_token_index, | ||||
23756 | $next_tok, $next_type, $peeked_ahead, | ||||
23757 | $prototype, $rhere_target_list, $rtoken_map, | ||||
23758 | $rtoken_type, $rtokens, $tok, | ||||
23759 | $type, $type_sequence, $indent_flag, | ||||
23760 | ) = @{$rTV1}; | ||||
23761 | |||||
23762 | ( | ||||
23763 | $routput_token_list, $routput_token_type, | ||||
23764 | $routput_block_type, $routput_container_type, | ||||
23765 | $routput_type_sequence, $routput_type_sequence, | ||||
23766 | ) = @{$rTV2}; | ||||
23767 | |||||
23768 | ( | ||||
23769 | $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth, | ||||
23770 | $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, | ||||
23771 | ) = @{$rTV3}; | ||||
23772 | |||||
23773 | ( $id_scan_state, $identifier, $want_paren, $indented_if_level ) = | ||||
23774 | @{$rTV4}; | ||||
23775 | |||||
23776 | ( | ||||
23777 | $nesting_token_string, $nesting_type_string, | ||||
23778 | $nesting_block_string, $nesting_block_flag, | ||||
23779 | $nesting_list_string, $nesting_list_flag, | ||||
23780 | $ci_string_in_tokenizer, $continuation_string_in_tokenizer, | ||||
23781 | $in_statement_continuation, $level_in_tokenizer, | ||||
23782 | $slevel_in_tokenizer, $rslevel_stack, | ||||
23783 | ) = @{$rTV5}; | ||||
23784 | |||||
23785 | ( | ||||
23786 | $last_nonblank_container_type, | ||||
23787 | $last_nonblank_type_sequence, | ||||
23788 | $last_last_nonblank_token, | ||||
23789 | $last_last_nonblank_type, | ||||
23790 | $last_last_nonblank_block_type, | ||||
23791 | $last_last_nonblank_container_type, | ||||
23792 | $last_last_nonblank_type_sequence, | ||||
23793 | $last_nonblank_prototype, | ||||
23794 | ) = @{$rTV6}; | ||||
23795 | } | ||||
23796 | |||||
23797 | sub get_indentation_level { | ||||
23798 | |||||
23799 | # patch to avoid reporting error if indented if is not terminated | ||||
23800 | if ($indented_if_level) { return $level_in_tokenizer - 1 } | ||||
23801 | return $level_in_tokenizer; | ||||
23802 | } | ||||
23803 | |||||
23804 | sub reset_indentation_level { | ||||
23805 | $level_in_tokenizer = $_[0]; | ||||
23806 | $slevel_in_tokenizer = $_[0]; | ||||
23807 | push @{$rslevel_stack}, $slevel_in_tokenizer; | ||||
23808 | } | ||||
23809 | |||||
23810 | sub peeked_ahead { | ||||
23811 | $peeked_ahead = defined( $_[0] ) ? $_[0] : $peeked_ahead; | ||||
23812 | } | ||||
23813 | |||||
23814 | # ------------------------------------------------------------ | ||||
23815 | # end of tokenizer variable access and manipulation routines | ||||
23816 | # ------------------------------------------------------------ | ||||
23817 | |||||
23818 | # ------------------------------------------------------------ | ||||
23819 | # beginning of various scanner interface routines | ||||
23820 | # ------------------------------------------------------------ | ||||
23821 | sub scan_replacement_text { | ||||
23822 | |||||
23823 | # check for here-docs in replacement text invoked by | ||||
23824 | # a substitution operator with executable modifier 'e'. | ||||
23825 | # | ||||
23826 | # given: | ||||
23827 | # $replacement_text | ||||
23828 | # return: | ||||
23829 | # $rht = reference to any here-doc targets | ||||
23830 | my ($replacement_text) = @_; | ||||
23831 | |||||
23832 | # quick check | ||||
23833 | return undef unless ( $replacement_text =~ /<</ ); | ||||
23834 | |||||
23835 | write_logfile_entry("scanning replacement text for here-doc targets\n"); | ||||
23836 | |||||
23837 | # save the logger object for error messages | ||||
23838 | my $logger_object = $tokenizer_self->{_logger_object}; | ||||
23839 | |||||
23840 | # localize all package variables | ||||
23841 | local ( | ||||
23842 | $tokenizer_self, $last_nonblank_token, | ||||
23843 | $last_nonblank_type, $last_nonblank_block_type, | ||||
23844 | $statement_type, $in_attribute_list, | ||||
23845 | $current_package, $context, | ||||
23846 | %is_constant, %is_user_function, | ||||
23847 | %user_function_prototype, %is_block_function, | ||||
23848 | %is_block_list_function, %saw_function_definition, | ||||
23849 | $brace_depth, $paren_depth, | ||||
23850 | $square_bracket_depth, @current_depth, | ||||
23851 | @total_depth, $total_depth, | ||||
23852 | @nesting_sequence_number, @current_sequence_number, | ||||
23853 | @paren_type, @paren_semicolon_count, | ||||
23854 | @paren_structural_type, @brace_type, | ||||
23855 | @brace_structural_type, @brace_context, | ||||
23856 | @brace_package, @square_bracket_type, | ||||
23857 | @square_bracket_structural_type, @depth_array, | ||||
23858 | @starting_line_of_current_depth, @nested_ternary_flag, | ||||
23859 | @nested_statement_type, | ||||
23860 | ); | ||||
23861 | |||||
23862 | # save all lexical variables | ||||
23863 | my $rstate = save_tokenizer_state(); | ||||
23864 | _decrement_count(); # avoid error check for multiple tokenizers | ||||
23865 | |||||
23866 | # make a new tokenizer | ||||
23867 | my $rOpts = {}; | ||||
23868 | my $rpending_logfile_message; | ||||
23869 | my $source_object = | ||||
23870 | Perl::Tidy::LineSource->new( \$replacement_text, $rOpts, | ||||
23871 | $rpending_logfile_message ); | ||||
23872 | my $tokenizer = Perl::Tidy::Tokenizer->new( | ||||
23873 | source_object => $source_object, | ||||
23874 | logger_object => $logger_object, | ||||
23875 | starting_line_number => $input_line_number, | ||||
23876 | ); | ||||
23877 | |||||
23878 | # scan the replacement text | ||||
23879 | 1 while ( $tokenizer->get_line() ); | ||||
23880 | |||||
23881 | # remove any here doc targets | ||||
23882 | my $rht = undef; | ||||
23883 | if ( $tokenizer_self->{_in_here_doc} ) { | ||||
23884 | $rht = []; | ||||
23885 | push @{$rht}, | ||||
23886 | [ | ||||
23887 | $tokenizer_self->{_here_doc_target}, | ||||
23888 | $tokenizer_self->{_here_quote_character} | ||||
23889 | ]; | ||||
23890 | if ( $tokenizer_self->{_rhere_target_list} ) { | ||||
23891 | push @{$rht}, @{ $tokenizer_self->{_rhere_target_list} }; | ||||
23892 | $tokenizer_self->{_rhere_target_list} = undef; | ||||
23893 | } | ||||
23894 | $tokenizer_self->{_in_here_doc} = undef; | ||||
23895 | } | ||||
23896 | |||||
23897 | # now its safe to report errors | ||||
23898 | $tokenizer->report_tokenization_errors(); | ||||
23899 | |||||
23900 | # restore all tokenizer lexical variables | ||||
23901 | restore_tokenizer_state($rstate); | ||||
23902 | |||||
23903 | # return the here doc targets | ||||
23904 | return $rht; | ||||
23905 | } | ||||
23906 | |||||
23907 | sub scan_bare_identifier { | ||||
23908 | ( $i, $tok, $type, $prototype ) = | ||||
23909 | scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype, | ||||
23910 | $rtoken_map, $max_token_index ); | ||||
23911 | } | ||||
23912 | |||||
23913 | sub scan_identifier { | ||||
23914 | ( $i, $tok, $type, $id_scan_state, $identifier ) = | ||||
23915 | scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens, | ||||
23916 | $max_token_index, $expecting ); | ||||
23917 | } | ||||
23918 | |||||
23919 | sub scan_id { | ||||
23920 | ( $i, $tok, $type, $id_scan_state ) = | ||||
23921 | scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map, | ||||
23922 | $id_scan_state, $max_token_index ); | ||||
23923 | } | ||||
23924 | |||||
23925 | sub scan_number { | ||||
23926 | my $number; | ||||
23927 | ( $i, $type, $number ) = | ||||
23928 | scan_number_do( $input_line, $i, $rtoken_map, $type, | ||||
23929 | $max_token_index ); | ||||
23930 | return $number; | ||||
23931 | } | ||||
23932 | |||||
23933 | # a sub to warn if token found where term expected | ||||
23934 | sub error_if_expecting_TERM { | ||||
23935 | if ( $expecting == TERM ) { | ||||
23936 | if ( $really_want_term{$last_nonblank_type} ) { | ||||
23937 | unexpected( $tok, "term", $i_tok, $last_nonblank_i, $rtoken_map, | ||||
23938 | $rtoken_type, $input_line ); | ||||
23939 | 1; | ||||
23940 | } | ||||
23941 | } | ||||
23942 | } | ||||
23943 | |||||
23944 | # a sub to warn if token found where operator expected | ||||
23945 | sub error_if_expecting_OPERATOR { | ||||
23946 | if ( $expecting == OPERATOR ) { | ||||
23947 | my $thing = defined $_[0] ? $_[0] : $tok; | ||||
23948 | unexpected( $thing, "operator", $i_tok, $last_nonblank_i, | ||||
23949 | $rtoken_map, $rtoken_type, $input_line ); | ||||
23950 | if ( $i_tok == 0 ) { | ||||
23951 | interrupt_logfile(); | ||||
23952 | warning("Missing ';' above?\n"); | ||||
23953 | resume_logfile(); | ||||
23954 | } | ||||
23955 | 1; | ||||
23956 | } | ||||
23957 | } | ||||
23958 | |||||
23959 | # ------------------------------------------------------------ | ||||
23960 | # end scanner interfaces | ||||
23961 | # ------------------------------------------------------------ | ||||
23962 | |||||
23963 | my %is_for_foreach; | ||||
23964 | 1 | 900ns | @_ = qw(for foreach); | ||
23965 | 1 | 2µs | @is_for_foreach{@_} = (1) x scalar(@_); | ||
23966 | |||||
23967 | 1 | 0s | my %is_my_our; | ||
23968 | 1 | 1µs | @_ = qw(my our); | ||
23969 | 1 | 1µs | @is_my_our{@_} = (1) x scalar(@_); | ||
23970 | |||||
23971 | # These keywords may introduce blocks after parenthesized expressions, | ||||
23972 | # in the form: | ||||
23973 | # keyword ( .... ) { BLOCK } | ||||
23974 | # patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when' | ||||
23975 | 1 | 0s | my %is_blocktype_with_paren; | ||
23976 | 1 | 3µs | @_ = qw(if elsif unless while until for foreach switch case given when); | ||
23977 | 1 | 3µs | @is_blocktype_with_paren{@_} = (1) x scalar(@_); | ||
23978 | |||||
23979 | # ------------------------------------------------------------ | ||||
23980 | # begin hash of code for handling most token types | ||||
23981 | # ------------------------------------------------------------ | ||||
23982 | my $tokenization_code = { | ||||
23983 | |||||
23984 | # no special code for these types yet, but syntax checks | ||||
23985 | # could be added | ||||
23986 | |||||
23987 | ## '!' => undef, | ||||
23988 | ## '!=' => undef, | ||||
23989 | ## '!~' => undef, | ||||
23990 | ## '%=' => undef, | ||||
23991 | ## '&&=' => undef, | ||||
23992 | ## '&=' => undef, | ||||
23993 | ## '+=' => undef, | ||||
23994 | ## '-=' => undef, | ||||
23995 | ## '..' => undef, | ||||
23996 | ## '..' => undef, | ||||
23997 | ## '...' => undef, | ||||
23998 | ## '.=' => undef, | ||||
23999 | ## '<<=' => undef, | ||||
24000 | ## '<=' => undef, | ||||
24001 | ## '<=>' => undef, | ||||
24002 | ## '<>' => undef, | ||||
24003 | ## '=' => undef, | ||||
24004 | ## '==' => undef, | ||||
24005 | ## '=~' => undef, | ||||
24006 | ## '>=' => undef, | ||||
24007 | ## '>>' => undef, | ||||
24008 | ## '>>=' => undef, | ||||
24009 | ## '\\' => undef, | ||||
24010 | ## '^=' => undef, | ||||
24011 | ## '|=' => undef, | ||||
24012 | ## '||=' => undef, | ||||
24013 | ## '//=' => undef, | ||||
24014 | ## '~' => undef, | ||||
24015 | ## '~~' => undef, | ||||
24016 | ## '!~~' => undef, | ||||
24017 | |||||
24018 | '>' => sub { | ||||
24019 | error_if_expecting_TERM() | ||||
24020 | if ( $expecting == TERM ); | ||||
24021 | }, | ||||
24022 | '|' => sub { | ||||
24023 | error_if_expecting_TERM() | ||||
24024 | if ( $expecting == TERM ); | ||||
24025 | }, | ||||
24026 | '$' => sub { | ||||
24027 | |||||
24028 | # start looking for a scalar | ||||
24029 | error_if_expecting_OPERATOR("Scalar") | ||||
24030 | if ( $expecting == OPERATOR ); | ||||
24031 | scan_identifier(); | ||||
24032 | |||||
24033 | if ( $identifier eq '$^W' ) { | ||||
24034 | $tokenizer_self->{_saw_perl_dash_w} = 1; | ||||
24035 | } | ||||
24036 | |||||
24037 | # Check for identifier in indirect object slot | ||||
24038 | # (vorboard.pl, sort.t). Something like: | ||||
24039 | # /^(print|printf|sort|exec|system)$/ | ||||
24040 | if ( | ||||
24041 | $is_indirect_object_taker{$last_nonblank_token} | ||||
24042 | |||||
24043 | || ( ( $last_nonblank_token eq '(' ) | ||||
24044 | && $is_indirect_object_taker{ $paren_type[$paren_depth] } ) | ||||
24045 | || ( $last_nonblank_type =~ /^[Uw]$/ ) # possible object | ||||
24046 | ) | ||||
24047 | { | ||||
24048 | $type = 'Z'; | ||||
24049 | } | ||||
24050 | }, | ||||
24051 | '(' => sub { | ||||
24052 | |||||
24053 | ++$paren_depth; | ||||
24054 | $paren_semicolon_count[$paren_depth] = 0; | ||||
24055 | if ($want_paren) { | ||||
24056 | $container_type = $want_paren; | ||||
24057 | $want_paren = ""; | ||||
24058 | } | ||||
24059 | else { | ||||
24060 | $container_type = $last_nonblank_token; | ||||
24061 | |||||
24062 | # We can check for a syntax error here of unexpected '(', | ||||
24063 | # but this is going to get messy... | ||||
24064 | if ( | ||||
24065 | $expecting == OPERATOR | ||||
24066 | |||||
24067 | # be sure this is not a method call of the form | ||||
24068 | # &method(...), $method->(..), &{method}(...), | ||||
24069 | # $ref[2](list) is ok & short for $ref[2]->(list) | ||||
24070 | # NOTE: at present, braces in something like &{ xxx } | ||||
24071 | # are not marked as a block, we might have a method call | ||||
24072 | && $last_nonblank_token !~ /^([\]\}\&]|\-\>)/ | ||||
24073 | |||||
24074 | ) | ||||
24075 | { | ||||
24076 | |||||
24077 | # ref: camel 3 p 703. | ||||
24078 | if ( $last_last_nonblank_token eq 'do' ) { | ||||
24079 | complain( | ||||
24080 | "do SUBROUTINE is deprecated; consider & or -> notation\n" | ||||
24081 | ); | ||||
24082 | } | ||||
24083 | else { | ||||
24084 | |||||
24085 | # if this is an empty list, (), then it is not an | ||||
24086 | # error; for example, we might have a constant pi and | ||||
24087 | # invoke it with pi() or just pi; | ||||
24088 | my ( $next_nonblank_token, $i_next ) = | ||||
24089 | find_next_nonblank_token( $i, $rtokens, | ||||
24090 | $max_token_index ); | ||||
24091 | if ( $next_nonblank_token ne ')' ) { | ||||
24092 | my $hint; | ||||
24093 | error_if_expecting_OPERATOR('('); | ||||
24094 | |||||
24095 | if ( $last_nonblank_type eq 'C' ) { | ||||
24096 | $hint = | ||||
24097 | "$last_nonblank_token has a void prototype\n"; | ||||
24098 | } | ||||
24099 | elsif ( $last_nonblank_type eq 'i' ) { | ||||
24100 | if ( $i_tok > 0 | ||||
24101 | && $last_nonblank_token =~ /^\$/ ) | ||||
24102 | { | ||||
24103 | $hint = | ||||
24104 | "Do you mean '$last_nonblank_token->(' ?\n"; | ||||
24105 | } | ||||
24106 | } | ||||
24107 | if ($hint) { | ||||
24108 | interrupt_logfile(); | ||||
24109 | warning($hint); | ||||
24110 | resume_logfile(); | ||||
24111 | } | ||||
24112 | } ## end if ( $next_nonblank_token... | ||||
24113 | } ## end else [ if ( $last_last_nonblank_token... | ||||
24114 | } ## end if ( $expecting == OPERATOR... | ||||
24115 | } | ||||
24116 | $paren_type[$paren_depth] = $container_type; | ||||
24117 | ( $type_sequence, $indent_flag ) = | ||||
24118 | increase_nesting_depth( PAREN, $$rtoken_map[$i_tok] ); | ||||
24119 | |||||
24120 | # propagate types down through nested parens | ||||
24121 | # for example: the second paren in 'if ((' would be structural | ||||
24122 | # since the first is. | ||||
24123 | |||||
24124 | if ( $last_nonblank_token eq '(' ) { | ||||
24125 | $type = $last_nonblank_type; | ||||
24126 | } | ||||
24127 | |||||
24128 | # We exclude parens as structural after a ',' because it | ||||
24129 | # causes subtle problems with continuation indentation for | ||||
24130 | # something like this, where the first 'or' will not get | ||||
24131 | # indented. | ||||
24132 | # | ||||
24133 | # assert( | ||||
24134 | # __LINE__, | ||||
24135 | # ( not defined $check ) | ||||
24136 | # or ref $check | ||||
24137 | # or $check eq "new" | ||||
24138 | # or $check eq "old", | ||||
24139 | # ); | ||||
24140 | # | ||||
24141 | # Likewise, we exclude parens where a statement can start | ||||
24142 | # because of problems with continuation indentation, like | ||||
24143 | # these: | ||||
24144 | # | ||||
24145 | # ($firstline =~ /^#\!.*perl/) | ||||
24146 | # and (print $File::Find::name, "\n") | ||||
24147 | # and (return 1); | ||||
24148 | # | ||||
24149 | # (ref($usage_fref) =~ /CODE/) | ||||
24150 | # ? &$usage_fref | ||||
24151 | # : (&blast_usage, &blast_params, &blast_general_params); | ||||
24152 | |||||
24153 | else { | ||||
24154 | $type = '{'; | ||||
24155 | } | ||||
24156 | |||||
24157 | if ( $last_nonblank_type eq ')' ) { | ||||
24158 | warning( | ||||
24159 | "Syntax error? found token '$last_nonblank_type' then '('\n" | ||||
24160 | ); | ||||
24161 | } | ||||
24162 | $paren_structural_type[$paren_depth] = $type; | ||||
24163 | |||||
24164 | }, | ||||
24165 | ')' => sub { | ||||
24166 | ( $type_sequence, $indent_flag ) = | ||||
24167 | decrease_nesting_depth( PAREN, $$rtoken_map[$i_tok] ); | ||||
24168 | |||||
24169 | if ( $paren_structural_type[$paren_depth] eq '{' ) { | ||||
24170 | $type = '}'; | ||||
24171 | } | ||||
24172 | |||||
24173 | $container_type = $paren_type[$paren_depth]; | ||||
24174 | |||||
24175 | # /^(for|foreach)$/ | ||||
24176 | if ( $is_for_foreach{ $paren_type[$paren_depth] } ) { | ||||
24177 | my $num_sc = $paren_semicolon_count[$paren_depth]; | ||||
24178 | if ( $num_sc > 0 && $num_sc != 2 ) { | ||||
24179 | warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n"); | ||||
24180 | } | ||||
24181 | } | ||||
24182 | |||||
24183 | if ( $paren_depth > 0 ) { $paren_depth-- } | ||||
24184 | }, | ||||
24185 | ',' => sub { | ||||
24186 | if ( $last_nonblank_type eq ',' ) { | ||||
24187 | complain("Repeated ','s \n"); | ||||
24188 | } | ||||
24189 | |||||
24190 | # patch for operator_expected: note if we are in the list (use.t) | ||||
24191 | if ( $statement_type eq 'use' ) { $statement_type = '_use' } | ||||
24192 | ## FIXME: need to move this elsewhere, perhaps check after a '(' | ||||
24193 | ## elsif ($last_nonblank_token eq '(') { | ||||
24194 | ## warning("Leading ','s illegal in some versions of perl\n"); | ||||
24195 | ## } | ||||
24196 | }, | ||||
24197 | ';' => sub { | ||||
24198 | $context = UNKNOWN_CONTEXT; | ||||
24199 | $statement_type = ''; | ||||
24200 | $want_paren = ""; | ||||
24201 | |||||
24202 | # /^(for|foreach)$/ | ||||
24203 | if ( $is_for_foreach{ $paren_type[$paren_depth] } ) | ||||
24204 | { # mark ; in for loop | ||||
24205 | |||||
24206 | # Be careful: we do not want a semicolon such as the | ||||
24207 | # following to be included: | ||||
24208 | # | ||||
24209 | # for (sort {strcoll($a,$b);} keys %investments) { | ||||
24210 | |||||
24211 | if ( $brace_depth == $depth_array[PAREN][BRACE][$paren_depth] | ||||
24212 | && $square_bracket_depth == | ||||
24213 | $depth_array[PAREN][SQUARE_BRACKET][$paren_depth] ) | ||||
24214 | { | ||||
24215 | |||||
24216 | $type = 'f'; | ||||
24217 | $paren_semicolon_count[$paren_depth]++; | ||||
24218 | } | ||||
24219 | } | ||||
24220 | |||||
24221 | }, | ||||
24222 | '"' => sub { | ||||
24223 | error_if_expecting_OPERATOR("String") | ||||
24224 | if ( $expecting == OPERATOR ); | ||||
24225 | $in_quote = 1; | ||||
24226 | $type = 'Q'; | ||||
24227 | $allowed_quote_modifiers = ""; | ||||
24228 | }, | ||||
24229 | "'" => sub { | ||||
24230 | error_if_expecting_OPERATOR("String") | ||||
24231 | if ( $expecting == OPERATOR ); | ||||
24232 | $in_quote = 1; | ||||
24233 | $type = 'Q'; | ||||
24234 | $allowed_quote_modifiers = ""; | ||||
24235 | }, | ||||
24236 | '`' => sub { | ||||
24237 | error_if_expecting_OPERATOR("String") | ||||
24238 | if ( $expecting == OPERATOR ); | ||||
24239 | $in_quote = 1; | ||||
24240 | $type = 'Q'; | ||||
24241 | $allowed_quote_modifiers = ""; | ||||
24242 | }, | ||||
24243 | '/' => sub { | ||||
24244 | my $is_pattern; | ||||
24245 | |||||
24246 | if ( $expecting == UNKNOWN ) { # indeterminate, must guess.. | ||||
24247 | my $msg; | ||||
24248 | ( $is_pattern, $msg ) = | ||||
24249 | guess_if_pattern_or_division( $i, $rtokens, $rtoken_map, | ||||
24250 | $max_token_index ); | ||||
24251 | |||||
24252 | if ($msg) { | ||||
24253 | write_diagnostics("DIVIDE:$msg\n"); | ||||
24254 | write_logfile_entry($msg); | ||||
24255 | } | ||||
24256 | } | ||||
24257 | else { $is_pattern = ( $expecting == TERM ) } | ||||
24258 | |||||
24259 | if ($is_pattern) { | ||||
24260 | $in_quote = 1; | ||||
24261 | $type = 'Q'; | ||||
24262 | $allowed_quote_modifiers = '[msixpodualgc]'; | ||||
24263 | } | ||||
24264 | else { # not a pattern; check for a /= token | ||||
24265 | |||||
24266 | if ( $$rtokens[ $i + 1 ] eq '=' ) { # form token /= | ||||
24267 | $i++; | ||||
24268 | $tok = '/='; | ||||
24269 | $type = $tok; | ||||
24270 | } | ||||
24271 | |||||
24272 | #DEBUG - collecting info on what tokens follow a divide | ||||
24273 | # for development of guessing algorithm | ||||
24274 | #if ( numerator_expected( $i, $rtokens, $max_token_index ) < 0 ) { | ||||
24275 | # #write_diagnostics( "DIVIDE? $input_line\n" ); | ||||
24276 | #} | ||||
24277 | } | ||||
24278 | }, | ||||
24279 | '{' => sub { | ||||
24280 | |||||
24281 | # if we just saw a ')', we will label this block with | ||||
24282 | # its type. We need to do this to allow sub | ||||
24283 | # code_block_type to determine if this brace starts a | ||||
24284 | # code block or anonymous hash. (The type of a paren | ||||
24285 | # pair is the preceding token, such as 'if', 'else', | ||||
24286 | # etc). | ||||
24287 | $container_type = ""; | ||||
24288 | |||||
24289 | # ATTRS: for a '{' following an attribute list, reset | ||||
24290 | # things to look like we just saw the sub name | ||||
24291 | if ( $statement_type =~ /^sub/ ) { | ||||
24292 | $last_nonblank_token = $statement_type; | ||||
24293 | $last_nonblank_type = 'i'; | ||||
24294 | $statement_type = ""; | ||||
24295 | } | ||||
24296 | |||||
24297 | # patch for SWITCH/CASE: hide these keywords from an immediately | ||||
24298 | # following opening brace | ||||
24299 | elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' ) | ||||
24300 | && $statement_type eq $last_nonblank_token ) | ||||
24301 | { | ||||
24302 | $last_nonblank_token = ";"; | ||||
24303 | } | ||||
24304 | |||||
24305 | elsif ( $last_nonblank_token eq ')' ) { | ||||
24306 | $last_nonblank_token = $paren_type[ $paren_depth + 1 ]; | ||||
24307 | |||||
24308 | # defensive move in case of a nesting error (pbug.t) | ||||
24309 | # in which this ')' had no previous '(' | ||||
24310 | # this nesting error will have been caught | ||||
24311 | if ( !defined($last_nonblank_token) ) { | ||||
24312 | $last_nonblank_token = 'if'; | ||||
24313 | } | ||||
24314 | |||||
24315 | # check for syntax error here; | ||||
24316 | unless ( $is_blocktype_with_paren{$last_nonblank_token} ) { | ||||
24317 | my $list = join( ' ', sort keys %is_blocktype_with_paren ); | ||||
24318 | warning( | ||||
24319 | "syntax error at ') {', didn't see one of: $list\n"); | ||||
24320 | } | ||||
24321 | } | ||||
24322 | |||||
24323 | # patch for paren-less for/foreach glitch, part 2. | ||||
24324 | # see note below under 'qw' | ||||
24325 | elsif ($last_nonblank_token eq 'qw' | ||||
24326 | && $is_for_foreach{$want_paren} ) | ||||
24327 | { | ||||
24328 | $last_nonblank_token = $want_paren; | ||||
24329 | if ( $last_last_nonblank_token eq $want_paren ) { | ||||
24330 | warning( | ||||
24331 | "syntax error at '$want_paren .. {' -- missing \$ loop variable\n" | ||||
24332 | ); | ||||
24333 | |||||
24334 | } | ||||
24335 | $want_paren = ""; | ||||
24336 | } | ||||
24337 | |||||
24338 | # now identify which of the three possible types of | ||||
24339 | # curly braces we have: hash index container, anonymous | ||||
24340 | # hash reference, or code block. | ||||
24341 | |||||
24342 | # non-structural (hash index) curly brace pair | ||||
24343 | # get marked 'L' and 'R' | ||||
24344 | if ( is_non_structural_brace() ) { | ||||
24345 | $type = 'L'; | ||||
24346 | |||||
24347 | # patch for SWITCH/CASE: | ||||
24348 | # allow paren-less identifier after 'when' | ||||
24349 | # if the brace is preceded by a space | ||||
24350 | if ( $statement_type eq 'when' | ||||
24351 | && $last_nonblank_type eq 'i' | ||||
24352 | && $last_last_nonblank_type eq 'k' | ||||
24353 | && ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) ) | ||||
24354 | { | ||||
24355 | $type = '{'; | ||||
24356 | $block_type = $statement_type; | ||||
24357 | } | ||||
24358 | } | ||||
24359 | |||||
24360 | # code and anonymous hash have the same type, '{', but are | ||||
24361 | # distinguished by 'block_type', | ||||
24362 | # which will be blank for an anonymous hash | ||||
24363 | else { | ||||
24364 | |||||
24365 | $block_type = code_block_type( $i_tok, $rtokens, $rtoken_type, | ||||
24366 | $max_token_index ); | ||||
24367 | |||||
24368 | # remember a preceding smartmatch operator | ||||
24369 | ## SMARTMATCH | ||||
24370 | ##if ( $last_nonblank_type eq '~~' ) { | ||||
24371 | ## $block_type = $last_nonblank_type; | ||||
24372 | ##} | ||||
24373 | |||||
24374 | # patch to promote bareword type to function taking block | ||||
24375 | if ( $block_type | ||||
24376 | && $last_nonblank_type eq 'w' | ||||
24377 | && $last_nonblank_i >= 0 ) | ||||
24378 | { | ||||
24379 | if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) { | ||||
24380 | $routput_token_type->[$last_nonblank_i] = 'G'; | ||||
24381 | } | ||||
24382 | } | ||||
24383 | |||||
24384 | # patch for SWITCH/CASE: if we find a stray opening block brace | ||||
24385 | # where we might accept a 'case' or 'when' block, then take it | ||||
24386 | if ( $statement_type eq 'case' | ||||
24387 | || $statement_type eq 'when' ) | ||||
24388 | { | ||||
24389 | if ( !$block_type || $block_type eq '}' ) { | ||||
24390 | $block_type = $statement_type; | ||||
24391 | } | ||||
24392 | } | ||||
24393 | } | ||||
24394 | $brace_type[ ++$brace_depth ] = $block_type; | ||||
24395 | $brace_package[$brace_depth] = $current_package; | ||||
24396 | $brace_structural_type[$brace_depth] = $type; | ||||
24397 | $brace_context[$brace_depth] = $context; | ||||
24398 | ( $type_sequence, $indent_flag ) = | ||||
24399 | increase_nesting_depth( BRACE, $$rtoken_map[$i_tok] ); | ||||
24400 | }, | ||||
24401 | '}' => sub { | ||||
24402 | $block_type = $brace_type[$brace_depth]; | ||||
24403 | if ($block_type) { $statement_type = '' } | ||||
24404 | if ( defined( $brace_package[$brace_depth] ) ) { | ||||
24405 | $current_package = $brace_package[$brace_depth]; | ||||
24406 | } | ||||
24407 | |||||
24408 | # can happen on brace error (caught elsewhere) | ||||
24409 | else { | ||||
24410 | } | ||||
24411 | ( $type_sequence, $indent_flag ) = | ||||
24412 | decrease_nesting_depth( BRACE, $$rtoken_map[$i_tok] ); | ||||
24413 | |||||
24414 | if ( $brace_structural_type[$brace_depth] eq 'L' ) { | ||||
24415 | $type = 'R'; | ||||
24416 | } | ||||
24417 | |||||
24418 | # propagate type information for 'do' and 'eval' blocks, and also | ||||
24419 | # for smartmatch operator. This is necessary to enable us to know | ||||
24420 | # if an operator or term is expected next. | ||||
24421 | ## SMARTMATCH | ||||
24422 | ##if ( $is_block_operator{$block_type} || $block_type eq '~~' ) { | ||||
24423 | if ( $is_block_operator{$block_type} ) { | ||||
24424 | $tok = $block_type; | ||||
24425 | } | ||||
24426 | |||||
24427 | $context = $brace_context[$brace_depth]; | ||||
24428 | if ( $brace_depth > 0 ) { $brace_depth--; } | ||||
24429 | }, | ||||
24430 | '&' => sub { # maybe sub call? start looking | ||||
24431 | |||||
24432 | # We have to check for sub call unless we are sure we | ||||
24433 | # are expecting an operator. This example from s2p | ||||
24434 | # got mistaken as a q operator in an early version: | ||||
24435 | # print BODY &q(<<'EOT'); | ||||
24436 | if ( $expecting != OPERATOR ) { | ||||
24437 | |||||
24438 | # But only look for a sub call if we are expecting a term or | ||||
24439 | # if there is no existing space after the &. | ||||
24440 | # For example we probably don't want & as sub call here: | ||||
24441 | # Fcntl::S_IRUSR & $mode; | ||||
24442 | if ( $expecting == TERM || $next_type ne 'b' ) { | ||||
24443 | scan_identifier(); | ||||
24444 | } | ||||
24445 | } | ||||
24446 | else { | ||||
24447 | } | ||||
24448 | }, | ||||
24449 | '<' => sub { # angle operator or less than? | ||||
24450 | |||||
24451 | if ( $expecting != OPERATOR ) { | ||||
24452 | ( $i, $type ) = | ||||
24453 | find_angle_operator_termination( $input_line, $i, $rtoken_map, | ||||
24454 | $expecting, $max_token_index ); | ||||
24455 | |||||
24456 | if ( $type eq '<' && $expecting == TERM ) { | ||||
24457 | error_if_expecting_TERM(); | ||||
24458 | interrupt_logfile(); | ||||
24459 | warning("Unterminated <> operator?\n"); | ||||
24460 | resume_logfile(); | ||||
24461 | } | ||||
24462 | } | ||||
24463 | else { | ||||
24464 | } | ||||
24465 | }, | ||||
24466 | '?' => sub { # ?: conditional or starting pattern? | ||||
24467 | |||||
24468 | my $is_pattern; | ||||
24469 | |||||
24470 | if ( $expecting == UNKNOWN ) { | ||||
24471 | |||||
24472 | my $msg; | ||||
24473 | ( $is_pattern, $msg ) = | ||||
24474 | guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map, | ||||
24475 | $max_token_index ); | ||||
24476 | |||||
24477 | if ($msg) { write_logfile_entry($msg) } | ||||
24478 | } | ||||
24479 | else { $is_pattern = ( $expecting == TERM ) } | ||||
24480 | |||||
24481 | if ($is_pattern) { | ||||
24482 | $in_quote = 1; | ||||
24483 | $type = 'Q'; | ||||
24484 | $allowed_quote_modifiers = '[msixpodualgc]'; | ||||
24485 | } | ||||
24486 | else { | ||||
24487 | ( $type_sequence, $indent_flag ) = | ||||
24488 | increase_nesting_depth( QUESTION_COLON, | ||||
24489 | $$rtoken_map[$i_tok] ); | ||||
24490 | } | ||||
24491 | }, | ||||
24492 | '*' => sub { # typeglob, or multiply? | ||||
24493 | |||||
24494 | if ( $expecting == TERM ) { | ||||
24495 | scan_identifier(); | ||||
24496 | } | ||||
24497 | else { | ||||
24498 | |||||
24499 | if ( $$rtokens[ $i + 1 ] eq '=' ) { | ||||
24500 | $tok = '*='; | ||||
24501 | $type = $tok; | ||||
24502 | $i++; | ||||
24503 | } | ||||
24504 | elsif ( $$rtokens[ $i + 1 ] eq '*' ) { | ||||
24505 | $tok = '**'; | ||||
24506 | $type = $tok; | ||||
24507 | $i++; | ||||
24508 | if ( $$rtokens[ $i + 1 ] eq '=' ) { | ||||
24509 | $tok = '**='; | ||||
24510 | $type = $tok; | ||||
24511 | $i++; | ||||
24512 | } | ||||
24513 | } | ||||
24514 | } | ||||
24515 | }, | ||||
24516 | '.' => sub { # what kind of . ? | ||||
24517 | |||||
24518 | if ( $expecting != OPERATOR ) { | ||||
24519 | scan_number(); | ||||
24520 | if ( $type eq '.' ) { | ||||
24521 | error_if_expecting_TERM() | ||||
24522 | if ( $expecting == TERM ); | ||||
24523 | } | ||||
24524 | } | ||||
24525 | else { | ||||
24526 | } | ||||
24527 | }, | ||||
24528 | ':' => sub { | ||||
24529 | |||||
24530 | # if this is the first nonblank character, call it a label | ||||
24531 | # since perl seems to just swallow it | ||||
24532 | if ( $input_line_number == 1 && $last_nonblank_i == -1 ) { | ||||
24533 | $type = 'J'; | ||||
24534 | } | ||||
24535 | |||||
24536 | # ATTRS: check for a ':' which introduces an attribute list | ||||
24537 | # (this might eventually get its own token type) | ||||
24538 | elsif ( $statement_type =~ /^sub/ ) { | ||||
24539 | $type = 'A'; | ||||
24540 | $in_attribute_list = 1; | ||||
24541 | } | ||||
24542 | |||||
24543 | # check for scalar attribute, such as | ||||
24544 | # my $foo : shared = 1; | ||||
24545 | elsif ($is_my_our{$statement_type} | ||||
24546 | && $current_depth[QUESTION_COLON] == 0 ) | ||||
24547 | { | ||||
24548 | $type = 'A'; | ||||
24549 | $in_attribute_list = 1; | ||||
24550 | } | ||||
24551 | |||||
24552 | # otherwise, it should be part of a ?/: operator | ||||
24553 | else { | ||||
24554 | ( $type_sequence, $indent_flag ) = | ||||
24555 | decrease_nesting_depth( QUESTION_COLON, | ||||
24556 | $$rtoken_map[$i_tok] ); | ||||
24557 | if ( $last_nonblank_token eq '?' ) { | ||||
24558 | warning("Syntax error near ? :\n"); | ||||
24559 | } | ||||
24560 | } | ||||
24561 | }, | ||||
24562 | '+' => sub { # what kind of plus? | ||||
24563 | |||||
24564 | if ( $expecting == TERM ) { | ||||
24565 | my $number = scan_number(); | ||||
24566 | |||||
24567 | # unary plus is safest assumption if not a number | ||||
24568 | if ( !defined($number) ) { $type = 'p'; } | ||||
24569 | } | ||||
24570 | elsif ( $expecting == OPERATOR ) { | ||||
24571 | } | ||||
24572 | else { | ||||
24573 | if ( $next_type eq 'w' ) { $type = 'p' } | ||||
24574 | } | ||||
24575 | }, | ||||
24576 | '@' => sub { | ||||
24577 | |||||
24578 | error_if_expecting_OPERATOR("Array") | ||||
24579 | if ( $expecting == OPERATOR ); | ||||
24580 | scan_identifier(); | ||||
24581 | }, | ||||
24582 | '%' => sub { # hash or modulo? | ||||
24583 | |||||
24584 | # first guess is hash if no following blank | ||||
24585 | if ( $expecting == UNKNOWN ) { | ||||
24586 | if ( $next_type ne 'b' ) { $expecting = TERM } | ||||
24587 | } | ||||
24588 | if ( $expecting == TERM ) { | ||||
24589 | scan_identifier(); | ||||
24590 | } | ||||
24591 | }, | ||||
24592 | '[' => sub { | ||||
24593 | $square_bracket_type[ ++$square_bracket_depth ] = | ||||
24594 | $last_nonblank_token; | ||||
24595 | ( $type_sequence, $indent_flag ) = | ||||
24596 | increase_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] ); | ||||
24597 | |||||
24598 | # It may seem odd, but structural square brackets have | ||||
24599 | # type '{' and '}'. This simplifies the indentation logic. | ||||
24600 | if ( !is_non_structural_brace() ) { | ||||
24601 | $type = '{'; | ||||
24602 | } | ||||
24603 | $square_bracket_structural_type[$square_bracket_depth] = $type; | ||||
24604 | }, | ||||
24605 | ']' => sub { | ||||
24606 | ( $type_sequence, $indent_flag ) = | ||||
24607 | decrease_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] ); | ||||
24608 | |||||
24609 | if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' ) | ||||
24610 | { | ||||
24611 | $type = '}'; | ||||
24612 | } | ||||
24613 | |||||
24614 | # propagate type information for smartmatch operator. This is | ||||
24615 | # necessary to enable us to know if an operator or term is expected | ||||
24616 | # next. | ||||
24617 | if ( $square_bracket_type[$square_bracket_depth] eq '~~' ) { | ||||
24618 | $tok = $square_bracket_type[$square_bracket_depth]; | ||||
24619 | } | ||||
24620 | |||||
24621 | if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; } | ||||
24622 | }, | ||||
24623 | '-' => sub { # what kind of minus? | ||||
24624 | |||||
24625 | if ( ( $expecting != OPERATOR ) | ||||
24626 | && $is_file_test_operator{$next_tok} ) | ||||
24627 | { | ||||
24628 | my ( $next_nonblank_token, $i_next ) = | ||||
24629 | find_next_nonblank_token( $i + 1, $rtokens, | ||||
24630 | $max_token_index ); | ||||
24631 | |||||
24632 | # check for a quoted word like "-w=>xx"; | ||||
24633 | # it is sufficient to just check for a following '=' | ||||
24634 | if ( $next_nonblank_token eq '=' ) { | ||||
24635 | $type = 'm'; | ||||
24636 | } | ||||
24637 | else { | ||||
24638 | $i++; | ||||
24639 | $tok .= $next_tok; | ||||
24640 | $type = 'F'; | ||||
24641 | } | ||||
24642 | } | ||||
24643 | elsif ( $expecting == TERM ) { | ||||
24644 | my $number = scan_number(); | ||||
24645 | |||||
24646 | # maybe part of bareword token? unary is safest | ||||
24647 | if ( !defined($number) ) { $type = 'm'; } | ||||
24648 | |||||
24649 | } | ||||
24650 | elsif ( $expecting == OPERATOR ) { | ||||
24651 | } | ||||
24652 | else { | ||||
24653 | |||||
24654 | if ( $next_type eq 'w' ) { | ||||
24655 | $type = 'm'; | ||||
24656 | } | ||||
24657 | } | ||||
24658 | }, | ||||
24659 | |||||
24660 | '^' => sub { | ||||
24661 | |||||
24662 | # check for special variables like ${^WARNING_BITS} | ||||
24663 | if ( $expecting == TERM ) { | ||||
24664 | |||||
24665 | # FIXME: this should work but will not catch errors | ||||
24666 | # because we also have to be sure that previous token is | ||||
24667 | # a type character ($,@,%). | ||||
24668 | if ( $last_nonblank_token eq '{' | ||||
24669 | && ( $next_tok =~ /^[A-Za-z_]/ ) ) | ||||
24670 | { | ||||
24671 | |||||
24672 | if ( $next_tok eq 'W' ) { | ||||
24673 | $tokenizer_self->{_saw_perl_dash_w} = 1; | ||||
24674 | } | ||||
24675 | $tok = $tok . $next_tok; | ||||
24676 | $i = $i + 1; | ||||
24677 | $type = 'w'; | ||||
24678 | } | ||||
24679 | |||||
24680 | else { | ||||
24681 | unless ( error_if_expecting_TERM() ) { | ||||
24682 | |||||
24683 | # Something like this is valid but strange: | ||||
24684 | # undef ^I; | ||||
24685 | complain("The '^' seems unusual here\n"); | ||||
24686 | } | ||||
24687 | } | ||||
24688 | } | ||||
24689 | }, | ||||
24690 | |||||
24691 | '::' => sub { # probably a sub call | ||||
24692 | scan_bare_identifier(); | ||||
24693 | }, | ||||
24694 | '<<' => sub { # maybe a here-doc? | ||||
24695 | return | ||||
24696 | unless ( $i < $max_token_index ) | ||||
24697 | ; # here-doc not possible if end of line | ||||
24698 | |||||
24699 | if ( $expecting != OPERATOR ) { | ||||
24700 | my ( $found_target, $here_doc_target, $here_quote_character, | ||||
24701 | $saw_error ); | ||||
24702 | ( | ||||
24703 | $found_target, $here_doc_target, $here_quote_character, $i, | ||||
24704 | $saw_error | ||||
24705 | ) | ||||
24706 | = find_here_doc( $expecting, $i, $rtokens, $rtoken_map, | ||||
24707 | $max_token_index ); | ||||
24708 | |||||
24709 | if ($found_target) { | ||||
24710 | push @{$rhere_target_list}, | ||||
24711 | [ $here_doc_target, $here_quote_character ]; | ||||
24712 | $type = 'h'; | ||||
24713 | if ( length($here_doc_target) > 80 ) { | ||||
24714 | my $truncated = substr( $here_doc_target, 0, 80 ); | ||||
24715 | complain("Long here-target: '$truncated' ...\n"); | ||||
24716 | } | ||||
24717 | elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) { | ||||
24718 | complain( | ||||
24719 | "Unconventional here-target: '$here_doc_target'\n" | ||||
24720 | ); | ||||
24721 | } | ||||
24722 | } | ||||
24723 | elsif ( $expecting == TERM ) { | ||||
24724 | unless ($saw_error) { | ||||
24725 | |||||
24726 | # shouldn't happen.. | ||||
24727 | warning("Program bug; didn't find here doc target\n"); | ||||
24728 | report_definite_bug(); | ||||
24729 | } | ||||
24730 | } | ||||
24731 | } | ||||
24732 | else { | ||||
24733 | } | ||||
24734 | }, | ||||
24735 | '->' => sub { | ||||
24736 | |||||
24737 | # if -> points to a bare word, we must scan for an identifier, | ||||
24738 | # otherwise something like ->y would look like the y operator | ||||
24739 | scan_identifier(); | ||||
24740 | }, | ||||
24741 | |||||
24742 | # type = 'pp' for pre-increment, '++' for post-increment | ||||
24743 | '++' => sub { | ||||
24744 | if ( $expecting == TERM ) { $type = 'pp' } | ||||
24745 | elsif ( $expecting == UNKNOWN ) { | ||||
24746 | my ( $next_nonblank_token, $i_next ) = | ||||
24747 | find_next_nonblank_token( $i, $rtokens, $max_token_index ); | ||||
24748 | if ( $next_nonblank_token eq '$' ) { $type = 'pp' } | ||||
24749 | } | ||||
24750 | }, | ||||
24751 | |||||
24752 | '=>' => sub { | ||||
24753 | if ( $last_nonblank_type eq $tok ) { | ||||
24754 | complain("Repeated '=>'s \n"); | ||||
24755 | } | ||||
24756 | |||||
24757 | # patch for operator_expected: note if we are in the list (use.t) | ||||
24758 | # TODO: make version numbers a new token type | ||||
24759 | if ( $statement_type eq 'use' ) { $statement_type = '_use' } | ||||
24760 | }, | ||||
24761 | |||||
24762 | # type = 'mm' for pre-decrement, '--' for post-decrement | ||||
24763 | '--' => sub { | ||||
24764 | |||||
24765 | if ( $expecting == TERM ) { $type = 'mm' } | ||||
24766 | elsif ( $expecting == UNKNOWN ) { | ||||
24767 | my ( $next_nonblank_token, $i_next ) = | ||||
24768 | find_next_nonblank_token( $i, $rtokens, $max_token_index ); | ||||
24769 | if ( $next_nonblank_token eq '$' ) { $type = 'mm' } | ||||
24770 | } | ||||
24771 | }, | ||||
24772 | |||||
24773 | '&&' => sub { | ||||
24774 | error_if_expecting_TERM() | ||||
24775 | if ( $expecting == TERM ); | ||||
24776 | }, | ||||
24777 | |||||
24778 | '||' => sub { | ||||
24779 | error_if_expecting_TERM() | ||||
24780 | if ( $expecting == TERM ); | ||||
24781 | }, | ||||
24782 | |||||
24783 | '//' => sub { | ||||
24784 | error_if_expecting_TERM() | ||||
24785 | if ( $expecting == TERM ); | ||||
24786 | }, | ||||
24787 | 1 | 46µs | }; | ||
24788 | |||||
24789 | # ------------------------------------------------------------ | ||||
24790 | # end hash of code for handling individual token types | ||||
24791 | # ------------------------------------------------------------ | ||||
24792 | |||||
24793 | 1 | 1µs | my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' ); | ||
24794 | |||||
24795 | # These block types terminate statements and do not need a trailing | ||||
24796 | # semicolon | ||||
24797 | # patched for SWITCH/CASE/ | ||||
24798 | 1 | 100ns | my %is_zero_continuation_block_type; | ||
24799 | 1 | 6µs | @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ; | ||
24800 | if elsif else unless while until for foreach switch case given when); | ||||
24801 | 1 | 5µs | @is_zero_continuation_block_type{@_} = (1) x scalar(@_); | ||
24802 | |||||
24803 | 1 | 0s | my %is_not_zero_continuation_block_type; | ||
24804 | 1 | 3µs | @_ = qw(sort grep map do eval); | ||
24805 | 1 | 1µs | @is_not_zero_continuation_block_type{@_} = (1) x scalar(@_); | ||
24806 | |||||
24807 | 1 | 0s | my %is_logical_container; | ||
24808 | 1 | 3µs | @_ = qw(if elsif unless while and or err not && ! || for foreach); | ||
24809 | 1 | 3µs | @is_logical_container{@_} = (1) x scalar(@_); | ||
24810 | |||||
24811 | 1 | 100ns | my %is_binary_type; | ||
24812 | 1 | 2µs | @_ = qw(|| &&); | ||
24813 | 1 | 800ns | @is_binary_type{@_} = (1) x scalar(@_); | ||
24814 | |||||
24815 | 1 | 0s | my %is_binary_keyword; | ||
24816 | 1 | 1µs | @_ = qw(and or err eq ne cmp); | ||
24817 | 1 | 1µs | @is_binary_keyword{@_} = (1) x scalar(@_); | ||
24818 | |||||
24819 | # 'L' is token for opening { at hash key | ||||
24820 | 1 | 0s | my %is_opening_type; | ||
24821 | 1 | 1µs | @_ = qw" L { ( [ "; | ||
24822 | 1 | 900ns | @is_opening_type{@_} = (1) x scalar(@_); | ||
24823 | |||||
24824 | # 'R' is token for closing } at hash key | ||||
24825 | 1 | 0s | my %is_closing_type; | ||
24826 | 1 | 1µs | @_ = qw" R } ) ] "; | ||
24827 | 1 | 900ns | @is_closing_type{@_} = (1) x scalar(@_); | ||
24828 | |||||
24829 | 1 | 100ns | my %is_redo_last_next_goto; | ||
24830 | 1 | 1µs | @_ = qw(redo last next goto); | ||
24831 | 1 | 1µs | @is_redo_last_next_goto{@_} = (1) x scalar(@_); | ||
24832 | |||||
24833 | 1 | 0s | my %is_use_require; | ||
24834 | 1 | 800ns | @_ = qw(use require); | ||
24835 | 1 | 600ns | @is_use_require{@_} = (1) x scalar(@_); | ||
24836 | |||||
24837 | 1 | 0s | my %is_sub_package; | ||
24838 | 1 | 700ns | @_ = qw(sub package); | ||
24839 | 1 | 700ns | @is_sub_package{@_} = (1) x scalar(@_); | ||
24840 | |||||
24841 | # This hash holds the hash key in $tokenizer_self for these keywords: | ||||
24842 | 1 | 2µs | my %is_format_END_DATA = ( | ||
24843 | 'format' => '_in_format', | ||||
24844 | '__END__' => '_in_end', | ||||
24845 | '__DATA__' => '_in_data', | ||||
24846 | ); | ||||
24847 | |||||
24848 | # ref: camel 3 p 147, | ||||
24849 | # but perl may accept undocumented flags | ||||
24850 | # perl 5.10 adds 'p' (preserve) | ||||
24851 | # Perl version 5.16, http://perldoc.perl.org/perlop.html, has these: | ||||
24852 | # /PATTERN/msixpodualgc or m?PATTERN?msixpodualgc | ||||
24853 | # s/PATTERN/REPLACEMENT/msixpodualgcer | ||||
24854 | # y/SEARCHLIST/REPLACEMENTLIST/cdsr | ||||
24855 | # tr/SEARCHLIST/REPLACEMENTLIST/cdsr | ||||
24856 | # qr/STRING/msixpodual | ||||
24857 | 1 | 3µs | my %quote_modifiers = ( | ||
24858 | 's' => '[msixpodualgcer]', | ||||
24859 | 'y' => '[cdsr]', | ||||
24860 | 'tr' => '[cdsr]', | ||||
24861 | 'm' => '[msixpodualgc]', | ||||
24862 | 'qr' => '[msixpodual]', | ||||
24863 | 'q' => "", | ||||
24864 | 'qq' => "", | ||||
24865 | 'qw' => "", | ||||
24866 | 'qx' => "", | ||||
24867 | ); | ||||
24868 | |||||
24869 | # table showing how many quoted things to look for after quote operator.. | ||||
24870 | # s, y, tr have 2 (pattern and replacement) | ||||
24871 | # others have 1 (pattern only) | ||||
24872 | 1 | 3µs | my %quote_items = ( | ||
24873 | 's' => 2, | ||||
24874 | 'y' => 2, | ||||
24875 | 'tr' => 2, | ||||
24876 | 'm' => 1, | ||||
24877 | 'qr' => 1, | ||||
24878 | 'q' => 1, | ||||
24879 | 'qq' => 1, | ||||
24880 | 'qw' => 1, | ||||
24881 | 'qx' => 1, | ||||
24882 | ); | ||||
24883 | |||||
24884 | sub tokenize_this_line { | ||||
24885 | |||||
24886 | # This routine breaks a line of perl code into tokens which are of use in | ||||
24887 | # indentation and reformatting. One of my goals has been to define tokens | ||||
24888 | # such that a newline may be inserted between any pair of tokens without | ||||
24889 | # changing or invalidating the program. This version comes close to this, | ||||
24890 | # although there are necessarily a few exceptions which must be caught by | ||||
24891 | # the formatter. Many of these involve the treatment of bare words. | ||||
24892 | # | ||||
24893 | # The tokens and their types are returned in arrays. See previous | ||||
24894 | # routine for their names. | ||||
24895 | # | ||||
24896 | # See also the array "valid_token_types" in the BEGIN section for an | ||||
24897 | # up-to-date list. | ||||
24898 | # | ||||
24899 | # To simplify things, token types are either a single character, or they | ||||
24900 | # are identical to the tokens themselves. | ||||
24901 | # | ||||
24902 | # As a debugging aid, the -D flag creates a file containing a side-by-side | ||||
24903 | # comparison of the input string and its tokenization for each line of a file. | ||||
24904 | # This is an invaluable debugging aid. | ||||
24905 | # | ||||
24906 | # In addition to tokens, and some associated quantities, the tokenizer | ||||
24907 | # also returns flags indication any special line types. These include | ||||
24908 | # quotes, here_docs, formats. | ||||
24909 | # | ||||
24910 | # ----------------------------------------------------------------------- | ||||
24911 | # | ||||
24912 | # How to add NEW_TOKENS: | ||||
24913 | # | ||||
24914 | # New token types will undoubtedly be needed in the future both to keep up | ||||
24915 | # with changes in perl and to help adapt the tokenizer to other applications. | ||||
24916 | # | ||||
24917 | # Here are some notes on the minimal steps. I wrote these notes while | ||||
24918 | # adding the 'v' token type for v-strings, which are things like version | ||||
24919 | # numbers 5.6.0, and ip addresses, and will use that as an example. ( You | ||||
24920 | # can use your editor to search for the string "NEW_TOKENS" to find the | ||||
24921 | # appropriate sections to change): | ||||
24922 | # | ||||
24923 | # *. Try to talk somebody else into doing it! If not, .. | ||||
24924 | # | ||||
24925 | # *. Make a backup of your current version in case things don't work out! | ||||
24926 | # | ||||
24927 | # *. Think of a new, unused character for the token type, and add to | ||||
24928 | # the array @valid_token_types in the BEGIN section of this package. | ||||
24929 | # For example, I used 'v' for v-strings. | ||||
24930 | # | ||||
24931 | # *. Implement coding to recognize the $type of the token in this routine. | ||||
24932 | # This is the hardest part, and is best done by imitating or modifying | ||||
24933 | # some of the existing coding. For example, to recognize v-strings, I | ||||
24934 | # patched 'sub scan_bare_identifier' to recognize v-strings beginning with | ||||
24935 | # 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'. | ||||
24936 | # | ||||
24937 | # *. Update sub operator_expected. This update is critically important but | ||||
24938 | # the coding is trivial. Look at the comments in that routine for help. | ||||
24939 | # For v-strings, which should behave like numbers, I just added 'v' to the | ||||
24940 | # regex used to handle numbers and strings (types 'n' and 'Q'). | ||||
24941 | # | ||||
24942 | # *. Implement a 'bond strength' rule in sub set_bond_strengths in | ||||
24943 | # Perl::Tidy::Formatter for breaking lines around this token type. You can | ||||
24944 | # skip this step and take the default at first, then adjust later to get | ||||
24945 | # desired results. For adding type 'v', I looked at sub bond_strength and | ||||
24946 | # saw that number type 'n' was using default strengths, so I didn't do | ||||
24947 | # anything. I may tune it up someday if I don't like the way line | ||||
24948 | # breaks with v-strings look. | ||||
24949 | # | ||||
24950 | # *. Implement a 'whitespace' rule in sub set_white_space_flag in | ||||
24951 | # Perl::Tidy::Formatter. For adding type 'v', I looked at this routine | ||||
24952 | # and saw that type 'n' used spaces on both sides, so I just added 'v' | ||||
24953 | # to the array @spaces_both_sides. | ||||
24954 | # | ||||
24955 | # *. Update HtmlWriter package so that users can colorize the token as | ||||
24956 | # desired. This is quite easy; see comments identified by 'NEW_TOKENS' in | ||||
24957 | # that package. For v-strings, I initially chose to use a default color | ||||
24958 | # equal to the default for numbers, but it might be nice to change that | ||||
24959 | # eventually. | ||||
24960 | # | ||||
24961 | # *. Update comments in Perl::Tidy::Tokenizer::dump_token_types. | ||||
24962 | # | ||||
24963 | # *. Run lots and lots of debug tests. Start with special files designed | ||||
24964 | # to test the new token type. Run with the -D flag to create a .DEBUG | ||||
24965 | # file which shows the tokenization. When these work ok, test as many old | ||||
24966 | # scripts as possible. Start with all of the '.t' files in the 'test' | ||||
24967 | # directory of the distribution file. Compare .tdy output with previous | ||||
24968 | # version and updated version to see the differences. Then include as | ||||
24969 | # many more files as possible. My own technique has been to collect a huge | ||||
24970 | # number of perl scripts (thousands!) into one directory and run perltidy | ||||
24971 | # *, then run diff between the output of the previous version and the | ||||
24972 | # current version. | ||||
24973 | # | ||||
24974 | # *. For another example, search for the smartmatch operator '~~' | ||||
24975 | # with your editor to see where updates were made for it. | ||||
24976 | # | ||||
24977 | # ----------------------------------------------------------------------- | ||||
24978 | |||||
24979 | my $line_of_tokens = shift; | ||||
24980 | my ($untrimmed_input_line) = $line_of_tokens->{_line_text}; | ||||
24981 | |||||
24982 | # patch while coding change is underway | ||||
24983 | # make callers private data to allow access | ||||
24984 | # $tokenizer_self = $caller_tokenizer_self; | ||||
24985 | |||||
24986 | # extract line number for use in error messages | ||||
24987 | $input_line_number = $line_of_tokens->{_line_number}; | ||||
24988 | |||||
24989 | # reinitialize for multi-line quote | ||||
24990 | $line_of_tokens->{_starting_in_quote} = $in_quote && $quote_type eq 'Q'; | ||||
24991 | |||||
24992 | # check for pod documentation | ||||
24993 | if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) { | ||||
24994 | |||||
24995 | # must not be in multi-line quote | ||||
24996 | # and must not be in an equation | ||||
24997 | if ( !$in_quote and ( operator_expected( 'b', '=', 'b' ) == TERM ) ) | ||||
24998 | { | ||||
24999 | $tokenizer_self->{_in_pod} = 1; | ||||
25000 | return; | ||||
25001 | } | ||||
25002 | } | ||||
25003 | |||||
25004 | $input_line = $untrimmed_input_line; | ||||
25005 | |||||
25006 | chomp $input_line; | ||||
25007 | |||||
25008 | # trim start of this line unless we are continuing a quoted line | ||||
25009 | # do not trim end because we might end in a quote (test: deken4.pl) | ||||
25010 | # Perl::Tidy::Formatter will delete needless trailing blanks | ||||
25011 | unless ( $in_quote && ( $quote_type eq 'Q' ) ) { | ||||
25012 | $input_line =~ s/^\s*//; # trim left end | ||||
25013 | } | ||||
25014 | |||||
25015 | # update the copy of the line for use in error messages | ||||
25016 | # This must be exactly what we give the pre_tokenizer | ||||
25017 | $tokenizer_self->{_line_text} = $input_line; | ||||
25018 | |||||
25019 | # re-initialize for the main loop | ||||
25020 | $routput_token_list = []; # stack of output token indexes | ||||
25021 | $routput_token_type = []; # token types | ||||
25022 | $routput_block_type = []; # types of code block | ||||
25023 | $routput_container_type = []; # paren types, such as if, elsif, .. | ||||
25024 | $routput_type_sequence = []; # nesting sequential number | ||||
25025 | |||||
25026 | $rhere_target_list = []; | ||||
25027 | |||||
25028 | $tok = $last_nonblank_token; | ||||
25029 | $type = $last_nonblank_type; | ||||
25030 | $prototype = $last_nonblank_prototype; | ||||
25031 | $last_nonblank_i = -1; | ||||
25032 | $block_type = $last_nonblank_block_type; | ||||
25033 | $container_type = $last_nonblank_container_type; | ||||
25034 | $type_sequence = $last_nonblank_type_sequence; | ||||
25035 | $indent_flag = 0; | ||||
25036 | $peeked_ahead = 0; | ||||
25037 | |||||
25038 | # tokenization is done in two stages.. | ||||
25039 | # stage 1 is a very simple pre-tokenization | ||||
25040 | my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens | ||||
25041 | |||||
25042 | # a little optimization for a full-line comment | ||||
25043 | if ( !$in_quote && ( $input_line =~ /^#/ ) ) { | ||||
25044 | $max_tokens_wanted = 1 # no use tokenizing a comment | ||||
25045 | } | ||||
25046 | |||||
25047 | # start by breaking the line into pre-tokens | ||||
25048 | ( $rtokens, $rtoken_map, $rtoken_type ) = | ||||
25049 | pre_tokenize( $input_line, $max_tokens_wanted ); | ||||
25050 | |||||
25051 | $max_token_index = scalar(@$rtokens) - 1; | ||||
25052 | push( @$rtokens, ' ', ' ', ' ' ); # extra whitespace simplifies logic | ||||
25053 | push( @$rtoken_map, 0, 0, 0 ); # shouldn't be referenced | ||||
25054 | push( @$rtoken_type, 'b', 'b', 'b' ); | ||||
25055 | |||||
25056 | # initialize for main loop | ||||
25057 | for $i ( 0 .. $max_token_index + 3 ) { | ||||
25058 | $routput_token_type->[$i] = ""; | ||||
25059 | $routput_block_type->[$i] = ""; | ||||
25060 | $routput_container_type->[$i] = ""; | ||||
25061 | $routput_type_sequence->[$i] = ""; | ||||
25062 | $routput_indent_flag->[$i] = 0; | ||||
25063 | } | ||||
25064 | $i = -1; | ||||
25065 | $i_tok = -1; | ||||
25066 | |||||
25067 | # ------------------------------------------------------------ | ||||
25068 | # begin main tokenization loop | ||||
25069 | # ------------------------------------------------------------ | ||||
25070 | |||||
25071 | # we are looking at each pre-token of one line and combining them | ||||
25072 | # into tokens | ||||
25073 | while ( ++$i <= $max_token_index ) { | ||||
25074 | |||||
25075 | if ($in_quote) { # continue looking for end of a quote | ||||
25076 | $type = $quote_type; | ||||
25077 | |||||
25078 | unless ( @{$routput_token_list} ) | ||||
25079 | { # initialize if continuation line | ||||
25080 | push( @{$routput_token_list}, $i ); | ||||
25081 | $routput_token_type->[$i] = $type; | ||||
25082 | |||||
25083 | } | ||||
25084 | $tok = $quote_character unless ( $quote_character =~ /^\s*$/ ); | ||||
25085 | |||||
25086 | # scan for the end of the quote or pattern | ||||
25087 | ( | ||||
25088 | $i, $in_quote, $quote_character, $quote_pos, $quote_depth, | ||||
25089 | $quoted_string_1, $quoted_string_2 | ||||
25090 | ) | ||||
25091 | = do_quote( | ||||
25092 | $i, $in_quote, $quote_character, | ||||
25093 | $quote_pos, $quote_depth, $quoted_string_1, | ||||
25094 | $quoted_string_2, $rtokens, $rtoken_map, | ||||
25095 | $max_token_index | ||||
25096 | ); | ||||
25097 | |||||
25098 | # all done if we didn't find it | ||||
25099 | last if ($in_quote); | ||||
25100 | |||||
25101 | # save pattern and replacement text for rescanning | ||||
25102 | my $qs1 = $quoted_string_1; | ||||
25103 | my $qs2 = $quoted_string_2; | ||||
25104 | |||||
25105 | # re-initialize for next search | ||||
25106 | $quote_character = ''; | ||||
25107 | $quote_pos = 0; | ||||
25108 | $quote_type = 'Q'; | ||||
25109 | $quoted_string_1 = ""; | ||||
25110 | $quoted_string_2 = ""; | ||||
25111 | last if ( ++$i > $max_token_index ); | ||||
25112 | |||||
25113 | # look for any modifiers | ||||
25114 | if ($allowed_quote_modifiers) { | ||||
25115 | |||||
25116 | # check for exact quote modifiers | ||||
25117 | if ( $$rtokens[$i] =~ /^[A-Za-z_]/ ) { | ||||
25118 | my $str = $$rtokens[$i]; | ||||
25119 | my $saw_modifier_e; | ||||
25120 | while ( $str =~ /\G$allowed_quote_modifiers/gc ) { | ||||
25121 | my $pos = pos($str); | ||||
25122 | my $char = substr( $str, $pos - 1, 1 ); | ||||
25123 | $saw_modifier_e ||= ( $char eq 'e' ); | ||||
25124 | } | ||||
25125 | |||||
25126 | # For an 'e' quote modifier we must scan the replacement | ||||
25127 | # text for here-doc targets. | ||||
25128 | if ($saw_modifier_e) { | ||||
25129 | |||||
25130 | my $rht = scan_replacement_text($qs1); | ||||
25131 | |||||
25132 | # Change type from 'Q' to 'h' for quotes with | ||||
25133 | # here-doc targets so that the formatter (see sub | ||||
25134 | # print_line_of_tokens) will not make any line | ||||
25135 | # breaks after this point. | ||||
25136 | if ($rht) { | ||||
25137 | push @{$rhere_target_list}, @{$rht}; | ||||
25138 | $type = 'h'; | ||||
25139 | if ( $i_tok < 0 ) { | ||||
25140 | my $ilast = $routput_token_list->[-1]; | ||||
25141 | $routput_token_type->[$ilast] = $type; | ||||
25142 | } | ||||
25143 | } | ||||
25144 | } | ||||
25145 | |||||
25146 | if ( defined( pos($str) ) ) { | ||||
25147 | |||||
25148 | # matched | ||||
25149 | if ( pos($str) == length($str) ) { | ||||
25150 | last if ( ++$i > $max_token_index ); | ||||
25151 | } | ||||
25152 | |||||
25153 | # Looks like a joined quote modifier | ||||
25154 | # and keyword, maybe something like | ||||
25155 | # s/xxx/yyy/gefor @k=... | ||||
25156 | # Example is "galgen.pl". Would have to split | ||||
25157 | # the word and insert a new token in the | ||||
25158 | # pre-token list. This is so rare that I haven't | ||||
25159 | # done it. Will just issue a warning citation. | ||||
25160 | |||||
25161 | # This error might also be triggered if my quote | ||||
25162 | # modifier characters are incomplete | ||||
25163 | else { | ||||
25164 | warning(<<EOM); | ||||
25165 | |||||
25166 | Partial match to quote modifier $allowed_quote_modifiers at word: '$str' | ||||
25167 | Please put a space between quote modifiers and trailing keywords. | ||||
25168 | EOM | ||||
25169 | |||||
25170 | # print "token $$rtokens[$i]\n"; | ||||
25171 | # my $num = length($str) - pos($str); | ||||
25172 | # $$rtokens[$i]=substr($$rtokens[$i],pos($str),$num); | ||||
25173 | # print "continuing with new token $$rtokens[$i]\n"; | ||||
25174 | |||||
25175 | # skipping past this token does least damage | ||||
25176 | last if ( ++$i > $max_token_index ); | ||||
25177 | } | ||||
25178 | } | ||||
25179 | else { | ||||
25180 | |||||
25181 | # example file: rokicki4.pl | ||||
25182 | # This error might also be triggered if my quote | ||||
25183 | # modifier characters are incomplete | ||||
25184 | write_logfile_entry( | ||||
25185 | "Note: found word $str at quote modifier location\n" | ||||
25186 | ); | ||||
25187 | } | ||||
25188 | } | ||||
25189 | |||||
25190 | # re-initialize | ||||
25191 | $allowed_quote_modifiers = ""; | ||||
25192 | } | ||||
25193 | } | ||||
25194 | |||||
25195 | unless ( $tok =~ /^\s*$/ || $tok eq 'CORE::' ) { | ||||
25196 | |||||
25197 | # try to catch some common errors | ||||
25198 | if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) { | ||||
25199 | |||||
25200 | if ( $last_nonblank_token eq 'eq' ) { | ||||
25201 | complain("Should 'eq' be '==' here ?\n"); | ||||
25202 | } | ||||
25203 | elsif ( $last_nonblank_token eq 'ne' ) { | ||||
25204 | complain("Should 'ne' be '!=' here ?\n"); | ||||
25205 | } | ||||
25206 | } | ||||
25207 | |||||
25208 | $last_last_nonblank_token = $last_nonblank_token; | ||||
25209 | $last_last_nonblank_type = $last_nonblank_type; | ||||
25210 | $last_last_nonblank_block_type = $last_nonblank_block_type; | ||||
25211 | $last_last_nonblank_container_type = | ||||
25212 | $last_nonblank_container_type; | ||||
25213 | $last_last_nonblank_type_sequence = | ||||
25214 | $last_nonblank_type_sequence; | ||||
25215 | $last_nonblank_token = $tok; | ||||
25216 | $last_nonblank_type = $type; | ||||
25217 | $last_nonblank_prototype = $prototype; | ||||
25218 | $last_nonblank_block_type = $block_type; | ||||
25219 | $last_nonblank_container_type = $container_type; | ||||
25220 | $last_nonblank_type_sequence = $type_sequence; | ||||
25221 | $last_nonblank_i = $i_tok; | ||||
25222 | } | ||||
25223 | |||||
25224 | # store previous token type | ||||
25225 | if ( $i_tok >= 0 ) { | ||||
25226 | $routput_token_type->[$i_tok] = $type; | ||||
25227 | $routput_block_type->[$i_tok] = $block_type; | ||||
25228 | $routput_container_type->[$i_tok] = $container_type; | ||||
25229 | $routput_type_sequence->[$i_tok] = $type_sequence; | ||||
25230 | $routput_indent_flag->[$i_tok] = $indent_flag; | ||||
25231 | } | ||||
25232 | my $pre_tok = $$rtokens[$i]; # get the next pre-token | ||||
25233 | my $pre_type = $$rtoken_type[$i]; # and type | ||||
25234 | $tok = $pre_tok; | ||||
25235 | $type = $pre_type; # to be modified as necessary | ||||
25236 | $block_type = ""; # blank for all tokens except code block braces | ||||
25237 | $container_type = ""; # blank for all tokens except some parens | ||||
25238 | $type_sequence = ""; # blank for all tokens except ?/: | ||||
25239 | $indent_flag = 0; | ||||
25240 | $prototype = ""; # blank for all tokens except user defined subs | ||||
25241 | $i_tok = $i; | ||||
25242 | |||||
25243 | # this pre-token will start an output token | ||||
25244 | push( @{$routput_token_list}, $i_tok ); | ||||
25245 | |||||
25246 | # continue gathering identifier if necessary | ||||
25247 | # but do not start on blanks and comments | ||||
25248 | if ( $id_scan_state && $pre_type !~ /[b#]/ ) { | ||||
25249 | |||||
25250 | if ( $id_scan_state =~ /^(sub|package)/ ) { | ||||
25251 | scan_id(); | ||||
25252 | } | ||||
25253 | else { | ||||
25254 | scan_identifier(); | ||||
25255 | } | ||||
25256 | |||||
25257 | last if ($id_scan_state); | ||||
25258 | next if ( ( $i > 0 ) || $type ); | ||||
25259 | |||||
25260 | # didn't find any token; start over | ||||
25261 | $type = $pre_type; | ||||
25262 | $tok = $pre_tok; | ||||
25263 | } | ||||
25264 | |||||
25265 | # handle whitespace tokens.. | ||||
25266 | next if ( $type eq 'b' ); | ||||
25267 | my $prev_tok = $i > 0 ? $$rtokens[ $i - 1 ] : ' '; | ||||
25268 | my $prev_type = $i > 0 ? $$rtoken_type[ $i - 1 ] : 'b'; | ||||
25269 | |||||
25270 | # Build larger tokens where possible, since we are not in a quote. | ||||
25271 | # | ||||
25272 | # First try to assemble digraphs. The following tokens are | ||||
25273 | # excluded and handled specially: | ||||
25274 | # '/=' is excluded because the / might start a pattern. | ||||
25275 | # 'x=' is excluded since it might be $x=, with $ on previous line | ||||
25276 | # '**' and *= might be typeglobs of punctuation variables | ||||
25277 | # I have allowed tokens starting with <, such as <=, | ||||
25278 | # because I don't think these could be valid angle operators. | ||||
25279 | # test file: storrs4.pl | ||||
25280 | my $test_tok = $tok . $$rtokens[ $i + 1 ]; | ||||
25281 | my $combine_ok = $is_digraph{$test_tok}; | ||||
25282 | |||||
25283 | # check for special cases which cannot be combined | ||||
25284 | if ($combine_ok) { | ||||
25285 | |||||
25286 | # '//' must be defined_or operator if an operator is expected. | ||||
25287 | # TODO: Code for other ambiguous digraphs (/=, x=, **, *=) | ||||
25288 | # could be migrated here for clarity | ||||
25289 | if ( $test_tok eq '//' ) { | ||||
25290 | my $next_type = $$rtokens[ $i + 1 ]; | ||||
25291 | my $expecting = | ||||
25292 | operator_expected( $prev_type, $tok, $next_type ); | ||||
25293 | $combine_ok = 0 unless ( $expecting == OPERATOR ); | ||||
25294 | } | ||||
25295 | } | ||||
25296 | |||||
25297 | if ( | ||||
25298 | $combine_ok | ||||
25299 | && ( $test_tok ne '/=' ) # might be pattern | ||||
25300 | && ( $test_tok ne 'x=' ) # might be $x | ||||
25301 | && ( $test_tok ne '**' ) # typeglob? | ||||
25302 | && ( $test_tok ne '*=' ) # typeglob? | ||||
25303 | ) | ||||
25304 | { | ||||
25305 | $tok = $test_tok; | ||||
25306 | $i++; | ||||
25307 | |||||
25308 | # Now try to assemble trigraphs. Note that all possible | ||||
25309 | # perl trigraphs can be constructed by appending a character | ||||
25310 | # to a digraph. | ||||
25311 | $test_tok = $tok . $$rtokens[ $i + 1 ]; | ||||
25312 | |||||
25313 | if ( $is_trigraph{$test_tok} ) { | ||||
25314 | $tok = $test_tok; | ||||
25315 | $i++; | ||||
25316 | } | ||||
25317 | } | ||||
25318 | |||||
25319 | $type = $tok; | ||||
25320 | $next_tok = $$rtokens[ $i + 1 ]; | ||||
25321 | $next_type = $$rtoken_type[ $i + 1 ]; | ||||
25322 | |||||
25323 | TOKENIZER_DEBUG_FLAG_TOKENIZE && do { | ||||
25324 | local $" = ')('; | ||||
25325 | my @debug_list = ( | ||||
25326 | $last_nonblank_token, $tok, | ||||
25327 | $next_tok, $brace_depth, | ||||
25328 | $brace_type[$brace_depth], $paren_depth, | ||||
25329 | $paren_type[$paren_depth] | ||||
25330 | ); | ||||
25331 | print STDOUT "TOKENIZE:(@debug_list)\n"; | ||||
25332 | }; | ||||
25333 | |||||
25334 | # turn off attribute list on first non-blank, non-bareword | ||||
25335 | if ( $pre_type ne 'w' ) { $in_attribute_list = 0 } | ||||
25336 | |||||
25337 | ############################################################### | ||||
25338 | # We have the next token, $tok. | ||||
25339 | # Now we have to examine this token and decide what it is | ||||
25340 | # and define its $type | ||||
25341 | # | ||||
25342 | # section 1: bare words | ||||
25343 | ############################################################### | ||||
25344 | |||||
25345 | if ( $pre_type eq 'w' ) { | ||||
25346 | $expecting = operator_expected( $prev_type, $tok, $next_type ); | ||||
25347 | my ( $next_nonblank_token, $i_next ) = | ||||
25348 | find_next_nonblank_token( $i, $rtokens, $max_token_index ); | ||||
25349 | |||||
25350 | # ATTRS: handle sub and variable attributes | ||||
25351 | if ($in_attribute_list) { | ||||
25352 | |||||
25353 | # treat bare word followed by open paren like qw( | ||||
25354 | if ( $next_nonblank_token eq '(' ) { | ||||
25355 | $in_quote = $quote_items{'q'}; | ||||
25356 | $allowed_quote_modifiers = $quote_modifiers{'q'}; | ||||
25357 | $type = 'q'; | ||||
25358 | $quote_type = 'q'; | ||||
25359 | next; | ||||
25360 | } | ||||
25361 | |||||
25362 | # handle bareword not followed by open paren | ||||
25363 | else { | ||||
25364 | $type = 'w'; | ||||
25365 | next; | ||||
25366 | } | ||||
25367 | } | ||||
25368 | |||||
25369 | # quote a word followed by => operator | ||||
25370 | if ( $next_nonblank_token eq '=' ) { | ||||
25371 | |||||
25372 | if ( $$rtokens[ $i_next + 1 ] eq '>' ) { | ||||
25373 | if ( $is_constant{$current_package}{$tok} ) { | ||||
25374 | $type = 'C'; | ||||
25375 | } | ||||
25376 | elsif ( $is_user_function{$current_package}{$tok} ) { | ||||
25377 | $type = 'U'; | ||||
25378 | $prototype = | ||||
25379 | $user_function_prototype{$current_package}{$tok}; | ||||
25380 | } | ||||
25381 | elsif ( $tok =~ /^v\d+$/ ) { | ||||
25382 | $type = 'v'; | ||||
25383 | report_v_string($tok); | ||||
25384 | } | ||||
25385 | else { $type = 'w' } | ||||
25386 | |||||
25387 | next; | ||||
25388 | } | ||||
25389 | } | ||||
25390 | |||||
25391 | # quote a bare word within braces..like xxx->{s}; note that we | ||||
25392 | # must be sure this is not a structural brace, to avoid | ||||
25393 | # mistaking {s} in the following for a quoted bare word: | ||||
25394 | # for(@[){s}bla}BLA} | ||||
25395 | # Also treat q in something like var{-q} as a bare word, not qoute operator | ||||
25396 | if ( | ||||
25397 | $next_nonblank_token eq '}' | ||||
25398 | && ( | ||||
25399 | $last_nonblank_type eq 'L' | ||||
25400 | || ( $last_nonblank_type eq 'm' | ||||
25401 | && $last_last_nonblank_type eq 'L' ) | ||||
25402 | ) | ||||
25403 | ) | ||||
25404 | { | ||||
25405 | $type = 'w'; | ||||
25406 | next; | ||||
25407 | } | ||||
25408 | |||||
25409 | # a bare word immediately followed by :: is not a keyword; | ||||
25410 | # use $tok_kw when testing for keywords to avoid a mistake | ||||
25411 | my $tok_kw = $tok; | ||||
25412 | if ( $$rtokens[ $i + 1 ] eq ':' && $$rtokens[ $i + 2 ] eq ':' ) | ||||
25413 | { | ||||
25414 | $tok_kw .= '::'; | ||||
25415 | } | ||||
25416 | |||||
25417 | # handle operator x (now we know it isn't $x=) | ||||
25418 | if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) { | ||||
25419 | if ( $tok eq 'x' ) { | ||||
25420 | |||||
25421 | if ( $$rtokens[ $i + 1 ] eq '=' ) { # x= | ||||
25422 | $tok = 'x='; | ||||
25423 | $type = $tok; | ||||
25424 | $i++; | ||||
25425 | } | ||||
25426 | else { | ||||
25427 | $type = 'x'; | ||||
25428 | } | ||||
25429 | } | ||||
25430 | |||||
25431 | # FIXME: Patch: mark something like x4 as an integer for now | ||||
25432 | # It gets fixed downstream. This is easier than | ||||
25433 | # splitting the pretoken. | ||||
25434 | else { | ||||
25435 | $type = 'n'; | ||||
25436 | } | ||||
25437 | } | ||||
25438 | elsif ( $tok_kw eq 'CORE::' ) { | ||||
25439 | $type = $tok = $tok_kw; | ||||
25440 | $i += 2; | ||||
25441 | } | ||||
25442 | elsif ( ( $tok eq 'strict' ) | ||||
25443 | and ( $last_nonblank_token eq 'use' ) ) | ||||
25444 | { | ||||
25445 | $tokenizer_self->{_saw_use_strict} = 1; | ||||
25446 | scan_bare_identifier(); | ||||
25447 | } | ||||
25448 | |||||
25449 | elsif ( ( $tok eq 'warnings' ) | ||||
25450 | and ( $last_nonblank_token eq 'use' ) ) | ||||
25451 | { | ||||
25452 | $tokenizer_self->{_saw_perl_dash_w} = 1; | ||||
25453 | |||||
25454 | # scan as identifier, so that we pick up something like: | ||||
25455 | # use warnings::register | ||||
25456 | scan_bare_identifier(); | ||||
25457 | } | ||||
25458 | |||||
25459 | elsif ( | ||||
25460 | $tok eq 'AutoLoader' | ||||
25461 | && $tokenizer_self->{_look_for_autoloader} | ||||
25462 | && ( | ||||
25463 | $last_nonblank_token eq 'use' | ||||
25464 | |||||
25465 | # these regexes are from AutoSplit.pm, which we want | ||||
25466 | # to mimic | ||||
25467 | || $input_line =~ /^\s*(use|require)\s+AutoLoader\b/ | ||||
25468 | || $input_line =~ /\bISA\s*=.*\bAutoLoader\b/ | ||||
25469 | ) | ||||
25470 | ) | ||||
25471 | { | ||||
25472 | write_logfile_entry("AutoLoader seen, -nlal deactivates\n"); | ||||
25473 | $tokenizer_self->{_saw_autoloader} = 1; | ||||
25474 | $tokenizer_self->{_look_for_autoloader} = 0; | ||||
25475 | scan_bare_identifier(); | ||||
25476 | } | ||||
25477 | |||||
25478 | elsif ( | ||||
25479 | $tok eq 'SelfLoader' | ||||
25480 | && $tokenizer_self->{_look_for_selfloader} | ||||
25481 | && ( $last_nonblank_token eq 'use' | ||||
25482 | || $input_line =~ /^\s*(use|require)\s+SelfLoader\b/ | ||||
25483 | || $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ ) | ||||
25484 | ) | ||||
25485 | { | ||||
25486 | write_logfile_entry("SelfLoader seen, -nlsl deactivates\n"); | ||||
25487 | $tokenizer_self->{_saw_selfloader} = 1; | ||||
25488 | $tokenizer_self->{_look_for_selfloader} = 0; | ||||
25489 | scan_bare_identifier(); | ||||
25490 | } | ||||
25491 | |||||
25492 | elsif ( ( $tok eq 'constant' ) | ||||
25493 | and ( $last_nonblank_token eq 'use' ) ) | ||||
25494 | { | ||||
25495 | scan_bare_identifier(); | ||||
25496 | my ( $next_nonblank_token, $i_next ) = | ||||
25497 | find_next_nonblank_token( $i, $rtokens, | ||||
25498 | $max_token_index ); | ||||
25499 | |||||
25500 | if ($next_nonblank_token) { | ||||
25501 | |||||
25502 | if ( $is_keyword{$next_nonblank_token} ) { | ||||
25503 | |||||
25504 | # Assume qw is used as a quote and okay, as in: | ||||
25505 | # use constant qw{ DEBUG 0 }; | ||||
25506 | # Not worth trying to parse for just a warning | ||||
25507 | |||||
25508 | # NOTE: This warning is deactivated because recent | ||||
25509 | # versions of perl do not complain here, but | ||||
25510 | # the coding is retained for reference. | ||||
25511 | if ( 0 && $next_nonblank_token ne 'qw' ) { | ||||
25512 | warning( | ||||
25513 | "Attempting to define constant '$next_nonblank_token' which is a perl keyword\n" | ||||
25514 | ); | ||||
25515 | } | ||||
25516 | } | ||||
25517 | |||||
25518 | # FIXME: could check for error in which next token is | ||||
25519 | # not a word (number, punctuation, ..) | ||||
25520 | else { | ||||
25521 | $is_constant{$current_package}{$next_nonblank_token} | ||||
25522 | = 1; | ||||
25523 | } | ||||
25524 | } | ||||
25525 | } | ||||
25526 | |||||
25527 | # various quote operators | ||||
25528 | elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) { | ||||
25529 | ##NICOL PATCH | ||||
25530 | if ( $expecting == OPERATOR ) { | ||||
25531 | |||||
25532 | # Be careful not to call an error for a qw quote | ||||
25533 | # where a parenthesized list is allowed. For example, | ||||
25534 | # it could also be a for/foreach construct such as | ||||
25535 | # | ||||
25536 | # foreach my $key qw\Uno Due Tres Quadro\ { | ||||
25537 | # print "Set $key\n"; | ||||
25538 | # } | ||||
25539 | # | ||||
25540 | |||||
25541 | # Or it could be a function call. | ||||
25542 | # NOTE: Braces in something like &{ xxx } are not | ||||
25543 | # marked as a block, we might have a method call. | ||||
25544 | # &method(...), $method->(..), &{method}(...), | ||||
25545 | # $ref[2](list) is ok & short for $ref[2]->(list) | ||||
25546 | # | ||||
25547 | # See notes in 'sub code_block_type' and | ||||
25548 | # 'sub is_non_structural_brace' | ||||
25549 | |||||
25550 | unless ( | ||||
25551 | $tok eq 'qw' | ||||
25552 | && ( $last_nonblank_token =~ /^([\]\}\&]|\-\>)/ | ||||
25553 | || $is_for_foreach{$want_paren} ) | ||||
25554 | ) | ||||
25555 | { | ||||
25556 | error_if_expecting_OPERATOR(); | ||||
25557 | } | ||||
25558 | } | ||||
25559 | $in_quote = $quote_items{$tok}; | ||||
25560 | $allowed_quote_modifiers = $quote_modifiers{$tok}; | ||||
25561 | |||||
25562 | # All quote types are 'Q' except possibly qw quotes. | ||||
25563 | # qw quotes are special in that they may generally be trimmed | ||||
25564 | # of leading and trailing whitespace. So they are given a | ||||
25565 | # separate type, 'q', unless requested otherwise. | ||||
25566 | $type = | ||||
25567 | ( $tok eq 'qw' && $tokenizer_self->{_trim_qw} ) | ||||
25568 | ? 'q' | ||||
25569 | : 'Q'; | ||||
25570 | $quote_type = $type; | ||||
25571 | } | ||||
25572 | |||||
25573 | # check for a statement label | ||||
25574 | elsif ( | ||||
25575 | ( $next_nonblank_token eq ':' ) | ||||
25576 | && ( $$rtokens[ $i_next + 1 ] ne ':' ) | ||||
25577 | && ( $i_next <= $max_token_index ) # colon on same line | ||||
25578 | && label_ok() | ||||
25579 | ) | ||||
25580 | { | ||||
25581 | if ( $tok !~ /[A-Z]/ ) { | ||||
25582 | push @{ $tokenizer_self->{_rlower_case_labels_at} }, | ||||
25583 | $input_line_number; | ||||
25584 | } | ||||
25585 | $type = 'J'; | ||||
25586 | $tok .= ':'; | ||||
25587 | $i = $i_next; | ||||
25588 | next; | ||||
25589 | } | ||||
25590 | |||||
25591 | # 'sub' || 'package' | ||||
25592 | elsif ( $is_sub_package{$tok_kw} ) { | ||||
25593 | error_if_expecting_OPERATOR() | ||||
25594 | if ( $expecting == OPERATOR ); | ||||
25595 | scan_id(); | ||||
25596 | } | ||||
25597 | |||||
25598 | # Note on token types for format, __DATA__, __END__: | ||||
25599 | # It simplifies things to give these type ';', so that when we | ||||
25600 | # start rescanning we will be expecting a token of type TERM. | ||||
25601 | # We will switch to type 'k' before outputting the tokens. | ||||
25602 | elsif ( $is_format_END_DATA{$tok_kw} ) { | ||||
25603 | $type = ';'; # make tokenizer look for TERM next | ||||
25604 | $tokenizer_self->{ $is_format_END_DATA{$tok_kw} } = 1; | ||||
25605 | last; | ||||
25606 | } | ||||
25607 | |||||
25608 | elsif ( $is_keyword{$tok_kw} ) { | ||||
25609 | $type = 'k'; | ||||
25610 | |||||
25611 | # Since for and foreach may not be followed immediately | ||||
25612 | # by an opening paren, we have to remember which keyword | ||||
25613 | # is associated with the next '(' | ||||
25614 | if ( $is_for_foreach{$tok} ) { | ||||
25615 | if ( new_statement_ok() ) { | ||||
25616 | $want_paren = $tok; | ||||
25617 | } | ||||
25618 | } | ||||
25619 | |||||
25620 | # recognize 'use' statements, which are special | ||||
25621 | elsif ( $is_use_require{$tok} ) { | ||||
25622 | $statement_type = $tok; | ||||
25623 | error_if_expecting_OPERATOR() | ||||
25624 | if ( $expecting == OPERATOR ); | ||||
25625 | } | ||||
25626 | |||||
25627 | # remember my and our to check for trailing ": shared" | ||||
25628 | elsif ( $is_my_our{$tok} ) { | ||||
25629 | $statement_type = $tok; | ||||
25630 | } | ||||
25631 | |||||
25632 | # Check for misplaced 'elsif' and 'else', but allow isolated | ||||
25633 | # else or elsif blocks to be formatted. This is indicated | ||||
25634 | # by a last noblank token of ';' | ||||
25635 | elsif ( $tok eq 'elsif' ) { | ||||
25636 | if ( $last_nonblank_token ne ';' | ||||
25637 | && $last_nonblank_block_type !~ | ||||
25638 | /^(if|elsif|unless)$/ ) | ||||
25639 | { | ||||
25640 | warning( | ||||
25641 | "expecting '$tok' to follow one of 'if|elsif|unless'\n" | ||||
25642 | ); | ||||
25643 | } | ||||
25644 | } | ||||
25645 | elsif ( $tok eq 'else' ) { | ||||
25646 | |||||
25647 | # patched for SWITCH/CASE | ||||
25648 | if ( $last_nonblank_token ne ';' | ||||
25649 | && $last_nonblank_block_type !~ | ||||
25650 | /^(if|elsif|unless|case|when)$/ ) | ||||
25651 | { | ||||
25652 | warning( | ||||
25653 | "expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n" | ||||
25654 | ); | ||||
25655 | } | ||||
25656 | } | ||||
25657 | elsif ( $tok eq 'continue' ) { | ||||
25658 | if ( $last_nonblank_token ne ';' | ||||
25659 | && $last_nonblank_block_type !~ | ||||
25660 | /(^(\{|\}|;|while|until|for|foreach)|:$)/ ) | ||||
25661 | { | ||||
25662 | |||||
25663 | # note: ';' '{' and '}' in list above | ||||
25664 | # because continues can follow bare blocks; | ||||
25665 | # ':' is labeled block | ||||
25666 | # | ||||
25667 | ############################################ | ||||
25668 | # NOTE: This check has been deactivated because | ||||
25669 | # continue has an alternative usage for given/when | ||||
25670 | # blocks in perl 5.10 | ||||
25671 | ## warning("'$tok' should follow a block\n"); | ||||
25672 | ############################################ | ||||
25673 | } | ||||
25674 | } | ||||
25675 | |||||
25676 | # patch for SWITCH/CASE if 'case' and 'when are | ||||
25677 | # treated as keywords. | ||||
25678 | elsif ( $tok eq 'when' || $tok eq 'case' ) { | ||||
25679 | $statement_type = $tok; # next '{' is block | ||||
25680 | } | ||||
25681 | |||||
25682 | # | ||||
25683 | # indent trailing if/unless/while/until | ||||
25684 | # outdenting will be handled by later indentation loop | ||||
25685 | ## DEACTIVATED: unfortunately this can cause some unwanted indentation like: | ||||
25686 | ##$opt_o = 1 | ||||
25687 | ## if !( | ||||
25688 | ## $opt_b | ||||
25689 | ## || $opt_c | ||||
25690 | ## || $opt_d | ||||
25691 | ## || $opt_f | ||||
25692 | ## || $opt_i | ||||
25693 | ## || $opt_l | ||||
25694 | ## || $opt_o | ||||
25695 | ## || $opt_x | ||||
25696 | ## ); | ||||
25697 | ## if ( $tok =~ /^(if|unless|while|until)$/ | ||||
25698 | ## && $next_nonblank_token ne '(' ) | ||||
25699 | ## { | ||||
25700 | ## $indent_flag = 1; | ||||
25701 | ## } | ||||
25702 | } | ||||
25703 | |||||
25704 | # check for inline label following | ||||
25705 | # /^(redo|last|next|goto)$/ | ||||
25706 | elsif (( $last_nonblank_type eq 'k' ) | ||||
25707 | && ( $is_redo_last_next_goto{$last_nonblank_token} ) ) | ||||
25708 | { | ||||
25709 | $type = 'j'; | ||||
25710 | next; | ||||
25711 | } | ||||
25712 | |||||
25713 | # something else -- | ||||
25714 | else { | ||||
25715 | |||||
25716 | scan_bare_identifier(); | ||||
25717 | if ( $type eq 'w' ) { | ||||
25718 | |||||
25719 | if ( $expecting == OPERATOR ) { | ||||
25720 | |||||
25721 | # don't complain about possible indirect object | ||||
25722 | # notation. | ||||
25723 | # For example: | ||||
25724 | # package main; | ||||
25725 | # sub new($) { ... } | ||||
25726 | # $b = new A::; # calls A::new | ||||
25727 | # $c = new A; # same thing but suspicious | ||||
25728 | # This will call A::new but we have a 'new' in | ||||
25729 | # main:: which looks like a constant. | ||||
25730 | # | ||||
25731 | if ( $last_nonblank_type eq 'C' ) { | ||||
25732 | if ( $tok !~ /::$/ ) { | ||||
25733 | complain(<<EOM); | ||||
25734 | Expecting operator after '$last_nonblank_token' but found bare word '$tok' | ||||
25735 | Maybe indirectet object notation? | ||||
25736 | EOM | ||||
25737 | } | ||||
25738 | } | ||||
25739 | else { | ||||
25740 | error_if_expecting_OPERATOR("bareword"); | ||||
25741 | } | ||||
25742 | } | ||||
25743 | |||||
25744 | # mark bare words immediately followed by a paren as | ||||
25745 | # functions | ||||
25746 | $next_tok = $$rtokens[ $i + 1 ]; | ||||
25747 | if ( $next_tok eq '(' ) { | ||||
25748 | $type = 'U'; | ||||
25749 | } | ||||
25750 | |||||
25751 | # underscore after file test operator is file handle | ||||
25752 | if ( $tok eq '_' && $last_nonblank_type eq 'F' ) { | ||||
25753 | $type = 'Z'; | ||||
25754 | } | ||||
25755 | |||||
25756 | # patch for SWITCH/CASE if 'case' and 'when are | ||||
25757 | # not treated as keywords: | ||||
25758 | if ( | ||||
25759 | ( | ||||
25760 | $tok eq 'case' | ||||
25761 | && $brace_type[$brace_depth] eq 'switch' | ||||
25762 | ) | ||||
25763 | || ( $tok eq 'when' | ||||
25764 | && $brace_type[$brace_depth] eq 'given' ) | ||||
25765 | ) | ||||
25766 | { | ||||
25767 | $statement_type = $tok; # next '{' is block | ||||
25768 | $type = 'k'; # for keyword syntax coloring | ||||
25769 | } | ||||
25770 | |||||
25771 | # patch for SWITCH/CASE if switch and given not keywords | ||||
25772 | # Switch is not a perl 5 keyword, but we will gamble | ||||
25773 | # and mark switch followed by paren as a keyword. This | ||||
25774 | # is only necessary to get html syntax coloring nice, | ||||
25775 | # and does not commit this as being a switch/case. | ||||
25776 | if ( $next_nonblank_token eq '(' | ||||
25777 | && ( $tok eq 'switch' || $tok eq 'given' ) ) | ||||
25778 | { | ||||
25779 | $type = 'k'; # for keyword syntax coloring | ||||
25780 | } | ||||
25781 | } | ||||
25782 | } | ||||
25783 | } | ||||
25784 | |||||
25785 | ############################################################### | ||||
25786 | # section 2: strings of digits | ||||
25787 | ############################################################### | ||||
25788 | elsif ( $pre_type eq 'd' ) { | ||||
25789 | $expecting = operator_expected( $prev_type, $tok, $next_type ); | ||||
25790 | error_if_expecting_OPERATOR("Number") | ||||
25791 | if ( $expecting == OPERATOR ); | ||||
25792 | my $number = scan_number(); | ||||
25793 | if ( !defined($number) ) { | ||||
25794 | |||||
25795 | # shouldn't happen - we should always get a number | ||||
25796 | warning("non-number beginning with digit--program bug\n"); | ||||
25797 | report_definite_bug(); | ||||
25798 | } | ||||
25799 | } | ||||
25800 | |||||
25801 | ############################################################### | ||||
25802 | # section 3: all other tokens | ||||
25803 | ############################################################### | ||||
25804 | |||||
25805 | else { | ||||
25806 | last if ( $tok eq '#' ); | ||||
25807 | my $code = $tokenization_code->{$tok}; | ||||
25808 | if ($code) { | ||||
25809 | $expecting = | ||||
25810 | operator_expected( $prev_type, $tok, $next_type ); | ||||
25811 | $code->(); | ||||
25812 | redo if $in_quote; | ||||
25813 | } | ||||
25814 | } | ||||
25815 | } | ||||
25816 | |||||
25817 | # ----------------------------- | ||||
25818 | # end of main tokenization loop | ||||
25819 | # ----------------------------- | ||||
25820 | |||||
25821 | if ( $i_tok >= 0 ) { | ||||
25822 | $routput_token_type->[$i_tok] = $type; | ||||
25823 | $routput_block_type->[$i_tok] = $block_type; | ||||
25824 | $routput_container_type->[$i_tok] = $container_type; | ||||
25825 | $routput_type_sequence->[$i_tok] = $type_sequence; | ||||
25826 | $routput_indent_flag->[$i_tok] = $indent_flag; | ||||
25827 | } | ||||
25828 | |||||
25829 | unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) { | ||||
25830 | $last_last_nonblank_token = $last_nonblank_token; | ||||
25831 | $last_last_nonblank_type = $last_nonblank_type; | ||||
25832 | $last_last_nonblank_block_type = $last_nonblank_block_type; | ||||
25833 | $last_last_nonblank_container_type = $last_nonblank_container_type; | ||||
25834 | $last_last_nonblank_type_sequence = $last_nonblank_type_sequence; | ||||
25835 | $last_nonblank_token = $tok; | ||||
25836 | $last_nonblank_type = $type; | ||||
25837 | $last_nonblank_block_type = $block_type; | ||||
25838 | $last_nonblank_container_type = $container_type; | ||||
25839 | $last_nonblank_type_sequence = $type_sequence; | ||||
25840 | $last_nonblank_prototype = $prototype; | ||||
25841 | } | ||||
25842 | |||||
25843 | # reset indentation level if necessary at a sub or package | ||||
25844 | # in an attempt to recover from a nesting error | ||||
25845 | if ( $level_in_tokenizer < 0 ) { | ||||
25846 | if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) { | ||||
25847 | reset_indentation_level(0); | ||||
25848 | brace_warning("resetting level to 0 at $1 $2\n"); | ||||
25849 | } | ||||
25850 | } | ||||
25851 | |||||
25852 | # all done tokenizing this line ... | ||||
25853 | # now prepare the final list of tokens and types | ||||
25854 | |||||
25855 | my @token_type = (); # stack of output token types | ||||
25856 | my @block_type = (); # stack of output code block types | ||||
25857 | my @container_type = (); # stack of output code container types | ||||
25858 | my @type_sequence = (); # stack of output type sequence numbers | ||||
25859 | my @tokens = (); # output tokens | ||||
25860 | my @levels = (); # structural brace levels of output tokens | ||||
25861 | my @slevels = (); # secondary nesting levels of output tokens | ||||
25862 | my @nesting_tokens = (); # string of tokens leading to this depth | ||||
25863 | my @nesting_types = (); # string of token types leading to this depth | ||||
25864 | my @nesting_blocks = (); # string of block types leading to this depth | ||||
25865 | my @nesting_lists = (); # string of list types leading to this depth | ||||
25866 | my @ci_string = (); # string needed to compute continuation indentation | ||||
25867 | my @container_environment = (); # BLOCK or LIST | ||||
25868 | my $container_environment = ''; | ||||
25869 | my $im = -1; # previous $i value | ||||
25870 | my $num; | ||||
25871 | my $ci_string_sum = ones_count($ci_string_in_tokenizer); | ||||
25872 | |||||
25873 | # Computing Token Indentation | ||||
25874 | # | ||||
25875 | # The final section of the tokenizer forms tokens and also computes | ||||
25876 | # parameters needed to find indentation. It is much easier to do it | ||||
25877 | # in the tokenizer than elsewhere. Here is a brief description of how | ||||
25878 | # indentation is computed. Perl::Tidy computes indentation as the sum | ||||
25879 | # of 2 terms: | ||||
25880 | # | ||||
25881 | # (1) structural indentation, such as if/else/elsif blocks | ||||
25882 | # (2) continuation indentation, such as long parameter call lists. | ||||
25883 | # | ||||
25884 | # These are occasionally called primary and secondary indentation. | ||||
25885 | # | ||||
25886 | # Structural indentation is introduced by tokens of type '{', although | ||||
25887 | # the actual tokens might be '{', '(', or '['. Structural indentation | ||||
25888 | # is of two types: BLOCK and non-BLOCK. Default structural indentation | ||||
25889 | # is 4 characters if the standard indentation scheme is used. | ||||
25890 | # | ||||
25891 | # Continuation indentation is introduced whenever a line at BLOCK level | ||||
25892 | # is broken before its termination. Default continuation indentation | ||||
25893 | # is 2 characters in the standard indentation scheme. | ||||
25894 | # | ||||
25895 | # Both types of indentation may be nested arbitrarily deep and | ||||
25896 | # interlaced. The distinction between the two is somewhat arbitrary. | ||||
25897 | # | ||||
25898 | # For each token, we will define two variables which would apply if | ||||
25899 | # the current statement were broken just before that token, so that | ||||
25900 | # that token started a new line: | ||||
25901 | # | ||||
25902 | # $level = the structural indentation level, | ||||
25903 | # $ci_level = the continuation indentation level | ||||
25904 | # | ||||
25905 | # The total indentation will be $level * (4 spaces) + $ci_level * (2 spaces), | ||||
25906 | # assuming defaults. However, in some special cases it is customary | ||||
25907 | # to modify $ci_level from this strict value. | ||||
25908 | # | ||||
25909 | # The total structural indentation is easy to compute by adding and | ||||
25910 | # subtracting 1 from a saved value as types '{' and '}' are seen. The | ||||
25911 | # running value of this variable is $level_in_tokenizer. | ||||
25912 | # | ||||
25913 | # The total continuation is much more difficult to compute, and requires | ||||
25914 | # several variables. These variables are: | ||||
25915 | # | ||||
25916 | # $ci_string_in_tokenizer = a string of 1's and 0's indicating, for | ||||
25917 | # each indentation level, if there are intervening open secondary | ||||
25918 | # structures just prior to that level. | ||||
25919 | # $continuation_string_in_tokenizer = a string of 1's and 0's indicating | ||||
25920 | # if the last token at that level is "continued", meaning that it | ||||
25921 | # is not the first token of an expression. | ||||
25922 | # $nesting_block_string = a string of 1's and 0's indicating, for each | ||||
25923 | # indentation level, if the level is of type BLOCK or not. | ||||
25924 | # $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string | ||||
25925 | # $nesting_list_string = a string of 1's and 0's indicating, for each | ||||
25926 | # indentation level, if it is appropriate for list formatting. | ||||
25927 | # If so, continuation indentation is used to indent long list items. | ||||
25928 | # $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string | ||||
25929 | # @{$rslevel_stack} = a stack of total nesting depths at each | ||||
25930 | # structural indentation level, where "total nesting depth" means | ||||
25931 | # the nesting depth that would occur if every nesting token -- '{', '[', | ||||
25932 | # and '(' -- , regardless of context, is used to compute a nesting | ||||
25933 | # depth. | ||||
25934 | |||||
25935 | #my $nesting_block_flag = ($nesting_block_string =~ /1$/); | ||||
25936 | #my $nesting_list_flag = ($nesting_list_string =~ /1$/); | ||||
25937 | |||||
25938 | my ( $ci_string_i, $level_i, $nesting_block_string_i, | ||||
25939 | $nesting_list_string_i, $nesting_token_string_i, | ||||
25940 | $nesting_type_string_i, ); | ||||
25941 | |||||
25942 | foreach $i ( @{$routput_token_list} ) | ||||
25943 | { # scan the list of pre-tokens indexes | ||||
25944 | |||||
25945 | # self-checking for valid token types | ||||
25946 | my $type = $routput_token_type->[$i]; | ||||
25947 | my $forced_indentation_flag = $routput_indent_flag->[$i]; | ||||
25948 | |||||
25949 | # See if we should undo the $forced_indentation_flag. | ||||
25950 | # Forced indentation after 'if', 'unless', 'while' and 'until' | ||||
25951 | # expressions without trailing parens is optional and doesn't | ||||
25952 | # always look good. It is usually okay for a trailing logical | ||||
25953 | # expression, but if the expression is a function call, code block, | ||||
25954 | # or some kind of list it puts in an unwanted extra indentation | ||||
25955 | # level which is hard to remove. | ||||
25956 | # | ||||
25957 | # Example where extra indentation looks ok: | ||||
25958 | # return 1 | ||||
25959 | # if $det_a < 0 and $det_b > 0 | ||||
25960 | # or $det_a > 0 and $det_b < 0; | ||||
25961 | # | ||||
25962 | # Example where extra indentation is not needed because | ||||
25963 | # the eval brace also provides indentation: | ||||
25964 | # print "not " if defined eval { | ||||
25965 | # reduce { die if $b > 2; $a + $b } 0, 1, 2, 3, 4; | ||||
25966 | # }; | ||||
25967 | # | ||||
25968 | # The following rule works fairly well: | ||||
25969 | # Undo the flag if the end of this line, or start of the next | ||||
25970 | # line, is an opening container token or a comma. | ||||
25971 | # This almost always works, but if not after another pass it will | ||||
25972 | # be stable. | ||||
25973 | if ( $forced_indentation_flag && $type eq 'k' ) { | ||||
25974 | my $ixlast = -1; | ||||
25975 | my $ilast = $routput_token_list->[$ixlast]; | ||||
25976 | my $toklast = $routput_token_type->[$ilast]; | ||||
25977 | if ( $toklast eq '#' ) { | ||||
25978 | $ixlast--; | ||||
25979 | $ilast = $routput_token_list->[$ixlast]; | ||||
25980 | $toklast = $routput_token_type->[$ilast]; | ||||
25981 | } | ||||
25982 | if ( $toklast eq 'b' ) { | ||||
25983 | $ixlast--; | ||||
25984 | $ilast = $routput_token_list->[$ixlast]; | ||||
25985 | $toklast = $routput_token_type->[$ilast]; | ||||
25986 | } | ||||
25987 | if ( $toklast =~ /^[\{,]$/ ) { | ||||
25988 | $forced_indentation_flag = 0; | ||||
25989 | } | ||||
25990 | else { | ||||
25991 | ( $toklast, my $i_next ) = | ||||
25992 | find_next_nonblank_token( $max_token_index, $rtokens, | ||||
25993 | $max_token_index ); | ||||
25994 | if ( $toklast =~ /^[\{,]$/ ) { | ||||
25995 | $forced_indentation_flag = 0; | ||||
25996 | } | ||||
25997 | } | ||||
25998 | } | ||||
25999 | |||||
26000 | # if we are already in an indented if, see if we should outdent | ||||
26001 | if ($indented_if_level) { | ||||
26002 | |||||
26003 | # don't try to nest trailing if's - shouldn't happen | ||||
26004 | if ( $type eq 'k' ) { | ||||
26005 | $forced_indentation_flag = 0; | ||||
26006 | } | ||||
26007 | |||||
26008 | # check for the normal case - outdenting at next ';' | ||||
26009 | elsif ( $type eq ';' ) { | ||||
26010 | if ( $level_in_tokenizer == $indented_if_level ) { | ||||
26011 | $forced_indentation_flag = -1; | ||||
26012 | $indented_if_level = 0; | ||||
26013 | } | ||||
26014 | } | ||||
26015 | |||||
26016 | # handle case of missing semicolon | ||||
26017 | elsif ( $type eq '}' ) { | ||||
26018 | if ( $level_in_tokenizer == $indented_if_level ) { | ||||
26019 | $indented_if_level = 0; | ||||
26020 | |||||
26021 | # TBD: This could be a subroutine call | ||||
26022 | $level_in_tokenizer--; | ||||
26023 | if ( @{$rslevel_stack} > 1 ) { | ||||
26024 | pop( @{$rslevel_stack} ); | ||||
26025 | } | ||||
26026 | if ( length($nesting_block_string) > 1 ) | ||||
26027 | { # true for valid script | ||||
26028 | chop $nesting_block_string; | ||||
26029 | chop $nesting_list_string; | ||||
26030 | } | ||||
26031 | |||||
26032 | } | ||||
26033 | } | ||||
26034 | } | ||||
26035 | |||||
26036 | my $tok = $$rtokens[$i]; # the token, but ONLY if same as pretoken | ||||
26037 | $level_i = $level_in_tokenizer; | ||||
26038 | |||||
26039 | # This can happen by running perltidy on non-scripts | ||||
26040 | # although it could also be bug introduced by programming change. | ||||
26041 | # Perl silently accepts a 032 (^Z) and takes it as the end | ||||
26042 | if ( !$is_valid_token_type{$type} ) { | ||||
26043 | my $val = ord($type); | ||||
26044 | warning( | ||||
26045 | "unexpected character decimal $val ($type) in script\n"); | ||||
26046 | $tokenizer_self->{_in_error} = 1; | ||||
26047 | } | ||||
26048 | |||||
26049 | # ---------------------------------------------------------------- | ||||
26050 | # TOKEN TYPE PATCHES | ||||
26051 | # output __END__, __DATA__, and format as type 'k' instead of ';' | ||||
26052 | # to make html colors correct, etc. | ||||
26053 | my $fix_type = $type; | ||||
26054 | if ( $type eq ';' && $tok =~ /\w/ ) { $fix_type = 'k' } | ||||
26055 | |||||
26056 | # output anonymous 'sub' as keyword | ||||
26057 | if ( $type eq 't' && $tok eq 'sub' ) { $fix_type = 'k' } | ||||
26058 | |||||
26059 | # ----------------------------------------------------------------- | ||||
26060 | |||||
26061 | $nesting_token_string_i = $nesting_token_string; | ||||
26062 | $nesting_type_string_i = $nesting_type_string; | ||||
26063 | $nesting_block_string_i = $nesting_block_string; | ||||
26064 | $nesting_list_string_i = $nesting_list_string; | ||||
26065 | |||||
26066 | # set primary indentation levels based on structural braces | ||||
26067 | # Note: these are set so that the leading braces have a HIGHER | ||||
26068 | # level than their CONTENTS, which is convenient for indentation | ||||
26069 | # Also, define continuation indentation for each token. | ||||
26070 | if ( $type eq '{' || $type eq 'L' || $forced_indentation_flag > 0 ) | ||||
26071 | { | ||||
26072 | |||||
26073 | # use environment before updating | ||||
26074 | $container_environment = | ||||
26075 | $nesting_block_flag ? 'BLOCK' | ||||
26076 | : $nesting_list_flag ? 'LIST' | ||||
26077 | : ""; | ||||
26078 | |||||
26079 | # if the difference between total nesting levels is not 1, | ||||
26080 | # there are intervening non-structural nesting types between | ||||
26081 | # this '{' and the previous unclosed '{' | ||||
26082 | my $intervening_secondary_structure = 0; | ||||
26083 | if ( @{$rslevel_stack} ) { | ||||
26084 | $intervening_secondary_structure = | ||||
26085 | $slevel_in_tokenizer - $rslevel_stack->[-1]; | ||||
26086 | } | ||||
26087 | |||||
26088 | # Continuation Indentation | ||||
26089 | # | ||||
26090 | # Having tried setting continuation indentation both in the formatter and | ||||
26091 | # in the tokenizer, I can say that setting it in the tokenizer is much, | ||||
26092 | # much easier. The formatter already has too much to do, and can't | ||||
26093 | # make decisions on line breaks without knowing what 'ci' will be at | ||||
26094 | # arbitrary locations. | ||||
26095 | # | ||||
26096 | # But a problem with setting the continuation indentation (ci) here | ||||
26097 | # in the tokenizer is that we do not know where line breaks will actually | ||||
26098 | # be. As a result, we don't know if we should propagate continuation | ||||
26099 | # indentation to higher levels of structure. | ||||
26100 | # | ||||
26101 | # For nesting of only structural indentation, we never need to do this. | ||||
26102 | # For example, in a long if statement, like this | ||||
26103 | # | ||||
26104 | # if ( !$output_block_type[$i] | ||||
26105 | # && ($in_statement_continuation) ) | ||||
26106 | # { <--outdented | ||||
26107 | # do_something(); | ||||
26108 | # } | ||||
26109 | # | ||||
26110 | # the second line has ci but we do normally give the lines within the BLOCK | ||||
26111 | # any ci. This would be true if we had blocks nested arbitrarily deeply. | ||||
26112 | # | ||||
26113 | # But consider something like this, where we have created a break after | ||||
26114 | # an opening paren on line 1, and the paren is not (currently) a | ||||
26115 | # structural indentation token: | ||||
26116 | # | ||||
26117 | # my $file = $menubar->Menubutton( | ||||
26118 | # qw/-text File -underline 0 -menuitems/ => [ | ||||
26119 | # [ | ||||
26120 | # Cascade => '~View', | ||||
26121 | # -menuitems => [ | ||||
26122 | # ... | ||||
26123 | # | ||||
26124 | # The second line has ci, so it would seem reasonable to propagate it | ||||
26125 | # down, giving the third line 1 ci + 1 indentation. This suggests the | ||||
26126 | # following rule, which is currently used to propagating ci down: if there | ||||
26127 | # are any non-structural opening parens (or brackets, or braces), before | ||||
26128 | # an opening structural brace, then ci is propagated down, and otherwise | ||||
26129 | # not. The variable $intervening_secondary_structure contains this | ||||
26130 | # information for the current token, and the string | ||||
26131 | # "$ci_string_in_tokenizer" is a stack of previous values of this | ||||
26132 | # variable. | ||||
26133 | |||||
26134 | # save the current states | ||||
26135 | push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer ); | ||||
26136 | $level_in_tokenizer++; | ||||
26137 | |||||
26138 | if ($forced_indentation_flag) { | ||||
26139 | |||||
26140 | # break BEFORE '?' when there is forced indentation | ||||
26141 | if ( $type eq '?' ) { $level_i = $level_in_tokenizer; } | ||||
26142 | if ( $type eq 'k' ) { | ||||
26143 | $indented_if_level = $level_in_tokenizer; | ||||
26144 | } | ||||
26145 | |||||
26146 | # do not change container environment here if we are not | ||||
26147 | # at a real list. Adding this check prevents "blinkers" | ||||
26148 | # often near 'unless" clauses, such as in the following | ||||
26149 | # code: | ||||
26150 | ## next | ||||
26151 | ## unless -e ( | ||||
26152 | ## $archive = | ||||
26153 | ## File::Spec->catdir( $_, "auto", $root, "$sub$lib_ext" ) | ||||
26154 | ## ); | ||||
26155 | |||||
26156 | $nesting_block_string .= "$nesting_block_flag"; | ||||
26157 | } | ||||
26158 | else { | ||||
26159 | |||||
26160 | if ( $routput_block_type->[$i] ) { | ||||
26161 | $nesting_block_flag = 1; | ||||
26162 | $nesting_block_string .= '1'; | ||||
26163 | } | ||||
26164 | else { | ||||
26165 | $nesting_block_flag = 0; | ||||
26166 | $nesting_block_string .= '0'; | ||||
26167 | } | ||||
26168 | } | ||||
26169 | |||||
26170 | # we will use continuation indentation within containers | ||||
26171 | # which are not blocks and not logical expressions | ||||
26172 | my $bit = 0; | ||||
26173 | if ( !$routput_block_type->[$i] ) { | ||||
26174 | |||||
26175 | # propagate flag down at nested open parens | ||||
26176 | if ( $routput_container_type->[$i] eq '(' ) { | ||||
26177 | $bit = 1 if $nesting_list_flag; | ||||
26178 | } | ||||
26179 | |||||
26180 | # use list continuation if not a logical grouping | ||||
26181 | # /^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/ | ||||
26182 | else { | ||||
26183 | $bit = 1 | ||||
26184 | unless | ||||
26185 | $is_logical_container{ $routput_container_type->[$i] | ||||
26186 | }; | ||||
26187 | } | ||||
26188 | } | ||||
26189 | $nesting_list_string .= $bit; | ||||
26190 | $nesting_list_flag = $bit; | ||||
26191 | |||||
26192 | $ci_string_in_tokenizer .= | ||||
26193 | ( $intervening_secondary_structure != 0 ) ? '1' : '0'; | ||||
26194 | $ci_string_sum = ones_count($ci_string_in_tokenizer); | ||||
26195 | $continuation_string_in_tokenizer .= | ||||
26196 | ( $in_statement_continuation > 0 ) ? '1' : '0'; | ||||
26197 | |||||
26198 | # Sometimes we want to give an opening brace continuation indentation, | ||||
26199 | # and sometimes not. For code blocks, we don't do it, so that the leading | ||||
26200 | # '{' gets outdented, like this: | ||||
26201 | # | ||||
26202 | # if ( !$output_block_type[$i] | ||||
26203 | # && ($in_statement_continuation) ) | ||||
26204 | # { <--outdented | ||||
26205 | # | ||||
26206 | # For other types, we will give them continuation indentation. For example, | ||||
26207 | # here is how a list looks with the opening paren indented: | ||||
26208 | # | ||||
26209 | # @LoL = | ||||
26210 | # ( [ "fred", "barney" ], [ "george", "jane", "elroy" ], | ||||
26211 | # [ "homer", "marge", "bart" ], ); | ||||
26212 | # | ||||
26213 | # This looks best when 'ci' is one-half of the indentation (i.e., 2 and 4) | ||||
26214 | |||||
26215 | my $total_ci = $ci_string_sum; | ||||
26216 | if ( | ||||
26217 | !$routput_block_type->[$i] # patch: skip for BLOCK | ||||
26218 | && ($in_statement_continuation) | ||||
26219 | && !( $forced_indentation_flag && $type eq ':' ) | ||||
26220 | ) | ||||
26221 | { | ||||
26222 | $total_ci += $in_statement_continuation | ||||
26223 | unless ( $ci_string_in_tokenizer =~ /1$/ ); | ||||
26224 | } | ||||
26225 | |||||
26226 | $ci_string_i = $total_ci; | ||||
26227 | $in_statement_continuation = 0; | ||||
26228 | } | ||||
26229 | |||||
26230 | elsif ($type eq '}' | ||||
26231 | || $type eq 'R' | ||||
26232 | || $forced_indentation_flag < 0 ) | ||||
26233 | { | ||||
26234 | |||||
26235 | # only a nesting error in the script would prevent popping here | ||||
26236 | if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); } | ||||
26237 | |||||
26238 | $level_i = --$level_in_tokenizer; | ||||
26239 | |||||
26240 | # restore previous level values | ||||
26241 | if ( length($nesting_block_string) > 1 ) | ||||
26242 | { # true for valid script | ||||
26243 | chop $nesting_block_string; | ||||
26244 | $nesting_block_flag = ( $nesting_block_string =~ /1$/ ); | ||||
26245 | chop $nesting_list_string; | ||||
26246 | $nesting_list_flag = ( $nesting_list_string =~ /1$/ ); | ||||
26247 | |||||
26248 | chop $ci_string_in_tokenizer; | ||||
26249 | $ci_string_sum = ones_count($ci_string_in_tokenizer); | ||||
26250 | |||||
26251 | $in_statement_continuation = | ||||
26252 | chop $continuation_string_in_tokenizer; | ||||
26253 | |||||
26254 | # zero continuation flag at terminal BLOCK '}' which | ||||
26255 | # ends a statement. | ||||
26256 | if ( $routput_block_type->[$i] ) { | ||||
26257 | |||||
26258 | # ...These include non-anonymous subs | ||||
26259 | # note: could be sub ::abc { or sub 'abc | ||||
26260 | if ( $routput_block_type->[$i] =~ m/^sub\s*/gc ) { | ||||
26261 | |||||
26262 | # note: older versions of perl require the /gc modifier | ||||
26263 | # here or else the \G does not work. | ||||
26264 | if ( $routput_block_type->[$i] =~ /\G('|::|\w)/gc ) | ||||
26265 | { | ||||
26266 | $in_statement_continuation = 0; | ||||
26267 | } | ||||
26268 | } | ||||
26269 | |||||
26270 | # ...and include all block types except user subs with | ||||
26271 | # block prototypes and these: (sort|grep|map|do|eval) | ||||
26272 | # /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|;|if|elsif|else|unless|while|until|for|foreach)$/ | ||||
26273 | elsif ( | ||||
26274 | $is_zero_continuation_block_type{ | ||||
26275 | $routput_block_type->[$i] | ||||
26276 | } ) | ||||
26277 | { | ||||
26278 | $in_statement_continuation = 0; | ||||
26279 | } | ||||
26280 | |||||
26281 | # ..but these are not terminal types: | ||||
26282 | # /^(sort|grep|map|do|eval)$/ ) | ||||
26283 | elsif ( | ||||
26284 | $is_not_zero_continuation_block_type{ | ||||
26285 | $routput_block_type->[$i] | ||||
26286 | } ) | ||||
26287 | { | ||||
26288 | } | ||||
26289 | |||||
26290 | # ..and a block introduced by a label | ||||
26291 | # /^\w+\s*:$/gc ) { | ||||
26292 | elsif ( $routput_block_type->[$i] =~ /:$/ ) { | ||||
26293 | $in_statement_continuation = 0; | ||||
26294 | } | ||||
26295 | |||||
26296 | # user function with block prototype | ||||
26297 | else { | ||||
26298 | $in_statement_continuation = 0; | ||||
26299 | } | ||||
26300 | } | ||||
26301 | |||||
26302 | # If we are in a list, then | ||||
26303 | # we must set continuation indentation at the closing | ||||
26304 | # paren of something like this (paren after $check): | ||||
26305 | # assert( | ||||
26306 | # __LINE__, | ||||
26307 | # ( not defined $check ) | ||||
26308 | # or ref $check | ||||
26309 | # or $check eq "new" | ||||
26310 | # or $check eq "old", | ||||
26311 | # ); | ||||
26312 | elsif ( $tok eq ')' ) { | ||||
26313 | $in_statement_continuation = 1 | ||||
26314 | if $routput_container_type->[$i] =~ /^[;,\{\}]$/; | ||||
26315 | } | ||||
26316 | |||||
26317 | elsif ( $tok eq ';' ) { $in_statement_continuation = 0 } | ||||
26318 | } | ||||
26319 | |||||
26320 | # use environment after updating | ||||
26321 | $container_environment = | ||||
26322 | $nesting_block_flag ? 'BLOCK' | ||||
26323 | : $nesting_list_flag ? 'LIST' | ||||
26324 | : ""; | ||||
26325 | $ci_string_i = $ci_string_sum + $in_statement_continuation; | ||||
26326 | $nesting_block_string_i = $nesting_block_string; | ||||
26327 | $nesting_list_string_i = $nesting_list_string; | ||||
26328 | } | ||||
26329 | |||||
26330 | # not a structural indentation type.. | ||||
26331 | else { | ||||
26332 | |||||
26333 | $container_environment = | ||||
26334 | $nesting_block_flag ? 'BLOCK' | ||||
26335 | : $nesting_list_flag ? 'LIST' | ||||
26336 | : ""; | ||||
26337 | |||||
26338 | # zero the continuation indentation at certain tokens so | ||||
26339 | # that they will be at the same level as its container. For | ||||
26340 | # commas, this simplifies the -lp indentation logic, which | ||||
26341 | # counts commas. For ?: it makes them stand out. | ||||
26342 | if ($nesting_list_flag) { | ||||
26343 | if ( $type =~ /^[,\?\:]$/ ) { | ||||
26344 | $in_statement_continuation = 0; | ||||
26345 | } | ||||
26346 | } | ||||
26347 | |||||
26348 | # be sure binary operators get continuation indentation | ||||
26349 | if ( | ||||
26350 | $container_environment | ||||
26351 | && ( $type eq 'k' && $is_binary_keyword{$tok} | ||||
26352 | || $is_binary_type{$type} ) | ||||
26353 | ) | ||||
26354 | { | ||||
26355 | $in_statement_continuation = 1; | ||||
26356 | } | ||||
26357 | |||||
26358 | # continuation indentation is sum of any open ci from previous | ||||
26359 | # levels plus the current level | ||||
26360 | $ci_string_i = $ci_string_sum + $in_statement_continuation; | ||||
26361 | |||||
26362 | # update continuation flag ... | ||||
26363 | # if this isn't a blank or comment.. | ||||
26364 | if ( $type ne 'b' && $type ne '#' ) { | ||||
26365 | |||||
26366 | # and we are in a BLOCK | ||||
26367 | if ($nesting_block_flag) { | ||||
26368 | |||||
26369 | # the next token after a ';' and label starts a new stmt | ||||
26370 | if ( $type eq ';' || $type eq 'J' ) { | ||||
26371 | $in_statement_continuation = 0; | ||||
26372 | } | ||||
26373 | |||||
26374 | # otherwise, we are continuing the current statement | ||||
26375 | else { | ||||
26376 | $in_statement_continuation = 1; | ||||
26377 | } | ||||
26378 | } | ||||
26379 | |||||
26380 | # if we are not in a BLOCK.. | ||||
26381 | else { | ||||
26382 | |||||
26383 | # do not use continuation indentation if not list | ||||
26384 | # environment (could be within if/elsif clause) | ||||
26385 | if ( !$nesting_list_flag ) { | ||||
26386 | $in_statement_continuation = 0; | ||||
26387 | } | ||||
26388 | |||||
26389 | # otherwise, the next token after a ',' starts a new term | ||||
26390 | elsif ( $type eq ',' ) { | ||||
26391 | $in_statement_continuation = 0; | ||||
26392 | } | ||||
26393 | |||||
26394 | # otherwise, we are continuing the current term | ||||
26395 | else { | ||||
26396 | $in_statement_continuation = 1; | ||||
26397 | } | ||||
26398 | } | ||||
26399 | } | ||||
26400 | } | ||||
26401 | |||||
26402 | if ( $level_in_tokenizer < 0 ) { | ||||
26403 | unless ( $tokenizer_self->{_saw_negative_indentation} ) { | ||||
26404 | $tokenizer_self->{_saw_negative_indentation} = 1; | ||||
26405 | warning("Starting negative indentation\n"); | ||||
26406 | } | ||||
26407 | } | ||||
26408 | |||||
26409 | # set secondary nesting levels based on all containment token types | ||||
26410 | # Note: these are set so that the nesting depth is the depth | ||||
26411 | # of the PREVIOUS TOKEN, which is convenient for setting | ||||
26412 | # the strength of token bonds | ||||
26413 | my $slevel_i = $slevel_in_tokenizer; | ||||
26414 | |||||
26415 | # /^[L\{\(\[]$/ | ||||
26416 | if ( $is_opening_type{$type} ) { | ||||
26417 | $slevel_in_tokenizer++; | ||||
26418 | $nesting_token_string .= $tok; | ||||
26419 | $nesting_type_string .= $type; | ||||
26420 | } | ||||
26421 | |||||
26422 | # /^[R\}\)\]]$/ | ||||
26423 | elsif ( $is_closing_type{$type} ) { | ||||
26424 | $slevel_in_tokenizer--; | ||||
26425 | my $char = chop $nesting_token_string; | ||||
26426 | |||||
26427 | if ( $char ne $matching_start_token{$tok} ) { | ||||
26428 | $nesting_token_string .= $char . $tok; | ||||
26429 | $nesting_type_string .= $type; | ||||
26430 | } | ||||
26431 | else { | ||||
26432 | chop $nesting_type_string; | ||||
26433 | } | ||||
26434 | } | ||||
26435 | |||||
26436 | push( @block_type, $routput_block_type->[$i] ); | ||||
26437 | push( @ci_string, $ci_string_i ); | ||||
26438 | push( @container_environment, $container_environment ); | ||||
26439 | push( @container_type, $routput_container_type->[$i] ); | ||||
26440 | push( @levels, $level_i ); | ||||
26441 | push( @nesting_tokens, $nesting_token_string_i ); | ||||
26442 | push( @nesting_types, $nesting_type_string_i ); | ||||
26443 | push( @slevels, $slevel_i ); | ||||
26444 | push( @token_type, $fix_type ); | ||||
26445 | push( @type_sequence, $routput_type_sequence->[$i] ); | ||||
26446 | push( @nesting_blocks, $nesting_block_string ); | ||||
26447 | push( @nesting_lists, $nesting_list_string ); | ||||
26448 | |||||
26449 | # now form the previous token | ||||
26450 | if ( $im >= 0 ) { | ||||
26451 | $num = | ||||
26452 | $$rtoken_map[$i] - $$rtoken_map[$im]; # how many characters | ||||
26453 | |||||
26454 | if ( $num > 0 ) { | ||||
26455 | push( @tokens, | ||||
26456 | substr( $input_line, $$rtoken_map[$im], $num ) ); | ||||
26457 | } | ||||
26458 | } | ||||
26459 | $im = $i; | ||||
26460 | } | ||||
26461 | |||||
26462 | $num = length($input_line) - $$rtoken_map[$im]; # make the last token | ||||
26463 | if ( $num > 0 ) { | ||||
26464 | push( @tokens, substr( $input_line, $$rtoken_map[$im], $num ) ); | ||||
26465 | } | ||||
26466 | |||||
26467 | $tokenizer_self->{_in_attribute_list} = $in_attribute_list; | ||||
26468 | $tokenizer_self->{_in_quote} = $in_quote; | ||||
26469 | $tokenizer_self->{_quote_target} = | ||||
26470 | $in_quote ? matching_end_token($quote_character) : ""; | ||||
26471 | $tokenizer_self->{_rhere_target_list} = $rhere_target_list; | ||||
26472 | |||||
26473 | $line_of_tokens->{_rtoken_type} = \@token_type; | ||||
26474 | $line_of_tokens->{_rtokens} = \@tokens; | ||||
26475 | $line_of_tokens->{_rblock_type} = \@block_type; | ||||
26476 | $line_of_tokens->{_rcontainer_type} = \@container_type; | ||||
26477 | $line_of_tokens->{_rcontainer_environment} = \@container_environment; | ||||
26478 | $line_of_tokens->{_rtype_sequence} = \@type_sequence; | ||||
26479 | $line_of_tokens->{_rlevels} = \@levels; | ||||
26480 | $line_of_tokens->{_rslevels} = \@slevels; | ||||
26481 | $line_of_tokens->{_rnesting_tokens} = \@nesting_tokens; | ||||
26482 | $line_of_tokens->{_rci_levels} = \@ci_string; | ||||
26483 | $line_of_tokens->{_rnesting_blocks} = \@nesting_blocks; | ||||
26484 | |||||
26485 | return; | ||||
26486 | } | ||||
26487 | } # end tokenize_this_line | ||||
26488 | |||||
26489 | #########i############################################################# | ||||
26490 | # Tokenizer routines which assist in identifying token types | ||||
26491 | ####################################################################### | ||||
26492 | |||||
26493 | sub operator_expected { | ||||
26494 | |||||
26495 | # Many perl symbols have two or more meanings. For example, '<<' | ||||
26496 | # can be a shift operator or a here-doc operator. The | ||||
26497 | # interpretation of these symbols depends on the current state of | ||||
26498 | # the tokenizer, which may either be expecting a term or an | ||||
26499 | # operator. For this example, a << would be a shift if an operator | ||||
26500 | # is expected, and a here-doc if a term is expected. This routine | ||||
26501 | # is called to make this decision for any current token. It returns | ||||
26502 | # one of three possible values: | ||||
26503 | # | ||||
26504 | # OPERATOR - operator expected (or at least, not a term) | ||||
26505 | # UNKNOWN - can't tell | ||||
26506 | # TERM - a term is expected (or at least, not an operator) | ||||
26507 | # | ||||
26508 | # The decision is based on what has been seen so far. This | ||||
26509 | # information is stored in the "$last_nonblank_type" and | ||||
26510 | # "$last_nonblank_token" variables. For example, if the | ||||
26511 | # $last_nonblank_type is '=~', then we are expecting a TERM, whereas | ||||
26512 | # if $last_nonblank_type is 'n' (numeric), we are expecting an | ||||
26513 | # OPERATOR. | ||||
26514 | # | ||||
26515 | # If a UNKNOWN is returned, the calling routine must guess. A major | ||||
26516 | # goal of this tokenizer is to minimize the possibility of returning | ||||
26517 | # UNKNOWN, because a wrong guess can spoil the formatting of a | ||||
26518 | # script. | ||||
26519 | # | ||||
26520 | # adding NEW_TOKENS: it is critically important that this routine be | ||||
26521 | # updated to allow it to determine if an operator or term is to be | ||||
26522 | # expected after the new token. Doing this simply involves adding | ||||
26523 | # the new token character to one of the regexes in this routine or | ||||
26524 | # to one of the hash lists | ||||
26525 | # that it uses, which are initialized in the BEGIN section. | ||||
26526 | # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token, | ||||
26527 | # $statement_type | ||||
26528 | |||||
26529 | my ( $prev_type, $tok, $next_type ) = @_; | ||||
26530 | |||||
26531 | my $op_expected = UNKNOWN; | ||||
26532 | |||||
26533 | ##print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n"; | ||||
26534 | |||||
26535 | # Note: function prototype is available for token type 'U' for future | ||||
26536 | # program development. It contains the leading and trailing parens, | ||||
26537 | # and no blanks. It might be used to eliminate token type 'C', for | ||||
26538 | # example (prototype = '()'). Thus: | ||||
26539 | # if ($last_nonblank_type eq 'U') { | ||||
26540 | # print "previous token=$last_nonblank_token type=$last_nonblank_type prototype=$last_nonblank_prototype\n"; | ||||
26541 | # } | ||||
26542 | |||||
26543 | # A possible filehandle (or object) requires some care... | ||||
26544 | if ( $last_nonblank_type eq 'Z' ) { | ||||
26545 | |||||
26546 | # angle.t | ||||
26547 | if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) { | ||||
26548 | $op_expected = UNKNOWN; | ||||
26549 | } | ||||
26550 | |||||
26551 | # For possible file handle like "$a", Perl uses weird parsing rules. | ||||
26552 | # For example: | ||||
26553 | # print $a/2,"/hi"; - division | ||||
26554 | # print $a / 2,"/hi"; - division | ||||
26555 | # print $a/ 2,"/hi"; - division | ||||
26556 | # print $a /2,"/hi"; - pattern (and error)! | ||||
26557 | elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) { | ||||
26558 | $op_expected = TERM; | ||||
26559 | } | ||||
26560 | |||||
26561 | # Note when an operation is being done where a | ||||
26562 | # filehandle might be expected, since a change in whitespace | ||||
26563 | # could change the interpretation of the statement. | ||||
26564 | else { | ||||
26565 | if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) { | ||||
26566 | complain("operator in print statement not recommended\n"); | ||||
26567 | $op_expected = OPERATOR; | ||||
26568 | } | ||||
26569 | } | ||||
26570 | } | ||||
26571 | |||||
26572 | # Check for smartmatch operator before preceding brace or square bracket. | ||||
26573 | # For example, at the ? after the ] in the following expressions we are | ||||
26574 | # expecting an operator: | ||||
26575 | # | ||||
26576 | # qr/3/ ~~ ['1234'] ? 1 : 0; | ||||
26577 | # map { $_ ~~ [ '0', '1' ] ? 'x' : 'o' } @a; | ||||
26578 | elsif ( $last_nonblank_type eq '}' && $last_nonblank_token eq '~~' ) { | ||||
26579 | $op_expected = OPERATOR; | ||||
26580 | } | ||||
26581 | |||||
26582 | # handle something after 'do' and 'eval' | ||||
26583 | elsif ( $is_block_operator{$last_nonblank_token} ) { | ||||
26584 | |||||
26585 | # something like $a = eval "expression"; | ||||
26586 | # ^ | ||||
26587 | if ( $last_nonblank_type eq 'k' ) { | ||||
26588 | $op_expected = TERM; # expression or list mode following keyword | ||||
26589 | } | ||||
26590 | |||||
26591 | # something like $a = do { BLOCK } / 2; | ||||
26592 | # or this ? after a smartmatch anonynmous hash or array reference: | ||||
26593 | # qr/3/ ~~ ['1234'] ? 1 : 0; | ||||
26594 | # ^ | ||||
26595 | else { | ||||
26596 | $op_expected = OPERATOR; # block mode following } | ||||
26597 | } | ||||
26598 | } | ||||
26599 | |||||
26600 | # handle bare word.. | ||||
26601 | elsif ( $last_nonblank_type eq 'w' ) { | ||||
26602 | |||||
26603 | # unfortunately, we can't tell what type of token to expect next | ||||
26604 | # after most bare words | ||||
26605 | $op_expected = UNKNOWN; | ||||
26606 | } | ||||
26607 | |||||
26608 | # operator, but not term possible after these types | ||||
26609 | # Note: moved ')' from type to token because parens in list context | ||||
26610 | # get marked as '{' '}' now. This is a minor glitch in the following: | ||||
26611 | # my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : (); | ||||
26612 | # | ||||
26613 | elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ ) | ||||
26614 | || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) ) | ||||
26615 | { | ||||
26616 | $op_expected = OPERATOR; | ||||
26617 | |||||
26618 | # in a 'use' statement, numbers and v-strings are not true | ||||
26619 | # numbers, so to avoid incorrect error messages, we will | ||||
26620 | # mark them as unknown for now (use.t) | ||||
26621 | # TODO: it would be much nicer to create a new token V for VERSION | ||||
26622 | # number in a use statement. Then this could be a check on type V | ||||
26623 | # and related patches which change $statement_type for '=>' | ||||
26624 | # and ',' could be removed. Further, it would clean things up to | ||||
26625 | # scan the 'use' statement with a separate subroutine. | ||||
26626 | if ( ( $statement_type eq 'use' ) | ||||
26627 | && ( $last_nonblank_type =~ /^[nv]$/ ) ) | ||||
26628 | { | ||||
26629 | $op_expected = UNKNOWN; | ||||
26630 | } | ||||
26631 | |||||
26632 | # expecting VERSION or {} after package NAMESPACE | ||||
26633 | elsif ($statement_type =~ /^package\b/ | ||||
26634 | && $last_nonblank_token =~ /^package\b/ ) | ||||
26635 | { | ||||
26636 | $op_expected = TERM; | ||||
26637 | } | ||||
26638 | } | ||||
26639 | |||||
26640 | # no operator after many keywords, such as "die", "warn", etc | ||||
26641 | elsif ( $expecting_term_token{$last_nonblank_token} ) { | ||||
26642 | |||||
26643 | # patch for dor.t (defined or). | ||||
26644 | # perl functions which may be unary operators | ||||
26645 | # TODO: This list is incomplete, and these should be put | ||||
26646 | # into a hash. | ||||
26647 | if ( $tok eq '/' | ||||
26648 | && $next_type eq '/' | ||||
26649 | && $last_nonblank_type eq 'k' | ||||
26650 | && $last_nonblank_token =~ /^eof|undef|shift|pop$/ ) | ||||
26651 | { | ||||
26652 | $op_expected = OPERATOR; | ||||
26653 | } | ||||
26654 | else { | ||||
26655 | $op_expected = TERM; | ||||
26656 | } | ||||
26657 | } | ||||
26658 | |||||
26659 | # no operator after things like + - ** (i.e., other operators) | ||||
26660 | elsif ( $expecting_term_types{$last_nonblank_type} ) { | ||||
26661 | $op_expected = TERM; | ||||
26662 | } | ||||
26663 | |||||
26664 | # a few operators, like "time", have an empty prototype () and so | ||||
26665 | # take no parameters but produce a value to operate on | ||||
26666 | elsif ( $expecting_operator_token{$last_nonblank_token} ) { | ||||
26667 | $op_expected = OPERATOR; | ||||
26668 | } | ||||
26669 | |||||
26670 | # post-increment and decrement produce values to be operated on | ||||
26671 | elsif ( $expecting_operator_types{$last_nonblank_type} ) { | ||||
26672 | $op_expected = OPERATOR; | ||||
26673 | } | ||||
26674 | |||||
26675 | # no value to operate on after sub block | ||||
26676 | elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; } | ||||
26677 | |||||
26678 | # a right brace here indicates the end of a simple block. | ||||
26679 | # all non-structural right braces have type 'R' | ||||
26680 | # all braces associated with block operator keywords have been given those | ||||
26681 | # keywords as "last_nonblank_token" and caught above. | ||||
26682 | # (This statement is order dependent, and must come after checking | ||||
26683 | # $last_nonblank_token). | ||||
26684 | elsif ( $last_nonblank_type eq '}' ) { | ||||
26685 | |||||
26686 | # patch for dor.t (defined or). | ||||
26687 | if ( $tok eq '/' | ||||
26688 | && $next_type eq '/' | ||||
26689 | && $last_nonblank_token eq ']' ) | ||||
26690 | { | ||||
26691 | $op_expected = OPERATOR; | ||||
26692 | } | ||||
26693 | else { | ||||
26694 | $op_expected = TERM; | ||||
26695 | } | ||||
26696 | } | ||||
26697 | |||||
26698 | # something else..what did I forget? | ||||
26699 | else { | ||||
26700 | |||||
26701 | # collecting diagnostics on unknown operator types..see what was missed | ||||
26702 | $op_expected = UNKNOWN; | ||||
26703 | write_diagnostics( | ||||
26704 | "OP: unknown after type=$last_nonblank_type token=$last_nonblank_token\n" | ||||
26705 | ); | ||||
26706 | } | ||||
26707 | |||||
26708 | TOKENIZER_DEBUG_FLAG_EXPECT && do { | ||||
26709 | print STDOUT | ||||
26710 | "EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n"; | ||||
26711 | }; | ||||
26712 | return $op_expected; | ||||
26713 | } | ||||
26714 | |||||
26715 | sub new_statement_ok { | ||||
26716 | |||||
26717 | # return true if the current token can start a new statement | ||||
26718 | # USES GLOBAL VARIABLES: $last_nonblank_type | ||||
26719 | |||||
26720 | return label_ok() # a label would be ok here | ||||
26721 | |||||
26722 | || $last_nonblank_type eq 'J'; # or we follow a label | ||||
26723 | |||||
26724 | } | ||||
26725 | |||||
26726 | sub label_ok { | ||||
26727 | |||||
26728 | # Decide if a bare word followed by a colon here is a label | ||||
26729 | # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type, | ||||
26730 | # $brace_depth, @brace_type | ||||
26731 | |||||
26732 | # if it follows an opening or closing code block curly brace.. | ||||
26733 | if ( ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' ) | ||||
26734 | && $last_nonblank_type eq $last_nonblank_token ) | ||||
26735 | { | ||||
26736 | |||||
26737 | # it is a label if and only if the curly encloses a code block | ||||
26738 | return $brace_type[$brace_depth]; | ||||
26739 | } | ||||
26740 | |||||
26741 | # otherwise, it is a label if and only if it follows a ';' (real or fake) | ||||
26742 | # or another label | ||||
26743 | else { | ||||
26744 | return ( $last_nonblank_type eq ';' || $last_nonblank_type eq 'J' ); | ||||
26745 | } | ||||
26746 | } | ||||
26747 | |||||
26748 | sub code_block_type { | ||||
26749 | |||||
26750 | # Decide if this is a block of code, and its type. | ||||
26751 | # Must be called only when $type = $token = '{' | ||||
26752 | # The problem is to distinguish between the start of a block of code | ||||
26753 | # and the start of an anonymous hash reference | ||||
26754 | # Returns "" if not code block, otherwise returns 'last_nonblank_token' | ||||
26755 | # to indicate the type of code block. (For example, 'last_nonblank_token' | ||||
26756 | # might be 'if' for an if block, 'else' for an else block, etc). | ||||
26757 | # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type, | ||||
26758 | # $last_nonblank_block_type, $brace_depth, @brace_type | ||||
26759 | |||||
26760 | # handle case of multiple '{'s | ||||
26761 | |||||
26762 | # print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n"; | ||||
26763 | |||||
26764 | my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_; | ||||
26765 | if ( $last_nonblank_token eq '{' | ||||
26766 | && $last_nonblank_type eq $last_nonblank_token ) | ||||
26767 | { | ||||
26768 | |||||
26769 | # opening brace where a statement may appear is probably | ||||
26770 | # a code block but might be and anonymous hash reference | ||||
26771 | if ( $brace_type[$brace_depth] ) { | ||||
26772 | return decide_if_code_block( $i, $rtokens, $rtoken_type, | ||||
26773 | $max_token_index ); | ||||
26774 | } | ||||
26775 | |||||
26776 | # cannot start a code block within an anonymous hash | ||||
26777 | else { | ||||
26778 | return ""; | ||||
26779 | } | ||||
26780 | } | ||||
26781 | |||||
26782 | elsif ( $last_nonblank_token eq ';' ) { | ||||
26783 | |||||
26784 | # an opening brace where a statement may appear is probably | ||||
26785 | # a code block but might be and anonymous hash reference | ||||
26786 | return decide_if_code_block( $i, $rtokens, $rtoken_type, | ||||
26787 | $max_token_index ); | ||||
26788 | } | ||||
26789 | |||||
26790 | # handle case of '}{' | ||||
26791 | elsif ($last_nonblank_token eq '}' | ||||
26792 | && $last_nonblank_type eq $last_nonblank_token ) | ||||
26793 | { | ||||
26794 | |||||
26795 | # a } { situation ... | ||||
26796 | # could be hash reference after code block..(blktype1.t) | ||||
26797 | if ($last_nonblank_block_type) { | ||||
26798 | return decide_if_code_block( $i, $rtokens, $rtoken_type, | ||||
26799 | $max_token_index ); | ||||
26800 | } | ||||
26801 | |||||
26802 | # must be a block if it follows a closing hash reference | ||||
26803 | else { | ||||
26804 | return $last_nonblank_token; | ||||
26805 | } | ||||
26806 | } | ||||
26807 | |||||
26808 | # NOTE: braces after type characters start code blocks, but for | ||||
26809 | # simplicity these are not identified as such. See also | ||||
26810 | # sub is_non_structural_brace. | ||||
26811 | |||||
26812 | ## elsif ( $last_nonblank_type eq 't' ) { | ||||
26813 | ## return $last_nonblank_token; | ||||
26814 | ## } | ||||
26815 | |||||
26816 | # brace after label: | ||||
26817 | elsif ( $last_nonblank_type eq 'J' ) { | ||||
26818 | return $last_nonblank_token; | ||||
26819 | } | ||||
26820 | |||||
26821 | # otherwise, look at previous token. This must be a code block if | ||||
26822 | # it follows any of these: | ||||
26823 | # /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/ | ||||
26824 | elsif ( $is_code_block_token{$last_nonblank_token} ) { | ||||
26825 | |||||
26826 | # Bug Patch: Note that the opening brace after the 'if' in the following | ||||
26827 | # snippet is an anonymous hash ref and not a code block! | ||||
26828 | # print 'hi' if { x => 1, }->{x}; | ||||
26829 | # We can identify this situation because the last nonblank type | ||||
26830 | # will be a keyword (instead of a closing peren) | ||||
26831 | if ( $last_nonblank_token =~ /^(if|unless)$/ | ||||
26832 | && $last_nonblank_type eq 'k' ) | ||||
26833 | { | ||||
26834 | return ""; | ||||
26835 | } | ||||
26836 | else { | ||||
26837 | return $last_nonblank_token; | ||||
26838 | } | ||||
26839 | } | ||||
26840 | |||||
26841 | # or a sub or package BLOCK | ||||
26842 | elsif ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' ) | ||||
26843 | && $last_nonblank_token =~ /^(sub|package)\b/ ) | ||||
26844 | { | ||||
26845 | return $last_nonblank_token; | ||||
26846 | } | ||||
26847 | |||||
26848 | elsif ( $statement_type =~ /^(sub|package)\b/ ) { | ||||
26849 | return $statement_type; | ||||
26850 | } | ||||
26851 | |||||
26852 | # user-defined subs with block parameters (like grep/map/eval) | ||||
26853 | elsif ( $last_nonblank_type eq 'G' ) { | ||||
26854 | return $last_nonblank_token; | ||||
26855 | } | ||||
26856 | |||||
26857 | # check bareword | ||||
26858 | elsif ( $last_nonblank_type eq 'w' ) { | ||||
26859 | return decide_if_code_block( $i, $rtokens, $rtoken_type, | ||||
26860 | $max_token_index ); | ||||
26861 | } | ||||
26862 | |||||
26863 | # anything else must be anonymous hash reference | ||||
26864 | else { | ||||
26865 | return ""; | ||||
26866 | } | ||||
26867 | } | ||||
26868 | |||||
26869 | sub decide_if_code_block { | ||||
26870 | |||||
26871 | # USES GLOBAL VARIABLES: $last_nonblank_token | ||||
26872 | my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_; | ||||
26873 | my ( $next_nonblank_token, $i_next ) = | ||||
26874 | find_next_nonblank_token( $i, $rtokens, $max_token_index ); | ||||
26875 | |||||
26876 | # we are at a '{' where a statement may appear. | ||||
26877 | # We must decide if this brace starts an anonymous hash or a code | ||||
26878 | # block. | ||||
26879 | # return "" if anonymous hash, and $last_nonblank_token otherwise | ||||
26880 | |||||
26881 | # initialize to be code BLOCK | ||||
26882 | my $code_block_type = $last_nonblank_token; | ||||
26883 | |||||
26884 | # Check for the common case of an empty anonymous hash reference: | ||||
26885 | # Maybe something like sub { { } } | ||||
26886 | if ( $next_nonblank_token eq '}' ) { | ||||
26887 | $code_block_type = ""; | ||||
26888 | } | ||||
26889 | |||||
26890 | else { | ||||
26891 | |||||
26892 | # To guess if this '{' is an anonymous hash reference, look ahead | ||||
26893 | # and test as follows: | ||||
26894 | # | ||||
26895 | # it is a hash reference if next come: | ||||
26896 | # - a string or digit followed by a comma or => | ||||
26897 | # - bareword followed by => | ||||
26898 | # otherwise it is a code block | ||||
26899 | # | ||||
26900 | # Examples of anonymous hash ref: | ||||
26901 | # {'aa',}; | ||||
26902 | # {1,2} | ||||
26903 | # | ||||
26904 | # Examples of code blocks: | ||||
26905 | # {1; print "hello\n", 1;} | ||||
26906 | # {$a,1}; | ||||
26907 | |||||
26908 | # We are only going to look ahead one more (nonblank/comment) line. | ||||
26909 | # Strange formatting could cause a bad guess, but that's unlikely. | ||||
26910 | my @pre_types = @$rtoken_type[ $i + 1 .. $max_token_index ]; | ||||
26911 | my @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ]; | ||||
26912 | my ( $rpre_tokens, $rpre_types ) = | ||||
26913 | peek_ahead_for_n_nonblank_pre_tokens(20); # 20 is arbitrary but | ||||
26914 | # generous, and prevents | ||||
26915 | # wasting lots of | ||||
26916 | # time in mangled files | ||||
26917 | if ( defined($rpre_types) && @$rpre_types ) { | ||||
26918 | push @pre_types, @$rpre_types; | ||||
26919 | push @pre_tokens, @$rpre_tokens; | ||||
26920 | } | ||||
26921 | |||||
26922 | # put a sentinel token to simplify stopping the search | ||||
26923 | push @pre_types, '}'; | ||||
26924 | |||||
26925 | my $jbeg = 0; | ||||
26926 | $jbeg = 1 if $pre_types[0] eq 'b'; | ||||
26927 | |||||
26928 | # first look for one of these | ||||
26929 | # - bareword | ||||
26930 | # - bareword with leading - | ||||
26931 | # - digit | ||||
26932 | # - quoted string | ||||
26933 | my $j = $jbeg; | ||||
26934 | if ( $pre_types[$j] =~ /^[\'\"]/ ) { | ||||
26935 | |||||
26936 | # find the closing quote; don't worry about escapes | ||||
26937 | my $quote_mark = $pre_types[$j]; | ||||
26938 | for ( my $k = $j + 1 ; $k < $#pre_types ; $k++ ) { | ||||
26939 | if ( $pre_types[$k] eq $quote_mark ) { | ||||
26940 | $j = $k + 1; | ||||
26941 | my $next = $pre_types[$j]; | ||||
26942 | last; | ||||
26943 | } | ||||
26944 | } | ||||
26945 | } | ||||
26946 | elsif ( $pre_types[$j] eq 'd' ) { | ||||
26947 | $j++; | ||||
26948 | } | ||||
26949 | elsif ( $pre_types[$j] eq 'w' ) { | ||||
26950 | unless ( $is_keyword{ $pre_tokens[$j] } ) { | ||||
26951 | $j++; | ||||
26952 | } | ||||
26953 | } | ||||
26954 | elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) { | ||||
26955 | $j++; | ||||
26956 | } | ||||
26957 | if ( $j > $jbeg ) { | ||||
26958 | |||||
26959 | $j++ if $pre_types[$j] eq 'b'; | ||||
26960 | |||||
26961 | # it's a hash ref if a comma or => follow next | ||||
26962 | if ( $pre_types[$j] eq ',' | ||||
26963 | || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) ) | ||||
26964 | { | ||||
26965 | $code_block_type = ""; | ||||
26966 | } | ||||
26967 | } | ||||
26968 | } | ||||
26969 | |||||
26970 | return $code_block_type; | ||||
26971 | } | ||||
26972 | |||||
26973 | sub unexpected { | ||||
26974 | |||||
26975 | # report unexpected token type and show where it is | ||||
26976 | # USES GLOBAL VARIABLES: $tokenizer_self | ||||
26977 | my ( $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map, | ||||
26978 | $rpretoken_type, $input_line ) | ||||
26979 | = @_; | ||||
26980 | |||||
26981 | if ( ++$tokenizer_self->{_unexpected_error_count} <= MAX_NAG_MESSAGES ) { | ||||
26982 | my $msg = "found $found where $expecting expected"; | ||||
26983 | my $pos = $$rpretoken_map[$i_tok]; | ||||
26984 | interrupt_logfile(); | ||||
26985 | my $input_line_number = $tokenizer_self->{_last_line_number}; | ||||
26986 | my ( $offset, $numbered_line, $underline ) = | ||||
26987 | make_numbered_line( $input_line_number, $input_line, $pos ); | ||||
26988 | $underline = write_on_underline( $underline, $pos - $offset, '^' ); | ||||
26989 | |||||
26990 | my $trailer = ""; | ||||
26991 | if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) { | ||||
26992 | my $pos_prev = $$rpretoken_map[$last_nonblank_i]; | ||||
26993 | my $num; | ||||
26994 | if ( $$rpretoken_type[ $i_tok - 1 ] eq 'b' ) { | ||||
26995 | $num = $$rpretoken_map[ $i_tok - 1 ] - $pos_prev; | ||||
26996 | } | ||||
26997 | else { | ||||
26998 | $num = $pos - $pos_prev; | ||||
26999 | } | ||||
27000 | if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; } | ||||
27001 | |||||
27002 | $underline = | ||||
27003 | write_on_underline( $underline, $pos_prev - $offset, '-' x $num ); | ||||
27004 | $trailer = " (previous token underlined)"; | ||||
27005 | } | ||||
27006 | warning( $numbered_line . "\n" ); | ||||
27007 | warning( $underline . "\n" ); | ||||
27008 | warning( $msg . $trailer . "\n" ); | ||||
27009 | resume_logfile(); | ||||
27010 | } | ||||
27011 | } | ||||
27012 | |||||
27013 | sub is_non_structural_brace { | ||||
27014 | |||||
27015 | # Decide if a brace or bracket is structural or non-structural | ||||
27016 | # by looking at the previous token and type | ||||
27017 | # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token | ||||
27018 | |||||
27019 | # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting. | ||||
27020 | # Tentatively deactivated because it caused the wrong operator expectation | ||||
27021 | # for this code: | ||||
27022 | # $user = @vars[1] / 100; | ||||
27023 | # Must update sub operator_expected before re-implementing. | ||||
27024 | # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) { | ||||
27025 | # return 0; | ||||
27026 | # } | ||||
27027 | |||||
27028 | # NOTE: braces after type characters start code blocks, but for | ||||
27029 | # simplicity these are not identified as such. See also | ||||
27030 | # sub code_block_type | ||||
27031 | |||||
27032 | ##if ($last_nonblank_type eq 't') {return 0} | ||||
27033 | |||||
27034 | # otherwise, it is non-structural if it is decorated | ||||
27035 | # by type information. | ||||
27036 | # For example, the '{' here is non-structural: ${xxx} | ||||
27037 | ( | ||||
27038 | $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/ | ||||
27039 | |||||
27040 | # or if we follow a hash or array closing curly brace or bracket | ||||
27041 | # For example, the second '{' in this is non-structural: $a{'x'}{'y'} | ||||
27042 | # because the first '}' would have been given type 'R' | ||||
27043 | || $last_nonblank_type =~ /^([R\]])$/ | ||||
27044 | ); | ||||
27045 | } | ||||
27046 | |||||
27047 | #########i############################################################# | ||||
27048 | # Tokenizer routines for tracking container nesting depths | ||||
27049 | ####################################################################### | ||||
27050 | |||||
27051 | # The following routines keep track of nesting depths of the nesting | ||||
27052 | # types, ( [ { and ?. This is necessary for determining the indentation | ||||
27053 | # level, and also for debugging programs. Not only do they keep track of | ||||
27054 | # nesting depths of the individual brace types, but they check that each | ||||
27055 | # of the other brace types is balanced within matching pairs. For | ||||
27056 | # example, if the program sees this sequence: | ||||
27057 | # | ||||
27058 | # { ( ( ) } | ||||
27059 | # | ||||
27060 | # then it can determine that there is an extra left paren somewhere | ||||
27061 | # between the { and the }. And so on with every other possible | ||||
27062 | # combination of outer and inner brace types. For another | ||||
27063 | # example: | ||||
27064 | # | ||||
27065 | # ( [ ..... ] ] ) | ||||
27066 | # | ||||
27067 | # which has an extra ] within the parens. | ||||
27068 | # | ||||
27069 | # The brace types have indexes 0 .. 3 which are indexes into | ||||
27070 | # the matrices. | ||||
27071 | # | ||||
27072 | # The pair ? : are treated as just another nesting type, with ? acting | ||||
27073 | # as the opening brace and : acting as the closing brace. | ||||
27074 | # | ||||
27075 | # The matrix | ||||
27076 | # | ||||
27077 | # $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b]; | ||||
27078 | # | ||||
27079 | # saves the nesting depth of brace type $b (where $b is either of the other | ||||
27080 | # nesting types) when brace type $a enters a new depth. When this depth | ||||
27081 | # decreases, a check is made that the current depth of brace types $b is | ||||
27082 | # unchanged, or otherwise there must have been an error. This can | ||||
27083 | # be very useful for localizing errors, particularly when perl runs to | ||||
27084 | # the end of a large file (such as this one) and announces that there | ||||
27085 | # is a problem somewhere. | ||||
27086 | # | ||||
27087 | # A numerical sequence number is maintained for every nesting type, | ||||
27088 | # so that each matching pair can be uniquely identified in a simple | ||||
27089 | # way. | ||||
27090 | |||||
27091 | sub increase_nesting_depth { | ||||
27092 | my ( $aa, $pos ) = @_; | ||||
27093 | |||||
27094 | # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth, | ||||
27095 | # @current_sequence_number, @depth_array, @starting_line_of_current_depth, | ||||
27096 | # $statement_type | ||||
27097 | my $bb; | ||||
27098 | $current_depth[$aa]++; | ||||
27099 | $total_depth++; | ||||
27100 | $total_depth[$aa][ $current_depth[$aa] ] = $total_depth; | ||||
27101 | my $input_line_number = $tokenizer_self->{_last_line_number}; | ||||
27102 | my $input_line = $tokenizer_self->{_line_text}; | ||||
27103 | |||||
27104 | # Sequence numbers increment by number of items. This keeps | ||||
27105 | # a unique set of numbers but still allows the relative location | ||||
27106 | # of any type to be determined. | ||||
27107 | $nesting_sequence_number[$aa] += scalar(@closing_brace_names); | ||||
27108 | my $seqno = $nesting_sequence_number[$aa]; | ||||
27109 | $current_sequence_number[$aa][ $current_depth[$aa] ] = $seqno; | ||||
27110 | |||||
27111 | $starting_line_of_current_depth[$aa][ $current_depth[$aa] ] = | ||||
27112 | [ $input_line_number, $input_line, $pos ]; | ||||
27113 | |||||
27114 | for $bb ( 0 .. $#closing_brace_names ) { | ||||
27115 | next if ( $bb == $aa ); | ||||
27116 | $depth_array[$aa][$bb][ $current_depth[$aa] ] = $current_depth[$bb]; | ||||
27117 | } | ||||
27118 | |||||
27119 | # set a flag for indenting a nested ternary statement | ||||
27120 | my $indent = 0; | ||||
27121 | if ( $aa == QUESTION_COLON ) { | ||||
27122 | $nested_ternary_flag[ $current_depth[$aa] ] = 0; | ||||
27123 | if ( $current_depth[$aa] > 1 ) { | ||||
27124 | if ( $nested_ternary_flag[ $current_depth[$aa] - 1 ] == 0 ) { | ||||
27125 | my $pdepth = $total_depth[$aa][ $current_depth[$aa] - 1 ]; | ||||
27126 | if ( $pdepth == $total_depth - 1 ) { | ||||
27127 | $indent = 1; | ||||
27128 | $nested_ternary_flag[ $current_depth[$aa] - 1 ] = -1; | ||||
27129 | } | ||||
27130 | } | ||||
27131 | } | ||||
27132 | } | ||||
27133 | $nested_statement_type[$aa][ $current_depth[$aa] ] = $statement_type; | ||||
27134 | $statement_type = ""; | ||||
27135 | return ( $seqno, $indent ); | ||||
27136 | } | ||||
27137 | |||||
27138 | sub decrease_nesting_depth { | ||||
27139 | |||||
27140 | my ( $aa, $pos ) = @_; | ||||
27141 | |||||
27142 | # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth, | ||||
27143 | # @current_sequence_number, @depth_array, @starting_line_of_current_depth | ||||
27144 | # $statement_type | ||||
27145 | my $bb; | ||||
27146 | my $seqno = 0; | ||||
27147 | my $input_line_number = $tokenizer_self->{_last_line_number}; | ||||
27148 | my $input_line = $tokenizer_self->{_line_text}; | ||||
27149 | |||||
27150 | my $outdent = 0; | ||||
27151 | $total_depth--; | ||||
27152 | if ( $current_depth[$aa] > 0 ) { | ||||
27153 | |||||
27154 | # set a flag for un-indenting after seeing a nested ternary statement | ||||
27155 | $seqno = $current_sequence_number[$aa][ $current_depth[$aa] ]; | ||||
27156 | if ( $aa == QUESTION_COLON ) { | ||||
27157 | $outdent = $nested_ternary_flag[ $current_depth[$aa] ]; | ||||
27158 | } | ||||
27159 | $statement_type = $nested_statement_type[$aa][ $current_depth[$aa] ]; | ||||
27160 | |||||
27161 | # check that any brace types $bb contained within are balanced | ||||
27162 | for $bb ( 0 .. $#closing_brace_names ) { | ||||
27163 | next if ( $bb == $aa ); | ||||
27164 | |||||
27165 | unless ( $depth_array[$aa][$bb][ $current_depth[$aa] ] == | ||||
27166 | $current_depth[$bb] ) | ||||
27167 | { | ||||
27168 | my $diff = | ||||
27169 | $current_depth[$bb] - | ||||
27170 | $depth_array[$aa][$bb][ $current_depth[$aa] ]; | ||||
27171 | |||||
27172 | # don't whine too many times | ||||
27173 | my $saw_brace_error = get_saw_brace_error(); | ||||
27174 | if ( | ||||
27175 | $saw_brace_error <= MAX_NAG_MESSAGES | ||||
27176 | |||||
27177 | # if too many closing types have occurred, we probably | ||||
27178 | # already caught this error | ||||
27179 | && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) ) | ||||
27180 | ) | ||||
27181 | { | ||||
27182 | interrupt_logfile(); | ||||
27183 | my $rsl = | ||||
27184 | $starting_line_of_current_depth[$aa] | ||||
27185 | [ $current_depth[$aa] ]; | ||||
27186 | my $sl = $$rsl[0]; | ||||
27187 | my $rel = [ $input_line_number, $input_line, $pos ]; | ||||
27188 | my $el = $$rel[0]; | ||||
27189 | my ($ess); | ||||
27190 | |||||
27191 | if ( $diff == 1 || $diff == -1 ) { | ||||
27192 | $ess = ''; | ||||
27193 | } | ||||
27194 | else { | ||||
27195 | $ess = 's'; | ||||
27196 | } | ||||
27197 | my $bname = | ||||
27198 | ( $diff > 0 ) | ||||
27199 | ? $opening_brace_names[$bb] | ||||
27200 | : $closing_brace_names[$bb]; | ||||
27201 | write_error_indicator_pair( @$rsl, '^' ); | ||||
27202 | my $msg = <<"EOM"; | ||||
27203 | Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el | ||||
27204 | EOM | ||||
27205 | |||||
27206 | if ( $diff > 0 ) { | ||||
27207 | my $rml = | ||||
27208 | $starting_line_of_current_depth[$bb] | ||||
27209 | [ $current_depth[$bb] ]; | ||||
27210 | my $ml = $$rml[0]; | ||||
27211 | $msg .= | ||||
27212 | " The most recent un-matched $bname is on line $ml\n"; | ||||
27213 | write_error_indicator_pair( @$rml, '^' ); | ||||
27214 | } | ||||
27215 | write_error_indicator_pair( @$rel, '^' ); | ||||
27216 | warning($msg); | ||||
27217 | resume_logfile(); | ||||
27218 | } | ||||
27219 | increment_brace_error(); | ||||
27220 | } | ||||
27221 | } | ||||
27222 | $current_depth[$aa]--; | ||||
27223 | } | ||||
27224 | else { | ||||
27225 | |||||
27226 | my $saw_brace_error = get_saw_brace_error(); | ||||
27227 | if ( $saw_brace_error <= MAX_NAG_MESSAGES ) { | ||||
27228 | my $msg = <<"EOM"; | ||||
27229 | There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number | ||||
27230 | EOM | ||||
27231 | indicate_error( $msg, $input_line_number, $input_line, $pos, '^' ); | ||||
27232 | } | ||||
27233 | increment_brace_error(); | ||||
27234 | } | ||||
27235 | return ( $seqno, $outdent ); | ||||
27236 | } | ||||
27237 | |||||
27238 | sub check_final_nesting_depths { | ||||
27239 | my ($aa); | ||||
27240 | |||||
27241 | # USES GLOBAL VARIABLES: @current_depth, @starting_line_of_current_depth | ||||
27242 | |||||
27243 | for $aa ( 0 .. $#closing_brace_names ) { | ||||
27244 | |||||
27245 | if ( $current_depth[$aa] ) { | ||||
27246 | my $rsl = | ||||
27247 | $starting_line_of_current_depth[$aa][ $current_depth[$aa] ]; | ||||
27248 | my $sl = $$rsl[0]; | ||||
27249 | my $msg = <<"EOM"; | ||||
27250 | Final nesting depth of $opening_brace_names[$aa]s is $current_depth[$aa] | ||||
27251 | The most recent un-matched $opening_brace_names[$aa] is on line $sl | ||||
27252 | EOM | ||||
27253 | indicate_error( $msg, @$rsl, '^' ); | ||||
27254 | increment_brace_error(); | ||||
27255 | } | ||||
27256 | } | ||||
27257 | } | ||||
27258 | |||||
27259 | #########i############################################################# | ||||
27260 | # Tokenizer routines for looking ahead in input stream | ||||
27261 | ####################################################################### | ||||
27262 | |||||
27263 | sub peek_ahead_for_n_nonblank_pre_tokens { | ||||
27264 | |||||
27265 | # returns next n pretokens if they exist | ||||
27266 | # returns undef's if hits eof without seeing any pretokens | ||||
27267 | # USES GLOBAL VARIABLES: $tokenizer_self | ||||
27268 | my $max_pretokens = shift; | ||||
27269 | my $line; | ||||
27270 | my $i = 0; | ||||
27271 | my ( $rpre_tokens, $rmap, $rpre_types ); | ||||
27272 | |||||
27273 | while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) ) | ||||
27274 | { | ||||
27275 | $line =~ s/^\s*//; # trim leading blanks | ||||
27276 | next if ( length($line) <= 0 ); # skip blank | ||||
27277 | next if ( $line =~ /^#/ ); # skip comment | ||||
27278 | ( $rpre_tokens, $rmap, $rpre_types ) = | ||||
27279 | pre_tokenize( $line, $max_pretokens ); | ||||
27280 | last; | ||||
27281 | } | ||||
27282 | return ( $rpre_tokens, $rpre_types ); | ||||
27283 | } | ||||
27284 | |||||
27285 | # look ahead for next non-blank, non-comment line of code | ||||
27286 | sub peek_ahead_for_nonblank_token { | ||||
27287 | |||||
27288 | # USES GLOBAL VARIABLES: $tokenizer_self | ||||
27289 | my ( $rtokens, $max_token_index ) = @_; | ||||
27290 | my $line; | ||||
27291 | my $i = 0; | ||||
27292 | |||||
27293 | while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) ) | ||||
27294 | { | ||||
27295 | $line =~ s/^\s*//; # trim leading blanks | ||||
27296 | next if ( length($line) <= 0 ); # skip blank | ||||
27297 | next if ( $line =~ /^#/ ); # skip comment | ||||
27298 | my ( $rtok, $rmap, $rtype ) = | ||||
27299 | pre_tokenize( $line, 2 ); # only need 2 pre-tokens | ||||
27300 | my $j = $max_token_index + 1; | ||||
27301 | my $tok; | ||||
27302 | |||||
27303 | foreach $tok (@$rtok) { | ||||
27304 | last if ( $tok =~ "\n" ); | ||||
27305 | $$rtokens[ ++$j ] = $tok; | ||||
27306 | } | ||||
27307 | last; | ||||
27308 | } | ||||
27309 | return $rtokens; | ||||
27310 | } | ||||
27311 | |||||
27312 | #########i############################################################# | ||||
27313 | # Tokenizer guessing routines for ambiguous situations | ||||
27314 | ####################################################################### | ||||
27315 | |||||
27316 | sub guess_if_pattern_or_conditional { | ||||
27317 | |||||
27318 | # this routine is called when we have encountered a ? following an | ||||
27319 | # unknown bareword, and we must decide if it starts a pattern or not | ||||
27320 | # input parameters: | ||||
27321 | # $i - token index of the ? starting possible pattern | ||||
27322 | # output parameters: | ||||
27323 | # $is_pattern = 0 if probably not pattern, =1 if probably a pattern | ||||
27324 | # msg = a warning or diagnostic message | ||||
27325 | # USES GLOBAL VARIABLES: $last_nonblank_token | ||||
27326 | my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_; | ||||
27327 | my $is_pattern = 0; | ||||
27328 | my $msg = "guessing that ? after $last_nonblank_token starts a "; | ||||
27329 | |||||
27330 | if ( $i >= $max_token_index ) { | ||||
27331 | $msg .= "conditional (no end to pattern found on the line)\n"; | ||||
27332 | } | ||||
27333 | else { | ||||
27334 | my $ibeg = $i; | ||||
27335 | $i = $ibeg + 1; | ||||
27336 | my $next_token = $$rtokens[$i]; # first token after ? | ||||
27337 | |||||
27338 | # look for a possible ending ? on this line.. | ||||
27339 | my $in_quote = 1; | ||||
27340 | my $quote_depth = 0; | ||||
27341 | my $quote_character = ''; | ||||
27342 | my $quote_pos = 0; | ||||
27343 | my $quoted_string; | ||||
27344 | ( | ||||
27345 | $i, $in_quote, $quote_character, $quote_pos, $quote_depth, | ||||
27346 | $quoted_string | ||||
27347 | ) | ||||
27348 | = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character, | ||||
27349 | $quote_pos, $quote_depth, $max_token_index ); | ||||
27350 | |||||
27351 | if ($in_quote) { | ||||
27352 | |||||
27353 | # we didn't find an ending ? on this line, | ||||
27354 | # so we bias towards conditional | ||||
27355 | $is_pattern = 0; | ||||
27356 | $msg .= "conditional (no ending ? on this line)\n"; | ||||
27357 | |||||
27358 | # we found an ending ?, so we bias towards a pattern | ||||
27359 | } | ||||
27360 | else { | ||||
27361 | |||||
27362 | if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) { | ||||
27363 | $is_pattern = 1; | ||||
27364 | $msg .= "pattern (found ending ? and pattern expected)\n"; | ||||
27365 | } | ||||
27366 | else { | ||||
27367 | $msg .= "pattern (uncertain, but found ending ?)\n"; | ||||
27368 | } | ||||
27369 | } | ||||
27370 | } | ||||
27371 | return ( $is_pattern, $msg ); | ||||
27372 | } | ||||
27373 | |||||
27374 | sub guess_if_pattern_or_division { | ||||
27375 | |||||
27376 | # this routine is called when we have encountered a / following an | ||||
27377 | # unknown bareword, and we must decide if it starts a pattern or is a | ||||
27378 | # division | ||||
27379 | # input parameters: | ||||
27380 | # $i - token index of the / starting possible pattern | ||||
27381 | # output parameters: | ||||
27382 | # $is_pattern = 0 if probably division, =1 if probably a pattern | ||||
27383 | # msg = a warning or diagnostic message | ||||
27384 | # USES GLOBAL VARIABLES: $last_nonblank_token | ||||
27385 | my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_; | ||||
27386 | my $is_pattern = 0; | ||||
27387 | my $msg = "guessing that / after $last_nonblank_token starts a "; | ||||
27388 | |||||
27389 | if ( $i >= $max_token_index ) { | ||||
27390 | $msg .= "division (no end to pattern found on the line)\n"; | ||||
27391 | } | ||||
27392 | else { | ||||
27393 | my $ibeg = $i; | ||||
27394 | my $divide_expected = | ||||
27395 | numerator_expected( $i, $rtokens, $max_token_index ); | ||||
27396 | $i = $ibeg + 1; | ||||
27397 | my $next_token = $$rtokens[$i]; # first token after slash | ||||
27398 | |||||
27399 | # look for a possible ending / on this line.. | ||||
27400 | my $in_quote = 1; | ||||
27401 | my $quote_depth = 0; | ||||
27402 | my $quote_character = ''; | ||||
27403 | my $quote_pos = 0; | ||||
27404 | my $quoted_string; | ||||
27405 | ( | ||||
27406 | $i, $in_quote, $quote_character, $quote_pos, $quote_depth, | ||||
27407 | $quoted_string | ||||
27408 | ) | ||||
27409 | = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character, | ||||
27410 | $quote_pos, $quote_depth, $max_token_index ); | ||||
27411 | |||||
27412 | if ($in_quote) { | ||||
27413 | |||||
27414 | # we didn't find an ending / on this line, | ||||
27415 | # so we bias towards division | ||||
27416 | if ( $divide_expected >= 0 ) { | ||||
27417 | $is_pattern = 0; | ||||
27418 | $msg .= "division (no ending / on this line)\n"; | ||||
27419 | } | ||||
27420 | else { | ||||
27421 | $msg = "multi-line pattern (division not possible)\n"; | ||||
27422 | $is_pattern = 1; | ||||
27423 | } | ||||
27424 | |||||
27425 | } | ||||
27426 | |||||
27427 | # we found an ending /, so we bias towards a pattern | ||||
27428 | else { | ||||
27429 | |||||
27430 | if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) { | ||||
27431 | |||||
27432 | if ( $divide_expected >= 0 ) { | ||||
27433 | |||||
27434 | if ( $i - $ibeg > 60 ) { | ||||
27435 | $msg .= "division (matching / too distant)\n"; | ||||
27436 | $is_pattern = 0; | ||||
27437 | } | ||||
27438 | else { | ||||
27439 | $msg .= "pattern (but division possible too)\n"; | ||||
27440 | $is_pattern = 1; | ||||
27441 | } | ||||
27442 | } | ||||
27443 | else { | ||||
27444 | $is_pattern = 1; | ||||
27445 | $msg .= "pattern (division not possible)\n"; | ||||
27446 | } | ||||
27447 | } | ||||
27448 | else { | ||||
27449 | |||||
27450 | if ( $divide_expected >= 0 ) { | ||||
27451 | $is_pattern = 0; | ||||
27452 | $msg .= "division (pattern not possible)\n"; | ||||
27453 | } | ||||
27454 | else { | ||||
27455 | $is_pattern = 1; | ||||
27456 | $msg .= | ||||
27457 | "pattern (uncertain, but division would not work here)\n"; | ||||
27458 | } | ||||
27459 | } | ||||
27460 | } | ||||
27461 | } | ||||
27462 | return ( $is_pattern, $msg ); | ||||
27463 | } | ||||
27464 | |||||
27465 | # try to resolve here-doc vs. shift by looking ahead for | ||||
27466 | # non-code or the end token (currently only looks for end token) | ||||
27467 | # returns 1 if it is probably a here doc, 0 if not | ||||
27468 | sub guess_if_here_doc { | ||||
27469 | |||||
27470 | # This is how many lines we will search for a target as part of the | ||||
27471 | # guessing strategy. It is a constant because there is probably | ||||
27472 | # little reason to change it. | ||||
27473 | # USES GLOBAL VARIABLES: $tokenizer_self, $current_package | ||||
27474 | # %is_constant, | ||||
27475 | 2 | 7.55ms | 2 | 156µs | # spent 86µs (16+70) within Perl::Tidy::Tokenizer::BEGIN@27475 which was called:
# once (16µs+70µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 27475 # spent 86µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@27475
# spent 70µs making 1 call to constant::import |
27476 | |||||
27477 | my $next_token = shift; | ||||
27478 | my $here_doc_expected = 0; | ||||
27479 | my $line; | ||||
27480 | my $k = 0; | ||||
27481 | my $msg = "checking <<"; | ||||
27482 | |||||
27483 | while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) ) | ||||
27484 | { | ||||
27485 | chomp $line; | ||||
27486 | |||||
27487 | if ( $line =~ /^$next_token$/ ) { | ||||
27488 | $msg .= " -- found target $next_token ahead $k lines\n"; | ||||
27489 | $here_doc_expected = 1; # got it | ||||
27490 | last; | ||||
27491 | } | ||||
27492 | last if ( $k >= HERE_DOC_WINDOW ); | ||||
27493 | } | ||||
27494 | |||||
27495 | unless ($here_doc_expected) { | ||||
27496 | |||||
27497 | if ( !defined($line) ) { | ||||
27498 | $here_doc_expected = -1; # hit eof without seeing target | ||||
27499 | $msg .= " -- must be shift; target $next_token not in file\n"; | ||||
27500 | |||||
27501 | } | ||||
27502 | else { # still unsure..taking a wild guess | ||||
27503 | |||||
27504 | if ( !$is_constant{$current_package}{$next_token} ) { | ||||
27505 | $here_doc_expected = 1; | ||||
27506 | $msg .= | ||||
27507 | " -- guessing it's a here-doc ($next_token not a constant)\n"; | ||||
27508 | } | ||||
27509 | else { | ||||
27510 | $msg .= | ||||
27511 | " -- guessing it's a shift ($next_token is a constant)\n"; | ||||
27512 | } | ||||
27513 | } | ||||
27514 | } | ||||
27515 | write_logfile_entry($msg); | ||||
27516 | return $here_doc_expected; | ||||
27517 | } | ||||
27518 | |||||
27519 | #########i############################################################# | ||||
27520 | # Tokenizer Routines for scanning identifiers and related items | ||||
27521 | ####################################################################### | ||||
27522 | |||||
27523 | sub scan_bare_identifier_do { | ||||
27524 | |||||
27525 | # this routine is called to scan a token starting with an alphanumeric | ||||
27526 | # variable or package separator, :: or '. | ||||
27527 | # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, | ||||
27528 | # $last_nonblank_type,@paren_type, $paren_depth | ||||
27529 | |||||
27530 | my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map, | ||||
27531 | $max_token_index ) | ||||
27532 | = @_; | ||||
27533 | my $i_begin = $i; | ||||
27534 | my $package = undef; | ||||
27535 | |||||
27536 | my $i_beg = $i; | ||||
27537 | |||||
27538 | # we have to back up one pretoken at a :: since each : is one pretoken | ||||
27539 | if ( $tok eq '::' ) { $i_beg-- } | ||||
27540 | if ( $tok eq '->' ) { $i_beg-- } | ||||
27541 | my $pos_beg = $$rtoken_map[$i_beg]; | ||||
27542 | pos($input_line) = $pos_beg; | ||||
27543 | |||||
27544 | # Examples: | ||||
27545 | # A::B::C | ||||
27546 | # A:: | ||||
27547 | # ::A | ||||
27548 | # A'B | ||||
27549 | if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) { | ||||
27550 | |||||
27551 | my $pos = pos($input_line); | ||||
27552 | my $numc = $pos - $pos_beg; | ||||
27553 | $tok = substr( $input_line, $pos_beg, $numc ); | ||||
27554 | |||||
27555 | # type 'w' includes anything without leading type info | ||||
27556 | # ($,%,@,*) including something like abc::def::ghi | ||||
27557 | $type = 'w'; | ||||
27558 | |||||
27559 | my $sub_name = ""; | ||||
27560 | if ( defined($2) ) { $sub_name = $2; } | ||||
27561 | if ( defined($1) ) { | ||||
27562 | $package = $1; | ||||
27563 | |||||
27564 | # patch: don't allow isolated package name which just ends | ||||
27565 | # in the old style package separator (single quote). Example: | ||||
27566 | # use CGI':all'; | ||||
27567 | if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) { | ||||
27568 | $pos--; | ||||
27569 | } | ||||
27570 | |||||
27571 | $package =~ s/\'/::/g; | ||||
27572 | if ( $package =~ /^\:/ ) { $package = 'main' . $package } | ||||
27573 | $package =~ s/::$//; | ||||
27574 | } | ||||
27575 | else { | ||||
27576 | $package = $current_package; | ||||
27577 | |||||
27578 | if ( $is_keyword{$tok} ) { | ||||
27579 | $type = 'k'; | ||||
27580 | } | ||||
27581 | } | ||||
27582 | |||||
27583 | # if it is a bareword.. | ||||
27584 | if ( $type eq 'w' ) { | ||||
27585 | |||||
27586 | # check for v-string with leading 'v' type character | ||||
27587 | # (This seems to have precedence over filehandle, type 'Y') | ||||
27588 | if ( $tok =~ /^v\d[_\d]*$/ ) { | ||||
27589 | |||||
27590 | # we only have the first part - something like 'v101' - | ||||
27591 | # look for more | ||||
27592 | if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) { | ||||
27593 | $pos = pos($input_line); | ||||
27594 | $numc = $pos - $pos_beg; | ||||
27595 | $tok = substr( $input_line, $pos_beg, $numc ); | ||||
27596 | } | ||||
27597 | $type = 'v'; | ||||
27598 | |||||
27599 | # warn if this version can't handle v-strings | ||||
27600 | report_v_string($tok); | ||||
27601 | } | ||||
27602 | |||||
27603 | elsif ( $is_constant{$package}{$sub_name} ) { | ||||
27604 | $type = 'C'; | ||||
27605 | } | ||||
27606 | |||||
27607 | # bareword after sort has implied empty prototype; for example: | ||||
27608 | # @sorted = sort numerically ( 53, 29, 11, 32, 7 ); | ||||
27609 | # This has priority over whatever the user has specified. | ||||
27610 | elsif ($last_nonblank_token eq 'sort' | ||||
27611 | && $last_nonblank_type eq 'k' ) | ||||
27612 | { | ||||
27613 | $type = 'Z'; | ||||
27614 | } | ||||
27615 | |||||
27616 | # Note: strangely, perl does not seem to really let you create | ||||
27617 | # functions which act like eval and do, in the sense that eval | ||||
27618 | # and do may have operators following the final }, but any operators | ||||
27619 | # that you create with prototype (&) apparently do not allow | ||||
27620 | # trailing operators, only terms. This seems strange. | ||||
27621 | # If this ever changes, here is the update | ||||
27622 | # to make perltidy behave accordingly: | ||||
27623 | |||||
27624 | # elsif ( $is_block_function{$package}{$tok} ) { | ||||
27625 | # $tok='eval'; # patch to do braces like eval - doesn't work | ||||
27626 | # $type = 'k'; | ||||
27627 | #} | ||||
27628 | # FIXME: This could become a separate type to allow for different | ||||
27629 | # future behavior: | ||||
27630 | elsif ( $is_block_function{$package}{$sub_name} ) { | ||||
27631 | $type = 'G'; | ||||
27632 | } | ||||
27633 | |||||
27634 | elsif ( $is_block_list_function{$package}{$sub_name} ) { | ||||
27635 | $type = 'G'; | ||||
27636 | } | ||||
27637 | elsif ( $is_user_function{$package}{$sub_name} ) { | ||||
27638 | $type = 'U'; | ||||
27639 | $prototype = $user_function_prototype{$package}{$sub_name}; | ||||
27640 | } | ||||
27641 | |||||
27642 | # check for indirect object | ||||
27643 | elsif ( | ||||
27644 | |||||
27645 | # added 2001-03-27: must not be followed immediately by '(' | ||||
27646 | # see fhandle.t | ||||
27647 | ( $input_line !~ m/\G\(/gc ) | ||||
27648 | |||||
27649 | # and | ||||
27650 | && ( | ||||
27651 | |||||
27652 | # preceded by keyword like 'print', 'printf' and friends | ||||
27653 | $is_indirect_object_taker{$last_nonblank_token} | ||||
27654 | |||||
27655 | # or preceded by something like 'print(' or 'printf(' | ||||
27656 | || ( | ||||
27657 | ( $last_nonblank_token eq '(' ) | ||||
27658 | && $is_indirect_object_taker{ $paren_type[$paren_depth] | ||||
27659 | } | ||||
27660 | |||||
27661 | ) | ||||
27662 | ) | ||||
27663 | ) | ||||
27664 | { | ||||
27665 | |||||
27666 | # may not be indirect object unless followed by a space | ||||
27667 | if ( $input_line =~ m/\G\s+/gc ) { | ||||
27668 | $type = 'Y'; | ||||
27669 | |||||
27670 | # Abandon Hope ... | ||||
27671 | # Perl's indirect object notation is a very bad | ||||
27672 | # thing and can cause subtle bugs, especially for | ||||
27673 | # beginning programmers. And I haven't even been | ||||
27674 | # able to figure out a sane warning scheme which | ||||
27675 | # doesn't get in the way of good scripts. | ||||
27676 | |||||
27677 | # Complain if a filehandle has any lower case | ||||
27678 | # letters. This is suggested good practice. | ||||
27679 | # Use 'sub_name' because something like | ||||
27680 | # main::MYHANDLE is ok for filehandle | ||||
27681 | if ( $sub_name =~ /[a-z]/ ) { | ||||
27682 | |||||
27683 | # could be bug caused by older perltidy if | ||||
27684 | # followed by '(' | ||||
27685 | if ( $input_line =~ m/\G\s*\(/gc ) { | ||||
27686 | complain( | ||||
27687 | "Caution: unknown word '$tok' in indirect object slot\n" | ||||
27688 | ); | ||||
27689 | } | ||||
27690 | } | ||||
27691 | } | ||||
27692 | |||||
27693 | # bareword not followed by a space -- may not be filehandle | ||||
27694 | # (may be function call defined in a 'use' statement) | ||||
27695 | else { | ||||
27696 | $type = 'Z'; | ||||
27697 | } | ||||
27698 | } | ||||
27699 | } | ||||
27700 | |||||
27701 | # Now we must convert back from character position | ||||
27702 | # to pre_token index. | ||||
27703 | # I don't think an error flag can occur here ..but who knows | ||||
27704 | my $error; | ||||
27705 | ( $i, $error ) = | ||||
27706 | inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); | ||||
27707 | if ($error) { | ||||
27708 | warning("scan_bare_identifier: Possibly invalid tokenization\n"); | ||||
27709 | } | ||||
27710 | } | ||||
27711 | |||||
27712 | # no match but line not blank - could be syntax error | ||||
27713 | # perl will take '::' alone without complaint | ||||
27714 | else { | ||||
27715 | $type = 'w'; | ||||
27716 | |||||
27717 | # change this warning to log message if it becomes annoying | ||||
27718 | warning("didn't find identifier after leading ::\n"); | ||||
27719 | } | ||||
27720 | return ( $i, $tok, $type, $prototype ); | ||||
27721 | } | ||||
27722 | |||||
27723 | sub scan_id_do { | ||||
27724 | |||||
27725 | # This is the new scanner and will eventually replace scan_identifier. | ||||
27726 | # Only type 'sub' and 'package' are implemented. | ||||
27727 | # Token types $ * % @ & -> are not yet implemented. | ||||
27728 | # | ||||
27729 | # Scan identifier following a type token. | ||||
27730 | # The type of call depends on $id_scan_state: $id_scan_state = '' | ||||
27731 | # for starting call, in which case $tok must be the token defining | ||||
27732 | # the type. | ||||
27733 | # | ||||
27734 | # If the type token is the last nonblank token on the line, a value | ||||
27735 | # of $id_scan_state = $tok is returned, indicating that further | ||||
27736 | # calls must be made to get the identifier. If the type token is | ||||
27737 | # not the last nonblank token on the line, the identifier is | ||||
27738 | # scanned and handled and a value of '' is returned. | ||||
27739 | # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, $in_attribute_list, | ||||
27740 | # $statement_type, $tokenizer_self | ||||
27741 | |||||
27742 | my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state, | ||||
27743 | $max_token_index ) | ||||
27744 | = @_; | ||||
27745 | my $type = ''; | ||||
27746 | my ( $i_beg, $pos_beg ); | ||||
27747 | |||||
27748 | #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n"; | ||||
27749 | #my ($a,$b,$c) = caller; | ||||
27750 | #print "NSCAN: scan_id called with tok=$tok $a $b $c\n"; | ||||
27751 | |||||
27752 | # on re-entry, start scanning at first token on the line | ||||
27753 | if ($id_scan_state) { | ||||
27754 | $i_beg = $i; | ||||
27755 | $type = ''; | ||||
27756 | } | ||||
27757 | |||||
27758 | # on initial entry, start scanning just after type token | ||||
27759 | else { | ||||
27760 | $i_beg = $i + 1; | ||||
27761 | $id_scan_state = $tok; | ||||
27762 | $type = 't'; | ||||
27763 | } | ||||
27764 | |||||
27765 | # find $i_beg = index of next nonblank token, | ||||
27766 | # and handle empty lines | ||||
27767 | my $blank_line = 0; | ||||
27768 | my $next_nonblank_token = $$rtokens[$i_beg]; | ||||
27769 | if ( $i_beg > $max_token_index ) { | ||||
27770 | $blank_line = 1; | ||||
27771 | } | ||||
27772 | else { | ||||
27773 | |||||
27774 | # only a '#' immediately after a '$' is not a comment | ||||
27775 | if ( $next_nonblank_token eq '#' ) { | ||||
27776 | unless ( $tok eq '$' ) { | ||||
27777 | $blank_line = 1; | ||||
27778 | } | ||||
27779 | } | ||||
27780 | |||||
27781 | if ( $next_nonblank_token =~ /^\s/ ) { | ||||
27782 | ( $next_nonblank_token, $i_beg ) = | ||||
27783 | find_next_nonblank_token_on_this_line( $i_beg, $rtokens, | ||||
27784 | $max_token_index ); | ||||
27785 | if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) { | ||||
27786 | $blank_line = 1; | ||||
27787 | } | ||||
27788 | } | ||||
27789 | } | ||||
27790 | |||||
27791 | # handle non-blank line; identifier, if any, must follow | ||||
27792 | unless ($blank_line) { | ||||
27793 | |||||
27794 | if ( $id_scan_state eq 'sub' ) { | ||||
27795 | ( $i, $tok, $type, $id_scan_state ) = do_scan_sub( | ||||
27796 | $input_line, $i, $i_beg, | ||||
27797 | $tok, $type, $rtokens, | ||||
27798 | $rtoken_map, $id_scan_state, $max_token_index | ||||
27799 | ); | ||||
27800 | } | ||||
27801 | |||||
27802 | elsif ( $id_scan_state eq 'package' ) { | ||||
27803 | ( $i, $tok, $type ) = | ||||
27804 | do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens, | ||||
27805 | $rtoken_map, $max_token_index ); | ||||
27806 | $id_scan_state = ''; | ||||
27807 | } | ||||
27808 | |||||
27809 | else { | ||||
27810 | warning("invalid token in scan_id: $tok\n"); | ||||
27811 | $id_scan_state = ''; | ||||
27812 | } | ||||
27813 | } | ||||
27814 | |||||
27815 | if ( $id_scan_state && ( !defined($type) || !$type ) ) { | ||||
27816 | |||||
27817 | # shouldn't happen: | ||||
27818 | warning( | ||||
27819 | "Program bug in scan_id: undefined type but scan_state=$id_scan_state\n" | ||||
27820 | ); | ||||
27821 | report_definite_bug(); | ||||
27822 | } | ||||
27823 | |||||
27824 | TOKENIZER_DEBUG_FLAG_NSCAN && do { | ||||
27825 | print STDOUT | ||||
27826 | "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n"; | ||||
27827 | }; | ||||
27828 | return ( $i, $tok, $type, $id_scan_state ); | ||||
27829 | } | ||||
27830 | |||||
27831 | sub check_prototype { | ||||
27832 | my ( $proto, $package, $subname ) = @_; | ||||
27833 | return unless ( defined($package) && defined($subname) ); | ||||
27834 | if ( defined($proto) ) { | ||||
27835 | $proto =~ s/^\s*\(\s*//; | ||||
27836 | $proto =~ s/\s*\)$//; | ||||
27837 | if ($proto) { | ||||
27838 | $is_user_function{$package}{$subname} = 1; | ||||
27839 | $user_function_prototype{$package}{$subname} = "($proto)"; | ||||
27840 | |||||
27841 | # prototypes containing '&' must be treated specially.. | ||||
27842 | if ( $proto =~ /\&/ ) { | ||||
27843 | |||||
27844 | # right curly braces of prototypes ending in | ||||
27845 | # '&' may be followed by an operator | ||||
27846 | if ( $proto =~ /\&$/ ) { | ||||
27847 | $is_block_function{$package}{$subname} = 1; | ||||
27848 | } | ||||
27849 | |||||
27850 | # right curly braces of prototypes NOT ending in | ||||
27851 | # '&' may NOT be followed by an operator | ||||
27852 | elsif ( $proto !~ /\&$/ ) { | ||||
27853 | $is_block_list_function{$package}{$subname} = 1; | ||||
27854 | } | ||||
27855 | } | ||||
27856 | } | ||||
27857 | else { | ||||
27858 | $is_constant{$package}{$subname} = 1; | ||||
27859 | } | ||||
27860 | } | ||||
27861 | else { | ||||
27862 | $is_user_function{$package}{$subname} = 1; | ||||
27863 | } | ||||
27864 | } | ||||
27865 | |||||
27866 | sub do_scan_package { | ||||
27867 | |||||
27868 | # do_scan_package parses a package name | ||||
27869 | # it is called with $i_beg equal to the index of the first nonblank | ||||
27870 | # token following a 'package' token. | ||||
27871 | # USES GLOBAL VARIABLES: $current_package, | ||||
27872 | |||||
27873 | # package NAMESPACE | ||||
27874 | # package NAMESPACE VERSION | ||||
27875 | # package NAMESPACE BLOCK | ||||
27876 | # package NAMESPACE VERSION BLOCK | ||||
27877 | # | ||||
27878 | # If VERSION is provided, package sets the $VERSION variable in the given | ||||
27879 | # namespace to a version object with the VERSION provided. VERSION must be | ||||
27880 | # a "strict" style version number as defined by the version module: a | ||||
27881 | # positive decimal number (integer or decimal-fraction) without | ||||
27882 | # exponentiation or else a dotted-decimal v-string with a leading 'v' | ||||
27883 | # character and at least three components. | ||||
27884 | # reference http://perldoc.perl.org/functions/package.html | ||||
27885 | |||||
27886 | my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map, | ||||
27887 | $max_token_index ) | ||||
27888 | = @_; | ||||
27889 | my $package = undef; | ||||
27890 | my $pos_beg = $$rtoken_map[$i_beg]; | ||||
27891 | pos($input_line) = $pos_beg; | ||||
27892 | |||||
27893 | # handle non-blank line; package name, if any, must follow | ||||
27894 | if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) { | ||||
27895 | $package = $1; | ||||
27896 | $package = ( defined($1) && $1 ) ? $1 : 'main'; | ||||
27897 | $package =~ s/\'/::/g; | ||||
27898 | if ( $package =~ /^\:/ ) { $package = 'main' . $package } | ||||
27899 | $package =~ s/::$//; | ||||
27900 | my $pos = pos($input_line); | ||||
27901 | my $numc = $pos - $pos_beg; | ||||
27902 | $tok = 'package ' . substr( $input_line, $pos_beg, $numc ); | ||||
27903 | $type = 'i'; | ||||
27904 | |||||
27905 | # Now we must convert back from character position | ||||
27906 | # to pre_token index. | ||||
27907 | # I don't think an error flag can occur here ..but ? | ||||
27908 | my $error; | ||||
27909 | ( $i, $error ) = | ||||
27910 | inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); | ||||
27911 | if ($error) { warning("Possibly invalid package\n") } | ||||
27912 | $current_package = $package; | ||||
27913 | |||||
27914 | # we should now have package NAMESPACE | ||||
27915 | # now expecting VERSION, BLOCK, or ; to follow ... | ||||
27916 | # package NAMESPACE VERSION | ||||
27917 | # package NAMESPACE BLOCK | ||||
27918 | # package NAMESPACE VERSION BLOCK | ||||
27919 | my ( $next_nonblank_token, $i_next ) = | ||||
27920 | find_next_nonblank_token( $i, $rtokens, $max_token_index ); | ||||
27921 | |||||
27922 | # check that something recognizable follows, but do not parse. | ||||
27923 | # A VERSION number will be parsed later as a number or v-string in the | ||||
27924 | # normal way. What is important is to set the statement type if | ||||
27925 | # everything looks okay so that the operator_expected() routine | ||||
27926 | # knows that the number is in a package statement. | ||||
27927 | # Examples of valid primitive tokens that might follow are: | ||||
27928 | # 1235 . ; { } v3 v | ||||
27929 | if ( $next_nonblank_token =~ /^([v\.\d;\{\}])|v\d|\d+$/ ) { | ||||
27930 | $statement_type = $tok; | ||||
27931 | } | ||||
27932 | else { | ||||
27933 | warning( | ||||
27934 | "Unexpected '$next_nonblank_token' after package name '$tok'\n" | ||||
27935 | ); | ||||
27936 | } | ||||
27937 | } | ||||
27938 | |||||
27939 | # no match but line not blank -- | ||||
27940 | # could be a label with name package, like package: , for example. | ||||
27941 | else { | ||||
27942 | $type = 'k'; | ||||
27943 | } | ||||
27944 | |||||
27945 | return ( $i, $tok, $type ); | ||||
27946 | } | ||||
27947 | |||||
27948 | sub scan_identifier_do { | ||||
27949 | |||||
27950 | # This routine assembles tokens into identifiers. It maintains a | ||||
27951 | # scan state, id_scan_state. It updates id_scan_state based upon | ||||
27952 | # current id_scan_state and token, and returns an updated | ||||
27953 | # id_scan_state and the next index after the identifier. | ||||
27954 | # USES GLOBAL VARIABLES: $context, $last_nonblank_token, | ||||
27955 | # $last_nonblank_type | ||||
27956 | |||||
27957 | my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index, | ||||
27958 | $expecting ) | ||||
27959 | = @_; | ||||
27960 | my $i_begin = $i; | ||||
27961 | my $type = ''; | ||||
27962 | my $tok_begin = $$rtokens[$i_begin]; | ||||
27963 | if ( $tok_begin eq ':' ) { $tok_begin = '::' } | ||||
27964 | my $id_scan_state_begin = $id_scan_state; | ||||
27965 | my $identifier_begin = $identifier; | ||||
27966 | my $tok = $tok_begin; | ||||
27967 | my $message = ""; | ||||
27968 | |||||
27969 | # these flags will be used to help figure out the type: | ||||
27970 | my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ ); | ||||
27971 | my $saw_type; | ||||
27972 | |||||
27973 | # allow old package separator (') except in 'use' statement | ||||
27974 | my $allow_tick = ( $last_nonblank_token ne 'use' ); | ||||
27975 | |||||
27976 | # get started by defining a type and a state if necessary | ||||
27977 | unless ($id_scan_state) { | ||||
27978 | $context = UNKNOWN_CONTEXT; | ||||
27979 | |||||
27980 | # fixup for digraph | ||||
27981 | if ( $tok eq '>' ) { | ||||
27982 | $tok = '->'; | ||||
27983 | $tok_begin = $tok; | ||||
27984 | } | ||||
27985 | $identifier = $tok; | ||||
27986 | |||||
27987 | if ( $tok eq '$' || $tok eq '*' ) { | ||||
27988 | $id_scan_state = '$'; | ||||
27989 | $context = SCALAR_CONTEXT; | ||||
27990 | } | ||||
27991 | elsif ( $tok eq '%' || $tok eq '@' ) { | ||||
27992 | $id_scan_state = '$'; | ||||
27993 | $context = LIST_CONTEXT; | ||||
27994 | } | ||||
27995 | elsif ( $tok eq '&' ) { | ||||
27996 | $id_scan_state = '&'; | ||||
27997 | } | ||||
27998 | elsif ( $tok eq 'sub' or $tok eq 'package' ) { | ||||
27999 | $saw_alpha = 0; # 'sub' is considered type info here | ||||
28000 | $id_scan_state = '$'; | ||||
28001 | $identifier .= ' '; # need a space to separate sub from sub name | ||||
28002 | } | ||||
28003 | elsif ( $tok eq '::' ) { | ||||
28004 | $id_scan_state = 'A'; | ||||
28005 | } | ||||
28006 | elsif ( $tok =~ /^[A-Za-z_]/ ) { | ||||
28007 | $id_scan_state = ':'; | ||||
28008 | } | ||||
28009 | elsif ( $tok eq '->' ) { | ||||
28010 | $id_scan_state = '$'; | ||||
28011 | } | ||||
28012 | else { | ||||
28013 | |||||
28014 | # shouldn't happen | ||||
28015 | my ( $a, $b, $c ) = caller; | ||||
28016 | warning("Program Bug: scan_identifier given bad token = $tok \n"); | ||||
28017 | warning(" called from sub $a line: $c\n"); | ||||
28018 | report_definite_bug(); | ||||
28019 | } | ||||
28020 | $saw_type = !$saw_alpha; | ||||
28021 | } | ||||
28022 | else { | ||||
28023 | $i--; | ||||
28024 | $saw_type = ( $tok =~ /([\$\%\@\*\&])/ ); | ||||
28025 | } | ||||
28026 | |||||
28027 | # now loop to gather the identifier | ||||
28028 | my $i_save = $i; | ||||
28029 | |||||
28030 | while ( $i < $max_token_index ) { | ||||
28031 | $i_save = $i unless ( $tok =~ /^\s*$/ ); | ||||
28032 | $tok = $$rtokens[ ++$i ]; | ||||
28033 | |||||
28034 | if ( ( $tok eq ':' ) && ( $$rtokens[ $i + 1 ] eq ':' ) ) { | ||||
28035 | $tok = '::'; | ||||
28036 | $i++; | ||||
28037 | } | ||||
28038 | |||||
28039 | if ( $id_scan_state eq '$' ) { # starting variable name | ||||
28040 | |||||
28041 | if ( $tok eq '$' ) { | ||||
28042 | |||||
28043 | $identifier .= $tok; | ||||
28044 | |||||
28045 | # we've got a punctuation variable if end of line (punct.t) | ||||
28046 | if ( $i == $max_token_index ) { | ||||
28047 | $type = 'i'; | ||||
28048 | $id_scan_state = ''; | ||||
28049 | last; | ||||
28050 | } | ||||
28051 | } | ||||
28052 | elsif ( $tok =~ /^[A-Za-z_]/ ) { # alphanumeric .. | ||||
28053 | $saw_alpha = 1; | ||||
28054 | $id_scan_state = ':'; # now need :: | ||||
28055 | $identifier .= $tok; | ||||
28056 | } | ||||
28057 | elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric .. | ||||
28058 | $saw_alpha = 1; | ||||
28059 | $id_scan_state = ':'; # now need :: | ||||
28060 | $identifier .= $tok; | ||||
28061 | |||||
28062 | # Perl will accept leading digits in identifiers, | ||||
28063 | # although they may not always produce useful results. | ||||
28064 | # Something like $main::0 is ok. But this also works: | ||||
28065 | # | ||||
28066 | # sub howdy::123::bubba{ print "bubba $54321!\n" } | ||||
28067 | # howdy::123::bubba(); | ||||
28068 | # | ||||
28069 | } | ||||
28070 | elsif ( $tok =~ /^[0-9]/ ) { # numeric | ||||
28071 | $saw_alpha = 1; | ||||
28072 | $id_scan_state = ':'; # now need :: | ||||
28073 | $identifier .= $tok; | ||||
28074 | } | ||||
28075 | elsif ( $tok eq '::' ) { | ||||
28076 | $id_scan_state = 'A'; | ||||
28077 | $identifier .= $tok; | ||||
28078 | } | ||||
28079 | elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) { # $#array | ||||
28080 | $identifier .= $tok; # keep same state, a $ could follow | ||||
28081 | } | ||||
28082 | elsif ( $tok eq '{' ) { | ||||
28083 | |||||
28084 | # check for something like ${#} or ${©} | ||||
28085 | ##if ( $identifier eq '$' | ||||
28086 | if ( | ||||
28087 | ( | ||||
28088 | $identifier eq '$' | ||||
28089 | || $identifier eq '@' | ||||
28090 | || $identifier eq '$#' | ||||
28091 | ) | ||||
28092 | && $i + 2 <= $max_token_index | ||||
28093 | && $$rtokens[ $i + 2 ] eq '}' | ||||
28094 | && $$rtokens[ $i + 1 ] !~ /[\s\w]/ | ||||
28095 | ) | ||||
28096 | { | ||||
28097 | my $next2 = $$rtokens[ $i + 2 ]; | ||||
28098 | my $next1 = $$rtokens[ $i + 1 ]; | ||||
28099 | $identifier .= $tok . $next1 . $next2; | ||||
28100 | $i += 2; | ||||
28101 | $id_scan_state = ''; | ||||
28102 | last; | ||||
28103 | } | ||||
28104 | |||||
28105 | # skip something like ${xxx} or ->{ | ||||
28106 | $id_scan_state = ''; | ||||
28107 | |||||
28108 | # if this is the first token of a line, any tokens for this | ||||
28109 | # identifier have already been accumulated | ||||
28110 | if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; } | ||||
28111 | $i = $i_save; | ||||
28112 | last; | ||||
28113 | } | ||||
28114 | |||||
28115 | # space ok after leading $ % * & @ | ||||
28116 | elsif ( $tok =~ /^\s*$/ ) { | ||||
28117 | |||||
28118 | if ( $identifier =~ /^[\$\%\*\&\@]/ ) { | ||||
28119 | |||||
28120 | if ( length($identifier) > 1 ) { | ||||
28121 | $id_scan_state = ''; | ||||
28122 | $i = $i_save; | ||||
28123 | $type = 'i'; # probably punctuation variable | ||||
28124 | last; | ||||
28125 | } | ||||
28126 | else { | ||||
28127 | |||||
28128 | # spaces after $'s are common, and space after @ | ||||
28129 | # is harmless, so only complain about space | ||||
28130 | # after other type characters. Space after $ and | ||||
28131 | # @ will be removed in formatting. Report space | ||||
28132 | # after % and * because they might indicate a | ||||
28133 | # parsing error. In other words '% ' might be a | ||||
28134 | # modulo operator. Delete this warning if it | ||||
28135 | # gets annoying. | ||||
28136 | if ( $identifier !~ /^[\@\$]$/ ) { | ||||
28137 | $message = | ||||
28138 | "Space in identifier, following $identifier\n"; | ||||
28139 | } | ||||
28140 | } | ||||
28141 | } | ||||
28142 | |||||
28143 | # else: | ||||
28144 | # space after '->' is ok | ||||
28145 | } | ||||
28146 | elsif ( $tok eq '^' ) { | ||||
28147 | |||||
28148 | # check for some special variables like $^W | ||||
28149 | if ( $identifier =~ /^[\$\*\@\%]$/ ) { | ||||
28150 | $identifier .= $tok; | ||||
28151 | $id_scan_state = 'A'; | ||||
28152 | |||||
28153 | # Perl accepts '$^]' or '@^]', but | ||||
28154 | # there must not be a space before the ']'. | ||||
28155 | my $next1 = $$rtokens[ $i + 1 ]; | ||||
28156 | if ( $next1 eq ']' ) { | ||||
28157 | $i++; | ||||
28158 | $identifier .= $next1; | ||||
28159 | $id_scan_state = ""; | ||||
28160 | last; | ||||
28161 | } | ||||
28162 | } | ||||
28163 | else { | ||||
28164 | $id_scan_state = ''; | ||||
28165 | } | ||||
28166 | } | ||||
28167 | else { # something else | ||||
28168 | |||||
28169 | # check for various punctuation variables | ||||
28170 | if ( $identifier =~ /^[\$\*\@\%]$/ ) { | ||||
28171 | $identifier .= $tok; | ||||
28172 | } | ||||
28173 | |||||
28174 | elsif ( $identifier eq '$#' ) { | ||||
28175 | |||||
28176 | if ( $tok eq '{' ) { $type = 'i'; $i = $i_save } | ||||
28177 | |||||
28178 | # perl seems to allow just these: $#: $#- $#+ | ||||
28179 | elsif ( $tok =~ /^[\:\-\+]$/ ) { | ||||
28180 | $type = 'i'; | ||||
28181 | $identifier .= $tok; | ||||
28182 | } | ||||
28183 | else { | ||||
28184 | $i = $i_save; | ||||
28185 | write_logfile_entry( 'Use of $# is deprecated' . "\n" ); | ||||
28186 | } | ||||
28187 | } | ||||
28188 | elsif ( $identifier eq '$$' ) { | ||||
28189 | |||||
28190 | # perl does not allow references to punctuation | ||||
28191 | # variables without braces. For example, this | ||||
28192 | # won't work: | ||||
28193 | # $:=\4; | ||||
28194 | # $a = $$:; | ||||
28195 | # You would have to use | ||||
28196 | # $a = ${$:}; | ||||
28197 | |||||
28198 | $i = $i_save; | ||||
28199 | if ( $tok eq '{' ) { $type = 't' } | ||||
28200 | else { $type = 'i' } | ||||
28201 | } | ||||
28202 | elsif ( $identifier eq '->' ) { | ||||
28203 | $i = $i_save; | ||||
28204 | } | ||||
28205 | else { | ||||
28206 | $i = $i_save; | ||||
28207 | if ( length($identifier) == 1 ) { $identifier = ''; } | ||||
28208 | } | ||||
28209 | $id_scan_state = ''; | ||||
28210 | last; | ||||
28211 | } | ||||
28212 | } | ||||
28213 | elsif ( $id_scan_state eq '&' ) { # starting sub call? | ||||
28214 | |||||
28215 | if ( $tok =~ /^[\$A-Za-z_]/ ) { # alphanumeric .. | ||||
28216 | $id_scan_state = ':'; # now need :: | ||||
28217 | $saw_alpha = 1; | ||||
28218 | $identifier .= $tok; | ||||
28219 | } | ||||
28220 | elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric .. | ||||
28221 | $id_scan_state = ':'; # now need :: | ||||
28222 | $saw_alpha = 1; | ||||
28223 | $identifier .= $tok; | ||||
28224 | } | ||||
28225 | elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above | ||||
28226 | $id_scan_state = ':'; # now need :: | ||||
28227 | $saw_alpha = 1; | ||||
28228 | $identifier .= $tok; | ||||
28229 | } | ||||
28230 | elsif ( $tok =~ /^\s*$/ ) { # allow space | ||||
28231 | } | ||||
28232 | elsif ( $tok eq '::' ) { # leading :: | ||||
28233 | $id_scan_state = 'A'; # accept alpha next | ||||
28234 | $identifier .= $tok; | ||||
28235 | } | ||||
28236 | elsif ( $tok eq '{' ) { | ||||
28237 | if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; } | ||||
28238 | $i = $i_save; | ||||
28239 | $id_scan_state = ''; | ||||
28240 | last; | ||||
28241 | } | ||||
28242 | else { | ||||
28243 | |||||
28244 | # punctuation variable? | ||||
28245 | # testfile: cunningham4.pl | ||||
28246 | # | ||||
28247 | # We have to be careful here. If we are in an unknown state, | ||||
28248 | # we will reject the punctuation variable. In the following | ||||
28249 | # example the '&' is a binary operator but we are in an unknown | ||||
28250 | # state because there is no sigil on 'Prima', so we don't | ||||
28251 | # know what it is. But it is a bad guess that | ||||
28252 | # '&~' is a function variable. | ||||
28253 | # $self->{text}->{colorMap}->[ | ||||
28254 | # Prima::PodView::COLOR_CODE_FOREGROUND | ||||
28255 | # & ~tb::COLOR_INDEX ] = | ||||
28256 | # $sec->{ColorCode} | ||||
28257 | if ( $identifier eq '&' && $expecting ) { | ||||
28258 | $identifier .= $tok; | ||||
28259 | } | ||||
28260 | else { | ||||
28261 | $identifier = ''; | ||||
28262 | $i = $i_save; | ||||
28263 | $type = '&'; | ||||
28264 | } | ||||
28265 | $id_scan_state = ''; | ||||
28266 | last; | ||||
28267 | } | ||||
28268 | } | ||||
28269 | elsif ( $id_scan_state eq 'A' ) { # looking for alpha (after ::) | ||||
28270 | |||||
28271 | if ( $tok =~ /^[A-Za-z_]/ ) { # found it | ||||
28272 | $identifier .= $tok; | ||||
28273 | $id_scan_state = ':'; # now need :: | ||||
28274 | $saw_alpha = 1; | ||||
28275 | } | ||||
28276 | elsif ( $tok eq "'" && $allow_tick ) { | ||||
28277 | $identifier .= $tok; | ||||
28278 | $id_scan_state = ':'; # now need :: | ||||
28279 | $saw_alpha = 1; | ||||
28280 | } | ||||
28281 | elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above | ||||
28282 | $identifier .= $tok; | ||||
28283 | $id_scan_state = ':'; # now need :: | ||||
28284 | $saw_alpha = 1; | ||||
28285 | } | ||||
28286 | elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) { | ||||
28287 | $id_scan_state = '('; | ||||
28288 | $identifier .= $tok; | ||||
28289 | } | ||||
28290 | elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) { | ||||
28291 | $id_scan_state = ')'; | ||||
28292 | $identifier .= $tok; | ||||
28293 | } | ||||
28294 | else { | ||||
28295 | $id_scan_state = ''; | ||||
28296 | $i = $i_save; | ||||
28297 | last; | ||||
28298 | } | ||||
28299 | } | ||||
28300 | elsif ( $id_scan_state eq ':' ) { # looking for :: after alpha | ||||
28301 | |||||
28302 | if ( $tok eq '::' ) { # got it | ||||
28303 | $identifier .= $tok; | ||||
28304 | $id_scan_state = 'A'; # now require alpha | ||||
28305 | } | ||||
28306 | elsif ( $tok =~ /^[A-Za-z_]/ ) { # more alphanumeric is ok here | ||||
28307 | $identifier .= $tok; | ||||
28308 | $id_scan_state = ':'; # now need :: | ||||
28309 | $saw_alpha = 1; | ||||
28310 | } | ||||
28311 | elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above | ||||
28312 | $identifier .= $tok; | ||||
28313 | $id_scan_state = ':'; # now need :: | ||||
28314 | $saw_alpha = 1; | ||||
28315 | } | ||||
28316 | elsif ( $tok eq "'" && $allow_tick ) { # tick | ||||
28317 | |||||
28318 | if ( $is_keyword{$identifier} ) { | ||||
28319 | $id_scan_state = ''; # that's all | ||||
28320 | $i = $i_save; | ||||
28321 | } | ||||
28322 | else { | ||||
28323 | $identifier .= $tok; | ||||
28324 | } | ||||
28325 | } | ||||
28326 | elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) { | ||||
28327 | $id_scan_state = '('; | ||||
28328 | $identifier .= $tok; | ||||
28329 | } | ||||
28330 | elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) { | ||||
28331 | $id_scan_state = ')'; | ||||
28332 | $identifier .= $tok; | ||||
28333 | } | ||||
28334 | else { | ||||
28335 | $id_scan_state = ''; # that's all | ||||
28336 | $i = $i_save; | ||||
28337 | last; | ||||
28338 | } | ||||
28339 | } | ||||
28340 | elsif ( $id_scan_state eq '(' ) { # looking for ( of prototype | ||||
28341 | |||||
28342 | if ( $tok eq '(' ) { # got it | ||||
28343 | $identifier .= $tok; | ||||
28344 | $id_scan_state = ')'; # now find the end of it | ||||
28345 | } | ||||
28346 | elsif ( $tok =~ /^\s*$/ ) { # blank - keep going | ||||
28347 | $identifier .= $tok; | ||||
28348 | } | ||||
28349 | else { | ||||
28350 | $id_scan_state = ''; # that's all - no prototype | ||||
28351 | $i = $i_save; | ||||
28352 | last; | ||||
28353 | } | ||||
28354 | } | ||||
28355 | elsif ( $id_scan_state eq ')' ) { # looking for ) to end | ||||
28356 | |||||
28357 | if ( $tok eq ')' ) { # got it | ||||
28358 | $identifier .= $tok; | ||||
28359 | $id_scan_state = ''; # all done | ||||
28360 | last; | ||||
28361 | } | ||||
28362 | elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) { | ||||
28363 | $identifier .= $tok; | ||||
28364 | } | ||||
28365 | else { # probable error in script, but keep going | ||||
28366 | warning("Unexpected '$tok' while seeking end of prototype\n"); | ||||
28367 | $identifier .= $tok; | ||||
28368 | } | ||||
28369 | } | ||||
28370 | else { # can get here due to error in initialization | ||||
28371 | $id_scan_state = ''; | ||||
28372 | $i = $i_save; | ||||
28373 | last; | ||||
28374 | } | ||||
28375 | } | ||||
28376 | |||||
28377 | if ( $id_scan_state eq ')' ) { | ||||
28378 | warning("Hit end of line while seeking ) to end prototype\n"); | ||||
28379 | } | ||||
28380 | |||||
28381 | # once we enter the actual identifier, it may not extend beyond | ||||
28382 | # the end of the current line | ||||
28383 | if ( $id_scan_state =~ /^[A\:\(\)]/ ) { | ||||
28384 | $id_scan_state = ''; | ||||
28385 | } | ||||
28386 | if ( $i < 0 ) { $i = 0 } | ||||
28387 | |||||
28388 | unless ($type) { | ||||
28389 | |||||
28390 | if ($saw_type) { | ||||
28391 | |||||
28392 | if ($saw_alpha) { | ||||
28393 | if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) { | ||||
28394 | $type = 'w'; | ||||
28395 | } | ||||
28396 | else { $type = 'i' } | ||||
28397 | } | ||||
28398 | elsif ( $identifier eq '->' ) { | ||||
28399 | $type = '->'; | ||||
28400 | } | ||||
28401 | elsif ( | ||||
28402 | ( length($identifier) > 1 ) | ||||
28403 | |||||
28404 | # In something like '@$=' we have an identifier '@$' | ||||
28405 | # In something like '$${' we have type '$$' (and only | ||||
28406 | # part of an identifier) | ||||
28407 | && !( $identifier =~ /\$$/ && $tok eq '{' ) | ||||
28408 | && ( $identifier !~ /^(sub |package )$/ ) | ||||
28409 | ) | ||||
28410 | { | ||||
28411 | $type = 'i'; | ||||
28412 | } | ||||
28413 | else { $type = 't' } | ||||
28414 | } | ||||
28415 | elsif ($saw_alpha) { | ||||
28416 | |||||
28417 | # type 'w' includes anything without leading type info | ||||
28418 | # ($,%,@,*) including something like abc::def::ghi | ||||
28419 | $type = 'w'; | ||||
28420 | } | ||||
28421 | else { | ||||
28422 | $type = ''; | ||||
28423 | } # this can happen on a restart | ||||
28424 | } | ||||
28425 | |||||
28426 | if ($identifier) { | ||||
28427 | $tok = $identifier; | ||||
28428 | if ($message) { write_logfile_entry($message) } | ||||
28429 | } | ||||
28430 | else { | ||||
28431 | $tok = $tok_begin; | ||||
28432 | $i = $i_begin; | ||||
28433 | } | ||||
28434 | |||||
28435 | TOKENIZER_DEBUG_FLAG_SCAN_ID && do { | ||||
28436 | my ( $a, $b, $c ) = caller; | ||||
28437 | print STDOUT | ||||
28438 | "SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n"; | ||||
28439 | print STDOUT | ||||
28440 | "SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n"; | ||||
28441 | }; | ||||
28442 | return ( $i, $tok, $type, $id_scan_state, $identifier ); | ||||
28443 | } | ||||
28444 | |||||
28445 | { | ||||
28446 | |||||
28447 | # saved package and subnames in case prototype is on separate line | ||||
28448 | 2 | 300ns | my ( $package_saved, $subname_saved ); | ||
28449 | |||||
28450 | sub do_scan_sub { | ||||
28451 | |||||
28452 | # do_scan_sub parses a sub name and prototype | ||||
28453 | # it is called with $i_beg equal to the index of the first nonblank | ||||
28454 | # token following a 'sub' token. | ||||
28455 | |||||
28456 | # TODO: add future error checks to be sure we have a valid | ||||
28457 | # sub name. For example, 'sub &doit' is wrong. Also, be sure | ||||
28458 | # a name is given if and only if a non-anonymous sub is | ||||
28459 | # appropriate. | ||||
28460 | # USES GLOBAL VARS: $current_package, $last_nonblank_token, | ||||
28461 | # $in_attribute_list, %saw_function_definition, | ||||
28462 | # $statement_type | ||||
28463 | |||||
28464 | my ( | ||||
28465 | $input_line, $i, $i_beg, | ||||
28466 | $tok, $type, $rtokens, | ||||
28467 | $rtoken_map, $id_scan_state, $max_token_index | ||||
28468 | ) = @_; | ||||
28469 | $id_scan_state = ""; # normally we get everything in one call | ||||
28470 | my $subname = undef; | ||||
28471 | my $package = undef; | ||||
28472 | my $proto = undef; | ||||
28473 | my $attrs = undef; | ||||
28474 | my $match; | ||||
28475 | |||||
28476 | my $pos_beg = $$rtoken_map[$i_beg]; | ||||
28477 | pos($input_line) = $pos_beg; | ||||
28478 | |||||
28479 | # sub NAME PROTO ATTRS | ||||
28480 | if ( | ||||
28481 | $input_line =~ m/\G\s* | ||||
28482 | ((?:\w*(?:'|::))*) # package - something that ends in :: or ' | ||||
28483 | (\w+) # NAME - required | ||||
28484 | (\s*\([^){]*\))? # PROTO - something in parens | ||||
28485 | (\s*:)? # ATTRS - leading : of attribute list | ||||
28486 | /gcx | ||||
28487 | ) | ||||
28488 | { | ||||
28489 | $match = 1; | ||||
28490 | $subname = $2; | ||||
28491 | $proto = $3; | ||||
28492 | $attrs = $4; | ||||
28493 | |||||
28494 | $package = ( defined($1) && $1 ) ? $1 : $current_package; | ||||
28495 | $package =~ s/\'/::/g; | ||||
28496 | if ( $package =~ /^\:/ ) { $package = 'main' . $package } | ||||
28497 | $package =~ s/::$//; | ||||
28498 | my $pos = pos($input_line); | ||||
28499 | my $numc = $pos - $pos_beg; | ||||
28500 | $tok = 'sub ' . substr( $input_line, $pos_beg, $numc ); | ||||
28501 | $type = 'i'; | ||||
28502 | } | ||||
28503 | |||||
28504 | # Look for prototype/attributes not preceded on this line by subname; | ||||
28505 | # This might be an anonymous sub with attributes, | ||||
28506 | # or a prototype on a separate line from its sub name | ||||
28507 | elsif ( | ||||
28508 | $input_line =~ m/\G(\s*\([^){]*\))? # PROTO | ||||
28509 | (\s*:)? # ATTRS leading ':' | ||||
28510 | /gcx | ||||
28511 | && ( $1 || $2 ) | ||||
28512 | ) | ||||
28513 | { | ||||
28514 | $match = 1; | ||||
28515 | $proto = $1; | ||||
28516 | $attrs = $2; | ||||
28517 | |||||
28518 | # Handle prototype on separate line from subname | ||||
28519 | if ($subname_saved) { | ||||
28520 | $package = $package_saved; | ||||
28521 | $subname = $subname_saved; | ||||
28522 | $tok = $last_nonblank_token; | ||||
28523 | } | ||||
28524 | $type = 'i'; | ||||
28525 | } | ||||
28526 | |||||
28527 | if ($match) { | ||||
28528 | |||||
28529 | # ATTRS: if there are attributes, back up and let the ':' be | ||||
28530 | # found later by the scanner. | ||||
28531 | my $pos = pos($input_line); | ||||
28532 | if ($attrs) { | ||||
28533 | $pos -= length($attrs); | ||||
28534 | } | ||||
28535 | |||||
28536 | my $next_nonblank_token = $tok; | ||||
28537 | |||||
28538 | # catch case of line with leading ATTR ':' after anonymous sub | ||||
28539 | if ( $pos == $pos_beg && $tok eq ':' ) { | ||||
28540 | $type = 'A'; | ||||
28541 | $in_attribute_list = 1; | ||||
28542 | } | ||||
28543 | |||||
28544 | # We must convert back from character position | ||||
28545 | # to pre_token index. | ||||
28546 | else { | ||||
28547 | |||||
28548 | # I don't think an error flag can occur here ..but ? | ||||
28549 | my $error; | ||||
28550 | ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map, | ||||
28551 | $max_token_index ); | ||||
28552 | if ($error) { warning("Possibly invalid sub\n") } | ||||
28553 | |||||
28554 | # check for multiple definitions of a sub | ||||
28555 | ( $next_nonblank_token, my $i_next ) = | ||||
28556 | find_next_nonblank_token_on_this_line( $i, $rtokens, | ||||
28557 | $max_token_index ); | ||||
28558 | } | ||||
28559 | |||||
28560 | if ( $next_nonblank_token =~ /^(\s*|#)$/ ) | ||||
28561 | { # skip blank or side comment | ||||
28562 | my ( $rpre_tokens, $rpre_types ) = | ||||
28563 | peek_ahead_for_n_nonblank_pre_tokens(1); | ||||
28564 | if ( defined($rpre_tokens) && @$rpre_tokens ) { | ||||
28565 | $next_nonblank_token = $rpre_tokens->[0]; | ||||
28566 | } | ||||
28567 | else { | ||||
28568 | $next_nonblank_token = '}'; | ||||
28569 | } | ||||
28570 | } | ||||
28571 | $package_saved = ""; | ||||
28572 | $subname_saved = ""; | ||||
28573 | if ( $next_nonblank_token eq '{' ) { | ||||
28574 | if ($subname) { | ||||
28575 | |||||
28576 | # Check for multiple definitions of a sub, but | ||||
28577 | # it is ok to have multiple sub BEGIN, etc, | ||||
28578 | # so we do not complain if name is all caps | ||||
28579 | if ( $saw_function_definition{$package}{$subname} | ||||
28580 | && $subname !~ /^[A-Z]+$/ ) | ||||
28581 | { | ||||
28582 | my $lno = $saw_function_definition{$package}{$subname}; | ||||
28583 | warning( | ||||
28584 | "already saw definition of 'sub $subname' in package '$package' at line $lno\n" | ||||
28585 | ); | ||||
28586 | } | ||||
28587 | $saw_function_definition{$package}{$subname} = | ||||
28588 | $tokenizer_self->{_last_line_number}; | ||||
28589 | } | ||||
28590 | } | ||||
28591 | elsif ( $next_nonblank_token eq ';' ) { | ||||
28592 | } | ||||
28593 | elsif ( $next_nonblank_token eq '}' ) { | ||||
28594 | } | ||||
28595 | |||||
28596 | # ATTRS - if an attribute list follows, remember the name | ||||
28597 | # of the sub so the next opening brace can be labeled. | ||||
28598 | # Setting 'statement_type' causes any ':'s to introduce | ||||
28599 | # attributes. | ||||
28600 | elsif ( $next_nonblank_token eq ':' ) { | ||||
28601 | $statement_type = $tok; | ||||
28602 | } | ||||
28603 | |||||
28604 | # see if PROTO follows on another line: | ||||
28605 | elsif ( $next_nonblank_token eq '(' ) { | ||||
28606 | if ( $attrs || $proto ) { | ||||
28607 | warning( | ||||
28608 | "unexpected '(' after definition or declaration of sub '$subname'\n" | ||||
28609 | ); | ||||
28610 | } | ||||
28611 | else { | ||||
28612 | $id_scan_state = 'sub'; # we must come back to get proto | ||||
28613 | $statement_type = $tok; | ||||
28614 | $package_saved = $package; | ||||
28615 | $subname_saved = $subname; | ||||
28616 | } | ||||
28617 | } | ||||
28618 | elsif ($next_nonblank_token) { # EOF technically ok | ||||
28619 | warning( | ||||
28620 | "expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n" | ||||
28621 | ); | ||||
28622 | } | ||||
28623 | check_prototype( $proto, $package, $subname ); | ||||
28624 | } | ||||
28625 | |||||
28626 | # no match but line not blank | ||||
28627 | else { | ||||
28628 | } | ||||
28629 | return ( $i, $tok, $type, $id_scan_state ); | ||||
28630 | } | ||||
28631 | } | ||||
28632 | |||||
28633 | #########i############################################################### | ||||
28634 | # Tokenizer utility routines which may use CONSTANTS but no other GLOBALS | ||||
28635 | ######################################################################### | ||||
28636 | |||||
28637 | sub find_next_nonblank_token { | ||||
28638 | my ( $i, $rtokens, $max_token_index ) = @_; | ||||
28639 | |||||
28640 | if ( $i >= $max_token_index ) { | ||||
28641 | if ( !peeked_ahead() ) { | ||||
28642 | peeked_ahead(1); | ||||
28643 | $rtokens = | ||||
28644 | peek_ahead_for_nonblank_token( $rtokens, $max_token_index ); | ||||
28645 | } | ||||
28646 | } | ||||
28647 | my $next_nonblank_token = $$rtokens[ ++$i ]; | ||||
28648 | |||||
28649 | if ( $next_nonblank_token =~ /^\s*$/ ) { | ||||
28650 | $next_nonblank_token = $$rtokens[ ++$i ]; | ||||
28651 | } | ||||
28652 | return ( $next_nonblank_token, $i ); | ||||
28653 | } | ||||
28654 | |||||
28655 | sub numerator_expected { | ||||
28656 | |||||
28657 | # this is a filter for a possible numerator, in support of guessing | ||||
28658 | # for the / pattern delimiter token. | ||||
28659 | # returns - | ||||
28660 | # 1 - yes | ||||
28661 | # 0 - can't tell | ||||
28662 | # -1 - no | ||||
28663 | # Note: I am using the convention that variables ending in | ||||
28664 | # _expected have these 3 possible values. | ||||
28665 | my ( $i, $rtokens, $max_token_index ) = @_; | ||||
28666 | my $next_token = $$rtokens[ $i + 1 ]; | ||||
28667 | if ( $next_token eq '=' ) { $i++; } # handle /= | ||||
28668 | my ( $next_nonblank_token, $i_next ) = | ||||
28669 | find_next_nonblank_token( $i, $rtokens, $max_token_index ); | ||||
28670 | |||||
28671 | if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) { | ||||
28672 | 1; | ||||
28673 | } | ||||
28674 | else { | ||||
28675 | |||||
28676 | if ( $next_nonblank_token =~ /^\s*$/ ) { | ||||
28677 | 0; | ||||
28678 | } | ||||
28679 | else { | ||||
28680 | -1; | ||||
28681 | } | ||||
28682 | } | ||||
28683 | } | ||||
28684 | |||||
28685 | sub pattern_expected { | ||||
28686 | |||||
28687 | # This is the start of a filter for a possible pattern. | ||||
28688 | # It looks at the token after a possible pattern and tries to | ||||
28689 | # determine if that token could end a pattern. | ||||
28690 | # returns - | ||||
28691 | # 1 - yes | ||||
28692 | # 0 - can't tell | ||||
28693 | # -1 - no | ||||
28694 | my ( $i, $rtokens, $max_token_index ) = @_; | ||||
28695 | my $next_token = $$rtokens[ $i + 1 ]; | ||||
28696 | if ( $next_token =~ /^[msixpodualgc]/ ) { $i++; } # skip possible modifier | ||||
28697 | my ( $next_nonblank_token, $i_next ) = | ||||
28698 | find_next_nonblank_token( $i, $rtokens, $max_token_index ); | ||||
28699 | |||||
28700 | # list of tokens which may follow a pattern | ||||
28701 | # (can probably be expanded) | ||||
28702 | if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ ) | ||||
28703 | { | ||||
28704 | 1; | ||||
28705 | } | ||||
28706 | else { | ||||
28707 | |||||
28708 | if ( $next_nonblank_token =~ /^\s*$/ ) { | ||||
28709 | 0; | ||||
28710 | } | ||||
28711 | else { | ||||
28712 | -1; | ||||
28713 | } | ||||
28714 | } | ||||
28715 | } | ||||
28716 | |||||
28717 | sub find_next_nonblank_token_on_this_line { | ||||
28718 | my ( $i, $rtokens, $max_token_index ) = @_; | ||||
28719 | my $next_nonblank_token; | ||||
28720 | |||||
28721 | if ( $i < $max_token_index ) { | ||||
28722 | $next_nonblank_token = $$rtokens[ ++$i ]; | ||||
28723 | |||||
28724 | if ( $next_nonblank_token =~ /^\s*$/ ) { | ||||
28725 | |||||
28726 | if ( $i < $max_token_index ) { | ||||
28727 | $next_nonblank_token = $$rtokens[ ++$i ]; | ||||
28728 | } | ||||
28729 | } | ||||
28730 | } | ||||
28731 | else { | ||||
28732 | $next_nonblank_token = ""; | ||||
28733 | } | ||||
28734 | return ( $next_nonblank_token, $i ); | ||||
28735 | } | ||||
28736 | |||||
28737 | sub find_angle_operator_termination { | ||||
28738 | |||||
28739 | # We are looking at a '<' and want to know if it is an angle operator. | ||||
28740 | # We are to return: | ||||
28741 | # $i = pretoken index of ending '>' if found, current $i otherwise | ||||
28742 | # $type = 'Q' if found, '>' otherwise | ||||
28743 | my ( $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) = @_; | ||||
28744 | my $i = $i_beg; | ||||
28745 | my $type = '<'; | ||||
28746 | pos($input_line) = 1 + $$rtoken_map[$i]; | ||||
28747 | |||||
28748 | my $filter; | ||||
28749 | |||||
28750 | # we just have to find the next '>' if a term is expected | ||||
28751 | if ( $expecting == TERM ) { $filter = '[\>]' } | ||||
28752 | |||||
28753 | # we have to guess if we don't know what is expected | ||||
28754 | elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' } | ||||
28755 | |||||
28756 | # shouldn't happen - we shouldn't be here if operator is expected | ||||
28757 | else { warning("Program Bug in find_angle_operator_termination\n") } | ||||
28758 | |||||
28759 | # To illustrate what we might be looking at, in case we are | ||||
28760 | # guessing, here are some examples of valid angle operators | ||||
28761 | # (or file globs): | ||||
28762 | # <tmp_imp/*> | ||||
28763 | # <FH> | ||||
28764 | # <$fh> | ||||
28765 | # <*.c *.h> | ||||
28766 | # <_> | ||||
28767 | # <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t) | ||||
28768 | # <${PREFIX}*img*.$IMAGE_TYPE> | ||||
28769 | # <img*.$IMAGE_TYPE> | ||||
28770 | # <Timg*.$IMAGE_TYPE> | ||||
28771 | # <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl> | ||||
28772 | # | ||||
28773 | # Here are some examples of lines which do not have angle operators: | ||||
28774 | # return undef unless $self->[2]++ < $#{$self->[1]}; | ||||
28775 | # < 2 || @$t > | ||||
28776 | # | ||||
28777 | # the following line from dlister.pl caused trouble: | ||||
28778 | # print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n"; | ||||
28779 | # | ||||
28780 | # If the '<' starts an angle operator, it must end on this line and | ||||
28781 | # it must not have certain characters like ';' and '=' in it. I use | ||||
28782 | # this to limit the testing. This filter should be improved if | ||||
28783 | # possible. | ||||
28784 | |||||
28785 | if ( $input_line =~ /($filter)/g ) { | ||||
28786 | |||||
28787 | if ( $1 eq '>' ) { | ||||
28788 | |||||
28789 | # We MAY have found an angle operator termination if we get | ||||
28790 | # here, but we need to do more to be sure we haven't been | ||||
28791 | # fooled. | ||||
28792 | my $pos = pos($input_line); | ||||
28793 | |||||
28794 | my $pos_beg = $$rtoken_map[$i]; | ||||
28795 | my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) ); | ||||
28796 | |||||
28797 | # Reject if the closing '>' follows a '-' as in: | ||||
28798 | # if ( VERSION < 5.009 && $op-> name eq 'assign' ) { } | ||||
28799 | if ( $expecting eq UNKNOWN ) { | ||||
28800 | my $check = substr( $input_line, $pos - 2, 1 ); | ||||
28801 | if ( $check eq '-' ) { | ||||
28802 | return ( $i, $type ); | ||||
28803 | } | ||||
28804 | } | ||||
28805 | |||||
28806 | ######################################debug##### | ||||
28807 | #write_diagnostics( "ANGLE? :$str\n"); | ||||
28808 | #print "ANGLE: found $1 at pos=$pos str=$str check=$check\n"; | ||||
28809 | ######################################debug##### | ||||
28810 | $type = 'Q'; | ||||
28811 | my $error; | ||||
28812 | ( $i, $error ) = | ||||
28813 | inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); | ||||
28814 | |||||
28815 | # It may be possible that a quote ends midway in a pretoken. | ||||
28816 | # If this happens, it may be necessary to split the pretoken. | ||||
28817 | if ($error) { | ||||
28818 | warning( | ||||
28819 | "Possible tokinization error..please check this line\n"); | ||||
28820 | report_possible_bug(); | ||||
28821 | } | ||||
28822 | |||||
28823 | # Now let's see where we stand.... | ||||
28824 | # OK if math op not possible | ||||
28825 | if ( $expecting == TERM ) { | ||||
28826 | } | ||||
28827 | |||||
28828 | # OK if there are no more than 2 pre-tokens inside | ||||
28829 | # (not possible to write 2 token math between < and >) | ||||
28830 | # This catches most common cases | ||||
28831 | elsif ( $i <= $i_beg + 3 ) { | ||||
28832 | write_diagnostics("ANGLE(1 or 2 tokens): $str\n"); | ||||
28833 | } | ||||
28834 | |||||
28835 | # Not sure.. | ||||
28836 | else { | ||||
28837 | |||||
28838 | # Let's try a Brace Test: any braces inside must balance | ||||
28839 | my $br = 0; | ||||
28840 | while ( $str =~ /\{/g ) { $br++ } | ||||
28841 | while ( $str =~ /\}/g ) { $br-- } | ||||
28842 | my $sb = 0; | ||||
28843 | while ( $str =~ /\[/g ) { $sb++ } | ||||
28844 | while ( $str =~ /\]/g ) { $sb-- } | ||||
28845 | my $pr = 0; | ||||
28846 | while ( $str =~ /\(/g ) { $pr++ } | ||||
28847 | while ( $str =~ /\)/g ) { $pr-- } | ||||
28848 | |||||
28849 | # if braces do not balance - not angle operator | ||||
28850 | if ( $br || $sb || $pr ) { | ||||
28851 | $i = $i_beg; | ||||
28852 | $type = '<'; | ||||
28853 | write_diagnostics( | ||||
28854 | "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n"); | ||||
28855 | } | ||||
28856 | |||||
28857 | # we should keep doing more checks here...to be continued | ||||
28858 | # Tentatively accepting this as a valid angle operator. | ||||
28859 | # There are lots more things that can be checked. | ||||
28860 | else { | ||||
28861 | write_diagnostics( | ||||
28862 | "ANGLE-Guessing yes: $str expecting=$expecting\n"); | ||||
28863 | write_logfile_entry("Guessing angle operator here: $str\n"); | ||||
28864 | } | ||||
28865 | } | ||||
28866 | } | ||||
28867 | |||||
28868 | # didn't find ending > | ||||
28869 | else { | ||||
28870 | if ( $expecting == TERM ) { | ||||
28871 | warning("No ending > for angle operator\n"); | ||||
28872 | } | ||||
28873 | } | ||||
28874 | } | ||||
28875 | return ( $i, $type ); | ||||
28876 | } | ||||
28877 | |||||
28878 | sub scan_number_do { | ||||
28879 | |||||
28880 | # scan a number in any of the formats that Perl accepts | ||||
28881 | # Underbars (_) are allowed in decimal numbers. | ||||
28882 | # input parameters - | ||||
28883 | # $input_line - the string to scan | ||||
28884 | # $i - pre_token index to start scanning | ||||
28885 | # $rtoken_map - reference to the pre_token map giving starting | ||||
28886 | # character position in $input_line of token $i | ||||
28887 | # output parameters - | ||||
28888 | # $i - last pre_token index of the number just scanned | ||||
28889 | # number - the number (characters); or undef if not a number | ||||
28890 | |||||
28891 | my ( $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = @_; | ||||
28892 | my $pos_beg = $$rtoken_map[$i]; | ||||
28893 | my $pos; | ||||
28894 | my $i_begin = $i; | ||||
28895 | my $number = undef; | ||||
28896 | my $type = $input_type; | ||||
28897 | |||||
28898 | my $first_char = substr( $input_line, $pos_beg, 1 ); | ||||
28899 | |||||
28900 | # Look for bad starting characters; Shouldn't happen.. | ||||
28901 | if ( $first_char !~ /[\d\.\+\-Ee]/ ) { | ||||
28902 | warning("Program bug - scan_number given character $first_char\n"); | ||||
28903 | report_definite_bug(); | ||||
28904 | return ( $i, $type, $number ); | ||||
28905 | } | ||||
28906 | |||||
28907 | # handle v-string without leading 'v' character ('Two Dot' rule) | ||||
28908 | # (vstring.t) | ||||
28909 | # TODO: v-strings may contain underscores | ||||
28910 | pos($input_line) = $pos_beg; | ||||
28911 | if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) { | ||||
28912 | $pos = pos($input_line); | ||||
28913 | my $numc = $pos - $pos_beg; | ||||
28914 | $number = substr( $input_line, $pos_beg, $numc ); | ||||
28915 | $type = 'v'; | ||||
28916 | report_v_string($number); | ||||
28917 | } | ||||
28918 | |||||
28919 | # handle octal, hex, binary | ||||
28920 | if ( !defined($number) ) { | ||||
28921 | pos($input_line) = $pos_beg; | ||||
28922 | if ( $input_line =~ | ||||
28923 | /\G[+-]?0(([xX][0-9a-fA-F_]+)|([0-7_]+)|([bB][01_]+))/g ) | ||||
28924 | { | ||||
28925 | $pos = pos($input_line); | ||||
28926 | my $numc = $pos - $pos_beg; | ||||
28927 | $number = substr( $input_line, $pos_beg, $numc ); | ||||
28928 | $type = 'n'; | ||||
28929 | } | ||||
28930 | } | ||||
28931 | |||||
28932 | # handle decimal | ||||
28933 | if ( !defined($number) ) { | ||||
28934 | pos($input_line) = $pos_beg; | ||||
28935 | |||||
28936 | if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) { | ||||
28937 | $pos = pos($input_line); | ||||
28938 | |||||
28939 | # watch out for things like 0..40 which would give 0. by this; | ||||
28940 | if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' ) | ||||
28941 | && ( substr( $input_line, $pos, 1 ) eq '.' ) ) | ||||
28942 | { | ||||
28943 | $pos--; | ||||
28944 | } | ||||
28945 | my $numc = $pos - $pos_beg; | ||||
28946 | $number = substr( $input_line, $pos_beg, $numc ); | ||||
28947 | $type = 'n'; | ||||
28948 | } | ||||
28949 | } | ||||
28950 | |||||
28951 | # filter out non-numbers like e + - . e2 .e3 +e6 | ||||
28952 | # the rule: at least one digit, and any 'e' must be preceded by a digit | ||||
28953 | if ( | ||||
28954 | $number !~ /\d/ # no digits | ||||
28955 | || ( $number =~ /^(.*)[eE]/ | ||||
28956 | && $1 !~ /\d/ ) # or no digits before the 'e' | ||||
28957 | ) | ||||
28958 | { | ||||
28959 | $number = undef; | ||||
28960 | $type = $input_type; | ||||
28961 | return ( $i, $type, $number ); | ||||
28962 | } | ||||
28963 | |||||
28964 | # Found a number; now we must convert back from character position | ||||
28965 | # to pre_token index. An error here implies user syntax error. | ||||
28966 | # An example would be an invalid octal number like '009'. | ||||
28967 | my $error; | ||||
28968 | ( $i, $error ) = | ||||
28969 | inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); | ||||
28970 | if ($error) { warning("Possibly invalid number\n") } | ||||
28971 | |||||
28972 | return ( $i, $type, $number ); | ||||
28973 | } | ||||
28974 | |||||
28975 | sub inverse_pretoken_map { | ||||
28976 | |||||
28977 | # Starting with the current pre_token index $i, scan forward until | ||||
28978 | # finding the index of the next pre_token whose position is $pos. | ||||
28979 | my ( $i, $pos, $rtoken_map, $max_token_index ) = @_; | ||||
28980 | my $error = 0; | ||||
28981 | |||||
28982 | while ( ++$i <= $max_token_index ) { | ||||
28983 | |||||
28984 | if ( $pos <= $$rtoken_map[$i] ) { | ||||
28985 | |||||
28986 | # Let the calling routine handle errors in which we do not | ||||
28987 | # land on a pre-token boundary. It can happen by running | ||||
28988 | # perltidy on some non-perl scripts, for example. | ||||
28989 | if ( $pos < $$rtoken_map[$i] ) { $error = 1 } | ||||
28990 | $i--; | ||||
28991 | last; | ||||
28992 | } | ||||
28993 | } | ||||
28994 | return ( $i, $error ); | ||||
28995 | } | ||||
28996 | |||||
28997 | sub find_here_doc { | ||||
28998 | |||||
28999 | # find the target of a here document, if any | ||||
29000 | # input parameters: | ||||
29001 | # $i - token index of the second < of << | ||||
29002 | # ($i must be less than the last token index if this is called) | ||||
29003 | # output parameters: | ||||
29004 | # $found_target = 0 didn't find target; =1 found target | ||||
29005 | # HERE_TARGET - the target string (may be empty string) | ||||
29006 | # $i - unchanged if not here doc, | ||||
29007 | # or index of the last token of the here target | ||||
29008 | # $saw_error - flag noting unbalanced quote on here target | ||||
29009 | my ( $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_; | ||||
29010 | my $ibeg = $i; | ||||
29011 | my $found_target = 0; | ||||
29012 | my $here_doc_target = ''; | ||||
29013 | my $here_quote_character = ''; | ||||
29014 | my $saw_error = 0; | ||||
29015 | my ( $next_nonblank_token, $i_next_nonblank, $next_token ); | ||||
29016 | $next_token = $$rtokens[ $i + 1 ]; | ||||
29017 | |||||
29018 | # perl allows a backslash before the target string (heredoc.t) | ||||
29019 | my $backslash = 0; | ||||
29020 | if ( $next_token eq '\\' ) { | ||||
29021 | $backslash = 1; | ||||
29022 | $next_token = $$rtokens[ $i + 2 ]; | ||||
29023 | } | ||||
29024 | |||||
29025 | ( $next_nonblank_token, $i_next_nonblank ) = | ||||
29026 | find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index ); | ||||
29027 | |||||
29028 | if ( $next_nonblank_token =~ /[\'\"\`]/ ) { | ||||
29029 | |||||
29030 | my $in_quote = 1; | ||||
29031 | my $quote_depth = 0; | ||||
29032 | my $quote_pos = 0; | ||||
29033 | my $quoted_string; | ||||
29034 | |||||
29035 | ( | ||||
29036 | $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth, | ||||
29037 | $quoted_string | ||||
29038 | ) | ||||
29039 | = follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens, | ||||
29040 | $here_quote_character, $quote_pos, $quote_depth, $max_token_index ); | ||||
29041 | |||||
29042 | if ($in_quote) { # didn't find end of quote, so no target found | ||||
29043 | $i = $ibeg; | ||||
29044 | if ( $expecting == TERM ) { | ||||
29045 | warning( | ||||
29046 | "Did not find here-doc string terminator ($here_quote_character) before end of line \n" | ||||
29047 | ); | ||||
29048 | $saw_error = 1; | ||||
29049 | } | ||||
29050 | } | ||||
29051 | else { # found ending quote | ||||
29052 | my $j; | ||||
29053 | $found_target = 1; | ||||
29054 | |||||
29055 | my $tokj; | ||||
29056 | for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) { | ||||
29057 | $tokj = $$rtokens[$j]; | ||||
29058 | |||||
29059 | # we have to remove any backslash before the quote character | ||||
29060 | # so that the here-doc-target exactly matches this string | ||||
29061 | next | ||||
29062 | if ( $tokj eq "\\" | ||||
29063 | && $j < $i - 1 | ||||
29064 | && $$rtokens[ $j + 1 ] eq $here_quote_character ); | ||||
29065 | $here_doc_target .= $tokj; | ||||
29066 | } | ||||
29067 | } | ||||
29068 | } | ||||
29069 | |||||
29070 | elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) { | ||||
29071 | $found_target = 1; | ||||
29072 | write_logfile_entry( | ||||
29073 | "found blank here-target after <<; suggest using \"\"\n"); | ||||
29074 | $i = $ibeg; | ||||
29075 | } | ||||
29076 | elsif ( $next_token =~ /^\w/ ) { # simple bareword or integer after << | ||||
29077 | |||||
29078 | my $here_doc_expected; | ||||
29079 | if ( $expecting == UNKNOWN ) { | ||||
29080 | $here_doc_expected = guess_if_here_doc($next_token); | ||||
29081 | } | ||||
29082 | else { | ||||
29083 | $here_doc_expected = 1; | ||||
29084 | } | ||||
29085 | |||||
29086 | if ($here_doc_expected) { | ||||
29087 | $found_target = 1; | ||||
29088 | $here_doc_target = $next_token; | ||||
29089 | $i = $ibeg + 1; | ||||
29090 | } | ||||
29091 | |||||
29092 | } | ||||
29093 | else { | ||||
29094 | |||||
29095 | if ( $expecting == TERM ) { | ||||
29096 | $found_target = 1; | ||||
29097 | write_logfile_entry("Note: bare here-doc operator <<\n"); | ||||
29098 | } | ||||
29099 | else { | ||||
29100 | $i = $ibeg; | ||||
29101 | } | ||||
29102 | } | ||||
29103 | |||||
29104 | # patch to neglect any prepended backslash | ||||
29105 | if ( $found_target && $backslash ) { $i++ } | ||||
29106 | |||||
29107 | return ( $found_target, $here_doc_target, $here_quote_character, $i, | ||||
29108 | $saw_error ); | ||||
29109 | } | ||||
29110 | |||||
29111 | sub do_quote { | ||||
29112 | |||||
29113 | # follow (or continue following) quoted string(s) | ||||
29114 | # $in_quote return code: | ||||
29115 | # 0 - ok, found end | ||||
29116 | # 1 - still must find end of quote whose target is $quote_character | ||||
29117 | # 2 - still looking for end of first of two quotes | ||||
29118 | # | ||||
29119 | # Returns updated strings: | ||||
29120 | # $quoted_string_1 = quoted string seen while in_quote=1 | ||||
29121 | # $quoted_string_2 = quoted string seen while in_quote=2 | ||||
29122 | my ( | ||||
29123 | $i, $in_quote, $quote_character, | ||||
29124 | $quote_pos, $quote_depth, $quoted_string_1, | ||||
29125 | $quoted_string_2, $rtokens, $rtoken_map, | ||||
29126 | $max_token_index | ||||
29127 | ) = @_; | ||||
29128 | |||||
29129 | my $in_quote_starting = $in_quote; | ||||
29130 | |||||
29131 | my $quoted_string; | ||||
29132 | if ( $in_quote == 2 ) { # two quotes/quoted_string_1s to follow | ||||
29133 | my $ibeg = $i; | ||||
29134 | ( | ||||
29135 | $i, $in_quote, $quote_character, $quote_pos, $quote_depth, | ||||
29136 | $quoted_string | ||||
29137 | ) | ||||
29138 | = follow_quoted_string( $i, $in_quote, $rtokens, $quote_character, | ||||
29139 | $quote_pos, $quote_depth, $max_token_index ); | ||||
29140 | $quoted_string_2 .= $quoted_string; | ||||
29141 | if ( $in_quote == 1 ) { | ||||
29142 | if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; } | ||||
29143 | $quote_character = ''; | ||||
29144 | } | ||||
29145 | else { | ||||
29146 | $quoted_string_2 .= "\n"; | ||||
29147 | } | ||||
29148 | } | ||||
29149 | |||||
29150 | if ( $in_quote == 1 ) { # one (more) quote to follow | ||||
29151 | my $ibeg = $i; | ||||
29152 | ( | ||||
29153 | $i, $in_quote, $quote_character, $quote_pos, $quote_depth, | ||||
29154 | $quoted_string | ||||
29155 | ) | ||||
29156 | = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character, | ||||
29157 | $quote_pos, $quote_depth, $max_token_index ); | ||||
29158 | $quoted_string_1 .= $quoted_string; | ||||
29159 | if ( $in_quote == 1 ) { | ||||
29160 | $quoted_string_1 .= "\n"; | ||||
29161 | } | ||||
29162 | } | ||||
29163 | return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth, | ||||
29164 | $quoted_string_1, $quoted_string_2 ); | ||||
29165 | } | ||||
29166 | |||||
29167 | sub follow_quoted_string { | ||||
29168 | |||||
29169 | # scan for a specific token, skipping escaped characters | ||||
29170 | # if the quote character is blank, use the first non-blank character | ||||
29171 | # input parameters: | ||||
29172 | # $rtokens = reference to the array of tokens | ||||
29173 | # $i = the token index of the first character to search | ||||
29174 | # $in_quote = number of quoted strings being followed | ||||
29175 | # $beginning_tok = the starting quote character | ||||
29176 | # $quote_pos = index to check next for alphanumeric delimiter | ||||
29177 | # output parameters: | ||||
29178 | # $i = the token index of the ending quote character | ||||
29179 | # $in_quote = decremented if found end, unchanged if not | ||||
29180 | # $beginning_tok = the starting quote character | ||||
29181 | # $quote_pos = index to check next for alphanumeric delimiter | ||||
29182 | # $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested. | ||||
29183 | # $quoted_string = the text of the quote (without quotation tokens) | ||||
29184 | my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth, | ||||
29185 | $max_token_index ) | ||||
29186 | = @_; | ||||
29187 | my ( $tok, $end_tok ); | ||||
29188 | my $i = $i_beg - 1; | ||||
29189 | my $quoted_string = ""; | ||||
29190 | |||||
29191 | TOKENIZER_DEBUG_FLAG_QUOTE && do { | ||||
29192 | print STDOUT | ||||
29193 | "QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n"; | ||||
29194 | }; | ||||
29195 | |||||
29196 | # get the corresponding end token | ||||
29197 | if ( $beginning_tok !~ /^\s*$/ ) { | ||||
29198 | $end_tok = matching_end_token($beginning_tok); | ||||
29199 | } | ||||
29200 | |||||
29201 | # a blank token means we must find and use the first non-blank one | ||||
29202 | else { | ||||
29203 | my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr> | ||||
29204 | |||||
29205 | while ( $i < $max_token_index ) { | ||||
29206 | $tok = $$rtokens[ ++$i ]; | ||||
29207 | |||||
29208 | if ( $tok !~ /^\s*$/ ) { | ||||
29209 | |||||
29210 | if ( ( $tok eq '#' ) && ($allow_quote_comments) ) { | ||||
29211 | $i = $max_token_index; | ||||
29212 | } | ||||
29213 | else { | ||||
29214 | |||||
29215 | if ( length($tok) > 1 ) { | ||||
29216 | if ( $quote_pos <= 0 ) { $quote_pos = 1 } | ||||
29217 | $beginning_tok = substr( $tok, $quote_pos - 1, 1 ); | ||||
29218 | } | ||||
29219 | else { | ||||
29220 | $beginning_tok = $tok; | ||||
29221 | $quote_pos = 0; | ||||
29222 | } | ||||
29223 | $end_tok = matching_end_token($beginning_tok); | ||||
29224 | $quote_depth = 1; | ||||
29225 | last; | ||||
29226 | } | ||||
29227 | } | ||||
29228 | else { | ||||
29229 | $allow_quote_comments = 1; | ||||
29230 | } | ||||
29231 | } | ||||
29232 | } | ||||
29233 | |||||
29234 | # There are two different loops which search for the ending quote | ||||
29235 | # character. In the rare case of an alphanumeric quote delimiter, we | ||||
29236 | # have to look through alphanumeric tokens character-by-character, since | ||||
29237 | # the pre-tokenization process combines multiple alphanumeric | ||||
29238 | # characters, whereas for a non-alphanumeric delimiter, only tokens of | ||||
29239 | # length 1 can match. | ||||
29240 | |||||
29241 | ################################################################### | ||||
29242 | # Case 1 (rare): loop for case of alphanumeric quote delimiter.. | ||||
29243 | # "quote_pos" is the position the current word to begin searching | ||||
29244 | ################################################################### | ||||
29245 | if ( $beginning_tok =~ /\w/ ) { | ||||
29246 | |||||
29247 | # Note this because it is not recommended practice except | ||||
29248 | # for obfuscated perl contests | ||||
29249 | if ( $in_quote == 1 ) { | ||||
29250 | write_logfile_entry( | ||||
29251 | "Note: alphanumeric quote delimiter ($beginning_tok) \n"); | ||||
29252 | } | ||||
29253 | |||||
29254 | while ( $i < $max_token_index ) { | ||||
29255 | |||||
29256 | if ( $quote_pos == 0 || ( $i < 0 ) ) { | ||||
29257 | $tok = $$rtokens[ ++$i ]; | ||||
29258 | |||||
29259 | if ( $tok eq '\\' ) { | ||||
29260 | |||||
29261 | # retain backslash unless it hides the end token | ||||
29262 | $quoted_string .= $tok | ||||
29263 | unless $$rtokens[ $i + 1 ] eq $end_tok; | ||||
29264 | $quote_pos++; | ||||
29265 | last if ( $i >= $max_token_index ); | ||||
29266 | $tok = $$rtokens[ ++$i ]; | ||||
29267 | } | ||||
29268 | } | ||||
29269 | my $old_pos = $quote_pos; | ||||
29270 | |||||
29271 | unless ( defined($tok) && defined($end_tok) && defined($quote_pos) ) | ||||
29272 | { | ||||
29273 | |||||
29274 | } | ||||
29275 | $quote_pos = 1 + index( $tok, $end_tok, $quote_pos ); | ||||
29276 | |||||
29277 | if ( $quote_pos > 0 ) { | ||||
29278 | |||||
29279 | $quoted_string .= | ||||
29280 | substr( $tok, $old_pos, $quote_pos - $old_pos - 1 ); | ||||
29281 | |||||
29282 | $quote_depth--; | ||||
29283 | |||||
29284 | if ( $quote_depth == 0 ) { | ||||
29285 | $in_quote--; | ||||
29286 | last; | ||||
29287 | } | ||||
29288 | } | ||||
29289 | else { | ||||
29290 | $quoted_string .= substr( $tok, $old_pos ); | ||||
29291 | } | ||||
29292 | } | ||||
29293 | } | ||||
29294 | |||||
29295 | ######################################################################## | ||||
29296 | # Case 2 (normal): loop for case of a non-alphanumeric quote delimiter.. | ||||
29297 | ######################################################################## | ||||
29298 | else { | ||||
29299 | |||||
29300 | while ( $i < $max_token_index ) { | ||||
29301 | $tok = $$rtokens[ ++$i ]; | ||||
29302 | |||||
29303 | if ( $tok eq $end_tok ) { | ||||
29304 | $quote_depth--; | ||||
29305 | |||||
29306 | if ( $quote_depth == 0 ) { | ||||
29307 | $in_quote--; | ||||
29308 | last; | ||||
29309 | } | ||||
29310 | } | ||||
29311 | elsif ( $tok eq $beginning_tok ) { | ||||
29312 | $quote_depth++; | ||||
29313 | } | ||||
29314 | elsif ( $tok eq '\\' ) { | ||||
29315 | |||||
29316 | # retain backslash unless it hides the beginning or end token | ||||
29317 | $tok = $$rtokens[ ++$i ]; | ||||
29318 | $quoted_string .= '\\' | ||||
29319 | unless ( $tok eq $end_tok || $tok eq $beginning_tok ); | ||||
29320 | } | ||||
29321 | $quoted_string .= $tok; | ||||
29322 | } | ||||
29323 | } | ||||
29324 | if ( $i > $max_token_index ) { $i = $max_token_index } | ||||
29325 | return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth, | ||||
29326 | $quoted_string ); | ||||
29327 | } | ||||
29328 | |||||
29329 | sub indicate_error { | ||||
29330 | my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_; | ||||
29331 | interrupt_logfile(); | ||||
29332 | warning($msg); | ||||
29333 | write_error_indicator_pair( $line_number, $input_line, $pos, $carrat ); | ||||
29334 | resume_logfile(); | ||||
29335 | } | ||||
29336 | |||||
29337 | sub write_error_indicator_pair { | ||||
29338 | my ( $line_number, $input_line, $pos, $carrat ) = @_; | ||||
29339 | my ( $offset, $numbered_line, $underline ) = | ||||
29340 | make_numbered_line( $line_number, $input_line, $pos ); | ||||
29341 | $underline = write_on_underline( $underline, $pos - $offset, $carrat ); | ||||
29342 | warning( $numbered_line . "\n" ); | ||||
29343 | $underline =~ s/\s*$//; | ||||
29344 | warning( $underline . "\n" ); | ||||
29345 | } | ||||
29346 | |||||
29347 | sub make_numbered_line { | ||||
29348 | |||||
29349 | # Given an input line, its line number, and a character position of | ||||
29350 | # interest, create a string not longer than 80 characters of the form | ||||
29351 | # $lineno: sub_string | ||||
29352 | # such that the sub_string of $str contains the position of interest | ||||
29353 | # | ||||
29354 | # Here is an example of what we want, in this case we add trailing | ||||
29355 | # '...' because the line is long. | ||||
29356 | # | ||||
29357 | # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ... | ||||
29358 | # | ||||
29359 | # Here is another example, this time in which we used leading '...' | ||||
29360 | # because of excessive length: | ||||
29361 | # | ||||
29362 | # 2: ... er of the World Wide Web Consortium's | ||||
29363 | # | ||||
29364 | # input parameters are: | ||||
29365 | # $lineno = line number | ||||
29366 | # $str = the text of the line | ||||
29367 | # $pos = position of interest (the error) : 0 = first character | ||||
29368 | # | ||||
29369 | # We return : | ||||
29370 | # - $offset = an offset which corrects the position in case we only | ||||
29371 | # display part of a line, such that $pos-$offset is the effective | ||||
29372 | # position from the start of the displayed line. | ||||
29373 | # - $numbered_line = the numbered line as above, | ||||
29374 | # - $underline = a blank 'underline' which is all spaces with the same | ||||
29375 | # number of characters as the numbered line. | ||||
29376 | |||||
29377 | my ( $lineno, $str, $pos ) = @_; | ||||
29378 | my $offset = ( $pos < 60 ) ? 0 : $pos - 40; | ||||
29379 | my $excess = length($str) - $offset - 68; | ||||
29380 | my $numc = ( $excess > 0 ) ? 68 : undef; | ||||
29381 | |||||
29382 | if ( defined($numc) ) { | ||||
29383 | if ( $offset == 0 ) { | ||||
29384 | $str = substr( $str, $offset, $numc - 4 ) . " ..."; | ||||
29385 | } | ||||
29386 | else { | ||||
29387 | $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ..."; | ||||
29388 | } | ||||
29389 | } | ||||
29390 | else { | ||||
29391 | |||||
29392 | if ( $offset == 0 ) { | ||||
29393 | } | ||||
29394 | else { | ||||
29395 | $str = "... " . substr( $str, $offset + 4 ); | ||||
29396 | } | ||||
29397 | } | ||||
29398 | |||||
29399 | my $numbered_line = sprintf( "%d: ", $lineno ); | ||||
29400 | $offset -= length($numbered_line); | ||||
29401 | $numbered_line .= $str; | ||||
29402 | my $underline = " " x length($numbered_line); | ||||
29403 | return ( $offset, $numbered_line, $underline ); | ||||
29404 | } | ||||
29405 | |||||
29406 | sub write_on_underline { | ||||
29407 | |||||
29408 | # The "underline" is a string that shows where an error is; it starts | ||||
29409 | # out as a string of blanks with the same length as the numbered line of | ||||
29410 | # code above it, and we have to add marking to show where an error is. | ||||
29411 | # In the example below, we want to write the string '--^' just below | ||||
29412 | # the line of bad code: | ||||
29413 | # | ||||
29414 | # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ... | ||||
29415 | # ---^ | ||||
29416 | # We are given the current underline string, plus a position and a | ||||
29417 | # string to write on it. | ||||
29418 | # | ||||
29419 | # In the above example, there will be 2 calls to do this: | ||||
29420 | # First call: $pos=19, pos_chr=^ | ||||
29421 | # Second call: $pos=16, pos_chr=--- | ||||
29422 | # | ||||
29423 | # This is a trivial thing to do with substr, but there is some | ||||
29424 | # checking to do. | ||||
29425 | |||||
29426 | my ( $underline, $pos, $pos_chr ) = @_; | ||||
29427 | |||||
29428 | # check for error..shouldn't happen | ||||
29429 | unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) { | ||||
29430 | return $underline; | ||||
29431 | } | ||||
29432 | my $excess = length($pos_chr) + $pos - length($underline); | ||||
29433 | if ( $excess > 0 ) { | ||||
29434 | $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess ); | ||||
29435 | } | ||||
29436 | substr( $underline, $pos, length($pos_chr) ) = $pos_chr; | ||||
29437 | return ($underline); | ||||
29438 | } | ||||
29439 | |||||
29440 | sub pre_tokenize { | ||||
29441 | |||||
29442 | # Break a string, $str, into a sequence of preliminary tokens. We | ||||
29443 | # are interested in these types of tokens: | ||||
29444 | # words (type='w'), example: 'max_tokens_wanted' | ||||
29445 | # digits (type = 'd'), example: '0755' | ||||
29446 | # whitespace (type = 'b'), example: ' ' | ||||
29447 | # any other single character (i.e. punct; type = the character itself). | ||||
29448 | # We cannot do better than this yet because we might be in a quoted | ||||
29449 | # string or pattern. Caller sets $max_tokens_wanted to 0 to get all | ||||
29450 | # tokens. | ||||
29451 | my ( $str, $max_tokens_wanted ) = @_; | ||||
29452 | |||||
29453 | # we return references to these 3 arrays: | ||||
29454 | my @tokens = (); # array of the tokens themselves | ||||
29455 | my @token_map = (0); # string position of start of each token | ||||
29456 | my @type = (); # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct | ||||
29457 | |||||
29458 | do { | ||||
29459 | |||||
29460 | # whitespace | ||||
29461 | if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; } | ||||
29462 | |||||
29463 | # numbers | ||||
29464 | # note that this must come before words! | ||||
29465 | elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; } | ||||
29466 | |||||
29467 | # words | ||||
29468 | elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; } | ||||
29469 | |||||
29470 | # single-character punctuation | ||||
29471 | elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; } | ||||
29472 | |||||
29473 | # that's all.. | ||||
29474 | else { | ||||
29475 | return ( \@tokens, \@token_map, \@type ); | ||||
29476 | } | ||||
29477 | |||||
29478 | push @tokens, $1; | ||||
29479 | push @token_map, pos($str); | ||||
29480 | |||||
29481 | } while ( --$max_tokens_wanted != 0 ); | ||||
29482 | |||||
29483 | return ( \@tokens, \@token_map, \@type ); | ||||
29484 | } | ||||
29485 | |||||
29486 | sub show_tokens { | ||||
29487 | |||||
29488 | # this is an old debug routine | ||||
29489 | my ( $rtokens, $rtoken_map ) = @_; | ||||
29490 | my $num = scalar(@$rtokens); | ||||
29491 | my $i; | ||||
29492 | |||||
29493 | for ( $i = 0 ; $i < $num ; $i++ ) { | ||||
29494 | my $len = length( $$rtokens[$i] ); | ||||
29495 | print STDOUT "$i:$len:$$rtoken_map[$i]:$$rtokens[$i]:\n"; | ||||
29496 | } | ||||
29497 | } | ||||
29498 | |||||
29499 | sub matching_end_token { | ||||
29500 | |||||
29501 | # find closing character for a pattern | ||||
29502 | my $beginning_token = shift; | ||||
29503 | |||||
29504 | if ( $beginning_token eq '{' ) { | ||||
29505 | '}'; | ||||
29506 | } | ||||
29507 | elsif ( $beginning_token eq '[' ) { | ||||
29508 | ']'; | ||||
29509 | } | ||||
29510 | elsif ( $beginning_token eq '<' ) { | ||||
29511 | '>'; | ||||
29512 | } | ||||
29513 | elsif ( $beginning_token eq '(' ) { | ||||
29514 | ')'; | ||||
29515 | } | ||||
29516 | else { | ||||
29517 | $beginning_token; | ||||
29518 | } | ||||
29519 | } | ||||
29520 | |||||
29521 | sub dump_token_types { | ||||
29522 | my $class = shift; | ||||
29523 | my $fh = shift; | ||||
29524 | |||||
29525 | # This should be the latest list of token types in use | ||||
29526 | # adding NEW_TOKENS: add a comment here | ||||
29527 | print $fh <<'END_OF_LIST'; | ||||
29528 | |||||
29529 | Here is a list of the token types currently used for lines of type 'CODE'. | ||||
29530 | For the following tokens, the "type" of a token is just the token itself. | ||||
29531 | |||||
29532 | .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <> | ||||
29533 | ( ) <= >= == =~ !~ != ++ -- /= x= | ||||
29534 | ... **= <<= >>= &&= ||= //= <=> | ||||
29535 | , + - / * | % ! x ~ = \ ? : . < > ^ & | ||||
29536 | |||||
29537 | The following additional token types are defined: | ||||
29538 | |||||
29539 | type meaning | ||||
29540 | b blank (white space) | ||||
29541 | { indent: opening structural curly brace or square bracket or paren | ||||
29542 | (code block, anonymous hash reference, or anonymous array reference) | ||||
29543 | } outdent: right structural curly brace or square bracket or paren | ||||
29544 | [ left non-structural square bracket (enclosing an array index) | ||||
29545 | ] right non-structural square bracket | ||||
29546 | ( left non-structural paren (all but a list right of an =) | ||||
29547 | ) right non-structural paren | ||||
29548 | L left non-structural curly brace (enclosing a key) | ||||
29549 | R right non-structural curly brace | ||||
29550 | ; terminal semicolon | ||||
29551 | f indicates a semicolon in a "for" statement | ||||
29552 | h here_doc operator << | ||||
29553 | # a comment | ||||
29554 | Q indicates a quote or pattern | ||||
29555 | q indicates a qw quote block | ||||
29556 | k a perl keyword | ||||
29557 | C user-defined constant or constant function (with void prototype = ()) | ||||
29558 | U user-defined function taking parameters | ||||
29559 | G user-defined function taking block parameter (like grep/map/eval) | ||||
29560 | M (unused, but reserved for subroutine definition name) | ||||
29561 | P (unused, but -html uses it to label pod text) | ||||
29562 | t type indicater such as %,$,@,*,&,sub | ||||
29563 | w bare word (perhaps a subroutine call) | ||||
29564 | i identifier of some type (with leading %, $, @, *, &, sub, -> ) | ||||
29565 | n a number | ||||
29566 | v a v-string | ||||
29567 | F a file test operator (like -e) | ||||
29568 | Y File handle | ||||
29569 | Z identifier in indirect object slot: may be file handle, object | ||||
29570 | J LABEL: code block label | ||||
29571 | j LABEL after next, last, redo, goto | ||||
29572 | p unary + | ||||
29573 | m unary - | ||||
29574 | pp pre-increment operator ++ | ||||
29575 | mm pre-decrement operator -- | ||||
29576 | A : used as attribute separator | ||||
29577 | |||||
29578 | Here are the '_line_type' codes used internally: | ||||
29579 | SYSTEM - system-specific code before hash-bang line | ||||
29580 | CODE - line of perl code (including comments) | ||||
29581 | POD_START - line starting pod, such as '=head' | ||||
29582 | POD - pod documentation text | ||||
29583 | POD_END - last line of pod section, '=cut' | ||||
29584 | HERE - text of here-document | ||||
29585 | HERE_END - last line of here-doc (target word) | ||||
29586 | FORMAT - format section | ||||
29587 | FORMAT_END - last line of format section, '.' | ||||
29588 | DATA_START - __DATA__ line | ||||
29589 | DATA - unidentified text following __DATA__ | ||||
29590 | END_START - __END__ line | ||||
29591 | END - unidentified text following __END__ | ||||
29592 | ERROR - we are in big trouble, probably not a perl script | ||||
29593 | END_OF_LIST | ||||
29594 | } | ||||
29595 | |||||
29596 | # spent 484µs within Perl::Tidy::Tokenizer::BEGIN@29596 which was called:
# once (484µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 30087 | ||||
29597 | |||||
29598 | # These names are used in error messages | ||||
29599 | 1 | 2µs | @opening_brace_names = qw# '{' '[' '(' '?' #; | ||
29600 | 1 | 4µs | @closing_brace_names = qw# '}' ']' ')' ':' #; | ||
29601 | |||||
29602 | 1 | 4µs | my @digraphs = qw( | ||
29603 | .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <> | ||||
29604 | <= >= == =~ !~ != ++ -- /= x= ~~ | ||||
29605 | ); | ||||
29606 | 1 | 13µs | @is_digraph{@digraphs} = (1) x scalar(@digraphs); | ||
29607 | |||||
29608 | 1 | 1µs | my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ ); | ||
29609 | 1 | 4µs | @is_trigraph{@trigraphs} = (1) x scalar(@trigraphs); | ||
29610 | |||||
29611 | # make a hash of all valid token types for self-checking the tokenizer | ||||
29612 | # (adding NEW_TOKENS : select a new character and add to this list) | ||||
29613 | 1 | 6µs | my @valid_token_types = qw# | ||
29614 | A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v | ||||
29615 | { } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ & | ||||
29616 | #; | ||||
29617 | 1 | 9µs | push( @valid_token_types, @digraphs ); | ||
29618 | 1 | 1µs | push( @valid_token_types, @trigraphs ); | ||
29619 | 1 | 600ns | push( @valid_token_types, ( '#', ',', 'CORE::' ) ); | ||
29620 | 1 | 28µs | @is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types); | ||
29621 | |||||
29622 | # a list of file test letters, as in -e (Table 3-4 of 'camel 3') | ||||
29623 | 1 | 4µs | my @file_test_operators = | ||
29624 | qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z); | ||||
29625 | 1 | 10µs | @is_file_test_operator{@file_test_operators} = | ||
29626 | (1) x scalar(@file_test_operators); | ||||
29627 | |||||
29628 | # these functions have prototypes of the form (&), so when they are | ||||
29629 | # followed by a block, that block MAY BE followed by an operator. | ||||
29630 | # Smartmatch operator ~~ may be followed by anonymous hash or array ref | ||||
29631 | 1 | 800ns | @_ = qw( do eval ); | ||
29632 | 1 | 1µs | @is_block_operator{@_} = (1) x scalar(@_); | ||
29633 | |||||
29634 | # these functions allow an identifier in the indirect object slot | ||||
29635 | 1 | 2µs | @_ = qw( print printf sort exec system say); | ||
29636 | 1 | 3µs | @is_indirect_object_taker{@_} = (1) x scalar(@_); | ||
29637 | |||||
29638 | # These tokens may precede a code block | ||||
29639 | # patched for SWITCH/CASE | ||||
29640 | 1 | 5µs | @_ = | ||
29641 | qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else | ||||
29642 | unless do while until eval for foreach map grep sort | ||||
29643 | switch case given when); | ||||
29644 | 1 | 9µs | @is_code_block_token{@_} = (1) x scalar(@_); | ||
29645 | |||||
29646 | # I'll build the list of keywords incrementally | ||||
29647 | 1 | 200ns | my @Keywords = (); | ||
29648 | |||||
29649 | # keywords and tokens after which a value or pattern is expected, | ||||
29650 | # but not an operator. In other words, these should consume terms | ||||
29651 | # to their right, or at least they are not expected to be followed | ||||
29652 | # immediately by operators. | ||||
29653 | 1 | 31µs | my @value_requestor = qw( | ||
29654 | AUTOLOAD | ||||
29655 | BEGIN | ||||
29656 | CHECK | ||||
29657 | DESTROY | ||||
29658 | END | ||||
29659 | EQ | ||||
29660 | GE | ||||
29661 | GT | ||||
29662 | INIT | ||||
29663 | LE | ||||
29664 | LT | ||||
29665 | NE | ||||
29666 | UNITCHECK | ||||
29667 | abs | ||||
29668 | accept | ||||
29669 | alarm | ||||
29670 | and | ||||
29671 | atan2 | ||||
29672 | bind | ||||
29673 | binmode | ||||
29674 | bless | ||||
29675 | break | ||||
29676 | caller | ||||
29677 | chdir | ||||
29678 | chmod | ||||
29679 | chomp | ||||
29680 | chop | ||||
29681 | chown | ||||
29682 | chr | ||||
29683 | chroot | ||||
29684 | close | ||||
29685 | closedir | ||||
29686 | cmp | ||||
29687 | connect | ||||
29688 | continue | ||||
29689 | cos | ||||
29690 | crypt | ||||
29691 | dbmclose | ||||
29692 | dbmopen | ||||
29693 | defined | ||||
29694 | delete | ||||
29695 | die | ||||
29696 | dump | ||||
29697 | each | ||||
29698 | else | ||||
29699 | elsif | ||||
29700 | eof | ||||
29701 | eq | ||||
29702 | exec | ||||
29703 | exists | ||||
29704 | exit | ||||
29705 | exp | ||||
29706 | fcntl | ||||
29707 | fileno | ||||
29708 | flock | ||||
29709 | for | ||||
29710 | foreach | ||||
29711 | formline | ||||
29712 | ge | ||||
29713 | getc | ||||
29714 | getgrgid | ||||
29715 | getgrnam | ||||
29716 | gethostbyaddr | ||||
29717 | gethostbyname | ||||
29718 | getnetbyaddr | ||||
29719 | getnetbyname | ||||
29720 | getpeername | ||||
29721 | getpgrp | ||||
29722 | getpriority | ||||
29723 | getprotobyname | ||||
29724 | getprotobynumber | ||||
29725 | getpwnam | ||||
29726 | getpwuid | ||||
29727 | getservbyname | ||||
29728 | getservbyport | ||||
29729 | getsockname | ||||
29730 | getsockopt | ||||
29731 | glob | ||||
29732 | gmtime | ||||
29733 | goto | ||||
29734 | grep | ||||
29735 | gt | ||||
29736 | hex | ||||
29737 | if | ||||
29738 | index | ||||
29739 | int | ||||
29740 | ioctl | ||||
29741 | join | ||||
29742 | keys | ||||
29743 | kill | ||||
29744 | last | ||||
29745 | lc | ||||
29746 | lcfirst | ||||
29747 | le | ||||
29748 | length | ||||
29749 | link | ||||
29750 | listen | ||||
29751 | local | ||||
29752 | localtime | ||||
29753 | lock | ||||
29754 | log | ||||
29755 | lstat | ||||
29756 | lt | ||||
29757 | map | ||||
29758 | mkdir | ||||
29759 | msgctl | ||||
29760 | msgget | ||||
29761 | msgrcv | ||||
29762 | msgsnd | ||||
29763 | my | ||||
29764 | ne | ||||
29765 | next | ||||
29766 | no | ||||
29767 | not | ||||
29768 | oct | ||||
29769 | open | ||||
29770 | opendir | ||||
29771 | or | ||||
29772 | ord | ||||
29773 | our | ||||
29774 | pack | ||||
29775 | pipe | ||||
29776 | pop | ||||
29777 | pos | ||||
29778 | |||||
29779 | printf | ||||
29780 | prototype | ||||
29781 | push | ||||
29782 | quotemeta | ||||
29783 | rand | ||||
29784 | read | ||||
29785 | readdir | ||||
29786 | readlink | ||||
29787 | readline | ||||
29788 | readpipe | ||||
29789 | recv | ||||
29790 | redo | ||||
29791 | ref | ||||
29792 | rename | ||||
29793 | require | ||||
29794 | reset | ||||
29795 | return | ||||
29796 | reverse | ||||
29797 | rewinddir | ||||
29798 | rindex | ||||
29799 | rmdir | ||||
29800 | scalar | ||||
29801 | seek | ||||
29802 | seekdir | ||||
29803 | select | ||||
29804 | semctl | ||||
29805 | semget | ||||
29806 | semop | ||||
29807 | send | ||||
29808 | sethostent | ||||
29809 | setnetent | ||||
29810 | setpgrp | ||||
29811 | setpriority | ||||
29812 | setprotoent | ||||
29813 | setservent | ||||
29814 | setsockopt | ||||
29815 | shift | ||||
29816 | shmctl | ||||
29817 | shmget | ||||
29818 | shmread | ||||
29819 | shmwrite | ||||
29820 | shutdown | ||||
29821 | sin | ||||
29822 | sleep | ||||
29823 | socket | ||||
29824 | socketpair | ||||
29825 | sort | ||||
29826 | splice | ||||
29827 | split | ||||
29828 | sprintf | ||||
29829 | sqrt | ||||
29830 | srand | ||||
29831 | stat | ||||
29832 | study | ||||
29833 | substr | ||||
29834 | symlink | ||||
29835 | syscall | ||||
29836 | sysopen | ||||
29837 | sysread | ||||
29838 | sysseek | ||||
29839 | system | ||||
29840 | syswrite | ||||
29841 | tell | ||||
29842 | telldir | ||||
29843 | tie | ||||
29844 | tied | ||||
29845 | truncate | ||||
29846 | uc | ||||
29847 | ucfirst | ||||
29848 | umask | ||||
29849 | undef | ||||
29850 | unless | ||||
29851 | unlink | ||||
29852 | unpack | ||||
29853 | unshift | ||||
29854 | untie | ||||
29855 | until | ||||
29856 | use | ||||
29857 | utime | ||||
29858 | values | ||||
29859 | vec | ||||
29860 | waitpid | ||||
29861 | warn | ||||
29862 | while | ||||
29863 | write | ||||
29864 | xor | ||||
29865 | |||||
29866 | switch | ||||
29867 | case | ||||
29868 | given | ||||
29869 | when | ||||
29870 | err | ||||
29871 | say | ||||
29872 | ); | ||||
29873 | |||||
29874 | # patched above for SWITCH/CASE given/when err say | ||||
29875 | # 'err' is a fairly safe addition. | ||||
29876 | # TODO: 'default' still needed if appropriate | ||||
29877 | # 'use feature' seen, but perltidy works ok without it. | ||||
29878 | # Concerned that 'default' could break code. | ||||
29879 | 1 | 40µs | push( @Keywords, @value_requestor ); | ||
29880 | |||||
29881 | # These are treated the same but are not keywords: | ||||
29882 | 1 | 600ns | my @extra_vr = qw( | ||
29883 | constant | ||||
29884 | vars | ||||
29885 | ); | ||||
29886 | 1 | 500ns | push( @value_requestor, @extra_vr ); | ||
29887 | |||||
29888 | 1 | 75µs | @expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor); | ||
29889 | |||||
29890 | # this list contains keywords which do not look for arguments, | ||||
29891 | # so that they might be followed by an operator, or at least | ||||
29892 | # not a term. | ||||
29893 | 1 | 3µs | my @operator_requestor = qw( | ||
29894 | endgrent | ||||
29895 | endhostent | ||||
29896 | endnetent | ||||
29897 | endprotoent | ||||
29898 | endpwent | ||||
29899 | endservent | ||||
29900 | fork | ||||
29901 | getgrent | ||||
29902 | gethostent | ||||
29903 | getlogin | ||||
29904 | getnetent | ||||
29905 | getppid | ||||
29906 | getprotoent | ||||
29907 | getpwent | ||||
29908 | getservent | ||||
29909 | setgrent | ||||
29910 | setpwent | ||||
29911 | time | ||||
29912 | times | ||||
29913 | wait | ||||
29914 | wantarray | ||||
29915 | ); | ||||
29916 | |||||
29917 | 1 | 3µs | push( @Keywords, @operator_requestor ); | ||
29918 | |||||
29919 | # These are treated the same but are not considered keywords: | ||||
29920 | 1 | 500ns | my @extra_or = qw( | ||
29921 | STDERR | ||||
29922 | STDIN | ||||
29923 | STDOUT | ||||
29924 | ); | ||||
29925 | |||||
29926 | 1 | 800ns | push( @operator_requestor, @extra_or ); | ||
29927 | |||||
29928 | 1 | 9µs | @expecting_operator_token{@operator_requestor} = | ||
29929 | (1) x scalar(@operator_requestor); | ||||
29930 | |||||
29931 | # these token TYPES expect trailing operator but not a term | ||||
29932 | # note: ++ and -- are post-increment and decrement, 'C' = constant | ||||
29933 | 1 | 1µs | my @operator_requestor_types = qw( ++ -- C <> q ); | ||
29934 | 1 | 1µs | @expecting_operator_types{@operator_requestor_types} = | ||
29935 | (1) x scalar(@operator_requestor_types); | ||||
29936 | |||||
29937 | # these token TYPES consume values (terms) | ||||
29938 | # note: pp and mm are pre-increment and decrement | ||||
29939 | # f=semicolon in for, F=file test operator | ||||
29940 | 1 | 8µs | my @value_requestor_type = qw# | ||
29941 | L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x | ||||
29942 | **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //= | ||||
29943 | <= >= == != => \ > < % * / ? & | ** <=> ~~ !~~ | ||||
29944 | f F pp mm Y p m U J G j >> << ^ t | ||||
29945 | #; | ||||
29946 | 1 | 2µs | push( @value_requestor_type, ',' ) | ||
29947 | ; # (perl doesn't like a ',' in a qw block) | ||||
29948 | 1 | 17µs | @expecting_term_types{@value_requestor_type} = | ||
29949 | (1) x scalar(@value_requestor_type); | ||||
29950 | |||||
29951 | # Note: the following valid token types are not assigned here to | ||||
29952 | # hashes requesting to be followed by values or terms, but are | ||||
29953 | # instead currently hard-coded into sub operator_expected: | ||||
29954 | # ) -> :: Q R Z ] b h i k n v w } # | ||||
29955 | |||||
29956 | # For simple syntax checking, it is nice to have a list of operators which | ||||
29957 | # will really be unhappy if not followed by a term. This includes most | ||||
29958 | # of the above... | ||||
29959 | 1 | 14µs | %really_want_term = %expecting_term_types; | ||
29960 | |||||
29961 | # with these exceptions... | ||||
29962 | 1 | 600ns | delete $really_want_term{'U'}; # user sub, depends on prototype | ||
29963 | 1 | 200ns | delete $really_want_term{'F'}; # file test works on $_ if no following term | ||
29964 | 1 | 100ns | delete $really_want_term{'Y'}; # indirect object, too risky to check syntax; | ||
29965 | # let perl do it | ||||
29966 | |||||
29967 | 1 | 4µs | @_ = qw(q qq qw qx qr s y tr m); | ||
29968 | 1 | 3µs | @is_q_qq_qw_qx_qr_s_y_tr_m{@_} = (1) x scalar(@_); | ||
29969 | |||||
29970 | # These keywords are handled specially in the tokenizer code: | ||||
29971 | 1 | 2µs | my @special_keywords = qw( | ||
29972 | do | ||||
29973 | eval | ||||
29974 | format | ||||
29975 | m | ||||
29976 | package | ||||
29977 | q | ||||
29978 | |||||
29979 | qr | ||||
29980 | qw | ||||
29981 | qx | ||||
29982 | s | ||||
29983 | sub | ||||
29984 | tr | ||||
29985 | y | ||||
29986 | ); | ||||
29987 | 1 | 2µs | push( @Keywords, @special_keywords ); | ||
29988 | |||||
29989 | # Keywords after which list formatting may be used | ||||
29990 | # WARNING: do not include |map|grep|eval or perl may die on | ||||
29991 | # syntax errors (map1.t). | ||||
29992 | 1 | 9µs | my @keyword_taking_list = qw( | ||
29993 | and | ||||
29994 | chmod | ||||
29995 | chomp | ||||
29996 | chop | ||||
29997 | chown | ||||
29998 | dbmopen | ||||
29999 | die | ||||
30000 | elsif | ||||
30001 | exec | ||||
30002 | fcntl | ||||
30003 | for | ||||
30004 | foreach | ||||
30005 | formline | ||||
30006 | getsockopt | ||||
30007 | if | ||||
30008 | index | ||||
30009 | ioctl | ||||
30010 | join | ||||
30011 | kill | ||||
30012 | local | ||||
30013 | msgctl | ||||
30014 | msgrcv | ||||
30015 | msgsnd | ||||
30016 | my | ||||
30017 | open | ||||
30018 | or | ||||
30019 | our | ||||
30020 | pack | ||||
30021 | |||||
30022 | printf | ||||
30023 | push | ||||
30024 | read | ||||
30025 | readpipe | ||||
30026 | recv | ||||
30027 | return | ||||
30028 | reverse | ||||
30029 | rindex | ||||
30030 | seek | ||||
30031 | select | ||||
30032 | semctl | ||||
30033 | semget | ||||
30034 | send | ||||
30035 | setpriority | ||||
30036 | setsockopt | ||||
30037 | shmctl | ||||
30038 | shmget | ||||
30039 | shmread | ||||
30040 | shmwrite | ||||
30041 | socket | ||||
30042 | socketpair | ||||
30043 | sort | ||||
30044 | splice | ||||
30045 | split | ||||
30046 | sprintf | ||||
30047 | substr | ||||
30048 | syscall | ||||
30049 | sysopen | ||||
30050 | sysread | ||||
30051 | sysseek | ||||
30052 | system | ||||
30053 | syswrite | ||||
30054 | tie | ||||
30055 | unless | ||||
30056 | unlink | ||||
30057 | unpack | ||||
30058 | unshift | ||||
30059 | until | ||||
30060 | vec | ||||
30061 | warn | ||||
30062 | while | ||||
30063 | given | ||||
30064 | when | ||||
30065 | ); | ||||
30066 | 1 | 20µs | @is_keyword_taking_list{@keyword_taking_list} = | ||
30067 | (1) x scalar(@keyword_taking_list); | ||||
30068 | |||||
30069 | # These are not used in any way yet | ||||
30070 | # my @unused_keywords = qw( | ||||
30071 | # __FILE__ | ||||
30072 | # __LINE__ | ||||
30073 | # __PACKAGE__ | ||||
30074 | # ); | ||||
30075 | |||||
30076 | # The list of keywords was originally extracted from function 'keyword' in | ||||
30077 | # perl file toke.c version 5.005.03, using this utility, plus a | ||||
30078 | # little editing: (file getkwd.pl): | ||||
30079 | # while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } } | ||||
30080 | # Add 'get' prefix where necessary, then split into the above lists. | ||||
30081 | # This list should be updated as necessary. | ||||
30082 | # The list should not contain these special variables: | ||||
30083 | # ARGV DATA ENV SIG STDERR STDIN STDOUT | ||||
30084 | # __DATA__ __END__ | ||||
30085 | |||||
30086 | 1 | 131µs | @is_keyword{@Keywords} = (1) x scalar(@Keywords); | ||
30087 | 1 | 416µs | 1 | 484µs | } # spent 484µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@29596 |
30088 | 1 | 42µs | 1; | ||
30089 | __END__ | ||||
# spent 6µs within Perl::Tidy::CORE:subst which was called:
# once (6µs+0s) by Perl::Tidy::BEGIN@81 at line 82 | |||||
# spent 1µs within Perl::Tidy::CORE:substcont which was called 2 times, avg 650ns/call:
# 2 times (1µs+0s) by Perl::Tidy::BEGIN@81 at line 82, avg 650ns/call |