| Filename | /Users/timbo/perl5/perlbrew/perls/perl-5.18.2/lib/site_perl/5.18.2/Perl/Tidy.pm |
| Statements | Executed 772 statements in 93.6ms |
| Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
|---|---|---|---|---|---|
| 1 | 1 | 1 | 1.84ms | 1.96ms | Perl::Tidy::BEGIN@78 |
| 1 | 1 | 1 | 628µs | 1.00ms | Perl::Tidy::BEGIN@76 |
| 1 | 1 | 1 | 484µs | 484µs | Perl::Tidy::Tokenizer::BEGIN@29596 |
| 1 | 1 | 1 | 93µs | 93µs | Perl::Tidy::Formatter::BEGIN@6198 |
| 1 | 1 | 1 | 90µs | 90µs | Perl::Tidy::Formatter::BEGIN@8401 |
| 1 | 1 | 1 | 86µs | 86µs | Perl::Tidy::Formatter::BEGIN@13836 |
| 1 | 1 | 1 | 50µs | 50µs | Perl::Tidy::HtmlWriter::BEGIN@4762 |
| 1 | 1 | 1 | 30µs | 30µs | Perl::Tidy::Formatter::BEGIN@13541 |
| 1 | 1 | 1 | 28µs | 28µs | Perl::Tidy::Formatter::BEGIN@8312 |
| 1 | 1 | 1 | 22µs | 22µs | Perl::Tidy::BEGIN@215 |
| 1 | 1 | 1 | 17µs | 48µs | Perl::Tidy::Formatter::BEGIN@5950 |
| 1 | 1 | 1 | 16µs | 23µs | Perl::Tidy::BEGIN@81 |
| 1 | 1 | 1 | 16µs | 86µs | Perl::Tidy::Tokenizer::BEGIN@27475 |
| 1 | 1 | 1 | 15µs | 15µs | Perl::Tidy::Formatter::BEGIN@17043 |
| 1 | 1 | 1 | 15µs | 44µs | Perl::Tidy::Tokenizer::BEGIN@22672 |
| 1 | 1 | 1 | 14µs | 14µs | Perl::Tidy::BEGIN@56 |
| 1 | 1 | 1 | 14µs | 14µs | Perl::Tidy::Formatter::BEGIN@14914 |
| 1 | 1 | 1 | 14µs | 14µs | Perl::Tidy::VerticalAligner::BEGIN@20752 |
| 1 | 1 | 1 | 12µs | 12µs | Perl::Tidy::Formatter::BEGIN@15938 |
| 1 | 1 | 1 | 12µs | 69µs | Perl::Tidy::FileWriter::BEGIN@22177 |
| 1 | 1 | 1 | 12µs | 12µs | Perl::Tidy::Formatter::BEGIN@11610 |
| 1 | 1 | 1 | 11µs | 11µs | Perl::Tidy::Formatter::BEGIN@8152 |
| 1 | 1 | 1 | 11µs | 53µs | Perl::Tidy::Tokenizer::BEGIN@22598 |
| 1 | 1 | 1 | 11µs | 44µs | Perl::Tidy::Tokenizer::BEGIN@22666 |
| 1 | 1 | 1 | 11µs | 46µs | Perl::Tidy::VerticalAligner::Line::BEGIN@19322 |
| 1 | 1 | 1 | 11µs | 110µs | Perl::Tidy::BEGIN@2125 |
| 1 | 1 | 1 | 11µs | 42µs | Perl::Tidy::BEGIN@75 |
| 1 | 1 | 1 | 11µs | 38µs | Perl::Tidy::BEGIN@77 |
| 1 | 1 | 1 | 11µs | 58µs | Perl::Tidy::Tokenizer::BEGIN@22580 |
| 1 | 1 | 1 | 11µs | 29µs | Perl::Tidy::VerticalAligner::Line::BEGIN@19319 |
| 1 | 1 | 1 | 11µs | 72µs | Perl::Tidy::IndentationItem::BEGIN@19077 |
| 1 | 1 | 1 | 10µs | 45µs | Perl::Tidy::HtmlWriter::BEGIN@4522 |
| 1 | 1 | 1 | 10µs | 22µs | Perl::Tidy::VerticalAligner::Alignment::BEGIN@19496 |
| 1 | 1 | 1 | 10µs | 61µs | Perl::Tidy::VerticalAligner::Line::BEGIN@19320 |
| 1 | 1 | 1 | 10µs | 48µs | Perl::Tidy::IOScalar::BEGIN@3659 |
| 1 | 1 | 1 | 10µs | 63µs | Perl::Tidy::Logger::BEGIN@4303 |
| 1 | 1 | 1 | 10µs | 22µs | Perl::Tidy::BEGIN@59 |
| 1 | 1 | 1 | 10µs | 10µs | Perl::Tidy::Formatter::BEGIN@12316 |
| 1 | 1 | 1 | 10µs | 10µs | Perl::Tidy::Formatter::BEGIN@12798 |
| 1 | 1 | 1 | 10µs | 40µs | Perl::Tidy::Formatter::BEGIN@5952 |
| 1 | 1 | 1 | 10µs | 51µs | Perl::Tidy::Formatter::BEGIN@5944 |
| 1 | 1 | 1 | 10µs | 2.07ms | Perl::Tidy::Formatter::BEGIN@5980 |
| 1 | 1 | 1 | 9µs | 72µs | Perl::Tidy::BEGIN@79 |
| 1 | 1 | 1 | 9µs | 39µs | Perl::Tidy::IOScalarArray::BEGIN@3743 |
| 1 | 1 | 1 | 9µs | 38µs | Perl::Tidy::VerticalAligner::Line::BEGIN@19334 |
| 1 | 1 | 1 | 9µs | 4.38ms | Perl::Tidy::Tokenizer::BEGIN@22601 |
| 1 | 1 | 1 | 9µs | 110µs | Perl::Tidy::HtmlWriter::BEGIN@4525 |
| 1 | 1 | 1 | 9µs | 50µs | Perl::Tidy::Tokenizer::BEGIN@22581 |
| 1 | 1 | 1 | 9µs | 41µs | Perl::Tidy::Formatter::BEGIN@5979 |
| 1 | 1 | 1 | 9µs | 38µs | Perl::Tidy::Formatter::BEGIN@5956 |
| 1 | 1 | 1 | 9µs | 45µs | Perl::Tidy::Tokenizer::BEGIN@23592 |
| 1 | 1 | 1 | 8µs | 8µs | Perl::Tidy::Formatter::BEGIN@10806 |
| 1 | 1 | 1 | 8µs | 42µs | Perl::Tidy::Formatter::BEGIN@5946 |
| 1 | 1 | 1 | 8µs | 38µs | Perl::Tidy::BEGIN@61 |
| 1 | 1 | 1 | 8µs | 44µs | Perl::Tidy::Tokenizer::BEGIN@22582 |
| 1 | 1 | 1 | 8µs | 201µs | Perl::Tidy::Tokenizer::BEGIN@22644 |
| 1 | 1 | 1 | 8µs | 43µs | Perl::Tidy::HtmlWriter::BEGIN@4955 |
| 1 | 1 | 1 | 8µs | 8µs | Perl::Tidy::Tokenizer::BEGIN@22575 |
| 1 | 1 | 1 | 8µs | 580µs | Perl::Tidy::VerticalAligner::BEGIN@19616 |
| 1 | 1 | 1 | 8µs | 72µs | Perl::Tidy::BEGIN@64 |
| 1 | 1 | 1 | 8µs | 43µs | Perl::Tidy::Formatter::BEGIN@5945 |
| 1 | 1 | 1 | 8µs | 45µs | Perl::Tidy::Logger::BEGIN@4339 |
| 1 | 1 | 1 | 8µs | 41µs | Perl::Tidy::VerticalAligner::BEGIN@19600 |
| 1 | 1 | 1 | 8µs | 24µs | Perl::Tidy::BEGIN@60 |
| 1 | 1 | 1 | 8µs | 40µs | Perl::Tidy::Formatter::BEGIN@5948 |
| 1 | 1 | 1 | 8µs | 45µs | Perl::Tidy::Formatter::BEGIN@6282 |
| 1 | 1 | 1 | 8µs | 53µs | Perl::Tidy::Tokenizer::BEGIN@22583 |
| 1 | 1 | 1 | 8µs | 45µs | Perl::Tidy::Formatter::BEGIN@5949 |
| 1 | 1 | 1 | 8µs | 43µs | Perl::Tidy::Tokenizer::BEGIN@22584 |
| 1 | 1 | 1 | 8µs | 19µs | Perl::Tidy::VerticalAligner::Line::BEGIN@19380 |
| 1 | 1 | 1 | 8µs | 8µs | Perl::Tidy::Formatter::BEGIN@5940 |
| 1 | 1 | 1 | 8µs | 42µs | Perl::Tidy::HtmlWriter::BEGIN@4956 |
| 1 | 1 | 1 | 8µs | 37µs | Perl::Tidy::VerticalAligner::Alignment::BEGIN@19506 |
| 1 | 1 | 1 | 7µs | 45µs | Perl::Tidy::Formatter::BEGIN@5947 |
| 1 | 1 | 1 | 7µs | 42µs | Perl::Tidy::Formatter::BEGIN@6298 |
| 1 | 1 | 1 | 7µs | 41µs | Perl::Tidy::Formatter::BEGIN@6302 |
| 1 | 1 | 1 | 7µs | 43µs | Perl::Tidy::Tokenizer::BEGIN@23593 |
| 1 | 1 | 1 | 7µs | 38µs | Perl::Tidy::Formatter::BEGIN@5951 |
| 1 | 1 | 1 | 7µs | 40µs | Perl::Tidy::VerticalAligner::Alignment::BEGIN@19501 |
| 1 | 1 | 1 | 7µs | 36µs | Perl::Tidy::Formatter::BEGIN@6292 |
| 1 | 1 | 1 | 7µs | 40µs | Perl::Tidy::VerticalAligner::Alignment::BEGIN@19503 |
| 1 | 1 | 1 | 7µs | 37µs | Perl::Tidy::Formatter::BEGIN@6288 |
| 1 | 1 | 1 | 7µs | 36µs | Perl::Tidy::Tokenizer::BEGIN@22671 |
| 1 | 1 | 1 | 7µs | 16µs | Perl::Tidy::VerticalAligner::Alignment::BEGIN@19542 |
| 1 | 1 | 1 | 7µs | 35µs | Perl::Tidy::HtmlWriter::BEGIN@4961 |
| 1 | 1 | 1 | 7µs | 40µs | Perl::Tidy::Tokenizer::BEGIN@22665 |
| 1 | 1 | 1 | 7µs | 36µs | Perl::Tidy::VerticalAligner::Alignment::BEGIN@19505 |
| 1 | 1 | 1 | 7µs | 39µs | Perl::Tidy::VerticalAligner::Line::BEGIN@19324 |
| 1 | 1 | 1 | 7µs | 49µs | Perl::Tidy::Formatter::BEGIN@6295 |
| 1 | 1 | 1 | 7µs | 37µs | Perl::Tidy::IndentationItem::BEGIN@19079 |
| 1 | 1 | 1 | 7µs | 40µs | Perl::Tidy::VerticalAligner::Line::BEGIN@19323 |
| 1 | 1 | 1 | 7µs | 38µs | Perl::Tidy::VerticalAligner::Line::BEGIN@19326 |
| 1 | 1 | 1 | 7µs | 7µs | Perl::Tidy::Formatter::BEGIN@11861 |
| 1 | 1 | 1 | 7µs | 38µs | Perl::Tidy::Formatter::BEGIN@6283 |
| 1 | 1 | 1 | 7µs | 36µs | Perl::Tidy::Formatter::BEGIN@6289 |
| 1 | 1 | 1 | 7µs | 39µs | Perl::Tidy::IndentationItem::BEGIN@19078 |
| 1 | 1 | 1 | 7µs | 36µs | Perl::Tidy::VerticalAligner::Alignment::BEGIN@19507 |
| 1 | 1 | 1 | 6µs | 36µs | Perl::Tidy::Formatter::BEGIN@5954 |
| 1 | 1 | 1 | 6µs | 35µs | Perl::Tidy::Formatter::BEGIN@6291 |
| 1 | 1 | 1 | 6µs | 36µs | Perl::Tidy::HtmlWriter::BEGIN@4957 |
| 1 | 1 | 1 | 6µs | 38µs | Perl::Tidy::HtmlWriter::BEGIN@4963 |
| 1 | 1 | 1 | 6µs | 40µs | Perl::Tidy::IndentationItem::BEGIN@19080 |
| 1 | 1 | 1 | 6µs | 36µs | Perl::Tidy::IndentationItem::BEGIN@19083 |
| 1 | 1 | 1 | 6µs | 36µs | Perl::Tidy::IndentationItem::BEGIN@19090 |
| 1 | 1 | 1 | 6µs | 36µs | Perl::Tidy::VerticalAligner::Alignment::BEGIN@19502 |
| 1 | 1 | 1 | 6µs | 39µs | Perl::Tidy::VerticalAligner::Line::BEGIN@19325 |
| 1 | 1 | 1 | 6µs | 46µs | Perl::Tidy::VerticalAligner::Line::BEGIN@19327 |
| 1 | 1 | 1 | 6µs | 34µs | Perl::Tidy::VerticalAligner::Line::BEGIN@19330 |
| 1 | 1 | 1 | 6µs | 37µs | Perl::Tidy::Formatter::BEGIN@5957 |
| 1 | 1 | 1 | 6µs | 36µs | Perl::Tidy::Formatter::BEGIN@6284 |
| 1 | 1 | 1 | 6µs | 36µs | Perl::Tidy::Formatter::BEGIN@6287 |
| 1 | 1 | 1 | 6µs | 35µs | Perl::Tidy::HtmlWriter::BEGIN@4958 |
| 1 | 1 | 1 | 6µs | 35µs | Perl::Tidy::HtmlWriter::BEGIN@4959 |
| 1 | 1 | 1 | 6µs | 36µs | Perl::Tidy::IndentationItem::BEGIN@19084 |
| 1 | 1 | 1 | 6µs | 37µs | Perl::Tidy::Formatter::BEGIN@5953 |
| 1 | 1 | 1 | 6µs | 36µs | Perl::Tidy::Formatter::BEGIN@5955 |
| 1 | 1 | 1 | 6µs | 35µs | Perl::Tidy::IndentationItem::BEGIN@19086 |
| 1 | 1 | 1 | 6µs | 48µs | Perl::Tidy::IndentationItem::BEGIN@19087 |
| 1 | 1 | 1 | 6µs | 35µs | Perl::Tidy::Tokenizer::BEGIN@22667 |
| 1 | 1 | 1 | 6µs | 40µs | Perl::Tidy::Tokenizer::BEGIN@22670 |
| 1 | 1 | 1 | 6µs | 36µs | Perl::Tidy::Tokenizer::BEGIN@23594 |
| 1 | 1 | 1 | 6µs | 36µs | Perl::Tidy::VerticalAligner::Alignment::BEGIN@19504 |
| 1 | 1 | 1 | 6µs | 6µs | Perl::Tidy::VerticalAligner::BEGIN@19595 |
| 1 | 1 | 1 | 6µs | 35µs | Perl::Tidy::VerticalAligner::Line::BEGIN@19329 |
| 1 | 1 | 1 | 6µs | 35µs | Perl::Tidy::Formatter::BEGIN@6290 |
| 1 | 1 | 1 | 6µs | 35µs | Perl::Tidy::IndentationItem::BEGIN@19095 |
| 1 | 1 | 1 | 6µs | 36µs | Perl::Tidy::Tokenizer::BEGIN@23595 |
| 1 | 1 | 1 | 6µs | 36µs | Perl::Tidy::VerticalAligner::BEGIN@19601 |
| 1 | 1 | 1 | 6µs | 34µs | Perl::Tidy::HtmlWriter::BEGIN@4962 |
| 1 | 1 | 1 | 6µs | 34µs | Perl::Tidy::IndentationItem::BEGIN@19093 |
| 1 | 1 | 1 | 6µs | 35µs | Perl::Tidy::VerticalAligner::Line::BEGIN@19331 |
| 1 | 1 | 1 | 6µs | 34µs | Perl::Tidy::IndentationItem::BEGIN@19085 |
| 1 | 1 | 1 | 6µs | 36µs | Perl::Tidy::VerticalAligner::BEGIN@19602 |
| 1 | 1 | 1 | 6µs | 34µs | Perl::Tidy::VerticalAligner::Line::BEGIN@19332 |
| 1 | 1 | 1 | 6µs | 39µs | Perl::Tidy::VerticalAligner::Line::BEGIN@19333 |
| 1 | 1 | 1 | 6µs | 34µs | Perl::Tidy::HtmlWriter::BEGIN@4960 |
| 1 | 1 | 1 | 6µs | 35µs | Perl::Tidy::IndentationItem::BEGIN@19082 |
| 1 | 1 | 1 | 6µs | 35µs | Perl::Tidy::IndentationItem::BEGIN@19092 |
| 1 | 1 | 1 | 6µs | 35µs | Perl::Tidy::Tokenizer::BEGIN@22675 |
| 1 | 1 | 1 | 6µs | 34µs | Perl::Tidy::IndentationItem::BEGIN@19094 |
| 1 | 1 | 1 | 6µs | 34µs | Perl::Tidy::VerticalAligner::BEGIN@19603 |
| 1 | 1 | 1 | 6µs | 35µs | Perl::Tidy::VerticalAligner::Line::BEGIN@19328 |
| 1 | 1 | 1 | 6µs | 6µs | Perl::Tidy::CORE:subst (opcode) |
| 1 | 1 | 1 | 3µs | 3µs | Perl::Tidy::BEGIN@57 |
| 2 | 1 | 1 | 1µs | 1µs | Perl::Tidy::CORE:substcont (opcode) |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Debugger::close_debug_file |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Debugger::new |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Debugger::really_open_debug_file |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Debugger::write_debug_entry |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::DevNull::close |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::DevNull::new |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::DevNull::print |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Diagnostics::new |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Diagnostics::set_input_file |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Diagnostics::write_diagnostics |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Die |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Exit |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::FileWriter::decrement_output_line_number |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::FileWriter::get_consecutive_nonblank_lines |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::FileWriter::get_output_line_number |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::FileWriter::new |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::FileWriter::report_line_length_errors |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::FileWriter::require_blank_code_lines |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::FileWriter::reset_consecutive_blank_lines |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::FileWriter::tee_off |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::FileWriter::tee_on |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::FileWriter::want_blank_line |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::FileWriter::write_blank_code_line |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::FileWriter::write_code_line |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::FileWriter::write_line |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::FileWriter::write_logfile_entry |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::DESTROY |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::__ANON__[:5961] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::__ANON__[:7683] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::__ANON__[:7694] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::_decrement_count |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::_increment_count |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::accumulate_block_text |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::accumulate_csc_text |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::add_closing_side_comment |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::balance_csc_text |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::bias_table_key |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::black_box |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::break_all_chain_tokens |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::break_equals |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::check_for_long_gnu_style_lines |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::check_for_new_minimum_depth |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::check_options |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::clear_breakpoint_undo_stack |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::comma_arrow_count |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::compactify_table |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::compare_indentation_levels |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::complain |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::copy_old_breakpoints |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::correct_lp_indentation |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::create_one_line_block |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::destroy_one_line_block |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::do_uncontained_comma_breaks |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::dump_want_left_space |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::dump_want_right_space |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::excess_line_length |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::extract_token |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::find_token_starting_list |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::finish_formatting |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::finish_lp_batch |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::flush |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::get_AVAILABLE_SPACES_to_go |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::get_RECOVERABLE_SPACES |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::get_SPACES |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::get_added_semicolon_count |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::get_count |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::get_maximum_fields_wanted |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::get_opening_indentation |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::get_saw_brace_error |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::get_seqno |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::in_same_container |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::insert_additional_breaks |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::insert_final_breaks |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::insert_new_token_to_go |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::is_essential_whitespace |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::is_unbalanced_batch |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::is_unbreakable_container |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::leading_spaces_to_go |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::lookup_opening_indentation |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::make_alignment_patterns |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::make_bli_pattern |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::make_block_brace_vertical_tightness_pattern |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::make_block_pattern |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::make_closing_side_comment_list_pattern |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::make_closing_side_comment_prefix |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::make_else_csc_text |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::make_format_skipping_pattern |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::make_static_block_comment_pattern |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::make_static_side_comment_pattern |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::match_opening_and_closing_tokens |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::max |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::maximum_line_length |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::maximum_line_length_for_level |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::maximum_number_of_fields |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::min |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::new |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::new_lp_indentation_item |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::note_added_semicolon |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::note_deleted_semicolon |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::note_embedded_tab |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::output_line_to_go |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::pad_array_to_go |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::pad_token |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::prepare_for_new_input_lines |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::previous_nonblank_token |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::print_line_of_tokens |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::recombine_breakpoints |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::reduce_lp_indentation |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::report_definite_bug |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::reset_block_text_accumulator |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::restore_current_token |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::rtoken_length |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::save_current_token |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::save_opening_indentation |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::scan_list |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::secret_operator_whitespace |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::send_lines_to_vertical_aligner |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::set_adjusted_indentation |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::set_block_text_accumulator |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::set_bond_strengths |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::set_closing_breakpoint |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::set_comma_breakpoints |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::set_comma_breakpoints_do |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::set_continuation_breaks |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::set_fake_breakpoint |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::set_for_semicolon_breakpoints |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::set_forced_breakpoint |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::set_leading_whitespace |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::set_logical_breakpoints |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::set_logical_padding |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::set_nobreaks |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::set_non_alignment_flags |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::set_ragged_breakpoints |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::set_vertical_alignment_markers |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::set_vertical_tightness_flags |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::set_white_space_flag |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::split_words |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::starting_one_line_block |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::store_token_to_go |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::study_list_complexity |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::table_columns_available |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::terminal_type |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::token_length |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::token_sequence_length |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::total_line_length |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::trim |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::undo_ci |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::undo_forced_breakpoint_stack |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::undo_lp_ci |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::unstore_token_to_go |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::want_blank_line |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::warning |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::we_are_at_the_last_line |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::write_diagnostics |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::write_line |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::write_logfile_entry |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Formatter::write_unindented_line |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::HtmlWriter::__ANON__[:4680] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::HtmlWriter::__ANON__[:4690] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::HtmlWriter::__ANON__[:5185] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::HtmlWriter::__ANON__[:5210] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::HtmlWriter::add_toc_item |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::HtmlWriter::change_anchor_names |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::HtmlWriter::check_RGB |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::HtmlWriter::check_options |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::HtmlWriter::close_html_file |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::HtmlWriter::escape_html |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::HtmlWriter::finish_formatting |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::HtmlWriter::make_abbreviated_names |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::HtmlWriter::make_frame |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::HtmlWriter::make_getopt_long_names |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::HtmlWriter::markup_html_element |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::HtmlWriter::markup_tokens |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::HtmlWriter::new |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::HtmlWriter::pod_to_html |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::HtmlWriter::set_default_color |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::HtmlWriter::set_default_properties |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::HtmlWriter::write_frame_html |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::HtmlWriter::write_line |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::HtmlWriter::write_style_sheet_data |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::HtmlWriter::write_style_sheet_file |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::HtmlWriter::write_toc_html |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::IOScalar::close |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::IOScalar::getline |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::IOScalar::new |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::IOScalar::print |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::IOScalarArray::close |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::IOScalarArray::getline |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::IOScalarArray::new |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::IOScalarArray::print |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::IndentationItem::decrease_AVAILABLE_SPACES |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::IndentationItem::decrease_SPACES |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::IndentationItem::get_ALIGN_PAREN |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::IndentationItem::get_ARROW_COUNT |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::IndentationItem::get_AVAILABLE_SPACES |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::IndentationItem::get_CI_LEVEL |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::IndentationItem::get_CLOSED |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::IndentationItem::get_COMMA_COUNT |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::IndentationItem::get_HAVE_CHILD |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::IndentationItem::get_INDEX |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::IndentationItem::get_LEVEL |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::IndentationItem::get_MARKED |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::IndentationItem::get_RECOVERABLE_SPACES |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::IndentationItem::get_SEQUENCE_NUMBER |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::IndentationItem::get_SPACES |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::IndentationItem::get_STACK_DEPTH |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::IndentationItem::get_STARTING_INDEX |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::IndentationItem::increase_RECOVERABLE_SPACES |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::IndentationItem::new |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::IndentationItem::permanently_decrease_AVAILABLE_SPACES |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::IndentationItem::set_ARROW_COUNT |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::IndentationItem::set_CLOSED |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::IndentationItem::set_COMMA_COUNT |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::IndentationItem::set_HAVE_CHILD |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::IndentationItem::set_MARKED |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::IndentationItem::set_RECOVERABLE_SPACES |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::IndentationItem::tentatively_decrease_AVAILABLE_SPACES |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::LineBuffer::get_line |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::LineBuffer::new |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::LineBuffer::peek_ahead |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::LineSink::close_output_file |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::LineSink::close_tee_file |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::LineSink::new |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::LineSink::really_open_tee_file |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::LineSink::tee_off |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::LineSink::tee_on |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::LineSink::write_line |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::LineSource::close_input_file |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::LineSource::get_line |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::LineSource::new |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Logger::ask_user_for_bug_report |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Logger::black_box |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Logger::block_log_output |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Logger::brace_warning |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Logger::complain |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Logger::finish |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Logger::get_saw_brace_error |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Logger::get_use_prefix |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Logger::get_warning_count |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Logger::increment_brace_error |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Logger::interrupt_logfile |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Logger::logfile_output |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Logger::make_line_information_string |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Logger::new |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Logger::report_definite_bug |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Logger::report_possible_bug |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Logger::resume_logfile |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Logger::unblock_log_output |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Logger::warning |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Logger::we_are_at_the_last_line |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Logger::write_column_headings |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Logger::write_logfile_entry |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::DESTROY |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::__ANON__[:22588] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::__ANON__[:24021] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::__ANON__[:24025] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::__ANON__[:24050] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::__ANON__[:24164] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::__ANON__[:24184] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::__ANON__[:24196] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::__ANON__[:24221] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::__ANON__[:24228] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::__ANON__[:24235] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::__ANON__[:24242] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::__ANON__[:24278] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::__ANON__[:24400] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::__ANON__[:24429] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::__ANON__[:24448] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::__ANON__[:24465] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::__ANON__[:24491] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::__ANON__[:24515] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::__ANON__[:24527] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::__ANON__[:24561] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::__ANON__[:24575] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::__ANON__[:24581] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::__ANON__[:24591] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::__ANON__[:24604] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::__ANON__[:24622] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::__ANON__[:24658] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::__ANON__[:24689] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::__ANON__[:24693] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::__ANON__[:24734] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::__ANON__[:24740] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::__ANON__[:24750] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::__ANON__[:24760] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::__ANON__[:24771] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::__ANON__[:24776] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::__ANON__[:24781] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::__ANON__[:24786] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::_decrement_count |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::_increment_count |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::brace_warning |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::check_final_nesting_depths |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::check_prototype |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::code_block_type |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::complain |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::decide_if_code_block |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::decrease_nesting_depth |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::do_quote |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::do_scan_package |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::do_scan_sub |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::dump_functions |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::dump_token_types |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::error_if_expecting_OPERATOR |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::error_if_expecting_TERM |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::find_angle_operator_termination |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::find_here_doc |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::find_next_nonblank_token |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::find_next_nonblank_token_on_this_line |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::find_starting_indentation_level |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::follow_quoted_string |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::get_count |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::get_indentation_level |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::get_input_line_number |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::get_line |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::get_saw_brace_error |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::guess_if_here_doc |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::guess_if_pattern_or_conditional |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::guess_if_pattern_or_division |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::guess_old_indentation_level |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::increase_nesting_depth |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::increment_brace_error |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::indicate_error |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::initialize_tokenizer_state |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::interrupt_logfile |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::inverse_pretoken_map |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::is_non_structural_brace |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::label_ok |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::make_numbered_line |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::matching_end_token |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::new |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::new_statement_ok |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::numerator_expected |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::ones_count |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::operator_expected |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::pattern_expected |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::peek_ahead_for_n_nonblank_pre_tokens |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::peek_ahead_for_nonblank_token |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::peeked_ahead |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::pre_tokenize |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::prepare_for_a_new_file |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::report_definite_bug |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::report_tokenization_errors |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::report_v_string |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::reset_indentation_level |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::restore_tokenizer_state |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::resume_logfile |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::save_tokenizer_state |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::scan_bare_identifier |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::scan_bare_identifier_do |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::scan_id |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::scan_id_do |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::scan_identifier |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::scan_identifier_do |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::scan_number |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::scan_number_do |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::scan_replacement_text |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::show_tokens |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::tokenize_this_line |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::unexpected |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::warning |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::write_diagnostics |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::write_error_indicator_pair |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::write_logfile_entry |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Tokenizer::write_on_underline |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Alignment::DESTROY |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Alignment::_decrement_count |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Alignment::_increment_count |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Alignment::get_column |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Alignment::get_count |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Alignment::get_ending_line |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Alignment::get_matching_token |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Alignment::get_serial_number |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Alignment::get_starting_column |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Alignment::get_starting_line |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Alignment::increment_column |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Alignment::new |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Alignment::restore_column |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Alignment::save_column |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Alignment::set_column |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Alignment::set_ending_line |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Alignment::set_matching_token |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Alignment::set_starting_column |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Alignment::set_starting_line |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::DESTROY |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::_decrement_count |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::_increment_count |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::current_field_width |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::field_width_growth |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::get_alignment |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::get_alignments |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::get_available_space_on_right |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::get_column |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::get_count |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::get_indentation |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::get_is_hanging_side_comment |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::get_jmax |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::get_jmax_original_line |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::get_leading_space_count |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::get_list_type |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::get_outdent_long_lines |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::get_rfields |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::get_rpatterns |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::get_rtokens |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::get_rvertical_tightness_flags |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::get_starting_column |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::increase_field_width |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::increment_column |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::new |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::set_alignment |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::set_alignments |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::set_column |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::set_indentation |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::set_is_hanging_side_comment |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::set_jmax |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::set_jmax_original_line |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::set_leading_space_count |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::set_list_type |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::set_outdent_long_lines |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::set_rfields |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::set_rpatterns |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::set_rtokens |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::Line::starting_field_width |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::__ANON__[:19607] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::add_to_group |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::adjust_side_comment |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::check_fit |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::check_match |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::combine_fields |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::decide_if_aligned |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::decide_if_list |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::dump_alignments |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::dump_array |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::dump_valign_buffer |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::eliminate_new_fields |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::eliminate_old_fields |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::fix_terminal_else |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::fix_terminal_ternary |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::flush |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::forget_side_comment |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::get_RECOVERABLE_SPACES |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::get_SPACES |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::get_STACK_DEPTH |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::get_extra_leading_spaces |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::get_leading_string |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::get_output_line_number |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::improve_continuation_indentation |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::initialize |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::initialize_for_new_group |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::join_hanging_comment |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::level_change |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::make_alignment |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::make_side_comment |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::maximum_line_length_for_level |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::my_flush |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::reduce_valign_buffer_indentation |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::report_anything_unusual |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::report_definite_bug |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::restore_alignment_columns |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::save_alignment_columns |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::valign_input |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::valign_output_step_A |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::valign_output_step_B |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::valign_output_step_C |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::valign_output_step_D |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::warning |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::write_diagnostics |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::VerticalAligner::write_logfile_entry |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Warn |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Win_Config_Locs |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::Win_OS_Type |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::__ANON__[:111] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::__ANON__[:114] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::__ANON__[:125] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::__ANON__[:128] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::__ANON__[:142] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::__ANON__[:145] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::__ANON__[:1507] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::__ANON__[:160] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::__ANON__[:163] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::__ANON__[:2378] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::__ANON__[:2857] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::__ANON__[:331] |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::_process_command_line |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::catfile |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::check_options |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::check_syntax |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::check_vms_filename |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::do_syntax_check |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::dump_config_file |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::dump_defaults |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::dump_long_names |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::dump_short_names |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::expand_command_abbreviations |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::fileglob_to_re |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::find_config_file |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::find_file_upwards |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::find_input_line_ending |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::generate_options |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::get_stream_as_named_file |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::is_unix |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::look_for_Windows |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::make_extension |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::parse_args |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::perltidy |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::process_command_line |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::process_this_file |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::read_config_file |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::readable_options |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::show_version |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::streamhandle |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::strip_comment |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::usage |
| 0 | 0 | 0 | 0s | 0s | Perl::Tidy::write_logfile_header |
| Line | State ments |
Time on line |
Calls | Time in subs |
Code |
|---|---|---|---|---|---|
| 1 | # | ||||
| 2 | ############################################################ | ||||
| 3 | # | ||||
| 4 | # perltidy - a perl script indenter and formatter | ||||
| 5 | # | ||||
| 6 | # Copyright (c) 2000-2014 by Steve Hancock | ||||
| 7 | # Distributed under the GPL license agreement; see file COPYING | ||||
| 8 | # | ||||
| 9 | # This program is free software; you can redistribute it and/or modify | ||||
| 10 | # it under the terms of the GNU General Public License as published by | ||||
| 11 | # the Free Software Foundation; either version 2 of the License, or | ||||
| 12 | # (at your option) any later version. | ||||
| 13 | # | ||||
| 14 | # This program is distributed in the hope that it will be useful, | ||||
| 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| 17 | # GNU General Public License for more details. | ||||
| 18 | # | ||||
| 19 | # You should have received a copy of the GNU General Public License along | ||||
| 20 | # with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 21 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
| 22 | # | ||||
| 23 | # For brief instructions, try 'perltidy -h'. | ||||
| 24 | # For more complete documentation, try 'man perltidy' | ||||
| 25 | # or visit http://perltidy.sourceforge.net | ||||
| 26 | # | ||||
| 27 | # This script is an example of the default style. It was formatted with: | ||||
| 28 | # | ||||
| 29 | # perltidy Tidy.pm | ||||
| 30 | # | ||||
| 31 | # Code Contributions: See ChangeLog.html for a complete history. | ||||
| 32 | # Michael Cartmell supplied code for adaptation to VMS and helped with | ||||
| 33 | # v-strings. | ||||
| 34 | # Hugh S. Myers supplied sub streamhandle and the supporting code to | ||||
| 35 | # create a Perl::Tidy module which can operate on strings, arrays, etc. | ||||
| 36 | # Yves Orton supplied coding to help detect Windows versions. | ||||
| 37 | # Axel Rose supplied a patch for MacPerl. | ||||
| 38 | # Sebastien Aperghis-Tramoni supplied a patch for the defined or operator. | ||||
| 39 | # Dan Tyrell contributed a patch for binary I/O. | ||||
| 40 | # Ueli Hugenschmidt contributed a patch for -fpsc | ||||
| 41 | # Sam Kington supplied a patch to identify the initial indentation of | ||||
| 42 | # entabbed code. | ||||
| 43 | # jonathan swartz supplied patches for: | ||||
| 44 | # * .../ pattern, which looks upwards from directory | ||||
| 45 | # * --notidy, to be used in directories where we want to avoid | ||||
| 46 | # accidentally tidying | ||||
| 47 | # * prefilter and postfilter | ||||
| 48 | # * iterations option | ||||
| 49 | # | ||||
| 50 | # Many others have supplied key ideas, suggestions, and bug reports; | ||||
| 51 | # see the CHANGES file. | ||||
| 52 | # | ||||
| 53 | ############################################################ | ||||
| 54 | |||||
| 55 | package Perl::Tidy; | ||||
| 56 | 2 | 40µs | 1 | 14µs | # spent 14µs within Perl::Tidy::BEGIN@56 which was called:
# once (14µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 56 # spent 14µs making 1 call to Perl::Tidy::BEGIN@56 |
| 57 | 1 | 18µs | 1 | 3µs | # spent 3µs within Perl::Tidy::BEGIN@57 which was called:
# once (3µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 57 # spent 3µs making 1 call to Perl::Tidy::BEGIN@57 |
| 58 | |||||
| 59 | 2 | 23µs | 2 | 33µs | # spent 22µs (10+11) within Perl::Tidy::BEGIN@59 which was called:
# once (10µs+11µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 59 # spent 22µs making 1 call to Perl::Tidy::BEGIN@59
# spent 12µs making 1 call to strict::import |
| 60 | 2 | 19µs | 2 | 40µs | # spent 24µs (8+16) within Perl::Tidy::BEGIN@60 which was called:
# once (8µs+16µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 60 # spent 24µs making 1 call to Perl::Tidy::BEGIN@60
# spent 16µs making 1 call to Exporter::import |
| 61 | 2 | 28µs | 2 | 67µs | # spent 38µs (8+30) within Perl::Tidy::BEGIN@61 which was called:
# once (8µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 61 # spent 38µs making 1 call to Perl::Tidy::BEGIN@61
# spent 30µs making 1 call to Exporter::import |
| 62 | 1 | 2µs | $|++; | ||
| 63 | |||||
| 64 | 1 | 600ns | # spent 72µs (8+64) within Perl::Tidy::BEGIN@64 which was called:
# once (8µs+64µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 70 | ||
| 65 | $VERSION | ||||
| 66 | @ISA | ||||
| 67 | @EXPORT | ||||
| 68 | $missing_file_spec | ||||
| 69 | $fh_stderr | ||||
| 70 | 1 | 32µs | 2 | 136µs | }; # spent 72µs making 1 call to Perl::Tidy::BEGIN@64
# spent 64µs making 1 call to vars::import |
| 71 | |||||
| 72 | 1 | 11µs | @ISA = qw( Exporter ); | ||
| 73 | 1 | 800ns | @EXPORT = qw( &perltidy ); | ||
| 74 | |||||
| 75 | 2 | 34µs | 2 | 73µs | # spent 42µs (11+31) within Perl::Tidy::BEGIN@75 which was called:
# once (11µs+31µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 75 # spent 42µs making 1 call to Perl::Tidy::BEGIN@75
# spent 31µs making 1 call to Exporter::import |
| 76 | 2 | 133µs | 2 | 1.10ms | # spent 1.00ms (628µs+377µs) within Perl::Tidy::BEGIN@76 which was called:
# once (628µs+377µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 76 # spent 1.00ms making 1 call to Perl::Tidy::BEGIN@76
# spent 100µs making 1 call to Exporter::import |
| 77 | 2 | 21µs | 2 | 66µs | # spent 38µs (11+28) within Perl::Tidy::BEGIN@77 which was called:
# once (11µs+28µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 77 # spent 38µs making 1 call to Perl::Tidy::BEGIN@77
# spent 28µs making 1 call to Exporter::import |
| 78 | 2 | 109µs | 2 | 1.98ms | # spent 1.96ms (1.84+119µs) within Perl::Tidy::BEGIN@78 which was called:
# once (1.84ms+119µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 78 # spent 1.96ms making 1 call to Perl::Tidy::BEGIN@78
# spent 27µs making 1 call to Exporter::import |
| 79 | 2 | 53µs | 2 | 135µs | # spent 72µs (9+63) within Perl::Tidy::BEGIN@79 which was called:
# once (9µs+63µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 79 # spent 72µs making 1 call to Perl::Tidy::BEGIN@79
# spent 63µs making 1 call to Exporter::import |
| 80 | |||||
| 81 | # spent 23µs (16+7) within Perl::Tidy::BEGIN@81 which was called:
# once (16µs+7µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 83 | ||||
| 82 | 1 | 22µs | 3 | 7µs | ( $VERSION = q($Id: Tidy.pm,v 1.74 2014/03/28 13:56:49 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker # spent 6µs making 1 call to Perl::Tidy::CORE:subst
# spent 1µs making 2 calls to Perl::Tidy::CORE:substcont, avg 650ns/call |
| 83 | 1 | 454µs | 1 | 23µs | } # spent 23µs making 1 call to Perl::Tidy::BEGIN@81 |
| 84 | |||||
| 85 | sub streamhandle { | ||||
| 86 | |||||
| 87 | # given filename and mode (r or w), create an object which: | ||||
| 88 | # has a 'getline' method if mode='r', and | ||||
| 89 | # has a 'print' method if mode='w'. | ||||
| 90 | # The objects also need a 'close' method. | ||||
| 91 | # | ||||
| 92 | # How the object is made: | ||||
| 93 | # | ||||
| 94 | # if $filename is: Make object using: | ||||
| 95 | # ---------------- ----------------- | ||||
| 96 | # '-' (STDIN if mode = 'r', STDOUT if mode='w') | ||||
| 97 | # string IO::File | ||||
| 98 | # ARRAY ref Perl::Tidy::IOScalarArray (formerly IO::ScalarArray) | ||||
| 99 | # STRING ref Perl::Tidy::IOScalar (formerly IO::Scalar) | ||||
| 100 | # object object | ||||
| 101 | # (check for 'print' method for 'w' mode) | ||||
| 102 | # (check for 'getline' method for 'r' mode) | ||||
| 103 | my $ref = ref( my $filename = shift ); | ||||
| 104 | my $mode = shift; | ||||
| 105 | my $New; | ||||
| 106 | my $fh; | ||||
| 107 | |||||
| 108 | # handle a reference | ||||
| 109 | if ($ref) { | ||||
| 110 | if ( $ref eq 'ARRAY' ) { | ||||
| 111 | $New = sub { Perl::Tidy::IOScalarArray->new(@_) }; | ||||
| 112 | } | ||||
| 113 | elsif ( $ref eq 'SCALAR' ) { | ||||
| 114 | $New = sub { Perl::Tidy::IOScalar->new(@_) }; | ||||
| 115 | } | ||||
| 116 | else { | ||||
| 117 | |||||
| 118 | # Accept an object with a getline method for reading. Note: | ||||
| 119 | # IO::File is built-in and does not respond to the defined | ||||
| 120 | # operator. If this causes trouble, the check can be | ||||
| 121 | # skipped and we can just let it crash if there is no | ||||
| 122 | # getline. | ||||
| 123 | if ( $mode =~ /[rR]/ ) { | ||||
| 124 | if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) { | ||||
| 125 | $New = sub { $filename }; | ||||
| 126 | } | ||||
| 127 | else { | ||||
| 128 | $New = sub { undef }; | ||||
| 129 | confess <<EOM; | ||||
| 130 | ------------------------------------------------------------------------ | ||||
| 131 | No 'getline' method is defined for object of class $ref | ||||
| 132 | Please check your call to Perl::Tidy::perltidy. Trace follows. | ||||
| 133 | ------------------------------------------------------------------------ | ||||
| 134 | EOM | ||||
| 135 | } | ||||
| 136 | } | ||||
| 137 | |||||
| 138 | # Accept an object with a print method for writing. | ||||
| 139 | # See note above about IO::File | ||||
| 140 | if ( $mode =~ /[wW]/ ) { | ||||
| 141 | if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) { | ||||
| 142 | $New = sub { $filename }; | ||||
| 143 | } | ||||
| 144 | else { | ||||
| 145 | $New = sub { undef }; | ||||
| 146 | confess <<EOM; | ||||
| 147 | ------------------------------------------------------------------------ | ||||
| 148 | No 'print' method is defined for object of class $ref | ||||
| 149 | Please check your call to Perl::Tidy::perltidy. Trace follows. | ||||
| 150 | ------------------------------------------------------------------------ | ||||
| 151 | EOM | ||||
| 152 | } | ||||
| 153 | } | ||||
| 154 | } | ||||
| 155 | } | ||||
| 156 | |||||
| 157 | # handle a string | ||||
| 158 | else { | ||||
| 159 | if ( $filename eq '-' ) { | ||||
| 160 | $New = sub { $mode eq 'w' ? *STDOUT : *STDIN } | ||||
| 161 | } | ||||
| 162 | else { | ||||
| 163 | $New = sub { IO::File->new(@_) }; | ||||
| 164 | } | ||||
| 165 | } | ||||
| 166 | $fh = $New->( $filename, $mode ) | ||||
| 167 | or Warn("Couldn't open file:$filename in mode:$mode : $!\n"); | ||||
| 168 | return $fh, ( $ref or $filename ); | ||||
| 169 | } | ||||
| 170 | |||||
| 171 | sub find_input_line_ending { | ||||
| 172 | |||||
| 173 | # Peek at a file and return first line ending character. | ||||
| 174 | # Quietly return undef in case of any trouble. | ||||
| 175 | my ($input_file) = @_; | ||||
| 176 | my $ending; | ||||
| 177 | |||||
| 178 | # silently ignore input from object or stdin | ||||
| 179 | if ( ref($input_file) || $input_file eq '-' ) { | ||||
| 180 | return $ending; | ||||
| 181 | } | ||||
| 182 | open( INFILE, $input_file ) || return $ending; | ||||
| 183 | |||||
| 184 | binmode INFILE; | ||||
| 185 | my $buf; | ||||
| 186 | read( INFILE, $buf, 1024 ); | ||||
| 187 | close INFILE; | ||||
| 188 | if ( $buf && $buf =~ /([\012\015]+)/ ) { | ||||
| 189 | my $test = $1; | ||||
| 190 | |||||
| 191 | # dos | ||||
| 192 | if ( $test =~ /^(\015\012)+$/ ) { $ending = "\015\012" } | ||||
| 193 | |||||
| 194 | # mac | ||||
| 195 | elsif ( $test =~ /^\015+$/ ) { $ending = "\015" } | ||||
| 196 | |||||
| 197 | # unix | ||||
| 198 | elsif ( $test =~ /^\012+$/ ) { $ending = "\012" } | ||||
| 199 | |||||
| 200 | # unknown | ||||
| 201 | else { } | ||||
| 202 | } | ||||
| 203 | |||||
| 204 | # no ending seen | ||||
| 205 | else { } | ||||
| 206 | |||||
| 207 | return $ending; | ||||
| 208 | } | ||||
| 209 | |||||
| 210 | sub catfile { | ||||
| 211 | |||||
| 212 | # concatenate a path and file basename | ||||
| 213 | # returns undef in case of error | ||||
| 214 | |||||
| 215 | 2 | 5.10ms | 1 | 22µs | # spent 22µs within Perl::Tidy::BEGIN@215 which was called:
# once (22µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 215 # spent 22µs making 1 call to Perl::Tidy::BEGIN@215 # spent 2µs executing statements in string eval |
| 216 | |||||
| 217 | # use File::Spec if we can | ||||
| 218 | unless ($missing_file_spec) { | ||||
| 219 | return File::Spec->catfile(@_); | ||||
| 220 | } | ||||
| 221 | |||||
| 222 | # Perl 5.004 systems may not have File::Spec so we'll make | ||||
| 223 | # a simple try. We assume File::Basename is available. | ||||
| 224 | # return undef if not successful. | ||||
| 225 | my $name = pop @_; | ||||
| 226 | my $path = join '/', @_; | ||||
| 227 | my $test_file = $path . $name; | ||||
| 228 | my ( $test_name, $test_path ) = fileparse($test_file); | ||||
| 229 | return $test_file if ( $test_name eq $name ); | ||||
| 230 | return undef if ( $^O eq 'VMS' ); | ||||
| 231 | |||||
| 232 | # this should work at least for Windows and Unix: | ||||
| 233 | $test_file = $path . '/' . $name; | ||||
| 234 | ( $test_name, $test_path ) = fileparse($test_file); | ||||
| 235 | return $test_file if ( $test_name eq $name ); | ||||
| 236 | return undef; | ||||
| 237 | } | ||||
| 238 | |||||
| 239 | # Here is a map of the flow of data from the input source to the output | ||||
| 240 | # line sink: | ||||
| 241 | # | ||||
| 242 | # LineSource-->Tokenizer-->Formatter-->VerticalAligner-->FileWriter--> | ||||
| 243 | # input groups output | ||||
| 244 | # lines tokens lines of lines lines | ||||
| 245 | # lines | ||||
| 246 | # | ||||
| 247 | # The names correspond to the package names responsible for the unit processes. | ||||
| 248 | # | ||||
| 249 | # The overall process is controlled by the "main" package. | ||||
| 250 | # | ||||
| 251 | # LineSource is the stream of input lines | ||||
| 252 | # | ||||
| 253 | # Tokenizer analyzes a line and breaks it into tokens, peeking ahead | ||||
| 254 | # if necessary. A token is any section of the input line which should be | ||||
| 255 | # manipulated as a single entity during formatting. For example, a single | ||||
| 256 | # ',' character is a token, and so is an entire side comment. It handles | ||||
| 257 | # the complexities of Perl syntax, such as distinguishing between '<<' as | ||||
| 258 | # a shift operator and as a here-document, or distinguishing between '/' | ||||
| 259 | # as a divide symbol and as a pattern delimiter. | ||||
| 260 | # | ||||
| 261 | # Formatter inserts and deletes whitespace between tokens, and breaks | ||||
| 262 | # sequences of tokens at appropriate points as output lines. It bases its | ||||
| 263 | # decisions on the default rules as modified by any command-line options. | ||||
| 264 | # | ||||
| 265 | # VerticalAligner collects groups of lines together and tries to line up | ||||
| 266 | # certain tokens, such as '=>', '#', and '=' by adding whitespace. | ||||
| 267 | # | ||||
| 268 | # FileWriter simply writes lines to the output stream. | ||||
| 269 | # | ||||
| 270 | # The Logger package, not shown, records significant events and warning | ||||
| 271 | # messages. It writes a .LOG file, which may be saved with a | ||||
| 272 | # '-log' or a '-g' flag. | ||||
| 273 | |||||
| 274 | sub perltidy { | ||||
| 275 | |||||
| 276 | my %defaults = ( | ||||
| 277 | argv => undef, | ||||
| 278 | destination => undef, | ||||
| 279 | formatter => undef, | ||||
| 280 | logfile => undef, | ||||
| 281 | errorfile => undef, | ||||
| 282 | perltidyrc => undef, | ||||
| 283 | source => undef, | ||||
| 284 | stderr => undef, | ||||
| 285 | dump_options => undef, | ||||
| 286 | dump_options_type => undef, | ||||
| 287 | dump_getopt_flags => undef, | ||||
| 288 | dump_options_category => undef, | ||||
| 289 | dump_options_range => undef, | ||||
| 290 | dump_abbreviations => undef, | ||||
| 291 | prefilter => undef, | ||||
| 292 | postfilter => undef, | ||||
| 293 | ); | ||||
| 294 | |||||
| 295 | # don't overwrite callers ARGV | ||||
| 296 | local @ARGV = @ARGV; | ||||
| 297 | local *STDERR = *STDERR; | ||||
| 298 | |||||
| 299 | my %input_hash = @_; | ||||
| 300 | |||||
| 301 | if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) { | ||||
| 302 | local $" = ')('; | ||||
| 303 | my @good_keys = sort keys %defaults; | ||||
| 304 | @bad_keys = sort @bad_keys; | ||||
| 305 | confess <<EOM; | ||||
| 306 | ------------------------------------------------------------------------ | ||||
| 307 | Unknown perltidy parameter : (@bad_keys) | ||||
| 308 | perltidy only understands : (@good_keys) | ||||
| 309 | ------------------------------------------------------------------------ | ||||
| 310 | |||||
| 311 | EOM | ||||
| 312 | } | ||||
| 313 | |||||
| 314 | my $get_hash_ref = sub { | ||||
| 315 | my ($key) = @_; | ||||
| 316 | my $hash_ref = $input_hash{$key}; | ||||
| 317 | if ( defined($hash_ref) ) { | ||||
| 318 | unless ( ref($hash_ref) eq 'HASH' ) { | ||||
| 319 | my $what = ref($hash_ref); | ||||
| 320 | my $but_is = | ||||
| 321 | $what ? "but is ref to $what" : "but is not a reference"; | ||||
| 322 | croak <<EOM; | ||||
| 323 | ------------------------------------------------------------------------ | ||||
| 324 | error in call to perltidy: | ||||
| 325 | -$key must be reference to HASH $but_is | ||||
| 326 | ------------------------------------------------------------------------ | ||||
| 327 | EOM | ||||
| 328 | } | ||||
| 329 | } | ||||
| 330 | return $hash_ref; | ||||
| 331 | }; | ||||
| 332 | |||||
| 333 | %input_hash = ( %defaults, %input_hash ); | ||||
| 334 | my $argv = $input_hash{'argv'}; | ||||
| 335 | my $destination_stream = $input_hash{'destination'}; | ||||
| 336 | my $errorfile_stream = $input_hash{'errorfile'}; | ||||
| 337 | my $logfile_stream = $input_hash{'logfile'}; | ||||
| 338 | my $perltidyrc_stream = $input_hash{'perltidyrc'}; | ||||
| 339 | my $source_stream = $input_hash{'source'}; | ||||
| 340 | my $stderr_stream = $input_hash{'stderr'}; | ||||
| 341 | my $user_formatter = $input_hash{'formatter'}; | ||||
| 342 | my $prefilter = $input_hash{'prefilter'}; | ||||
| 343 | my $postfilter = $input_hash{'postfilter'}; | ||||
| 344 | |||||
| 345 | if ($stderr_stream) { | ||||
| 346 | ( $fh_stderr, my $stderr_file ) = | ||||
| 347 | Perl::Tidy::streamhandle( $stderr_stream, 'w' ); | ||||
| 348 | if ( !$fh_stderr ) { | ||||
| 349 | croak <<EOM; | ||||
| 350 | ------------------------------------------------------------------------ | ||||
| 351 | Unable to redirect STDERR to $stderr_stream | ||||
| 352 | Please check value of -stderr in call to perltidy | ||||
| 353 | ------------------------------------------------------------------------ | ||||
| 354 | EOM | ||||
| 355 | } | ||||
| 356 | } | ||||
| 357 | else { | ||||
| 358 | $fh_stderr = *STDERR; | ||||
| 359 | } | ||||
| 360 | |||||
| 361 | sub Warn ($) { $fh_stderr->print( $_[0] ); } | ||||
| 362 | |||||
| 363 | sub Exit ($) { | ||||
| 364 | if ( $_[0] ) { goto ERROR_EXIT } | ||||
| 365 | else { goto NORMAL_EXIT } | ||||
| 366 | } | ||||
| 367 | |||||
| 368 | sub Die ($) { Warn $_[0]; Exit(1); } | ||||
| 369 | |||||
| 370 | # extract various dump parameters | ||||
| 371 | my $dump_options_type = $input_hash{'dump_options_type'}; | ||||
| 372 | my $dump_options = $get_hash_ref->('dump_options'); | ||||
| 373 | my $dump_getopt_flags = $get_hash_ref->('dump_getopt_flags'); | ||||
| 374 | my $dump_options_category = $get_hash_ref->('dump_options_category'); | ||||
| 375 | my $dump_abbreviations = $get_hash_ref->('dump_abbreviations'); | ||||
| 376 | my $dump_options_range = $get_hash_ref->('dump_options_range'); | ||||
| 377 | |||||
| 378 | # validate dump_options_type | ||||
| 379 | if ( defined($dump_options) ) { | ||||
| 380 | unless ( defined($dump_options_type) ) { | ||||
| 381 | $dump_options_type = 'perltidyrc'; | ||||
| 382 | } | ||||
| 383 | unless ( $dump_options_type =~ /^(perltidyrc|full)$/ ) { | ||||
| 384 | croak <<EOM; | ||||
| 385 | ------------------------------------------------------------------------ | ||||
| 386 | Please check value of -dump_options_type in call to perltidy; | ||||
| 387 | saw: '$dump_options_type' | ||||
| 388 | expecting: 'perltidyrc' or 'full' | ||||
| 389 | ------------------------------------------------------------------------ | ||||
| 390 | EOM | ||||
| 391 | |||||
| 392 | } | ||||
| 393 | } | ||||
| 394 | else { | ||||
| 395 | $dump_options_type = ""; | ||||
| 396 | } | ||||
| 397 | |||||
| 398 | if ($user_formatter) { | ||||
| 399 | |||||
| 400 | # if the user defines a formatter, there is no output stream, | ||||
| 401 | # but we need a null stream to keep coding simple | ||||
| 402 | $destination_stream = Perl::Tidy::DevNull->new(); | ||||
| 403 | } | ||||
| 404 | |||||
| 405 | # see if ARGV is overridden | ||||
| 406 | if ( defined($argv) ) { | ||||
| 407 | |||||
| 408 | my $rargv = ref $argv; | ||||
| 409 | if ( $rargv eq 'SCALAR' ) { $argv = $$argv; $rargv = undef } | ||||
| 410 | |||||
| 411 | # ref to ARRAY | ||||
| 412 | if ($rargv) { | ||||
| 413 | if ( $rargv eq 'ARRAY' ) { | ||||
| 414 | @ARGV = @$argv; | ||||
| 415 | } | ||||
| 416 | else { | ||||
| 417 | croak <<EOM; | ||||
| 418 | ------------------------------------------------------------------------ | ||||
| 419 | Please check value of -argv in call to perltidy; | ||||
| 420 | it must be a string or ref to ARRAY but is: $rargv | ||||
| 421 | ------------------------------------------------------------------------ | ||||
| 422 | EOM | ||||
| 423 | } | ||||
| 424 | } | ||||
| 425 | |||||
| 426 | # string | ||||
| 427 | else { | ||||
| 428 | my ( $rargv, $msg ) = parse_args($argv); | ||||
| 429 | if ($msg) { | ||||
| 430 | Die <<EOM; | ||||
| 431 | Error parsing this string passed to to perltidy with 'argv': | ||||
| 432 | $msg | ||||
| 433 | EOM | ||||
| 434 | } | ||||
| 435 | @ARGV = @{$rargv}; | ||||
| 436 | } | ||||
| 437 | } | ||||
| 438 | |||||
| 439 | my $rpending_complaint; | ||||
| 440 | $$rpending_complaint = ""; | ||||
| 441 | my $rpending_logfile_message; | ||||
| 442 | $$rpending_logfile_message = ""; | ||||
| 443 | |||||
| 444 | my ( $is_Windows, $Windows_type ) = look_for_Windows($rpending_complaint); | ||||
| 445 | |||||
| 446 | # VMS file names are restricted to a 40.40 format, so we append _tdy | ||||
| 447 | # instead of .tdy, etc. (but see also sub check_vms_filename) | ||||
| 448 | my $dot; | ||||
| 449 | my $dot_pattern; | ||||
| 450 | if ( $^O eq 'VMS' ) { | ||||
| 451 | $dot = '_'; | ||||
| 452 | $dot_pattern = '_'; | ||||
| 453 | } | ||||
| 454 | else { | ||||
| 455 | $dot = '.'; | ||||
| 456 | $dot_pattern = '\.'; # must escape for use in regex | ||||
| 457 | } | ||||
| 458 | |||||
| 459 | #--------------------------------------------------------------- | ||||
| 460 | # get command line options | ||||
| 461 | #--------------------------------------------------------------- | ||||
| 462 | my ( | ||||
| 463 | $rOpts, $config_file, $rraw_options, | ||||
| 464 | $saw_extrude, $saw_pbp, $roption_string, | ||||
| 465 | $rexpansion, $roption_category, $roption_range | ||||
| 466 | ) | ||||
| 467 | = process_command_line( | ||||
| 468 | $perltidyrc_stream, $is_Windows, $Windows_type, | ||||
| 469 | $rpending_complaint, $dump_options_type, | ||||
| 470 | ); | ||||
| 471 | |||||
| 472 | #--------------------------------------------------------------- | ||||
| 473 | # Handle requests to dump information | ||||
| 474 | #--------------------------------------------------------------- | ||||
| 475 | |||||
| 476 | # return or exit immediately after all dumps | ||||
| 477 | my $quit_now = 0; | ||||
| 478 | |||||
| 479 | # Getopt parameters and their flags | ||||
| 480 | if ( defined($dump_getopt_flags) ) { | ||||
| 481 | $quit_now = 1; | ||||
| 482 | foreach my $op ( @{$roption_string} ) { | ||||
| 483 | my $opt = $op; | ||||
| 484 | my $flag = ""; | ||||
| 485 | |||||
| 486 | # Examples: | ||||
| 487 | # some-option=s | ||||
| 488 | # some-option=i | ||||
| 489 | # some-option:i | ||||
| 490 | # some-option! | ||||
| 491 | if ( $opt =~ /(.*)(!|=.*|:.*)$/ ) { | ||||
| 492 | $opt = $1; | ||||
| 493 | $flag = $2; | ||||
| 494 | } | ||||
| 495 | $dump_getopt_flags->{$opt} = $flag; | ||||
| 496 | } | ||||
| 497 | } | ||||
| 498 | |||||
| 499 | if ( defined($dump_options_category) ) { | ||||
| 500 | $quit_now = 1; | ||||
| 501 | %{$dump_options_category} = %{$roption_category}; | ||||
| 502 | } | ||||
| 503 | |||||
| 504 | if ( defined($dump_options_range) ) { | ||||
| 505 | $quit_now = 1; | ||||
| 506 | %{$dump_options_range} = %{$roption_range}; | ||||
| 507 | } | ||||
| 508 | |||||
| 509 | if ( defined($dump_abbreviations) ) { | ||||
| 510 | $quit_now = 1; | ||||
| 511 | %{$dump_abbreviations} = %{$rexpansion}; | ||||
| 512 | } | ||||
| 513 | |||||
| 514 | if ( defined($dump_options) ) { | ||||
| 515 | $quit_now = 1; | ||||
| 516 | %{$dump_options} = %{$rOpts}; | ||||
| 517 | } | ||||
| 518 | |||||
| 519 | Exit 0 if ($quit_now); | ||||
| 520 | |||||
| 521 | # make printable string of options for this run as possible diagnostic | ||||
| 522 | my $readable_options = readable_options( $rOpts, $roption_string ); | ||||
| 523 | |||||
| 524 | # dump from command line | ||||
| 525 | if ( $rOpts->{'dump-options'} ) { | ||||
| 526 | print STDOUT $readable_options; | ||||
| 527 | Exit 0; | ||||
| 528 | } | ||||
| 529 | |||||
| 530 | #--------------------------------------------------------------- | ||||
| 531 | # check parameters and their interactions | ||||
| 532 | #--------------------------------------------------------------- | ||||
| 533 | my $tabsize = | ||||
| 534 | check_options( $rOpts, $is_Windows, $Windows_type, $rpending_complaint ); | ||||
| 535 | |||||
| 536 | if ($user_formatter) { | ||||
| 537 | $rOpts->{'format'} = 'user'; | ||||
| 538 | } | ||||
| 539 | |||||
| 540 | # there must be one entry here for every possible format | ||||
| 541 | my %default_file_extension = ( | ||||
| 542 | tidy => 'tdy', | ||||
| 543 | html => 'html', | ||||
| 544 | user => '', | ||||
| 545 | ); | ||||
| 546 | |||||
| 547 | # be sure we have a valid output format | ||||
| 548 | unless ( exists $default_file_extension{ $rOpts->{'format'} } ) { | ||||
| 549 | my $formats = join ' ', | ||||
| 550 | sort map { "'" . $_ . "'" } keys %default_file_extension; | ||||
| 551 | my $fmt = $rOpts->{'format'}; | ||||
| 552 | Die "-format='$fmt' but must be one of: $formats\n"; | ||||
| 553 | } | ||||
| 554 | |||||
| 555 | my $output_extension = make_extension( $rOpts->{'output-file-extension'}, | ||||
| 556 | $default_file_extension{ $rOpts->{'format'} }, $dot ); | ||||
| 557 | |||||
| 558 | # If the backup extension contains a / character then the backup should | ||||
| 559 | # be deleted when the -b option is used. On older versions of | ||||
| 560 | # perltidy this will generate an error message due to an illegal | ||||
| 561 | # file name. | ||||
| 562 | # | ||||
| 563 | # A backup file will still be generated but will be deleted | ||||
| 564 | # at the end. If -bext='/' then this extension will be | ||||
| 565 | # the default 'bak'. Otherwise it will be whatever characters | ||||
| 566 | # remains after all '/' characters are removed. For example: | ||||
| 567 | # -bext extension slashes | ||||
| 568 | # '/' bak 1 | ||||
| 569 | # '/delete' delete 1 | ||||
| 570 | # 'delete/' delete 1 | ||||
| 571 | # '/dev/null' devnull 2 (Currently not allowed) | ||||
| 572 | my $bext = $rOpts->{'backup-file-extension'}; | ||||
| 573 | my $delete_backup = ( $rOpts->{'backup-file-extension'} =~ s/\///g ); | ||||
| 574 | |||||
| 575 | # At present only one forward slash is allowed. In the future multiple | ||||
| 576 | # slashes may be allowed to allow for other options | ||||
| 577 | if ( $delete_backup > 1 ) { | ||||
| 578 | Die "-bext=$bext contains more than one '/'\n"; | ||||
| 579 | } | ||||
| 580 | |||||
| 581 | my $backup_extension = | ||||
| 582 | make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot ); | ||||
| 583 | |||||
| 584 | my $html_toc_extension = | ||||
| 585 | make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot ); | ||||
| 586 | |||||
| 587 | my $html_src_extension = | ||||
| 588 | make_extension( $rOpts->{'html-src-extension'}, 'src', $dot ); | ||||
| 589 | |||||
| 590 | # check for -b option; | ||||
| 591 | # silently ignore unless beautify mode | ||||
| 592 | my $in_place_modify = $rOpts->{'backup-and-modify-in-place'} | ||||
| 593 | && $rOpts->{'format'} eq 'tidy'; | ||||
| 594 | |||||
| 595 | # Turn off -b with warnings in case of conflicts with other options. | ||||
| 596 | # NOTE: Do this silently, without warnings, if there is a source or | ||||
| 597 | # destination stream, or standard output is used. This is because the -b | ||||
| 598 | # flag may have been in a .perltidyrc file and warnings break | ||||
| 599 | # Test::NoWarnings. See email discussion with Merijn Brand 26 Feb 2014. | ||||
| 600 | if ($in_place_modify) { | ||||
| 601 | if ( $rOpts->{'standard-output'} ) { | ||||
| 602 | ## my $msg = "Ignoring -b; you may not use -b and -st together"; | ||||
| 603 | ## $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp); | ||||
| 604 | ## Warn "$msg\n"; | ||||
| 605 | $in_place_modify = 0; | ||||
| 606 | } | ||||
| 607 | if ($destination_stream) { | ||||
| 608 | ##Warn "Ignoring -b; you may not specify a destination stream and -b together\n"; | ||||
| 609 | $in_place_modify = 0; | ||||
| 610 | } | ||||
| 611 | if ( ref($source_stream) ) { | ||||
| 612 | ##Warn "Ignoring -b; you may not specify a source array and -b together\n"; | ||||
| 613 | $in_place_modify = 0; | ||||
| 614 | } | ||||
| 615 | if ( $rOpts->{'outfile'} ) { | ||||
| 616 | ##Warn "Ignoring -b; you may not use -b and -o together\n"; | ||||
| 617 | $in_place_modify = 0; | ||||
| 618 | } | ||||
| 619 | if ( defined( $rOpts->{'output-path'} ) ) { | ||||
| 620 | ##Warn "Ignoring -b; you may not use -b and -opath together\n"; | ||||
| 621 | $in_place_modify = 0; | ||||
| 622 | } | ||||
| 623 | } | ||||
| 624 | |||||
| 625 | Perl::Tidy::Formatter::check_options($rOpts); | ||||
| 626 | if ( $rOpts->{'format'} eq 'html' ) { | ||||
| 627 | Perl::Tidy::HtmlWriter->check_options($rOpts); | ||||
| 628 | } | ||||
| 629 | |||||
| 630 | # make the pattern of file extensions that we shouldn't touch | ||||
| 631 | my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)"; | ||||
| 632 | if ($output_extension) { | ||||
| 633 | my $ext = quotemeta($output_extension); | ||||
| 634 | $forbidden_file_extensions .= "|$ext"; | ||||
| 635 | } | ||||
| 636 | if ( $in_place_modify && $backup_extension ) { | ||||
| 637 | my $ext = quotemeta($backup_extension); | ||||
| 638 | $forbidden_file_extensions .= "|$ext"; | ||||
| 639 | } | ||||
| 640 | $forbidden_file_extensions .= ')$'; | ||||
| 641 | |||||
| 642 | # Create a diagnostics object if requested; | ||||
| 643 | # This is only useful for code development | ||||
| 644 | my $diagnostics_object = undef; | ||||
| 645 | if ( $rOpts->{'DIAGNOSTICS'} ) { | ||||
| 646 | $diagnostics_object = Perl::Tidy::Diagnostics->new(); | ||||
| 647 | } | ||||
| 648 | |||||
| 649 | # no filenames should be given if input is from an array | ||||
| 650 | if ($source_stream) { | ||||
| 651 | if ( @ARGV > 0 ) { | ||||
| 652 | Die | ||||
| 653 | "You may not specify any filenames when a source array is given\n"; | ||||
| 654 | } | ||||
| 655 | |||||
| 656 | # we'll stuff the source array into ARGV | ||||
| 657 | unshift( @ARGV, $source_stream ); | ||||
| 658 | |||||
| 659 | # No special treatment for source stream which is a filename. | ||||
| 660 | # This will enable checks for binary files and other bad stuff. | ||||
| 661 | $source_stream = undef unless ref($source_stream); | ||||
| 662 | } | ||||
| 663 | |||||
| 664 | # use stdin by default if no source array and no args | ||||
| 665 | else { | ||||
| 666 | unshift( @ARGV, '-' ) unless @ARGV; | ||||
| 667 | } | ||||
| 668 | |||||
| 669 | #--------------------------------------------------------------- | ||||
| 670 | # Ready to go... | ||||
| 671 | # main loop to process all files in argument list | ||||
| 672 | #--------------------------------------------------------------- | ||||
| 673 | my $number_of_files = @ARGV; | ||||
| 674 | my $formatter = undef; | ||||
| 675 | my $tokenizer = undef; | ||||
| 676 | while ( my $input_file = shift @ARGV ) { | ||||
| 677 | my $fileroot; | ||||
| 678 | my $input_file_permissions; | ||||
| 679 | |||||
| 680 | #--------------------------------------------------------------- | ||||
| 681 | # prepare this input stream | ||||
| 682 | #--------------------------------------------------------------- | ||||
| 683 | if ($source_stream) { | ||||
| 684 | $fileroot = "perltidy"; | ||||
| 685 | |||||
| 686 | # If the source is from an array or string, then .LOG output | ||||
| 687 | # is only possible if a logfile stream is specified. This prevents | ||||
| 688 | # unexpected perltidy.LOG files. | ||||
| 689 | if ( !defined($logfile_stream) ) { | ||||
| 690 | $logfile_stream = Perl::Tidy::DevNull->new(); | ||||
| 691 | } | ||||
| 692 | } | ||||
| 693 | elsif ( $input_file eq '-' ) { # '-' indicates input from STDIN | ||||
| 694 | $fileroot = "perltidy"; # root name to use for .ERR, .LOG, etc | ||||
| 695 | $in_place_modify = 0; | ||||
| 696 | } | ||||
| 697 | else { | ||||
| 698 | $fileroot = $input_file; | ||||
| 699 | unless ( -e $input_file ) { | ||||
| 700 | |||||
| 701 | # file doesn't exist - check for a file glob | ||||
| 702 | if ( $input_file =~ /([\?\*\[\{])/ ) { | ||||
| 703 | |||||
| 704 | # Windows shell may not remove quotes, so do it | ||||
| 705 | my $input_file = $input_file; | ||||
| 706 | if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 } | ||||
| 707 | if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 } | ||||
| 708 | my $pattern = fileglob_to_re($input_file); | ||||
| 709 | ##eval "/$pattern/"; | ||||
| 710 | if ( !$@ && opendir( DIR, './' ) ) { | ||||
| 711 | my @files = | ||||
| 712 | grep { /$pattern/ && !-d $_ } readdir(DIR); | ||||
| 713 | closedir(DIR); | ||||
| 714 | if (@files) { | ||||
| 715 | unshift @ARGV, @files; | ||||
| 716 | next; | ||||
| 717 | } | ||||
| 718 | } | ||||
| 719 | } | ||||
| 720 | Warn "skipping file: '$input_file': no matches found\n"; | ||||
| 721 | next; | ||||
| 722 | } | ||||
| 723 | |||||
| 724 | unless ( -f $input_file ) { | ||||
| 725 | Warn "skipping file: $input_file: not a regular file\n"; | ||||
| 726 | next; | ||||
| 727 | } | ||||
| 728 | |||||
| 729 | # As a safety precaution, skip zero length files. | ||||
| 730 | # If for example a source file got clobbered somehow, | ||||
| 731 | # the old .tdy or .bak files might still exist so we | ||||
| 732 | # shouldn't overwrite them with zero length files. | ||||
| 733 | unless ( -s $input_file ) { | ||||
| 734 | Warn "skipping file: $input_file: Zero size\n"; | ||||
| 735 | next; | ||||
| 736 | } | ||||
| 737 | |||||
| 738 | unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) { | ||||
| 739 | Warn | ||||
| 740 | "skipping file: $input_file: Non-text (override with -f)\n"; | ||||
| 741 | next; | ||||
| 742 | } | ||||
| 743 | |||||
| 744 | # we should have a valid filename now | ||||
| 745 | $fileroot = $input_file; | ||||
| 746 | $input_file_permissions = ( stat $input_file )[2] & 07777; | ||||
| 747 | |||||
| 748 | if ( $^O eq 'VMS' ) { | ||||
| 749 | ( $fileroot, $dot ) = check_vms_filename($fileroot); | ||||
| 750 | } | ||||
| 751 | |||||
| 752 | # add option to change path here | ||||
| 753 | if ( defined( $rOpts->{'output-path'} ) ) { | ||||
| 754 | |||||
| 755 | my ( $base, $old_path ) = fileparse($fileroot); | ||||
| 756 | my $new_path = $rOpts->{'output-path'}; | ||||
| 757 | unless ( -d $new_path ) { | ||||
| 758 | unless ( mkdir $new_path, 0777 ) { | ||||
| 759 | Die "unable to create directory $new_path: $!\n"; | ||||
| 760 | } | ||||
| 761 | } | ||||
| 762 | my $path = $new_path; | ||||
| 763 | $fileroot = catfile( $path, $base ); | ||||
| 764 | unless ($fileroot) { | ||||
| 765 | Die <<EOM; | ||||
| 766 | ------------------------------------------------------------------------ | ||||
| 767 | Problem combining $new_path and $base to make a filename; check -opath | ||||
| 768 | ------------------------------------------------------------------------ | ||||
| 769 | EOM | ||||
| 770 | } | ||||
| 771 | } | ||||
| 772 | } | ||||
| 773 | |||||
| 774 | # Skip files with same extension as the output files because | ||||
| 775 | # this can lead to a messy situation with files like | ||||
| 776 | # script.tdy.tdy.tdy ... or worse problems ... when you | ||||
| 777 | # rerun perltidy over and over with wildcard input. | ||||
| 778 | if ( | ||||
| 779 | !$source_stream | ||||
| 780 | && ( $input_file =~ /$forbidden_file_extensions/o | ||||
| 781 | || $input_file eq 'DIAGNOSTICS' ) | ||||
| 782 | ) | ||||
| 783 | { | ||||
| 784 | Warn "skipping file: $input_file: wrong extension\n"; | ||||
| 785 | next; | ||||
| 786 | } | ||||
| 787 | |||||
| 788 | # the 'source_object' supplies a method to read the input file | ||||
| 789 | my $source_object = | ||||
| 790 | Perl::Tidy::LineSource->new( $input_file, $rOpts, | ||||
| 791 | $rpending_logfile_message ); | ||||
| 792 | next unless ($source_object); | ||||
| 793 | |||||
| 794 | # Prefilters and postfilters: The prefilter is a code reference | ||||
| 795 | # that will be applied to the source before tidying, and the | ||||
| 796 | # postfilter is a code reference to the result before outputting. | ||||
| 797 | if ($prefilter) { | ||||
| 798 | my $buf = ''; | ||||
| 799 | while ( my $line = $source_object->get_line() ) { | ||||
| 800 | $buf .= $line; | ||||
| 801 | } | ||||
| 802 | $buf = $prefilter->($buf); | ||||
| 803 | |||||
| 804 | $source_object = Perl::Tidy::LineSource->new( \$buf, $rOpts, | ||||
| 805 | $rpending_logfile_message ); | ||||
| 806 | } | ||||
| 807 | |||||
| 808 | # register this file name with the Diagnostics package | ||||
| 809 | $diagnostics_object->set_input_file($input_file) | ||||
| 810 | if $diagnostics_object; | ||||
| 811 | |||||
| 812 | #--------------------------------------------------------------- | ||||
| 813 | # prepare the output stream | ||||
| 814 | #--------------------------------------------------------------- | ||||
| 815 | my $output_file = undef; | ||||
| 816 | my $actual_output_extension; | ||||
| 817 | |||||
| 818 | if ( $rOpts->{'outfile'} ) { | ||||
| 819 | |||||
| 820 | if ( $number_of_files <= 1 ) { | ||||
| 821 | |||||
| 822 | if ( $rOpts->{'standard-output'} ) { | ||||
| 823 | my $msg = "You may not use -o and -st together"; | ||||
| 824 | $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp); | ||||
| 825 | Die "$msg\n"; | ||||
| 826 | } | ||||
| 827 | elsif ($destination_stream) { | ||||
| 828 | Die | ||||
| 829 | "You may not specify a destination array and -o together\n"; | ||||
| 830 | } | ||||
| 831 | elsif ( defined( $rOpts->{'output-path'} ) ) { | ||||
| 832 | Die "You may not specify -o and -opath together\n"; | ||||
| 833 | } | ||||
| 834 | elsif ( defined( $rOpts->{'output-file-extension'} ) ) { | ||||
| 835 | Die "You may not specify -o and -oext together\n"; | ||||
| 836 | } | ||||
| 837 | $output_file = $rOpts->{outfile}; | ||||
| 838 | |||||
| 839 | # make sure user gives a file name after -o | ||||
| 840 | if ( $output_file =~ /^-/ ) { | ||||
| 841 | Die "You must specify a valid filename after -o\n"; | ||||
| 842 | } | ||||
| 843 | |||||
| 844 | # do not overwrite input file with -o | ||||
| 845 | if ( defined($input_file_permissions) | ||||
| 846 | && ( $output_file eq $input_file ) ) | ||||
| 847 | { | ||||
| 848 | Die "Use 'perltidy -b $input_file' to modify in-place\n"; | ||||
| 849 | } | ||||
| 850 | } | ||||
| 851 | else { | ||||
| 852 | Die "You may not use -o with more than one input file\n"; | ||||
| 853 | } | ||||
| 854 | } | ||||
| 855 | elsif ( $rOpts->{'standard-output'} ) { | ||||
| 856 | if ($destination_stream) { | ||||
| 857 | my $msg = | ||||
| 858 | "You may not specify a destination array and -st together\n"; | ||||
| 859 | $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp); | ||||
| 860 | Die "$msg\n"; | ||||
| 861 | } | ||||
| 862 | $output_file = '-'; | ||||
| 863 | |||||
| 864 | if ( $number_of_files <= 1 ) { | ||||
| 865 | } | ||||
| 866 | else { | ||||
| 867 | Die "You may not use -st with more than one input file\n"; | ||||
| 868 | } | ||||
| 869 | } | ||||
| 870 | elsif ($destination_stream) { | ||||
| 871 | $output_file = $destination_stream; | ||||
| 872 | } | ||||
| 873 | elsif ($source_stream) { # source but no destination goes to stdout | ||||
| 874 | $output_file = '-'; | ||||
| 875 | } | ||||
| 876 | elsif ( $input_file eq '-' ) { | ||||
| 877 | $output_file = '-'; | ||||
| 878 | } | ||||
| 879 | else { | ||||
| 880 | if ($in_place_modify) { | ||||
| 881 | $output_file = IO::File->new_tmpfile() | ||||
| 882 | or Die "cannot open temp file for -b option: $!\n"; | ||||
| 883 | } | ||||
| 884 | else { | ||||
| 885 | $actual_output_extension = $output_extension; | ||||
| 886 | $output_file = $fileroot . $output_extension; | ||||
| 887 | } | ||||
| 888 | } | ||||
| 889 | |||||
| 890 | # the 'sink_object' knows how to write the output file | ||||
| 891 | my $tee_file = $fileroot . $dot . "TEE"; | ||||
| 892 | |||||
| 893 | my $line_separator = $rOpts->{'output-line-ending'}; | ||||
| 894 | if ( $rOpts->{'preserve-line-endings'} ) { | ||||
| 895 | $line_separator = find_input_line_ending($input_file); | ||||
| 896 | } | ||||
| 897 | |||||
| 898 | # Eventually all I/O may be done with binmode, but for now it is | ||||
| 899 | # only done when a user requests a particular line separator | ||||
| 900 | # through the -ple or -ole flags | ||||
| 901 | my $binmode = 0; | ||||
| 902 | if ( defined($line_separator) ) { $binmode = 1 } | ||||
| 903 | else { $line_separator = "\n" } | ||||
| 904 | |||||
| 905 | my ( $sink_object, $postfilter_buffer ); | ||||
| 906 | if ($postfilter) { | ||||
| 907 | $sink_object = | ||||
| 908 | Perl::Tidy::LineSink->new( \$postfilter_buffer, $tee_file, | ||||
| 909 | $line_separator, $rOpts, $rpending_logfile_message, $binmode ); | ||||
| 910 | } | ||||
| 911 | else { | ||||
| 912 | $sink_object = | ||||
| 913 | Perl::Tidy::LineSink->new( $output_file, $tee_file, | ||||
| 914 | $line_separator, $rOpts, $rpending_logfile_message, $binmode ); | ||||
| 915 | } | ||||
| 916 | |||||
| 917 | #--------------------------------------------------------------- | ||||
| 918 | # initialize the error logger for this file | ||||
| 919 | #--------------------------------------------------------------- | ||||
| 920 | my $warning_file = $fileroot . $dot . "ERR"; | ||||
| 921 | if ($errorfile_stream) { $warning_file = $errorfile_stream } | ||||
| 922 | my $log_file = $fileroot . $dot . "LOG"; | ||||
| 923 | if ($logfile_stream) { $log_file = $logfile_stream } | ||||
| 924 | |||||
| 925 | my $logger_object = | ||||
| 926 | Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file, | ||||
| 927 | $fh_stderr, $saw_extrude ); | ||||
| 928 | write_logfile_header( | ||||
| 929 | $rOpts, $logger_object, $config_file, | ||||
| 930 | $rraw_options, $Windows_type, $readable_options, | ||||
| 931 | ); | ||||
| 932 | if ($$rpending_logfile_message) { | ||||
| 933 | $logger_object->write_logfile_entry($$rpending_logfile_message); | ||||
| 934 | } | ||||
| 935 | if ($$rpending_complaint) { | ||||
| 936 | $logger_object->complain($$rpending_complaint); | ||||
| 937 | } | ||||
| 938 | |||||
| 939 | #--------------------------------------------------------------- | ||||
| 940 | # initialize the debug object, if any | ||||
| 941 | #--------------------------------------------------------------- | ||||
| 942 | my $debugger_object = undef; | ||||
| 943 | if ( $rOpts->{DEBUG} ) { | ||||
| 944 | $debugger_object = | ||||
| 945 | Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" ); | ||||
| 946 | } | ||||
| 947 | |||||
| 948 | #--------------------------------------------------------------- | ||||
| 949 | # loop over iterations for one source stream | ||||
| 950 | #--------------------------------------------------------------- | ||||
| 951 | |||||
| 952 | # We will do a convergence test if 3 or more iterations are allowed. | ||||
| 953 | # It would be pointless for fewer because we have to make at least | ||||
| 954 | # two passes before we can see if we are converged, and the test | ||||
| 955 | # would just slow things down. | ||||
| 956 | my $max_iterations = $rOpts->{'iterations'}; | ||||
| 957 | my $convergence_log_message; | ||||
| 958 | my %saw_md5; | ||||
| 959 | my $do_convergence_test = $max_iterations > 2; | ||||
| 960 | if ($do_convergence_test) { | ||||
| 961 | eval "use Digest::MD5 qw(md5_hex)"; | ||||
| 962 | $do_convergence_test = !$@; | ||||
| 963 | |||||
| 964 | # Trying to avoid problems with ancient versions of perl because | ||||
| 965 | # I don't know in which version number utf8::encode was introduced. | ||||
| 966 | eval { my $string = "perltidy"; utf8::encode($string) }; | ||||
| 967 | $do_convergence_test = $do_convergence_test && !$@; | ||||
| 968 | } | ||||
| 969 | |||||
| 970 | # save objects to allow redirecting output during iterations | ||||
| 971 | my $sink_object_final = $sink_object; | ||||
| 972 | my $debugger_object_final = $debugger_object; | ||||
| 973 | my $logger_object_final = $logger_object; | ||||
| 974 | |||||
| 975 | for ( my $iter = 1 ; $iter <= $max_iterations ; $iter++ ) { | ||||
| 976 | |||||
| 977 | # send output stream to temp buffers until last iteration | ||||
| 978 | my $sink_buffer; | ||||
| 979 | if ( $iter < $max_iterations ) { | ||||
| 980 | $sink_object = | ||||
| 981 | Perl::Tidy::LineSink->new( \$sink_buffer, $tee_file, | ||||
| 982 | $line_separator, $rOpts, $rpending_logfile_message, | ||||
| 983 | $binmode ); | ||||
| 984 | } | ||||
| 985 | else { | ||||
| 986 | $sink_object = $sink_object_final; | ||||
| 987 | } | ||||
| 988 | |||||
| 989 | # Save logger, debugger output only on pass 1 because: | ||||
| 990 | # (1) line number references must be to the starting | ||||
| 991 | # source, not an intermediate result, and | ||||
| 992 | # (2) we need to know if there are errors so we can stop the | ||||
| 993 | # iterations early if necessary. | ||||
| 994 | if ( $iter > 1 ) { | ||||
| 995 | $debugger_object = undef; | ||||
| 996 | $logger_object = undef; | ||||
| 997 | } | ||||
| 998 | |||||
| 999 | #------------------------------------------------------------ | ||||
| 1000 | # create a formatter for this file : html writer or | ||||
| 1001 | # pretty printer | ||||
| 1002 | #------------------------------------------------------------ | ||||
| 1003 | |||||
| 1004 | # we have to delete any old formatter because, for safety, | ||||
| 1005 | # the formatter will check to see that there is only one. | ||||
| 1006 | $formatter = undef; | ||||
| 1007 | |||||
| 1008 | if ($user_formatter) { | ||||
| 1009 | $formatter = $user_formatter; | ||||
| 1010 | } | ||||
| 1011 | elsif ( $rOpts->{'format'} eq 'html' ) { | ||||
| 1012 | $formatter = | ||||
| 1013 | Perl::Tidy::HtmlWriter->new( $fileroot, $output_file, | ||||
| 1014 | $actual_output_extension, $html_toc_extension, | ||||
| 1015 | $html_src_extension ); | ||||
| 1016 | } | ||||
| 1017 | elsif ( $rOpts->{'format'} eq 'tidy' ) { | ||||
| 1018 | $formatter = Perl::Tidy::Formatter->new( | ||||
| 1019 | logger_object => $logger_object, | ||||
| 1020 | diagnostics_object => $diagnostics_object, | ||||
| 1021 | sink_object => $sink_object, | ||||
| 1022 | ); | ||||
| 1023 | } | ||||
| 1024 | else { | ||||
| 1025 | Die "I don't know how to do -format=$rOpts->{'format'}\n"; | ||||
| 1026 | } | ||||
| 1027 | |||||
| 1028 | unless ($formatter) { | ||||
| 1029 | Die "Unable to continue with $rOpts->{'format'} formatting\n"; | ||||
| 1030 | } | ||||
| 1031 | |||||
| 1032 | #--------------------------------------------------------------- | ||||
| 1033 | # create the tokenizer for this file | ||||
| 1034 | #--------------------------------------------------------------- | ||||
| 1035 | $tokenizer = undef; # must destroy old tokenizer | ||||
| 1036 | $tokenizer = Perl::Tidy::Tokenizer->new( | ||||
| 1037 | source_object => $source_object, | ||||
| 1038 | logger_object => $logger_object, | ||||
| 1039 | debugger_object => $debugger_object, | ||||
| 1040 | diagnostics_object => $diagnostics_object, | ||||
| 1041 | tabsize => $tabsize, | ||||
| 1042 | |||||
| 1043 | starting_level => $rOpts->{'starting-indentation-level'}, | ||||
| 1044 | indent_columns => $rOpts->{'indent-columns'}, | ||||
| 1045 | look_for_hash_bang => $rOpts->{'look-for-hash-bang'}, | ||||
| 1046 | look_for_autoloader => $rOpts->{'look-for-autoloader'}, | ||||
| 1047 | look_for_selfloader => $rOpts->{'look-for-selfloader'}, | ||||
| 1048 | trim_qw => $rOpts->{'trim-qw'}, | ||||
| 1049 | |||||
| 1050 | continuation_indentation => | ||||
| 1051 | $rOpts->{'continuation-indentation'}, | ||||
| 1052 | outdent_labels => $rOpts->{'outdent-labels'}, | ||||
| 1053 | ); | ||||
| 1054 | |||||
| 1055 | #--------------------------------------------------------------- | ||||
| 1056 | # now we can do it | ||||
| 1057 | #--------------------------------------------------------------- | ||||
| 1058 | process_this_file( $tokenizer, $formatter ); | ||||
| 1059 | |||||
| 1060 | #--------------------------------------------------------------- | ||||
| 1061 | # close the input source and report errors | ||||
| 1062 | #--------------------------------------------------------------- | ||||
| 1063 | $source_object->close_input_file(); | ||||
| 1064 | |||||
| 1065 | # line source for next iteration (if any) comes from the current | ||||
| 1066 | # temporary output buffer | ||||
| 1067 | if ( $iter < $max_iterations ) { | ||||
| 1068 | |||||
| 1069 | $sink_object->close_output_file(); | ||||
| 1070 | $source_object = | ||||
| 1071 | Perl::Tidy::LineSource->new( \$sink_buffer, $rOpts, | ||||
| 1072 | $rpending_logfile_message ); | ||||
| 1073 | |||||
| 1074 | # stop iterations if errors or converged | ||||
| 1075 | my $stop_now = $logger_object->{_warning_count}; | ||||
| 1076 | if ($stop_now) { | ||||
| 1077 | $convergence_log_message = <<EOM; | ||||
| 1078 | Stopping iterations because of errors. | ||||
| 1079 | EOM | ||||
| 1080 | } | ||||
| 1081 | elsif ($do_convergence_test) { | ||||
| 1082 | |||||
| 1083 | # Patch for [rt.cpan.org #88020] | ||||
| 1084 | # Use utf8::encode since md5_hex() only operates on bytes. | ||||
| 1085 | my $digest = md5_hex( utf8::encode($sink_buffer) ); | ||||
| 1086 | if ( !$saw_md5{$digest} ) { | ||||
| 1087 | $saw_md5{$digest} = $iter; | ||||
| 1088 | } | ||||
| 1089 | else { | ||||
| 1090 | |||||
| 1091 | # Deja vu, stop iterating | ||||
| 1092 | $stop_now = 1; | ||||
| 1093 | my $iterm = $iter - 1; | ||||
| 1094 | if ( $saw_md5{$digest} != $iterm ) { | ||||
| 1095 | |||||
| 1096 | # Blinking (oscillating) between two stable | ||||
| 1097 | # end states. This has happened in the past | ||||
| 1098 | # but at present there are no known instances. | ||||
| 1099 | $convergence_log_message = <<EOM; | ||||
| 1100 | Blinking. Output for iteration $iter same as for $saw_md5{$digest}. | ||||
| 1101 | EOM | ||||
| 1102 | $diagnostics_object->write_diagnostics( | ||||
| 1103 | $convergence_log_message) | ||||
| 1104 | if $diagnostics_object; | ||||
| 1105 | } | ||||
| 1106 | else { | ||||
| 1107 | $convergence_log_message = <<EOM; | ||||
| 1108 | Converged. Output for iteration $iter same as for iter $iterm. | ||||
| 1109 | EOM | ||||
| 1110 | $diagnostics_object->write_diagnostics( | ||||
| 1111 | $convergence_log_message) | ||||
| 1112 | if $diagnostics_object && $iterm > 2; | ||||
| 1113 | } | ||||
| 1114 | } | ||||
| 1115 | } ## end if ($do_convergence_test) | ||||
| 1116 | |||||
| 1117 | if ($stop_now) { | ||||
| 1118 | |||||
| 1119 | # we are stopping the iterations early; | ||||
| 1120 | # copy the output stream to its final destination | ||||
| 1121 | $sink_object = $sink_object_final; | ||||
| 1122 | while ( my $line = $source_object->get_line() ) { | ||||
| 1123 | $sink_object->write_line($line); | ||||
| 1124 | } | ||||
| 1125 | $source_object->close_input_file(); | ||||
| 1126 | last; | ||||
| 1127 | } | ||||
| 1128 | } ## end if ( $iter < $max_iterations) | ||||
| 1129 | } # end loop over iterations for one source file | ||||
| 1130 | |||||
| 1131 | # restore objects which have been temporarily undefined | ||||
| 1132 | # for second and higher iterations | ||||
| 1133 | $debugger_object = $debugger_object_final; | ||||
| 1134 | $logger_object = $logger_object_final; | ||||
| 1135 | |||||
| 1136 | $logger_object->write_logfile_entry($convergence_log_message) | ||||
| 1137 | if $convergence_log_message; | ||||
| 1138 | |||||
| 1139 | #--------------------------------------------------------------- | ||||
| 1140 | # Perform any postfilter operation | ||||
| 1141 | #--------------------------------------------------------------- | ||||
| 1142 | if ($postfilter) { | ||||
| 1143 | $sink_object->close_output_file(); | ||||
| 1144 | $sink_object = | ||||
| 1145 | Perl::Tidy::LineSink->new( $output_file, $tee_file, | ||||
| 1146 | $line_separator, $rOpts, $rpending_logfile_message, $binmode ); | ||||
| 1147 | my $buf = $postfilter->($postfilter_buffer); | ||||
| 1148 | $source_object = | ||||
| 1149 | Perl::Tidy::LineSource->new( \$buf, $rOpts, | ||||
| 1150 | $rpending_logfile_message ); | ||||
| 1151 | while ( my $line = $source_object->get_line() ) { | ||||
| 1152 | $sink_object->write_line($line); | ||||
| 1153 | } | ||||
| 1154 | $source_object->close_input_file(); | ||||
| 1155 | } | ||||
| 1156 | |||||
| 1157 | # Save names of the input and output files for syntax check | ||||
| 1158 | my $ifname = $input_file; | ||||
| 1159 | my $ofname = $output_file; | ||||
| 1160 | |||||
| 1161 | #--------------------------------------------------------------- | ||||
| 1162 | # handle the -b option (backup and modify in-place) | ||||
| 1163 | #--------------------------------------------------------------- | ||||
| 1164 | if ($in_place_modify) { | ||||
| 1165 | unless ( -f $input_file ) { | ||||
| 1166 | |||||
| 1167 | # oh, oh, no real file to backup .. | ||||
| 1168 | # shouldn't happen because of numerous preliminary checks | ||||
| 1169 | Die | ||||
| 1170 | "problem with -b backing up input file '$input_file': not a file\n"; | ||||
| 1171 | } | ||||
| 1172 | my $backup_name = $input_file . $backup_extension; | ||||
| 1173 | if ( -f $backup_name ) { | ||||
| 1174 | unlink($backup_name) | ||||
| 1175 | or Die | ||||
| 1176 | "unable to remove previous '$backup_name' for -b option; check permissions: $!\n"; | ||||
| 1177 | } | ||||
| 1178 | |||||
| 1179 | # backup the input file | ||||
| 1180 | # we use copy for symlinks, move for regular files | ||||
| 1181 | if ( -l $input_file ) { | ||||
| 1182 | File::Copy::copy( $input_file, $backup_name ) | ||||
| 1183 | or Die "File::Copy failed trying to backup source: $!"; | ||||
| 1184 | } | ||||
| 1185 | else { | ||||
| 1186 | rename( $input_file, $backup_name ) | ||||
| 1187 | or Die | ||||
| 1188 | "problem renaming $input_file to $backup_name for -b option: $!\n"; | ||||
| 1189 | } | ||||
| 1190 | $ifname = $backup_name; | ||||
| 1191 | |||||
| 1192 | # copy the output to the original input file | ||||
| 1193 | # NOTE: it would be nice to just close $output_file and use | ||||
| 1194 | # File::Copy::copy here, but in this case $output_file is the | ||||
| 1195 | # handle of an open nameless temporary file so we would lose | ||||
| 1196 | # everything if we closed it. | ||||
| 1197 | seek( $output_file, 0, 0 ) | ||||
| 1198 | or Die "unable to rewind a temporary file for -b option: $!\n"; | ||||
| 1199 | my $fout = IO::File->new("> $input_file") | ||||
| 1200 | or Die | ||||
| 1201 | "problem re-opening $input_file for write for -b option; check file and directory permissions: $!\n"; | ||||
| 1202 | binmode $fout; | ||||
| 1203 | my $line; | ||||
| 1204 | while ( $line = $output_file->getline() ) { | ||||
| 1205 | $fout->print($line); | ||||
| 1206 | } | ||||
| 1207 | $fout->close(); | ||||
| 1208 | $output_file = $input_file; | ||||
| 1209 | $ofname = $input_file; | ||||
| 1210 | } | ||||
| 1211 | |||||
| 1212 | #--------------------------------------------------------------- | ||||
| 1213 | # clean up and report errors | ||||
| 1214 | #--------------------------------------------------------------- | ||||
| 1215 | $sink_object->close_output_file() if $sink_object; | ||||
| 1216 | $debugger_object->close_debug_file() if $debugger_object; | ||||
| 1217 | |||||
| 1218 | # set output file permissions | ||||
| 1219 | if ( $output_file && -f $output_file && !-l $output_file ) { | ||||
| 1220 | if ($input_file_permissions) { | ||||
| 1221 | |||||
| 1222 | # give output script same permissions as input script, but | ||||
| 1223 | # make it user-writable or else we can't run perltidy again. | ||||
| 1224 | # Thus we retain whatever executable flags were set. | ||||
| 1225 | if ( $rOpts->{'format'} eq 'tidy' ) { | ||||
| 1226 | chmod( $input_file_permissions | 0600, $output_file ); | ||||
| 1227 | } | ||||
| 1228 | |||||
| 1229 | # else use default permissions for html and any other format | ||||
| 1230 | } | ||||
| 1231 | } | ||||
| 1232 | |||||
| 1233 | #--------------------------------------------------------------- | ||||
| 1234 | # Do syntax check if requested and possible | ||||
| 1235 | #--------------------------------------------------------------- | ||||
| 1236 | my $infile_syntax_ok = 0; # -1 no 0=don't know 1 yes | ||||
| 1237 | if ( $logger_object | ||||
| 1238 | && $rOpts->{'check-syntax'} | ||||
| 1239 | && $ifname | ||||
| 1240 | && $ofname ) | ||||
| 1241 | { | ||||
| 1242 | $infile_syntax_ok = | ||||
| 1243 | check_syntax( $ifname, $ofname, $logger_object, $rOpts ); | ||||
| 1244 | } | ||||
| 1245 | |||||
| 1246 | #--------------------------------------------------------------- | ||||
| 1247 | # remove the original file for in-place modify as follows: | ||||
| 1248 | # $delete_backup=0 never | ||||
| 1249 | # $delete_backup=1 only if no errors | ||||
| 1250 | # $delete_backup>1 always : NOT ALLOWED, too risky, see above | ||||
| 1251 | #--------------------------------------------------------------- | ||||
| 1252 | if ( $in_place_modify | ||||
| 1253 | && $delete_backup | ||||
| 1254 | && -f $ifname | ||||
| 1255 | && ( $delete_backup > 1 || !$logger_object->{_warning_count} ) ) | ||||
| 1256 | { | ||||
| 1257 | |||||
| 1258 | # As an added safety precaution, do not delete the source file | ||||
| 1259 | # if its size has dropped from positive to zero, since this | ||||
| 1260 | # could indicate a disaster of some kind, including a hardware | ||||
| 1261 | # failure. Actually, this could happen if you had a file of | ||||
| 1262 | # all comments (or pod) and deleted everything with -dac (-dap) | ||||
| 1263 | # for some reason. | ||||
| 1264 | if ( !-s $output_file && -s $ifname && $delete_backup == 1 ) { | ||||
| 1265 | Warn( | ||||
| 1266 | "output file '$output_file' missing or zero length; original '$ifname' not deleted\n" | ||||
| 1267 | ); | ||||
| 1268 | } | ||||
| 1269 | else { | ||||
| 1270 | unlink($ifname) | ||||
| 1271 | or Die | ||||
| 1272 | "unable to remove previous '$ifname' for -b option; check permissions: $!\n"; | ||||
| 1273 | } | ||||
| 1274 | } | ||||
| 1275 | |||||
| 1276 | $logger_object->finish( $infile_syntax_ok, $formatter ) | ||||
| 1277 | if $logger_object; | ||||
| 1278 | } # end of main loop to process all files | ||||
| 1279 | |||||
| 1280 | NORMAL_EXIT: | ||||
| 1281 | return 0; | ||||
| 1282 | |||||
| 1283 | ERROR_EXIT: | ||||
| 1284 | return 1; | ||||
| 1285 | } # end of main program perltidy | ||||
| 1286 | |||||
| 1287 | sub get_stream_as_named_file { | ||||
| 1288 | |||||
| 1289 | # Return the name of a file containing a stream of data, creating | ||||
| 1290 | # a temporary file if necessary. | ||||
| 1291 | # Given: | ||||
| 1292 | # $stream - the name of a file or stream | ||||
| 1293 | # Returns: | ||||
| 1294 | # $fname = name of file if possible, or undef | ||||
| 1295 | # $if_tmpfile = true if temp file, undef if not temp file | ||||
| 1296 | # | ||||
| 1297 | # This routine is needed for passing actual files to Perl for | ||||
| 1298 | # a syntax check. | ||||
| 1299 | my ($stream) = @_; | ||||
| 1300 | my $is_tmpfile; | ||||
| 1301 | my $fname; | ||||
| 1302 | if ($stream) { | ||||
| 1303 | if ( ref($stream) ) { | ||||
| 1304 | my ( $fh_stream, $fh_name ) = | ||||
| 1305 | Perl::Tidy::streamhandle( $stream, 'r' ); | ||||
| 1306 | if ($fh_stream) { | ||||
| 1307 | my ( $fout, $tmpnam ) = tempfile(); | ||||
| 1308 | if ($fout) { | ||||
| 1309 | $fname = $tmpnam; | ||||
| 1310 | $is_tmpfile = 1; | ||||
| 1311 | binmode $fout; | ||||
| 1312 | while ( my $line = $fh_stream->getline() ) { | ||||
| 1313 | $fout->print($line); | ||||
| 1314 | } | ||||
| 1315 | $fout->close(); | ||||
| 1316 | } | ||||
| 1317 | $fh_stream->close(); | ||||
| 1318 | } | ||||
| 1319 | } | ||||
| 1320 | elsif ( $stream ne '-' && -f $stream ) { | ||||
| 1321 | $fname = $stream; | ||||
| 1322 | } | ||||
| 1323 | } | ||||
| 1324 | return ( $fname, $is_tmpfile ); | ||||
| 1325 | } | ||||
| 1326 | |||||
| 1327 | sub fileglob_to_re { | ||||
| 1328 | |||||
| 1329 | # modified (corrected) from version in find2perl | ||||
| 1330 | my $x = shift; | ||||
| 1331 | $x =~ s#([./^\$()])#\\$1#g; # escape special characters | ||||
| 1332 | $x =~ s#\*#.*#g; # '*' -> '.*' | ||||
| 1333 | $x =~ s#\?#.#g; # '?' -> '.' | ||||
| 1334 | "^$x\\z"; # match whole word | ||||
| 1335 | } | ||||
| 1336 | |||||
| 1337 | sub make_extension { | ||||
| 1338 | |||||
| 1339 | # Make a file extension, including any leading '.' if necessary | ||||
| 1340 | # The '.' may actually be an '_' under VMS | ||||
| 1341 | my ( $extension, $default, $dot ) = @_; | ||||
| 1342 | |||||
| 1343 | # Use the default if none specified | ||||
| 1344 | $extension = $default unless ($extension); | ||||
| 1345 | |||||
| 1346 | # Only extensions with these leading characters get a '.' | ||||
| 1347 | # This rule gives the user some freedom | ||||
| 1348 | if ( $extension =~ /^[a-zA-Z0-9]/ ) { | ||||
| 1349 | $extension = $dot . $extension; | ||||
| 1350 | } | ||||
| 1351 | return $extension; | ||||
| 1352 | } | ||||
| 1353 | |||||
| 1354 | sub write_logfile_header { | ||||
| 1355 | my ( | ||||
| 1356 | $rOpts, $logger_object, $config_file, | ||||
| 1357 | $rraw_options, $Windows_type, $readable_options | ||||
| 1358 | ) = @_; | ||||
| 1359 | $logger_object->write_logfile_entry( | ||||
| 1360 | "perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n" | ||||
| 1361 | ); | ||||
| 1362 | if ($Windows_type) { | ||||
| 1363 | $logger_object->write_logfile_entry("Windows type is $Windows_type\n"); | ||||
| 1364 | } | ||||
| 1365 | my $options_string = join( ' ', @$rraw_options ); | ||||
| 1366 | |||||
| 1367 | if ($config_file) { | ||||
| 1368 | $logger_object->write_logfile_entry( | ||||
| 1369 | "Found Configuration File >>> $config_file \n"); | ||||
| 1370 | } | ||||
| 1371 | $logger_object->write_logfile_entry( | ||||
| 1372 | "Configuration and command line parameters for this run:\n"); | ||||
| 1373 | $logger_object->write_logfile_entry("$options_string\n"); | ||||
| 1374 | |||||
| 1375 | if ( $rOpts->{'DEBUG'} || $rOpts->{'show-options'} ) { | ||||
| 1376 | $rOpts->{'logfile'} = 1; # force logfile to be saved | ||||
| 1377 | $logger_object->write_logfile_entry( | ||||
| 1378 | "Final parameter set for this run\n"); | ||||
| 1379 | $logger_object->write_logfile_entry( | ||||
| 1380 | "------------------------------------\n"); | ||||
| 1381 | |||||
| 1382 | $logger_object->write_logfile_entry($readable_options); | ||||
| 1383 | |||||
| 1384 | $logger_object->write_logfile_entry( | ||||
| 1385 | "------------------------------------\n"); | ||||
| 1386 | } | ||||
| 1387 | $logger_object->write_logfile_entry( | ||||
| 1388 | "To find error messages search for 'WARNING' with your editor\n"); | ||||
| 1389 | } | ||||
| 1390 | |||||
| 1391 | sub generate_options { | ||||
| 1392 | |||||
| 1393 | ###################################################################### | ||||
| 1394 | # Generate and return references to: | ||||
| 1395 | # @option_string - the list of options to be passed to Getopt::Long | ||||
| 1396 | # @defaults - the list of default options | ||||
| 1397 | # %expansion - a hash showing how all abbreviations are expanded | ||||
| 1398 | # %category - a hash giving the general category of each option | ||||
| 1399 | # %option_range - a hash giving the valid ranges of certain options | ||||
| 1400 | |||||
| 1401 | # Note: a few options are not documented in the man page and usage | ||||
| 1402 | # message. This is because these are experimental or debug options and | ||||
| 1403 | # may or may not be retained in future versions. | ||||
| 1404 | # | ||||
| 1405 | # Here are the undocumented flags as far as I know. Any of them | ||||
| 1406 | # may disappear at any time. They are mainly for fine-tuning | ||||
| 1407 | # and debugging. | ||||
| 1408 | # | ||||
| 1409 | # fll --> fuzzy-line-length # a trivial parameter which gets | ||||
| 1410 | # turned off for the extrude option | ||||
| 1411 | # which is mainly for debugging | ||||
| 1412 | # scl --> short-concatenation-item-length # helps break at '.' | ||||
| 1413 | # recombine # for debugging line breaks | ||||
| 1414 | # valign # for debugging vertical alignment | ||||
| 1415 | # I --> DIAGNOSTICS # for debugging | ||||
| 1416 | ###################################################################### | ||||
| 1417 | |||||
| 1418 | # here is a summary of the Getopt codes: | ||||
| 1419 | # <none> does not take an argument | ||||
| 1420 | # =s takes a mandatory string | ||||
| 1421 | # :s takes an optional string (DO NOT USE - filenames will get eaten up) | ||||
| 1422 | # =i takes a mandatory integer | ||||
| 1423 | # :i takes an optional integer (NOT RECOMMENDED - can cause trouble) | ||||
| 1424 | # ! does not take an argument and may be negated | ||||
| 1425 | # i.e., -foo and -nofoo are allowed | ||||
| 1426 | # a double dash signals the end of the options list | ||||
| 1427 | # | ||||
| 1428 | #--------------------------------------------------------------- | ||||
| 1429 | # Define the option string passed to GetOptions. | ||||
| 1430 | #--------------------------------------------------------------- | ||||
| 1431 | |||||
| 1432 | my @option_string = (); | ||||
| 1433 | my %expansion = (); | ||||
| 1434 | my %option_category = (); | ||||
| 1435 | my %option_range = (); | ||||
| 1436 | my $rexpansion = \%expansion; | ||||
| 1437 | |||||
| 1438 | # names of categories in manual | ||||
| 1439 | # leading integers will allow sorting | ||||
| 1440 | my @category_name = ( | ||||
| 1441 | '0. I/O control', | ||||
| 1442 | '1. Basic formatting options', | ||||
| 1443 | '2. Code indentation control', | ||||
| 1444 | '3. Whitespace control', | ||||
| 1445 | '4. Comment controls', | ||||
| 1446 | '5. Linebreak controls', | ||||
| 1447 | '6. Controlling list formatting', | ||||
| 1448 | '7. Retaining or ignoring existing line breaks', | ||||
| 1449 | '8. Blank line control', | ||||
| 1450 | '9. Other controls', | ||||
| 1451 | '10. HTML options', | ||||
| 1452 | '11. pod2html options', | ||||
| 1453 | '12. Controlling HTML properties', | ||||
| 1454 | '13. Debugging', | ||||
| 1455 | ); | ||||
| 1456 | |||||
| 1457 | # These options are parsed directly by perltidy: | ||||
| 1458 | # help h | ||||
| 1459 | # version v | ||||
| 1460 | # However, they are included in the option set so that they will | ||||
| 1461 | # be seen in the options dump. | ||||
| 1462 | |||||
| 1463 | # These long option names have no abbreviations or are treated specially | ||||
| 1464 | @option_string = qw( | ||||
| 1465 | html! | ||||
| 1466 | noprofile | ||||
| 1467 | no-profile | ||||
| 1468 | npro | ||||
| 1469 | recombine! | ||||
| 1470 | valign! | ||||
| 1471 | notidy | ||||
| 1472 | ); | ||||
| 1473 | |||||
| 1474 | my $category = 13; # Debugging | ||||
| 1475 | foreach (@option_string) { | ||||
| 1476 | my $opt = $_; # must avoid changing the actual flag | ||||
| 1477 | $opt =~ s/!$//; | ||||
| 1478 | $option_category{$opt} = $category_name[$category]; | ||||
| 1479 | } | ||||
| 1480 | |||||
| 1481 | $category = 11; # HTML | ||||
| 1482 | $option_category{html} = $category_name[$category]; | ||||
| 1483 | |||||
| 1484 | # routine to install and check options | ||||
| 1485 | my $add_option = sub { | ||||
| 1486 | my ( $long_name, $short_name, $flag ) = @_; | ||||
| 1487 | push @option_string, $long_name . $flag; | ||||
| 1488 | $option_category{$long_name} = $category_name[$category]; | ||||
| 1489 | if ($short_name) { | ||||
| 1490 | if ( $expansion{$short_name} ) { | ||||
| 1491 | my $existing_name = $expansion{$short_name}[0]; | ||||
| 1492 | Die | ||||
| 1493 | "redefining abbreviation $short_name for $long_name; already used for $existing_name\n"; | ||||
| 1494 | } | ||||
| 1495 | $expansion{$short_name} = [$long_name]; | ||||
| 1496 | if ( $flag eq '!' ) { | ||||
| 1497 | my $nshort_name = 'n' . $short_name; | ||||
| 1498 | my $nolong_name = 'no' . $long_name; | ||||
| 1499 | if ( $expansion{$nshort_name} ) { | ||||
| 1500 | my $existing_name = $expansion{$nshort_name}[0]; | ||||
| 1501 | Die | ||||
| 1502 | "attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n"; | ||||
| 1503 | } | ||||
| 1504 | $expansion{$nshort_name} = [$nolong_name]; | ||||
| 1505 | } | ||||
| 1506 | } | ||||
| 1507 | }; | ||||
| 1508 | |||||
| 1509 | # Install long option names which have a simple abbreviation. | ||||
| 1510 | # Options with code '!' get standard negation ('no' for long names, | ||||
| 1511 | # 'n' for abbreviations). Categories follow the manual. | ||||
| 1512 | |||||
| 1513 | ########################### | ||||
| 1514 | $category = 0; # I/O_Control | ||||
| 1515 | ########################### | ||||
| 1516 | $add_option->( 'backup-and-modify-in-place', 'b', '!' ); | ||||
| 1517 | $add_option->( 'backup-file-extension', 'bext', '=s' ); | ||||
| 1518 | $add_option->( 'force-read-binary', 'f', '!' ); | ||||
| 1519 | $add_option->( 'format', 'fmt', '=s' ); | ||||
| 1520 | $add_option->( 'iterations', 'it', '=i' ); | ||||
| 1521 | $add_option->( 'logfile', 'log', '!' ); | ||||
| 1522 | $add_option->( 'logfile-gap', 'g', ':i' ); | ||||
| 1523 | $add_option->( 'outfile', 'o', '=s' ); | ||||
| 1524 | $add_option->( 'output-file-extension', 'oext', '=s' ); | ||||
| 1525 | $add_option->( 'output-path', 'opath', '=s' ); | ||||
| 1526 | $add_option->( 'profile', 'pro', '=s' ); | ||||
| 1527 | $add_option->( 'quiet', 'q', '!' ); | ||||
| 1528 | $add_option->( 'standard-error-output', 'se', '!' ); | ||||
| 1529 | $add_option->( 'standard-output', 'st', '!' ); | ||||
| 1530 | $add_option->( 'warning-output', 'w', '!' ); | ||||
| 1531 | |||||
| 1532 | # options which are both toggle switches and values moved here | ||||
| 1533 | # to hide from tidyview (which does not show category 0 flags): | ||||
| 1534 | # -ole moved here from category 1 | ||||
| 1535 | # -sil moved here from category 2 | ||||
| 1536 | $add_option->( 'output-line-ending', 'ole', '=s' ); | ||||
| 1537 | $add_option->( 'starting-indentation-level', 'sil', '=i' ); | ||||
| 1538 | |||||
| 1539 | ######################################## | ||||
| 1540 | $category = 1; # Basic formatting options | ||||
| 1541 | ######################################## | ||||
| 1542 | $add_option->( 'check-syntax', 'syn', '!' ); | ||||
| 1543 | $add_option->( 'entab-leading-whitespace', 'et', '=i' ); | ||||
| 1544 | $add_option->( 'indent-columns', 'i', '=i' ); | ||||
| 1545 | $add_option->( 'maximum-line-length', 'l', '=i' ); | ||||
| 1546 | $add_option->( 'variable-maximum-line-length', 'vmll', '!' ); | ||||
| 1547 | $add_option->( 'whitespace-cycle', 'wc', '=i' ); | ||||
| 1548 | $add_option->( 'perl-syntax-check-flags', 'pscf', '=s' ); | ||||
| 1549 | $add_option->( 'preserve-line-endings', 'ple', '!' ); | ||||
| 1550 | $add_option->( 'tabs', 't', '!' ); | ||||
| 1551 | $add_option->( 'default-tabsize', 'dt', '=i' ); | ||||
| 1552 | |||||
| 1553 | ######################################## | ||||
| 1554 | $category = 2; # Code indentation control | ||||
| 1555 | ######################################## | ||||
| 1556 | $add_option->( 'continuation-indentation', 'ci', '=i' ); | ||||
| 1557 | $add_option->( 'line-up-parentheses', 'lp', '!' ); | ||||
| 1558 | $add_option->( 'outdent-keyword-list', 'okwl', '=s' ); | ||||
| 1559 | $add_option->( 'outdent-keywords', 'okw', '!' ); | ||||
| 1560 | $add_option->( 'outdent-labels', 'ola', '!' ); | ||||
| 1561 | $add_option->( 'outdent-long-quotes', 'olq', '!' ); | ||||
| 1562 | $add_option->( 'indent-closing-brace', 'icb', '!' ); | ||||
| 1563 | $add_option->( 'closing-token-indentation', 'cti', '=i' ); | ||||
| 1564 | $add_option->( 'closing-paren-indentation', 'cpi', '=i' ); | ||||
| 1565 | $add_option->( 'closing-brace-indentation', 'cbi', '=i' ); | ||||
| 1566 | $add_option->( 'closing-square-bracket-indentation', 'csbi', '=i' ); | ||||
| 1567 | $add_option->( 'brace-left-and-indent', 'bli', '!' ); | ||||
| 1568 | $add_option->( 'brace-left-and-indent-list', 'blil', '=s' ); | ||||
| 1569 | |||||
| 1570 | ######################################## | ||||
| 1571 | $category = 3; # Whitespace control | ||||
| 1572 | ######################################## | ||||
| 1573 | $add_option->( 'add-semicolons', 'asc', '!' ); | ||||
| 1574 | $add_option->( 'add-whitespace', 'aws', '!' ); | ||||
| 1575 | $add_option->( 'block-brace-tightness', 'bbt', '=i' ); | ||||
| 1576 | $add_option->( 'brace-tightness', 'bt', '=i' ); | ||||
| 1577 | $add_option->( 'delete-old-whitespace', 'dws', '!' ); | ||||
| 1578 | $add_option->( 'delete-semicolons', 'dsm', '!' ); | ||||
| 1579 | $add_option->( 'nospace-after-keyword', 'nsak', '=s' ); | ||||
| 1580 | $add_option->( 'nowant-left-space', 'nwls', '=s' ); | ||||
| 1581 | $add_option->( 'nowant-right-space', 'nwrs', '=s' ); | ||||
| 1582 | $add_option->( 'paren-tightness', 'pt', '=i' ); | ||||
| 1583 | $add_option->( 'space-after-keyword', 'sak', '=s' ); | ||||
| 1584 | $add_option->( 'space-for-semicolon', 'sfs', '!' ); | ||||
| 1585 | $add_option->( 'space-function-paren', 'sfp', '!' ); | ||||
| 1586 | $add_option->( 'space-keyword-paren', 'skp', '!' ); | ||||
| 1587 | $add_option->( 'space-terminal-semicolon', 'sts', '!' ); | ||||
| 1588 | $add_option->( 'square-bracket-tightness', 'sbt', '=i' ); | ||||
| 1589 | $add_option->( 'square-bracket-vertical-tightness', 'sbvt', '=i' ); | ||||
| 1590 | $add_option->( 'square-bracket-vertical-tightness-closing', 'sbvtc', '=i' ); | ||||
| 1591 | $add_option->( 'tight-secret-operators', 'tso', '!' ); | ||||
| 1592 | $add_option->( 'trim-qw', 'tqw', '!' ); | ||||
| 1593 | $add_option->( 'trim-pod', 'trp', '!' ); | ||||
| 1594 | $add_option->( 'want-left-space', 'wls', '=s' ); | ||||
| 1595 | $add_option->( 'want-right-space', 'wrs', '=s' ); | ||||
| 1596 | |||||
| 1597 | ######################################## | ||||
| 1598 | $category = 4; # Comment controls | ||||
| 1599 | ######################################## | ||||
| 1600 | $add_option->( 'closing-side-comment-else-flag', 'csce', '=i' ); | ||||
| 1601 | $add_option->( 'closing-side-comment-interval', 'csci', '=i' ); | ||||
| 1602 | $add_option->( 'closing-side-comment-list', 'cscl', '=s' ); | ||||
| 1603 | $add_option->( 'closing-side-comment-maximum-text', 'csct', '=i' ); | ||||
| 1604 | $add_option->( 'closing-side-comment-prefix', 'cscp', '=s' ); | ||||
| 1605 | $add_option->( 'closing-side-comment-warnings', 'cscw', '!' ); | ||||
| 1606 | $add_option->( 'closing-side-comments', 'csc', '!' ); | ||||
| 1607 | $add_option->( 'closing-side-comments-balanced', 'cscb', '!' ); | ||||
| 1608 | $add_option->( 'format-skipping', 'fs', '!' ); | ||||
| 1609 | $add_option->( 'format-skipping-begin', 'fsb', '=s' ); | ||||
| 1610 | $add_option->( 'format-skipping-end', 'fse', '=s' ); | ||||
| 1611 | $add_option->( 'hanging-side-comments', 'hsc', '!' ); | ||||
| 1612 | $add_option->( 'indent-block-comments', 'ibc', '!' ); | ||||
| 1613 | $add_option->( 'indent-spaced-block-comments', 'isbc', '!' ); | ||||
| 1614 | $add_option->( 'fixed-position-side-comment', 'fpsc', '=i' ); | ||||
| 1615 | $add_option->( 'minimum-space-to-comment', 'msc', '=i' ); | ||||
| 1616 | $add_option->( 'outdent-long-comments', 'olc', '!' ); | ||||
| 1617 | $add_option->( 'outdent-static-block-comments', 'osbc', '!' ); | ||||
| 1618 | $add_option->( 'static-block-comment-prefix', 'sbcp', '=s' ); | ||||
| 1619 | $add_option->( 'static-block-comments', 'sbc', '!' ); | ||||
| 1620 | $add_option->( 'static-side-comment-prefix', 'sscp', '=s' ); | ||||
| 1621 | $add_option->( 'static-side-comments', 'ssc', '!' ); | ||||
| 1622 | $add_option->( 'ignore-side-comment-lengths', 'iscl', '!' ); | ||||
| 1623 | |||||
| 1624 | ######################################## | ||||
| 1625 | $category = 5; # Linebreak controls | ||||
| 1626 | ######################################## | ||||
| 1627 | $add_option->( 'add-newlines', 'anl', '!' ); | ||||
| 1628 | $add_option->( 'block-brace-vertical-tightness', 'bbvt', '=i' ); | ||||
| 1629 | $add_option->( 'block-brace-vertical-tightness-list', 'bbvtl', '=s' ); | ||||
| 1630 | $add_option->( 'brace-vertical-tightness', 'bvt', '=i' ); | ||||
| 1631 | $add_option->( 'brace-vertical-tightness-closing', 'bvtc', '=i' ); | ||||
| 1632 | $add_option->( 'cuddled-else', 'ce', '!' ); | ||||
| 1633 | $add_option->( 'delete-old-newlines', 'dnl', '!' ); | ||||
| 1634 | $add_option->( 'opening-brace-always-on-right', 'bar', '!' ); | ||||
| 1635 | $add_option->( 'opening-brace-on-new-line', 'bl', '!' ); | ||||
| 1636 | $add_option->( 'opening-hash-brace-right', 'ohbr', '!' ); | ||||
| 1637 | $add_option->( 'opening-paren-right', 'opr', '!' ); | ||||
| 1638 | $add_option->( 'opening-square-bracket-right', 'osbr', '!' ); | ||||
| 1639 | $add_option->( 'opening-anonymous-sub-brace-on-new-line', 'asbl', '!' ); | ||||
| 1640 | $add_option->( 'opening-sub-brace-on-new-line', 'sbl', '!' ); | ||||
| 1641 | $add_option->( 'paren-vertical-tightness', 'pvt', '=i' ); | ||||
| 1642 | $add_option->( 'paren-vertical-tightness-closing', 'pvtc', '=i' ); | ||||
| 1643 | $add_option->( 'stack-closing-block-brace', 'scbb', '!' ); | ||||
| 1644 | $add_option->( 'stack-closing-hash-brace', 'schb', '!' ); | ||||
| 1645 | $add_option->( 'stack-closing-paren', 'scp', '!' ); | ||||
| 1646 | $add_option->( 'stack-closing-square-bracket', 'scsb', '!' ); | ||||
| 1647 | $add_option->( 'stack-opening-block-brace', 'sobb', '!' ); | ||||
| 1648 | $add_option->( 'stack-opening-hash-brace', 'sohb', '!' ); | ||||
| 1649 | $add_option->( 'stack-opening-paren', 'sop', '!' ); | ||||
| 1650 | $add_option->( 'stack-opening-square-bracket', 'sosb', '!' ); | ||||
| 1651 | $add_option->( 'vertical-tightness', 'vt', '=i' ); | ||||
| 1652 | $add_option->( 'vertical-tightness-closing', 'vtc', '=i' ); | ||||
| 1653 | $add_option->( 'want-break-after', 'wba', '=s' ); | ||||
| 1654 | $add_option->( 'want-break-before', 'wbb', '=s' ); | ||||
| 1655 | $add_option->( 'break-after-all-operators', 'baao', '!' ); | ||||
| 1656 | $add_option->( 'break-before-all-operators', 'bbao', '!' ); | ||||
| 1657 | $add_option->( 'keep-interior-semicolons', 'kis', '!' ); | ||||
| 1658 | |||||
| 1659 | ######################################## | ||||
| 1660 | $category = 6; # Controlling list formatting | ||||
| 1661 | ######################################## | ||||
| 1662 | $add_option->( 'break-at-old-comma-breakpoints', 'boc', '!' ); | ||||
| 1663 | $add_option->( 'comma-arrow-breakpoints', 'cab', '=i' ); | ||||
| 1664 | $add_option->( 'maximum-fields-per-table', 'mft', '=i' ); | ||||
| 1665 | |||||
| 1666 | ######################################## | ||||
| 1667 | $category = 7; # Retaining or ignoring existing line breaks | ||||
| 1668 | ######################################## | ||||
| 1669 | $add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' ); | ||||
| 1670 | $add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' ); | ||||
| 1671 | $add_option->( 'break-at-old-ternary-breakpoints', 'bot', '!' ); | ||||
| 1672 | $add_option->( 'break-at-old-attribute-breakpoints', 'boa', '!' ); | ||||
| 1673 | $add_option->( 'ignore-old-breakpoints', 'iob', '!' ); | ||||
| 1674 | |||||
| 1675 | ######################################## | ||||
| 1676 | $category = 8; # Blank line control | ||||
| 1677 | ######################################## | ||||
| 1678 | $add_option->( 'blanks-before-blocks', 'bbb', '!' ); | ||||
| 1679 | $add_option->( 'blanks-before-comments', 'bbc', '!' ); | ||||
| 1680 | $add_option->( 'blank-lines-before-subs', 'blbs', '=i' ); | ||||
| 1681 | $add_option->( 'blank-lines-before-packages', 'blbp', '=i' ); | ||||
| 1682 | $add_option->( 'long-block-line-count', 'lbl', '=i' ); | ||||
| 1683 | $add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' ); | ||||
| 1684 | $add_option->( 'keep-old-blank-lines', 'kbl', '=i' ); | ||||
| 1685 | |||||
| 1686 | ######################################## | ||||
| 1687 | $category = 9; # Other controls | ||||
| 1688 | ######################################## | ||||
| 1689 | $add_option->( 'delete-block-comments', 'dbc', '!' ); | ||||
| 1690 | $add_option->( 'delete-closing-side-comments', 'dcsc', '!' ); | ||||
| 1691 | $add_option->( 'delete-pod', 'dp', '!' ); | ||||
| 1692 | $add_option->( 'delete-side-comments', 'dsc', '!' ); | ||||
| 1693 | $add_option->( 'tee-block-comments', 'tbc', '!' ); | ||||
| 1694 | $add_option->( 'tee-pod', 'tp', '!' ); | ||||
| 1695 | $add_option->( 'tee-side-comments', 'tsc', '!' ); | ||||
| 1696 | $add_option->( 'look-for-autoloader', 'lal', '!' ); | ||||
| 1697 | $add_option->( 'look-for-hash-bang', 'x', '!' ); | ||||
| 1698 | $add_option->( 'look-for-selfloader', 'lsl', '!' ); | ||||
| 1699 | $add_option->( 'pass-version-line', 'pvl', '!' ); | ||||
| 1700 | |||||
| 1701 | ######################################## | ||||
| 1702 | $category = 13; # Debugging | ||||
| 1703 | ######################################## | ||||
| 1704 | $add_option->( 'DEBUG', 'D', '!' ); | ||||
| 1705 | $add_option->( 'DIAGNOSTICS', 'I', '!' ); | ||||
| 1706 | $add_option->( 'dump-defaults', 'ddf', '!' ); | ||||
| 1707 | $add_option->( 'dump-long-names', 'dln', '!' ); | ||||
| 1708 | $add_option->( 'dump-options', 'dop', '!' ); | ||||
| 1709 | $add_option->( 'dump-profile', 'dpro', '!' ); | ||||
| 1710 | $add_option->( 'dump-short-names', 'dsn', '!' ); | ||||
| 1711 | $add_option->( 'dump-token-types', 'dtt', '!' ); | ||||
| 1712 | $add_option->( 'dump-want-left-space', 'dwls', '!' ); | ||||
| 1713 | $add_option->( 'dump-want-right-space', 'dwrs', '!' ); | ||||
| 1714 | $add_option->( 'fuzzy-line-length', 'fll', '!' ); | ||||
| 1715 | $add_option->( 'help', 'h', '' ); | ||||
| 1716 | $add_option->( 'short-concatenation-item-length', 'scl', '=i' ); | ||||
| 1717 | $add_option->( 'show-options', 'opt', '!' ); | ||||
| 1718 | $add_option->( 'version', 'v', '' ); | ||||
| 1719 | $add_option->( 'memoize', 'mem', '!' ); | ||||
| 1720 | |||||
| 1721 | #--------------------------------------------------------------------- | ||||
| 1722 | |||||
| 1723 | # The Perl::Tidy::HtmlWriter will add its own options to the string | ||||
| 1724 | Perl::Tidy::HtmlWriter->make_getopt_long_names( \@option_string ); | ||||
| 1725 | |||||
| 1726 | ######################################## | ||||
| 1727 | # Set categories 10, 11, 12 | ||||
| 1728 | ######################################## | ||||
| 1729 | # Based on their known order | ||||
| 1730 | $category = 12; # HTML properties | ||||
| 1731 | foreach my $opt (@option_string) { | ||||
| 1732 | my $long_name = $opt; | ||||
| 1733 | $long_name =~ s/(!|=.*|:.*)$//; | ||||
| 1734 | unless ( defined( $option_category{$long_name} ) ) { | ||||
| 1735 | if ( $long_name =~ /^html-linked/ ) { | ||||
| 1736 | $category = 10; # HTML options | ||||
| 1737 | } | ||||
| 1738 | elsif ( $long_name =~ /^pod2html/ ) { | ||||
| 1739 | $category = 11; # Pod2html | ||||
| 1740 | } | ||||
| 1741 | $option_category{$long_name} = $category_name[$category]; | ||||
| 1742 | } | ||||
| 1743 | } | ||||
| 1744 | |||||
| 1745 | #--------------------------------------------------------------- | ||||
| 1746 | # Assign valid ranges to certain options | ||||
| 1747 | #--------------------------------------------------------------- | ||||
| 1748 | # In the future, these may be used to make preliminary checks | ||||
| 1749 | # hash keys are long names | ||||
| 1750 | # If key or value is undefined: | ||||
| 1751 | # strings may have any value | ||||
| 1752 | # integer ranges are >=0 | ||||
| 1753 | # If value is defined: | ||||
| 1754 | # value is [qw(any valid words)] for strings | ||||
| 1755 | # value is [min, max] for integers | ||||
| 1756 | # if min is undefined, there is no lower limit | ||||
| 1757 | # if max is undefined, there is no upper limit | ||||
| 1758 | # Parameters not listed here have defaults | ||||
| 1759 | %option_range = ( | ||||
| 1760 | 'format' => [ 'tidy', 'html', 'user' ], | ||||
| 1761 | 'output-line-ending' => [ 'dos', 'win', 'mac', 'unix' ], | ||||
| 1762 | |||||
| 1763 | 'block-brace-tightness' => [ 0, 2 ], | ||||
| 1764 | 'brace-tightness' => [ 0, 2 ], | ||||
| 1765 | 'paren-tightness' => [ 0, 2 ], | ||||
| 1766 | 'square-bracket-tightness' => [ 0, 2 ], | ||||
| 1767 | |||||
| 1768 | 'block-brace-vertical-tightness' => [ 0, 2 ], | ||||
| 1769 | 'brace-vertical-tightness' => [ 0, 2 ], | ||||
| 1770 | 'brace-vertical-tightness-closing' => [ 0, 2 ], | ||||
| 1771 | 'paren-vertical-tightness' => [ 0, 2 ], | ||||
| 1772 | 'paren-vertical-tightness-closing' => [ 0, 2 ], | ||||
| 1773 | 'square-bracket-vertical-tightness' => [ 0, 2 ], | ||||
| 1774 | 'square-bracket-vertical-tightness-closing' => [ 0, 2 ], | ||||
| 1775 | 'vertical-tightness' => [ 0, 2 ], | ||||
| 1776 | 'vertical-tightness-closing' => [ 0, 2 ], | ||||
| 1777 | |||||
| 1778 | 'closing-brace-indentation' => [ 0, 3 ], | ||||
| 1779 | 'closing-paren-indentation' => [ 0, 3 ], | ||||
| 1780 | 'closing-square-bracket-indentation' => [ 0, 3 ], | ||||
| 1781 | 'closing-token-indentation' => [ 0, 3 ], | ||||
| 1782 | |||||
| 1783 | 'closing-side-comment-else-flag' => [ 0, 2 ], | ||||
| 1784 | 'comma-arrow-breakpoints' => [ 0, 5 ], | ||||
| 1785 | ); | ||||
| 1786 | |||||
| 1787 | # Note: we could actually allow negative ci if someone really wants it: | ||||
| 1788 | # $option_range{'continuation-indentation'} = [ undef, undef ]; | ||||
| 1789 | |||||
| 1790 | #--------------------------------------------------------------- | ||||
| 1791 | # Assign default values to the above options here, except | ||||
| 1792 | # for 'outfile' and 'help'. | ||||
| 1793 | # These settings should approximate the perlstyle(1) suggestions. | ||||
| 1794 | #--------------------------------------------------------------- | ||||
| 1795 | my @defaults = qw( | ||||
| 1796 | add-newlines | ||||
| 1797 | add-semicolons | ||||
| 1798 | add-whitespace | ||||
| 1799 | blanks-before-blocks | ||||
| 1800 | blanks-before-comments | ||||
| 1801 | blank-lines-before-subs=1 | ||||
| 1802 | blank-lines-before-packages=1 | ||||
| 1803 | block-brace-tightness=0 | ||||
| 1804 | block-brace-vertical-tightness=0 | ||||
| 1805 | brace-tightness=1 | ||||
| 1806 | brace-vertical-tightness-closing=0 | ||||
| 1807 | brace-vertical-tightness=0 | ||||
| 1808 | break-at-old-logical-breakpoints | ||||
| 1809 | break-at-old-ternary-breakpoints | ||||
| 1810 | break-at-old-attribute-breakpoints | ||||
| 1811 | break-at-old-keyword-breakpoints | ||||
| 1812 | comma-arrow-breakpoints=5 | ||||
| 1813 | nocheck-syntax | ||||
| 1814 | closing-side-comment-interval=6 | ||||
| 1815 | closing-side-comment-maximum-text=20 | ||||
| 1816 | closing-side-comment-else-flag=0 | ||||
| 1817 | closing-side-comments-balanced | ||||
| 1818 | closing-paren-indentation=0 | ||||
| 1819 | closing-brace-indentation=0 | ||||
| 1820 | closing-square-bracket-indentation=0 | ||||
| 1821 | continuation-indentation=2 | ||||
| 1822 | delete-old-newlines | ||||
| 1823 | delete-semicolons | ||||
| 1824 | fuzzy-line-length | ||||
| 1825 | hanging-side-comments | ||||
| 1826 | indent-block-comments | ||||
| 1827 | indent-columns=4 | ||||
| 1828 | iterations=1 | ||||
| 1829 | keep-old-blank-lines=1 | ||||
| 1830 | long-block-line-count=8 | ||||
| 1831 | look-for-autoloader | ||||
| 1832 | look-for-selfloader | ||||
| 1833 | maximum-consecutive-blank-lines=1 | ||||
| 1834 | maximum-fields-per-table=0 | ||||
| 1835 | maximum-line-length=80 | ||||
| 1836 | memoize | ||||
| 1837 | minimum-space-to-comment=4 | ||||
| 1838 | nobrace-left-and-indent | ||||
| 1839 | nocuddled-else | ||||
| 1840 | nodelete-old-whitespace | ||||
| 1841 | nohtml | ||||
| 1842 | nologfile | ||||
| 1843 | noquiet | ||||
| 1844 | noshow-options | ||||
| 1845 | nostatic-side-comments | ||||
| 1846 | notabs | ||||
| 1847 | nowarning-output | ||||
| 1848 | outdent-labels | ||||
| 1849 | outdent-long-quotes | ||||
| 1850 | outdent-long-comments | ||||
| 1851 | paren-tightness=1 | ||||
| 1852 | paren-vertical-tightness-closing=0 | ||||
| 1853 | paren-vertical-tightness=0 | ||||
| 1854 | pass-version-line | ||||
| 1855 | recombine | ||||
| 1856 | valign | ||||
| 1857 | short-concatenation-item-length=8 | ||||
| 1858 | space-for-semicolon | ||||
| 1859 | square-bracket-tightness=1 | ||||
| 1860 | square-bracket-vertical-tightness-closing=0 | ||||
| 1861 | square-bracket-vertical-tightness=0 | ||||
| 1862 | static-block-comments | ||||
| 1863 | trim-qw | ||||
| 1864 | format=tidy | ||||
| 1865 | backup-file-extension=bak | ||||
| 1866 | format-skipping | ||||
| 1867 | default-tabsize=8 | ||||
| 1868 | |||||
| 1869 | pod2html | ||||
| 1870 | html-table-of-contents | ||||
| 1871 | html-entities | ||||
| 1872 | ); | ||||
| 1873 | |||||
| 1874 | push @defaults, "perl-syntax-check-flags=-c -T"; | ||||
| 1875 | |||||
| 1876 | #--------------------------------------------------------------- | ||||
| 1877 | # Define abbreviations which will be expanded into the above primitives. | ||||
| 1878 | # These may be defined recursively. | ||||
| 1879 | #--------------------------------------------------------------- | ||||
| 1880 | %expansion = ( | ||||
| 1881 | %expansion, | ||||
| 1882 | 'freeze-newlines' => [qw(noadd-newlines nodelete-old-newlines)], | ||||
| 1883 | 'fnl' => [qw(freeze-newlines)], | ||||
| 1884 | 'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)], | ||||
| 1885 | 'fws' => [qw(freeze-whitespace)], | ||||
| 1886 | 'freeze-blank-lines' => | ||||
| 1887 | [qw(maximum-consecutive-blank-lines=0 keep-old-blank-lines=2)], | ||||
| 1888 | 'fbl' => [qw(freeze-blank-lines)], | ||||
| 1889 | 'indent-only' => [qw(freeze-newlines freeze-whitespace)], | ||||
| 1890 | 'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)], | ||||
| 1891 | 'nooutdent-long-lines' => | ||||
| 1892 | [qw(nooutdent-long-quotes nooutdent-long-comments)], | ||||
| 1893 | 'noll' => [qw(nooutdent-long-lines)], | ||||
| 1894 | 'io' => [qw(indent-only)], | ||||
| 1895 | 'delete-all-comments' => | ||||
| 1896 | [qw(delete-block-comments delete-side-comments delete-pod)], | ||||
| 1897 | 'nodelete-all-comments' => | ||||
| 1898 | [qw(nodelete-block-comments nodelete-side-comments nodelete-pod)], | ||||
| 1899 | 'dac' => [qw(delete-all-comments)], | ||||
| 1900 | 'ndac' => [qw(nodelete-all-comments)], | ||||
| 1901 | 'gnu' => [qw(gnu-style)], | ||||
| 1902 | 'pbp' => [qw(perl-best-practices)], | ||||
| 1903 | 'tee-all-comments' => | ||||
| 1904 | [qw(tee-block-comments tee-side-comments tee-pod)], | ||||
| 1905 | 'notee-all-comments' => | ||||
| 1906 | [qw(notee-block-comments notee-side-comments notee-pod)], | ||||
| 1907 | 'tac' => [qw(tee-all-comments)], | ||||
| 1908 | 'ntac' => [qw(notee-all-comments)], | ||||
| 1909 | 'html' => [qw(format=html)], | ||||
| 1910 | 'nhtml' => [qw(format=tidy)], | ||||
| 1911 | 'tidy' => [qw(format=tidy)], | ||||
| 1912 | |||||
| 1913 | 'swallow-optional-blank-lines' => [qw(kbl=0)], | ||||
| 1914 | 'noswallow-optional-blank-lines' => [qw(kbl=1)], | ||||
| 1915 | 'sob' => [qw(kbl=0)], | ||||
| 1916 | 'nsob' => [qw(kbl=1)], | ||||
| 1917 | |||||
| 1918 | 'break-after-comma-arrows' => [qw(cab=0)], | ||||
| 1919 | 'nobreak-after-comma-arrows' => [qw(cab=1)], | ||||
| 1920 | 'baa' => [qw(cab=0)], | ||||
| 1921 | 'nbaa' => [qw(cab=1)], | ||||
| 1922 | |||||
| 1923 | 'blanks-before-subs' => [qw(blbs=1 blbp=1)], | ||||
| 1924 | 'bbs' => [qw(blbs=1 blbp=1)], | ||||
| 1925 | 'noblanks-before-subs' => [qw(blbs=0 blbp=0)], | ||||
| 1926 | 'nbbs' => [qw(blbs=0 blbp=0)], | ||||
| 1927 | |||||
| 1928 | 'break-at-old-trinary-breakpoints' => [qw(bot)], | ||||
| 1929 | |||||
| 1930 | 'cti=0' => [qw(cpi=0 cbi=0 csbi=0)], | ||||
| 1931 | 'cti=1' => [qw(cpi=1 cbi=1 csbi=1)], | ||||
| 1932 | 'cti=2' => [qw(cpi=2 cbi=2 csbi=2)], | ||||
| 1933 | 'icp' => [qw(cpi=2 cbi=2 csbi=2)], | ||||
| 1934 | 'nicp' => [qw(cpi=0 cbi=0 csbi=0)], | ||||
| 1935 | |||||
| 1936 | 'closing-token-indentation=0' => [qw(cpi=0 cbi=0 csbi=0)], | ||||
| 1937 | 'closing-token-indentation=1' => [qw(cpi=1 cbi=1 csbi=1)], | ||||
| 1938 | 'closing-token-indentation=2' => [qw(cpi=2 cbi=2 csbi=2)], | ||||
| 1939 | 'indent-closing-paren' => [qw(cpi=2 cbi=2 csbi=2)], | ||||
| 1940 | 'noindent-closing-paren' => [qw(cpi=0 cbi=0 csbi=0)], | ||||
| 1941 | |||||
| 1942 | 'vt=0' => [qw(pvt=0 bvt=0 sbvt=0)], | ||||
| 1943 | 'vt=1' => [qw(pvt=1 bvt=1 sbvt=1)], | ||||
| 1944 | 'vt=2' => [qw(pvt=2 bvt=2 sbvt=2)], | ||||
| 1945 | |||||
| 1946 | 'vertical-tightness=0' => [qw(pvt=0 bvt=0 sbvt=0)], | ||||
| 1947 | 'vertical-tightness=1' => [qw(pvt=1 bvt=1 sbvt=1)], | ||||
| 1948 | 'vertical-tightness=2' => [qw(pvt=2 bvt=2 sbvt=2)], | ||||
| 1949 | |||||
| 1950 | 'vtc=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)], | ||||
| 1951 | 'vtc=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)], | ||||
| 1952 | 'vtc=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)], | ||||
| 1953 | |||||
| 1954 | 'vertical-tightness-closing=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)], | ||||
| 1955 | 'vertical-tightness-closing=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)], | ||||
| 1956 | 'vertical-tightness-closing=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)], | ||||
| 1957 | |||||
| 1958 | 'otr' => [qw(opr ohbr osbr)], | ||||
| 1959 | 'opening-token-right' => [qw(opr ohbr osbr)], | ||||
| 1960 | 'notr' => [qw(nopr nohbr nosbr)], | ||||
| 1961 | 'noopening-token-right' => [qw(nopr nohbr nosbr)], | ||||
| 1962 | |||||
| 1963 | 'sot' => [qw(sop sohb sosb)], | ||||
| 1964 | 'nsot' => [qw(nsop nsohb nsosb)], | ||||
| 1965 | 'stack-opening-tokens' => [qw(sop sohb sosb)], | ||||
| 1966 | 'nostack-opening-tokens' => [qw(nsop nsohb nsosb)], | ||||
| 1967 | |||||
| 1968 | 'sct' => [qw(scp schb scsb)], | ||||
| 1969 | 'stack-closing-tokens' => => [qw(scp schb scsb)], | ||||
| 1970 | 'nsct' => [qw(nscp nschb nscsb)], | ||||
| 1971 | 'nostack-opening-tokens' => [qw(nscp nschb nscsb)], | ||||
| 1972 | |||||
| 1973 | 'sac' => [qw(sot sct)], | ||||
| 1974 | 'nsac' => [qw(nsot nsct)], | ||||
| 1975 | 'stack-all-containers' => [qw(sot sct)], | ||||
| 1976 | 'nostack-all-containers' => [qw(nsot nsct)], | ||||
| 1977 | |||||
| 1978 | 'act=0' => [qw(pt=0 sbt=0 bt=0 bbt=0)], | ||||
| 1979 | 'act=1' => [qw(pt=1 sbt=1 bt=1 bbt=1)], | ||||
| 1980 | 'act=2' => [qw(pt=2 sbt=2 bt=2 bbt=2)], | ||||
| 1981 | 'all-containers-tightness=0' => [qw(pt=0 sbt=0 bt=0 bbt=0)], | ||||
| 1982 | 'all-containers-tightness=1' => [qw(pt=1 sbt=1 bt=1 bbt=1)], | ||||
| 1983 | 'all-containers-tightness=2' => [qw(pt=2 sbt=2 bt=2 bbt=2)], | ||||
| 1984 | |||||
| 1985 | 'stack-opening-block-brace' => [qw(bbvt=2 bbvtl=*)], | ||||
| 1986 | 'sobb' => [qw(bbvt=2 bbvtl=*)], | ||||
| 1987 | 'nostack-opening-block-brace' => [qw(bbvt=0)], | ||||
| 1988 | 'nsobb' => [qw(bbvt=0)], | ||||
| 1989 | |||||
| 1990 | 'converge' => [qw(it=4)], | ||||
| 1991 | 'noconverge' => [qw(it=1)], | ||||
| 1992 | 'conv' => [qw(it=4)], | ||||
| 1993 | 'nconv' => [qw(it=1)], | ||||
| 1994 | |||||
| 1995 | # 'mangle' originally deleted pod and comments, but to keep it | ||||
| 1996 | # reversible, it no longer does. But if you really want to | ||||
| 1997 | # delete them, just use: | ||||
| 1998 | # -mangle -dac | ||||
| 1999 | |||||
| 2000 | # An interesting use for 'mangle' is to do this: | ||||
| 2001 | # perltidy -mangle myfile.pl -st | perltidy -o myfile.pl.new | ||||
| 2002 | # which will form as many one-line blocks as possible | ||||
| 2003 | |||||
| 2004 | 'mangle' => [ | ||||
| 2005 | qw( | ||||
| 2006 | check-syntax | ||||
| 2007 | keep-old-blank-lines=0 | ||||
| 2008 | delete-old-newlines | ||||
| 2009 | delete-old-whitespace | ||||
| 2010 | delete-semicolons | ||||
| 2011 | indent-columns=0 | ||||
| 2012 | maximum-consecutive-blank-lines=0 | ||||
| 2013 | maximum-line-length=100000 | ||||
| 2014 | noadd-newlines | ||||
| 2015 | noadd-semicolons | ||||
| 2016 | noadd-whitespace | ||||
| 2017 | noblanks-before-blocks | ||||
| 2018 | blank-lines-before-subs=0 | ||||
| 2019 | blank-lines-before-packages=0 | ||||
| 2020 | notabs | ||||
| 2021 | ) | ||||
| 2022 | ], | ||||
| 2023 | |||||
| 2024 | # 'extrude' originally deleted pod and comments, but to keep it | ||||
| 2025 | # reversible, it no longer does. But if you really want to | ||||
| 2026 | # delete them, just use | ||||
| 2027 | # extrude -dac | ||||
| 2028 | # | ||||
| 2029 | # An interesting use for 'extrude' is to do this: | ||||
| 2030 | # perltidy -extrude myfile.pl -st | perltidy -o myfile.pl.new | ||||
| 2031 | # which will break up all one-line blocks. | ||||
| 2032 | |||||
| 2033 | 'extrude' => [ | ||||
| 2034 | qw( | ||||
| 2035 | check-syntax | ||||
| 2036 | ci=0 | ||||
| 2037 | delete-old-newlines | ||||
| 2038 | delete-old-whitespace | ||||
| 2039 | delete-semicolons | ||||
| 2040 | indent-columns=0 | ||||
| 2041 | maximum-consecutive-blank-lines=0 | ||||
| 2042 | maximum-line-length=1 | ||||
| 2043 | noadd-semicolons | ||||
| 2044 | noadd-whitespace | ||||
| 2045 | noblanks-before-blocks | ||||
| 2046 | blank-lines-before-subs=0 | ||||
| 2047 | blank-lines-before-packages=0 | ||||
| 2048 | nofuzzy-line-length | ||||
| 2049 | notabs | ||||
| 2050 | norecombine | ||||
| 2051 | ) | ||||
| 2052 | ], | ||||
| 2053 | |||||
| 2054 | # this style tries to follow the GNU Coding Standards (which do | ||||
| 2055 | # not really apply to perl but which are followed by some perl | ||||
| 2056 | # programmers). | ||||
| 2057 | 'gnu-style' => [ | ||||
| 2058 | qw( | ||||
| 2059 | lp bl noll pt=2 bt=2 sbt=2 cpi=1 csbi=1 cbi=1 | ||||
| 2060 | ) | ||||
| 2061 | ], | ||||
| 2062 | |||||
| 2063 | # Style suggested in Damian Conway's Perl Best Practices | ||||
| 2064 | 'perl-best-practices' => [ | ||||
| 2065 | qw(l=78 i=4 ci=4 st se vt=2 cti=0 pt=1 bt=1 sbt=1 bbt=1 nsfs nolq), | ||||
| 2066 | q(wbb=% + - * / x != == >= <= =~ !~ < > | & = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=) | ||||
| 2067 | ], | ||||
| 2068 | |||||
| 2069 | # Additional styles can be added here | ||||
| 2070 | ); | ||||
| 2071 | |||||
| 2072 | Perl::Tidy::HtmlWriter->make_abbreviated_names( \%expansion ); | ||||
| 2073 | |||||
| 2074 | # Uncomment next line to dump all expansions for debugging: | ||||
| 2075 | # dump_short_names(\%expansion); | ||||
| 2076 | return ( | ||||
| 2077 | \@option_string, \@defaults, \%expansion, | ||||
| 2078 | \%option_category, \%option_range | ||||
| 2079 | ); | ||||
| 2080 | |||||
| 2081 | } # end of generate_options | ||||
| 2082 | |||||
| 2083 | # Memoize process_command_line. Given same @ARGV passed in, return same | ||||
| 2084 | # values and same @ARGV back. | ||||
| 2085 | # This patch was supplied by Jonathan Swartz Nov 2012 and significantly speeds | ||||
| 2086 | # up masontidy (https://metacpan.org/module/masontidy) | ||||
| 2087 | |||||
| 2088 | 1 | 100ns | my %process_command_line_cache; | ||
| 2089 | |||||
| 2090 | sub process_command_line { | ||||
| 2091 | |||||
| 2092 | my ( | ||||
| 2093 | $perltidyrc_stream, $is_Windows, $Windows_type, | ||||
| 2094 | $rpending_complaint, $dump_options_type | ||||
| 2095 | ) = @_; | ||||
| 2096 | |||||
| 2097 | my $use_cache = !defined($perltidyrc_stream) && !$dump_options_type; | ||||
| 2098 | if ($use_cache) { | ||||
| 2099 | my $cache_key = join( chr(28), @ARGV ); | ||||
| 2100 | if ( my $result = $process_command_line_cache{$cache_key} ) { | ||||
| 2101 | my ( $argv, @retvals ) = @$result; | ||||
| 2102 | @ARGV = @$argv; | ||||
| 2103 | return @retvals; | ||||
| 2104 | } | ||||
| 2105 | else { | ||||
| 2106 | my @retvals = _process_command_line(@_); | ||||
| 2107 | $process_command_line_cache{$cache_key} = [ \@ARGV, @retvals ] | ||||
| 2108 | if $retvals[0]->{'memoize'}; | ||||
| 2109 | return @retvals; | ||||
| 2110 | } | ||||
| 2111 | } | ||||
| 2112 | else { | ||||
| 2113 | return _process_command_line(@_); | ||||
| 2114 | } | ||||
| 2115 | } | ||||
| 2116 | |||||
| 2117 | # (note the underscore here) | ||||
| 2118 | sub _process_command_line { | ||||
| 2119 | |||||
| 2120 | my ( | ||||
| 2121 | $perltidyrc_stream, $is_Windows, $Windows_type, | ||||
| 2122 | $rpending_complaint, $dump_options_type | ||||
| 2123 | ) = @_; | ||||
| 2124 | |||||
| 2125 | 2 | 4.15ms | 2 | 209µs | # spent 110µs (11+99) within Perl::Tidy::BEGIN@2125 which was called:
# once (11µs+99µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 2125 # spent 110µs making 1 call to Perl::Tidy::BEGIN@2125
# spent 99µs making 1 call to Getopt::Long::import |
| 2126 | |||||
| 2127 | my ( | ||||
| 2128 | $roption_string, $rdefaults, $rexpansion, | ||||
| 2129 | $roption_category, $roption_range | ||||
| 2130 | ) = generate_options(); | ||||
| 2131 | |||||
| 2132 | #--------------------------------------------------------------- | ||||
| 2133 | # set the defaults by passing the above list through GetOptions | ||||
| 2134 | #--------------------------------------------------------------- | ||||
| 2135 | my %Opts = (); | ||||
| 2136 | { | ||||
| 2137 | local @ARGV; | ||||
| 2138 | my $i; | ||||
| 2139 | |||||
| 2140 | # do not load the defaults if we are just dumping perltidyrc | ||||
| 2141 | unless ( $dump_options_type eq 'perltidyrc' ) { | ||||
| 2142 | for $i (@$rdefaults) { push @ARGV, "--" . $i } | ||||
| 2143 | } | ||||
| 2144 | |||||
| 2145 | # Patch to save users Getopt::Long configuration | ||||
| 2146 | # and set to Getopt::Long defaults. Use eval to avoid | ||||
| 2147 | # breaking old versions of Perl without these routines. | ||||
| 2148 | my $glc; | ||||
| 2149 | eval { $glc = Getopt::Long::Configure() }; | ||||
| 2150 | unless ($@) { | ||||
| 2151 | eval { Getopt::Long::ConfigDefaults() }; | ||||
| 2152 | } | ||||
| 2153 | else { $glc = undef } | ||||
| 2154 | |||||
| 2155 | if ( !GetOptions( \%Opts, @$roption_string ) ) { | ||||
| 2156 | Die "Programming Bug: error in setting default options"; | ||||
| 2157 | } | ||||
| 2158 | |||||
| 2159 | # Patch to put the previous Getopt::Long configuration back | ||||
| 2160 | eval { Getopt::Long::Configure($glc) } if defined $glc; | ||||
| 2161 | } | ||||
| 2162 | |||||
| 2163 | my $word; | ||||
| 2164 | my @raw_options = (); | ||||
| 2165 | my $config_file = ""; | ||||
| 2166 | my $saw_ignore_profile = 0; | ||||
| 2167 | my $saw_extrude = 0; | ||||
| 2168 | my $saw_pbp = 0; | ||||
| 2169 | my $saw_dump_profile = 0; | ||||
| 2170 | my $i; | ||||
| 2171 | |||||
| 2172 | #--------------------------------------------------------------- | ||||
| 2173 | # Take a first look at the command-line parameters. Do as many | ||||
| 2174 | # immediate dumps as possible, which can avoid confusion if the | ||||
| 2175 | # perltidyrc file has an error. | ||||
| 2176 | #--------------------------------------------------------------- | ||||
| 2177 | foreach $i (@ARGV) { | ||||
| 2178 | |||||
| 2179 | $i =~ s/^--/-/; | ||||
| 2180 | if ( $i =~ /^-(npro|noprofile|no-profile)$/ ) { | ||||
| 2181 | $saw_ignore_profile = 1; | ||||
| 2182 | } | ||||
| 2183 | |||||
| 2184 | # note: this must come before -pro and -profile, below: | ||||
| 2185 | elsif ( $i =~ /^-(dump-profile|dpro)$/ ) { | ||||
| 2186 | $saw_dump_profile = 1; | ||||
| 2187 | } | ||||
| 2188 | elsif ( $i =~ /^-(pro|profile)=(.+)/ ) { | ||||
| 2189 | if ($config_file) { | ||||
| 2190 | Warn | ||||
| 2191 | "Only one -pro=filename allowed, using '$2' instead of '$config_file'\n"; | ||||
| 2192 | } | ||||
| 2193 | $config_file = $2; | ||||
| 2194 | |||||
| 2195 | # resolve <dir>/.../<file>, meaning look upwards from directory | ||||
| 2196 | if ( defined($config_file) ) { | ||||
| 2197 | if ( my ( $start_dir, $search_file ) = | ||||
| 2198 | ( $config_file =~ m{^(.*)\.\.\./(.*)$} ) ) | ||||
| 2199 | { | ||||
| 2200 | $start_dir = '.' if !$start_dir; | ||||
| 2201 | $start_dir = Cwd::realpath($start_dir); | ||||
| 2202 | if ( my $found_file = | ||||
| 2203 | find_file_upwards( $start_dir, $search_file ) ) | ||||
| 2204 | { | ||||
| 2205 | $config_file = $found_file; | ||||
| 2206 | } | ||||
| 2207 | } | ||||
| 2208 | } | ||||
| 2209 | unless ( -e $config_file ) { | ||||
| 2210 | Warn "cannot find file given with -pro=$config_file: $!\n"; | ||||
| 2211 | $config_file = ""; | ||||
| 2212 | } | ||||
| 2213 | } | ||||
| 2214 | elsif ( $i =~ /^-(pro|profile)=?$/ ) { | ||||
| 2215 | Die "usage: -pro=filename or --profile=filename, no spaces\n"; | ||||
| 2216 | } | ||||
| 2217 | elsif ( $i =~ /^-extrude$/ ) { | ||||
| 2218 | $saw_extrude = 1; | ||||
| 2219 | } | ||||
| 2220 | elsif ( $i =~ /^-(pbp|perl-best-practices)$/ ) { | ||||
| 2221 | $saw_pbp = 1; | ||||
| 2222 | } | ||||
| 2223 | elsif ( $i =~ /^-(help|h|HELP|H|\?)$/ ) { | ||||
| 2224 | usage(); | ||||
| 2225 | Exit 0; | ||||
| 2226 | } | ||||
| 2227 | elsif ( $i =~ /^-(version|v)$/ ) { | ||||
| 2228 | show_version(); | ||||
| 2229 | Exit 0; | ||||
| 2230 | } | ||||
| 2231 | elsif ( $i =~ /^-(dump-defaults|ddf)$/ ) { | ||||
| 2232 | dump_defaults(@$rdefaults); | ||||
| 2233 | Exit 0; | ||||
| 2234 | } | ||||
| 2235 | elsif ( $i =~ /^-(dump-long-names|dln)$/ ) { | ||||
| 2236 | dump_long_names(@$roption_string); | ||||
| 2237 | Exit 0; | ||||
| 2238 | } | ||||
| 2239 | elsif ( $i =~ /^-(dump-short-names|dsn)$/ ) { | ||||
| 2240 | dump_short_names($rexpansion); | ||||
| 2241 | Exit 0; | ||||
| 2242 | } | ||||
| 2243 | elsif ( $i =~ /^-(dump-token-types|dtt)$/ ) { | ||||
| 2244 | Perl::Tidy::Tokenizer->dump_token_types(*STDOUT); | ||||
| 2245 | Exit 0; | ||||
| 2246 | } | ||||
| 2247 | } | ||||
| 2248 | |||||
| 2249 | if ( $saw_dump_profile && $saw_ignore_profile ) { | ||||
| 2250 | Warn "No profile to dump because of -npro\n"; | ||||
| 2251 | Exit 1; | ||||
| 2252 | } | ||||
| 2253 | |||||
| 2254 | #--------------------------------------------------------------- | ||||
| 2255 | # read any .perltidyrc configuration file | ||||
| 2256 | #--------------------------------------------------------------- | ||||
| 2257 | unless ($saw_ignore_profile) { | ||||
| 2258 | |||||
| 2259 | # resolve possible conflict between $perltidyrc_stream passed | ||||
| 2260 | # as call parameter to perltidy and -pro=filename on command | ||||
| 2261 | # line. | ||||
| 2262 | if ($perltidyrc_stream) { | ||||
| 2263 | if ($config_file) { | ||||
| 2264 | Warn <<EOM; | ||||
| 2265 | Conflict: a perltidyrc configuration file was specified both as this | ||||
| 2266 | perltidy call parameter: $perltidyrc_stream | ||||
| 2267 | and with this -profile=$config_file. | ||||
| 2268 | Using -profile=$config_file. | ||||
| 2269 | EOM | ||||
| 2270 | } | ||||
| 2271 | else { | ||||
| 2272 | $config_file = $perltidyrc_stream; | ||||
| 2273 | } | ||||
| 2274 | } | ||||
| 2275 | |||||
| 2276 | # look for a config file if we don't have one yet | ||||
| 2277 | my $rconfig_file_chatter; | ||||
| 2278 | $$rconfig_file_chatter = ""; | ||||
| 2279 | $config_file = | ||||
| 2280 | find_config_file( $is_Windows, $Windows_type, $rconfig_file_chatter, | ||||
| 2281 | $rpending_complaint ) | ||||
| 2282 | unless $config_file; | ||||
| 2283 | |||||
| 2284 | # open any config file | ||||
| 2285 | my $fh_config; | ||||
| 2286 | if ($config_file) { | ||||
| 2287 | ( $fh_config, $config_file ) = | ||||
| 2288 | Perl::Tidy::streamhandle( $config_file, 'r' ); | ||||
| 2289 | unless ($fh_config) { | ||||
| 2290 | $$rconfig_file_chatter .= | ||||
| 2291 | "# $config_file exists but cannot be opened\n"; | ||||
| 2292 | } | ||||
| 2293 | } | ||||
| 2294 | |||||
| 2295 | if ($saw_dump_profile) { | ||||
| 2296 | dump_config_file( $fh_config, $config_file, $rconfig_file_chatter ); | ||||
| 2297 | Exit 0; | ||||
| 2298 | } | ||||
| 2299 | |||||
| 2300 | if ($fh_config) { | ||||
| 2301 | |||||
| 2302 | my ( $rconfig_list, $death_message, $_saw_pbp ) = | ||||
| 2303 | read_config_file( $fh_config, $config_file, $rexpansion ); | ||||
| 2304 | Die $death_message if ($death_message); | ||||
| 2305 | $saw_pbp ||= $_saw_pbp; | ||||
| 2306 | |||||
| 2307 | # process any .perltidyrc parameters right now so we can | ||||
| 2308 | # localize errors | ||||
| 2309 | if (@$rconfig_list) { | ||||
| 2310 | local @ARGV = @$rconfig_list; | ||||
| 2311 | |||||
| 2312 | expand_command_abbreviations( $rexpansion, \@raw_options, | ||||
| 2313 | $config_file ); | ||||
| 2314 | |||||
| 2315 | if ( !GetOptions( \%Opts, @$roption_string ) ) { | ||||
| 2316 | Die | ||||
| 2317 | "Error in this config file: $config_file \nUse -npro to ignore this file, -h for help'\n"; | ||||
| 2318 | } | ||||
| 2319 | |||||
| 2320 | # Anything left in this local @ARGV is an error and must be | ||||
| 2321 | # invalid bare words from the configuration file. We cannot | ||||
| 2322 | # check this earlier because bare words may have been valid | ||||
| 2323 | # values for parameters. We had to wait for GetOptions to have | ||||
| 2324 | # a look at @ARGV. | ||||
| 2325 | if (@ARGV) { | ||||
| 2326 | my $count = @ARGV; | ||||
| 2327 | my $str = "\'" . pop(@ARGV) . "\'"; | ||||
| 2328 | while ( my $param = pop(@ARGV) ) { | ||||
| 2329 | if ( length($str) < 70 ) { | ||||
| 2330 | $str .= ", '$param'"; | ||||
| 2331 | } | ||||
| 2332 | else { | ||||
| 2333 | $str .= ", ..."; | ||||
| 2334 | last; | ||||
| 2335 | } | ||||
| 2336 | } | ||||
| 2337 | Die <<EOM; | ||||
| 2338 | There are $count unrecognized values in the configuration file '$config_file': | ||||
| 2339 | $str | ||||
| 2340 | Use leading dashes for parameters. Use -npro to ignore this file. | ||||
| 2341 | EOM | ||||
| 2342 | } | ||||
| 2343 | |||||
| 2344 | # Undo any options which cause premature exit. They are not | ||||
| 2345 | # appropriate for a config file, and it could be hard to | ||||
| 2346 | # diagnose the cause of the premature exit. | ||||
| 2347 | foreach ( | ||||
| 2348 | qw{ | ||||
| 2349 | dump-defaults | ||||
| 2350 | dump-long-names | ||||
| 2351 | dump-options | ||||
| 2352 | dump-profile | ||||
| 2353 | dump-short-names | ||||
| 2354 | dump-token-types | ||||
| 2355 | dump-want-left-space | ||||
| 2356 | dump-want-right-space | ||||
| 2357 | help | ||||
| 2358 | stylesheet | ||||
| 2359 | version | ||||
| 2360 | } | ||||
| 2361 | ) | ||||
| 2362 | { | ||||
| 2363 | |||||
| 2364 | if ( defined( $Opts{$_} ) ) { | ||||
| 2365 | delete $Opts{$_}; | ||||
| 2366 | Warn "ignoring --$_ in config file: $config_file\n"; | ||||
| 2367 | } | ||||
| 2368 | } | ||||
| 2369 | } | ||||
| 2370 | } | ||||
| 2371 | } | ||||
| 2372 | |||||
| 2373 | #--------------------------------------------------------------- | ||||
| 2374 | # now process the command line parameters | ||||
| 2375 | #--------------------------------------------------------------- | ||||
| 2376 | expand_command_abbreviations( $rexpansion, \@raw_options, $config_file ); | ||||
| 2377 | |||||
| 2378 | local $SIG{'__WARN__'} = sub { Warn $_[0] }; | ||||
| 2379 | if ( !GetOptions( \%Opts, @$roption_string ) ) { | ||||
| 2380 | Die "Error on command line; for help try 'perltidy -h'\n"; | ||||
| 2381 | } | ||||
| 2382 | |||||
| 2383 | return ( | ||||
| 2384 | \%Opts, $config_file, \@raw_options, | ||||
| 2385 | $saw_extrude, $saw_pbp, $roption_string, | ||||
| 2386 | $rexpansion, $roption_category, $roption_range | ||||
| 2387 | ); | ||||
| 2388 | } # end of process_command_line | ||||
| 2389 | |||||
| 2390 | sub check_options { | ||||
| 2391 | |||||
| 2392 | my ( $rOpts, $is_Windows, $Windows_type, $rpending_complaint ) = @_; | ||||
| 2393 | |||||
| 2394 | #--------------------------------------------------------------- | ||||
| 2395 | # check and handle any interactions among the basic options.. | ||||
| 2396 | #--------------------------------------------------------------- | ||||
| 2397 | |||||
| 2398 | # Since -vt, -vtc, and -cti are abbreviations, but under | ||||
| 2399 | # msdos, an unquoted input parameter like vtc=1 will be | ||||
| 2400 | # seen as 2 parameters, vtc and 1, so the abbreviations | ||||
| 2401 | # won't be seen. Therefore, we will catch them here if | ||||
| 2402 | # they get through. | ||||
| 2403 | |||||
| 2404 | if ( defined $rOpts->{'vertical-tightness'} ) { | ||||
| 2405 | my $vt = $rOpts->{'vertical-tightness'}; | ||||
| 2406 | $rOpts->{'paren-vertical-tightness'} = $vt; | ||||
| 2407 | $rOpts->{'square-bracket-vertical-tightness'} = $vt; | ||||
| 2408 | $rOpts->{'brace-vertical-tightness'} = $vt; | ||||
| 2409 | } | ||||
| 2410 | |||||
| 2411 | if ( defined $rOpts->{'vertical-tightness-closing'} ) { | ||||
| 2412 | my $vtc = $rOpts->{'vertical-tightness-closing'}; | ||||
| 2413 | $rOpts->{'paren-vertical-tightness-closing'} = $vtc; | ||||
| 2414 | $rOpts->{'square-bracket-vertical-tightness-closing'} = $vtc; | ||||
| 2415 | $rOpts->{'brace-vertical-tightness-closing'} = $vtc; | ||||
| 2416 | } | ||||
| 2417 | |||||
| 2418 | if ( defined $rOpts->{'closing-token-indentation'} ) { | ||||
| 2419 | my $cti = $rOpts->{'closing-token-indentation'}; | ||||
| 2420 | $rOpts->{'closing-square-bracket-indentation'} = $cti; | ||||
| 2421 | $rOpts->{'closing-brace-indentation'} = $cti; | ||||
| 2422 | $rOpts->{'closing-paren-indentation'} = $cti; | ||||
| 2423 | } | ||||
| 2424 | |||||
| 2425 | # In quiet mode, there is no log file and hence no way to report | ||||
| 2426 | # results of syntax check, so don't do it. | ||||
| 2427 | if ( $rOpts->{'quiet'} ) { | ||||
| 2428 | $rOpts->{'check-syntax'} = 0; | ||||
| 2429 | } | ||||
| 2430 | |||||
| 2431 | # can't check syntax if no output | ||||
| 2432 | if ( $rOpts->{'format'} ne 'tidy' ) { | ||||
| 2433 | $rOpts->{'check-syntax'} = 0; | ||||
| 2434 | } | ||||
| 2435 | |||||
| 2436 | # Never let Windows 9x/Me systems run syntax check -- this will prevent a | ||||
| 2437 | # wide variety of nasty problems on these systems, because they cannot | ||||
| 2438 | # reliably run backticks. Don't even think about changing this! | ||||
| 2439 | if ( $rOpts->{'check-syntax'} | ||||
| 2440 | && $is_Windows | ||||
| 2441 | && ( !$Windows_type || $Windows_type =~ /^(9|Me)/ ) ) | ||||
| 2442 | { | ||||
| 2443 | $rOpts->{'check-syntax'} = 0; | ||||
| 2444 | } | ||||
| 2445 | |||||
| 2446 | # It's really a bad idea to check syntax as root unless you wrote | ||||
| 2447 | # the script yourself. FIXME: not sure if this works with VMS | ||||
| 2448 | unless ($is_Windows) { | ||||
| 2449 | |||||
| 2450 | if ( $< == 0 && $rOpts->{'check-syntax'} ) { | ||||
| 2451 | $rOpts->{'check-syntax'} = 0; | ||||
| 2452 | $$rpending_complaint .= | ||||
| 2453 | "Syntax check deactivated for safety; you shouldn't run this as root\n"; | ||||
| 2454 | } | ||||
| 2455 | } | ||||
| 2456 | |||||
| 2457 | # check iteration count and quietly fix if necessary: | ||||
| 2458 | # - iterations option only applies to code beautification mode | ||||
| 2459 | # - the convergence check should stop most runs on iteration 2, and | ||||
| 2460 | # virtually all on iteration 3. But we'll allow up to 6. | ||||
| 2461 | if ( $rOpts->{'format'} ne 'tidy' ) { | ||||
| 2462 | $rOpts->{'iterations'} = 1; | ||||
| 2463 | } | ||||
| 2464 | elsif ( defined( $rOpts->{'iterations'} ) ) { | ||||
| 2465 | if ( $rOpts->{'iterations'} <= 0 ) { $rOpts->{'iterations'} = 1 } | ||||
| 2466 | elsif ( $rOpts->{'iterations'} > 6 ) { $rOpts->{'iterations'} = 6 } | ||||
| 2467 | } | ||||
| 2468 | else { | ||||
| 2469 | $rOpts->{'iterations'} = 1; | ||||
| 2470 | } | ||||
| 2471 | |||||
| 2472 | # check for reasonable number of blank lines and fix to avoid problems | ||||
| 2473 | if ( $rOpts->{'blank-lines-before-subs'} ) { | ||||
| 2474 | if ( $rOpts->{'blank-lines-before-subs'} < 0 ) { | ||||
| 2475 | $rOpts->{'blank-lines-before-subs'} = 0; | ||||
| 2476 | Warn "negative value of -blbs, setting 0\n"; | ||||
| 2477 | } | ||||
| 2478 | if ( $rOpts->{'blank-lines-before-subs'} > 100 ) { | ||||
| 2479 | Warn "unreasonably large value of -blbs, reducing\n"; | ||||
| 2480 | $rOpts->{'blank-lines-before-subs'} = 100; | ||||
| 2481 | } | ||||
| 2482 | } | ||||
| 2483 | if ( $rOpts->{'blank-lines-before-packages'} ) { | ||||
| 2484 | if ( $rOpts->{'blank-lines-before-packages'} < 0 ) { | ||||
| 2485 | Warn "negative value of -blbp, setting 0\n"; | ||||
| 2486 | $rOpts->{'blank-lines-before-packages'} = 0; | ||||
| 2487 | } | ||||
| 2488 | if ( $rOpts->{'blank-lines-before-packages'} > 100 ) { | ||||
| 2489 | Warn "unreasonably large value of -blbp, reducing\n"; | ||||
| 2490 | $rOpts->{'blank-lines-before-packages'} = 100; | ||||
| 2491 | } | ||||
| 2492 | } | ||||
| 2493 | |||||
| 2494 | # setting a non-negative logfile gap causes logfile to be saved | ||||
| 2495 | if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) { | ||||
| 2496 | $rOpts->{'logfile'} = 1; | ||||
| 2497 | } | ||||
| 2498 | |||||
| 2499 | # set short-cut flag when only indentation is to be done. | ||||
| 2500 | # Note that the user may or may not have already set the | ||||
| 2501 | # indent-only flag. | ||||
| 2502 | if ( !$rOpts->{'add-whitespace'} | ||||
| 2503 | && !$rOpts->{'delete-old-whitespace'} | ||||
| 2504 | && !$rOpts->{'add-newlines'} | ||||
| 2505 | && !$rOpts->{'delete-old-newlines'} ) | ||||
| 2506 | { | ||||
| 2507 | $rOpts->{'indent-only'} = 1; | ||||
| 2508 | } | ||||
| 2509 | |||||
| 2510 | # -isbc implies -ibc | ||||
| 2511 | if ( $rOpts->{'indent-spaced-block-comments'} ) { | ||||
| 2512 | $rOpts->{'indent-block-comments'} = 1; | ||||
| 2513 | } | ||||
| 2514 | |||||
| 2515 | # -bli flag implies -bl | ||||
| 2516 | if ( $rOpts->{'brace-left-and-indent'} ) { | ||||
| 2517 | $rOpts->{'opening-brace-on-new-line'} = 1; | ||||
| 2518 | } | ||||
| 2519 | |||||
| 2520 | if ( $rOpts->{'opening-brace-always-on-right'} | ||||
| 2521 | && $rOpts->{'opening-brace-on-new-line'} ) | ||||
| 2522 | { | ||||
| 2523 | Warn <<EOM; | ||||
| 2524 | Conflict: you specified both 'opening-brace-always-on-right' (-bar) and | ||||
| 2525 | 'opening-brace-on-new-line' (-bl). Ignoring -bl. | ||||
| 2526 | EOM | ||||
| 2527 | $rOpts->{'opening-brace-on-new-line'} = 0; | ||||
| 2528 | } | ||||
| 2529 | |||||
| 2530 | # it simplifies things if -bl is 0 rather than undefined | ||||
| 2531 | if ( !defined( $rOpts->{'opening-brace-on-new-line'} ) ) { | ||||
| 2532 | $rOpts->{'opening-brace-on-new-line'} = 0; | ||||
| 2533 | } | ||||
| 2534 | |||||
| 2535 | # -sbl defaults to -bl if not defined | ||||
| 2536 | if ( !defined( $rOpts->{'opening-sub-brace-on-new-line'} ) ) { | ||||
| 2537 | $rOpts->{'opening-sub-brace-on-new-line'} = | ||||
| 2538 | $rOpts->{'opening-brace-on-new-line'}; | ||||
| 2539 | } | ||||
| 2540 | |||||
| 2541 | if ( $rOpts->{'entab-leading-whitespace'} ) { | ||||
| 2542 | if ( $rOpts->{'entab-leading-whitespace'} < 0 ) { | ||||
| 2543 | Warn "-et=n must use a positive integer; ignoring -et\n"; | ||||
| 2544 | $rOpts->{'entab-leading-whitespace'} = undef; | ||||
| 2545 | } | ||||
| 2546 | |||||
| 2547 | # entab leading whitespace has priority over the older 'tabs' option | ||||
| 2548 | if ( $rOpts->{'tabs'} ) { $rOpts->{'tabs'} = 0; } | ||||
| 2549 | } | ||||
| 2550 | |||||
| 2551 | # set a default tabsize to be used in guessing the starting indentation | ||||
| 2552 | # level if and only if this run does not use tabs and the old code does | ||||
| 2553 | # use tabs | ||||
| 2554 | if ( $rOpts->{'default-tabsize'} ) { | ||||
| 2555 | if ( $rOpts->{'default-tabsize'} < 0 ) { | ||||
| 2556 | Warn "negative value of -dt, setting 0\n"; | ||||
| 2557 | $rOpts->{'default-tabsize'} = 0; | ||||
| 2558 | } | ||||
| 2559 | if ( $rOpts->{'default-tabsize'} > 20 ) { | ||||
| 2560 | Warn "unreasonably large value of -dt, reducing\n"; | ||||
| 2561 | $rOpts->{'default-tabsize'} = 20; | ||||
| 2562 | } | ||||
| 2563 | } | ||||
| 2564 | else { | ||||
| 2565 | $rOpts->{'default-tabsize'} = 8; | ||||
| 2566 | } | ||||
| 2567 | |||||
| 2568 | # Define $tabsize, the number of spaces per tab for use in | ||||
| 2569 | # guessing the indentation of source lines with leading tabs. | ||||
| 2570 | # Assume same as for this run if tabs are used , otherwise assume | ||||
| 2571 | # a default value, typically 8 | ||||
| 2572 | my $tabsize = | ||||
| 2573 | $rOpts->{'entab-leading-whitespace'} | ||||
| 2574 | ? $rOpts->{'entab-leading-whitespace'} | ||||
| 2575 | : $rOpts->{'tabs'} ? $rOpts->{'indent-columns'} | ||||
| 2576 | : $rOpts->{'default-tabsize'}; | ||||
| 2577 | return $tabsize; | ||||
| 2578 | } | ||||
| 2579 | |||||
| 2580 | sub find_file_upwards { | ||||
| 2581 | my ( $search_dir, $search_file ) = @_; | ||||
| 2582 | |||||
| 2583 | $search_dir =~ s{/+$}{}; | ||||
| 2584 | $search_file =~ s{^/+}{}; | ||||
| 2585 | |||||
| 2586 | while (1) { | ||||
| 2587 | my $try_path = "$search_dir/$search_file"; | ||||
| 2588 | if ( -f $try_path ) { | ||||
| 2589 | return $try_path; | ||||
| 2590 | } | ||||
| 2591 | elsif ( $search_dir eq '/' ) { | ||||
| 2592 | return undef; | ||||
| 2593 | } | ||||
| 2594 | else { | ||||
| 2595 | $search_dir = dirname($search_dir); | ||||
| 2596 | } | ||||
| 2597 | } | ||||
| 2598 | } | ||||
| 2599 | |||||
| 2600 | sub expand_command_abbreviations { | ||||
| 2601 | |||||
| 2602 | # go through @ARGV and expand any abbreviations | ||||
| 2603 | |||||
| 2604 | my ( $rexpansion, $rraw_options, $config_file ) = @_; | ||||
| 2605 | my ($word); | ||||
| 2606 | |||||
| 2607 | # set a pass limit to prevent an infinite loop; | ||||
| 2608 | # 10 should be plenty, but it may be increased to allow deeply | ||||
| 2609 | # nested expansions. | ||||
| 2610 | my $max_passes = 10; | ||||
| 2611 | my @new_argv = (); | ||||
| 2612 | |||||
| 2613 | # keep looping until all expansions have been converted into actual | ||||
| 2614 | # dash parameters.. | ||||
| 2615 | for ( my $pass_count = 0 ; $pass_count <= $max_passes ; $pass_count++ ) { | ||||
| 2616 | my @new_argv = (); | ||||
| 2617 | my $abbrev_count = 0; | ||||
| 2618 | |||||
| 2619 | # loop over each item in @ARGV.. | ||||
| 2620 | foreach $word (@ARGV) { | ||||
| 2621 | |||||
| 2622 | # convert any leading 'no-' to just 'no' | ||||
| 2623 | if ( $word =~ /^(-[-]?no)-(.*)/ ) { $word = $1 . $2 } | ||||
| 2624 | |||||
| 2625 | # if it is a dash flag (instead of a file name).. | ||||
| 2626 | if ( $word =~ /^-[-]?([\w\-]+)(.*)/ ) { | ||||
| 2627 | |||||
| 2628 | my $abr = $1; | ||||
| 2629 | my $flags = $2; | ||||
| 2630 | |||||
| 2631 | # save the raw input for debug output in case of circular refs | ||||
| 2632 | if ( $pass_count == 0 ) { | ||||
| 2633 | push( @$rraw_options, $word ); | ||||
| 2634 | } | ||||
| 2635 | |||||
| 2636 | # recombine abbreviation and flag, if necessary, | ||||
| 2637 | # to allow abbreviations with arguments such as '-vt=1' | ||||
| 2638 | if ( $rexpansion->{ $abr . $flags } ) { | ||||
| 2639 | $abr = $abr . $flags; | ||||
| 2640 | $flags = ""; | ||||
| 2641 | } | ||||
| 2642 | |||||
| 2643 | # if we see this dash item in the expansion hash.. | ||||
| 2644 | if ( $rexpansion->{$abr} ) { | ||||
| 2645 | $abbrev_count++; | ||||
| 2646 | |||||
| 2647 | # stuff all of the words that it expands to into the | ||||
| 2648 | # new arg list for the next pass | ||||
| 2649 | foreach my $abbrev ( @{ $rexpansion->{$abr} } ) { | ||||
| 2650 | next unless $abbrev; # for safety; shouldn't happen | ||||
| 2651 | push( @new_argv, '--' . $abbrev . $flags ); | ||||
| 2652 | } | ||||
| 2653 | } | ||||
| 2654 | |||||
| 2655 | # not in expansion hash, must be actual long name | ||||
| 2656 | else { | ||||
| 2657 | push( @new_argv, $word ); | ||||
| 2658 | } | ||||
| 2659 | } | ||||
| 2660 | |||||
| 2661 | # not a dash item, so just save it for the next pass | ||||
| 2662 | else { | ||||
| 2663 | push( @new_argv, $word ); | ||||
| 2664 | } | ||||
| 2665 | } # end of this pass | ||||
| 2666 | |||||
| 2667 | # update parameter list @ARGV to the new one | ||||
| 2668 | @ARGV = @new_argv; | ||||
| 2669 | last unless ( $abbrev_count > 0 ); | ||||
| 2670 | |||||
| 2671 | # make sure we are not in an infinite loop | ||||
| 2672 | if ( $pass_count == $max_passes ) { | ||||
| 2673 | local $" = ')('; | ||||
| 2674 | Warn <<EOM; | ||||
| 2675 | I'm tired. We seem to be in an infinite loop trying to expand aliases. | ||||
| 2676 | Here are the raw options; | ||||
| 2677 | (rraw_options) | ||||
| 2678 | EOM | ||||
| 2679 | my $num = @new_argv; | ||||
| 2680 | if ( $num < 50 ) { | ||||
| 2681 | Warn <<EOM; | ||||
| 2682 | After $max_passes passes here is ARGV | ||||
| 2683 | (@new_argv) | ||||
| 2684 | EOM | ||||
| 2685 | } | ||||
| 2686 | else { | ||||
| 2687 | Warn <<EOM; | ||||
| 2688 | After $max_passes passes ARGV has $num entries | ||||
| 2689 | EOM | ||||
| 2690 | } | ||||
| 2691 | |||||
| 2692 | if ($config_file) { | ||||
| 2693 | Die <<"DIE"; | ||||
| 2694 | Please check your configuration file $config_file for circular-references. | ||||
| 2695 | To deactivate it, use -npro. | ||||
| 2696 | DIE | ||||
| 2697 | } | ||||
| 2698 | else { | ||||
| 2699 | Die <<'DIE'; | ||||
| 2700 | Program bug - circular-references in the %expansion hash, probably due to | ||||
| 2701 | a recent program change. | ||||
| 2702 | DIE | ||||
| 2703 | } | ||||
| 2704 | } # end of check for circular references | ||||
| 2705 | } # end of loop over all passes | ||||
| 2706 | } | ||||
| 2707 | |||||
| 2708 | # Debug routine -- this will dump the expansion hash | ||||
| 2709 | sub dump_short_names { | ||||
| 2710 | my $rexpansion = shift; | ||||
| 2711 | print STDOUT <<EOM; | ||||
| 2712 | List of short names. This list shows how all abbreviations are | ||||
| 2713 | translated into other abbreviations and, eventually, into long names. | ||||
| 2714 | New abbreviations may be defined in a .perltidyrc file. | ||||
| 2715 | For a list of all long names, use perltidy --dump-long-names (-dln). | ||||
| 2716 | -------------------------------------------------------------------------- | ||||
| 2717 | EOM | ||||
| 2718 | foreach my $abbrev ( sort keys %$rexpansion ) { | ||||
| 2719 | my @list = @{ $$rexpansion{$abbrev} }; | ||||
| 2720 | print STDOUT "$abbrev --> @list\n"; | ||||
| 2721 | } | ||||
| 2722 | } | ||||
| 2723 | |||||
| 2724 | sub check_vms_filename { | ||||
| 2725 | |||||
| 2726 | # given a valid filename (the perltidy input file) | ||||
| 2727 | # create a modified filename and separator character | ||||
| 2728 | # suitable for VMS. | ||||
| 2729 | # | ||||
| 2730 | # Contributed by Michael Cartmell | ||||
| 2731 | # | ||||
| 2732 | my ( $base, $path ) = fileparse( $_[0] ); | ||||
| 2733 | |||||
| 2734 | # remove explicit ; version | ||||
| 2735 | $base =~ s/;-?\d*$// | ||||
| 2736 | |||||
| 2737 | # remove explicit . version ie two dots in filename NB ^ escapes a dot | ||||
| 2738 | or $base =~ s/( # begin capture $1 | ||||
| 2739 | (?:^|[^^])\. # match a dot not preceded by a caret | ||||
| 2740 | (?: # followed by nothing | ||||
| 2741 | | # or | ||||
| 2742 | .*[^^] # anything ending in a non caret | ||||
| 2743 | ) | ||||
| 2744 | ) # end capture $1 | ||||
| 2745 | \.-?\d*$ # match . version number | ||||
| 2746 | /$1/x; | ||||
| 2747 | |||||
| 2748 | # normalise filename, if there are no unescaped dots then append one | ||||
| 2749 | $base .= '.' unless $base =~ /(?:^|[^^])\./; | ||||
| 2750 | |||||
| 2751 | # if we don't already have an extension then we just append the extension | ||||
| 2752 | my $separator = ( $base =~ /\.$/ ) ? "" : "_"; | ||||
| 2753 | return ( $path . $base, $separator ); | ||||
| 2754 | } | ||||
| 2755 | |||||
| 2756 | sub Win_OS_Type { | ||||
| 2757 | |||||
| 2758 | # TODO: are these more standard names? | ||||
| 2759 | # Win32s Win95 Win98 WinMe WinNT3.51 WinNT4 Win2000 WinXP/.Net Win2003 | ||||
| 2760 | |||||
| 2761 | # Returns a string that determines what MS OS we are on. | ||||
| 2762 | # Returns win32s,95,98,Me,NT3.51,NT4,2000,XP/.Net,Win2003 | ||||
| 2763 | # Returns blank string if not an MS system. | ||||
| 2764 | # Original code contributed by: Yves Orton | ||||
| 2765 | # We need to know this to decide where to look for config files | ||||
| 2766 | |||||
| 2767 | my $rpending_complaint = shift; | ||||
| 2768 | my $os = ""; | ||||
| 2769 | return $os unless $^O =~ /win32|dos/i; # is it a MS box? | ||||
| 2770 | |||||
| 2771 | # Systems built from Perl source may not have Win32.pm | ||||
| 2772 | # But probably have Win32::GetOSVersion() anyway so the | ||||
| 2773 | # following line is not 'required': | ||||
| 2774 | # return $os unless eval('require Win32'); | ||||
| 2775 | |||||
| 2776 | # Use the standard API call to determine the version | ||||
| 2777 | my ( $undef, $major, $minor, $build, $id ); | ||||
| 2778 | eval { ( $undef, $major, $minor, $build, $id ) = Win32::GetOSVersion() }; | ||||
| 2779 | |||||
| 2780 | # | ||||
| 2781 | # NAME ID MAJOR MINOR | ||||
| 2782 | # Windows NT 4 2 4 0 | ||||
| 2783 | # Windows 2000 2 5 0 | ||||
| 2784 | # Windows XP 2 5 1 | ||||
| 2785 | # Windows Server 2003 2 5 2 | ||||
| 2786 | |||||
| 2787 | return "win32s" unless $id; # If id==0 then its a win32s box. | ||||
| 2788 | $os = { # Magic numbers from MSDN | ||||
| 2789 | # documentation of GetOSVersion | ||||
| 2790 | 1 => { | ||||
| 2791 | 0 => "95", | ||||
| 2792 | 10 => "98", | ||||
| 2793 | 90 => "Me" | ||||
| 2794 | }, | ||||
| 2795 | 2 => { | ||||
| 2796 | 0 => "2000", # or NT 4, see below | ||||
| 2797 | 1 => "XP/.Net", | ||||
| 2798 | 2 => "Win2003", | ||||
| 2799 | 51 => "NT3.51" | ||||
| 2800 | } | ||||
| 2801 | }->{$id}->{$minor}; | ||||
| 2802 | |||||
| 2803 | # If $os is undefined, the above code is out of date. Suggested updates | ||||
| 2804 | # are welcome. | ||||
| 2805 | unless ( defined $os ) { | ||||
| 2806 | $os = ""; | ||||
| 2807 | $$rpending_complaint .= <<EOS; | ||||
| 2808 | Error trying to discover Win_OS_Type: $id:$major:$minor Has no name of record! | ||||
| 2809 | We won't be able to look for a system-wide config file. | ||||
| 2810 | EOS | ||||
| 2811 | } | ||||
| 2812 | |||||
| 2813 | # Unfortunately the logic used for the various versions isn't so clever.. | ||||
| 2814 | # so we have to handle an outside case. | ||||
| 2815 | return ( $os eq "2000" && $major != 5 ) ? "NT4" : $os; | ||||
| 2816 | } | ||||
| 2817 | |||||
| 2818 | sub is_unix { | ||||
| 2819 | return | ||||
| 2820 | ( $^O !~ /win32|dos/i ) | ||||
| 2821 | && ( $^O ne 'VMS' ) | ||||
| 2822 | && ( $^O ne 'OS2' ) | ||||
| 2823 | && ( $^O ne 'MacOS' ); | ||||
| 2824 | } | ||||
| 2825 | |||||
| 2826 | sub look_for_Windows { | ||||
| 2827 | |||||
| 2828 | # determine Windows sub-type and location of | ||||
| 2829 | # system-wide configuration files | ||||
| 2830 | my $rpending_complaint = shift; | ||||
| 2831 | my $is_Windows = ( $^O =~ /win32|dos/i ); | ||||
| 2832 | my $Windows_type = Win_OS_Type($rpending_complaint) if $is_Windows; | ||||
| 2833 | return ( $is_Windows, $Windows_type ); | ||||
| 2834 | } | ||||
| 2835 | |||||
| 2836 | sub find_config_file { | ||||
| 2837 | |||||
| 2838 | # look for a .perltidyrc configuration file | ||||
| 2839 | # For Windows also look for a file named perltidy.ini | ||||
| 2840 | my ( $is_Windows, $Windows_type, $rconfig_file_chatter, | ||||
| 2841 | $rpending_complaint ) = @_; | ||||
| 2842 | |||||
| 2843 | $$rconfig_file_chatter .= "# Config file search...system reported as:"; | ||||
| 2844 | if ($is_Windows) { | ||||
| 2845 | $$rconfig_file_chatter .= "Windows $Windows_type\n"; | ||||
| 2846 | } | ||||
| 2847 | else { | ||||
| 2848 | $$rconfig_file_chatter .= " $^O\n"; | ||||
| 2849 | } | ||||
| 2850 | |||||
| 2851 | # sub to check file existence and record all tests | ||||
| 2852 | my $exists_config_file = sub { | ||||
| 2853 | my $config_file = shift; | ||||
| 2854 | return 0 unless $config_file; | ||||
| 2855 | $$rconfig_file_chatter .= "# Testing: $config_file\n"; | ||||
| 2856 | return -f $config_file; | ||||
| 2857 | }; | ||||
| 2858 | |||||
| 2859 | my $config_file; | ||||
| 2860 | |||||
| 2861 | # look in current directory first | ||||
| 2862 | $config_file = ".perltidyrc"; | ||||
| 2863 | return $config_file if $exists_config_file->($config_file); | ||||
| 2864 | if ($is_Windows) { | ||||
| 2865 | $config_file = "perltidy.ini"; | ||||
| 2866 | return $config_file if $exists_config_file->($config_file); | ||||
| 2867 | } | ||||
| 2868 | |||||
| 2869 | # Default environment vars. | ||||
| 2870 | my @envs = qw(PERLTIDY HOME); | ||||
| 2871 | |||||
| 2872 | # Check the NT/2k/XP locations, first a local machine def, then a | ||||
| 2873 | # network def | ||||
| 2874 | push @envs, qw(USERPROFILE HOMESHARE) if $^O =~ /win32/i; | ||||
| 2875 | |||||
| 2876 | # Now go through the environment ... | ||||
| 2877 | foreach my $var (@envs) { | ||||
| 2878 | $$rconfig_file_chatter .= "# Examining: \$ENV{$var}"; | ||||
| 2879 | if ( defined( $ENV{$var} ) ) { | ||||
| 2880 | $$rconfig_file_chatter .= " = $ENV{$var}\n"; | ||||
| 2881 | |||||
| 2882 | # test ENV{ PERLTIDY } as file: | ||||
| 2883 | if ( $var eq 'PERLTIDY' ) { | ||||
| 2884 | $config_file = "$ENV{$var}"; | ||||
| 2885 | return $config_file if $exists_config_file->($config_file); | ||||
| 2886 | } | ||||
| 2887 | |||||
| 2888 | # test ENV as directory: | ||||
| 2889 | $config_file = catfile( $ENV{$var}, ".perltidyrc" ); | ||||
| 2890 | return $config_file if $exists_config_file->($config_file); | ||||
| 2891 | |||||
| 2892 | if ($is_Windows) { | ||||
| 2893 | $config_file = catfile( $ENV{$var}, "perltidy.ini" ); | ||||
| 2894 | return $config_file if $exists_config_file->($config_file); | ||||
| 2895 | } | ||||
| 2896 | } | ||||
| 2897 | else { | ||||
| 2898 | $$rconfig_file_chatter .= "\n"; | ||||
| 2899 | } | ||||
| 2900 | } | ||||
| 2901 | |||||
| 2902 | # then look for a system-wide definition | ||||
| 2903 | # where to look varies with OS | ||||
| 2904 | if ($is_Windows) { | ||||
| 2905 | |||||
| 2906 | if ($Windows_type) { | ||||
| 2907 | my ( $os, $system, $allusers ) = | ||||
| 2908 | Win_Config_Locs( $rpending_complaint, $Windows_type ); | ||||
| 2909 | |||||
| 2910 | # Check All Users directory, if there is one. | ||||
| 2911 | # i.e. C:\Documents and Settings\User\perltidy.ini | ||||
| 2912 | if ($allusers) { | ||||
| 2913 | |||||
| 2914 | $config_file = catfile( $allusers, ".perltidyrc" ); | ||||
| 2915 | return $config_file if $exists_config_file->($config_file); | ||||
| 2916 | |||||
| 2917 | $config_file = catfile( $allusers, "perltidy.ini" ); | ||||
| 2918 | return $config_file if $exists_config_file->($config_file); | ||||
| 2919 | } | ||||
| 2920 | |||||
| 2921 | # Check system directory. | ||||
| 2922 | # retain old code in case someone has been able to create | ||||
| 2923 | # a file with a leading period. | ||||
| 2924 | $config_file = catfile( $system, ".perltidyrc" ); | ||||
| 2925 | return $config_file if $exists_config_file->($config_file); | ||||
| 2926 | |||||
| 2927 | $config_file = catfile( $system, "perltidy.ini" ); | ||||
| 2928 | return $config_file if $exists_config_file->($config_file); | ||||
| 2929 | } | ||||
| 2930 | } | ||||
| 2931 | |||||
| 2932 | # Place to add customization code for other systems | ||||
| 2933 | elsif ( $^O eq 'OS2' ) { | ||||
| 2934 | } | ||||
| 2935 | elsif ( $^O eq 'MacOS' ) { | ||||
| 2936 | } | ||||
| 2937 | elsif ( $^O eq 'VMS' ) { | ||||
| 2938 | } | ||||
| 2939 | |||||
| 2940 | # Assume some kind of Unix | ||||
| 2941 | else { | ||||
| 2942 | |||||
| 2943 | $config_file = "/usr/local/etc/perltidyrc"; | ||||
| 2944 | return $config_file if $exists_config_file->($config_file); | ||||
| 2945 | |||||
| 2946 | $config_file = "/etc/perltidyrc"; | ||||
| 2947 | return $config_file if $exists_config_file->($config_file); | ||||
| 2948 | } | ||||
| 2949 | |||||
| 2950 | # Couldn't find a config file | ||||
| 2951 | return; | ||||
| 2952 | } | ||||
| 2953 | |||||
| 2954 | sub Win_Config_Locs { | ||||
| 2955 | |||||
| 2956 | # In scalar context returns the OS name (95 98 ME NT3.51 NT4 2000 XP), | ||||
| 2957 | # or undef if its not a win32 OS. In list context returns OS, System | ||||
| 2958 | # Directory, and All Users Directory. All Users will be empty on a | ||||
| 2959 | # 9x/Me box. Contributed by: Yves Orton. | ||||
| 2960 | |||||
| 2961 | my $rpending_complaint = shift; | ||||
| 2962 | my $os = (@_) ? shift : Win_OS_Type(); | ||||
| 2963 | return unless $os; | ||||
| 2964 | |||||
| 2965 | my $system = ""; | ||||
| 2966 | my $allusers = ""; | ||||
| 2967 | |||||
| 2968 | if ( $os =~ /9[58]|Me/ ) { | ||||
| 2969 | $system = "C:/Windows"; | ||||
| 2970 | } | ||||
| 2971 | elsif ( $os =~ /NT|XP|200?/ ) { | ||||
| 2972 | $system = ( $os =~ /XP/ ) ? "C:/Windows/" : "C:/WinNT/"; | ||||
| 2973 | $allusers = | ||||
| 2974 | ( $os =~ /NT/ ) | ||||
| 2975 | ? "C:/WinNT/profiles/All Users/" | ||||
| 2976 | : "C:/Documents and Settings/All Users/"; | ||||
| 2977 | } | ||||
| 2978 | else { | ||||
| 2979 | |||||
| 2980 | # This currently would only happen on a win32s computer. I don't have | ||||
| 2981 | # one to test, so I am unsure how to proceed. Suggestions welcome! | ||||
| 2982 | $$rpending_complaint .= | ||||
| 2983 | "I dont know a sensible place to look for config files on an $os system.\n"; | ||||
| 2984 | return; | ||||
| 2985 | } | ||||
| 2986 | return wantarray ? ( $os, $system, $allusers ) : $os; | ||||
| 2987 | } | ||||
| 2988 | |||||
| 2989 | sub dump_config_file { | ||||
| 2990 | my $fh = shift; | ||||
| 2991 | my $config_file = shift; | ||||
| 2992 | my $rconfig_file_chatter = shift; | ||||
| 2993 | print STDOUT "$$rconfig_file_chatter"; | ||||
| 2994 | if ($fh) { | ||||
| 2995 | print STDOUT "# Dump of file: '$config_file'\n"; | ||||
| 2996 | while ( my $line = $fh->getline() ) { print STDOUT $line } | ||||
| 2997 | eval { $fh->close() }; | ||||
| 2998 | } | ||||
| 2999 | else { | ||||
| 3000 | print STDOUT "# ...no config file found\n"; | ||||
| 3001 | } | ||||
| 3002 | } | ||||
| 3003 | |||||
| 3004 | sub read_config_file { | ||||
| 3005 | |||||
| 3006 | my ( $fh, $config_file, $rexpansion ) = @_; | ||||
| 3007 | my @config_list = (); | ||||
| 3008 | my $saw_pbp; | ||||
| 3009 | |||||
| 3010 | # file is bad if non-empty $death_message is returned | ||||
| 3011 | my $death_message = ""; | ||||
| 3012 | |||||
| 3013 | my $name = undef; | ||||
| 3014 | my $line_no; | ||||
| 3015 | while ( my $line = $fh->getline() ) { | ||||
| 3016 | $line_no++; | ||||
| 3017 | chomp $line; | ||||
| 3018 | ( $line, $death_message ) = | ||||
| 3019 | strip_comment( $line, $config_file, $line_no ); | ||||
| 3020 | last if ($death_message); | ||||
| 3021 | next unless $line; | ||||
| 3022 | $line =~ s/^\s*(.*?)\s*$/$1/; # trim both ends | ||||
| 3023 | next unless $line; | ||||
| 3024 | |||||
| 3025 | # look for something of the general form | ||||
| 3026 | # newname { body } | ||||
| 3027 | # or just | ||||
| 3028 | # body | ||||
| 3029 | |||||
| 3030 | my $body = $line; | ||||
| 3031 | my ($newname); | ||||
| 3032 | if ( $line =~ /^((\w+)\s*\{)(.*)\}$/ ) { | ||||
| 3033 | ( $newname, $body ) = ( $2, $3, ); | ||||
| 3034 | } | ||||
| 3035 | if ($body) { | ||||
| 3036 | |||||
| 3037 | if ( !$saw_pbp && $body =~ /-(pbp|perl-best-practices)/ ) { | ||||
| 3038 | $saw_pbp = 1; | ||||
| 3039 | } | ||||
| 3040 | |||||
| 3041 | # handle a new alias definition | ||||
| 3042 | if ($newname) { | ||||
| 3043 | if ($name) { | ||||
| 3044 | $death_message = | ||||
| 3045 | "No '}' seen after $name and before $newname in config file $config_file line $.\n"; | ||||
| 3046 | last; | ||||
| 3047 | } | ||||
| 3048 | $name = $newname; | ||||
| 3049 | |||||
| 3050 | if ( ${$rexpansion}{$name} ) { | ||||
| 3051 | local $" = ')('; | ||||
| 3052 | my @names = sort keys %$rexpansion; | ||||
| 3053 | $death_message = | ||||
| 3054 | "Here is a list of all installed aliases\n(@names)\n" | ||||
| 3055 | . "Attempting to redefine alias ($name) in config file $config_file line $.\n"; | ||||
| 3056 | last; | ||||
| 3057 | } | ||||
| 3058 | ${$rexpansion}{$name} = []; | ||||
| 3059 | } | ||||
| 3060 | |||||
| 3061 | # now do the body | ||||
| 3062 | if ($body) { | ||||
| 3063 | |||||
| 3064 | my ( $rbody_parts, $msg ) = parse_args($body); | ||||
| 3065 | if ($msg) { | ||||
| 3066 | $death_message = <<EOM; | ||||
| 3067 | Error reading file '$config_file' at line number $line_no. | ||||
| 3068 | $msg | ||||
| 3069 | Please fix this line or use -npro to avoid reading this file | ||||
| 3070 | EOM | ||||
| 3071 | last; | ||||
| 3072 | } | ||||
| 3073 | |||||
| 3074 | if ($name) { | ||||
| 3075 | |||||
| 3076 | # remove leading dashes if this is an alias | ||||
| 3077 | foreach (@$rbody_parts) { s/^\-+//; } | ||||
| 3078 | push @{ ${$rexpansion}{$name} }, @$rbody_parts; | ||||
| 3079 | } | ||||
| 3080 | else { | ||||
| 3081 | push( @config_list, @$rbody_parts ); | ||||
| 3082 | } | ||||
| 3083 | } | ||||
| 3084 | } | ||||
| 3085 | } | ||||
| 3086 | eval { $fh->close() }; | ||||
| 3087 | return ( \@config_list, $death_message, $saw_pbp ); | ||||
| 3088 | } | ||||
| 3089 | |||||
| 3090 | sub strip_comment { | ||||
| 3091 | |||||
| 3092 | # Strip any comment from a command line | ||||
| 3093 | my ( $instr, $config_file, $line_no ) = @_; | ||||
| 3094 | my $msg = ""; | ||||
| 3095 | |||||
| 3096 | # check for full-line comment | ||||
| 3097 | if ( $instr =~ /^\s*#/ ) { | ||||
| 3098 | return ( "", $msg ); | ||||
| 3099 | } | ||||
| 3100 | |||||
| 3101 | # nothing to do if no comments | ||||
| 3102 | if ( $instr !~ /#/ ) { | ||||
| 3103 | return ( $instr, $msg ); | ||||
| 3104 | } | ||||
| 3105 | |||||
| 3106 | # handle case of no quotes | ||||
| 3107 | elsif ( $instr !~ /['"]/ ) { | ||||
| 3108 | |||||
| 3109 | # We now require a space before the # of a side comment | ||||
| 3110 | # this allows something like: | ||||
| 3111 | # -sbcp=# | ||||
| 3112 | # Otherwise, it would have to be quoted: | ||||
| 3113 | # -sbcp='#' | ||||
| 3114 | $instr =~ s/\s+\#.*$//; | ||||
| 3115 | return ( $instr, $msg ); | ||||
| 3116 | } | ||||
| 3117 | |||||
| 3118 | # handle comments and quotes | ||||
| 3119 | my $outstr = ""; | ||||
| 3120 | my $quote_char = ""; | ||||
| 3121 | while (1) { | ||||
| 3122 | |||||
| 3123 | # looking for ending quote character | ||||
| 3124 | if ($quote_char) { | ||||
| 3125 | if ( $instr =~ /\G($quote_char)/gc ) { | ||||
| 3126 | $quote_char = ""; | ||||
| 3127 | $outstr .= $1; | ||||
| 3128 | } | ||||
| 3129 | elsif ( $instr =~ /\G(.)/gc ) { | ||||
| 3130 | $outstr .= $1; | ||||
| 3131 | } | ||||
| 3132 | |||||
| 3133 | # error..we reached the end without seeing the ending quote char | ||||
| 3134 | else { | ||||
| 3135 | $msg = <<EOM; | ||||
| 3136 | Error reading file $config_file at line number $line_no. | ||||
| 3137 | Did not see ending quote character <$quote_char> in this text: | ||||
| 3138 | $instr | ||||
| 3139 | Please fix this line or use -npro to avoid reading this file | ||||
| 3140 | EOM | ||||
| 3141 | last; | ||||
| 3142 | } | ||||
| 3143 | } | ||||
| 3144 | |||||
| 3145 | # accumulating characters and looking for start of a quoted string | ||||
| 3146 | else { | ||||
| 3147 | if ( $instr =~ /\G([\"\'])/gc ) { | ||||
| 3148 | $outstr .= $1; | ||||
| 3149 | $quote_char = $1; | ||||
| 3150 | } | ||||
| 3151 | |||||
| 3152 | # Note: not yet enforcing the space-before-hash rule for side | ||||
| 3153 | # comments if the parameter is quoted. | ||||
| 3154 | elsif ( $instr =~ /\G#/gc ) { | ||||
| 3155 | last; | ||||
| 3156 | } | ||||
| 3157 | elsif ( $instr =~ /\G(.)/gc ) { | ||||
| 3158 | $outstr .= $1; | ||||
| 3159 | } | ||||
| 3160 | else { | ||||
| 3161 | last; | ||||
| 3162 | } | ||||
| 3163 | } | ||||
| 3164 | } | ||||
| 3165 | return ( $outstr, $msg ); | ||||
| 3166 | } | ||||
| 3167 | |||||
| 3168 | sub parse_args { | ||||
| 3169 | |||||
| 3170 | # Parse a command string containing multiple string with possible | ||||
| 3171 | # quotes, into individual commands. It might look like this, for example: | ||||
| 3172 | # | ||||
| 3173 | # -wba=" + - " -some-thing -wbb='. && ||' | ||||
| 3174 | # | ||||
| 3175 | # There is no need, at present, to handle escaped quote characters. | ||||
| 3176 | # (They are not perltidy tokens, so needn't be in strings). | ||||
| 3177 | |||||
| 3178 | my ($body) = @_; | ||||
| 3179 | my @body_parts = (); | ||||
| 3180 | my $quote_char = ""; | ||||
| 3181 | my $part = ""; | ||||
| 3182 | my $msg = ""; | ||||
| 3183 | while (1) { | ||||
| 3184 | |||||
| 3185 | # looking for ending quote character | ||||
| 3186 | if ($quote_char) { | ||||
| 3187 | if ( $body =~ /\G($quote_char)/gc ) { | ||||
| 3188 | $quote_char = ""; | ||||
| 3189 | } | ||||
| 3190 | elsif ( $body =~ /\G(.)/gc ) { | ||||
| 3191 | $part .= $1; | ||||
| 3192 | } | ||||
| 3193 | |||||
| 3194 | # error..we reached the end without seeing the ending quote char | ||||
| 3195 | else { | ||||
| 3196 | if ( length($part) ) { push @body_parts, $part; } | ||||
| 3197 | $msg = <<EOM; | ||||
| 3198 | Did not see ending quote character <$quote_char> in this text: | ||||
| 3199 | $body | ||||
| 3200 | EOM | ||||
| 3201 | last; | ||||
| 3202 | } | ||||
| 3203 | } | ||||
| 3204 | |||||
| 3205 | # accumulating characters and looking for start of a quoted string | ||||
| 3206 | else { | ||||
| 3207 | if ( $body =~ /\G([\"\'])/gc ) { | ||||
| 3208 | $quote_char = $1; | ||||
| 3209 | } | ||||
| 3210 | elsif ( $body =~ /\G(\s+)/gc ) { | ||||
| 3211 | if ( length($part) ) { push @body_parts, $part; } | ||||
| 3212 | $part = ""; | ||||
| 3213 | } | ||||
| 3214 | elsif ( $body =~ /\G(.)/gc ) { | ||||
| 3215 | $part .= $1; | ||||
| 3216 | } | ||||
| 3217 | else { | ||||
| 3218 | if ( length($part) ) { push @body_parts, $part; } | ||||
| 3219 | last; | ||||
| 3220 | } | ||||
| 3221 | } | ||||
| 3222 | } | ||||
| 3223 | return ( \@body_parts, $msg ); | ||||
| 3224 | } | ||||
| 3225 | |||||
| 3226 | sub dump_long_names { | ||||
| 3227 | |||||
| 3228 | my @names = sort @_; | ||||
| 3229 | print STDOUT <<EOM; | ||||
| 3230 | # Command line long names (passed to GetOptions) | ||||
| 3231 | #--------------------------------------------------------------- | ||||
| 3232 | # here is a summary of the Getopt codes: | ||||
| 3233 | # <none> does not take an argument | ||||
| 3234 | # =s takes a mandatory string | ||||
| 3235 | # :s takes an optional string | ||||
| 3236 | # =i takes a mandatory integer | ||||
| 3237 | # :i takes an optional integer | ||||
| 3238 | # ! does not take an argument and may be negated | ||||
| 3239 | # i.e., -foo and -nofoo are allowed | ||||
| 3240 | # a double dash signals the end of the options list | ||||
| 3241 | # | ||||
| 3242 | #--------------------------------------------------------------- | ||||
| 3243 | EOM | ||||
| 3244 | |||||
| 3245 | foreach (@names) { print STDOUT "$_\n" } | ||||
| 3246 | } | ||||
| 3247 | |||||
| 3248 | sub dump_defaults { | ||||
| 3249 | my @defaults = sort @_; | ||||
| 3250 | print STDOUT "Default command line options:\n"; | ||||
| 3251 | foreach (@_) { print STDOUT "$_\n" } | ||||
| 3252 | } | ||||
| 3253 | |||||
| 3254 | sub readable_options { | ||||
| 3255 | |||||
| 3256 | # return options for this run as a string which could be | ||||
| 3257 | # put in a perltidyrc file | ||||
| 3258 | my ( $rOpts, $roption_string ) = @_; | ||||
| 3259 | my %Getopt_flags; | ||||
| 3260 | my $rGetopt_flags = \%Getopt_flags; | ||||
| 3261 | my $readable_options = "# Final parameter set for this run.\n"; | ||||
| 3262 | $readable_options .= | ||||
| 3263 | "# See utility 'perltidyrc_dump.pl' for nicer formatting.\n"; | ||||
| 3264 | foreach my $opt ( @{$roption_string} ) { | ||||
| 3265 | my $flag = ""; | ||||
| 3266 | if ( $opt =~ /(.*)(!|=.*)$/ ) { | ||||
| 3267 | $opt = $1; | ||||
| 3268 | $flag = $2; | ||||
| 3269 | } | ||||
| 3270 | if ( defined( $rOpts->{$opt} ) ) { | ||||
| 3271 | $rGetopt_flags->{$opt} = $flag; | ||||
| 3272 | } | ||||
| 3273 | } | ||||
| 3274 | foreach my $key ( sort keys %{$rOpts} ) { | ||||
| 3275 | my $flag = $rGetopt_flags->{$key}; | ||||
| 3276 | my $value = $rOpts->{$key}; | ||||
| 3277 | my $prefix = '--'; | ||||
| 3278 | my $suffix = ""; | ||||
| 3279 | if ($flag) { | ||||
| 3280 | if ( $flag =~ /^=/ ) { | ||||
| 3281 | if ( $value !~ /^\d+$/ ) { $value = '"' . $value . '"' } | ||||
| 3282 | $suffix = "=" . $value; | ||||
| 3283 | } | ||||
| 3284 | elsif ( $flag =~ /^!/ ) { | ||||
| 3285 | $prefix .= "no" unless ($value); | ||||
| 3286 | } | ||||
| 3287 | else { | ||||
| 3288 | |||||
| 3289 | # shouldn't happen | ||||
| 3290 | $readable_options .= | ||||
| 3291 | "# ERROR in dump_options: unrecognized flag $flag for $key\n"; | ||||
| 3292 | } | ||||
| 3293 | } | ||||
| 3294 | $readable_options .= $prefix . $key . $suffix . "\n"; | ||||
| 3295 | } | ||||
| 3296 | return $readable_options; | ||||
| 3297 | } | ||||
| 3298 | |||||
| 3299 | sub show_version { | ||||
| 3300 | print STDOUT <<"EOM"; | ||||
| 3301 | This is perltidy, v$VERSION | ||||
| 3302 | |||||
| 3303 | Copyright 2000-2014, Steve Hancock | ||||
| 3304 | |||||
| 3305 | Perltidy is free software and may be copied under the terms of the GNU | ||||
| 3306 | General Public License, which is included in the distribution files. | ||||
| 3307 | |||||
| 3308 | Complete documentation for perltidy can be found using 'man perltidy' | ||||
| 3309 | or on the internet at http://perltidy.sourceforge.net. | ||||
| 3310 | EOM | ||||
| 3311 | } | ||||
| 3312 | |||||
| 3313 | sub usage { | ||||
| 3314 | |||||
| 3315 | print STDOUT <<EOF; | ||||
| 3316 | This is perltidy version $VERSION, a perl script indenter. Usage: | ||||
| 3317 | |||||
| 3318 | perltidy [ options ] file1 file2 file3 ... | ||||
| 3319 | (output goes to file1.tdy, file2.tdy, file3.tdy, ...) | ||||
| 3320 | perltidy [ options ] file1 -o outfile | ||||
| 3321 | perltidy [ options ] file1 -st >outfile | ||||
| 3322 | perltidy [ options ] <infile >outfile | ||||
| 3323 | |||||
| 3324 | Options have short and long forms. Short forms are shown; see | ||||
| 3325 | man pages for long forms. Note: '=s' indicates a required string, | ||||
| 3326 | and '=n' indicates a required integer. | ||||
| 3327 | |||||
| 3328 | I/O control | ||||
| 3329 | -h show this help | ||||
| 3330 | -o=file name of the output file (only if single input file) | ||||
| 3331 | -oext=s change output extension from 'tdy' to s | ||||
| 3332 | -opath=path change path to be 'path' for output files | ||||
| 3333 | -b backup original to .bak and modify file in-place | ||||
| 3334 | -bext=s change default backup extension from 'bak' to s | ||||
| 3335 | -q deactivate error messages (for running under editor) | ||||
| 3336 | -w include non-critical warning messages in the .ERR error output | ||||
| 3337 | -syn run perl -c to check syntax (default under unix systems) | ||||
| 3338 | -log save .LOG file, which has useful diagnostics | ||||
| 3339 | -f force perltidy to read a binary file | ||||
| 3340 | -g like -log but writes more detailed .LOG file, for debugging scripts | ||||
| 3341 | -opt write the set of options actually used to a .LOG file | ||||
| 3342 | -npro ignore .perltidyrc configuration command file | ||||
| 3343 | -pro=file read configuration commands from file instead of .perltidyrc | ||||
| 3344 | -st send output to standard output, STDOUT | ||||
| 3345 | -se send all error output to standard error output, STDERR | ||||
| 3346 | -v display version number to standard output and quit | ||||
| 3347 | |||||
| 3348 | Basic Options: | ||||
| 3349 | -i=n use n columns per indentation level (default n=4) | ||||
| 3350 | -t tabs: use one tab character per indentation level, not recommeded | ||||
| 3351 | -nt no tabs: use n spaces per indentation level (default) | ||||
| 3352 | -et=n entab leading whitespace n spaces per tab; not recommended | ||||
| 3353 | -io "indent only": just do indentation, no other formatting. | ||||
| 3354 | -sil=n set starting indentation level to n; use if auto detection fails | ||||
| 3355 | -ole=s specify output line ending (s=dos or win, mac, unix) | ||||
| 3356 | -ple keep output line endings same as input (input must be filename) | ||||
| 3357 | |||||
| 3358 | Whitespace Control | ||||
| 3359 | -fws freeze whitespace; this disables all whitespace changes | ||||
| 3360 | and disables the following switches: | ||||
| 3361 | -bt=n sets brace tightness, n= (0 = loose, 1=default, 2 = tight) | ||||
| 3362 | -bbt same as -bt but for code block braces; same as -bt if not given | ||||
| 3363 | -bbvt block braces vertically tight; use with -bl or -bli | ||||
| 3364 | -bbvtl=s make -bbvt to apply to selected list of block types | ||||
| 3365 | -pt=n paren tightness (n=0, 1 or 2) | ||||
| 3366 | -sbt=n square bracket tightness (n=0, 1, or 2) | ||||
| 3367 | -bvt=n brace vertical tightness, | ||||
| 3368 | n=(0=open, 1=close unless multiple steps on a line, 2=always close) | ||||
| 3369 | -pvt=n paren vertical tightness (see -bvt for n) | ||||
| 3370 | -sbvt=n square bracket vertical tightness (see -bvt for n) | ||||
| 3371 | -bvtc=n closing brace vertical tightness: | ||||
| 3372 | n=(0=open, 1=sometimes close, 2=always close) | ||||
| 3373 | -pvtc=n closing paren vertical tightness, see -bvtc for n. | ||||
| 3374 | -sbvtc=n closing square bracket vertical tightness, see -bvtc for n. | ||||
| 3375 | -ci=n sets continuation indentation=n, default is n=2 spaces | ||||
| 3376 | -lp line up parentheses, brackets, and non-BLOCK braces | ||||
| 3377 | -sfs add space before semicolon in for( ; ; ) | ||||
| 3378 | -aws allow perltidy to add whitespace (default) | ||||
| 3379 | -dws delete all old non-essential whitespace | ||||
| 3380 | -icb indent closing brace of a code block | ||||
| 3381 | -cti=n closing indentation of paren, square bracket, or non-block brace: | ||||
| 3382 | n=0 none, =1 align with opening, =2 one full indentation level | ||||
| 3383 | -icp equivalent to -cti=2 | ||||
| 3384 | -wls=s want space left of tokens in string; i.e. -nwls='+ - * /' | ||||
| 3385 | -wrs=s want space right of tokens in string; | ||||
| 3386 | -sts put space before terminal semicolon of a statement | ||||
| 3387 | -sak=s put space between keywords given in s and '('; | ||||
| 3388 | -nsak=s no space between keywords in s and '('; i.e. -nsak='my our local' | ||||
| 3389 | |||||
| 3390 | Line Break Control | ||||
| 3391 | -fnl freeze newlines; this disables all line break changes | ||||
| 3392 | and disables the following switches: | ||||
| 3393 | -anl add newlines; ok to introduce new line breaks | ||||
| 3394 | -bbs add blank line before subs and packages | ||||
| 3395 | -bbc add blank line before block comments | ||||
| 3396 | -bbb add blank line between major blocks | ||||
| 3397 | -kbl=n keep old blank lines? 0=no, 1=some, 2=all | ||||
| 3398 | -mbl=n maximum consecutive blank lines to output (default=1) | ||||
| 3399 | -ce cuddled else; use this style: '} else {' | ||||
| 3400 | -dnl delete old newlines (default) | ||||
| 3401 | -l=n maximum line length; default n=80 | ||||
| 3402 | -bl opening brace on new line | ||||
| 3403 | -sbl opening sub brace on new line. value of -bl is used if not given. | ||||
| 3404 | -bli opening brace on new line and indented | ||||
| 3405 | -bar opening brace always on right, even for long clauses | ||||
| 3406 | -vt=n vertical tightness (requires -lp); n controls break after opening | ||||
| 3407 | token: 0=never 1=no break if next line balanced 2=no break | ||||
| 3408 | -vtc=n vertical tightness of closing container; n controls if closing | ||||
| 3409 | token starts new line: 0=always 1=not unless list 1=never | ||||
| 3410 | -wba=s want break after tokens in string; i.e. wba=': .' | ||||
| 3411 | -wbb=s want break before tokens in string | ||||
| 3412 | |||||
| 3413 | Following Old Breakpoints | ||||
| 3414 | -kis keep interior semicolons. Allows multiple statements per line. | ||||
| 3415 | -boc break at old comma breaks: turns off all automatic list formatting | ||||
| 3416 | -bol break at old logical breakpoints: or, and, ||, && (default) | ||||
| 3417 | -bok break at old list keyword breakpoints such as map, sort (default) | ||||
| 3418 | -bot break at old conditional (ternary ?:) operator breakpoints (default) | ||||
| 3419 | -boa break at old attribute breakpoints | ||||
| 3420 | -cab=n break at commas after a comma-arrow (=>): | ||||
| 3421 | n=0 break at all commas after => | ||||
| 3422 | n=1 stable: break unless this breaks an existing one-line container | ||||
| 3423 | n=2 break only if a one-line container cannot be formed | ||||
| 3424 | n=3 do not treat commas after => specially at all | ||||
| 3425 | |||||
| 3426 | Comment controls | ||||
| 3427 | -ibc indent block comments (default) | ||||
| 3428 | -isbc indent spaced block comments; may indent unless no leading space | ||||
| 3429 | -msc=n minimum desired spaces to side comment, default 4 | ||||
| 3430 | -fpsc=n fix position for side comments; default 0; | ||||
| 3431 | -csc add or update closing side comments after closing BLOCK brace | ||||
| 3432 | -dcsc delete closing side comments created by a -csc command | ||||
| 3433 | -cscp=s change closing side comment prefix to be other than '## end' | ||||
| 3434 | -cscl=s change closing side comment to apply to selected list of blocks | ||||
| 3435 | -csci=n minimum number of lines needed to apply a -csc tag, default n=6 | ||||
| 3436 | -csct=n maximum number of columns of appended text, default n=20 | ||||
| 3437 | -cscw causes warning if old side comment is overwritten with -csc | ||||
| 3438 | |||||
| 3439 | -sbc use 'static block comments' identified by leading '##' (default) | ||||
| 3440 | -sbcp=s change static block comment identifier to be other than '##' | ||||
| 3441 | -osbc outdent static block comments | ||||
| 3442 | |||||
| 3443 | -ssc use 'static side comments' identified by leading '##' (default) | ||||
| 3444 | -sscp=s change static side comment identifier to be other than '##' | ||||
| 3445 | |||||
| 3446 | Delete selected text | ||||
| 3447 | -dac delete all comments AND pod | ||||
| 3448 | -dbc delete block comments | ||||
| 3449 | -dsc delete side comments | ||||
| 3450 | -dp delete pod | ||||
| 3451 | |||||
| 3452 | Send selected text to a '.TEE' file | ||||
| 3453 | -tac tee all comments AND pod | ||||
| 3454 | -tbc tee block comments | ||||
| 3455 | -tsc tee side comments | ||||
| 3456 | -tp tee pod | ||||
| 3457 | |||||
| 3458 | Outdenting | ||||
| 3459 | -olq outdent long quoted strings (default) | ||||
| 3460 | -olc outdent a long block comment line | ||||
| 3461 | -ola outdent statement labels | ||||
| 3462 | -okw outdent control keywords (redo, next, last, goto, return) | ||||
| 3463 | -okwl=s specify alternative keywords for -okw command | ||||
| 3464 | |||||
| 3465 | Other controls | ||||
| 3466 | -mft=n maximum fields per table; default n=40 | ||||
| 3467 | -x do not format lines before hash-bang line (i.e., for VMS) | ||||
| 3468 | -asc allows perltidy to add a ';' when missing (default) | ||||
| 3469 | -dsm allows perltidy to delete an unnecessary ';' (default) | ||||
| 3470 | |||||
| 3471 | Combinations of other parameters | ||||
| 3472 | -gnu attempt to follow GNU Coding Standards as applied to perl | ||||
| 3473 | -mangle remove as many newlines as possible (but keep comments and pods) | ||||
| 3474 | -extrude insert as many newlines as possible | ||||
| 3475 | |||||
| 3476 | Dump and die, debugging | ||||
| 3477 | -dop dump options used in this run to standard output and quit | ||||
| 3478 | -ddf dump default options to standard output and quit | ||||
| 3479 | -dsn dump all option short names to standard output and quit | ||||
| 3480 | -dln dump option long names to standard output and quit | ||||
| 3481 | -dpro dump whatever configuration file is in effect to standard output | ||||
| 3482 | -dtt dump all token types to standard output and quit | ||||
| 3483 | |||||
| 3484 | HTML | ||||
| 3485 | -html write an html file (see 'man perl2web' for many options) | ||||
| 3486 | Note: when -html is used, no indentation or formatting are done. | ||||
| 3487 | Hint: try perltidy -html -css=mystyle.css filename.pl | ||||
| 3488 | and edit mystyle.css to change the appearance of filename.html. | ||||
| 3489 | -nnn gives line numbers | ||||
| 3490 | -pre only writes out <pre>..</pre> code section | ||||
| 3491 | -toc places a table of contents to subs at the top (default) | ||||
| 3492 | -pod passes pod text through pod2html (default) | ||||
| 3493 | -frm write html as a frame (3 files) | ||||
| 3494 | -text=s extra extension for table of contents if -frm, default='toc' | ||||
| 3495 | -sext=s extra extension for file content if -frm, default='src' | ||||
| 3496 | |||||
| 3497 | A prefix of "n" negates short form toggle switches, and a prefix of "no" | ||||
| 3498 | negates the long forms. For example, -nasc means don't add missing | ||||
| 3499 | semicolons. | ||||
| 3500 | |||||
| 3501 | If you are unable to see this entire text, try "perltidy -h | more" | ||||
| 3502 | For more detailed information, and additional options, try "man perltidy", | ||||
| 3503 | or go to the perltidy home page at http://perltidy.sourceforge.net | ||||
| 3504 | EOF | ||||
| 3505 | |||||
| 3506 | } | ||||
| 3507 | |||||
| 3508 | sub process_this_file { | ||||
| 3509 | |||||
| 3510 | my ( $truth, $beauty ) = @_; | ||||
| 3511 | |||||
| 3512 | # loop to process each line of this file | ||||
| 3513 | while ( my $line_of_tokens = $truth->get_line() ) { | ||||
| 3514 | $beauty->write_line($line_of_tokens); | ||||
| 3515 | } | ||||
| 3516 | |||||
| 3517 | # finish up | ||||
| 3518 | eval { $beauty->finish_formatting() }; | ||||
| 3519 | $truth->report_tokenization_errors(); | ||||
| 3520 | } | ||||
| 3521 | |||||
| 3522 | sub check_syntax { | ||||
| 3523 | |||||
| 3524 | # Use 'perl -c' to make sure that we did not create bad syntax | ||||
| 3525 | # This is a very good independent check for programming errors | ||||
| 3526 | # | ||||
| 3527 | # Given names of the input and output files, ($istream, $ostream), | ||||
| 3528 | # we do the following: | ||||
| 3529 | # - check syntax of the input file | ||||
| 3530 | # - if bad, all done (could be an incomplete code snippet) | ||||
| 3531 | # - if infile syntax ok, then check syntax of the output file; | ||||
| 3532 | # - if outfile syntax bad, issue warning; this implies a code bug! | ||||
| 3533 | # - set and return flag "infile_syntax_ok" : =-1 bad 0 unknown 1 good | ||||
| 3534 | |||||
| 3535 | my ( $istream, $ostream, $logger_object, $rOpts ) = @_; | ||||
| 3536 | my $infile_syntax_ok = 0; | ||||
| 3537 | my $line_of_dashes = '-' x 42 . "\n"; | ||||
| 3538 | |||||
| 3539 | my $flags = $rOpts->{'perl-syntax-check-flags'}; | ||||
| 3540 | |||||
| 3541 | # be sure we invoke perl with -c | ||||
| 3542 | # note: perl will accept repeated flags like '-c -c'. It is safest | ||||
| 3543 | # to append another -c than try to find an interior bundled c, as | ||||
| 3544 | # in -Tc, because such a 'c' might be in a quoted string, for example. | ||||
| 3545 | if ( $flags !~ /(^-c|\s+-c)/ ) { $flags .= " -c" } | ||||
| 3546 | |||||
| 3547 | # be sure we invoke perl with -x if requested | ||||
| 3548 | # same comments about repeated parameters applies | ||||
| 3549 | if ( $rOpts->{'look-for-hash-bang'} ) { | ||||
| 3550 | if ( $flags !~ /(^-x|\s+-x)/ ) { $flags .= " -x" } | ||||
| 3551 | } | ||||
| 3552 | |||||
| 3553 | # this shouldn't happen unless a temporary file couldn't be made | ||||
| 3554 | if ( $istream eq '-' ) { | ||||
| 3555 | $logger_object->write_logfile_entry( | ||||
| 3556 | "Cannot run perl -c on STDIN and STDOUT\n"); | ||||
| 3557 | return $infile_syntax_ok; | ||||
| 3558 | } | ||||
| 3559 | |||||
| 3560 | $logger_object->write_logfile_entry( | ||||
| 3561 | "checking input file syntax with perl $flags\n"); | ||||
| 3562 | |||||
| 3563 | # Not all operating systems/shells support redirection of the standard | ||||
| 3564 | # error output. | ||||
| 3565 | my $error_redirection = ( $^O eq 'VMS' ) ? "" : '2>&1'; | ||||
| 3566 | |||||
| 3567 | my ( $istream_filename, $perl_output ) = | ||||
| 3568 | do_syntax_check( $istream, $flags, $error_redirection ); | ||||
| 3569 | $logger_object->write_logfile_entry( | ||||
| 3570 | "Input stream passed to Perl as file $istream_filename\n"); | ||||
| 3571 | $logger_object->write_logfile_entry($line_of_dashes); | ||||
| 3572 | $logger_object->write_logfile_entry("$perl_output\n"); | ||||
| 3573 | |||||
| 3574 | if ( $perl_output =~ /syntax\s*OK/ ) { | ||||
| 3575 | $infile_syntax_ok = 1; | ||||
| 3576 | $logger_object->write_logfile_entry($line_of_dashes); | ||||
| 3577 | $logger_object->write_logfile_entry( | ||||
| 3578 | "checking output file syntax with perl $flags ...\n"); | ||||
| 3579 | my ( $ostream_filename, $perl_output ) = | ||||
| 3580 | do_syntax_check( $ostream, $flags, $error_redirection ); | ||||
| 3581 | $logger_object->write_logfile_entry( | ||||
| 3582 | "Output stream passed to Perl as file $ostream_filename\n"); | ||||
| 3583 | $logger_object->write_logfile_entry($line_of_dashes); | ||||
| 3584 | $logger_object->write_logfile_entry("$perl_output\n"); | ||||
| 3585 | |||||
| 3586 | unless ( $perl_output =~ /syntax\s*OK/ ) { | ||||
| 3587 | $logger_object->write_logfile_entry($line_of_dashes); | ||||
| 3588 | $logger_object->warning( | ||||
| 3589 | "The output file has a syntax error when tested with perl $flags $ostream !\n" | ||||
| 3590 | ); | ||||
| 3591 | $logger_object->warning( | ||||
| 3592 | "This implies an error in perltidy; the file $ostream is bad\n" | ||||
| 3593 | ); | ||||
| 3594 | $logger_object->report_definite_bug(); | ||||
| 3595 | |||||
| 3596 | # the perl version number will be helpful for diagnosing the problem | ||||
| 3597 | $logger_object->write_logfile_entry( | ||||
| 3598 | qx/perl -v $error_redirection/ . "\n" ); | ||||
| 3599 | } | ||||
| 3600 | } | ||||
| 3601 | else { | ||||
| 3602 | |||||
| 3603 | # Only warn of perl -c syntax errors. Other messages, | ||||
| 3604 | # such as missing modules, are too common. They can be | ||||
| 3605 | # seen by running with perltidy -w | ||||
| 3606 | $logger_object->complain("A syntax check using perl $flags\n"); | ||||
| 3607 | $logger_object->complain( | ||||
| 3608 | "for the output in file $istream_filename gives:\n"); | ||||
| 3609 | $logger_object->complain($line_of_dashes); | ||||
| 3610 | $logger_object->complain("$perl_output\n"); | ||||
| 3611 | $logger_object->complain($line_of_dashes); | ||||
| 3612 | $infile_syntax_ok = -1; | ||||
| 3613 | $logger_object->write_logfile_entry($line_of_dashes); | ||||
| 3614 | $logger_object->write_logfile_entry( | ||||
| 3615 | "The output file will not be checked because of input file problems\n" | ||||
| 3616 | ); | ||||
| 3617 | } | ||||
| 3618 | return $infile_syntax_ok; | ||||
| 3619 | } | ||||
| 3620 | |||||
| 3621 | sub do_syntax_check { | ||||
| 3622 | my ( $stream, $flags, $error_redirection ) = @_; | ||||
| 3623 | |||||
| 3624 | # We need a named input file for executing perl | ||||
| 3625 | my ( $stream_filename, $is_tmpfile ) = get_stream_as_named_file($stream); | ||||
| 3626 | |||||
| 3627 | # TODO: Need to add name of file to log somewhere | ||||
| 3628 | # otherwise Perl output is hard to read | ||||
| 3629 | if ( !$stream_filename ) { return $stream_filename, "" } | ||||
| 3630 | |||||
| 3631 | # We have to quote the filename in case it has unusual characters | ||||
| 3632 | # or spaces. Example: this filename #CM11.pm# gives trouble. | ||||
| 3633 | my $quoted_stream_filename = '"' . $stream_filename . '"'; | ||||
| 3634 | |||||
| 3635 | # Under VMS something like -T will become -t (and an error) so we | ||||
| 3636 | # will put quotes around the flags. Double quotes seem to work on | ||||
| 3637 | # Unix/Windows/VMS, but this may not work on all systems. (Single | ||||
| 3638 | # quotes do not work under Windows). It could become necessary to | ||||
| 3639 | # put double quotes around each flag, such as: -"c" -"T" | ||||
| 3640 | # We may eventually need some system-dependent coding here. | ||||
| 3641 | $flags = '"' . $flags . '"'; | ||||
| 3642 | |||||
| 3643 | # now wish for luck... | ||||
| 3644 | my $msg = qx/perl $flags $quoted_stream_filename $error_redirection/; | ||||
| 3645 | |||||
| 3646 | unlink $stream_filename if ($is_tmpfile); | ||||
| 3647 | return $stream_filename, $msg; | ||||
| 3648 | } | ||||
| 3649 | |||||
| 3650 | ##################################################################### | ||||
| 3651 | # | ||||
| 3652 | # This is a stripped down version of IO::Scalar | ||||
| 3653 | # Given a reference to a scalar, it supplies either: | ||||
| 3654 | # a getline method which reads lines (mode='r'), or | ||||
| 3655 | # a print method which reads lines (mode='w') | ||||
| 3656 | # | ||||
| 3657 | ##################################################################### | ||||
| 3658 | package Perl::Tidy::IOScalar; | ||||
| 3659 | 2 | 285µs | 2 | 85µs | # spent 48µs (10+37) within Perl::Tidy::IOScalar::BEGIN@3659 which was called:
# once (10µs+37µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 3659 # spent 48µs making 1 call to Perl::Tidy::IOScalar::BEGIN@3659
# spent 37µs making 1 call to Exporter::import |
| 3660 | |||||
| 3661 | sub new { | ||||
| 3662 | my ( $package, $rscalar, $mode ) = @_; | ||||
| 3663 | my $ref = ref $rscalar; | ||||
| 3664 | if ( $ref ne 'SCALAR' ) { | ||||
| 3665 | confess <<EOM; | ||||
| 3666 | ------------------------------------------------------------------------ | ||||
| 3667 | expecting ref to SCALAR but got ref to ($ref); trace follows: | ||||
| 3668 | ------------------------------------------------------------------------ | ||||
| 3669 | EOM | ||||
| 3670 | |||||
| 3671 | } | ||||
| 3672 | if ( $mode eq 'w' ) { | ||||
| 3673 | $$rscalar = ""; | ||||
| 3674 | return bless [ $rscalar, $mode ], $package; | ||||
| 3675 | } | ||||
| 3676 | elsif ( $mode eq 'r' ) { | ||||
| 3677 | |||||
| 3678 | # Convert a scalar to an array. | ||||
| 3679 | # This avoids looking for "\n" on each call to getline | ||||
| 3680 | # | ||||
| 3681 | # NOTES: The -1 count is needed to avoid loss of trailing blank lines | ||||
| 3682 | # (which might be important in a DATA section). | ||||
| 3683 | my @array; | ||||
| 3684 | if ( $rscalar && ${$rscalar} ) { | ||||
| 3685 | @array = map { $_ .= "\n" } split /\n/, ${$rscalar}, -1; | ||||
| 3686 | |||||
| 3687 | # remove possible extra blank line introduced with split | ||||
| 3688 | if ( @array && $array[-1] eq "\n" ) { pop @array } | ||||
| 3689 | } | ||||
| 3690 | my $i_next = 0; | ||||
| 3691 | return bless [ \@array, $mode, $i_next ], $package; | ||||
| 3692 | } | ||||
| 3693 | else { | ||||
| 3694 | confess <<EOM; | ||||
| 3695 | ------------------------------------------------------------------------ | ||||
| 3696 | expecting mode = 'r' or 'w' but got mode ($mode); trace follows: | ||||
| 3697 | ------------------------------------------------------------------------ | ||||
| 3698 | EOM | ||||
| 3699 | } | ||||
| 3700 | } | ||||
| 3701 | |||||
| 3702 | sub getline { | ||||
| 3703 | my $self = shift; | ||||
| 3704 | my $mode = $self->[1]; | ||||
| 3705 | if ( $mode ne 'r' ) { | ||||
| 3706 | confess <<EOM; | ||||
| 3707 | ------------------------------------------------------------------------ | ||||
| 3708 | getline call requires mode = 'r' but mode = ($mode); trace follows: | ||||
| 3709 | ------------------------------------------------------------------------ | ||||
| 3710 | EOM | ||||
| 3711 | } | ||||
| 3712 | my $i = $self->[2]++; | ||||
| 3713 | return $self->[0]->[$i]; | ||||
| 3714 | } | ||||
| 3715 | |||||
| 3716 | sub print { | ||||
| 3717 | my $self = shift; | ||||
| 3718 | my $mode = $self->[1]; | ||||
| 3719 | if ( $mode ne 'w' ) { | ||||
| 3720 | confess <<EOM; | ||||
| 3721 | ------------------------------------------------------------------------ | ||||
| 3722 | print call requires mode = 'w' but mode = ($mode); trace follows: | ||||
| 3723 | ------------------------------------------------------------------------ | ||||
| 3724 | EOM | ||||
| 3725 | } | ||||
| 3726 | ${ $self->[0] } .= $_[0]; | ||||
| 3727 | } | ||||
| 3728 | sub close { return } | ||||
| 3729 | |||||
| 3730 | ##################################################################### | ||||
| 3731 | # | ||||
| 3732 | # This is a stripped down version of IO::ScalarArray | ||||
| 3733 | # Given a reference to an array, it supplies either: | ||||
| 3734 | # a getline method which reads lines (mode='r'), or | ||||
| 3735 | # a print method which reads lines (mode='w') | ||||
| 3736 | # | ||||
| 3737 | # NOTE: this routine assumes that there aren't any embedded | ||||
| 3738 | # newlines within any of the array elements. There are no checks | ||||
| 3739 | # for that. | ||||
| 3740 | # | ||||
| 3741 | ##################################################################### | ||||
| 3742 | package Perl::Tidy::IOScalarArray; | ||||
| 3743 | 2 | 1.74ms | 2 | 69µs | # spent 39µs (9+30) within Perl::Tidy::IOScalarArray::BEGIN@3743 which was called:
# once (9µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 3743 # spent 39µs making 1 call to Perl::Tidy::IOScalarArray::BEGIN@3743
# spent 30µs making 1 call to Exporter::import |
| 3744 | |||||
| 3745 | sub new { | ||||
| 3746 | my ( $package, $rarray, $mode ) = @_; | ||||
| 3747 | my $ref = ref $rarray; | ||||
| 3748 | if ( $ref ne 'ARRAY' ) { | ||||
| 3749 | confess <<EOM; | ||||
| 3750 | ------------------------------------------------------------------------ | ||||
| 3751 | expecting ref to ARRAY but got ref to ($ref); trace follows: | ||||
| 3752 | ------------------------------------------------------------------------ | ||||
| 3753 | EOM | ||||
| 3754 | |||||
| 3755 | } | ||||
| 3756 | if ( $mode eq 'w' ) { | ||||
| 3757 | @$rarray = (); | ||||
| 3758 | return bless [ $rarray, $mode ], $package; | ||||
| 3759 | } | ||||
| 3760 | elsif ( $mode eq 'r' ) { | ||||
| 3761 | my $i_next = 0; | ||||
| 3762 | return bless [ $rarray, $mode, $i_next ], $package; | ||||
| 3763 | } | ||||
| 3764 | else { | ||||
| 3765 | confess <<EOM; | ||||
| 3766 | ------------------------------------------------------------------------ | ||||
| 3767 | expecting mode = 'r' or 'w' but got mode ($mode); trace follows: | ||||
| 3768 | ------------------------------------------------------------------------ | ||||
| 3769 | EOM | ||||
| 3770 | } | ||||
| 3771 | } | ||||
| 3772 | |||||
| 3773 | sub getline { | ||||
| 3774 | my $self = shift; | ||||
| 3775 | my $mode = $self->[1]; | ||||
| 3776 | if ( $mode ne 'r' ) { | ||||
| 3777 | confess <<EOM; | ||||
| 3778 | ------------------------------------------------------------------------ | ||||
| 3779 | getline requires mode = 'r' but mode = ($mode); trace follows: | ||||
| 3780 | ------------------------------------------------------------------------ | ||||
| 3781 | EOM | ||||
| 3782 | } | ||||
| 3783 | my $i = $self->[2]++; | ||||
| 3784 | return $self->[0]->[$i]; | ||||
| 3785 | } | ||||
| 3786 | |||||
| 3787 | sub print { | ||||
| 3788 | my $self = shift; | ||||
| 3789 | my $mode = $self->[1]; | ||||
| 3790 | if ( $mode ne 'w' ) { | ||||
| 3791 | confess <<EOM; | ||||
| 3792 | ------------------------------------------------------------------------ | ||||
| 3793 | print requires mode = 'w' but mode = ($mode); trace follows: | ||||
| 3794 | ------------------------------------------------------------------------ | ||||
| 3795 | EOM | ||||
| 3796 | } | ||||
| 3797 | push @{ $self->[0] }, $_[0]; | ||||
| 3798 | } | ||||
| 3799 | sub close { return } | ||||
| 3800 | |||||
| 3801 | ##################################################################### | ||||
| 3802 | # | ||||
| 3803 | # the Perl::Tidy::LineSource class supplies an object with a 'get_line()' method | ||||
| 3804 | # which returns the next line to be parsed | ||||
| 3805 | # | ||||
| 3806 | ##################################################################### | ||||
| 3807 | |||||
| 3808 | package Perl::Tidy::LineSource; | ||||
| 3809 | |||||
| 3810 | sub new { | ||||
| 3811 | |||||
| 3812 | my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_; | ||||
| 3813 | |||||
| 3814 | my $input_line_ending; | ||||
| 3815 | if ( $rOpts->{'preserve-line-endings'} ) { | ||||
| 3816 | $input_line_ending = Perl::Tidy::find_input_line_ending($input_file); | ||||
| 3817 | } | ||||
| 3818 | |||||
| 3819 | ( my $fh, $input_file ) = Perl::Tidy::streamhandle( $input_file, 'r' ); | ||||
| 3820 | return undef unless $fh; | ||||
| 3821 | |||||
| 3822 | # in order to check output syntax when standard output is used, | ||||
| 3823 | # or when it is an object, we have to make a copy of the file | ||||
| 3824 | if ( ( $input_file eq '-' || ref $input_file ) && $rOpts->{'check-syntax'} ) | ||||
| 3825 | { | ||||
| 3826 | |||||
| 3827 | # Turning off syntax check when input output is used. | ||||
| 3828 | # The reason is that temporary files cause problems on | ||||
| 3829 | # on many systems. | ||||
| 3830 | $rOpts->{'check-syntax'} = 0; | ||||
| 3831 | |||||
| 3832 | $$rpending_logfile_message .= <<EOM; | ||||
| 3833 | Note: --syntax check will be skipped because standard input is used | ||||
| 3834 | EOM | ||||
| 3835 | |||||
| 3836 | } | ||||
| 3837 | |||||
| 3838 | return bless { | ||||
| 3839 | _fh => $fh, | ||||
| 3840 | _filename => $input_file, | ||||
| 3841 | _input_line_ending => $input_line_ending, | ||||
| 3842 | _rinput_buffer => [], | ||||
| 3843 | _started => 0, | ||||
| 3844 | }, $class; | ||||
| 3845 | } | ||||
| 3846 | |||||
| 3847 | sub close_input_file { | ||||
| 3848 | my $self = shift; | ||||
| 3849 | |||||
| 3850 | # Only close physical files, not STDIN and other objects | ||||
| 3851 | my $filename = $self->{_filename}; | ||||
| 3852 | if ( $filename ne '-' && !ref $filename ) { | ||||
| 3853 | eval { $self->{_fh}->close() }; | ||||
| 3854 | } | ||||
| 3855 | } | ||||
| 3856 | |||||
| 3857 | sub get_line { | ||||
| 3858 | my $self = shift; | ||||
| 3859 | my $line = undef; | ||||
| 3860 | my $fh = $self->{_fh}; | ||||
| 3861 | my $rinput_buffer = $self->{_rinput_buffer}; | ||||
| 3862 | |||||
| 3863 | if ( scalar(@$rinput_buffer) ) { | ||||
| 3864 | $line = shift @$rinput_buffer; | ||||
| 3865 | } | ||||
| 3866 | else { | ||||
| 3867 | $line = $fh->getline(); | ||||
| 3868 | |||||
| 3869 | # patch to read raw mac files under unix, dos | ||||
| 3870 | # see if the first line has embedded \r's | ||||
| 3871 | if ( $line && !$self->{_started} ) { | ||||
| 3872 | if ( $line =~ /[\015][^\015\012]/ ) { | ||||
| 3873 | |||||
| 3874 | # found one -- break the line up and store in a buffer | ||||
| 3875 | @$rinput_buffer = map { $_ . "\n" } split /\015/, $line; | ||||
| 3876 | my $count = @$rinput_buffer; | ||||
| 3877 | $line = shift @$rinput_buffer; | ||||
| 3878 | } | ||||
| 3879 | $self->{_started}++; | ||||
| 3880 | } | ||||
| 3881 | } | ||||
| 3882 | return $line; | ||||
| 3883 | } | ||||
| 3884 | |||||
| 3885 | ##################################################################### | ||||
| 3886 | # | ||||
| 3887 | # the Perl::Tidy::LineSink class supplies a write_line method for | ||||
| 3888 | # actual file writing | ||||
| 3889 | # | ||||
| 3890 | ##################################################################### | ||||
| 3891 | |||||
| 3892 | package Perl::Tidy::LineSink; | ||||
| 3893 | |||||
| 3894 | sub new { | ||||
| 3895 | |||||
| 3896 | my ( $class, $output_file, $tee_file, $line_separator, $rOpts, | ||||
| 3897 | $rpending_logfile_message, $binmode ) | ||||
| 3898 | = @_; | ||||
| 3899 | my $fh = undef; | ||||
| 3900 | my $fh_tee = undef; | ||||
| 3901 | |||||
| 3902 | my $output_file_open = 0; | ||||
| 3903 | |||||
| 3904 | if ( $rOpts->{'format'} eq 'tidy' ) { | ||||
| 3905 | ( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' ); | ||||
| 3906 | unless ($fh) { Perl::Tidy::Die "Cannot write to output stream\n"; } | ||||
| 3907 | $output_file_open = 1; | ||||
| 3908 | if ($binmode) { | ||||
| 3909 | if ( ref($fh) eq 'IO::File' ) { | ||||
| 3910 | binmode $fh; | ||||
| 3911 | } | ||||
| 3912 | if ( $output_file eq '-' ) { binmode STDOUT } | ||||
| 3913 | } | ||||
| 3914 | } | ||||
| 3915 | |||||
| 3916 | # in order to check output syntax when standard output is used, | ||||
| 3917 | # or when it is an object, we have to make a copy of the file | ||||
| 3918 | if ( $output_file eq '-' || ref $output_file ) { | ||||
| 3919 | if ( $rOpts->{'check-syntax'} ) { | ||||
| 3920 | |||||
| 3921 | # Turning off syntax check when standard output is used. | ||||
| 3922 | # The reason is that temporary files cause problems on | ||||
| 3923 | # on many systems. | ||||
| 3924 | $rOpts->{'check-syntax'} = 0; | ||||
| 3925 | $$rpending_logfile_message .= <<EOM; | ||||
| 3926 | Note: --syntax check will be skipped because standard output is used | ||||
| 3927 | EOM | ||||
| 3928 | |||||
| 3929 | } | ||||
| 3930 | } | ||||
| 3931 | |||||
| 3932 | bless { | ||||
| 3933 | _fh => $fh, | ||||
| 3934 | _fh_tee => $fh_tee, | ||||
| 3935 | _output_file => $output_file, | ||||
| 3936 | _output_file_open => $output_file_open, | ||||
| 3937 | _tee_flag => 0, | ||||
| 3938 | _tee_file => $tee_file, | ||||
| 3939 | _tee_file_opened => 0, | ||||
| 3940 | _line_separator => $line_separator, | ||||
| 3941 | _binmode => $binmode, | ||||
| 3942 | }, $class; | ||||
| 3943 | } | ||||
| 3944 | |||||
| 3945 | sub write_line { | ||||
| 3946 | |||||
| 3947 | my $self = shift; | ||||
| 3948 | my $fh = $self->{_fh}; | ||||
| 3949 | |||||
| 3950 | my $output_file_open = $self->{_output_file_open}; | ||||
| 3951 | chomp $_[0]; | ||||
| 3952 | $_[0] .= $self->{_line_separator}; | ||||
| 3953 | |||||
| 3954 | $fh->print( $_[0] ) if ( $self->{_output_file_open} ); | ||||
| 3955 | |||||
| 3956 | if ( $self->{_tee_flag} ) { | ||||
| 3957 | unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() } | ||||
| 3958 | my $fh_tee = $self->{_fh_tee}; | ||||
| 3959 | print $fh_tee $_[0]; | ||||
| 3960 | } | ||||
| 3961 | } | ||||
| 3962 | |||||
| 3963 | sub tee_on { | ||||
| 3964 | my $self = shift; | ||||
| 3965 | $self->{_tee_flag} = 1; | ||||
| 3966 | } | ||||
| 3967 | |||||
| 3968 | sub tee_off { | ||||
| 3969 | my $self = shift; | ||||
| 3970 | $self->{_tee_flag} = 0; | ||||
| 3971 | } | ||||
| 3972 | |||||
| 3973 | sub really_open_tee_file { | ||||
| 3974 | my $self = shift; | ||||
| 3975 | my $tee_file = $self->{_tee_file}; | ||||
| 3976 | my $fh_tee; | ||||
| 3977 | $fh_tee = IO::File->new(">$tee_file") | ||||
| 3978 | or Perl::Tidy::Die("couldn't open TEE file $tee_file: $!\n"); | ||||
| 3979 | binmode $fh_tee if $self->{_binmode}; | ||||
| 3980 | $self->{_tee_file_opened} = 1; | ||||
| 3981 | $self->{_fh_tee} = $fh_tee; | ||||
| 3982 | } | ||||
| 3983 | |||||
| 3984 | sub close_output_file { | ||||
| 3985 | my $self = shift; | ||||
| 3986 | |||||
| 3987 | # Only close physical files, not STDOUT and other objects | ||||
| 3988 | my $output_file = $self->{_output_file}; | ||||
| 3989 | if ( $output_file ne '-' && !ref $output_file ) { | ||||
| 3990 | eval { $self->{_fh}->close() } if $self->{_output_file_open}; | ||||
| 3991 | } | ||||
| 3992 | $self->close_tee_file(); | ||||
| 3993 | } | ||||
| 3994 | |||||
| 3995 | sub close_tee_file { | ||||
| 3996 | my $self = shift; | ||||
| 3997 | |||||
| 3998 | # Only close physical files, not STDOUT and other objects | ||||
| 3999 | if ( $self->{_tee_file_opened} ) { | ||||
| 4000 | my $tee_file = $self->{_tee_file}; | ||||
| 4001 | if ( $tee_file ne '-' && !ref $tee_file ) { | ||||
| 4002 | eval { $self->{_fh_tee}->close() }; | ||||
| 4003 | $self->{_tee_file_opened} = 0; | ||||
| 4004 | } | ||||
| 4005 | } | ||||
| 4006 | } | ||||
| 4007 | |||||
| 4008 | ##################################################################### | ||||
| 4009 | # | ||||
| 4010 | # The Perl::Tidy::Diagnostics class writes the DIAGNOSTICS file, which is | ||||
| 4011 | # useful for program development. | ||||
| 4012 | # | ||||
| 4013 | # Only one such file is created regardless of the number of input | ||||
| 4014 | # files processed. This allows the results of processing many files | ||||
| 4015 | # to be summarized in a single file. | ||||
| 4016 | # | ||||
| 4017 | ##################################################################### | ||||
| 4018 | |||||
| 4019 | package Perl::Tidy::Diagnostics; | ||||
| 4020 | |||||
| 4021 | sub new { | ||||
| 4022 | |||||
| 4023 | my $class = shift; | ||||
| 4024 | bless { | ||||
| 4025 | _write_diagnostics_count => 0, | ||||
| 4026 | _last_diagnostic_file => "", | ||||
| 4027 | _input_file => "", | ||||
| 4028 | _fh => undef, | ||||
| 4029 | }, $class; | ||||
| 4030 | } | ||||
| 4031 | |||||
| 4032 | sub set_input_file { | ||||
| 4033 | my $self = shift; | ||||
| 4034 | $self->{_input_file} = $_[0]; | ||||
| 4035 | } | ||||
| 4036 | |||||
| 4037 | # This is a diagnostic routine which is useful for program development. | ||||
| 4038 | # Output from debug messages go to a file named DIAGNOSTICS, where | ||||
| 4039 | # they are labeled by file and line. This allows many files to be | ||||
| 4040 | # scanned at once for some particular condition of interest. | ||||
| 4041 | sub write_diagnostics { | ||||
| 4042 | my $self = shift; | ||||
| 4043 | |||||
| 4044 | unless ( $self->{_write_diagnostics_count} ) { | ||||
| 4045 | open DIAGNOSTICS, ">DIAGNOSTICS" | ||||
| 4046 | or death("couldn't open DIAGNOSTICS: $!\n"); | ||||
| 4047 | } | ||||
| 4048 | |||||
| 4049 | my $last_diagnostic_file = $self->{_last_diagnostic_file}; | ||||
| 4050 | my $input_file = $self->{_input_file}; | ||||
| 4051 | if ( $last_diagnostic_file ne $input_file ) { | ||||
| 4052 | print DIAGNOSTICS "\nFILE:$input_file\n"; | ||||
| 4053 | } | ||||
| 4054 | $self->{_last_diagnostic_file} = $input_file; | ||||
| 4055 | my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number(); | ||||
| 4056 | print DIAGNOSTICS "$input_line_number:\t@_"; | ||||
| 4057 | $self->{_write_diagnostics_count}++; | ||||
| 4058 | } | ||||
| 4059 | |||||
| 4060 | ##################################################################### | ||||
| 4061 | # | ||||
| 4062 | # The Perl::Tidy::Logger class writes the .LOG and .ERR files | ||||
| 4063 | # | ||||
| 4064 | ##################################################################### | ||||
| 4065 | |||||
| 4066 | package Perl::Tidy::Logger; | ||||
| 4067 | |||||
| 4068 | sub new { | ||||
| 4069 | my $class = shift; | ||||
| 4070 | my $fh; | ||||
| 4071 | my ( $rOpts, $log_file, $warning_file, $fh_stderr, $saw_extrude, ) = @_; | ||||
| 4072 | |||||
| 4073 | my $fh_warnings = $rOpts->{'standard-error-output'} ? $fh_stderr : undef; | ||||
| 4074 | |||||
| 4075 | # remove any old error output file if we might write a new one | ||||
| 4076 | unless ( $fh_warnings || ref($warning_file) ) { | ||||
| 4077 | if ( -e $warning_file ) { unlink($warning_file) } | ||||
| 4078 | } | ||||
| 4079 | |||||
| 4080 | my $logfile_gap = | ||||
| 4081 | defined( $rOpts->{'logfile-gap'} ) | ||||
| 4082 | ? $rOpts->{'logfile-gap'} | ||||
| 4083 | : 50; | ||||
| 4084 | if ( $logfile_gap == 0 ) { $logfile_gap = 1 } | ||||
| 4085 | |||||
| 4086 | bless { | ||||
| 4087 | _log_file => $log_file, | ||||
| 4088 | _logfile_gap => $logfile_gap, | ||||
| 4089 | _rOpts => $rOpts, | ||||
| 4090 | _fh_warnings => $fh_warnings, | ||||
| 4091 | _last_input_line_written => 0, | ||||
| 4092 | _at_end_of_file => 0, | ||||
| 4093 | _use_prefix => 1, | ||||
| 4094 | _block_log_output => 0, | ||||
| 4095 | _line_of_tokens => undef, | ||||
| 4096 | _output_line_number => undef, | ||||
| 4097 | _wrote_line_information_string => 0, | ||||
| 4098 | _wrote_column_headings => 0, | ||||
| 4099 | _warning_file => $warning_file, | ||||
| 4100 | _warning_count => 0, | ||||
| 4101 | _complaint_count => 0, | ||||
| 4102 | _saw_code_bug => -1, # -1=no 0=maybe 1=for sure | ||||
| 4103 | _saw_brace_error => 0, | ||||
| 4104 | _saw_extrude => $saw_extrude, | ||||
| 4105 | _output_array => [], | ||||
| 4106 | }, $class; | ||||
| 4107 | } | ||||
| 4108 | |||||
| 4109 | sub get_warning_count { | ||||
| 4110 | my $self = shift; | ||||
| 4111 | return $self->{_warning_count}; | ||||
| 4112 | } | ||||
| 4113 | |||||
| 4114 | sub get_use_prefix { | ||||
| 4115 | my $self = shift; | ||||
| 4116 | return $self->{_use_prefix}; | ||||
| 4117 | } | ||||
| 4118 | |||||
| 4119 | sub block_log_output { | ||||
| 4120 | my $self = shift; | ||||
| 4121 | $self->{_block_log_output} = 1; | ||||
| 4122 | } | ||||
| 4123 | |||||
| 4124 | sub unblock_log_output { | ||||
| 4125 | my $self = shift; | ||||
| 4126 | $self->{_block_log_output} = 0; | ||||
| 4127 | } | ||||
| 4128 | |||||
| 4129 | sub interrupt_logfile { | ||||
| 4130 | my $self = shift; | ||||
| 4131 | $self->{_use_prefix} = 0; | ||||
| 4132 | $self->warning("\n"); | ||||
| 4133 | $self->write_logfile_entry( '#' x 24 . " WARNING " . '#' x 25 . "\n" ); | ||||
| 4134 | } | ||||
| 4135 | |||||
| 4136 | sub resume_logfile { | ||||
| 4137 | my $self = shift; | ||||
| 4138 | $self->write_logfile_entry( '#' x 60 . "\n" ); | ||||
| 4139 | $self->{_use_prefix} = 1; | ||||
| 4140 | } | ||||
| 4141 | |||||
| 4142 | sub we_are_at_the_last_line { | ||||
| 4143 | my $self = shift; | ||||
| 4144 | unless ( $self->{_wrote_line_information_string} ) { | ||||
| 4145 | $self->write_logfile_entry("Last line\n\n"); | ||||
| 4146 | } | ||||
| 4147 | $self->{_at_end_of_file} = 1; | ||||
| 4148 | } | ||||
| 4149 | |||||
| 4150 | # record some stuff in case we go down in flames | ||||
| 4151 | sub black_box { | ||||
| 4152 | my $self = shift; | ||||
| 4153 | my ( $line_of_tokens, $output_line_number ) = @_; | ||||
| 4154 | my $input_line = $line_of_tokens->{_line_text}; | ||||
| 4155 | my $input_line_number = $line_of_tokens->{_line_number}; | ||||
| 4156 | |||||
| 4157 | # save line information in case we have to write a logfile message | ||||
| 4158 | $self->{_line_of_tokens} = $line_of_tokens; | ||||
| 4159 | $self->{_output_line_number} = $output_line_number; | ||||
| 4160 | $self->{_wrote_line_information_string} = 0; | ||||
| 4161 | |||||
| 4162 | my $last_input_line_written = $self->{_last_input_line_written}; | ||||
| 4163 | my $rOpts = $self->{_rOpts}; | ||||
| 4164 | if ( | ||||
| 4165 | ( | ||||
| 4166 | ( $input_line_number - $last_input_line_written ) >= | ||||
| 4167 | $self->{_logfile_gap} | ||||
| 4168 | ) | ||||
| 4169 | || ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) | ||||
| 4170 | ) | ||||
| 4171 | { | ||||
| 4172 | my $rlevels = $line_of_tokens->{_rlevels}; | ||||
| 4173 | my $structural_indentation_level = $$rlevels[0]; | ||||
| 4174 | $self->{_last_input_line_written} = $input_line_number; | ||||
| 4175 | ( my $out_str = $input_line ) =~ s/^\s*//; | ||||
| 4176 | chomp $out_str; | ||||
| 4177 | |||||
| 4178 | $out_str = ( '.' x $structural_indentation_level ) . $out_str; | ||||
| 4179 | |||||
| 4180 | if ( length($out_str) > 35 ) { | ||||
| 4181 | $out_str = substr( $out_str, 0, 35 ) . " ...."; | ||||
| 4182 | } | ||||
| 4183 | $self->logfile_output( "", "$out_str\n" ); | ||||
| 4184 | } | ||||
| 4185 | } | ||||
| 4186 | |||||
| 4187 | sub write_logfile_entry { | ||||
| 4188 | my $self = shift; | ||||
| 4189 | |||||
| 4190 | # add leading >>> to avoid confusing error messages and code | ||||
| 4191 | $self->logfile_output( ">>>", "@_" ); | ||||
| 4192 | } | ||||
| 4193 | |||||
| 4194 | sub write_column_headings { | ||||
| 4195 | my $self = shift; | ||||
| 4196 | |||||
| 4197 | $self->{_wrote_column_headings} = 1; | ||||
| 4198 | my $routput_array = $self->{_output_array}; | ||||
| 4199 | push @{$routput_array}, <<EOM; | ||||
| 4200 | The nesting depths in the table below are at the start of the lines. | ||||
| 4201 | The indicated output line numbers are not always exact. | ||||
| 4202 | ci = levels of continuation indentation; bk = 1 if in BLOCK, 0 if not. | ||||
| 4203 | |||||
| 4204 | in:out indent c b nesting code + messages; (messages begin with >>>) | ||||
| 4205 | lines levels i k (code begins with one '.' per indent level) | ||||
| 4206 | ------ ----- - - -------- ------------------------------------------- | ||||
| 4207 | EOM | ||||
| 4208 | } | ||||
| 4209 | |||||
| 4210 | sub make_line_information_string { | ||||
| 4211 | |||||
| 4212 | # make columns of information when a logfile message needs to go out | ||||
| 4213 | my $self = shift; | ||||
| 4214 | my $line_of_tokens = $self->{_line_of_tokens}; | ||||
| 4215 | my $input_line_number = $line_of_tokens->{_line_number}; | ||||
| 4216 | my $line_information_string = ""; | ||||
| 4217 | if ($input_line_number) { | ||||
| 4218 | |||||
| 4219 | my $output_line_number = $self->{_output_line_number}; | ||||
| 4220 | my $brace_depth = $line_of_tokens->{_curly_brace_depth}; | ||||
| 4221 | my $paren_depth = $line_of_tokens->{_paren_depth}; | ||||
| 4222 | my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth}; | ||||
| 4223 | my $guessed_indentation_level = | ||||
| 4224 | $line_of_tokens->{_guessed_indentation_level}; | ||||
| 4225 | my $rlevels = $line_of_tokens->{_rlevels}; | ||||
| 4226 | my $rnesting_tokens = $line_of_tokens->{_rnesting_tokens}; | ||||
| 4227 | my $rci_levels = $line_of_tokens->{_rci_levels}; | ||||
| 4228 | my $rnesting_blocks = $line_of_tokens->{_rnesting_blocks}; | ||||
| 4229 | |||||
| 4230 | my $structural_indentation_level = $$rlevels[0]; | ||||
| 4231 | |||||
| 4232 | $self->write_column_headings() unless $self->{_wrote_column_headings}; | ||||
| 4233 | |||||
| 4234 | # keep logfile columns aligned for scripts up to 999 lines; | ||||
| 4235 | # for longer scripts it doesn't really matter | ||||
| 4236 | my $extra_space = ""; | ||||
| 4237 | $extra_space .= | ||||
| 4238 | ( $input_line_number < 10 ) ? " " | ||||
| 4239 | : ( $input_line_number < 100 ) ? " " | ||||
| 4240 | : ""; | ||||
| 4241 | $extra_space .= | ||||
| 4242 | ( $output_line_number < 10 ) ? " " | ||||
| 4243 | : ( $output_line_number < 100 ) ? " " | ||||
| 4244 | : ""; | ||||
| 4245 | |||||
| 4246 | # there are 2 possible nesting strings: | ||||
| 4247 | # the original which looks like this: (0 [1 {2 | ||||
| 4248 | # the new one, which looks like this: {{[ | ||||
| 4249 | # the new one is easier to read, and shows the order, but | ||||
| 4250 | # could be arbitrarily long, so we use it unless it is too long | ||||
| 4251 | my $nesting_string = | ||||
| 4252 | "($paren_depth [$square_bracket_depth {$brace_depth"; | ||||
| 4253 | my $nesting_string_new = $$rnesting_tokens[0]; | ||||
| 4254 | |||||
| 4255 | my $ci_level = $$rci_levels[0]; | ||||
| 4256 | if ( $ci_level > 9 ) { $ci_level = '*' } | ||||
| 4257 | my $bk = ( $$rnesting_blocks[0] =~ /1$/ ) ? '1' : '0'; | ||||
| 4258 | |||||
| 4259 | if ( length($nesting_string_new) <= 8 ) { | ||||
| 4260 | $nesting_string = | ||||
| 4261 | $nesting_string_new . " " x ( 8 - length($nesting_string_new) ); | ||||
| 4262 | } | ||||
| 4263 | $line_information_string = | ||||
| 4264 | "L$input_line_number:$output_line_number$extra_space i$guessed_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string"; | ||||
| 4265 | } | ||||
| 4266 | return $line_information_string; | ||||
| 4267 | } | ||||
| 4268 | |||||
| 4269 | sub logfile_output { | ||||
| 4270 | my $self = shift; | ||||
| 4271 | my ( $prompt, $msg ) = @_; | ||||
| 4272 | return if ( $self->{_block_log_output} ); | ||||
| 4273 | |||||
| 4274 | my $routput_array = $self->{_output_array}; | ||||
| 4275 | if ( $self->{_at_end_of_file} || !$self->{_use_prefix} ) { | ||||
| 4276 | push @{$routput_array}, "$msg"; | ||||
| 4277 | } | ||||
| 4278 | else { | ||||
| 4279 | my $line_information_string = $self->make_line_information_string(); | ||||
| 4280 | $self->{_wrote_line_information_string} = 1; | ||||
| 4281 | |||||
| 4282 | if ($line_information_string) { | ||||
| 4283 | push @{$routput_array}, "$line_information_string $prompt$msg"; | ||||
| 4284 | } | ||||
| 4285 | else { | ||||
| 4286 | push @{$routput_array}, "$msg"; | ||||
| 4287 | } | ||||
| 4288 | } | ||||
| 4289 | } | ||||
| 4290 | |||||
| 4291 | sub get_saw_brace_error { | ||||
| 4292 | my $self = shift; | ||||
| 4293 | return $self->{_saw_brace_error}; | ||||
| 4294 | } | ||||
| 4295 | |||||
| 4296 | sub increment_brace_error { | ||||
| 4297 | my $self = shift; | ||||
| 4298 | $self->{_saw_brace_error}++; | ||||
| 4299 | } | ||||
| 4300 | |||||
| 4301 | sub brace_warning { | ||||
| 4302 | my $self = shift; | ||||
| 4303 | 2 | 128µs | 2 | 115µs | # spent 63µs (10+52) within Perl::Tidy::Logger::BEGIN@4303 which was called:
# once (10µs+52µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 4303 # spent 63µs making 1 call to Perl::Tidy::Logger::BEGIN@4303
# spent 52µs making 1 call to constant::import |
| 4304 | my $saw_brace_error = $self->{_saw_brace_error}; | ||||
| 4305 | |||||
| 4306 | if ( $saw_brace_error < BRACE_WARNING_LIMIT ) { | ||||
| 4307 | $self->warning(@_); | ||||
| 4308 | } | ||||
| 4309 | $saw_brace_error++; | ||||
| 4310 | $self->{_saw_brace_error} = $saw_brace_error; | ||||
| 4311 | |||||
| 4312 | if ( $saw_brace_error == BRACE_WARNING_LIMIT ) { | ||||
| 4313 | $self->warning("No further warnings of this type will be given\n"); | ||||
| 4314 | } | ||||
| 4315 | } | ||||
| 4316 | |||||
| 4317 | sub complain { | ||||
| 4318 | |||||
| 4319 | # handle non-critical warning messages based on input flag | ||||
| 4320 | my $self = shift; | ||||
| 4321 | my $rOpts = $self->{_rOpts}; | ||||
| 4322 | |||||
| 4323 | # these appear in .ERR output only if -w flag is used | ||||
| 4324 | if ( $rOpts->{'warning-output'} ) { | ||||
| 4325 | $self->warning(@_); | ||||
| 4326 | } | ||||
| 4327 | |||||
| 4328 | # otherwise, they go to the .LOG file | ||||
| 4329 | else { | ||||
| 4330 | $self->{_complaint_count}++; | ||||
| 4331 | $self->write_logfile_entry(@_); | ||||
| 4332 | } | ||||
| 4333 | } | ||||
| 4334 | |||||
| 4335 | sub warning { | ||||
| 4336 | |||||
| 4337 | # report errors to .ERR file (or stdout) | ||||
| 4338 | my $self = shift; | ||||
| 4339 | 2 | 578µs | 2 | 83µs | # spent 45µs (8+38) within Perl::Tidy::Logger::BEGIN@4339 which was called:
# once (8µs+38µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 4339 # spent 45µs making 1 call to Perl::Tidy::Logger::BEGIN@4339
# spent 38µs making 1 call to constant::import |
| 4340 | |||||
| 4341 | my $rOpts = $self->{_rOpts}; | ||||
| 4342 | unless ( $rOpts->{'quiet'} ) { | ||||
| 4343 | |||||
| 4344 | my $warning_count = $self->{_warning_count}; | ||||
| 4345 | my $fh_warnings = $self->{_fh_warnings}; | ||||
| 4346 | if ( !$fh_warnings ) { | ||||
| 4347 | my $warning_file = $self->{_warning_file}; | ||||
| 4348 | ( $fh_warnings, my $filename ) = | ||||
| 4349 | Perl::Tidy::streamhandle( $warning_file, 'w' ); | ||||
| 4350 | $fh_warnings or Perl::Tidy::Die("couldn't open $filename $!\n"); | ||||
| 4351 | Perl::Tidy::Warn "## Please see file $filename\n" | ||||
| 4352 | unless ref($warning_file); | ||||
| 4353 | $self->{_fh_warnings} = $fh_warnings; | ||||
| 4354 | } | ||||
| 4355 | |||||
| 4356 | if ( $warning_count < WARNING_LIMIT ) { | ||||
| 4357 | if ( $self->get_use_prefix() > 0 ) { | ||||
| 4358 | my $input_line_number = | ||||
| 4359 | Perl::Tidy::Tokenizer::get_input_line_number(); | ||||
| 4360 | if ( !defined($input_line_number) ) { $input_line_number = -1 } | ||||
| 4361 | $fh_warnings->print("$input_line_number:\t@_"); | ||||
| 4362 | $self->write_logfile_entry("WARNING: @_"); | ||||
| 4363 | } | ||||
| 4364 | else { | ||||
| 4365 | $fh_warnings->print(@_); | ||||
| 4366 | $self->write_logfile_entry(@_); | ||||
| 4367 | } | ||||
| 4368 | } | ||||
| 4369 | $warning_count++; | ||||
| 4370 | $self->{_warning_count} = $warning_count; | ||||
| 4371 | |||||
| 4372 | if ( $warning_count == WARNING_LIMIT ) { | ||||
| 4373 | $fh_warnings->print("No further warnings will be given\n"); | ||||
| 4374 | } | ||||
| 4375 | } | ||||
| 4376 | } | ||||
| 4377 | |||||
| 4378 | # programming bug codes: | ||||
| 4379 | # -1 = no bug | ||||
| 4380 | # 0 = maybe, not sure. | ||||
| 4381 | # 1 = definitely | ||||
| 4382 | sub report_possible_bug { | ||||
| 4383 | my $self = shift; | ||||
| 4384 | my $saw_code_bug = $self->{_saw_code_bug}; | ||||
| 4385 | $self->{_saw_code_bug} = ( $saw_code_bug < 0 ) ? 0 : $saw_code_bug; | ||||
| 4386 | } | ||||
| 4387 | |||||
| 4388 | sub report_definite_bug { | ||||
| 4389 | my $self = shift; | ||||
| 4390 | $self->{_saw_code_bug} = 1; | ||||
| 4391 | } | ||||
| 4392 | |||||
| 4393 | sub ask_user_for_bug_report { | ||||
| 4394 | my $self = shift; | ||||
| 4395 | |||||
| 4396 | my ( $infile_syntax_ok, $formatter ) = @_; | ||||
| 4397 | my $saw_code_bug = $self->{_saw_code_bug}; | ||||
| 4398 | if ( ( $saw_code_bug == 0 ) && ( $infile_syntax_ok == 1 ) ) { | ||||
| 4399 | $self->warning(<<EOM); | ||||
| 4400 | |||||
| 4401 | You may have encountered a code bug in perltidy. If you think so, and | ||||
| 4402 | the problem is not listed in the BUGS file at | ||||
| 4403 | http://perltidy.sourceforge.net, please report it so that it can be | ||||
| 4404 | corrected. Include the smallest possible script which has the problem, | ||||
| 4405 | along with the .LOG file. See the manual pages for contact information. | ||||
| 4406 | Thank you! | ||||
| 4407 | EOM | ||||
| 4408 | |||||
| 4409 | } | ||||
| 4410 | elsif ( $saw_code_bug == 1 ) { | ||||
| 4411 | if ( $self->{_saw_extrude} ) { | ||||
| 4412 | $self->warning(<<EOM); | ||||
| 4413 | |||||
| 4414 | You may have encountered a bug in perltidy. However, since you are using the | ||||
| 4415 | -extrude option, the problem may be with perl or one of its modules, which have | ||||
| 4416 | occasional problems with this type of file. If you believe that the | ||||
| 4417 | problem is with perltidy, and the problem is not listed in the BUGS file at | ||||
| 4418 | http://perltidy.sourceforge.net, please report it so that it can be corrected. | ||||
| 4419 | Include the smallest possible script which has the problem, along with the .LOG | ||||
| 4420 | file. See the manual pages for contact information. | ||||
| 4421 | Thank you! | ||||
| 4422 | EOM | ||||
| 4423 | } | ||||
| 4424 | else { | ||||
| 4425 | $self->warning(<<EOM); | ||||
| 4426 | |||||
| 4427 | Oops, you seem to have encountered a bug in perltidy. Please check the | ||||
| 4428 | BUGS file at http://perltidy.sourceforge.net. If the problem is not | ||||
| 4429 | listed there, please report it so that it can be corrected. Include the | ||||
| 4430 | smallest possible script which produces this message, along with the | ||||
| 4431 | .LOG file if appropriate. See the manual pages for contact information. | ||||
| 4432 | Your efforts are appreciated. | ||||
| 4433 | Thank you! | ||||
| 4434 | EOM | ||||
| 4435 | my $added_semicolon_count = 0; | ||||
| 4436 | eval { | ||||
| 4437 | $added_semicolon_count = | ||||
| 4438 | $formatter->get_added_semicolon_count(); | ||||
| 4439 | }; | ||||
| 4440 | if ( $added_semicolon_count > 0 ) { | ||||
| 4441 | $self->warning(<<EOM); | ||||
| 4442 | |||||
| 4443 | The log file shows that perltidy added $added_semicolon_count semicolons. | ||||
| 4444 | Please rerun with -nasc to see if that is the cause of the syntax error. Even | ||||
| 4445 | if that is the problem, please report it so that it can be fixed. | ||||
| 4446 | EOM | ||||
| 4447 | |||||
| 4448 | } | ||||
| 4449 | } | ||||
| 4450 | } | ||||
| 4451 | } | ||||
| 4452 | |||||
| 4453 | sub finish { | ||||
| 4454 | |||||
| 4455 | # called after all formatting to summarize errors | ||||
| 4456 | my $self = shift; | ||||
| 4457 | my ( $infile_syntax_ok, $formatter ) = @_; | ||||
| 4458 | |||||
| 4459 | my $rOpts = $self->{_rOpts}; | ||||
| 4460 | my $warning_count = $self->{_warning_count}; | ||||
| 4461 | my $saw_code_bug = $self->{_saw_code_bug}; | ||||
| 4462 | |||||
| 4463 | my $save_logfile = | ||||
| 4464 | ( $saw_code_bug == 0 && $infile_syntax_ok == 1 ) | ||||
| 4465 | || $saw_code_bug == 1 | ||||
| 4466 | || $rOpts->{'logfile'}; | ||||
| 4467 | my $log_file = $self->{_log_file}; | ||||
| 4468 | if ($warning_count) { | ||||
| 4469 | if ($save_logfile) { | ||||
| 4470 | $self->block_log_output(); # avoid echoing this to the logfile | ||||
| 4471 | $self->warning( | ||||
| 4472 | "The logfile $log_file may contain useful information\n"); | ||||
| 4473 | $self->unblock_log_output(); | ||||
| 4474 | } | ||||
| 4475 | |||||
| 4476 | if ( $self->{_complaint_count} > 0 ) { | ||||
| 4477 | $self->warning( | ||||
| 4478 | "To see $self->{_complaint_count} non-critical warnings rerun with -w\n" | ||||
| 4479 | ); | ||||
| 4480 | } | ||||
| 4481 | |||||
| 4482 | if ( $self->{_saw_brace_error} | ||||
| 4483 | && ( $self->{_logfile_gap} > 1 || !$save_logfile ) ) | ||||
| 4484 | { | ||||
| 4485 | $self->warning("To save a full .LOG file rerun with -g\n"); | ||||
| 4486 | } | ||||
| 4487 | } | ||||
| 4488 | $self->ask_user_for_bug_report( $infile_syntax_ok, $formatter ); | ||||
| 4489 | |||||
| 4490 | if ($save_logfile) { | ||||
| 4491 | my $log_file = $self->{_log_file}; | ||||
| 4492 | my ( $fh, $filename ) = Perl::Tidy::streamhandle( $log_file, 'w' ); | ||||
| 4493 | if ($fh) { | ||||
| 4494 | my $routput_array = $self->{_output_array}; | ||||
| 4495 | foreach ( @{$routput_array} ) { $fh->print($_) } | ||||
| 4496 | if ( $log_file ne '-' && !ref $log_file ) { | ||||
| 4497 | eval { $fh->close() }; | ||||
| 4498 | } | ||||
| 4499 | } | ||||
| 4500 | } | ||||
| 4501 | } | ||||
| 4502 | |||||
| 4503 | ##################################################################### | ||||
| 4504 | # | ||||
| 4505 | # The Perl::Tidy::DevNull class supplies a dummy print method | ||||
| 4506 | # | ||||
| 4507 | ##################################################################### | ||||
| 4508 | |||||
| 4509 | package Perl::Tidy::DevNull; | ||||
| 4510 | sub new { return bless {}, $_[0] } | ||||
| 4511 | sub print { return } | ||||
| 4512 | sub close { return } | ||||
| 4513 | |||||
| 4514 | ##################################################################### | ||||
| 4515 | # | ||||
| 4516 | # The Perl::Tidy::HtmlWriter class writes a copy of the input stream in html | ||||
| 4517 | # | ||||
| 4518 | ##################################################################### | ||||
| 4519 | |||||
| 4520 | package Perl::Tidy::HtmlWriter; | ||||
| 4521 | |||||
| 4522 | 2 | 38µs | 2 | 80µs | # spent 45µs (10+35) within Perl::Tidy::HtmlWriter::BEGIN@4522 which was called:
# once (10µs+35µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 4522 # spent 45µs making 1 call to Perl::Tidy::HtmlWriter::BEGIN@4522
# spent 35µs making 1 call to Exporter::import |
| 4523 | |||||
| 4524 | # class variables | ||||
| 4525 | 1 | 600ns | # spent 110µs (9+101) within Perl::Tidy::HtmlWriter::BEGIN@4525 which was called:
# once (9µs+101µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 4535 | ||
| 4526 | %html_color | ||||
| 4527 | %html_bold | ||||
| 4528 | %html_italic | ||||
| 4529 | %token_short_names | ||||
| 4530 | %short_to_long_names | ||||
| 4531 | $rOpts | ||||
| 4532 | $css_filename | ||||
| 4533 | $css_linkname | ||||
| 4534 | $missing_html_entities | ||||
| 4535 | 1 | 765µs | 2 | 211µs | }; # spent 110µs making 1 call to Perl::Tidy::HtmlWriter::BEGIN@4525
# spent 101µs making 1 call to vars::import |
| 4536 | |||||
| 4537 | # replace unsafe characters with HTML entity representation if HTML::Entities | ||||
| 4538 | # is available | ||||
| 4539 | 3 | 34µs | { eval "use HTML::Entities"; $missing_html_entities = $@; } # spent 125µs executing statements in string eval # includes 3.00ms spent executing 1 call to 1 sub defined therein. | ||
| 4540 | |||||
| 4541 | sub new { | ||||
| 4542 | |||||
| 4543 | my ( $class, $input_file, $html_file, $extension, $html_toc_extension, | ||||
| 4544 | $html_src_extension ) | ||||
| 4545 | = @_; | ||||
| 4546 | |||||
| 4547 | my $html_file_opened = 0; | ||||
| 4548 | my $html_fh; | ||||
| 4549 | ( $html_fh, my $html_filename ) = | ||||
| 4550 | Perl::Tidy::streamhandle( $html_file, 'w' ); | ||||
| 4551 | unless ($html_fh) { | ||||
| 4552 | Perl::Tidy::Warn("can't open $html_file: $!\n"); | ||||
| 4553 | return undef; | ||||
| 4554 | } | ||||
| 4555 | $html_file_opened = 1; | ||||
| 4556 | |||||
| 4557 | if ( !$input_file || $input_file eq '-' || ref($input_file) ) { | ||||
| 4558 | $input_file = "NONAME"; | ||||
| 4559 | } | ||||
| 4560 | |||||
| 4561 | # write the table of contents to a string | ||||
| 4562 | my $toc_string; | ||||
| 4563 | my $html_toc_fh = Perl::Tidy::IOScalar->new( \$toc_string, 'w' ); | ||||
| 4564 | |||||
| 4565 | my $html_pre_fh; | ||||
| 4566 | my @pre_string_stack; | ||||
| 4567 | if ( $rOpts->{'html-pre-only'} ) { | ||||
| 4568 | |||||
| 4569 | # pre section goes directly to the output stream | ||||
| 4570 | $html_pre_fh = $html_fh; | ||||
| 4571 | $html_pre_fh->print( <<"PRE_END"); | ||||
| 4572 | <pre> | ||||
| 4573 | PRE_END | ||||
| 4574 | } | ||||
| 4575 | else { | ||||
| 4576 | |||||
| 4577 | # pre section go out to a temporary string | ||||
| 4578 | my $pre_string; | ||||
| 4579 | $html_pre_fh = Perl::Tidy::IOScalar->new( \$pre_string, 'w' ); | ||||
| 4580 | push @pre_string_stack, \$pre_string; | ||||
| 4581 | } | ||||
| 4582 | |||||
| 4583 | # pod text gets diverted if the 'pod2html' is used | ||||
| 4584 | my $html_pod_fh; | ||||
| 4585 | my $pod_string; | ||||
| 4586 | if ( $rOpts->{'pod2html'} ) { | ||||
| 4587 | if ( $rOpts->{'html-pre-only'} ) { | ||||
| 4588 | undef $rOpts->{'pod2html'}; | ||||
| 4589 | } | ||||
| 4590 | else { | ||||
| 4591 | eval "use Pod::Html"; | ||||
| 4592 | if ($@) { | ||||
| 4593 | Perl::Tidy::Warn | ||||
| 4594 | "unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n"; | ||||
| 4595 | undef $rOpts->{'pod2html'}; | ||||
| 4596 | } | ||||
| 4597 | else { | ||||
| 4598 | $html_pod_fh = Perl::Tidy::IOScalar->new( \$pod_string, 'w' ); | ||||
| 4599 | } | ||||
| 4600 | } | ||||
| 4601 | } | ||||
| 4602 | |||||
| 4603 | my $toc_filename; | ||||
| 4604 | my $src_filename; | ||||
| 4605 | if ( $rOpts->{'frames'} ) { | ||||
| 4606 | unless ($extension) { | ||||
| 4607 | Perl::Tidy::Warn | ||||
| 4608 | "cannot use frames without a specified output extension; ignoring -frm\n"; | ||||
| 4609 | undef $rOpts->{'frames'}; | ||||
| 4610 | } | ||||
| 4611 | else { | ||||
| 4612 | $toc_filename = $input_file . $html_toc_extension . $extension; | ||||
| 4613 | $src_filename = $input_file . $html_src_extension . $extension; | ||||
| 4614 | } | ||||
| 4615 | } | ||||
| 4616 | |||||
| 4617 | # ---------------------------------------------------------- | ||||
| 4618 | # Output is now directed as follows: | ||||
| 4619 | # html_toc_fh <-- table of contents items | ||||
| 4620 | # html_pre_fh <-- the <pre> section of formatted code, except: | ||||
| 4621 | # html_pod_fh <-- pod goes here with the pod2html option | ||||
| 4622 | # ---------------------------------------------------------- | ||||
| 4623 | |||||
| 4624 | my $title = $rOpts->{'title'}; | ||||
| 4625 | unless ($title) { | ||||
| 4626 | ( $title, my $path ) = fileparse($input_file); | ||||
| 4627 | } | ||||
| 4628 | my $toc_item_count = 0; | ||||
| 4629 | my $in_toc_package = ""; | ||||
| 4630 | my $last_level = 0; | ||||
| 4631 | bless { | ||||
| 4632 | _input_file => $input_file, # name of input file | ||||
| 4633 | _title => $title, # title, unescaped | ||||
| 4634 | _html_file => $html_file, # name of .html output file | ||||
| 4635 | _toc_filename => $toc_filename, # for frames option | ||||
| 4636 | _src_filename => $src_filename, # for frames option | ||||
| 4637 | _html_file_opened => $html_file_opened, # a flag | ||||
| 4638 | _html_fh => $html_fh, # the output stream | ||||
| 4639 | _html_pre_fh => $html_pre_fh, # pre section goes here | ||||
| 4640 | _rpre_string_stack => \@pre_string_stack, # stack of pre sections | ||||
| 4641 | _html_pod_fh => $html_pod_fh, # pod goes here if pod2html | ||||
| 4642 | _rpod_string => \$pod_string, # string holding pod | ||||
| 4643 | _pod_cut_count => 0, # how many =cut's? | ||||
| 4644 | _html_toc_fh => $html_toc_fh, # fh for table of contents | ||||
| 4645 | _rtoc_string => \$toc_string, # string holding toc | ||||
| 4646 | _rtoc_item_count => \$toc_item_count, # how many toc items | ||||
| 4647 | _rin_toc_package => \$in_toc_package, # package name | ||||
| 4648 | _rtoc_name_count => {}, # hash to track unique names | ||||
| 4649 | _rpackage_stack => [], # stack to check for package | ||||
| 4650 | # name changes | ||||
| 4651 | _rlast_level => \$last_level, # brace indentation level | ||||
| 4652 | }, $class; | ||||
| 4653 | } | ||||
| 4654 | |||||
| 4655 | sub add_toc_item { | ||||
| 4656 | |||||
| 4657 | # Add an item to the html table of contents. | ||||
| 4658 | # This is called even if no table of contents is written, | ||||
| 4659 | # because we still want to put the anchors in the <pre> text. | ||||
| 4660 | # We are given an anchor name and its type; types are: | ||||
| 4661 | # 'package', 'sub', '__END__', '__DATA__', 'EOF' | ||||
| 4662 | # There must be an 'EOF' call at the end to wrap things up. | ||||
| 4663 | my $self = shift; | ||||
| 4664 | my ( $name, $type ) = @_; | ||||
| 4665 | my $html_toc_fh = $self->{_html_toc_fh}; | ||||
| 4666 | my $html_pre_fh = $self->{_html_pre_fh}; | ||||
| 4667 | my $rtoc_name_count = $self->{_rtoc_name_count}; | ||||
| 4668 | my $rtoc_item_count = $self->{_rtoc_item_count}; | ||||
| 4669 | my $rlast_level = $self->{_rlast_level}; | ||||
| 4670 | my $rin_toc_package = $self->{_rin_toc_package}; | ||||
| 4671 | my $rpackage_stack = $self->{_rpackage_stack}; | ||||
| 4672 | |||||
| 4673 | # packages contain sublists of subs, so to avoid errors all package | ||||
| 4674 | # items are written and finished with the following routines | ||||
| 4675 | my $end_package_list = sub { | ||||
| 4676 | if ($$rin_toc_package) { | ||||
| 4677 | $html_toc_fh->print("</ul>\n</li>\n"); | ||||
| 4678 | $$rin_toc_package = ""; | ||||
| 4679 | } | ||||
| 4680 | }; | ||||
| 4681 | |||||
| 4682 | my $start_package_list = sub { | ||||
| 4683 | my ( $unique_name, $package ) = @_; | ||||
| 4684 | if ($$rin_toc_package) { $end_package_list->() } | ||||
| 4685 | $html_toc_fh->print(<<EOM); | ||||
| 4686 | <li><a href=\"#$unique_name\">package $package</a> | ||||
| 4687 | <ul> | ||||
| 4688 | EOM | ||||
| 4689 | $$rin_toc_package = $package; | ||||
| 4690 | }; | ||||
| 4691 | |||||
| 4692 | # start the table of contents on the first item | ||||
| 4693 | unless ($$rtoc_item_count) { | ||||
| 4694 | |||||
| 4695 | # but just quit if we hit EOF without any other entries | ||||
| 4696 | # in this case, there will be no toc | ||||
| 4697 | return if ( $type eq 'EOF' ); | ||||
| 4698 | $html_toc_fh->print( <<"TOC_END"); | ||||
| 4699 | <!-- BEGIN CODE INDEX --><a name="code-index"></a> | ||||
| 4700 | <ul> | ||||
| 4701 | TOC_END | ||||
| 4702 | } | ||||
| 4703 | $$rtoc_item_count++; | ||||
| 4704 | |||||
| 4705 | # make a unique anchor name for this location: | ||||
| 4706 | # - packages get a 'package-' prefix | ||||
| 4707 | # - subs use their names | ||||
| 4708 | my $unique_name = $name; | ||||
| 4709 | if ( $type eq 'package' ) { $unique_name = "package-$name" } | ||||
| 4710 | |||||
| 4711 | # append '-1', '-2', etc if necessary to make unique; this will | ||||
| 4712 | # be unique because subs and packages cannot have a '-' | ||||
| 4713 | if ( my $count = $rtoc_name_count->{ lc $unique_name }++ ) { | ||||
| 4714 | $unique_name .= "-$count"; | ||||
| 4715 | } | ||||
| 4716 | |||||
| 4717 | # - all names get terminal '-' if pod2html is used, to avoid | ||||
| 4718 | # conflicts with anchor names created by pod2html | ||||
| 4719 | if ( $rOpts->{'pod2html'} ) { $unique_name .= '-' } | ||||
| 4720 | |||||
| 4721 | # start/stop lists of subs | ||||
| 4722 | if ( $type eq 'sub' ) { | ||||
| 4723 | my $package = $rpackage_stack->[$$rlast_level]; | ||||
| 4724 | unless ($package) { $package = 'main' } | ||||
| 4725 | |||||
| 4726 | # if we're already in a package/sub list, be sure its the right | ||||
| 4727 | # package or else close it | ||||
| 4728 | if ( $$rin_toc_package && $$rin_toc_package ne $package ) { | ||||
| 4729 | $end_package_list->(); | ||||
| 4730 | } | ||||
| 4731 | |||||
| 4732 | # start a package/sub list if necessary | ||||
| 4733 | unless ($$rin_toc_package) { | ||||
| 4734 | $start_package_list->( $unique_name, $package ); | ||||
| 4735 | } | ||||
| 4736 | } | ||||
| 4737 | |||||
| 4738 | # now write an entry in the toc for this item | ||||
| 4739 | if ( $type eq 'package' ) { | ||||
| 4740 | $start_package_list->( $unique_name, $name ); | ||||
| 4741 | } | ||||
| 4742 | elsif ( $type eq 'sub' ) { | ||||
| 4743 | $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n"); | ||||
| 4744 | } | ||||
| 4745 | else { | ||||
| 4746 | $end_package_list->(); | ||||
| 4747 | $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n"); | ||||
| 4748 | } | ||||
| 4749 | |||||
| 4750 | # write the anchor in the <pre> section | ||||
| 4751 | $html_pre_fh->print("<a name=\"$unique_name\"></a>"); | ||||
| 4752 | |||||
| 4753 | # end the table of contents, if any, on the end of file | ||||
| 4754 | if ( $type eq 'EOF' ) { | ||||
| 4755 | $html_toc_fh->print( <<"TOC_END"); | ||||
| 4756 | </ul> | ||||
| 4757 | <!-- END CODE INDEX --> | ||||
| 4758 | TOC_END | ||||
| 4759 | } | ||||
| 4760 | } | ||||
| 4761 | |||||
| 4762 | # spent 50µs within Perl::Tidy::HtmlWriter::BEGIN@4762 which was called:
# once (50µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 4867 | ||||
| 4763 | |||||
| 4764 | # This is the official list of tokens which may be identified by the | ||||
| 4765 | # user. Long names are used as getopt keys. Short names are | ||||
| 4766 | # convenient short abbreviations for specifying input. Short names | ||||
| 4767 | # somewhat resemble token type characters, but are often different | ||||
| 4768 | # because they may only be alphanumeric, to allow command line | ||||
| 4769 | # input. Also, note that because of case insensitivity of html, | ||||
| 4770 | # this table must be in a single case only (I've chosen to use all | ||||
| 4771 | # lower case). | ||||
| 4772 | # When adding NEW_TOKENS: update this hash table | ||||
| 4773 | # short names => long names | ||||
| 4774 | 1 | 25µs | %short_to_long_names = ( | ||
| 4775 | 'n' => 'numeric', | ||||
| 4776 | 'p' => 'paren', | ||||
| 4777 | 'q' => 'quote', | ||||
| 4778 | 's' => 'structure', | ||||
| 4779 | 'c' => 'comment', | ||||
| 4780 | 'v' => 'v-string', | ||||
| 4781 | 'cm' => 'comma', | ||||
| 4782 | 'w' => 'bareword', | ||||
| 4783 | 'co' => 'colon', | ||||
| 4784 | 'pu' => 'punctuation', | ||||
| 4785 | 'i' => 'identifier', | ||||
| 4786 | 'j' => 'label', | ||||
| 4787 | 'h' => 'here-doc-target', | ||||
| 4788 | 'hh' => 'here-doc-text', | ||||
| 4789 | 'k' => 'keyword', | ||||
| 4790 | 'sc' => 'semicolon', | ||||
| 4791 | 'm' => 'subroutine', | ||||
| 4792 | 'pd' => 'pod-text', | ||||
| 4793 | ); | ||||
| 4794 | |||||
| 4795 | # Now we have to map actual token types into one of the above short | ||||
| 4796 | # names; any token types not mapped will get 'punctuation' | ||||
| 4797 | # properties. | ||||
| 4798 | |||||
| 4799 | # The values of this hash table correspond to the keys of the | ||||
| 4800 | # previous hash table. | ||||
| 4801 | # The keys of this hash table are token types and can be seen | ||||
| 4802 | # by running with --dump-token-types (-dtt). | ||||
| 4803 | |||||
| 4804 | # When adding NEW_TOKENS: update this hash table | ||||
| 4805 | # $type => $short_name | ||||
| 4806 | 1 | 8µs | %token_short_names = ( | ||
| 4807 | '#' => 'c', | ||||
| 4808 | 'n' => 'n', | ||||
| 4809 | 'v' => 'v', | ||||
| 4810 | 'k' => 'k', | ||||
| 4811 | 'F' => 'k', | ||||
| 4812 | 'Q' => 'q', | ||||
| 4813 | 'q' => 'q', | ||||
| 4814 | 'J' => 'j', | ||||
| 4815 | 'j' => 'j', | ||||
| 4816 | 'h' => 'h', | ||||
| 4817 | 'H' => 'hh', | ||||
| 4818 | 'w' => 'w', | ||||
| 4819 | ',' => 'cm', | ||||
| 4820 | '=>' => 'cm', | ||||
| 4821 | ';' => 'sc', | ||||
| 4822 | ':' => 'co', | ||||
| 4823 | 'f' => 'sc', | ||||
| 4824 | '(' => 'p', | ||||
| 4825 | ')' => 'p', | ||||
| 4826 | 'M' => 'm', | ||||
| 4827 | 'P' => 'pd', | ||||
| 4828 | 'A' => 'co', | ||||
| 4829 | ); | ||||
| 4830 | |||||
| 4831 | # These token types will all be called identifiers for now | ||||
| 4832 | # FIXME: could separate user defined modules as separate type | ||||
| 4833 | 1 | 2µs | my @identifier = qw" i t U C Y Z G :: CORE::"; | ||
| 4834 | 1 | 7µs | @token_short_names{@identifier} = ('i') x scalar(@identifier); | ||
| 4835 | |||||
| 4836 | # These token types will be called 'structure' | ||||
| 4837 | 1 | 500ns | my @structure = qw" { } "; | ||
| 4838 | 1 | 9µs | @token_short_names{@structure} = ('s') x scalar(@structure); | ||
| 4839 | |||||
| 4840 | # OLD NOTES: save for reference | ||||
| 4841 | # Any of these could be added later if it would be useful. | ||||
| 4842 | # For now, they will by default become punctuation | ||||
| 4843 | # my @list = qw" L R [ ] "; | ||||
| 4844 | # @token_long_names{@list} = ('non-structure') x scalar(@list); | ||||
| 4845 | # | ||||
| 4846 | # my @list = qw" | ||||
| 4847 | # / /= * *= ** **= + += - -= % %= = ++ -- << <<= >> >>= pp p m mm | ||||
| 4848 | # "; | ||||
| 4849 | # @token_long_names{@list} = ('math') x scalar(@list); | ||||
| 4850 | # | ||||
| 4851 | # my @list = qw" & &= ~ ~= ^ ^= | |= "; | ||||
| 4852 | # @token_long_names{@list} = ('bit') x scalar(@list); | ||||
| 4853 | # | ||||
| 4854 | # my @list = qw" == != < > <= <=> "; | ||||
| 4855 | # @token_long_names{@list} = ('numerical-comparison') x scalar(@list); | ||||
| 4856 | # | ||||
| 4857 | # my @list = qw" && || ! &&= ||= //= "; | ||||
| 4858 | # @token_long_names{@list} = ('logical') x scalar(@list); | ||||
| 4859 | # | ||||
| 4860 | # my @list = qw" . .= =~ !~ x x= "; | ||||
| 4861 | # @token_long_names{@list} = ('string-operators') x scalar(@list); | ||||
| 4862 | # | ||||
| 4863 | # # Incomplete.. | ||||
| 4864 | # my @list = qw" .. -> <> ... \ ? "; | ||||
| 4865 | # @token_long_names{@list} = ('misc-operators') x scalar(@list); | ||||
| 4866 | |||||
| 4867 | 1 | 384µs | 1 | 50µs | } # spent 50µs making 1 call to Perl::Tidy::HtmlWriter::BEGIN@4762 |
| 4868 | |||||
| 4869 | sub make_getopt_long_names { | ||||
| 4870 | my $class = shift; | ||||
| 4871 | my ($rgetopt_names) = @_; | ||||
| 4872 | while ( my ( $short_name, $name ) = each %short_to_long_names ) { | ||||
| 4873 | push @$rgetopt_names, "html-color-$name=s"; | ||||
| 4874 | push @$rgetopt_names, "html-italic-$name!"; | ||||
| 4875 | push @$rgetopt_names, "html-bold-$name!"; | ||||
| 4876 | } | ||||
| 4877 | push @$rgetopt_names, "html-color-background=s"; | ||||
| 4878 | push @$rgetopt_names, "html-linked-style-sheet=s"; | ||||
| 4879 | push @$rgetopt_names, "nohtml-style-sheets"; | ||||
| 4880 | push @$rgetopt_names, "html-pre-only"; | ||||
| 4881 | push @$rgetopt_names, "html-line-numbers"; | ||||
| 4882 | push @$rgetopt_names, "html-entities!"; | ||||
| 4883 | push @$rgetopt_names, "stylesheet"; | ||||
| 4884 | push @$rgetopt_names, "html-table-of-contents!"; | ||||
| 4885 | push @$rgetopt_names, "pod2html!"; | ||||
| 4886 | push @$rgetopt_names, "frames!"; | ||||
| 4887 | push @$rgetopt_names, "html-toc-extension=s"; | ||||
| 4888 | push @$rgetopt_names, "html-src-extension=s"; | ||||
| 4889 | |||||
| 4890 | # Pod::Html parameters: | ||||
| 4891 | push @$rgetopt_names, "backlink=s"; | ||||
| 4892 | push @$rgetopt_names, "cachedir=s"; | ||||
| 4893 | push @$rgetopt_names, "htmlroot=s"; | ||||
| 4894 | push @$rgetopt_names, "libpods=s"; | ||||
| 4895 | push @$rgetopt_names, "podpath=s"; | ||||
| 4896 | push @$rgetopt_names, "podroot=s"; | ||||
| 4897 | push @$rgetopt_names, "title=s"; | ||||
| 4898 | |||||
| 4899 | # Pod::Html parameters with leading 'pod' which will be removed | ||||
| 4900 | # before the call to Pod::Html | ||||
| 4901 | push @$rgetopt_names, "podquiet!"; | ||||
| 4902 | push @$rgetopt_names, "podverbose!"; | ||||
| 4903 | push @$rgetopt_names, "podrecurse!"; | ||||
| 4904 | push @$rgetopt_names, "podflush"; | ||||
| 4905 | push @$rgetopt_names, "podheader!"; | ||||
| 4906 | push @$rgetopt_names, "podindex!"; | ||||
| 4907 | } | ||||
| 4908 | |||||
| 4909 | sub make_abbreviated_names { | ||||
| 4910 | |||||
| 4911 | # We're appending things like this to the expansion list: | ||||
| 4912 | # 'hcc' => [qw(html-color-comment)], | ||||
| 4913 | # 'hck' => [qw(html-color-keyword)], | ||||
| 4914 | # etc | ||||
| 4915 | my $class = shift; | ||||
| 4916 | my ($rexpansion) = @_; | ||||
| 4917 | |||||
| 4918 | # abbreviations for color/bold/italic properties | ||||
| 4919 | while ( my ( $short_name, $long_name ) = each %short_to_long_names ) { | ||||
| 4920 | ${$rexpansion}{"hc$short_name"} = ["html-color-$long_name"]; | ||||
| 4921 | ${$rexpansion}{"hb$short_name"} = ["html-bold-$long_name"]; | ||||
| 4922 | ${$rexpansion}{"hi$short_name"} = ["html-italic-$long_name"]; | ||||
| 4923 | ${$rexpansion}{"nhb$short_name"} = ["nohtml-bold-$long_name"]; | ||||
| 4924 | ${$rexpansion}{"nhi$short_name"} = ["nohtml-italic-$long_name"]; | ||||
| 4925 | } | ||||
| 4926 | |||||
| 4927 | # abbreviations for all other html options | ||||
| 4928 | ${$rexpansion}{"hcbg"} = ["html-color-background"]; | ||||
| 4929 | ${$rexpansion}{"pre"} = ["html-pre-only"]; | ||||
| 4930 | ${$rexpansion}{"toc"} = ["html-table-of-contents"]; | ||||
| 4931 | ${$rexpansion}{"ntoc"} = ["nohtml-table-of-contents"]; | ||||
| 4932 | ${$rexpansion}{"nnn"} = ["html-line-numbers"]; | ||||
| 4933 | ${$rexpansion}{"hent"} = ["html-entities"]; | ||||
| 4934 | ${$rexpansion}{"nhent"} = ["nohtml-entities"]; | ||||
| 4935 | ${$rexpansion}{"css"} = ["html-linked-style-sheet"]; | ||||
| 4936 | ${$rexpansion}{"nss"} = ["nohtml-style-sheets"]; | ||||
| 4937 | ${$rexpansion}{"ss"} = ["stylesheet"]; | ||||
| 4938 | ${$rexpansion}{"pod"} = ["pod2html"]; | ||||
| 4939 | ${$rexpansion}{"npod"} = ["nopod2html"]; | ||||
| 4940 | ${$rexpansion}{"frm"} = ["frames"]; | ||||
| 4941 | ${$rexpansion}{"nfrm"} = ["noframes"]; | ||||
| 4942 | ${$rexpansion}{"text"} = ["html-toc-extension"]; | ||||
| 4943 | ${$rexpansion}{"sext"} = ["html-src-extension"]; | ||||
| 4944 | } | ||||
| 4945 | |||||
| 4946 | sub check_options { | ||||
| 4947 | |||||
| 4948 | # This will be called once after options have been parsed | ||||
| 4949 | my $class = shift; | ||||
| 4950 | $rOpts = shift; | ||||
| 4951 | |||||
| 4952 | # X11 color names for default settings that seemed to look ok | ||||
| 4953 | # (these color names are only used for programming clarity; the hex | ||||
| 4954 | # numbers are actually written) | ||||
| 4955 | 2 | 24µs | 2 | 79µs | # spent 43µs (8+35) within Perl::Tidy::HtmlWriter::BEGIN@4955 which was called:
# once (8µs+35µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 4955 # spent 43µs making 1 call to Perl::Tidy::HtmlWriter::BEGIN@4955
# spent 36µs making 1 call to constant::import |
| 4956 | 2 | 22µs | 2 | 76µs | # spent 42µs (8+34) within Perl::Tidy::HtmlWriter::BEGIN@4956 which was called:
# once (8µs+34µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 4956 # spent 42µs making 1 call to Perl::Tidy::HtmlWriter::BEGIN@4956
# spent 34µs making 1 call to constant::import |
| 4957 | 2 | 21µs | 2 | 66µs | # spent 36µs (6+30) within Perl::Tidy::HtmlWriter::BEGIN@4957 which was called:
# once (6µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 4957 # spent 36µs making 1 call to Perl::Tidy::HtmlWriter::BEGIN@4957
# spent 30µs making 1 call to constant::import |
| 4958 | 2 | 20µs | 2 | 63µs | # spent 35µs (6+28) within Perl::Tidy::HtmlWriter::BEGIN@4958 which was called:
# once (6µs+28µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 4958 # spent 35µs making 1 call to Perl::Tidy::HtmlWriter::BEGIN@4958
# spent 28µs making 1 call to constant::import |
| 4959 | 2 | 20µs | 2 | 63µs | # spent 35µs (6+28) within Perl::Tidy::HtmlWriter::BEGIN@4959 which was called:
# once (6µs+28µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 4959 # spent 35µs making 1 call to Perl::Tidy::HtmlWriter::BEGIN@4959
# spent 28µs making 1 call to constant::import |
| 4960 | 2 | 24µs | 2 | 62µs | # spent 34µs (6+28) within Perl::Tidy::HtmlWriter::BEGIN@4960 which was called:
# once (6µs+28µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 4960 # spent 34µs making 1 call to Perl::Tidy::HtmlWriter::BEGIN@4960
# spent 28µs making 1 call to constant::import |
| 4961 | 2 | 24µs | 2 | 63µs | # spent 35µs (7+28) within Perl::Tidy::HtmlWriter::BEGIN@4961 which was called:
# once (7µs+28µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 4961 # spent 35µs making 1 call to Perl::Tidy::HtmlWriter::BEGIN@4961
# spent 28µs making 1 call to constant::import |
| 4962 | 2 | 20µs | 2 | 62µs | # spent 34µs (6+28) within Perl::Tidy::HtmlWriter::BEGIN@4962 which was called:
# once (6µs+28µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 4962 # spent 34µs making 1 call to Perl::Tidy::HtmlWriter::BEGIN@4962
# spent 28µs making 1 call to constant::import |
| 4963 | 2 | 3.63ms | 2 | 69µs | # spent 38µs (6+31) within Perl::Tidy::HtmlWriter::BEGIN@4963 which was called:
# once (6µs+31µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 4963 # spent 38µs making 1 call to Perl::Tidy::HtmlWriter::BEGIN@4963
# spent 31µs making 1 call to constant::import |
| 4964 | |||||
| 4965 | # set default color, bold, italic properties | ||||
| 4966 | # anything not listed here will be given the default (punctuation) color -- | ||||
| 4967 | # these types currently not listed and get default: ws pu s sc cm co p | ||||
| 4968 | # When adding NEW_TOKENS: add an entry here if you don't want defaults | ||||
| 4969 | |||||
| 4970 | # set_default_properties( $short_name, default_color, bold?, italic? ); | ||||
| 4971 | set_default_properties( 'c', ForestGreen, 0, 0 ); | ||||
| 4972 | set_default_properties( 'pd', ForestGreen, 0, 1 ); | ||||
| 4973 | set_default_properties( 'k', magenta4, 1, 0 ); # was SaddleBrown | ||||
| 4974 | set_default_properties( 'q', IndianRed3, 0, 0 ); | ||||
| 4975 | set_default_properties( 'hh', IndianRed3, 0, 1 ); | ||||
| 4976 | set_default_properties( 'h', IndianRed3, 1, 0 ); | ||||
| 4977 | set_default_properties( 'i', DeepSkyBlue4, 0, 0 ); | ||||
| 4978 | set_default_properties( 'w', black, 0, 0 ); | ||||
| 4979 | set_default_properties( 'n', MediumOrchid3, 0, 0 ); | ||||
| 4980 | set_default_properties( 'v', MediumOrchid3, 0, 0 ); | ||||
| 4981 | set_default_properties( 'j', IndianRed3, 1, 0 ); | ||||
| 4982 | set_default_properties( 'm', red, 1, 0 ); | ||||
| 4983 | |||||
| 4984 | set_default_color( 'html-color-background', white ); | ||||
| 4985 | set_default_color( 'html-color-punctuation', black ); | ||||
| 4986 | |||||
| 4987 | # setup property lookup tables for tokens based on their short names | ||||
| 4988 | # every token type has a short name, and will use these tables | ||||
| 4989 | # to do the html markup | ||||
| 4990 | while ( my ( $short_name, $long_name ) = each %short_to_long_names ) { | ||||
| 4991 | $html_color{$short_name} = $rOpts->{"html-color-$long_name"}; | ||||
| 4992 | $html_bold{$short_name} = $rOpts->{"html-bold-$long_name"}; | ||||
| 4993 | $html_italic{$short_name} = $rOpts->{"html-italic-$long_name"}; | ||||
| 4994 | } | ||||
| 4995 | |||||
| 4996 | # write style sheet to STDOUT and die if requested | ||||
| 4997 | if ( defined( $rOpts->{'stylesheet'} ) ) { | ||||
| 4998 | write_style_sheet_file('-'); | ||||
| 4999 | Perl::Tidy::Exit 0; | ||||
| 5000 | } | ||||
| 5001 | |||||
| 5002 | # make sure user gives a file name after -css | ||||
| 5003 | if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) { | ||||
| 5004 | $css_linkname = $rOpts->{'html-linked-style-sheet'}; | ||||
| 5005 | if ( $css_linkname =~ /^-/ ) { | ||||
| 5006 | Perl::Tidy::Die "You must specify a valid filename after -css\n"; | ||||
| 5007 | } | ||||
| 5008 | } | ||||
| 5009 | |||||
| 5010 | # check for conflict | ||||
| 5011 | if ( $css_linkname && $rOpts->{'nohtml-style-sheets'} ) { | ||||
| 5012 | $rOpts->{'nohtml-style-sheets'} = 0; | ||||
| 5013 | warning("You can't specify both -css and -nss; -nss ignored\n"); | ||||
| 5014 | } | ||||
| 5015 | |||||
| 5016 | # write a style sheet file if necessary | ||||
| 5017 | if ($css_linkname) { | ||||
| 5018 | |||||
| 5019 | # if the selected filename exists, don't write, because user may | ||||
| 5020 | # have done some work by hand to create it; use backup name instead | ||||
| 5021 | # Also, this will avoid a potential disaster in which the user | ||||
| 5022 | # forgets to specify the style sheet, like this: | ||||
| 5023 | # perltidy -html -css myfile1.pl myfile2.pl | ||||
| 5024 | # This would cause myfile1.pl to parsed as the style sheet by GetOpts | ||||
| 5025 | my $css_filename = $css_linkname; | ||||
| 5026 | unless ( -e $css_filename ) { | ||||
| 5027 | write_style_sheet_file($css_filename); | ||||
| 5028 | } | ||||
| 5029 | } | ||||
| 5030 | $missing_html_entities = 1 unless $rOpts->{'html-entities'}; | ||||
| 5031 | } | ||||
| 5032 | |||||
| 5033 | sub write_style_sheet_file { | ||||
| 5034 | |||||
| 5035 | my $css_filename = shift; | ||||
| 5036 | my $fh; | ||||
| 5037 | unless ( $fh = IO::File->new("> $css_filename") ) { | ||||
| 5038 | Perl::Tidy::Die "can't open $css_filename: $!\n"; | ||||
| 5039 | } | ||||
| 5040 | write_style_sheet_data($fh); | ||||
| 5041 | eval { $fh->close }; | ||||
| 5042 | } | ||||
| 5043 | |||||
| 5044 | sub write_style_sheet_data { | ||||
| 5045 | |||||
| 5046 | # write the style sheet data to an open file handle | ||||
| 5047 | my $fh = shift; | ||||
| 5048 | |||||
| 5049 | my $bg_color = $rOpts->{'html-color-background'}; | ||||
| 5050 | my $text_color = $rOpts->{'html-color-punctuation'}; | ||||
| 5051 | |||||
| 5052 | # pre-bgcolor is new, and may not be defined | ||||
| 5053 | my $pre_bg_color = $rOpts->{'html-pre-color-background'}; | ||||
| 5054 | $pre_bg_color = $bg_color unless $pre_bg_color; | ||||
| 5055 | |||||
| 5056 | $fh->print(<<"EOM"); | ||||
| 5057 | /* default style sheet generated by perltidy */ | ||||
| 5058 | body {background: $bg_color; color: $text_color} | ||||
| 5059 | pre { color: $text_color; | ||||
| 5060 | background: $pre_bg_color; | ||||
| 5061 | font-family: courier; | ||||
| 5062 | } | ||||
| 5063 | |||||
| 5064 | EOM | ||||
| 5065 | |||||
| 5066 | foreach my $short_name ( sort keys %short_to_long_names ) { | ||||
| 5067 | my $long_name = $short_to_long_names{$short_name}; | ||||
| 5068 | |||||
| 5069 | my $abbrev = '.' . $short_name; | ||||
| 5070 | if ( length($short_name) == 1 ) { $abbrev .= ' ' } # for alignment | ||||
| 5071 | my $color = $html_color{$short_name}; | ||||
| 5072 | if ( !defined($color) ) { $color = $text_color } | ||||
| 5073 | $fh->print("$abbrev \{ color: $color;"); | ||||
| 5074 | |||||
| 5075 | if ( $html_bold{$short_name} ) { | ||||
| 5076 | $fh->print(" font-weight:bold;"); | ||||
| 5077 | } | ||||
| 5078 | |||||
| 5079 | if ( $html_italic{$short_name} ) { | ||||
| 5080 | $fh->print(" font-style:italic;"); | ||||
| 5081 | } | ||||
| 5082 | $fh->print("} /* $long_name */\n"); | ||||
| 5083 | } | ||||
| 5084 | } | ||||
| 5085 | |||||
| 5086 | sub set_default_color { | ||||
| 5087 | |||||
| 5088 | # make sure that options hash $rOpts->{$key} contains a valid color | ||||
| 5089 | my ( $key, $color ) = @_; | ||||
| 5090 | if ( $rOpts->{$key} ) { $color = $rOpts->{$key} } | ||||
| 5091 | $rOpts->{$key} = check_RGB($color); | ||||
| 5092 | } | ||||
| 5093 | |||||
| 5094 | sub check_RGB { | ||||
| 5095 | |||||
| 5096 | # if color is a 6 digit hex RGB value, prepend a #, otherwise | ||||
| 5097 | # assume that it is a valid ascii color name | ||||
| 5098 | my ($color) = @_; | ||||
| 5099 | if ( $color =~ /^[0-9a-fA-F]{6,6}$/ ) { $color = "#$color" } | ||||
| 5100 | return $color; | ||||
| 5101 | } | ||||
| 5102 | |||||
| 5103 | sub set_default_properties { | ||||
| 5104 | my ( $short_name, $color, $bold, $italic ) = @_; | ||||
| 5105 | |||||
| 5106 | set_default_color( "html-color-$short_to_long_names{$short_name}", $color ); | ||||
| 5107 | my $key; | ||||
| 5108 | $key = "html-bold-$short_to_long_names{$short_name}"; | ||||
| 5109 | $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $bold; | ||||
| 5110 | $key = "html-italic-$short_to_long_names{$short_name}"; | ||||
| 5111 | $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $italic; | ||||
| 5112 | } | ||||
| 5113 | |||||
| 5114 | sub pod_to_html { | ||||
| 5115 | |||||
| 5116 | # Use Pod::Html to process the pod and make the page | ||||
| 5117 | # then merge the perltidy code sections into it. | ||||
| 5118 | # return 1 if success, 0 otherwise | ||||
| 5119 | my $self = shift; | ||||
| 5120 | my ( $pod_string, $css_string, $toc_string, $rpre_string_stack ) = @_; | ||||
| 5121 | my $input_file = $self->{_input_file}; | ||||
| 5122 | my $title = $self->{_title}; | ||||
| 5123 | my $success_flag = 0; | ||||
| 5124 | |||||
| 5125 | # don't try to use pod2html if no pod | ||||
| 5126 | unless ($pod_string) { | ||||
| 5127 | return $success_flag; | ||||
| 5128 | } | ||||
| 5129 | |||||
| 5130 | # Pod::Html requires a real temporary filename | ||||
| 5131 | my ( $fh_tmp, $tmpfile ) = tempfile(); | ||||
| 5132 | unless ($fh_tmp) { | ||||
| 5133 | Perl::Tidy::Warn | ||||
| 5134 | "unable to open temporary file $tmpfile; cannot use pod2html\n"; | ||||
| 5135 | return $success_flag; | ||||
| 5136 | } | ||||
| 5137 | |||||
| 5138 | #------------------------------------------------------------------ | ||||
| 5139 | # Warning: a temporary file is open; we have to clean up if | ||||
| 5140 | # things go bad. From here on all returns should be by going to | ||||
| 5141 | # RETURN so that the temporary file gets unlinked. | ||||
| 5142 | #------------------------------------------------------------------ | ||||
| 5143 | |||||
| 5144 | # write the pod text to the temporary file | ||||
| 5145 | $fh_tmp->print($pod_string); | ||||
| 5146 | $fh_tmp->close(); | ||||
| 5147 | |||||
| 5148 | # Hand off the pod to pod2html. | ||||
| 5149 | # Note that we can use the same temporary filename for input and output | ||||
| 5150 | # because of the way pod2html works. | ||||
| 5151 | { | ||||
| 5152 | |||||
| 5153 | my @args; | ||||
| 5154 | push @args, "--infile=$tmpfile", "--outfile=$tmpfile", "--title=$title"; | ||||
| 5155 | my $kw; | ||||
| 5156 | |||||
| 5157 | # Flags with string args: | ||||
| 5158 | # "backlink=s", "cachedir=s", "htmlroot=s", "libpods=s", | ||||
| 5159 | # "podpath=s", "podroot=s" | ||||
| 5160 | # Note: -css=s is handled by perltidy itself | ||||
| 5161 | foreach $kw (qw(backlink cachedir htmlroot libpods podpath podroot)) { | ||||
| 5162 | if ( $rOpts->{$kw} ) { push @args, "--$kw=$rOpts->{$kw}" } | ||||
| 5163 | } | ||||
| 5164 | |||||
| 5165 | # Toggle switches; these have extra leading 'pod' | ||||
| 5166 | # "header!", "index!", "recurse!", "quiet!", "verbose!" | ||||
| 5167 | foreach $kw (qw(podheader podindex podrecurse podquiet podverbose)) { | ||||
| 5168 | my $kwd = $kw; # allows us to strip 'pod' | ||||
| 5169 | if ( $rOpts->{$kw} ) { $kwd =~ s/^pod//; push @args, "--$kwd" } | ||||
| 5170 | elsif ( defined( $rOpts->{$kw} ) ) { | ||||
| 5171 | $kwd =~ s/^pod//; | ||||
| 5172 | push @args, "--no$kwd"; | ||||
| 5173 | } | ||||
| 5174 | } | ||||
| 5175 | |||||
| 5176 | # "flush", | ||||
| 5177 | $kw = 'podflush'; | ||||
| 5178 | if ( $rOpts->{$kw} ) { $kw =~ s/^pod//; push @args, "--$kw" } | ||||
| 5179 | |||||
| 5180 | # Must clean up if pod2html dies (it can); | ||||
| 5181 | # Be careful not to overwrite callers __DIE__ routine | ||||
| 5182 | local $SIG{__DIE__} = sub { | ||||
| 5183 | unlink $tmpfile if -e $tmpfile; | ||||
| 5184 | Perl::Tidy::Die $_[0]; | ||||
| 5185 | }; | ||||
| 5186 | |||||
| 5187 | pod2html(@args); | ||||
| 5188 | } | ||||
| 5189 | $fh_tmp = IO::File->new( $tmpfile, 'r' ); | ||||
| 5190 | unless ($fh_tmp) { | ||||
| 5191 | |||||
| 5192 | # this error shouldn't happen ... we just used this filename | ||||
| 5193 | Perl::Tidy::Warn | ||||
| 5194 | "unable to open temporary file $tmpfile; cannot use pod2html\n"; | ||||
| 5195 | goto RETURN; | ||||
| 5196 | } | ||||
| 5197 | |||||
| 5198 | my $html_fh = $self->{_html_fh}; | ||||
| 5199 | my @toc; | ||||
| 5200 | my $in_toc; | ||||
| 5201 | my $ul_level = 0; | ||||
| 5202 | my $no_print; | ||||
| 5203 | |||||
| 5204 | # This routine will write the html selectively and store the toc | ||||
| 5205 | my $html_print = sub { | ||||
| 5206 | foreach (@_) { | ||||
| 5207 | $html_fh->print($_) unless ($no_print); | ||||
| 5208 | if ($in_toc) { push @toc, $_ } | ||||
| 5209 | } | ||||
| 5210 | }; | ||||
| 5211 | |||||
| 5212 | # loop over lines of html output from pod2html and merge in | ||||
| 5213 | # the necessary perltidy html sections | ||||
| 5214 | my ( $saw_body, $saw_index, $saw_body_end ); | ||||
| 5215 | while ( my $line = $fh_tmp->getline() ) { | ||||
| 5216 | |||||
| 5217 | if ( $line =~ /^\s*<html>\s*$/i ) { | ||||
| 5218 | my $date = localtime; | ||||
| 5219 | $html_print->("<!-- Generated by perltidy on $date -->\n"); | ||||
| 5220 | $html_print->($line); | ||||
| 5221 | } | ||||
| 5222 | |||||
| 5223 | # Copy the perltidy css, if any, after <body> tag | ||||
| 5224 | elsif ( $line =~ /^\s*<body.*>\s*$/i ) { | ||||
| 5225 | $saw_body = 1; | ||||
| 5226 | $html_print->($css_string) if $css_string; | ||||
| 5227 | $html_print->($line); | ||||
| 5228 | |||||
| 5229 | # add a top anchor and heading | ||||
| 5230 | $html_print->("<a name=\"-top-\"></a>\n"); | ||||
| 5231 | $title = escape_html($title); | ||||
| 5232 | $html_print->("<h1>$title</h1>\n"); | ||||
| 5233 | } | ||||
| 5234 | |||||
| 5235 | # check for start of index, old pod2html | ||||
| 5236 | # before Pod::Html VERSION 1.15_02 it is delimited by comments as: | ||||
| 5237 | # <!-- INDEX BEGIN --> | ||||
| 5238 | # <ul> | ||||
| 5239 | # ... | ||||
| 5240 | # </ul> | ||||
| 5241 | # <!-- INDEX END --> | ||||
| 5242 | # | ||||
| 5243 | elsif ( $line =~ /^\s*<!-- INDEX BEGIN -->\s*$/i ) { | ||||
| 5244 | $in_toc = 'INDEX'; | ||||
| 5245 | |||||
| 5246 | # when frames are used, an extra table of contents in the | ||||
| 5247 | # contents panel is confusing, so don't print it | ||||
| 5248 | $no_print = $rOpts->{'frames'} | ||||
| 5249 | || !$rOpts->{'html-table-of-contents'}; | ||||
| 5250 | $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'}; | ||||
| 5251 | $html_print->($line); | ||||
| 5252 | } | ||||
| 5253 | |||||
| 5254 | # check for start of index, new pod2html | ||||
| 5255 | # After Pod::Html VERSION 1.15_02 it is delimited as: | ||||
| 5256 | # <ul id="index"> | ||||
| 5257 | # ... | ||||
| 5258 | # </ul> | ||||
| 5259 | elsif ( $line =~ /^\s*<ul\s+id="index">/i ) { | ||||
| 5260 | $in_toc = 'UL'; | ||||
| 5261 | $ul_level = 1; | ||||
| 5262 | |||||
| 5263 | # when frames are used, an extra table of contents in the | ||||
| 5264 | # contents panel is confusing, so don't print it | ||||
| 5265 | $no_print = $rOpts->{'frames'} | ||||
| 5266 | || !$rOpts->{'html-table-of-contents'}; | ||||
| 5267 | $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'}; | ||||
| 5268 | $html_print->($line); | ||||
| 5269 | } | ||||
| 5270 | |||||
| 5271 | # Check for end of index, old pod2html | ||||
| 5272 | elsif ( $line =~ /^\s*<!-- INDEX END -->\s*$/i ) { | ||||
| 5273 | $saw_index = 1; | ||||
| 5274 | $html_print->($line); | ||||
| 5275 | |||||
| 5276 | # Copy the perltidy toc, if any, after the Pod::Html toc | ||||
| 5277 | if ($toc_string) { | ||||
| 5278 | $html_print->("<hr />\n") if $rOpts->{'frames'}; | ||||
| 5279 | $html_print->("<h2>Code Index:</h2>\n"); | ||||
| 5280 | my @toc = map { $_ .= "\n" } split /\n/, $toc_string; | ||||
| 5281 | $html_print->(@toc); | ||||
| 5282 | } | ||||
| 5283 | $in_toc = ""; | ||||
| 5284 | $no_print = 0; | ||||
| 5285 | } | ||||
| 5286 | |||||
| 5287 | # must track <ul> depth level for new pod2html | ||||
| 5288 | elsif ( $line =~ /\s*<ul>\s*$/i && $in_toc eq 'UL' ) { | ||||
| 5289 | $ul_level++; | ||||
| 5290 | $html_print->($line); | ||||
| 5291 | } | ||||
| 5292 | |||||
| 5293 | # Check for end of index, for new pod2html | ||||
| 5294 | elsif ( $line =~ /\s*<\/ul>/i && $in_toc eq 'UL' ) { | ||||
| 5295 | $ul_level--; | ||||
| 5296 | $html_print->($line); | ||||
| 5297 | |||||
| 5298 | # Copy the perltidy toc, if any, after the Pod::Html toc | ||||
| 5299 | if ( $ul_level <= 0 ) { | ||||
| 5300 | $saw_index = 1; | ||||
| 5301 | if ($toc_string) { | ||||
| 5302 | $html_print->("<hr />\n") if $rOpts->{'frames'}; | ||||
| 5303 | $html_print->("<h2>Code Index:</h2>\n"); | ||||
| 5304 | my @toc = map { $_ .= "\n" } split /\n/, $toc_string; | ||||
| 5305 | $html_print->(@toc); | ||||
| 5306 | } | ||||
| 5307 | $in_toc = ""; | ||||
| 5308 | $ul_level = 0; | ||||
| 5309 | $no_print = 0; | ||||
| 5310 | } | ||||
| 5311 | } | ||||
| 5312 | |||||
| 5313 | # Copy one perltidy section after each marker | ||||
| 5314 | elsif ( $line =~ /^(.*)<!-- pERLTIDY sECTION -->(.*)$/ ) { | ||||
| 5315 | $line = $2; | ||||
| 5316 | $html_print->($1) if $1; | ||||
| 5317 | |||||
| 5318 | # Intermingle code and pod sections if we saw multiple =cut's. | ||||
| 5319 | if ( $self->{_pod_cut_count} > 1 ) { | ||||
| 5320 | my $rpre_string = shift(@$rpre_string_stack); | ||||
| 5321 | if ($$rpre_string) { | ||||
| 5322 | $html_print->('<pre>'); | ||||
| 5323 | $html_print->($$rpre_string); | ||||
| 5324 | $html_print->('</pre>'); | ||||
| 5325 | } | ||||
| 5326 | else { | ||||
| 5327 | |||||
| 5328 | # shouldn't happen: we stored a string before writing | ||||
| 5329 | # each marker. | ||||
| 5330 | Perl::Tidy::Warn | ||||
| 5331 | "Problem merging html stream with pod2html; order may be wrong\n"; | ||||
| 5332 | } | ||||
| 5333 | $html_print->($line); | ||||
| 5334 | } | ||||
| 5335 | |||||
| 5336 | # If didn't see multiple =cut lines, we'll put the pod out first | ||||
| 5337 | # and then the code, because it's less confusing. | ||||
| 5338 | else { | ||||
| 5339 | |||||
| 5340 | # since we are not intermixing code and pod, we don't need | ||||
| 5341 | # or want any <hr> lines which separated pod and code | ||||
| 5342 | $html_print->($line) unless ( $line =~ /^\s*<hr>\s*$/i ); | ||||
| 5343 | } | ||||
| 5344 | } | ||||
| 5345 | |||||
| 5346 | # Copy any remaining code section before the </body> tag | ||||
| 5347 | elsif ( $line =~ /^\s*<\/body>\s*$/i ) { | ||||
| 5348 | $saw_body_end = 1; | ||||
| 5349 | if (@$rpre_string_stack) { | ||||
| 5350 | unless ( $self->{_pod_cut_count} > 1 ) { | ||||
| 5351 | $html_print->('<hr />'); | ||||
| 5352 | } | ||||
| 5353 | while ( my $rpre_string = shift(@$rpre_string_stack) ) { | ||||
| 5354 | $html_print->('<pre>'); | ||||
| 5355 | $html_print->($$rpre_string); | ||||
| 5356 | $html_print->('</pre>'); | ||||
| 5357 | } | ||||
| 5358 | } | ||||
| 5359 | $html_print->($line); | ||||
| 5360 | } | ||||
| 5361 | else { | ||||
| 5362 | $html_print->($line); | ||||
| 5363 | } | ||||
| 5364 | } | ||||
| 5365 | |||||
| 5366 | $success_flag = 1; | ||||
| 5367 | unless ($saw_body) { | ||||
| 5368 | Perl::Tidy::Warn "Did not see <body> in pod2html output\n"; | ||||
| 5369 | $success_flag = 0; | ||||
| 5370 | } | ||||
| 5371 | unless ($saw_body_end) { | ||||
| 5372 | Perl::Tidy::Warn "Did not see </body> in pod2html output\n"; | ||||
| 5373 | $success_flag = 0; | ||||
| 5374 | } | ||||
| 5375 | unless ($saw_index) { | ||||
| 5376 | Perl::Tidy::Warn "Did not find INDEX END in pod2html output\n"; | ||||
| 5377 | $success_flag = 0; | ||||
| 5378 | } | ||||
| 5379 | |||||
| 5380 | RETURN: | ||||
| 5381 | eval { $html_fh->close() }; | ||||
| 5382 | |||||
| 5383 | # note that we have to unlink tmpfile before making frames | ||||
| 5384 | # because the tmpfile may be one of the names used for frames | ||||
| 5385 | unlink $tmpfile if -e $tmpfile; | ||||
| 5386 | if ( $success_flag && $rOpts->{'frames'} ) { | ||||
| 5387 | $self->make_frame( \@toc ); | ||||
| 5388 | } | ||||
| 5389 | return $success_flag; | ||||
| 5390 | } | ||||
| 5391 | |||||
| 5392 | sub make_frame { | ||||
| 5393 | |||||
| 5394 | # Make a frame with table of contents in the left panel | ||||
| 5395 | # and the text in the right panel. | ||||
| 5396 | # On entry: | ||||
| 5397 | # $html_filename contains the no-frames html output | ||||
| 5398 | # $rtoc is a reference to an array with the table of contents | ||||
| 5399 | my $self = shift; | ||||
| 5400 | my ($rtoc) = @_; | ||||
| 5401 | my $input_file = $self->{_input_file}; | ||||
| 5402 | my $html_filename = $self->{_html_file}; | ||||
| 5403 | my $toc_filename = $self->{_toc_filename}; | ||||
| 5404 | my $src_filename = $self->{_src_filename}; | ||||
| 5405 | my $title = $self->{_title}; | ||||
| 5406 | $title = escape_html($title); | ||||
| 5407 | |||||
| 5408 | # FUTURE input parameter: | ||||
| 5409 | my $top_basename = ""; | ||||
| 5410 | |||||
| 5411 | # We need to produce 3 html files: | ||||
| 5412 | # 1. - the table of contents | ||||
| 5413 | # 2. - the contents (source code) itself | ||||
| 5414 | # 3. - the frame which contains them | ||||
| 5415 | |||||
| 5416 | # get basenames for relative links | ||||
| 5417 | my ( $toc_basename, $toc_path ) = fileparse($toc_filename); | ||||
| 5418 | my ( $src_basename, $src_path ) = fileparse($src_filename); | ||||
| 5419 | |||||
| 5420 | # 1. Make the table of contents panel, with appropriate changes | ||||
| 5421 | # to the anchor names | ||||
| 5422 | my $src_frame_name = 'SRC'; | ||||
| 5423 | my $first_anchor = | ||||
| 5424 | write_toc_html( $title, $toc_filename, $src_basename, $rtoc, | ||||
| 5425 | $src_frame_name ); | ||||
| 5426 | |||||
| 5427 | # 2. The current .html filename is renamed to be the contents panel | ||||
| 5428 | rename( $html_filename, $src_filename ) | ||||
| 5429 | or Perl::Tidy::Die "Cannot rename $html_filename to $src_filename:$!\n"; | ||||
| 5430 | |||||
| 5431 | # 3. Then use the original html filename for the frame | ||||
| 5432 | write_frame_html( | ||||
| 5433 | $title, $html_filename, $top_basename, | ||||
| 5434 | $toc_basename, $src_basename, $src_frame_name | ||||
| 5435 | ); | ||||
| 5436 | } | ||||
| 5437 | |||||
| 5438 | sub write_toc_html { | ||||
| 5439 | |||||
| 5440 | # write a separate html table of contents file for frames | ||||
| 5441 | my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_; | ||||
| 5442 | my $fh = IO::File->new( $toc_filename, 'w' ) | ||||
| 5443 | or Perl::Tidy::Die "Cannot open $toc_filename:$!\n"; | ||||
| 5444 | $fh->print(<<EOM); | ||||
| 5445 | <html> | ||||
| 5446 | <head> | ||||
| 5447 | <title>$title</title> | ||||
| 5448 | </head> | ||||
| 5449 | <body> | ||||
| 5450 | <h1><a href=\"$src_basename#-top-" target="$src_frame_name">$title</a></h1> | ||||
| 5451 | EOM | ||||
| 5452 | |||||
| 5453 | my $first_anchor = | ||||
| 5454 | change_anchor_names( $rtoc, $src_basename, "$src_frame_name" ); | ||||
| 5455 | $fh->print( join "", @$rtoc ); | ||||
| 5456 | |||||
| 5457 | $fh->print(<<EOM); | ||||
| 5458 | </body> | ||||
| 5459 | </html> | ||||
| 5460 | EOM | ||||
| 5461 | |||||
| 5462 | } | ||||
| 5463 | |||||
| 5464 | sub write_frame_html { | ||||
| 5465 | |||||
| 5466 | # write an html file to be the table of contents frame | ||||
| 5467 | my ( | ||||
| 5468 | $title, $frame_filename, $top_basename, | ||||
| 5469 | $toc_basename, $src_basename, $src_frame_name | ||||
| 5470 | ) = @_; | ||||
| 5471 | |||||
| 5472 | my $fh = IO::File->new( $frame_filename, 'w' ) | ||||
| 5473 | or Perl::Tidy::Die "Cannot open $toc_basename:$!\n"; | ||||
| 5474 | |||||
| 5475 | $fh->print(<<EOM); | ||||
| 5476 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN" | ||||
| 5477 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"> | ||||
| 5478 | <?xml version="1.0" encoding="iso-8859-1" ?> | ||||
| 5479 | <html xmlns="http://www.w3.org/1999/xhtml"> | ||||
| 5480 | <head> | ||||
| 5481 | <title>$title</title> | ||||
| 5482 | </head> | ||||
| 5483 | EOM | ||||
| 5484 | |||||
| 5485 | # two left panels, one right, if master index file | ||||
| 5486 | if ($top_basename) { | ||||
| 5487 | $fh->print(<<EOM); | ||||
| 5488 | <frameset cols="20%,80%"> | ||||
| 5489 | <frameset rows="30%,70%"> | ||||
| 5490 | <frame src = "$top_basename" /> | ||||
| 5491 | <frame src = "$toc_basename" /> | ||||
| 5492 | </frameset> | ||||
| 5493 | EOM | ||||
| 5494 | } | ||||
| 5495 | |||||
| 5496 | # one left panels, one right, if no master index file | ||||
| 5497 | else { | ||||
| 5498 | $fh->print(<<EOM); | ||||
| 5499 | <frameset cols="20%,*"> | ||||
| 5500 | <frame src = "$toc_basename" /> | ||||
| 5501 | EOM | ||||
| 5502 | } | ||||
| 5503 | $fh->print(<<EOM); | ||||
| 5504 | <frame src = "$src_basename" name = "$src_frame_name" /> | ||||
| 5505 | <noframes> | ||||
| 5506 | <body> | ||||
| 5507 | <p>If you see this message, you are using a non-frame-capable web client.</p> | ||||
| 5508 | <p>This document contains:</p> | ||||
| 5509 | <ul> | ||||
| 5510 | <li><a href="$toc_basename">A table of contents</a></li> | ||||
| 5511 | <li><a href="$src_basename">The source code</a></li> | ||||
| 5512 | </ul> | ||||
| 5513 | </body> | ||||
| 5514 | </noframes> | ||||
| 5515 | </frameset> | ||||
| 5516 | </html> | ||||
| 5517 | EOM | ||||
| 5518 | } | ||||
| 5519 | |||||
| 5520 | sub change_anchor_names { | ||||
| 5521 | |||||
| 5522 | # add a filename and target to anchors | ||||
| 5523 | # also return the first anchor | ||||
| 5524 | my ( $rlines, $filename, $target ) = @_; | ||||
| 5525 | my $first_anchor; | ||||
| 5526 | foreach my $line (@$rlines) { | ||||
| 5527 | |||||
| 5528 | # We're looking for lines like this: | ||||
| 5529 | # <LI><A HREF="#synopsis">SYNOPSIS</A></LI> | ||||
| 5530 | # ---- - -------- ----------------- | ||||
| 5531 | # $1 $4 $5 | ||||
| 5532 | if ( $line =~ /^(.*)<a(.*)href\s*=\s*"([^#]*)#([^"]+)"[^>]*>(.*)$/i ) { | ||||
| 5533 | my $pre = $1; | ||||
| 5534 | my $name = $4; | ||||
| 5535 | my $post = $5; | ||||
| 5536 | my $href = "$filename#$name"; | ||||
| 5537 | $line = "$pre<a href=\"$href\" target=\"$target\">$post\n"; | ||||
| 5538 | unless ($first_anchor) { $first_anchor = $href } | ||||
| 5539 | } | ||||
| 5540 | } | ||||
| 5541 | return $first_anchor; | ||||
| 5542 | } | ||||
| 5543 | |||||
| 5544 | sub close_html_file { | ||||
| 5545 | my $self = shift; | ||||
| 5546 | return unless $self->{_html_file_opened}; | ||||
| 5547 | |||||
| 5548 | my $html_fh = $self->{_html_fh}; | ||||
| 5549 | my $rtoc_string = $self->{_rtoc_string}; | ||||
| 5550 | |||||
| 5551 | # There are 3 basic paths to html output... | ||||
| 5552 | |||||
| 5553 | # --------------------------------- | ||||
| 5554 | # Path 1: finish up if in -pre mode | ||||
| 5555 | # --------------------------------- | ||||
| 5556 | if ( $rOpts->{'html-pre-only'} ) { | ||||
| 5557 | $html_fh->print( <<"PRE_END"); | ||||
| 5558 | </pre> | ||||
| 5559 | PRE_END | ||||
| 5560 | eval { $html_fh->close() }; | ||||
| 5561 | return; | ||||
| 5562 | } | ||||
| 5563 | |||||
| 5564 | # Finish the index | ||||
| 5565 | $self->add_toc_item( 'EOF', 'EOF' ); | ||||
| 5566 | |||||
| 5567 | my $rpre_string_stack = $self->{_rpre_string_stack}; | ||||
| 5568 | |||||
| 5569 | # Patch to darken the <pre> background color in case of pod2html and | ||||
| 5570 | # interleaved code/documentation. Otherwise, the distinction | ||||
| 5571 | # between code and documentation is blurred. | ||||
| 5572 | if ( $rOpts->{pod2html} | ||||
| 5573 | && $self->{_pod_cut_count} >= 1 | ||||
| 5574 | && $rOpts->{'html-color-background'} eq '#FFFFFF' ) | ||||
| 5575 | { | ||||
| 5576 | $rOpts->{'html-pre-color-background'} = '#F0F0F0'; | ||||
| 5577 | } | ||||
| 5578 | |||||
| 5579 | # put the css or its link into a string, if used | ||||
| 5580 | my $css_string; | ||||
| 5581 | my $fh_css = Perl::Tidy::IOScalar->new( \$css_string, 'w' ); | ||||
| 5582 | |||||
| 5583 | # use css linked to another file | ||||
| 5584 | if ( $rOpts->{'html-linked-style-sheet'} ) { | ||||
| 5585 | $fh_css->print( | ||||
| 5586 | qq(<link rel="stylesheet" href="$css_linkname" type="text/css" />) | ||||
| 5587 | ); | ||||
| 5588 | } | ||||
| 5589 | |||||
| 5590 | # use css embedded in this file | ||||
| 5591 | elsif ( !$rOpts->{'nohtml-style-sheets'} ) { | ||||
| 5592 | $fh_css->print( <<'ENDCSS'); | ||||
| 5593 | <style type="text/css"> | ||||
| 5594 | <!-- | ||||
| 5595 | ENDCSS | ||||
| 5596 | write_style_sheet_data($fh_css); | ||||
| 5597 | $fh_css->print( <<"ENDCSS"); | ||||
| 5598 | --> | ||||
| 5599 | </style> | ||||
| 5600 | ENDCSS | ||||
| 5601 | } | ||||
| 5602 | |||||
| 5603 | # ----------------------------------------------------------- | ||||
| 5604 | # path 2: use pod2html if requested | ||||
| 5605 | # If we fail for some reason, continue on to path 3 | ||||
| 5606 | # ----------------------------------------------------------- | ||||
| 5607 | if ( $rOpts->{'pod2html'} ) { | ||||
| 5608 | my $rpod_string = $self->{_rpod_string}; | ||||
| 5609 | $self->pod_to_html( $$rpod_string, $css_string, $$rtoc_string, | ||||
| 5610 | $rpre_string_stack ) | ||||
| 5611 | && return; | ||||
| 5612 | } | ||||
| 5613 | |||||
| 5614 | # -------------------------------------------------- | ||||
| 5615 | # path 3: write code in html, with pod only in italics | ||||
| 5616 | # -------------------------------------------------- | ||||
| 5617 | my $input_file = $self->{_input_file}; | ||||
| 5618 | my $title = escape_html($input_file); | ||||
| 5619 | my $date = localtime; | ||||
| 5620 | $html_fh->print( <<"HTML_START"); | ||||
| 5621 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" | ||||
| 5622 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> | ||||
| 5623 | <!-- Generated by perltidy on $date --> | ||||
| 5624 | <html xmlns="http://www.w3.org/1999/xhtml"> | ||||
| 5625 | <head> | ||||
| 5626 | <title>$title</title> | ||||
| 5627 | HTML_START | ||||
| 5628 | |||||
| 5629 | # output the css, if used | ||||
| 5630 | if ($css_string) { | ||||
| 5631 | $html_fh->print($css_string); | ||||
| 5632 | $html_fh->print( <<"ENDCSS"); | ||||
| 5633 | </head> | ||||
| 5634 | <body> | ||||
| 5635 | ENDCSS | ||||
| 5636 | } | ||||
| 5637 | else { | ||||
| 5638 | |||||
| 5639 | $html_fh->print( <<"HTML_START"); | ||||
| 5640 | </head> | ||||
| 5641 | <body bgcolor=\"$rOpts->{'html-color-background'}\" text=\"$rOpts->{'html-color-punctuation'}\"> | ||||
| 5642 | HTML_START | ||||
| 5643 | } | ||||
| 5644 | |||||
| 5645 | $html_fh->print("<a name=\"-top-\"></a>\n"); | ||||
| 5646 | $html_fh->print( <<"EOM"); | ||||
| 5647 | <h1>$title</h1> | ||||
| 5648 | EOM | ||||
| 5649 | |||||
| 5650 | # copy the table of contents | ||||
| 5651 | if ( $$rtoc_string | ||||
| 5652 | && !$rOpts->{'frames'} | ||||
| 5653 | && $rOpts->{'html-table-of-contents'} ) | ||||
| 5654 | { | ||||
| 5655 | $html_fh->print($$rtoc_string); | ||||
| 5656 | } | ||||
| 5657 | |||||
| 5658 | # copy the pre section(s) | ||||
| 5659 | my $fname_comment = $input_file; | ||||
| 5660 | $fname_comment =~ s/--+/-/g; # protect HTML comment tags | ||||
| 5661 | $html_fh->print( <<"END_PRE"); | ||||
| 5662 | <hr /> | ||||
| 5663 | <!-- contents of filename: $fname_comment --> | ||||
| 5664 | <pre> | ||||
| 5665 | END_PRE | ||||
| 5666 | |||||
| 5667 | foreach my $rpre_string (@$rpre_string_stack) { | ||||
| 5668 | $html_fh->print($$rpre_string); | ||||
| 5669 | } | ||||
| 5670 | |||||
| 5671 | # and finish the html page | ||||
| 5672 | $html_fh->print( <<"HTML_END"); | ||||
| 5673 | </pre> | ||||
| 5674 | </body> | ||||
| 5675 | </html> | ||||
| 5676 | HTML_END | ||||
| 5677 | eval { $html_fh->close() }; # could be object without close method | ||||
| 5678 | |||||
| 5679 | if ( $rOpts->{'frames'} ) { | ||||
| 5680 | my @toc = map { $_ .= "\n" } split /\n/, $$rtoc_string; | ||||
| 5681 | $self->make_frame( \@toc ); | ||||
| 5682 | } | ||||
| 5683 | } | ||||
| 5684 | |||||
| 5685 | sub markup_tokens { | ||||
| 5686 | my $self = shift; | ||||
| 5687 | my ( $rtokens, $rtoken_type, $rlevels ) = @_; | ||||
| 5688 | my ( @colored_tokens, $j, $string, $type, $token, $level ); | ||||
| 5689 | my $rlast_level = $self->{_rlast_level}; | ||||
| 5690 | my $rpackage_stack = $self->{_rpackage_stack}; | ||||
| 5691 | |||||
| 5692 | for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) { | ||||
| 5693 | $type = $$rtoken_type[$j]; | ||||
| 5694 | $token = $$rtokens[$j]; | ||||
| 5695 | $level = $$rlevels[$j]; | ||||
| 5696 | $level = 0 if ( $level < 0 ); | ||||
| 5697 | |||||
| 5698 | #------------------------------------------------------- | ||||
| 5699 | # Update the package stack. The package stack is needed to keep | ||||
| 5700 | # the toc correct because some packages may be declared within | ||||
| 5701 | # blocks and go out of scope when we leave the block. | ||||
| 5702 | #------------------------------------------------------- | ||||
| 5703 | if ( $level > $$rlast_level ) { | ||||
| 5704 | unless ( $rpackage_stack->[ $level - 1 ] ) { | ||||
| 5705 | $rpackage_stack->[ $level - 1 ] = 'main'; | ||||
| 5706 | } | ||||
| 5707 | $rpackage_stack->[$level] = $rpackage_stack->[ $level - 1 ]; | ||||
| 5708 | } | ||||
| 5709 | elsif ( $level < $$rlast_level ) { | ||||
| 5710 | my $package = $rpackage_stack->[$level]; | ||||
| 5711 | unless ($package) { $package = 'main' } | ||||
| 5712 | |||||
| 5713 | # if we change packages due to a nesting change, we | ||||
| 5714 | # have to make an entry in the toc | ||||
| 5715 | if ( $package ne $rpackage_stack->[ $level + 1 ] ) { | ||||
| 5716 | $self->add_toc_item( $package, 'package' ); | ||||
| 5717 | } | ||||
| 5718 | } | ||||
| 5719 | $$rlast_level = $level; | ||||
| 5720 | |||||
| 5721 | #------------------------------------------------------- | ||||
| 5722 | # Intercept a sub name here; split it | ||||
| 5723 | # into keyword 'sub' and sub name; and add an | ||||
| 5724 | # entry in the toc | ||||
| 5725 | #------------------------------------------------------- | ||||
| 5726 | if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) { | ||||
| 5727 | $token = $self->markup_html_element( $1, 'k' ); | ||||
| 5728 | push @colored_tokens, $token; | ||||
| 5729 | $token = $2; | ||||
| 5730 | $type = 'M'; | ||||
| 5731 | |||||
| 5732 | # but don't include sub declarations in the toc; | ||||
| 5733 | # these wlll have leading token types 'i;' | ||||
| 5734 | my $signature = join "", @$rtoken_type; | ||||
| 5735 | unless ( $signature =~ /^i;/ ) { | ||||
| 5736 | my $subname = $token; | ||||
| 5737 | $subname =~ s/[\s\(].*$//; # remove any attributes and prototype | ||||
| 5738 | $self->add_toc_item( $subname, 'sub' ); | ||||
| 5739 | } | ||||
| 5740 | } | ||||
| 5741 | |||||
| 5742 | #------------------------------------------------------- | ||||
| 5743 | # Intercept a package name here; split it | ||||
| 5744 | # into keyword 'package' and name; add to the toc, | ||||
| 5745 | # and update the package stack | ||||
| 5746 | #------------------------------------------------------- | ||||
| 5747 | if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) { | ||||
| 5748 | $token = $self->markup_html_element( $1, 'k' ); | ||||
| 5749 | push @colored_tokens, $token; | ||||
| 5750 | $token = $2; | ||||
| 5751 | $type = 'i'; | ||||
| 5752 | $self->add_toc_item( "$token", 'package' ); | ||||
| 5753 | $rpackage_stack->[$level] = $token; | ||||
| 5754 | } | ||||
| 5755 | |||||
| 5756 | $token = $self->markup_html_element( $token, $type ); | ||||
| 5757 | push @colored_tokens, $token; | ||||
| 5758 | } | ||||
| 5759 | return ( \@colored_tokens ); | ||||
| 5760 | } | ||||
| 5761 | |||||
| 5762 | sub markup_html_element { | ||||
| 5763 | my $self = shift; | ||||
| 5764 | my ( $token, $type ) = @_; | ||||
| 5765 | |||||
| 5766 | return $token if ( $type eq 'b' ); # skip a blank token | ||||
| 5767 | return $token if ( $token =~ /^\s*$/ ); # skip a blank line | ||||
| 5768 | $token = escape_html($token); | ||||
| 5769 | |||||
| 5770 | # get the short abbreviation for this token type | ||||
| 5771 | my $short_name = $token_short_names{$type}; | ||||
| 5772 | if ( !defined($short_name) ) { | ||||
| 5773 | $short_name = "pu"; # punctuation is default | ||||
| 5774 | } | ||||
| 5775 | |||||
| 5776 | # handle style sheets.. | ||||
| 5777 | if ( !$rOpts->{'nohtml-style-sheets'} ) { | ||||
| 5778 | if ( $short_name ne 'pu' ) { | ||||
| 5779 | $token = qq(<span class="$short_name">) . $token . "</span>"; | ||||
| 5780 | } | ||||
| 5781 | } | ||||
| 5782 | |||||
| 5783 | # handle no style sheets.. | ||||
| 5784 | else { | ||||
| 5785 | my $color = $html_color{$short_name}; | ||||
| 5786 | |||||
| 5787 | if ( $color && ( $color ne $rOpts->{'html-color-punctuation'} ) ) { | ||||
| 5788 | $token = qq(<font color="$color">) . $token . "</font>"; | ||||
| 5789 | } | ||||
| 5790 | if ( $html_italic{$short_name} ) { $token = "<i>$token</i>" } | ||||
| 5791 | if ( $html_bold{$short_name} ) { $token = "<b>$token</b>" } | ||||
| 5792 | } | ||||
| 5793 | return $token; | ||||
| 5794 | } | ||||
| 5795 | |||||
| 5796 | sub escape_html { | ||||
| 5797 | |||||
| 5798 | my $token = shift; | ||||
| 5799 | if ($missing_html_entities) { | ||||
| 5800 | $token =~ s/\&/&/g; | ||||
| 5801 | $token =~ s/\</</g; | ||||
| 5802 | $token =~ s/\>/>/g; | ||||
| 5803 | $token =~ s/\"/"/g; | ||||
| 5804 | } | ||||
| 5805 | else { | ||||
| 5806 | HTML::Entities::encode_entities($token); | ||||
| 5807 | } | ||||
| 5808 | return $token; | ||||
| 5809 | } | ||||
| 5810 | |||||
| 5811 | sub finish_formatting { | ||||
| 5812 | |||||
| 5813 | # called after last line | ||||
| 5814 | my $self = shift; | ||||
| 5815 | $self->close_html_file(); | ||||
| 5816 | return; | ||||
| 5817 | } | ||||
| 5818 | |||||
| 5819 | sub write_line { | ||||
| 5820 | |||||
| 5821 | my $self = shift; | ||||
| 5822 | return unless $self->{_html_file_opened}; | ||||
| 5823 | my $html_pre_fh = $self->{_html_pre_fh}; | ||||
| 5824 | my ($line_of_tokens) = @_; | ||||
| 5825 | my $line_type = $line_of_tokens->{_line_type}; | ||||
| 5826 | my $input_line = $line_of_tokens->{_line_text}; | ||||
| 5827 | my $line_number = $line_of_tokens->{_line_number}; | ||||
| 5828 | chomp $input_line; | ||||
| 5829 | |||||
| 5830 | # markup line of code.. | ||||
| 5831 | my $html_line; | ||||
| 5832 | if ( $line_type eq 'CODE' ) { | ||||
| 5833 | my $rtoken_type = $line_of_tokens->{_rtoken_type}; | ||||
| 5834 | my $rtokens = $line_of_tokens->{_rtokens}; | ||||
| 5835 | my $rlevels = $line_of_tokens->{_rlevels}; | ||||
| 5836 | |||||
| 5837 | if ( $input_line =~ /(^\s*)/ ) { | ||||
| 5838 | $html_line = $1; | ||||
| 5839 | } | ||||
| 5840 | else { | ||||
| 5841 | $html_line = ""; | ||||
| 5842 | } | ||||
| 5843 | my ($rcolored_tokens) = | ||||
| 5844 | $self->markup_tokens( $rtokens, $rtoken_type, $rlevels ); | ||||
| 5845 | $html_line .= join '', @$rcolored_tokens; | ||||
| 5846 | } | ||||
| 5847 | |||||
| 5848 | # markup line of non-code.. | ||||
| 5849 | else { | ||||
| 5850 | my $line_character; | ||||
| 5851 | if ( $line_type eq 'HERE' ) { $line_character = 'H' } | ||||
| 5852 | elsif ( $line_type eq 'HERE_END' ) { $line_character = 'h' } | ||||
| 5853 | elsif ( $line_type eq 'FORMAT' ) { $line_character = 'H' } | ||||
| 5854 | elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' } | ||||
| 5855 | elsif ( $line_type eq 'SYSTEM' ) { $line_character = 'c' } | ||||
| 5856 | elsif ( $line_type eq 'END_START' ) { | ||||
| 5857 | $line_character = 'k'; | ||||
| 5858 | $self->add_toc_item( '__END__', '__END__' ); | ||||
| 5859 | } | ||||
| 5860 | elsif ( $line_type eq 'DATA_START' ) { | ||||
| 5861 | $line_character = 'k'; | ||||
| 5862 | $self->add_toc_item( '__DATA__', '__DATA__' ); | ||||
| 5863 | } | ||||
| 5864 | elsif ( $line_type =~ /^POD/ ) { | ||||
| 5865 | $line_character = 'P'; | ||||
| 5866 | if ( $rOpts->{'pod2html'} ) { | ||||
| 5867 | my $html_pod_fh = $self->{_html_pod_fh}; | ||||
| 5868 | if ( $line_type eq 'POD_START' ) { | ||||
| 5869 | |||||
| 5870 | my $rpre_string_stack = $self->{_rpre_string_stack}; | ||||
| 5871 | my $rpre_string = $rpre_string_stack->[-1]; | ||||
| 5872 | |||||
| 5873 | # if we have written any non-blank lines to the | ||||
| 5874 | # current pre section, start writing to a new output | ||||
| 5875 | # string | ||||
| 5876 | if ( $$rpre_string =~ /\S/ ) { | ||||
| 5877 | my $pre_string; | ||||
| 5878 | $html_pre_fh = | ||||
| 5879 | Perl::Tidy::IOScalar->new( \$pre_string, 'w' ); | ||||
| 5880 | $self->{_html_pre_fh} = $html_pre_fh; | ||||
| 5881 | push @$rpre_string_stack, \$pre_string; | ||||
| 5882 | |||||
| 5883 | # leave a marker in the pod stream so we know | ||||
| 5884 | # where to put the pre section we just | ||||
| 5885 | # finished. | ||||
| 5886 | my $for_html = '=for html'; # don't confuse pod utils | ||||
| 5887 | $html_pod_fh->print(<<EOM); | ||||
| 5888 | |||||
| 5889 | $for_html | ||||
| 5890 | <!-- pERLTIDY sECTION --> | ||||
| 5891 | |||||
| 5892 | EOM | ||||
| 5893 | } | ||||
| 5894 | |||||
| 5895 | # otherwise, just clear the current string and start | ||||
| 5896 | # over | ||||
| 5897 | else { | ||||
| 5898 | $$rpre_string = ""; | ||||
| 5899 | $html_pod_fh->print("\n"); | ||||
| 5900 | } | ||||
| 5901 | } | ||||
| 5902 | $html_pod_fh->print( $input_line . "\n" ); | ||||
| 5903 | if ( $line_type eq 'POD_END' ) { | ||||
| 5904 | $self->{_pod_cut_count}++; | ||||
| 5905 | $html_pod_fh->print("\n"); | ||||
| 5906 | } | ||||
| 5907 | return; | ||||
| 5908 | } | ||||
| 5909 | } | ||||
| 5910 | else { $line_character = 'Q' } | ||||
| 5911 | $html_line = $self->markup_html_element( $input_line, $line_character ); | ||||
| 5912 | } | ||||
| 5913 | |||||
| 5914 | # add the line number if requested | ||||
| 5915 | if ( $rOpts->{'html-line-numbers'} ) { | ||||
| 5916 | my $extra_space .= | ||||
| 5917 | ( $line_number < 10 ) ? " " | ||||
| 5918 | : ( $line_number < 100 ) ? " " | ||||
| 5919 | : ( $line_number < 1000 ) ? " " | ||||
| 5920 | : ""; | ||||
| 5921 | $html_line = $extra_space . $line_number . " " . $html_line; | ||||
| 5922 | } | ||||
| 5923 | |||||
| 5924 | # write the line | ||||
| 5925 | $html_pre_fh->print("$html_line\n"); | ||||
| 5926 | } | ||||
| 5927 | |||||
| 5928 | ##################################################################### | ||||
| 5929 | # | ||||
| 5930 | # The Perl::Tidy::Formatter package adds indentation, whitespace, and | ||||
| 5931 | # line breaks to the token stream | ||||
| 5932 | # | ||||
| 5933 | # WARNING: This is not a real class for speed reasons. Only one | ||||
| 5934 | # Formatter may be used. | ||||
| 5935 | # | ||||
| 5936 | ##################################################################### | ||||
| 5937 | |||||
| 5938 | package Perl::Tidy::Formatter; | ||||
| 5939 | |||||
| 5940 | # spent 8µs within Perl::Tidy::Formatter::BEGIN@5940 which was called:
# once (8µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 5977 | ||||
| 5941 | |||||
| 5942 | # Caution: these debug flags produce a lot of output | ||||
| 5943 | # They should all be 0 except when debugging small scripts | ||||
| 5944 | 2 | 26µs | 2 | 93µs | # spent 51µs (10+42) within Perl::Tidy::Formatter::BEGIN@5944 which was called:
# once (10µs+42µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 5944 # spent 51µs making 1 call to Perl::Tidy::Formatter::BEGIN@5944
# spent 42µs making 1 call to constant::import |
| 5945 | 2 | 29µs | 2 | 78µs | # spent 43µs (8+35) within Perl::Tidy::Formatter::BEGIN@5945 which was called:
# once (8µs+35µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 5945 # spent 43µs making 1 call to Perl::Tidy::Formatter::BEGIN@5945
# spent 35µs making 1 call to constant::import |
| 5946 | 2 | 23µs | 2 | 77µs | # spent 42µs (8+34) within Perl::Tidy::Formatter::BEGIN@5946 which was called:
# once (8µs+34µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 5946 # spent 42µs making 1 call to Perl::Tidy::Formatter::BEGIN@5946
# spent 34µs making 1 call to constant::import |
| 5947 | 2 | 23µs | 2 | 82µs | # spent 45µs (7+37) within Perl::Tidy::Formatter::BEGIN@5947 which was called:
# once (7µs+37µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 5947 # spent 45µs making 1 call to Perl::Tidy::Formatter::BEGIN@5947
# spent 37µs making 1 call to constant::import |
| 5948 | 2 | 29µs | 2 | 73µs | # spent 40µs (8+33) within Perl::Tidy::Formatter::BEGIN@5948 which was called:
# once (8µs+33µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 5948 # spent 40µs making 1 call to Perl::Tidy::Formatter::BEGIN@5948
# spent 32µs making 1 call to constant::import |
| 5949 | 2 | 32µs | 2 | 83µs | # spent 45µs (8+38) within Perl::Tidy::Formatter::BEGIN@5949 which was called:
# once (8µs+38µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 5949 # spent 45µs making 1 call to Perl::Tidy::Formatter::BEGIN@5949
# spent 38µs making 1 call to constant::import |
| 5950 | 2 | 30µs | 2 | 78µs | # spent 48µs (17+31) within Perl::Tidy::Formatter::BEGIN@5950 which was called:
# once (17µs+31µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 5950 # spent 48µs making 1 call to Perl::Tidy::Formatter::BEGIN@5950
# spent 31µs making 1 call to constant::import |
| 5951 | 2 | 20µs | 2 | 69µs | # spent 38µs (7+31) within Perl::Tidy::Formatter::BEGIN@5951 which was called:
# once (7µs+31µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 5951 # spent 38µs making 1 call to Perl::Tidy::Formatter::BEGIN@5951
# spent 31µs making 1 call to constant::import |
| 5952 | 2 | 21µs | 2 | 70µs | # spent 40µs (10+30) within Perl::Tidy::Formatter::BEGIN@5952 which was called:
# once (10µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 5952 # spent 40µs making 1 call to Perl::Tidy::Formatter::BEGIN@5952
# spent 30µs making 1 call to constant::import |
| 5953 | 2 | 23µs | 2 | 67µs | # spent 37µs (6+30) within Perl::Tidy::Formatter::BEGIN@5953 which was called:
# once (6µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 5953 # spent 37µs making 1 call to Perl::Tidy::Formatter::BEGIN@5953
# spent 30µs making 1 call to constant::import |
| 5954 | 2 | 19µs | 2 | 66µs | # spent 36µs (6+30) within Perl::Tidy::Formatter::BEGIN@5954 which was called:
# once (6µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 5954 # spent 36µs making 1 call to Perl::Tidy::Formatter::BEGIN@5954
# spent 30µs making 1 call to constant::import |
| 5955 | 2 | 18µs | 2 | 65µs | # spent 36µs (6+29) within Perl::Tidy::Formatter::BEGIN@5955 which was called:
# once (6µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 5955 # spent 36µs making 1 call to Perl::Tidy::Formatter::BEGIN@5955
# spent 29µs making 1 call to constant::import |
| 5956 | 2 | 19µs | 2 | 67µs | # spent 38µs (9+29) within Perl::Tidy::Formatter::BEGIN@5956 which was called:
# once (9µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 5956 # spent 38µs making 1 call to Perl::Tidy::Formatter::BEGIN@5956
# spent 29µs making 1 call to constant::import |
| 5957 | 2 | 157µs | 2 | 68µs | # spent 37µs (6+31) within Perl::Tidy::Formatter::BEGIN@5957 which was called:
# once (6µs+31µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 5957 # spent 37µs making 1 call to Perl::Tidy::Formatter::BEGIN@5957
# spent 31µs making 1 call to constant::import |
| 5958 | |||||
| 5959 | my $debug_warning = sub { | ||||
| 5960 | print STDOUT "FORMATTER_DEBUGGING with key $_[0]\n"; | ||||
| 5961 | 1 | 2µs | }; | ||
| 5962 | |||||
| 5963 | FORMATTER_DEBUG_FLAG_RECOMBINE && $debug_warning->('RECOMBINE'); | ||||
| 5964 | 1 | 0s | FORMATTER_DEBUG_FLAG_BOND_TABLES && $debug_warning->('BOND_TABLES'); | ||
| 5965 | FORMATTER_DEBUG_FLAG_BOND && $debug_warning->('BOND'); | ||||
| 5966 | 1 | 0s | FORMATTER_DEBUG_FLAG_BREAK && $debug_warning->('BREAK'); | ||
| 5967 | FORMATTER_DEBUG_FLAG_CI && $debug_warning->('CI'); | ||||
| 5968 | 1 | 0s | FORMATTER_DEBUG_FLAG_FLUSH && $debug_warning->('FLUSH'); | ||
| 5969 | FORMATTER_DEBUG_FLAG_FORCE && $debug_warning->('FORCE'); | ||||
| 5970 | 1 | 0s | FORMATTER_DEBUG_FLAG_LIST && $debug_warning->('LIST'); | ||
| 5971 | FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK'); | ||||
| 5972 | 1 | 0s | FORMATTER_DEBUG_FLAG_OUTPUT && $debug_warning->('OUTPUT'); | ||
| 5973 | FORMATTER_DEBUG_FLAG_SPARSE && $debug_warning->('SPARSE'); | ||||
| 5974 | 1 | 0s | FORMATTER_DEBUG_FLAG_STORE && $debug_warning->('STORE'); | ||
| 5975 | FORMATTER_DEBUG_FLAG_UNDOBP && $debug_warning->('UNDOBP'); | ||||
| 5976 | 1 | 6µs | FORMATTER_DEBUG_FLAG_WHITE && $debug_warning->('WHITE'); | ||
| 5977 | 1 | 18µs | 1 | 8µs | } # spent 8µs making 1 call to Perl::Tidy::Formatter::BEGIN@5940 |
| 5978 | |||||
| 5979 | 2 | 165µs | 2 | 74µs | # spent 41µs (9+33) within Perl::Tidy::Formatter::BEGIN@5979 which was called:
# once (9µs+33µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 5979 # spent 41µs making 1 call to Perl::Tidy::Formatter::BEGIN@5979
# spent 33µs making 1 call to Exporter::import |
| 5980 | 1 | 500ns | # spent 2.07ms (10µs+2.06) within Perl::Tidy::Formatter::BEGIN@5980 which was called:
# once (10µs+2.06ms) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 6196 | ||
| 5981 | |||||
| 5982 | @gnu_stack | ||||
| 5983 | $max_gnu_stack_index | ||||
| 5984 | $gnu_position_predictor | ||||
| 5985 | $line_start_index_to_go | ||||
| 5986 | $last_indentation_written | ||||
| 5987 | $last_unadjusted_indentation | ||||
| 5988 | $last_leading_token | ||||
| 5989 | $last_output_short_opening_token | ||||
| 5990 | |||||
| 5991 | $saw_VERSION_in_this_file | ||||
| 5992 | $saw_END_or_DATA_ | ||||
| 5993 | |||||
| 5994 | @gnu_item_list | ||||
| 5995 | $max_gnu_item_index | ||||
| 5996 | $gnu_sequence_number | ||||
| 5997 | $last_output_indentation | ||||
| 5998 | %last_gnu_equals | ||||
| 5999 | %gnu_comma_count | ||||
| 6000 | %gnu_arrow_count | ||||
| 6001 | |||||
| 6002 | @block_type_to_go | ||||
| 6003 | @type_sequence_to_go | ||||
| 6004 | @container_environment_to_go | ||||
| 6005 | @bond_strength_to_go | ||||
| 6006 | @forced_breakpoint_to_go | ||||
| 6007 | @token_lengths_to_go | ||||
| 6008 | @summed_lengths_to_go | ||||
| 6009 | @levels_to_go | ||||
| 6010 | @leading_spaces_to_go | ||||
| 6011 | @reduced_spaces_to_go | ||||
| 6012 | @matching_token_to_go | ||||
| 6013 | @mate_index_to_go | ||||
| 6014 | @nesting_blocks_to_go | ||||
| 6015 | @ci_levels_to_go | ||||
| 6016 | @nesting_depth_to_go | ||||
| 6017 | @nobreak_to_go | ||||
| 6018 | @old_breakpoint_to_go | ||||
| 6019 | @tokens_to_go | ||||
| 6020 | @types_to_go | ||||
| 6021 | @inext_to_go | ||||
| 6022 | @iprev_to_go | ||||
| 6023 | |||||
| 6024 | %saved_opening_indentation | ||||
| 6025 | |||||
| 6026 | $max_index_to_go | ||||
| 6027 | $comma_count_in_batch | ||||
| 6028 | $old_line_count_in_batch | ||||
| 6029 | $last_nonblank_index_to_go | ||||
| 6030 | $last_nonblank_type_to_go | ||||
| 6031 | $last_nonblank_token_to_go | ||||
| 6032 | $last_last_nonblank_index_to_go | ||||
| 6033 | $last_last_nonblank_type_to_go | ||||
| 6034 | $last_last_nonblank_token_to_go | ||||
| 6035 | @nonblank_lines_at_depth | ||||
| 6036 | $starting_in_quote | ||||
| 6037 | $ending_in_quote | ||||
| 6038 | @whitespace_level_stack | ||||
| 6039 | $whitespace_last_level | ||||
| 6040 | |||||
| 6041 | $in_format_skipping_section | ||||
| 6042 | $format_skipping_pattern_begin | ||||
| 6043 | $format_skipping_pattern_end | ||||
| 6044 | |||||
| 6045 | $forced_breakpoint_count | ||||
| 6046 | $forced_breakpoint_undo_count | ||||
| 6047 | @forced_breakpoint_undo_stack | ||||
| 6048 | %postponed_breakpoint | ||||
| 6049 | |||||
| 6050 | $tabbing | ||||
| 6051 | $embedded_tab_count | ||||
| 6052 | $first_embedded_tab_at | ||||
| 6053 | $last_embedded_tab_at | ||||
| 6054 | $deleted_semicolon_count | ||||
| 6055 | $first_deleted_semicolon_at | ||||
| 6056 | $last_deleted_semicolon_at | ||||
| 6057 | $added_semicolon_count | ||||
| 6058 | $first_added_semicolon_at | ||||
| 6059 | $last_added_semicolon_at | ||||
| 6060 | $first_tabbing_disagreement | ||||
| 6061 | $last_tabbing_disagreement | ||||
| 6062 | $in_tabbing_disagreement | ||||
| 6063 | $tabbing_disagreement_count | ||||
| 6064 | $input_line_tabbing | ||||
| 6065 | |||||
| 6066 | $last_line_type | ||||
| 6067 | $last_line_leading_type | ||||
| 6068 | $last_line_leading_level | ||||
| 6069 | $last_last_line_leading_level | ||||
| 6070 | |||||
| 6071 | %block_leading_text | ||||
| 6072 | %block_opening_line_number | ||||
| 6073 | $csc_new_statement_ok | ||||
| 6074 | $csc_last_label | ||||
| 6075 | %csc_block_label | ||||
| 6076 | $accumulating_text_for_block | ||||
| 6077 | $leading_block_text | ||||
| 6078 | $rleading_block_if_elsif_text | ||||
| 6079 | $leading_block_text_level | ||||
| 6080 | $leading_block_text_length_exceeded | ||||
| 6081 | $leading_block_text_line_length | ||||
| 6082 | $leading_block_text_line_number | ||||
| 6083 | $closing_side_comment_prefix_pattern | ||||
| 6084 | $closing_side_comment_list_pattern | ||||
| 6085 | |||||
| 6086 | $last_nonblank_token | ||||
| 6087 | $last_nonblank_type | ||||
| 6088 | $last_last_nonblank_token | ||||
| 6089 | $last_last_nonblank_type | ||||
| 6090 | $last_nonblank_block_type | ||||
| 6091 | $last_output_level | ||||
| 6092 | %is_do_follower | ||||
| 6093 | %is_if_brace_follower | ||||
| 6094 | %space_after_keyword | ||||
| 6095 | $rbrace_follower | ||||
| 6096 | $looking_for_else | ||||
| 6097 | %is_last_next_redo_return | ||||
| 6098 | %is_other_brace_follower | ||||
| 6099 | %is_else_brace_follower | ||||
| 6100 | %is_anon_sub_brace_follower | ||||
| 6101 | %is_anon_sub_1_brace_follower | ||||
| 6102 | %is_sort_map_grep | ||||
| 6103 | %is_sort_map_grep_eval | ||||
| 6104 | %is_sort_map_grep_eval_do | ||||
| 6105 | %is_block_without_semicolon | ||||
| 6106 | %is_if_unless | ||||
| 6107 | %is_and_or | ||||
| 6108 | %is_assignment | ||||
| 6109 | %is_chain_operator | ||||
| 6110 | %is_if_unless_and_or_last_next_redo_return | ||||
| 6111 | |||||
| 6112 | @has_broken_sublist | ||||
| 6113 | @dont_align | ||||
| 6114 | @want_comma_break | ||||
| 6115 | |||||
| 6116 | $is_static_block_comment | ||||
| 6117 | $index_start_one_line_block | ||||
| 6118 | $semicolons_before_block_self_destruct | ||||
| 6119 | $index_max_forced_break | ||||
| 6120 | $input_line_number | ||||
| 6121 | $diagnostics_object | ||||
| 6122 | $vertical_aligner_object | ||||
| 6123 | $logger_object | ||||
| 6124 | $file_writer_object | ||||
| 6125 | $formatter_self | ||||
| 6126 | @ci_stack | ||||
| 6127 | $last_line_had_side_comment | ||||
| 6128 | %want_break_before | ||||
| 6129 | %outdent_keyword | ||||
| 6130 | $static_block_comment_pattern | ||||
| 6131 | $static_side_comment_pattern | ||||
| 6132 | %opening_vertical_tightness | ||||
| 6133 | %closing_vertical_tightness | ||||
| 6134 | %closing_token_indentation | ||||
| 6135 | $some_closing_token_indentation | ||||
| 6136 | |||||
| 6137 | %opening_token_right | ||||
| 6138 | %stack_opening_token | ||||
| 6139 | %stack_closing_token | ||||
| 6140 | |||||
| 6141 | $block_brace_vertical_tightness_pattern | ||||
| 6142 | |||||
| 6143 | $rOpts_add_newlines | ||||
| 6144 | $rOpts_add_whitespace | ||||
| 6145 | $rOpts_block_brace_tightness | ||||
| 6146 | $rOpts_block_brace_vertical_tightness | ||||
| 6147 | $rOpts_brace_left_and_indent | ||||
| 6148 | $rOpts_comma_arrow_breakpoints | ||||
| 6149 | $rOpts_break_at_old_keyword_breakpoints | ||||
| 6150 | $rOpts_break_at_old_comma_breakpoints | ||||
| 6151 | $rOpts_break_at_old_logical_breakpoints | ||||
| 6152 | $rOpts_break_at_old_ternary_breakpoints | ||||
| 6153 | $rOpts_break_at_old_attribute_breakpoints | ||||
| 6154 | $rOpts_closing_side_comment_else_flag | ||||
| 6155 | $rOpts_closing_side_comment_maximum_text | ||||
| 6156 | $rOpts_continuation_indentation | ||||
| 6157 | $rOpts_cuddled_else | ||||
| 6158 | $rOpts_delete_old_whitespace | ||||
| 6159 | $rOpts_fuzzy_line_length | ||||
| 6160 | $rOpts_indent_columns | ||||
| 6161 | $rOpts_line_up_parentheses | ||||
| 6162 | $rOpts_maximum_fields_per_table | ||||
| 6163 | $rOpts_maximum_line_length | ||||
| 6164 | $rOpts_variable_maximum_line_length | ||||
| 6165 | $rOpts_short_concatenation_item_length | ||||
| 6166 | $rOpts_keep_old_blank_lines | ||||
| 6167 | $rOpts_ignore_old_breakpoints | ||||
| 6168 | $rOpts_format_skipping | ||||
| 6169 | $rOpts_space_function_paren | ||||
| 6170 | $rOpts_space_keyword_paren | ||||
| 6171 | $rOpts_keep_interior_semicolons | ||||
| 6172 | $rOpts_ignore_side_comment_lengths | ||||
| 6173 | $rOpts_stack_closing_block_brace | ||||
| 6174 | $rOpts_whitespace_cycle | ||||
| 6175 | $rOpts_tight_secret_operators | ||||
| 6176 | |||||
| 6177 | %is_opening_type | ||||
| 6178 | %is_closing_type | ||||
| 6179 | %is_keyword_returning_list | ||||
| 6180 | %tightness | ||||
| 6181 | %matching_token | ||||
| 6182 | $rOpts | ||||
| 6183 | %right_bond_strength | ||||
| 6184 | %left_bond_strength | ||||
| 6185 | %binary_ws_rules | ||||
| 6186 | %want_left_space | ||||
| 6187 | %want_right_space | ||||
| 6188 | %is_digraph | ||||
| 6189 | %is_trigraph | ||||
| 6190 | $bli_pattern | ||||
| 6191 | $bli_list_string | ||||
| 6192 | %is_closing_type | ||||
| 6193 | %is_opening_type | ||||
| 6194 | %is_closing_token | ||||
| 6195 | %is_opening_token | ||||
| 6196 | 1 | 284µs | 2 | 4.13ms | }; # spent 2.07ms making 1 call to Perl::Tidy::Formatter::BEGIN@5980
# spent 2.06ms making 1 call to vars::import |
| 6197 | |||||
| 6198 | # spent 93µs within Perl::Tidy::Formatter::BEGIN@6198 which was called:
# once (93µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 6279 | ||||
| 6199 | |||||
| 6200 | # default list of block types for which -bli would apply | ||||
| 6201 | 1 | 500ns | $bli_list_string = 'if else elsif unless while for foreach do : sub'; | ||
| 6202 | |||||
| 6203 | 1 | 4µs | @_ = qw( | ||
| 6204 | .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <> | ||||
| 6205 | <= >= == =~ !~ != ++ -- /= x= | ||||
| 6206 | ); | ||||
| 6207 | 1 | 11µs | @is_digraph{@_} = (1) x scalar(@_); | ||
| 6208 | |||||
| 6209 | 1 | 5µs | @_ = qw( ... **= <<= >>= &&= ||= //= <=> ); | ||
| 6210 | 1 | 3µs | @is_trigraph{@_} = (1) x scalar(@_); | ||
| 6211 | |||||
| 6212 | 1 | 4µs | @_ = qw( | ||
| 6213 | = **= += *= &= <<= &&= | ||||
| 6214 | -= /= |= >>= ||= //= | ||||
| 6215 | .= %= ^= | ||||
| 6216 | x= | ||||
| 6217 | ); | ||||
| 6218 | 1 | 4µs | @is_assignment{@_} = (1) x scalar(@_); | ||
| 6219 | |||||
| 6220 | 1 | 3µs | @_ = qw( | ||
| 6221 | grep | ||||
| 6222 | keys | ||||
| 6223 | map | ||||
| 6224 | reverse | ||||
| 6225 | sort | ||||
| 6226 | split | ||||
| 6227 | ); | ||||
| 6228 | 1 | 2µs | @is_keyword_returning_list{@_} = (1) x scalar(@_); | ||
| 6229 | |||||
| 6230 | 1 | 3µs | @_ = qw(is if unless and or err last next redo return); | ||
| 6231 | 1 | 7µs | @is_if_unless_and_or_last_next_redo_return{@_} = (1) x scalar(@_); | ||
| 6232 | |||||
| 6233 | 1 | 2µs | @_ = qw(last next redo return); | ||
| 6234 | 1 | 1µs | @is_last_next_redo_return{@_} = (1) x scalar(@_); | ||
| 6235 | |||||
| 6236 | 1 | 1µs | @_ = qw(sort map grep); | ||
| 6237 | 1 | 900ns | @is_sort_map_grep{@_} = (1) x scalar(@_); | ||
| 6238 | |||||
| 6239 | 1 | 1µs | @_ = qw(sort map grep eval); | ||
| 6240 | 1 | 1µs | @is_sort_map_grep_eval{@_} = (1) x scalar(@_); | ||
| 6241 | |||||
| 6242 | 1 | 1µs | @_ = qw(sort map grep eval do); | ||
| 6243 | 1 | 1µs | @is_sort_map_grep_eval_do{@_} = (1) x scalar(@_); | ||
| 6244 | |||||
| 6245 | 1 | 900ns | @_ = qw(if unless); | ||
| 6246 | 1 | 700ns | @is_if_unless{@_} = (1) x scalar(@_); | ||
| 6247 | |||||
| 6248 | 1 | 800ns | @_ = qw(and or err); | ||
| 6249 | 1 | 700ns | @is_and_or{@_} = (1) x scalar(@_); | ||
| 6250 | |||||
| 6251 | # Identify certain operators which often occur in chains. | ||||
| 6252 | # Note: the minus (-) causes a side effect of padding of the first line in | ||||
| 6253 | # something like this (by sub set_logical_padding): | ||||
| 6254 | # Checkbutton => 'Transmission checked', | ||||
| 6255 | # -variable => \$TRANS | ||||
| 6256 | # This usually improves appearance so it seems ok. | ||||
| 6257 | 1 | 2µs | @_ = qw(&& || and or : ? . + - * /); | ||
| 6258 | 1 | 3µs | @is_chain_operator{@_} = (1) x scalar(@_); | ||
| 6259 | |||||
| 6260 | # We can remove semicolons after blocks preceded by these keywords | ||||
| 6261 | 1 | 4µs | @_ = | ||
| 6262 | qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else | ||||
| 6263 | unless while until for foreach given when default); | ||||
| 6264 | 1 | 6µs | @is_block_without_semicolon{@_} = (1) x scalar(@_); | ||
| 6265 | |||||
| 6266 | # 'L' is token for opening { at hash key | ||||
| 6267 | 1 | 2µs | @_ = qw" L { ( [ "; | ||
| 6268 | 1 | 1µs | @is_opening_type{@_} = (1) x scalar(@_); | ||
| 6269 | |||||
| 6270 | # 'R' is token for closing } at hash key | ||||
| 6271 | 1 | 1µs | @_ = qw" R } ) ] "; | ||
| 6272 | 1 | 1µs | @is_closing_type{@_} = (1) x scalar(@_); | ||
| 6273 | |||||
| 6274 | 1 | 1µs | @_ = qw" { ( [ "; | ||
| 6275 | 1 | 700ns | @is_opening_token{@_} = (1) x scalar(@_); | ||
| 6276 | |||||
| 6277 | 1 | 1µs | @_ = qw" } ) ] "; | ||
| 6278 | 1 | 6µs | @is_closing_token{@_} = (1) x scalar(@_); | ||
| 6279 | 1 | 44µs | 1 | 93µs | } # spent 93µs making 1 call to Perl::Tidy::Formatter::BEGIN@6198 |
| 6280 | |||||
| 6281 | # whitespace codes | ||||
| 6282 | 2 | 23µs | 2 | 82µs | # spent 45µs (8+37) within Perl::Tidy::Formatter::BEGIN@6282 which was called:
# once (8µs+37µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 6282 # spent 45µs making 1 call to Perl::Tidy::Formatter::BEGIN@6282
# spent 37µs making 1 call to constant::import |
| 6283 | 2 | 23µs | 2 | 69µs | # spent 38µs (7+31) within Perl::Tidy::Formatter::BEGIN@6283 which was called:
# once (7µs+31µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 6283 # spent 38µs making 1 call to Perl::Tidy::Formatter::BEGIN@6283
# spent 31µs making 1 call to constant::import |
| 6284 | 2 | 22µs | 2 | 67µs | # spent 36µs (6+30) within Perl::Tidy::Formatter::BEGIN@6284 which was called:
# once (6µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 6284 # spent 36µs making 1 call to Perl::Tidy::Formatter::BEGIN@6284
# spent 30µs making 1 call to constant::import |
| 6285 | |||||
| 6286 | # Token bond strengths. | ||||
| 6287 | 2 | 20µs | 2 | 65µs | # spent 36µs (6+30) within Perl::Tidy::Formatter::BEGIN@6287 which was called:
# once (6µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 6287 # spent 36µs making 1 call to Perl::Tidy::Formatter::BEGIN@6287
# spent 30µs making 1 call to constant::import |
| 6288 | 2 | 20µs | 2 | 67µs | # spent 37µs (7+30) within Perl::Tidy::Formatter::BEGIN@6288 which was called:
# once (7µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 6288 # spent 37µs making 1 call to Perl::Tidy::Formatter::BEGIN@6288
# spent 30µs making 1 call to constant::import |
| 6289 | 2 | 20µs | 2 | 66µs | # spent 36µs (7+30) within Perl::Tidy::Formatter::BEGIN@6289 which was called:
# once (7µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 6289 # spent 36µs making 1 call to Perl::Tidy::Formatter::BEGIN@6289
# spent 30µs making 1 call to constant::import |
| 6290 | 2 | 20µs | 2 | 64µs | # spent 35µs (6+29) within Perl::Tidy::Formatter::BEGIN@6290 which was called:
# once (6µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 6290 # spent 35µs making 1 call to Perl::Tidy::Formatter::BEGIN@6290
# spent 29µs making 1 call to constant::import |
| 6291 | 2 | 20µs | 2 | 64µs | # spent 35µs (6+29) within Perl::Tidy::Formatter::BEGIN@6291 which was called:
# once (6µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 6291 # spent 35µs making 1 call to Perl::Tidy::Formatter::BEGIN@6291
# spent 29µs making 1 call to constant::import |
| 6292 | 2 | 21µs | 2 | 66µs | # spent 36µs (7+30) within Perl::Tidy::Formatter::BEGIN@6292 which was called:
# once (7µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 6292 # spent 36µs making 1 call to Perl::Tidy::Formatter::BEGIN@6292
# spent 30µs making 1 call to constant::import |
| 6293 | |||||
| 6294 | # values for testing indexes in output array | ||||
| 6295 | 2 | 28µs | 2 | 92µs | # spent 49µs (7+42) within Perl::Tidy::Formatter::BEGIN@6295 which was called:
# once (7µs+42µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 6295 # spent 49µs making 1 call to Perl::Tidy::Formatter::BEGIN@6295
# spent 42µs making 1 call to constant::import |
| 6296 | |||||
| 6297 | # Maximum number of little messages; probably need not be changed. | ||||
| 6298 | 2 | 24µs | 2 | 77µs | # spent 42µs (7+35) within Perl::Tidy::Formatter::BEGIN@6298 which was called:
# once (7µs+35µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 6298 # spent 42µs making 1 call to Perl::Tidy::Formatter::BEGIN@6298
# spent 35µs making 1 call to constant::import |
| 6299 | |||||
| 6300 | # increment between sequence numbers for each type | ||||
| 6301 | # For example, ?: pairs might have numbers 7,11,15,... | ||||
| 6302 | 2 | 5.86ms | 2 | 75µs | # spent 41µs (7+34) within Perl::Tidy::Formatter::BEGIN@6302 which was called:
# once (7µs+34µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 6302 # spent 41µs making 1 call to Perl::Tidy::Formatter::BEGIN@6302
# spent 34µs making 1 call to constant::import |
| 6303 | |||||
| 6304 | { | ||||
| 6305 | |||||
| 6306 | # methods to count instances | ||||
| 6307 | 2 | 600ns | my $_count = 0; | ||
| 6308 | sub get_count { $_count; } | ||||
| 6309 | sub _increment_count { ++$_count } | ||||
| 6310 | sub _decrement_count { --$_count } | ||||
| 6311 | } | ||||
| 6312 | |||||
| 6313 | sub trim { | ||||
| 6314 | |||||
| 6315 | # trim leading and trailing whitespace from a string | ||||
| 6316 | $_[0] =~ s/\s+$//; | ||||
| 6317 | $_[0] =~ s/^\s+//; | ||||
| 6318 | return $_[0]; | ||||
| 6319 | } | ||||
| 6320 | |||||
| 6321 | sub max { | ||||
| 6322 | my $max = shift; | ||||
| 6323 | foreach (@_) { | ||||
| 6324 | $max = ( $max < $_ ) ? $_ : $max; | ||||
| 6325 | } | ||||
| 6326 | return $max; | ||||
| 6327 | } | ||||
| 6328 | |||||
| 6329 | sub min { | ||||
| 6330 | my $min = shift; | ||||
| 6331 | foreach (@_) { | ||||
| 6332 | $min = ( $min > $_ ) ? $_ : $min; | ||||
| 6333 | } | ||||
| 6334 | return $min; | ||||
| 6335 | } | ||||
| 6336 | |||||
| 6337 | sub split_words { | ||||
| 6338 | |||||
| 6339 | # given a string containing words separated by whitespace, | ||||
| 6340 | # return the list of words | ||||
| 6341 | my ($str) = @_; | ||||
| 6342 | return unless $str; | ||||
| 6343 | $str =~ s/\s+$//; | ||||
| 6344 | $str =~ s/^\s+//; | ||||
| 6345 | return split( /\s+/, $str ); | ||||
| 6346 | } | ||||
| 6347 | |||||
| 6348 | # interface to Perl::Tidy::Logger routines | ||||
| 6349 | sub warning { | ||||
| 6350 | if ($logger_object) { | ||||
| 6351 | $logger_object->warning(@_); | ||||
| 6352 | } | ||||
| 6353 | } | ||||
| 6354 | |||||
| 6355 | sub complain { | ||||
| 6356 | if ($logger_object) { | ||||
| 6357 | $logger_object->complain(@_); | ||||
| 6358 | } | ||||
| 6359 | } | ||||
| 6360 | |||||
| 6361 | sub write_logfile_entry { | ||||
| 6362 | if ($logger_object) { | ||||
| 6363 | $logger_object->write_logfile_entry(@_); | ||||
| 6364 | } | ||||
| 6365 | } | ||||
| 6366 | |||||
| 6367 | sub black_box { | ||||
| 6368 | if ($logger_object) { | ||||
| 6369 | $logger_object->black_box(@_); | ||||
| 6370 | } | ||||
| 6371 | } | ||||
| 6372 | |||||
| 6373 | sub report_definite_bug { | ||||
| 6374 | if ($logger_object) { | ||||
| 6375 | $logger_object->report_definite_bug(); | ||||
| 6376 | } | ||||
| 6377 | } | ||||
| 6378 | |||||
| 6379 | sub get_saw_brace_error { | ||||
| 6380 | if ($logger_object) { | ||||
| 6381 | $logger_object->get_saw_brace_error(); | ||||
| 6382 | } | ||||
| 6383 | } | ||||
| 6384 | |||||
| 6385 | sub we_are_at_the_last_line { | ||||
| 6386 | if ($logger_object) { | ||||
| 6387 | $logger_object->we_are_at_the_last_line(); | ||||
| 6388 | } | ||||
| 6389 | } | ||||
| 6390 | |||||
| 6391 | # interface to Perl::Tidy::Diagnostics routine | ||||
| 6392 | sub write_diagnostics { | ||||
| 6393 | |||||
| 6394 | if ($diagnostics_object) { | ||||
| 6395 | $diagnostics_object->write_diagnostics(@_); | ||||
| 6396 | } | ||||
| 6397 | } | ||||
| 6398 | |||||
| 6399 | sub get_added_semicolon_count { | ||||
| 6400 | my $self = shift; | ||||
| 6401 | return $added_semicolon_count; | ||||
| 6402 | } | ||||
| 6403 | |||||
| 6404 | sub DESTROY { | ||||
| 6405 | $_[0]->_decrement_count(); | ||||
| 6406 | } | ||||
| 6407 | |||||
| 6408 | sub new { | ||||
| 6409 | |||||
| 6410 | my $class = shift; | ||||
| 6411 | |||||
| 6412 | # we are given an object with a write_line() method to take lines | ||||
| 6413 | my %defaults = ( | ||||
| 6414 | sink_object => undef, | ||||
| 6415 | diagnostics_object => undef, | ||||
| 6416 | logger_object => undef, | ||||
| 6417 | ); | ||||
| 6418 | my %args = ( %defaults, @_ ); | ||||
| 6419 | |||||
| 6420 | $logger_object = $args{logger_object}; | ||||
| 6421 | $diagnostics_object = $args{diagnostics_object}; | ||||
| 6422 | |||||
| 6423 | # we create another object with a get_line() and peek_ahead() method | ||||
| 6424 | my $sink_object = $args{sink_object}; | ||||
| 6425 | $file_writer_object = | ||||
| 6426 | Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object ); | ||||
| 6427 | |||||
| 6428 | # initialize the leading whitespace stack to negative levels | ||||
| 6429 | # so that we can never run off the end of the stack | ||||
| 6430 | $gnu_position_predictor = 0; # where the current token is predicted to be | ||||
| 6431 | $max_gnu_stack_index = 0; | ||||
| 6432 | $max_gnu_item_index = -1; | ||||
| 6433 | $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 ); | ||||
| 6434 | @gnu_item_list = (); | ||||
| 6435 | $last_output_indentation = 0; | ||||
| 6436 | $last_indentation_written = 0; | ||||
| 6437 | $last_unadjusted_indentation = 0; | ||||
| 6438 | $last_leading_token = ""; | ||||
| 6439 | $last_output_short_opening_token = 0; | ||||
| 6440 | |||||
| 6441 | $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'}; | ||||
| 6442 | $saw_END_or_DATA_ = 0; | ||||
| 6443 | |||||
| 6444 | @block_type_to_go = (); | ||||
| 6445 | @type_sequence_to_go = (); | ||||
| 6446 | @container_environment_to_go = (); | ||||
| 6447 | @bond_strength_to_go = (); | ||||
| 6448 | @forced_breakpoint_to_go = (); | ||||
| 6449 | @summed_lengths_to_go = (); # line length to start of ith token | ||||
| 6450 | @token_lengths_to_go = (); | ||||
| 6451 | @levels_to_go = (); | ||||
| 6452 | @matching_token_to_go = (); | ||||
| 6453 | @mate_index_to_go = (); | ||||
| 6454 | @nesting_blocks_to_go = (); | ||||
| 6455 | @ci_levels_to_go = (); | ||||
| 6456 | @nesting_depth_to_go = (0); | ||||
| 6457 | @nobreak_to_go = (); | ||||
| 6458 | @old_breakpoint_to_go = (); | ||||
| 6459 | @tokens_to_go = (); | ||||
| 6460 | @types_to_go = (); | ||||
| 6461 | @leading_spaces_to_go = (); | ||||
| 6462 | @reduced_spaces_to_go = (); | ||||
| 6463 | @inext_to_go = (); | ||||
| 6464 | @iprev_to_go = (); | ||||
| 6465 | |||||
| 6466 | @whitespace_level_stack = (); | ||||
| 6467 | $whitespace_last_level = -1; | ||||
| 6468 | |||||
| 6469 | @dont_align = (); | ||||
| 6470 | @has_broken_sublist = (); | ||||
| 6471 | @want_comma_break = (); | ||||
| 6472 | |||||
| 6473 | @ci_stack = (""); | ||||
| 6474 | $first_tabbing_disagreement = 0; | ||||
| 6475 | $last_tabbing_disagreement = 0; | ||||
| 6476 | $tabbing_disagreement_count = 0; | ||||
| 6477 | $in_tabbing_disagreement = 0; | ||||
| 6478 | $input_line_tabbing = undef; | ||||
| 6479 | |||||
| 6480 | $last_line_type = ""; | ||||
| 6481 | $last_last_line_leading_level = 0; | ||||
| 6482 | $last_line_leading_level = 0; | ||||
| 6483 | $last_line_leading_type = '#'; | ||||
| 6484 | |||||
| 6485 | $last_nonblank_token = ';'; | ||||
| 6486 | $last_nonblank_type = ';'; | ||||
| 6487 | $last_last_nonblank_token = ';'; | ||||
| 6488 | $last_last_nonblank_type = ';'; | ||||
| 6489 | $last_nonblank_block_type = ""; | ||||
| 6490 | $last_output_level = 0; | ||||
| 6491 | $looking_for_else = 0; | ||||
| 6492 | $embedded_tab_count = 0; | ||||
| 6493 | $first_embedded_tab_at = 0; | ||||
| 6494 | $last_embedded_tab_at = 0; | ||||
| 6495 | $deleted_semicolon_count = 0; | ||||
| 6496 | $first_deleted_semicolon_at = 0; | ||||
| 6497 | $last_deleted_semicolon_at = 0; | ||||
| 6498 | $added_semicolon_count = 0; | ||||
| 6499 | $first_added_semicolon_at = 0; | ||||
| 6500 | $last_added_semicolon_at = 0; | ||||
| 6501 | $last_line_had_side_comment = 0; | ||||
| 6502 | $is_static_block_comment = 0; | ||||
| 6503 | %postponed_breakpoint = (); | ||||
| 6504 | |||||
| 6505 | # variables for adding side comments | ||||
| 6506 | %block_leading_text = (); | ||||
| 6507 | %block_opening_line_number = (); | ||||
| 6508 | $csc_new_statement_ok = 1; | ||||
| 6509 | %csc_block_label = (); | ||||
| 6510 | |||||
| 6511 | %saved_opening_indentation = (); | ||||
| 6512 | $in_format_skipping_section = 0; | ||||
| 6513 | |||||
| 6514 | reset_block_text_accumulator(); | ||||
| 6515 | |||||
| 6516 | prepare_for_new_input_lines(); | ||||
| 6517 | |||||
| 6518 | $vertical_aligner_object = | ||||
| 6519 | Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object, | ||||
| 6520 | $logger_object, $diagnostics_object ); | ||||
| 6521 | |||||
| 6522 | if ( $rOpts->{'entab-leading-whitespace'} ) { | ||||
| 6523 | write_logfile_entry( | ||||
| 6524 | "Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n" | ||||
| 6525 | ); | ||||
| 6526 | } | ||||
| 6527 | elsif ( $rOpts->{'tabs'} ) { | ||||
| 6528 | write_logfile_entry("Indentation will be with a tab character\n"); | ||||
| 6529 | } | ||||
| 6530 | else { | ||||
| 6531 | write_logfile_entry( | ||||
| 6532 | "Indentation will be with $rOpts->{'indent-columns'} spaces\n"); | ||||
| 6533 | } | ||||
| 6534 | |||||
| 6535 | # This was the start of a formatter referent, but object-oriented | ||||
| 6536 | # coding has turned out to be too slow here. | ||||
| 6537 | $formatter_self = {}; | ||||
| 6538 | |||||
| 6539 | bless $formatter_self, $class; | ||||
| 6540 | |||||
| 6541 | # Safety check..this is not a class yet | ||||
| 6542 | if ( _increment_count() > 1 ) { | ||||
| 6543 | confess | ||||
| 6544 | "Attempt to create more than 1 object in $class, which is not a true class yet\n"; | ||||
| 6545 | } | ||||
| 6546 | return $formatter_self; | ||||
| 6547 | } | ||||
| 6548 | |||||
| 6549 | sub prepare_for_new_input_lines { | ||||
| 6550 | |||||
| 6551 | $gnu_sequence_number++; # increment output batch counter | ||||
| 6552 | %last_gnu_equals = (); | ||||
| 6553 | %gnu_comma_count = (); | ||||
| 6554 | %gnu_arrow_count = (); | ||||
| 6555 | $line_start_index_to_go = 0; | ||||
| 6556 | $max_gnu_item_index = UNDEFINED_INDEX; | ||||
| 6557 | $index_max_forced_break = UNDEFINED_INDEX; | ||||
| 6558 | $max_index_to_go = UNDEFINED_INDEX; | ||||
| 6559 | $last_nonblank_index_to_go = UNDEFINED_INDEX; | ||||
| 6560 | $last_nonblank_type_to_go = ''; | ||||
| 6561 | $last_nonblank_token_to_go = ''; | ||||
| 6562 | $last_last_nonblank_index_to_go = UNDEFINED_INDEX; | ||||
| 6563 | $last_last_nonblank_type_to_go = ''; | ||||
| 6564 | $last_last_nonblank_token_to_go = ''; | ||||
| 6565 | $forced_breakpoint_count = 0; | ||||
| 6566 | $forced_breakpoint_undo_count = 0; | ||||
| 6567 | $rbrace_follower = undef; | ||||
| 6568 | $summed_lengths_to_go[0] = 0; | ||||
| 6569 | $old_line_count_in_batch = 1; | ||||
| 6570 | $comma_count_in_batch = 0; | ||||
| 6571 | $starting_in_quote = 0; | ||||
| 6572 | |||||
| 6573 | destroy_one_line_block(); | ||||
| 6574 | } | ||||
| 6575 | |||||
| 6576 | sub write_line { | ||||
| 6577 | |||||
| 6578 | my $self = shift; | ||||
| 6579 | my ($line_of_tokens) = @_; | ||||
| 6580 | |||||
| 6581 | my $line_type = $line_of_tokens->{_line_type}; | ||||
| 6582 | my $input_line = $line_of_tokens->{_line_text}; | ||||
| 6583 | |||||
| 6584 | if ( $rOpts->{notidy} ) { | ||||
| 6585 | write_unindented_line($input_line); | ||||
| 6586 | $last_line_type = $line_type; | ||||
| 6587 | return; | ||||
| 6588 | } | ||||
| 6589 | |||||
| 6590 | # _line_type codes are: | ||||
| 6591 | # SYSTEM - system-specific code before hash-bang line | ||||
| 6592 | # CODE - line of perl code (including comments) | ||||
| 6593 | # POD_START - line starting pod, such as '=head' | ||||
| 6594 | # POD - pod documentation text | ||||
| 6595 | # POD_END - last line of pod section, '=cut' | ||||
| 6596 | # HERE - text of here-document | ||||
| 6597 | # HERE_END - last line of here-doc (target word) | ||||
| 6598 | # FORMAT - format section | ||||
| 6599 | # FORMAT_END - last line of format section, '.' | ||||
| 6600 | # DATA_START - __DATA__ line | ||||
| 6601 | # DATA - unidentified text following __DATA__ | ||||
| 6602 | # END_START - __END__ line | ||||
| 6603 | # END - unidentified text following __END__ | ||||
| 6604 | # ERROR - we are in big trouble, probably not a perl script | ||||
| 6605 | |||||
| 6606 | # put a blank line after an =cut which comes before __END__ and __DATA__ | ||||
| 6607 | # (required by podchecker) | ||||
| 6608 | if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) { | ||||
| 6609 | $file_writer_object->reset_consecutive_blank_lines(); | ||||
| 6610 | if ( $input_line !~ /^\s*$/ ) { want_blank_line() } | ||||
| 6611 | } | ||||
| 6612 | |||||
| 6613 | # handle line of code.. | ||||
| 6614 | if ( $line_type eq 'CODE' ) { | ||||
| 6615 | |||||
| 6616 | # let logger see all non-blank lines of code | ||||
| 6617 | if ( $input_line !~ /^\s*$/ ) { | ||||
| 6618 | my $output_line_number = | ||||
| 6619 | $vertical_aligner_object->get_output_line_number(); | ||||
| 6620 | black_box( $line_of_tokens, $output_line_number ); | ||||
| 6621 | } | ||||
| 6622 | print_line_of_tokens($line_of_tokens); | ||||
| 6623 | } | ||||
| 6624 | |||||
| 6625 | # handle line of non-code.. | ||||
| 6626 | else { | ||||
| 6627 | |||||
| 6628 | # set special flags | ||||
| 6629 | my $skip_line = 0; | ||||
| 6630 | my $tee_line = 0; | ||||
| 6631 | if ( $line_type =~ /^POD/ ) { | ||||
| 6632 | |||||
| 6633 | # Pod docs should have a preceding blank line. But stay | ||||
| 6634 | # out of __END__ and __DATA__ sections, because | ||||
| 6635 | # the user may be using this section for any purpose whatsoever | ||||
| 6636 | if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; } | ||||
| 6637 | if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; } | ||||
| 6638 | if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// } | ||||
| 6639 | if ( !$skip_line | ||||
| 6640 | && $line_type eq 'POD_START' | ||||
| 6641 | && !$saw_END_or_DATA_ ) | ||||
| 6642 | { | ||||
| 6643 | want_blank_line(); | ||||
| 6644 | } | ||||
| 6645 | } | ||||
| 6646 | |||||
| 6647 | # leave the blank counters in a predictable state | ||||
| 6648 | # after __END__ or __DATA__ | ||||
| 6649 | elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) { | ||||
| 6650 | $file_writer_object->reset_consecutive_blank_lines(); | ||||
| 6651 | $saw_END_or_DATA_ = 1; | ||||
| 6652 | } | ||||
| 6653 | |||||
| 6654 | # write unindented non-code line | ||||
| 6655 | if ( !$skip_line ) { | ||||
| 6656 | if ($tee_line) { $file_writer_object->tee_on() } | ||||
| 6657 | write_unindented_line($input_line); | ||||
| 6658 | if ($tee_line) { $file_writer_object->tee_off() } | ||||
| 6659 | } | ||||
| 6660 | } | ||||
| 6661 | $last_line_type = $line_type; | ||||
| 6662 | } | ||||
| 6663 | |||||
| 6664 | sub create_one_line_block { | ||||
| 6665 | $index_start_one_line_block = $_[0]; | ||||
| 6666 | $semicolons_before_block_self_destruct = $_[1]; | ||||
| 6667 | } | ||||
| 6668 | |||||
| 6669 | sub destroy_one_line_block { | ||||
| 6670 | $index_start_one_line_block = UNDEFINED_INDEX; | ||||
| 6671 | $semicolons_before_block_self_destruct = 0; | ||||
| 6672 | } | ||||
| 6673 | |||||
| 6674 | sub leading_spaces_to_go { | ||||
| 6675 | |||||
| 6676 | # return the number of indentation spaces for a token in the output stream; | ||||
| 6677 | # these were previously stored by 'set_leading_whitespace'. | ||||
| 6678 | |||||
| 6679 | my $ii = shift; | ||||
| 6680 | if ( $ii < 0 ) { $ii = 0 } | ||||
| 6681 | return get_SPACES( $leading_spaces_to_go[$ii] ); | ||||
| 6682 | |||||
| 6683 | } | ||||
| 6684 | |||||
| 6685 | sub get_SPACES { | ||||
| 6686 | |||||
| 6687 | # return the number of leading spaces associated with an indentation | ||||
| 6688 | # variable $indentation is either a constant number of spaces or an object | ||||
| 6689 | # with a get_SPACES method. | ||||
| 6690 | my $indentation = shift; | ||||
| 6691 | return ref($indentation) ? $indentation->get_SPACES() : $indentation; | ||||
| 6692 | } | ||||
| 6693 | |||||
| 6694 | sub get_RECOVERABLE_SPACES { | ||||
| 6695 | |||||
| 6696 | # return the number of spaces (+ means shift right, - means shift left) | ||||
| 6697 | # that we would like to shift a group of lines with the same indentation | ||||
| 6698 | # to get them to line up with their opening parens | ||||
| 6699 | my $indentation = shift; | ||||
| 6700 | return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0; | ||||
| 6701 | } | ||||
| 6702 | |||||
| 6703 | sub get_AVAILABLE_SPACES_to_go { | ||||
| 6704 | |||||
| 6705 | my $item = $leading_spaces_to_go[ $_[0] ]; | ||||
| 6706 | |||||
| 6707 | # return the number of available leading spaces associated with an | ||||
| 6708 | # indentation variable. $indentation is either a constant number of | ||||
| 6709 | # spaces or an object with a get_AVAILABLE_SPACES method. | ||||
| 6710 | return ref($item) ? $item->get_AVAILABLE_SPACES() : 0; | ||||
| 6711 | } | ||||
| 6712 | |||||
| 6713 | sub new_lp_indentation_item { | ||||
| 6714 | |||||
| 6715 | # this is an interface to the IndentationItem class | ||||
| 6716 | my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_; | ||||
| 6717 | |||||
| 6718 | # A negative level implies not to store the item in the item_list | ||||
| 6719 | my $index = 0; | ||||
| 6720 | if ( $level >= 0 ) { $index = ++$max_gnu_item_index; } | ||||
| 6721 | |||||
| 6722 | my $item = Perl::Tidy::IndentationItem->new( | ||||
| 6723 | $spaces, $level, | ||||
| 6724 | $ci_level, $available_spaces, | ||||
| 6725 | $index, $gnu_sequence_number, | ||||
| 6726 | $align_paren, $max_gnu_stack_index, | ||||
| 6727 | $line_start_index_to_go, | ||||
| 6728 | ); | ||||
| 6729 | |||||
| 6730 | if ( $level >= 0 ) { | ||||
| 6731 | $gnu_item_list[$max_gnu_item_index] = $item; | ||||
| 6732 | } | ||||
| 6733 | |||||
| 6734 | return $item; | ||||
| 6735 | } | ||||
| 6736 | |||||
| 6737 | sub set_leading_whitespace { | ||||
| 6738 | |||||
| 6739 | # This routine defines leading whitespace | ||||
| 6740 | # given: the level and continuation_level of a token, | ||||
| 6741 | # define: space count of leading string which would apply if it | ||||
| 6742 | # were the first token of a new line. | ||||
| 6743 | |||||
| 6744 | my ( $level_abs, $ci_level, $in_continued_quote ) = @_; | ||||
| 6745 | |||||
| 6746 | # Adjust levels if necessary to recycle whitespace: | ||||
| 6747 | # given $level_abs, the absolute level | ||||
| 6748 | # define $level, a possibly reduced level for whitespace | ||||
| 6749 | my $level = $level_abs; | ||||
| 6750 | if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle > 0 ) { | ||||
| 6751 | if ( $level_abs < $whitespace_last_level ) { | ||||
| 6752 | pop(@whitespace_level_stack); | ||||
| 6753 | } | ||||
| 6754 | if ( !@whitespace_level_stack ) { | ||||
| 6755 | push @whitespace_level_stack, $level_abs; | ||||
| 6756 | } | ||||
| 6757 | elsif ( $level_abs > $whitespace_last_level ) { | ||||
| 6758 | $level = $whitespace_level_stack[-1] + | ||||
| 6759 | ( $level_abs - $whitespace_last_level ); | ||||
| 6760 | |||||
| 6761 | if ( | ||||
| 6762 | # 1 Try to break at a block brace | ||||
| 6763 | ( | ||||
| 6764 | $level > $rOpts_whitespace_cycle | ||||
| 6765 | && $last_nonblank_type eq '{' | ||||
| 6766 | && $last_nonblank_token eq '{' | ||||
| 6767 | ) | ||||
| 6768 | |||||
| 6769 | # 2 Then either a brace or bracket | ||||
| 6770 | || ( $level > $rOpts_whitespace_cycle + 1 | ||||
| 6771 | && $last_nonblank_token =~ /^[\{\[]$/ ) | ||||
| 6772 | |||||
| 6773 | # 3 Then a paren too | ||||
| 6774 | || $level > $rOpts_whitespace_cycle + 2 | ||||
| 6775 | ) | ||||
| 6776 | { | ||||
| 6777 | $level = 1; | ||||
| 6778 | } | ||||
| 6779 | push @whitespace_level_stack, $level; | ||||
| 6780 | } | ||||
| 6781 | $level = $whitespace_level_stack[-1]; | ||||
| 6782 | } | ||||
| 6783 | $whitespace_last_level = $level_abs; | ||||
| 6784 | |||||
| 6785 | # modify for -bli, which adds one continuation indentation for | ||||
| 6786 | # opening braces | ||||
| 6787 | if ( $rOpts_brace_left_and_indent | ||||
| 6788 | && $max_index_to_go == 0 | ||||
| 6789 | && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o ) | ||||
| 6790 | { | ||||
| 6791 | $ci_level++; | ||||
| 6792 | } | ||||
| 6793 | |||||
| 6794 | # patch to avoid trouble when input file has negative indentation. | ||||
| 6795 | # other logic should catch this error. | ||||
| 6796 | if ( $level < 0 ) { $level = 0 } | ||||
| 6797 | |||||
| 6798 | #------------------------------------------- | ||||
| 6799 | # handle the standard indentation scheme | ||||
| 6800 | #------------------------------------------- | ||||
| 6801 | unless ($rOpts_line_up_parentheses) { | ||||
| 6802 | my $space_count = | ||||
| 6803 | $ci_level * $rOpts_continuation_indentation + | ||||
| 6804 | $level * $rOpts_indent_columns; | ||||
| 6805 | my $ci_spaces = | ||||
| 6806 | ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation; | ||||
| 6807 | |||||
| 6808 | if ($in_continued_quote) { | ||||
| 6809 | $space_count = 0; | ||||
| 6810 | $ci_spaces = 0; | ||||
| 6811 | } | ||||
| 6812 | $leading_spaces_to_go[$max_index_to_go] = $space_count; | ||||
| 6813 | $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces; | ||||
| 6814 | return; | ||||
| 6815 | } | ||||
| 6816 | |||||
| 6817 | #------------------------------------------------------------- | ||||
| 6818 | # handle case of -lp indentation.. | ||||
| 6819 | #------------------------------------------------------------- | ||||
| 6820 | |||||
| 6821 | # The continued_quote flag means that this is the first token of a | ||||
| 6822 | # line, and it is the continuation of some kind of multi-line quote | ||||
| 6823 | # or pattern. It requires special treatment because it must have no | ||||
| 6824 | # added leading whitespace. So we create a special indentation item | ||||
| 6825 | # which is not in the stack. | ||||
| 6826 | if ($in_continued_quote) { | ||||
| 6827 | my $space_count = 0; | ||||
| 6828 | my $available_space = 0; | ||||
| 6829 | $level = -1; # flag to prevent storing in item_list | ||||
| 6830 | $leading_spaces_to_go[$max_index_to_go] = | ||||
| 6831 | $reduced_spaces_to_go[$max_index_to_go] = | ||||
| 6832 | new_lp_indentation_item( $space_count, $level, $ci_level, | ||||
| 6833 | $available_space, 0 ); | ||||
| 6834 | return; | ||||
| 6835 | } | ||||
| 6836 | |||||
| 6837 | # get the top state from the stack | ||||
| 6838 | my $space_count = $gnu_stack[$max_gnu_stack_index]->get_SPACES(); | ||||
| 6839 | my $current_level = $gnu_stack[$max_gnu_stack_index]->get_LEVEL(); | ||||
| 6840 | my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL(); | ||||
| 6841 | |||||
| 6842 | my $type = $types_to_go[$max_index_to_go]; | ||||
| 6843 | my $token = $tokens_to_go[$max_index_to_go]; | ||||
| 6844 | my $total_depth = $nesting_depth_to_go[$max_index_to_go]; | ||||
| 6845 | |||||
| 6846 | if ( $type eq '{' || $type eq '(' ) { | ||||
| 6847 | |||||
| 6848 | $gnu_comma_count{ $total_depth + 1 } = 0; | ||||
| 6849 | $gnu_arrow_count{ $total_depth + 1 } = 0; | ||||
| 6850 | |||||
| 6851 | # If we come to an opening token after an '=' token of some type, | ||||
| 6852 | # see if it would be helpful to 'break' after the '=' to save space | ||||
| 6853 | my $last_equals = $last_gnu_equals{$total_depth}; | ||||
| 6854 | if ( $last_equals && $last_equals > $line_start_index_to_go ) { | ||||
| 6855 | |||||
| 6856 | # find the position if we break at the '=' | ||||
| 6857 | my $i_test = $last_equals; | ||||
| 6858 | if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ } | ||||
| 6859 | |||||
| 6860 | # TESTING | ||||
| 6861 | ##my $too_close = ($i_test==$max_index_to_go-1); | ||||
| 6862 | |||||
| 6863 | my $test_position = total_line_length( $i_test, $max_index_to_go ); | ||||
| 6864 | my $mll = maximum_line_length($i_test); | ||||
| 6865 | |||||
| 6866 | if ( | ||||
| 6867 | |||||
| 6868 | # the equals is not just before an open paren (testing) | ||||
| 6869 | ##!$too_close && | ||||
| 6870 | |||||
| 6871 | # if we are beyond the midpoint | ||||
| 6872 | $gnu_position_predictor > $mll - $rOpts_maximum_line_length / 2 | ||||
| 6873 | |||||
| 6874 | # or we are beyond the 1/4 point and there was an old | ||||
| 6875 | # break at the equals | ||||
| 6876 | || ( | ||||
| 6877 | $gnu_position_predictor > | ||||
| 6878 | $mll - $rOpts_maximum_line_length * 3 / 4 | ||||
| 6879 | && ( | ||||
| 6880 | $old_breakpoint_to_go[$last_equals] | ||||
| 6881 | || ( $last_equals > 0 | ||||
| 6882 | && $old_breakpoint_to_go[ $last_equals - 1 ] ) | ||||
| 6883 | || ( $last_equals > 1 | ||||
| 6884 | && $types_to_go[ $last_equals - 1 ] eq 'b' | ||||
| 6885 | && $old_breakpoint_to_go[ $last_equals - 2 ] ) | ||||
| 6886 | ) | ||||
| 6887 | ) | ||||
| 6888 | ) | ||||
| 6889 | { | ||||
| 6890 | |||||
| 6891 | # then make the switch -- note that we do not set a real | ||||
| 6892 | # breakpoint here because we may not really need one; sub | ||||
| 6893 | # scan_list will do that if necessary | ||||
| 6894 | $line_start_index_to_go = $i_test + 1; | ||||
| 6895 | $gnu_position_predictor = $test_position; | ||||
| 6896 | } | ||||
| 6897 | } | ||||
| 6898 | } | ||||
| 6899 | |||||
| 6900 | my $halfway = | ||||
| 6901 | maximum_line_length_for_level($level) - $rOpts_maximum_line_length / 2; | ||||
| 6902 | |||||
| 6903 | # Check for decreasing depth .. | ||||
| 6904 | # Note that one token may have both decreasing and then increasing | ||||
| 6905 | # depth. For example, (level, ci) can go from (1,1) to (2,0). So, | ||||
| 6906 | # in this example we would first go back to (1,0) then up to (2,0) | ||||
| 6907 | # in a single call. | ||||
| 6908 | if ( $level < $current_level || $ci_level < $current_ci_level ) { | ||||
| 6909 | |||||
| 6910 | # loop to find the first entry at or completely below this level | ||||
| 6911 | my ( $lev, $ci_lev ); | ||||
| 6912 | while (1) { | ||||
| 6913 | if ($max_gnu_stack_index) { | ||||
| 6914 | |||||
| 6915 | # save index of token which closes this level | ||||
| 6916 | $gnu_stack[$max_gnu_stack_index]->set_CLOSED($max_index_to_go); | ||||
| 6917 | |||||
| 6918 | # Undo any extra indentation if we saw no commas | ||||
| 6919 | my $available_spaces = | ||||
| 6920 | $gnu_stack[$max_gnu_stack_index]->get_AVAILABLE_SPACES(); | ||||
| 6921 | |||||
| 6922 | my $comma_count = 0; | ||||
| 6923 | my $arrow_count = 0; | ||||
| 6924 | if ( $type eq '}' || $type eq ')' ) { | ||||
| 6925 | $comma_count = $gnu_comma_count{$total_depth}; | ||||
| 6926 | $arrow_count = $gnu_arrow_count{$total_depth}; | ||||
| 6927 | $comma_count = 0 unless $comma_count; | ||||
| 6928 | $arrow_count = 0 unless $arrow_count; | ||||
| 6929 | } | ||||
| 6930 | $gnu_stack[$max_gnu_stack_index]->set_COMMA_COUNT($comma_count); | ||||
| 6931 | $gnu_stack[$max_gnu_stack_index]->set_ARROW_COUNT($arrow_count); | ||||
| 6932 | |||||
| 6933 | if ( $available_spaces > 0 ) { | ||||
| 6934 | |||||
| 6935 | if ( $comma_count <= 0 || $arrow_count > 0 ) { | ||||
| 6936 | |||||
| 6937 | my $i = $gnu_stack[$max_gnu_stack_index]->get_INDEX(); | ||||
| 6938 | my $seqno = | ||||
| 6939 | $gnu_stack[$max_gnu_stack_index] | ||||
| 6940 | ->get_SEQUENCE_NUMBER(); | ||||
| 6941 | |||||
| 6942 | # Be sure this item was created in this batch. This | ||||
| 6943 | # should be true because we delete any available | ||||
| 6944 | # space from open items at the end of each batch. | ||||
| 6945 | if ( $gnu_sequence_number != $seqno | ||||
| 6946 | || $i > $max_gnu_item_index ) | ||||
| 6947 | { | ||||
| 6948 | warning( | ||||
| 6949 | "Program bug with -lp. seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n" | ||||
| 6950 | ); | ||||
| 6951 | report_definite_bug(); | ||||
| 6952 | } | ||||
| 6953 | |||||
| 6954 | else { | ||||
| 6955 | if ( $arrow_count == 0 ) { | ||||
| 6956 | $gnu_item_list[$i] | ||||
| 6957 | ->permanently_decrease_AVAILABLE_SPACES( | ||||
| 6958 | $available_spaces); | ||||
| 6959 | } | ||||
| 6960 | else { | ||||
| 6961 | $gnu_item_list[$i] | ||||
| 6962 | ->tentatively_decrease_AVAILABLE_SPACES( | ||||
| 6963 | $available_spaces); | ||||
| 6964 | } | ||||
| 6965 | |||||
| 6966 | my $j; | ||||
| 6967 | for ( | ||||
| 6968 | $j = $i + 1 ; | ||||
| 6969 | $j <= $max_gnu_item_index ; | ||||
| 6970 | $j++ | ||||
| 6971 | ) | ||||
| 6972 | { | ||||
| 6973 | $gnu_item_list[$j] | ||||
| 6974 | ->decrease_SPACES($available_spaces); | ||||
| 6975 | } | ||||
| 6976 | } | ||||
| 6977 | } | ||||
| 6978 | } | ||||
| 6979 | |||||
| 6980 | # go down one level | ||||
| 6981 | --$max_gnu_stack_index; | ||||
| 6982 | $lev = $gnu_stack[$max_gnu_stack_index]->get_LEVEL(); | ||||
| 6983 | $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL(); | ||||
| 6984 | |||||
| 6985 | # stop when we reach a level at or below the current level | ||||
| 6986 | if ( $lev <= $level && $ci_lev <= $ci_level ) { | ||||
| 6987 | $space_count = | ||||
| 6988 | $gnu_stack[$max_gnu_stack_index]->get_SPACES(); | ||||
| 6989 | $current_level = $lev; | ||||
| 6990 | $current_ci_level = $ci_lev; | ||||
| 6991 | last; | ||||
| 6992 | } | ||||
| 6993 | } | ||||
| 6994 | |||||
| 6995 | # reached bottom of stack .. should never happen because | ||||
| 6996 | # only negative levels can get here, and $level was forced | ||||
| 6997 | # to be positive above. | ||||
| 6998 | else { | ||||
| 6999 | warning( | ||||
| 7000 | "program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n" | ||||
| 7001 | ); | ||||
| 7002 | report_definite_bug(); | ||||
| 7003 | last; | ||||
| 7004 | } | ||||
| 7005 | } | ||||
| 7006 | } | ||||
| 7007 | |||||
| 7008 | # handle increasing depth | ||||
| 7009 | if ( $level > $current_level || $ci_level > $current_ci_level ) { | ||||
| 7010 | |||||
| 7011 | # Compute the standard incremental whitespace. This will be | ||||
| 7012 | # the minimum incremental whitespace that will be used. This | ||||
| 7013 | # choice results in a smooth transition between the gnu-style | ||||
| 7014 | # and the standard style. | ||||
| 7015 | my $standard_increment = | ||||
| 7016 | ( $level - $current_level ) * $rOpts_indent_columns + | ||||
| 7017 | ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation; | ||||
| 7018 | |||||
| 7019 | # Now we have to define how much extra incremental space | ||||
| 7020 | # ("$available_space") we want. This extra space will be | ||||
| 7021 | # reduced as necessary when long lines are encountered or when | ||||
| 7022 | # it becomes clear that we do not have a good list. | ||||
| 7023 | my $available_space = 0; | ||||
| 7024 | my $align_paren = 0; | ||||
| 7025 | my $excess = 0; | ||||
| 7026 | |||||
| 7027 | # initialization on empty stack.. | ||||
| 7028 | if ( $max_gnu_stack_index == 0 ) { | ||||
| 7029 | $space_count = $level * $rOpts_indent_columns; | ||||
| 7030 | } | ||||
| 7031 | |||||
| 7032 | # if this is a BLOCK, add the standard increment | ||||
| 7033 | elsif ($last_nonblank_block_type) { | ||||
| 7034 | $space_count += $standard_increment; | ||||
| 7035 | } | ||||
| 7036 | |||||
| 7037 | # if last nonblank token was not structural indentation, | ||||
| 7038 | # just use standard increment | ||||
| 7039 | elsif ( $last_nonblank_type ne '{' ) { | ||||
| 7040 | $space_count += $standard_increment; | ||||
| 7041 | } | ||||
| 7042 | |||||
| 7043 | # otherwise use the space to the first non-blank level change token | ||||
| 7044 | else { | ||||
| 7045 | |||||
| 7046 | $space_count = $gnu_position_predictor; | ||||
| 7047 | |||||
| 7048 | my $min_gnu_indentation = | ||||
| 7049 | $gnu_stack[$max_gnu_stack_index]->get_SPACES(); | ||||
| 7050 | |||||
| 7051 | $available_space = $space_count - $min_gnu_indentation; | ||||
| 7052 | if ( $available_space >= $standard_increment ) { | ||||
| 7053 | $min_gnu_indentation += $standard_increment; | ||||
| 7054 | } | ||||
| 7055 | elsif ( $available_space > 1 ) { | ||||
| 7056 | $min_gnu_indentation += $available_space + 1; | ||||
| 7057 | } | ||||
| 7058 | elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) { | ||||
| 7059 | if ( ( $tightness{$last_nonblank_token} < 2 ) ) { | ||||
| 7060 | $min_gnu_indentation += 2; | ||||
| 7061 | } | ||||
| 7062 | else { | ||||
| 7063 | $min_gnu_indentation += 1; | ||||
| 7064 | } | ||||
| 7065 | } | ||||
| 7066 | else { | ||||
| 7067 | $min_gnu_indentation += $standard_increment; | ||||
| 7068 | } | ||||
| 7069 | $available_space = $space_count - $min_gnu_indentation; | ||||
| 7070 | |||||
| 7071 | if ( $available_space < 0 ) { | ||||
| 7072 | $space_count = $min_gnu_indentation; | ||||
| 7073 | $available_space = 0; | ||||
| 7074 | } | ||||
| 7075 | $align_paren = 1; | ||||
| 7076 | } | ||||
| 7077 | |||||
| 7078 | # update state, but not on a blank token | ||||
| 7079 | if ( $types_to_go[$max_index_to_go] ne 'b' ) { | ||||
| 7080 | |||||
| 7081 | $gnu_stack[$max_gnu_stack_index]->set_HAVE_CHILD(1); | ||||
| 7082 | |||||
| 7083 | ++$max_gnu_stack_index; | ||||
| 7084 | $gnu_stack[$max_gnu_stack_index] = | ||||
| 7085 | new_lp_indentation_item( $space_count, $level, $ci_level, | ||||
| 7086 | $available_space, $align_paren ); | ||||
| 7087 | |||||
| 7088 | # If the opening paren is beyond the half-line length, then | ||||
| 7089 | # we will use the minimum (standard) indentation. This will | ||||
| 7090 | # help avoid problems associated with running out of space | ||||
| 7091 | # near the end of a line. As a result, in deeply nested | ||||
| 7092 | # lists, there will be some indentations which are limited | ||||
| 7093 | # to this minimum standard indentation. But the most deeply | ||||
| 7094 | # nested container will still probably be able to shift its | ||||
| 7095 | # parameters to the right for proper alignment, so in most | ||||
| 7096 | # cases this will not be noticeable. | ||||
| 7097 | if ( $available_space > 0 && $space_count > $halfway ) { | ||||
| 7098 | $gnu_stack[$max_gnu_stack_index] | ||||
| 7099 | ->tentatively_decrease_AVAILABLE_SPACES($available_space); | ||||
| 7100 | } | ||||
| 7101 | } | ||||
| 7102 | } | ||||
| 7103 | |||||
| 7104 | # Count commas and look for non-list characters. Once we see a | ||||
| 7105 | # non-list character, we give up and don't look for any more commas. | ||||
| 7106 | if ( $type eq '=>' ) { | ||||
| 7107 | $gnu_arrow_count{$total_depth}++; | ||||
| 7108 | |||||
| 7109 | # tentatively treating '=>' like '=' for estimating breaks | ||||
| 7110 | # TODO: this could use some experimentation | ||||
| 7111 | $last_gnu_equals{$total_depth} = $max_index_to_go; | ||||
| 7112 | } | ||||
| 7113 | |||||
| 7114 | elsif ( $type eq ',' ) { | ||||
| 7115 | $gnu_comma_count{$total_depth}++; | ||||
| 7116 | } | ||||
| 7117 | |||||
| 7118 | elsif ( $is_assignment{$type} ) { | ||||
| 7119 | $last_gnu_equals{$total_depth} = $max_index_to_go; | ||||
| 7120 | } | ||||
| 7121 | |||||
| 7122 | # this token might start a new line | ||||
| 7123 | # if this is a non-blank.. | ||||
| 7124 | if ( $type ne 'b' ) { | ||||
| 7125 | |||||
| 7126 | # and if .. | ||||
| 7127 | if ( | ||||
| 7128 | |||||
| 7129 | # this is the first nonblank token of the line | ||||
| 7130 | $max_index_to_go == 1 && $types_to_go[0] eq 'b' | ||||
| 7131 | |||||
| 7132 | # or previous character was one of these: | ||||
| 7133 | || $last_nonblank_type_to_go =~ /^([\:\?\,f])$/ | ||||
| 7134 | |||||
| 7135 | # or previous character was opening and this does not close it | ||||
| 7136 | || ( $last_nonblank_type_to_go eq '{' && $type ne '}' ) | ||||
| 7137 | || ( $last_nonblank_type_to_go eq '(' and $type ne ')' ) | ||||
| 7138 | |||||
| 7139 | # or this token is one of these: | ||||
| 7140 | || $type =~ /^([\.]|\|\||\&\&)$/ | ||||
| 7141 | |||||
| 7142 | # or this is a closing structure | ||||
| 7143 | || ( $last_nonblank_type_to_go eq '}' | ||||
| 7144 | && $last_nonblank_token_to_go eq $last_nonblank_type_to_go ) | ||||
| 7145 | |||||
| 7146 | # or previous token was keyword 'return' | ||||
| 7147 | || ( $last_nonblank_type_to_go eq 'k' | ||||
| 7148 | && ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) ) | ||||
| 7149 | |||||
| 7150 | # or starting a new line at certain keywords is fine | ||||
| 7151 | || ( $type eq 'k' | ||||
| 7152 | && $is_if_unless_and_or_last_next_redo_return{$token} ) | ||||
| 7153 | |||||
| 7154 | # or this is after an assignment after a closing structure | ||||
| 7155 | || ( | ||||
| 7156 | $is_assignment{$last_nonblank_type_to_go} | ||||
| 7157 | && ( | ||||
| 7158 | $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/ | ||||
| 7159 | |||||
| 7160 | # and it is significantly to the right | ||||
| 7161 | || $gnu_position_predictor > $halfway | ||||
| 7162 | ) | ||||
| 7163 | ) | ||||
| 7164 | ) | ||||
| 7165 | { | ||||
| 7166 | check_for_long_gnu_style_lines(); | ||||
| 7167 | $line_start_index_to_go = $max_index_to_go; | ||||
| 7168 | |||||
| 7169 | # back up 1 token if we want to break before that type | ||||
| 7170 | # otherwise, we may strand tokens like '?' or ':' on a line | ||||
| 7171 | if ( $line_start_index_to_go > 0 ) { | ||||
| 7172 | if ( $last_nonblank_type_to_go eq 'k' ) { | ||||
| 7173 | |||||
| 7174 | if ( $want_break_before{$last_nonblank_token_to_go} ) { | ||||
| 7175 | $line_start_index_to_go--; | ||||
| 7176 | } | ||||
| 7177 | } | ||||
| 7178 | elsif ( $want_break_before{$last_nonblank_type_to_go} ) { | ||||
| 7179 | $line_start_index_to_go--; | ||||
| 7180 | } | ||||
| 7181 | } | ||||
| 7182 | } | ||||
| 7183 | } | ||||
| 7184 | |||||
| 7185 | # remember the predicted position of this token on the output line | ||||
| 7186 | if ( $max_index_to_go > $line_start_index_to_go ) { | ||||
| 7187 | $gnu_position_predictor = | ||||
| 7188 | total_line_length( $line_start_index_to_go, $max_index_to_go ); | ||||
| 7189 | } | ||||
| 7190 | else { | ||||
| 7191 | $gnu_position_predictor = | ||||
| 7192 | $space_count + $token_lengths_to_go[$max_index_to_go]; | ||||
| 7193 | } | ||||
| 7194 | |||||
| 7195 | # store the indentation object for this token | ||||
| 7196 | # this allows us to manipulate the leading whitespace | ||||
| 7197 | # (in case we have to reduce indentation to fit a line) without | ||||
| 7198 | # having to change any token values | ||||
| 7199 | $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index]; | ||||
| 7200 | $reduced_spaces_to_go[$max_index_to_go] = | ||||
| 7201 | ( $max_gnu_stack_index > 0 && $ci_level ) | ||||
| 7202 | ? $gnu_stack[ $max_gnu_stack_index - 1 ] | ||||
| 7203 | : $gnu_stack[$max_gnu_stack_index]; | ||||
| 7204 | return; | ||||
| 7205 | } | ||||
| 7206 | |||||
| 7207 | sub check_for_long_gnu_style_lines { | ||||
| 7208 | |||||
| 7209 | # look at the current estimated maximum line length, and | ||||
| 7210 | # remove some whitespace if it exceeds the desired maximum | ||||
| 7211 | |||||
| 7212 | # this is only for the '-lp' style | ||||
| 7213 | return unless ($rOpts_line_up_parentheses); | ||||
| 7214 | |||||
| 7215 | # nothing can be done if no stack items defined for this line | ||||
| 7216 | return if ( $max_gnu_item_index == UNDEFINED_INDEX ); | ||||
| 7217 | |||||
| 7218 | # see if we have exceeded the maximum desired line length | ||||
| 7219 | # keep 2 extra free because they are needed in some cases | ||||
| 7220 | # (result of trial-and-error testing) | ||||
| 7221 | my $spaces_needed = | ||||
| 7222 | $gnu_position_predictor - maximum_line_length($max_index_to_go) + 2; | ||||
| 7223 | |||||
| 7224 | return if ( $spaces_needed <= 0 ); | ||||
| 7225 | |||||
| 7226 | # We are over the limit, so try to remove a requested number of | ||||
| 7227 | # spaces from leading whitespace. We are only allowed to remove | ||||
| 7228 | # from whitespace items created on this batch, since others have | ||||
| 7229 | # already been used and cannot be undone. | ||||
| 7230 | my @candidates = (); | ||||
| 7231 | my $i; | ||||
| 7232 | |||||
| 7233 | # loop over all whitespace items created for the current batch | ||||
| 7234 | for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) { | ||||
| 7235 | my $item = $gnu_item_list[$i]; | ||||
| 7236 | |||||
| 7237 | # item must still be open to be a candidate (otherwise it | ||||
| 7238 | # cannot influence the current token) | ||||
| 7239 | next if ( $item->get_CLOSED() >= 0 ); | ||||
| 7240 | |||||
| 7241 | my $available_spaces = $item->get_AVAILABLE_SPACES(); | ||||
| 7242 | |||||
| 7243 | if ( $available_spaces > 0 ) { | ||||
| 7244 | push( @candidates, [ $i, $available_spaces ] ); | ||||
| 7245 | } | ||||
| 7246 | } | ||||
| 7247 | |||||
| 7248 | return unless (@candidates); | ||||
| 7249 | |||||
| 7250 | # sort by available whitespace so that we can remove whitespace | ||||
| 7251 | # from the maximum available first | ||||
| 7252 | @candidates = sort { $b->[1] <=> $a->[1] } @candidates; | ||||
| 7253 | |||||
| 7254 | # keep removing whitespace until we are done or have no more | ||||
| 7255 | my $candidate; | ||||
| 7256 | foreach $candidate (@candidates) { | ||||
| 7257 | my ( $i, $available_spaces ) = @{$candidate}; | ||||
| 7258 | my $deleted_spaces = | ||||
| 7259 | ( $available_spaces > $spaces_needed ) | ||||
| 7260 | ? $spaces_needed | ||||
| 7261 | : $available_spaces; | ||||
| 7262 | |||||
| 7263 | # remove the incremental space from this item | ||||
| 7264 | $gnu_item_list[$i]->decrease_AVAILABLE_SPACES($deleted_spaces); | ||||
| 7265 | |||||
| 7266 | my $i_debug = $i; | ||||
| 7267 | |||||
| 7268 | # update the leading whitespace of this item and all items | ||||
| 7269 | # that came after it | ||||
| 7270 | for ( ; $i <= $max_gnu_item_index ; $i++ ) { | ||||
| 7271 | |||||
| 7272 | my $old_spaces = $gnu_item_list[$i]->get_SPACES(); | ||||
| 7273 | if ( $old_spaces >= $deleted_spaces ) { | ||||
| 7274 | $gnu_item_list[$i]->decrease_SPACES($deleted_spaces); | ||||
| 7275 | } | ||||
| 7276 | |||||
| 7277 | # shouldn't happen except for code bug: | ||||
| 7278 | else { | ||||
| 7279 | my $level = $gnu_item_list[$i_debug]->get_LEVEL(); | ||||
| 7280 | my $ci_level = $gnu_item_list[$i_debug]->get_CI_LEVEL(); | ||||
| 7281 | my $old_level = $gnu_item_list[$i]->get_LEVEL(); | ||||
| 7282 | my $old_ci_level = $gnu_item_list[$i]->get_CI_LEVEL(); | ||||
| 7283 | warning( | ||||
| 7284 | "program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level\n" | ||||
| 7285 | ); | ||||
| 7286 | report_definite_bug(); | ||||
| 7287 | } | ||||
| 7288 | } | ||||
| 7289 | $gnu_position_predictor -= $deleted_spaces; | ||||
| 7290 | $spaces_needed -= $deleted_spaces; | ||||
| 7291 | last unless ( $spaces_needed > 0 ); | ||||
| 7292 | } | ||||
| 7293 | } | ||||
| 7294 | |||||
| 7295 | sub finish_lp_batch { | ||||
| 7296 | |||||
| 7297 | # This routine is called once after each output stream batch is | ||||
| 7298 | # finished to undo indentation for all incomplete -lp | ||||
| 7299 | # indentation levels. It is too risky to leave a level open, | ||||
| 7300 | # because then we can't backtrack in case of a long line to follow. | ||||
| 7301 | # This means that comments and blank lines will disrupt this | ||||
| 7302 | # indentation style. But the vertical aligner may be able to | ||||
| 7303 | # get the space back if there are side comments. | ||||
| 7304 | |||||
| 7305 | # this is only for the 'lp' style | ||||
| 7306 | return unless ($rOpts_line_up_parentheses); | ||||
| 7307 | |||||
| 7308 | # nothing can be done if no stack items defined for this line | ||||
| 7309 | return if ( $max_gnu_item_index == UNDEFINED_INDEX ); | ||||
| 7310 | |||||
| 7311 | # loop over all whitespace items created for the current batch | ||||
| 7312 | my $i; | ||||
| 7313 | for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) { | ||||
| 7314 | my $item = $gnu_item_list[$i]; | ||||
| 7315 | |||||
| 7316 | # only look for open items | ||||
| 7317 | next if ( $item->get_CLOSED() >= 0 ); | ||||
| 7318 | |||||
| 7319 | # Tentatively remove all of the available space | ||||
| 7320 | # (The vertical aligner will try to get it back later) | ||||
| 7321 | my $available_spaces = $item->get_AVAILABLE_SPACES(); | ||||
| 7322 | if ( $available_spaces > 0 ) { | ||||
| 7323 | |||||
| 7324 | # delete incremental space for this item | ||||
| 7325 | $gnu_item_list[$i] | ||||
| 7326 | ->tentatively_decrease_AVAILABLE_SPACES($available_spaces); | ||||
| 7327 | |||||
| 7328 | # Reduce the total indentation space of any nodes that follow | ||||
| 7329 | # Note that any such nodes must necessarily be dependents | ||||
| 7330 | # of this node. | ||||
| 7331 | foreach ( $i + 1 .. $max_gnu_item_index ) { | ||||
| 7332 | $gnu_item_list[$_]->decrease_SPACES($available_spaces); | ||||
| 7333 | } | ||||
| 7334 | } | ||||
| 7335 | } | ||||
| 7336 | return; | ||||
| 7337 | } | ||||
| 7338 | |||||
| 7339 | sub reduce_lp_indentation { | ||||
| 7340 | |||||
| 7341 | # reduce the leading whitespace at token $i if possible by $spaces_needed | ||||
| 7342 | # (a large value of $spaces_needed will remove all excess space) | ||||
| 7343 | # NOTE: to be called from scan_list only for a sequence of tokens | ||||
| 7344 | # contained between opening and closing parens/braces/brackets | ||||
| 7345 | |||||
| 7346 | my ( $i, $spaces_wanted ) = @_; | ||||
| 7347 | my $deleted_spaces = 0; | ||||
| 7348 | |||||
| 7349 | my $item = $leading_spaces_to_go[$i]; | ||||
| 7350 | my $available_spaces = $item->get_AVAILABLE_SPACES(); | ||||
| 7351 | |||||
| 7352 | if ( | ||||
| 7353 | $available_spaces > 0 | ||||
| 7354 | && ( ( $spaces_wanted <= $available_spaces ) | ||||
| 7355 | || !$item->get_HAVE_CHILD() ) | ||||
| 7356 | ) | ||||
| 7357 | { | ||||
| 7358 | |||||
| 7359 | # we'll remove these spaces, but mark them as recoverable | ||||
| 7360 | $deleted_spaces = | ||||
| 7361 | $item->tentatively_decrease_AVAILABLE_SPACES($spaces_wanted); | ||||
| 7362 | } | ||||
| 7363 | |||||
| 7364 | return $deleted_spaces; | ||||
| 7365 | } | ||||
| 7366 | |||||
| 7367 | sub token_sequence_length { | ||||
| 7368 | |||||
| 7369 | # return length of tokens ($ibeg .. $iend) including $ibeg & $iend | ||||
| 7370 | # returns 0 if $ibeg > $iend (shouldn't happen) | ||||
| 7371 | my ( $ibeg, $iend ) = @_; | ||||
| 7372 | return 0 if ( $iend < 0 || $ibeg > $iend ); | ||||
| 7373 | return $summed_lengths_to_go[ $iend + 1 ] if ( $ibeg < 0 ); | ||||
| 7374 | return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg]; | ||||
| 7375 | } | ||||
| 7376 | |||||
| 7377 | sub total_line_length { | ||||
| 7378 | |||||
| 7379 | # return length of a line of tokens ($ibeg .. $iend) | ||||
| 7380 | my ( $ibeg, $iend ) = @_; | ||||
| 7381 | return leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend ); | ||||
| 7382 | } | ||||
| 7383 | |||||
| 7384 | sub maximum_line_length_for_level { | ||||
| 7385 | |||||
| 7386 | # return maximum line length for line starting with a given level | ||||
| 7387 | my $maximum_line_length = $rOpts_maximum_line_length; | ||||
| 7388 | |||||
| 7389 | # Modify if -vmll option is selected | ||||
| 7390 | if ($rOpts_variable_maximum_line_length) { | ||||
| 7391 | my $level = shift; | ||||
| 7392 | if ( $level < 0 ) { $level = 0 } | ||||
| 7393 | $maximum_line_length += $level * $rOpts_indent_columns; | ||||
| 7394 | } | ||||
| 7395 | return $maximum_line_length; | ||||
| 7396 | } | ||||
| 7397 | |||||
| 7398 | sub maximum_line_length { | ||||
| 7399 | |||||
| 7400 | # return maximum line length for line starting with the token at given index | ||||
| 7401 | return maximum_line_length_for_level( $levels_to_go[ $_[0] ] ); | ||||
| 7402 | |||||
| 7403 | } | ||||
| 7404 | |||||
| 7405 | sub excess_line_length { | ||||
| 7406 | |||||
| 7407 | # return number of characters by which a line of tokens ($ibeg..$iend) | ||||
| 7408 | # exceeds the allowable line length. | ||||
| 7409 | my ( $ibeg, $iend ) = @_; | ||||
| 7410 | return total_line_length( $ibeg, $iend ) - maximum_line_length($ibeg); | ||||
| 7411 | } | ||||
| 7412 | |||||
| 7413 | sub finish_formatting { | ||||
| 7414 | |||||
| 7415 | # flush buffer and write any informative messages | ||||
| 7416 | my $self = shift; | ||||
| 7417 | |||||
| 7418 | flush(); | ||||
| 7419 | $file_writer_object->decrement_output_line_number() | ||||
| 7420 | ; # fix up line number since it was incremented | ||||
| 7421 | we_are_at_the_last_line(); | ||||
| 7422 | if ( $added_semicolon_count > 0 ) { | ||||
| 7423 | my $first = ( $added_semicolon_count > 1 ) ? "First" : ""; | ||||
| 7424 | my $what = | ||||
| 7425 | ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was"; | ||||
| 7426 | write_logfile_entry("$added_semicolon_count $what added:\n"); | ||||
| 7427 | write_logfile_entry( | ||||
| 7428 | " $first at input line $first_added_semicolon_at\n"); | ||||
| 7429 | |||||
| 7430 | if ( $added_semicolon_count > 1 ) { | ||||
| 7431 | write_logfile_entry( | ||||
| 7432 | " Last at input line $last_added_semicolon_at\n"); | ||||
| 7433 | } | ||||
| 7434 | write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n"); | ||||
| 7435 | write_logfile_entry("\n"); | ||||
| 7436 | } | ||||
| 7437 | |||||
| 7438 | if ( $deleted_semicolon_count > 0 ) { | ||||
| 7439 | my $first = ( $deleted_semicolon_count > 1 ) ? "First" : ""; | ||||
| 7440 | my $what = | ||||
| 7441 | ( $deleted_semicolon_count > 1 ) | ||||
| 7442 | ? "semicolons were" | ||||
| 7443 | : "semicolon was"; | ||||
| 7444 | write_logfile_entry( | ||||
| 7445 | "$deleted_semicolon_count unnecessary $what deleted:\n"); | ||||
| 7446 | write_logfile_entry( | ||||
| 7447 | " $first at input line $first_deleted_semicolon_at\n"); | ||||
| 7448 | |||||
| 7449 | if ( $deleted_semicolon_count > 1 ) { | ||||
| 7450 | write_logfile_entry( | ||||
| 7451 | " Last at input line $last_deleted_semicolon_at\n"); | ||||
| 7452 | } | ||||
| 7453 | write_logfile_entry(" (Use -ndsc to prevent semicolon deletion)\n"); | ||||
| 7454 | write_logfile_entry("\n"); | ||||
| 7455 | } | ||||
| 7456 | |||||
| 7457 | if ( $embedded_tab_count > 0 ) { | ||||
| 7458 | my $first = ( $embedded_tab_count > 1 ) ? "First" : ""; | ||||
| 7459 | my $what = | ||||
| 7460 | ( $embedded_tab_count > 1 ) | ||||
| 7461 | ? "quotes or patterns" | ||||
| 7462 | : "quote or pattern"; | ||||
| 7463 | write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n"); | ||||
| 7464 | write_logfile_entry( | ||||
| 7465 | "This means the display of this script could vary with device or software\n" | ||||
| 7466 | ); | ||||
| 7467 | write_logfile_entry(" $first at input line $first_embedded_tab_at\n"); | ||||
| 7468 | |||||
| 7469 | if ( $embedded_tab_count > 1 ) { | ||||
| 7470 | write_logfile_entry( | ||||
| 7471 | " Last at input line $last_embedded_tab_at\n"); | ||||
| 7472 | } | ||||
| 7473 | write_logfile_entry("\n"); | ||||
| 7474 | } | ||||
| 7475 | |||||
| 7476 | if ($first_tabbing_disagreement) { | ||||
| 7477 | write_logfile_entry( | ||||
| 7478 | "First indentation disagreement seen at input line $first_tabbing_disagreement\n" | ||||
| 7479 | ); | ||||
| 7480 | } | ||||
| 7481 | |||||
| 7482 | if ($in_tabbing_disagreement) { | ||||
| 7483 | write_logfile_entry( | ||||
| 7484 | "Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n" | ||||
| 7485 | ); | ||||
| 7486 | } | ||||
| 7487 | else { | ||||
| 7488 | |||||
| 7489 | if ($last_tabbing_disagreement) { | ||||
| 7490 | |||||
| 7491 | write_logfile_entry( | ||||
| 7492 | "Last indentation disagreement seen at input line $last_tabbing_disagreement\n" | ||||
| 7493 | ); | ||||
| 7494 | } | ||||
| 7495 | else { | ||||
| 7496 | write_logfile_entry("No indentation disagreement seen\n"); | ||||
| 7497 | } | ||||
| 7498 | } | ||||
| 7499 | if ($first_tabbing_disagreement) { | ||||
| 7500 | write_logfile_entry( | ||||
| 7501 | "Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n" | ||||
| 7502 | ); | ||||
| 7503 | } | ||||
| 7504 | write_logfile_entry("\n"); | ||||
| 7505 | |||||
| 7506 | $vertical_aligner_object->report_anything_unusual(); | ||||
| 7507 | |||||
| 7508 | $file_writer_object->report_line_length_errors(); | ||||
| 7509 | } | ||||
| 7510 | |||||
| 7511 | sub check_options { | ||||
| 7512 | |||||
| 7513 | # This routine is called to check the Opts hash after it is defined | ||||
| 7514 | |||||
| 7515 | ($rOpts) = @_; | ||||
| 7516 | |||||
| 7517 | make_static_block_comment_pattern(); | ||||
| 7518 | make_static_side_comment_pattern(); | ||||
| 7519 | make_closing_side_comment_prefix(); | ||||
| 7520 | make_closing_side_comment_list_pattern(); | ||||
| 7521 | $format_skipping_pattern_begin = | ||||
| 7522 | make_format_skipping_pattern( 'format-skipping-begin', '#<<<' ); | ||||
| 7523 | $format_skipping_pattern_end = | ||||
| 7524 | make_format_skipping_pattern( 'format-skipping-end', '#>>>' ); | ||||
| 7525 | |||||
| 7526 | # If closing side comments ARE selected, then we can safely | ||||
| 7527 | # delete old closing side comments unless closing side comment | ||||
| 7528 | # warnings are requested. This is a good idea because it will | ||||
| 7529 | # eliminate any old csc's which fall below the line count threshold. | ||||
| 7530 | # We cannot do this if warnings are turned on, though, because we | ||||
| 7531 | # might delete some text which has been added. So that must | ||||
| 7532 | # be handled when comments are created. | ||||
| 7533 | if ( $rOpts->{'closing-side-comments'} ) { | ||||
| 7534 | if ( !$rOpts->{'closing-side-comment-warnings'} ) { | ||||
| 7535 | $rOpts->{'delete-closing-side-comments'} = 1; | ||||
| 7536 | } | ||||
| 7537 | } | ||||
| 7538 | |||||
| 7539 | # If closing side comments ARE NOT selected, but warnings ARE | ||||
| 7540 | # selected and we ARE DELETING csc's, then we will pretend to be | ||||
| 7541 | # adding with a huge interval. This will force the comments to be | ||||
| 7542 | # generated for comparison with the old comments, but not added. | ||||
| 7543 | elsif ( $rOpts->{'closing-side-comment-warnings'} ) { | ||||
| 7544 | if ( $rOpts->{'delete-closing-side-comments'} ) { | ||||
| 7545 | $rOpts->{'delete-closing-side-comments'} = 0; | ||||
| 7546 | $rOpts->{'closing-side-comments'} = 1; | ||||
| 7547 | $rOpts->{'closing-side-comment-interval'} = 100000000; | ||||
| 7548 | } | ||||
| 7549 | } | ||||
| 7550 | |||||
| 7551 | make_bli_pattern(); | ||||
| 7552 | make_block_brace_vertical_tightness_pattern(); | ||||
| 7553 | |||||
| 7554 | if ( $rOpts->{'line-up-parentheses'} ) { | ||||
| 7555 | |||||
| 7556 | if ( $rOpts->{'indent-only'} | ||||
| 7557 | || !$rOpts->{'add-newlines'} | ||||
| 7558 | || !$rOpts->{'delete-old-newlines'} ) | ||||
| 7559 | { | ||||
| 7560 | Perl::Tidy::Warn <<EOM; | ||||
| 7561 | ----------------------------------------------------------------------- | ||||
| 7562 | Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp | ||||
| 7563 | |||||
| 7564 | The -lp indentation logic requires that perltidy be able to coordinate | ||||
| 7565 | arbitrarily large numbers of line breakpoints. This isn't possible | ||||
| 7566 | with these flags. Sometimes an acceptable workaround is to use -wocb=3 | ||||
| 7567 | ----------------------------------------------------------------------- | ||||
| 7568 | EOM | ||||
| 7569 | $rOpts->{'line-up-parentheses'} = 0; | ||||
| 7570 | } | ||||
| 7571 | } | ||||
| 7572 | |||||
| 7573 | # At present, tabs are not compatible with the line-up-parentheses style | ||||
| 7574 | # (it would be possible to entab the total leading whitespace | ||||
| 7575 | # just prior to writing the line, if desired). | ||||
| 7576 | if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) { | ||||
| 7577 | Perl::Tidy::Warn <<EOM; | ||||
| 7578 | Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et. | ||||
| 7579 | EOM | ||||
| 7580 | $rOpts->{'tabs'} = 0; | ||||
| 7581 | } | ||||
| 7582 | |||||
| 7583 | # Likewise, tabs are not compatible with outdenting.. | ||||
| 7584 | if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) { | ||||
| 7585 | Perl::Tidy::Warn <<EOM; | ||||
| 7586 | Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et. | ||||
| 7587 | EOM | ||||
| 7588 | $rOpts->{'tabs'} = 0; | ||||
| 7589 | } | ||||
| 7590 | |||||
| 7591 | if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) { | ||||
| 7592 | Perl::Tidy::Warn <<EOM; | ||||
| 7593 | Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et. | ||||
| 7594 | EOM | ||||
| 7595 | $rOpts->{'tabs'} = 0; | ||||
| 7596 | } | ||||
| 7597 | |||||
| 7598 | if ( !$rOpts->{'space-for-semicolon'} ) { | ||||
| 7599 | $want_left_space{'f'} = -1; | ||||
| 7600 | } | ||||
| 7601 | |||||
| 7602 | if ( $rOpts->{'space-terminal-semicolon'} ) { | ||||
| 7603 | $want_left_space{';'} = 1; | ||||
| 7604 | } | ||||
| 7605 | |||||
| 7606 | # implement outdenting preferences for keywords | ||||
| 7607 | %outdent_keyword = (); | ||||
| 7608 | unless ( @_ = split_words( $rOpts->{'outdent-keyword-okl'} ) ) { | ||||
| 7609 | @_ = qw(next last redo goto return); # defaults | ||||
| 7610 | } | ||||
| 7611 | |||||
| 7612 | # FUTURE: if not a keyword, assume that it is an identifier | ||||
| 7613 | foreach (@_) { | ||||
| 7614 | if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) { | ||||
| 7615 | $outdent_keyword{$_} = 1; | ||||
| 7616 | } | ||||
| 7617 | else { | ||||
| 7618 | Perl::Tidy::Warn "ignoring '$_' in -okwl list; not a perl keyword"; | ||||
| 7619 | } | ||||
| 7620 | } | ||||
| 7621 | |||||
| 7622 | # implement user whitespace preferences | ||||
| 7623 | if ( @_ = split_words( $rOpts->{'want-left-space'} ) ) { | ||||
| 7624 | @want_left_space{@_} = (1) x scalar(@_); | ||||
| 7625 | } | ||||
| 7626 | |||||
| 7627 | if ( @_ = split_words( $rOpts->{'want-right-space'} ) ) { | ||||
| 7628 | @want_right_space{@_} = (1) x scalar(@_); | ||||
| 7629 | } | ||||
| 7630 | |||||
| 7631 | if ( @_ = split_words( $rOpts->{'nowant-left-space'} ) ) { | ||||
| 7632 | @want_left_space{@_} = (-1) x scalar(@_); | ||||
| 7633 | } | ||||
| 7634 | |||||
| 7635 | if ( @_ = split_words( $rOpts->{'nowant-right-space'} ) ) { | ||||
| 7636 | @want_right_space{@_} = (-1) x scalar(@_); | ||||
| 7637 | } | ||||
| 7638 | if ( $rOpts->{'dump-want-left-space'} ) { | ||||
| 7639 | dump_want_left_space(*STDOUT); | ||||
| 7640 | Perl::Tidy::Exit 0; | ||||
| 7641 | } | ||||
| 7642 | |||||
| 7643 | if ( $rOpts->{'dump-want-right-space'} ) { | ||||
| 7644 | dump_want_right_space(*STDOUT); | ||||
| 7645 | Perl::Tidy::Exit 0; | ||||
| 7646 | } | ||||
| 7647 | |||||
| 7648 | # default keywords for which space is introduced before an opening paren | ||||
| 7649 | # (at present, including them messes up vertical alignment) | ||||
| 7650 | @_ = qw(my local our and or err eq ne if else elsif until | ||||
| 7651 | unless while for foreach return switch case given when); | ||||
| 7652 | @space_after_keyword{@_} = (1) x scalar(@_); | ||||
| 7653 | |||||
| 7654 | # first remove any or all of these if desired | ||||
| 7655 | if ( @_ = split_words( $rOpts->{'nospace-after-keyword'} ) ) { | ||||
| 7656 | |||||
| 7657 | # -nsak='*' selects all the above keywords | ||||
| 7658 | if ( @_ == 1 && $_[0] eq '*' ) { @_ = keys(%space_after_keyword) } | ||||
| 7659 | @space_after_keyword{@_} = (0) x scalar(@_); | ||||
| 7660 | } | ||||
| 7661 | |||||
| 7662 | # then allow user to add to these defaults | ||||
| 7663 | if ( @_ = split_words( $rOpts->{'space-after-keyword'} ) ) { | ||||
| 7664 | @space_after_keyword{@_} = (1) x scalar(@_); | ||||
| 7665 | } | ||||
| 7666 | |||||
| 7667 | # implement user break preferences | ||||
| 7668 | my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | & | ||||
| 7669 | = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x= | ||||
| 7670 | . : ? && || and or err xor | ||||
| 7671 | ); | ||||
| 7672 | |||||
| 7673 | my $break_after = sub { | ||||
| 7674 | foreach my $tok (@_) { | ||||
| 7675 | if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/: | ||||
| 7676 | my $lbs = $left_bond_strength{$tok}; | ||||
| 7677 | my $rbs = $right_bond_strength{$tok}; | ||||
| 7678 | if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) { | ||||
| 7679 | ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) = | ||||
| 7680 | ( $lbs, $rbs ); | ||||
| 7681 | } | ||||
| 7682 | } | ||||
| 7683 | }; | ||||
| 7684 | |||||
| 7685 | my $break_before = sub { | ||||
| 7686 | foreach my $tok (@_) { | ||||
| 7687 | my $lbs = $left_bond_strength{$tok}; | ||||
| 7688 | my $rbs = $right_bond_strength{$tok}; | ||||
| 7689 | if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) { | ||||
| 7690 | ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) = | ||||
| 7691 | ( $lbs, $rbs ); | ||||
| 7692 | } | ||||
| 7693 | } | ||||
| 7694 | }; | ||||
| 7695 | |||||
| 7696 | $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} ); | ||||
| 7697 | $break_before->(@all_operators) | ||||
| 7698 | if ( $rOpts->{'break-before-all-operators'} ); | ||||
| 7699 | |||||
| 7700 | $break_after->( split_words( $rOpts->{'want-break-after'} ) ); | ||||
| 7701 | $break_before->( split_words( $rOpts->{'want-break-before'} ) ); | ||||
| 7702 | |||||
| 7703 | # make note if breaks are before certain key types | ||||
| 7704 | %want_break_before = (); | ||||
| 7705 | foreach my $tok ( @all_operators, ',' ) { | ||||
| 7706 | $want_break_before{$tok} = | ||||
| 7707 | $left_bond_strength{$tok} < $right_bond_strength{$tok}; | ||||
| 7708 | } | ||||
| 7709 | |||||
| 7710 | # Coordinate ?/: breaks, which must be similar | ||||
| 7711 | if ( !$want_break_before{':'} ) { | ||||
| 7712 | $want_break_before{'?'} = $want_break_before{':'}; | ||||
| 7713 | $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01; | ||||
| 7714 | $left_bond_strength{'?'} = NO_BREAK; | ||||
| 7715 | } | ||||
| 7716 | |||||
| 7717 | # Define here tokens which may follow the closing brace of a do statement | ||||
| 7718 | # on the same line, as in: | ||||
| 7719 | # } while ( $something); | ||||
| 7720 | @_ = qw(until while unless if ; : ); | ||||
| 7721 | push @_, ','; | ||||
| 7722 | @is_do_follower{@_} = (1) x scalar(@_); | ||||
| 7723 | |||||
| 7724 | # These tokens may follow the closing brace of an if or elsif block. | ||||
| 7725 | # In other words, for cuddled else we want code to look like: | ||||
| 7726 | # } elsif ( $something) { | ||||
| 7727 | # } else { | ||||
| 7728 | if ( $rOpts->{'cuddled-else'} ) { | ||||
| 7729 | @_ = qw(else elsif); | ||||
| 7730 | @is_if_brace_follower{@_} = (1) x scalar(@_); | ||||
| 7731 | } | ||||
| 7732 | else { | ||||
| 7733 | %is_if_brace_follower = (); | ||||
| 7734 | } | ||||
| 7735 | |||||
| 7736 | # nothing can follow the closing curly of an else { } block: | ||||
| 7737 | %is_else_brace_follower = (); | ||||
| 7738 | |||||
| 7739 | # what can follow a multi-line anonymous sub definition closing curly: | ||||
| 7740 | @_ = qw# ; : => or and && || ~~ !~~ ) #; | ||||
| 7741 | push @_, ','; | ||||
| 7742 | @is_anon_sub_brace_follower{@_} = (1) x scalar(@_); | ||||
| 7743 | |||||
| 7744 | # what can follow a one-line anonymous sub closing curly: | ||||
| 7745 | # one-line anonymous subs also have ']' here... | ||||
| 7746 | # see tk3.t and PP.pm | ||||
| 7747 | @_ = qw# ; : => or and && || ) ] ~~ !~~ #; | ||||
| 7748 | push @_, ','; | ||||
| 7749 | @is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_); | ||||
| 7750 | |||||
| 7751 | # What can follow a closing curly of a block | ||||
| 7752 | # which is not an if/elsif/else/do/sort/map/grep/eval/sub | ||||
| 7753 | # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl' | ||||
| 7754 | @_ = qw# ; : => or and && || ) #; | ||||
| 7755 | push @_, ','; | ||||
| 7756 | |||||
| 7757 | # allow cuddled continue if cuddled else is specified | ||||
| 7758 | if ( $rOpts->{'cuddled-else'} ) { push @_, 'continue'; } | ||||
| 7759 | |||||
| 7760 | @is_other_brace_follower{@_} = (1) x scalar(@_); | ||||
| 7761 | |||||
| 7762 | $right_bond_strength{'{'} = WEAK; | ||||
| 7763 | $left_bond_strength{'{'} = VERY_STRONG; | ||||
| 7764 | |||||
| 7765 | # make -l=0 equal to -l=infinite | ||||
| 7766 | if ( !$rOpts->{'maximum-line-length'} ) { | ||||
| 7767 | $rOpts->{'maximum-line-length'} = 1000000; | ||||
| 7768 | } | ||||
| 7769 | |||||
| 7770 | # make -lbl=0 equal to -lbl=infinite | ||||
| 7771 | if ( !$rOpts->{'long-block-line-count'} ) { | ||||
| 7772 | $rOpts->{'long-block-line-count'} = 1000000; | ||||
| 7773 | } | ||||
| 7774 | |||||
| 7775 | my $ole = $rOpts->{'output-line-ending'}; | ||||
| 7776 | if ($ole) { | ||||
| 7777 | my %endings = ( | ||||
| 7778 | dos => "\015\012", | ||||
| 7779 | win => "\015\012", | ||||
| 7780 | mac => "\015", | ||||
| 7781 | unix => "\012", | ||||
| 7782 | ); | ||||
| 7783 | $ole = lc $ole; | ||||
| 7784 | unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) { | ||||
| 7785 | my $str = join " ", keys %endings; | ||||
| 7786 | Perl::Tidy::Die <<EOM; | ||||
| 7787 | Unrecognized line ending '$ole'; expecting one of: $str | ||||
| 7788 | EOM | ||||
| 7789 | } | ||||
| 7790 | if ( $rOpts->{'preserve-line-endings'} ) { | ||||
| 7791 | Perl::Tidy::Warn "Ignoring -ple; conflicts with -ole\n"; | ||||
| 7792 | $rOpts->{'preserve-line-endings'} = undef; | ||||
| 7793 | } | ||||
| 7794 | } | ||||
| 7795 | |||||
| 7796 | # hashes used to simplify setting whitespace | ||||
| 7797 | %tightness = ( | ||||
| 7798 | '{' => $rOpts->{'brace-tightness'}, | ||||
| 7799 | '}' => $rOpts->{'brace-tightness'}, | ||||
| 7800 | '(' => $rOpts->{'paren-tightness'}, | ||||
| 7801 | ')' => $rOpts->{'paren-tightness'}, | ||||
| 7802 | '[' => $rOpts->{'square-bracket-tightness'}, | ||||
| 7803 | ']' => $rOpts->{'square-bracket-tightness'}, | ||||
| 7804 | ); | ||||
| 7805 | %matching_token = ( | ||||
| 7806 | '{' => '}', | ||||
| 7807 | '(' => ')', | ||||
| 7808 | '[' => ']', | ||||
| 7809 | '?' => ':', | ||||
| 7810 | ); | ||||
| 7811 | |||||
| 7812 | # frequently used parameters | ||||
| 7813 | $rOpts_add_newlines = $rOpts->{'add-newlines'}; | ||||
| 7814 | $rOpts_add_whitespace = $rOpts->{'add-whitespace'}; | ||||
| 7815 | $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'}; | ||||
| 7816 | $rOpts_block_brace_vertical_tightness = | ||||
| 7817 | $rOpts->{'block-brace-vertical-tightness'}; | ||||
| 7818 | $rOpts_brace_left_and_indent = $rOpts->{'brace-left-and-indent'}; | ||||
| 7819 | $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'}; | ||||
| 7820 | $rOpts_break_at_old_ternary_breakpoints = | ||||
| 7821 | $rOpts->{'break-at-old-ternary-breakpoints'}; | ||||
| 7822 | $rOpts_break_at_old_attribute_breakpoints = | ||||
| 7823 | $rOpts->{'break-at-old-attribute-breakpoints'}; | ||||
| 7824 | $rOpts_break_at_old_comma_breakpoints = | ||||
| 7825 | $rOpts->{'break-at-old-comma-breakpoints'}; | ||||
| 7826 | $rOpts_break_at_old_keyword_breakpoints = | ||||
| 7827 | $rOpts->{'break-at-old-keyword-breakpoints'}; | ||||
| 7828 | $rOpts_break_at_old_logical_breakpoints = | ||||
| 7829 | $rOpts->{'break-at-old-logical-breakpoints'}; | ||||
| 7830 | $rOpts_closing_side_comment_else_flag = | ||||
| 7831 | $rOpts->{'closing-side-comment-else-flag'}; | ||||
| 7832 | $rOpts_closing_side_comment_maximum_text = | ||||
| 7833 | $rOpts->{'closing-side-comment-maximum-text'}; | ||||
| 7834 | $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'}; | ||||
| 7835 | $rOpts_cuddled_else = $rOpts->{'cuddled-else'}; | ||||
| 7836 | $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'}; | ||||
| 7837 | $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'}; | ||||
| 7838 | $rOpts_indent_columns = $rOpts->{'indent-columns'}; | ||||
| 7839 | $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'}; | ||||
| 7840 | $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'}; | ||||
| 7841 | $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'}; | ||||
| 7842 | $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'}; | ||||
| 7843 | |||||
| 7844 | $rOpts_variable_maximum_line_length = | ||||
| 7845 | $rOpts->{'variable-maximum-line-length'}; | ||||
| 7846 | $rOpts_short_concatenation_item_length = | ||||
| 7847 | $rOpts->{'short-concatenation-item-length'}; | ||||
| 7848 | |||||
| 7849 | $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'}; | ||||
| 7850 | $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'}; | ||||
| 7851 | $rOpts_format_skipping = $rOpts->{'format-skipping'}; | ||||
| 7852 | $rOpts_space_function_paren = $rOpts->{'space-function-paren'}; | ||||
| 7853 | $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'}; | ||||
| 7854 | $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'}; | ||||
| 7855 | $rOpts_ignore_side_comment_lengths = | ||||
| 7856 | $rOpts->{'ignore-side-comment-lengths'}; | ||||
| 7857 | |||||
| 7858 | # Note that both opening and closing tokens can access the opening | ||||
| 7859 | # and closing flags of their container types. | ||||
| 7860 | %opening_vertical_tightness = ( | ||||
| 7861 | '(' => $rOpts->{'paren-vertical-tightness'}, | ||||
| 7862 | '{' => $rOpts->{'brace-vertical-tightness'}, | ||||
| 7863 | '[' => $rOpts->{'square-bracket-vertical-tightness'}, | ||||
| 7864 | ')' => $rOpts->{'paren-vertical-tightness'}, | ||||
| 7865 | '}' => $rOpts->{'brace-vertical-tightness'}, | ||||
| 7866 | ']' => $rOpts->{'square-bracket-vertical-tightness'}, | ||||
| 7867 | ); | ||||
| 7868 | |||||
| 7869 | %closing_vertical_tightness = ( | ||||
| 7870 | '(' => $rOpts->{'paren-vertical-tightness-closing'}, | ||||
| 7871 | '{' => $rOpts->{'brace-vertical-tightness-closing'}, | ||||
| 7872 | '[' => $rOpts->{'square-bracket-vertical-tightness-closing'}, | ||||
| 7873 | ')' => $rOpts->{'paren-vertical-tightness-closing'}, | ||||
| 7874 | '}' => $rOpts->{'brace-vertical-tightness-closing'}, | ||||
| 7875 | ']' => $rOpts->{'square-bracket-vertical-tightness-closing'}, | ||||
| 7876 | ); | ||||
| 7877 | |||||
| 7878 | $rOpts_tight_secret_operators = $rOpts->{'tight-secret-operators'}; | ||||
| 7879 | |||||
| 7880 | # assume flag for '>' same as ')' for closing qw quotes | ||||
| 7881 | %closing_token_indentation = ( | ||||
| 7882 | ')' => $rOpts->{'closing-paren-indentation'}, | ||||
| 7883 | '}' => $rOpts->{'closing-brace-indentation'}, | ||||
| 7884 | ']' => $rOpts->{'closing-square-bracket-indentation'}, | ||||
| 7885 | '>' => $rOpts->{'closing-paren-indentation'}, | ||||
| 7886 | ); | ||||
| 7887 | |||||
| 7888 | # flag indicating if any closing tokens are indented | ||||
| 7889 | $some_closing_token_indentation = | ||||
| 7890 | $rOpts->{'closing-paren-indentation'} | ||||
| 7891 | || $rOpts->{'closing-brace-indentation'} | ||||
| 7892 | || $rOpts->{'closing-square-bracket-indentation'} | ||||
| 7893 | || $rOpts->{'indent-closing-brace'}; | ||||
| 7894 | |||||
| 7895 | %opening_token_right = ( | ||||
| 7896 | '(' => $rOpts->{'opening-paren-right'}, | ||||
| 7897 | '{' => $rOpts->{'opening-hash-brace-right'}, | ||||
| 7898 | '[' => $rOpts->{'opening-square-bracket-right'}, | ||||
| 7899 | ); | ||||
| 7900 | |||||
| 7901 | %stack_opening_token = ( | ||||
| 7902 | '(' => $rOpts->{'stack-opening-paren'}, | ||||
| 7903 | '{' => $rOpts->{'stack-opening-hash-brace'}, | ||||
| 7904 | '[' => $rOpts->{'stack-opening-square-bracket'}, | ||||
| 7905 | ); | ||||
| 7906 | |||||
| 7907 | %stack_closing_token = ( | ||||
| 7908 | ')' => $rOpts->{'stack-closing-paren'}, | ||||
| 7909 | '}' => $rOpts->{'stack-closing-hash-brace'}, | ||||
| 7910 | ']' => $rOpts->{'stack-closing-square-bracket'}, | ||||
| 7911 | ); | ||||
| 7912 | $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'}; | ||||
| 7913 | } | ||||
| 7914 | |||||
| 7915 | sub make_static_block_comment_pattern { | ||||
| 7916 | |||||
| 7917 | # create the pattern used to identify static block comments | ||||
| 7918 | $static_block_comment_pattern = '^\s*##'; | ||||
| 7919 | |||||
| 7920 | # allow the user to change it | ||||
| 7921 | if ( $rOpts->{'static-block-comment-prefix'} ) { | ||||
| 7922 | my $prefix = $rOpts->{'static-block-comment-prefix'}; | ||||
| 7923 | $prefix =~ s/^\s*//; | ||||
| 7924 | my $pattern = $prefix; | ||||
| 7925 | |||||
| 7926 | # user may give leading caret to force matching left comments only | ||||
| 7927 | if ( $prefix !~ /^\^#/ ) { | ||||
| 7928 | if ( $prefix !~ /^#/ ) { | ||||
| 7929 | Perl::Tidy::Die | ||||
| 7930 | "ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n"; | ||||
| 7931 | } | ||||
| 7932 | $pattern = '^\s*' . $prefix; | ||||
| 7933 | } | ||||
| 7934 | eval "'##'=~/$pattern/"; | ||||
| 7935 | if ($@) { | ||||
| 7936 | Perl::Tidy::Die | ||||
| 7937 | "ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n"; | ||||
| 7938 | } | ||||
| 7939 | $static_block_comment_pattern = $pattern; | ||||
| 7940 | } | ||||
| 7941 | } | ||||
| 7942 | |||||
| 7943 | sub make_format_skipping_pattern { | ||||
| 7944 | my ( $opt_name, $default ) = @_; | ||||
| 7945 | my $param = $rOpts->{$opt_name}; | ||||
| 7946 | unless ($param) { $param = $default } | ||||
| 7947 | $param =~ s/^\s*//; | ||||
| 7948 | if ( $param !~ /^#/ ) { | ||||
| 7949 | Perl::Tidy::Die | ||||
| 7950 | "ERROR: the $opt_name parameter '$param' must begin with '#'\n"; | ||||
| 7951 | } | ||||
| 7952 | my $pattern = '^' . $param . '\s'; | ||||
| 7953 | eval "'#'=~/$pattern/"; | ||||
| 7954 | if ($@) { | ||||
| 7955 | Perl::Tidy::Die | ||||
| 7956 | "ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"; | ||||
| 7957 | } | ||||
| 7958 | return $pattern; | ||||
| 7959 | } | ||||
| 7960 | |||||
| 7961 | sub make_closing_side_comment_list_pattern { | ||||
| 7962 | |||||
| 7963 | # turn any input list into a regex for recognizing selected block types | ||||
| 7964 | $closing_side_comment_list_pattern = '^\w+'; | ||||
| 7965 | if ( defined( $rOpts->{'closing-side-comment-list'} ) | ||||
| 7966 | && $rOpts->{'closing-side-comment-list'} ) | ||||
| 7967 | { | ||||
| 7968 | $closing_side_comment_list_pattern = | ||||
| 7969 | make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} ); | ||||
| 7970 | } | ||||
| 7971 | } | ||||
| 7972 | |||||
| 7973 | sub make_bli_pattern { | ||||
| 7974 | |||||
| 7975 | if ( defined( $rOpts->{'brace-left-and-indent-list'} ) | ||||
| 7976 | && $rOpts->{'brace-left-and-indent-list'} ) | ||||
| 7977 | { | ||||
| 7978 | $bli_list_string = $rOpts->{'brace-left-and-indent-list'}; | ||||
| 7979 | } | ||||
| 7980 | |||||
| 7981 | $bli_pattern = make_block_pattern( '-blil', $bli_list_string ); | ||||
| 7982 | } | ||||
| 7983 | |||||
| 7984 | sub make_block_brace_vertical_tightness_pattern { | ||||
| 7985 | |||||
| 7986 | # turn any input list into a regex for recognizing selected block types | ||||
| 7987 | $block_brace_vertical_tightness_pattern = | ||||
| 7988 | '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)'; | ||||
| 7989 | if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} ) | ||||
| 7990 | && $rOpts->{'block-brace-vertical-tightness-list'} ) | ||||
| 7991 | { | ||||
| 7992 | $block_brace_vertical_tightness_pattern = | ||||
| 7993 | make_block_pattern( '-bbvtl', | ||||
| 7994 | $rOpts->{'block-brace-vertical-tightness-list'} ); | ||||
| 7995 | } | ||||
| 7996 | } | ||||
| 7997 | |||||
| 7998 | sub make_block_pattern { | ||||
| 7999 | |||||
| 8000 | # given a string of block-type keywords, return a regex to match them | ||||
| 8001 | # The only tricky part is that labels are indicated with a single ':' | ||||
| 8002 | # and the 'sub' token text may have additional text after it (name of | ||||
| 8003 | # sub). | ||||
| 8004 | # | ||||
| 8005 | # Example: | ||||
| 8006 | # | ||||
| 8007 | # input string: "if else elsif unless while for foreach do : sub"; | ||||
| 8008 | # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)'; | ||||
| 8009 | |||||
| 8010 | my ( $abbrev, $string ) = @_; | ||||
| 8011 | my @list = split_words($string); | ||||
| 8012 | my @words = (); | ||||
| 8013 | my %seen; | ||||
| 8014 | for my $i (@list) { | ||||
| 8015 | if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern } | ||||
| 8016 | next if $seen{$i}; | ||||
| 8017 | $seen{$i} = 1; | ||||
| 8018 | if ( $i eq 'sub' ) { | ||||
| 8019 | } | ||||
| 8020 | elsif ( $i eq ';' ) { | ||||
| 8021 | push @words, ';'; | ||||
| 8022 | } | ||||
| 8023 | elsif ( $i eq '{' ) { | ||||
| 8024 | push @words, '\{'; | ||||
| 8025 | } | ||||
| 8026 | elsif ( $i eq ':' ) { | ||||
| 8027 | push @words, '\w+:'; | ||||
| 8028 | } | ||||
| 8029 | elsif ( $i =~ /^\w/ ) { | ||||
| 8030 | push @words, $i; | ||||
| 8031 | } | ||||
| 8032 | else { | ||||
| 8033 | Perl::Tidy::Warn | ||||
| 8034 | "unrecognized block type $i after $abbrev, ignoring\n"; | ||||
| 8035 | } | ||||
| 8036 | } | ||||
| 8037 | my $pattern = '(' . join( '|', @words ) . ')$'; | ||||
| 8038 | if ( $seen{'sub'} ) { | ||||
| 8039 | $pattern = '(' . $pattern . '|sub)'; | ||||
| 8040 | } | ||||
| 8041 | $pattern = '^' . $pattern; | ||||
| 8042 | return $pattern; | ||||
| 8043 | } | ||||
| 8044 | |||||
| 8045 | sub make_static_side_comment_pattern { | ||||
| 8046 | |||||
| 8047 | # create the pattern used to identify static side comments | ||||
| 8048 | $static_side_comment_pattern = '^##'; | ||||
| 8049 | |||||
| 8050 | # allow the user to change it | ||||
| 8051 | if ( $rOpts->{'static-side-comment-prefix'} ) { | ||||
| 8052 | my $prefix = $rOpts->{'static-side-comment-prefix'}; | ||||
| 8053 | $prefix =~ s/^\s*//; | ||||
| 8054 | my $pattern = '^' . $prefix; | ||||
| 8055 | eval "'##'=~/$pattern/"; | ||||
| 8056 | if ($@) { | ||||
| 8057 | Perl::Tidy::Die | ||||
| 8058 | "ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n"; | ||||
| 8059 | } | ||||
| 8060 | $static_side_comment_pattern = $pattern; | ||||
| 8061 | } | ||||
| 8062 | } | ||||
| 8063 | |||||
| 8064 | sub make_closing_side_comment_prefix { | ||||
| 8065 | |||||
| 8066 | # Be sure we have a valid closing side comment prefix | ||||
| 8067 | my $csc_prefix = $rOpts->{'closing-side-comment-prefix'}; | ||||
| 8068 | my $csc_prefix_pattern; | ||||
| 8069 | if ( !defined($csc_prefix) ) { | ||||
| 8070 | $csc_prefix = '## end'; | ||||
| 8071 | $csc_prefix_pattern = '^##\s+end'; | ||||
| 8072 | } | ||||
| 8073 | else { | ||||
| 8074 | my $test_csc_prefix = $csc_prefix; | ||||
| 8075 | if ( $test_csc_prefix !~ /^#/ ) { | ||||
| 8076 | $test_csc_prefix = '#' . $test_csc_prefix; | ||||
| 8077 | } | ||||
| 8078 | |||||
| 8079 | # make a regex to recognize the prefix | ||||
| 8080 | my $test_csc_prefix_pattern = $test_csc_prefix; | ||||
| 8081 | |||||
| 8082 | # escape any special characters | ||||
| 8083 | $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g; | ||||
| 8084 | |||||
| 8085 | $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern; | ||||
| 8086 | |||||
| 8087 | # allow exact number of intermediate spaces to vary | ||||
| 8088 | $test_csc_prefix_pattern =~ s/\s+/\\s\+/g; | ||||
| 8089 | |||||
| 8090 | # make sure we have a good pattern | ||||
| 8091 | # if we fail this we probably have an error in escaping | ||||
| 8092 | # characters. | ||||
| 8093 | eval "'##'=~/$test_csc_prefix_pattern/"; | ||||
| 8094 | if ($@) { | ||||
| 8095 | |||||
| 8096 | # shouldn't happen..must have screwed up escaping, above | ||||
| 8097 | report_definite_bug(); | ||||
| 8098 | Perl::Tidy::Warn | ||||
| 8099 | "Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n"; | ||||
| 8100 | |||||
| 8101 | # just warn and keep going with defaults | ||||
| 8102 | Perl::Tidy::Warn "Please consider using a simpler -cscp prefix\n"; | ||||
| 8103 | Perl::Tidy::Warn | ||||
| 8104 | "Using default -cscp instead; please check output\n"; | ||||
| 8105 | } | ||||
| 8106 | else { | ||||
| 8107 | $csc_prefix = $test_csc_prefix; | ||||
| 8108 | $csc_prefix_pattern = $test_csc_prefix_pattern; | ||||
| 8109 | } | ||||
| 8110 | } | ||||
| 8111 | $rOpts->{'closing-side-comment-prefix'} = $csc_prefix; | ||||
| 8112 | $closing_side_comment_prefix_pattern = $csc_prefix_pattern; | ||||
| 8113 | } | ||||
| 8114 | |||||
| 8115 | sub dump_want_left_space { | ||||
| 8116 | my $fh = shift; | ||||
| 8117 | local $" = "\n"; | ||||
| 8118 | print $fh <<EOM; | ||||
| 8119 | These values are the main control of whitespace to the left of a token type; | ||||
| 8120 | They may be altered with the -wls parameter. | ||||
| 8121 | For a list of token types, use perltidy --dump-token-types (-dtt) | ||||
| 8122 | 1 means the token wants a space to its left | ||||
| 8123 | -1 means the token does not want a space to its left | ||||
| 8124 | ------------------------------------------------------------------------ | ||||
| 8125 | EOM | ||||
| 8126 | foreach ( sort keys %want_left_space ) { | ||||
| 8127 | print $fh "$_\t$want_left_space{$_}\n"; | ||||
| 8128 | } | ||||
| 8129 | } | ||||
| 8130 | |||||
| 8131 | sub dump_want_right_space { | ||||
| 8132 | my $fh = shift; | ||||
| 8133 | local $" = "\n"; | ||||
| 8134 | print $fh <<EOM; | ||||
| 8135 | These values are the main control of whitespace to the right of a token type; | ||||
| 8136 | They may be altered with the -wrs parameter. | ||||
| 8137 | For a list of token types, use perltidy --dump-token-types (-dtt) | ||||
| 8138 | 1 means the token wants a space to its right | ||||
| 8139 | -1 means the token does not want a space to its right | ||||
| 8140 | ------------------------------------------------------------------------ | ||||
| 8141 | EOM | ||||
| 8142 | foreach ( sort keys %want_right_space ) { | ||||
| 8143 | print $fh "$_\t$want_right_space{$_}\n"; | ||||
| 8144 | } | ||||
| 8145 | } | ||||
| 8146 | |||||
| 8147 | { # begin is_essential_whitespace | ||||
| 8148 | |||||
| 8149 | 2 | 300ns | my %is_sort_grep_map; | ||
| 8150 | 1 | 200ns | my %is_for_foreach; | ||
| 8151 | |||||
| 8152 | # spent 11µs within Perl::Tidy::Formatter::BEGIN@8152 which was called:
# once (11µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 8160 | ||||
| 8153 | |||||
| 8154 | 1 | 2µs | @_ = qw(sort grep map); | ||
| 8155 | 1 | 3µs | @is_sort_grep_map{@_} = (1) x scalar(@_); | ||
| 8156 | |||||
| 8157 | 1 | 1µs | @_ = qw(for foreach); | ||
| 8158 | 1 | 10µs | @is_for_foreach{@_} = (1) x scalar(@_); | ||
| 8159 | |||||
| 8160 | 1 | 458µs | 1 | 11µs | } # spent 11µs making 1 call to Perl::Tidy::Formatter::BEGIN@8152 |
| 8161 | |||||
| 8162 | sub is_essential_whitespace { | ||||
| 8163 | |||||
| 8164 | # Essential whitespace means whitespace which cannot be safely deleted | ||||
| 8165 | # without risking the introduction of a syntax error. | ||||
| 8166 | # We are given three tokens and their types: | ||||
| 8167 | # ($tokenl, $typel) is the token to the left of the space in question | ||||
| 8168 | # ($tokenr, $typer) is the token to the right of the space in question | ||||
| 8169 | # ($tokenll, $typell) is previous nonblank token to the left of $tokenl | ||||
| 8170 | # | ||||
| 8171 | # This is a slow routine but is not needed too often except when -mangle | ||||
| 8172 | # is used. | ||||
| 8173 | # | ||||
| 8174 | # Note: This routine should almost never need to be changed. It is | ||||
| 8175 | # for avoiding syntax problems rather than for formatting. | ||||
| 8176 | my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_; | ||||
| 8177 | |||||
| 8178 | my $result = | ||||
| 8179 | |||||
| 8180 | # never combine two bare words or numbers | ||||
| 8181 | # examples: and ::ok(1) | ||||
| 8182 | # return ::spw(...) | ||||
| 8183 | # for bla::bla:: abc | ||||
| 8184 | # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl | ||||
| 8185 | # $input eq"quit" to make $inputeq"quit" | ||||
| 8186 | # my $size=-s::SINK if $file; <==OK but we won't do it | ||||
| 8187 | # don't join something like: for bla::bla:: abc | ||||
| 8188 | # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl | ||||
| 8189 | ( ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' ) | ||||
| 8190 | && ( $tokenr =~ /^([\'\w]|\:\:)/ ) ) | ||||
| 8191 | |||||
| 8192 | # do not combine a number with a concatenation dot | ||||
| 8193 | # example: pom.caputo: | ||||
| 8194 | # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n"); | ||||
| 8195 | || ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) ) | ||||
| 8196 | || ( ( $typer eq 'n' ) && ( $tokenl eq '.' ) ) | ||||
| 8197 | |||||
| 8198 | # do not join a minus with a bare word, because you might form | ||||
| 8199 | # a file test operator. Example from Complex.pm: | ||||
| 8200 | # if (CORE::abs($z - i) < $eps); "z-i" would be taken as a file test. | ||||
| 8201 | || ( ( $tokenl eq '-' ) && ( $tokenr =~ /^[_A-Za-z]$/ ) ) | ||||
| 8202 | |||||
| 8203 | # and something like this could become ambiguous without space | ||||
| 8204 | # after the '-': | ||||
| 8205 | # use constant III=>1; | ||||
| 8206 | # $a = $b - III; | ||||
| 8207 | # and even this: | ||||
| 8208 | # $a = - III; | ||||
| 8209 | || ( ( $tokenl eq '-' ) | ||||
| 8210 | && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) ) | ||||
| 8211 | |||||
| 8212 | # '= -' should not become =- or you will get a warning | ||||
| 8213 | # about reversed -= | ||||
| 8214 | # || ($tokenr eq '-') | ||||
| 8215 | |||||
| 8216 | # keep a space between a quote and a bareword to prevent the | ||||
| 8217 | # bareword from becoming a quote modifier. | ||||
| 8218 | || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) ) | ||||
| 8219 | |||||
| 8220 | # keep a space between a token ending in '$' and any word; | ||||
| 8221 | # this caused trouble: "die @$ if $@" | ||||
| 8222 | || ( ( $typel eq 'i' && $tokenl =~ /\$$/ ) | ||||
| 8223 | && ( $tokenr =~ /^[a-zA-Z_]/ ) ) | ||||
| 8224 | |||||
| 8225 | # perl is very fussy about spaces before << | ||||
| 8226 | || ( $tokenr =~ /^\<\</ ) | ||||
| 8227 | |||||
| 8228 | # avoid combining tokens to create new meanings. Example: | ||||
| 8229 | # $a+ +$b must not become $a++$b | ||||
| 8230 | || ( $is_digraph{ $tokenl . $tokenr } ) | ||||
| 8231 | || ( $is_trigraph{ $tokenl . $tokenr } ) | ||||
| 8232 | |||||
| 8233 | # another example: do not combine these two &'s: | ||||
| 8234 | # allow_options & &OPT_EXECCGI | ||||
| 8235 | || ( $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } ) | ||||
| 8236 | |||||
| 8237 | # don't combine $$ or $# with any alphanumeric | ||||
| 8238 | # (testfile mangle.t with --mangle) | ||||
| 8239 | || ( ( $tokenl =~ /^\$[\$\#]$/ ) && ( $tokenr =~ /^\w/ ) ) | ||||
| 8240 | |||||
| 8241 | # retain any space after possible filehandle | ||||
| 8242 | # (testfiles prnterr1.t with --extrude and mangle.t with --mangle) | ||||
| 8243 | || ( $typel eq 'Z' ) | ||||
| 8244 | |||||
| 8245 | # Perl is sensitive to whitespace after the + here: | ||||
| 8246 | # $b = xvals $a + 0.1 * yvals $a; | ||||
| 8247 | || ( $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/ ) | ||||
| 8248 | |||||
| 8249 | # keep paren separate in 'use Foo::Bar ()' | ||||
| 8250 | || ( $tokenr eq '(' | ||||
| 8251 | && $typel eq 'w' | ||||
| 8252 | && $typell eq 'k' | ||||
| 8253 | && $tokenll eq 'use' ) | ||||
| 8254 | |||||
| 8255 | # keep any space between filehandle and paren: | ||||
| 8256 | # file mangle.t with --mangle: | ||||
| 8257 | || ( $typel eq 'Y' && $tokenr eq '(' ) | ||||
| 8258 | |||||
| 8259 | # retain any space after here doc operator ( hereerr.t) | ||||
| 8260 | || ( $typel eq 'h' ) | ||||
| 8261 | |||||
| 8262 | # be careful with a space around ++ and --, to avoid ambiguity as to | ||||
| 8263 | # which token it applies | ||||
| 8264 | || ( ( $typer =~ /^(pp|mm)$/ ) && ( $tokenl !~ /^[\;\{\(\[]/ ) ) | ||||
| 8265 | || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) ) | ||||
| 8266 | |||||
| 8267 | # need space after foreach my; for example, this will fail in | ||||
| 8268 | # older versions of Perl: | ||||
| 8269 | # foreach my$ft(@filetypes)... | ||||
| 8270 | || ( | ||||
| 8271 | $tokenl eq 'my' | ||||
| 8272 | |||||
| 8273 | # /^(for|foreach)$/ | ||||
| 8274 | && $is_for_foreach{$tokenll} | ||||
| 8275 | && $tokenr =~ /^\$/ | ||||
| 8276 | ) | ||||
| 8277 | |||||
| 8278 | # must have space between grep and left paren; "grep(" will fail | ||||
| 8279 | || ( $tokenr eq '(' && $is_sort_grep_map{$tokenl} ) | ||||
| 8280 | |||||
| 8281 | # don't stick numbers next to left parens, as in: | ||||
| 8282 | #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm) | ||||
| 8283 | || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) ) | ||||
| 8284 | |||||
| 8285 | # We must be sure that a space between a ? and a quoted string | ||||
| 8286 | # remains if the space before the ? remains. [Loca.pm, lockarea] | ||||
| 8287 | # ie, | ||||
| 8288 | # $b=join $comma ? ',' : ':', @_; # ok | ||||
| 8289 | # $b=join $comma?',' : ':', @_; # ok! | ||||
| 8290 | # $b=join $comma ?',' : ':', @_; # error! | ||||
| 8291 | # Not really required: | ||||
| 8292 | ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) ) | ||||
| 8293 | |||||
| 8294 | # do not remove space between an '&' and a bare word because | ||||
| 8295 | # it may turn into a function evaluation, like here | ||||
| 8296 | # between '&' and 'O_ACCMODE', producing a syntax error [File.pm] | ||||
| 8297 | # $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY); | ||||
| 8298 | || ( ( $typel eq '&' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) ) | ||||
| 8299 | |||||
| 8300 | # space stacked labels (TODO: check if really necessary) | ||||
| 8301 | || ( $typel eq 'J' && $typer eq 'J' ) | ||||
| 8302 | |||||
| 8303 | ; # the value of this long logic sequence is the result we want | ||||
| 8304 | return $result; | ||||
| 8305 | } | ||||
| 8306 | } | ||||
| 8307 | |||||
| 8308 | { | ||||
| 8309 | 2 | 0s | my %secret_operators; | ||
| 8310 | 1 | 2µs | my %is_leading_secret_token; | ||
| 8311 | |||||
| 8312 | # spent 28µs within Perl::Tidy::Formatter::BEGIN@8312 which was called:
# once (28µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 8337 | ||||
| 8313 | |||||
| 8314 | # token lists for perl secret operators as compiled by Philippe Bruhat | ||||
| 8315 | # at: https://metacpan.org/module/perlsecret | ||||
| 8316 | 1 | 14µs | %secret_operators = ( | ||
| 8317 | 'Goatse' => [qw#= ( ) =#], #=( )= | ||||
| 8318 | 'Venus1' => [qw#0 +#], # 0+ | ||||
| 8319 | 'Venus2' => [qw#+ 0#], # +0 | ||||
| 8320 | 'Enterprise' => [qw#) x ! !#], # ()x!! | ||||
| 8321 | 'Kite1' => [qw#~ ~ <>#], # ~~<> | ||||
| 8322 | 'Kite2' => [qw#~~ <>#], # ~~<> | ||||
| 8323 | 'Winking Fat Comma' => [ ( ',', '=>' ) ], # ,=> | ||||
| 8324 | ); | ||||
| 8325 | |||||
| 8326 | # The following operators and constants are not included because they | ||||
| 8327 | # are normally kept tight by perltidy: | ||||
| 8328 | # !! ~~ <~> | ||||
| 8329 | # | ||||
| 8330 | |||||
| 8331 | # Make a lookup table indexed by the first token of each operator: | ||||
| 8332 | # first token => [list, list, ...] | ||||
| 8333 | 1 | 9µs | foreach my $value ( values(%secret_operators) ) { | ||
| 8334 | 7 | 1µs | my $tok = $value->[0]; | ||
| 8335 | 7 | 5µs | push @{ $is_leading_secret_token{$tok} }, $value; | ||
| 8336 | } | ||||
| 8337 | 1 | 656µs | 1 | 28µs | } # spent 28µs making 1 call to Perl::Tidy::Formatter::BEGIN@8312 |
| 8338 | |||||
| 8339 | sub secret_operator_whitespace { | ||||
| 8340 | |||||
| 8341 | my ( $jmax, $rtokens, $rtoken_type, $rwhite_space_flag ) = @_; | ||||
| 8342 | |||||
| 8343 | # Loop over all tokens in this line | ||||
| 8344 | my ( $j, $token, $type ); | ||||
| 8345 | for ( $j = 0 ; $j <= $jmax ; $j++ ) { | ||||
| 8346 | |||||
| 8347 | $token = $$rtokens[$j]; | ||||
| 8348 | $type = $$rtoken_type[$j]; | ||||
| 8349 | |||||
| 8350 | # Skip unless this token might start a secret operator | ||||
| 8351 | next if ( $type eq 'b' ); | ||||
| 8352 | next unless ( $is_leading_secret_token{$token} ); | ||||
| 8353 | |||||
| 8354 | # Loop over all secret operators with this leading token | ||||
| 8355 | foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) { | ||||
| 8356 | my $jend = $j - 1; | ||||
| 8357 | foreach my $tok ( @{$rpattern} ) { | ||||
| 8358 | $jend++; | ||||
| 8359 | $jend++ | ||||
| 8360 | |||||
| 8361 | if ( $jend <= $jmax && $$rtoken_type[$jend] eq 'b' ); | ||||
| 8362 | if ( $jend > $jmax || $tok ne $$rtokens[$jend] ) { | ||||
| 8363 | $jend = undef; | ||||
| 8364 | last; | ||||
| 8365 | } | ||||
| 8366 | } | ||||
| 8367 | |||||
| 8368 | if ($jend) { | ||||
| 8369 | |||||
| 8370 | # set flags to prevent spaces within this operator | ||||
| 8371 | for ( my $jj = $j + 1 ; $jj <= $jend ; $jj++ ) { | ||||
| 8372 | $rwhite_space_flag->[$jj] = WS_NO; | ||||
| 8373 | } | ||||
| 8374 | $j = $jend; | ||||
| 8375 | last; | ||||
| 8376 | } | ||||
| 8377 | } ## End Loop over all operators | ||||
| 8378 | } ## End loop over all tokens | ||||
| 8379 | } # End sub | ||||
| 8380 | } | ||||
| 8381 | |||||
| 8382 | sub set_white_space_flag { | ||||
| 8383 | |||||
| 8384 | # This routine examines each pair of nonblank tokens and | ||||
| 8385 | # sets values for array @white_space_flag. | ||||
| 8386 | # | ||||
| 8387 | # $white_space_flag[$j] is a flag indicating whether a white space | ||||
| 8388 | # BEFORE token $j is needed, with the following values: | ||||
| 8389 | # | ||||
| 8390 | # WS_NO = -1 do not want a space before token $j | ||||
| 8391 | # WS_OPTIONAL= 0 optional space or $j is a whitespace | ||||
| 8392 | # WS_YES = 1 want a space before token $j | ||||
| 8393 | # | ||||
| 8394 | # | ||||
| 8395 | # The values for the first token will be defined based | ||||
| 8396 | # upon the contents of the "to_go" output array. | ||||
| 8397 | # | ||||
| 8398 | # Note: retain debug print statements because they are usually | ||||
| 8399 | # required after adding new token types. | ||||
| 8400 | |||||
| 8401 | # spent 90µs within Perl::Tidy::Formatter::BEGIN@8401 which was called:
# once (90µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 8522 | ||||
| 8402 | |||||
| 8403 | # initialize these global hashes, which control the use of | ||||
| 8404 | # whitespace around tokens: | ||||
| 8405 | # | ||||
| 8406 | # %binary_ws_rules | ||||
| 8407 | # %want_left_space | ||||
| 8408 | # %want_right_space | ||||
| 8409 | # %space_after_keyword | ||||
| 8410 | # | ||||
| 8411 | # Many token types are identical to the tokens themselves. | ||||
| 8412 | # See the tokenizer for a complete list. Here are some special types: | ||||
| 8413 | # k = perl keyword | ||||
| 8414 | # f = semicolon in for statement | ||||
| 8415 | # m = unary minus | ||||
| 8416 | # p = unary plus | ||||
| 8417 | # Note that :: is excluded since it should be contained in an identifier | ||||
| 8418 | # Note that '->' is excluded because it never gets space | ||||
| 8419 | # parentheses and brackets are excluded since they are handled specially | ||||
| 8420 | # curly braces are included but may be overridden by logic, such as | ||||
| 8421 | # newline logic. | ||||
| 8422 | |||||
| 8423 | # NEW_TOKENS: create a whitespace rule here. This can be as | ||||
| 8424 | # simple as adding your new letter to @spaces_both_sides, for | ||||
| 8425 | # example. | ||||
| 8426 | |||||
| 8427 | 1 | 2µs | @_ = qw" L { ( [ "; | ||
| 8428 | 1 | 2µs | @is_opening_type{@_} = (1) x scalar(@_); | ||
| 8429 | |||||
| 8430 | 1 | 2µs | @_ = qw" R } ) ] "; | ||
| 8431 | 1 | 800ns | @is_closing_type{@_} = (1) x scalar(@_); | ||
| 8432 | |||||
| 8433 | 1 | 13µs | my @spaces_both_sides = qw" | ||
| 8434 | + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -= | ||||
| 8435 | .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~ | ||||
| 8436 | &&= ||= //= <=> A k f w F n C Y U G v | ||||
| 8437 | "; | ||||
| 8438 | |||||
| 8439 | 1 | 2µs | my @spaces_left_side = qw" | ||
| 8440 | t ! ~ m p { \ h pp mm Z j | ||||
| 8441 | "; | ||||
| 8442 | 1 | 600ns | push( @spaces_left_side, '#' ); # avoids warning message | ||
| 8443 | |||||
| 8444 | 1 | 1µs | my @spaces_right_side = qw" | ||
| 8445 | ; } ) ] R J ++ -- **= | ||||
| 8446 | "; | ||||
| 8447 | 1 | 200ns | push( @spaces_right_side, ',' ); # avoids warning message | ||
| 8448 | |||||
| 8449 | # Note that we are in a BEGIN block here. Later in processing | ||||
| 8450 | # the values of %want_left_space and %want_right_space | ||||
| 8451 | # may be overridden by any user settings specified by the | ||||
| 8452 | # -wls and -wrs parameters. However the binary_whitespace_rules | ||||
| 8453 | # are hardwired and have priority. | ||||
| 8454 | 1 | 21µs | @want_left_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides); | ||
| 8455 | 1 | 9µs | @want_right_space{@spaces_both_sides} = | ||
| 8456 | (1) x scalar(@spaces_both_sides); | ||||
| 8457 | 1 | 5µs | @want_left_space{@spaces_left_side} = (1) x scalar(@spaces_left_side); | ||
| 8458 | 1 | 3µs | @want_right_space{@spaces_left_side} = (-1) x scalar(@spaces_left_side); | ||
| 8459 | 1 | 2µs | @want_left_space{@spaces_right_side} = | ||
| 8460 | (-1) x scalar(@spaces_right_side); | ||||
| 8461 | 1 | 1µs | @want_right_space{@spaces_right_side} = | ||
| 8462 | (1) x scalar(@spaces_right_side); | ||||
| 8463 | 1 | 300ns | $want_left_space{'->'} = WS_NO; | ||
| 8464 | 1 | 100ns | $want_right_space{'->'} = WS_NO; | ||
| 8465 | 1 | 100ns | $want_left_space{'**'} = WS_NO; | ||
| 8466 | 1 | 100ns | $want_right_space{'**'} = WS_NO; | ||
| 8467 | 1 | 100ns | $want_right_space{'CORE::'} = WS_NO; | ||
| 8468 | |||||
| 8469 | # These binary_ws_rules are hardwired and have priority over the above | ||||
| 8470 | # settings. It would be nice to allow adjustment by the user, | ||||
| 8471 | # but it would be complicated to specify. | ||||
| 8472 | # | ||||
| 8473 | # hash type information must stay tightly bound | ||||
| 8474 | # as in : ${xxxx} | ||||
| 8475 | 1 | 3µs | $binary_ws_rules{'i'}{'L'} = WS_NO; | ||
| 8476 | 1 | 300ns | $binary_ws_rules{'i'}{'{'} = WS_YES; | ||
| 8477 | 1 | 400ns | $binary_ws_rules{'k'}{'{'} = WS_YES; | ||
| 8478 | 1 | 300ns | $binary_ws_rules{'U'}{'{'} = WS_YES; | ||
| 8479 | 1 | 200ns | $binary_ws_rules{'i'}{'['} = WS_NO; | ||
| 8480 | 1 | 300ns | $binary_ws_rules{'R'}{'L'} = WS_NO; | ||
| 8481 | 1 | 200ns | $binary_ws_rules{'R'}{'{'} = WS_NO; | ||
| 8482 | 1 | 300ns | $binary_ws_rules{'t'}{'L'} = WS_NO; | ||
| 8483 | 1 | 100ns | $binary_ws_rules{'t'}{'{'} = WS_NO; | ||
| 8484 | 1 | 200ns | $binary_ws_rules{'}'}{'L'} = WS_NO; | ||
| 8485 | 1 | 200ns | $binary_ws_rules{'}'}{'{'} = WS_NO; | ||
| 8486 | 1 | 300ns | $binary_ws_rules{'$'}{'L'} = WS_NO; | ||
| 8487 | 1 | 200ns | $binary_ws_rules{'$'}{'{'} = WS_NO; | ||
| 8488 | 1 | 700ns | $binary_ws_rules{'@'}{'L'} = WS_NO; | ||
| 8489 | 1 | 100ns | $binary_ws_rules{'@'}{'{'} = WS_NO; | ||
| 8490 | 1 | 300ns | $binary_ws_rules{'='}{'L'} = WS_YES; | ||
| 8491 | 1 | 300ns | $binary_ws_rules{'J'}{'J'} = WS_YES; | ||
| 8492 | |||||
| 8493 | # the following includes ') {' | ||||
| 8494 | # as in : if ( xxx ) { yyy } | ||||
| 8495 | 1 | 300ns | $binary_ws_rules{']'}{'L'} = WS_NO; | ||
| 8496 | 1 | 100ns | $binary_ws_rules{']'}{'{'} = WS_NO; | ||
| 8497 | 1 | 300ns | $binary_ws_rules{')'}{'{'} = WS_YES; | ||
| 8498 | 1 | 100ns | $binary_ws_rules{')'}{'['} = WS_NO; | ||
| 8499 | 1 | 100ns | $binary_ws_rules{']'}{'['} = WS_NO; | ||
| 8500 | 1 | 100ns | $binary_ws_rules{']'}{'{'} = WS_NO; | ||
| 8501 | 1 | 100ns | $binary_ws_rules{'}'}{'['} = WS_NO; | ||
| 8502 | 1 | 100ns | $binary_ws_rules{'R'}{'['} = WS_NO; | ||
| 8503 | |||||
| 8504 | 1 | 100ns | $binary_ws_rules{']'}{'++'} = WS_NO; | ||
| 8505 | 1 | 100ns | $binary_ws_rules{']'}{'--'} = WS_NO; | ||
| 8506 | 1 | 100ns | $binary_ws_rules{')'}{'++'} = WS_NO; | ||
| 8507 | 1 | 100ns | $binary_ws_rules{')'}{'--'} = WS_NO; | ||
| 8508 | |||||
| 8509 | 1 | 100ns | $binary_ws_rules{'R'}{'++'} = WS_NO; | ||
| 8510 | 1 | 100ns | $binary_ws_rules{'R'}{'--'} = WS_NO; | ||
| 8511 | |||||
| 8512 | 1 | 200ns | $binary_ws_rules{'i'}{'Q'} = WS_YES; | ||
| 8513 | 1 | 300ns | $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()' | ||
| 8514 | |||||
| 8515 | # FIXME: we could to split 'i' into variables and functions | ||||
| 8516 | # and have no space for functions but space for variables. For now, | ||||
| 8517 | # I have a special patch in the special rules below | ||||
| 8518 | 1 | 100ns | $binary_ws_rules{'i'}{'('} = WS_NO; | ||
| 8519 | |||||
| 8520 | 1 | 300ns | $binary_ws_rules{'w'}{'('} = WS_NO; | ||
| 8521 | 1 | 14µs | $binary_ws_rules{'w'}{'{'} = WS_YES; | ||
| 8522 | 1 | 5.94ms | 1 | 90µs | } ## end BEGIN block # spent 90µs making 1 call to Perl::Tidy::Formatter::BEGIN@8401 |
| 8523 | |||||
| 8524 | my ( $jmax, $rtokens, $rtoken_type, $rblock_type ) = @_; | ||||
| 8525 | my ( $last_token, $last_type, $last_block_type, $token, $type, | ||||
| 8526 | $block_type ); | ||||
| 8527 | my (@white_space_flag); | ||||
| 8528 | my $j_tight_closing_paren = -1; | ||||
| 8529 | |||||
| 8530 | if ( $max_index_to_go >= 0 ) { | ||||
| 8531 | $token = $tokens_to_go[$max_index_to_go]; | ||||
| 8532 | $type = $types_to_go[$max_index_to_go]; | ||||
| 8533 | $block_type = $block_type_to_go[$max_index_to_go]; | ||||
| 8534 | |||||
| 8535 | #--------------------------------------------------------------- | ||||
| 8536 | # Patch due to splitting of tokens with leading -> | ||||
| 8537 | #--------------------------------------------------------------- | ||||
| 8538 | # | ||||
| 8539 | # This routine is dealing with the raw tokens from the tokenizer, | ||||
| 8540 | # but to get started it needs the previous token, which will | ||||
| 8541 | # have been stored in the '_to_go' arrays. | ||||
| 8542 | # | ||||
| 8543 | # This patch avoids requiring two iterations to | ||||
| 8544 | # converge for cases such as the following, where a paren | ||||
| 8545 | # comes in on a line following a variable with leading arrow: | ||||
| 8546 | # $self->{main}->add_content_defer_opening | ||||
| 8547 | # ($name, $wmkf, $self->{attrs}, $self); | ||||
| 8548 | # In this case when we see the opening paren on line 2 we need | ||||
| 8549 | # to know if the last token on the previous line had an arrow, | ||||
| 8550 | # but it has already been split off so we have to add it back | ||||
| 8551 | # in to avoid getting an unwanted space before the paren. | ||||
| 8552 | if ( $type =~ /^[wi]$/ ) { | ||||
| 8553 | my $im = $iprev_to_go[$max_index_to_go]; | ||||
| 8554 | my $tm = ( $im >= 0 ) ? $types_to_go[$im] : ""; | ||||
| 8555 | if ( $tm eq '->' ) { $token = $tm . $token } | ||||
| 8556 | } | ||||
| 8557 | |||||
| 8558 | #--------------------------------------------------------------- | ||||
| 8559 | # End patch due to splitting of tokens with leading -> | ||||
| 8560 | #--------------------------------------------------------------- | ||||
| 8561 | } | ||||
| 8562 | else { | ||||
| 8563 | $token = ' '; | ||||
| 8564 | $type = 'b'; | ||||
| 8565 | $block_type = ''; | ||||
| 8566 | } | ||||
| 8567 | |||||
| 8568 | my ( $j, $ws ); | ||||
| 8569 | |||||
| 8570 | # main loop over all tokens to define the whitespace flags | ||||
| 8571 | for ( $j = 0 ; $j <= $jmax ; $j++ ) { | ||||
| 8572 | |||||
| 8573 | if ( $$rtoken_type[$j] eq 'b' ) { | ||||
| 8574 | $white_space_flag[$j] = WS_OPTIONAL; | ||||
| 8575 | next; | ||||
| 8576 | } | ||||
| 8577 | |||||
| 8578 | # set a default value, to be changed as needed | ||||
| 8579 | $ws = undef; | ||||
| 8580 | $last_token = $token; | ||||
| 8581 | $last_type = $type; | ||||
| 8582 | $last_block_type = $block_type; | ||||
| 8583 | $token = $$rtokens[$j]; | ||||
| 8584 | $type = $$rtoken_type[$j]; | ||||
| 8585 | $block_type = $$rblock_type[$j]; | ||||
| 8586 | |||||
| 8587 | #--------------------------------------------------------------- | ||||
| 8588 | # Whitespace Rules Section 1: | ||||
| 8589 | # Handle space on the inside of opening braces. | ||||
| 8590 | #--------------------------------------------------------------- | ||||
| 8591 | |||||
| 8592 | # /^[L\{\(\[]$/ | ||||
| 8593 | if ( $is_opening_type{$last_type} ) { | ||||
| 8594 | |||||
| 8595 | $j_tight_closing_paren = -1; | ||||
| 8596 | |||||
| 8597 | # let's keep empty matched braces together: () {} [] | ||||
| 8598 | # except for BLOCKS | ||||
| 8599 | if ( $token eq $matching_token{$last_token} ) { | ||||
| 8600 | if ($block_type) { | ||||
| 8601 | $ws = WS_YES; | ||||
| 8602 | } | ||||
| 8603 | else { | ||||
| 8604 | $ws = WS_NO; | ||||
| 8605 | } | ||||
| 8606 | } | ||||
| 8607 | else { | ||||
| 8608 | |||||
| 8609 | # we're considering the right of an opening brace | ||||
| 8610 | # tightness = 0 means always pad inside with space | ||||
| 8611 | # tightness = 1 means pad inside if "complex" | ||||
| 8612 | # tightness = 2 means never pad inside with space | ||||
| 8613 | |||||
| 8614 | my $tightness; | ||||
| 8615 | if ( $last_type eq '{' | ||||
| 8616 | && $last_token eq '{' | ||||
| 8617 | && $last_block_type ) | ||||
| 8618 | { | ||||
| 8619 | $tightness = $rOpts_block_brace_tightness; | ||||
| 8620 | } | ||||
| 8621 | else { $tightness = $tightness{$last_token} } | ||||
| 8622 | |||||
| 8623 | #============================================================= | ||||
| 8624 | # Patch for test problem fabrice_bug.pl | ||||
| 8625 | # We must always avoid spaces around a bare word beginning | ||||
| 8626 | # with ^ as in: | ||||
| 8627 | # my $before = ${^PREMATCH}; | ||||
| 8628 | # Because all of the following cause an error in perl: | ||||
| 8629 | # my $before = ${ ^PREMATCH }; | ||||
| 8630 | # my $before = ${ ^PREMATCH}; | ||||
| 8631 | # my $before = ${^PREMATCH }; | ||||
| 8632 | # So if brace tightness flag is -bt=0 we must temporarily reset | ||||
| 8633 | # to bt=1. Note that here we must set tightness=1 and not 2 so | ||||
| 8634 | # that the closing space | ||||
| 8635 | # is also avoided (via the $j_tight_closing_paren flag in coding) | ||||
| 8636 | if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 } | ||||
| 8637 | |||||
| 8638 | #============================================================= | ||||
| 8639 | |||||
| 8640 | if ( $tightness <= 0 ) { | ||||
| 8641 | $ws = WS_YES; | ||||
| 8642 | } | ||||
| 8643 | elsif ( $tightness > 1 ) { | ||||
| 8644 | $ws = WS_NO; | ||||
| 8645 | } | ||||
| 8646 | else { | ||||
| 8647 | |||||
| 8648 | # Patch to count '-foo' as single token so that | ||||
| 8649 | # each of $a{-foo} and $a{foo} and $a{'foo'} do | ||||
| 8650 | # not get spaces with default formatting. | ||||
| 8651 | my $j_here = $j; | ||||
| 8652 | ++$j_here | ||||
| 8653 | if ( $token eq '-' | ||||
| 8654 | && $last_token eq '{' | ||||
| 8655 | && $$rtoken_type[ $j + 1 ] eq 'w' ); | ||||
| 8656 | |||||
| 8657 | # $j_next is where a closing token should be if | ||||
| 8658 | # the container has a single token | ||||
| 8659 | my $j_next = | ||||
| 8660 | ( $$rtoken_type[ $j_here + 1 ] eq 'b' ) | ||||
| 8661 | ? $j_here + 2 | ||||
| 8662 | : $j_here + 1; | ||||
| 8663 | my $tok_next = $$rtokens[$j_next]; | ||||
| 8664 | my $type_next = $$rtoken_type[$j_next]; | ||||
| 8665 | |||||
| 8666 | # for tightness = 1, if there is just one token | ||||
| 8667 | # within the matching pair, we will keep it tight | ||||
| 8668 | if ( | ||||
| 8669 | $tok_next eq $matching_token{$last_token} | ||||
| 8670 | |||||
| 8671 | # but watch out for this: [ [ ] (misc.t) | ||||
| 8672 | && $last_token ne $token | ||||
| 8673 | ) | ||||
| 8674 | { | ||||
| 8675 | |||||
| 8676 | # remember where to put the space for the closing paren | ||||
| 8677 | $j_tight_closing_paren = $j_next; | ||||
| 8678 | $ws = WS_NO; | ||||
| 8679 | } | ||||
| 8680 | else { | ||||
| 8681 | $ws = WS_YES; | ||||
| 8682 | } | ||||
| 8683 | } | ||||
| 8684 | } | ||||
| 8685 | } # end setting space flag inside opening tokens | ||||
| 8686 | my $ws_1 = $ws | ||||
| 8687 | if FORMATTER_DEBUG_FLAG_WHITE; | ||||
| 8688 | |||||
| 8689 | #--------------------------------------------------------------- | ||||
| 8690 | # Whitespace Rules Section 2: | ||||
| 8691 | # Handle space on inside of closing brace pairs. | ||||
| 8692 | #--------------------------------------------------------------- | ||||
| 8693 | |||||
| 8694 | # /[\}\)\]R]/ | ||||
| 8695 | if ( $is_closing_type{$type} ) { | ||||
| 8696 | |||||
| 8697 | if ( $j == $j_tight_closing_paren ) { | ||||
| 8698 | |||||
| 8699 | $j_tight_closing_paren = -1; | ||||
| 8700 | $ws = WS_NO; | ||||
| 8701 | } | ||||
| 8702 | else { | ||||
| 8703 | |||||
| 8704 | if ( !defined($ws) ) { | ||||
| 8705 | |||||
| 8706 | my $tightness; | ||||
| 8707 | if ( $type eq '}' && $token eq '}' && $block_type ) { | ||||
| 8708 | $tightness = $rOpts_block_brace_tightness; | ||||
| 8709 | } | ||||
| 8710 | else { $tightness = $tightness{$token} } | ||||
| 8711 | |||||
| 8712 | $ws = ( $tightness > 1 ) ? WS_NO : WS_YES; | ||||
| 8713 | } | ||||
| 8714 | } | ||||
| 8715 | } # end setting space flag inside closing tokens | ||||
| 8716 | |||||
| 8717 | my $ws_2 = $ws | ||||
| 8718 | if FORMATTER_DEBUG_FLAG_WHITE; | ||||
| 8719 | |||||
| 8720 | #--------------------------------------------------------------- | ||||
| 8721 | # Whitespace Rules Section 3: | ||||
| 8722 | # Use the binary rule table. | ||||
| 8723 | #--------------------------------------------------------------- | ||||
| 8724 | if ( !defined($ws) ) { | ||||
| 8725 | $ws = $binary_ws_rules{$last_type}{$type}; | ||||
| 8726 | } | ||||
| 8727 | my $ws_3 = $ws | ||||
| 8728 | if FORMATTER_DEBUG_FLAG_WHITE; | ||||
| 8729 | |||||
| 8730 | #--------------------------------------------------------------- | ||||
| 8731 | # Whitespace Rules Section 4: | ||||
| 8732 | # Handle some special cases. | ||||
| 8733 | #--------------------------------------------------------------- | ||||
| 8734 | if ( $token eq '(' ) { | ||||
| 8735 | |||||
| 8736 | # This will have to be tweaked as tokenization changes. | ||||
| 8737 | # We usually want a space at '} (', for example: | ||||
| 8738 | # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s ); | ||||
| 8739 | # | ||||
| 8740 | # But not others: | ||||
| 8741 | # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } ); | ||||
| 8742 | # At present, the above & block is marked as type L/R so this case | ||||
| 8743 | # won't go through here. | ||||
| 8744 | if ( $last_type eq '}' ) { $ws = WS_YES } | ||||
| 8745 | |||||
| 8746 | # NOTE: some older versions of Perl had occasional problems if | ||||
| 8747 | # spaces are introduced between keywords or functions and opening | ||||
| 8748 | # parens. So the default is not to do this except is certain | ||||
| 8749 | # cases. The current Perl seems to tolerate spaces. | ||||
| 8750 | |||||
| 8751 | # Space between keyword and '(' | ||||
| 8752 | elsif ( $last_type eq 'k' ) { | ||||
| 8753 | $ws = WS_NO | ||||
| 8754 | unless ( $rOpts_space_keyword_paren | ||||
| 8755 | || $space_after_keyword{$last_token} ); | ||||
| 8756 | } | ||||
| 8757 | |||||
| 8758 | # Space between function and '(' | ||||
| 8759 | # ----------------------------------------------------- | ||||
| 8760 | # 'w' and 'i' checks for something like: | ||||
| 8761 | # myfun( &myfun( ->myfun( | ||||
| 8762 | # ----------------------------------------------------- | ||||
| 8763 | elsif (( $last_type =~ /^[wUG]$/ ) | ||||
| 8764 | || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) ) | ||||
| 8765 | { | ||||
| 8766 | $ws = WS_NO unless ($rOpts_space_function_paren); | ||||
| 8767 | } | ||||
| 8768 | |||||
| 8769 | # space between something like $i and ( in | ||||
| 8770 | # for $i ( 0 .. 20 ) { | ||||
| 8771 | # FIXME: eventually, type 'i' needs to be split into multiple | ||||
| 8772 | # token types so this can be a hardwired rule. | ||||
| 8773 | elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) { | ||||
| 8774 | $ws = WS_YES; | ||||
| 8775 | } | ||||
| 8776 | |||||
| 8777 | # allow constant function followed by '()' to retain no space | ||||
| 8778 | elsif ( $last_type eq 'C' && $$rtokens[ $j + 1 ] eq ')' ) { | ||||
| 8779 | $ws = WS_NO; | ||||
| 8780 | } | ||||
| 8781 | } | ||||
| 8782 | |||||
| 8783 | # patch for SWITCH/CASE: make space at ']{' optional | ||||
| 8784 | # since the '{' might begin a case or when block | ||||
| 8785 | elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) { | ||||
| 8786 | $ws = WS_OPTIONAL; | ||||
| 8787 | } | ||||
| 8788 | |||||
| 8789 | # keep space between 'sub' and '{' for anonymous sub definition | ||||
| 8790 | if ( $type eq '{' ) { | ||||
| 8791 | if ( $last_token eq 'sub' ) { | ||||
| 8792 | $ws = WS_YES; | ||||
| 8793 | } | ||||
| 8794 | |||||
| 8795 | # this is needed to avoid no space in '){' | ||||
| 8796 | if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES } | ||||
| 8797 | |||||
| 8798 | # avoid any space before the brace or bracket in something like | ||||
| 8799 | # @opts{'a','b',...} | ||||
| 8800 | if ( $last_type eq 'i' && $last_token =~ /^\@/ ) { | ||||
| 8801 | $ws = WS_NO; | ||||
| 8802 | } | ||||
| 8803 | } | ||||
| 8804 | |||||
| 8805 | elsif ( $type eq 'i' ) { | ||||
| 8806 | |||||
| 8807 | # never a space before -> | ||||
| 8808 | if ( $token =~ /^\-\>/ ) { | ||||
| 8809 | $ws = WS_NO; | ||||
| 8810 | } | ||||
| 8811 | } | ||||
| 8812 | |||||
| 8813 | # retain any space between '-' and bare word | ||||
| 8814 | elsif ( $type eq 'w' || $type eq 'C' ) { | ||||
| 8815 | $ws = WS_OPTIONAL if $last_type eq '-'; | ||||
| 8816 | |||||
| 8817 | # never a space before -> | ||||
| 8818 | if ( $token =~ /^\-\>/ ) { | ||||
| 8819 | $ws = WS_NO; | ||||
| 8820 | } | ||||
| 8821 | } | ||||
| 8822 | |||||
| 8823 | # retain any space between '-' and bare word | ||||
| 8824 | # example: avoid space between 'USER' and '-' here: | ||||
| 8825 | # $myhash{USER-NAME}='steve'; | ||||
| 8826 | elsif ( $type eq 'm' || $type eq '-' ) { | ||||
| 8827 | $ws = WS_OPTIONAL if ( $last_type eq 'w' ); | ||||
| 8828 | } | ||||
| 8829 | |||||
| 8830 | # always space before side comment | ||||
| 8831 | elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 } | ||||
| 8832 | |||||
| 8833 | # always preserver whatever space was used after a possible | ||||
| 8834 | # filehandle (except _) or here doc operator | ||||
| 8835 | if ( | ||||
| 8836 | $type ne '#' | ||||
| 8837 | && ( ( $last_type eq 'Z' && $last_token ne '_' ) | ||||
| 8838 | || $last_type eq 'h' ) | ||||
| 8839 | ) | ||||
| 8840 | { | ||||
| 8841 | $ws = WS_OPTIONAL; | ||||
| 8842 | } | ||||
| 8843 | |||||
| 8844 | my $ws_4 = $ws | ||||
| 8845 | if FORMATTER_DEBUG_FLAG_WHITE; | ||||
| 8846 | |||||
| 8847 | #--------------------------------------------------------------- | ||||
| 8848 | # Whitespace Rules Section 5: | ||||
| 8849 | # Apply default rules not covered above. | ||||
| 8850 | #--------------------------------------------------------------- | ||||
| 8851 | |||||
| 8852 | # If we fall through to here, look at the pre-defined hash tables for | ||||
| 8853 | # the two tokens, and: | ||||
| 8854 | # if (they are equal) use the common value | ||||
| 8855 | # if (either is zero or undef) use the other | ||||
| 8856 | # if (either is -1) use it | ||||
| 8857 | # That is, | ||||
| 8858 | # left vs right | ||||
| 8859 | # 1 vs 1 --> 1 | ||||
| 8860 | # 0 vs 0 --> 0 | ||||
| 8861 | # -1 vs -1 --> -1 | ||||
| 8862 | # | ||||
| 8863 | # 0 vs -1 --> -1 | ||||
| 8864 | # 0 vs 1 --> 1 | ||||
| 8865 | # 1 vs 0 --> 1 | ||||
| 8866 | # -1 vs 0 --> -1 | ||||
| 8867 | # | ||||
| 8868 | # -1 vs 1 --> -1 | ||||
| 8869 | # 1 vs -1 --> -1 | ||||
| 8870 | if ( !defined($ws) ) { | ||||
| 8871 | my $wl = $want_left_space{$type}; | ||||
| 8872 | my $wr = $want_right_space{$last_type}; | ||||
| 8873 | if ( !defined($wl) ) { $wl = 0 } | ||||
| 8874 | if ( !defined($wr) ) { $wr = 0 } | ||||
| 8875 | $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr; | ||||
| 8876 | } | ||||
| 8877 | |||||
| 8878 | if ( !defined($ws) ) { | ||||
| 8879 | $ws = 0; | ||||
| 8880 | write_diagnostics( | ||||
| 8881 | "WS flag is undefined for tokens $last_token $token\n"); | ||||
| 8882 | } | ||||
| 8883 | |||||
| 8884 | # Treat newline as a whitespace. Otherwise, we might combine | ||||
| 8885 | # 'Send' and '-recipients' here according to the above rules: | ||||
| 8886 | # my $msg = new Fax::Send | ||||
| 8887 | # -recipients => $to, | ||||
| 8888 | # -data => $data; | ||||
| 8889 | if ( $ws == 0 && $j == 0 ) { $ws = 1 } | ||||
| 8890 | |||||
| 8891 | if ( ( $ws == 0 ) | ||||
| 8892 | && $j > 0 | ||||
| 8893 | && $j < $jmax | ||||
| 8894 | && ( $last_type !~ /^[Zh]$/ ) ) | ||||
| 8895 | { | ||||
| 8896 | |||||
| 8897 | # If this happens, we have a non-fatal but undesirable | ||||
| 8898 | # hole in the above rules which should be patched. | ||||
| 8899 | write_diagnostics( | ||||
| 8900 | "WS flag is zero for tokens $last_token $token\n"); | ||||
| 8901 | } | ||||
| 8902 | $white_space_flag[$j] = $ws; | ||||
| 8903 | |||||
| 8904 | FORMATTER_DEBUG_FLAG_WHITE && do { | ||||
| 8905 | my $str = substr( $last_token, 0, 15 ); | ||||
| 8906 | $str .= ' ' x ( 16 - length($str) ); | ||||
| 8907 | if ( !defined($ws_1) ) { $ws_1 = "*" } | ||||
| 8908 | if ( !defined($ws_2) ) { $ws_2 = "*" } | ||||
| 8909 | if ( !defined($ws_3) ) { $ws_3 = "*" } | ||||
| 8910 | if ( !defined($ws_4) ) { $ws_4 = "*" } | ||||
| 8911 | print STDOUT | ||||
| 8912 | "WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n"; | ||||
| 8913 | }; | ||||
| 8914 | } ## end main loop | ||||
| 8915 | |||||
| 8916 | if ($rOpts_tight_secret_operators) { | ||||
| 8917 | secret_operator_whitespace( $jmax, $rtokens, $rtoken_type, | ||||
| 8918 | \@white_space_flag ); | ||||
| 8919 | } | ||||
| 8920 | |||||
| 8921 | return \@white_space_flag; | ||||
| 8922 | } ## end sub set_white_space_flag | ||||
| 8923 | |||||
| 8924 | { # begin print_line_of_tokens | ||||
| 8925 | |||||
| 8926 | 2 | 0s | my $rtoken_type; | ||
| 8927 | 1 | 0s | my $rtokens; | ||
| 8928 | 1 | 100ns | my $rlevels; | ||
| 8929 | 1 | 0s | my $rslevels; | ||
| 8930 | 1 | 100ns | my $rblock_type; | ||
| 8931 | 1 | 0s | my $rcontainer_type; | ||
| 8932 | 1 | 0s | my $rcontainer_environment; | ||
| 8933 | 1 | 0s | my $rtype_sequence; | ||
| 8934 | 1 | 0s | my $input_line; | ||
| 8935 | 1 | 100ns | my $rnesting_tokens; | ||
| 8936 | 1 | 100ns | my $rci_levels; | ||
| 8937 | 1 | 100ns | my $rnesting_blocks; | ||
| 8938 | |||||
| 8939 | 1 | 100ns | my $in_quote; | ||
| 8940 | 1 | 0s | my $guessed_indentation_level; | ||
| 8941 | |||||
| 8942 | # These local token variables are stored by store_token_to_go: | ||||
| 8943 | 1 | 0s | my $block_type; | ||
| 8944 | 1 | 0s | my $ci_level; | ||
| 8945 | 1 | 0s | my $container_environment; | ||
| 8946 | 1 | 0s | my $container_type; | ||
| 8947 | 1 | 0s | my $in_continued_quote; | ||
| 8948 | 1 | 0s | my $level; | ||
| 8949 | 1 | 0s | my $nesting_blocks; | ||
| 8950 | 1 | 0s | my $no_internal_newlines; | ||
| 8951 | 1 | 0s | my $slevel; | ||
| 8952 | 1 | 0s | my $token; | ||
| 8953 | 1 | 0s | my $type; | ||
| 8954 | 1 | 0s | my $type_sequence; | ||
| 8955 | |||||
| 8956 | # routine to pull the jth token from the line of tokens | ||||
| 8957 | sub extract_token { | ||||
| 8958 | my $j = shift; | ||||
| 8959 | $token = $$rtokens[$j]; | ||||
| 8960 | $type = $$rtoken_type[$j]; | ||||
| 8961 | $block_type = $$rblock_type[$j]; | ||||
| 8962 | $container_type = $$rcontainer_type[$j]; | ||||
| 8963 | $container_environment = $$rcontainer_environment[$j]; | ||||
| 8964 | $type_sequence = $$rtype_sequence[$j]; | ||||
| 8965 | $level = $$rlevels[$j]; | ||||
| 8966 | $slevel = $$rslevels[$j]; | ||||
| 8967 | $nesting_blocks = $$rnesting_blocks[$j]; | ||||
| 8968 | $ci_level = $$rci_levels[$j]; | ||||
| 8969 | } | ||||
| 8970 | |||||
| 8971 | { | ||||
| 8972 | 2 | 600ns | my @saved_token; | ||
| 8973 | |||||
| 8974 | sub save_current_token { | ||||
| 8975 | |||||
| 8976 | @saved_token = ( | ||||
| 8977 | $block_type, $ci_level, | ||||
| 8978 | $container_environment, $container_type, | ||||
| 8979 | $in_continued_quote, $level, | ||||
| 8980 | $nesting_blocks, $no_internal_newlines, | ||||
| 8981 | $slevel, $token, | ||||
| 8982 | $type, $type_sequence, | ||||
| 8983 | ); | ||||
| 8984 | } | ||||
| 8985 | |||||
| 8986 | sub restore_current_token { | ||||
| 8987 | ( | ||||
| 8988 | $block_type, $ci_level, | ||||
| 8989 | $container_environment, $container_type, | ||||
| 8990 | $in_continued_quote, $level, | ||||
| 8991 | $nesting_blocks, $no_internal_newlines, | ||||
| 8992 | $slevel, $token, | ||||
| 8993 | $type, $type_sequence, | ||||
| 8994 | ) = @saved_token; | ||||
| 8995 | } | ||||
| 8996 | } | ||||
| 8997 | |||||
| 8998 | sub token_length { | ||||
| 8999 | |||||
| 9000 | # Returns the length of a token, given: | ||||
| 9001 | # $token=text of the token | ||||
| 9002 | # $type = type | ||||
| 9003 | # $not_first_token = should be TRUE if this is not the first token of | ||||
| 9004 | # the line. It might the index of this token in an array. It is | ||||
| 9005 | # used to test for a side comment vs a block comment. | ||||
| 9006 | # Note: Eventually this should be the only routine determining the | ||||
| 9007 | # length of a token in this package. | ||||
| 9008 | my ( $token, $type, $not_first_token ) = @_; | ||||
| 9009 | my $token_length = length($token); | ||||
| 9010 | |||||
| 9011 | # We mark lengths of side comments as just 1 if we are | ||||
| 9012 | # ignoring their lengths when setting line breaks. | ||||
| 9013 | $token_length = 1 | ||||
| 9014 | if ( $rOpts_ignore_side_comment_lengths | ||||
| 9015 | && $not_first_token | ||||
| 9016 | && $type eq '#' ); | ||||
| 9017 | return $token_length; | ||||
| 9018 | } | ||||
| 9019 | |||||
| 9020 | sub rtoken_length { | ||||
| 9021 | |||||
| 9022 | # return length of ith token in @{$rtokens} | ||||
| 9023 | my ($i) = @_; | ||||
| 9024 | return token_length( $$rtokens[$i], $$rtoken_type[$i], $i ); | ||||
| 9025 | } | ||||
| 9026 | |||||
| 9027 | # Routine to place the current token into the output stream. | ||||
| 9028 | # Called once per output token. | ||||
| 9029 | sub store_token_to_go { | ||||
| 9030 | |||||
| 9031 | my $flag = $no_internal_newlines; | ||||
| 9032 | if ( $_[0] ) { $flag = 1 } | ||||
| 9033 | |||||
| 9034 | $tokens_to_go[ ++$max_index_to_go ] = $token; | ||||
| 9035 | $types_to_go[$max_index_to_go] = $type; | ||||
| 9036 | $nobreak_to_go[$max_index_to_go] = $flag; | ||||
| 9037 | $old_breakpoint_to_go[$max_index_to_go] = 0; | ||||
| 9038 | $forced_breakpoint_to_go[$max_index_to_go] = 0; | ||||
| 9039 | $block_type_to_go[$max_index_to_go] = $block_type; | ||||
| 9040 | $type_sequence_to_go[$max_index_to_go] = $type_sequence; | ||||
| 9041 | $container_environment_to_go[$max_index_to_go] = $container_environment; | ||||
| 9042 | $nesting_blocks_to_go[$max_index_to_go] = $nesting_blocks; | ||||
| 9043 | $ci_levels_to_go[$max_index_to_go] = $ci_level; | ||||
| 9044 | $mate_index_to_go[$max_index_to_go] = -1; | ||||
| 9045 | $matching_token_to_go[$max_index_to_go] = ''; | ||||
| 9046 | $bond_strength_to_go[$max_index_to_go] = 0; | ||||
| 9047 | |||||
| 9048 | # Note: negative levels are currently retained as a diagnostic so that | ||||
| 9049 | # the 'final indentation level' is correctly reported for bad scripts. | ||||
| 9050 | # But this means that every use of $level as an index must be checked. | ||||
| 9051 | # If this becomes too much of a problem, we might give up and just clip | ||||
| 9052 | # them at zero. | ||||
| 9053 | ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0; | ||||
| 9054 | $levels_to_go[$max_index_to_go] = $level; | ||||
| 9055 | $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0; | ||||
| 9056 | |||||
| 9057 | # link the non-blank tokens | ||||
| 9058 | my $iprev = $max_index_to_go - 1; | ||||
| 9059 | $iprev-- if ( $iprev >= 0 && $types_to_go[$iprev] eq 'b' ); | ||||
| 9060 | $iprev_to_go[$max_index_to_go] = $iprev; | ||||
| 9061 | $inext_to_go[$iprev] = $max_index_to_go | ||||
| 9062 | if ( $iprev >= 0 && $type ne 'b' ); | ||||
| 9063 | $inext_to_go[$max_index_to_go] = $max_index_to_go + 1; | ||||
| 9064 | |||||
| 9065 | $token_lengths_to_go[$max_index_to_go] = | ||||
| 9066 | token_length( $token, $type, $max_index_to_go ); | ||||
| 9067 | |||||
| 9068 | # We keep a running sum of token lengths from the start of this batch: | ||||
| 9069 | # summed_lengths_to_go[$i] = total length to just before token $i | ||||
| 9070 | # summed_lengths_to_go[$i+1] = total length to just after token $i | ||||
| 9071 | $summed_lengths_to_go[ $max_index_to_go + 1 ] = | ||||
| 9072 | $summed_lengths_to_go[$max_index_to_go] + | ||||
| 9073 | $token_lengths_to_go[$max_index_to_go]; | ||||
| 9074 | |||||
| 9075 | # Define the indentation that this token would have if it started | ||||
| 9076 | # a new line. We have to do this now because we need to know this | ||||
| 9077 | # when considering one-line blocks. | ||||
| 9078 | set_leading_whitespace( $level, $ci_level, $in_continued_quote ); | ||||
| 9079 | |||||
| 9080 | # remember previous nonblank tokens seen | ||||
| 9081 | if ( $type ne 'b' ) { | ||||
| 9082 | $last_last_nonblank_index_to_go = $last_nonblank_index_to_go; | ||||
| 9083 | $last_last_nonblank_type_to_go = $last_nonblank_type_to_go; | ||||
| 9084 | $last_last_nonblank_token_to_go = $last_nonblank_token_to_go; | ||||
| 9085 | $last_nonblank_index_to_go = $max_index_to_go; | ||||
| 9086 | $last_nonblank_type_to_go = $type; | ||||
| 9087 | $last_nonblank_token_to_go = $token; | ||||
| 9088 | if ( $type eq ',' ) { | ||||
| 9089 | $comma_count_in_batch++; | ||||
| 9090 | } | ||||
| 9091 | } | ||||
| 9092 | |||||
| 9093 | FORMATTER_DEBUG_FLAG_STORE && do { | ||||
| 9094 | my ( $a, $b, $c ) = caller(); | ||||
| 9095 | print STDOUT | ||||
| 9096 | "STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n"; | ||||
| 9097 | }; | ||||
| 9098 | } | ||||
| 9099 | |||||
| 9100 | sub insert_new_token_to_go { | ||||
| 9101 | |||||
| 9102 | # insert a new token into the output stream. use same level as | ||||
| 9103 | # previous token; assumes a character at max_index_to_go. | ||||
| 9104 | save_current_token(); | ||||
| 9105 | ( $token, $type, $slevel, $no_internal_newlines ) = @_; | ||||
| 9106 | |||||
| 9107 | if ( $max_index_to_go == UNDEFINED_INDEX ) { | ||||
| 9108 | warning("code bug: bad call to insert_new_token_to_go\n"); | ||||
| 9109 | } | ||||
| 9110 | $level = $levels_to_go[$max_index_to_go]; | ||||
| 9111 | |||||
| 9112 | # FIXME: it seems to be necessary to use the next, rather than | ||||
| 9113 | # previous, value of this variable when creating a new blank (align.t) | ||||
| 9114 | #my $slevel = $nesting_depth_to_go[$max_index_to_go]; | ||||
| 9115 | $nesting_blocks = $nesting_blocks_to_go[$max_index_to_go]; | ||||
| 9116 | $ci_level = $ci_levels_to_go[$max_index_to_go]; | ||||
| 9117 | $container_environment = $container_environment_to_go[$max_index_to_go]; | ||||
| 9118 | $in_continued_quote = 0; | ||||
| 9119 | $block_type = ""; | ||||
| 9120 | $type_sequence = ""; | ||||
| 9121 | store_token_to_go(); | ||||
| 9122 | restore_current_token(); | ||||
| 9123 | return; | ||||
| 9124 | } | ||||
| 9125 | |||||
| 9126 | sub print_line_of_tokens { | ||||
| 9127 | |||||
| 9128 | my $line_of_tokens = shift; | ||||
| 9129 | |||||
| 9130 | # This routine is called once per input line to process all of | ||||
| 9131 | # the tokens on that line. This is the first stage of | ||||
| 9132 | # beautification. | ||||
| 9133 | # | ||||
| 9134 | # Full-line comments and blank lines may be processed immediately. | ||||
| 9135 | # | ||||
| 9136 | # For normal lines of code, the tokens are stored one-by-one, | ||||
| 9137 | # via calls to 'sub store_token_to_go', until a known line break | ||||
| 9138 | # point is reached. Then, the batch of collected tokens is | ||||
| 9139 | # passed along to 'sub output_line_to_go' for further | ||||
| 9140 | # processing. This routine decides if there should be | ||||
| 9141 | # whitespace between each pair of non-white tokens, so later | ||||
| 9142 | # routines only need to decide on any additional line breaks. | ||||
| 9143 | # Any whitespace is initially a single space character. Later, | ||||
| 9144 | # the vertical aligner may expand that to be multiple space | ||||
| 9145 | # characters if necessary for alignment. | ||||
| 9146 | |||||
| 9147 | # extract input line number for error messages | ||||
| 9148 | $input_line_number = $line_of_tokens->{_line_number}; | ||||
| 9149 | |||||
| 9150 | $rtoken_type = $line_of_tokens->{_rtoken_type}; | ||||
| 9151 | $rtokens = $line_of_tokens->{_rtokens}; | ||||
| 9152 | $rlevels = $line_of_tokens->{_rlevels}; | ||||
| 9153 | $rslevels = $line_of_tokens->{_rslevels}; | ||||
| 9154 | $rblock_type = $line_of_tokens->{_rblock_type}; | ||||
| 9155 | $rcontainer_type = $line_of_tokens->{_rcontainer_type}; | ||||
| 9156 | $rcontainer_environment = $line_of_tokens->{_rcontainer_environment}; | ||||
| 9157 | $rtype_sequence = $line_of_tokens->{_rtype_sequence}; | ||||
| 9158 | $input_line = $line_of_tokens->{_line_text}; | ||||
| 9159 | $rnesting_tokens = $line_of_tokens->{_rnesting_tokens}; | ||||
| 9160 | $rci_levels = $line_of_tokens->{_rci_levels}; | ||||
| 9161 | $rnesting_blocks = $line_of_tokens->{_rnesting_blocks}; | ||||
| 9162 | |||||
| 9163 | $in_continued_quote = $starting_in_quote = | ||||
| 9164 | $line_of_tokens->{_starting_in_quote}; | ||||
| 9165 | $in_quote = $line_of_tokens->{_ending_in_quote}; | ||||
| 9166 | $ending_in_quote = $in_quote; | ||||
| 9167 | $guessed_indentation_level = | ||||
| 9168 | $line_of_tokens->{_guessed_indentation_level}; | ||||
| 9169 | |||||
| 9170 | my $j; | ||||
| 9171 | my $j_next; | ||||
| 9172 | my $jmax; | ||||
| 9173 | my $next_nonblank_token; | ||||
| 9174 | my $next_nonblank_token_type; | ||||
| 9175 | my $rwhite_space_flag; | ||||
| 9176 | |||||
| 9177 | $jmax = @$rtokens - 1; | ||||
| 9178 | $block_type = ""; | ||||
| 9179 | $container_type = ""; | ||||
| 9180 | $container_environment = ""; | ||||
| 9181 | $type_sequence = ""; | ||||
| 9182 | $no_internal_newlines = 1 - $rOpts_add_newlines; | ||||
| 9183 | $is_static_block_comment = 0; | ||||
| 9184 | |||||
| 9185 | # Handle a continued quote.. | ||||
| 9186 | if ($in_continued_quote) { | ||||
| 9187 | |||||
| 9188 | # A line which is entirely a quote or pattern must go out | ||||
| 9189 | # verbatim. Note: the \n is contained in $input_line. | ||||
| 9190 | if ( $jmax <= 0 ) { | ||||
| 9191 | if ( ( $input_line =~ "\t" ) ) { | ||||
| 9192 | note_embedded_tab(); | ||||
| 9193 | } | ||||
| 9194 | write_unindented_line("$input_line"); | ||||
| 9195 | $last_line_had_side_comment = 0; | ||||
| 9196 | return; | ||||
| 9197 | } | ||||
| 9198 | } | ||||
| 9199 | |||||
| 9200 | # Write line verbatim if we are in a formatting skip section | ||||
| 9201 | if ($in_format_skipping_section) { | ||||
| 9202 | write_unindented_line("$input_line"); | ||||
| 9203 | $last_line_had_side_comment = 0; | ||||
| 9204 | |||||
| 9205 | # Note: extra space appended to comment simplifies pattern matching | ||||
| 9206 | if ( $jmax == 0 | ||||
| 9207 | && $$rtoken_type[0] eq '#' | ||||
| 9208 | && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_end/o ) | ||||
| 9209 | { | ||||
| 9210 | $in_format_skipping_section = 0; | ||||
| 9211 | write_logfile_entry("Exiting formatting skip section\n"); | ||||
| 9212 | $file_writer_object->reset_consecutive_blank_lines(); | ||||
| 9213 | } | ||||
| 9214 | return; | ||||
| 9215 | } | ||||
| 9216 | |||||
| 9217 | # See if we are entering a formatting skip section | ||||
| 9218 | if ( $rOpts_format_skipping | ||||
| 9219 | && $jmax == 0 | ||||
| 9220 | && $$rtoken_type[0] eq '#' | ||||
| 9221 | && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_begin/o ) | ||||
| 9222 | { | ||||
| 9223 | flush(); | ||||
| 9224 | $in_format_skipping_section = 1; | ||||
| 9225 | write_logfile_entry("Entering formatting skip section\n"); | ||||
| 9226 | write_unindented_line("$input_line"); | ||||
| 9227 | $last_line_had_side_comment = 0; | ||||
| 9228 | return; | ||||
| 9229 | } | ||||
| 9230 | |||||
| 9231 | # delete trailing blank tokens | ||||
| 9232 | if ( $jmax > 0 && $$rtoken_type[$jmax] eq 'b' ) { $jmax-- } | ||||
| 9233 | |||||
| 9234 | # Handle a blank line.. | ||||
| 9235 | if ( $jmax < 0 ) { | ||||
| 9236 | |||||
| 9237 | # If keep-old-blank-lines is zero, we delete all | ||||
| 9238 | # old blank lines and let the blank line rules generate any | ||||
| 9239 | # needed blanks. | ||||
| 9240 | if ($rOpts_keep_old_blank_lines) { | ||||
| 9241 | flush(); | ||||
| 9242 | $file_writer_object->write_blank_code_line( | ||||
| 9243 | $rOpts_keep_old_blank_lines == 2 ); | ||||
| 9244 | $last_line_leading_type = 'b'; | ||||
| 9245 | } | ||||
| 9246 | $last_line_had_side_comment = 0; | ||||
| 9247 | return; | ||||
| 9248 | } | ||||
| 9249 | |||||
| 9250 | # see if this is a static block comment (starts with ## by default) | ||||
| 9251 | my $is_static_block_comment_without_leading_space = 0; | ||||
| 9252 | if ( $jmax == 0 | ||||
| 9253 | && $$rtoken_type[0] eq '#' | ||||
| 9254 | && $rOpts->{'static-block-comments'} | ||||
| 9255 | && $input_line =~ /$static_block_comment_pattern/o ) | ||||
| 9256 | { | ||||
| 9257 | $is_static_block_comment = 1; | ||||
| 9258 | $is_static_block_comment_without_leading_space = | ||||
| 9259 | substr( $input_line, 0, 1 ) eq '#'; | ||||
| 9260 | } | ||||
| 9261 | |||||
| 9262 | # Check for comments which are line directives | ||||
| 9263 | # Treat exactly as static block comments without leading space | ||||
| 9264 | # reference: perlsyn, near end, section Plain Old Comments (Not!) | ||||
| 9265 | # example: '# line 42 "new_filename.plx"' | ||||
| 9266 | if ( | ||||
| 9267 | $jmax == 0 | ||||
| 9268 | && $$rtoken_type[0] eq '#' | ||||
| 9269 | && $input_line =~ /^\# \s* | ||||
| 9270 | line \s+ (\d+) \s* | ||||
| 9271 | (?:\s("?)([^"]+)\2)? \s* | ||||
| 9272 | $/x | ||||
| 9273 | ) | ||||
| 9274 | { | ||||
| 9275 | $is_static_block_comment = 1; | ||||
| 9276 | $is_static_block_comment_without_leading_space = 1; | ||||
| 9277 | } | ||||
| 9278 | |||||
| 9279 | # create a hanging side comment if appropriate | ||||
| 9280 | my $is_hanging_side_comment; | ||||
| 9281 | if ( | ||||
| 9282 | $jmax == 0 | ||||
| 9283 | && $$rtoken_type[0] eq '#' # only token is a comment | ||||
| 9284 | && $last_line_had_side_comment # last line had side comment | ||||
| 9285 | && $input_line =~ /^\s/ # there is some leading space | ||||
| 9286 | && !$is_static_block_comment # do not make static comment hanging | ||||
| 9287 | && $rOpts->{'hanging-side-comments'} # user is allowing | ||||
| 9288 | # hanging side comments | ||||
| 9289 | # like this | ||||
| 9290 | ) | ||||
| 9291 | { | ||||
| 9292 | |||||
| 9293 | # We will insert an empty qw string at the start of the token list | ||||
| 9294 | # to force this comment to be a side comment. The vertical aligner | ||||
| 9295 | # should then line it up with the previous side comment. | ||||
| 9296 | $is_hanging_side_comment = 1; | ||||
| 9297 | unshift @$rtoken_type, 'q'; | ||||
| 9298 | unshift @$rtokens, ''; | ||||
| 9299 | unshift @$rlevels, $$rlevels[0]; | ||||
| 9300 | unshift @$rslevels, $$rslevels[0]; | ||||
| 9301 | unshift @$rblock_type, ''; | ||||
| 9302 | unshift @$rcontainer_type, ''; | ||||
| 9303 | unshift @$rcontainer_environment, ''; | ||||
| 9304 | unshift @$rtype_sequence, ''; | ||||
| 9305 | unshift @$rnesting_tokens, $$rnesting_tokens[0]; | ||||
| 9306 | unshift @$rci_levels, $$rci_levels[0]; | ||||
| 9307 | unshift @$rnesting_blocks, $$rnesting_blocks[0]; | ||||
| 9308 | $jmax = 1; | ||||
| 9309 | } | ||||
| 9310 | |||||
| 9311 | # remember if this line has a side comment | ||||
| 9312 | $last_line_had_side_comment = | ||||
| 9313 | ( $jmax > 0 && $$rtoken_type[$jmax] eq '#' ); | ||||
| 9314 | |||||
| 9315 | # Handle a block (full-line) comment.. | ||||
| 9316 | if ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq '#' ) ) { | ||||
| 9317 | |||||
| 9318 | if ( $rOpts->{'delete-block-comments'} ) { return } | ||||
| 9319 | |||||
| 9320 | if ( $rOpts->{'tee-block-comments'} ) { | ||||
| 9321 | $file_writer_object->tee_on(); | ||||
| 9322 | } | ||||
| 9323 | |||||
| 9324 | destroy_one_line_block(); | ||||
| 9325 | output_line_to_go(); | ||||
| 9326 | |||||
| 9327 | # output a blank line before block comments | ||||
| 9328 | if ( | ||||
| 9329 | # unless we follow a blank or comment line | ||||
| 9330 | $last_line_leading_type !~ /^[#b]$/ | ||||
| 9331 | |||||
| 9332 | # only if allowed | ||||
| 9333 | && $rOpts->{'blanks-before-comments'} | ||||
| 9334 | |||||
| 9335 | # not if this is an empty comment line | ||||
| 9336 | && $$rtokens[0] ne '#' | ||||
| 9337 | |||||
| 9338 | # not after a short line ending in an opening token | ||||
| 9339 | # because we already have space above this comment. | ||||
| 9340 | # Note that the first comment in this if block, after | ||||
| 9341 | # the 'if (', does not get a blank line because of this. | ||||
| 9342 | && !$last_output_short_opening_token | ||||
| 9343 | |||||
| 9344 | # never before static block comments | ||||
| 9345 | && !$is_static_block_comment | ||||
| 9346 | ) | ||||
| 9347 | { | ||||
| 9348 | flush(); # switching to new output stream | ||||
| 9349 | $file_writer_object->write_blank_code_line(); | ||||
| 9350 | $last_line_leading_type = 'b'; | ||||
| 9351 | } | ||||
| 9352 | |||||
| 9353 | # TRIM COMMENTS -- This could be turned off as a option | ||||
| 9354 | $$rtokens[0] =~ s/\s*$//; # trim right end | ||||
| 9355 | |||||
| 9356 | if ( | ||||
| 9357 | $rOpts->{'indent-block-comments'} | ||||
| 9358 | && ( !$rOpts->{'indent-spaced-block-comments'} | ||||
| 9359 | || $input_line =~ /^\s+/ ) | ||||
| 9360 | && !$is_static_block_comment_without_leading_space | ||||
| 9361 | ) | ||||
| 9362 | { | ||||
| 9363 | extract_token(0); | ||||
| 9364 | store_token_to_go(); | ||||
| 9365 | output_line_to_go(); | ||||
| 9366 | } | ||||
| 9367 | else { | ||||
| 9368 | flush(); # switching to new output stream | ||||
| 9369 | $file_writer_object->write_code_line( $$rtokens[0] . "\n" ); | ||||
| 9370 | $last_line_leading_type = '#'; | ||||
| 9371 | } | ||||
| 9372 | if ( $rOpts->{'tee-block-comments'} ) { | ||||
| 9373 | $file_writer_object->tee_off(); | ||||
| 9374 | } | ||||
| 9375 | return; | ||||
| 9376 | } | ||||
| 9377 | |||||
| 9378 | # compare input/output indentation except for continuation lines | ||||
| 9379 | # (because they have an unknown amount of initial blank space) | ||||
| 9380 | # and lines which are quotes (because they may have been outdented) | ||||
| 9381 | # Note: this test is placed here because we know the continuation flag | ||||
| 9382 | # at this point, which allows us to avoid non-meaningful checks. | ||||
| 9383 | my $structural_indentation_level = $$rlevels[0]; | ||||
| 9384 | compare_indentation_levels( $guessed_indentation_level, | ||||
| 9385 | $structural_indentation_level ) | ||||
| 9386 | unless ( $is_hanging_side_comment | ||||
| 9387 | || $$rci_levels[0] > 0 | ||||
| 9388 | || $guessed_indentation_level == 0 && $$rtoken_type[0] eq 'Q' ); | ||||
| 9389 | |||||
| 9390 | # Patch needed for MakeMaker. Do not break a statement | ||||
| 9391 | # in which $VERSION may be calculated. See MakeMaker.pm; | ||||
| 9392 | # this is based on the coding in it. | ||||
| 9393 | # The first line of a file that matches this will be eval'd: | ||||
| 9394 | # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ | ||||
| 9395 | # Examples: | ||||
| 9396 | # *VERSION = \'1.01'; | ||||
| 9397 | # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/; | ||||
| 9398 | # We will pass such a line straight through without breaking | ||||
| 9399 | # it unless -npvl is used | ||||
| 9400 | |||||
| 9401 | my $is_VERSION_statement = 0; | ||||
| 9402 | |||||
| 9403 | if ( | ||||
| 9404 | !$saw_VERSION_in_this_file | ||||
| 9405 | && $input_line =~ /VERSION/ # quick check to reject most lines | ||||
| 9406 | && $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ | ||||
| 9407 | ) | ||||
| 9408 | { | ||||
| 9409 | $saw_VERSION_in_this_file = 1; | ||||
| 9410 | $is_VERSION_statement = 1; | ||||
| 9411 | write_logfile_entry("passing VERSION line; -npvl deactivates\n"); | ||||
| 9412 | $no_internal_newlines = 1; | ||||
| 9413 | } | ||||
| 9414 | |||||
| 9415 | # take care of indentation-only | ||||
| 9416 | # NOTE: In previous versions we sent all qw lines out immediately here. | ||||
| 9417 | # No longer doing this: also write a line which is entirely a 'qw' list | ||||
| 9418 | # to allow stacking of opening and closing tokens. Note that interior | ||||
| 9419 | # qw lines will still go out at the end of this routine. | ||||
| 9420 | if ( $rOpts->{'indent-only'} ) { | ||||
| 9421 | flush(); | ||||
| 9422 | trim($input_line); | ||||
| 9423 | |||||
| 9424 | extract_token(0); | ||||
| 9425 | $token = $input_line; | ||||
| 9426 | $type = 'q'; | ||||
| 9427 | $block_type = ""; | ||||
| 9428 | $container_type = ""; | ||||
| 9429 | $container_environment = ""; | ||||
| 9430 | $type_sequence = ""; | ||||
| 9431 | store_token_to_go(); | ||||
| 9432 | output_line_to_go(); | ||||
| 9433 | return; | ||||
| 9434 | } | ||||
| 9435 | |||||
| 9436 | push( @$rtokens, ' ', ' ' ); # making $j+2 valid simplifies coding | ||||
| 9437 | push( @$rtoken_type, 'b', 'b' ); | ||||
| 9438 | ($rwhite_space_flag) = | ||||
| 9439 | set_white_space_flag( $jmax, $rtokens, $rtoken_type, $rblock_type ); | ||||
| 9440 | |||||
| 9441 | # if the buffer hasn't been flushed, add a leading space if | ||||
| 9442 | # necessary to keep essential whitespace. This is really only | ||||
| 9443 | # necessary if we are squeezing out all ws. | ||||
| 9444 | if ( $max_index_to_go >= 0 ) { | ||||
| 9445 | |||||
| 9446 | $old_line_count_in_batch++; | ||||
| 9447 | |||||
| 9448 | if ( | ||||
| 9449 | is_essential_whitespace( | ||||
| 9450 | $last_last_nonblank_token, | ||||
| 9451 | $last_last_nonblank_type, | ||||
| 9452 | $tokens_to_go[$max_index_to_go], | ||||
| 9453 | $types_to_go[$max_index_to_go], | ||||
| 9454 | $$rtokens[0], | ||||
| 9455 | $$rtoken_type[0] | ||||
| 9456 | ) | ||||
| 9457 | ) | ||||
| 9458 | { | ||||
| 9459 | my $slevel = $$rslevels[0]; | ||||
| 9460 | insert_new_token_to_go( ' ', 'b', $slevel, | ||||
| 9461 | $no_internal_newlines ); | ||||
| 9462 | } | ||||
| 9463 | } | ||||
| 9464 | |||||
| 9465 | # If we just saw the end of an elsif block, write nag message | ||||
| 9466 | # if we do not see another elseif or an else. | ||||
| 9467 | if ($looking_for_else) { | ||||
| 9468 | |||||
| 9469 | unless ( $$rtokens[0] =~ /^(elsif|else)$/ ) { | ||||
| 9470 | write_logfile_entry("(No else block)\n"); | ||||
| 9471 | } | ||||
| 9472 | $looking_for_else = 0; | ||||
| 9473 | } | ||||
| 9474 | |||||
| 9475 | # This is a good place to kill incomplete one-line blocks | ||||
| 9476 | if ( ( $semicolons_before_block_self_destruct == 0 ) | ||||
| 9477 | && ( $max_index_to_go >= 0 ) | ||||
| 9478 | && ( $types_to_go[$max_index_to_go] eq ';' ) | ||||
| 9479 | && ( $$rtokens[0] ne '}' ) ) | ||||
| 9480 | { | ||||
| 9481 | destroy_one_line_block(); | ||||
| 9482 | output_line_to_go(); | ||||
| 9483 | } | ||||
| 9484 | |||||
| 9485 | # loop to process the tokens one-by-one | ||||
| 9486 | $type = 'b'; | ||||
| 9487 | $token = ""; | ||||
| 9488 | |||||
| 9489 | foreach $j ( 0 .. $jmax ) { | ||||
| 9490 | |||||
| 9491 | # pull out the local values for this token | ||||
| 9492 | extract_token($j); | ||||
| 9493 | |||||
| 9494 | if ( $type eq '#' ) { | ||||
| 9495 | |||||
| 9496 | # trim trailing whitespace | ||||
| 9497 | # (there is no option at present to prevent this) | ||||
| 9498 | $token =~ s/\s*$//; | ||||
| 9499 | |||||
| 9500 | if ( | ||||
| 9501 | $rOpts->{'delete-side-comments'} | ||||
| 9502 | |||||
| 9503 | # delete closing side comments if necessary | ||||
| 9504 | || ( $rOpts->{'delete-closing-side-comments'} | ||||
| 9505 | && $token =~ /$closing_side_comment_prefix_pattern/o | ||||
| 9506 | && $last_nonblank_block_type =~ | ||||
| 9507 | /$closing_side_comment_list_pattern/o ) | ||||
| 9508 | ) | ||||
| 9509 | { | ||||
| 9510 | if ( $types_to_go[$max_index_to_go] eq 'b' ) { | ||||
| 9511 | unstore_token_to_go(); | ||||
| 9512 | } | ||||
| 9513 | last; | ||||
| 9514 | } | ||||
| 9515 | } | ||||
| 9516 | |||||
| 9517 | # If we are continuing after seeing a right curly brace, flush | ||||
| 9518 | # buffer unless we see what we are looking for, as in | ||||
| 9519 | # } else ... | ||||
| 9520 | if ( $rbrace_follower && $type ne 'b' ) { | ||||
| 9521 | |||||
| 9522 | unless ( $rbrace_follower->{$token} ) { | ||||
| 9523 | output_line_to_go(); | ||||
| 9524 | } | ||||
| 9525 | $rbrace_follower = undef; | ||||
| 9526 | } | ||||
| 9527 | |||||
| 9528 | $j_next = ( $$rtoken_type[ $j + 1 ] eq 'b' ) ? $j + 2 : $j + 1; | ||||
| 9529 | $next_nonblank_token = $$rtokens[$j_next]; | ||||
| 9530 | $next_nonblank_token_type = $$rtoken_type[$j_next]; | ||||
| 9531 | |||||
| 9532 | #-------------------------------------------------------- | ||||
| 9533 | # Start of section to patch token text | ||||
| 9534 | #-------------------------------------------------------- | ||||
| 9535 | |||||
| 9536 | # Modify certain tokens here for whitespace | ||||
| 9537 | # The following is not yet done, but could be: | ||||
| 9538 | # sub (x x x) | ||||
| 9539 | if ( $type =~ /^[wit]$/ ) { | ||||
| 9540 | |||||
| 9541 | # Examples: | ||||
| 9542 | # change '$ var' to '$var' etc | ||||
| 9543 | # '-> new' to '->new' | ||||
| 9544 | if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) { | ||||
| 9545 | $token =~ s/\s*//g; | ||||
| 9546 | } | ||||
| 9547 | |||||
| 9548 | # Split identifiers with leading arrows, inserting blanks if | ||||
| 9549 | # necessary. It is easier and safer here than in the | ||||
| 9550 | # tokenizer. For example '->new' becomes two tokens, '->' and | ||||
| 9551 | # 'new' with a possible blank between. | ||||
| 9552 | # | ||||
| 9553 | # Note: there is a related patch in sub set_white_space_flag | ||||
| 9554 | if ( $token =~ /^\-\>(.*)$/ && $1 ) { | ||||
| 9555 | my $token_save = $1; | ||||
| 9556 | my $type_save = $type; | ||||
| 9557 | |||||
| 9558 | # store a blank to left of arrow if necessary | ||||
| 9559 | if ( $max_index_to_go >= 0 | ||||
| 9560 | && $types_to_go[$max_index_to_go] ne 'b' | ||||
| 9561 | && $want_left_space{'->'} == WS_YES ) | ||||
| 9562 | { | ||||
| 9563 | insert_new_token_to_go( ' ', 'b', $slevel, | ||||
| 9564 | $no_internal_newlines ); | ||||
| 9565 | } | ||||
| 9566 | |||||
| 9567 | # then store the arrow | ||||
| 9568 | $token = '->'; | ||||
| 9569 | $type = $token; | ||||
| 9570 | store_token_to_go(); | ||||
| 9571 | |||||
| 9572 | # then reset the current token to be the remainder, | ||||
| 9573 | # and reset the whitespace flag according to the arrow | ||||
| 9574 | $$rwhite_space_flag[$j] = $want_right_space{'->'}; | ||||
| 9575 | $token = $token_save; | ||||
| 9576 | $type = $type_save; | ||||
| 9577 | } | ||||
| 9578 | |||||
| 9579 | if ( $token =~ /^sub/ ) { $token =~ s/\s+/ /g } | ||||
| 9580 | |||||
| 9581 | # trim identifiers of trailing blanks which can occur | ||||
| 9582 | # under some unusual circumstances, such as if the | ||||
| 9583 | # identifier 'witch' has trailing blanks on input here: | ||||
| 9584 | # | ||||
| 9585 | # sub | ||||
| 9586 | # witch | ||||
| 9587 | # () # prototype may be on new line ... | ||||
| 9588 | # ... | ||||
| 9589 | if ( $type eq 'i' ) { $token =~ s/\s+$//g } | ||||
| 9590 | } | ||||
| 9591 | |||||
| 9592 | # change 'LABEL :' to 'LABEL:' | ||||
| 9593 | elsif ( $type eq 'J' ) { $token =~ s/\s+//g } | ||||
| 9594 | |||||
| 9595 | # patch to add space to something like "x10" | ||||
| 9596 | # This avoids having to split this token in the pre-tokenizer | ||||
| 9597 | elsif ( $type eq 'n' ) { | ||||
| 9598 | if ( $token =~ /^x\d+/ ) { $token =~ s/x/x / } | ||||
| 9599 | } | ||||
| 9600 | |||||
| 9601 | elsif ( $type eq 'Q' ) { | ||||
| 9602 | note_embedded_tab() if ( $token =~ "\t" ); | ||||
| 9603 | |||||
| 9604 | # make note of something like '$var = s/xxx/yyy/;' | ||||
| 9605 | # in case it should have been '$var =~ s/xxx/yyy/;' | ||||
| 9606 | if ( | ||||
| 9607 | $token =~ /^(s|tr|y|m|\/)/ | ||||
| 9608 | && $last_nonblank_token =~ /^(=|==|!=)$/ | ||||
| 9609 | |||||
| 9610 | # preceded by simple scalar | ||||
| 9611 | && $last_last_nonblank_type eq 'i' | ||||
| 9612 | && $last_last_nonblank_token =~ /^\$/ | ||||
| 9613 | |||||
| 9614 | # followed by some kind of termination | ||||
| 9615 | # (but give complaint if we can's see far enough ahead) | ||||
| 9616 | && $next_nonblank_token =~ /^[; \)\}]$/ | ||||
| 9617 | |||||
| 9618 | # scalar is not declared | ||||
| 9619 | && !( | ||||
| 9620 | $types_to_go[0] eq 'k' | ||||
| 9621 | && $tokens_to_go[0] =~ /^(my|our|local)$/ | ||||
| 9622 | ) | ||||
| 9623 | ) | ||||
| 9624 | { | ||||
| 9625 | my $guess = substr( $last_nonblank_token, 0, 1 ) . '~'; | ||||
| 9626 | complain( | ||||
| 9627 | "Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n" | ||||
| 9628 | ); | ||||
| 9629 | } | ||||
| 9630 | } | ||||
| 9631 | |||||
| 9632 | # trim blanks from right of qw quotes | ||||
| 9633 | # (To avoid trimming qw quotes use -ntqw; the tokenizer handles this) | ||||
| 9634 | elsif ( $type eq 'q' ) { | ||||
| 9635 | $token =~ s/\s*$//; | ||||
| 9636 | note_embedded_tab() if ( $token =~ "\t" ); | ||||
| 9637 | } | ||||
| 9638 | |||||
| 9639 | #-------------------------------------------------------- | ||||
| 9640 | # End of section to patch token text | ||||
| 9641 | #-------------------------------------------------------- | ||||
| 9642 | |||||
| 9643 | # insert any needed whitespace | ||||
| 9644 | if ( ( $type ne 'b' ) | ||||
| 9645 | && ( $max_index_to_go >= 0 ) | ||||
| 9646 | && ( $types_to_go[$max_index_to_go] ne 'b' ) | ||||
| 9647 | && $rOpts_add_whitespace ) | ||||
| 9648 | { | ||||
| 9649 | my $ws = $$rwhite_space_flag[$j]; | ||||
| 9650 | |||||
| 9651 | if ( $ws == 1 ) { | ||||
| 9652 | insert_new_token_to_go( ' ', 'b', $slevel, | ||||
| 9653 | $no_internal_newlines ); | ||||
| 9654 | } | ||||
| 9655 | } | ||||
| 9656 | |||||
| 9657 | # Do not allow breaks which would promote a side comment to a | ||||
| 9658 | # block comment. In order to allow a break before an opening | ||||
| 9659 | # or closing BLOCK, followed by a side comment, those sections | ||||
| 9660 | # of code will handle this flag separately. | ||||
| 9661 | my $side_comment_follows = ( $next_nonblank_token_type eq '#' ); | ||||
| 9662 | my $is_opening_BLOCK = | ||||
| 9663 | ( $type eq '{' | ||||
| 9664 | && $token eq '{' | ||||
| 9665 | && $block_type | ||||
| 9666 | && $block_type ne 't' ); | ||||
| 9667 | my $is_closing_BLOCK = | ||||
| 9668 | ( $type eq '}' | ||||
| 9669 | && $token eq '}' | ||||
| 9670 | && $block_type | ||||
| 9671 | && $block_type ne 't' ); | ||||
| 9672 | |||||
| 9673 | if ( $side_comment_follows | ||||
| 9674 | && !$is_opening_BLOCK | ||||
| 9675 | && !$is_closing_BLOCK ) | ||||
| 9676 | { | ||||
| 9677 | $no_internal_newlines = 1; | ||||
| 9678 | } | ||||
| 9679 | |||||
| 9680 | # We're only going to handle breaking for code BLOCKS at this | ||||
| 9681 | # (top) level. Other indentation breaks will be handled by | ||||
| 9682 | # sub scan_list, which is better suited to dealing with them. | ||||
| 9683 | if ($is_opening_BLOCK) { | ||||
| 9684 | |||||
| 9685 | # Tentatively output this token. This is required before | ||||
| 9686 | # calling starting_one_line_block. We may have to unstore | ||||
| 9687 | # it, though, if we have to break before it. | ||||
| 9688 | store_token_to_go($side_comment_follows); | ||||
| 9689 | |||||
| 9690 | # Look ahead to see if we might form a one-line block | ||||
| 9691 | my $too_long = | ||||
| 9692 | starting_one_line_block( $j, $jmax, $level, $slevel, | ||||
| 9693 | $ci_level, $rtokens, $rtoken_type, $rblock_type ); | ||||
| 9694 | clear_breakpoint_undo_stack(); | ||||
| 9695 | |||||
| 9696 | # to simplify the logic below, set a flag to indicate if | ||||
| 9697 | # this opening brace is far from the keyword which introduces it | ||||
| 9698 | my $keyword_on_same_line = 1; | ||||
| 9699 | if ( ( $max_index_to_go >= 0 ) | ||||
| 9700 | && ( $last_nonblank_type eq ')' ) ) | ||||
| 9701 | { | ||||
| 9702 | if ( $block_type =~ /^(if|else|elsif)$/ | ||||
| 9703 | && ( $tokens_to_go[0] eq '}' ) | ||||
| 9704 | && $rOpts_cuddled_else ) | ||||
| 9705 | { | ||||
| 9706 | $keyword_on_same_line = 1; | ||||
| 9707 | } | ||||
| 9708 | elsif ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long ) | ||||
| 9709 | { | ||||
| 9710 | $keyword_on_same_line = 0; | ||||
| 9711 | } | ||||
| 9712 | } | ||||
| 9713 | |||||
| 9714 | # decide if user requested break before '{' | ||||
| 9715 | my $want_break = | ||||
| 9716 | |||||
| 9717 | # use -bl flag if not a sub block of any type | ||||
| 9718 | $block_type !~ /^sub/ | ||||
| 9719 | ? $rOpts->{'opening-brace-on-new-line'} | ||||
| 9720 | |||||
| 9721 | # use -sbl flag for a named sub block | ||||
| 9722 | : $block_type !~ /^sub\W*$/ | ||||
| 9723 | ? $rOpts->{'opening-sub-brace-on-new-line'} | ||||
| 9724 | |||||
| 9725 | # use -asbl flag for an anonymous sub block | ||||
| 9726 | : $rOpts->{'opening-anonymous-sub-brace-on-new-line'}; | ||||
| 9727 | |||||
| 9728 | # Break before an opening '{' ... | ||||
| 9729 | if ( | ||||
| 9730 | |||||
| 9731 | # if requested | ||||
| 9732 | $want_break | ||||
| 9733 | |||||
| 9734 | # and we were unable to start looking for a block, | ||||
| 9735 | && $index_start_one_line_block == UNDEFINED_INDEX | ||||
| 9736 | |||||
| 9737 | # or if it will not be on same line as its keyword, so that | ||||
| 9738 | # it will be outdented (eval.t, overload.t), and the user | ||||
| 9739 | # has not insisted on keeping it on the right | ||||
| 9740 | || ( !$keyword_on_same_line | ||||
| 9741 | && !$rOpts->{'opening-brace-always-on-right'} ) | ||||
| 9742 | |||||
| 9743 | ) | ||||
| 9744 | { | ||||
| 9745 | |||||
| 9746 | # but only if allowed | ||||
| 9747 | unless ($no_internal_newlines) { | ||||
| 9748 | |||||
| 9749 | # since we already stored this token, we must unstore it | ||||
| 9750 | unstore_token_to_go(); | ||||
| 9751 | |||||
| 9752 | # then output the line | ||||
| 9753 | output_line_to_go(); | ||||
| 9754 | |||||
| 9755 | # and now store this token at the start of a new line | ||||
| 9756 | store_token_to_go($side_comment_follows); | ||||
| 9757 | } | ||||
| 9758 | } | ||||
| 9759 | |||||
| 9760 | # Now update for side comment | ||||
| 9761 | if ($side_comment_follows) { $no_internal_newlines = 1 } | ||||
| 9762 | |||||
| 9763 | # now output this line | ||||
| 9764 | unless ($no_internal_newlines) { | ||||
| 9765 | output_line_to_go(); | ||||
| 9766 | } | ||||
| 9767 | } | ||||
| 9768 | |||||
| 9769 | elsif ($is_closing_BLOCK) { | ||||
| 9770 | |||||
| 9771 | # If there is a pending one-line block .. | ||||
| 9772 | if ( $index_start_one_line_block != UNDEFINED_INDEX ) { | ||||
| 9773 | |||||
| 9774 | # we have to terminate it if.. | ||||
| 9775 | if ( | ||||
| 9776 | |||||
| 9777 | # it is too long (final length may be different from | ||||
| 9778 | # initial estimate). note: must allow 1 space for this token | ||||
| 9779 | excess_line_length( $index_start_one_line_block, | ||||
| 9780 | $max_index_to_go ) >= 0 | ||||
| 9781 | |||||
| 9782 | # or if it has too many semicolons | ||||
| 9783 | || ( $semicolons_before_block_self_destruct == 0 | ||||
| 9784 | && $last_nonblank_type ne ';' ) | ||||
| 9785 | ) | ||||
| 9786 | { | ||||
| 9787 | destroy_one_line_block(); | ||||
| 9788 | } | ||||
| 9789 | } | ||||
| 9790 | |||||
| 9791 | # put a break before this closing curly brace if appropriate | ||||
| 9792 | unless ( $no_internal_newlines | ||||
| 9793 | || $index_start_one_line_block != UNDEFINED_INDEX ) | ||||
| 9794 | { | ||||
| 9795 | |||||
| 9796 | # add missing semicolon if ... | ||||
| 9797 | # there are some tokens | ||||
| 9798 | if ( | ||||
| 9799 | ( $max_index_to_go > 0 ) | ||||
| 9800 | |||||
| 9801 | # and we don't have one | ||||
| 9802 | && ( $last_nonblank_type ne ';' ) | ||||
| 9803 | |||||
| 9804 | # patch until some block type issues are fixed: | ||||
| 9805 | # Do not add semi-colon for block types '{', | ||||
| 9806 | # '}', and ';' because we cannot be sure yet | ||||
| 9807 | # that this is a block and not an anonymous | ||||
| 9808 | # hash (blktype.t, blktype1.t) | ||||
| 9809 | && ( $block_type !~ /^[\{\};]$/ ) | ||||
| 9810 | |||||
| 9811 | # patch: and do not add semi-colons for recently | ||||
| 9812 | # added block types (see tmp/semicolon.t) | ||||
| 9813 | && ( $block_type !~ | ||||
| 9814 | /^(switch|case|given|when|default)$/ ) | ||||
| 9815 | |||||
| 9816 | # it seems best not to add semicolons in these | ||||
| 9817 | # special block types: sort|map|grep | ||||
| 9818 | && ( !$is_sort_map_grep{$block_type} ) | ||||
| 9819 | |||||
| 9820 | # and we are allowed to do so. | ||||
| 9821 | && $rOpts->{'add-semicolons'} | ||||
| 9822 | ) | ||||
| 9823 | { | ||||
| 9824 | |||||
| 9825 | save_current_token(); | ||||
| 9826 | $token = ';'; | ||||
| 9827 | $type = ';'; | ||||
| 9828 | $level = $levels_to_go[$max_index_to_go]; | ||||
| 9829 | $slevel = $nesting_depth_to_go[$max_index_to_go]; | ||||
| 9830 | $nesting_blocks = | ||||
| 9831 | $nesting_blocks_to_go[$max_index_to_go]; | ||||
| 9832 | $ci_level = $ci_levels_to_go[$max_index_to_go]; | ||||
| 9833 | $block_type = ""; | ||||
| 9834 | $container_type = ""; | ||||
| 9835 | $container_environment = ""; | ||||
| 9836 | $type_sequence = ""; | ||||
| 9837 | |||||
| 9838 | # Note - we remove any blank AFTER extracting its | ||||
| 9839 | # parameters such as level, etc, above | ||||
| 9840 | if ( $types_to_go[$max_index_to_go] eq 'b' ) { | ||||
| 9841 | unstore_token_to_go(); | ||||
| 9842 | } | ||||
| 9843 | store_token_to_go(); | ||||
| 9844 | |||||
| 9845 | note_added_semicolon(); | ||||
| 9846 | restore_current_token(); | ||||
| 9847 | } | ||||
| 9848 | |||||
| 9849 | # then write out everything before this closing curly brace | ||||
| 9850 | output_line_to_go(); | ||||
| 9851 | |||||
| 9852 | } | ||||
| 9853 | |||||
| 9854 | # Now update for side comment | ||||
| 9855 | if ($side_comment_follows) { $no_internal_newlines = 1 } | ||||
| 9856 | |||||
| 9857 | # store the closing curly brace | ||||
| 9858 | store_token_to_go(); | ||||
| 9859 | |||||
| 9860 | # ok, we just stored a closing curly brace. Often, but | ||||
| 9861 | # not always, we want to end the line immediately. | ||||
| 9862 | # So now we have to check for special cases. | ||||
| 9863 | |||||
| 9864 | # if this '}' successfully ends a one-line block.. | ||||
| 9865 | my $is_one_line_block = 0; | ||||
| 9866 | my $keep_going = 0; | ||||
| 9867 | if ( $index_start_one_line_block != UNDEFINED_INDEX ) { | ||||
| 9868 | |||||
| 9869 | # Remember the type of token just before the | ||||
| 9870 | # opening brace. It would be more general to use | ||||
| 9871 | # a stack, but this will work for one-line blocks. | ||||
| 9872 | $is_one_line_block = | ||||
| 9873 | $types_to_go[$index_start_one_line_block]; | ||||
| 9874 | |||||
| 9875 | # we have to actually make it by removing tentative | ||||
| 9876 | # breaks that were set within it | ||||
| 9877 | undo_forced_breakpoint_stack(0); | ||||
| 9878 | set_nobreaks( $index_start_one_line_block, | ||||
| 9879 | $max_index_to_go - 1 ); | ||||
| 9880 | |||||
| 9881 | # then re-initialize for the next one-line block | ||||
| 9882 | destroy_one_line_block(); | ||||
| 9883 | |||||
| 9884 | # then decide if we want to break after the '}' .. | ||||
| 9885 | # We will keep going to allow certain brace followers as in: | ||||
| 9886 | # do { $ifclosed = 1; last } unless $losing; | ||||
| 9887 | # | ||||
| 9888 | # But make a line break if the curly ends a | ||||
| 9889 | # significant block: | ||||
| 9890 | if ( | ||||
| 9891 | $is_block_without_semicolon{$block_type} | ||||
| 9892 | |||||
| 9893 | # if needless semicolon follows we handle it later | ||||
| 9894 | && $next_nonblank_token ne ';' | ||||
| 9895 | ) | ||||
| 9896 | { | ||||
| 9897 | output_line_to_go() unless ($no_internal_newlines); | ||||
| 9898 | } | ||||
| 9899 | } | ||||
| 9900 | |||||
| 9901 | # set string indicating what we need to look for brace follower | ||||
| 9902 | # tokens | ||||
| 9903 | if ( $block_type eq 'do' ) { | ||||
| 9904 | $rbrace_follower = \%is_do_follower; | ||||
| 9905 | } | ||||
| 9906 | elsif ( $block_type =~ /^(if|elsif|unless)$/ ) { | ||||
| 9907 | $rbrace_follower = \%is_if_brace_follower; | ||||
| 9908 | } | ||||
| 9909 | elsif ( $block_type eq 'else' ) { | ||||
| 9910 | $rbrace_follower = \%is_else_brace_follower; | ||||
| 9911 | } | ||||
| 9912 | |||||
| 9913 | # added eval for borris.t | ||||
| 9914 | elsif ($is_sort_map_grep_eval{$block_type} | ||||
| 9915 | || $is_one_line_block eq 'G' ) | ||||
| 9916 | { | ||||
| 9917 | $rbrace_follower = undef; | ||||
| 9918 | $keep_going = 1; | ||||
| 9919 | } | ||||
| 9920 | |||||
| 9921 | # anonymous sub | ||||
| 9922 | elsif ( $block_type =~ /^sub\W*$/ ) { | ||||
| 9923 | |||||
| 9924 | if ($is_one_line_block) { | ||||
| 9925 | $rbrace_follower = \%is_anon_sub_1_brace_follower; | ||||
| 9926 | } | ||||
| 9927 | else { | ||||
| 9928 | $rbrace_follower = \%is_anon_sub_brace_follower; | ||||
| 9929 | } | ||||
| 9930 | } | ||||
| 9931 | |||||
| 9932 | # None of the above: specify what can follow a closing | ||||
| 9933 | # brace of a block which is not an | ||||
| 9934 | # if/elsif/else/do/sort/map/grep/eval | ||||
| 9935 | # Testfiles: | ||||
| 9936 | # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t | ||||
| 9937 | else { | ||||
| 9938 | $rbrace_follower = \%is_other_brace_follower; | ||||
| 9939 | } | ||||
| 9940 | |||||
| 9941 | # See if an elsif block is followed by another elsif or else; | ||||
| 9942 | # complain if not. | ||||
| 9943 | if ( $block_type eq 'elsif' ) { | ||||
| 9944 | |||||
| 9945 | if ( $next_nonblank_token_type eq 'b' ) { # end of line? | ||||
| 9946 | $looking_for_else = 1; # ok, check on next line | ||||
| 9947 | } | ||||
| 9948 | else { | ||||
| 9949 | |||||
| 9950 | unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) { | ||||
| 9951 | write_logfile_entry("No else block :(\n"); | ||||
| 9952 | } | ||||
| 9953 | } | ||||
| 9954 | } | ||||
| 9955 | |||||
| 9956 | # keep going after certain block types (map,sort,grep,eval) | ||||
| 9957 | # added eval for borris.t | ||||
| 9958 | if ($keep_going) { | ||||
| 9959 | |||||
| 9960 | # keep going | ||||
| 9961 | } | ||||
| 9962 | |||||
| 9963 | # if no more tokens, postpone decision until re-entring | ||||
| 9964 | elsif ( ( $next_nonblank_token_type eq 'b' ) | ||||
| 9965 | && $rOpts_add_newlines ) | ||||
| 9966 | { | ||||
| 9967 | unless ($rbrace_follower) { | ||||
| 9968 | output_line_to_go() unless ($no_internal_newlines); | ||||
| 9969 | } | ||||
| 9970 | } | ||||
| 9971 | |||||
| 9972 | elsif ($rbrace_follower) { | ||||
| 9973 | |||||
| 9974 | unless ( $rbrace_follower->{$next_nonblank_token} ) { | ||||
| 9975 | output_line_to_go() unless ($no_internal_newlines); | ||||
| 9976 | } | ||||
| 9977 | $rbrace_follower = undef; | ||||
| 9978 | } | ||||
| 9979 | |||||
| 9980 | else { | ||||
| 9981 | output_line_to_go() unless ($no_internal_newlines); | ||||
| 9982 | } | ||||
| 9983 | |||||
| 9984 | } # end treatment of closing block token | ||||
| 9985 | |||||
| 9986 | # handle semicolon | ||||
| 9987 | elsif ( $type eq ';' ) { | ||||
| 9988 | |||||
| 9989 | # kill one-line blocks with too many semicolons | ||||
| 9990 | $semicolons_before_block_self_destruct--; | ||||
| 9991 | if ( | ||||
| 9992 | ( $semicolons_before_block_self_destruct < 0 ) | ||||
| 9993 | || ( $semicolons_before_block_self_destruct == 0 | ||||
| 9994 | && $next_nonblank_token_type !~ /^[b\}]$/ ) | ||||
| 9995 | ) | ||||
| 9996 | { | ||||
| 9997 | destroy_one_line_block(); | ||||
| 9998 | } | ||||
| 9999 | |||||
| 10000 | # Remove unnecessary semicolons, but not after bare | ||||
| 10001 | # blocks, where it could be unsafe if the brace is | ||||
| 10002 | # mistokenized. | ||||
| 10003 | if ( | ||||
| 10004 | ( | ||||
| 10005 | $last_nonblank_token eq '}' | ||||
| 10006 | && ( | ||||
| 10007 | $is_block_without_semicolon{ | ||||
| 10008 | $last_nonblank_block_type} | ||||
| 10009 | || $last_nonblank_block_type =~ /^sub\s+\w/ | ||||
| 10010 | || $last_nonblank_block_type =~ /^\w+:$/ ) | ||||
| 10011 | ) | ||||
| 10012 | || $last_nonblank_type eq ';' | ||||
| 10013 | ) | ||||
| 10014 | { | ||||
| 10015 | |||||
| 10016 | if ( | ||||
| 10017 | $rOpts->{'delete-semicolons'} | ||||
| 10018 | |||||
| 10019 | # don't delete ; before a # because it would promote it | ||||
| 10020 | # to a block comment | ||||
| 10021 | && ( $next_nonblank_token_type ne '#' ) | ||||
| 10022 | ) | ||||
| 10023 | { | ||||
| 10024 | note_deleted_semicolon(); | ||||
| 10025 | output_line_to_go() | ||||
| 10026 | unless ( $no_internal_newlines | ||||
| 10027 | || $index_start_one_line_block != UNDEFINED_INDEX ); | ||||
| 10028 | next; | ||||
| 10029 | } | ||||
| 10030 | else { | ||||
| 10031 | write_logfile_entry("Extra ';'\n"); | ||||
| 10032 | } | ||||
| 10033 | } | ||||
| 10034 | store_token_to_go(); | ||||
| 10035 | |||||
| 10036 | output_line_to_go() | ||||
| 10037 | unless ( $no_internal_newlines | ||||
| 10038 | || ( $rOpts_keep_interior_semicolons && $j < $jmax ) | ||||
| 10039 | || ( $next_nonblank_token eq '}' ) ); | ||||
| 10040 | |||||
| 10041 | } | ||||
| 10042 | |||||
| 10043 | # handle here_doc target string | ||||
| 10044 | elsif ( $type eq 'h' ) { | ||||
| 10045 | $no_internal_newlines = | ||||
| 10046 | 1; # no newlines after seeing here-target | ||||
| 10047 | destroy_one_line_block(); | ||||
| 10048 | store_token_to_go(); | ||||
| 10049 | } | ||||
| 10050 | |||||
| 10051 | # handle all other token types | ||||
| 10052 | else { | ||||
| 10053 | |||||
| 10054 | # if this is a blank... | ||||
| 10055 | if ( $type eq 'b' ) { | ||||
| 10056 | |||||
| 10057 | # make it just one character | ||||
| 10058 | $token = ' ' if $rOpts_add_whitespace; | ||||
| 10059 | |||||
| 10060 | # delete it if unwanted by whitespace rules | ||||
| 10061 | # or we are deleting all whitespace | ||||
| 10062 | my $ws = $$rwhite_space_flag[ $j + 1 ]; | ||||
| 10063 | if ( ( defined($ws) && $ws == -1 ) | ||||
| 10064 | || $rOpts_delete_old_whitespace ) | ||||
| 10065 | { | ||||
| 10066 | |||||
| 10067 | # unless it might make a syntax error | ||||
| 10068 | next | ||||
| 10069 | unless is_essential_whitespace( | ||||
| 10070 | $last_last_nonblank_token, | ||||
| 10071 | $last_last_nonblank_type, | ||||
| 10072 | $tokens_to_go[$max_index_to_go], | ||||
| 10073 | $types_to_go[$max_index_to_go], | ||||
| 10074 | $$rtokens[ $j + 1 ], | ||||
| 10075 | $$rtoken_type[ $j + 1 ] | ||||
| 10076 | ); | ||||
| 10077 | } | ||||
| 10078 | } | ||||
| 10079 | store_token_to_go(); | ||||
| 10080 | } | ||||
| 10081 | |||||
| 10082 | # remember two previous nonblank OUTPUT tokens | ||||
| 10083 | if ( $type ne '#' && $type ne 'b' ) { | ||||
| 10084 | $last_last_nonblank_token = $last_nonblank_token; | ||||
| 10085 | $last_last_nonblank_type = $last_nonblank_type; | ||||
| 10086 | $last_nonblank_token = $token; | ||||
| 10087 | $last_nonblank_type = $type; | ||||
| 10088 | $last_nonblank_block_type = $block_type; | ||||
| 10089 | } | ||||
| 10090 | |||||
| 10091 | # unset the continued-quote flag since it only applies to the | ||||
| 10092 | # first token, and we want to resume normal formatting if | ||||
| 10093 | # there are additional tokens on the line | ||||
| 10094 | $in_continued_quote = 0; | ||||
| 10095 | |||||
| 10096 | } # end of loop over all tokens in this 'line_of_tokens' | ||||
| 10097 | |||||
| 10098 | # we have to flush .. | ||||
| 10099 | if ( | ||||
| 10100 | |||||
| 10101 | # if there is a side comment | ||||
| 10102 | ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} ) | ||||
| 10103 | |||||
| 10104 | # if this line ends in a quote | ||||
| 10105 | # NOTE: This is critically important for insuring that quoted lines | ||||
| 10106 | # do not get processed by things like -sot and -sct | ||||
| 10107 | || $in_quote | ||||
| 10108 | |||||
| 10109 | # if this is a VERSION statement | ||||
| 10110 | || $is_VERSION_statement | ||||
| 10111 | |||||
| 10112 | # to keep a label at the end of a line | ||||
| 10113 | || $type eq 'J' | ||||
| 10114 | |||||
| 10115 | # if we are instructed to keep all old line breaks | ||||
| 10116 | || !$rOpts->{'delete-old-newlines'} | ||||
| 10117 | ) | ||||
| 10118 | { | ||||
| 10119 | destroy_one_line_block(); | ||||
| 10120 | output_line_to_go(); | ||||
| 10121 | } | ||||
| 10122 | |||||
| 10123 | # mark old line breakpoints in current output stream | ||||
| 10124 | if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) { | ||||
| 10125 | $old_breakpoint_to_go[$max_index_to_go] = 1; | ||||
| 10126 | } | ||||
| 10127 | } ## end sub print_line_of_tokens | ||||
| 10128 | } ## end block print_line_of_tokens | ||||
| 10129 | |||||
| 10130 | # sub output_line_to_go sends one logical line of tokens on down the | ||||
| 10131 | # pipeline to the VerticalAligner package, breaking the line into continuation | ||||
| 10132 | # lines as necessary. The line of tokens is ready to go in the "to_go" | ||||
| 10133 | # arrays. | ||||
| 10134 | sub output_line_to_go { | ||||
| 10135 | |||||
| 10136 | # debug stuff; this routine can be called from many points | ||||
| 10137 | FORMATTER_DEBUG_FLAG_OUTPUT && do { | ||||
| 10138 | my ( $a, $b, $c ) = caller; | ||||
| 10139 | write_diagnostics( | ||||
| 10140 | "OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n" | ||||
| 10141 | ); | ||||
| 10142 | my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ]; | ||||
| 10143 | write_diagnostics("$output_str\n"); | ||||
| 10144 | }; | ||||
| 10145 | |||||
| 10146 | # just set a tentative breakpoint if we might be in a one-line block | ||||
| 10147 | if ( $index_start_one_line_block != UNDEFINED_INDEX ) { | ||||
| 10148 | set_forced_breakpoint($max_index_to_go); | ||||
| 10149 | return; | ||||
| 10150 | } | ||||
| 10151 | |||||
| 10152 | my $cscw_block_comment; | ||||
| 10153 | $cscw_block_comment = add_closing_side_comment() | ||||
| 10154 | if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 ); | ||||
| 10155 | |||||
| 10156 | my $comma_arrow_count_contained = match_opening_and_closing_tokens(); | ||||
| 10157 | |||||
| 10158 | # tell the -lp option we are outputting a batch so it can close | ||||
| 10159 | # any unfinished items in its stack | ||||
| 10160 | finish_lp_batch(); | ||||
| 10161 | |||||
| 10162 | # If this line ends in a code block brace, set breaks at any | ||||
| 10163 | # previous closing code block braces to breakup a chain of code | ||||
| 10164 | # blocks on one line. This is very rare but can happen for | ||||
| 10165 | # user-defined subs. For example we might be looking at this: | ||||
| 10166 | # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR { | ||||
| 10167 | my $saw_good_break = 0; # flag to force breaks even if short line | ||||
| 10168 | if ( | ||||
| 10169 | |||||
| 10170 | # looking for opening or closing block brace | ||||
| 10171 | $block_type_to_go[$max_index_to_go] | ||||
| 10172 | |||||
| 10173 | # but not one of these which are never duplicated on a line: | ||||
| 10174 | # until|while|for|if|elsif|else | ||||
| 10175 | && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] } | ||||
| 10176 | ) | ||||
| 10177 | { | ||||
| 10178 | my $lev = $nesting_depth_to_go[$max_index_to_go]; | ||||
| 10179 | |||||
| 10180 | # Walk backwards from the end and | ||||
| 10181 | # set break at any closing block braces at the same level. | ||||
| 10182 | # But quit if we are not in a chain of blocks. | ||||
| 10183 | for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) { | ||||
| 10184 | last if ( $levels_to_go[$i] < $lev ); # stop at a lower level | ||||
| 10185 | next if ( $levels_to_go[$i] > $lev ); # skip past higher level | ||||
| 10186 | |||||
| 10187 | if ( $block_type_to_go[$i] ) { | ||||
| 10188 | if ( $tokens_to_go[$i] eq '}' ) { | ||||
| 10189 | set_forced_breakpoint($i); | ||||
| 10190 | $saw_good_break = 1; | ||||
| 10191 | } | ||||
| 10192 | } | ||||
| 10193 | |||||
| 10194 | # quit if we see anything besides words, function, blanks | ||||
| 10195 | # at this level | ||||
| 10196 | elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last } | ||||
| 10197 | } | ||||
| 10198 | } | ||||
| 10199 | |||||
| 10200 | my $imin = 0; | ||||
| 10201 | my $imax = $max_index_to_go; | ||||
| 10202 | |||||
| 10203 | # trim any blank tokens | ||||
| 10204 | if ( $max_index_to_go >= 0 ) { | ||||
| 10205 | if ( $types_to_go[$imin] eq 'b' ) { $imin++ } | ||||
| 10206 | if ( $types_to_go[$imax] eq 'b' ) { $imax-- } | ||||
| 10207 | } | ||||
| 10208 | |||||
| 10209 | # anything left to write? | ||||
| 10210 | if ( $imin <= $imax ) { | ||||
| 10211 | |||||
| 10212 | # add a blank line before certain key types but not after a comment | ||||
| 10213 | if ( $last_line_leading_type !~ /^[#]/ ) { | ||||
| 10214 | my $want_blank = 0; | ||||
| 10215 | my $leading_token = $tokens_to_go[$imin]; | ||||
| 10216 | my $leading_type = $types_to_go[$imin]; | ||||
| 10217 | |||||
| 10218 | # blank lines before subs except declarations and one-liners | ||||
| 10219 | # MCONVERSION LOCATION - for sub tokenization change | ||||
| 10220 | if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) { | ||||
| 10221 | $want_blank = $rOpts->{'blank-lines-before-subs'} | ||||
| 10222 | if ( | ||||
| 10223 | terminal_type( \@types_to_go, \@block_type_to_go, $imin, | ||||
| 10224 | $imax ) !~ /^[\;\}]$/ | ||||
| 10225 | ); | ||||
| 10226 | } | ||||
| 10227 | |||||
| 10228 | # break before all package declarations | ||||
| 10229 | # MCONVERSION LOCATION - for tokenizaton change | ||||
| 10230 | elsif ($leading_token =~ /^(package\s)/ | ||||
| 10231 | && $leading_type eq 'i' ) | ||||
| 10232 | { | ||||
| 10233 | $want_blank = $rOpts->{'blank-lines-before-packages'}; | ||||
| 10234 | } | ||||
| 10235 | |||||
| 10236 | # break before certain key blocks except one-liners | ||||
| 10237 | if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) { | ||||
| 10238 | $want_blank = $rOpts->{'blank-lines-before-subs'} | ||||
| 10239 | if ( | ||||
| 10240 | terminal_type( \@types_to_go, \@block_type_to_go, $imin, | ||||
| 10241 | $imax ) ne '}' | ||||
| 10242 | ); | ||||
| 10243 | } | ||||
| 10244 | |||||
| 10245 | # Break before certain block types if we haven't had a | ||||
| 10246 | # break at this level for a while. This is the | ||||
| 10247 | # difficult decision.. | ||||
| 10248 | elsif ($leading_type eq 'k' | ||||
| 10249 | && $last_line_leading_type ne 'b' | ||||
| 10250 | && $leading_token =~ /^(unless|if|while|until|for|foreach)$/ ) | ||||
| 10251 | { | ||||
| 10252 | my $lc = $nonblank_lines_at_depth[$last_line_leading_level]; | ||||
| 10253 | if ( !defined($lc) ) { $lc = 0 } | ||||
| 10254 | |||||
| 10255 | $want_blank = | ||||
| 10256 | $rOpts->{'blanks-before-blocks'} | ||||
| 10257 | && $lc >= $rOpts->{'long-block-line-count'} | ||||
| 10258 | && $file_writer_object->get_consecutive_nonblank_lines() >= | ||||
| 10259 | $rOpts->{'long-block-line-count'} | ||||
| 10260 | && ( | ||||
| 10261 | terminal_type( \@types_to_go, \@block_type_to_go, $imin, | ||||
| 10262 | $imax ) ne '}' | ||||
| 10263 | ); | ||||
| 10264 | } | ||||
| 10265 | |||||
| 10266 | if ($want_blank) { | ||||
| 10267 | |||||
| 10268 | # future: send blank line down normal path to VerticalAligner | ||||
| 10269 | Perl::Tidy::VerticalAligner::flush(); | ||||
| 10270 | $file_writer_object->require_blank_code_lines($want_blank); | ||||
| 10271 | } | ||||
| 10272 | } | ||||
| 10273 | |||||
| 10274 | # update blank line variables and count number of consecutive | ||||
| 10275 | # non-blank, non-comment lines at this level | ||||
| 10276 | $last_last_line_leading_level = $last_line_leading_level; | ||||
| 10277 | $last_line_leading_level = $levels_to_go[$imin]; | ||||
| 10278 | if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 } | ||||
| 10279 | $last_line_leading_type = $types_to_go[$imin]; | ||||
| 10280 | if ( $last_line_leading_level == $last_last_line_leading_level | ||||
| 10281 | && $last_line_leading_type ne 'b' | ||||
| 10282 | && $last_line_leading_type ne '#' | ||||
| 10283 | && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) ) | ||||
| 10284 | { | ||||
| 10285 | $nonblank_lines_at_depth[$last_line_leading_level]++; | ||||
| 10286 | } | ||||
| 10287 | else { | ||||
| 10288 | $nonblank_lines_at_depth[$last_line_leading_level] = 1; | ||||
| 10289 | } | ||||
| 10290 | |||||
| 10291 | FORMATTER_DEBUG_FLAG_FLUSH && do { | ||||
| 10292 | my ( $package, $file, $line ) = caller; | ||||
| 10293 | print STDOUT | ||||
| 10294 | "FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n"; | ||||
| 10295 | }; | ||||
| 10296 | |||||
| 10297 | # add a couple of extra terminal blank tokens | ||||
| 10298 | pad_array_to_go(); | ||||
| 10299 | |||||
| 10300 | # set all forced breakpoints for good list formatting | ||||
| 10301 | my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0; | ||||
| 10302 | |||||
| 10303 | if ( | ||||
| 10304 | $is_long_line | ||||
| 10305 | || $old_line_count_in_batch > 1 | ||||
| 10306 | |||||
| 10307 | # must always call scan_list() with unbalanced batches because it | ||||
| 10308 | # is maintaining some stacks | ||||
| 10309 | || is_unbalanced_batch() | ||||
| 10310 | |||||
| 10311 | # call scan_list if we might want to break at commas | ||||
| 10312 | || ( | ||||
| 10313 | $comma_count_in_batch | ||||
| 10314 | && ( $rOpts_maximum_fields_per_table > 0 | ||||
| 10315 | || $rOpts_comma_arrow_breakpoints == 0 ) | ||||
| 10316 | ) | ||||
| 10317 | |||||
| 10318 | # call scan_list if user may want to break open some one-line | ||||
| 10319 | # hash references | ||||
| 10320 | || ( $comma_arrow_count_contained | ||||
| 10321 | && $rOpts_comma_arrow_breakpoints != 3 ) | ||||
| 10322 | ) | ||||
| 10323 | { | ||||
| 10324 | ## This caused problems in one version of perl for unknown reasons: | ||||
| 10325 | ## $saw_good_break ||= scan_list(); | ||||
| 10326 | my $sgb = scan_list(); | ||||
| 10327 | $saw_good_break ||= $sgb; | ||||
| 10328 | } | ||||
| 10329 | |||||
| 10330 | # let $ri_first and $ri_last be references to lists of | ||||
| 10331 | # first and last tokens of line fragments to output.. | ||||
| 10332 | my ( $ri_first, $ri_last ); | ||||
| 10333 | |||||
| 10334 | # write a single line if.. | ||||
| 10335 | if ( | ||||
| 10336 | |||||
| 10337 | # we aren't allowed to add any newlines | ||||
| 10338 | !$rOpts_add_newlines | ||||
| 10339 | |||||
| 10340 | # or, we don't already have an interior breakpoint | ||||
| 10341 | # and we didn't see a good breakpoint | ||||
| 10342 | || ( | ||||
| 10343 | !$forced_breakpoint_count | ||||
| 10344 | && !$saw_good_break | ||||
| 10345 | |||||
| 10346 | # and this line is 'short' | ||||
| 10347 | && !$is_long_line | ||||
| 10348 | ) | ||||
| 10349 | ) | ||||
| 10350 | { | ||||
| 10351 | @$ri_first = ($imin); | ||||
| 10352 | @$ri_last = ($imax); | ||||
| 10353 | } | ||||
| 10354 | |||||
| 10355 | # otherwise use multiple lines | ||||
| 10356 | else { | ||||
| 10357 | |||||
| 10358 | ( $ri_first, $ri_last, my $colon_count ) = | ||||
| 10359 | set_continuation_breaks($saw_good_break); | ||||
| 10360 | |||||
| 10361 | break_all_chain_tokens( $ri_first, $ri_last ); | ||||
| 10362 | |||||
| 10363 | break_equals( $ri_first, $ri_last ); | ||||
| 10364 | |||||
| 10365 | # now we do a correction step to clean this up a bit | ||||
| 10366 | # (The only time we would not do this is for debugging) | ||||
| 10367 | if ( $rOpts->{'recombine'} ) { | ||||
| 10368 | ( $ri_first, $ri_last ) = | ||||
| 10369 | recombine_breakpoints( $ri_first, $ri_last ); | ||||
| 10370 | } | ||||
| 10371 | |||||
| 10372 | insert_final_breaks( $ri_first, $ri_last ) if $colon_count; | ||||
| 10373 | } | ||||
| 10374 | |||||
| 10375 | # do corrector step if -lp option is used | ||||
| 10376 | my $do_not_pad = 0; | ||||
| 10377 | if ($rOpts_line_up_parentheses) { | ||||
| 10378 | $do_not_pad = correct_lp_indentation( $ri_first, $ri_last ); | ||||
| 10379 | } | ||||
| 10380 | send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad ); | ||||
| 10381 | } | ||||
| 10382 | prepare_for_new_input_lines(); | ||||
| 10383 | |||||
| 10384 | # output any new -cscw block comment | ||||
| 10385 | if ($cscw_block_comment) { | ||||
| 10386 | flush(); | ||||
| 10387 | $file_writer_object->write_code_line( $cscw_block_comment . "\n" ); | ||||
| 10388 | } | ||||
| 10389 | } | ||||
| 10390 | |||||
| 10391 | sub note_added_semicolon { | ||||
| 10392 | $last_added_semicolon_at = $input_line_number; | ||||
| 10393 | if ( $added_semicolon_count == 0 ) { | ||||
| 10394 | $first_added_semicolon_at = $last_added_semicolon_at; | ||||
| 10395 | } | ||||
| 10396 | $added_semicolon_count++; | ||||
| 10397 | write_logfile_entry("Added ';' here\n"); | ||||
| 10398 | } | ||||
| 10399 | |||||
| 10400 | sub note_deleted_semicolon { | ||||
| 10401 | $last_deleted_semicolon_at = $input_line_number; | ||||
| 10402 | if ( $deleted_semicolon_count == 0 ) { | ||||
| 10403 | $first_deleted_semicolon_at = $last_deleted_semicolon_at; | ||||
| 10404 | } | ||||
| 10405 | $deleted_semicolon_count++; | ||||
| 10406 | write_logfile_entry("Deleted unnecessary ';'\n"); # i hope ;) | ||||
| 10407 | } | ||||
| 10408 | |||||
| 10409 | sub note_embedded_tab { | ||||
| 10410 | $embedded_tab_count++; | ||||
| 10411 | $last_embedded_tab_at = $input_line_number; | ||||
| 10412 | if ( !$first_embedded_tab_at ) { | ||||
| 10413 | $first_embedded_tab_at = $last_embedded_tab_at; | ||||
| 10414 | } | ||||
| 10415 | |||||
| 10416 | if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) { | ||||
| 10417 | write_logfile_entry("Embedded tabs in quote or pattern\n"); | ||||
| 10418 | } | ||||
| 10419 | } | ||||
| 10420 | |||||
| 10421 | sub starting_one_line_block { | ||||
| 10422 | |||||
| 10423 | # after seeing an opening curly brace, look for the closing brace | ||||
| 10424 | # and see if the entire block will fit on a line. This routine is | ||||
| 10425 | # not always right because it uses the old whitespace, so a check | ||||
| 10426 | # is made later (at the closing brace) to make sure we really | ||||
| 10427 | # have a one-line block. We have to do this preliminary check, | ||||
| 10428 | # though, because otherwise we would always break at a semicolon | ||||
| 10429 | # within a one-line block if the block contains multiple statements. | ||||
| 10430 | |||||
| 10431 | my ( $j, $jmax, $level, $slevel, $ci_level, $rtokens, $rtoken_type, | ||||
| 10432 | $rblock_type ) | ||||
| 10433 | = @_; | ||||
| 10434 | |||||
| 10435 | # kill any current block - we can only go 1 deep | ||||
| 10436 | destroy_one_line_block(); | ||||
| 10437 | |||||
| 10438 | # return value: | ||||
| 10439 | # 1=distance from start of block to opening brace exceeds line length | ||||
| 10440 | # 0=otherwise | ||||
| 10441 | |||||
| 10442 | my $i_start = 0; | ||||
| 10443 | |||||
| 10444 | # shouldn't happen: there must have been a prior call to | ||||
| 10445 | # store_token_to_go to put the opening brace in the output stream | ||||
| 10446 | if ( $max_index_to_go < 0 ) { | ||||
| 10447 | warning("program bug: store_token_to_go called incorrectly\n"); | ||||
| 10448 | report_definite_bug(); | ||||
| 10449 | } | ||||
| 10450 | else { | ||||
| 10451 | |||||
| 10452 | # cannot use one-line blocks with cuddled else/elsif lines | ||||
| 10453 | if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) { | ||||
| 10454 | return 0; | ||||
| 10455 | } | ||||
| 10456 | } | ||||
| 10457 | |||||
| 10458 | my $block_type = $$rblock_type[$j]; | ||||
| 10459 | |||||
| 10460 | # find the starting keyword for this block (such as 'if', 'else', ...) | ||||
| 10461 | |||||
| 10462 | if ( $block_type =~ /^[\{\}\;\:]$/ || $block_type =~ /^package/ ) { | ||||
| 10463 | $i_start = $max_index_to_go; | ||||
| 10464 | } | ||||
| 10465 | |||||
| 10466 | elsif ( $last_last_nonblank_token_to_go eq ')' ) { | ||||
| 10467 | |||||
| 10468 | # For something like "if (xxx) {", the keyword "if" will be | ||||
| 10469 | # just after the most recent break. This will be 0 unless | ||||
| 10470 | # we have just killed a one-line block and are starting another. | ||||
| 10471 | # (doif.t) | ||||
| 10472 | # Note: cannot use inext_index_to_go[] here because that array | ||||
| 10473 | # is still being constructed. | ||||
| 10474 | $i_start = $index_max_forced_break + 1; | ||||
| 10475 | if ( $types_to_go[$i_start] eq 'b' ) { | ||||
| 10476 | $i_start++; | ||||
| 10477 | } | ||||
| 10478 | |||||
| 10479 | unless ( $tokens_to_go[$i_start] eq $block_type ) { | ||||
| 10480 | return 0; | ||||
| 10481 | } | ||||
| 10482 | } | ||||
| 10483 | |||||
| 10484 | # the previous nonblank token should start these block types | ||||
| 10485 | elsif (( $last_last_nonblank_token_to_go eq $block_type ) | ||||
| 10486 | || ( $block_type =~ /^sub/ ) ) | ||||
| 10487 | { | ||||
| 10488 | $i_start = $last_last_nonblank_index_to_go; | ||||
| 10489 | } | ||||
| 10490 | |||||
| 10491 | # patch for SWITCH/CASE to retain one-line case/when blocks | ||||
| 10492 | elsif ( $block_type eq 'case' || $block_type eq 'when' ) { | ||||
| 10493 | |||||
| 10494 | # Note: cannot use inext_index_to_go[] here because that array | ||||
| 10495 | # is still being constructed. | ||||
| 10496 | $i_start = $index_max_forced_break + 1; | ||||
| 10497 | if ( $types_to_go[$i_start] eq 'b' ) { | ||||
| 10498 | $i_start++; | ||||
| 10499 | } | ||||
| 10500 | unless ( $tokens_to_go[$i_start] eq $block_type ) { | ||||
| 10501 | return 0; | ||||
| 10502 | } | ||||
| 10503 | } | ||||
| 10504 | |||||
| 10505 | else { | ||||
| 10506 | return 1; | ||||
| 10507 | } | ||||
| 10508 | |||||
| 10509 | my $pos = total_line_length( $i_start, $max_index_to_go ) - 1; | ||||
| 10510 | |||||
| 10511 | my $i; | ||||
| 10512 | |||||
| 10513 | # see if length is too long to even start | ||||
| 10514 | if ( $pos > maximum_line_length($i_start) ) { | ||||
| 10515 | return 1; | ||||
| 10516 | } | ||||
| 10517 | |||||
| 10518 | for ( $i = $j + 1 ; $i <= $jmax ; $i++ ) { | ||||
| 10519 | |||||
| 10520 | # old whitespace could be arbitrarily large, so don't use it | ||||
| 10521 | if ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 } | ||||
| 10522 | else { $pos += rtoken_length($i) } | ||||
| 10523 | |||||
| 10524 | # Return false result if we exceed the maximum line length, | ||||
| 10525 | if ( $pos > maximum_line_length($i_start) ) { | ||||
| 10526 | return 0; | ||||
| 10527 | } | ||||
| 10528 | |||||
| 10529 | # or encounter another opening brace before finding the closing brace. | ||||
| 10530 | elsif ($$rtokens[$i] eq '{' | ||||
| 10531 | && $$rtoken_type[$i] eq '{' | ||||
| 10532 | && $$rblock_type[$i] ) | ||||
| 10533 | { | ||||
| 10534 | return 0; | ||||
| 10535 | } | ||||
| 10536 | |||||
| 10537 | # if we find our closing brace.. | ||||
| 10538 | elsif ($$rtokens[$i] eq '}' | ||||
| 10539 | && $$rtoken_type[$i] eq '}' | ||||
| 10540 | && $$rblock_type[$i] ) | ||||
| 10541 | { | ||||
| 10542 | |||||
| 10543 | # be sure any trailing comment also fits on the line | ||||
| 10544 | my $i_nonblank = | ||||
| 10545 | ( $$rtoken_type[ $i + 1 ] eq 'b' ) ? $i + 2 : $i + 1; | ||||
| 10546 | |||||
| 10547 | # Patch for one-line sort/map/grep/eval blocks with side comments: | ||||
| 10548 | # We will ignore the side comment length for sort/map/grep/eval | ||||
| 10549 | # because this can lead to statements which change every time | ||||
| 10550 | # perltidy is run. Here is an example from Denis Moskowitz which | ||||
| 10551 | # oscillates between these two states without this patch: | ||||
| 10552 | |||||
| 10553 | ## -------- | ||||
| 10554 | ## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf | ||||
| 10555 | ## @baz; | ||||
| 10556 | ## | ||||
| 10557 | ## grep { | ||||
| 10558 | ## $_->foo ne 'bar' | ||||
| 10559 | ## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf | ||||
| 10560 | ## @baz; | ||||
| 10561 | ## -------- | ||||
| 10562 | |||||
| 10563 | # When the first line is input it gets broken apart by the main | ||||
| 10564 | # line break logic in sub print_line_of_tokens. | ||||
| 10565 | # When the second line is input it gets recombined by | ||||
| 10566 | # print_line_of_tokens and passed to the output routines. The | ||||
| 10567 | # output routines (set_continuation_breaks) do not break it apart | ||||
| 10568 | # because the bond strengths are set to the highest possible value | ||||
| 10569 | # for grep/map/eval/sort blocks, so the first version gets output. | ||||
| 10570 | # It would be possible to fix this by changing bond strengths, | ||||
| 10571 | # but they are high to prevent errors in older versions of perl. | ||||
| 10572 | |||||
| 10573 | if ( $$rtoken_type[$i_nonblank] eq '#' | ||||
| 10574 | && !$is_sort_map_grep{$block_type} ) | ||||
| 10575 | { | ||||
| 10576 | |||||
| 10577 | $pos += rtoken_length($i_nonblank); | ||||
| 10578 | |||||
| 10579 | if ( $i_nonblank > $i + 1 ) { | ||||
| 10580 | |||||
| 10581 | # source whitespace could be anything, assume | ||||
| 10582 | # at least one space before the hash on output | ||||
| 10583 | if ( $$rtoken_type[ $i + 1 ] eq 'b' ) { $pos += 1 } | ||||
| 10584 | else { $pos += rtoken_length( $i + 1 ) } | ||||
| 10585 | } | ||||
| 10586 | |||||
| 10587 | if ( $pos >= maximum_line_length($i_start) ) { | ||||
| 10588 | return 0; | ||||
| 10589 | } | ||||
| 10590 | } | ||||
| 10591 | |||||
| 10592 | # ok, it's a one-line block | ||||
| 10593 | create_one_line_block( $i_start, 20 ); | ||||
| 10594 | return 0; | ||||
| 10595 | } | ||||
| 10596 | |||||
| 10597 | # just keep going for other characters | ||||
| 10598 | else { | ||||
| 10599 | } | ||||
| 10600 | } | ||||
| 10601 | |||||
| 10602 | # Allow certain types of new one-line blocks to form by joining | ||||
| 10603 | # input lines. These can be safely done, but for other block types, | ||||
| 10604 | # we keep old one-line blocks but do not form new ones. It is not | ||||
| 10605 | # always a good idea to make as many one-line blocks as possible, | ||||
| 10606 | # so other types are not done. The user can always use -mangle. | ||||
| 10607 | if ( $is_sort_map_grep_eval{$block_type} ) { | ||||
| 10608 | create_one_line_block( $i_start, 1 ); | ||||
| 10609 | } | ||||
| 10610 | |||||
| 10611 | return 0; | ||||
| 10612 | } | ||||
| 10613 | |||||
| 10614 | sub unstore_token_to_go { | ||||
| 10615 | |||||
| 10616 | # remove most recent token from output stream | ||||
| 10617 | if ( $max_index_to_go > 0 ) { | ||||
| 10618 | $max_index_to_go--; | ||||
| 10619 | } | ||||
| 10620 | else { | ||||
| 10621 | $max_index_to_go = UNDEFINED_INDEX; | ||||
| 10622 | } | ||||
| 10623 | |||||
| 10624 | } | ||||
| 10625 | |||||
| 10626 | sub want_blank_line { | ||||
| 10627 | flush(); | ||||
| 10628 | $file_writer_object->want_blank_line(); | ||||
| 10629 | } | ||||
| 10630 | |||||
| 10631 | sub write_unindented_line { | ||||
| 10632 | flush(); | ||||
| 10633 | $file_writer_object->write_line( $_[0] ); | ||||
| 10634 | } | ||||
| 10635 | |||||
| 10636 | sub undo_ci { | ||||
| 10637 | |||||
| 10638 | # Undo continuation indentation in certain sequences | ||||
| 10639 | # For example, we can undo continuation indentation in sort/map/grep chains | ||||
| 10640 | # my $dat1 = pack( "n*", | ||||
| 10641 | # map { $_, $lookup->{$_} } | ||||
| 10642 | # sort { $a <=> $b } | ||||
| 10643 | # grep { $lookup->{$_} ne $default } keys %$lookup ); | ||||
| 10644 | # To align the map/sort/grep keywords like this: | ||||
| 10645 | # my $dat1 = pack( "n*", | ||||
| 10646 | # map { $_, $lookup->{$_} } | ||||
| 10647 | # sort { $a <=> $b } | ||||
| 10648 | # grep { $lookup->{$_} ne $default } keys %$lookup ); | ||||
| 10649 | my ( $ri_first, $ri_last ) = @_; | ||||
| 10650 | my ( $line_1, $line_2, $lev_last ); | ||||
| 10651 | my $this_line_is_semicolon_terminated; | ||||
| 10652 | my $max_line = @$ri_first - 1; | ||||
| 10653 | |||||
| 10654 | # looking at each line of this batch.. | ||||
| 10655 | # We are looking at leading tokens and looking for a sequence | ||||
| 10656 | # all at the same level and higher level than enclosing lines. | ||||
| 10657 | foreach my $line ( 0 .. $max_line ) { | ||||
| 10658 | |||||
| 10659 | my $ibeg = $$ri_first[$line]; | ||||
| 10660 | my $lev = $levels_to_go[$ibeg]; | ||||
| 10661 | if ( $line > 0 ) { | ||||
| 10662 | |||||
| 10663 | # if we have started a chain.. | ||||
| 10664 | if ($line_1) { | ||||
| 10665 | |||||
| 10666 | # see if it continues.. | ||||
| 10667 | if ( $lev == $lev_last ) { | ||||
| 10668 | if ( $types_to_go[$ibeg] eq 'k' | ||||
| 10669 | && $is_sort_map_grep{ $tokens_to_go[$ibeg] } ) | ||||
| 10670 | { | ||||
| 10671 | |||||
| 10672 | # chain continues... | ||||
| 10673 | # check for chain ending at end of a statement | ||||
| 10674 | if ( $line == $max_line ) { | ||||
| 10675 | |||||
| 10676 | # see of this line ends a statement | ||||
| 10677 | my $iend = $$ri_last[$line]; | ||||
| 10678 | $this_line_is_semicolon_terminated = | ||||
| 10679 | $types_to_go[$iend] eq ';' | ||||
| 10680 | |||||
| 10681 | # with possible side comment | ||||
| 10682 | || ( $types_to_go[$iend] eq '#' | ||||
| 10683 | && $iend - $ibeg >= 2 | ||||
| 10684 | && $types_to_go[ $iend - 2 ] eq ';' | ||||
| 10685 | && $types_to_go[ $iend - 1 ] eq 'b' ); | ||||
| 10686 | } | ||||
| 10687 | $line_2 = $line if ($this_line_is_semicolon_terminated); | ||||
| 10688 | } | ||||
| 10689 | else { | ||||
| 10690 | |||||
| 10691 | # kill chain | ||||
| 10692 | $line_1 = undef; | ||||
| 10693 | } | ||||
| 10694 | } | ||||
| 10695 | elsif ( $lev < $lev_last ) { | ||||
| 10696 | |||||
| 10697 | # chain ends with previous line | ||||
| 10698 | $line_2 = $line - 1; | ||||
| 10699 | } | ||||
| 10700 | elsif ( $lev > $lev_last ) { | ||||
| 10701 | |||||
| 10702 | # kill chain | ||||
| 10703 | $line_1 = undef; | ||||
| 10704 | } | ||||
| 10705 | |||||
| 10706 | # undo the continuation indentation if a chain ends | ||||
| 10707 | if ( defined($line_2) && defined($line_1) ) { | ||||
| 10708 | my $continuation_line_count = $line_2 - $line_1 + 1; | ||||
| 10709 | @ci_levels_to_go[ @$ri_first[ $line_1 .. $line_2 ] ] = | ||||
| 10710 | (0) x ($continuation_line_count); | ||||
| 10711 | @leading_spaces_to_go[ @$ri_first[ $line_1 .. $line_2 ] ] = | ||||
| 10712 | @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $line_2 ] ]; | ||||
| 10713 | $line_1 = undef; | ||||
| 10714 | } | ||||
| 10715 | } | ||||
| 10716 | |||||
| 10717 | # not in a chain yet.. | ||||
| 10718 | else { | ||||
| 10719 | |||||
| 10720 | # look for start of a new sort/map/grep chain | ||||
| 10721 | if ( $lev > $lev_last ) { | ||||
| 10722 | if ( $types_to_go[$ibeg] eq 'k' | ||||
| 10723 | && $is_sort_map_grep{ $tokens_to_go[$ibeg] } ) | ||||
| 10724 | { | ||||
| 10725 | $line_1 = $line; | ||||
| 10726 | } | ||||
| 10727 | } | ||||
| 10728 | } | ||||
| 10729 | } | ||||
| 10730 | $lev_last = $lev; | ||||
| 10731 | } | ||||
| 10732 | } | ||||
| 10733 | |||||
| 10734 | sub undo_lp_ci { | ||||
| 10735 | |||||
| 10736 | # If there is a single, long parameter within parens, like this: | ||||
| 10737 | # | ||||
| 10738 | # $self->command( "/msg " | ||||
| 10739 | # . $infoline->chan | ||||
| 10740 | # . " You said $1, but did you know that it's square was " | ||||
| 10741 | # . $1 * $1 . " ?" ); | ||||
| 10742 | # | ||||
| 10743 | # we can remove the continuation indentation of the 2nd and higher lines | ||||
| 10744 | # to achieve this effect, which is more pleasing: | ||||
| 10745 | # | ||||
| 10746 | # $self->command("/msg " | ||||
| 10747 | # . $infoline->chan | ||||
| 10748 | # . " You said $1, but did you know that it's square was " | ||||
| 10749 | # . $1 * $1 . " ?"); | ||||
| 10750 | |||||
| 10751 | my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_; | ||||
| 10752 | my $max_line = @$ri_first - 1; | ||||
| 10753 | |||||
| 10754 | # must be multiple lines | ||||
| 10755 | return unless $max_line > $line_open; | ||||
| 10756 | |||||
| 10757 | my $lev_start = $levels_to_go[$i_start]; | ||||
| 10758 | my $ci_start_plus = 1 + $ci_levels_to_go[$i_start]; | ||||
| 10759 | |||||
| 10760 | # see if all additional lines in this container have continuation | ||||
| 10761 | # indentation | ||||
| 10762 | my $n; | ||||
| 10763 | my $line_1 = 1 + $line_open; | ||||
| 10764 | for ( $n = $line_1 ; $n <= $max_line ; ++$n ) { | ||||
| 10765 | my $ibeg = $$ri_first[$n]; | ||||
| 10766 | my $iend = $$ri_last[$n]; | ||||
| 10767 | if ( $ibeg eq $closing_index ) { $n--; last } | ||||
| 10768 | return if ( $lev_start != $levels_to_go[$ibeg] ); | ||||
| 10769 | return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] ); | ||||
| 10770 | last if ( $closing_index <= $iend ); | ||||
| 10771 | } | ||||
| 10772 | |||||
| 10773 | # we can reduce the indentation of all continuation lines | ||||
| 10774 | my $continuation_line_count = $n - $line_open; | ||||
| 10775 | @ci_levels_to_go[ @$ri_first[ $line_1 .. $n ] ] = | ||||
| 10776 | (0) x ($continuation_line_count); | ||||
| 10777 | @leading_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ] = | ||||
| 10778 | @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ]; | ||||
| 10779 | } | ||||
| 10780 | |||||
| 10781 | sub pad_token { | ||||
| 10782 | |||||
| 10783 | # insert $pad_spaces before token number $ipad | ||||
| 10784 | my ( $ipad, $pad_spaces ) = @_; | ||||
| 10785 | if ( $pad_spaces > 0 ) { | ||||
| 10786 | $tokens_to_go[$ipad] = ' ' x $pad_spaces . $tokens_to_go[$ipad]; | ||||
| 10787 | } | ||||
| 10788 | elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq ' ' ) { | ||||
| 10789 | $tokens_to_go[$ipad] = ""; | ||||
| 10790 | } | ||||
| 10791 | else { | ||||
| 10792 | |||||
| 10793 | # shouldn't happen | ||||
| 10794 | return; | ||||
| 10795 | } | ||||
| 10796 | |||||
| 10797 | $token_lengths_to_go[$ipad] += $pad_spaces; | ||||
| 10798 | for ( my $i = $ipad ; $i <= $max_index_to_go ; $i++ ) { | ||||
| 10799 | $summed_lengths_to_go[ $i + 1 ] += $pad_spaces; | ||||
| 10800 | } | ||||
| 10801 | } | ||||
| 10802 | |||||
| 10803 | { | ||||
| 10804 | 2 | 700ns | my %is_math_op; | ||
| 10805 | |||||
| 10806 | # spent 8µs within Perl::Tidy::Formatter::BEGIN@10806 which was called:
# once (8µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 10810 | ||||
| 10807 | |||||
| 10808 | 1 | 1µs | @_ = qw( + - * / ); | ||
| 10809 | 1 | 8µs | @is_math_op{@_} = (1) x scalar(@_); | ||
| 10810 | 1 | 1.57ms | 1 | 8µs | } # spent 8µs making 1 call to Perl::Tidy::Formatter::BEGIN@10806 |
| 10811 | |||||
| 10812 | sub set_logical_padding { | ||||
| 10813 | |||||
| 10814 | # Look at a batch of lines and see if extra padding can improve the | ||||
| 10815 | # alignment when there are certain leading operators. Here is an | ||||
| 10816 | # example, in which some extra space is introduced before | ||||
| 10817 | # '( $year' to make it line up with the subsequent lines: | ||||
| 10818 | # | ||||
| 10819 | # if ( ( $Year < 1601 ) | ||||
| 10820 | # || ( $Year > 2899 ) | ||||
| 10821 | # || ( $EndYear < 1601 ) | ||||
| 10822 | # || ( $EndYear > 2899 ) ) | ||||
| 10823 | # { | ||||
| 10824 | # &Error_OutOfRange; | ||||
| 10825 | # } | ||||
| 10826 | # | ||||
| 10827 | my ( $ri_first, $ri_last ) = @_; | ||||
| 10828 | my $max_line = @$ri_first - 1; | ||||
| 10829 | |||||
| 10830 | my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $line, | ||||
| 10831 | $pad_spaces, | ||||
| 10832 | $tok_next, $type_next, $has_leading_op_next, $has_leading_op ); | ||||
| 10833 | |||||
| 10834 | # looking at each line of this batch.. | ||||
| 10835 | foreach $line ( 0 .. $max_line - 1 ) { | ||||
| 10836 | |||||
| 10837 | # see if the next line begins with a logical operator | ||||
| 10838 | $ibeg = $$ri_first[$line]; | ||||
| 10839 | $iend = $$ri_last[$line]; | ||||
| 10840 | $ibeg_next = $$ri_first[ $line + 1 ]; | ||||
| 10841 | $tok_next = $tokens_to_go[$ibeg_next]; | ||||
| 10842 | $type_next = $types_to_go[$ibeg_next]; | ||||
| 10843 | |||||
| 10844 | $has_leading_op_next = ( $tok_next =~ /^\w/ ) | ||||
| 10845 | ? $is_chain_operator{$tok_next} # + - * / : ? && || | ||||
| 10846 | : $is_chain_operator{$type_next}; # and, or | ||||
| 10847 | |||||
| 10848 | next unless ($has_leading_op_next); | ||||
| 10849 | |||||
| 10850 | # next line must not be at lesser depth | ||||
| 10851 | next | ||||
| 10852 | if ( $nesting_depth_to_go[$ibeg] > | ||||
| 10853 | $nesting_depth_to_go[$ibeg_next] ); | ||||
| 10854 | |||||
| 10855 | # identify the token in this line to be padded on the left | ||||
| 10856 | $ipad = undef; | ||||
| 10857 | |||||
| 10858 | # handle lines at same depth... | ||||
| 10859 | if ( $nesting_depth_to_go[$ibeg] == | ||||
| 10860 | $nesting_depth_to_go[$ibeg_next] ) | ||||
| 10861 | { | ||||
| 10862 | |||||
| 10863 | # if this is not first line of the batch ... | ||||
| 10864 | if ( $line > 0 ) { | ||||
| 10865 | |||||
| 10866 | # and we have leading operator.. | ||||
| 10867 | next if $has_leading_op; | ||||
| 10868 | |||||
| 10869 | # Introduce padding if.. | ||||
| 10870 | # 1. the previous line is at lesser depth, or | ||||
| 10871 | # 2. the previous line ends in an assignment | ||||
| 10872 | # 3. the previous line ends in a 'return' | ||||
| 10873 | # 4. the previous line ends in a comma | ||||
| 10874 | # Example 1: previous line at lesser depth | ||||
| 10875 | # if ( ( $Year < 1601 ) # <- we are here but | ||||
| 10876 | # || ( $Year > 2899 ) # list has not yet | ||||
| 10877 | # || ( $EndYear < 1601 ) # collapsed vertically | ||||
| 10878 | # || ( $EndYear > 2899 ) ) | ||||
| 10879 | # { | ||||
| 10880 | # | ||||
| 10881 | # Example 2: previous line ending in assignment: | ||||
| 10882 | # $leapyear = | ||||
| 10883 | # $year % 4 ? 0 # <- We are here | ||||
| 10884 | # : $year % 100 ? 1 | ||||
| 10885 | # : $year % 400 ? 0 | ||||
| 10886 | # : 1; | ||||
| 10887 | # | ||||
| 10888 | # Example 3: previous line ending in comma: | ||||
| 10889 | # push @expr, | ||||
| 10890 | # /test/ ? undef | ||||
| 10891 | # : eval($_) ? 1 | ||||
| 10892 | # : eval($_) ? 1 | ||||
| 10893 | # : 0; | ||||
| 10894 | |||||
| 10895 | # be sure levels agree (do not indent after an indented 'if') | ||||
| 10896 | next | ||||
| 10897 | if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] ); | ||||
| 10898 | |||||
| 10899 | # allow padding on first line after a comma but only if: | ||||
| 10900 | # (1) this is line 2 and | ||||
| 10901 | # (2) there are at more than three lines and | ||||
| 10902 | # (3) lines 3 and 4 have the same leading operator | ||||
| 10903 | # These rules try to prevent padding within a long | ||||
| 10904 | # comma-separated list. | ||||
| 10905 | my $ok_comma; | ||||
| 10906 | if ( $types_to_go[$iendm] eq ',' | ||||
| 10907 | && $line == 1 | ||||
| 10908 | && $max_line > 2 ) | ||||
| 10909 | { | ||||
| 10910 | my $ibeg_next_next = $$ri_first[ $line + 2 ]; | ||||
| 10911 | my $tok_next_next = $tokens_to_go[$ibeg_next_next]; | ||||
| 10912 | $ok_comma = $tok_next_next eq $tok_next; | ||||
| 10913 | } | ||||
| 10914 | |||||
| 10915 | next | ||||
| 10916 | unless ( | ||||
| 10917 | $is_assignment{ $types_to_go[$iendm] } | ||||
| 10918 | || $ok_comma | ||||
| 10919 | || ( $nesting_depth_to_go[$ibegm] < | ||||
| 10920 | $nesting_depth_to_go[$ibeg] ) | ||||
| 10921 | || ( $types_to_go[$iendm] eq 'k' | ||||
| 10922 | && $tokens_to_go[$iendm] eq 'return' ) | ||||
| 10923 | ); | ||||
| 10924 | |||||
| 10925 | # we will add padding before the first token | ||||
| 10926 | $ipad = $ibeg; | ||||
| 10927 | } | ||||
| 10928 | |||||
| 10929 | # for first line of the batch.. | ||||
| 10930 | else { | ||||
| 10931 | |||||
| 10932 | # WARNING: Never indent if first line is starting in a | ||||
| 10933 | # continued quote, which would change the quote. | ||||
| 10934 | next if $starting_in_quote; | ||||
| 10935 | |||||
| 10936 | # if this is text after closing '}' | ||||
| 10937 | # then look for an interior token to pad | ||||
| 10938 | if ( $types_to_go[$ibeg] eq '}' ) { | ||||
| 10939 | |||||
| 10940 | } | ||||
| 10941 | |||||
| 10942 | # otherwise, we might pad if it looks really good | ||||
| 10943 | else { | ||||
| 10944 | |||||
| 10945 | # we might pad token $ibeg, so be sure that it | ||||
| 10946 | # is at the same depth as the next line. | ||||
| 10947 | next | ||||
| 10948 | if ( $nesting_depth_to_go[$ibeg] != | ||||
| 10949 | $nesting_depth_to_go[$ibeg_next] ); | ||||
| 10950 | |||||
| 10951 | # We can pad on line 1 of a statement if at least 3 | ||||
| 10952 | # lines will be aligned. Otherwise, it | ||||
| 10953 | # can look very confusing. | ||||
| 10954 | |||||
| 10955 | # We have to be careful not to pad if there are too few | ||||
| 10956 | # lines. The current rule is: | ||||
| 10957 | # (1) in general we require at least 3 consecutive lines | ||||
| 10958 | # with the same leading chain operator token, | ||||
| 10959 | # (2) but an exception is that we only require two lines | ||||
| 10960 | # with leading colons if there are no more lines. For example, | ||||
| 10961 | # the first $i in the following snippet would get padding | ||||
| 10962 | # by the second rule: | ||||
| 10963 | # | ||||
| 10964 | # $i == 1 ? ( "First", "Color" ) | ||||
| 10965 | # : $i == 2 ? ( "Then", "Rarity" ) | ||||
| 10966 | # : ( "Then", "Name" ); | ||||
| 10967 | |||||
| 10968 | if ( $max_line > 1 ) { | ||||
| 10969 | my $leading_token = $tokens_to_go[$ibeg_next]; | ||||
| 10970 | my $tokens_differ; | ||||
| 10971 | |||||
| 10972 | # never indent line 1 of a '.' series because | ||||
| 10973 | # previous line is most likely at same level. | ||||
| 10974 | # TODO: we should also look at the leasing_spaces | ||||
| 10975 | # of the last output line and skip if it is same | ||||
| 10976 | # as this line. | ||||
| 10977 | next if ( $leading_token eq '.' ); | ||||
| 10978 | |||||
| 10979 | my $count = 1; | ||||
| 10980 | foreach my $l ( 2 .. 3 ) { | ||||
| 10981 | last if ( $line + $l > $max_line ); | ||||
| 10982 | my $ibeg_next_next = $$ri_first[ $line + $l ]; | ||||
| 10983 | if ( $tokens_to_go[$ibeg_next_next] ne | ||||
| 10984 | $leading_token ) | ||||
| 10985 | { | ||||
| 10986 | $tokens_differ = 1; | ||||
| 10987 | last; | ||||
| 10988 | } | ||||
| 10989 | $count++; | ||||
| 10990 | } | ||||
| 10991 | next if ($tokens_differ); | ||||
| 10992 | next if ( $count < 3 && $leading_token ne ':' ); | ||||
| 10993 | $ipad = $ibeg; | ||||
| 10994 | } | ||||
| 10995 | else { | ||||
| 10996 | next; | ||||
| 10997 | } | ||||
| 10998 | } | ||||
| 10999 | } | ||||
| 11000 | } | ||||
| 11001 | |||||
| 11002 | # find interior token to pad if necessary | ||||
| 11003 | if ( !defined($ipad) ) { | ||||
| 11004 | |||||
| 11005 | for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) { | ||||
| 11006 | |||||
| 11007 | # find any unclosed container | ||||
| 11008 | next | ||||
| 11009 | unless ( $type_sequence_to_go[$i] | ||||
| 11010 | && $mate_index_to_go[$i] > $iend ); | ||||
| 11011 | |||||
| 11012 | # find next nonblank token to pad | ||||
| 11013 | $ipad = $inext_to_go[$i]; | ||||
| 11014 | last if ( $ipad > $iend ); | ||||
| 11015 | } | ||||
| 11016 | last unless $ipad; | ||||
| 11017 | } | ||||
| 11018 | |||||
| 11019 | # We cannot pad a leading token at the lowest level because | ||||
| 11020 | # it could cause a bug in which the starting indentation | ||||
| 11021 | # level is guessed incorrectly each time the code is run | ||||
| 11022 | # though perltidy, thus causing the code to march off to | ||||
| 11023 | # the right. For example, the following snippet would have | ||||
| 11024 | # this problem: | ||||
| 11025 | |||||
| 11026 | ## ov_method mycan( $package, '(""' ), $package | ||||
| 11027 | ## or ov_method mycan( $package, '(0+' ), $package | ||||
| 11028 | ## or ov_method mycan( $package, '(bool' ), $package | ||||
| 11029 | ## or ov_method mycan( $package, '(nomethod' ), $package; | ||||
| 11030 | |||||
| 11031 | # If this snippet is within a block this won't happen | ||||
| 11032 | # unless the user just processes the snippet alone within | ||||
| 11033 | # an editor. In that case either the user will see and | ||||
| 11034 | # fix the problem or it will be corrected next time the | ||||
| 11035 | # entire file is processed with perltidy. | ||||
| 11036 | next if ( $ipad == 0 && $levels_to_go[$ipad] == 0 ); | ||||
| 11037 | |||||
| 11038 | ## THIS PATCH REMOVES THE FOLLOWING POOR PADDING (math.t) with -pbp, BUT | ||||
| 11039 | ## IT DID MORE HARM THAN GOOD | ||||
| 11040 | ## ceil( | ||||
| 11041 | ## $font->{'loca'}->{'glyphs'}[$x]->read->{'xMin'} * 1000 | ||||
| 11042 | ## / $upem | ||||
| 11043 | ## ), | ||||
| 11044 | ##? # do not put leading padding for just 2 lines of math | ||||
| 11045 | ##? if ( $ipad == $ibeg | ||||
| 11046 | ##? && $line > 0 | ||||
| 11047 | ##? && $levels_to_go[$ipad] > $levels_to_go[ $ipad - 1 ] | ||||
| 11048 | ##? && $is_math_op{$type_next} | ||||
| 11049 | ##? && $line + 2 <= $max_line ) | ||||
| 11050 | ##? { | ||||
| 11051 | ##? my $ibeg_next_next = $$ri_first[ $line + 2 ]; | ||||
| 11052 | ##? my $type_next_next = $types_to_go[$ibeg_next_next]; | ||||
| 11053 | ##? next if !$is_math_op{$type_next_next}; | ||||
| 11054 | ##? } | ||||
| 11055 | |||||
| 11056 | # next line must not be at greater depth | ||||
| 11057 | my $iend_next = $$ri_last[ $line + 1 ]; | ||||
| 11058 | next | ||||
| 11059 | if ( $nesting_depth_to_go[ $iend_next + 1 ] > | ||||
| 11060 | $nesting_depth_to_go[$ipad] ); | ||||
| 11061 | |||||
| 11062 | # lines must be somewhat similar to be padded.. | ||||
| 11063 | my $inext_next = $inext_to_go[$ibeg_next]; | ||||
| 11064 | my $type = $types_to_go[$ipad]; | ||||
| 11065 | my $type_next = $types_to_go[ $ipad + 1 ]; | ||||
| 11066 | |||||
| 11067 | # see if there are multiple continuation lines | ||||
| 11068 | my $logical_continuation_lines = 1; | ||||
| 11069 | if ( $line + 2 <= $max_line ) { | ||||
| 11070 | my $leading_token = $tokens_to_go[$ibeg_next]; | ||||
| 11071 | my $ibeg_next_next = $$ri_first[ $line + 2 ]; | ||||
| 11072 | if ( $tokens_to_go[$ibeg_next_next] eq $leading_token | ||||
| 11073 | && $nesting_depth_to_go[$ibeg_next] eq | ||||
| 11074 | $nesting_depth_to_go[$ibeg_next_next] ) | ||||
| 11075 | { | ||||
| 11076 | $logical_continuation_lines++; | ||||
| 11077 | } | ||||
| 11078 | } | ||||
| 11079 | |||||
| 11080 | # see if leading types match | ||||
| 11081 | my $types_match = $types_to_go[$inext_next] eq $type; | ||||
| 11082 | my $matches_without_bang; | ||||
| 11083 | |||||
| 11084 | # if first line has leading ! then compare the following token | ||||
| 11085 | if ( !$types_match && $type eq '!' ) { | ||||
| 11086 | $types_match = $matches_without_bang = | ||||
| 11087 | $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ]; | ||||
| 11088 | } | ||||
| 11089 | |||||
| 11090 | if ( | ||||
| 11091 | |||||
| 11092 | # either we have multiple continuation lines to follow | ||||
| 11093 | # and we are not padding the first token | ||||
| 11094 | ( $logical_continuation_lines > 1 && $ipad > 0 ) | ||||
| 11095 | |||||
| 11096 | # or.. | ||||
| 11097 | || ( | ||||
| 11098 | |||||
| 11099 | # types must match | ||||
| 11100 | $types_match | ||||
| 11101 | |||||
| 11102 | # and keywords must match if keyword | ||||
| 11103 | && !( | ||||
| 11104 | $type eq 'k' | ||||
| 11105 | && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next] | ||||
| 11106 | ) | ||||
| 11107 | ) | ||||
| 11108 | ) | ||||
| 11109 | { | ||||
| 11110 | |||||
| 11111 | #----------------------begin special checks-------------- | ||||
| 11112 | # | ||||
| 11113 | # SPECIAL CHECK 1: | ||||
| 11114 | # A check is needed before we can make the pad. | ||||
| 11115 | # If we are in a list with some long items, we want each | ||||
| 11116 | # item to stand out. So in the following example, the | ||||
| 11117 | # first line beginning with '$casefold->' would look good | ||||
| 11118 | # padded to align with the next line, but then it | ||||
| 11119 | # would be indented more than the last line, so we | ||||
| 11120 | # won't do it. | ||||
| 11121 | # | ||||
| 11122 | # ok( | ||||
| 11123 | # $casefold->{code} eq '0041' | ||||
| 11124 | # && $casefold->{status} eq 'C' | ||||
| 11125 | # && $casefold->{mapping} eq '0061', | ||||
| 11126 | # 'casefold 0x41' | ||||
| 11127 | # ); | ||||
| 11128 | # | ||||
| 11129 | # Note: | ||||
| 11130 | # It would be faster, and almost as good, to use a comma | ||||
| 11131 | # count, and not pad if comma_count > 1 and the previous | ||||
| 11132 | # line did not end with a comma. | ||||
| 11133 | # | ||||
| 11134 | my $ok_to_pad = 1; | ||||
| 11135 | |||||
| 11136 | my $ibg = $$ri_first[ $line + 1 ]; | ||||
| 11137 | my $depth = $nesting_depth_to_go[ $ibg + 1 ]; | ||||
| 11138 | |||||
| 11139 | # just use simplified formula for leading spaces to avoid | ||||
| 11140 | # needless sub calls | ||||
| 11141 | my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg]; | ||||
| 11142 | |||||
| 11143 | # look at each line beyond the next .. | ||||
| 11144 | my $l = $line + 1; | ||||
| 11145 | foreach $l ( $line + 2 .. $max_line ) { | ||||
| 11146 | my $ibg = $$ri_first[$l]; | ||||
| 11147 | |||||
| 11148 | # quit looking at the end of this container | ||||
| 11149 | last | ||||
| 11150 | if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth ) | ||||
| 11151 | || ( $nesting_depth_to_go[$ibg] < $depth ); | ||||
| 11152 | |||||
| 11153 | # cannot do the pad if a later line would be | ||||
| 11154 | # outdented more | ||||
| 11155 | if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) { | ||||
| 11156 | $ok_to_pad = 0; | ||||
| 11157 | last; | ||||
| 11158 | } | ||||
| 11159 | } | ||||
| 11160 | |||||
| 11161 | # don't pad if we end in a broken list | ||||
| 11162 | if ( $l == $max_line ) { | ||||
| 11163 | my $i2 = $$ri_last[$l]; | ||||
| 11164 | if ( $types_to_go[$i2] eq '#' ) { | ||||
| 11165 | my $i1 = $$ri_first[$l]; | ||||
| 11166 | next | ||||
| 11167 | if ( | ||||
| 11168 | terminal_type( \@types_to_go, \@block_type_to_go, | ||||
| 11169 | $i1, $i2 ) eq ',' | ||||
| 11170 | ); | ||||
| 11171 | } | ||||
| 11172 | } | ||||
| 11173 | |||||
| 11174 | # SPECIAL CHECK 2: | ||||
| 11175 | # a minus may introduce a quoted variable, and we will | ||||
| 11176 | # add the pad only if this line begins with a bare word, | ||||
| 11177 | # such as for the word 'Button' here: | ||||
| 11178 | # [ | ||||
| 11179 | # Button => "Print letter \"~$_\"", | ||||
| 11180 | # -command => [ sub { print "$_[0]\n" }, $_ ], | ||||
| 11181 | # -accelerator => "Meta+$_" | ||||
| 11182 | # ]; | ||||
| 11183 | # | ||||
| 11184 | # On the other hand, if 'Button' is quoted, it looks best | ||||
| 11185 | # not to pad: | ||||
| 11186 | # [ | ||||
| 11187 | # 'Button' => "Print letter \"~$_\"", | ||||
| 11188 | # -command => [ sub { print "$_[0]\n" }, $_ ], | ||||
| 11189 | # -accelerator => "Meta+$_" | ||||
| 11190 | # ]; | ||||
| 11191 | if ( $types_to_go[$ibeg_next] eq 'm' ) { | ||||
| 11192 | $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q'; | ||||
| 11193 | } | ||||
| 11194 | |||||
| 11195 | next unless $ok_to_pad; | ||||
| 11196 | |||||
| 11197 | #----------------------end special check--------------- | ||||
| 11198 | |||||
| 11199 | my $length_1 = total_line_length( $ibeg, $ipad - 1 ); | ||||
| 11200 | my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 ); | ||||
| 11201 | $pad_spaces = $length_2 - $length_1; | ||||
| 11202 | |||||
| 11203 | # If the first line has a leading ! and the second does | ||||
| 11204 | # not, then remove one space to try to align the next | ||||
| 11205 | # leading characters, which are often the same. For example: | ||||
| 11206 | # if ( !$ts | ||||
| 11207 | # || $ts == $self->Holder | ||||
| 11208 | # || $self->Holder->Type eq "Arena" ) | ||||
| 11209 | # | ||||
| 11210 | # This usually helps readability, but if there are subsequent | ||||
| 11211 | # ! operators things will still get messed up. For example: | ||||
| 11212 | # | ||||
| 11213 | # if ( !exists $Net::DNS::typesbyname{$qtype} | ||||
| 11214 | # && exists $Net::DNS::classesbyname{$qtype} | ||||
| 11215 | # && !exists $Net::DNS::classesbyname{$qclass} | ||||
| 11216 | # && exists $Net::DNS::typesbyname{$qclass} ) | ||||
| 11217 | # We can't fix that. | ||||
| 11218 | if ($matches_without_bang) { $pad_spaces-- } | ||||
| 11219 | |||||
| 11220 | # make sure this won't change if -lp is used | ||||
| 11221 | my $indentation_1 = $leading_spaces_to_go[$ibeg]; | ||||
| 11222 | if ( ref($indentation_1) ) { | ||||
| 11223 | if ( $indentation_1->get_RECOVERABLE_SPACES() == 0 ) { | ||||
| 11224 | my $indentation_2 = $leading_spaces_to_go[$ibeg_next]; | ||||
| 11225 | unless ( $indentation_2->get_RECOVERABLE_SPACES() == 0 ) | ||||
| 11226 | { | ||||
| 11227 | $pad_spaces = 0; | ||||
| 11228 | } | ||||
| 11229 | } | ||||
| 11230 | } | ||||
| 11231 | |||||
| 11232 | # we might be able to handle a pad of -1 by removing a blank | ||||
| 11233 | # token | ||||
| 11234 | if ( $pad_spaces < 0 ) { | ||||
| 11235 | |||||
| 11236 | if ( $pad_spaces == -1 ) { | ||||
| 11237 | if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' ) | ||||
| 11238 | { | ||||
| 11239 | pad_token( $ipad - 1, $pad_spaces ); | ||||
| 11240 | } | ||||
| 11241 | } | ||||
| 11242 | $pad_spaces = 0; | ||||
| 11243 | } | ||||
| 11244 | |||||
| 11245 | # now apply any padding for alignment | ||||
| 11246 | if ( $ipad >= 0 && $pad_spaces ) { | ||||
| 11247 | |||||
| 11248 | my $length_t = total_line_length( $ibeg, $iend ); | ||||
| 11249 | if ( $pad_spaces + $length_t <= maximum_line_length($ibeg) ) | ||||
| 11250 | { | ||||
| 11251 | pad_token( $ipad, $pad_spaces ); | ||||
| 11252 | } | ||||
| 11253 | } | ||||
| 11254 | } | ||||
| 11255 | } | ||||
| 11256 | continue { | ||||
| 11257 | $iendm = $iend; | ||||
| 11258 | $ibegm = $ibeg; | ||||
| 11259 | $has_leading_op = $has_leading_op_next; | ||||
| 11260 | } # end of loop over lines | ||||
| 11261 | return; | ||||
| 11262 | } | ||||
| 11263 | } | ||||
| 11264 | |||||
| 11265 | sub correct_lp_indentation { | ||||
| 11266 | |||||
| 11267 | # When the -lp option is used, we need to make a last pass through | ||||
| 11268 | # each line to correct the indentation positions in case they differ | ||||
| 11269 | # from the predictions. This is necessary because perltidy uses a | ||||
| 11270 | # predictor/corrector method for aligning with opening parens. The | ||||
| 11271 | # predictor is usually good, but sometimes stumbles. The corrector | ||||
| 11272 | # tries to patch things up once the actual opening paren locations | ||||
| 11273 | # are known. | ||||
| 11274 | my ( $ri_first, $ri_last ) = @_; | ||||
| 11275 | my $do_not_pad = 0; | ||||
| 11276 | |||||
| 11277 | # Note on flag '$do_not_pad': | ||||
| 11278 | # We want to avoid a situation like this, where the aligner inserts | ||||
| 11279 | # whitespace before the '=' to align it with a previous '=', because | ||||
| 11280 | # otherwise the parens might become mis-aligned in a situation like | ||||
| 11281 | # this, where the '=' has become aligned with the previous line, | ||||
| 11282 | # pushing the opening '(' forward beyond where we want it. | ||||
| 11283 | # | ||||
| 11284 | # $mkFloor::currentRoom = ''; | ||||
| 11285 | # $mkFloor::c_entry = $c->Entry( | ||||
| 11286 | # -width => '10', | ||||
| 11287 | # -relief => 'sunken', | ||||
| 11288 | # ... | ||||
| 11289 | # ); | ||||
| 11290 | # | ||||
| 11291 | # We leave it to the aligner to decide how to do this. | ||||
| 11292 | |||||
| 11293 | # first remove continuation indentation if appropriate | ||||
| 11294 | my $max_line = @$ri_first - 1; | ||||
| 11295 | |||||
| 11296 | # looking at each line of this batch.. | ||||
| 11297 | my ( $ibeg, $iend ); | ||||
| 11298 | my $line; | ||||
| 11299 | foreach $line ( 0 .. $max_line ) { | ||||
| 11300 | $ibeg = $$ri_first[$line]; | ||||
| 11301 | $iend = $$ri_last[$line]; | ||||
| 11302 | |||||
| 11303 | # looking at each token in this output line.. | ||||
| 11304 | my $i; | ||||
| 11305 | foreach $i ( $ibeg .. $iend ) { | ||||
| 11306 | |||||
| 11307 | # How many space characters to place before this token | ||||
| 11308 | # for special alignment. Actual padding is done in the | ||||
| 11309 | # continue block. | ||||
| 11310 | |||||
| 11311 | # looking for next unvisited indentation item | ||||
| 11312 | my $indentation = $leading_spaces_to_go[$i]; | ||||
| 11313 | if ( !$indentation->get_MARKED() ) { | ||||
| 11314 | $indentation->set_MARKED(1); | ||||
| 11315 | |||||
| 11316 | # looking for indentation item for which we are aligning | ||||
| 11317 | # with parens, braces, and brackets | ||||
| 11318 | next unless ( $indentation->get_ALIGN_PAREN() ); | ||||
| 11319 | |||||
| 11320 | # skip closed container on this line | ||||
| 11321 | if ( $i > $ibeg ) { | ||||
| 11322 | my $im = max( $ibeg, $iprev_to_go[$i] ); | ||||
| 11323 | if ( $type_sequence_to_go[$im] | ||||
| 11324 | && $mate_index_to_go[$im] <= $iend ) | ||||
| 11325 | { | ||||
| 11326 | next; | ||||
| 11327 | } | ||||
| 11328 | } | ||||
| 11329 | |||||
| 11330 | if ( $line == 1 && $i == $ibeg ) { | ||||
| 11331 | $do_not_pad = 1; | ||||
| 11332 | } | ||||
| 11333 | |||||
| 11334 | # Ok, let's see what the error is and try to fix it | ||||
| 11335 | my $actual_pos; | ||||
| 11336 | my $predicted_pos = $indentation->get_SPACES(); | ||||
| 11337 | if ( $i > $ibeg ) { | ||||
| 11338 | |||||
| 11339 | # token is mid-line - use length to previous token | ||||
| 11340 | $actual_pos = total_line_length( $ibeg, $i - 1 ); | ||||
| 11341 | |||||
| 11342 | # for mid-line token, we must check to see if all | ||||
| 11343 | # additional lines have continuation indentation, | ||||
| 11344 | # and remove it if so. Otherwise, we do not get | ||||
| 11345 | # good alignment. | ||||
| 11346 | my $closing_index = $indentation->get_CLOSED(); | ||||
| 11347 | if ( $closing_index > $iend ) { | ||||
| 11348 | my $ibeg_next = $$ri_first[ $line + 1 ]; | ||||
| 11349 | if ( $ci_levels_to_go[$ibeg_next] > 0 ) { | ||||
| 11350 | undo_lp_ci( $line, $i, $closing_index, $ri_first, | ||||
| 11351 | $ri_last ); | ||||
| 11352 | } | ||||
| 11353 | } | ||||
| 11354 | } | ||||
| 11355 | elsif ( $line > 0 ) { | ||||
| 11356 | |||||
| 11357 | # handle case where token starts a new line; | ||||
| 11358 | # use length of previous line | ||||
| 11359 | my $ibegm = $$ri_first[ $line - 1 ]; | ||||
| 11360 | my $iendm = $$ri_last[ $line - 1 ]; | ||||
| 11361 | $actual_pos = total_line_length( $ibegm, $iendm ); | ||||
| 11362 | |||||
| 11363 | # follow -pt style | ||||
| 11364 | ++$actual_pos | ||||
| 11365 | if ( $types_to_go[ $iendm + 1 ] eq 'b' ); | ||||
| 11366 | } | ||||
| 11367 | else { | ||||
| 11368 | |||||
| 11369 | # token is first character of first line of batch | ||||
| 11370 | $actual_pos = $predicted_pos; | ||||
| 11371 | } | ||||
| 11372 | |||||
| 11373 | my $move_right = $actual_pos - $predicted_pos; | ||||
| 11374 | |||||
| 11375 | # done if no error to correct (gnu2.t) | ||||
| 11376 | if ( $move_right == 0 ) { | ||||
| 11377 | $indentation->set_RECOVERABLE_SPACES($move_right); | ||||
| 11378 | next; | ||||
| 11379 | } | ||||
| 11380 | |||||
| 11381 | # if we have not seen closure for this indentation in | ||||
| 11382 | # this batch, we can only pass on a request to the | ||||
| 11383 | # vertical aligner | ||||
| 11384 | my $closing_index = $indentation->get_CLOSED(); | ||||
| 11385 | |||||
| 11386 | if ( $closing_index < 0 ) { | ||||
| 11387 | $indentation->set_RECOVERABLE_SPACES($move_right); | ||||
| 11388 | next; | ||||
| 11389 | } | ||||
| 11390 | |||||
| 11391 | # If necessary, look ahead to see if there is really any | ||||
| 11392 | # leading whitespace dependent on this whitespace, and | ||||
| 11393 | # also find the longest line using this whitespace. | ||||
| 11394 | # Since it is always safe to move left if there are no | ||||
| 11395 | # dependents, we only need to do this if we may have | ||||
| 11396 | # dependent nodes or need to move right. | ||||
| 11397 | |||||
| 11398 | my $right_margin = 0; | ||||
| 11399 | my $have_child = $indentation->get_HAVE_CHILD(); | ||||
| 11400 | |||||
| 11401 | my %saw_indentation; | ||||
| 11402 | my $line_count = 1; | ||||
| 11403 | $saw_indentation{$indentation} = $indentation; | ||||
| 11404 | |||||
| 11405 | if ( $have_child || $move_right > 0 ) { | ||||
| 11406 | $have_child = 0; | ||||
| 11407 | my $max_length = 0; | ||||
| 11408 | if ( $i == $ibeg ) { | ||||
| 11409 | $max_length = total_line_length( $ibeg, $iend ); | ||||
| 11410 | } | ||||
| 11411 | |||||
| 11412 | # look ahead at the rest of the lines of this batch.. | ||||
| 11413 | my $line_t; | ||||
| 11414 | foreach $line_t ( $line + 1 .. $max_line ) { | ||||
| 11415 | my $ibeg_t = $$ri_first[$line_t]; | ||||
| 11416 | my $iend_t = $$ri_last[$line_t]; | ||||
| 11417 | last if ( $closing_index <= $ibeg_t ); | ||||
| 11418 | |||||
| 11419 | # remember all different indentation objects | ||||
| 11420 | my $indentation_t = $leading_spaces_to_go[$ibeg_t]; | ||||
| 11421 | $saw_indentation{$indentation_t} = $indentation_t; | ||||
| 11422 | $line_count++; | ||||
| 11423 | |||||
| 11424 | # remember longest line in the group | ||||
| 11425 | my $length_t = total_line_length( $ibeg_t, $iend_t ); | ||||
| 11426 | if ( $length_t > $max_length ) { | ||||
| 11427 | $max_length = $length_t; | ||||
| 11428 | } | ||||
| 11429 | } | ||||
| 11430 | $right_margin = maximum_line_length($ibeg) - $max_length; | ||||
| 11431 | if ( $right_margin < 0 ) { $right_margin = 0 } | ||||
| 11432 | } | ||||
| 11433 | |||||
| 11434 | my $first_line_comma_count = | ||||
| 11435 | grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ]; | ||||
| 11436 | my $comma_count = $indentation->get_COMMA_COUNT(); | ||||
| 11437 | my $arrow_count = $indentation->get_ARROW_COUNT(); | ||||
| 11438 | |||||
| 11439 | # This is a simple approximate test for vertical alignment: | ||||
| 11440 | # if we broke just after an opening paren, brace, bracket, | ||||
| 11441 | # and there are 2 or more commas in the first line, | ||||
| 11442 | # and there are no '=>'s, | ||||
| 11443 | # then we are probably vertically aligned. We could set | ||||
| 11444 | # an exact flag in sub scan_list, but this is good | ||||
| 11445 | # enough. | ||||
| 11446 | my $indentation_count = keys %saw_indentation; | ||||
| 11447 | my $is_vertically_aligned = | ||||
| 11448 | ( $i == $ibeg | ||||
| 11449 | && $first_line_comma_count > 1 | ||||
| 11450 | && $indentation_count == 1 | ||||
| 11451 | && ( $arrow_count == 0 || $arrow_count == $line_count ) ); | ||||
| 11452 | |||||
| 11453 | # Make the move if possible .. | ||||
| 11454 | if ( | ||||
| 11455 | |||||
| 11456 | # we can always move left | ||||
| 11457 | $move_right < 0 | ||||
| 11458 | |||||
| 11459 | # but we should only move right if we are sure it will | ||||
| 11460 | # not spoil vertical alignment | ||||
| 11461 | || ( $comma_count == 0 ) | ||||
| 11462 | || ( $comma_count > 0 && !$is_vertically_aligned ) | ||||
| 11463 | ) | ||||
| 11464 | { | ||||
| 11465 | my $move = | ||||
| 11466 | ( $move_right <= $right_margin ) | ||||
| 11467 | ? $move_right | ||||
| 11468 | : $right_margin; | ||||
| 11469 | |||||
| 11470 | foreach ( keys %saw_indentation ) { | ||||
| 11471 | $saw_indentation{$_} | ||||
| 11472 | ->permanently_decrease_AVAILABLE_SPACES( -$move ); | ||||
| 11473 | } | ||||
| 11474 | } | ||||
| 11475 | |||||
| 11476 | # Otherwise, record what we want and the vertical aligner | ||||
| 11477 | # will try to recover it. | ||||
| 11478 | else { | ||||
| 11479 | $indentation->set_RECOVERABLE_SPACES($move_right); | ||||
| 11480 | } | ||||
| 11481 | } | ||||
| 11482 | } | ||||
| 11483 | } | ||||
| 11484 | return $do_not_pad; | ||||
| 11485 | } | ||||
| 11486 | |||||
| 11487 | # flush is called to output any tokens in the pipeline, so that | ||||
| 11488 | # an alternate source of lines can be written in the correct order | ||||
| 11489 | |||||
| 11490 | sub flush { | ||||
| 11491 | destroy_one_line_block(); | ||||
| 11492 | output_line_to_go(); | ||||
| 11493 | Perl::Tidy::VerticalAligner::flush(); | ||||
| 11494 | } | ||||
| 11495 | |||||
| 11496 | sub reset_block_text_accumulator { | ||||
| 11497 | |||||
| 11498 | # save text after 'if' and 'elsif' to append after 'else' | ||||
| 11499 | if ($accumulating_text_for_block) { | ||||
| 11500 | |||||
| 11501 | if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) { | ||||
| 11502 | push @{$rleading_block_if_elsif_text}, $leading_block_text; | ||||
| 11503 | } | ||||
| 11504 | } | ||||
| 11505 | $accumulating_text_for_block = ""; | ||||
| 11506 | $leading_block_text = ""; | ||||
| 11507 | $leading_block_text_level = 0; | ||||
| 11508 | $leading_block_text_length_exceeded = 0; | ||||
| 11509 | $leading_block_text_line_number = 0; | ||||
| 11510 | $leading_block_text_line_length = 0; | ||||
| 11511 | } | ||||
| 11512 | |||||
| 11513 | sub set_block_text_accumulator { | ||||
| 11514 | my $i = shift; | ||||
| 11515 | $accumulating_text_for_block = $tokens_to_go[$i]; | ||||
| 11516 | if ( $accumulating_text_for_block !~ /^els/ ) { | ||||
| 11517 | $rleading_block_if_elsif_text = []; | ||||
| 11518 | } | ||||
| 11519 | $leading_block_text = ""; | ||||
| 11520 | $leading_block_text_level = $levels_to_go[$i]; | ||||
| 11521 | $leading_block_text_line_number = | ||||
| 11522 | $vertical_aligner_object->get_output_line_number(); | ||||
| 11523 | $leading_block_text_length_exceeded = 0; | ||||
| 11524 | |||||
| 11525 | # this will contain the column number of the last character | ||||
| 11526 | # of the closing side comment | ||||
| 11527 | $leading_block_text_line_length = | ||||
| 11528 | length($csc_last_label) + | ||||
| 11529 | length($accumulating_text_for_block) + | ||||
| 11530 | length( $rOpts->{'closing-side-comment-prefix'} ) + | ||||
| 11531 | $leading_block_text_level * $rOpts_indent_columns + 3; | ||||
| 11532 | } | ||||
| 11533 | |||||
| 11534 | sub accumulate_block_text { | ||||
| 11535 | my $i = shift; | ||||
| 11536 | |||||
| 11537 | # accumulate leading text for -csc, ignoring any side comments | ||||
| 11538 | if ( $accumulating_text_for_block | ||||
| 11539 | && !$leading_block_text_length_exceeded | ||||
| 11540 | && $types_to_go[$i] ne '#' ) | ||||
| 11541 | { | ||||
| 11542 | |||||
| 11543 | my $added_length = $token_lengths_to_go[$i]; | ||||
| 11544 | $added_length += 1 if $i == 0; | ||||
| 11545 | my $new_line_length = $leading_block_text_line_length + $added_length; | ||||
| 11546 | |||||
| 11547 | # we can add this text if we don't exceed some limits.. | ||||
| 11548 | if ( | ||||
| 11549 | |||||
| 11550 | # we must not have already exceeded the text length limit | ||||
| 11551 | length($leading_block_text) < | ||||
| 11552 | $rOpts_closing_side_comment_maximum_text | ||||
| 11553 | |||||
| 11554 | # and either: | ||||
| 11555 | # the new total line length must be below the line length limit | ||||
| 11556 | # or the new length must be below the text length limit | ||||
| 11557 | # (ie, we may allow one token to exceed the text length limit) | ||||
| 11558 | && ( | ||||
| 11559 | $new_line_length < | ||||
| 11560 | maximum_line_length_for_level($leading_block_text_level) | ||||
| 11561 | |||||
| 11562 | || length($leading_block_text) + $added_length < | ||||
| 11563 | $rOpts_closing_side_comment_maximum_text | ||||
| 11564 | ) | ||||
| 11565 | |||||
| 11566 | # UNLESS: we are adding a closing paren before the brace we seek. | ||||
| 11567 | # This is an attempt to avoid situations where the ... to be | ||||
| 11568 | # added are longer than the omitted right paren, as in: | ||||
| 11569 | |||||
| 11570 | # foreach my $item (@a_rather_long_variable_name_here) { | ||||
| 11571 | # &whatever; | ||||
| 11572 | # } ## end foreach my $item (@a_rather_long_variable_name_here... | ||||
| 11573 | |||||
| 11574 | || ( | ||||
| 11575 | $tokens_to_go[$i] eq ')' | ||||
| 11576 | && ( | ||||
| 11577 | ( | ||||
| 11578 | $i + 1 <= $max_index_to_go | ||||
| 11579 | && $block_type_to_go[ $i + 1 ] eq | ||||
| 11580 | $accumulating_text_for_block | ||||
| 11581 | ) | ||||
| 11582 | || ( $i + 2 <= $max_index_to_go | ||||
| 11583 | && $block_type_to_go[ $i + 2 ] eq | ||||
| 11584 | $accumulating_text_for_block ) | ||||
| 11585 | ) | ||||
| 11586 | ) | ||||
| 11587 | ) | ||||
| 11588 | { | ||||
| 11589 | |||||
| 11590 | # add an extra space at each newline | ||||
| 11591 | if ( $i == 0 ) { $leading_block_text .= ' ' } | ||||
| 11592 | |||||
| 11593 | # add the token text | ||||
| 11594 | $leading_block_text .= $tokens_to_go[$i]; | ||||
| 11595 | $leading_block_text_line_length = $new_line_length; | ||||
| 11596 | } | ||||
| 11597 | |||||
| 11598 | # show that text was truncated if necessary | ||||
| 11599 | elsif ( $types_to_go[$i] ne 'b' ) { | ||||
| 11600 | $leading_block_text_length_exceeded = 1; | ||||
| 11601 | ## Please see file perltidy.ERR | ||||
| 11602 | $leading_block_text .= '...'; | ||||
| 11603 | } | ||||
| 11604 | } | ||||
| 11605 | } | ||||
| 11606 | |||||
| 11607 | { | ||||
| 11608 | 2 | 200ns | my %is_if_elsif_else_unless_while_until_for_foreach; | ||
| 11609 | |||||
| 11610 | # spent 12µs within Perl::Tidy::Formatter::BEGIN@11610 which was called:
# once (12µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 11619 | ||||
| 11611 | |||||
| 11612 | # These block types may have text between the keyword and opening | ||||
| 11613 | # curly. Note: 'else' does not, but must be included to allow trailing | ||||
| 11614 | # if/elsif text to be appended. | ||||
| 11615 | # patch for SWITCH/CASE: added 'case' and 'when' | ||||
| 11616 | 1 | 2µs | @_ = qw(if elsif else unless while until for foreach case when); | ||
| 11617 | 1 | 11µs | @is_if_elsif_else_unless_while_until_for_foreach{@_} = | ||
| 11618 | (1) x scalar(@_); | ||||
| 11619 | 1 | 554µs | 1 | 12µs | } # spent 12µs making 1 call to Perl::Tidy::Formatter::BEGIN@11610 |
| 11620 | |||||
| 11621 | sub accumulate_csc_text { | ||||
| 11622 | |||||
| 11623 | # called once per output buffer when -csc is used. Accumulates | ||||
| 11624 | # the text placed after certain closing block braces. | ||||
| 11625 | # Defines and returns the following for this buffer: | ||||
| 11626 | |||||
| 11627 | my $block_leading_text = ""; # the leading text of the last '}' | ||||
| 11628 | my $rblock_leading_if_elsif_text; | ||||
| 11629 | my $i_block_leading_text = | ||||
| 11630 | -1; # index of token owning block_leading_text | ||||
| 11631 | my $block_line_count = 100; # how many lines the block spans | ||||
| 11632 | my $terminal_type = 'b'; # type of last nonblank token | ||||
| 11633 | my $i_terminal = 0; # index of last nonblank token | ||||
| 11634 | my $terminal_block_type = ""; | ||||
| 11635 | |||||
| 11636 | # update most recent statement label | ||||
| 11637 | $csc_last_label = "" unless ($csc_last_label); | ||||
| 11638 | if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] } | ||||
| 11639 | my $block_label = $csc_last_label; | ||||
| 11640 | |||||
| 11641 | # Loop over all tokens of this batch | ||||
| 11642 | for my $i ( 0 .. $max_index_to_go ) { | ||||
| 11643 | my $type = $types_to_go[$i]; | ||||
| 11644 | my $block_type = $block_type_to_go[$i]; | ||||
| 11645 | my $token = $tokens_to_go[$i]; | ||||
| 11646 | |||||
| 11647 | # remember last nonblank token type | ||||
| 11648 | if ( $type ne '#' && $type ne 'b' ) { | ||||
| 11649 | $terminal_type = $type; | ||||
| 11650 | $terminal_block_type = $block_type; | ||||
| 11651 | $i_terminal = $i; | ||||
| 11652 | } | ||||
| 11653 | |||||
| 11654 | my $type_sequence = $type_sequence_to_go[$i]; | ||||
| 11655 | if ( $block_type && $type_sequence ) { | ||||
| 11656 | |||||
| 11657 | if ( $token eq '}' ) { | ||||
| 11658 | |||||
| 11659 | # restore any leading text saved when we entered this block | ||||
| 11660 | if ( defined( $block_leading_text{$type_sequence} ) ) { | ||||
| 11661 | ( $block_leading_text, $rblock_leading_if_elsif_text ) | ||||
| 11662 | = @{ $block_leading_text{$type_sequence} }; | ||||
| 11663 | $i_block_leading_text = $i; | ||||
| 11664 | delete $block_leading_text{$type_sequence}; | ||||
| 11665 | $rleading_block_if_elsif_text = | ||||
| 11666 | $rblock_leading_if_elsif_text; | ||||
| 11667 | } | ||||
| 11668 | |||||
| 11669 | if ( defined( $csc_block_label{$type_sequence} ) ) { | ||||
| 11670 | $block_label = $csc_block_label{$type_sequence}; | ||||
| 11671 | delete $csc_block_label{$type_sequence}; | ||||
| 11672 | } | ||||
| 11673 | |||||
| 11674 | # if we run into a '}' then we probably started accumulating | ||||
| 11675 | # at something like a trailing 'if' clause..no harm done. | ||||
| 11676 | if ( $accumulating_text_for_block | ||||
| 11677 | && $levels_to_go[$i] <= $leading_block_text_level ) | ||||
| 11678 | { | ||||
| 11679 | my $lev = $levels_to_go[$i]; | ||||
| 11680 | reset_block_text_accumulator(); | ||||
| 11681 | } | ||||
| 11682 | |||||
| 11683 | if ( defined( $block_opening_line_number{$type_sequence} ) ) | ||||
| 11684 | { | ||||
| 11685 | my $output_line_number = | ||||
| 11686 | $vertical_aligner_object->get_output_line_number(); | ||||
| 11687 | $block_line_count = | ||||
| 11688 | $output_line_number - | ||||
| 11689 | $block_opening_line_number{$type_sequence} + 1; | ||||
| 11690 | delete $block_opening_line_number{$type_sequence}; | ||||
| 11691 | } | ||||
| 11692 | else { | ||||
| 11693 | |||||
| 11694 | # Error: block opening line undefined for this line.. | ||||
| 11695 | # This shouldn't be possible, but it is not a | ||||
| 11696 | # significant problem. | ||||
| 11697 | } | ||||
| 11698 | } | ||||
| 11699 | |||||
| 11700 | elsif ( $token eq '{' ) { | ||||
| 11701 | |||||
| 11702 | my $line_number = | ||||
| 11703 | $vertical_aligner_object->get_output_line_number(); | ||||
| 11704 | $block_opening_line_number{$type_sequence} = $line_number; | ||||
| 11705 | |||||
| 11706 | # set a label for this block, except for | ||||
| 11707 | # a bare block which already has the label | ||||
| 11708 | # A label can only be used on the next { | ||||
| 11709 | if ( $block_type =~ /:$/ ) { $csc_last_label = "" } | ||||
| 11710 | $csc_block_label{$type_sequence} = $csc_last_label; | ||||
| 11711 | $csc_last_label = ""; | ||||
| 11712 | |||||
| 11713 | if ( $accumulating_text_for_block | ||||
| 11714 | && $levels_to_go[$i] == $leading_block_text_level ) | ||||
| 11715 | { | ||||
| 11716 | |||||
| 11717 | if ( $accumulating_text_for_block eq $block_type ) { | ||||
| 11718 | |||||
| 11719 | # save any leading text before we enter this block | ||||
| 11720 | $block_leading_text{$type_sequence} = [ | ||||
| 11721 | $leading_block_text, | ||||
| 11722 | $rleading_block_if_elsif_text | ||||
| 11723 | ]; | ||||
| 11724 | $block_opening_line_number{$type_sequence} = | ||||
| 11725 | $leading_block_text_line_number; | ||||
| 11726 | reset_block_text_accumulator(); | ||||
| 11727 | } | ||||
| 11728 | else { | ||||
| 11729 | |||||
| 11730 | # shouldn't happen, but not a serious error. | ||||
| 11731 | # We were accumulating -csc text for block type | ||||
| 11732 | # $accumulating_text_for_block and unexpectedly | ||||
| 11733 | # encountered a '{' for block type $block_type. | ||||
| 11734 | } | ||||
| 11735 | } | ||||
| 11736 | } | ||||
| 11737 | } | ||||
| 11738 | |||||
| 11739 | if ( $type eq 'k' | ||||
| 11740 | && $csc_new_statement_ok | ||||
| 11741 | && $is_if_elsif_else_unless_while_until_for_foreach{$token} | ||||
| 11742 | && $token =~ /$closing_side_comment_list_pattern/o ) | ||||
| 11743 | { | ||||
| 11744 | set_block_text_accumulator($i); | ||||
| 11745 | } | ||||
| 11746 | else { | ||||
| 11747 | |||||
| 11748 | # note: ignoring type 'q' because of tricks being played | ||||
| 11749 | # with 'q' for hanging side comments | ||||
| 11750 | if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) { | ||||
| 11751 | $csc_new_statement_ok = | ||||
| 11752 | ( $block_type || $type eq 'J' || $type eq ';' ); | ||||
| 11753 | } | ||||
| 11754 | if ( $type eq ';' | ||||
| 11755 | && $accumulating_text_for_block | ||||
| 11756 | && $levels_to_go[$i] == $leading_block_text_level ) | ||||
| 11757 | { | ||||
| 11758 | reset_block_text_accumulator(); | ||||
| 11759 | } | ||||
| 11760 | else { | ||||
| 11761 | accumulate_block_text($i); | ||||
| 11762 | } | ||||
| 11763 | } | ||||
| 11764 | } | ||||
| 11765 | |||||
| 11766 | # Treat an 'else' block specially by adding preceding 'if' and | ||||
| 11767 | # 'elsif' text. Otherwise, the 'end else' is not helpful, | ||||
| 11768 | # especially for cuddled-else formatting. | ||||
| 11769 | if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) { | ||||
| 11770 | $block_leading_text = | ||||
| 11771 | make_else_csc_text( $i_terminal, $terminal_block_type, | ||||
| 11772 | $block_leading_text, $rblock_leading_if_elsif_text ); | ||||
| 11773 | } | ||||
| 11774 | |||||
| 11775 | # if this line ends in a label then remember it for the next pass | ||||
| 11776 | $csc_last_label = ""; | ||||
| 11777 | if ( $terminal_type eq 'J' ) { | ||||
| 11778 | $csc_last_label = $tokens_to_go[$i_terminal]; | ||||
| 11779 | } | ||||
| 11780 | |||||
| 11781 | return ( $terminal_type, $i_terminal, $i_block_leading_text, | ||||
| 11782 | $block_leading_text, $block_line_count, $block_label ); | ||||
| 11783 | } | ||||
| 11784 | } | ||||
| 11785 | |||||
| 11786 | sub make_else_csc_text { | ||||
| 11787 | |||||
| 11788 | # create additional -csc text for an 'else' and optionally 'elsif', | ||||
| 11789 | # depending on the value of switch | ||||
| 11790 | # $rOpts_closing_side_comment_else_flag: | ||||
| 11791 | # | ||||
| 11792 | # = 0 add 'if' text to trailing else | ||||
| 11793 | # = 1 same as 0 plus: | ||||
| 11794 | # add 'if' to 'elsif's if can fit in line length | ||||
| 11795 | # add last 'elsif' to trailing else if can fit in one line | ||||
| 11796 | # = 2 same as 1 but do not check if exceed line length | ||||
| 11797 | # | ||||
| 11798 | # $rif_elsif_text = a reference to a list of all previous closing | ||||
| 11799 | # side comments created for this if block | ||||
| 11800 | # | ||||
| 11801 | my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_; | ||||
| 11802 | my $csc_text = $block_leading_text; | ||||
| 11803 | |||||
| 11804 | if ( $block_type eq 'elsif' | ||||
| 11805 | && $rOpts_closing_side_comment_else_flag == 0 ) | ||||
| 11806 | { | ||||
| 11807 | return $csc_text; | ||||
| 11808 | } | ||||
| 11809 | |||||
| 11810 | my $count = @{$rif_elsif_text}; | ||||
| 11811 | return $csc_text unless ($count); | ||||
| 11812 | |||||
| 11813 | my $if_text = '[ if' . $rif_elsif_text->[0]; | ||||
| 11814 | |||||
| 11815 | # always show the leading 'if' text on 'else' | ||||
| 11816 | if ( $block_type eq 'else' ) { | ||||
| 11817 | $csc_text .= $if_text; | ||||
| 11818 | } | ||||
| 11819 | |||||
| 11820 | # see if that's all | ||||
| 11821 | if ( $rOpts_closing_side_comment_else_flag == 0 ) { | ||||
| 11822 | return $csc_text; | ||||
| 11823 | } | ||||
| 11824 | |||||
| 11825 | my $last_elsif_text = ""; | ||||
| 11826 | if ( $count > 1 ) { | ||||
| 11827 | $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ]; | ||||
| 11828 | if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; } | ||||
| 11829 | } | ||||
| 11830 | |||||
| 11831 | # tentatively append one more item | ||||
| 11832 | my $saved_text = $csc_text; | ||||
| 11833 | if ( $block_type eq 'else' ) { | ||||
| 11834 | $csc_text .= $last_elsif_text; | ||||
| 11835 | } | ||||
| 11836 | else { | ||||
| 11837 | $csc_text .= ' ' . $if_text; | ||||
| 11838 | } | ||||
| 11839 | |||||
| 11840 | # all done if no length checks requested | ||||
| 11841 | if ( $rOpts_closing_side_comment_else_flag == 2 ) { | ||||
| 11842 | return $csc_text; | ||||
| 11843 | } | ||||
| 11844 | |||||
| 11845 | # undo it if line length exceeded | ||||
| 11846 | my $length = | ||||
| 11847 | length($csc_text) + | ||||
| 11848 | length($block_type) + | ||||
| 11849 | length( $rOpts->{'closing-side-comment-prefix'} ) + | ||||
| 11850 | $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3; | ||||
| 11851 | if ( $length > maximum_line_length_for_level($leading_block_text_level) ) { | ||||
| 11852 | $csc_text = $saved_text; | ||||
| 11853 | } | ||||
| 11854 | return $csc_text; | ||||
| 11855 | } | ||||
| 11856 | |||||
| 11857 | { # sub balance_csc_text | ||||
| 11858 | |||||
| 11859 | 2 | 100ns | my %matching_char; | ||
| 11860 | |||||
| 11861 | # spent 7µs within Perl::Tidy::Formatter::BEGIN@11861 which was called:
# once (7µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 11870 | ||||
| 11862 | 1 | 7µs | %matching_char = ( | ||
| 11863 | '{' => '}', | ||||
| 11864 | '(' => ')', | ||||
| 11865 | '[' => ']', | ||||
| 11866 | '}' => '{', | ||||
| 11867 | ')' => '(', | ||||
| 11868 | ']' => '[', | ||||
| 11869 | ); | ||||
| 11870 | 1 | 1.17ms | 1 | 7µs | } # spent 7µs making 1 call to Perl::Tidy::Formatter::BEGIN@11861 |
| 11871 | |||||
| 11872 | sub balance_csc_text { | ||||
| 11873 | |||||
| 11874 | # Append characters to balance a closing side comment so that editors | ||||
| 11875 | # such as vim can correctly jump through code. | ||||
| 11876 | # Simple Example: | ||||
| 11877 | # input = ## end foreach my $foo ( sort { $b ... | ||||
| 11878 | # output = ## end foreach my $foo ( sort { $b ...}) | ||||
| 11879 | |||||
| 11880 | # NOTE: This routine does not currently filter out structures within | ||||
| 11881 | # quoted text because the bounce algorithms in text editors do not | ||||
| 11882 | # necessarily do this either (a version of vim was checked and | ||||
| 11883 | # did not do this). | ||||
| 11884 | |||||
| 11885 | # Some complex examples which will cause trouble for some editors: | ||||
| 11886 | # while ( $mask_string =~ /\{[^{]*?\}/g ) { | ||||
| 11887 | # if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) { | ||||
| 11888 | # if ( $1 eq '{' ) { | ||||
| 11889 | # test file test1/braces.pl has many such examples. | ||||
| 11890 | |||||
| 11891 | my ($csc) = @_; | ||||
| 11892 | |||||
| 11893 | # loop to examine characters one-by-one, RIGHT to LEFT and | ||||
| 11894 | # build a balancing ending, LEFT to RIGHT. | ||||
| 11895 | for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) { | ||||
| 11896 | |||||
| 11897 | my $char = substr( $csc, $pos, 1 ); | ||||
| 11898 | |||||
| 11899 | # ignore everything except structural characters | ||||
| 11900 | next unless ( $matching_char{$char} ); | ||||
| 11901 | |||||
| 11902 | # pop most recently appended character | ||||
| 11903 | my $top = chop($csc); | ||||
| 11904 | |||||
| 11905 | # push it back plus the mate to the newest character | ||||
| 11906 | # unless they balance each other. | ||||
| 11907 | $csc = $csc . $top . $matching_char{$char} unless $top eq $char; | ||||
| 11908 | } | ||||
| 11909 | |||||
| 11910 | # return the balanced string | ||||
| 11911 | return $csc; | ||||
| 11912 | } | ||||
| 11913 | } | ||||
| 11914 | |||||
| 11915 | sub add_closing_side_comment { | ||||
| 11916 | |||||
| 11917 | # add closing side comments after closing block braces if -csc used | ||||
| 11918 | my $cscw_block_comment; | ||||
| 11919 | |||||
| 11920 | #--------------------------------------------------------------- | ||||
| 11921 | # Step 1: loop through all tokens of this line to accumulate | ||||
| 11922 | # the text needed to create the closing side comments. Also see | ||||
| 11923 | # how the line ends. | ||||
| 11924 | #--------------------------------------------------------------- | ||||
| 11925 | |||||
| 11926 | my ( $terminal_type, $i_terminal, $i_block_leading_text, | ||||
| 11927 | $block_leading_text, $block_line_count, $block_label ) | ||||
| 11928 | = accumulate_csc_text(); | ||||
| 11929 | |||||
| 11930 | #--------------------------------------------------------------- | ||||
| 11931 | # Step 2: make the closing side comment if this ends a block | ||||
| 11932 | #--------------------------------------------------------------- | ||||
| 11933 | my $have_side_comment = $i_terminal != $max_index_to_go; | ||||
| 11934 | |||||
| 11935 | # if this line might end in a block closure.. | ||||
| 11936 | if ( | ||||
| 11937 | $terminal_type eq '}' | ||||
| 11938 | |||||
| 11939 | # ..and either | ||||
| 11940 | && ( | ||||
| 11941 | |||||
| 11942 | # the block is long enough | ||||
| 11943 | ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} ) | ||||
| 11944 | |||||
| 11945 | # or there is an existing comment to check | ||||
| 11946 | || ( $have_side_comment | ||||
| 11947 | && $rOpts->{'closing-side-comment-warnings'} ) | ||||
| 11948 | ) | ||||
| 11949 | |||||
| 11950 | # .. and if this is one of the types of interest | ||||
| 11951 | && $block_type_to_go[$i_terminal] =~ | ||||
| 11952 | /$closing_side_comment_list_pattern/o | ||||
| 11953 | |||||
| 11954 | # .. but not an anonymous sub | ||||
| 11955 | # These are not normally of interest, and their closing braces are | ||||
| 11956 | # often followed by commas or semicolons anyway. This also avoids | ||||
| 11957 | # possible erratic output due to line numbering inconsistencies | ||||
| 11958 | # in the cases where their closing braces terminate a line. | ||||
| 11959 | && $block_type_to_go[$i_terminal] ne 'sub' | ||||
| 11960 | |||||
| 11961 | # ..and the corresponding opening brace must is not in this batch | ||||
| 11962 | # (because we do not need to tag one-line blocks, although this | ||||
| 11963 | # should also be caught with a positive -csci value) | ||||
| 11964 | && $mate_index_to_go[$i_terminal] < 0 | ||||
| 11965 | |||||
| 11966 | # ..and either | ||||
| 11967 | && ( | ||||
| 11968 | |||||
| 11969 | # this is the last token (line doesn't have a side comment) | ||||
| 11970 | !$have_side_comment | ||||
| 11971 | |||||
| 11972 | # or the old side comment is a closing side comment | ||||
| 11973 | || $tokens_to_go[$max_index_to_go] =~ | ||||
| 11974 | /$closing_side_comment_prefix_pattern/o | ||||
| 11975 | ) | ||||
| 11976 | ) | ||||
| 11977 | { | ||||
| 11978 | |||||
| 11979 | # then make the closing side comment text | ||||
| 11980 | if ($block_label) { $block_label .= " " } | ||||
| 11981 | my $token = | ||||
| 11982 | "$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]"; | ||||
| 11983 | |||||
| 11984 | # append any extra descriptive text collected above | ||||
| 11985 | if ( $i_block_leading_text == $i_terminal ) { | ||||
| 11986 | $token .= $block_leading_text; | ||||
| 11987 | } | ||||
| 11988 | |||||
| 11989 | $token = balance_csc_text($token) | ||||
| 11990 | if $rOpts->{'closing-side-comments-balanced'}; | ||||
| 11991 | |||||
| 11992 | $token =~ s/\s*$//; # trim any trailing whitespace | ||||
| 11993 | |||||
| 11994 | # handle case of existing closing side comment | ||||
| 11995 | if ($have_side_comment) { | ||||
| 11996 | |||||
| 11997 | # warn if requested and tokens differ significantly | ||||
| 11998 | if ( $rOpts->{'closing-side-comment-warnings'} ) { | ||||
| 11999 | my $old_csc = $tokens_to_go[$max_index_to_go]; | ||||
| 12000 | my $new_csc = $token; | ||||
| 12001 | $new_csc =~ s/\s+//g; # trim all whitespace | ||||
| 12002 | $old_csc =~ s/\s+//g; # trim all whitespace | ||||
| 12003 | $new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures | ||||
| 12004 | $old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures | ||||
| 12005 | $new_csc =~ s/(\.\.\.)$//; # trim trailing '...' | ||||
| 12006 | my $new_trailing_dots = $1; | ||||
| 12007 | $old_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...' | ||||
| 12008 | |||||
| 12009 | # Patch to handle multiple closing side comments at | ||||
| 12010 | # else and elsif's. These have become too complicated | ||||
| 12011 | # to check, so if we see an indication of | ||||
| 12012 | # '[ if' or '[ # elsif', then assume they were made | ||||
| 12013 | # by perltidy. | ||||
| 12014 | if ( $block_type_to_go[$i_terminal] eq 'else' ) { | ||||
| 12015 | if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc } | ||||
| 12016 | } | ||||
| 12017 | elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) { | ||||
| 12018 | if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc } | ||||
| 12019 | } | ||||
| 12020 | |||||
| 12021 | # if old comment is contained in new comment, | ||||
| 12022 | # only compare the common part. | ||||
| 12023 | if ( length($new_csc) > length($old_csc) ) { | ||||
| 12024 | $new_csc = substr( $new_csc, 0, length($old_csc) ); | ||||
| 12025 | } | ||||
| 12026 | |||||
| 12027 | # if the new comment is shorter and has been limited, | ||||
| 12028 | # only compare the common part. | ||||
| 12029 | if ( length($new_csc) < length($old_csc) | ||||
| 12030 | && $new_trailing_dots ) | ||||
| 12031 | { | ||||
| 12032 | $old_csc = substr( $old_csc, 0, length($new_csc) ); | ||||
| 12033 | } | ||||
| 12034 | |||||
| 12035 | # any remaining difference? | ||||
| 12036 | if ( $new_csc ne $old_csc ) { | ||||
| 12037 | |||||
| 12038 | # just leave the old comment if we are below the threshold | ||||
| 12039 | # for creating side comments | ||||
| 12040 | if ( $block_line_count < | ||||
| 12041 | $rOpts->{'closing-side-comment-interval'} ) | ||||
| 12042 | { | ||||
| 12043 | $token = undef; | ||||
| 12044 | } | ||||
| 12045 | |||||
| 12046 | # otherwise we'll make a note of it | ||||
| 12047 | else { | ||||
| 12048 | |||||
| 12049 | warning( | ||||
| 12050 | "perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n" | ||||
| 12051 | ); | ||||
| 12052 | |||||
| 12053 | # save the old side comment in a new trailing block comment | ||||
| 12054 | my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ]; | ||||
| 12055 | $year += 1900; | ||||
| 12056 | $month += 1; | ||||
| 12057 | $cscw_block_comment = | ||||
| 12058 | "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]"; | ||||
| 12059 | } | ||||
| 12060 | } | ||||
| 12061 | else { | ||||
| 12062 | |||||
| 12063 | # No differences.. we can safely delete old comment if we | ||||
| 12064 | # are below the threshold | ||||
| 12065 | if ( $block_line_count < | ||||
| 12066 | $rOpts->{'closing-side-comment-interval'} ) | ||||
| 12067 | { | ||||
| 12068 | $token = undef; | ||||
| 12069 | unstore_token_to_go() | ||||
| 12070 | if ( $types_to_go[$max_index_to_go] eq '#' ); | ||||
| 12071 | unstore_token_to_go() | ||||
| 12072 | if ( $types_to_go[$max_index_to_go] eq 'b' ); | ||||
| 12073 | } | ||||
| 12074 | } | ||||
| 12075 | } | ||||
| 12076 | |||||
| 12077 | # switch to the new csc (unless we deleted it!) | ||||
| 12078 | $tokens_to_go[$max_index_to_go] = $token if $token; | ||||
| 12079 | } | ||||
| 12080 | |||||
| 12081 | # handle case of NO existing closing side comment | ||||
| 12082 | else { | ||||
| 12083 | |||||
| 12084 | # insert the new side comment into the output token stream | ||||
| 12085 | my $type = '#'; | ||||
| 12086 | my $block_type = ''; | ||||
| 12087 | my $type_sequence = ''; | ||||
| 12088 | my $container_environment = | ||||
| 12089 | $container_environment_to_go[$max_index_to_go]; | ||||
| 12090 | my $level = $levels_to_go[$max_index_to_go]; | ||||
| 12091 | my $slevel = $nesting_depth_to_go[$max_index_to_go]; | ||||
| 12092 | my $no_internal_newlines = 0; | ||||
| 12093 | |||||
| 12094 | my $nesting_blocks = $nesting_blocks_to_go[$max_index_to_go]; | ||||
| 12095 | my $ci_level = $ci_levels_to_go[$max_index_to_go]; | ||||
| 12096 | my $in_continued_quote = 0; | ||||
| 12097 | |||||
| 12098 | # first insert a blank token | ||||
| 12099 | insert_new_token_to_go( ' ', 'b', $slevel, $no_internal_newlines ); | ||||
| 12100 | |||||
| 12101 | # then the side comment | ||||
| 12102 | insert_new_token_to_go( $token, $type, $slevel, | ||||
| 12103 | $no_internal_newlines ); | ||||
| 12104 | } | ||||
| 12105 | } | ||||
| 12106 | return $cscw_block_comment; | ||||
| 12107 | } | ||||
| 12108 | |||||
| 12109 | sub previous_nonblank_token { | ||||
| 12110 | my ($i) = @_; | ||||
| 12111 | my $name = ""; | ||||
| 12112 | my $im = $i - 1; | ||||
| 12113 | return "" if ( $im < 0 ); | ||||
| 12114 | if ( $types_to_go[$im] eq 'b' ) { $im--; } | ||||
| 12115 | return "" if ( $im < 0 ); | ||||
| 12116 | $name = $tokens_to_go[$im]; | ||||
| 12117 | |||||
| 12118 | # prepend any sub name to an isolated -> to avoid unwanted alignments | ||||
| 12119 | # [test case is test8/penco.pl] | ||||
| 12120 | if ( $name eq '->' ) { | ||||
| 12121 | $im--; | ||||
| 12122 | if ( $im >= 0 && $types_to_go[$im] ne 'b' ) { | ||||
| 12123 | $name = $tokens_to_go[$im] . $name; | ||||
| 12124 | } | ||||
| 12125 | } | ||||
| 12126 | return $name; | ||||
| 12127 | } | ||||
| 12128 | |||||
| 12129 | sub send_lines_to_vertical_aligner { | ||||
| 12130 | |||||
| 12131 | my ( $ri_first, $ri_last, $do_not_pad ) = @_; | ||||
| 12132 | |||||
| 12133 | my $rindentation_list = [0]; # ref to indentations for each line | ||||
| 12134 | |||||
| 12135 | # define the array @matching_token_to_go for the output tokens | ||||
| 12136 | # which will be non-blank for each special token (such as =>) | ||||
| 12137 | # for which alignment is required. | ||||
| 12138 | set_vertical_alignment_markers( $ri_first, $ri_last ); | ||||
| 12139 | |||||
| 12140 | # flush if necessary to avoid unwanted alignment | ||||
| 12141 | my $must_flush = 0; | ||||
| 12142 | if ( @$ri_first > 1 ) { | ||||
| 12143 | |||||
| 12144 | # flush before a long if statement | ||||
| 12145 | if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) { | ||||
| 12146 | $must_flush = 1; | ||||
| 12147 | } | ||||
| 12148 | } | ||||
| 12149 | if ($must_flush) { | ||||
| 12150 | Perl::Tidy::VerticalAligner::flush(); | ||||
| 12151 | } | ||||
| 12152 | |||||
| 12153 | undo_ci( $ri_first, $ri_last ); | ||||
| 12154 | |||||
| 12155 | set_logical_padding( $ri_first, $ri_last ); | ||||
| 12156 | |||||
| 12157 | # loop to prepare each line for shipment | ||||
| 12158 | my $n_last_line = @$ri_first - 1; | ||||
| 12159 | my $in_comma_list; | ||||
| 12160 | for my $n ( 0 .. $n_last_line ) { | ||||
| 12161 | my $ibeg = $$ri_first[$n]; | ||||
| 12162 | my $iend = $$ri_last[$n]; | ||||
| 12163 | |||||
| 12164 | my ( $rtokens, $rfields, $rpatterns ) = | ||||
| 12165 | make_alignment_patterns( $ibeg, $iend ); | ||||
| 12166 | |||||
| 12167 | # Set flag to show how much level changes between this line | ||||
| 12168 | # and the next line, if we have it. | ||||
| 12169 | my $ljump = 0; | ||||
| 12170 | if ( $n < $n_last_line ) { | ||||
| 12171 | my $ibegp = $$ri_first[ $n + 1 ]; | ||||
| 12172 | $ljump = $levels_to_go[$ibegp] - $levels_to_go[$iend]; | ||||
| 12173 | } | ||||
| 12174 | |||||
| 12175 | my ( $indentation, $lev, $level_end, $terminal_type, | ||||
| 12176 | $is_semicolon_terminated, $is_outdented_line ) | ||||
| 12177 | = set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns, | ||||
| 12178 | $ri_first, $ri_last, $rindentation_list, $ljump ); | ||||
| 12179 | |||||
| 12180 | # we will allow outdenting of long lines.. | ||||
| 12181 | my $outdent_long_lines = ( | ||||
| 12182 | |||||
| 12183 | # which are long quotes, if allowed | ||||
| 12184 | ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} ) | ||||
| 12185 | |||||
| 12186 | # which are long block comments, if allowed | ||||
| 12187 | || ( | ||||
| 12188 | $types_to_go[$ibeg] eq '#' | ||||
| 12189 | && $rOpts->{'outdent-long-comments'} | ||||
| 12190 | |||||
| 12191 | # but not if this is a static block comment | ||||
| 12192 | && !$is_static_block_comment | ||||
| 12193 | ) | ||||
| 12194 | ); | ||||
| 12195 | |||||
| 12196 | my $level_jump = | ||||
| 12197 | $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg]; | ||||
| 12198 | |||||
| 12199 | my $rvertical_tightness_flags = | ||||
| 12200 | set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend, | ||||
| 12201 | $ri_first, $ri_last ); | ||||
| 12202 | |||||
| 12203 | # flush an outdented line to avoid any unwanted vertical alignment | ||||
| 12204 | Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line); | ||||
| 12205 | |||||
| 12206 | # Set a flag at the final ':' of a ternary chain to request | ||||
| 12207 | # vertical alignment of the final term. Here is a | ||||
| 12208 | # slightly complex example: | ||||
| 12209 | # | ||||
| 12210 | # $self->{_text} = ( | ||||
| 12211 | # !$section ? '' | ||||
| 12212 | # : $type eq 'item' ? "the $section entry" | ||||
| 12213 | # : "the section on $section" | ||||
| 12214 | # ) | ||||
| 12215 | # . ( | ||||
| 12216 | # $page | ||||
| 12217 | # ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage" | ||||
| 12218 | # : ' elsewhere in this document' | ||||
| 12219 | # ); | ||||
| 12220 | # | ||||
| 12221 | my $is_terminal_ternary = 0; | ||||
| 12222 | if ( $tokens_to_go[$ibeg] eq ':' | ||||
| 12223 | || $n > 0 && $tokens_to_go[ $$ri_last[ $n - 1 ] ] eq ':' ) | ||||
| 12224 | { | ||||
| 12225 | my $last_leading_type = ":"; | ||||
| 12226 | if ( $n > 0 ) { | ||||
| 12227 | my $iprev = $$ri_first[ $n - 1 ]; | ||||
| 12228 | $last_leading_type = $types_to_go[$iprev]; | ||||
| 12229 | } | ||||
| 12230 | if ( $terminal_type ne ';' | ||||
| 12231 | && $n_last_line > $n | ||||
| 12232 | && $level_end == $lev ) | ||||
| 12233 | { | ||||
| 12234 | my $inext = $$ri_first[ $n + 1 ]; | ||||
| 12235 | $level_end = $levels_to_go[$inext]; | ||||
| 12236 | $terminal_type = $types_to_go[$inext]; | ||||
| 12237 | } | ||||
| 12238 | |||||
| 12239 | $is_terminal_ternary = $last_leading_type eq ':' | ||||
| 12240 | && ( ( $terminal_type eq ';' && $level_end <= $lev ) | ||||
| 12241 | || ( $terminal_type ne ':' && $level_end < $lev ) ) | ||||
| 12242 | |||||
| 12243 | # the terminal term must not contain any ternary terms, as in | ||||
| 12244 | # my $ECHO = ( | ||||
| 12245 | # $Is_MSWin32 ? ".\\echo$$" | ||||
| 12246 | # : $Is_MacOS ? ":echo$$" | ||||
| 12247 | # : ( $Is_NetWare ? "echo$$" : "./echo$$" ) | ||||
| 12248 | # ); | ||||
| 12249 | && !grep /^[\?\:]$/, @types_to_go[ $ibeg + 1 .. $iend ]; | ||||
| 12250 | } | ||||
| 12251 | |||||
| 12252 | # send this new line down the pipe | ||||
| 12253 | my $forced_breakpoint = $forced_breakpoint_to_go[$iend]; | ||||
| 12254 | Perl::Tidy::VerticalAligner::valign_input( | ||||
| 12255 | $lev, | ||||
| 12256 | $level_end, | ||||
| 12257 | $indentation, | ||||
| 12258 | $rfields, | ||||
| 12259 | $rtokens, | ||||
| 12260 | $rpatterns, | ||||
| 12261 | $forced_breakpoint_to_go[$iend] || $in_comma_list, | ||||
| 12262 | $outdent_long_lines, | ||||
| 12263 | $is_terminal_ternary, | ||||
| 12264 | $is_semicolon_terminated, | ||||
| 12265 | $do_not_pad, | ||||
| 12266 | $rvertical_tightness_flags, | ||||
| 12267 | $level_jump, | ||||
| 12268 | ); | ||||
| 12269 | $in_comma_list = | ||||
| 12270 | $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend]; | ||||
| 12271 | |||||
| 12272 | # flush an outdented line to avoid any unwanted vertical alignment | ||||
| 12273 | Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line); | ||||
| 12274 | |||||
| 12275 | $do_not_pad = 0; | ||||
| 12276 | |||||
| 12277 | # Set flag indicating if this line ends in an opening | ||||
| 12278 | # token and is very short, so that a blank line is not | ||||
| 12279 | # needed if the subsequent line is a comment. | ||||
| 12280 | # Examples of what we are looking for: | ||||
| 12281 | # { | ||||
| 12282 | # && ( | ||||
| 12283 | # BEGIN { | ||||
| 12284 | # default { | ||||
| 12285 | # sub { | ||||
| 12286 | $last_output_short_opening_token | ||||
| 12287 | |||||
| 12288 | # line ends in opening token | ||||
| 12289 | = $types_to_go[$iend] =~ /^[\{\(\[L]$/ | ||||
| 12290 | |||||
| 12291 | # and either | ||||
| 12292 | && ( | ||||
| 12293 | # line has either single opening token | ||||
| 12294 | $iend == $ibeg | ||||
| 12295 | |||||
| 12296 | # or is a single token followed by opening token. | ||||
| 12297 | # Note that sub identifiers have blanks like 'sub doit' | ||||
| 12298 | || ( $iend - $ibeg <= 2 && $tokens_to_go[$ibeg] !~ /\s+/ ) | ||||
| 12299 | ) | ||||
| 12300 | |||||
| 12301 | # and limit total to 10 character widths | ||||
| 12302 | && token_sequence_length( $ibeg, $iend ) <= 10; | ||||
| 12303 | |||||
| 12304 | } # end of loop to output each line | ||||
| 12305 | |||||
| 12306 | # remember indentation of lines containing opening containers for | ||||
| 12307 | # later use by sub set_adjusted_indentation | ||||
| 12308 | save_opening_indentation( $ri_first, $ri_last, $rindentation_list ); | ||||
| 12309 | } | ||||
| 12310 | |||||
| 12311 | { # begin make_alignment_patterns | ||||
| 12312 | |||||
| 12313 | 2 | 0s | my %block_type_map; | ||
| 12314 | 1 | 200ns | my %keyword_map; | ||
| 12315 | |||||
| 12316 | # spent 10µs within Perl::Tidy::Formatter::BEGIN@12316 which was called:
# once (10µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 12344 | ||||
| 12317 | |||||
| 12318 | # map related block names into a common name to | ||||
| 12319 | # allow alignment | ||||
| 12320 | 1 | 3µs | %block_type_map = ( | ||
| 12321 | 'unless' => 'if', | ||||
| 12322 | 'else' => 'if', | ||||
| 12323 | 'elsif' => 'if', | ||||
| 12324 | 'when' => 'if', | ||||
| 12325 | 'default' => 'if', | ||||
| 12326 | 'case' => 'if', | ||||
| 12327 | 'sort' => 'map', | ||||
| 12328 | 'grep' => 'map', | ||||
| 12329 | ); | ||||
| 12330 | |||||
| 12331 | # map certain keywords to the same 'if' class to align | ||||
| 12332 | # long if/elsif sequences. [elsif.pl] | ||||
| 12333 | 1 | 9µs | %keyword_map = ( | ||
| 12334 | 'unless' => 'if', | ||||
| 12335 | 'else' => 'if', | ||||
| 12336 | 'elsif' => 'if', | ||||
| 12337 | 'when' => 'given', | ||||
| 12338 | 'default' => 'given', | ||||
| 12339 | 'case' => 'switch', | ||||
| 12340 | |||||
| 12341 | # treat an 'undef' similar to numbers and quotes | ||||
| 12342 | 'undef' => 'Q', | ||||
| 12343 | ); | ||||
| 12344 | 1 | 1.02ms | 1 | 10µs | } # spent 10µs making 1 call to Perl::Tidy::Formatter::BEGIN@12316 |
| 12345 | |||||
| 12346 | sub make_alignment_patterns { | ||||
| 12347 | |||||
| 12348 | # Here we do some important preliminary work for the | ||||
| 12349 | # vertical aligner. We create three arrays for one | ||||
| 12350 | # output line. These arrays contain strings that can | ||||
| 12351 | # be tested by the vertical aligner to see if | ||||
| 12352 | # consecutive lines can be aligned vertically. | ||||
| 12353 | # | ||||
| 12354 | # The three arrays are indexed on the vertical | ||||
| 12355 | # alignment fields and are: | ||||
| 12356 | # @tokens - a list of any vertical alignment tokens for this line. | ||||
| 12357 | # These are tokens, such as '=' '&&' '#' etc which | ||||
| 12358 | # we want to might align vertically. These are | ||||
| 12359 | # decorated with various information such as | ||||
| 12360 | # nesting depth to prevent unwanted vertical | ||||
| 12361 | # alignment matches. | ||||
| 12362 | # @fields - the actual text of the line between the vertical alignment | ||||
| 12363 | # tokens. | ||||
| 12364 | # @patterns - a modified list of token types, one for each alignment | ||||
| 12365 | # field. These should normally each match before alignment is | ||||
| 12366 | # allowed, even when the alignment tokens match. | ||||
| 12367 | my ( $ibeg, $iend ) = @_; | ||||
| 12368 | my @tokens = (); | ||||
| 12369 | my @fields = (); | ||||
| 12370 | my @patterns = (); | ||||
| 12371 | my $i_start = $ibeg; | ||||
| 12372 | my $i; | ||||
| 12373 | |||||
| 12374 | my $depth = 0; | ||||
| 12375 | my @container_name = (""); | ||||
| 12376 | my @multiple_comma_arrows = (undef); | ||||
| 12377 | |||||
| 12378 | my $j = 0; # field index | ||||
| 12379 | |||||
| 12380 | $patterns[0] = ""; | ||||
| 12381 | for $i ( $ibeg .. $iend ) { | ||||
| 12382 | |||||
| 12383 | # Keep track of containers balanced on this line only. | ||||
| 12384 | # These are used below to prevent unwanted cross-line alignments. | ||||
| 12385 | # Unbalanced containers already avoid aligning across | ||||
| 12386 | # container boundaries. | ||||
| 12387 | if ( $tokens_to_go[$i] eq '(' ) { | ||||
| 12388 | |||||
| 12389 | # if container is balanced on this line... | ||||
| 12390 | my $i_mate = $mate_index_to_go[$i]; | ||||
| 12391 | if ( $i_mate > $i && $i_mate <= $iend ) { | ||||
| 12392 | $depth++; | ||||
| 12393 | my $seqno = $type_sequence_to_go[$i]; | ||||
| 12394 | my $count = comma_arrow_count($seqno); | ||||
| 12395 | $multiple_comma_arrows[$depth] = $count && $count > 1; | ||||
| 12396 | |||||
| 12397 | # Append the previous token name to make the container name | ||||
| 12398 | # more unique. This name will also be given to any commas | ||||
| 12399 | # within this container, and it helps avoid undesirable | ||||
| 12400 | # alignments of different types of containers. | ||||
| 12401 | my $name = previous_nonblank_token($i); | ||||
| 12402 | $name =~ s/^->//; | ||||
| 12403 | $container_name[$depth] = "+" . $name; | ||||
| 12404 | |||||
| 12405 | # Make the container name even more unique if necessary. | ||||
| 12406 | # If we are not vertically aligning this opening paren, | ||||
| 12407 | # append a character count to avoid bad alignment because | ||||
| 12408 | # it usually looks bad to align commas within containers | ||||
| 12409 | # for which the opening parens do not align. Here | ||||
| 12410 | # is an example very BAD alignment of commas (because | ||||
| 12411 | # the atan2 functions are not all aligned): | ||||
| 12412 | # $XY = | ||||
| 12413 | # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) + | ||||
| 12414 | # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) - | ||||
| 12415 | # $X * atan2( $X, 1 ) - | ||||
| 12416 | # $Y * atan2( $Y, 1 ); | ||||
| 12417 | # | ||||
| 12418 | # On the other hand, it is usually okay to align commas if | ||||
| 12419 | # opening parens align, such as: | ||||
| 12420 | # glVertex3d( $cx + $s * $xs, $cy, $z ); | ||||
| 12421 | # glVertex3d( $cx, $cy + $s * $ys, $z ); | ||||
| 12422 | # glVertex3d( $cx - $s * $xs, $cy, $z ); | ||||
| 12423 | # glVertex3d( $cx, $cy - $s * $ys, $z ); | ||||
| 12424 | # | ||||
| 12425 | # To distinguish between these situations, we will | ||||
| 12426 | # append the length of the line from the previous matching | ||||
| 12427 | # token, or beginning of line, to the function name. This | ||||
| 12428 | # will allow the vertical aligner to reject undesirable | ||||
| 12429 | # matches. | ||||
| 12430 | |||||
| 12431 | # if we are not aligning on this paren... | ||||
| 12432 | if ( $matching_token_to_go[$i] eq '' ) { | ||||
| 12433 | |||||
| 12434 | # Sum length from previous alignment, or start of line. | ||||
| 12435 | my $len = | ||||
| 12436 | ( $i_start == $ibeg ) | ||||
| 12437 | ? total_line_length( $i_start, $i - 1 ) | ||||
| 12438 | : token_sequence_length( $i_start, $i - 1 ); | ||||
| 12439 | |||||
| 12440 | # tack length onto the container name to make unique | ||||
| 12441 | $container_name[$depth] .= "-" . $len; | ||||
| 12442 | } | ||||
| 12443 | } | ||||
| 12444 | } | ||||
| 12445 | elsif ( $tokens_to_go[$i] eq ')' ) { | ||||
| 12446 | $depth-- if $depth > 0; | ||||
| 12447 | } | ||||
| 12448 | |||||
| 12449 | # if we find a new synchronization token, we are done with | ||||
| 12450 | # a field | ||||
| 12451 | if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) { | ||||
| 12452 | |||||
| 12453 | my $tok = my $raw_tok = $matching_token_to_go[$i]; | ||||
| 12454 | |||||
| 12455 | # make separators in different nesting depths unique | ||||
| 12456 | # by appending the nesting depth digit. | ||||
| 12457 | if ( $raw_tok ne '#' ) { | ||||
| 12458 | $tok .= "$nesting_depth_to_go[$i]"; | ||||
| 12459 | } | ||||
| 12460 | |||||
| 12461 | # also decorate commas with any container name to avoid | ||||
| 12462 | # unwanted cross-line alignments. | ||||
| 12463 | if ( $raw_tok eq ',' || $raw_tok eq '=>' ) { | ||||
| 12464 | if ( $container_name[$depth] ) { | ||||
| 12465 | $tok .= $container_name[$depth]; | ||||
| 12466 | } | ||||
| 12467 | } | ||||
| 12468 | |||||
| 12469 | # Patch to avoid aligning leading and trailing if, unless. | ||||
| 12470 | # Mark trailing if, unless statements with container names. | ||||
| 12471 | # This makes them different from leading if, unless which | ||||
| 12472 | # are not so marked at present. If we ever need to name | ||||
| 12473 | # them too, we could use ci to distinguish them. | ||||
| 12474 | # Example problem to avoid: | ||||
| 12475 | # return ( 2, "DBERROR" ) | ||||
| 12476 | # if ( $retval == 2 ); | ||||
| 12477 | # if ( scalar @_ ) { | ||||
| 12478 | # my ( $a, $b, $c, $d, $e, $f ) = @_; | ||||
| 12479 | # } | ||||
| 12480 | if ( $raw_tok eq '(' ) { | ||||
| 12481 | my $ci = $ci_levels_to_go[$ibeg]; | ||||
| 12482 | if ( $container_name[$depth] =~ /^\+(if|unless)/ | ||||
| 12483 | && $ci ) | ||||
| 12484 | { | ||||
| 12485 | $tok .= $container_name[$depth]; | ||||
| 12486 | } | ||||
| 12487 | } | ||||
| 12488 | |||||
| 12489 | # Decorate block braces with block types to avoid | ||||
| 12490 | # unwanted alignments such as the following: | ||||
| 12491 | # foreach ( @{$routput_array} ) { $fh->print($_) } | ||||
| 12492 | # eval { $fh->close() }; | ||||
| 12493 | if ( $raw_tok eq '{' && $block_type_to_go[$i] ) { | ||||
| 12494 | my $block_type = $block_type_to_go[$i]; | ||||
| 12495 | |||||
| 12496 | # map certain related block types to allow | ||||
| 12497 | # else blocks to align | ||||
| 12498 | $block_type = $block_type_map{$block_type} | ||||
| 12499 | if ( defined( $block_type_map{$block_type} ) ); | ||||
| 12500 | |||||
| 12501 | # remove sub names to allow one-line sub braces to align | ||||
| 12502 | # regardless of name | ||||
| 12503 | if ( $block_type =~ /^sub / ) { $block_type = 'sub' } | ||||
| 12504 | |||||
| 12505 | # allow all control-type blocks to align | ||||
| 12506 | if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' } | ||||
| 12507 | |||||
| 12508 | $tok .= $block_type; | ||||
| 12509 | } | ||||
| 12510 | |||||
| 12511 | # concatenate the text of the consecutive tokens to form | ||||
| 12512 | # the field | ||||
| 12513 | push( @fields, | ||||
| 12514 | join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) ); | ||||
| 12515 | |||||
| 12516 | # store the alignment token for this field | ||||
| 12517 | push( @tokens, $tok ); | ||||
| 12518 | |||||
| 12519 | # get ready for the next batch | ||||
| 12520 | $i_start = $i; | ||||
| 12521 | $j++; | ||||
| 12522 | $patterns[$j] = ""; | ||||
| 12523 | } | ||||
| 12524 | |||||
| 12525 | # continue accumulating tokens | ||||
| 12526 | # handle non-keywords.. | ||||
| 12527 | if ( $types_to_go[$i] ne 'k' ) { | ||||
| 12528 | my $type = $types_to_go[$i]; | ||||
| 12529 | |||||
| 12530 | # Mark most things before arrows as a quote to | ||||
| 12531 | # get them to line up. Testfile: mixed.pl. | ||||
| 12532 | if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) { | ||||
| 12533 | my $next_type = $types_to_go[ $i + 1 ]; | ||||
| 12534 | my $i_next_nonblank = | ||||
| 12535 | ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 ); | ||||
| 12536 | |||||
| 12537 | if ( $types_to_go[$i_next_nonblank] eq '=>' ) { | ||||
| 12538 | $type = 'Q'; | ||||
| 12539 | |||||
| 12540 | # Patch to ignore leading minus before words, | ||||
| 12541 | # by changing pattern 'mQ' into just 'Q', | ||||
| 12542 | # so that we can align things like this: | ||||
| 12543 | # Button => "Print letter \"~$_\"", | ||||
| 12544 | # -command => [ sub { print "$_[0]\n" }, $_ ], | ||||
| 12545 | if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" } | ||||
| 12546 | } | ||||
| 12547 | } | ||||
| 12548 | |||||
| 12549 | # patch to make numbers and quotes align | ||||
| 12550 | if ( $type eq 'n' ) { $type = 'Q' } | ||||
| 12551 | |||||
| 12552 | # patch to ignore any ! in patterns | ||||
| 12553 | if ( $type eq '!' ) { $type = '' } | ||||
| 12554 | |||||
| 12555 | $patterns[$j] .= $type; | ||||
| 12556 | } | ||||
| 12557 | |||||
| 12558 | # for keywords we have to use the actual text | ||||
| 12559 | else { | ||||
| 12560 | |||||
| 12561 | my $tok = $tokens_to_go[$i]; | ||||
| 12562 | |||||
| 12563 | # but map certain keywords to a common string to allow | ||||
| 12564 | # alignment. | ||||
| 12565 | $tok = $keyword_map{$tok} | ||||
| 12566 | if ( defined( $keyword_map{$tok} ) ); | ||||
| 12567 | $patterns[$j] .= $tok; | ||||
| 12568 | } | ||||
| 12569 | } | ||||
| 12570 | |||||
| 12571 | # done with this line .. join text of tokens to make the last field | ||||
| 12572 | push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) ); | ||||
| 12573 | return ( \@tokens, \@fields, \@patterns ); | ||||
| 12574 | } | ||||
| 12575 | |||||
| 12576 | } # end make_alignment_patterns | ||||
| 12577 | |||||
| 12578 | { # begin unmatched_indexes | ||||
| 12579 | |||||
| 12580 | # closure to keep track of unbalanced containers. | ||||
| 12581 | # arrays shared by the routines in this block: | ||||
| 12582 | 2 | 0s | my @unmatched_opening_indexes_in_this_batch; | ||
| 12583 | 1 | 0s | my @unmatched_closing_indexes_in_this_batch; | ||
| 12584 | 1 | 200ns | my %comma_arrow_count; | ||
| 12585 | |||||
| 12586 | sub is_unbalanced_batch { | ||||
| 12587 | @unmatched_opening_indexes_in_this_batch + | ||||
| 12588 | @unmatched_closing_indexes_in_this_batch; | ||||
| 12589 | } | ||||
| 12590 | |||||
| 12591 | sub comma_arrow_count { | ||||
| 12592 | my $seqno = $_[0]; | ||||
| 12593 | return $comma_arrow_count{$seqno}; | ||||
| 12594 | } | ||||
| 12595 | |||||
| 12596 | sub match_opening_and_closing_tokens { | ||||
| 12597 | |||||
| 12598 | # Match up indexes of opening and closing braces, etc, in this batch. | ||||
| 12599 | # This has to be done after all tokens are stored because unstoring | ||||
| 12600 | # of tokens would otherwise cause trouble. | ||||
| 12601 | |||||
| 12602 | @unmatched_opening_indexes_in_this_batch = (); | ||||
| 12603 | @unmatched_closing_indexes_in_this_batch = (); | ||||
| 12604 | %comma_arrow_count = (); | ||||
| 12605 | my $comma_arrow_count_contained = 0; | ||||
| 12606 | |||||
| 12607 | my ( $i, $i_mate, $token ); | ||||
| 12608 | foreach $i ( 0 .. $max_index_to_go ) { | ||||
| 12609 | if ( $type_sequence_to_go[$i] ) { | ||||
| 12610 | $token = $tokens_to_go[$i]; | ||||
| 12611 | if ( $token =~ /^[\(\[\{\?]$/ ) { | ||||
| 12612 | push @unmatched_opening_indexes_in_this_batch, $i; | ||||
| 12613 | } | ||||
| 12614 | elsif ( $token =~ /^[\)\]\}\:]$/ ) { | ||||
| 12615 | |||||
| 12616 | $i_mate = pop @unmatched_opening_indexes_in_this_batch; | ||||
| 12617 | if ( defined($i_mate) && $i_mate >= 0 ) { | ||||
| 12618 | if ( $type_sequence_to_go[$i_mate] == | ||||
| 12619 | $type_sequence_to_go[$i] ) | ||||
| 12620 | { | ||||
| 12621 | $mate_index_to_go[$i] = $i_mate; | ||||
| 12622 | $mate_index_to_go[$i_mate] = $i; | ||||
| 12623 | my $seqno = $type_sequence_to_go[$i]; | ||||
| 12624 | if ( $comma_arrow_count{$seqno} ) { | ||||
| 12625 | $comma_arrow_count_contained += | ||||
| 12626 | $comma_arrow_count{$seqno}; | ||||
| 12627 | } | ||||
| 12628 | } | ||||
| 12629 | else { | ||||
| 12630 | push @unmatched_opening_indexes_in_this_batch, | ||||
| 12631 | $i_mate; | ||||
| 12632 | push @unmatched_closing_indexes_in_this_batch, $i; | ||||
| 12633 | } | ||||
| 12634 | } | ||||
| 12635 | else { | ||||
| 12636 | push @unmatched_closing_indexes_in_this_batch, $i; | ||||
| 12637 | } | ||||
| 12638 | } | ||||
| 12639 | } | ||||
| 12640 | elsif ( $tokens_to_go[$i] eq '=>' ) { | ||||
| 12641 | if (@unmatched_opening_indexes_in_this_batch) { | ||||
| 12642 | my $j = $unmatched_opening_indexes_in_this_batch[-1]; | ||||
| 12643 | my $seqno = $type_sequence_to_go[$j]; | ||||
| 12644 | $comma_arrow_count{$seqno}++; | ||||
| 12645 | } | ||||
| 12646 | } | ||||
| 12647 | } | ||||
| 12648 | return $comma_arrow_count_contained; | ||||
| 12649 | } | ||||
| 12650 | |||||
| 12651 | sub save_opening_indentation { | ||||
| 12652 | |||||
| 12653 | # This should be called after each batch of tokens is output. It | ||||
| 12654 | # saves indentations of lines of all unmatched opening tokens. | ||||
| 12655 | # These will be used by sub get_opening_indentation. | ||||
| 12656 | |||||
| 12657 | my ( $ri_first, $ri_last, $rindentation_list ) = @_; | ||||
| 12658 | |||||
| 12659 | # we no longer need indentations of any saved indentations which | ||||
| 12660 | # are unmatched closing tokens in this batch, because we will | ||||
| 12661 | # never encounter them again. So we can delete them to keep | ||||
| 12662 | # the hash size down. | ||||
| 12663 | foreach (@unmatched_closing_indexes_in_this_batch) { | ||||
| 12664 | my $seqno = $type_sequence_to_go[$_]; | ||||
| 12665 | delete $saved_opening_indentation{$seqno}; | ||||
| 12666 | } | ||||
| 12667 | |||||
| 12668 | # we need to save indentations of any unmatched opening tokens | ||||
| 12669 | # in this batch because we may need them in a subsequent batch. | ||||
| 12670 | foreach (@unmatched_opening_indexes_in_this_batch) { | ||||
| 12671 | my $seqno = $type_sequence_to_go[$_]; | ||||
| 12672 | $saved_opening_indentation{$seqno} = [ | ||||
| 12673 | lookup_opening_indentation( | ||||
| 12674 | $_, $ri_first, $ri_last, $rindentation_list | ||||
| 12675 | ) | ||||
| 12676 | ]; | ||||
| 12677 | } | ||||
| 12678 | } | ||||
| 12679 | } # end unmatched_indexes | ||||
| 12680 | |||||
| 12681 | sub get_opening_indentation { | ||||
| 12682 | |||||
| 12683 | # get the indentation of the line which output the opening token | ||||
| 12684 | # corresponding to a given closing token in the current output batch. | ||||
| 12685 | # | ||||
| 12686 | # given: | ||||
| 12687 | # $i_closing - index in this line of a closing token ')' '}' or ']' | ||||
| 12688 | # | ||||
| 12689 | # $ri_first - reference to list of the first index $i for each output | ||||
| 12690 | # line in this batch | ||||
| 12691 | # $ri_last - reference to list of the last index $i for each output line | ||||
| 12692 | # in this batch | ||||
| 12693 | # $rindentation_list - reference to a list containing the indentation | ||||
| 12694 | # used for each line. | ||||
| 12695 | # | ||||
| 12696 | # return: | ||||
| 12697 | # -the indentation of the line which contained the opening token | ||||
| 12698 | # which matches the token at index $i_opening | ||||
| 12699 | # -and its offset (number of columns) from the start of the line | ||||
| 12700 | # | ||||
| 12701 | my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_; | ||||
| 12702 | |||||
| 12703 | # first, see if the opening token is in the current batch | ||||
| 12704 | my $i_opening = $mate_index_to_go[$i_closing]; | ||||
| 12705 | my ( $indent, $offset, $is_leading, $exists ); | ||||
| 12706 | $exists = 1; | ||||
| 12707 | if ( $i_opening >= 0 ) { | ||||
| 12708 | |||||
| 12709 | # it is..look up the indentation | ||||
| 12710 | ( $indent, $offset, $is_leading ) = | ||||
| 12711 | lookup_opening_indentation( $i_opening, $ri_first, $ri_last, | ||||
| 12712 | $rindentation_list ); | ||||
| 12713 | } | ||||
| 12714 | |||||
| 12715 | # if not, it should have been stored in the hash by a previous batch | ||||
| 12716 | else { | ||||
| 12717 | my $seqno = $type_sequence_to_go[$i_closing]; | ||||
| 12718 | if ($seqno) { | ||||
| 12719 | if ( $saved_opening_indentation{$seqno} ) { | ||||
| 12720 | ( $indent, $offset, $is_leading ) = | ||||
| 12721 | @{ $saved_opening_indentation{$seqno} }; | ||||
| 12722 | } | ||||
| 12723 | |||||
| 12724 | # some kind of serious error | ||||
| 12725 | # (example is badfile.t) | ||||
| 12726 | else { | ||||
| 12727 | $indent = 0; | ||||
| 12728 | $offset = 0; | ||||
| 12729 | $is_leading = 0; | ||||
| 12730 | $exists = 0; | ||||
| 12731 | } | ||||
| 12732 | } | ||||
| 12733 | |||||
| 12734 | # if no sequence number it must be an unbalanced container | ||||
| 12735 | else { | ||||
| 12736 | $indent = 0; | ||||
| 12737 | $offset = 0; | ||||
| 12738 | $is_leading = 0; | ||||
| 12739 | $exists = 0; | ||||
| 12740 | } | ||||
| 12741 | } | ||||
| 12742 | return ( $indent, $offset, $is_leading, $exists ); | ||||
| 12743 | } | ||||
| 12744 | |||||
| 12745 | sub lookup_opening_indentation { | ||||
| 12746 | |||||
| 12747 | # get the indentation of the line in the current output batch | ||||
| 12748 | # which output a selected opening token | ||||
| 12749 | # | ||||
| 12750 | # given: | ||||
| 12751 | # $i_opening - index of an opening token in the current output batch | ||||
| 12752 | # whose line indentation we need | ||||
| 12753 | # $ri_first - reference to list of the first index $i for each output | ||||
| 12754 | # line in this batch | ||||
| 12755 | # $ri_last - reference to list of the last index $i for each output line | ||||
| 12756 | # in this batch | ||||
| 12757 | # $rindentation_list - reference to a list containing the indentation | ||||
| 12758 | # used for each line. (NOTE: the first slot in | ||||
| 12759 | # this list is the last returned line number, and this is | ||||
| 12760 | # followed by the list of indentations). | ||||
| 12761 | # | ||||
| 12762 | # return | ||||
| 12763 | # -the indentation of the line which contained token $i_opening | ||||
| 12764 | # -and its offset (number of columns) from the start of the line | ||||
| 12765 | |||||
| 12766 | my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_; | ||||
| 12767 | |||||
| 12768 | my $nline = $rindentation_list->[0]; # line number of previous lookup | ||||
| 12769 | |||||
| 12770 | # reset line location if necessary | ||||
| 12771 | $nline = 0 if ( $i_opening < $ri_start->[$nline] ); | ||||
| 12772 | |||||
| 12773 | # find the correct line | ||||
| 12774 | unless ( $i_opening > $ri_last->[-1] ) { | ||||
| 12775 | while ( $i_opening > $ri_last->[$nline] ) { $nline++; } | ||||
| 12776 | } | ||||
| 12777 | |||||
| 12778 | # error - token index is out of bounds - shouldn't happen | ||||
| 12779 | else { | ||||
| 12780 | warning( | ||||
| 12781 | "non-fatal program bug in lookup_opening_indentation - index out of range\n" | ||||
| 12782 | ); | ||||
| 12783 | report_definite_bug(); | ||||
| 12784 | $nline = $#{$ri_last}; | ||||
| 12785 | } | ||||
| 12786 | |||||
| 12787 | $rindentation_list->[0] = | ||||
| 12788 | $nline; # save line number to start looking next call | ||||
| 12789 | my $ibeg = $ri_start->[$nline]; | ||||
| 12790 | my $offset = token_sequence_length( $ibeg, $i_opening ) - 1; | ||||
| 12791 | my $is_leading = ( $ibeg == $i_opening ); | ||||
| 12792 | return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading ); | ||||
| 12793 | } | ||||
| 12794 | |||||
| 12795 | { | ||||
| 12796 | 2 | 100ns | my %is_if_elsif_else_unless_while_until_for_foreach; | ||
| 12797 | |||||
| 12798 | # spent 10µs within Perl::Tidy::Formatter::BEGIN@12798 which was called:
# once (10µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 12807 | ||||
| 12799 | |||||
| 12800 | # These block types may have text between the keyword and opening | ||||
| 12801 | # curly. Note: 'else' does not, but must be included to allow trailing | ||||
| 12802 | # if/elsif text to be appended. | ||||
| 12803 | # patch for SWITCH/CASE: added 'case' and 'when' | ||||
| 12804 | 1 | 2µs | @_ = qw(if elsif else unless while until for foreach case when); | ||
| 12805 | 1 | 10µs | @is_if_elsif_else_unless_while_until_for_foreach{@_} = | ||
| 12806 | (1) x scalar(@_); | ||||
| 12807 | 1 | 1.53ms | 1 | 10µs | } # spent 10µs making 1 call to Perl::Tidy::Formatter::BEGIN@12798 |
| 12808 | |||||
| 12809 | sub set_adjusted_indentation { | ||||
| 12810 | |||||
| 12811 | # This routine has the final say regarding the actual indentation of | ||||
| 12812 | # a line. It starts with the basic indentation which has been | ||||
| 12813 | # defined for the leading token, and then takes into account any | ||||
| 12814 | # options that the user has set regarding special indenting and | ||||
| 12815 | # outdenting. | ||||
| 12816 | |||||
| 12817 | my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last, | ||||
| 12818 | $rindentation_list, $level_jump ) | ||||
| 12819 | = @_; | ||||
| 12820 | |||||
| 12821 | # we need to know the last token of this line | ||||
| 12822 | my ( $terminal_type, $i_terminal ) = | ||||
| 12823 | terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend ); | ||||
| 12824 | |||||
| 12825 | my $is_outdented_line = 0; | ||||
| 12826 | |||||
| 12827 | my $is_semicolon_terminated = $terminal_type eq ';' | ||||
| 12828 | && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg]; | ||||
| 12829 | |||||
| 12830 | ########################################################## | ||||
| 12831 | # Section 1: set a flag and a default indentation | ||||
| 12832 | # | ||||
| 12833 | # Most lines are indented according to the initial token. | ||||
| 12834 | # But it is common to outdent to the level just after the | ||||
| 12835 | # terminal token in certain cases... | ||||
| 12836 | # adjust_indentation flag: | ||||
| 12837 | # 0 - do not adjust | ||||
| 12838 | # 1 - outdent | ||||
| 12839 | # 2 - vertically align with opening token | ||||
| 12840 | # 3 - indent | ||||
| 12841 | ########################################################## | ||||
| 12842 | my $adjust_indentation = 0; | ||||
| 12843 | my $default_adjust_indentation = $adjust_indentation; | ||||
| 12844 | |||||
| 12845 | my ( | ||||
| 12846 | $opening_indentation, $opening_offset, | ||||
| 12847 | $is_leading, $opening_exists | ||||
| 12848 | ); | ||||
| 12849 | |||||
| 12850 | # if we are at a closing token of some type.. | ||||
| 12851 | if ( $types_to_go[$ibeg] =~ /^[\)\}\]R]$/ ) { | ||||
| 12852 | |||||
| 12853 | # get the indentation of the line containing the corresponding | ||||
| 12854 | # opening token | ||||
| 12855 | ( | ||||
| 12856 | $opening_indentation, $opening_offset, | ||||
| 12857 | $is_leading, $opening_exists | ||||
| 12858 | ) | ||||
| 12859 | = get_opening_indentation( $ibeg, $ri_first, $ri_last, | ||||
| 12860 | $rindentation_list ); | ||||
| 12861 | |||||
| 12862 | # First set the default behavior: | ||||
| 12863 | if ( | ||||
| 12864 | |||||
| 12865 | # default behavior is to outdent closing lines | ||||
| 12866 | # of the form: "); }; ]; )->xxx;" | ||||
| 12867 | $is_semicolon_terminated | ||||
| 12868 | |||||
| 12869 | # and 'cuddled parens' of the form: ")->pack(" | ||||
| 12870 | || ( | ||||
| 12871 | $terminal_type eq '(' | ||||
| 12872 | && $types_to_go[$ibeg] eq ')' | ||||
| 12873 | && ( $nesting_depth_to_go[$iend] + 1 == | ||||
| 12874 | $nesting_depth_to_go[$ibeg] ) | ||||
| 12875 | ) | ||||
| 12876 | |||||
| 12877 | # and when the next line is at a lower indentation level | ||||
| 12878 | # PATCH: and only if the style allows undoing continuation | ||||
| 12879 | # for all closing token types. We should really wait until | ||||
| 12880 | # the indentation of the next line is known and then make | ||||
| 12881 | # a decision, but that would require another pass. | ||||
| 12882 | || ( $level_jump < 0 && !$some_closing_token_indentation ) | ||||
| 12883 | ) | ||||
| 12884 | { | ||||
| 12885 | $adjust_indentation = 1; | ||||
| 12886 | } | ||||
| 12887 | |||||
| 12888 | # outdent something like '),' | ||||
| 12889 | if ( | ||||
| 12890 | $terminal_type eq ',' | ||||
| 12891 | |||||
| 12892 | # allow just one character before the comma | ||||
| 12893 | && $i_terminal == $ibeg + 1 | ||||
| 12894 | |||||
| 12895 | # require LIST environment; otherwise, we may outdent too much - | ||||
| 12896 | # this can happen in calls without parentheses (overload.t); | ||||
| 12897 | && $container_environment_to_go[$i_terminal] eq 'LIST' | ||||
| 12898 | ) | ||||
| 12899 | { | ||||
| 12900 | $adjust_indentation = 1; | ||||
| 12901 | } | ||||
| 12902 | |||||
| 12903 | # undo continuation indentation of a terminal closing token if | ||||
| 12904 | # it is the last token before a level decrease. This will allow | ||||
| 12905 | # a closing token to line up with its opening counterpart, and | ||||
| 12906 | # avoids a indentation jump larger than 1 level. | ||||
| 12907 | if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/ | ||||
| 12908 | && $i_terminal == $ibeg ) | ||||
| 12909 | { | ||||
| 12910 | my $ci = $ci_levels_to_go[$ibeg]; | ||||
| 12911 | my $lev = $levels_to_go[$ibeg]; | ||||
| 12912 | my $next_type = $types_to_go[ $ibeg + 1 ]; | ||||
| 12913 | my $i_next_nonblank = | ||||
| 12914 | ( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 ); | ||||
| 12915 | if ( $i_next_nonblank <= $max_index_to_go | ||||
| 12916 | && $levels_to_go[$i_next_nonblank] < $lev ) | ||||
| 12917 | { | ||||
| 12918 | $adjust_indentation = 1; | ||||
| 12919 | } | ||||
| 12920 | } | ||||
| 12921 | |||||
| 12922 | # YVES patch 1 of 2: | ||||
| 12923 | # Undo ci of line with leading closing eval brace, | ||||
| 12924 | # but not beyond the indention of the line with | ||||
| 12925 | # the opening brace. | ||||
| 12926 | if ( $block_type_to_go[$ibeg] eq 'eval' | ||||
| 12927 | && !$rOpts->{'line-up-parentheses'} | ||||
| 12928 | && !$rOpts->{'indent-closing-brace'} ) | ||||
| 12929 | { | ||||
| 12930 | ( | ||||
| 12931 | $opening_indentation, $opening_offset, | ||||
| 12932 | $is_leading, $opening_exists | ||||
| 12933 | ) | ||||
| 12934 | = get_opening_indentation( $ibeg, $ri_first, $ri_last, | ||||
| 12935 | $rindentation_list ); | ||||
| 12936 | my $indentation = $leading_spaces_to_go[$ibeg]; | ||||
| 12937 | if ( defined($opening_indentation) | ||||
| 12938 | && $indentation > $opening_indentation ) | ||||
| 12939 | { | ||||
| 12940 | $adjust_indentation = 1; | ||||
| 12941 | } | ||||
| 12942 | } | ||||
| 12943 | |||||
| 12944 | $default_adjust_indentation = $adjust_indentation; | ||||
| 12945 | |||||
| 12946 | # Now modify default behavior according to user request: | ||||
| 12947 | # handle option to indent non-blocks of the form ); }; ]; | ||||
| 12948 | # But don't do special indentation to something like ')->pack(' | ||||
| 12949 | if ( !$block_type_to_go[$ibeg] ) { | ||||
| 12950 | my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] }; | ||||
| 12951 | if ( $cti == 1 ) { | ||||
| 12952 | if ( $i_terminal <= $ibeg + 1 | ||||
| 12953 | || $is_semicolon_terminated ) | ||||
| 12954 | { | ||||
| 12955 | $adjust_indentation = 2; | ||||
| 12956 | } | ||||
| 12957 | else { | ||||
| 12958 | $adjust_indentation = 0; | ||||
| 12959 | } | ||||
| 12960 | } | ||||
| 12961 | elsif ( $cti == 2 ) { | ||||
| 12962 | if ($is_semicolon_terminated) { | ||||
| 12963 | $adjust_indentation = 3; | ||||
| 12964 | } | ||||
| 12965 | else { | ||||
| 12966 | $adjust_indentation = 0; | ||||
| 12967 | } | ||||
| 12968 | } | ||||
| 12969 | elsif ( $cti == 3 ) { | ||||
| 12970 | $adjust_indentation = 3; | ||||
| 12971 | } | ||||
| 12972 | } | ||||
| 12973 | |||||
| 12974 | # handle option to indent blocks | ||||
| 12975 | else { | ||||
| 12976 | if ( | ||||
| 12977 | $rOpts->{'indent-closing-brace'} | ||||
| 12978 | && ( | ||||
| 12979 | $i_terminal == $ibeg # isolated terminal '}' | ||||
| 12980 | || $is_semicolon_terminated | ||||
| 12981 | ) | ||||
| 12982 | ) # } xxxx ; | ||||
| 12983 | { | ||||
| 12984 | $adjust_indentation = 3; | ||||
| 12985 | } | ||||
| 12986 | } | ||||
| 12987 | } | ||||
| 12988 | |||||
| 12989 | # if at ');', '};', '>;', and '];' of a terminal qw quote | ||||
| 12990 | elsif ($$rpatterns[0] =~ /^qb*;$/ | ||||
| 12991 | && $$rfields[0] =~ /^([\)\}\]\>]);$/ ) | ||||
| 12992 | { | ||||
| 12993 | if ( $closing_token_indentation{$1} == 0 ) { | ||||
| 12994 | $adjust_indentation = 1; | ||||
| 12995 | } | ||||
| 12996 | else { | ||||
| 12997 | $adjust_indentation = 3; | ||||
| 12998 | } | ||||
| 12999 | } | ||||
| 13000 | |||||
| 13001 | # if line begins with a ':', align it with any | ||||
| 13002 | # previous line leading with corresponding ? | ||||
| 13003 | elsif ( $types_to_go[$ibeg] eq ':' ) { | ||||
| 13004 | ( | ||||
| 13005 | $opening_indentation, $opening_offset, | ||||
| 13006 | $is_leading, $opening_exists | ||||
| 13007 | ) | ||||
| 13008 | = get_opening_indentation( $ibeg, $ri_first, $ri_last, | ||||
| 13009 | $rindentation_list ); | ||||
| 13010 | if ($is_leading) { $adjust_indentation = 2; } | ||||
| 13011 | } | ||||
| 13012 | |||||
| 13013 | ########################################################## | ||||
| 13014 | # Section 2: set indentation according to flag set above | ||||
| 13015 | # | ||||
| 13016 | # Select the indentation object to define leading | ||||
| 13017 | # whitespace. If we are outdenting something like '} } );' | ||||
| 13018 | # then we want to use one level below the last token | ||||
| 13019 | # ($i_terminal) in order to get it to fully outdent through | ||||
| 13020 | # all levels. | ||||
| 13021 | ########################################################## | ||||
| 13022 | my $indentation; | ||||
| 13023 | my $lev; | ||||
| 13024 | my $level_end = $levels_to_go[$iend]; | ||||
| 13025 | |||||
| 13026 | if ( $adjust_indentation == 0 ) { | ||||
| 13027 | $indentation = $leading_spaces_to_go[$ibeg]; | ||||
| 13028 | $lev = $levels_to_go[$ibeg]; | ||||
| 13029 | } | ||||
| 13030 | elsif ( $adjust_indentation == 1 ) { | ||||
| 13031 | $indentation = $reduced_spaces_to_go[$i_terminal]; | ||||
| 13032 | $lev = $levels_to_go[$i_terminal]; | ||||
| 13033 | } | ||||
| 13034 | |||||
| 13035 | # handle indented closing token which aligns with opening token | ||||
| 13036 | elsif ( $adjust_indentation == 2 ) { | ||||
| 13037 | |||||
| 13038 | # handle option to align closing token with opening token | ||||
| 13039 | $lev = $levels_to_go[$ibeg]; | ||||
| 13040 | |||||
| 13041 | # calculate spaces needed to align with opening token | ||||
| 13042 | my $space_count = | ||||
| 13043 | get_SPACES($opening_indentation) + $opening_offset; | ||||
| 13044 | |||||
| 13045 | # Indent less than the previous line. | ||||
| 13046 | # | ||||
| 13047 | # Problem: For -lp we don't exactly know what it was if there | ||||
| 13048 | # were recoverable spaces sent to the aligner. A good solution | ||||
| 13049 | # would be to force a flush of the vertical alignment buffer, so | ||||
| 13050 | # that we would know. For now, this rule is used for -lp: | ||||
| 13051 | # | ||||
| 13052 | # When the last line did not start with a closing token we will | ||||
| 13053 | # be optimistic that the aligner will recover everything wanted. | ||||
| 13054 | # | ||||
| 13055 | # This rule will prevent us from breaking a hierarchy of closing | ||||
| 13056 | # tokens, and in a worst case will leave a closing paren too far | ||||
| 13057 | # indented, but this is better than frequently leaving it not | ||||
| 13058 | # indented enough. | ||||
| 13059 | my $last_spaces = get_SPACES($last_indentation_written); | ||||
| 13060 | if ( $last_leading_token !~ /^[\}\]\)]$/ ) { | ||||
| 13061 | $last_spaces += | ||||
| 13062 | get_RECOVERABLE_SPACES($last_indentation_written); | ||||
| 13063 | } | ||||
| 13064 | |||||
| 13065 | # reset the indentation to the new space count if it works | ||||
| 13066 | # only options are all or none: nothing in-between looks good | ||||
| 13067 | $lev = $levels_to_go[$ibeg]; | ||||
| 13068 | if ( $space_count < $last_spaces ) { | ||||
| 13069 | if ($rOpts_line_up_parentheses) { | ||||
| 13070 | my $lev = $levels_to_go[$ibeg]; | ||||
| 13071 | $indentation = | ||||
| 13072 | new_lp_indentation_item( $space_count, $lev, 0, 0, 0 ); | ||||
| 13073 | } | ||||
| 13074 | else { | ||||
| 13075 | $indentation = $space_count; | ||||
| 13076 | } | ||||
| 13077 | } | ||||
| 13078 | |||||
| 13079 | # revert to default if it doesn't work | ||||
| 13080 | else { | ||||
| 13081 | $space_count = leading_spaces_to_go($ibeg); | ||||
| 13082 | if ( $default_adjust_indentation == 0 ) { | ||||
| 13083 | $indentation = $leading_spaces_to_go[$ibeg]; | ||||
| 13084 | } | ||||
| 13085 | elsif ( $default_adjust_indentation == 1 ) { | ||||
| 13086 | $indentation = $reduced_spaces_to_go[$i_terminal]; | ||||
| 13087 | $lev = $levels_to_go[$i_terminal]; | ||||
| 13088 | } | ||||
| 13089 | } | ||||
| 13090 | } | ||||
| 13091 | |||||
| 13092 | # Full indentaion of closing tokens (-icb and -icp or -cti=2) | ||||
| 13093 | else { | ||||
| 13094 | |||||
| 13095 | # handle -icb (indented closing code block braces) | ||||
| 13096 | # Updated method for indented block braces: indent one full level if | ||||
| 13097 | # there is no continuation indentation. This will occur for major | ||||
| 13098 | # structures such as sub, if, else, but not for things like map | ||||
| 13099 | # blocks. | ||||
| 13100 | # | ||||
| 13101 | # Note: only code blocks without continuation indentation are | ||||
| 13102 | # handled here (if, else, unless, ..). In the following snippet, | ||||
| 13103 | # the terminal brace of the sort block will have continuation | ||||
| 13104 | # indentation as shown so it will not be handled by the coding | ||||
| 13105 | # here. We would have to undo the continuation indentation to do | ||||
| 13106 | # this, but it probably looks ok as is. This is a possible future | ||||
| 13107 | # update for semicolon terminated lines. | ||||
| 13108 | # | ||||
| 13109 | # if ($sortby eq 'date' or $sortby eq 'size') { | ||||
| 13110 | # @files = sort { | ||||
| 13111 | # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby} | ||||
| 13112 | # or $a cmp $b | ||||
| 13113 | # } @files; | ||||
| 13114 | # } | ||||
| 13115 | # | ||||
| 13116 | if ( $block_type_to_go[$ibeg] | ||||
| 13117 | && $ci_levels_to_go[$i_terminal] == 0 ) | ||||
| 13118 | { | ||||
| 13119 | my $spaces = get_SPACES( $leading_spaces_to_go[$i_terminal] ); | ||||
| 13120 | $indentation = $spaces + $rOpts_indent_columns; | ||||
| 13121 | |||||
| 13122 | # NOTE: for -lp we could create a new indentation object, but | ||||
| 13123 | # there is probably no need to do it | ||||
| 13124 | } | ||||
| 13125 | |||||
| 13126 | # handle -icp and any -icb block braces which fall through above | ||||
| 13127 | # test such as the 'sort' block mentioned above. | ||||
| 13128 | else { | ||||
| 13129 | |||||
| 13130 | # There are currently two ways to handle -icp... | ||||
| 13131 | # One way is to use the indentation of the previous line: | ||||
| 13132 | # $indentation = $last_indentation_written; | ||||
| 13133 | |||||
| 13134 | # The other way is to use the indentation that the previous line | ||||
| 13135 | # would have had if it hadn't been adjusted: | ||||
| 13136 | $indentation = $last_unadjusted_indentation; | ||||
| 13137 | |||||
| 13138 | # Current method: use the minimum of the two. This avoids | ||||
| 13139 | # inconsistent indentation. | ||||
| 13140 | if ( get_SPACES($last_indentation_written) < | ||||
| 13141 | get_SPACES($indentation) ) | ||||
| 13142 | { | ||||
| 13143 | $indentation = $last_indentation_written; | ||||
| 13144 | } | ||||
| 13145 | } | ||||
| 13146 | |||||
| 13147 | # use previous indentation but use own level | ||||
| 13148 | # to cause list to be flushed properly | ||||
| 13149 | $lev = $levels_to_go[$ibeg]; | ||||
| 13150 | } | ||||
| 13151 | |||||
| 13152 | # remember indentation except for multi-line quotes, which get | ||||
| 13153 | # no indentation | ||||
| 13154 | unless ( $ibeg == 0 && $starting_in_quote ) { | ||||
| 13155 | $last_indentation_written = $indentation; | ||||
| 13156 | $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg]; | ||||
| 13157 | $last_leading_token = $tokens_to_go[$ibeg]; | ||||
| 13158 | } | ||||
| 13159 | |||||
| 13160 | # be sure lines with leading closing tokens are not outdented more | ||||
| 13161 | # than the line which contained the corresponding opening token. | ||||
| 13162 | |||||
| 13163 | ############################################################# | ||||
| 13164 | # updated per bug report in alex_bug.pl: we must not | ||||
| 13165 | # mess with the indentation of closing logical braces so | ||||
| 13166 | # we must treat something like '} else {' as if it were | ||||
| 13167 | # an isolated brace my $is_isolated_block_brace = ( | ||||
| 13168 | # $iend == $ibeg ) && $block_type_to_go[$ibeg]; | ||||
| 13169 | ############################################################# | ||||
| 13170 | my $is_isolated_block_brace = $block_type_to_go[$ibeg] | ||||
| 13171 | && ( $iend == $ibeg | ||||
| 13172 | || $is_if_elsif_else_unless_while_until_for_foreach{ | ||||
| 13173 | $block_type_to_go[$ibeg] | ||||
| 13174 | } ); | ||||
| 13175 | |||||
| 13176 | # only do this for a ':; which is aligned with its leading '?' | ||||
| 13177 | my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading; | ||||
| 13178 | if ( defined($opening_indentation) | ||||
| 13179 | && !$is_isolated_block_brace | ||||
| 13180 | && !$is_unaligned_colon ) | ||||
| 13181 | { | ||||
| 13182 | if ( get_SPACES($opening_indentation) > get_SPACES($indentation) ) { | ||||
| 13183 | $indentation = $opening_indentation; | ||||
| 13184 | } | ||||
| 13185 | } | ||||
| 13186 | |||||
| 13187 | # remember the indentation of each line of this batch | ||||
| 13188 | push @{$rindentation_list}, $indentation; | ||||
| 13189 | |||||
| 13190 | # outdent lines with certain leading tokens... | ||||
| 13191 | if ( | ||||
| 13192 | |||||
| 13193 | # must be first word of this batch | ||||
| 13194 | $ibeg == 0 | ||||
| 13195 | |||||
| 13196 | # and ... | ||||
| 13197 | && ( | ||||
| 13198 | |||||
| 13199 | # certain leading keywords if requested | ||||
| 13200 | ( | ||||
| 13201 | $rOpts->{'outdent-keywords'} | ||||
| 13202 | && $types_to_go[$ibeg] eq 'k' | ||||
| 13203 | && $outdent_keyword{ $tokens_to_go[$ibeg] } | ||||
| 13204 | ) | ||||
| 13205 | |||||
| 13206 | # or labels if requested | ||||
| 13207 | || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' ) | ||||
| 13208 | |||||
| 13209 | # or static block comments if requested | ||||
| 13210 | || ( $types_to_go[$ibeg] eq '#' | ||||
| 13211 | && $rOpts->{'outdent-static-block-comments'} | ||||
| 13212 | && $is_static_block_comment ) | ||||
| 13213 | ) | ||||
| 13214 | ) | ||||
| 13215 | |||||
| 13216 | { | ||||
| 13217 | my $space_count = leading_spaces_to_go($ibeg); | ||||
| 13218 | if ( $space_count > 0 ) { | ||||
| 13219 | $space_count -= $rOpts_continuation_indentation; | ||||
| 13220 | $is_outdented_line = 1; | ||||
| 13221 | if ( $space_count < 0 ) { $space_count = 0 } | ||||
| 13222 | |||||
| 13223 | # do not promote a spaced static block comment to non-spaced; | ||||
| 13224 | # this is not normally necessary but could be for some | ||||
| 13225 | # unusual user inputs (such as -ci = -i) | ||||
| 13226 | if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) { | ||||
| 13227 | $space_count = 1; | ||||
| 13228 | } | ||||
| 13229 | |||||
| 13230 | if ($rOpts_line_up_parentheses) { | ||||
| 13231 | $indentation = | ||||
| 13232 | new_lp_indentation_item( $space_count, $lev, 0, 0, 0 ); | ||||
| 13233 | } | ||||
| 13234 | else { | ||||
| 13235 | $indentation = $space_count; | ||||
| 13236 | } | ||||
| 13237 | } | ||||
| 13238 | } | ||||
| 13239 | |||||
| 13240 | return ( $indentation, $lev, $level_end, $terminal_type, | ||||
| 13241 | $is_semicolon_terminated, $is_outdented_line ); | ||||
| 13242 | } | ||||
| 13243 | } | ||||
| 13244 | |||||
| 13245 | sub set_vertical_tightness_flags { | ||||
| 13246 | |||||
| 13247 | my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_; | ||||
| 13248 | |||||
| 13249 | # Define vertical tightness controls for the nth line of a batch. | ||||
| 13250 | # We create an array of parameters which tell the vertical aligner | ||||
| 13251 | # if we should combine this line with the next line to achieve the | ||||
| 13252 | # desired vertical tightness. The array of parameters contains: | ||||
| 13253 | # | ||||
| 13254 | # [0] type: 1=opening non-block 2=closing non-block | ||||
| 13255 | # 3=opening block brace 4=closing block brace | ||||
| 13256 | # | ||||
| 13257 | # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok | ||||
| 13258 | # if closing: spaces of padding to use | ||||
| 13259 | # [2] sequence number of container | ||||
| 13260 | # [3] valid flag: do not append if this flag is false. Will be | ||||
| 13261 | # true if appropriate -vt flag is set. Otherwise, Will be | ||||
| 13262 | # made true only for 2 line container in parens with -lp | ||||
| 13263 | # | ||||
| 13264 | # These flags are used by sub set_leading_whitespace in | ||||
| 13265 | # the vertical aligner | ||||
| 13266 | |||||
| 13267 | my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ]; | ||||
| 13268 | |||||
| 13269 | #-------------------------------------------------------------- | ||||
| 13270 | # Vertical Tightness Flags Section 1: | ||||
| 13271 | # Handle Lines 1 .. n-1 but not the last line | ||||
| 13272 | # For non-BLOCK tokens, we will need to examine the next line | ||||
| 13273 | # too, so we won't consider the last line. | ||||
| 13274 | #-------------------------------------------------------------- | ||||
| 13275 | if ( $n < $n_last_line ) { | ||||
| 13276 | |||||
| 13277 | #-------------------------------------------------------------- | ||||
| 13278 | # Vertical Tightness Flags Section 1a: | ||||
| 13279 | # Look for Type 1, last token of this line is a non-block opening token | ||||
| 13280 | #-------------------------------------------------------------- | ||||
| 13281 | my $ibeg_next = $$ri_first[ $n + 1 ]; | ||||
| 13282 | my $token_end = $tokens_to_go[$iend]; | ||||
| 13283 | my $iend_next = $$ri_last[ $n + 1 ]; | ||||
| 13284 | if ( | ||||
| 13285 | $type_sequence_to_go[$iend] | ||||
| 13286 | && !$block_type_to_go[$iend] | ||||
| 13287 | && $is_opening_token{$token_end} | ||||
| 13288 | && ( | ||||
| 13289 | $opening_vertical_tightness{$token_end} > 0 | ||||
| 13290 | |||||
| 13291 | # allow 2-line method call to be closed up | ||||
| 13292 | || ( $rOpts_line_up_parentheses | ||||
| 13293 | && $token_end eq '(' | ||||
| 13294 | && $iend > $ibeg | ||||
| 13295 | && $types_to_go[ $iend - 1 ] ne 'b' ) | ||||
| 13296 | ) | ||||
| 13297 | ) | ||||
| 13298 | { | ||||
| 13299 | |||||
| 13300 | # avoid multiple jumps in nesting depth in one line if | ||||
| 13301 | # requested | ||||
| 13302 | my $ovt = $opening_vertical_tightness{$token_end}; | ||||
| 13303 | my $iend_next = $$ri_last[ $n + 1 ]; | ||||
| 13304 | unless ( | ||||
| 13305 | $ovt < 2 | ||||
| 13306 | && ( $nesting_depth_to_go[ $iend_next + 1 ] != | ||||
| 13307 | $nesting_depth_to_go[$ibeg_next] ) | ||||
| 13308 | ) | ||||
| 13309 | { | ||||
| 13310 | |||||
| 13311 | # If -vt flag has not been set, mark this as invalid | ||||
| 13312 | # and aligner will validate it if it sees the closing paren | ||||
| 13313 | # within 2 lines. | ||||
| 13314 | my $valid_flag = $ovt; | ||||
| 13315 | @{$rvertical_tightness_flags} = | ||||
| 13316 | ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag ); | ||||
| 13317 | } | ||||
| 13318 | } | ||||
| 13319 | |||||
| 13320 | #-------------------------------------------------------------- | ||||
| 13321 | # Vertical Tightness Flags Section 1b: | ||||
| 13322 | # Look for Type 2, first token of next line is a non-block closing | ||||
| 13323 | # token .. and be sure this line does not have a side comment | ||||
| 13324 | #-------------------------------------------------------------- | ||||
| 13325 | my $token_next = $tokens_to_go[$ibeg_next]; | ||||
| 13326 | if ( $type_sequence_to_go[$ibeg_next] | ||||
| 13327 | && !$block_type_to_go[$ibeg_next] | ||||
| 13328 | && $is_closing_token{$token_next} | ||||
| 13329 | && $types_to_go[$iend] !~ '#' ) # for safety, shouldn't happen! | ||||
| 13330 | { | ||||
| 13331 | my $ovt = $opening_vertical_tightness{$token_next}; | ||||
| 13332 | my $cvt = $closing_vertical_tightness{$token_next}; | ||||
| 13333 | if ( | ||||
| 13334 | |||||
| 13335 | # never append a trailing line like )->pack( | ||||
| 13336 | # because it will throw off later alignment | ||||
| 13337 | ( | ||||
| 13338 | $nesting_depth_to_go[$ibeg_next] == | ||||
| 13339 | $nesting_depth_to_go[ $iend_next + 1 ] + 1 | ||||
| 13340 | ) | ||||
| 13341 | && ( | ||||
| 13342 | $cvt == 2 | ||||
| 13343 | || ( | ||||
| 13344 | $container_environment_to_go[$ibeg_next] ne 'LIST' | ||||
| 13345 | && ( | ||||
| 13346 | $cvt == 1 | ||||
| 13347 | |||||
| 13348 | # allow closing up 2-line method calls | ||||
| 13349 | || ( $rOpts_line_up_parentheses | ||||
| 13350 | && $token_next eq ')' ) | ||||
| 13351 | ) | ||||
| 13352 | ) | ||||
| 13353 | ) | ||||
| 13354 | ) | ||||
| 13355 | { | ||||
| 13356 | |||||
| 13357 | # decide which trailing closing tokens to append.. | ||||
| 13358 | my $ok = 0; | ||||
| 13359 | if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 } | ||||
| 13360 | else { | ||||
| 13361 | my $str = join( '', | ||||
| 13362 | @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] ); | ||||
| 13363 | |||||
| 13364 | # append closing token if followed by comment or ';' | ||||
| 13365 | if ( $str =~ /^b?[#;]/ ) { $ok = 1 } | ||||
| 13366 | } | ||||
| 13367 | |||||
| 13368 | if ($ok) { | ||||
| 13369 | my $valid_flag = $cvt; | ||||
| 13370 | @{$rvertical_tightness_flags} = ( | ||||
| 13371 | 2, | ||||
| 13372 | $tightness{$token_next} == 2 ? 0 : 1, | ||||
| 13373 | $type_sequence_to_go[$ibeg_next], $valid_flag, | ||||
| 13374 | ); | ||||
| 13375 | } | ||||
| 13376 | } | ||||
| 13377 | } | ||||
| 13378 | |||||
| 13379 | #-------------------------------------------------------------- | ||||
| 13380 | # Vertical Tightness Flags Section 1c: | ||||
| 13381 | # Implement the Opening Token Right flag (Type 2).. | ||||
| 13382 | # If requested, move an isolated trailing opening token to the end of | ||||
| 13383 | # the previous line which ended in a comma. We could do this | ||||
| 13384 | # in sub recombine_breakpoints but that would cause problems | ||||
| 13385 | # with -lp formatting. The problem is that indentation will | ||||
| 13386 | # quickly move far to the right in nested expressions. By | ||||
| 13387 | # doing it after indentation has been set, we avoid changes | ||||
| 13388 | # to the indentation. Actual movement of the token takes place | ||||
| 13389 | # in sub valign_output_step_B. | ||||
| 13390 | #-------------------------------------------------------------- | ||||
| 13391 | if ( | ||||
| 13392 | $opening_token_right{ $tokens_to_go[$ibeg_next] } | ||||
| 13393 | |||||
| 13394 | # previous line is not opening | ||||
| 13395 | # (use -sot to combine with it) | ||||
| 13396 | && !$is_opening_token{$token_end} | ||||
| 13397 | |||||
| 13398 | # previous line ended in one of these | ||||
| 13399 | # (add other cases if necessary; '=>' and '.' are not necessary | ||||
| 13400 | && !$block_type_to_go[$ibeg_next] | ||||
| 13401 | |||||
| 13402 | # this is a line with just an opening token | ||||
| 13403 | && ( $iend_next == $ibeg_next | ||||
| 13404 | || $iend_next == $ibeg_next + 2 | ||||
| 13405 | && $types_to_go[$iend_next] eq '#' ) | ||||
| 13406 | |||||
| 13407 | # looks bad if we align vertically with the wrong container | ||||
| 13408 | && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next] | ||||
| 13409 | ) | ||||
| 13410 | { | ||||
| 13411 | my $valid_flag = 1; | ||||
| 13412 | my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0; | ||||
| 13413 | @{$rvertical_tightness_flags} = | ||||
| 13414 | ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, ); | ||||
| 13415 | } | ||||
| 13416 | |||||
| 13417 | #-------------------------------------------------------------- | ||||
| 13418 | # Vertical Tightness Flags Section 1d: | ||||
| 13419 | # Stacking of opening and closing tokens (Type 2) | ||||
| 13420 | #-------------------------------------------------------------- | ||||
| 13421 | my $stackable; | ||||
| 13422 | my $token_beg_next = $tokens_to_go[$ibeg_next]; | ||||
| 13423 | |||||
| 13424 | # patch to make something like 'qw(' behave like an opening paren | ||||
| 13425 | # (aran.t) | ||||
| 13426 | if ( $types_to_go[$ibeg_next] eq 'q' ) { | ||||
| 13427 | if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) { | ||||
| 13428 | $token_beg_next = $1; | ||||
| 13429 | } | ||||
| 13430 | } | ||||
| 13431 | |||||
| 13432 | if ( $is_closing_token{$token_end} | ||||
| 13433 | && $is_closing_token{$token_beg_next} ) | ||||
| 13434 | { | ||||
| 13435 | $stackable = $stack_closing_token{$token_beg_next} | ||||
| 13436 | unless ( $block_type_to_go[$ibeg_next] ) | ||||
| 13437 | ; # shouldn't happen; just checking | ||||
| 13438 | } | ||||
| 13439 | elsif ($is_opening_token{$token_end} | ||||
| 13440 | && $is_opening_token{$token_beg_next} ) | ||||
| 13441 | { | ||||
| 13442 | $stackable = $stack_opening_token{$token_beg_next} | ||||
| 13443 | unless ( $block_type_to_go[$ibeg_next] ) | ||||
| 13444 | ; # shouldn't happen; just checking | ||||
| 13445 | } | ||||
| 13446 | |||||
| 13447 | if ($stackable) { | ||||
| 13448 | |||||
| 13449 | my $is_semicolon_terminated; | ||||
| 13450 | if ( $n + 1 == $n_last_line ) { | ||||
| 13451 | my ( $terminal_type, $i_terminal ) = terminal_type( | ||||
| 13452 | \@types_to_go, \@block_type_to_go, | ||||
| 13453 | $ibeg_next, $iend_next | ||||
| 13454 | ); | ||||
| 13455 | $is_semicolon_terminated = $terminal_type eq ';' | ||||
| 13456 | && $nesting_depth_to_go[$iend_next] < | ||||
| 13457 | $nesting_depth_to_go[$ibeg_next]; | ||||
| 13458 | } | ||||
| 13459 | |||||
| 13460 | # this must be a line with just an opening token | ||||
| 13461 | # or end in a semicolon | ||||
| 13462 | if ( | ||||
| 13463 | $is_semicolon_terminated | ||||
| 13464 | || ( $iend_next == $ibeg_next | ||||
| 13465 | || $iend_next == $ibeg_next + 2 | ||||
| 13466 | && $types_to_go[$iend_next] eq '#' ) | ||||
| 13467 | ) | ||||
| 13468 | { | ||||
| 13469 | my $valid_flag = 1; | ||||
| 13470 | my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0; | ||||
| 13471 | @{$rvertical_tightness_flags} = | ||||
| 13472 | ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, | ||||
| 13473 | ); | ||||
| 13474 | } | ||||
| 13475 | } | ||||
| 13476 | } | ||||
| 13477 | |||||
| 13478 | #-------------------------------------------------------------- | ||||
| 13479 | # Vertical Tightness Flags Section 2: | ||||
| 13480 | # Handle type 3, opening block braces on last line of the batch | ||||
| 13481 | # Check for a last line with isolated opening BLOCK curly | ||||
| 13482 | #-------------------------------------------------------------- | ||||
| 13483 | elsif ($rOpts_block_brace_vertical_tightness | ||||
| 13484 | && $ibeg eq $iend | ||||
| 13485 | && $types_to_go[$iend] eq '{' | ||||
| 13486 | && $block_type_to_go[$iend] =~ | ||||
| 13487 | /$block_brace_vertical_tightness_pattern/o ) | ||||
| 13488 | { | ||||
| 13489 | @{$rvertical_tightness_flags} = | ||||
| 13490 | ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 ); | ||||
| 13491 | } | ||||
| 13492 | |||||
| 13493 | #-------------------------------------------------------------- | ||||
| 13494 | # Vertical Tightness Flags Section 3: | ||||
| 13495 | # Handle type 4, a closing block brace on the last line of the batch Check | ||||
| 13496 | # for a last line with isolated closing BLOCK curly | ||||
| 13497 | #-------------------------------------------------------------- | ||||
| 13498 | elsif ($rOpts_stack_closing_block_brace | ||||
| 13499 | && $ibeg eq $iend | ||||
| 13500 | && $block_type_to_go[$iend] | ||||
| 13501 | && $types_to_go[$iend] eq '}' ) | ||||
| 13502 | { | ||||
| 13503 | my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1; | ||||
| 13504 | @{$rvertical_tightness_flags} = | ||||
| 13505 | ( 4, $spaces, $type_sequence_to_go[$iend], 1 ); | ||||
| 13506 | } | ||||
| 13507 | |||||
| 13508 | # pack in the sequence numbers of the ends of this line | ||||
| 13509 | $rvertical_tightness_flags->[4] = get_seqno($ibeg); | ||||
| 13510 | $rvertical_tightness_flags->[5] = get_seqno($iend); | ||||
| 13511 | return $rvertical_tightness_flags; | ||||
| 13512 | } | ||||
| 13513 | |||||
| 13514 | sub get_seqno { | ||||
| 13515 | |||||
| 13516 | # get opening and closing sequence numbers of a token for the vertical | ||||
| 13517 | # aligner. Assign qw quotes a value to allow qw opening and closing tokens | ||||
| 13518 | # to be treated somewhat like opening and closing tokens for stacking | ||||
| 13519 | # tokens by the vertical aligner. | ||||
| 13520 | my ($ii) = @_; | ||||
| 13521 | my $seqno = $type_sequence_to_go[$ii]; | ||||
| 13522 | if ( $types_to_go[$ii] eq 'q' ) { | ||||
| 13523 | my $SEQ_QW = -1; | ||||
| 13524 | if ( $ii > 0 ) { | ||||
| 13525 | $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/ ); | ||||
| 13526 | } | ||||
| 13527 | else { | ||||
| 13528 | if ( !$ending_in_quote ) { | ||||
| 13529 | $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /[\)\}\]]$/ ); | ||||
| 13530 | } | ||||
| 13531 | } | ||||
| 13532 | } | ||||
| 13533 | return ($seqno); | ||||
| 13534 | } | ||||
| 13535 | |||||
| 13536 | { | ||||
| 13537 | 2 | 0s | my %is_vertical_alignment_type; | ||
| 13538 | 1 | 0s | my %is_vertical_alignment_keyword; | ||
| 13539 | 1 | 300ns | my %is_terminal_alignment_type; | ||
| 13540 | |||||
| 13541 | # spent 30µs within Perl::Tidy::Formatter::BEGIN@13541 which was called:
# once (30µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 13557 | ||||
| 13542 | |||||
| 13543 | # Removed =~ from list to improve chances of alignment | ||||
| 13544 | 1 | 4µs | @_ = qw# | ||
| 13545 | = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x= | ||||
| 13546 | { ? : => && || // ~~ !~~ | ||||
| 13547 | #; | ||||
| 13548 | 1 | 12µs | @is_vertical_alignment_type{@_} = (1) x scalar(@_); | ||
| 13549 | |||||
| 13550 | # only align these at end of line | ||||
| 13551 | 1 | 3µs | @_ = qw(&& ||); | ||
| 13552 | 1 | 900ns | @is_terminal_alignment_type{@_} = (1) x scalar(@_); | ||
| 13553 | |||||
| 13554 | # eq and ne were removed from this list to improve alignment chances | ||||
| 13555 | 1 | 2µs | @_ = qw(if unless and or err for foreach while until); | ||
| 13556 | 1 | 9µs | @is_vertical_alignment_keyword{@_} = (1) x scalar(@_); | ||
| 13557 | 1 | 1.92ms | 1 | 30µs | } # spent 30µs making 1 call to Perl::Tidy::Formatter::BEGIN@13541 |
| 13558 | |||||
| 13559 | sub set_vertical_alignment_markers { | ||||
| 13560 | |||||
| 13561 | # This routine takes the first step toward vertical alignment of the | ||||
| 13562 | # lines of output text. It looks for certain tokens which can serve as | ||||
| 13563 | # vertical alignment markers (such as an '='). | ||||
| 13564 | # | ||||
| 13565 | # Method: We look at each token $i in this output batch and set | ||||
| 13566 | # $matching_token_to_go[$i] equal to those tokens at which we would | ||||
| 13567 | # accept vertical alignment. | ||||
| 13568 | |||||
| 13569 | # nothing to do if we aren't allowed to change whitespace | ||||
| 13570 | if ( !$rOpts_add_whitespace ) { | ||||
| 13571 | for my $i ( 0 .. $max_index_to_go ) { | ||||
| 13572 | $matching_token_to_go[$i] = ''; | ||||
| 13573 | } | ||||
| 13574 | return; | ||||
| 13575 | } | ||||
| 13576 | |||||
| 13577 | my ( $ri_first, $ri_last ) = @_; | ||||
| 13578 | |||||
| 13579 | # remember the index of last nonblank token before any sidecomment | ||||
| 13580 | my $i_terminal = $max_index_to_go; | ||||
| 13581 | if ( $types_to_go[$i_terminal] eq '#' ) { | ||||
| 13582 | if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) { | ||||
| 13583 | if ( $i_terminal > 0 ) { --$i_terminal } | ||||
| 13584 | } | ||||
| 13585 | } | ||||
| 13586 | |||||
| 13587 | # look at each line of this batch.. | ||||
| 13588 | my $last_vertical_alignment_before_index; | ||||
| 13589 | my $vert_last_nonblank_type; | ||||
| 13590 | my $vert_last_nonblank_token; | ||||
| 13591 | my $vert_last_nonblank_block_type; | ||||
| 13592 | my $max_line = @$ri_first - 1; | ||||
| 13593 | my ( $i, $type, $token, $block_type, $alignment_type ); | ||||
| 13594 | my ( $ibeg, $iend, $line ); | ||||
| 13595 | |||||
| 13596 | foreach $line ( 0 .. $max_line ) { | ||||
| 13597 | $ibeg = $$ri_first[$line]; | ||||
| 13598 | $iend = $$ri_last[$line]; | ||||
| 13599 | $last_vertical_alignment_before_index = -1; | ||||
| 13600 | $vert_last_nonblank_type = ''; | ||||
| 13601 | $vert_last_nonblank_token = ''; | ||||
| 13602 | $vert_last_nonblank_block_type = ''; | ||||
| 13603 | |||||
| 13604 | # look at each token in this output line.. | ||||
| 13605 | foreach $i ( $ibeg .. $iend ) { | ||||
| 13606 | $alignment_type = ''; | ||||
| 13607 | $type = $types_to_go[$i]; | ||||
| 13608 | $block_type = $block_type_to_go[$i]; | ||||
| 13609 | $token = $tokens_to_go[$i]; | ||||
| 13610 | |||||
| 13611 | # check for flag indicating that we should not align | ||||
| 13612 | # this token | ||||
| 13613 | if ( $matching_token_to_go[$i] ) { | ||||
| 13614 | $matching_token_to_go[$i] = ''; | ||||
| 13615 | next; | ||||
| 13616 | } | ||||
| 13617 | |||||
| 13618 | #-------------------------------------------------------- | ||||
| 13619 | # First see if we want to align BEFORE this token | ||||
| 13620 | #-------------------------------------------------------- | ||||
| 13621 | |||||
| 13622 | # The first possible token that we can align before | ||||
| 13623 | # is index 2 because: 1) it doesn't normally make sense to | ||||
| 13624 | # align before the first token and 2) the second | ||||
| 13625 | # token must be a blank if we are to align before | ||||
| 13626 | # the third | ||||
| 13627 | if ( $i < $ibeg + 2 ) { } | ||||
| 13628 | |||||
| 13629 | # must follow a blank token | ||||
| 13630 | elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { } | ||||
| 13631 | |||||
| 13632 | # align a side comment -- | ||||
| 13633 | elsif ( $type eq '#' ) { | ||||
| 13634 | |||||
| 13635 | unless ( | ||||
| 13636 | |||||
| 13637 | # it is a static side comment | ||||
| 13638 | ( | ||||
| 13639 | $rOpts->{'static-side-comments'} | ||||
| 13640 | && $token =~ /$static_side_comment_pattern/o | ||||
| 13641 | ) | ||||
| 13642 | |||||
| 13643 | # or a closing side comment | ||||
| 13644 | || ( $vert_last_nonblank_block_type | ||||
| 13645 | && $token =~ | ||||
| 13646 | /$closing_side_comment_prefix_pattern/o ) | ||||
| 13647 | ) | ||||
| 13648 | { | ||||
| 13649 | $alignment_type = $type; | ||||
| 13650 | } ## Example of a static side comment | ||||
| 13651 | } | ||||
| 13652 | |||||
| 13653 | # otherwise, do not align two in a row to create a | ||||
| 13654 | # blank field | ||||
| 13655 | elsif ( $last_vertical_alignment_before_index == $i - 2 ) { } | ||||
| 13656 | |||||
| 13657 | # align before one of these keywords | ||||
| 13658 | # (within a line, since $i>1) | ||||
| 13659 | elsif ( $type eq 'k' ) { | ||||
| 13660 | |||||
| 13661 | # /^(if|unless|and|or|eq|ne)$/ | ||||
| 13662 | if ( $is_vertical_alignment_keyword{$token} ) { | ||||
| 13663 | $alignment_type = $token; | ||||
| 13664 | } | ||||
| 13665 | } | ||||
| 13666 | |||||
| 13667 | # align before one of these types.. | ||||
| 13668 | # Note: add '.' after new vertical aligner is operational | ||||
| 13669 | elsif ( $is_vertical_alignment_type{$type} ) { | ||||
| 13670 | $alignment_type = $token; | ||||
| 13671 | |||||
| 13672 | # Do not align a terminal token. Although it might | ||||
| 13673 | # occasionally look ok to do this, this has been found to be | ||||
| 13674 | # a good general rule. The main problems are: | ||||
| 13675 | # (1) that the terminal token (such as an = or :) might get | ||||
| 13676 | # moved far to the right where it is hard to see because | ||||
| 13677 | # nothing follows it, and | ||||
| 13678 | # (2) doing so may prevent other good alignments. | ||||
| 13679 | # Current exceptions are && and || | ||||
| 13680 | if ( $i == $iend || $i >= $i_terminal ) { | ||||
| 13681 | $alignment_type = "" | ||||
| 13682 | unless ( $is_terminal_alignment_type{$type} ); | ||||
| 13683 | } | ||||
| 13684 | |||||
| 13685 | # Do not align leading ': (' or '. ('. This would prevent | ||||
| 13686 | # alignment in something like the following: | ||||
| 13687 | # $extra_space .= | ||||
| 13688 | # ( $input_line_number < 10 ) ? " " | ||||
| 13689 | # : ( $input_line_number < 100 ) ? " " | ||||
| 13690 | # : ""; | ||||
| 13691 | # or | ||||
| 13692 | # $code = | ||||
| 13693 | # ( $case_matters ? $accessor : " lc($accessor) " ) | ||||
| 13694 | # . ( $yesno ? " eq " : " ne " ) | ||||
| 13695 | if ( $i == $ibeg + 2 | ||||
| 13696 | && $types_to_go[$ibeg] =~ /^[\.\:]$/ | ||||
| 13697 | && $types_to_go[ $i - 1 ] eq 'b' ) | ||||
| 13698 | { | ||||
| 13699 | $alignment_type = ""; | ||||
| 13700 | } | ||||
| 13701 | |||||
| 13702 | # For a paren after keyword, only align something like this: | ||||
| 13703 | # if ( $a ) { &a } | ||||
| 13704 | # elsif ( $b ) { &b } | ||||
| 13705 | if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) { | ||||
| 13706 | $alignment_type = "" | ||||
| 13707 | unless $vert_last_nonblank_token =~ | ||||
| 13708 | /^(if|unless|elsif)$/; | ||||
| 13709 | } | ||||
| 13710 | |||||
| 13711 | # be sure the alignment tokens are unique | ||||
| 13712 | # This didn't work well: reason not determined | ||||
| 13713 | # if ($token ne $type) {$alignment_type .= $type} | ||||
| 13714 | } | ||||
| 13715 | |||||
| 13716 | # NOTE: This is deactivated because it causes the previous | ||||
| 13717 | # if/elsif alignment to fail | ||||
| 13718 | #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i]) | ||||
| 13719 | #{ $alignment_type = $type; } | ||||
| 13720 | |||||
| 13721 | if ($alignment_type) { | ||||
| 13722 | $last_vertical_alignment_before_index = $i; | ||||
| 13723 | } | ||||
| 13724 | |||||
| 13725 | #-------------------------------------------------------- | ||||
| 13726 | # Next see if we want to align AFTER the previous nonblank | ||||
| 13727 | #-------------------------------------------------------- | ||||
| 13728 | |||||
| 13729 | # We want to line up ',' and interior ';' tokens, with the added | ||||
| 13730 | # space AFTER these tokens. (Note: interior ';' is included | ||||
| 13731 | # because it may occur in short blocks). | ||||
| 13732 | if ( | ||||
| 13733 | |||||
| 13734 | # we haven't already set it | ||||
| 13735 | !$alignment_type | ||||
| 13736 | |||||
| 13737 | # and its not the first token of the line | ||||
| 13738 | && ( $i > $ibeg ) | ||||
| 13739 | |||||
| 13740 | # and it follows a blank | ||||
| 13741 | && $types_to_go[ $i - 1 ] eq 'b' | ||||
| 13742 | |||||
| 13743 | # and previous token IS one of these: | ||||
| 13744 | && ( $vert_last_nonblank_type =~ /^[\,\;]$/ ) | ||||
| 13745 | |||||
| 13746 | # and it's NOT one of these | ||||
| 13747 | && ( $type !~ /^[b\#\)\]\}]$/ ) | ||||
| 13748 | |||||
| 13749 | # then go ahead and align | ||||
| 13750 | ) | ||||
| 13751 | |||||
| 13752 | { | ||||
| 13753 | $alignment_type = $vert_last_nonblank_type; | ||||
| 13754 | } | ||||
| 13755 | |||||
| 13756 | #-------------------------------------------------------- | ||||
| 13757 | # then store the value | ||||
| 13758 | #-------------------------------------------------------- | ||||
| 13759 | $matching_token_to_go[$i] = $alignment_type; | ||||
| 13760 | if ( $type ne 'b' ) { | ||||
| 13761 | $vert_last_nonblank_type = $type; | ||||
| 13762 | $vert_last_nonblank_token = $token; | ||||
| 13763 | $vert_last_nonblank_block_type = $block_type; | ||||
| 13764 | } | ||||
| 13765 | } | ||||
| 13766 | } | ||||
| 13767 | } | ||||
| 13768 | } | ||||
| 13769 | |||||
| 13770 | sub terminal_type { | ||||
| 13771 | |||||
| 13772 | # returns type of last token on this line (terminal token), as follows: | ||||
| 13773 | # returns # for a full-line comment | ||||
| 13774 | # returns ' ' for a blank line | ||||
| 13775 | # otherwise returns final token type | ||||
| 13776 | |||||
| 13777 | my ( $rtype, $rblock_type, $ibeg, $iend ) = @_; | ||||
| 13778 | |||||
| 13779 | # check for full-line comment.. | ||||
| 13780 | if ( $$rtype[$ibeg] eq '#' ) { | ||||
| 13781 | return wantarray ? ( $$rtype[$ibeg], $ibeg ) : $$rtype[$ibeg]; | ||||
| 13782 | } | ||||
| 13783 | else { | ||||
| 13784 | |||||
| 13785 | # start at end and walk backwards.. | ||||
| 13786 | for ( my $i = $iend ; $i >= $ibeg ; $i-- ) { | ||||
| 13787 | |||||
| 13788 | # skip past any side comment and blanks | ||||
| 13789 | next if ( $$rtype[$i] eq 'b' ); | ||||
| 13790 | next if ( $$rtype[$i] eq '#' ); | ||||
| 13791 | |||||
| 13792 | # found it..make sure it is a BLOCK termination, | ||||
| 13793 | # but hide a terminal } after sort/grep/map because it is not | ||||
| 13794 | # necessarily the end of the line. (terminal.t) | ||||
| 13795 | my $terminal_type = $$rtype[$i]; | ||||
| 13796 | if ( | ||||
| 13797 | $terminal_type eq '}' | ||||
| 13798 | && ( !$$rblock_type[$i] | ||||
| 13799 | || ( $is_sort_map_grep_eval_do{ $$rblock_type[$i] } ) ) | ||||
| 13800 | ) | ||||
| 13801 | { | ||||
| 13802 | $terminal_type = 'b'; | ||||
| 13803 | } | ||||
| 13804 | return wantarray ? ( $terminal_type, $i ) : $terminal_type; | ||||
| 13805 | } | ||||
| 13806 | |||||
| 13807 | # empty line | ||||
| 13808 | return wantarray ? ( ' ', $ibeg ) : ' '; | ||||
| 13809 | } | ||||
| 13810 | } | ||||
| 13811 | |||||
| 13812 | { # set_bond_strengths | ||||
| 13813 | |||||
| 13814 | 2 | 100ns | my %is_good_keyword_breakpoint; | ||
| 13815 | 1 | 0s | my %is_lt_gt_le_ge; | ||
| 13816 | |||||
| 13817 | 1 | 0s | my %binary_bond_strength; | ||
| 13818 | 1 | 0s | my %nobreak_lhs; | ||
| 13819 | 1 | 0s | my %nobreak_rhs; | ||
| 13820 | |||||
| 13821 | 1 | 0s | my @bias_tokens; | ||
| 13822 | 1 | 400ns | my $delta_bias; | ||
| 13823 | |||||
| 13824 | sub bias_table_key { | ||||
| 13825 | my ( $type, $token ) = @_; | ||||
| 13826 | my $bias_table_key = $type; | ||||
| 13827 | if ( $type eq 'k' ) { | ||||
| 13828 | $bias_table_key = $token; | ||||
| 13829 | if ( $token eq 'err' ) { $bias_table_key = 'or' } | ||||
| 13830 | } | ||||
| 13831 | return $bias_table_key; | ||||
| 13832 | } | ||||
| 13833 | |||||
| 13834 | sub set_bond_strengths { | ||||
| 13835 | |||||
| 13836 | # spent 86µs within Perl::Tidy::Formatter::BEGIN@13836 which was called:
# once (86µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 14245 | ||||
| 13837 | |||||
| 13838 | 1 | 2µs | @_ = qw(if unless while until for foreach); | ||
| 13839 | 1 | 2µs | @is_good_keyword_breakpoint{@_} = (1) x scalar(@_); | ||
| 13840 | |||||
| 13841 | 1 | 2µs | @_ = qw(lt gt le ge); | ||
| 13842 | 1 | 2µs | @is_lt_gt_le_ge{@_} = (1) x scalar(@_); | ||
| 13843 | # | ||||
| 13844 | # The decision about where to break a line depends upon a "bond | ||||
| 13845 | # strength" between tokens. The LOWER the bond strength, the MORE | ||||
| 13846 | # likely a break. A bond strength may be any value but to simplify | ||||
| 13847 | # things there are several pre-defined strength levels: | ||||
| 13848 | |||||
| 13849 | # NO_BREAK => 10000; | ||||
| 13850 | # VERY_STRONG => 100; | ||||
| 13851 | # STRONG => 2.1; | ||||
| 13852 | # NOMINAL => 1.1; | ||||
| 13853 | # WEAK => 0.8; | ||||
| 13854 | # VERY_WEAK => 0.55; | ||||
| 13855 | |||||
| 13856 | # The strength values are based on trial-and-error, and need to be | ||||
| 13857 | # tweaked occasionally to get desired results. Some comments: | ||||
| 13858 | # | ||||
| 13859 | # 1. Only relative strengths are important. small differences | ||||
| 13860 | # in strengths can make big formatting differences. | ||||
| 13861 | # 2. Each indentation level adds one unit of bond strength. | ||||
| 13862 | # 3. A value of NO_BREAK makes an unbreakable bond | ||||
| 13863 | # 4. A value of VERY_WEAK is the strength of a ',' | ||||
| 13864 | # 5. Values below NOMINAL are considered ok break points. | ||||
| 13865 | # 6. Values above NOMINAL are considered poor break points. | ||||
| 13866 | # | ||||
| 13867 | # The bond strengths should roughly follow precedence order where | ||||
| 13868 | # possible. If you make changes, please check the results very | ||||
| 13869 | # carefully on a variety of scripts. Testing with the -extrude | ||||
| 13870 | # options is particularly helpful in exercising all of the rules. | ||||
| 13871 | |||||
| 13872 | # Wherever possible, bond strengths are defined in the following | ||||
| 13873 | # tables. There are two main stages to setting bond strengths and | ||||
| 13874 | # two types of tables: | ||||
| 13875 | # | ||||
| 13876 | # The first stage involves looking at each token individually and | ||||
| 13877 | # defining left and right bond strengths, according to if we want | ||||
| 13878 | # to break to the left or right side, and how good a break point it | ||||
| 13879 | # is. For example tokens like =, ||, && make good break points and | ||||
| 13880 | # will have low strengths, but one might want to break on either | ||||
| 13881 | # side to put them at the end of one line or beginning of the next. | ||||
| 13882 | # | ||||
| 13883 | # The second stage involves looking at certain pairs of tokens and | ||||
| 13884 | # defining a bond strength for that particular pair. This second | ||||
| 13885 | # stage has priority. | ||||
| 13886 | |||||
| 13887 | #--------------------------------------------------------------- | ||||
| 13888 | # Bond Strength BEGIN Section 1. | ||||
| 13889 | # Set left and right bond strengths of individual tokens. | ||||
| 13890 | #--------------------------------------------------------------- | ||||
| 13891 | |||||
| 13892 | # NOTE: NO_BREAK's set in this section first are HINTS which will | ||||
| 13893 | # probably not be honored. Essential NO_BREAKS's should be set in | ||||
| 13894 | # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end | ||||
| 13895 | # of this subroutine. | ||||
| 13896 | |||||
| 13897 | # Note that we are setting defaults in this section. The user | ||||
| 13898 | # cannot change bond strengths but can cause the left and right | ||||
| 13899 | # bond strengths of any token type to be swapped through the use of | ||||
| 13900 | # the -wba and -wbb flags. In this way the user can determine if a | ||||
| 13901 | # breakpoint token should appear at the end of one line or the | ||||
| 13902 | # beginning of the next line. | ||||
| 13903 | |||||
| 13904 | # The hash keys in this section are token types, plus the text of | ||||
| 13905 | # certain keywords like 'or', 'and'. | ||||
| 13906 | |||||
| 13907 | # no break around possible filehandle | ||||
| 13908 | 1 | 500ns | $left_bond_strength{'Z'} = NO_BREAK; | ||
| 13909 | 1 | 200ns | $right_bond_strength{'Z'} = NO_BREAK; | ||
| 13910 | |||||
| 13911 | # never put a bare word on a new line: | ||||
| 13912 | # example print (STDERR, "bla"); will fail with break after ( | ||||
| 13913 | 1 | 100ns | $left_bond_strength{'w'} = NO_BREAK; | ||
| 13914 | |||||
| 13915 | # blanks always have infinite strength to force breaks after | ||||
| 13916 | # real tokens | ||||
| 13917 | 1 | 200ns | $right_bond_strength{'b'} = NO_BREAK; | ||
| 13918 | |||||
| 13919 | # try not to break on exponentation | ||||
| 13920 | 1 | 1µs | @_ = qw" ** .. ... <=> "; | ||
| 13921 | 1 | 2µs | @left_bond_strength{@_} = (STRONG) x scalar(@_); | ||
| 13922 | 1 | 1µs | @right_bond_strength{@_} = (STRONG) x scalar(@_); | ||
| 13923 | |||||
| 13924 | # The comma-arrow has very low precedence but not a good break point | ||||
| 13925 | 1 | 200ns | $left_bond_strength{'=>'} = NO_BREAK; | ||
| 13926 | 1 | 200ns | $right_bond_strength{'=>'} = NOMINAL; | ||
| 13927 | |||||
| 13928 | # ok to break after label | ||||
| 13929 | 1 | 700ns | $left_bond_strength{'J'} = NO_BREAK; | ||
| 13930 | 1 | 500ns | $right_bond_strength{'J'} = NOMINAL; | ||
| 13931 | 1 | 100ns | $left_bond_strength{'j'} = STRONG; | ||
| 13932 | 1 | 100ns | $right_bond_strength{'j'} = STRONG; | ||
| 13933 | 1 | 100ns | $left_bond_strength{'A'} = STRONG; | ||
| 13934 | 1 | 100ns | $right_bond_strength{'A'} = STRONG; | ||
| 13935 | |||||
| 13936 | 1 | 100ns | $left_bond_strength{'->'} = STRONG; | ||
| 13937 | 1 | 100ns | $right_bond_strength{'->'} = VERY_STRONG; | ||
| 13938 | |||||
| 13939 | 1 | 200ns | $left_bond_strength{'CORE::'} = NOMINAL; | ||
| 13940 | 1 | 100ns | $right_bond_strength{'CORE::'} = NO_BREAK; | ||
| 13941 | |||||
| 13942 | # breaking AFTER modulus operator is ok: | ||||
| 13943 | 1 | 900ns | @_ = qw" % "; | ||
| 13944 | 1 | 800ns | @left_bond_strength{@_} = (STRONG) x scalar(@_); | ||
| 13945 | 1 | 400ns | @right_bond_strength{@_} = | ||
| 13946 | ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@_); | ||||
| 13947 | |||||
| 13948 | # Break AFTER math operators * and / | ||||
| 13949 | 1 | 800ns | @_ = qw" * / x "; | ||
| 13950 | 1 | 2µs | @left_bond_strength{@_} = (STRONG) x scalar(@_); | ||
| 13951 | 1 | 4µs | @right_bond_strength{@_} = (NOMINAL) x scalar(@_); | ||
| 13952 | |||||
| 13953 | # Break AFTER weakest math operators + and - | ||||
| 13954 | # Make them weaker than * but a bit stronger than '.' | ||||
| 13955 | 1 | 1µs | @_ = qw" + - "; | ||
| 13956 | 1 | 700ns | @left_bond_strength{@_} = (STRONG) x scalar(@_); | ||
| 13957 | 1 | 500ns | @right_bond_strength{@_} = | ||
| 13958 | ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@_); | ||||
| 13959 | |||||
| 13960 | # breaking BEFORE these is just ok: | ||||
| 13961 | 1 | 700ns | @_ = qw" >> << "; | ||
| 13962 | 1 | 700ns | @right_bond_strength{@_} = (STRONG) x scalar(@_); | ||
| 13963 | 1 | 500ns | @left_bond_strength{@_} = (NOMINAL) x scalar(@_); | ||
| 13964 | |||||
| 13965 | # breaking before the string concatenation operator seems best | ||||
| 13966 | # because it can be hard to see at the end of a line | ||||
| 13967 | 1 | 200ns | $right_bond_strength{'.'} = STRONG; | ||
| 13968 | 1 | 200ns | $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK; | ||
| 13969 | |||||
| 13970 | 1 | 1µs | @_ = qw"} ] ) R"; | ||
| 13971 | 1 | 1µs | @left_bond_strength{@_} = (STRONG) x scalar(@_); | ||
| 13972 | 1 | 700ns | @right_bond_strength{@_} = (NOMINAL) x scalar(@_); | ||
| 13973 | |||||
| 13974 | # make these a little weaker than nominal so that they get | ||||
| 13975 | # favored for end-of-line characters | ||||
| 13976 | 1 | 2µs | @_ = qw"!= == =~ !~ ~~ !~~"; | ||
| 13977 | 1 | 2µs | @left_bond_strength{@_} = (STRONG) x scalar(@_); | ||
| 13978 | 1 | 1µs | @right_bond_strength{@_} = | ||
| 13979 | ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@_); | ||||
| 13980 | |||||
| 13981 | # break AFTER these | ||||
| 13982 | 1 | 2µs | @_ = qw" < > | & >= <="; | ||
| 13983 | 1 | 3µs | @left_bond_strength{@_} = (VERY_STRONG) x scalar(@_); | ||
| 13984 | 1 | 2µs | @right_bond_strength{@_} = | ||
| 13985 | ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@_); | ||||
| 13986 | |||||
| 13987 | # breaking either before or after a quote is ok | ||||
| 13988 | # but bias for breaking before a quote | ||||
| 13989 | 1 | 200ns | $left_bond_strength{'Q'} = NOMINAL; | ||
| 13990 | 1 | 100ns | $right_bond_strength{'Q'} = NOMINAL + 0.02; | ||
| 13991 | 1 | 200ns | $left_bond_strength{'q'} = NOMINAL; | ||
| 13992 | 1 | 100ns | $right_bond_strength{'q'} = NOMINAL; | ||
| 13993 | |||||
| 13994 | # starting a line with a keyword is usually ok | ||||
| 13995 | 1 | 100ns | $left_bond_strength{'k'} = NOMINAL; | ||
| 13996 | |||||
| 13997 | # we usually want to bond a keyword strongly to what immediately | ||||
| 13998 | # follows, rather than leaving it stranded at the end of a line | ||||
| 13999 | 1 | 100ns | $right_bond_strength{'k'} = STRONG; | ||
| 14000 | |||||
| 14001 | 1 | 100ns | $left_bond_strength{'G'} = NOMINAL; | ||
| 14002 | 1 | 100ns | $right_bond_strength{'G'} = STRONG; | ||
| 14003 | |||||
| 14004 | # assignment operators | ||||
| 14005 | 1 | 4µs | @_ = qw( | ||
| 14006 | = **= += *= &= <<= &&= | ||||
| 14007 | -= /= |= >>= ||= //= | ||||
| 14008 | .= %= ^= | ||||
| 14009 | x= | ||||
| 14010 | ); | ||||
| 14011 | |||||
| 14012 | # Default is to break AFTER various assignment operators | ||||
| 14013 | 1 | 4µs | @left_bond_strength{@_} = (STRONG) x scalar(@_); | ||
| 14014 | 1 | 2µs | @right_bond_strength{@_} = | ||
| 14015 | ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@_); | ||||
| 14016 | |||||
| 14017 | # Default is to break BEFORE '&&' and '||' and '//' | ||||
| 14018 | # set strength of '||' to same as '=' so that chains like | ||||
| 14019 | # $a = $b || $c || $d will break before the first '||' | ||||
| 14020 | 1 | 100ns | $right_bond_strength{'||'} = NOMINAL; | ||
| 14021 | 1 | 300ns | $left_bond_strength{'||'} = $right_bond_strength{'='}; | ||
| 14022 | |||||
| 14023 | # same thing for '//' | ||||
| 14024 | 1 | 200ns | $right_bond_strength{'//'} = NOMINAL; | ||
| 14025 | 1 | 200ns | $left_bond_strength{'//'} = $right_bond_strength{'='}; | ||
| 14026 | |||||
| 14027 | # set strength of && a little higher than || | ||||
| 14028 | 1 | 200ns | $right_bond_strength{'&&'} = NOMINAL; | ||
| 14029 | 1 | 500ns | $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1; | ||
| 14030 | |||||
| 14031 | 1 | 200ns | $left_bond_strength{';'} = VERY_STRONG; | ||
| 14032 | 1 | 200ns | $right_bond_strength{';'} = VERY_WEAK; | ||
| 14033 | 1 | 100ns | $left_bond_strength{'f'} = VERY_STRONG; | ||
| 14034 | |||||
| 14035 | # make right strength of for ';' a little less than '=' | ||||
| 14036 | # to make for contents break after the ';' to avoid this: | ||||
| 14037 | # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j += | ||||
| 14038 | # $number_of_fields ) | ||||
| 14039 | # and make it weaker than ',' and 'and' too | ||||
| 14040 | 1 | 100ns | $right_bond_strength{'f'} = VERY_WEAK - 0.03; | ||
| 14041 | |||||
| 14042 | # The strengths of ?/: should be somewhere between | ||||
| 14043 | # an '=' and a quote (NOMINAL), | ||||
| 14044 | # make strength of ':' slightly less than '?' to help | ||||
| 14045 | # break long chains of ? : after the colons | ||||
| 14046 | 1 | 2µs | $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL; | ||
| 14047 | 1 | 1µs | $right_bond_strength{':'} = NO_BREAK; | ||
| 14048 | 1 | 400ns | $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01; | ||
| 14049 | 1 | 200ns | $right_bond_strength{'?'} = NO_BREAK; | ||
| 14050 | |||||
| 14051 | 1 | 100ns | $left_bond_strength{','} = VERY_STRONG; | ||
| 14052 | 1 | 100ns | $right_bond_strength{','} = VERY_WEAK; | ||
| 14053 | |||||
| 14054 | # remaining digraphs and trigraphs not defined above | ||||
| 14055 | 1 | 2µs | @_ = qw( :: <> ++ --); | ||
| 14056 | 1 | 1µs | @left_bond_strength{@_} = (WEAK) x scalar(@_); | ||
| 14057 | 1 | 1µs | @right_bond_strength{@_} = (STRONG) x scalar(@_); | ||
| 14058 | |||||
| 14059 | # Set bond strengths of certain keywords | ||||
| 14060 | # make 'or', 'err', 'and' slightly weaker than a ',' | ||||
| 14061 | 1 | 200ns | $left_bond_strength{'and'} = VERY_WEAK - 0.01; | ||
| 14062 | 1 | 100ns | $left_bond_strength{'or'} = VERY_WEAK - 0.02; | ||
| 14063 | 1 | 100ns | $left_bond_strength{'err'} = VERY_WEAK - 0.02; | ||
| 14064 | 1 | 200ns | $left_bond_strength{'xor'} = NOMINAL; | ||
| 14065 | 1 | 100ns | $right_bond_strength{'and'} = NOMINAL; | ||
| 14066 | 1 | 100ns | $right_bond_strength{'or'} = NOMINAL; | ||
| 14067 | 1 | 100ns | $right_bond_strength{'err'} = NOMINAL; | ||
| 14068 | 1 | 200ns | $right_bond_strength{'xor'} = STRONG; | ||
| 14069 | |||||
| 14070 | #--------------------------------------------------------------- | ||||
| 14071 | # Bond Strength BEGIN Section 2. | ||||
| 14072 | # Set binary rules for bond strengths between certain token types. | ||||
| 14073 | #--------------------------------------------------------------- | ||||
| 14074 | |||||
| 14075 | # We have a little problem making tables which apply to the | ||||
| 14076 | # container tokens. Here is a list of container tokens and | ||||
| 14077 | # their types: | ||||
| 14078 | # | ||||
| 14079 | # type tokens // meaning | ||||
| 14080 | # { {, [, ( // indent | ||||
| 14081 | # } }, ], ) // outdent | ||||
| 14082 | # [ [ // left non-structural [ (enclosing an array index) | ||||
| 14083 | # ] ] // right non-structural square bracket | ||||
| 14084 | # ( ( // left non-structural paren | ||||
| 14085 | # ) ) // right non-structural paren | ||||
| 14086 | # L { // left non-structural curly brace (enclosing a key) | ||||
| 14087 | # R } // right non-structural curly brace | ||||
| 14088 | # | ||||
| 14089 | # Some rules apply to token types and some to just the token | ||||
| 14090 | # itself. We solve the problem by combining type and token into a | ||||
| 14091 | # new hash key for the container types. | ||||
| 14092 | # | ||||
| 14093 | # If a rule applies to a token 'type' then we need to make rules | ||||
| 14094 | # for each of these 'type.token' combinations: | ||||
| 14095 | # Type Type.Token | ||||
| 14096 | # { {{, {[, {( | ||||
| 14097 | # [ [[ | ||||
| 14098 | # ( (( | ||||
| 14099 | # L L{ | ||||
| 14100 | # } }}, }], }) | ||||
| 14101 | # ] ]] | ||||
| 14102 | # ) )) | ||||
| 14103 | # R R} | ||||
| 14104 | # | ||||
| 14105 | # If a rule applies to a token then we need to make rules for | ||||
| 14106 | # these 'type.token' combinations: | ||||
| 14107 | # Token Type.Token | ||||
| 14108 | # { {{, L{ | ||||
| 14109 | # [ {[, [[ | ||||
| 14110 | # ( {(, (( | ||||
| 14111 | # } }}, R} | ||||
| 14112 | # ] }], ]] | ||||
| 14113 | # ) }), )) | ||||
| 14114 | |||||
| 14115 | # allow long lines before final { in an if statement, as in: | ||||
| 14116 | # if (.......... | ||||
| 14117 | # ..........) | ||||
| 14118 | # { | ||||
| 14119 | # | ||||
| 14120 | # Otherwise, the line before the { tends to be too short. | ||||
| 14121 | |||||
| 14122 | 1 | 700ns | $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03; | ||
| 14123 | 1 | 400ns | $binary_bond_strength{'(('}{'{{'} = NOMINAL; | ||
| 14124 | |||||
| 14125 | # break on something like '} (', but keep this stronger than a ',' | ||||
| 14126 | # example is in 'howe.pl' | ||||
| 14127 | 1 | 300ns | $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK; | ||
| 14128 | 1 | 300ns | $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK; | ||
| 14129 | |||||
| 14130 | # keep matrix and hash indices together | ||||
| 14131 | # but make them a little below STRONG to allow breaking open | ||||
| 14132 | # something like {'some-word'}{'some-very-long-word'} at the }{ | ||||
| 14133 | # (bracebrk.t) | ||||
| 14134 | 1 | 300ns | $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL; | ||
| 14135 | 1 | 200ns | $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL; | ||
| 14136 | 1 | 200ns | $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL; | ||
| 14137 | 1 | 100ns | $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL; | ||
| 14138 | |||||
| 14139 | # increase strength to the point where a break in the following | ||||
| 14140 | # will be after the opening paren rather than at the arrow: | ||||
| 14141 | # $a->$b($c); | ||||
| 14142 | 1 | 300ns | $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG; | ||
| 14143 | |||||
| 14144 | 1 | 100ns | $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; | ||
| 14145 | 1 | 100ns | $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; | ||
| 14146 | 1 | 300ns | $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; | ||
| 14147 | 1 | 700ns | $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; | ||
| 14148 | 1 | 100ns | $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; | ||
| 14149 | 1 | 100ns | $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; | ||
| 14150 | |||||
| 14151 | 1 | 100ns | $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL; | ||
| 14152 | 1 | 100ns | $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL; | ||
| 14153 | 1 | 200ns | $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL; | ||
| 14154 | 1 | 100ns | $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL; | ||
| 14155 | |||||
| 14156 | #--------------------------------------------------------------- | ||||
| 14157 | # Binary NO_BREAK rules | ||||
| 14158 | #--------------------------------------------------------------- | ||||
| 14159 | |||||
| 14160 | # use strict requires that bare word and => not be separated | ||||
| 14161 | 1 | 400ns | $binary_bond_strength{'C'}{'=>'} = NO_BREAK; | ||
| 14162 | 1 | 300ns | $binary_bond_strength{'U'}{'=>'} = NO_BREAK; | ||
| 14163 | |||||
| 14164 | # Never break between a bareword and a following paren because | ||||
| 14165 | # perl may give an error. For example, if a break is placed | ||||
| 14166 | # between 'to_filehandle' and its '(' the following line will | ||||
| 14167 | # give a syntax error [Carp.pm]: my( $no) =fileno( | ||||
| 14168 | # to_filehandle( $in)) ; | ||||
| 14169 | 1 | 200ns | $binary_bond_strength{'C'}{'(('} = NO_BREAK; | ||
| 14170 | 1 | 100ns | $binary_bond_strength{'C'}{'{('} = NO_BREAK; | ||
| 14171 | 1 | 200ns | $binary_bond_strength{'U'}{'(('} = NO_BREAK; | ||
| 14172 | 1 | 100ns | $binary_bond_strength{'U'}{'{('} = NO_BREAK; | ||
| 14173 | |||||
| 14174 | # use strict requires that bare word within braces not start new | ||||
| 14175 | # line | ||||
| 14176 | 1 | 300ns | $binary_bond_strength{'L{'}{'w'} = NO_BREAK; | ||
| 14177 | |||||
| 14178 | 1 | 200ns | $binary_bond_strength{'w'}{'R}'} = NO_BREAK; | ||
| 14179 | |||||
| 14180 | # use strict requires that bare word and => not be separated | ||||
| 14181 | 1 | 200ns | $binary_bond_strength{'w'}{'=>'} = NO_BREAK; | ||
| 14182 | |||||
| 14183 | # use strict does not allow separating type info from trailing { } | ||||
| 14184 | # testfile is readmail.pl | ||||
| 14185 | 1 | 300ns | $binary_bond_strength{'t'}{'L{'} = NO_BREAK; | ||
| 14186 | 1 | 100ns | $binary_bond_strength{'i'}{'L{'} = NO_BREAK; | ||
| 14187 | |||||
| 14188 | # As a defensive measure, do not break between a '(' and a | ||||
| 14189 | # filehandle. In some cases, this can cause an error. For | ||||
| 14190 | # example, the following program works: | ||||
| 14191 | # my $msg="hi!\n"; | ||||
| 14192 | |||||
| 14193 | # ( STDOUT | ||||
| 14194 | # $msg | ||||
| 14195 | # ); | ||||
| 14196 | # | ||||
| 14197 | # But this program fails: | ||||
| 14198 | # my $msg="hi!\n"; | ||||
| 14199 | |||||
| 14200 | # ( | ||||
| 14201 | # STDOUT | ||||
| 14202 | # $msg | ||||
| 14203 | # ); | ||||
| 14204 | # | ||||
| 14205 | # This is normally only a problem with the 'extrude' option | ||||
| 14206 | 1 | 200ns | $binary_bond_strength{'(('}{'Y'} = NO_BREAK; | ||
| 14207 | 1 | 200ns | $binary_bond_strength{'{('}{'Y'} = NO_BREAK; | ||
| 14208 | |||||
| 14209 | # never break between sub name and opening paren | ||||
| 14210 | 1 | 100ns | $binary_bond_strength{'w'}{'(('} = NO_BREAK; | ||
| 14211 | 1 | 100ns | $binary_bond_strength{'w'}{'{('} = NO_BREAK; | ||
| 14212 | |||||
| 14213 | # keep '}' together with ';' | ||||
| 14214 | 1 | 200ns | $binary_bond_strength{'}}'}{';'} = NO_BREAK; | ||
| 14215 | |||||
| 14216 | # Breaking before a ++ can cause perl to guess wrong. For | ||||
| 14217 | # example the following line will cause a syntax error | ||||
| 14218 | # with -extrude if we break between '$i' and '++' [fixstyle2] | ||||
| 14219 | # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) ); | ||||
| 14220 | 1 | 200ns | $nobreak_lhs{'++'} = NO_BREAK; | ||
| 14221 | |||||
| 14222 | # Do not break before a possible file handle | ||||
| 14223 | 1 | 100ns | $nobreak_lhs{'Z'} = NO_BREAK; | ||
| 14224 | |||||
| 14225 | # use strict hates bare words on any new line. For | ||||
| 14226 | # example, a break before the underscore here provokes the | ||||
| 14227 | # wrath of use strict: | ||||
| 14228 | # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) { | ||||
| 14229 | 1 | 200ns | $nobreak_rhs{'F'} = NO_BREAK; | ||
| 14230 | 1 | 100ns | $nobreak_rhs{'CORE::'} = NO_BREAK; | ||
| 14231 | |||||
| 14232 | #--------------------------------------------------------------- | ||||
| 14233 | # Bond Strength BEGIN Section 3. | ||||
| 14234 | # Define tables and values for applying a small bias to the above | ||||
| 14235 | # values. | ||||
| 14236 | #--------------------------------------------------------------- | ||||
| 14237 | # Adding a small 'bias' to strengths is a simple way to make a line | ||||
| 14238 | # break at the first of a sequence of identical terms. For | ||||
| 14239 | # example, to force long string of conditional operators to break | ||||
| 14240 | # with each line ending in a ':', we can add a small number to the | ||||
| 14241 | # bond strength of each ':' (colon.t) | ||||
| 14242 | 1 | 1µs | @bias_tokens = qw( : && || f and or . ); # tokens which get bias | ||
| 14243 | 1 | 6µs | $delta_bias = 0.0001; # a very small strength level | ||
| 14244 | |||||
| 14245 | 1 | 1.56ms | 1 | 86µs | } ## end BEGIN # spent 86µs making 1 call to Perl::Tidy::Formatter::BEGIN@13836 |
| 14246 | |||||
| 14247 | # patch-its always ok to break at end of line | ||||
| 14248 | $nobreak_to_go[$max_index_to_go] = 0; | ||||
| 14249 | |||||
| 14250 | # we start a new set of bias values for each line | ||||
| 14251 | my %bias; | ||||
| 14252 | @bias{@bias_tokens} = (0) x scalar(@bias_tokens); | ||||
| 14253 | my $code_bias = -.01; # bias for closing block braces | ||||
| 14254 | |||||
| 14255 | my $type = 'b'; | ||||
| 14256 | my $token = ' '; | ||||
| 14257 | my $last_type; | ||||
| 14258 | my $last_nonblank_type = $type; | ||||
| 14259 | my $last_nonblank_token = $token; | ||||
| 14260 | my $list_str = $left_bond_strength{'?'}; | ||||
| 14261 | |||||
| 14262 | my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token, | ||||
| 14263 | $next_nonblank_type, $next_token, $next_type, $total_nesting_depth, | ||||
| 14264 | ); | ||||
| 14265 | |||||
| 14266 | # main loop to compute bond strengths between each pair of tokens | ||||
| 14267 | for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) { | ||||
| 14268 | $last_type = $type; | ||||
| 14269 | if ( $type ne 'b' ) { | ||||
| 14270 | $last_nonblank_type = $type; | ||||
| 14271 | $last_nonblank_token = $token; | ||||
| 14272 | } | ||||
| 14273 | $type = $types_to_go[$i]; | ||||
| 14274 | |||||
| 14275 | # strength on both sides of a blank is the same | ||||
| 14276 | if ( $type eq 'b' && $last_type ne 'b' ) { | ||||
| 14277 | $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ]; | ||||
| 14278 | next; | ||||
| 14279 | } | ||||
| 14280 | |||||
| 14281 | $token = $tokens_to_go[$i]; | ||||
| 14282 | $block_type = $block_type_to_go[$i]; | ||||
| 14283 | $i_next = $i + 1; | ||||
| 14284 | $next_type = $types_to_go[$i_next]; | ||||
| 14285 | $next_token = $tokens_to_go[$i_next]; | ||||
| 14286 | $total_nesting_depth = $nesting_depth_to_go[$i_next]; | ||||
| 14287 | $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 ); | ||||
| 14288 | $next_nonblank_type = $types_to_go[$i_next_nonblank]; | ||||
| 14289 | $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; | ||||
| 14290 | |||||
| 14291 | # We are computing the strength of the bond between the current | ||||
| 14292 | # token and the NEXT token. | ||||
| 14293 | |||||
| 14294 | #--------------------------------------------------------------- | ||||
| 14295 | # Bond Strength Section 1: | ||||
| 14296 | # First Approximation. | ||||
| 14297 | # Use minimum of individual left and right tabulated bond | ||||
| 14298 | # strengths. | ||||
| 14299 | #--------------------------------------------------------------- | ||||
| 14300 | my $bsr = $right_bond_strength{$type}; | ||||
| 14301 | my $bsl = $left_bond_strength{$next_nonblank_type}; | ||||
| 14302 | |||||
| 14303 | # define right bond strengths of certain keywords | ||||
| 14304 | if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) { | ||||
| 14305 | $bsr = $right_bond_strength{$token}; | ||||
| 14306 | } | ||||
| 14307 | elsif ( $token eq 'ne' or $token eq 'eq' ) { | ||||
| 14308 | $bsr = NOMINAL; | ||||
| 14309 | } | ||||
| 14310 | |||||
| 14311 | # set terminal bond strength to the nominal value | ||||
| 14312 | # this will cause good preceding breaks to be retained | ||||
| 14313 | if ( $i_next_nonblank > $max_index_to_go ) { | ||||
| 14314 | $bsl = NOMINAL; | ||||
| 14315 | } | ||||
| 14316 | |||||
| 14317 | # define right bond strengths of certain keywords | ||||
| 14318 | if ( $next_nonblank_type eq 'k' | ||||
| 14319 | && defined( $left_bond_strength{$next_nonblank_token} ) ) | ||||
| 14320 | { | ||||
| 14321 | $bsl = $left_bond_strength{$next_nonblank_token}; | ||||
| 14322 | } | ||||
| 14323 | elsif ($next_nonblank_token eq 'ne' | ||||
| 14324 | or $next_nonblank_token eq 'eq' ) | ||||
| 14325 | { | ||||
| 14326 | $bsl = NOMINAL; | ||||
| 14327 | } | ||||
| 14328 | elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) { | ||||
| 14329 | $bsl = 0.9 * NOMINAL + 0.1 * STRONG; | ||||
| 14330 | } | ||||
| 14331 | |||||
| 14332 | # Use the minimum of the left and right strengths. Note: it might | ||||
| 14333 | # seem that we would want to keep a NO_BREAK if either token has | ||||
| 14334 | # this value. This didn't work, for example because in an arrow | ||||
| 14335 | # list, it prevents the comma from separating from the following | ||||
| 14336 | # bare word (which is probably quoted by its arrow). So necessary | ||||
| 14337 | # NO_BREAK's have to be handled as special cases in the final | ||||
| 14338 | # section. | ||||
| 14339 | if ( !defined($bsr) ) { $bsr = VERY_STRONG } | ||||
| 14340 | if ( !defined($bsl) ) { $bsl = VERY_STRONG } | ||||
| 14341 | my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl; | ||||
| 14342 | my $bond_str_1 = $bond_str; | ||||
| 14343 | |||||
| 14344 | #--------------------------------------------------------------- | ||||
| 14345 | # Bond Strength Section 2: | ||||
| 14346 | # Apply hardwired rules.. | ||||
| 14347 | #--------------------------------------------------------------- | ||||
| 14348 | |||||
| 14349 | # Patch to put terminal or clauses on a new line: Weaken the bond | ||||
| 14350 | # at an || followed by die or similar keyword to make the terminal | ||||
| 14351 | # or clause fall on a new line, like this: | ||||
| 14352 | # | ||||
| 14353 | # my $class = shift | ||||
| 14354 | # || die "Cannot add broadcast: No class identifier found"; | ||||
| 14355 | # | ||||
| 14356 | # Otherwise the break will be at the previous '=' since the || and | ||||
| 14357 | # = have the same starting strength and the or is biased, like | ||||
| 14358 | # this: | ||||
| 14359 | # | ||||
| 14360 | # my $class = | ||||
| 14361 | # shift || die "Cannot add broadcast: No class identifier found"; | ||||
| 14362 | # | ||||
| 14363 | # In any case if the user places a break at either the = or the || | ||||
| 14364 | # it should remain there. | ||||
| 14365 | if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) { | ||||
| 14366 | if ( $next_nonblank_token =~ /^(die|confess|croak|warn)$/ ) { | ||||
| 14367 | if ( $want_break_before{$token} && $i > 0 ) { | ||||
| 14368 | $bond_strength_to_go[ $i - 1 ] -= $delta_bias; | ||||
| 14369 | } | ||||
| 14370 | else { | ||||
| 14371 | $bond_str -= $delta_bias; | ||||
| 14372 | } | ||||
| 14373 | } | ||||
| 14374 | } | ||||
| 14375 | |||||
| 14376 | # good to break after end of code blocks | ||||
| 14377 | if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) { | ||||
| 14378 | |||||
| 14379 | $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias; | ||||
| 14380 | $code_bias += $delta_bias; | ||||
| 14381 | } | ||||
| 14382 | |||||
| 14383 | if ( $type eq 'k' ) { | ||||
| 14384 | |||||
| 14385 | # allow certain control keywords to stand out | ||||
| 14386 | if ( $next_nonblank_type eq 'k' | ||||
| 14387 | && $is_last_next_redo_return{$token} ) | ||||
| 14388 | { | ||||
| 14389 | $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK; | ||||
| 14390 | } | ||||
| 14391 | |||||
| 14392 | # Don't break after keyword my. This is a quick fix for a | ||||
| 14393 | # rare problem with perl. An example is this line from file | ||||
| 14394 | # Container.pm: | ||||
| 14395 | |||||
| 14396 | # foreach my $question( Debian::DebConf::ConfigDb::gettree( | ||||
| 14397 | # $this->{'question'} ) ) | ||||
| 14398 | |||||
| 14399 | if ( $token eq 'my' ) { | ||||
| 14400 | $bond_str = NO_BREAK; | ||||
| 14401 | } | ||||
| 14402 | |||||
| 14403 | } | ||||
| 14404 | |||||
| 14405 | # good to break before 'if', 'unless', etc | ||||
| 14406 | if ( $is_if_brace_follower{$next_nonblank_token} ) { | ||||
| 14407 | $bond_str = VERY_WEAK; | ||||
| 14408 | } | ||||
| 14409 | |||||
| 14410 | if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) { | ||||
| 14411 | |||||
| 14412 | # FIXME: needs more testing | ||||
| 14413 | if ( $is_keyword_returning_list{$next_nonblank_token} ) { | ||||
| 14414 | $bond_str = $list_str if ( $bond_str > $list_str ); | ||||
| 14415 | } | ||||
| 14416 | |||||
| 14417 | # keywords like 'unless', 'if', etc, within statements | ||||
| 14418 | # make good breaks | ||||
| 14419 | if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) { | ||||
| 14420 | $bond_str = VERY_WEAK / 1.05; | ||||
| 14421 | } | ||||
| 14422 | } | ||||
| 14423 | |||||
| 14424 | # try not to break before a comma-arrow | ||||
| 14425 | elsif ( $next_nonblank_type eq '=>' ) { | ||||
| 14426 | if ( $bond_str < STRONG ) { $bond_str = STRONG } | ||||
| 14427 | } | ||||
| 14428 | |||||
| 14429 | #--------------------------------------------------------------- | ||||
| 14430 | # Additional hardwired NOBREAK rules | ||||
| 14431 | #--------------------------------------------------------------- | ||||
| 14432 | |||||
| 14433 | # map1.t -- correct for a quirk in perl | ||||
| 14434 | if ( $token eq '(' | ||||
| 14435 | && $next_nonblank_type eq 'i' | ||||
| 14436 | && $last_nonblank_type eq 'k' | ||||
| 14437 | && $is_sort_map_grep{$last_nonblank_token} ) | ||||
| 14438 | |||||
| 14439 | # /^(sort|map|grep)$/ ) | ||||
| 14440 | { | ||||
| 14441 | $bond_str = NO_BREAK; | ||||
| 14442 | } | ||||
| 14443 | |||||
| 14444 | # extrude.t: do not break before paren at: | ||||
| 14445 | # -l pid_filename( | ||||
| 14446 | if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) { | ||||
| 14447 | $bond_str = NO_BREAK; | ||||
| 14448 | } | ||||
| 14449 | |||||
| 14450 | # in older version of perl, use strict can cause problems with | ||||
| 14451 | # breaks before bare words following opening parens. For example, | ||||
| 14452 | # this will fail under older versions if a break is made between | ||||
| 14453 | # '(' and 'MAIL': use strict; open( MAIL, "a long filename or | ||||
| 14454 | # command"); close MAIL; | ||||
| 14455 | if ( $type eq '{' ) { | ||||
| 14456 | |||||
| 14457 | if ( $token eq '(' && $next_nonblank_type eq 'w' ) { | ||||
| 14458 | |||||
| 14459 | # but it's fine to break if the word is followed by a '=>' | ||||
| 14460 | # or if it is obviously a sub call | ||||
| 14461 | my $i_next_next_nonblank = $i_next_nonblank + 1; | ||||
| 14462 | my $next_next_type = $types_to_go[$i_next_next_nonblank]; | ||||
| 14463 | if ( $next_next_type eq 'b' | ||||
| 14464 | && $i_next_nonblank < $max_index_to_go ) | ||||
| 14465 | { | ||||
| 14466 | $i_next_next_nonblank++; | ||||
| 14467 | $next_next_type = $types_to_go[$i_next_next_nonblank]; | ||||
| 14468 | } | ||||
| 14469 | |||||
| 14470 | # We'll check for an old breakpoint and keep a leading | ||||
| 14471 | # bareword if it was that way in the input file. | ||||
| 14472 | # Presumably it was ok that way. For example, the | ||||
| 14473 | # following would remain unchanged: | ||||
| 14474 | # | ||||
| 14475 | # @months = ( | ||||
| 14476 | # January, February, March, April, | ||||
| 14477 | # May, June, July, August, | ||||
| 14478 | # September, October, November, December, | ||||
| 14479 | # ); | ||||
| 14480 | # | ||||
| 14481 | # This should be sufficient: | ||||
| 14482 | if ( | ||||
| 14483 | !$old_breakpoint_to_go[$i] | ||||
| 14484 | && ( $next_next_type eq ',' | ||||
| 14485 | || $next_next_type eq '}' ) | ||||
| 14486 | ) | ||||
| 14487 | { | ||||
| 14488 | $bond_str = NO_BREAK; | ||||
| 14489 | } | ||||
| 14490 | } | ||||
| 14491 | } | ||||
| 14492 | |||||
| 14493 | # Do not break between a possible filehandle and a ? or / and do | ||||
| 14494 | # not introduce a break after it if there is no blank | ||||
| 14495 | # (extrude.t) | ||||
| 14496 | elsif ( $type eq 'Z' ) { | ||||
| 14497 | |||||
| 14498 | # don't break.. | ||||
| 14499 | if ( | ||||
| 14500 | |||||
| 14501 | # if there is no blank and we do not want one. Examples: | ||||
| 14502 | # print $x++ # do not break after $x | ||||
| 14503 | # print HTML"HELLO" # break ok after HTML | ||||
| 14504 | ( | ||||
| 14505 | $next_type ne 'b' | ||||
| 14506 | && defined( $want_left_space{$next_type} ) | ||||
| 14507 | && $want_left_space{$next_type} == WS_NO | ||||
| 14508 | ) | ||||
| 14509 | |||||
| 14510 | # or we might be followed by the start of a quote | ||||
| 14511 | || $next_nonblank_type =~ /^[\/\?]$/ | ||||
| 14512 | ) | ||||
| 14513 | { | ||||
| 14514 | $bond_str = NO_BREAK; | ||||
| 14515 | } | ||||
| 14516 | } | ||||
| 14517 | |||||
| 14518 | # Breaking before a ? before a quote can cause trouble if | ||||
| 14519 | # they are not separated by a blank. | ||||
| 14520 | # Example: a syntax error occurs if you break before the ? here | ||||
| 14521 | # my$logic=join$all?' && ':' || ',@regexps; | ||||
| 14522 | # From: Professional_Perl_Programming_Code/multifind.pl | ||||
| 14523 | if ( $next_nonblank_type eq '?' ) { | ||||
| 14524 | $bond_str = NO_BREAK | ||||
| 14525 | if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' ); | ||||
| 14526 | } | ||||
| 14527 | |||||
| 14528 | # Breaking before a . followed by a number | ||||
| 14529 | # can cause trouble if there is no intervening space | ||||
| 14530 | # Example: a syntax error occurs if you break before the .2 here | ||||
| 14531 | # $str .= pack($endian.2, ensurrogate($ord)); | ||||
| 14532 | # From: perl58/Unicode.pm | ||||
| 14533 | elsif ( $next_nonblank_type eq '.' ) { | ||||
| 14534 | $bond_str = NO_BREAK | ||||
| 14535 | if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' ); | ||||
| 14536 | } | ||||
| 14537 | |||||
| 14538 | # patch to put cuddled elses back together when on multiple | ||||
| 14539 | # lines, as in: } \n else \n { \n | ||||
| 14540 | if ($rOpts_cuddled_else) { | ||||
| 14541 | |||||
| 14542 | if ( ( $token eq 'else' ) && ( $next_nonblank_type eq '{' ) | ||||
| 14543 | || ( $type eq '}' ) && ( $next_nonblank_token eq 'else' ) ) | ||||
| 14544 | { | ||||
| 14545 | $bond_str = NO_BREAK; | ||||
| 14546 | } | ||||
| 14547 | } | ||||
| 14548 | my $bond_str_2 = $bond_str; | ||||
| 14549 | |||||
| 14550 | #--------------------------------------------------------------- | ||||
| 14551 | # End of hardwired rules | ||||
| 14552 | #--------------------------------------------------------------- | ||||
| 14553 | |||||
| 14554 | #--------------------------------------------------------------- | ||||
| 14555 | # Bond Strength Section 3: | ||||
| 14556 | # Apply table rules. These have priority over the above | ||||
| 14557 | # hardwired rules. | ||||
| 14558 | #--------------------------------------------------------------- | ||||
| 14559 | |||||
| 14560 | my $tabulated_bond_str; | ||||
| 14561 | my $ltype = $type; | ||||
| 14562 | my $rtype = $next_nonblank_type; | ||||
| 14563 | if ( $token =~ /^[\(\[\{\)\]\}]/ ) { $ltype = $type . $token } | ||||
| 14564 | if ( $next_nonblank_token =~ /^[\(\[\{\)\]\}]/ ) { | ||||
| 14565 | $rtype = $next_nonblank_type . $next_nonblank_token; | ||||
| 14566 | } | ||||
| 14567 | |||||
| 14568 | if ( $binary_bond_strength{$ltype}{$rtype} ) { | ||||
| 14569 | $bond_str = $binary_bond_strength{$ltype}{$rtype}; | ||||
| 14570 | $tabulated_bond_str = $bond_str; | ||||
| 14571 | } | ||||
| 14572 | |||||
| 14573 | if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) { | ||||
| 14574 | $bond_str = NO_BREAK; | ||||
| 14575 | $tabulated_bond_str = $bond_str; | ||||
| 14576 | } | ||||
| 14577 | my $bond_str_3 = $bond_str; | ||||
| 14578 | |||||
| 14579 | # If the hardwired rules conflict with the tabulated bond | ||||
| 14580 | # strength then there is an inconsistency that should be fixed | ||||
| 14581 | FORMATTER_DEBUG_FLAG_BOND_TABLES | ||||
| 14582 | && $tabulated_bond_str | ||||
| 14583 | && $bond_str_1 | ||||
| 14584 | && $bond_str_1 != $bond_str_2 | ||||
| 14585 | && $bond_str_2 != $tabulated_bond_str | ||||
| 14586 | && do { | ||||
| 14587 | print STDERR | ||||
| 14588 | "BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n"; | ||||
| 14589 | }; | ||||
| 14590 | |||||
| 14591 | #----------------------------------------------------------------- | ||||
| 14592 | # Bond Strength Section 4: | ||||
| 14593 | # Modify strengths of certain tokens which often occur in sequence | ||||
| 14594 | # by adding a small bias to each one in turn so that the breaks | ||||
| 14595 | # occur from left to right. | ||||
| 14596 | # | ||||
| 14597 | # Note that we only changing strengths by small amounts here, | ||||
| 14598 | # and usually increasing, so we should not be altering any NO_BREAKs. | ||||
| 14599 | # Other routines which check for NO_BREAKs will use a tolerance | ||||
| 14600 | # of one to avoid any problem. | ||||
| 14601 | #----------------------------------------------------------------- | ||||
| 14602 | |||||
| 14603 | # The bias tables use special keys | ||||
| 14604 | my $left_key = bias_table_key( $type, $token ); | ||||
| 14605 | my $right_key = | ||||
| 14606 | bias_table_key( $next_nonblank_type, $next_nonblank_token ); | ||||
| 14607 | |||||
| 14608 | # add any bias set by sub scan_list at old comma break points. | ||||
| 14609 | if ( $type eq ',' ) { $bond_str += $bond_strength_to_go[$i] } | ||||
| 14610 | |||||
| 14611 | # bias left token | ||||
| 14612 | elsif ( defined( $bias{$left_key} ) ) { | ||||
| 14613 | if ( !$want_break_before{$left_key} ) { | ||||
| 14614 | $bias{$left_key} += $delta_bias; | ||||
| 14615 | $bond_str += $bias{$left_key}; | ||||
| 14616 | } | ||||
| 14617 | } | ||||
| 14618 | |||||
| 14619 | # bias right token | ||||
| 14620 | if ( defined( $bias{$right_key} ) ) { | ||||
| 14621 | if ( $want_break_before{$right_key} ) { | ||||
| 14622 | |||||
| 14623 | # for leading '.' align all but 'short' quotes; the idea | ||||
| 14624 | # is to not place something like "\n" on a single line. | ||||
| 14625 | if ( $right_key eq '.' ) { | ||||
| 14626 | unless ( | ||||
| 14627 | $last_nonblank_type eq '.' | ||||
| 14628 | && ( | ||||
| 14629 | length($token) <= | ||||
| 14630 | $rOpts_short_concatenation_item_length ) | ||||
| 14631 | && ( $token !~ /^[\)\]\}]$/ ) | ||||
| 14632 | ) | ||||
| 14633 | { | ||||
| 14634 | $bias{$right_key} += $delta_bias; | ||||
| 14635 | } | ||||
| 14636 | } | ||||
| 14637 | else { | ||||
| 14638 | $bias{$right_key} += $delta_bias; | ||||
| 14639 | } | ||||
| 14640 | $bond_str += $bias{$right_key}; | ||||
| 14641 | } | ||||
| 14642 | } | ||||
| 14643 | my $bond_str_4 = $bond_str; | ||||
| 14644 | |||||
| 14645 | #--------------------------------------------------------------- | ||||
| 14646 | # Bond Strength Section 5: | ||||
| 14647 | # Fifth Approximation. | ||||
| 14648 | # Take nesting depth into account by adding the nesting depth | ||||
| 14649 | # to the bond strength. | ||||
| 14650 | #--------------------------------------------------------------- | ||||
| 14651 | my $strength; | ||||
| 14652 | |||||
| 14653 | if ( defined($bond_str) && !$nobreak_to_go[$i] ) { | ||||
| 14654 | if ( $total_nesting_depth > 0 ) { | ||||
| 14655 | $strength = $bond_str + $total_nesting_depth; | ||||
| 14656 | } | ||||
| 14657 | else { | ||||
| 14658 | $strength = $bond_str; | ||||
| 14659 | } | ||||
| 14660 | } | ||||
| 14661 | else { | ||||
| 14662 | $strength = NO_BREAK; | ||||
| 14663 | } | ||||
| 14664 | |||||
| 14665 | # always break after side comment | ||||
| 14666 | if ( $type eq '#' ) { $strength = 0 } | ||||
| 14667 | |||||
| 14668 | $bond_strength_to_go[$i] = $strength; | ||||
| 14669 | |||||
| 14670 | FORMATTER_DEBUG_FLAG_BOND && do { | ||||
| 14671 | my $str = substr( $token, 0, 15 ); | ||||
| 14672 | $str .= ' ' x ( 16 - length($str) ); | ||||
| 14673 | print STDOUT | ||||
| 14674 | "BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n"; | ||||
| 14675 | }; | ||||
| 14676 | } ## end main loop | ||||
| 14677 | } ## end sub set_bond_strengths | ||||
| 14678 | } | ||||
| 14679 | |||||
| 14680 | sub pad_array_to_go { | ||||
| 14681 | |||||
| 14682 | # to simplify coding in scan_list and set_bond_strengths, it helps | ||||
| 14683 | # to create some extra blank tokens at the end of the arrays | ||||
| 14684 | $tokens_to_go[ $max_index_to_go + 1 ] = ''; | ||||
| 14685 | $tokens_to_go[ $max_index_to_go + 2 ] = ''; | ||||
| 14686 | $types_to_go[ $max_index_to_go + 1 ] = 'b'; | ||||
| 14687 | $types_to_go[ $max_index_to_go + 2 ] = 'b'; | ||||
| 14688 | $nesting_depth_to_go[ $max_index_to_go + 1 ] = | ||||
| 14689 | $nesting_depth_to_go[$max_index_to_go]; | ||||
| 14690 | |||||
| 14691 | # /^[R\}\)\]]$/ | ||||
| 14692 | if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) { | ||||
| 14693 | if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) { | ||||
| 14694 | |||||
| 14695 | # shouldn't happen: | ||||
| 14696 | unless ( get_saw_brace_error() ) { | ||||
| 14697 | warning( | ||||
| 14698 | "Program bug in scan_list: hit nesting error which should have been caught\n" | ||||
| 14699 | ); | ||||
| 14700 | report_definite_bug(); | ||||
| 14701 | } | ||||
| 14702 | } | ||||
| 14703 | else { | ||||
| 14704 | $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1; | ||||
| 14705 | } | ||||
| 14706 | } | ||||
| 14707 | |||||
| 14708 | # /^[L\{\(\[]$/ | ||||
| 14709 | elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) { | ||||
| 14710 | $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1; | ||||
| 14711 | } | ||||
| 14712 | } | ||||
| 14713 | |||||
| 14714 | { # begin scan_list | ||||
| 14715 | |||||
| 14716 | 1 | 100ns | my ( | ||
| 14717 | 1 | 200ns | $block_type, $current_depth, | ||
| 14718 | $depth, $i, | ||||
| 14719 | $i_last_nonblank_token, $last_colon_sequence_number, | ||||
| 14720 | $last_nonblank_token, $last_nonblank_type, | ||||
| 14721 | $last_nonblank_block_type, $last_old_breakpoint_count, | ||||
| 14722 | $minimum_depth, $next_nonblank_block_type, | ||||
| 14723 | $next_nonblank_token, $next_nonblank_type, | ||||
| 14724 | $old_breakpoint_count, $starting_breakpoint_count, | ||||
| 14725 | $starting_depth, $token, | ||||
| 14726 | $type, $type_sequence, | ||||
| 14727 | ); | ||||
| 14728 | |||||
| 14729 | my ( | ||||
| 14730 | @breakpoint_stack, @breakpoint_undo_stack, | ||||
| 14731 | @comma_index, @container_type, | ||||
| 14732 | @identifier_count_stack, @index_before_arrow, | ||||
| 14733 | @interrupted_list, @item_count_stack, | ||||
| 14734 | @last_comma_index, @last_dot_index, | ||||
| 14735 | @last_nonblank_type, @old_breakpoint_count_stack, | ||||
| 14736 | @opening_structure_index_stack, @rfor_semicolon_list, | ||||
| 14737 | @has_old_logical_breakpoints, @rand_or_list, | ||||
| 14738 | @i_equals, | ||||
| 14739 | ); | ||||
| 14740 | |||||
| 14741 | # routine to define essential variables when we go 'up' to | ||||
| 14742 | # a new depth | ||||
| 14743 | sub check_for_new_minimum_depth { | ||||
| 14744 | my $depth = shift; | ||||
| 14745 | if ( $depth < $minimum_depth ) { | ||||
| 14746 | |||||
| 14747 | $minimum_depth = $depth; | ||||
| 14748 | |||||
| 14749 | # these arrays need not retain values between calls | ||||
| 14750 | $breakpoint_stack[$depth] = $starting_breakpoint_count; | ||||
| 14751 | $container_type[$depth] = ""; | ||||
| 14752 | $identifier_count_stack[$depth] = 0; | ||||
| 14753 | $index_before_arrow[$depth] = -1; | ||||
| 14754 | $interrupted_list[$depth] = 1; | ||||
| 14755 | $item_count_stack[$depth] = 0; | ||||
| 14756 | $last_nonblank_type[$depth] = ""; | ||||
| 14757 | $opening_structure_index_stack[$depth] = -1; | ||||
| 14758 | |||||
| 14759 | $breakpoint_undo_stack[$depth] = undef; | ||||
| 14760 | $comma_index[$depth] = undef; | ||||
| 14761 | $last_comma_index[$depth] = undef; | ||||
| 14762 | $last_dot_index[$depth] = undef; | ||||
| 14763 | $old_breakpoint_count_stack[$depth] = undef; | ||||
| 14764 | $has_old_logical_breakpoints[$depth] = 0; | ||||
| 14765 | $rand_or_list[$depth] = []; | ||||
| 14766 | $rfor_semicolon_list[$depth] = []; | ||||
| 14767 | $i_equals[$depth] = -1; | ||||
| 14768 | |||||
| 14769 | # these arrays must retain values between calls | ||||
| 14770 | if ( !defined( $has_broken_sublist[$depth] ) ) { | ||||
| 14771 | $dont_align[$depth] = 0; | ||||
| 14772 | $has_broken_sublist[$depth] = 0; | ||||
| 14773 | $want_comma_break[$depth] = 0; | ||||
| 14774 | } | ||||
| 14775 | } | ||||
| 14776 | } | ||||
| 14777 | |||||
| 14778 | # routine to decide which commas to break at within a container; | ||||
| 14779 | # returns: | ||||
| 14780 | # $bp_count = number of comma breakpoints set | ||||
| 14781 | # $do_not_break_apart = a flag indicating if container need not | ||||
| 14782 | # be broken open | ||||
| 14783 | sub set_comma_breakpoints { | ||||
| 14784 | |||||
| 14785 | my $dd = shift; | ||||
| 14786 | my $bp_count = 0; | ||||
| 14787 | my $do_not_break_apart = 0; | ||||
| 14788 | |||||
| 14789 | # anything to do? | ||||
| 14790 | if ( $item_count_stack[$dd] ) { | ||||
| 14791 | |||||
| 14792 | # handle commas not in containers... | ||||
| 14793 | if ( $dont_align[$dd] ) { | ||||
| 14794 | do_uncontained_comma_breaks($dd); | ||||
| 14795 | } | ||||
| 14796 | |||||
| 14797 | # handle commas within containers... | ||||
| 14798 | else { | ||||
| 14799 | my $fbc = $forced_breakpoint_count; | ||||
| 14800 | |||||
| 14801 | # always open comma lists not preceded by keywords, | ||||
| 14802 | # barewords, identifiers (that is, anything that doesn't | ||||
| 14803 | # look like a function call) | ||||
| 14804 | my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/; | ||||
| 14805 | |||||
| 14806 | set_comma_breakpoints_do( | ||||
| 14807 | $dd, | ||||
| 14808 | $opening_structure_index_stack[$dd], | ||||
| 14809 | $i, | ||||
| 14810 | $item_count_stack[$dd], | ||||
| 14811 | $identifier_count_stack[$dd], | ||||
| 14812 | $comma_index[$dd], | ||||
| 14813 | $next_nonblank_type, | ||||
| 14814 | $container_type[$dd], | ||||
| 14815 | $interrupted_list[$dd], | ||||
| 14816 | \$do_not_break_apart, | ||||
| 14817 | $must_break_open, | ||||
| 14818 | ); | ||||
| 14819 | $bp_count = $forced_breakpoint_count - $fbc; | ||||
| 14820 | $do_not_break_apart = 0 if $must_break_open; | ||||
| 14821 | } | ||||
| 14822 | } | ||||
| 14823 | return ( $bp_count, $do_not_break_apart ); | ||||
| 14824 | } | ||||
| 14825 | |||||
| 14826 | sub do_uncontained_comma_breaks { | ||||
| 14827 | |||||
| 14828 | # Handle commas not in containers... | ||||
| 14829 | # This is a catch-all routine for commas that we | ||||
| 14830 | # don't know what to do with because the don't fall | ||||
| 14831 | # within containers. We will bias the bond strength | ||||
| 14832 | # to break at commas which ended lines in the input | ||||
| 14833 | # file. This usually works better than just trying | ||||
| 14834 | # to put as many items on a line as possible. A | ||||
| 14835 | # downside is that if the input file is garbage it | ||||
| 14836 | # won't work very well. However, the user can always | ||||
| 14837 | # prevent following the old breakpoints with the | ||||
| 14838 | # -iob flag. | ||||
| 14839 | my $dd = shift; | ||||
| 14840 | my $bias = -.01; | ||||
| 14841 | my $old_comma_break_count = 0; | ||||
| 14842 | foreach my $ii ( @{ $comma_index[$dd] } ) { | ||||
| 14843 | if ( $old_breakpoint_to_go[$ii] ) { | ||||
| 14844 | $old_comma_break_count++; | ||||
| 14845 | $bond_strength_to_go[$ii] = $bias; | ||||
| 14846 | |||||
| 14847 | # reduce bias magnitude to force breaks in order | ||||
| 14848 | $bias *= 0.99; | ||||
| 14849 | } | ||||
| 14850 | } | ||||
| 14851 | |||||
| 14852 | # Also put a break before the first comma if | ||||
| 14853 | # (1) there was a break there in the input, and | ||||
| 14854 | # (2) there was exactly one old break before the first comma break | ||||
| 14855 | # (3) OLD: there are multiple old comma breaks | ||||
| 14856 | # (3) NEW: there are one or more old comma breaks (see return example) | ||||
| 14857 | # | ||||
| 14858 | # For example, we will follow the user and break after | ||||
| 14859 | # 'print' in this snippet: | ||||
| 14860 | |||||
| 14861 | # "conformability (Not the same dimension)\n", | ||||
| 14862 | # "\t", $have, " is ", text_unit($hu), "\n", | ||||
| 14863 | # "\t", $want, " is ", text_unit($wu), "\n", | ||||
| 14864 | # ; | ||||
| 14865 | # | ||||
| 14866 | # Another example, just one comma, where we will break after | ||||
| 14867 | # the return: | ||||
| 14868 | # return | ||||
| 14869 | # $x * cos($a) - $y * sin($a), | ||||
| 14870 | # $x * sin($a) + $y * cos($a); | ||||
| 14871 | |||||
| 14872 | # Breaking a print statement: | ||||
| 14873 | # print SAVEOUT | ||||
| 14874 | # ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "", | ||||
| 14875 | # ( $? & 128 ) ? " -- core dumped" : "", "\n"; | ||||
| 14876 | # | ||||
| 14877 | # But we will not force a break after the opening paren here | ||||
| 14878 | # (causes a blinker): | ||||
| 14879 | # $heap->{stream}->set_output_filter( | ||||
| 14880 | # poe::filter::reference->new('myotherfreezer') ), | ||||
| 14881 | # ; | ||||
| 14882 | # | ||||
| 14883 | my $i_first_comma = $comma_index[$dd]->[0]; | ||||
| 14884 | if ( $old_breakpoint_to_go[$i_first_comma] ) { | ||||
| 14885 | my $level_comma = $levels_to_go[$i_first_comma]; | ||||
| 14886 | my $ibreak = -1; | ||||
| 14887 | my $obp_count = 0; | ||||
| 14888 | for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) { | ||||
| 14889 | if ( $old_breakpoint_to_go[$ii] ) { | ||||
| 14890 | $obp_count++; | ||||
| 14891 | last if ( $obp_count > 1 ); | ||||
| 14892 | $ibreak = $ii | ||||
| 14893 | if ( $levels_to_go[$ii] == $level_comma ); | ||||
| 14894 | } | ||||
| 14895 | } | ||||
| 14896 | |||||
| 14897 | # Changed rule from multiple old commas to just one here: | ||||
| 14898 | if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 ) | ||||
| 14899 | { | ||||
| 14900 | # Do not to break before an opening token because | ||||
| 14901 | # it can lead to "blinkers". | ||||
| 14902 | my $ibreakm = $ibreak; | ||||
| 14903 | $ibreakm-- if ( $types_to_go[$ibreakm] eq 'b' ); | ||||
| 14904 | if ( $ibreakm >= 0 && $types_to_go[$ibreakm] !~ /^[\(\{\[L]$/ ) | ||||
| 14905 | { | ||||
| 14906 | set_forced_breakpoint($ibreak); | ||||
| 14907 | } | ||||
| 14908 | } | ||||
| 14909 | } | ||||
| 14910 | } | ||||
| 14911 | |||||
| 14912 | 1 | 900ns | my %is_logical_container; | ||
| 14913 | |||||
| 14914 | # spent 14µs within Perl::Tidy::Formatter::BEGIN@14914 which was called:
# once (14µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 14917 | ||||
| 14915 | 1 | 5µs | @_ = qw# if elsif unless while and or err not && | || ? : ! #; | ||
| 14916 | 1 | 12µs | @is_logical_container{@_} = (1) x scalar(@_); | ||
| 14917 | 1 | 2.48ms | 1 | 14µs | } # spent 14µs making 1 call to Perl::Tidy::Formatter::BEGIN@14914 |
| 14918 | |||||
| 14919 | sub set_for_semicolon_breakpoints { | ||||
| 14920 | my $dd = shift; | ||||
| 14921 | foreach ( @{ $rfor_semicolon_list[$dd] } ) { | ||||
| 14922 | set_forced_breakpoint($_); | ||||
| 14923 | } | ||||
| 14924 | } | ||||
| 14925 | |||||
| 14926 | sub set_logical_breakpoints { | ||||
| 14927 | my $dd = shift; | ||||
| 14928 | if ( | ||||
| 14929 | $item_count_stack[$dd] == 0 | ||||
| 14930 | && $is_logical_container{ $container_type[$dd] } | ||||
| 14931 | |||||
| 14932 | || $has_old_logical_breakpoints[$dd] | ||||
| 14933 | ) | ||||
| 14934 | { | ||||
| 14935 | |||||
| 14936 | # Look for breaks in this order: | ||||
| 14937 | # 0 1 2 3 | ||||
| 14938 | # or and || && | ||||
| 14939 | foreach my $i ( 0 .. 3 ) { | ||||
| 14940 | if ( $rand_or_list[$dd][$i] ) { | ||||
| 14941 | foreach ( @{ $rand_or_list[$dd][$i] } ) { | ||||
| 14942 | set_forced_breakpoint($_); | ||||
| 14943 | } | ||||
| 14944 | |||||
| 14945 | # break at any 'if' and 'unless' too | ||||
| 14946 | foreach ( @{ $rand_or_list[$dd][4] } ) { | ||||
| 14947 | set_forced_breakpoint($_); | ||||
| 14948 | } | ||||
| 14949 | $rand_or_list[$dd] = []; | ||||
| 14950 | last; | ||||
| 14951 | } | ||||
| 14952 | } | ||||
| 14953 | } | ||||
| 14954 | } | ||||
| 14955 | |||||
| 14956 | sub is_unbreakable_container { | ||||
| 14957 | |||||
| 14958 | # never break a container of one of these types | ||||
| 14959 | # because bad things can happen (map1.t) | ||||
| 14960 | my $dd = shift; | ||||
| 14961 | $is_sort_map_grep{ $container_type[$dd] }; | ||||
| 14962 | } | ||||
| 14963 | |||||
| 14964 | sub scan_list { | ||||
| 14965 | |||||
| 14966 | # This routine is responsible for setting line breaks for all lists, | ||||
| 14967 | # so that hierarchical structure can be displayed and so that list | ||||
| 14968 | # items can be vertically aligned. The output of this routine is | ||||
| 14969 | # stored in the array @forced_breakpoint_to_go, which is used to set | ||||
| 14970 | # final breakpoints. | ||||
| 14971 | |||||
| 14972 | $starting_depth = $nesting_depth_to_go[0]; | ||||
| 14973 | |||||
| 14974 | $block_type = ' '; | ||||
| 14975 | $current_depth = $starting_depth; | ||||
| 14976 | $i = -1; | ||||
| 14977 | $last_colon_sequence_number = -1; | ||||
| 14978 | $last_nonblank_token = ';'; | ||||
| 14979 | $last_nonblank_type = ';'; | ||||
| 14980 | $last_nonblank_block_type = ' '; | ||||
| 14981 | $last_old_breakpoint_count = 0; | ||||
| 14982 | $minimum_depth = $current_depth + 1; # forces update in check below | ||||
| 14983 | $old_breakpoint_count = 0; | ||||
| 14984 | $starting_breakpoint_count = $forced_breakpoint_count; | ||||
| 14985 | $token = ';'; | ||||
| 14986 | $type = ';'; | ||||
| 14987 | $type_sequence = ''; | ||||
| 14988 | |||||
| 14989 | my $total_depth_variation = 0; | ||||
| 14990 | my $i_old_assignment_break; | ||||
| 14991 | my $depth_last = $starting_depth; | ||||
| 14992 | |||||
| 14993 | check_for_new_minimum_depth($current_depth); | ||||
| 14994 | |||||
| 14995 | my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0; | ||||
| 14996 | my $want_previous_breakpoint = -1; | ||||
| 14997 | |||||
| 14998 | my $saw_good_breakpoint; | ||||
| 14999 | my $i_line_end = -1; | ||||
| 15000 | my $i_line_start = -1; | ||||
| 15001 | |||||
| 15002 | # loop over all tokens in this batch | ||||
| 15003 | while ( ++$i <= $max_index_to_go ) { | ||||
| 15004 | if ( $type ne 'b' ) { | ||||
| 15005 | $i_last_nonblank_token = $i - 1; | ||||
| 15006 | $last_nonblank_type = $type; | ||||
| 15007 | $last_nonblank_token = $token; | ||||
| 15008 | $last_nonblank_block_type = $block_type; | ||||
| 15009 | } ## end if ( $type ne 'b' ) | ||||
| 15010 | $type = $types_to_go[$i]; | ||||
| 15011 | $block_type = $block_type_to_go[$i]; | ||||
| 15012 | $token = $tokens_to_go[$i]; | ||||
| 15013 | $type_sequence = $type_sequence_to_go[$i]; | ||||
| 15014 | my $next_type = $types_to_go[ $i + 1 ]; | ||||
| 15015 | my $next_token = $tokens_to_go[ $i + 1 ]; | ||||
| 15016 | my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 ); | ||||
| 15017 | $next_nonblank_type = $types_to_go[$i_next_nonblank]; | ||||
| 15018 | $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; | ||||
| 15019 | $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank]; | ||||
| 15020 | |||||
| 15021 | # set break if flag was set | ||||
| 15022 | if ( $want_previous_breakpoint >= 0 ) { | ||||
| 15023 | set_forced_breakpoint($want_previous_breakpoint); | ||||
| 15024 | $want_previous_breakpoint = -1; | ||||
| 15025 | } | ||||
| 15026 | |||||
| 15027 | $last_old_breakpoint_count = $old_breakpoint_count; | ||||
| 15028 | if ( $old_breakpoint_to_go[$i] ) { | ||||
| 15029 | $i_line_end = $i; | ||||
| 15030 | $i_line_start = $i_next_nonblank; | ||||
| 15031 | |||||
| 15032 | $old_breakpoint_count++; | ||||
| 15033 | |||||
| 15034 | # Break before certain keywords if user broke there and | ||||
| 15035 | # this is a 'safe' break point. The idea is to retain | ||||
| 15036 | # any preferred breaks for sequential list operations, | ||||
| 15037 | # like a schwartzian transform. | ||||
| 15038 | if ($rOpts_break_at_old_keyword_breakpoints) { | ||||
| 15039 | if ( | ||||
| 15040 | $next_nonblank_type eq 'k' | ||||
| 15041 | && $is_keyword_returning_list{$next_nonblank_token} | ||||
| 15042 | && ( $type =~ /^[=\)\]\}Riw]$/ | ||||
| 15043 | || $type eq 'k' | ||||
| 15044 | && $is_keyword_returning_list{$token} ) | ||||
| 15045 | ) | ||||
| 15046 | { | ||||
| 15047 | |||||
| 15048 | # we actually have to set this break next time through | ||||
| 15049 | # the loop because if we are at a closing token (such | ||||
| 15050 | # as '}') which forms a one-line block, this break might | ||||
| 15051 | # get undone. | ||||
| 15052 | $want_previous_breakpoint = $i; | ||||
| 15053 | } ## end if ( $next_nonblank_type...) | ||||
| 15054 | } ## end if ($rOpts_break_at_old_keyword_breakpoints) | ||||
| 15055 | |||||
| 15056 | # Break before attributes if user broke there | ||||
| 15057 | if ($rOpts_break_at_old_attribute_breakpoints) { | ||||
| 15058 | if ( $next_nonblank_type eq 'A' ) { | ||||
| 15059 | $want_previous_breakpoint = $i; | ||||
| 15060 | } | ||||
| 15061 | } | ||||
| 15062 | |||||
| 15063 | # remember an = break as possible good break point | ||||
| 15064 | if ( $is_assignment{$type} ) { | ||||
| 15065 | $i_old_assignment_break = $i; | ||||
| 15066 | } | ||||
| 15067 | elsif ( $is_assignment{$next_nonblank_type} ) { | ||||
| 15068 | $i_old_assignment_break = $i_next_nonblank; | ||||
| 15069 | } | ||||
| 15070 | } ## end if ( $old_breakpoint_to_go...) | ||||
| 15071 | next if ( $type eq 'b' ); | ||||
| 15072 | $depth = $nesting_depth_to_go[ $i + 1 ]; | ||||
| 15073 | |||||
| 15074 | $total_depth_variation += abs( $depth - $depth_last ); | ||||
| 15075 | $depth_last = $depth; | ||||
| 15076 | |||||
| 15077 | # safety check - be sure we always break after a comment | ||||
| 15078 | # Shouldn't happen .. an error here probably means that the | ||||
| 15079 | # nobreak flag did not get turned off correctly during | ||||
| 15080 | # formatting. | ||||
| 15081 | if ( $type eq '#' ) { | ||||
| 15082 | if ( $i != $max_index_to_go ) { | ||||
| 15083 | warning( | ||||
| 15084 | "Non-fatal program bug: backup logic needed to break after a comment\n" | ||||
| 15085 | ); | ||||
| 15086 | report_definite_bug(); | ||||
| 15087 | $nobreak_to_go[$i] = 0; | ||||
| 15088 | set_forced_breakpoint($i); | ||||
| 15089 | } ## end if ( $i != $max_index_to_go) | ||||
| 15090 | } ## end if ( $type eq '#' ) | ||||
| 15091 | |||||
| 15092 | # Force breakpoints at certain tokens in long lines. | ||||
| 15093 | # Note that such breakpoints will be undone later if these tokens | ||||
| 15094 | # are fully contained within parens on a line. | ||||
| 15095 | if ( | ||||
| 15096 | |||||
| 15097 | # break before a keyword within a line | ||||
| 15098 | $type eq 'k' | ||||
| 15099 | && $i > 0 | ||||
| 15100 | |||||
| 15101 | # if one of these keywords: | ||||
| 15102 | && $token =~ /^(if|unless|while|until|for)$/ | ||||
| 15103 | |||||
| 15104 | # but do not break at something like '1 while' | ||||
| 15105 | && ( $last_nonblank_type ne 'n' || $i > 2 ) | ||||
| 15106 | |||||
| 15107 | # and let keywords follow a closing 'do' brace | ||||
| 15108 | && $last_nonblank_block_type ne 'do' | ||||
| 15109 | |||||
| 15110 | && ( | ||||
| 15111 | $is_long_line | ||||
| 15112 | |||||
| 15113 | # or container is broken (by side-comment, etc) | ||||
| 15114 | || ( $next_nonblank_token eq '(' | ||||
| 15115 | && $mate_index_to_go[$i_next_nonblank] < $i ) | ||||
| 15116 | ) | ||||
| 15117 | ) | ||||
| 15118 | { | ||||
| 15119 | set_forced_breakpoint( $i - 1 ); | ||||
| 15120 | } ## end if ( $type eq 'k' && $i...) | ||||
| 15121 | |||||
| 15122 | # remember locations of '||' and '&&' for possible breaks if we | ||||
| 15123 | # decide this is a long logical expression. | ||||
| 15124 | if ( $type eq '||' ) { | ||||
| 15125 | push @{ $rand_or_list[$depth][2] }, $i; | ||||
| 15126 | ++$has_old_logical_breakpoints[$depth] | ||||
| 15127 | if ( ( $i == $i_line_start || $i == $i_line_end ) | ||||
| 15128 | && $rOpts_break_at_old_logical_breakpoints ); | ||||
| 15129 | } ## end if ( $type eq '||' ) | ||||
| 15130 | elsif ( $type eq '&&' ) { | ||||
| 15131 | push @{ $rand_or_list[$depth][3] }, $i; | ||||
| 15132 | ++$has_old_logical_breakpoints[$depth] | ||||
| 15133 | if ( ( $i == $i_line_start || $i == $i_line_end ) | ||||
| 15134 | && $rOpts_break_at_old_logical_breakpoints ); | ||||
| 15135 | } ## end elsif ( $type eq '&&' ) | ||||
| 15136 | elsif ( $type eq 'f' ) { | ||||
| 15137 | push @{ $rfor_semicolon_list[$depth] }, $i; | ||||
| 15138 | } | ||||
| 15139 | elsif ( $type eq 'k' ) { | ||||
| 15140 | if ( $token eq 'and' ) { | ||||
| 15141 | push @{ $rand_or_list[$depth][1] }, $i; | ||||
| 15142 | ++$has_old_logical_breakpoints[$depth] | ||||
| 15143 | if ( ( $i == $i_line_start || $i == $i_line_end ) | ||||
| 15144 | && $rOpts_break_at_old_logical_breakpoints ); | ||||
| 15145 | } ## end if ( $token eq 'and' ) | ||||
| 15146 | |||||
| 15147 | # break immediately at 'or's which are probably not in a logical | ||||
| 15148 | # block -- but we will break in logical breaks below so that | ||||
| 15149 | # they do not add to the forced_breakpoint_count | ||||
| 15150 | elsif ( $token eq 'or' ) { | ||||
| 15151 | push @{ $rand_or_list[$depth][0] }, $i; | ||||
| 15152 | ++$has_old_logical_breakpoints[$depth] | ||||
| 15153 | if ( ( $i == $i_line_start || $i == $i_line_end ) | ||||
| 15154 | && $rOpts_break_at_old_logical_breakpoints ); | ||||
| 15155 | if ( $is_logical_container{ $container_type[$depth] } ) { | ||||
| 15156 | } | ||||
| 15157 | else { | ||||
| 15158 | if ($is_long_line) { set_forced_breakpoint($i) } | ||||
| 15159 | elsif ( ( $i == $i_line_start || $i == $i_line_end ) | ||||
| 15160 | && $rOpts_break_at_old_logical_breakpoints ) | ||||
| 15161 | { | ||||
| 15162 | $saw_good_breakpoint = 1; | ||||
| 15163 | } | ||||
| 15164 | } ## end else [ if ( $is_logical_container...)] | ||||
| 15165 | } ## end elsif ( $token eq 'or' ) | ||||
| 15166 | elsif ( $token eq 'if' || $token eq 'unless' ) { | ||||
| 15167 | push @{ $rand_or_list[$depth][4] }, $i; | ||||
| 15168 | if ( ( $i == $i_line_start || $i == $i_line_end ) | ||||
| 15169 | && $rOpts_break_at_old_logical_breakpoints ) | ||||
| 15170 | { | ||||
| 15171 | set_forced_breakpoint($i); | ||||
| 15172 | } | ||||
| 15173 | } ## end elsif ( $token eq 'if' ||...) | ||||
| 15174 | } ## end elsif ( $type eq 'k' ) | ||||
| 15175 | elsif ( $is_assignment{$type} ) { | ||||
| 15176 | $i_equals[$depth] = $i; | ||||
| 15177 | } | ||||
| 15178 | |||||
| 15179 | if ($type_sequence) { | ||||
| 15180 | |||||
| 15181 | # handle any postponed closing breakpoints | ||||
| 15182 | if ( $token =~ /^[\)\]\}\:]$/ ) { | ||||
| 15183 | if ( $type eq ':' ) { | ||||
| 15184 | $last_colon_sequence_number = $type_sequence; | ||||
| 15185 | |||||
| 15186 | # retain break at a ':' line break | ||||
| 15187 | if ( ( $i == $i_line_start || $i == $i_line_end ) | ||||
| 15188 | && $rOpts_break_at_old_ternary_breakpoints ) | ||||
| 15189 | { | ||||
| 15190 | |||||
| 15191 | set_forced_breakpoint($i); | ||||
| 15192 | |||||
| 15193 | # break at previous '=' | ||||
| 15194 | if ( $i_equals[$depth] > 0 ) { | ||||
| 15195 | set_forced_breakpoint( $i_equals[$depth] ); | ||||
| 15196 | $i_equals[$depth] = -1; | ||||
| 15197 | } | ||||
| 15198 | } ## end if ( ( $i == $i_line_start...)) | ||||
| 15199 | } ## end if ( $type eq ':' ) | ||||
| 15200 | if ( defined( $postponed_breakpoint{$type_sequence} ) ) { | ||||
| 15201 | my $inc = ( $type eq ':' ) ? 0 : 1; | ||||
| 15202 | set_forced_breakpoint( $i - $inc ); | ||||
| 15203 | delete $postponed_breakpoint{$type_sequence}; | ||||
| 15204 | } | ||||
| 15205 | } ## end if ( $token =~ /^[\)\]\}\:]$/[{[(]) | ||||
| 15206 | |||||
| 15207 | # set breaks at ?/: if they will get separated (and are | ||||
| 15208 | # not a ?/: chain), or if the '?' is at the end of the | ||||
| 15209 | # line | ||||
| 15210 | elsif ( $token eq '?' ) { | ||||
| 15211 | my $i_colon = $mate_index_to_go[$i]; | ||||
| 15212 | if ( | ||||
| 15213 | $i_colon <= 0 # the ':' is not in this batch | ||||
| 15214 | || $i == 0 # this '?' is the first token of the line | ||||
| 15215 | || $i == | ||||
| 15216 | $max_index_to_go # or this '?' is the last token | ||||
| 15217 | ) | ||||
| 15218 | { | ||||
| 15219 | |||||
| 15220 | # don't break at a '?' if preceded by ':' on | ||||
| 15221 | # this line of previous ?/: pair on this line. | ||||
| 15222 | # This is an attempt to preserve a chain of ?/: | ||||
| 15223 | # expressions (elsif2.t). And don't break if | ||||
| 15224 | # this has a side comment. | ||||
| 15225 | set_forced_breakpoint($i) | ||||
| 15226 | unless ( | ||||
| 15227 | $type_sequence == ( | ||||
| 15228 | $last_colon_sequence_number + | ||||
| 15229 | TYPE_SEQUENCE_INCREMENT | ||||
| 15230 | ) | ||||
| 15231 | || $tokens_to_go[$max_index_to_go] eq '#' | ||||
| 15232 | ); | ||||
| 15233 | set_closing_breakpoint($i); | ||||
| 15234 | } ## end if ( $i_colon <= 0 ||...) | ||||
| 15235 | } ## end elsif ( $token eq '?' ) | ||||
| 15236 | } ## end if ($type_sequence) | ||||
| 15237 | |||||
| 15238 | #print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n"; | ||||
| 15239 | |||||
| 15240 | #------------------------------------------------------------ | ||||
| 15241 | # Handle Increasing Depth.. | ||||
| 15242 | # | ||||
| 15243 | # prepare for a new list when depth increases | ||||
| 15244 | # token $i is a '(','{', or '[' | ||||
| 15245 | #------------------------------------------------------------ | ||||
| 15246 | if ( $depth > $current_depth ) { | ||||
| 15247 | |||||
| 15248 | $breakpoint_stack[$depth] = $forced_breakpoint_count; | ||||
| 15249 | $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count; | ||||
| 15250 | $has_broken_sublist[$depth] = 0; | ||||
| 15251 | $identifier_count_stack[$depth] = 0; | ||||
| 15252 | $index_before_arrow[$depth] = -1; | ||||
| 15253 | $interrupted_list[$depth] = 0; | ||||
| 15254 | $item_count_stack[$depth] = 0; | ||||
| 15255 | $last_comma_index[$depth] = undef; | ||||
| 15256 | $last_dot_index[$depth] = undef; | ||||
| 15257 | $last_nonblank_type[$depth] = $last_nonblank_type; | ||||
| 15258 | $old_breakpoint_count_stack[$depth] = $old_breakpoint_count; | ||||
| 15259 | $opening_structure_index_stack[$depth] = $i; | ||||
| 15260 | $rand_or_list[$depth] = []; | ||||
| 15261 | $rfor_semicolon_list[$depth] = []; | ||||
| 15262 | $i_equals[$depth] = -1; | ||||
| 15263 | $want_comma_break[$depth] = 0; | ||||
| 15264 | $container_type[$depth] = | ||||
| 15265 | ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ ) | ||||
| 15266 | ? $last_nonblank_token | ||||
| 15267 | : ""; | ||||
| 15268 | $has_old_logical_breakpoints[$depth] = 0; | ||||
| 15269 | |||||
| 15270 | # if line ends here then signal closing token to break | ||||
| 15271 | if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' ) | ||||
| 15272 | { | ||||
| 15273 | set_closing_breakpoint($i); | ||||
| 15274 | } | ||||
| 15275 | |||||
| 15276 | # Not all lists of values should be vertically aligned.. | ||||
| 15277 | $dont_align[$depth] = | ||||
| 15278 | |||||
| 15279 | # code BLOCKS are handled at a higher level | ||||
| 15280 | ( $block_type ne "" ) | ||||
| 15281 | |||||
| 15282 | # certain paren lists | ||||
| 15283 | || ( $type eq '(' ) && ( | ||||
| 15284 | |||||
| 15285 | # it does not usually look good to align a list of | ||||
| 15286 | # identifiers in a parameter list, as in: | ||||
| 15287 | # my($var1, $var2, ...) | ||||
| 15288 | # (This test should probably be refined, for now I'm just | ||||
| 15289 | # testing for any keyword) | ||||
| 15290 | ( $last_nonblank_type eq 'k' ) | ||||
| 15291 | |||||
| 15292 | # a trailing '(' usually indicates a non-list | ||||
| 15293 | || ( $next_nonblank_type eq '(' ) | ||||
| 15294 | ); | ||||
| 15295 | |||||
| 15296 | # patch to outdent opening brace of long if/for/.. | ||||
| 15297 | # statements (like this one). See similar coding in | ||||
| 15298 | # set_continuation breaks. We have also catch it here for | ||||
| 15299 | # short line fragments which otherwise will not go through | ||||
| 15300 | # set_continuation_breaks. | ||||
| 15301 | if ( | ||||
| 15302 | $block_type | ||||
| 15303 | |||||
| 15304 | # if we have the ')' but not its '(' in this batch.. | ||||
| 15305 | && ( $last_nonblank_token eq ')' ) | ||||
| 15306 | && $mate_index_to_go[$i_last_nonblank_token] < 0 | ||||
| 15307 | |||||
| 15308 | # and user wants brace to left | ||||
| 15309 | && !$rOpts->{'opening-brace-always-on-right'} | ||||
| 15310 | |||||
| 15311 | && ( $type eq '{' ) # should be true | ||||
| 15312 | && ( $token eq '{' ) # should be true | ||||
| 15313 | ) | ||||
| 15314 | { | ||||
| 15315 | set_forced_breakpoint( $i - 1 ); | ||||
| 15316 | } ## end if ( $block_type && ( ...)) | ||||
| 15317 | } ## end if ( $depth > $current_depth) | ||||
| 15318 | |||||
| 15319 | #------------------------------------------------------------ | ||||
| 15320 | # Handle Decreasing Depth.. | ||||
| 15321 | # | ||||
| 15322 | # finish off any old list when depth decreases | ||||
| 15323 | # token $i is a ')','}', or ']' | ||||
| 15324 | #------------------------------------------------------------ | ||||
| 15325 | elsif ( $depth < $current_depth ) { | ||||
| 15326 | |||||
| 15327 | check_for_new_minimum_depth($depth); | ||||
| 15328 | |||||
| 15329 | # force all outer logical containers to break after we see on | ||||
| 15330 | # old breakpoint | ||||
| 15331 | $has_old_logical_breakpoints[$depth] ||= | ||||
| 15332 | $has_old_logical_breakpoints[$current_depth]; | ||||
| 15333 | |||||
| 15334 | # Patch to break between ') {' if the paren list is broken. | ||||
| 15335 | # There is similar logic in set_continuation_breaks for | ||||
| 15336 | # non-broken lists. | ||||
| 15337 | if ( $token eq ')' | ||||
| 15338 | && $next_nonblank_block_type | ||||
| 15339 | && $interrupted_list[$current_depth] | ||||
| 15340 | && $next_nonblank_type eq '{' | ||||
| 15341 | && !$rOpts->{'opening-brace-always-on-right'} ) | ||||
| 15342 | { | ||||
| 15343 | set_forced_breakpoint($i); | ||||
| 15344 | } ## end if ( $token eq ')' && ... | ||||
| 15345 | |||||
| 15346 | #print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n"; | ||||
| 15347 | |||||
| 15348 | # set breaks at commas if necessary | ||||
| 15349 | my ( $bp_count, $do_not_break_apart ) = | ||||
| 15350 | set_comma_breakpoints($current_depth); | ||||
| 15351 | |||||
| 15352 | my $i_opening = $opening_structure_index_stack[$current_depth]; | ||||
| 15353 | my $saw_opening_structure = ( $i_opening >= 0 ); | ||||
| 15354 | |||||
| 15355 | # this term is long if we had to break at interior commas.. | ||||
| 15356 | my $is_long_term = $bp_count > 0; | ||||
| 15357 | |||||
| 15358 | # If this is a short container with one or more comma arrows, | ||||
| 15359 | # then we will mark it as a long term to open it if requested. | ||||
| 15360 | # $rOpts_comma_arrow_breakpoints = | ||||
| 15361 | # 0 - open only if comma precedes closing brace | ||||
| 15362 | # 1 - stable: except for one line blocks | ||||
| 15363 | # 2 - try to form 1 line blocks | ||||
| 15364 | # 3 - ignore => | ||||
| 15365 | # 4 - always open up if vt=0 | ||||
| 15366 | # 5 - stable: even for one line blocks if vt=0 | ||||
| 15367 | if ( !$is_long_term | ||||
| 15368 | && $tokens_to_go[$i_opening] =~ /^[\(\{\[]$/ | ||||
| 15369 | && $index_before_arrow[ $depth + 1 ] > 0 | ||||
| 15370 | && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] } | ||||
| 15371 | ) | ||||
| 15372 | { | ||||
| 15373 | $is_long_term = $rOpts_comma_arrow_breakpoints == 4 | ||||
| 15374 | || ( $rOpts_comma_arrow_breakpoints == 0 | ||||
| 15375 | && $last_nonblank_token eq ',' ) | ||||
| 15376 | || ( $rOpts_comma_arrow_breakpoints == 5 | ||||
| 15377 | && $old_breakpoint_to_go[$i_opening] ); | ||||
| 15378 | } ## end if ( !$is_long_term &&...) | ||||
| 15379 | |||||
| 15380 | # mark term as long if the length between opening and closing | ||||
| 15381 | # parens exceeds allowed line length | ||||
| 15382 | if ( !$is_long_term && $saw_opening_structure ) { | ||||
| 15383 | my $i_opening_minus = find_token_starting_list($i_opening); | ||||
| 15384 | |||||
| 15385 | # Note: we have to allow for one extra space after a | ||||
| 15386 | # closing token so that we do not strand a comma or | ||||
| 15387 | # semicolon, hence the '>=' here (oneline.t) | ||||
| 15388 | $is_long_term = | ||||
| 15389 | excess_line_length( $i_opening_minus, $i ) >= 0; | ||||
| 15390 | } ## end if ( !$is_long_term &&...) | ||||
| 15391 | |||||
| 15392 | # We've set breaks after all comma-arrows. Now we have to | ||||
| 15393 | # undo them if this can be a one-line block | ||||
| 15394 | # (the only breakpoints set will be due to comma-arrows) | ||||
| 15395 | if ( | ||||
| 15396 | |||||
| 15397 | # user doesn't require breaking after all comma-arrows | ||||
| 15398 | ( $rOpts_comma_arrow_breakpoints != 0 ) | ||||
| 15399 | && ( $rOpts_comma_arrow_breakpoints != 4 ) | ||||
| 15400 | |||||
| 15401 | # and if the opening structure is in this batch | ||||
| 15402 | && $saw_opening_structure | ||||
| 15403 | |||||
| 15404 | # and either on the same old line | ||||
| 15405 | && ( | ||||
| 15406 | $old_breakpoint_count_stack[$current_depth] == | ||||
| 15407 | $last_old_breakpoint_count | ||||
| 15408 | |||||
| 15409 | # or user wants to form long blocks with arrows | ||||
| 15410 | || $rOpts_comma_arrow_breakpoints == 2 | ||||
| 15411 | ) | ||||
| 15412 | |||||
| 15413 | # and we made some breakpoints between the opening and closing | ||||
| 15414 | && ( $breakpoint_undo_stack[$current_depth] < | ||||
| 15415 | $forced_breakpoint_undo_count ) | ||||
| 15416 | |||||
| 15417 | # and this block is short enough to fit on one line | ||||
| 15418 | # Note: use < because need 1 more space for possible comma | ||||
| 15419 | && !$is_long_term | ||||
| 15420 | |||||
| 15421 | ) | ||||
| 15422 | { | ||||
| 15423 | undo_forced_breakpoint_stack( | ||||
| 15424 | $breakpoint_undo_stack[$current_depth] ); | ||||
| 15425 | } ## end if ( ( $rOpts_comma_arrow_breakpoints...)) | ||||
| 15426 | |||||
| 15427 | # now see if we have any comma breakpoints left | ||||
| 15428 | my $has_comma_breakpoints = | ||||
| 15429 | ( $breakpoint_stack[$current_depth] != | ||||
| 15430 | $forced_breakpoint_count ); | ||||
| 15431 | |||||
| 15432 | # update broken-sublist flag of the outer container | ||||
| 15433 | $has_broken_sublist[$depth] = | ||||
| 15434 | $has_broken_sublist[$depth] | ||||
| 15435 | || $has_broken_sublist[$current_depth] | ||||
| 15436 | || $is_long_term | ||||
| 15437 | || $has_comma_breakpoints; | ||||
| 15438 | |||||
| 15439 | # Having come to the closing ')', '}', or ']', now we have to decide if we | ||||
| 15440 | # should 'open up' the structure by placing breaks at the opening and | ||||
| 15441 | # closing containers. This is a tricky decision. Here are some of the | ||||
| 15442 | # basic considerations: | ||||
| 15443 | # | ||||
| 15444 | # -If this is a BLOCK container, then any breakpoints will have already | ||||
| 15445 | # been set (and according to user preferences), so we need do nothing here. | ||||
| 15446 | # | ||||
| 15447 | # -If we have a comma-separated list for which we can align the list items, | ||||
| 15448 | # then we need to do so because otherwise the vertical aligner cannot | ||||
| 15449 | # currently do the alignment. | ||||
| 15450 | # | ||||
| 15451 | # -If this container does itself contain a container which has been broken | ||||
| 15452 | # open, then it should be broken open to properly show the structure. | ||||
| 15453 | # | ||||
| 15454 | # -If there is nothing to align, and no other reason to break apart, | ||||
| 15455 | # then do not do it. | ||||
| 15456 | # | ||||
| 15457 | # We will not break open the parens of a long but 'simple' logical expression. | ||||
| 15458 | # For example: | ||||
| 15459 | # | ||||
| 15460 | # This is an example of a simple logical expression and its formatting: | ||||
| 15461 | # | ||||
| 15462 | # if ( $bigwasteofspace1 && $bigwasteofspace2 | ||||
| 15463 | # || $bigwasteofspace3 && $bigwasteofspace4 ) | ||||
| 15464 | # | ||||
| 15465 | # Most people would prefer this than the 'spacey' version: | ||||
| 15466 | # | ||||
| 15467 | # if ( | ||||
| 15468 | # $bigwasteofspace1 && $bigwasteofspace2 | ||||
| 15469 | # || $bigwasteofspace3 && $bigwasteofspace4 | ||||
| 15470 | # ) | ||||
| 15471 | # | ||||
| 15472 | # To illustrate the rules for breaking logical expressions, consider: | ||||
| 15473 | # | ||||
| 15474 | # FULLY DENSE: | ||||
| 15475 | # if ( $opt_excl | ||||
| 15476 | # and ( exists $ids_excl_uc{$id_uc} | ||||
| 15477 | # or grep $id_uc =~ /$_/, @ids_excl_uc )) | ||||
| 15478 | # | ||||
| 15479 | # This is on the verge of being difficult to read. The current default is to | ||||
| 15480 | # open it up like this: | ||||
| 15481 | # | ||||
| 15482 | # DEFAULT: | ||||
| 15483 | # if ( | ||||
| 15484 | # $opt_excl | ||||
| 15485 | # and ( exists $ids_excl_uc{$id_uc} | ||||
| 15486 | # or grep $id_uc =~ /$_/, @ids_excl_uc ) | ||||
| 15487 | # ) | ||||
| 15488 | # | ||||
| 15489 | # This is a compromise which tries to avoid being too dense and to spacey. | ||||
| 15490 | # A more spaced version would be: | ||||
| 15491 | # | ||||
| 15492 | # SPACEY: | ||||
| 15493 | # if ( | ||||
| 15494 | # $opt_excl | ||||
| 15495 | # and ( | ||||
| 15496 | # exists $ids_excl_uc{$id_uc} | ||||
| 15497 | # or grep $id_uc =~ /$_/, @ids_excl_uc | ||||
| 15498 | # ) | ||||
| 15499 | # ) | ||||
| 15500 | # | ||||
| 15501 | # Some people might prefer the spacey version -- an option could be added. The | ||||
| 15502 | # innermost expression contains a long block '( exists $ids_... ')'. | ||||
| 15503 | # | ||||
| 15504 | # Here is how the logic goes: We will force a break at the 'or' that the | ||||
| 15505 | # innermost expression contains, but we will not break apart its opening and | ||||
| 15506 | # closing containers because (1) it contains no multi-line sub-containers itself, | ||||
| 15507 | # and (2) there is no alignment to be gained by breaking it open like this | ||||
| 15508 | # | ||||
| 15509 | # and ( | ||||
| 15510 | # exists $ids_excl_uc{$id_uc} | ||||
| 15511 | # or grep $id_uc =~ /$_/, @ids_excl_uc | ||||
| 15512 | # ) | ||||
| 15513 | # | ||||
| 15514 | # (although this looks perfectly ok and might be good for long expressions). The | ||||
| 15515 | # outer 'if' container, though, contains a broken sub-container, so it will be | ||||
| 15516 | # broken open to avoid too much density. Also, since it contains no 'or's, there | ||||
| 15517 | # will be a forced break at its 'and'. | ||||
| 15518 | |||||
| 15519 | # set some flags telling something about this container.. | ||||
| 15520 | my $is_simple_logical_expression = 0; | ||||
| 15521 | if ( $item_count_stack[$current_depth] == 0 | ||||
| 15522 | && $saw_opening_structure | ||||
| 15523 | && $tokens_to_go[$i_opening] eq '(' | ||||
| 15524 | && $is_logical_container{ $container_type[$current_depth] } | ||||
| 15525 | ) | ||||
| 15526 | { | ||||
| 15527 | |||||
| 15528 | # This seems to be a simple logical expression with | ||||
| 15529 | # no existing breakpoints. Set a flag to prevent | ||||
| 15530 | # opening it up. | ||||
| 15531 | if ( !$has_comma_breakpoints ) { | ||||
| 15532 | $is_simple_logical_expression = 1; | ||||
| 15533 | } | ||||
| 15534 | |||||
| 15535 | # This seems to be a simple logical expression with | ||||
| 15536 | # breakpoints (broken sublists, for example). Break | ||||
| 15537 | # at all 'or's and '||'s. | ||||
| 15538 | else { | ||||
| 15539 | set_logical_breakpoints($current_depth); | ||||
| 15540 | } | ||||
| 15541 | } ## end if ( $item_count_stack...) | ||||
| 15542 | |||||
| 15543 | if ( $is_long_term | ||||
| 15544 | && @{ $rfor_semicolon_list[$current_depth] } ) | ||||
| 15545 | { | ||||
| 15546 | set_for_semicolon_breakpoints($current_depth); | ||||
| 15547 | |||||
| 15548 | # open up a long 'for' or 'foreach' container to allow | ||||
| 15549 | # leading term alignment unless -lp is used. | ||||
| 15550 | $has_comma_breakpoints = 1 | ||||
| 15551 | unless $rOpts_line_up_parentheses; | ||||
| 15552 | } ## end if ( $is_long_term && ...) | ||||
| 15553 | |||||
| 15554 | if ( | ||||
| 15555 | |||||
| 15556 | # breaks for code BLOCKS are handled at a higher level | ||||
| 15557 | !$block_type | ||||
| 15558 | |||||
| 15559 | # we do not need to break at the top level of an 'if' | ||||
| 15560 | # type expression | ||||
| 15561 | && !$is_simple_logical_expression | ||||
| 15562 | |||||
| 15563 | ## modification to keep ': (' containers vertically tight; | ||||
| 15564 | ## but probably better to let user set -vt=1 to avoid | ||||
| 15565 | ## inconsistency with other paren types | ||||
| 15566 | ## && ($container_type[$current_depth] ne ':') | ||||
| 15567 | |||||
| 15568 | # otherwise, we require one of these reasons for breaking: | ||||
| 15569 | && ( | ||||
| 15570 | |||||
| 15571 | # - this term has forced line breaks | ||||
| 15572 | $has_comma_breakpoints | ||||
| 15573 | |||||
| 15574 | # - the opening container is separated from this batch | ||||
| 15575 | # for some reason (comment, blank line, code block) | ||||
| 15576 | # - this is a non-paren container spanning multiple lines | ||||
| 15577 | || !$saw_opening_structure | ||||
| 15578 | |||||
| 15579 | # - this is a long block contained in another breakable | ||||
| 15580 | # container | ||||
| 15581 | || ( $is_long_term | ||||
| 15582 | && $container_environment_to_go[$i_opening] ne | ||||
| 15583 | 'BLOCK' ) | ||||
| 15584 | ) | ||||
| 15585 | ) | ||||
| 15586 | { | ||||
| 15587 | |||||
| 15588 | # For -lp option, we must put a breakpoint before | ||||
| 15589 | # the token which has been identified as starting | ||||
| 15590 | # this indentation level. This is necessary for | ||||
| 15591 | # proper alignment. | ||||
| 15592 | if ( $rOpts_line_up_parentheses && $saw_opening_structure ) | ||||
| 15593 | { | ||||
| 15594 | my $item = $leading_spaces_to_go[ $i_opening + 1 ]; | ||||
| 15595 | if ( $i_opening + 1 < $max_index_to_go | ||||
| 15596 | && $types_to_go[ $i_opening + 1 ] eq 'b' ) | ||||
| 15597 | { | ||||
| 15598 | $item = $leading_spaces_to_go[ $i_opening + 2 ]; | ||||
| 15599 | } | ||||
| 15600 | if ( defined($item) ) { | ||||
| 15601 | my $i_start_2 = $item->get_STARTING_INDEX(); | ||||
| 15602 | if ( | ||||
| 15603 | defined($i_start_2) | ||||
| 15604 | |||||
| 15605 | # we are breaking after an opening brace, paren, | ||||
| 15606 | # so don't break before it too | ||||
| 15607 | && $i_start_2 ne $i_opening | ||||
| 15608 | ) | ||||
| 15609 | { | ||||
| 15610 | |||||
| 15611 | # Only break for breakpoints at the same | ||||
| 15612 | # indentation level as the opening paren | ||||
| 15613 | my $test1 = $nesting_depth_to_go[$i_opening]; | ||||
| 15614 | my $test2 = $nesting_depth_to_go[$i_start_2]; | ||||
| 15615 | if ( $test2 == $test1 ) { | ||||
| 15616 | set_forced_breakpoint( $i_start_2 - 1 ); | ||||
| 15617 | } | ||||
| 15618 | } ## end if ( defined($i_start_2...)) | ||||
| 15619 | } ## end if ( defined($item) ) | ||||
| 15620 | } ## end if ( $rOpts_line_up_parentheses...) | ||||
| 15621 | |||||
| 15622 | # break after opening structure. | ||||
| 15623 | # note: break before closing structure will be automatic | ||||
| 15624 | if ( $minimum_depth <= $current_depth ) { | ||||
| 15625 | |||||
| 15626 | set_forced_breakpoint($i_opening) | ||||
| 15627 | unless ( $do_not_break_apart | ||||
| 15628 | || is_unbreakable_container($current_depth) ); | ||||
| 15629 | |||||
| 15630 | # break at ',' of lower depth level before opening token | ||||
| 15631 | if ( $last_comma_index[$depth] ) { | ||||
| 15632 | set_forced_breakpoint( $last_comma_index[$depth] ); | ||||
| 15633 | } | ||||
| 15634 | |||||
| 15635 | # break at '.' of lower depth level before opening token | ||||
| 15636 | if ( $last_dot_index[$depth] ) { | ||||
| 15637 | set_forced_breakpoint( $last_dot_index[$depth] ); | ||||
| 15638 | } | ||||
| 15639 | |||||
| 15640 | # break before opening structure if preceded by another | ||||
| 15641 | # closing structure and a comma. This is normally | ||||
| 15642 | # done by the previous closing brace, but not | ||||
| 15643 | # if it was a one-line block. | ||||
| 15644 | if ( $i_opening > 2 ) { | ||||
| 15645 | my $i_prev = | ||||
| 15646 | ( $types_to_go[ $i_opening - 1 ] eq 'b' ) | ||||
| 15647 | ? $i_opening - 2 | ||||
| 15648 | : $i_opening - 1; | ||||
| 15649 | |||||
| 15650 | if ( $types_to_go[$i_prev] eq ',' | ||||
| 15651 | && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ ) | ||||
| 15652 | { | ||||
| 15653 | set_forced_breakpoint($i_prev); | ||||
| 15654 | } | ||||
| 15655 | |||||
| 15656 | # also break before something like ':(' or '?(' | ||||
| 15657 | # if appropriate. | ||||
| 15658 | elsif ( | ||||
| 15659 | $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ ) | ||||
| 15660 | { | ||||
| 15661 | my $token_prev = $tokens_to_go[$i_prev]; | ||||
| 15662 | if ( $want_break_before{$token_prev} ) { | ||||
| 15663 | set_forced_breakpoint($i_prev); | ||||
| 15664 | } | ||||
| 15665 | } ## end elsif ( $types_to_go[$i_prev...]) | ||||
| 15666 | } ## end if ( $i_opening > 2 ) | ||||
| 15667 | } ## end if ( $minimum_depth <=...) | ||||
| 15668 | |||||
| 15669 | # break after comma following closing structure | ||||
| 15670 | if ( $next_type eq ',' ) { | ||||
| 15671 | set_forced_breakpoint( $i + 1 ); | ||||
| 15672 | } | ||||
| 15673 | |||||
| 15674 | # break before an '=' following closing structure | ||||
| 15675 | if ( | ||||
| 15676 | $is_assignment{$next_nonblank_type} | ||||
| 15677 | && ( $breakpoint_stack[$current_depth] != | ||||
| 15678 | $forced_breakpoint_count ) | ||||
| 15679 | ) | ||||
| 15680 | { | ||||
| 15681 | set_forced_breakpoint($i); | ||||
| 15682 | } ## end if ( $is_assignment{$next_nonblank_type...}) | ||||
| 15683 | |||||
| 15684 | # break at any comma before the opening structure Added | ||||
| 15685 | # for -lp, but seems to be good in general. It isn't | ||||
| 15686 | # obvious how far back to look; the '5' below seems to | ||||
| 15687 | # work well and will catch the comma in something like | ||||
| 15688 | # push @list, myfunc( $param, $param, .. | ||||
| 15689 | |||||
| 15690 | my $icomma = $last_comma_index[$depth]; | ||||
| 15691 | if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) { | ||||
| 15692 | unless ( $forced_breakpoint_to_go[$icomma] ) { | ||||
| 15693 | set_forced_breakpoint($icomma); | ||||
| 15694 | } | ||||
| 15695 | } | ||||
| 15696 | } # end logic to open up a container | ||||
| 15697 | |||||
| 15698 | # Break open a logical container open if it was already open | ||||
| 15699 | elsif ($is_simple_logical_expression | ||||
| 15700 | && $has_old_logical_breakpoints[$current_depth] ) | ||||
| 15701 | { | ||||
| 15702 | set_logical_breakpoints($current_depth); | ||||
| 15703 | } | ||||
| 15704 | |||||
| 15705 | # Handle long container which does not get opened up | ||||
| 15706 | elsif ($is_long_term) { | ||||
| 15707 | |||||
| 15708 | # must set fake breakpoint to alert outer containers that | ||||
| 15709 | # they are complex | ||||
| 15710 | set_fake_breakpoint(); | ||||
| 15711 | } ## end elsif ($is_long_term) | ||||
| 15712 | |||||
| 15713 | } ## end elsif ( $depth < $current_depth) | ||||
| 15714 | |||||
| 15715 | #------------------------------------------------------------ | ||||
| 15716 | # Handle this token | ||||
| 15717 | #------------------------------------------------------------ | ||||
| 15718 | |||||
| 15719 | $current_depth = $depth; | ||||
| 15720 | |||||
| 15721 | # handle comma-arrow | ||||
| 15722 | if ( $type eq '=>' ) { | ||||
| 15723 | next if ( $last_nonblank_type eq '=>' ); | ||||
| 15724 | next if $rOpts_break_at_old_comma_breakpoints; | ||||
| 15725 | next if $rOpts_comma_arrow_breakpoints == 3; | ||||
| 15726 | $want_comma_break[$depth] = 1; | ||||
| 15727 | $index_before_arrow[$depth] = $i_last_nonblank_token; | ||||
| 15728 | next; | ||||
| 15729 | } ## end if ( $type eq '=>' ) | ||||
| 15730 | |||||
| 15731 | elsif ( $type eq '.' ) { | ||||
| 15732 | $last_dot_index[$depth] = $i; | ||||
| 15733 | } | ||||
| 15734 | |||||
| 15735 | # Turn off alignment if we are sure that this is not a list | ||||
| 15736 | # environment. To be safe, we will do this if we see certain | ||||
| 15737 | # non-list tokens, such as ';', and also the environment is | ||||
| 15738 | # not a list. Note that '=' could be in any of the = operators | ||||
| 15739 | # (lextest.t). We can't just use the reported environment | ||||
| 15740 | # because it can be incorrect in some cases. | ||||
| 15741 | elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} ) | ||||
| 15742 | && $container_environment_to_go[$i] ne 'LIST' ) | ||||
| 15743 | { | ||||
| 15744 | $dont_align[$depth] = 1; | ||||
| 15745 | $want_comma_break[$depth] = 0; | ||||
| 15746 | $index_before_arrow[$depth] = -1; | ||||
| 15747 | } ## end elsif ( ( $type =~ /^[\;\<\>\~]$/...)) | ||||
| 15748 | |||||
| 15749 | # now just handle any commas | ||||
| 15750 | next unless ( $type eq ',' ); | ||||
| 15751 | |||||
| 15752 | $last_dot_index[$depth] = undef; | ||||
| 15753 | $last_comma_index[$depth] = $i; | ||||
| 15754 | |||||
| 15755 | # break here if this comma follows a '=>' | ||||
| 15756 | # but not if there is a side comment after the comma | ||||
| 15757 | if ( $want_comma_break[$depth] ) { | ||||
| 15758 | |||||
| 15759 | if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) { | ||||
| 15760 | if ($rOpts_comma_arrow_breakpoints) { | ||||
| 15761 | $want_comma_break[$depth] = 0; | ||||
| 15762 | ##$index_before_arrow[$depth] = -1; | ||||
| 15763 | next; | ||||
| 15764 | } | ||||
| 15765 | } | ||||
| 15766 | |||||
| 15767 | set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' ); | ||||
| 15768 | |||||
| 15769 | # break before the previous token if it looks safe | ||||
| 15770 | # Example of something that we will not try to break before: | ||||
| 15771 | # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt}, | ||||
| 15772 | # Also we don't want to break at a binary operator (like +): | ||||
| 15773 | # $c->createOval( | ||||
| 15774 | # $x + $R, $y + | ||||
| 15775 | # $R => $x - $R, | ||||
| 15776 | # $y - $R, -fill => 'black', | ||||
| 15777 | # ); | ||||
| 15778 | my $ibreak = $index_before_arrow[$depth] - 1; | ||||
| 15779 | if ( $ibreak > 0 | ||||
| 15780 | && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ ) | ||||
| 15781 | { | ||||
| 15782 | if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- } | ||||
| 15783 | if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- } | ||||
| 15784 | if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) { | ||||
| 15785 | |||||
| 15786 | # don't break pointer calls, such as the following: | ||||
| 15787 | # File::Spec->curdir => 1, | ||||
| 15788 | # (This is tokenized as adjacent 'w' tokens) | ||||
| 15789 | ##if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) { | ||||
| 15790 | |||||
| 15791 | # And don't break before a comma, as in the following: | ||||
| 15792 | # ( LONGER_THAN,=> 1, | ||||
| 15793 | # EIGHTY_CHARACTERS,=> 2, | ||||
| 15794 | # CAUSES_FORMATTING,=> 3, | ||||
| 15795 | # LIKE_THIS,=> 4, | ||||
| 15796 | # ); | ||||
| 15797 | # This example is for -tso but should be general rule | ||||
| 15798 | if ( $tokens_to_go[ $ibreak + 1 ] ne '->' | ||||
| 15799 | && $tokens_to_go[ $ibreak + 1 ] ne ',' ) | ||||
| 15800 | { | ||||
| 15801 | set_forced_breakpoint($ibreak); | ||||
| 15802 | } | ||||
| 15803 | } ## end if ( $types_to_go[$ibreak...]) | ||||
| 15804 | } ## end if ( $ibreak > 0 && $tokens_to_go...) | ||||
| 15805 | |||||
| 15806 | $want_comma_break[$depth] = 0; | ||||
| 15807 | $index_before_arrow[$depth] = -1; | ||||
| 15808 | |||||
| 15809 | # handle list which mixes '=>'s and ','s: | ||||
| 15810 | # treat any list items so far as an interrupted list | ||||
| 15811 | $interrupted_list[$depth] = 1; | ||||
| 15812 | next; | ||||
| 15813 | } ## end if ( $want_comma_break...) | ||||
| 15814 | |||||
| 15815 | # break after all commas above starting depth | ||||
| 15816 | if ( $depth < $starting_depth && !$dont_align[$depth] ) { | ||||
| 15817 | set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' ); | ||||
| 15818 | next; | ||||
| 15819 | } | ||||
| 15820 | |||||
| 15821 | # add this comma to the list.. | ||||
| 15822 | my $item_count = $item_count_stack[$depth]; | ||||
| 15823 | if ( $item_count == 0 ) { | ||||
| 15824 | |||||
| 15825 | # but do not form a list with no opening structure | ||||
| 15826 | # for example: | ||||
| 15827 | |||||
| 15828 | # open INFILE_COPY, ">$input_file_copy" | ||||
| 15829 | # or die ("very long message"); | ||||
| 15830 | |||||
| 15831 | if ( ( $opening_structure_index_stack[$depth] < 0 ) | ||||
| 15832 | && $container_environment_to_go[$i] eq 'BLOCK' ) | ||||
| 15833 | { | ||||
| 15834 | $dont_align[$depth] = 1; | ||||
| 15835 | } | ||||
| 15836 | } ## end if ( $item_count == 0 ) | ||||
| 15837 | |||||
| 15838 | $comma_index[$depth][$item_count] = $i; | ||||
| 15839 | ++$item_count_stack[$depth]; | ||||
| 15840 | if ( $last_nonblank_type =~ /^[iR\]]$/ ) { | ||||
| 15841 | $identifier_count_stack[$depth]++; | ||||
| 15842 | } | ||||
| 15843 | } ## end while ( ++$i <= $max_index_to_go) | ||||
| 15844 | |||||
| 15845 | #------------------------------------------- | ||||
| 15846 | # end of loop over all tokens in this batch | ||||
| 15847 | #------------------------------------------- | ||||
| 15848 | |||||
| 15849 | # set breaks for any unfinished lists .. | ||||
| 15850 | for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) { | ||||
| 15851 | |||||
| 15852 | $interrupted_list[$dd] = 1; | ||||
| 15853 | $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth ); | ||||
| 15854 | set_comma_breakpoints($dd); | ||||
| 15855 | set_logical_breakpoints($dd) | ||||
| 15856 | if ( $has_old_logical_breakpoints[$dd] ); | ||||
| 15857 | set_for_semicolon_breakpoints($dd); | ||||
| 15858 | |||||
| 15859 | # break open container... | ||||
| 15860 | my $i_opening = $opening_structure_index_stack[$dd]; | ||||
| 15861 | set_forced_breakpoint($i_opening) | ||||
| 15862 | unless ( | ||||
| 15863 | is_unbreakable_container($dd) | ||||
| 15864 | |||||
| 15865 | # Avoid a break which would place an isolated ' or " | ||||
| 15866 | # on a line | ||||
| 15867 | || ( $type eq 'Q' | ||||
| 15868 | && $i_opening >= $max_index_to_go - 2 | ||||
| 15869 | && $token =~ /^['"]$/ ) | ||||
| 15870 | ); | ||||
| 15871 | } ## end for ( my $dd = $current_depth...) | ||||
| 15872 | |||||
| 15873 | # Return a flag indicating if the input file had some good breakpoints. | ||||
| 15874 | # This flag will be used to force a break in a line shorter than the | ||||
| 15875 | # allowed line length. | ||||
| 15876 | if ( $has_old_logical_breakpoints[$current_depth] ) { | ||||
| 15877 | $saw_good_breakpoint = 1; | ||||
| 15878 | } | ||||
| 15879 | |||||
| 15880 | # A complex line with one break at an = has a good breakpoint. | ||||
| 15881 | # This is not complex ($total_depth_variation=0): | ||||
| 15882 | # $res1 | ||||
| 15883 | # = 10; | ||||
| 15884 | # | ||||
| 15885 | # This is complex ($total_depth_variation=6): | ||||
| 15886 | # $res2 = | ||||
| 15887 | # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert')); | ||||
| 15888 | elsif ($i_old_assignment_break | ||||
| 15889 | && $total_depth_variation > 4 | ||||
| 15890 | && $old_breakpoint_count == 1 ) | ||||
| 15891 | { | ||||
| 15892 | $saw_good_breakpoint = 1; | ||||
| 15893 | } ## end elsif ( $i_old_assignment_break...) | ||||
| 15894 | |||||
| 15895 | return $saw_good_breakpoint; | ||||
| 15896 | } ## end sub scan_list | ||||
| 15897 | } # end scan_list | ||||
| 15898 | |||||
| 15899 | sub find_token_starting_list { | ||||
| 15900 | |||||
| 15901 | # When testing to see if a block will fit on one line, some | ||||
| 15902 | # previous token(s) may also need to be on the line; particularly | ||||
| 15903 | # if this is a sub call. So we will look back at least one | ||||
| 15904 | # token. NOTE: This isn't perfect, but not critical, because | ||||
| 15905 | # if we mis-identify a block, it will be wrapped and therefore | ||||
| 15906 | # fixed the next time it is formatted. | ||||
| 15907 | my $i_opening_paren = shift; | ||||
| 15908 | my $i_opening_minus = $i_opening_paren; | ||||
| 15909 | my $im1 = $i_opening_paren - 1; | ||||
| 15910 | my $im2 = $i_opening_paren - 2; | ||||
| 15911 | my $im3 = $i_opening_paren - 3; | ||||
| 15912 | my $typem1 = $types_to_go[$im1]; | ||||
| 15913 | my $typem2 = $im2 >= 0 ? $types_to_go[$im2] : 'b'; | ||||
| 15914 | if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) { | ||||
| 15915 | $i_opening_minus = $i_opening_paren; | ||||
| 15916 | } | ||||
| 15917 | elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) { | ||||
| 15918 | $i_opening_minus = $im1 if $im1 >= 0; | ||||
| 15919 | |||||
| 15920 | # walk back to improve length estimate | ||||
| 15921 | for ( my $j = $im1 ; $j >= 0 ; $j-- ) { | ||||
| 15922 | last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ ); | ||||
| 15923 | $i_opening_minus = $j; | ||||
| 15924 | } | ||||
| 15925 | if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ } | ||||
| 15926 | } | ||||
| 15927 | elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 } | ||||
| 15928 | elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) { | ||||
| 15929 | $i_opening_minus = $im2; | ||||
| 15930 | } | ||||
| 15931 | return $i_opening_minus; | ||||
| 15932 | } | ||||
| 15933 | |||||
| 15934 | { # begin set_comma_breakpoints_do | ||||
| 15935 | |||||
| 15936 | 2 | 100ns | my %is_keyword_with_special_leading_term; | ||
| 15937 | |||||
| 15938 | # spent 12µs within Perl::Tidy::Formatter::BEGIN@15938 which was called:
# once (12µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 15945 | ||||
| 15939 | |||||
| 15940 | # These keywords have prototypes which allow a special leading item | ||||
| 15941 | # followed by a list | ||||
| 15942 | 1 | 2µs | @_ = | ||
| 15943 | qw(formline grep kill map printf sprintf push chmod join pack unshift); | ||||
| 15944 | 1 | 12µs | @is_keyword_with_special_leading_term{@_} = (1) x scalar(@_); | ||
| 15945 | 1 | 2.92ms | 1 | 12µs | } # spent 12µs making 1 call to Perl::Tidy::Formatter::BEGIN@15938 |
| 15946 | |||||
| 15947 | sub set_comma_breakpoints_do { | ||||
| 15948 | |||||
| 15949 | # Given a list with some commas, set breakpoints at some of the | ||||
| 15950 | # commas, if necessary, to make it easy to read. This list is | ||||
| 15951 | # an example: | ||||
| 15952 | my ( | ||||
| 15953 | $depth, $i_opening_paren, $i_closing_paren, | ||||
| 15954 | $item_count, $identifier_count, $rcomma_index, | ||||
| 15955 | $next_nonblank_type, $list_type, $interrupted, | ||||
| 15956 | $rdo_not_break_apart, $must_break_open, | ||||
| 15957 | ) = @_; | ||||
| 15958 | |||||
| 15959 | # nothing to do if no commas seen | ||||
| 15960 | return if ( $item_count < 1 ); | ||||
| 15961 | my $i_first_comma = $$rcomma_index[0]; | ||||
| 15962 | my $i_true_last_comma = $$rcomma_index[ $item_count - 1 ]; | ||||
| 15963 | my $i_last_comma = $i_true_last_comma; | ||||
| 15964 | if ( $i_last_comma >= $max_index_to_go ) { | ||||
| 15965 | $i_last_comma = $$rcomma_index[ --$item_count - 1 ]; | ||||
| 15966 | return if ( $item_count < 1 ); | ||||
| 15967 | } | ||||
| 15968 | |||||
| 15969 | #--------------------------------------------------------------- | ||||
| 15970 | # find lengths of all items in the list to calculate page layout | ||||
| 15971 | #--------------------------------------------------------------- | ||||
| 15972 | my $comma_count = $item_count; | ||||
| 15973 | my @item_lengths; | ||||
| 15974 | my @i_term_begin; | ||||
| 15975 | my @i_term_end; | ||||
| 15976 | my @i_term_comma; | ||||
| 15977 | my $i_prev_plus; | ||||
| 15978 | my @max_length = ( 0, 0 ); | ||||
| 15979 | my $first_term_length; | ||||
| 15980 | my $i = $i_opening_paren; | ||||
| 15981 | my $is_odd = 1; | ||||
| 15982 | |||||
| 15983 | for ( my $j = 0 ; $j < $comma_count ; $j++ ) { | ||||
| 15984 | $is_odd = 1 - $is_odd; | ||||
| 15985 | $i_prev_plus = $i + 1; | ||||
| 15986 | $i = $$rcomma_index[$j]; | ||||
| 15987 | |||||
| 15988 | my $i_term_end = | ||||
| 15989 | ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1; | ||||
| 15990 | my $i_term_begin = | ||||
| 15991 | ( $types_to_go[$i_prev_plus] eq 'b' ) | ||||
| 15992 | ? $i_prev_plus + 1 | ||||
| 15993 | : $i_prev_plus; | ||||
| 15994 | push @i_term_begin, $i_term_begin; | ||||
| 15995 | push @i_term_end, $i_term_end; | ||||
| 15996 | push @i_term_comma, $i; | ||||
| 15997 | |||||
| 15998 | # note: currently adding 2 to all lengths (for comma and space) | ||||
| 15999 | my $length = | ||||
| 16000 | 2 + token_sequence_length( $i_term_begin, $i_term_end ); | ||||
| 16001 | push @item_lengths, $length; | ||||
| 16002 | |||||
| 16003 | if ( $j == 0 ) { | ||||
| 16004 | $first_term_length = $length; | ||||
| 16005 | } | ||||
| 16006 | else { | ||||
| 16007 | |||||
| 16008 | if ( $length > $max_length[$is_odd] ) { | ||||
| 16009 | $max_length[$is_odd] = $length; | ||||
| 16010 | } | ||||
| 16011 | } | ||||
| 16012 | } | ||||
| 16013 | |||||
| 16014 | # now we have to make a distinction between the comma count and item | ||||
| 16015 | # count, because the item count will be one greater than the comma | ||||
| 16016 | # count if the last item is not terminated with a comma | ||||
| 16017 | my $i_b = | ||||
| 16018 | ( $types_to_go[ $i_last_comma + 1 ] eq 'b' ) | ||||
| 16019 | ? $i_last_comma + 1 | ||||
| 16020 | : $i_last_comma; | ||||
| 16021 | my $i_e = | ||||
| 16022 | ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' ) | ||||
| 16023 | ? $i_closing_paren - 2 | ||||
| 16024 | : $i_closing_paren - 1; | ||||
| 16025 | my $i_effective_last_comma = $i_last_comma; | ||||
| 16026 | |||||
| 16027 | my $last_item_length = token_sequence_length( $i_b + 1, $i_e ); | ||||
| 16028 | |||||
| 16029 | if ( $last_item_length > 0 ) { | ||||
| 16030 | |||||
| 16031 | # add 2 to length because other lengths include a comma and a blank | ||||
| 16032 | $last_item_length += 2; | ||||
| 16033 | push @item_lengths, $last_item_length; | ||||
| 16034 | push @i_term_begin, $i_b + 1; | ||||
| 16035 | push @i_term_end, $i_e; | ||||
| 16036 | push @i_term_comma, undef; | ||||
| 16037 | |||||
| 16038 | my $i_odd = $item_count % 2; | ||||
| 16039 | |||||
| 16040 | if ( $last_item_length > $max_length[$i_odd] ) { | ||||
| 16041 | $max_length[$i_odd] = $last_item_length; | ||||
| 16042 | } | ||||
| 16043 | |||||
| 16044 | $item_count++; | ||||
| 16045 | $i_effective_last_comma = $i_e + 1; | ||||
| 16046 | |||||
| 16047 | if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) { | ||||
| 16048 | $identifier_count++; | ||||
| 16049 | } | ||||
| 16050 | } | ||||
| 16051 | |||||
| 16052 | #--------------------------------------------------------------- | ||||
| 16053 | # End of length calculations | ||||
| 16054 | #--------------------------------------------------------------- | ||||
| 16055 | |||||
| 16056 | #--------------------------------------------------------------- | ||||
| 16057 | # Compound List Rule 1: | ||||
| 16058 | # Break at (almost) every comma for a list containing a broken | ||||
| 16059 | # sublist. This has higher priority than the Interrupted List | ||||
| 16060 | # Rule. | ||||
| 16061 | #--------------------------------------------------------------- | ||||
| 16062 | if ( $has_broken_sublist[$depth] ) { | ||||
| 16063 | |||||
| 16064 | # Break at every comma except for a comma between two | ||||
| 16065 | # simple, small terms. This prevents long vertical | ||||
| 16066 | # columns of, say, just 0's. | ||||
| 16067 | my $small_length = 10; # 2 + actual maximum length wanted | ||||
| 16068 | |||||
| 16069 | # We'll insert a break in long runs of small terms to | ||||
| 16070 | # allow alignment in uniform tables. | ||||
| 16071 | my $skipped_count = 0; | ||||
| 16072 | my $columns = table_columns_available($i_first_comma); | ||||
| 16073 | my $fields = int( $columns / $small_length ); | ||||
| 16074 | if ( $rOpts_maximum_fields_per_table | ||||
| 16075 | && $fields > $rOpts_maximum_fields_per_table ) | ||||
| 16076 | { | ||||
| 16077 | $fields = $rOpts_maximum_fields_per_table; | ||||
| 16078 | } | ||||
| 16079 | my $max_skipped_count = $fields - 1; | ||||
| 16080 | |||||
| 16081 | my $is_simple_last_term = 0; | ||||
| 16082 | my $is_simple_next_term = 0; | ||||
| 16083 | foreach my $j ( 0 .. $item_count ) { | ||||
| 16084 | $is_simple_last_term = $is_simple_next_term; | ||||
| 16085 | $is_simple_next_term = 0; | ||||
| 16086 | if ( $j < $item_count | ||||
| 16087 | && $i_term_end[$j] == $i_term_begin[$j] | ||||
| 16088 | && $item_lengths[$j] <= $small_length ) | ||||
| 16089 | { | ||||
| 16090 | $is_simple_next_term = 1; | ||||
| 16091 | } | ||||
| 16092 | next if $j == 0; | ||||
| 16093 | if ( $is_simple_last_term | ||||
| 16094 | && $is_simple_next_term | ||||
| 16095 | && $skipped_count < $max_skipped_count ) | ||||
| 16096 | { | ||||
| 16097 | $skipped_count++; | ||||
| 16098 | } | ||||
| 16099 | else { | ||||
| 16100 | $skipped_count = 0; | ||||
| 16101 | my $i = $i_term_comma[ $j - 1 ]; | ||||
| 16102 | last unless defined $i; | ||||
| 16103 | set_forced_breakpoint($i); | ||||
| 16104 | } | ||||
| 16105 | } | ||||
| 16106 | |||||
| 16107 | # always break at the last comma if this list is | ||||
| 16108 | # interrupted; we wouldn't want to leave a terminal '{', for | ||||
| 16109 | # example. | ||||
| 16110 | if ($interrupted) { set_forced_breakpoint($i_true_last_comma) } | ||||
| 16111 | return; | ||||
| 16112 | } | ||||
| 16113 | |||||
| 16114 | #my ( $a, $b, $c ) = caller(); | ||||
| 16115 | #print "LISTX: in set_list $a $c interrupt=$interrupted count=$item_count | ||||
| 16116 | #i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n"; | ||||
| 16117 | #print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n"; | ||||
| 16118 | |||||
| 16119 | #--------------------------------------------------------------- | ||||
| 16120 | # Interrupted List Rule: | ||||
| 16121 | # A list is forced to use old breakpoints if it was interrupted | ||||
| 16122 | # by side comments or blank lines, or requested by user. | ||||
| 16123 | #--------------------------------------------------------------- | ||||
| 16124 | if ( $rOpts_break_at_old_comma_breakpoints | ||||
| 16125 | || $interrupted | ||||
| 16126 | || $i_opening_paren < 0 ) | ||||
| 16127 | { | ||||
| 16128 | copy_old_breakpoints( $i_first_comma, $i_true_last_comma ); | ||||
| 16129 | return; | ||||
| 16130 | } | ||||
| 16131 | |||||
| 16132 | #--------------------------------------------------------------- | ||||
| 16133 | # Looks like a list of items. We have to look at it and size it up. | ||||
| 16134 | #--------------------------------------------------------------- | ||||
| 16135 | |||||
| 16136 | my $opening_token = $tokens_to_go[$i_opening_paren]; | ||||
| 16137 | my $opening_environment = | ||||
| 16138 | $container_environment_to_go[$i_opening_paren]; | ||||
| 16139 | |||||
| 16140 | #------------------------------------------------------------------- | ||||
| 16141 | # Return if this will fit on one line | ||||
| 16142 | #------------------------------------------------------------------- | ||||
| 16143 | |||||
| 16144 | my $i_opening_minus = find_token_starting_list($i_opening_paren); | ||||
| 16145 | return | ||||
| 16146 | unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0; | ||||
| 16147 | |||||
| 16148 | #------------------------------------------------------------------- | ||||
| 16149 | # Now we know that this block spans multiple lines; we have to set | ||||
| 16150 | # at least one breakpoint -- real or fake -- as a signal to break | ||||
| 16151 | # open any outer containers. | ||||
| 16152 | #------------------------------------------------------------------- | ||||
| 16153 | set_fake_breakpoint(); | ||||
| 16154 | |||||
| 16155 | # be sure we do not extend beyond the current list length | ||||
| 16156 | if ( $i_effective_last_comma >= $max_index_to_go ) { | ||||
| 16157 | $i_effective_last_comma = $max_index_to_go - 1; | ||||
| 16158 | } | ||||
| 16159 | |||||
| 16160 | # Set a flag indicating if we need to break open to keep -lp | ||||
| 16161 | # items aligned. This is necessary if any of the list terms | ||||
| 16162 | # exceeds the available space after the '('. | ||||
| 16163 | my $need_lp_break_open = $must_break_open; | ||||
| 16164 | if ( $rOpts_line_up_parentheses && !$must_break_open ) { | ||||
| 16165 | my $columns_if_unbroken = | ||||
| 16166 | maximum_line_length($i_opening_minus) - | ||||
| 16167 | total_line_length( $i_opening_minus, $i_opening_paren ); | ||||
| 16168 | $need_lp_break_open = | ||||
| 16169 | ( $max_length[0] > $columns_if_unbroken ) | ||||
| 16170 | || ( $max_length[1] > $columns_if_unbroken ) | ||||
| 16171 | || ( $first_term_length > $columns_if_unbroken ); | ||||
| 16172 | } | ||||
| 16173 | |||||
| 16174 | # Specify if the list must have an even number of fields or not. | ||||
| 16175 | # It is generally safest to assume an even number, because the | ||||
| 16176 | # list items might be a hash list. But if we can be sure that | ||||
| 16177 | # it is not a hash, then we can allow an odd number for more | ||||
| 16178 | # flexibility. | ||||
| 16179 | my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count | ||||
| 16180 | |||||
| 16181 | if ( $identifier_count >= $item_count - 1 | ||||
| 16182 | || $is_assignment{$next_nonblank_type} | ||||
| 16183 | || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ ) | ||||
| 16184 | ) | ||||
| 16185 | { | ||||
| 16186 | $odd_or_even = 1; | ||||
| 16187 | } | ||||
| 16188 | |||||
| 16189 | # do we have a long first term which should be | ||||
| 16190 | # left on a line by itself? | ||||
| 16191 | my $use_separate_first_term = ( | ||||
| 16192 | $odd_or_even == 1 # only if we can use 1 field/line | ||||
| 16193 | && $item_count > 3 # need several items | ||||
| 16194 | && $first_term_length > | ||||
| 16195 | 2 * $max_length[0] - 2 # need long first term | ||||
| 16196 | && $first_term_length > | ||||
| 16197 | 2 * $max_length[1] - 2 # need long first term | ||||
| 16198 | ); | ||||
| 16199 | |||||
| 16200 | # or do we know from the type of list that the first term should | ||||
| 16201 | # be placed alone? | ||||
| 16202 | if ( !$use_separate_first_term ) { | ||||
| 16203 | if ( $is_keyword_with_special_leading_term{$list_type} ) { | ||||
| 16204 | $use_separate_first_term = 1; | ||||
| 16205 | |||||
| 16206 | # should the container be broken open? | ||||
| 16207 | if ( $item_count < 3 ) { | ||||
| 16208 | if ( $i_first_comma - $i_opening_paren < 4 ) { | ||||
| 16209 | $$rdo_not_break_apart = 1; | ||||
| 16210 | } | ||||
| 16211 | } | ||||
| 16212 | elsif ($first_term_length < 20 | ||||
| 16213 | && $i_first_comma - $i_opening_paren < 4 ) | ||||
| 16214 | { | ||||
| 16215 | my $columns = table_columns_available($i_first_comma); | ||||
| 16216 | if ( $first_term_length < $columns ) { | ||||
| 16217 | $$rdo_not_break_apart = 1; | ||||
| 16218 | } | ||||
| 16219 | } | ||||
| 16220 | } | ||||
| 16221 | } | ||||
| 16222 | |||||
| 16223 | # if so, | ||||
| 16224 | if ($use_separate_first_term) { | ||||
| 16225 | |||||
| 16226 | # ..set a break and update starting values | ||||
| 16227 | $use_separate_first_term = 1; | ||||
| 16228 | set_forced_breakpoint($i_first_comma); | ||||
| 16229 | $i_opening_paren = $i_first_comma; | ||||
| 16230 | $i_first_comma = $$rcomma_index[1]; | ||||
| 16231 | $item_count--; | ||||
| 16232 | return if $comma_count == 1; | ||||
| 16233 | shift @item_lengths; | ||||
| 16234 | shift @i_term_begin; | ||||
| 16235 | shift @i_term_end; | ||||
| 16236 | shift @i_term_comma; | ||||
| 16237 | } | ||||
| 16238 | |||||
| 16239 | # if not, update the metrics to include the first term | ||||
| 16240 | else { | ||||
| 16241 | if ( $first_term_length > $max_length[0] ) { | ||||
| 16242 | $max_length[0] = $first_term_length; | ||||
| 16243 | } | ||||
| 16244 | } | ||||
| 16245 | |||||
| 16246 | # Field width parameters | ||||
| 16247 | my $pair_width = ( $max_length[0] + $max_length[1] ); | ||||
| 16248 | my $max_width = | ||||
| 16249 | ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1]; | ||||
| 16250 | |||||
| 16251 | # Number of free columns across the page width for laying out tables | ||||
| 16252 | my $columns = table_columns_available($i_first_comma); | ||||
| 16253 | |||||
| 16254 | # Estimated maximum number of fields which fit this space | ||||
| 16255 | # This will be our first guess | ||||
| 16256 | my $number_of_fields_max = | ||||
| 16257 | maximum_number_of_fields( $columns, $odd_or_even, $max_width, | ||||
| 16258 | $pair_width ); | ||||
| 16259 | my $number_of_fields = $number_of_fields_max; | ||||
| 16260 | |||||
| 16261 | # Find the best-looking number of fields | ||||
| 16262 | # and make this our second guess if possible | ||||
| 16263 | my ( $number_of_fields_best, $ri_ragged_break_list, | ||||
| 16264 | $new_identifier_count ) | ||||
| 16265 | = study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths, | ||||
| 16266 | $max_width ); | ||||
| 16267 | |||||
| 16268 | if ( $number_of_fields_best != 0 | ||||
| 16269 | && $number_of_fields_best < $number_of_fields_max ) | ||||
| 16270 | { | ||||
| 16271 | $number_of_fields = $number_of_fields_best; | ||||
| 16272 | } | ||||
| 16273 | |||||
| 16274 | # ---------------------------------------------------------------------- | ||||
| 16275 | # If we are crowded and the -lp option is being used, try to | ||||
| 16276 | # undo some indentation | ||||
| 16277 | # ---------------------------------------------------------------------- | ||||
| 16278 | if ( | ||||
| 16279 | $rOpts_line_up_parentheses | ||||
| 16280 | && ( | ||||
| 16281 | $number_of_fields == 0 | ||||
| 16282 | || ( $number_of_fields == 1 | ||||
| 16283 | && $number_of_fields != $number_of_fields_best ) | ||||
| 16284 | ) | ||||
| 16285 | ) | ||||
| 16286 | { | ||||
| 16287 | my $available_spaces = get_AVAILABLE_SPACES_to_go($i_first_comma); | ||||
| 16288 | if ( $available_spaces > 0 ) { | ||||
| 16289 | |||||
| 16290 | my $spaces_wanted = $max_width - $columns; # for 1 field | ||||
| 16291 | |||||
| 16292 | if ( $number_of_fields_best == 0 ) { | ||||
| 16293 | $number_of_fields_best = | ||||
| 16294 | get_maximum_fields_wanted( \@item_lengths ); | ||||
| 16295 | } | ||||
| 16296 | |||||
| 16297 | if ( $number_of_fields_best != 1 ) { | ||||
| 16298 | my $spaces_wanted_2 = | ||||
| 16299 | 1 + $pair_width - $columns; # for 2 fields | ||||
| 16300 | if ( $available_spaces > $spaces_wanted_2 ) { | ||||
| 16301 | $spaces_wanted = $spaces_wanted_2; | ||||
| 16302 | } | ||||
| 16303 | } | ||||
| 16304 | |||||
| 16305 | if ( $spaces_wanted > 0 ) { | ||||
| 16306 | my $deleted_spaces = | ||||
| 16307 | reduce_lp_indentation( $i_first_comma, $spaces_wanted ); | ||||
| 16308 | |||||
| 16309 | # redo the math | ||||
| 16310 | if ( $deleted_spaces > 0 ) { | ||||
| 16311 | $columns = table_columns_available($i_first_comma); | ||||
| 16312 | $number_of_fields_max = | ||||
| 16313 | maximum_number_of_fields( $columns, $odd_or_even, | ||||
| 16314 | $max_width, $pair_width ); | ||||
| 16315 | $number_of_fields = $number_of_fields_max; | ||||
| 16316 | |||||
| 16317 | if ( $number_of_fields_best == 1 | ||||
| 16318 | && $number_of_fields >= 1 ) | ||||
| 16319 | { | ||||
| 16320 | $number_of_fields = $number_of_fields_best; | ||||
| 16321 | } | ||||
| 16322 | } | ||||
| 16323 | } | ||||
| 16324 | } | ||||
| 16325 | } | ||||
| 16326 | |||||
| 16327 | # try for one column if two won't work | ||||
| 16328 | if ( $number_of_fields <= 0 ) { | ||||
| 16329 | $number_of_fields = int( $columns / $max_width ); | ||||
| 16330 | } | ||||
| 16331 | |||||
| 16332 | # The user can place an upper bound on the number of fields, | ||||
| 16333 | # which can be useful for doing maintenance on tables | ||||
| 16334 | if ( $rOpts_maximum_fields_per_table | ||||
| 16335 | && $number_of_fields > $rOpts_maximum_fields_per_table ) | ||||
| 16336 | { | ||||
| 16337 | $number_of_fields = $rOpts_maximum_fields_per_table; | ||||
| 16338 | } | ||||
| 16339 | |||||
| 16340 | # How many columns (characters) and lines would this container take | ||||
| 16341 | # if no additional whitespace were added? | ||||
| 16342 | my $packed_columns = token_sequence_length( $i_opening_paren + 1, | ||||
| 16343 | $i_effective_last_comma + 1 ); | ||||
| 16344 | if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero | ||||
| 16345 | my $packed_lines = 1 + int( $packed_columns / $columns ); | ||||
| 16346 | |||||
| 16347 | # are we an item contained in an outer list? | ||||
| 16348 | my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/; | ||||
| 16349 | |||||
| 16350 | if ( $number_of_fields <= 0 ) { | ||||
| 16351 | |||||
| 16352 | # #--------------------------------------------------------------- | ||||
| 16353 | # # We're in trouble. We can't find a single field width that works. | ||||
| 16354 | # # There is no simple answer here; we may have a single long list | ||||
| 16355 | # # item, or many. | ||||
| 16356 | # #--------------------------------------------------------------- | ||||
| 16357 | # | ||||
| 16358 | # In many cases, it may be best to not force a break if there is just one | ||||
| 16359 | # comma, because the standard continuation break logic will do a better | ||||
| 16360 | # job without it. | ||||
| 16361 | # | ||||
| 16362 | # In the common case that all but one of the terms can fit | ||||
| 16363 | # on a single line, it may look better not to break open the | ||||
| 16364 | # containing parens. Consider, for example | ||||
| 16365 | # | ||||
| 16366 | # $color = | ||||
| 16367 | # join ( '/', | ||||
| 16368 | # sort { $color_value{$::a} <=> $color_value{$::b}; } | ||||
| 16369 | # keys %colors ); | ||||
| 16370 | # | ||||
| 16371 | # which will look like this with the container broken: | ||||
| 16372 | # | ||||
| 16373 | # $color = join ( | ||||
| 16374 | # '/', | ||||
| 16375 | # sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors | ||||
| 16376 | # ); | ||||
| 16377 | # | ||||
| 16378 | # Here is an example of this rule for a long last term: | ||||
| 16379 | # | ||||
| 16380 | # log_message( 0, 256, 128, | ||||
| 16381 | # "Number of routes in adj-RIB-in to be considered: $peercount" ); | ||||
| 16382 | # | ||||
| 16383 | # And here is an example with a long first term: | ||||
| 16384 | # | ||||
| 16385 | # $s = sprintf( | ||||
| 16386 | # "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)", | ||||
| 16387 | # $r, $pu, $ps, $cu, $cs, $tt | ||||
| 16388 | # ) | ||||
| 16389 | # if $style eq 'all'; | ||||
| 16390 | |||||
| 16391 | my $i_last_comma = $$rcomma_index[ $comma_count - 1 ]; | ||||
| 16392 | my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0; | ||||
| 16393 | my $long_first_term = | ||||
| 16394 | excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0; | ||||
| 16395 | |||||
| 16396 | # break at every comma ... | ||||
| 16397 | if ( | ||||
| 16398 | |||||
| 16399 | # if requested by user or is best looking | ||||
| 16400 | $number_of_fields_best == 1 | ||||
| 16401 | |||||
| 16402 | # or if this is a sublist of a larger list | ||||
| 16403 | || $in_hierarchical_list | ||||
| 16404 | |||||
| 16405 | # or if multiple commas and we don't have a long first or last | ||||
| 16406 | # term | ||||
| 16407 | || ( $comma_count > 1 | ||||
| 16408 | && !( $long_last_term || $long_first_term ) ) | ||||
| 16409 | ) | ||||
| 16410 | { | ||||
| 16411 | foreach ( 0 .. $comma_count - 1 ) { | ||||
| 16412 | set_forced_breakpoint( $$rcomma_index[$_] ); | ||||
| 16413 | } | ||||
| 16414 | } | ||||
| 16415 | elsif ($long_last_term) { | ||||
| 16416 | |||||
| 16417 | set_forced_breakpoint($i_last_comma); | ||||
| 16418 | $$rdo_not_break_apart = 1 unless $must_break_open; | ||||
| 16419 | } | ||||
| 16420 | elsif ($long_first_term) { | ||||
| 16421 | |||||
| 16422 | set_forced_breakpoint($i_first_comma); | ||||
| 16423 | } | ||||
| 16424 | else { | ||||
| 16425 | |||||
| 16426 | # let breaks be defined by default bond strength logic | ||||
| 16427 | } | ||||
| 16428 | return; | ||||
| 16429 | } | ||||
| 16430 | |||||
| 16431 | # -------------------------------------------------------- | ||||
| 16432 | # We have a tentative field count that seems to work. | ||||
| 16433 | # How many lines will this require? | ||||
| 16434 | # -------------------------------------------------------- | ||||
| 16435 | my $formatted_lines = $item_count / ($number_of_fields); | ||||
| 16436 | if ( $formatted_lines != int $formatted_lines ) { | ||||
| 16437 | $formatted_lines = 1 + int $formatted_lines; | ||||
| 16438 | } | ||||
| 16439 | |||||
| 16440 | # So far we've been trying to fill out to the right margin. But | ||||
| 16441 | # compact tables are easier to read, so let's see if we can use fewer | ||||
| 16442 | # fields without increasing the number of lines. | ||||
| 16443 | $number_of_fields = | ||||
| 16444 | compactify_table( $item_count, $number_of_fields, $formatted_lines, | ||||
| 16445 | $odd_or_even ); | ||||
| 16446 | |||||
| 16447 | # How many spaces across the page will we fill? | ||||
| 16448 | my $columns_per_line = | ||||
| 16449 | ( int $number_of_fields / 2 ) * $pair_width + | ||||
| 16450 | ( $number_of_fields % 2 ) * $max_width; | ||||
| 16451 | |||||
| 16452 | my $formatted_columns; | ||||
| 16453 | |||||
| 16454 | if ( $number_of_fields > 1 ) { | ||||
| 16455 | $formatted_columns = | ||||
| 16456 | ( $pair_width * ( int( $item_count / 2 ) ) + | ||||
| 16457 | ( $item_count % 2 ) * $max_width ); | ||||
| 16458 | } | ||||
| 16459 | else { | ||||
| 16460 | $formatted_columns = $max_width * $item_count; | ||||
| 16461 | } | ||||
| 16462 | if ( $formatted_columns < $packed_columns ) { | ||||
| 16463 | $formatted_columns = $packed_columns; | ||||
| 16464 | } | ||||
| 16465 | |||||
| 16466 | my $unused_columns = $formatted_columns - $packed_columns; | ||||
| 16467 | |||||
| 16468 | # set some empirical parameters to help decide if we should try to | ||||
| 16469 | # align; high sparsity does not look good, especially with few lines | ||||
| 16470 | my $sparsity = ($unused_columns) / ($formatted_columns); | ||||
| 16471 | my $max_allowed_sparsity = | ||||
| 16472 | ( $item_count < 3 ) ? 0.1 | ||||
| 16473 | : ( $packed_lines == 1 ) ? 0.15 | ||||
| 16474 | : ( $packed_lines == 2 ) ? 0.4 | ||||
| 16475 | : 0.7; | ||||
| 16476 | |||||
| 16477 | # Begin check for shortcut methods, which avoid treating a list | ||||
| 16478 | # as a table for relatively small parenthesized lists. These | ||||
| 16479 | # are usually easier to read if not formatted as tables. | ||||
| 16480 | if ( | ||||
| 16481 | $packed_lines <= 2 # probably can fit in 2 lines | ||||
| 16482 | && $item_count < 9 # doesn't have too many items | ||||
| 16483 | && $opening_environment eq 'BLOCK' # not a sub-container | ||||
| 16484 | && $opening_token eq '(' # is paren list | ||||
| 16485 | ) | ||||
| 16486 | { | ||||
| 16487 | |||||
| 16488 | # Shortcut method 1: for -lp and just one comma: | ||||
| 16489 | # This is a no-brainer, just break at the comma. | ||||
| 16490 | if ( | ||||
| 16491 | $rOpts_line_up_parentheses # -lp | ||||
| 16492 | && $item_count == 2 # two items, one comma | ||||
| 16493 | && !$must_break_open | ||||
| 16494 | ) | ||||
| 16495 | { | ||||
| 16496 | my $i_break = $$rcomma_index[0]; | ||||
| 16497 | set_forced_breakpoint($i_break); | ||||
| 16498 | $$rdo_not_break_apart = 1; | ||||
| 16499 | set_non_alignment_flags( $comma_count, $rcomma_index ); | ||||
| 16500 | return; | ||||
| 16501 | |||||
| 16502 | } | ||||
| 16503 | |||||
| 16504 | # method 2 is for most small ragged lists which might look | ||||
| 16505 | # best if not displayed as a table. | ||||
| 16506 | if ( | ||||
| 16507 | ( $number_of_fields == 2 && $item_count == 3 ) | ||||
| 16508 | || ( | ||||
| 16509 | $new_identifier_count > 0 # isn't all quotes | ||||
| 16510 | && $sparsity > 0.15 | ||||
| 16511 | ) # would be fairly spaced gaps if aligned | ||||
| 16512 | ) | ||||
| 16513 | { | ||||
| 16514 | |||||
| 16515 | my $break_count = set_ragged_breakpoints( \@i_term_comma, | ||||
| 16516 | $ri_ragged_break_list ); | ||||
| 16517 | ++$break_count if ($use_separate_first_term); | ||||
| 16518 | |||||
| 16519 | # NOTE: we should really use the true break count here, | ||||
| 16520 | # which can be greater if there are large terms and | ||||
| 16521 | # little space, but usually this will work well enough. | ||||
| 16522 | unless ($must_break_open) { | ||||
| 16523 | |||||
| 16524 | if ( $break_count <= 1 ) { | ||||
| 16525 | $$rdo_not_break_apart = 1; | ||||
| 16526 | } | ||||
| 16527 | elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open ) | ||||
| 16528 | { | ||||
| 16529 | $$rdo_not_break_apart = 1; | ||||
| 16530 | } | ||||
| 16531 | } | ||||
| 16532 | set_non_alignment_flags( $comma_count, $rcomma_index ); | ||||
| 16533 | return; | ||||
| 16534 | } | ||||
| 16535 | |||||
| 16536 | } # end shortcut methods | ||||
| 16537 | |||||
| 16538 | # debug stuff | ||||
| 16539 | |||||
| 16540 | FORMATTER_DEBUG_FLAG_SPARSE && do { | ||||
| 16541 | print STDOUT | ||||
| 16542 | "SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n"; | ||||
| 16543 | |||||
| 16544 | }; | ||||
| 16545 | |||||
| 16546 | #--------------------------------------------------------------- | ||||
| 16547 | # Compound List Rule 2: | ||||
| 16548 | # If this list is too long for one line, and it is an item of a | ||||
| 16549 | # larger list, then we must format it, regardless of sparsity | ||||
| 16550 | # (ian.t). One reason that we have to do this is to trigger | ||||
| 16551 | # Compound List Rule 1, above, which causes breaks at all commas of | ||||
| 16552 | # all outer lists. In this way, the structure will be properly | ||||
| 16553 | # displayed. | ||||
| 16554 | #--------------------------------------------------------------- | ||||
| 16555 | |||||
| 16556 | # Decide if this list is too long for one line unless broken | ||||
| 16557 | my $total_columns = table_columns_available($i_opening_paren); | ||||
| 16558 | my $too_long = $packed_columns > $total_columns; | ||||
| 16559 | |||||
| 16560 | # For a paren list, include the length of the token just before the | ||||
| 16561 | # '(' because this is likely a sub call, and we would have to | ||||
| 16562 | # include the sub name on the same line as the list. This is still | ||||
| 16563 | # imprecise, but not too bad. (steve.t) | ||||
| 16564 | if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) { | ||||
| 16565 | |||||
| 16566 | $too_long = excess_line_length( $i_opening_minus, | ||||
| 16567 | $i_effective_last_comma + 1 ) > 0; | ||||
| 16568 | } | ||||
| 16569 | |||||
| 16570 | # FIXME: For an item after a '=>', try to include the length of the | ||||
| 16571 | # thing before the '=>'. This is crude and should be improved by | ||||
| 16572 | # actually looking back token by token. | ||||
| 16573 | if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) { | ||||
| 16574 | my $i_opening_minus = $i_opening_paren - 4; | ||||
| 16575 | if ( $i_opening_minus >= 0 ) { | ||||
| 16576 | $too_long = excess_line_length( $i_opening_minus, | ||||
| 16577 | $i_effective_last_comma + 1 ) > 0; | ||||
| 16578 | } | ||||
| 16579 | } | ||||
| 16580 | |||||
| 16581 | # Always break lists contained in '[' and '{' if too long for 1 line, | ||||
| 16582 | # and always break lists which are too long and part of a more complex | ||||
| 16583 | # structure. | ||||
| 16584 | my $must_break_open_container = $must_break_open | ||||
| 16585 | || ( $too_long | ||||
| 16586 | && ( $in_hierarchical_list || $opening_token ne '(' ) ); | ||||
| 16587 | |||||
| 16588 | #print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n"; | ||||
| 16589 | |||||
| 16590 | #--------------------------------------------------------------- | ||||
| 16591 | # The main decision: | ||||
| 16592 | # Now decide if we will align the data into aligned columns. Do not | ||||
| 16593 | # attempt to align columns if this is a tiny table or it would be | ||||
| 16594 | # too spaced. It seems that the more packed lines we have, the | ||||
| 16595 | # sparser the list that can be allowed and still look ok. | ||||
| 16596 | #--------------------------------------------------------------- | ||||
| 16597 | |||||
| 16598 | if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines ) | ||||
| 16599 | || ( $formatted_lines < 2 ) | ||||
| 16600 | || ( $unused_columns > $max_allowed_sparsity * $formatted_columns ) | ||||
| 16601 | ) | ||||
| 16602 | { | ||||
| 16603 | |||||
| 16604 | #--------------------------------------------------------------- | ||||
| 16605 | # too sparse: would look ugly if aligned in a table; | ||||
| 16606 | #--------------------------------------------------------------- | ||||
| 16607 | |||||
| 16608 | # use old breakpoints if this is a 'big' list | ||||
| 16609 | # FIXME: goal is to improve set_ragged_breakpoints so that | ||||
| 16610 | # this is not necessary. | ||||
| 16611 | if ( $packed_lines > 2 && $item_count > 10 ) { | ||||
| 16612 | write_logfile_entry("List sparse: using old breakpoints\n"); | ||||
| 16613 | copy_old_breakpoints( $i_first_comma, $i_last_comma ); | ||||
| 16614 | } | ||||
| 16615 | |||||
| 16616 | # let the continuation logic handle it if 2 lines | ||||
| 16617 | else { | ||||
| 16618 | |||||
| 16619 | my $break_count = set_ragged_breakpoints( \@i_term_comma, | ||||
| 16620 | $ri_ragged_break_list ); | ||||
| 16621 | ++$break_count if ($use_separate_first_term); | ||||
| 16622 | |||||
| 16623 | unless ($must_break_open_container) { | ||||
| 16624 | if ( $break_count <= 1 ) { | ||||
| 16625 | $$rdo_not_break_apart = 1; | ||||
| 16626 | } | ||||
| 16627 | elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open ) | ||||
| 16628 | { | ||||
| 16629 | $$rdo_not_break_apart = 1; | ||||
| 16630 | } | ||||
| 16631 | } | ||||
| 16632 | set_non_alignment_flags( $comma_count, $rcomma_index ); | ||||
| 16633 | } | ||||
| 16634 | return; | ||||
| 16635 | } | ||||
| 16636 | |||||
| 16637 | #--------------------------------------------------------------- | ||||
| 16638 | # go ahead and format as a table | ||||
| 16639 | #--------------------------------------------------------------- | ||||
| 16640 | write_logfile_entry( | ||||
| 16641 | "List: auto formatting with $number_of_fields fields/row\n"); | ||||
| 16642 | |||||
| 16643 | my $j_first_break = | ||||
| 16644 | $use_separate_first_term ? $number_of_fields : $number_of_fields - 1; | ||||
| 16645 | |||||
| 16646 | for ( | ||||
| 16647 | my $j = $j_first_break ; | ||||
| 16648 | $j < $comma_count ; | ||||
| 16649 | $j += $number_of_fields | ||||
| 16650 | ) | ||||
| 16651 | { | ||||
| 16652 | my $i = $$rcomma_index[$j]; | ||||
| 16653 | set_forced_breakpoint($i); | ||||
| 16654 | } | ||||
| 16655 | return; | ||||
| 16656 | } | ||||
| 16657 | } | ||||
| 16658 | |||||
| 16659 | sub set_non_alignment_flags { | ||||
| 16660 | |||||
| 16661 | # set flag which indicates that these commas should not be | ||||
| 16662 | # aligned | ||||
| 16663 | my ( $comma_count, $rcomma_index ) = @_; | ||||
| 16664 | foreach ( 0 .. $comma_count - 1 ) { | ||||
| 16665 | $matching_token_to_go[ $$rcomma_index[$_] ] = 1; | ||||
| 16666 | } | ||||
| 16667 | } | ||||
| 16668 | |||||
| 16669 | sub study_list_complexity { | ||||
| 16670 | |||||
| 16671 | # Look for complex tables which should be formatted with one term per line. | ||||
| 16672 | # Returns the following: | ||||
| 16673 | # | ||||
| 16674 | # \@i_ragged_break_list = list of good breakpoints to avoid lines | ||||
| 16675 | # which are hard to read | ||||
| 16676 | # $number_of_fields_best = suggested number of fields based on | ||||
| 16677 | # complexity; = 0 if any number may be used. | ||||
| 16678 | # | ||||
| 16679 | my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_; | ||||
| 16680 | my $item_count = @{$ri_term_begin}; | ||||
| 16681 | my $complex_item_count = 0; | ||||
| 16682 | my $number_of_fields_best = $rOpts_maximum_fields_per_table; | ||||
| 16683 | my $i_max = @{$ritem_lengths} - 1; | ||||
| 16684 | ##my @item_complexity; | ||||
| 16685 | |||||
| 16686 | my $i_last_last_break = -3; | ||||
| 16687 | my $i_last_break = -2; | ||||
| 16688 | my @i_ragged_break_list; | ||||
| 16689 | |||||
| 16690 | my $definitely_complex = 30; | ||||
| 16691 | my $definitely_simple = 12; | ||||
| 16692 | my $quote_count = 0; | ||||
| 16693 | |||||
| 16694 | for my $i ( 0 .. $i_max ) { | ||||
| 16695 | my $ib = $ri_term_begin->[$i]; | ||||
| 16696 | my $ie = $ri_term_end->[$i]; | ||||
| 16697 | |||||
| 16698 | # define complexity: start with the actual term length | ||||
| 16699 | my $weighted_length = ( $ritem_lengths->[$i] - 2 ); | ||||
| 16700 | |||||
| 16701 | ##TBD: join types here and check for variations | ||||
| 16702 | ##my $str=join "", @tokens_to_go[$ib..$ie]; | ||||
| 16703 | |||||
| 16704 | my $is_quote = 0; | ||||
| 16705 | if ( $types_to_go[$ib] =~ /^[qQ]$/ ) { | ||||
| 16706 | $is_quote = 1; | ||||
| 16707 | $quote_count++; | ||||
| 16708 | } | ||||
| 16709 | elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) { | ||||
| 16710 | $quote_count++; | ||||
| 16711 | } | ||||
| 16712 | |||||
| 16713 | if ( $ib eq $ie ) { | ||||
| 16714 | if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) { | ||||
| 16715 | $complex_item_count++; | ||||
| 16716 | $weighted_length *= 2; | ||||
| 16717 | } | ||||
| 16718 | else { | ||||
| 16719 | } | ||||
| 16720 | } | ||||
| 16721 | else { | ||||
| 16722 | if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) { | ||||
| 16723 | $complex_item_count++; | ||||
| 16724 | $weighted_length *= 2; | ||||
| 16725 | } | ||||
| 16726 | if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) { | ||||
| 16727 | $weighted_length += 4; | ||||
| 16728 | } | ||||
| 16729 | } | ||||
| 16730 | |||||
| 16731 | # add weight for extra tokens. | ||||
| 16732 | $weighted_length += 2 * ( $ie - $ib ); | ||||
| 16733 | |||||
| 16734 | ## my $BUB = join '', @tokens_to_go[$ib..$ie]; | ||||
| 16735 | ## print "# COMPLEXITY:$weighted_length $BUB\n"; | ||||
| 16736 | |||||
| 16737 | ##push @item_complexity, $weighted_length; | ||||
| 16738 | |||||
| 16739 | # now mark a ragged break after this item it if it is 'long and | ||||
| 16740 | # complex': | ||||
| 16741 | if ( $weighted_length >= $definitely_complex ) { | ||||
| 16742 | |||||
| 16743 | # if we broke after the previous term | ||||
| 16744 | # then break before it too | ||||
| 16745 | if ( $i_last_break == $i - 1 | ||||
| 16746 | && $i > 1 | ||||
| 16747 | && $i_last_last_break != $i - 2 ) | ||||
| 16748 | { | ||||
| 16749 | |||||
| 16750 | ## FIXME: don't strand a small term | ||||
| 16751 | pop @i_ragged_break_list; | ||||
| 16752 | push @i_ragged_break_list, $i - 2; | ||||
| 16753 | push @i_ragged_break_list, $i - 1; | ||||
| 16754 | } | ||||
| 16755 | |||||
| 16756 | push @i_ragged_break_list, $i; | ||||
| 16757 | $i_last_last_break = $i_last_break; | ||||
| 16758 | $i_last_break = $i; | ||||
| 16759 | } | ||||
| 16760 | |||||
| 16761 | # don't break before a small last term -- it will | ||||
| 16762 | # not look good on a line by itself. | ||||
| 16763 | elsif ($i == $i_max | ||||
| 16764 | && $i_last_break == $i - 1 | ||||
| 16765 | && $weighted_length <= $definitely_simple ) | ||||
| 16766 | { | ||||
| 16767 | pop @i_ragged_break_list; | ||||
| 16768 | } | ||||
| 16769 | } | ||||
| 16770 | |||||
| 16771 | my $identifier_count = $i_max + 1 - $quote_count; | ||||
| 16772 | |||||
| 16773 | # Need more tuning here.. | ||||
| 16774 | if ( $max_width > 12 | ||||
| 16775 | && $complex_item_count > $item_count / 2 | ||||
| 16776 | && $number_of_fields_best != 2 ) | ||||
| 16777 | { | ||||
| 16778 | $number_of_fields_best = 1; | ||||
| 16779 | } | ||||
| 16780 | |||||
| 16781 | return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count ); | ||||
| 16782 | } | ||||
| 16783 | |||||
| 16784 | sub get_maximum_fields_wanted { | ||||
| 16785 | |||||
| 16786 | # Not all tables look good with more than one field of items. | ||||
| 16787 | # This routine looks at a table and decides if it should be | ||||
| 16788 | # formatted with just one field or not. | ||||
| 16789 | # This coding is still under development. | ||||
| 16790 | my ($ritem_lengths) = @_; | ||||
| 16791 | |||||
| 16792 | my $number_of_fields_best = 0; | ||||
| 16793 | |||||
| 16794 | # For just a few items, we tentatively assume just 1 field. | ||||
| 16795 | my $item_count = @{$ritem_lengths}; | ||||
| 16796 | if ( $item_count <= 5 ) { | ||||
| 16797 | $number_of_fields_best = 1; | ||||
| 16798 | } | ||||
| 16799 | |||||
| 16800 | # For larger tables, look at it both ways and see what looks best | ||||
| 16801 | else { | ||||
| 16802 | |||||
| 16803 | my $is_odd = 1; | ||||
| 16804 | my @max_length = ( 0, 0 ); | ||||
| 16805 | my @last_length_2 = ( undef, undef ); | ||||
| 16806 | my @first_length_2 = ( undef, undef ); | ||||
| 16807 | my $last_length = undef; | ||||
| 16808 | my $total_variation_1 = 0; | ||||
| 16809 | my $total_variation_2 = 0; | ||||
| 16810 | my @total_variation_2 = ( 0, 0 ); | ||||
| 16811 | for ( my $j = 0 ; $j < $item_count ; $j++ ) { | ||||
| 16812 | |||||
| 16813 | $is_odd = 1 - $is_odd; | ||||
| 16814 | my $length = $ritem_lengths->[$j]; | ||||
| 16815 | if ( $length > $max_length[$is_odd] ) { | ||||
| 16816 | $max_length[$is_odd] = $length; | ||||
| 16817 | } | ||||
| 16818 | |||||
| 16819 | if ( defined($last_length) ) { | ||||
| 16820 | my $dl = abs( $length - $last_length ); | ||||
| 16821 | $total_variation_1 += $dl; | ||||
| 16822 | } | ||||
| 16823 | $last_length = $length; | ||||
| 16824 | |||||
| 16825 | my $ll = $last_length_2[$is_odd]; | ||||
| 16826 | if ( defined($ll) ) { | ||||
| 16827 | my $dl = abs( $length - $ll ); | ||||
| 16828 | $total_variation_2[$is_odd] += $dl; | ||||
| 16829 | } | ||||
| 16830 | else { | ||||
| 16831 | $first_length_2[$is_odd] = $length; | ||||
| 16832 | } | ||||
| 16833 | $last_length_2[$is_odd] = $length; | ||||
| 16834 | } | ||||
| 16835 | $total_variation_2 = $total_variation_2[0] + $total_variation_2[1]; | ||||
| 16836 | |||||
| 16837 | my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0; | ||||
| 16838 | unless ( $total_variation_2 < $factor * $total_variation_1 ) { | ||||
| 16839 | $number_of_fields_best = 1; | ||||
| 16840 | } | ||||
| 16841 | } | ||||
| 16842 | return ($number_of_fields_best); | ||||
| 16843 | } | ||||
| 16844 | |||||
| 16845 | sub table_columns_available { | ||||
| 16846 | my $i_first_comma = shift; | ||||
| 16847 | my $columns = | ||||
| 16848 | maximum_line_length($i_first_comma) - | ||||
| 16849 | leading_spaces_to_go($i_first_comma); | ||||
| 16850 | |||||
| 16851 | # Patch: the vertical formatter does not line up lines whose lengths | ||||
| 16852 | # exactly equal the available line length because of allowances | ||||
| 16853 | # that must be made for side comments. Therefore, the number of | ||||
| 16854 | # available columns is reduced by 1 character. | ||||
| 16855 | $columns -= 1; | ||||
| 16856 | return $columns; | ||||
| 16857 | } | ||||
| 16858 | |||||
| 16859 | sub maximum_number_of_fields { | ||||
| 16860 | |||||
| 16861 | # how many fields will fit in the available space? | ||||
| 16862 | my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_; | ||||
| 16863 | my $max_pairs = int( $columns / $pair_width ); | ||||
| 16864 | my $number_of_fields = $max_pairs * 2; | ||||
| 16865 | if ( $odd_or_even == 1 | ||||
| 16866 | && $max_pairs * $pair_width + $max_width <= $columns ) | ||||
| 16867 | { | ||||
| 16868 | $number_of_fields++; | ||||
| 16869 | } | ||||
| 16870 | return $number_of_fields; | ||||
| 16871 | } | ||||
| 16872 | |||||
| 16873 | sub compactify_table { | ||||
| 16874 | |||||
| 16875 | # given a table with a certain number of fields and a certain number | ||||
| 16876 | # of lines, see if reducing the number of fields will make it look | ||||
| 16877 | # better. | ||||
| 16878 | my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_; | ||||
| 16879 | if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) { | ||||
| 16880 | my $min_fields; | ||||
| 16881 | |||||
| 16882 | for ( | ||||
| 16883 | $min_fields = $number_of_fields ; | ||||
| 16884 | $min_fields >= $odd_or_even | ||||
| 16885 | && $min_fields * $formatted_lines >= $item_count ; | ||||
| 16886 | $min_fields -= $odd_or_even | ||||
| 16887 | ) | ||||
| 16888 | { | ||||
| 16889 | $number_of_fields = $min_fields; | ||||
| 16890 | } | ||||
| 16891 | } | ||||
| 16892 | return $number_of_fields; | ||||
| 16893 | } | ||||
| 16894 | |||||
| 16895 | sub set_ragged_breakpoints { | ||||
| 16896 | |||||
| 16897 | # Set breakpoints in a list that cannot be formatted nicely as a | ||||
| 16898 | # table. | ||||
| 16899 | my ( $ri_term_comma, $ri_ragged_break_list ) = @_; | ||||
| 16900 | |||||
| 16901 | my $break_count = 0; | ||||
| 16902 | foreach (@$ri_ragged_break_list) { | ||||
| 16903 | my $j = $ri_term_comma->[$_]; | ||||
| 16904 | if ($j) { | ||||
| 16905 | set_forced_breakpoint($j); | ||||
| 16906 | $break_count++; | ||||
| 16907 | } | ||||
| 16908 | } | ||||
| 16909 | return $break_count; | ||||
| 16910 | } | ||||
| 16911 | |||||
| 16912 | sub copy_old_breakpoints { | ||||
| 16913 | my ( $i_first_comma, $i_last_comma ) = @_; | ||||
| 16914 | for my $i ( $i_first_comma .. $i_last_comma ) { | ||||
| 16915 | if ( $old_breakpoint_to_go[$i] ) { | ||||
| 16916 | set_forced_breakpoint($i); | ||||
| 16917 | } | ||||
| 16918 | } | ||||
| 16919 | } | ||||
| 16920 | |||||
| 16921 | sub set_nobreaks { | ||||
| 16922 | my ( $i, $j ) = @_; | ||||
| 16923 | if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) { | ||||
| 16924 | |||||
| 16925 | FORMATTER_DEBUG_FLAG_NOBREAK && do { | ||||
| 16926 | my ( $a, $b, $c ) = caller(); | ||||
| 16927 | print STDOUT | ||||
| 16928 | "NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n"; | ||||
| 16929 | }; | ||||
| 16930 | |||||
| 16931 | @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 ); | ||||
| 16932 | } | ||||
| 16933 | |||||
| 16934 | # shouldn't happen; non-critical error | ||||
| 16935 | else { | ||||
| 16936 | FORMATTER_DEBUG_FLAG_NOBREAK && do { | ||||
| 16937 | my ( $a, $b, $c ) = caller(); | ||||
| 16938 | print STDOUT | ||||
| 16939 | "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n"; | ||||
| 16940 | }; | ||||
| 16941 | } | ||||
| 16942 | } | ||||
| 16943 | |||||
| 16944 | sub set_fake_breakpoint { | ||||
| 16945 | |||||
| 16946 | # Just bump up the breakpoint count as a signal that there are breaks. | ||||
| 16947 | # This is useful if we have breaks but may want to postpone deciding where | ||||
| 16948 | # to make them. | ||||
| 16949 | $forced_breakpoint_count++; | ||||
| 16950 | } | ||||
| 16951 | |||||
| 16952 | sub set_forced_breakpoint { | ||||
| 16953 | my $i = shift; | ||||
| 16954 | |||||
| 16955 | return unless defined $i && $i >= 0; | ||||
| 16956 | |||||
| 16957 | # when called with certain tokens, use bond strengths to decide | ||||
| 16958 | # if we break before or after it | ||||
| 16959 | my $token = $tokens_to_go[$i]; | ||||
| 16960 | |||||
| 16961 | if ( $token =~ /^([\=\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) { | ||||
| 16962 | if ( $want_break_before{$token} && $i >= 0 ) { $i-- } | ||||
| 16963 | } | ||||
| 16964 | |||||
| 16965 | # breaks are forced before 'if' and 'unless' | ||||
| 16966 | elsif ( $is_if_unless{$token} ) { $i-- } | ||||
| 16967 | |||||
| 16968 | if ( $i >= 0 && $i <= $max_index_to_go ) { | ||||
| 16969 | my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1; | ||||
| 16970 | |||||
| 16971 | FORMATTER_DEBUG_FLAG_FORCE && do { | ||||
| 16972 | my ( $a, $b, $c ) = caller(); | ||||
| 16973 | print STDOUT | ||||
| 16974 | "FORCE $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n"; | ||||
| 16975 | }; | ||||
| 16976 | |||||
| 16977 | if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) { | ||||
| 16978 | $forced_breakpoint_to_go[$i_nonblank] = 1; | ||||
| 16979 | |||||
| 16980 | if ( $i_nonblank > $index_max_forced_break ) { | ||||
| 16981 | $index_max_forced_break = $i_nonblank; | ||||
| 16982 | } | ||||
| 16983 | $forced_breakpoint_count++; | ||||
| 16984 | $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] = | ||||
| 16985 | $i_nonblank; | ||||
| 16986 | |||||
| 16987 | # if we break at an opening container..break at the closing | ||||
| 16988 | if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) { | ||||
| 16989 | set_closing_breakpoint($i_nonblank); | ||||
| 16990 | } | ||||
| 16991 | } | ||||
| 16992 | } | ||||
| 16993 | } | ||||
| 16994 | |||||
| 16995 | sub clear_breakpoint_undo_stack { | ||||
| 16996 | $forced_breakpoint_undo_count = 0; | ||||
| 16997 | } | ||||
| 16998 | |||||
| 16999 | sub undo_forced_breakpoint_stack { | ||||
| 17000 | |||||
| 17001 | my $i_start = shift; | ||||
| 17002 | if ( $i_start < 0 ) { | ||||
| 17003 | $i_start = 0; | ||||
| 17004 | my ( $a, $b, $c ) = caller(); | ||||
| 17005 | warning( | ||||
| 17006 | "Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start " | ||||
| 17007 | ); | ||||
| 17008 | } | ||||
| 17009 | |||||
| 17010 | while ( $forced_breakpoint_undo_count > $i_start ) { | ||||
| 17011 | my $i = | ||||
| 17012 | $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ]; | ||||
| 17013 | if ( $i >= 0 && $i <= $max_index_to_go ) { | ||||
| 17014 | $forced_breakpoint_to_go[$i] = 0; | ||||
| 17015 | $forced_breakpoint_count--; | ||||
| 17016 | |||||
| 17017 | FORMATTER_DEBUG_FLAG_UNDOBP && do { | ||||
| 17018 | my ( $a, $b, $c ) = caller(); | ||||
| 17019 | print STDOUT | ||||
| 17020 | "UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n"; | ||||
| 17021 | }; | ||||
| 17022 | } | ||||
| 17023 | |||||
| 17024 | # shouldn't happen, but not a critical error | ||||
| 17025 | else { | ||||
| 17026 | FORMATTER_DEBUG_FLAG_UNDOBP && do { | ||||
| 17027 | my ( $a, $b, $c ) = caller(); | ||||
| 17028 | print STDOUT | ||||
| 17029 | "Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go"; | ||||
| 17030 | }; | ||||
| 17031 | } | ||||
| 17032 | } | ||||
| 17033 | } | ||||
| 17034 | |||||
| 17035 | { # begin recombine_breakpoints | ||||
| 17036 | |||||
| 17037 | 2 | 100ns | my %is_amp_amp; | ||
| 17038 | 1 | 0s | my %is_ternary; | ||
| 17039 | 1 | 0s | my %is_math_op; | ||
| 17040 | 1 | 0s | my %is_plus_minus; | ||
| 17041 | 1 | 300ns | my %is_mult_div; | ||
| 17042 | |||||
| 17043 | # spent 15µs within Perl::Tidy::Formatter::BEGIN@17043 which was called:
# once (15µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 17059 | ||||
| 17044 | |||||
| 17045 | 1 | 1µs | @_ = qw( && || ); | ||
| 17046 | 1 | 2µs | @is_amp_amp{@_} = (1) x scalar(@_); | ||
| 17047 | |||||
| 17048 | 1 | 1µs | @_ = qw( ? : ); | ||
| 17049 | 1 | 900ns | @is_ternary{@_} = (1) x scalar(@_); | ||
| 17050 | |||||
| 17051 | 1 | 1µs | @_ = qw( + - * / ); | ||
| 17052 | 1 | 1µs | @is_math_op{@_} = (1) x scalar(@_); | ||
| 17053 | |||||
| 17054 | 1 | 900ns | @_ = qw( + - ); | ||
| 17055 | 1 | 700ns | @is_plus_minus{@_} = (1) x scalar(@_); | ||
| 17056 | |||||
| 17057 | 1 | 600ns | @_ = qw( * / ); | ||
| 17058 | 1 | 7µs | @is_mult_div{@_} = (1) x scalar(@_); | ||
| 17059 | 1 | 4.57ms | 1 | 15µs | } # spent 15µs making 1 call to Perl::Tidy::Formatter::BEGIN@17043 |
| 17060 | |||||
| 17061 | sub recombine_breakpoints { | ||||
| 17062 | |||||
| 17063 | # sub set_continuation_breaks is very liberal in setting line breaks | ||||
| 17064 | # for long lines, always setting breaks at good breakpoints, even | ||||
| 17065 | # when that creates small lines. Sometimes small line fragments | ||||
| 17066 | # are produced which would look better if they were combined. | ||||
| 17067 | # That's the task of this routine. | ||||
| 17068 | # | ||||
| 17069 | # We are given indexes to the current lines: | ||||
| 17070 | # $ri_beg = ref to array of BEGinning indexes of each line | ||||
| 17071 | # $ri_end = ref to array of ENDing indexes of each line | ||||
| 17072 | my ( $ri_beg, $ri_end ) = @_; | ||||
| 17073 | |||||
| 17074 | # Make a list of all good joining tokens between the lines | ||||
| 17075 | # n-1 and n. | ||||
| 17076 | my @joint; | ||||
| 17077 | my $nmax = @$ri_end - 1; | ||||
| 17078 | for my $n ( 1 .. $nmax ) { | ||||
| 17079 | my $ibeg_1 = $$ri_beg[ $n - 1 ]; | ||||
| 17080 | my $iend_1 = $$ri_end[ $n - 1 ]; | ||||
| 17081 | my $iend_2 = $$ri_end[$n]; | ||||
| 17082 | my $ibeg_2 = $$ri_beg[$n]; | ||||
| 17083 | |||||
| 17084 | my ( $itok, $itokp, $itokm ); | ||||
| 17085 | |||||
| 17086 | foreach my $itest ( $iend_1, $ibeg_2 ) { | ||||
| 17087 | my $type = $types_to_go[$itest]; | ||||
| 17088 | if ( $is_math_op{$type} | ||||
| 17089 | || $is_amp_amp{$type} | ||||
| 17090 | || $is_assignment{$type} | ||||
| 17091 | || $type eq ':' ) | ||||
| 17092 | { | ||||
| 17093 | $itok = $itest; | ||||
| 17094 | } | ||||
| 17095 | } | ||||
| 17096 | $joint[$n] = [$itok]; | ||||
| 17097 | } | ||||
| 17098 | |||||
| 17099 | my $more_to_do = 1; | ||||
| 17100 | |||||
| 17101 | # We keep looping over all of the lines of this batch | ||||
| 17102 | # until there are no more possible recombinations | ||||
| 17103 | my $nmax_last = @$ri_end; | ||||
| 17104 | while ($more_to_do) { | ||||
| 17105 | my $n_best = 0; | ||||
| 17106 | my $bs_best; | ||||
| 17107 | my $n; | ||||
| 17108 | my $nmax = @$ri_end - 1; | ||||
| 17109 | |||||
| 17110 | # Safety check for infinite loop | ||||
| 17111 | unless ( $nmax < $nmax_last ) { | ||||
| 17112 | |||||
| 17113 | # Shouldn't happen because splice below decreases nmax on each | ||||
| 17114 | # pass. | ||||
| 17115 | Perl::Tidy::Die | ||||
| 17116 | "Program bug-infinite loop in recombine breakpoints\n"; | ||||
| 17117 | } | ||||
| 17118 | $nmax_last = $nmax; | ||||
| 17119 | $more_to_do = 0; | ||||
| 17120 | my $previous_outdentable_closing_paren; | ||||
| 17121 | my $leading_amp_count = 0; | ||||
| 17122 | my $this_line_is_semicolon_terminated; | ||||
| 17123 | |||||
| 17124 | # loop over all remaining lines in this batch | ||||
| 17125 | for $n ( 1 .. $nmax ) { | ||||
| 17126 | |||||
| 17127 | #---------------------------------------------------------- | ||||
| 17128 | # If we join the current pair of lines, | ||||
| 17129 | # line $n-1 will become the left part of the joined line | ||||
| 17130 | # line $n will become the right part of the joined line | ||||
| 17131 | # | ||||
| 17132 | # Here are Indexes of the endpoint tokens of the two lines: | ||||
| 17133 | # | ||||
| 17134 | # -----line $n-1--- | -----line $n----- | ||||
| 17135 | # $ibeg_1 $iend_1 | $ibeg_2 $iend_2 | ||||
| 17136 | # ^ | ||||
| 17137 | # | | ||||
| 17138 | # We want to decide if we should remove the line break | ||||
| 17139 | # between the tokens at $iend_1 and $ibeg_2 | ||||
| 17140 | # | ||||
| 17141 | # We will apply a number of ad-hoc tests to see if joining | ||||
| 17142 | # here will look ok. The code will just issue a 'next' | ||||
| 17143 | # command if the join doesn't look good. If we get through | ||||
| 17144 | # the gauntlet of tests, the lines will be recombined. | ||||
| 17145 | #---------------------------------------------------------- | ||||
| 17146 | # | ||||
| 17147 | # beginning and ending tokens of the lines we are working on | ||||
| 17148 | my $ibeg_1 = $$ri_beg[ $n - 1 ]; | ||||
| 17149 | my $iend_1 = $$ri_end[ $n - 1 ]; | ||||
| 17150 | my $iend_2 = $$ri_end[$n]; | ||||
| 17151 | my $ibeg_2 = $$ri_beg[$n]; | ||||
| 17152 | my $ibeg_nmax = $$ri_beg[$nmax]; | ||||
| 17153 | |||||
| 17154 | my $type_iend_1 = $types_to_go[$iend_1]; | ||||
| 17155 | my $type_iend_2 = $types_to_go[$iend_2]; | ||||
| 17156 | my $type_ibeg_1 = $types_to_go[$ibeg_1]; | ||||
| 17157 | my $type_ibeg_2 = $types_to_go[$ibeg_2]; | ||||
| 17158 | |||||
| 17159 | # some beginning indexes of other lines, which may not exist | ||||
| 17160 | my $ibeg_0 = $n > 1 ? $$ri_beg[ $n - 2 ] : -1; | ||||
| 17161 | my $ibeg_3 = $n < $nmax ? $$ri_beg[ $n + 1 ] : -1; | ||||
| 17162 | my $ibeg_4 = $n + 2 <= $nmax ? $$ri_beg[ $n + 2 ] : -1; | ||||
| 17163 | |||||
| 17164 | my $bs_tweak = 0; | ||||
| 17165 | |||||
| 17166 | #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] - | ||||
| 17167 | # $nesting_depth_to_go[$ibeg_1] ); | ||||
| 17168 | |||||
| 17169 | FORMATTER_DEBUG_FLAG_RECOMBINE && do { | ||||
| 17170 | print STDERR | ||||
| 17171 | "RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n"; | ||||
| 17172 | }; | ||||
| 17173 | |||||
| 17174 | # If line $n is the last line, we set some flags and | ||||
| 17175 | # do any special checks for it | ||||
| 17176 | if ( $n == $nmax ) { | ||||
| 17177 | |||||
| 17178 | # a terminal '{' should stay where it is | ||||
| 17179 | next if $type_ibeg_2 eq '{'; | ||||
| 17180 | |||||
| 17181 | # set flag if statement $n ends in ';' | ||||
| 17182 | $this_line_is_semicolon_terminated = $type_iend_2 eq ';' | ||||
| 17183 | |||||
| 17184 | # with possible side comment | ||||
| 17185 | || ( $type_iend_2 eq '#' | ||||
| 17186 | && $iend_2 - $ibeg_2 >= 2 | ||||
| 17187 | && $types_to_go[ $iend_2 - 2 ] eq ';' | ||||
| 17188 | && $types_to_go[ $iend_2 - 1 ] eq 'b' ); | ||||
| 17189 | } | ||||
| 17190 | |||||
| 17191 | #---------------------------------------------------------- | ||||
| 17192 | # Recombine Section 1: | ||||
| 17193 | # Examine the special token joining this line pair, if any. | ||||
| 17194 | # Put as many tests in this section to avoid duplicate code and | ||||
| 17195 | # to make formatting independent of whether breaks are to the | ||||
| 17196 | # left or right of an operator. | ||||
| 17197 | #---------------------------------------------------------- | ||||
| 17198 | |||||
| 17199 | my ($itok) = @{ $joint[$n] }; | ||||
| 17200 | if ($itok) { | ||||
| 17201 | |||||
| 17202 | # FIXME: Patch - may not be necessary | ||||
| 17203 | my $iend_1 = | ||||
| 17204 | $type_iend_1 eq 'b' | ||||
| 17205 | ? $iend_1 - 1 | ||||
| 17206 | : $iend_1; | ||||
| 17207 | |||||
| 17208 | my $iend_2 = | ||||
| 17209 | $type_iend_2 eq 'b' | ||||
| 17210 | ? $iend_2 - 1 | ||||
| 17211 | : $iend_2; | ||||
| 17212 | ## END PATCH | ||||
| 17213 | |||||
| 17214 | my $type = $types_to_go[$itok]; | ||||
| 17215 | |||||
| 17216 | if ( $type eq ':' ) { | ||||
| 17217 | |||||
| 17218 | # do not join at a colon unless it disobeys the break request | ||||
| 17219 | if ( $itok eq $iend_1 ) { | ||||
| 17220 | next unless $want_break_before{$type}; | ||||
| 17221 | } | ||||
| 17222 | else { | ||||
| 17223 | $leading_amp_count++; | ||||
| 17224 | next if $want_break_before{$type}; | ||||
| 17225 | } | ||||
| 17226 | } ## end if ':' | ||||
| 17227 | |||||
| 17228 | # handle math operators + - * / | ||||
| 17229 | elsif ( $is_math_op{$type} ) { | ||||
| 17230 | |||||
| 17231 | # Combine these lines if this line is a single | ||||
| 17232 | # number, or if it is a short term with same | ||||
| 17233 | # operator as the previous line. For example, in | ||||
| 17234 | # the following code we will combine all of the | ||||
| 17235 | # short terms $A, $B, $C, $D, $E, $F, together | ||||
| 17236 | # instead of leaving them one per line: | ||||
| 17237 | # my $time = | ||||
| 17238 | # $A * $B * $C * $D * $E * $F * | ||||
| 17239 | # ( 2. * $eps * $sigma * $area ) * | ||||
| 17240 | # ( 1. / $tcold**3 - 1. / $thot**3 ); | ||||
| 17241 | |||||
| 17242 | # This can be important in math-intensive code. | ||||
| 17243 | |||||
| 17244 | my $good_combo; | ||||
| 17245 | |||||
| 17246 | my $itokp = min( $inext_to_go[$itok], $iend_2 ); | ||||
| 17247 | my $itokpp = min( $inext_to_go[$itokp], $iend_2 ); | ||||
| 17248 | my $itokm = max( $iprev_to_go[$itok], $ibeg_1 ); | ||||
| 17249 | my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 ); | ||||
| 17250 | |||||
| 17251 | # check for a number on the right | ||||
| 17252 | if ( $types_to_go[$itokp] eq 'n' ) { | ||||
| 17253 | |||||
| 17254 | # ok if nothing else on right | ||||
| 17255 | if ( $itokp == $iend_2 ) { | ||||
| 17256 | $good_combo = 1; | ||||
| 17257 | } | ||||
| 17258 | else { | ||||
| 17259 | |||||
| 17260 | # look one more token to right.. | ||||
| 17261 | # okay if math operator or some termination | ||||
| 17262 | $good_combo = | ||||
| 17263 | ( ( $itokpp == $iend_2 ) | ||||
| 17264 | && $is_math_op{ $types_to_go[$itokpp] } ) | ||||
| 17265 | || $types_to_go[$itokpp] =~ /^[#,;]$/; | ||||
| 17266 | } | ||||
| 17267 | } | ||||
| 17268 | |||||
| 17269 | # check for a number on the left | ||||
| 17270 | if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) { | ||||
| 17271 | |||||
| 17272 | # okay if nothing else to left | ||||
| 17273 | if ( $itokm == $ibeg_1 ) { | ||||
| 17274 | $good_combo = 1; | ||||
| 17275 | } | ||||
| 17276 | |||||
| 17277 | # otherwise look one more token to left | ||||
| 17278 | else { | ||||
| 17279 | |||||
| 17280 | # okay if math operator, comma, or assignment | ||||
| 17281 | $good_combo = ( $itokmm == $ibeg_1 ) | ||||
| 17282 | && ( $is_math_op{ $types_to_go[$itokmm] } | ||||
| 17283 | || $types_to_go[$itokmm] =~ /^[,]$/ | ||||
| 17284 | || $is_assignment{ $types_to_go[$itokmm] } | ||||
| 17285 | ); | ||||
| 17286 | } | ||||
| 17287 | } | ||||
| 17288 | |||||
| 17289 | # look for a single short token either side of the | ||||
| 17290 | # operator | ||||
| 17291 | if ( !$good_combo ) { | ||||
| 17292 | |||||
| 17293 | # Slight adjustment factor to make results | ||||
| 17294 | # independent of break before or after operator in | ||||
| 17295 | # long summed lists. (An operator and a space make | ||||
| 17296 | # two spaces). | ||||
| 17297 | my $two = ( $itok eq $iend_1 ) ? 2 : 0; | ||||
| 17298 | |||||
| 17299 | $good_combo = | ||||
| 17300 | |||||
| 17301 | # numbers or id's on both sides of this joint | ||||
| 17302 | $types_to_go[$itokp] =~ /^[in]$/ | ||||
| 17303 | && $types_to_go[$itokm] =~ /^[in]$/ | ||||
| 17304 | |||||
| 17305 | # one of the two lines must be short: | ||||
| 17306 | && ( | ||||
| 17307 | ( | ||||
| 17308 | # no more than 2 nonblank tokens right of | ||||
| 17309 | # joint | ||||
| 17310 | $itokpp == $iend_2 | ||||
| 17311 | |||||
| 17312 | # short | ||||
| 17313 | && token_sequence_length( $itokp, $iend_2 ) | ||||
| 17314 | < $two + | ||||
| 17315 | $rOpts_short_concatenation_item_length | ||||
| 17316 | ) | ||||
| 17317 | || ( | ||||
| 17318 | # no more than 2 nonblank tokens left of | ||||
| 17319 | # joint | ||||
| 17320 | $itokmm == $ibeg_1 | ||||
| 17321 | |||||
| 17322 | # short | ||||
| 17323 | && token_sequence_length( $ibeg_1, $itokm ) | ||||
| 17324 | < 2 - $two + | ||||
| 17325 | $rOpts_short_concatenation_item_length | ||||
| 17326 | ) | ||||
| 17327 | |||||
| 17328 | ) | ||||
| 17329 | |||||
| 17330 | # keep pure terms; don't mix +- with */ | ||||
| 17331 | && !( | ||||
| 17332 | $is_plus_minus{$type} | ||||
| 17333 | && ( $is_mult_div{ $types_to_go[$itokmm] } | ||||
| 17334 | || $is_mult_div{ $types_to_go[$itokpp] } ) | ||||
| 17335 | ) | ||||
| 17336 | && !( | ||||
| 17337 | $is_mult_div{$type} | ||||
| 17338 | && ( $is_plus_minus{ $types_to_go[$itokmm] } | ||||
| 17339 | || $is_plus_minus{ $types_to_go[$itokpp] } ) | ||||
| 17340 | ) | ||||
| 17341 | |||||
| 17342 | ; | ||||
| 17343 | } | ||||
| 17344 | |||||
| 17345 | # it is also good to combine if we can reduce to 2 lines | ||||
| 17346 | if ( !$good_combo ) { | ||||
| 17347 | |||||
| 17348 | # index on other line where same token would be in a | ||||
| 17349 | # long chain. | ||||
| 17350 | my $iother = | ||||
| 17351 | ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1; | ||||
| 17352 | |||||
| 17353 | $good_combo = | ||||
| 17354 | $n == 2 | ||||
| 17355 | && $n == $nmax | ||||
| 17356 | && $types_to_go[$iother] ne $type; | ||||
| 17357 | } | ||||
| 17358 | |||||
| 17359 | next unless ($good_combo); | ||||
| 17360 | |||||
| 17361 | } ## end math | ||||
| 17362 | |||||
| 17363 | elsif ( $is_amp_amp{$type} ) { | ||||
| 17364 | ##TBD | ||||
| 17365 | } ## end &&, || | ||||
| 17366 | |||||
| 17367 | elsif ( $is_assignment{$type} ) { | ||||
| 17368 | ##TBD | ||||
| 17369 | } ## end assignment | ||||
| 17370 | } | ||||
| 17371 | |||||
| 17372 | #---------------------------------------------------------- | ||||
| 17373 | # Recombine Section 2: | ||||
| 17374 | # Examine token at $iend_1 (right end of first line of pair) | ||||
| 17375 | #---------------------------------------------------------- | ||||
| 17376 | |||||
| 17377 | # an isolated '}' may join with a ';' terminated segment | ||||
| 17378 | if ( $type_iend_1 eq '}' ) { | ||||
| 17379 | |||||
| 17380 | # Check for cases where combining a semicolon terminated | ||||
| 17381 | # statement with a previous isolated closing paren will | ||||
| 17382 | # allow the combined line to be outdented. This is | ||||
| 17383 | # generally a good move. For example, we can join up | ||||
| 17384 | # the last two lines here: | ||||
| 17385 | # ( | ||||
| 17386 | # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, | ||||
| 17387 | # $size, $atime, $mtime, $ctime, $blksize, $blocks | ||||
| 17388 | # ) | ||||
| 17389 | # = stat($file); | ||||
| 17390 | # | ||||
| 17391 | # to get: | ||||
| 17392 | # ( | ||||
| 17393 | # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, | ||||
| 17394 | # $size, $atime, $mtime, $ctime, $blksize, $blocks | ||||
| 17395 | # ) = stat($file); | ||||
| 17396 | # | ||||
| 17397 | # which makes the parens line up. | ||||
| 17398 | # | ||||
| 17399 | # Another example, from Joe Matarazzo, probably looks best | ||||
| 17400 | # with the 'or' clause appended to the trailing paren: | ||||
| 17401 | # $self->some_method( | ||||
| 17402 | # PARAM1 => 'foo', | ||||
| 17403 | # PARAM2 => 'bar' | ||||
| 17404 | # ) or die "Some_method didn't work"; | ||||
| 17405 | # | ||||
| 17406 | # But we do not want to do this for something like the -lp | ||||
| 17407 | # option where the paren is not outdentable because the | ||||
| 17408 | # trailing clause will be far to the right. | ||||
| 17409 | # | ||||
| 17410 | # The logic here is synchronized with the logic in sub | ||||
| 17411 | # sub set_adjusted_indentation, which actually does | ||||
| 17412 | # the outdenting. | ||||
| 17413 | # | ||||
| 17414 | $previous_outdentable_closing_paren = | ||||
| 17415 | $this_line_is_semicolon_terminated | ||||
| 17416 | |||||
| 17417 | # only one token on last line | ||||
| 17418 | && $ibeg_1 == $iend_1 | ||||
| 17419 | |||||
| 17420 | # must be structural paren | ||||
| 17421 | && $tokens_to_go[$iend_1] eq ')' | ||||
| 17422 | |||||
| 17423 | # style must allow outdenting, | ||||
| 17424 | && !$closing_token_indentation{')'} | ||||
| 17425 | |||||
| 17426 | # only leading '&&', '||', and ':' if no others seen | ||||
| 17427 | # (but note: our count made below could be wrong | ||||
| 17428 | # due to intervening comments) | ||||
| 17429 | && ( $leading_amp_count == 0 | ||||
| 17430 | || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ ) | ||||
| 17431 | |||||
| 17432 | # but leading colons probably line up with a | ||||
| 17433 | # previous colon or question (count could be wrong). | ||||
| 17434 | && $type_ibeg_2 ne ':' | ||||
| 17435 | |||||
| 17436 | # only one step in depth allowed. this line must not | ||||
| 17437 | # begin with a ')' itself. | ||||
| 17438 | && ( $nesting_depth_to_go[$iend_1] == | ||||
| 17439 | $nesting_depth_to_go[$iend_2] + 1 ); | ||||
| 17440 | |||||
| 17441 | # YVES patch 2 of 2: | ||||
| 17442 | # Allow cuddled eval chains, like this: | ||||
| 17443 | # eval { | ||||
| 17444 | # #STUFF; | ||||
| 17445 | # 1; # return true | ||||
| 17446 | # } or do { | ||||
| 17447 | # #handle error | ||||
| 17448 | # }; | ||||
| 17449 | # This patch works together with a patch in | ||||
| 17450 | # setting adjusted indentation (where the closing eval | ||||
| 17451 | # brace is outdented if possible). | ||||
| 17452 | # The problem is that an 'eval' block has continuation | ||||
| 17453 | # indentation and it looks better to undo it in some | ||||
| 17454 | # cases. If we do not use this patch we would get: | ||||
| 17455 | # eval { | ||||
| 17456 | # #STUFF; | ||||
| 17457 | # 1; # return true | ||||
| 17458 | # } | ||||
| 17459 | # or do { | ||||
| 17460 | # #handle error | ||||
| 17461 | # }; | ||||
| 17462 | # The alternative, for uncuddled style, is to create | ||||
| 17463 | # a patch in set_adjusted_indentation which undoes | ||||
| 17464 | # the indentation of a leading line like 'or do {'. | ||||
| 17465 | # This doesn't work well with -icb through | ||||
| 17466 | if ( | ||||
| 17467 | $block_type_to_go[$iend_1] eq 'eval' | ||||
| 17468 | && !$rOpts->{'line-up-parentheses'} | ||||
| 17469 | && !$rOpts->{'indent-closing-brace'} | ||||
| 17470 | && $tokens_to_go[$iend_2] eq '{' | ||||
| 17471 | && ( | ||||
| 17472 | ( $type_ibeg_2 =~ /^(|\&\&|\|\|)$/ ) | ||||
| 17473 | || ( $type_ibeg_2 eq 'k' | ||||
| 17474 | && $is_and_or{ $tokens_to_go[$ibeg_2] } ) | ||||
| 17475 | || $is_if_unless{ $tokens_to_go[$ibeg_2] } | ||||
| 17476 | ) | ||||
| 17477 | ) | ||||
| 17478 | { | ||||
| 17479 | $previous_outdentable_closing_paren ||= 1; | ||||
| 17480 | } | ||||
| 17481 | |||||
| 17482 | next | ||||
| 17483 | unless ( | ||||
| 17484 | $previous_outdentable_closing_paren | ||||
| 17485 | |||||
| 17486 | # handle '.' and '?' specially below | ||||
| 17487 | || ( $type_ibeg_2 =~ /^[\.\?]$/ ) | ||||
| 17488 | ); | ||||
| 17489 | } | ||||
| 17490 | |||||
| 17491 | # YVES | ||||
| 17492 | # honor breaks at opening brace | ||||
| 17493 | # Added to prevent recombining something like this: | ||||
| 17494 | # } || eval { package main; | ||||
| 17495 | elsif ( $type_iend_1 eq '{' ) { | ||||
| 17496 | next if $forced_breakpoint_to_go[$iend_1]; | ||||
| 17497 | } | ||||
| 17498 | |||||
| 17499 | # do not recombine lines with ending &&, ||, | ||||
| 17500 | elsif ( $is_amp_amp{$type_iend_1} ) { | ||||
| 17501 | next unless $want_break_before{$type_iend_1}; | ||||
| 17502 | } | ||||
| 17503 | |||||
| 17504 | # Identify and recombine a broken ?/: chain | ||||
| 17505 | elsif ( $type_iend_1 eq '?' ) { | ||||
| 17506 | |||||
| 17507 | # Do not recombine different levels | ||||
| 17508 | next | ||||
| 17509 | if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] ); | ||||
| 17510 | |||||
| 17511 | # do not recombine unless next line ends in : | ||||
| 17512 | next unless $type_iend_2 eq ':'; | ||||
| 17513 | } | ||||
| 17514 | |||||
| 17515 | # for lines ending in a comma... | ||||
| 17516 | elsif ( $type_iend_1 eq ',' ) { | ||||
| 17517 | |||||
| 17518 | # Do not recombine at comma which is following the | ||||
| 17519 | # input bias. | ||||
| 17520 | # TODO: might be best to make a special flag | ||||
| 17521 | next if ( $old_breakpoint_to_go[$iend_1] ); | ||||
| 17522 | |||||
| 17523 | # an isolated '},' may join with an identifier + ';' | ||||
| 17524 | # this is useful for the class of a 'bless' statement (bless.t) | ||||
| 17525 | if ( $type_ibeg_1 eq '}' | ||||
| 17526 | && $type_ibeg_2 eq 'i' ) | ||||
| 17527 | { | ||||
| 17528 | next | ||||
| 17529 | unless ( ( $ibeg_1 == ( $iend_1 - 1 ) ) | ||||
| 17530 | && ( $iend_2 == ( $ibeg_2 + 1 ) ) | ||||
| 17531 | && $this_line_is_semicolon_terminated ); | ||||
| 17532 | |||||
| 17533 | # override breakpoint | ||||
| 17534 | $forced_breakpoint_to_go[$iend_1] = 0; | ||||
| 17535 | } | ||||
| 17536 | |||||
| 17537 | # but otherwise .. | ||||
| 17538 | else { | ||||
| 17539 | |||||
| 17540 | # do not recombine after a comma unless this will leave | ||||
| 17541 | # just 1 more line | ||||
| 17542 | next unless ( $n + 1 >= $nmax ); | ||||
| 17543 | |||||
| 17544 | # do not recombine if there is a change in indentation depth | ||||
| 17545 | next | ||||
| 17546 | if ( | ||||
| 17547 | $levels_to_go[$iend_1] != $levels_to_go[$iend_2] ); | ||||
| 17548 | |||||
| 17549 | # do not recombine a "complex expression" after a | ||||
| 17550 | # comma. "complex" means no parens. | ||||
| 17551 | my $saw_paren; | ||||
| 17552 | foreach my $ii ( $ibeg_2 .. $iend_2 ) { | ||||
| 17553 | if ( $tokens_to_go[$ii] eq '(' ) { | ||||
| 17554 | $saw_paren = 1; | ||||
| 17555 | last; | ||||
| 17556 | } | ||||
| 17557 | } | ||||
| 17558 | next if $saw_paren; | ||||
| 17559 | } | ||||
| 17560 | } | ||||
| 17561 | |||||
| 17562 | # opening paren.. | ||||
| 17563 | elsif ( $type_iend_1 eq '(' ) { | ||||
| 17564 | |||||
| 17565 | # No longer doing this | ||||
| 17566 | } | ||||
| 17567 | |||||
| 17568 | elsif ( $type_iend_1 eq ')' ) { | ||||
| 17569 | |||||
| 17570 | # No longer doing this | ||||
| 17571 | } | ||||
| 17572 | |||||
| 17573 | # keep a terminal for-semicolon | ||||
| 17574 | elsif ( $type_iend_1 eq 'f' ) { | ||||
| 17575 | next; | ||||
| 17576 | } | ||||
| 17577 | |||||
| 17578 | # if '=' at end of line ... | ||||
| 17579 | elsif ( $is_assignment{$type_iend_1} ) { | ||||
| 17580 | |||||
| 17581 | # keep break after = if it was in input stream | ||||
| 17582 | # this helps prevent 'blinkers' | ||||
| 17583 | next if $old_breakpoint_to_go[$iend_1] | ||||
| 17584 | |||||
| 17585 | # don't strand an isolated '=' | ||||
| 17586 | && $iend_1 != $ibeg_1; | ||||
| 17587 | |||||
| 17588 | my $is_short_quote = | ||||
| 17589 | ( $type_ibeg_2 eq 'Q' | ||||
| 17590 | && $ibeg_2 == $iend_2 | ||||
| 17591 | && token_sequence_length( $ibeg_2, $ibeg_2 ) < | ||||
| 17592 | $rOpts_short_concatenation_item_length ); | ||||
| 17593 | my $is_ternary = | ||||
| 17594 | ( $type_ibeg_1 eq '?' | ||||
| 17595 | && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) ); | ||||
| 17596 | |||||
| 17597 | # always join an isolated '=', a short quote, or if this | ||||
| 17598 | # will put ?/: at start of adjacent lines | ||||
| 17599 | if ( $ibeg_1 != $iend_1 | ||||
| 17600 | && !$is_short_quote | ||||
| 17601 | && !$is_ternary ) | ||||
| 17602 | { | ||||
| 17603 | next | ||||
| 17604 | unless ( | ||||
| 17605 | ( | ||||
| 17606 | |||||
| 17607 | # unless we can reduce this to two lines | ||||
| 17608 | $nmax < $n + 2 | ||||
| 17609 | |||||
| 17610 | # or three lines, the last with a leading semicolon | ||||
| 17611 | || ( $nmax == $n + 2 | ||||
| 17612 | && $types_to_go[$ibeg_nmax] eq ';' ) | ||||
| 17613 | |||||
| 17614 | # or the next line ends with a here doc | ||||
| 17615 | || $type_iend_2 eq 'h' | ||||
| 17616 | |||||
| 17617 | # or the next line ends in an open paren or brace | ||||
| 17618 | # and the break hasn't been forced [dima.t] | ||||
| 17619 | || ( !$forced_breakpoint_to_go[$iend_1] | ||||
| 17620 | && $type_iend_2 eq '{' ) | ||||
| 17621 | ) | ||||
| 17622 | |||||
| 17623 | # do not recombine if the two lines might align well | ||||
| 17624 | # this is a very approximate test for this | ||||
| 17625 | && ( $ibeg_3 >= 0 | ||||
| 17626 | && $type_ibeg_2 ne $types_to_go[$ibeg_3] ) | ||||
| 17627 | ); | ||||
| 17628 | |||||
| 17629 | if ( | ||||
| 17630 | |||||
| 17631 | # Recombine if we can make two lines | ||||
| 17632 | $nmax >= $n + 2 | ||||
| 17633 | |||||
| 17634 | # -lp users often prefer this: | ||||
| 17635 | # my $title = function($env, $env, $sysarea, | ||||
| 17636 | # "bubba Borrower Entry"); | ||||
| 17637 | # so we will recombine if -lp is used we have | ||||
| 17638 | # ending comma | ||||
| 17639 | && ( !$rOpts_line_up_parentheses | ||||
| 17640 | || $type_iend_2 ne ',' ) | ||||
| 17641 | ) | ||||
| 17642 | { | ||||
| 17643 | |||||
| 17644 | # otherwise, scan the rhs line up to last token for | ||||
| 17645 | # complexity. Note that we are not counting the last | ||||
| 17646 | # token in case it is an opening paren. | ||||
| 17647 | my $tv = 0; | ||||
| 17648 | my $depth = $nesting_depth_to_go[$ibeg_2]; | ||||
| 17649 | for ( my $i = $ibeg_2 + 1 ; $i < $iend_2 ; $i++ ) { | ||||
| 17650 | if ( $nesting_depth_to_go[$i] != $depth ) { | ||||
| 17651 | $tv++; | ||||
| 17652 | last if ( $tv > 1 ); | ||||
| 17653 | } | ||||
| 17654 | $depth = $nesting_depth_to_go[$i]; | ||||
| 17655 | } | ||||
| 17656 | |||||
| 17657 | # ok to recombine if no level changes before last token | ||||
| 17658 | if ( $tv > 0 ) { | ||||
| 17659 | |||||
| 17660 | # otherwise, do not recombine if more than two | ||||
| 17661 | # level changes. | ||||
| 17662 | next if ( $tv > 1 ); | ||||
| 17663 | |||||
| 17664 | # check total complexity of the two adjacent lines | ||||
| 17665 | # that will occur if we do this join | ||||
| 17666 | my $istop = | ||||
| 17667 | ( $n < $nmax ) ? $$ri_end[ $n + 1 ] : $iend_2; | ||||
| 17668 | for ( my $i = $iend_2 ; $i <= $istop ; $i++ ) { | ||||
| 17669 | if ( $nesting_depth_to_go[$i] != $depth ) { | ||||
| 17670 | $tv++; | ||||
| 17671 | last if ( $tv > 2 ); | ||||
| 17672 | } | ||||
| 17673 | $depth = $nesting_depth_to_go[$i]; | ||||
| 17674 | } | ||||
| 17675 | |||||
| 17676 | # do not recombine if total is more than 2 level changes | ||||
| 17677 | next if ( $tv > 2 ); | ||||
| 17678 | } | ||||
| 17679 | } | ||||
| 17680 | } | ||||
| 17681 | |||||
| 17682 | unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) { | ||||
| 17683 | $forced_breakpoint_to_go[$iend_1] = 0; | ||||
| 17684 | } | ||||
| 17685 | } | ||||
| 17686 | |||||
| 17687 | # for keywords.. | ||||
| 17688 | elsif ( $type_iend_1 eq 'k' ) { | ||||
| 17689 | |||||
| 17690 | # make major control keywords stand out | ||||
| 17691 | # (recombine.t) | ||||
| 17692 | next | ||||
| 17693 | if ( | ||||
| 17694 | |||||
| 17695 | #/^(last|next|redo|return)$/ | ||||
| 17696 | $is_last_next_redo_return{ $tokens_to_go[$iend_1] } | ||||
| 17697 | |||||
| 17698 | # but only if followed by multiple lines | ||||
| 17699 | && $n < $nmax | ||||
| 17700 | ); | ||||
| 17701 | |||||
| 17702 | if ( $is_and_or{ $tokens_to_go[$iend_1] } ) { | ||||
| 17703 | next | ||||
| 17704 | unless $want_break_before{ $tokens_to_go[$iend_1] }; | ||||
| 17705 | } | ||||
| 17706 | } | ||||
| 17707 | |||||
| 17708 | #---------------------------------------------------------- | ||||
| 17709 | # Recombine Section 3: | ||||
| 17710 | # Examine token at $ibeg_2 (left end of second line of pair) | ||||
| 17711 | #---------------------------------------------------------- | ||||
| 17712 | |||||
| 17713 | # join lines identified above as capable of | ||||
| 17714 | # causing an outdented line with leading closing paren | ||||
| 17715 | # Note that we are skipping the rest of this section | ||||
| 17716 | if ($previous_outdentable_closing_paren) { | ||||
| 17717 | $forced_breakpoint_to_go[$iend_1] = 0; | ||||
| 17718 | } | ||||
| 17719 | |||||
| 17720 | # handle lines with leading &&, || | ||||
| 17721 | elsif ( $is_amp_amp{$type_ibeg_2} ) { | ||||
| 17722 | |||||
| 17723 | $leading_amp_count++; | ||||
| 17724 | |||||
| 17725 | # ok to recombine if it follows a ? or : | ||||
| 17726 | # and is followed by an open paren.. | ||||
| 17727 | my $ok = | ||||
| 17728 | ( $is_ternary{$type_ibeg_1} | ||||
| 17729 | && $tokens_to_go[$iend_2] eq '(' ) | ||||
| 17730 | |||||
| 17731 | # or is followed by a ? or : at same depth | ||||
| 17732 | # | ||||
| 17733 | # We are looking for something like this. We can | ||||
| 17734 | # recombine the && line with the line above to make the | ||||
| 17735 | # structure more clear: | ||||
| 17736 | # return | ||||
| 17737 | # exists $G->{Attr}->{V} | ||||
| 17738 | # && exists $G->{Attr}->{V}->{$u} | ||||
| 17739 | # ? %{ $G->{Attr}->{V}->{$u} } | ||||
| 17740 | # : (); | ||||
| 17741 | # | ||||
| 17742 | # We should probably leave something like this alone: | ||||
| 17743 | # return | ||||
| 17744 | # exists $G->{Attr}->{E} | ||||
| 17745 | # && exists $G->{Attr}->{E}->{$u} | ||||
| 17746 | # && exists $G->{Attr}->{E}->{$u}->{$v} | ||||
| 17747 | # ? %{ $G->{Attr}->{E}->{$u}->{$v} } | ||||
| 17748 | # : (); | ||||
| 17749 | # so that we either have all of the &&'s (or ||'s) | ||||
| 17750 | # on one line, as in the first example, or break at | ||||
| 17751 | # each one as in the second example. However, it | ||||
| 17752 | # sometimes makes things worse to check for this because | ||||
| 17753 | # it prevents multiple recombinations. So this is not done. | ||||
| 17754 | || ( $ibeg_3 >= 0 | ||||
| 17755 | && $is_ternary{ $types_to_go[$ibeg_3] } | ||||
| 17756 | && $nesting_depth_to_go[$ibeg_3] == | ||||
| 17757 | $nesting_depth_to_go[$ibeg_2] ); | ||||
| 17758 | |||||
| 17759 | next if !$ok && $want_break_before{$type_ibeg_2}; | ||||
| 17760 | $forced_breakpoint_to_go[$iend_1] = 0; | ||||
| 17761 | |||||
| 17762 | # tweak the bond strength to give this joint priority | ||||
| 17763 | # over ? and : | ||||
| 17764 | $bs_tweak = 0.25; | ||||
| 17765 | } | ||||
| 17766 | |||||
| 17767 | # Identify and recombine a broken ?/: chain | ||||
| 17768 | elsif ( $type_ibeg_2 eq '?' ) { | ||||
| 17769 | |||||
| 17770 | # Do not recombine different levels | ||||
| 17771 | my $lev = $levels_to_go[$ibeg_2]; | ||||
| 17772 | next if ( $lev ne $levels_to_go[$ibeg_1] ); | ||||
| 17773 | |||||
| 17774 | # Do not recombine a '?' if either next line or | ||||
| 17775 | # previous line does not start with a ':'. The reasons | ||||
| 17776 | # are that (1) no alignment of the ? will be possible | ||||
| 17777 | # and (2) the expression is somewhat complex, so the | ||||
| 17778 | # '?' is harder to see in the interior of the line. | ||||
| 17779 | my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':'; | ||||
| 17780 | my $precedes_colon = | ||||
| 17781 | $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':'; | ||||
| 17782 | next unless ( $follows_colon || $precedes_colon ); | ||||
| 17783 | |||||
| 17784 | # we will always combining a ? line following a : line | ||||
| 17785 | if ( !$follows_colon ) { | ||||
| 17786 | |||||
| 17787 | # ...otherwise recombine only if it looks like a chain. | ||||
| 17788 | # we will just look at a few nearby lines to see if | ||||
| 17789 | # this looks like a chain. | ||||
| 17790 | my $local_count = 0; | ||||
| 17791 | foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) { | ||||
| 17792 | $local_count++ | ||||
| 17793 | if $ii >= 0 | ||||
| 17794 | && $types_to_go[$ii] eq ':' | ||||
| 17795 | && $levels_to_go[$ii] == $lev; | ||||
| 17796 | } | ||||
| 17797 | next unless ( $local_count > 1 ); | ||||
| 17798 | } | ||||
| 17799 | $forced_breakpoint_to_go[$iend_1] = 0; | ||||
| 17800 | } | ||||
| 17801 | |||||
| 17802 | # do not recombine lines with leading '.' | ||||
| 17803 | elsif ( $type_ibeg_2 eq '.' ) { | ||||
| 17804 | my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 ); | ||||
| 17805 | next | ||||
| 17806 | unless ( | ||||
| 17807 | |||||
| 17808 | # ... unless there is just one and we can reduce | ||||
| 17809 | # this to two lines if we do. For example, this | ||||
| 17810 | # | ||||
| 17811 | # | ||||
| 17812 | # $bodyA .= | ||||
| 17813 | # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;' | ||||
| 17814 | # | ||||
| 17815 | # looks better than this: | ||||
| 17816 | # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;' | ||||
| 17817 | # . '$args .= $pat;' | ||||
| 17818 | |||||
| 17819 | ( | ||||
| 17820 | $n == 2 | ||||
| 17821 | && $n == $nmax | ||||
| 17822 | && $type_ibeg_1 ne $type_ibeg_2 | ||||
| 17823 | ) | ||||
| 17824 | |||||
| 17825 | # ... or this would strand a short quote , like this | ||||
| 17826 | # . "some long quote" | ||||
| 17827 | # . "\n"; | ||||
| 17828 | |||||
| 17829 | || ( $types_to_go[$i_next_nonblank] eq 'Q' | ||||
| 17830 | && $i_next_nonblank >= $iend_2 - 1 | ||||
| 17831 | && $token_lengths_to_go[$i_next_nonblank] < | ||||
| 17832 | $rOpts_short_concatenation_item_length ) | ||||
| 17833 | ); | ||||
| 17834 | } | ||||
| 17835 | |||||
| 17836 | # handle leading keyword.. | ||||
| 17837 | elsif ( $type_ibeg_2 eq 'k' ) { | ||||
| 17838 | |||||
| 17839 | # handle leading "or" | ||||
| 17840 | if ( $tokens_to_go[$ibeg_2] eq 'or' ) { | ||||
| 17841 | next | ||||
| 17842 | unless ( | ||||
| 17843 | $this_line_is_semicolon_terminated | ||||
| 17844 | && ( | ||||
| 17845 | |||||
| 17846 | # following 'if' or 'unless' or 'or' | ||||
| 17847 | $type_ibeg_1 eq 'k' | ||||
| 17848 | && $is_if_unless{ $tokens_to_go[$ibeg_1] } | ||||
| 17849 | |||||
| 17850 | # important: only combine a very simple or | ||||
| 17851 | # statement because the step below may have | ||||
| 17852 | # combined a trailing 'and' with this or, | ||||
| 17853 | # and we do not want to then combine | ||||
| 17854 | # everything together | ||||
| 17855 | && ( $iend_2 - $ibeg_2 <= 7 ) | ||||
| 17856 | ) | ||||
| 17857 | ); | ||||
| 17858 | ##X: RT #81854 | ||||
| 17859 | $forced_breakpoint_to_go[$iend_1] = 0 | ||||
| 17860 | unless $old_breakpoint_to_go[$iend_1]; | ||||
| 17861 | } | ||||
| 17862 | |||||
| 17863 | # handle leading 'and' | ||||
| 17864 | elsif ( $tokens_to_go[$ibeg_2] eq 'and' ) { | ||||
| 17865 | |||||
| 17866 | # Decide if we will combine a single terminal 'and' | ||||
| 17867 | # after an 'if' or 'unless'. | ||||
| 17868 | |||||
| 17869 | # This looks best with the 'and' on the same | ||||
| 17870 | # line as the 'if': | ||||
| 17871 | # | ||||
| 17872 | # $a = 1 | ||||
| 17873 | # if $seconds and $nu < 2; | ||||
| 17874 | # | ||||
| 17875 | # But this looks better as shown: | ||||
| 17876 | # | ||||
| 17877 | # $a = 1 | ||||
| 17878 | # if !$this->{Parents}{$_} | ||||
| 17879 | # or $this->{Parents}{$_} eq $_; | ||||
| 17880 | # | ||||
| 17881 | next | ||||
| 17882 | unless ( | ||||
| 17883 | $this_line_is_semicolon_terminated | ||||
| 17884 | && ( | ||||
| 17885 | |||||
| 17886 | # following 'if' or 'unless' or 'or' | ||||
| 17887 | $type_ibeg_1 eq 'k' | ||||
| 17888 | && ( $is_if_unless{ $tokens_to_go[$ibeg_1] } | ||||
| 17889 | || $tokens_to_go[$ibeg_1] eq 'or' ) | ||||
| 17890 | ) | ||||
| 17891 | ); | ||||
| 17892 | } | ||||
| 17893 | |||||
| 17894 | # handle leading "if" and "unless" | ||||
| 17895 | elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) { | ||||
| 17896 | |||||
| 17897 | # FIXME: This is still experimental..may not be too useful | ||||
| 17898 | next | ||||
| 17899 | unless ( | ||||
| 17900 | $this_line_is_semicolon_terminated | ||||
| 17901 | |||||
| 17902 | # previous line begins with 'and' or 'or' | ||||
| 17903 | && $type_ibeg_1 eq 'k' | ||||
| 17904 | && $is_and_or{ $tokens_to_go[$ibeg_1] } | ||||
| 17905 | |||||
| 17906 | ); | ||||
| 17907 | } | ||||
| 17908 | |||||
| 17909 | # handle all other leading keywords | ||||
| 17910 | else { | ||||
| 17911 | |||||
| 17912 | # keywords look best at start of lines, | ||||
| 17913 | # but combine things like "1 while" | ||||
| 17914 | unless ( $is_assignment{$type_iend_1} ) { | ||||
| 17915 | next | ||||
| 17916 | if ( ( $type_iend_1 ne 'k' ) | ||||
| 17917 | && ( $tokens_to_go[$ibeg_2] ne 'while' ) ); | ||||
| 17918 | } | ||||
| 17919 | } | ||||
| 17920 | } | ||||
| 17921 | |||||
| 17922 | # similar treatment of && and || as above for 'and' and 'or': | ||||
| 17923 | # NOTE: This block of code is currently bypassed because | ||||
| 17924 | # of a previous block but is retained for possible future use. | ||||
| 17925 | elsif ( $is_amp_amp{$type_ibeg_2} ) { | ||||
| 17926 | |||||
| 17927 | # maybe looking at something like: | ||||
| 17928 | # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i; | ||||
| 17929 | |||||
| 17930 | next | ||||
| 17931 | unless ( | ||||
| 17932 | $this_line_is_semicolon_terminated | ||||
| 17933 | |||||
| 17934 | # previous line begins with an 'if' or 'unless' keyword | ||||
| 17935 | && $type_ibeg_1 eq 'k' | ||||
| 17936 | && $is_if_unless{ $tokens_to_go[$ibeg_1] } | ||||
| 17937 | |||||
| 17938 | ); | ||||
| 17939 | } | ||||
| 17940 | |||||
| 17941 | # handle line with leading = or similar | ||||
| 17942 | elsif ( $is_assignment{$type_ibeg_2} ) { | ||||
| 17943 | next unless ( $n == 1 || $n == $nmax ); | ||||
| 17944 | next if $old_breakpoint_to_go[$iend_1]; | ||||
| 17945 | next | ||||
| 17946 | unless ( | ||||
| 17947 | |||||
| 17948 | # unless we can reduce this to two lines | ||||
| 17949 | $nmax == 2 | ||||
| 17950 | |||||
| 17951 | # or three lines, the last with a leading semicolon | ||||
| 17952 | || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' ) | ||||
| 17953 | |||||
| 17954 | # or the next line ends with a here doc | ||||
| 17955 | || $type_iend_2 eq 'h' | ||||
| 17956 | |||||
| 17957 | # or this is a short line ending in ; | ||||
| 17958 | || ( $n == $nmax && $this_line_is_semicolon_terminated ) | ||||
| 17959 | ); | ||||
| 17960 | $forced_breakpoint_to_go[$iend_1] = 0; | ||||
| 17961 | } | ||||
| 17962 | |||||
| 17963 | #---------------------------------------------------------- | ||||
| 17964 | # Recombine Section 4: | ||||
| 17965 | # Combine the lines if we arrive here and it is possible | ||||
| 17966 | #---------------------------------------------------------- | ||||
| 17967 | |||||
| 17968 | # honor hard breakpoints | ||||
| 17969 | next if ( $forced_breakpoint_to_go[$iend_1] > 0 ); | ||||
| 17970 | |||||
| 17971 | my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak; | ||||
| 17972 | |||||
| 17973 | # combined line cannot be too long | ||||
| 17974 | my $excess = excess_line_length( $ibeg_1, $iend_2 ); | ||||
| 17975 | next if ( $excess > 0 ); | ||||
| 17976 | |||||
| 17977 | # Require a few extra spaces before recombining lines if we are | ||||
| 17978 | # at an old breakpoint unless this is a simple list or terminal | ||||
| 17979 | # line. The goal is to avoid oscillating between two | ||||
| 17980 | # quasi-stable end states. For example this snippet caused | ||||
| 17981 | # problems: | ||||
| 17982 | ## my $this = | ||||
| 17983 | ## bless { | ||||
| 17984 | ## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]" | ||||
| 17985 | ## }, | ||||
| 17986 | ## $type; | ||||
| 17987 | next | ||||
| 17988 | if ( $old_breakpoint_to_go[$iend_1] | ||||
| 17989 | && !$this_line_is_semicolon_terminated | ||||
| 17990 | && $n < $nmax | ||||
| 17991 | && $excess + 4 > 0 | ||||
| 17992 | && $type_iend_2 ne ',' ); | ||||
| 17993 | |||||
| 17994 | # do not recombine if we would skip in indentation levels | ||||
| 17995 | if ( $n < $nmax ) { | ||||
| 17996 | my $if_next = $$ri_beg[ $n + 1 ]; | ||||
| 17997 | next | ||||
| 17998 | if ( | ||||
| 17999 | $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2] | ||||
| 18000 | && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next] | ||||
| 18001 | |||||
| 18002 | # but an isolated 'if (' is undesirable | ||||
| 18003 | && !( | ||||
| 18004 | $n == 1 | ||||
| 18005 | && $iend_1 - $ibeg_1 <= 2 | ||||
| 18006 | && $type_ibeg_1 eq 'k' | ||||
| 18007 | && $tokens_to_go[$ibeg_1] eq 'if' | ||||
| 18008 | && $tokens_to_go[$iend_1] ne '(' | ||||
| 18009 | ) | ||||
| 18010 | ); | ||||
| 18011 | } | ||||
| 18012 | |||||
| 18013 | # honor no-break's | ||||
| 18014 | next if ( $bs >= NO_BREAK - 1 ); | ||||
| 18015 | |||||
| 18016 | # remember the pair with the greatest bond strength | ||||
| 18017 | if ( !$n_best ) { | ||||
| 18018 | $n_best = $n; | ||||
| 18019 | $bs_best = $bs; | ||||
| 18020 | } | ||||
| 18021 | else { | ||||
| 18022 | |||||
| 18023 | if ( $bs > $bs_best ) { | ||||
| 18024 | $n_best = $n; | ||||
| 18025 | $bs_best = $bs; | ||||
| 18026 | } | ||||
| 18027 | } | ||||
| 18028 | } | ||||
| 18029 | |||||
| 18030 | # recombine the pair with the greatest bond strength | ||||
| 18031 | if ($n_best) { | ||||
| 18032 | splice @$ri_beg, $n_best, 1; | ||||
| 18033 | splice @$ri_end, $n_best - 1, 1; | ||||
| 18034 | splice @joint, $n_best, 1; | ||||
| 18035 | |||||
| 18036 | # keep going if we are still making progress | ||||
| 18037 | $more_to_do++; | ||||
| 18038 | } | ||||
| 18039 | } | ||||
| 18040 | return ( $ri_beg, $ri_end ); | ||||
| 18041 | } | ||||
| 18042 | } # end recombine_breakpoints | ||||
| 18043 | |||||
| 18044 | sub break_all_chain_tokens { | ||||
| 18045 | |||||
| 18046 | # scan the current breakpoints looking for breaks at certain "chain | ||||
| 18047 | # operators" (. : && || + etc) which often occur repeatedly in a long | ||||
| 18048 | # statement. If we see a break at any one, break at all similar tokens | ||||
| 18049 | # within the same container. | ||||
| 18050 | # | ||||
| 18051 | my ( $ri_left, $ri_right ) = @_; | ||||
| 18052 | |||||
| 18053 | my %saw_chain_type; | ||||
| 18054 | my %left_chain_type; | ||||
| 18055 | my %right_chain_type; | ||||
| 18056 | my %interior_chain_type; | ||||
| 18057 | my $nmax = @$ri_right - 1; | ||||
| 18058 | |||||
| 18059 | # scan the left and right end tokens of all lines | ||||
| 18060 | my $count = 0; | ||||
| 18061 | for my $n ( 0 .. $nmax ) { | ||||
| 18062 | my $il = $$ri_left[$n]; | ||||
| 18063 | my $ir = $$ri_right[$n]; | ||||
| 18064 | my $typel = $types_to_go[$il]; | ||||
| 18065 | my $typer = $types_to_go[$ir]; | ||||
| 18066 | $typel = '+' if ( $typel eq '-' ); # treat + and - the same | ||||
| 18067 | $typer = '+' if ( $typer eq '-' ); | ||||
| 18068 | $typel = '*' if ( $typel eq '/' ); # treat * and / the same | ||||
| 18069 | $typer = '*' if ( $typer eq '/' ); | ||||
| 18070 | my $tokenl = $tokens_to_go[$il]; | ||||
| 18071 | my $tokenr = $tokens_to_go[$ir]; | ||||
| 18072 | |||||
| 18073 | if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) { | ||||
| 18074 | next if ( $typel eq '?' ); | ||||
| 18075 | push @{ $left_chain_type{$typel} }, $il; | ||||
| 18076 | $saw_chain_type{$typel} = 1; | ||||
| 18077 | $count++; | ||||
| 18078 | } | ||||
| 18079 | if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) { | ||||
| 18080 | next if ( $typer eq '?' ); | ||||
| 18081 | push @{ $right_chain_type{$typer} }, $ir; | ||||
| 18082 | $saw_chain_type{$typer} = 1; | ||||
| 18083 | $count++; | ||||
| 18084 | } | ||||
| 18085 | } | ||||
| 18086 | return unless $count; | ||||
| 18087 | |||||
| 18088 | # now look for any interior tokens of the same types | ||||
| 18089 | $count = 0; | ||||
| 18090 | for my $n ( 0 .. $nmax ) { | ||||
| 18091 | my $il = $$ri_left[$n]; | ||||
| 18092 | my $ir = $$ri_right[$n]; | ||||
| 18093 | for ( my $i = $il + 1 ; $i < $ir ; $i++ ) { | ||||
| 18094 | my $type = $types_to_go[$i]; | ||||
| 18095 | $type = '+' if ( $type eq '-' ); | ||||
| 18096 | $type = '*' if ( $type eq '/' ); | ||||
| 18097 | if ( $saw_chain_type{$type} ) { | ||||
| 18098 | push @{ $interior_chain_type{$type} }, $i; | ||||
| 18099 | $count++; | ||||
| 18100 | } | ||||
| 18101 | } | ||||
| 18102 | } | ||||
| 18103 | return unless $count; | ||||
| 18104 | |||||
| 18105 | # now make a list of all new break points | ||||
| 18106 | my @insert_list; | ||||
| 18107 | |||||
| 18108 | # loop over all chain types | ||||
| 18109 | foreach my $type ( keys %saw_chain_type ) { | ||||
| 18110 | |||||
| 18111 | # quit if just ONE continuation line with leading . For example-- | ||||
| 18112 | # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{' | ||||
| 18113 | # . $contents; | ||||
| 18114 | last if ( $nmax == 1 && $type =~ /^[\.\+]$/ ); | ||||
| 18115 | |||||
| 18116 | # loop over all interior chain tokens | ||||
| 18117 | foreach my $itest ( @{ $interior_chain_type{$type} } ) { | ||||
| 18118 | |||||
| 18119 | # loop over all left end tokens of same type | ||||
| 18120 | if ( $left_chain_type{$type} ) { | ||||
| 18121 | next if $nobreak_to_go[ $itest - 1 ]; | ||||
| 18122 | foreach my $i ( @{ $left_chain_type{$type} } ) { | ||||
| 18123 | next unless in_same_container( $i, $itest ); | ||||
| 18124 | push @insert_list, $itest - 1; | ||||
| 18125 | |||||
| 18126 | # Break at matching ? if this : is at a different level. | ||||
| 18127 | # For example, the ? before $THRf_DEAD in the following | ||||
| 18128 | # should get a break if its : gets a break. | ||||
| 18129 | # | ||||
| 18130 | # my $flags = | ||||
| 18131 | # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE | ||||
| 18132 | # : ( $_ & 4 ) ? $THRf_R_DETACHED | ||||
| 18133 | # : $THRf_R_JOINABLE; | ||||
| 18134 | if ( $type eq ':' | ||||
| 18135 | && $levels_to_go[$i] != $levels_to_go[$itest] ) | ||||
| 18136 | { | ||||
| 18137 | my $i_question = $mate_index_to_go[$itest]; | ||||
| 18138 | if ( $i_question > 0 ) { | ||||
| 18139 | push @insert_list, $i_question - 1; | ||||
| 18140 | } | ||||
| 18141 | } | ||||
| 18142 | last; | ||||
| 18143 | } | ||||
| 18144 | } | ||||
| 18145 | |||||
| 18146 | # loop over all right end tokens of same type | ||||
| 18147 | if ( $right_chain_type{$type} ) { | ||||
| 18148 | next if $nobreak_to_go[$itest]; | ||||
| 18149 | foreach my $i ( @{ $right_chain_type{$type} } ) { | ||||
| 18150 | next unless in_same_container( $i, $itest ); | ||||
| 18151 | push @insert_list, $itest; | ||||
| 18152 | |||||
| 18153 | # break at matching ? if this : is at a different level | ||||
| 18154 | if ( $type eq ':' | ||||
| 18155 | && $levels_to_go[$i] != $levels_to_go[$itest] ) | ||||
| 18156 | { | ||||
| 18157 | my $i_question = $mate_index_to_go[$itest]; | ||||
| 18158 | if ( $i_question >= 0 ) { | ||||
| 18159 | push @insert_list, $i_question; | ||||
| 18160 | } | ||||
| 18161 | } | ||||
| 18162 | last; | ||||
| 18163 | } | ||||
| 18164 | } | ||||
| 18165 | } | ||||
| 18166 | } | ||||
| 18167 | |||||
| 18168 | # insert any new break points | ||||
| 18169 | if (@insert_list) { | ||||
| 18170 | insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); | ||||
| 18171 | } | ||||
| 18172 | } | ||||
| 18173 | |||||
| 18174 | sub break_equals { | ||||
| 18175 | |||||
| 18176 | # Look for assignment operators that could use a breakpoint. | ||||
| 18177 | # For example, in the following snippet | ||||
| 18178 | # | ||||
| 18179 | # $HOME = $ENV{HOME} | ||||
| 18180 | # || $ENV{LOGDIR} | ||||
| 18181 | # || $pw[7] | ||||
| 18182 | # || die "no home directory for user $<"; | ||||
| 18183 | # | ||||
| 18184 | # we could break at the = to get this, which is a little nicer: | ||||
| 18185 | # $HOME = | ||||
| 18186 | # $ENV{HOME} | ||||
| 18187 | # || $ENV{LOGDIR} | ||||
| 18188 | # || $pw[7] | ||||
| 18189 | # || die "no home directory for user $<"; | ||||
| 18190 | # | ||||
| 18191 | # The logic here follows the logic in set_logical_padding, which | ||||
| 18192 | # will add the padding in the second line to improve alignment. | ||||
| 18193 | # | ||||
| 18194 | my ( $ri_left, $ri_right ) = @_; | ||||
| 18195 | my $nmax = @$ri_right - 1; | ||||
| 18196 | return unless ( $nmax >= 2 ); | ||||
| 18197 | |||||
| 18198 | # scan the left ends of first two lines | ||||
| 18199 | my $tokbeg = ""; | ||||
| 18200 | my $depth_beg; | ||||
| 18201 | for my $n ( 1 .. 2 ) { | ||||
| 18202 | my $il = $$ri_left[$n]; | ||||
| 18203 | my $typel = $types_to_go[$il]; | ||||
| 18204 | my $tokenl = $tokens_to_go[$il]; | ||||
| 18205 | |||||
| 18206 | my $has_leading_op = ( $tokenl =~ /^\w/ ) | ||||
| 18207 | ? $is_chain_operator{$tokenl} # + - * / : ? && || | ||||
| 18208 | : $is_chain_operator{$typel}; # and, or | ||||
| 18209 | return unless ($has_leading_op); | ||||
| 18210 | if ( $n > 1 ) { | ||||
| 18211 | return | ||||
| 18212 | unless ( $tokenl eq $tokbeg | ||||
| 18213 | && $nesting_depth_to_go[$il] eq $depth_beg ); | ||||
| 18214 | } | ||||
| 18215 | $tokbeg = $tokenl; | ||||
| 18216 | $depth_beg = $nesting_depth_to_go[$il]; | ||||
| 18217 | } | ||||
| 18218 | |||||
| 18219 | # now look for any interior tokens of the same types | ||||
| 18220 | my $il = $$ri_left[0]; | ||||
| 18221 | my $ir = $$ri_right[0]; | ||||
| 18222 | |||||
| 18223 | # now make a list of all new break points | ||||
| 18224 | my @insert_list; | ||||
| 18225 | for ( my $i = $ir - 1 ; $i > $il ; $i-- ) { | ||||
| 18226 | my $type = $types_to_go[$i]; | ||||
| 18227 | if ( $is_assignment{$type} | ||||
| 18228 | && $nesting_depth_to_go[$i] eq $depth_beg ) | ||||
| 18229 | { | ||||
| 18230 | if ( $want_break_before{$type} ) { | ||||
| 18231 | push @insert_list, $i - 1; | ||||
| 18232 | } | ||||
| 18233 | else { | ||||
| 18234 | push @insert_list, $i; | ||||
| 18235 | } | ||||
| 18236 | } | ||||
| 18237 | } | ||||
| 18238 | |||||
| 18239 | # Break after a 'return' followed by a chain of operators | ||||
| 18240 | # return ( $^O !~ /win32|dos/i ) | ||||
| 18241 | # && ( $^O ne 'VMS' ) | ||||
| 18242 | # && ( $^O ne 'OS2' ) | ||||
| 18243 | # && ( $^O ne 'MacOS' ); | ||||
| 18244 | # To give: | ||||
| 18245 | # return | ||||
| 18246 | # ( $^O !~ /win32|dos/i ) | ||||
| 18247 | # && ( $^O ne 'VMS' ) | ||||
| 18248 | # && ( $^O ne 'OS2' ) | ||||
| 18249 | # && ( $^O ne 'MacOS' ); | ||||
| 18250 | my $i = 0; | ||||
| 18251 | if ( $types_to_go[$i] eq 'k' | ||||
| 18252 | && $tokens_to_go[$i] eq 'return' | ||||
| 18253 | && $ir > $il | ||||
| 18254 | && $nesting_depth_to_go[$i] eq $depth_beg ) | ||||
| 18255 | { | ||||
| 18256 | push @insert_list, $i; | ||||
| 18257 | } | ||||
| 18258 | |||||
| 18259 | return unless (@insert_list); | ||||
| 18260 | |||||
| 18261 | # One final check... | ||||
| 18262 | # scan second and third lines and be sure there are no assignments | ||||
| 18263 | # we want to avoid breaking at an = to make something like this: | ||||
| 18264 | # unless ( $icon = | ||||
| 18265 | # $html_icons{"$type-$state"} | ||||
| 18266 | # or $icon = $html_icons{$type} | ||||
| 18267 | # or $icon = $html_icons{$state} ) | ||||
| 18268 | for my $n ( 1 .. 2 ) { | ||||
| 18269 | my $il = $$ri_left[$n]; | ||||
| 18270 | my $ir = $$ri_right[$n]; | ||||
| 18271 | for ( my $i = $il + 1 ; $i <= $ir ; $i++ ) { | ||||
| 18272 | my $type = $types_to_go[$i]; | ||||
| 18273 | return | ||||
| 18274 | if ( $is_assignment{$type} | ||||
| 18275 | && $nesting_depth_to_go[$i] eq $depth_beg ); | ||||
| 18276 | } | ||||
| 18277 | } | ||||
| 18278 | |||||
| 18279 | # ok, insert any new break point | ||||
| 18280 | if (@insert_list) { | ||||
| 18281 | insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); | ||||
| 18282 | } | ||||
| 18283 | } | ||||
| 18284 | |||||
| 18285 | sub insert_final_breaks { | ||||
| 18286 | |||||
| 18287 | my ( $ri_left, $ri_right ) = @_; | ||||
| 18288 | |||||
| 18289 | my $nmax = @$ri_right - 1; | ||||
| 18290 | |||||
| 18291 | # scan the left and right end tokens of all lines | ||||
| 18292 | my $count = 0; | ||||
| 18293 | my $i_first_colon = -1; | ||||
| 18294 | for my $n ( 0 .. $nmax ) { | ||||
| 18295 | my $il = $$ri_left[$n]; | ||||
| 18296 | my $ir = $$ri_right[$n]; | ||||
| 18297 | my $typel = $types_to_go[$il]; | ||||
| 18298 | my $typer = $types_to_go[$ir]; | ||||
| 18299 | return if ( $typel eq '?' ); | ||||
| 18300 | return if ( $typer eq '?' ); | ||||
| 18301 | if ( $typel eq ':' ) { $i_first_colon = $il; last; } | ||||
| 18302 | elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; } | ||||
| 18303 | } | ||||
| 18304 | |||||
| 18305 | # For long ternary chains, | ||||
| 18306 | # if the first : we see has its # ? is in the interior | ||||
| 18307 | # of a preceding line, then see if there are any good | ||||
| 18308 | # breakpoints before the ?. | ||||
| 18309 | if ( $i_first_colon > 0 ) { | ||||
| 18310 | my $i_question = $mate_index_to_go[$i_first_colon]; | ||||
| 18311 | if ( $i_question > 0 ) { | ||||
| 18312 | my @insert_list; | ||||
| 18313 | for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) { | ||||
| 18314 | my $token = $tokens_to_go[$ii]; | ||||
| 18315 | my $type = $types_to_go[$ii]; | ||||
| 18316 | |||||
| 18317 | # For now, a good break is either a comma or a 'return'. | ||||
| 18318 | if ( ( $type eq ',' || $type eq 'k' && $token eq 'return' ) | ||||
| 18319 | && in_same_container( $ii, $i_question ) ) | ||||
| 18320 | { | ||||
| 18321 | push @insert_list, $ii; | ||||
| 18322 | last; | ||||
| 18323 | } | ||||
| 18324 | } | ||||
| 18325 | |||||
| 18326 | # insert any new break points | ||||
| 18327 | if (@insert_list) { | ||||
| 18328 | insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); | ||||
| 18329 | } | ||||
| 18330 | } | ||||
| 18331 | } | ||||
| 18332 | } | ||||
| 18333 | |||||
| 18334 | sub in_same_container { | ||||
| 18335 | |||||
| 18336 | # check to see if tokens at i1 and i2 are in the | ||||
| 18337 | # same container, and not separated by a comma, ? or : | ||||
| 18338 | my ( $i1, $i2 ) = @_; | ||||
| 18339 | my $type = $types_to_go[$i1]; | ||||
| 18340 | my $depth = $nesting_depth_to_go[$i1]; | ||||
| 18341 | return unless ( $nesting_depth_to_go[$i2] == $depth ); | ||||
| 18342 | if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) } | ||||
| 18343 | |||||
| 18344 | ########################################################### | ||||
| 18345 | # This is potentially a very slow routine and not critical. | ||||
| 18346 | # For safety just give up for large differences. | ||||
| 18347 | # See test file 'infinite_loop.txt' | ||||
| 18348 | # TODO: replace this loop with a data structure | ||||
| 18349 | ########################################################### | ||||
| 18350 | return if ( $i2 - $i1 > 200 ); | ||||
| 18351 | |||||
| 18352 | for ( my $i = $i1 + 1 ; $i < $i2 ; $i++ ) { | ||||
| 18353 | next if ( $nesting_depth_to_go[$i] > $depth ); | ||||
| 18354 | return if ( $nesting_depth_to_go[$i] < $depth ); | ||||
| 18355 | |||||
| 18356 | my $tok = $tokens_to_go[$i]; | ||||
| 18357 | $tok = ',' if $tok eq '=>'; # treat => same as , | ||||
| 18358 | |||||
| 18359 | # Example: we would not want to break at any of these .'s | ||||
| 18360 | # : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>" | ||||
| 18361 | if ( $type ne ':' ) { | ||||
| 18362 | return if ( $tok =~ /^[\,\:\?]$/ ) || $tok eq '||' || $tok eq 'or'; | ||||
| 18363 | } | ||||
| 18364 | else { | ||||
| 18365 | return if ( $tok =~ /^[\,]$/ ); | ||||
| 18366 | } | ||||
| 18367 | } | ||||
| 18368 | return 1; | ||||
| 18369 | } | ||||
| 18370 | |||||
| 18371 | sub set_continuation_breaks { | ||||
| 18372 | |||||
| 18373 | # Define an array of indexes for inserting newline characters to | ||||
| 18374 | # keep the line lengths below the maximum desired length. There is | ||||
| 18375 | # an implied break after the last token, so it need not be included. | ||||
| 18376 | |||||
| 18377 | # Method: | ||||
| 18378 | # This routine is part of series of routines which adjust line | ||||
| 18379 | # lengths. It is only called if a statement is longer than the | ||||
| 18380 | # maximum line length, or if a preliminary scanning located | ||||
| 18381 | # desirable break points. Sub scan_list has already looked at | ||||
| 18382 | # these tokens and set breakpoints (in array | ||||
| 18383 | # $forced_breakpoint_to_go[$i]) where it wants breaks (for example | ||||
| 18384 | # after commas, after opening parens, and before closing parens). | ||||
| 18385 | # This routine will honor these breakpoints and also add additional | ||||
| 18386 | # breakpoints as necessary to keep the line length below the maximum | ||||
| 18387 | # requested. It bases its decision on where the 'bond strength' is | ||||
| 18388 | # lowest. | ||||
| 18389 | |||||
| 18390 | # Output: returns references to the arrays: | ||||
| 18391 | # @i_first | ||||
| 18392 | # @i_last | ||||
| 18393 | # which contain the indexes $i of the first and last tokens on each | ||||
| 18394 | # line. | ||||
| 18395 | |||||
| 18396 | # In addition, the array: | ||||
| 18397 | # $forced_breakpoint_to_go[$i] | ||||
| 18398 | # may be updated to be =1 for any index $i after which there must be | ||||
| 18399 | # a break. This signals later routines not to undo the breakpoint. | ||||
| 18400 | |||||
| 18401 | my $saw_good_break = shift; | ||||
| 18402 | my @i_first = (); # the first index to output | ||||
| 18403 | my @i_last = (); # the last index to output | ||||
| 18404 | my @i_colon_breaks = (); # needed to decide if we have to break at ?'s | ||||
| 18405 | if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 } | ||||
| 18406 | |||||
| 18407 | set_bond_strengths(); | ||||
| 18408 | |||||
| 18409 | my $imin = 0; | ||||
| 18410 | my $imax = $max_index_to_go; | ||||
| 18411 | if ( $types_to_go[$imin] eq 'b' ) { $imin++ } | ||||
| 18412 | if ( $types_to_go[$imax] eq 'b' ) { $imax-- } | ||||
| 18413 | my $i_begin = $imin; # index for starting next iteration | ||||
| 18414 | |||||
| 18415 | my $leading_spaces = leading_spaces_to_go($imin); | ||||
| 18416 | my $line_count = 0; | ||||
| 18417 | my $last_break_strength = NO_BREAK; | ||||
| 18418 | my $i_last_break = -1; | ||||
| 18419 | my $max_bias = 0.001; | ||||
| 18420 | my $tiny_bias = 0.0001; | ||||
| 18421 | my $leading_alignment_token = ""; | ||||
| 18422 | my $leading_alignment_type = ""; | ||||
| 18423 | |||||
| 18424 | # see if any ?/:'s are in order | ||||
| 18425 | my $colons_in_order = 1; | ||||
| 18426 | my $last_tok = ""; | ||||
| 18427 | my @colon_list = grep /^[\?\:]$/, @types_to_go[ 0 .. $max_index_to_go ]; | ||||
| 18428 | my $colon_count = @colon_list; | ||||
| 18429 | foreach (@colon_list) { | ||||
| 18430 | if ( $_ eq $last_tok ) { $colons_in_order = 0; last } | ||||
| 18431 | $last_tok = $_; | ||||
| 18432 | } | ||||
| 18433 | |||||
| 18434 | # This is a sufficient but not necessary condition for colon chain | ||||
| 18435 | my $is_colon_chain = ( $colons_in_order && @colon_list > 2 ); | ||||
| 18436 | |||||
| 18437 | #------------------------------------------------------- | ||||
| 18438 | # BEGINNING of main loop to set continuation breakpoints | ||||
| 18439 | # Keep iterating until we reach the end | ||||
| 18440 | #------------------------------------------------------- | ||||
| 18441 | while ( $i_begin <= $imax ) { | ||||
| 18442 | my $lowest_strength = NO_BREAK; | ||||
| 18443 | my $starting_sum = $summed_lengths_to_go[$i_begin]; | ||||
| 18444 | my $i_lowest = -1; | ||||
| 18445 | my $i_test = -1; | ||||
| 18446 | my $lowest_next_token = ''; | ||||
| 18447 | my $lowest_next_type = 'b'; | ||||
| 18448 | my $i_lowest_next_nonblank = -1; | ||||
| 18449 | |||||
| 18450 | #------------------------------------------------------- | ||||
| 18451 | # BEGINNING of inner loop to find the best next breakpoint | ||||
| 18452 | #------------------------------------------------------- | ||||
| 18453 | for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) { | ||||
| 18454 | my $type = $types_to_go[$i_test]; | ||||
| 18455 | my $token = $tokens_to_go[$i_test]; | ||||
| 18456 | my $next_type = $types_to_go[ $i_test + 1 ]; | ||||
| 18457 | my $next_token = $tokens_to_go[ $i_test + 1 ]; | ||||
| 18458 | my $i_next_nonblank = $inext_to_go[$i_test]; | ||||
| 18459 | my $next_nonblank_type = $types_to_go[$i_next_nonblank]; | ||||
| 18460 | my $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; | ||||
| 18461 | my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank]; | ||||
| 18462 | my $strength = $bond_strength_to_go[$i_test]; | ||||
| 18463 | my $maximum_line_length = maximum_line_length($i_begin); | ||||
| 18464 | |||||
| 18465 | # use old breaks as a tie-breaker. For example to | ||||
| 18466 | # prevent blinkers with -pbp in this code: | ||||
| 18467 | |||||
| 18468 | ##@keywords{ | ||||
| 18469 | ## qw/ARG OUTPUT PROTO CONSTRUCTOR RETURNS DESC PARAMS SEEALSO EXAMPLE/} | ||||
| 18470 | ## = (); | ||||
| 18471 | |||||
| 18472 | # At the same time try to prevent a leading * in this code | ||||
| 18473 | # with the default formatting: | ||||
| 18474 | # | ||||
| 18475 | ## return | ||||
| 18476 | ## factorial( $a + $b - 1 ) / factorial( $a - 1 ) / factorial( $b - 1 ) | ||||
| 18477 | ## * ( $x**( $a - 1 ) ) | ||||
| 18478 | ## * ( ( 1 - $x )**( $b - 1 ) ); | ||||
| 18479 | |||||
| 18480 | # reduce strength a bit to break ties at an old breakpoint ... | ||||
| 18481 | if ( | ||||
| 18482 | $old_breakpoint_to_go[$i_test] | ||||
| 18483 | |||||
| 18484 | # which is a 'good' breakpoint, meaning ... | ||||
| 18485 | # we don't want to break before it | ||||
| 18486 | && !$want_break_before{$type} | ||||
| 18487 | |||||
| 18488 | # and either we want to break before the next token | ||||
| 18489 | # or the next token is not short (i.e. not a '*', '/' etc.) | ||||
| 18490 | && $i_next_nonblank <= $imax | ||||
| 18491 | && ( $want_break_before{$next_nonblank_type} | ||||
| 18492 | || $token_lengths_to_go[$i_next_nonblank] > 2 | ||||
| 18493 | || $next_nonblank_type =~ /^[\,\(\[\{L]$/ ) | ||||
| 18494 | ) | ||||
| 18495 | { | ||||
| 18496 | $strength -= $tiny_bias; | ||||
| 18497 | } | ||||
| 18498 | |||||
| 18499 | # otherwise increase strength a bit if this token would be at the | ||||
| 18500 | # maximum line length. This is necessary to avoid blinking | ||||
| 18501 | # in the above example when the -iob flag is added. | ||||
| 18502 | else { | ||||
| 18503 | my $len = | ||||
| 18504 | $leading_spaces + | ||||
| 18505 | $summed_lengths_to_go[ $i_test + 1 ] - | ||||
| 18506 | $starting_sum; | ||||
| 18507 | if ( $len >= $maximum_line_length ) { | ||||
| 18508 | $strength += $tiny_bias; | ||||
| 18509 | } | ||||
| 18510 | } | ||||
| 18511 | |||||
| 18512 | my $must_break = 0; | ||||
| 18513 | |||||
| 18514 | # Force an immediate break at certain operators | ||||
| 18515 | # with lower level than the start of the line, | ||||
| 18516 | # unless we've already seen a better break. | ||||
| 18517 | # | ||||
| 18518 | ############################################## | ||||
| 18519 | # Note on an issue with a preceding ? | ||||
| 18520 | ############################################## | ||||
| 18521 | # We don't include a ? in the above list, but there may | ||||
| 18522 | # be a break at a previous ? if the line is long. | ||||
| 18523 | # Because of this we do not want to force a break if | ||||
| 18524 | # there is a previous ? on this line. For now the best way | ||||
| 18525 | # to do this is to not break if we have seen a lower strength | ||||
| 18526 | # point, which is probably a ?. | ||||
| 18527 | # | ||||
| 18528 | # Example of unwanted breaks we are avoiding at a '.' following a ? | ||||
| 18529 | # from pod2html using perltidy -gnu: | ||||
| 18530 | # ) | ||||
| 18531 | # ? "\n<A NAME=\"" | ||||
| 18532 | # . $value | ||||
| 18533 | # . "\">\n$text</A>\n" | ||||
| 18534 | # : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n"; | ||||
| 18535 | if ( | ||||
| 18536 | ( | ||||
| 18537 | $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/ | ||||
| 18538 | || ( $next_nonblank_type eq 'k' | ||||
| 18539 | && $next_nonblank_token =~ /^(and|or)$/ ) | ||||
| 18540 | ) | ||||
| 18541 | && ( $nesting_depth_to_go[$i_begin] > | ||||
| 18542 | $nesting_depth_to_go[$i_next_nonblank] ) | ||||
| 18543 | && ( $strength <= $lowest_strength ) | ||||
| 18544 | ) | ||||
| 18545 | { | ||||
| 18546 | set_forced_breakpoint($i_next_nonblank); | ||||
| 18547 | } | ||||
| 18548 | |||||
| 18549 | if ( | ||||
| 18550 | |||||
| 18551 | # Try to put a break where requested by scan_list | ||||
| 18552 | $forced_breakpoint_to_go[$i_test] | ||||
| 18553 | |||||
| 18554 | # break between ) { in a continued line so that the '{' can | ||||
| 18555 | # be outdented | ||||
| 18556 | # See similar logic in scan_list which catches instances | ||||
| 18557 | # where a line is just something like ') {'. We have to | ||||
| 18558 | # be careful because the corresponding block keyword might | ||||
| 18559 | # not be on the first line, such as 'for' here: | ||||
| 18560 | # | ||||
| 18561 | # eval { | ||||
| 18562 | # for ("a") { | ||||
| 18563 | # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ } | ||||
| 18564 | # } | ||||
| 18565 | # }; | ||||
| 18566 | # | ||||
| 18567 | || ( $line_count | ||||
| 18568 | && ( $token eq ')' ) | ||||
| 18569 | && ( $next_nonblank_type eq '{' ) | ||||
| 18570 | && ($next_nonblank_block_type) | ||||
| 18571 | && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] ) | ||||
| 18572 | && !$rOpts->{'opening-brace-always-on-right'} ) | ||||
| 18573 | |||||
| 18574 | # There is an implied forced break at a terminal opening brace | ||||
| 18575 | || ( ( $type eq '{' ) && ( $i_test == $imax ) ) | ||||
| 18576 | ) | ||||
| 18577 | { | ||||
| 18578 | |||||
| 18579 | # Forced breakpoints must sometimes be overridden, for example | ||||
| 18580 | # because of a side comment causing a NO_BREAK. It is easier | ||||
| 18581 | # to catch this here than when they are set. | ||||
| 18582 | if ( $strength < NO_BREAK - 1 ) { | ||||
| 18583 | $strength = $lowest_strength - $tiny_bias; | ||||
| 18584 | $must_break = 1; | ||||
| 18585 | } | ||||
| 18586 | } | ||||
| 18587 | |||||
| 18588 | # quit if a break here would put a good terminal token on | ||||
| 18589 | # the next line and we already have a possible break | ||||
| 18590 | if ( | ||||
| 18591 | !$must_break | ||||
| 18592 | && ( $next_nonblank_type =~ /^[\;\,]$/ ) | ||||
| 18593 | && ( | ||||
| 18594 | ( | ||||
| 18595 | $leading_spaces + | ||||
| 18596 | $summed_lengths_to_go[ $i_next_nonblank + 1 ] - | ||||
| 18597 | $starting_sum | ||||
| 18598 | ) > $maximum_line_length | ||||
| 18599 | ) | ||||
| 18600 | ) | ||||
| 18601 | { | ||||
| 18602 | last if ( $i_lowest >= 0 ); | ||||
| 18603 | } | ||||
| 18604 | |||||
| 18605 | # Avoid a break which would strand a single punctuation | ||||
| 18606 | # token. For example, we do not want to strand a leading | ||||
| 18607 | # '.' which is followed by a long quoted string. | ||||
| 18608 | # But note that we do want to do this with -extrude (l=1) | ||||
| 18609 | # so please test any changes to this code on -extrude. | ||||
| 18610 | if ( | ||||
| 18611 | !$must_break | ||||
| 18612 | && ( $i_test == $i_begin ) | ||||
| 18613 | && ( $i_test < $imax ) | ||||
| 18614 | && ( $token eq $type ) | ||||
| 18615 | && ( | ||||
| 18616 | ( | ||||
| 18617 | $leading_spaces + | ||||
| 18618 | $summed_lengths_to_go[ $i_test + 1 ] - | ||||
| 18619 | $starting_sum | ||||
| 18620 | ) < $maximum_line_length | ||||
| 18621 | ) | ||||
| 18622 | ) | ||||
| 18623 | { | ||||
| 18624 | $i_test = min( $imax, $inext_to_go[$i_test] ); | ||||
| 18625 | redo; | ||||
| 18626 | } | ||||
| 18627 | |||||
| 18628 | if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) ) | ||||
| 18629 | { | ||||
| 18630 | |||||
| 18631 | # break at previous best break if it would have produced | ||||
| 18632 | # a leading alignment of certain common tokens, and it | ||||
| 18633 | # is different from the latest candidate break | ||||
| 18634 | last | ||||
| 18635 | if ($leading_alignment_type); | ||||
| 18636 | |||||
| 18637 | # Force at least one breakpoint if old code had good | ||||
| 18638 | # break It is only called if a breakpoint is required or | ||||
| 18639 | # desired. This will probably need some adjustments | ||||
| 18640 | # over time. A goal is to try to be sure that, if a new | ||||
| 18641 | # side comment is introduced into formatted text, then | ||||
| 18642 | # the same breakpoints will occur. scbreak.t | ||||
| 18643 | last | ||||
| 18644 | if ( | ||||
| 18645 | $i_test == $imax # we are at the end | ||||
| 18646 | && !$forced_breakpoint_count # | ||||
| 18647 | && $saw_good_break # old line had good break | ||||
| 18648 | && $type =~ /^[#;\{]$/ # and this line ends in | ||||
| 18649 | # ';' or side comment | ||||
| 18650 | && $i_last_break < 0 # and we haven't made a break | ||||
| 18651 | && $i_lowest >= 0 # and we saw a possible break | ||||
| 18652 | && $i_lowest < $imax - 1 # (but not just before this ;) | ||||
| 18653 | && $strength - $lowest_strength < 0.5 * WEAK # and it's good | ||||
| 18654 | ); | ||||
| 18655 | |||||
| 18656 | # Do not skip past an important break point in a short final | ||||
| 18657 | # segment. For example, without this check we would miss the | ||||
| 18658 | # break at the final / in the following code: | ||||
| 18659 | # | ||||
| 18660 | # $depth_stop = | ||||
| 18661 | # ( $tau * $mass_pellet * $q_0 * | ||||
| 18662 | # ( 1. - exp( -$t_stop / $tau ) ) - | ||||
| 18663 | # 4. * $pi * $factor * $k_ice * | ||||
| 18664 | # ( $t_melt - $t_ice ) * | ||||
| 18665 | # $r_pellet * | ||||
| 18666 | # $t_stop ) / | ||||
| 18667 | # ( $rho_ice * $Qs * $pi * $r_pellet**2 ); | ||||
| 18668 | # | ||||
| 18669 | if ( $line_count > 2 | ||||
| 18670 | && $i_lowest < $i_test | ||||
| 18671 | && $i_test > $imax - 2 | ||||
| 18672 | && $nesting_depth_to_go[$i_begin] > | ||||
| 18673 | $nesting_depth_to_go[$i_lowest] | ||||
| 18674 | && $lowest_strength < $last_break_strength - .5 * WEAK ) | ||||
| 18675 | { | ||||
| 18676 | # Make this break for math operators for now | ||||
| 18677 | my $ir = $inext_to_go[$i_lowest]; | ||||
| 18678 | my $il = $iprev_to_go[$ir]; | ||||
| 18679 | last | ||||
| 18680 | if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/ | ||||
| 18681 | || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ ); | ||||
| 18682 | } | ||||
| 18683 | |||||
| 18684 | # Update the minimum bond strength location | ||||
| 18685 | $lowest_strength = $strength; | ||||
| 18686 | $i_lowest = $i_test; | ||||
| 18687 | $lowest_next_token = $next_nonblank_token; | ||||
| 18688 | $lowest_next_type = $next_nonblank_type; | ||||
| 18689 | $i_lowest_next_nonblank = $i_next_nonblank; | ||||
| 18690 | last if $must_break; | ||||
| 18691 | |||||
| 18692 | # set flags to remember if a break here will produce a | ||||
| 18693 | # leading alignment of certain common tokens | ||||
| 18694 | if ( $line_count > 0 | ||||
| 18695 | && $i_test < $imax | ||||
| 18696 | && ( $lowest_strength - $last_break_strength <= $max_bias ) | ||||
| 18697 | ) | ||||
| 18698 | { | ||||
| 18699 | my $i_last_end = $iprev_to_go[$i_begin]; | ||||
| 18700 | my $tok_beg = $tokens_to_go[$i_begin]; | ||||
| 18701 | my $type_beg = $types_to_go[$i_begin]; | ||||
| 18702 | if ( | ||||
| 18703 | |||||
| 18704 | # check for leading alignment of certain tokens | ||||
| 18705 | ( | ||||
| 18706 | $tok_beg eq $next_nonblank_token | ||||
| 18707 | && $is_chain_operator{$tok_beg} | ||||
| 18708 | && ( $type_beg eq 'k' | ||||
| 18709 | || $type_beg eq $tok_beg ) | ||||
| 18710 | && $nesting_depth_to_go[$i_begin] >= | ||||
| 18711 | $nesting_depth_to_go[$i_next_nonblank] | ||||
| 18712 | ) | ||||
| 18713 | |||||
| 18714 | || ( $tokens_to_go[$i_last_end] eq $token | ||||
| 18715 | && $is_chain_operator{$token} | ||||
| 18716 | && ( $type eq 'k' || $type eq $token ) | ||||
| 18717 | && $nesting_depth_to_go[$i_last_end] >= | ||||
| 18718 | $nesting_depth_to_go[$i_test] ) | ||||
| 18719 | ) | ||||
| 18720 | { | ||||
| 18721 | $leading_alignment_token = $next_nonblank_token; | ||||
| 18722 | $leading_alignment_type = $next_nonblank_type; | ||||
| 18723 | } | ||||
| 18724 | } | ||||
| 18725 | } | ||||
| 18726 | |||||
| 18727 | my $too_long = ( $i_test >= $imax ); | ||||
| 18728 | if ( !$too_long ) { | ||||
| 18729 | my $next_length = | ||||
| 18730 | $leading_spaces + | ||||
| 18731 | $summed_lengths_to_go[ $i_test + 2 ] - | ||||
| 18732 | $starting_sum; | ||||
| 18733 | $too_long = $next_length > $maximum_line_length; | ||||
| 18734 | |||||
| 18735 | # To prevent blinkers we will avoid leaving a token exactly at | ||||
| 18736 | # the line length limit unless it is the last token or one of | ||||
| 18737 | # several "good" types. | ||||
| 18738 | # | ||||
| 18739 | # The following code was a blinker with -pbp before this | ||||
| 18740 | # modification: | ||||
| 18741 | ## $last_nonblank_token eq '(' | ||||
| 18742 | ## && $is_indirect_object_taker{ $paren_type | ||||
| 18743 | ## [$paren_depth] } | ||||
| 18744 | # The issue causing the problem is that if the | ||||
| 18745 | # term [$paren_depth] gets broken across a line then | ||||
| 18746 | # the whitespace routine doesn't see both opening and closing | ||||
| 18747 | # brackets and will format like '[ $paren_depth ]'. This | ||||
| 18748 | # leads to an oscillation in length depending if we break | ||||
| 18749 | # before the closing bracket or not. | ||||
| 18750 | if ( !$too_long | ||||
| 18751 | && $i_test + 1 < $imax | ||||
| 18752 | && $next_nonblank_type !~ /^[,\}\]\)R]$/ ) | ||||
| 18753 | { | ||||
| 18754 | $too_long = $next_length >= $maximum_line_length; | ||||
| 18755 | } | ||||
| 18756 | } | ||||
| 18757 | |||||
| 18758 | FORMATTER_DEBUG_FLAG_BREAK | ||||
| 18759 | && do { | ||||
| 18760 | my $ltok = $token; | ||||
| 18761 | my $rtok = $next_nonblank_token ? $next_nonblank_token : ""; | ||||
| 18762 | my $i_testp2 = $i_test + 2; | ||||
| 18763 | if ( $i_testp2 > $max_index_to_go + 1 ) { | ||||
| 18764 | $i_testp2 = $max_index_to_go + 1; | ||||
| 18765 | } | ||||
| 18766 | if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) } | ||||
| 18767 | if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) } | ||||
| 18768 | print STDOUT | ||||
| 18769 | "BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength $ltok $rtok\n"; | ||||
| 18770 | }; | ||||
| 18771 | |||||
| 18772 | # allow one extra terminal token after exceeding line length | ||||
| 18773 | # if it would strand this token. | ||||
| 18774 | if ( $rOpts_fuzzy_line_length | ||||
| 18775 | && $too_long | ||||
| 18776 | && $i_lowest == $i_test | ||||
| 18777 | && $token_lengths_to_go[$i_test] > 1 | ||||
| 18778 | && $next_nonblank_type =~ /^[\;\,]$/ ) | ||||
| 18779 | { | ||||
| 18780 | $too_long = 0; | ||||
| 18781 | } | ||||
| 18782 | |||||
| 18783 | last | ||||
| 18784 | if ( | ||||
| 18785 | ( $i_test == $imax ) # we're done if no more tokens, | ||||
| 18786 | || ( | ||||
| 18787 | ( $i_lowest >= 0 ) # or no more space and we have a break | ||||
| 18788 | && $too_long | ||||
| 18789 | ) | ||||
| 18790 | ); | ||||
| 18791 | } | ||||
| 18792 | |||||
| 18793 | #------------------------------------------------------- | ||||
| 18794 | # END of inner loop to find the best next breakpoint | ||||
| 18795 | # Now decide exactly where to put the breakpoint | ||||
| 18796 | #------------------------------------------------------- | ||||
| 18797 | |||||
| 18798 | # it's always ok to break at imax if no other break was found | ||||
| 18799 | if ( $i_lowest < 0 ) { $i_lowest = $imax } | ||||
| 18800 | |||||
| 18801 | # semi-final index calculation | ||||
| 18802 | my $i_next_nonblank = $inext_to_go[$i_lowest]; | ||||
| 18803 | my $next_nonblank_type = $types_to_go[$i_next_nonblank]; | ||||
| 18804 | my $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; | ||||
| 18805 | |||||
| 18806 | #------------------------------------------------------- | ||||
| 18807 | # ?/: rule 1 : if a break here will separate a '?' on this | ||||
| 18808 | # line from its closing ':', then break at the '?' instead. | ||||
| 18809 | #------------------------------------------------------- | ||||
| 18810 | my $i; | ||||
| 18811 | foreach $i ( $i_begin + 1 .. $i_lowest - 1 ) { | ||||
| 18812 | next unless ( $tokens_to_go[$i] eq '?' ); | ||||
| 18813 | |||||
| 18814 | # do not break if probable sequence of ?/: statements | ||||
| 18815 | next if ($is_colon_chain); | ||||
| 18816 | |||||
| 18817 | # do not break if statement is broken by side comment | ||||
| 18818 | next | ||||
| 18819 | if ( | ||||
| 18820 | $tokens_to_go[$max_index_to_go] eq '#' | ||||
| 18821 | && terminal_type( \@types_to_go, \@block_type_to_go, 0, | ||||
| 18822 | $max_index_to_go ) !~ /^[\;\}]$/ | ||||
| 18823 | ); | ||||
| 18824 | |||||
| 18825 | # no break needed if matching : is also on the line | ||||
| 18826 | next | ||||
| 18827 | if ( $mate_index_to_go[$i] >= 0 | ||||
| 18828 | && $mate_index_to_go[$i] <= $i_next_nonblank ); | ||||
| 18829 | |||||
| 18830 | $i_lowest = $i; | ||||
| 18831 | if ( $want_break_before{'?'} ) { $i_lowest-- } | ||||
| 18832 | last; | ||||
| 18833 | } | ||||
| 18834 | |||||
| 18835 | #------------------------------------------------------- | ||||
| 18836 | # END of inner loop to find the best next breakpoint: | ||||
| 18837 | # Break the line after the token with index i=$i_lowest | ||||
| 18838 | #------------------------------------------------------- | ||||
| 18839 | |||||
| 18840 | # final index calculation | ||||
| 18841 | $i_next_nonblank = $inext_to_go[$i_lowest]; | ||||
| 18842 | $next_nonblank_type = $types_to_go[$i_next_nonblank]; | ||||
| 18843 | $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; | ||||
| 18844 | |||||
| 18845 | FORMATTER_DEBUG_FLAG_BREAK | ||||
| 18846 | && print STDOUT | ||||
| 18847 | "BREAK: best is i = $i_lowest strength = $lowest_strength\n"; | ||||
| 18848 | |||||
| 18849 | #------------------------------------------------------- | ||||
| 18850 | # ?/: rule 2 : if we break at a '?', then break at its ':' | ||||
| 18851 | # | ||||
| 18852 | # Note: this rule is also in sub scan_list to handle a break | ||||
| 18853 | # at the start and end of a line (in case breaks are dictated | ||||
| 18854 | # by side comments). | ||||
| 18855 | #------------------------------------------------------- | ||||
| 18856 | if ( $next_nonblank_type eq '?' ) { | ||||
| 18857 | set_closing_breakpoint($i_next_nonblank); | ||||
| 18858 | } | ||||
| 18859 | elsif ( $types_to_go[$i_lowest] eq '?' ) { | ||||
| 18860 | set_closing_breakpoint($i_lowest); | ||||
| 18861 | } | ||||
| 18862 | |||||
| 18863 | #------------------------------------------------------- | ||||
| 18864 | # ?/: rule 3 : if we break at a ':' then we save | ||||
| 18865 | # its location for further work below. We may need to go | ||||
| 18866 | # back and break at its '?'. | ||||
| 18867 | #------------------------------------------------------- | ||||
| 18868 | if ( $next_nonblank_type eq ':' ) { | ||||
| 18869 | push @i_colon_breaks, $i_next_nonblank; | ||||
| 18870 | } | ||||
| 18871 | elsif ( $types_to_go[$i_lowest] eq ':' ) { | ||||
| 18872 | push @i_colon_breaks, $i_lowest; | ||||
| 18873 | } | ||||
| 18874 | |||||
| 18875 | # here we should set breaks for all '?'/':' pairs which are | ||||
| 18876 | # separated by this line | ||||
| 18877 | |||||
| 18878 | $line_count++; | ||||
| 18879 | |||||
| 18880 | # save this line segment, after trimming blanks at the ends | ||||
| 18881 | push( @i_first, | ||||
| 18882 | ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin ); | ||||
| 18883 | push( @i_last, | ||||
| 18884 | ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest ); | ||||
| 18885 | |||||
| 18886 | # set a forced breakpoint at a container opening, if necessary, to | ||||
| 18887 | # signal a break at a closing container. Excepting '(' for now. | ||||
| 18888 | if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/ | ||||
| 18889 | && !$forced_breakpoint_to_go[$i_lowest] ) | ||||
| 18890 | { | ||||
| 18891 | set_closing_breakpoint($i_lowest); | ||||
| 18892 | } | ||||
| 18893 | |||||
| 18894 | # get ready to go again | ||||
| 18895 | $i_begin = $i_lowest + 1; | ||||
| 18896 | $last_break_strength = $lowest_strength; | ||||
| 18897 | $i_last_break = $i_lowest; | ||||
| 18898 | $leading_alignment_token = ""; | ||||
| 18899 | $leading_alignment_type = ""; | ||||
| 18900 | $lowest_next_token = ''; | ||||
| 18901 | $lowest_next_type = 'b'; | ||||
| 18902 | |||||
| 18903 | if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) { | ||||
| 18904 | $i_begin++; | ||||
| 18905 | } | ||||
| 18906 | |||||
| 18907 | # update indentation size | ||||
| 18908 | if ( $i_begin <= $imax ) { | ||||
| 18909 | $leading_spaces = leading_spaces_to_go($i_begin); | ||||
| 18910 | } | ||||
| 18911 | } | ||||
| 18912 | |||||
| 18913 | #------------------------------------------------------- | ||||
| 18914 | # END of main loop to set continuation breakpoints | ||||
| 18915 | # Now go back and make any necessary corrections | ||||
| 18916 | #------------------------------------------------------- | ||||
| 18917 | |||||
| 18918 | #------------------------------------------------------- | ||||
| 18919 | # ?/: rule 4 -- if we broke at a ':', then break at | ||||
| 18920 | # corresponding '?' unless this is a chain of ?: expressions | ||||
| 18921 | #------------------------------------------------------- | ||||
| 18922 | if (@i_colon_breaks) { | ||||
| 18923 | |||||
| 18924 | # using a simple method for deciding if we are in a ?/: chain -- | ||||
| 18925 | # this is a chain if it has multiple ?/: pairs all in order; | ||||
| 18926 | # otherwise not. | ||||
| 18927 | # Note that if line starts in a ':' we count that above as a break | ||||
| 18928 | my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 ); | ||||
| 18929 | |||||
| 18930 | unless ($is_chain) { | ||||
| 18931 | my @insert_list = (); | ||||
| 18932 | foreach (@i_colon_breaks) { | ||||
| 18933 | my $i_question = $mate_index_to_go[$_]; | ||||
| 18934 | if ( $i_question >= 0 ) { | ||||
| 18935 | if ( $want_break_before{'?'} ) { | ||||
| 18936 | $i_question = $iprev_to_go[$i_question]; | ||||
| 18937 | } | ||||
| 18938 | |||||
| 18939 | if ( $i_question >= 0 ) { | ||||
| 18940 | push @insert_list, $i_question; | ||||
| 18941 | } | ||||
| 18942 | } | ||||
| 18943 | insert_additional_breaks( \@insert_list, \@i_first, \@i_last ); | ||||
| 18944 | } | ||||
| 18945 | } | ||||
| 18946 | } | ||||
| 18947 | return ( \@i_first, \@i_last, $colon_count ); | ||||
| 18948 | } | ||||
| 18949 | |||||
| 18950 | sub insert_additional_breaks { | ||||
| 18951 | |||||
| 18952 | # this routine will add line breaks at requested locations after | ||||
| 18953 | # sub set_continuation_breaks has made preliminary breaks. | ||||
| 18954 | |||||
| 18955 | my ( $ri_break_list, $ri_first, $ri_last ) = @_; | ||||
| 18956 | my $i_f; | ||||
| 18957 | my $i_l; | ||||
| 18958 | my $line_number = 0; | ||||
| 18959 | my $i_break_left; | ||||
| 18960 | foreach $i_break_left ( sort { $a <=> $b } @$ri_break_list ) { | ||||
| 18961 | |||||
| 18962 | $i_f = $$ri_first[$line_number]; | ||||
| 18963 | $i_l = $$ri_last[$line_number]; | ||||
| 18964 | while ( $i_break_left >= $i_l ) { | ||||
| 18965 | $line_number++; | ||||
| 18966 | |||||
| 18967 | # shouldn't happen unless caller passes bad indexes | ||||
| 18968 | if ( $line_number >= @$ri_last ) { | ||||
| 18969 | warning( | ||||
| 18970 | "Non-fatal program bug: couldn't set break at $i_break_left\n" | ||||
| 18971 | ); | ||||
| 18972 | report_definite_bug(); | ||||
| 18973 | return; | ||||
| 18974 | } | ||||
| 18975 | $i_f = $$ri_first[$line_number]; | ||||
| 18976 | $i_l = $$ri_last[$line_number]; | ||||
| 18977 | } | ||||
| 18978 | |||||
| 18979 | # Do not leave a blank at the end of a line; back up if necessary | ||||
| 18980 | if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- } | ||||
| 18981 | |||||
| 18982 | my $i_break_right = $inext_to_go[$i_break_left]; | ||||
| 18983 | if ( $i_break_left >= $i_f | ||||
| 18984 | && $i_break_left < $i_l | ||||
| 18985 | && $i_break_right > $i_f | ||||
| 18986 | && $i_break_right <= $i_l ) | ||||
| 18987 | { | ||||
| 18988 | splice( @$ri_first, $line_number, 1, ( $i_f, $i_break_right ) ); | ||||
| 18989 | splice( @$ri_last, $line_number, 1, ( $i_break_left, $i_l ) ); | ||||
| 18990 | } | ||||
| 18991 | } | ||||
| 18992 | } | ||||
| 18993 | |||||
| 18994 | sub set_closing_breakpoint { | ||||
| 18995 | |||||
| 18996 | # set a breakpoint at a matching closing token | ||||
| 18997 | # at present, this is only used to break at a ':' which matches a '?' | ||||
| 18998 | my $i_break = shift; | ||||
| 18999 | |||||
| 19000 | if ( $mate_index_to_go[$i_break] >= 0 ) { | ||||
| 19001 | |||||
| 19002 | # CAUTION: infinite recursion possible here: | ||||
| 19003 | # set_closing_breakpoint calls set_forced_breakpoint, and | ||||
| 19004 | # set_forced_breakpoint call set_closing_breakpoint | ||||
| 19005 | # ( test files attrib.t, BasicLyx.pm.html). | ||||
| 19006 | # Don't reduce the '2' in the statement below | ||||
| 19007 | if ( $mate_index_to_go[$i_break] > $i_break + 2 ) { | ||||
| 19008 | |||||
| 19009 | # break before } ] and ), but sub set_forced_breakpoint will decide | ||||
| 19010 | # to break before or after a ? and : | ||||
| 19011 | my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1; | ||||
| 19012 | set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc ); | ||||
| 19013 | } | ||||
| 19014 | } | ||||
| 19015 | else { | ||||
| 19016 | my $type_sequence = $type_sequence_to_go[$i_break]; | ||||
| 19017 | if ($type_sequence) { | ||||
| 19018 | my $closing_token = $matching_token{ $tokens_to_go[$i_break] }; | ||||
| 19019 | $postponed_breakpoint{$type_sequence} = 1; | ||||
| 19020 | } | ||||
| 19021 | } | ||||
| 19022 | } | ||||
| 19023 | |||||
| 19024 | sub compare_indentation_levels { | ||||
| 19025 | |||||
| 19026 | # check to see if output line tabbing agrees with input line | ||||
| 19027 | # this can be very useful for debugging a script which has an extra | ||||
| 19028 | # or missing brace | ||||
| 19029 | my ( $guessed_indentation_level, $structural_indentation_level ) = @_; | ||||
| 19030 | if ( $guessed_indentation_level ne $structural_indentation_level ) { | ||||
| 19031 | $last_tabbing_disagreement = $input_line_number; | ||||
| 19032 | |||||
| 19033 | if ($in_tabbing_disagreement) { | ||||
| 19034 | } | ||||
| 19035 | else { | ||||
| 19036 | $tabbing_disagreement_count++; | ||||
| 19037 | |||||
| 19038 | if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) { | ||||
| 19039 | write_logfile_entry( | ||||
| 19040 | "Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n" | ||||
| 19041 | ); | ||||
| 19042 | } | ||||
| 19043 | $in_tabbing_disagreement = $input_line_number; | ||||
| 19044 | $first_tabbing_disagreement = $in_tabbing_disagreement | ||||
| 19045 | unless ($first_tabbing_disagreement); | ||||
| 19046 | } | ||||
| 19047 | } | ||||
| 19048 | else { | ||||
| 19049 | |||||
| 19050 | if ($in_tabbing_disagreement) { | ||||
| 19051 | |||||
| 19052 | if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) { | ||||
| 19053 | write_logfile_entry( | ||||
| 19054 | "End indentation disagreement from input line $in_tabbing_disagreement\n" | ||||
| 19055 | ); | ||||
| 19056 | |||||
| 19057 | if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) { | ||||
| 19058 | write_logfile_entry( | ||||
| 19059 | "No further tabbing disagreements will be noted\n"); | ||||
| 19060 | } | ||||
| 19061 | } | ||||
| 19062 | $in_tabbing_disagreement = 0; | ||||
| 19063 | } | ||||
| 19064 | } | ||||
| 19065 | } | ||||
| 19066 | |||||
| 19067 | ##################################################################### | ||||
| 19068 | # | ||||
| 19069 | # the Perl::Tidy::IndentationItem class supplies items which contain | ||||
| 19070 | # how much whitespace should be used at the start of a line | ||||
| 19071 | # | ||||
| 19072 | ##################################################################### | ||||
| 19073 | |||||
| 19074 | package Perl::Tidy::IndentationItem; | ||||
| 19075 | |||||
| 19076 | # Indexes for indentation items | ||||
| 19077 | 2 | 28µs | 2 | 133µs | # spent 72µs (11+61) within Perl::Tidy::IndentationItem::BEGIN@19077 which was called:
# once (11µs+61µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19077 # spent 72µs making 1 call to Perl::Tidy::IndentationItem::BEGIN@19077
# spent 61µs making 1 call to constant::import |
| 19078 | 2 | 23µs | 2 | 72µs | # spent 39µs (7+33) within Perl::Tidy::IndentationItem::BEGIN@19078 which was called:
# once (7µs+33µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19078 # spent 39µs making 1 call to Perl::Tidy::IndentationItem::BEGIN@19078
# spent 32µs making 1 call to constant::import |
| 19079 | 2 | 22µs | 2 | 67µs | # spent 37µs (7+30) within Perl::Tidy::IndentationItem::BEGIN@19079 which was called:
# once (7µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19079 # spent 37µs making 1 call to Perl::Tidy::IndentationItem::BEGIN@19079
# spent 30µs making 1 call to constant::import |
| 19080 | 2 | 21µs | 2 | 74µs | # spent 40µs (6+34) within Perl::Tidy::IndentationItem::BEGIN@19080 which was called:
# once (6µs+34µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19080 # spent 40µs making 1 call to Perl::Tidy::IndentationItem::BEGIN@19080
# spent 34µs making 1 call to constant::import |
| 19081 | # for this level | ||||
| 19082 | 2 | 24µs | 2 | 64µs | # spent 35µs (6+29) within Perl::Tidy::IndentationItem::BEGIN@19082 which was called:
# once (6µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19082 # spent 35µs making 1 call to Perl::Tidy::IndentationItem::BEGIN@19082
# spent 29µs making 1 call to constant::import |
| 19083 | 2 | 21µs | 2 | 65µs | # spent 36µs (6+29) within Perl::Tidy::IndentationItem::BEGIN@19083 which was called:
# once (6µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19083 # spent 36µs making 1 call to Perl::Tidy::IndentationItem::BEGIN@19083
# spent 29µs making 1 call to constant::import |
| 19084 | 2 | 20µs | 2 | 65µs | # spent 36µs (6+29) within Perl::Tidy::IndentationItem::BEGIN@19084 which was called:
# once (6µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19084 # spent 36µs making 1 call to Perl::Tidy::IndentationItem::BEGIN@19084
# spent 29µs making 1 call to constant::import |
| 19085 | 2 | 20µs | 2 | 62µs | # spent 34µs (6+28) within Perl::Tidy::IndentationItem::BEGIN@19085 which was called:
# once (6µs+28µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19085 # spent 34µs making 1 call to Perl::Tidy::IndentationItem::BEGIN@19085
# spent 28µs making 1 call to constant::import |
| 19086 | 2 | 20µs | 2 | 63µs | # spent 35µs (6+28) within Perl::Tidy::IndentationItem::BEGIN@19086 which was called:
# once (6µs+28µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19086 # spent 35µs making 1 call to Perl::Tidy::IndentationItem::BEGIN@19086
# spent 28µs making 1 call to constant::import |
| 19087 | 2 | 35µs | 2 | 90µs | # spent 48µs (6+42) within Perl::Tidy::IndentationItem::BEGIN@19087 which was called:
# once (6µs+42µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19087 # spent 48µs making 1 call to Perl::Tidy::IndentationItem::BEGIN@19087
# spent 42µs making 1 call to constant::import |
| 19088 | # we would like to move to get | ||||
| 19089 | # alignment (negative if left) | ||||
| 19090 | 2 | 31µs | 2 | 66µs | # spent 36µs (6+30) within Perl::Tidy::IndentationItem::BEGIN@19090 which was called:
# once (6µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19090 # spent 36µs making 1 call to Perl::Tidy::IndentationItem::BEGIN@19090
# spent 30µs making 1 call to constant::import |
| 19091 | # with an opening structure? | ||||
| 19092 | 2 | 29µs | 2 | 64µs | # spent 35µs (6+29) within Perl::Tidy::IndentationItem::BEGIN@19092 which was called:
# once (6µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19092 # spent 35µs making 1 call to Perl::Tidy::IndentationItem::BEGIN@19092
# spent 29µs making 1 call to constant::import |
| 19093 | 2 | 19µs | 2 | 62µs | # spent 34µs (6+28) within Perl::Tidy::IndentationItem::BEGIN@19093 which was called:
# once (6µs+28µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19093 # spent 34µs making 1 call to Perl::Tidy::IndentationItem::BEGIN@19093
# spent 28µs making 1 call to constant::import |
| 19094 | 2 | 23µs | 2 | 62µs | # spent 34µs (6+28) within Perl::Tidy::IndentationItem::BEGIN@19094 which was called:
# once (6µs+28µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19094 # spent 34µs making 1 call to Perl::Tidy::IndentationItem::BEGIN@19094
# spent 28µs making 1 call to constant::import |
| 19095 | 2 | 926µs | 2 | 64µs | # spent 35µs (6+29) within Perl::Tidy::IndentationItem::BEGIN@19095 which was called:
# once (6µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19095 # spent 35µs making 1 call to Perl::Tidy::IndentationItem::BEGIN@19095
# spent 29µs making 1 call to constant::import |
| 19096 | |||||
| 19097 | sub new { | ||||
| 19098 | |||||
| 19099 | # Create an 'indentation_item' which describes one level of leading | ||||
| 19100 | # whitespace when the '-lp' indentation is used. We return | ||||
| 19101 | # a reference to an anonymous array of associated variables. | ||||
| 19102 | # See above constants for storage scheme. | ||||
| 19103 | my ( | ||||
| 19104 | $class, $spaces, $level, | ||||
| 19105 | $ci_level, $available_spaces, $index, | ||||
| 19106 | $gnu_sequence_number, $align_paren, $stack_depth, | ||||
| 19107 | $starting_index, | ||||
| 19108 | ) = @_; | ||||
| 19109 | my $closed = -1; | ||||
| 19110 | my $arrow_count = 0; | ||||
| 19111 | my $comma_count = 0; | ||||
| 19112 | my $have_child = 0; | ||||
| 19113 | my $want_right_spaces = 0; | ||||
| 19114 | my $marked = 0; | ||||
| 19115 | bless [ | ||||
| 19116 | $spaces, $level, $ci_level, | ||||
| 19117 | $available_spaces, $closed, $comma_count, | ||||
| 19118 | $gnu_sequence_number, $index, $have_child, | ||||
| 19119 | $want_right_spaces, $align_paren, $marked, | ||||
| 19120 | $stack_depth, $starting_index, $arrow_count, | ||||
| 19121 | ], $class; | ||||
| 19122 | } | ||||
| 19123 | |||||
| 19124 | sub permanently_decrease_AVAILABLE_SPACES { | ||||
| 19125 | |||||
| 19126 | # make a permanent reduction in the available indentation spaces | ||||
| 19127 | # at one indentation item. NOTE: if there are child nodes, their | ||||
| 19128 | # total SPACES must be reduced by the caller. | ||||
| 19129 | |||||
| 19130 | my ( $item, $spaces_needed ) = @_; | ||||
| 19131 | my $available_spaces = $item->get_AVAILABLE_SPACES(); | ||||
| 19132 | my $deleted_spaces = | ||||
| 19133 | ( $available_spaces > $spaces_needed ) | ||||
| 19134 | ? $spaces_needed | ||||
| 19135 | : $available_spaces; | ||||
| 19136 | $item->decrease_AVAILABLE_SPACES($deleted_spaces); | ||||
| 19137 | $item->decrease_SPACES($deleted_spaces); | ||||
| 19138 | $item->set_RECOVERABLE_SPACES(0); | ||||
| 19139 | |||||
| 19140 | return $deleted_spaces; | ||||
| 19141 | } | ||||
| 19142 | |||||
| 19143 | sub tentatively_decrease_AVAILABLE_SPACES { | ||||
| 19144 | |||||
| 19145 | # We are asked to tentatively delete $spaces_needed of indentation | ||||
| 19146 | # for a indentation item. We may want to undo this later. NOTE: if | ||||
| 19147 | # there are child nodes, their total SPACES must be reduced by the | ||||
| 19148 | # caller. | ||||
| 19149 | my ( $item, $spaces_needed ) = @_; | ||||
| 19150 | my $available_spaces = $item->get_AVAILABLE_SPACES(); | ||||
| 19151 | my $deleted_spaces = | ||||
| 19152 | ( $available_spaces > $spaces_needed ) | ||||
| 19153 | ? $spaces_needed | ||||
| 19154 | : $available_spaces; | ||||
| 19155 | $item->decrease_AVAILABLE_SPACES($deleted_spaces); | ||||
| 19156 | $item->decrease_SPACES($deleted_spaces); | ||||
| 19157 | $item->increase_RECOVERABLE_SPACES($deleted_spaces); | ||||
| 19158 | return $deleted_spaces; | ||||
| 19159 | } | ||||
| 19160 | |||||
| 19161 | sub get_STACK_DEPTH { | ||||
| 19162 | my $self = shift; | ||||
| 19163 | return $self->[STACK_DEPTH]; | ||||
| 19164 | } | ||||
| 19165 | |||||
| 19166 | sub get_SPACES { | ||||
| 19167 | my $self = shift; | ||||
| 19168 | return $self->[SPACES]; | ||||
| 19169 | } | ||||
| 19170 | |||||
| 19171 | sub get_MARKED { | ||||
| 19172 | my $self = shift; | ||||
| 19173 | return $self->[MARKED]; | ||||
| 19174 | } | ||||
| 19175 | |||||
| 19176 | sub set_MARKED { | ||||
| 19177 | my ( $self, $value ) = @_; | ||||
| 19178 | if ( defined($value) ) { | ||||
| 19179 | $self->[MARKED] = $value; | ||||
| 19180 | } | ||||
| 19181 | return $self->[MARKED]; | ||||
| 19182 | } | ||||
| 19183 | |||||
| 19184 | sub get_AVAILABLE_SPACES { | ||||
| 19185 | my $self = shift; | ||||
| 19186 | return $self->[AVAILABLE_SPACES]; | ||||
| 19187 | } | ||||
| 19188 | |||||
| 19189 | sub decrease_SPACES { | ||||
| 19190 | my ( $self, $value ) = @_; | ||||
| 19191 | if ( defined($value) ) { | ||||
| 19192 | $self->[SPACES] -= $value; | ||||
| 19193 | } | ||||
| 19194 | return $self->[SPACES]; | ||||
| 19195 | } | ||||
| 19196 | |||||
| 19197 | sub decrease_AVAILABLE_SPACES { | ||||
| 19198 | my ( $self, $value ) = @_; | ||||
| 19199 | if ( defined($value) ) { | ||||
| 19200 | $self->[AVAILABLE_SPACES] -= $value; | ||||
| 19201 | } | ||||
| 19202 | return $self->[AVAILABLE_SPACES]; | ||||
| 19203 | } | ||||
| 19204 | |||||
| 19205 | sub get_ALIGN_PAREN { | ||||
| 19206 | my $self = shift; | ||||
| 19207 | return $self->[ALIGN_PAREN]; | ||||
| 19208 | } | ||||
| 19209 | |||||
| 19210 | sub get_RECOVERABLE_SPACES { | ||||
| 19211 | my $self = shift; | ||||
| 19212 | return $self->[RECOVERABLE_SPACES]; | ||||
| 19213 | } | ||||
| 19214 | |||||
| 19215 | sub set_RECOVERABLE_SPACES { | ||||
| 19216 | my ( $self, $value ) = @_; | ||||
| 19217 | if ( defined($value) ) { | ||||
| 19218 | $self->[RECOVERABLE_SPACES] = $value; | ||||
| 19219 | } | ||||
| 19220 | return $self->[RECOVERABLE_SPACES]; | ||||
| 19221 | } | ||||
| 19222 | |||||
| 19223 | sub increase_RECOVERABLE_SPACES { | ||||
| 19224 | my ( $self, $value ) = @_; | ||||
| 19225 | if ( defined($value) ) { | ||||
| 19226 | $self->[RECOVERABLE_SPACES] += $value; | ||||
| 19227 | } | ||||
| 19228 | return $self->[RECOVERABLE_SPACES]; | ||||
| 19229 | } | ||||
| 19230 | |||||
| 19231 | sub get_CI_LEVEL { | ||||
| 19232 | my $self = shift; | ||||
| 19233 | return $self->[CI_LEVEL]; | ||||
| 19234 | } | ||||
| 19235 | |||||
| 19236 | sub get_LEVEL { | ||||
| 19237 | my $self = shift; | ||||
| 19238 | return $self->[LEVEL]; | ||||
| 19239 | } | ||||
| 19240 | |||||
| 19241 | sub get_SEQUENCE_NUMBER { | ||||
| 19242 | my $self = shift; | ||||
| 19243 | return $self->[SEQUENCE_NUMBER]; | ||||
| 19244 | } | ||||
| 19245 | |||||
| 19246 | sub get_INDEX { | ||||
| 19247 | my $self = shift; | ||||
| 19248 | return $self->[INDEX]; | ||||
| 19249 | } | ||||
| 19250 | |||||
| 19251 | sub get_STARTING_INDEX { | ||||
| 19252 | my $self = shift; | ||||
| 19253 | return $self->[STARTING_INDEX]; | ||||
| 19254 | } | ||||
| 19255 | |||||
| 19256 | sub set_HAVE_CHILD { | ||||
| 19257 | my ( $self, $value ) = @_; | ||||
| 19258 | if ( defined($value) ) { | ||||
| 19259 | $self->[HAVE_CHILD] = $value; | ||||
| 19260 | } | ||||
| 19261 | return $self->[HAVE_CHILD]; | ||||
| 19262 | } | ||||
| 19263 | |||||
| 19264 | sub get_HAVE_CHILD { | ||||
| 19265 | my $self = shift; | ||||
| 19266 | return $self->[HAVE_CHILD]; | ||||
| 19267 | } | ||||
| 19268 | |||||
| 19269 | sub set_ARROW_COUNT { | ||||
| 19270 | my ( $self, $value ) = @_; | ||||
| 19271 | if ( defined($value) ) { | ||||
| 19272 | $self->[ARROW_COUNT] = $value; | ||||
| 19273 | } | ||||
| 19274 | return $self->[ARROW_COUNT]; | ||||
| 19275 | } | ||||
| 19276 | |||||
| 19277 | sub get_ARROW_COUNT { | ||||
| 19278 | my $self = shift; | ||||
| 19279 | return $self->[ARROW_COUNT]; | ||||
| 19280 | } | ||||
| 19281 | |||||
| 19282 | sub set_COMMA_COUNT { | ||||
| 19283 | my ( $self, $value ) = @_; | ||||
| 19284 | if ( defined($value) ) { | ||||
| 19285 | $self->[COMMA_COUNT] = $value; | ||||
| 19286 | } | ||||
| 19287 | return $self->[COMMA_COUNT]; | ||||
| 19288 | } | ||||
| 19289 | |||||
| 19290 | sub get_COMMA_COUNT { | ||||
| 19291 | my $self = shift; | ||||
| 19292 | return $self->[COMMA_COUNT]; | ||||
| 19293 | } | ||||
| 19294 | |||||
| 19295 | sub set_CLOSED { | ||||
| 19296 | my ( $self, $value ) = @_; | ||||
| 19297 | if ( defined($value) ) { | ||||
| 19298 | $self->[CLOSED] = $value; | ||||
| 19299 | } | ||||
| 19300 | return $self->[CLOSED]; | ||||
| 19301 | } | ||||
| 19302 | |||||
| 19303 | sub get_CLOSED { | ||||
| 19304 | my $self = shift; | ||||
| 19305 | return $self->[CLOSED]; | ||||
| 19306 | } | ||||
| 19307 | |||||
| 19308 | ##################################################################### | ||||
| 19309 | # | ||||
| 19310 | # the Perl::Tidy::VerticalAligner::Line class supplies an object to | ||||
| 19311 | # contain a single output line | ||||
| 19312 | # | ||||
| 19313 | ##################################################################### | ||||
| 19314 | |||||
| 19315 | package Perl::Tidy::VerticalAligner::Line; | ||||
| 19316 | |||||
| 19317 | { | ||||
| 19318 | |||||
| 19319 | 3 | 25µs | 2 | 48µs | # spent 29µs (11+19) within Perl::Tidy::VerticalAligner::Line::BEGIN@19319 which was called:
# once (11µs+19µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19319 # spent 29µs making 1 call to Perl::Tidy::VerticalAligner::Line::BEGIN@19319
# spent 19µs making 1 call to strict::import |
| 19320 | 2 | 26µs | 2 | 111µs | # spent 61µs (10+50) within Perl::Tidy::VerticalAligner::Line::BEGIN@19320 which was called:
# once (10µs+50µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19320 # spent 61µs making 1 call to Perl::Tidy::VerticalAligner::Line::BEGIN@19320
# spent 50µs making 1 call to Exporter::import |
| 19321 | |||||
| 19322 | 2 | 24µs | 2 | 82µs | # spent 46µs (11+35) within Perl::Tidy::VerticalAligner::Line::BEGIN@19322 which was called:
# once (11µs+35µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19322 # spent 46µs making 1 call to Perl::Tidy::VerticalAligner::Line::BEGIN@19322
# spent 35µs making 1 call to constant::import |
| 19323 | 2 | 29µs | 2 | 74µs | # spent 40µs (7+33) within Perl::Tidy::VerticalAligner::Line::BEGIN@19323 which was called:
# once (7µs+33µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19323 # spent 40µs making 1 call to Perl::Tidy::VerticalAligner::Line::BEGIN@19323
# spent 33µs making 1 call to constant::import |
| 19324 | 2 | 22µs | 2 | 71µs | # spent 39µs (7+32) within Perl::Tidy::VerticalAligner::Line::BEGIN@19324 which was called:
# once (7µs+32µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19324 # spent 39µs making 1 call to Perl::Tidy::VerticalAligner::Line::BEGIN@19324
# spent 32µs making 1 call to constant::import |
| 19325 | 2 | 25µs | 2 | 71µs | # spent 39µs (6+32) within Perl::Tidy::VerticalAligner::Line::BEGIN@19325 which was called:
# once (6µs+32µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19325 # spent 39µs making 1 call to Perl::Tidy::VerticalAligner::Line::BEGIN@19325
# spent 32µs making 1 call to constant::import |
| 19326 | 2 | 21µs | 2 | 70µs | # spent 38µs (7+32) within Perl::Tidy::VerticalAligner::Line::BEGIN@19326 which was called:
# once (7µs+32µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19326 # spent 38µs making 1 call to Perl::Tidy::VerticalAligner::Line::BEGIN@19326
# spent 32µs making 1 call to constant::import |
| 19327 | 2 | 20µs | 2 | 84µs | # spent 46µs (6+39) within Perl::Tidy::VerticalAligner::Line::BEGIN@19327 which was called:
# once (6µs+39µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19327 # spent 46µs making 1 call to Perl::Tidy::VerticalAligner::Line::BEGIN@19327
# spent 39µs making 1 call to constant::import |
| 19328 | 2 | 26µs | 2 | 64µs | # spent 35µs (6+29) within Perl::Tidy::VerticalAligner::Line::BEGIN@19328 which was called:
# once (6µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19328 # spent 35µs making 1 call to Perl::Tidy::VerticalAligner::Line::BEGIN@19328
# spent 29µs making 1 call to constant::import |
| 19329 | 2 | 20µs | 2 | 64µs | # spent 35µs (6+29) within Perl::Tidy::VerticalAligner::Line::BEGIN@19329 which was called:
# once (6µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19329 # spent 35µs making 1 call to Perl::Tidy::VerticalAligner::Line::BEGIN@19329
# spent 29µs making 1 call to constant::import |
| 19330 | 2 | 20µs | 2 | 62µs | # spent 34µs (6+28) within Perl::Tidy::VerticalAligner::Line::BEGIN@19330 which was called:
# once (6µs+28µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19330 # spent 34µs making 1 call to Perl::Tidy::VerticalAligner::Line::BEGIN@19330
# spent 28µs making 1 call to constant::import |
| 19331 | 2 | 19µs | 2 | 63µs | # spent 35µs (6+29) within Perl::Tidy::VerticalAligner::Line::BEGIN@19331 which was called:
# once (6µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19331 # spent 35µs making 1 call to Perl::Tidy::VerticalAligner::Line::BEGIN@19331
# spent 29µs making 1 call to constant::import |
| 19332 | 2 | 19µs | 2 | 61µs | # spent 34µs (6+28) within Perl::Tidy::VerticalAligner::Line::BEGIN@19332 which was called:
# once (6µs+28µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19332 # spent 34µs making 1 call to Perl::Tidy::VerticalAligner::Line::BEGIN@19332
# spent 28µs making 1 call to constant::import |
| 19333 | 2 | 20µs | 2 | 71µs | # spent 39µs (6+33) within Perl::Tidy::VerticalAligner::Line::BEGIN@19333 which was called:
# once (6µs+33µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19333 # spent 39µs making 1 call to Perl::Tidy::VerticalAligner::Line::BEGIN@19333
# spent 33µs making 1 call to constant::import |
| 19334 | 2 | 271µs | 2 | 67µs | # spent 38µs (9+29) within Perl::Tidy::VerticalAligner::Line::BEGIN@19334 which was called:
# once (9µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19334 # spent 38µs making 1 call to Perl::Tidy::VerticalAligner::Line::BEGIN@19334
# spent 29µs making 1 call to constant::import |
| 19335 | |||||
| 19336 | 1 | 0s | my %_index_map; | ||
| 19337 | 1 | 600ns | $_index_map{jmax} = JMAX; | ||
| 19338 | 1 | 200ns | $_index_map{jmax_original_line} = JMAX_ORIGINAL_LINE; | ||
| 19339 | 1 | 200ns | $_index_map{rtokens} = RTOKENS; | ||
| 19340 | 1 | 200ns | $_index_map{rfields} = RFIELDS; | ||
| 19341 | 1 | 200ns | $_index_map{rpatterns} = RPATTERNS; | ||
| 19342 | 1 | 100ns | $_index_map{indentation} = INDENTATION; | ||
| 19343 | 1 | 200ns | $_index_map{leading_space_count} = LEADING_SPACE_COUNT; | ||
| 19344 | 1 | 500ns | $_index_map{outdent_long_lines} = OUTDENT_LONG_LINES; | ||
| 19345 | 1 | 200ns | $_index_map{list_type} = LIST_TYPE; | ||
| 19346 | 1 | 100ns | $_index_map{is_hanging_side_comment} = IS_HANGING_SIDE_COMMENT; | ||
| 19347 | 1 | 100ns | $_index_map{ralignments} = RALIGNMENTS; | ||
| 19348 | 1 | 100ns | $_index_map{maximum_line_length} = MAXIMUM_LINE_LENGTH; | ||
| 19349 | 1 | 100ns | $_index_map{rvertical_tightness_flags} = RVERTICAL_TIGHTNESS_FLAGS; | ||
| 19350 | |||||
| 19351 | 1 | 400ns | my @_default_data = (); | ||
| 19352 | 1 | 300ns | $_default_data[JMAX] = undef; | ||
| 19353 | 1 | 0s | $_default_data[JMAX_ORIGINAL_LINE] = undef; | ||
| 19354 | 1 | 0s | $_default_data[RTOKENS] = undef; | ||
| 19355 | 1 | 0s | $_default_data[RFIELDS] = undef; | ||
| 19356 | 1 | 400ns | $_default_data[RPATTERNS] = undef; | ||
| 19357 | 1 | 100ns | $_default_data[INDENTATION] = undef; | ||
| 19358 | 1 | 200ns | $_default_data[LEADING_SPACE_COUNT] = undef; | ||
| 19359 | 1 | 0s | $_default_data[OUTDENT_LONG_LINES] = undef; | ||
| 19360 | 1 | 300ns | $_default_data[LIST_TYPE] = undef; | ||
| 19361 | 1 | 100ns | $_default_data[IS_HANGING_SIDE_COMMENT] = undef; | ||
| 19362 | 1 | 500ns | $_default_data[RALIGNMENTS] = []; | ||
| 19363 | 1 | 100ns | $_default_data[MAXIMUM_LINE_LENGTH] = undef; | ||
| 19364 | 1 | 200ns | $_default_data[RVERTICAL_TIGHTNESS_FLAGS] = undef; | ||
| 19365 | |||||
| 19366 | { | ||||
| 19367 | |||||
| 19368 | # methods to count object population | ||||
| 19369 | 2 | 400ns | my $_count = 0; | ||
| 19370 | sub get_count { $_count; } | ||||
| 19371 | sub _increment_count { ++$_count } | ||||
| 19372 | sub _decrement_count { --$_count } | ||||
| 19373 | } | ||||
| 19374 | |||||
| 19375 | # Constructor may be called as a class method | ||||
| 19376 | sub new { | ||||
| 19377 | my ( $caller, %arg ) = @_; | ||||
| 19378 | my $caller_is_obj = ref($caller); | ||||
| 19379 | my $class = $caller_is_obj || $caller; | ||||
| 19380 | 2 | 885µs | 2 | 30µs | # spent 19µs (8+11) within Perl::Tidy::VerticalAligner::Line::BEGIN@19380 which was called:
# once (8µs+11µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19380 # spent 19µs making 1 call to Perl::Tidy::VerticalAligner::Line::BEGIN@19380
# spent 11µs making 1 call to strict::unimport |
| 19381 | my $self = bless [], $class; | ||||
| 19382 | |||||
| 19383 | $self->[RALIGNMENTS] = []; | ||||
| 19384 | |||||
| 19385 | my $index; | ||||
| 19386 | foreach ( keys %_index_map ) { | ||||
| 19387 | $index = $_index_map{$_}; | ||||
| 19388 | if ( exists $arg{$_} ) { $self->[$index] = $arg{$_} } | ||||
| 19389 | elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] } | ||||
| 19390 | else { $self->[$index] = $_default_data[$index] } | ||||
| 19391 | } | ||||
| 19392 | |||||
| 19393 | $self->_increment_count(); | ||||
| 19394 | return $self; | ||||
| 19395 | } | ||||
| 19396 | |||||
| 19397 | sub DESTROY { | ||||
| 19398 | $_[0]->_decrement_count(); | ||||
| 19399 | } | ||||
| 19400 | |||||
| 19401 | sub get_jmax { $_[0]->[JMAX] } | ||||
| 19402 | sub get_jmax_original_line { $_[0]->[JMAX_ORIGINAL_LINE] } | ||||
| 19403 | sub get_rtokens { $_[0]->[RTOKENS] } | ||||
| 19404 | sub get_rfields { $_[0]->[RFIELDS] } | ||||
| 19405 | sub get_rpatterns { $_[0]->[RPATTERNS] } | ||||
| 19406 | sub get_indentation { $_[0]->[INDENTATION] } | ||||
| 19407 | sub get_leading_space_count { $_[0]->[LEADING_SPACE_COUNT] } | ||||
| 19408 | sub get_outdent_long_lines { $_[0]->[OUTDENT_LONG_LINES] } | ||||
| 19409 | sub get_list_type { $_[0]->[LIST_TYPE] } | ||||
| 19410 | sub get_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] } | ||||
| 19411 | sub get_rvertical_tightness_flags { $_[0]->[RVERTICAL_TIGHTNESS_FLAGS] } | ||||
| 19412 | |||||
| 19413 | sub set_column { $_[0]->[RALIGNMENTS]->[ $_[1] ]->set_column( $_[2] ) } | ||||
| 19414 | sub get_alignment { $_[0]->[RALIGNMENTS]->[ $_[1] ] } | ||||
| 19415 | sub get_alignments { @{ $_[0]->[RALIGNMENTS] } } | ||||
| 19416 | sub get_column { $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_column() } | ||||
| 19417 | |||||
| 19418 | sub get_starting_column { | ||||
| 19419 | $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_starting_column(); | ||||
| 19420 | } | ||||
| 19421 | |||||
| 19422 | sub increment_column { | ||||
| 19423 | $_[0]->[RALIGNMENTS]->[ $_[1] ]->increment_column( $_[2] ); | ||||
| 19424 | } | ||||
| 19425 | sub set_alignments { my $self = shift; @{ $self->[RALIGNMENTS] } = @_; } | ||||
| 19426 | |||||
| 19427 | sub current_field_width { | ||||
| 19428 | my $self = shift; | ||||
| 19429 | my ($j) = @_; | ||||
| 19430 | if ( $j == 0 ) { | ||||
| 19431 | return $self->get_column($j); | ||||
| 19432 | } | ||||
| 19433 | else { | ||||
| 19434 | return $self->get_column($j) - $self->get_column( $j - 1 ); | ||||
| 19435 | } | ||||
| 19436 | } | ||||
| 19437 | |||||
| 19438 | sub field_width_growth { | ||||
| 19439 | my $self = shift; | ||||
| 19440 | my $j = shift; | ||||
| 19441 | return $self->get_column($j) - $self->get_starting_column($j); | ||||
| 19442 | } | ||||
| 19443 | |||||
| 19444 | sub starting_field_width { | ||||
| 19445 | my $self = shift; | ||||
| 19446 | my $j = shift; | ||||
| 19447 | if ( $j == 0 ) { | ||||
| 19448 | return $self->get_starting_column($j); | ||||
| 19449 | } | ||||
| 19450 | else { | ||||
| 19451 | return $self->get_starting_column($j) - | ||||
| 19452 | $self->get_starting_column( $j - 1 ); | ||||
| 19453 | } | ||||
| 19454 | } | ||||
| 19455 | |||||
| 19456 | sub increase_field_width { | ||||
| 19457 | |||||
| 19458 | my $self = shift; | ||||
| 19459 | my ( $j, $pad ) = @_; | ||||
| 19460 | my $jmax = $self->get_jmax(); | ||||
| 19461 | for my $k ( $j .. $jmax ) { | ||||
| 19462 | $self->increment_column( $k, $pad ); | ||||
| 19463 | } | ||||
| 19464 | } | ||||
| 19465 | |||||
| 19466 | sub get_available_space_on_right { | ||||
| 19467 | my $self = shift; | ||||
| 19468 | my $jmax = $self->get_jmax(); | ||||
| 19469 | return $self->[MAXIMUM_LINE_LENGTH] - $self->get_column($jmax); | ||||
| 19470 | } | ||||
| 19471 | |||||
| 19472 | sub set_jmax { $_[0]->[JMAX] = $_[1] } | ||||
| 19473 | sub set_jmax_original_line { $_[0]->[JMAX_ORIGINAL_LINE] = $_[1] } | ||||
| 19474 | sub set_rtokens { $_[0]->[RTOKENS] = $_[1] } | ||||
| 19475 | sub set_rfields { $_[0]->[RFIELDS] = $_[1] } | ||||
| 19476 | sub set_rpatterns { $_[0]->[RPATTERNS] = $_[1] } | ||||
| 19477 | sub set_indentation { $_[0]->[INDENTATION] = $_[1] } | ||||
| 19478 | sub set_leading_space_count { $_[0]->[LEADING_SPACE_COUNT] = $_[1] } | ||||
| 19479 | sub set_outdent_long_lines { $_[0]->[OUTDENT_LONG_LINES] = $_[1] } | ||||
| 19480 | sub set_list_type { $_[0]->[LIST_TYPE] = $_[1] } | ||||
| 19481 | sub set_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] = $_[1] } | ||||
| 19482 | sub set_alignment { $_[0]->[RALIGNMENTS]->[ $_[1] ] = $_[2] } | ||||
| 19483 | |||||
| 19484 | } | ||||
| 19485 | |||||
| 19486 | ##################################################################### | ||||
| 19487 | # | ||||
| 19488 | # the Perl::Tidy::VerticalAligner::Alignment class holds information | ||||
| 19489 | # on a single column being aligned | ||||
| 19490 | # | ||||
| 19491 | ##################################################################### | ||||
| 19492 | package Perl::Tidy::VerticalAligner::Alignment; | ||||
| 19493 | |||||
| 19494 | { | ||||
| 19495 | |||||
| 19496 | 3 | 24µs | 2 | 34µs | # spent 22µs (10+12) within Perl::Tidy::VerticalAligner::Alignment::BEGIN@19496 which was called:
# once (10µs+12µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19496 # spent 22µs making 1 call to Perl::Tidy::VerticalAligner::Alignment::BEGIN@19496
# spent 12µs making 1 call to strict::import |
| 19497 | |||||
| 19498 | #use Carp; | ||||
| 19499 | |||||
| 19500 | # Symbolic array indexes | ||||
| 19501 | 2 | 22µs | 2 | 72µs | # spent 40µs (7+33) within Perl::Tidy::VerticalAligner::Alignment::BEGIN@19501 which was called:
# once (7µs+33µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19501 # spent 40µs making 1 call to Perl::Tidy::VerticalAligner::Alignment::BEGIN@19501
# spent 33µs making 1 call to constant::import |
| 19502 | 2 | 22µs | 2 | 66µs | # spent 36µs (6+30) within Perl::Tidy::VerticalAligner::Alignment::BEGIN@19502 which was called:
# once (6µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19502 # spent 36µs making 1 call to Perl::Tidy::VerticalAligner::Alignment::BEGIN@19502
# spent 30µs making 1 call to constant::import |
| 19503 | 2 | 21µs | 2 | 73µs | # spent 40µs (7+33) within Perl::Tidy::VerticalAligner::Alignment::BEGIN@19503 which was called:
# once (7µs+33µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19503 # spent 40µs making 1 call to Perl::Tidy::VerticalAligner::Alignment::BEGIN@19503
# spent 33µs making 1 call to constant::import |
| 19504 | 2 | 31µs | 2 | 65µs | # spent 36µs (6+30) within Perl::Tidy::VerticalAligner::Alignment::BEGIN@19504 which was called:
# once (6µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19504 # spent 36µs making 1 call to Perl::Tidy::VerticalAligner::Alignment::BEGIN@19504
# spent 30µs making 1 call to constant::import |
| 19505 | 2 | 30µs | 2 | 66µs | # spent 36µs (7+29) within Perl::Tidy::VerticalAligner::Alignment::BEGIN@19505 which was called:
# once (7µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19505 # spent 36µs making 1 call to Perl::Tidy::VerticalAligner::Alignment::BEGIN@19505
# spent 29µs making 1 call to constant::import |
| 19506 | 2 | 21µs | 2 | 67µs | # spent 37µs (8+30) within Perl::Tidy::VerticalAligner::Alignment::BEGIN@19506 which was called:
# once (8µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19506 # spent 37µs making 1 call to Perl::Tidy::VerticalAligner::Alignment::BEGIN@19506
# spent 30µs making 1 call to constant::import |
| 19507 | 2 | 183µs | 2 | 65µs | # spent 36µs (7+29) within Perl::Tidy::VerticalAligner::Alignment::BEGIN@19507 which was called:
# once (7µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19507 # spent 36µs making 1 call to Perl::Tidy::VerticalAligner::Alignment::BEGIN@19507
# spent 29µs making 1 call to constant::import |
| 19508 | # (just its index in an array) | ||||
| 19509 | |||||
| 19510 | # Correspondence between variables and array indexes | ||||
| 19511 | 1 | 0s | my %_index_map; | ||
| 19512 | 1 | 400ns | $_index_map{column} = COLUMN; | ||
| 19513 | 1 | 200ns | $_index_map{starting_column} = STARTING_COLUMN; | ||
| 19514 | 1 | 300ns | $_index_map{matching_token} = MATCHING_TOKEN; | ||
| 19515 | 1 | 100ns | $_index_map{starting_line} = STARTING_LINE; | ||
| 19516 | 1 | 100ns | $_index_map{ending_line} = ENDING_LINE; | ||
| 19517 | 1 | 100ns | $_index_map{saved_column} = SAVED_COLUMN; | ||
| 19518 | 1 | 100ns | $_index_map{serial_number} = SERIAL_NUMBER; | ||
| 19519 | |||||
| 19520 | 1 | 200ns | my @_default_data = (); | ||
| 19521 | 1 | 200ns | $_default_data[COLUMN] = undef; | ||
| 19522 | 1 | 100ns | $_default_data[STARTING_COLUMN] = undef; | ||
| 19523 | 1 | 100ns | $_default_data[MATCHING_TOKEN] = undef; | ||
| 19524 | 1 | 100ns | $_default_data[STARTING_LINE] = undef; | ||
| 19525 | 1 | 200ns | $_default_data[ENDING_LINE] = undef; | ||
| 19526 | 1 | 100ns | $_default_data[SAVED_COLUMN] = undef; | ||
| 19527 | 1 | 200ns | $_default_data[SERIAL_NUMBER] = undef; | ||
| 19528 | |||||
| 19529 | # class population count | ||||
| 19530 | { | ||||
| 19531 | 2 | 200ns | my $_count = 0; | ||
| 19532 | sub get_count { $_count; } | ||||
| 19533 | sub _increment_count { ++$_count } | ||||
| 19534 | sub _decrement_count { --$_count } | ||||
| 19535 | } | ||||
| 19536 | |||||
| 19537 | # constructor | ||||
| 19538 | sub new { | ||||
| 19539 | my ( $caller, %arg ) = @_; | ||||
| 19540 | my $caller_is_obj = ref($caller); | ||||
| 19541 | my $class = $caller_is_obj || $caller; | ||||
| 19542 | 2 | 405µs | 2 | 26µs | # spent 16µs (7+10) within Perl::Tidy::VerticalAligner::Alignment::BEGIN@19542 which was called:
# once (7µs+10µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19542 # spent 16µs making 1 call to Perl::Tidy::VerticalAligner::Alignment::BEGIN@19542
# spent 10µs making 1 call to strict::unimport |
| 19543 | my $self = bless [], $class; | ||||
| 19544 | |||||
| 19545 | foreach ( keys %_index_map ) { | ||||
| 19546 | my $index = $_index_map{$_}; | ||||
| 19547 | if ( exists $arg{$_} ) { $self->[$index] = $arg{$_} } | ||||
| 19548 | elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] } | ||||
| 19549 | else { $self->[$index] = $_default_data[$index] } | ||||
| 19550 | } | ||||
| 19551 | $self->_increment_count(); | ||||
| 19552 | return $self; | ||||
| 19553 | } | ||||
| 19554 | |||||
| 19555 | sub DESTROY { | ||||
| 19556 | $_[0]->_decrement_count(); | ||||
| 19557 | } | ||||
| 19558 | |||||
| 19559 | sub get_column { return $_[0]->[COLUMN] } | ||||
| 19560 | sub get_starting_column { return $_[0]->[STARTING_COLUMN] } | ||||
| 19561 | sub get_matching_token { return $_[0]->[MATCHING_TOKEN] } | ||||
| 19562 | sub get_starting_line { return $_[0]->[STARTING_LINE] } | ||||
| 19563 | sub get_ending_line { return $_[0]->[ENDING_LINE] } | ||||
| 19564 | sub get_serial_number { return $_[0]->[SERIAL_NUMBER] } | ||||
| 19565 | |||||
| 19566 | sub set_column { $_[0]->[COLUMN] = $_[1] } | ||||
| 19567 | sub set_starting_column { $_[0]->[STARTING_COLUMN] = $_[1] } | ||||
| 19568 | sub set_matching_token { $_[0]->[MATCHING_TOKEN] = $_[1] } | ||||
| 19569 | sub set_starting_line { $_[0]->[STARTING_LINE] = $_[1] } | ||||
| 19570 | sub set_ending_line { $_[0]->[ENDING_LINE] = $_[1] } | ||||
| 19571 | sub increment_column { $_[0]->[COLUMN] += $_[1] } | ||||
| 19572 | |||||
| 19573 | sub save_column { $_[0]->[SAVED_COLUMN] = $_[0]->[COLUMN] } | ||||
| 19574 | sub restore_column { $_[0]->[COLUMN] = $_[0]->[SAVED_COLUMN] } | ||||
| 19575 | |||||
| 19576 | } | ||||
| 19577 | |||||
| 19578 | package Perl::Tidy::VerticalAligner; | ||||
| 19579 | |||||
| 19580 | # The Perl::Tidy::VerticalAligner package collects output lines and | ||||
| 19581 | # attempts to line up certain common tokens, such as => and #, which are | ||||
| 19582 | # identified by the calling routine. | ||||
| 19583 | # | ||||
| 19584 | # There are two main routines: valign_input and flush. Append acts as a | ||||
| 19585 | # storage buffer, collecting lines into a group which can be vertically | ||||
| 19586 | # aligned. When alignment is no longer possible or desirable, it dumps | ||||
| 19587 | # the group to flush. | ||||
| 19588 | # | ||||
| 19589 | # valign_input -----> flush | ||||
| 19590 | # | ||||
| 19591 | # collects writes | ||||
| 19592 | # vertical one | ||||
| 19593 | # groups group | ||||
| 19594 | |||||
| 19595 | # spent 6µs within Perl::Tidy::VerticalAligner::BEGIN@19595 which was called:
# once (6µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19614 | ||||
| 19596 | |||||
| 19597 | # Caution: these debug flags produce a lot of output | ||||
| 19598 | # They should all be 0 except when debugging small scripts | ||||
| 19599 | |||||
| 19600 | 2 | 22µs | 2 | 74µs | # spent 41µs (8+33) within Perl::Tidy::VerticalAligner::BEGIN@19600 which was called:
# once (8µs+33µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19600 # spent 41µs making 1 call to Perl::Tidy::VerticalAligner::BEGIN@19600
# spent 33µs making 1 call to constant::import |
| 19601 | 2 | 24µs | 2 | 66µs | # spent 36µs (6+30) within Perl::Tidy::VerticalAligner::BEGIN@19601 which was called:
# once (6µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19601 # spent 36µs making 1 call to Perl::Tidy::VerticalAligner::BEGIN@19601
# spent 30µs making 1 call to constant::import |
| 19602 | 2 | 20µs | 2 | 67µs | # spent 36µs (6+30) within Perl::Tidy::VerticalAligner::BEGIN@19602 which was called:
# once (6µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19602 # spent 36µs making 1 call to Perl::Tidy::VerticalAligner::BEGIN@19602
# spent 30µs making 1 call to constant::import |
| 19603 | 2 | 85µs | 2 | 63µs | # spent 34µs (6+29) within Perl::Tidy::VerticalAligner::BEGIN@19603 which was called:
# once (6µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19603 # spent 34µs making 1 call to Perl::Tidy::VerticalAligner::BEGIN@19603
# spent 29µs making 1 call to constant::import |
| 19604 | |||||
| 19605 | my $debug_warning = sub { | ||||
| 19606 | print STDOUT "VALIGN_DEBUGGING with key $_[0]\n"; | ||||
| 19607 | 1 | 2µs | }; | ||
| 19608 | |||||
| 19609 | VALIGN_DEBUG_FLAG_APPEND && $debug_warning->('APPEND'); | ||||
| 19610 | 1 | 0s | VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0'); | ||
| 19611 | VALIGN_DEBUG_FLAG_TERNARY && $debug_warning->('TERNARY'); | ||||
| 19612 | 1 | 6µs | VALIGN_DEBUG_FLAG_TABS && $debug_warning->('TABS'); | ||
| 19613 | |||||
| 19614 | 1 | 80µs | 1 | 6µs | } # spent 6µs making 1 call to Perl::Tidy::VerticalAligner::BEGIN@19595 |
| 19615 | |||||
| 19616 | 1 | 600ns | # spent 580µs (8+572) within Perl::Tidy::VerticalAligner::BEGIN@19616 which was called:
# once (8µs+572µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 19677 | ||
| 19617 | $vertical_aligner_self | ||||
| 19618 | $current_line | ||||
| 19619 | $maximum_alignment_index | ||||
| 19620 | $ralignment_list | ||||
| 19621 | $maximum_jmax_seen | ||||
| 19622 | $minimum_jmax_seen | ||||
| 19623 | $previous_minimum_jmax_seen | ||||
| 19624 | $previous_maximum_jmax_seen | ||||
| 19625 | $maximum_line_index | ||||
| 19626 | $group_level | ||||
| 19627 | $group_type | ||||
| 19628 | $group_maximum_gap | ||||
| 19629 | $marginal_match | ||||
| 19630 | $last_level_written | ||||
| 19631 | $last_leading_space_count | ||||
| 19632 | $extra_indent_ok | ||||
| 19633 | $zero_count | ||||
| 19634 | @group_lines | ||||
| 19635 | $last_comment_column | ||||
| 19636 | $last_side_comment_line_number | ||||
| 19637 | $last_side_comment_length | ||||
| 19638 | $last_side_comment_level | ||||
| 19639 | $outdented_line_count | ||||
| 19640 | $first_outdented_line_at | ||||
| 19641 | $last_outdented_line_at | ||||
| 19642 | $diagnostics_object | ||||
| 19643 | $logger_object | ||||
| 19644 | $file_writer_object | ||||
| 19645 | @side_comment_history | ||||
| 19646 | $comment_leading_space_count | ||||
| 19647 | $is_matching_terminal_line | ||||
| 19648 | $consecutive_block_comments | ||||
| 19649 | |||||
| 19650 | $cached_line_text | ||||
| 19651 | $cached_line_type | ||||
| 19652 | $cached_line_flag | ||||
| 19653 | $cached_seqno | ||||
| 19654 | $cached_line_valid | ||||
| 19655 | $cached_line_leading_space_count | ||||
| 19656 | $cached_seqno_string | ||||
| 19657 | |||||
| 19658 | $valign_buffer_filling | ||||
| 19659 | @valign_buffer | ||||
| 19660 | |||||
| 19661 | $seqno_string | ||||
| 19662 | $last_nonblank_seqno_string | ||||
| 19663 | |||||
| 19664 | $rOpts | ||||
| 19665 | |||||
| 19666 | $rOpts_maximum_line_length | ||||
| 19667 | $rOpts_variable_maximum_line_length | ||||
| 19668 | $rOpts_continuation_indentation | ||||
| 19669 | $rOpts_indent_columns | ||||
| 19670 | $rOpts_tabs | ||||
| 19671 | $rOpts_entab_leading_whitespace | ||||
| 19672 | $rOpts_valign | ||||
| 19673 | |||||
| 19674 | $rOpts_fixed_position_side_comment | ||||
| 19675 | $rOpts_minimum_space_to_comment | ||||
| 19676 | |||||
| 19677 | 1 | 4.90ms | 2 | 1.15ms | ); # spent 580µs making 1 call to Perl::Tidy::VerticalAligner::BEGIN@19616
# spent 572µs making 1 call to vars::import |
| 19678 | |||||
| 19679 | sub initialize { | ||||
| 19680 | |||||
| 19681 | my $class; | ||||
| 19682 | |||||
| 19683 | ( $class, $rOpts, $file_writer_object, $logger_object, $diagnostics_object ) | ||||
| 19684 | = @_; | ||||
| 19685 | |||||
| 19686 | # variables describing the entire space group: | ||||
| 19687 | $ralignment_list = []; | ||||
| 19688 | $group_level = 0; | ||||
| 19689 | $last_level_written = -1; | ||||
| 19690 | $extra_indent_ok = 0; # can we move all lines to the right? | ||||
| 19691 | $last_side_comment_length = 0; | ||||
| 19692 | $maximum_jmax_seen = 0; | ||||
| 19693 | $minimum_jmax_seen = 0; | ||||
| 19694 | $previous_minimum_jmax_seen = 0; | ||||
| 19695 | $previous_maximum_jmax_seen = 0; | ||||
| 19696 | |||||
| 19697 | # variables describing each line of the group | ||||
| 19698 | @group_lines = (); # list of all lines in group | ||||
| 19699 | |||||
| 19700 | $outdented_line_count = 0; | ||||
| 19701 | $first_outdented_line_at = 0; | ||||
| 19702 | $last_outdented_line_at = 0; | ||||
| 19703 | $last_side_comment_line_number = 0; | ||||
| 19704 | $last_side_comment_level = -1; | ||||
| 19705 | $is_matching_terminal_line = 0; | ||||
| 19706 | |||||
| 19707 | # most recent 3 side comments; [ line number, column ] | ||||
| 19708 | $side_comment_history[0] = [ -300, 0 ]; | ||||
| 19709 | $side_comment_history[1] = [ -200, 0 ]; | ||||
| 19710 | $side_comment_history[2] = [ -100, 0 ]; | ||||
| 19711 | |||||
| 19712 | # valign_output_step_B cache: | ||||
| 19713 | $cached_line_text = ""; | ||||
| 19714 | $cached_line_type = 0; | ||||
| 19715 | $cached_line_flag = 0; | ||||
| 19716 | $cached_seqno = 0; | ||||
| 19717 | $cached_line_valid = 0; | ||||
| 19718 | $cached_line_leading_space_count = 0; | ||||
| 19719 | $cached_seqno_string = ""; | ||||
| 19720 | |||||
| 19721 | # string of sequence numbers joined together | ||||
| 19722 | $seqno_string = ""; | ||||
| 19723 | $last_nonblank_seqno_string = ""; | ||||
| 19724 | |||||
| 19725 | # frequently used parameters | ||||
| 19726 | $rOpts_indent_columns = $rOpts->{'indent-columns'}; | ||||
| 19727 | $rOpts_tabs = $rOpts->{'tabs'}; | ||||
| 19728 | $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'}; | ||||
| 19729 | $rOpts_fixed_position_side_comment = | ||||
| 19730 | $rOpts->{'fixed-position-side-comment'}; | ||||
| 19731 | $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'}; | ||||
| 19732 | $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'}; | ||||
| 19733 | $rOpts_variable_maximum_line_length = | ||||
| 19734 | $rOpts->{'variable-maximum-line-length'}; | ||||
| 19735 | $rOpts_valign = $rOpts->{'valign'}; | ||||
| 19736 | |||||
| 19737 | $consecutive_block_comments = 0; | ||||
| 19738 | forget_side_comment(); | ||||
| 19739 | |||||
| 19740 | initialize_for_new_group(); | ||||
| 19741 | |||||
| 19742 | $vertical_aligner_self = {}; | ||||
| 19743 | bless $vertical_aligner_self, $class; | ||||
| 19744 | return $vertical_aligner_self; | ||||
| 19745 | } | ||||
| 19746 | |||||
| 19747 | sub initialize_for_new_group { | ||||
| 19748 | $maximum_line_index = -1; # lines in the current group | ||||
| 19749 | $maximum_alignment_index = -1; # alignments in current group | ||||
| 19750 | $zero_count = 0; # count consecutive lines without tokens | ||||
| 19751 | $current_line = undef; # line being matched for alignment | ||||
| 19752 | $group_maximum_gap = 0; # largest gap introduced | ||||
| 19753 | $group_type = ""; | ||||
| 19754 | $marginal_match = 0; | ||||
| 19755 | $comment_leading_space_count = 0; | ||||
| 19756 | $last_leading_space_count = 0; | ||||
| 19757 | } | ||||
| 19758 | |||||
| 19759 | # interface to Perl::Tidy::Diagnostics routines | ||||
| 19760 | sub write_diagnostics { | ||||
| 19761 | if ($diagnostics_object) { | ||||
| 19762 | $diagnostics_object->write_diagnostics(@_); | ||||
| 19763 | } | ||||
| 19764 | } | ||||
| 19765 | |||||
| 19766 | # interface to Perl::Tidy::Logger routines | ||||
| 19767 | sub warning { | ||||
| 19768 | if ($logger_object) { | ||||
| 19769 | $logger_object->warning(@_); | ||||
| 19770 | } | ||||
| 19771 | } | ||||
| 19772 | |||||
| 19773 | sub write_logfile_entry { | ||||
| 19774 | if ($logger_object) { | ||||
| 19775 | $logger_object->write_logfile_entry(@_); | ||||
| 19776 | } | ||||
| 19777 | } | ||||
| 19778 | |||||
| 19779 | sub report_definite_bug { | ||||
| 19780 | if ($logger_object) { | ||||
| 19781 | $logger_object->report_definite_bug(); | ||||
| 19782 | } | ||||
| 19783 | } | ||||
| 19784 | |||||
| 19785 | sub get_SPACES { | ||||
| 19786 | |||||
| 19787 | # return the number of leading spaces associated with an indentation | ||||
| 19788 | # variable $indentation is either a constant number of spaces or an | ||||
| 19789 | # object with a get_SPACES method. | ||||
| 19790 | my $indentation = shift; | ||||
| 19791 | return ref($indentation) ? $indentation->get_SPACES() : $indentation; | ||||
| 19792 | } | ||||
| 19793 | |||||
| 19794 | sub get_RECOVERABLE_SPACES { | ||||
| 19795 | |||||
| 19796 | # return the number of spaces (+ means shift right, - means shift left) | ||||
| 19797 | # that we would like to shift a group of lines with the same indentation | ||||
| 19798 | # to get them to line up with their opening parens | ||||
| 19799 | my $indentation = shift; | ||||
| 19800 | return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0; | ||||
| 19801 | } | ||||
| 19802 | |||||
| 19803 | sub get_STACK_DEPTH { | ||||
| 19804 | |||||
| 19805 | my $indentation = shift; | ||||
| 19806 | return ref($indentation) ? $indentation->get_STACK_DEPTH() : 0; | ||||
| 19807 | } | ||||
| 19808 | |||||
| 19809 | sub make_alignment { | ||||
| 19810 | my ( $col, $token ) = @_; | ||||
| 19811 | |||||
| 19812 | # make one new alignment at column $col which aligns token $token | ||||
| 19813 | ++$maximum_alignment_index; | ||||
| 19814 | my $alignment = new Perl::Tidy::VerticalAligner::Alignment( | ||||
| 19815 | column => $col, | ||||
| 19816 | starting_column => $col, | ||||
| 19817 | matching_token => $token, | ||||
| 19818 | starting_line => $maximum_line_index, | ||||
| 19819 | ending_line => $maximum_line_index, | ||||
| 19820 | serial_number => $maximum_alignment_index, | ||||
| 19821 | ); | ||||
| 19822 | $ralignment_list->[$maximum_alignment_index] = $alignment; | ||||
| 19823 | return $alignment; | ||||
| 19824 | } | ||||
| 19825 | |||||
| 19826 | sub dump_alignments { | ||||
| 19827 | print STDOUT | ||||
| 19828 | "Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n"; | ||||
| 19829 | for my $i ( 0 .. $maximum_alignment_index ) { | ||||
| 19830 | my $column = $ralignment_list->[$i]->get_column(); | ||||
| 19831 | my $starting_column = $ralignment_list->[$i]->get_starting_column(); | ||||
| 19832 | my $matching_token = $ralignment_list->[$i]->get_matching_token(); | ||||
| 19833 | my $starting_line = $ralignment_list->[$i]->get_starting_line(); | ||||
| 19834 | my $ending_line = $ralignment_list->[$i]->get_ending_line(); | ||||
| 19835 | print STDOUT | ||||
| 19836 | "$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n"; | ||||
| 19837 | } | ||||
| 19838 | } | ||||
| 19839 | |||||
| 19840 | sub save_alignment_columns { | ||||
| 19841 | for my $i ( 0 .. $maximum_alignment_index ) { | ||||
| 19842 | $ralignment_list->[$i]->save_column(); | ||||
| 19843 | } | ||||
| 19844 | } | ||||
| 19845 | |||||
| 19846 | sub restore_alignment_columns { | ||||
| 19847 | for my $i ( 0 .. $maximum_alignment_index ) { | ||||
| 19848 | $ralignment_list->[$i]->restore_column(); | ||||
| 19849 | } | ||||
| 19850 | } | ||||
| 19851 | |||||
| 19852 | sub forget_side_comment { | ||||
| 19853 | $last_comment_column = 0; | ||||
| 19854 | } | ||||
| 19855 | |||||
| 19856 | sub maximum_line_length_for_level { | ||||
| 19857 | |||||
| 19858 | # return maximum line length for line starting with a given level | ||||
| 19859 | my $maximum_line_length = $rOpts_maximum_line_length; | ||||
| 19860 | if ($rOpts_variable_maximum_line_length) { | ||||
| 19861 | my $level = shift; | ||||
| 19862 | if ( $level < 0 ) { $level = 0 } | ||||
| 19863 | $maximum_line_length += $level * $rOpts_indent_columns; | ||||
| 19864 | } | ||||
| 19865 | return $maximum_line_length; | ||||
| 19866 | } | ||||
| 19867 | |||||
| 19868 | sub valign_input { | ||||
| 19869 | |||||
| 19870 | # Place one line in the current vertical group. | ||||
| 19871 | # | ||||
| 19872 | # The input parameters are: | ||||
| 19873 | # $level = indentation level of this line | ||||
| 19874 | # $rfields = reference to array of fields | ||||
| 19875 | # $rpatterns = reference to array of patterns, one per field | ||||
| 19876 | # $rtokens = reference to array of tokens starting fields 1,2,.. | ||||
| 19877 | # | ||||
| 19878 | # Here is an example of what this package does. In this example, | ||||
| 19879 | # we are trying to line up both the '=>' and the '#'. | ||||
| 19880 | # | ||||
| 19881 | # '18' => 'grave', # \` | ||||
| 19882 | # '19' => 'acute', # `' | ||||
| 19883 | # '20' => 'caron', # \v | ||||
| 19884 | # <-tabs-><f1-><--field 2 ---><-f3-> | ||||
| 19885 | # | | | | | ||||
| 19886 | # | | | | | ||||
| 19887 | # col1 col2 col3 col4 | ||||
| 19888 | # | ||||
| 19889 | # The calling routine has already broken the entire line into 3 fields as | ||||
| 19890 | # indicated. (So the work of identifying promising common tokens has | ||||
| 19891 | # already been done). | ||||
| 19892 | # | ||||
| 19893 | # In this example, there will be 2 tokens being matched: '=>' and '#'. | ||||
| 19894 | # They are the leading parts of fields 2 and 3, but we do need to know | ||||
| 19895 | # what they are so that we can dump a group of lines when these tokens | ||||
| 19896 | # change. | ||||
| 19897 | # | ||||
| 19898 | # The fields contain the actual characters of each field. The patterns | ||||
| 19899 | # are like the fields, but they contain mainly token types instead | ||||
| 19900 | # of tokens, so they have fewer characters. They are used to be | ||||
| 19901 | # sure we are matching fields of similar type. | ||||
| 19902 | # | ||||
| 19903 | # In this example, there will be 4 column indexes being adjusted. The | ||||
| 19904 | # first one is always at zero. The interior columns are at the start of | ||||
| 19905 | # the matching tokens, and the last one tracks the maximum line length. | ||||
| 19906 | # | ||||
| 19907 | # Each time a new line comes in, it joins the current vertical | ||||
| 19908 | # group if possible. Otherwise it causes the current group to be dumped | ||||
| 19909 | # and a new group is started. | ||||
| 19910 | # | ||||
| 19911 | # For each new group member, the column locations are increased, as | ||||
| 19912 | # necessary, to make room for the new fields. When the group is finally | ||||
| 19913 | # output, these column numbers are used to compute the amount of spaces of | ||||
| 19914 | # padding needed for each field. | ||||
| 19915 | # | ||||
| 19916 | # Programming note: the fields are assumed not to have any tab characters. | ||||
| 19917 | # Tabs have been previously removed except for tabs in quoted strings and | ||||
| 19918 | # side comments. Tabs in these fields can mess up the column counting. | ||||
| 19919 | # The log file warns the user if there are any such tabs. | ||||
| 19920 | |||||
| 19921 | my ( | ||||
| 19922 | $level, $level_end, | ||||
| 19923 | $indentation, $rfields, | ||||
| 19924 | $rtokens, $rpatterns, | ||||
| 19925 | $is_forced_break, $outdent_long_lines, | ||||
| 19926 | $is_terminal_ternary, $is_terminal_statement, | ||||
| 19927 | $do_not_pad, $rvertical_tightness_flags, | ||||
| 19928 | $level_jump, | ||||
| 19929 | ) = @_; | ||||
| 19930 | |||||
| 19931 | # number of fields is $jmax | ||||
| 19932 | # number of tokens between fields is $jmax-1 | ||||
| 19933 | my $jmax = $#{$rfields}; | ||||
| 19934 | |||||
| 19935 | my $leading_space_count = get_SPACES($indentation); | ||||
| 19936 | |||||
| 19937 | # set outdented flag to be sure we either align within statements or | ||||
| 19938 | # across statement boundaries, but not both. | ||||
| 19939 | my $is_outdented = $last_leading_space_count > $leading_space_count; | ||||
| 19940 | $last_leading_space_count = $leading_space_count; | ||||
| 19941 | |||||
| 19942 | # Patch: undo for hanging side comment | ||||
| 19943 | my $is_hanging_side_comment = | ||||
| 19944 | ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ ); | ||||
| 19945 | $is_outdented = 0 if $is_hanging_side_comment; | ||||
| 19946 | |||||
| 19947 | # Forget side comment alignment after seeing 2 or more block comments | ||||
| 19948 | my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ ); | ||||
| 19949 | if ($is_block_comment) { | ||||
| 19950 | $consecutive_block_comments++; | ||||
| 19951 | } | ||||
| 19952 | else { | ||||
| 19953 | if ( $consecutive_block_comments > 1 ) { forget_side_comment() } | ||||
| 19954 | $consecutive_block_comments = 0; | ||||
| 19955 | } | ||||
| 19956 | |||||
| 19957 | VALIGN_DEBUG_FLAG_APPEND0 && do { | ||||
| 19958 | print STDOUT | ||||
| 19959 | "APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n"; | ||||
| 19960 | }; | ||||
| 19961 | |||||
| 19962 | # Validate cached line if necessary: If we can produce a container | ||||
| 19963 | # with just 2 lines total by combining an existing cached opening | ||||
| 19964 | # token with the closing token to follow, then we will mark both | ||||
| 19965 | # cached flags as valid. | ||||
| 19966 | if ($rvertical_tightness_flags) { | ||||
| 19967 | if ( $maximum_line_index <= 0 | ||||
| 19968 | && $cached_line_type | ||||
| 19969 | && $cached_seqno | ||||
| 19970 | && $rvertical_tightness_flags->[2] | ||||
| 19971 | && $rvertical_tightness_flags->[2] == $cached_seqno ) | ||||
| 19972 | { | ||||
| 19973 | $rvertical_tightness_flags->[3] ||= 1; | ||||
| 19974 | $cached_line_valid ||= 1; | ||||
| 19975 | } | ||||
| 19976 | } | ||||
| 19977 | |||||
| 19978 | # do not join an opening block brace with an unbalanced line | ||||
| 19979 | # unless requested with a flag value of 2 | ||||
| 19980 | if ( $cached_line_type == 3 | ||||
| 19981 | && $maximum_line_index < 0 | ||||
| 19982 | && $cached_line_flag < 2 | ||||
| 19983 | && $level_jump != 0 ) | ||||
| 19984 | { | ||||
| 19985 | $cached_line_valid = 0; | ||||
| 19986 | } | ||||
| 19987 | |||||
| 19988 | # patch until new aligner is finished | ||||
| 19989 | if ($do_not_pad) { my_flush() } | ||||
| 19990 | |||||
| 19991 | # shouldn't happen: | ||||
| 19992 | if ( $level < 0 ) { $level = 0 } | ||||
| 19993 | |||||
| 19994 | # do not align code across indentation level changes | ||||
| 19995 | # or if vertical alignment is turned off for debugging | ||||
| 19996 | if ( $level != $group_level || $is_outdented || !$rOpts_valign ) { | ||||
| 19997 | |||||
| 19998 | # we are allowed to shift a group of lines to the right if its | ||||
| 19999 | # level is greater than the previous and next group | ||||
| 20000 | $extra_indent_ok = | ||||
| 20001 | ( $level < $group_level && $last_level_written < $group_level ); | ||||
| 20002 | |||||
| 20003 | my_flush(); | ||||
| 20004 | |||||
| 20005 | # If we know that this line will get flushed out by itself because | ||||
| 20006 | # of level changes, we can leave the extra_indent_ok flag set. | ||||
| 20007 | # That way, if we get an external flush call, we will still be | ||||
| 20008 | # able to do some -lp alignment if necessary. | ||||
| 20009 | $extra_indent_ok = ( $is_terminal_statement && $level > $group_level ); | ||||
| 20010 | |||||
| 20011 | $group_level = $level; | ||||
| 20012 | |||||
| 20013 | # wait until after the above flush to get the leading space | ||||
| 20014 | # count because it may have been changed if the -icp flag is in | ||||
| 20015 | # effect | ||||
| 20016 | $leading_space_count = get_SPACES($indentation); | ||||
| 20017 | |||||
| 20018 | } | ||||
| 20019 | |||||
| 20020 | # -------------------------------------------------------------------- | ||||
| 20021 | # Patch to collect outdentable block COMMENTS | ||||
| 20022 | # -------------------------------------------------------------------- | ||||
| 20023 | my $is_blank_line = ""; | ||||
| 20024 | if ( $group_type eq 'COMMENT' ) { | ||||
| 20025 | if ( | ||||
| 20026 | ( | ||||
| 20027 | $is_block_comment | ||||
| 20028 | && $outdent_long_lines | ||||
| 20029 | && $leading_space_count == $comment_leading_space_count | ||||
| 20030 | ) | ||||
| 20031 | || $is_blank_line | ||||
| 20032 | ) | ||||
| 20033 | { | ||||
| 20034 | $group_lines[ ++$maximum_line_index ] = $rfields->[0]; | ||||
| 20035 | return; | ||||
| 20036 | } | ||||
| 20037 | else { | ||||
| 20038 | my_flush(); | ||||
| 20039 | } | ||||
| 20040 | } | ||||
| 20041 | |||||
| 20042 | # -------------------------------------------------------------------- | ||||
| 20043 | # add dummy fields for terminal ternary | ||||
| 20044 | # -------------------------------------------------------------------- | ||||
| 20045 | my $j_terminal_match; | ||||
| 20046 | if ( $is_terminal_ternary && $current_line ) { | ||||
| 20047 | $j_terminal_match = | ||||
| 20048 | fix_terminal_ternary( $rfields, $rtokens, $rpatterns ); | ||||
| 20049 | $jmax = @{$rfields} - 1; | ||||
| 20050 | } | ||||
| 20051 | |||||
| 20052 | # -------------------------------------------------------------------- | ||||
| 20053 | # add dummy fields for else statement | ||||
| 20054 | # -------------------------------------------------------------------- | ||||
| 20055 | if ( $rfields->[0] =~ /^else\s*$/ | ||||
| 20056 | && $current_line | ||||
| 20057 | && $level_jump == 0 ) | ||||
| 20058 | { | ||||
| 20059 | $j_terminal_match = fix_terminal_else( $rfields, $rtokens, $rpatterns ); | ||||
| 20060 | $jmax = @{$rfields} - 1; | ||||
| 20061 | } | ||||
| 20062 | |||||
| 20063 | # -------------------------------------------------------------------- | ||||
| 20064 | # Step 1. Handle simple line of code with no fields to match. | ||||
| 20065 | # -------------------------------------------------------------------- | ||||
| 20066 | if ( $jmax <= 0 ) { | ||||
| 20067 | $zero_count++; | ||||
| 20068 | |||||
| 20069 | if ( $maximum_line_index >= 0 | ||||
| 20070 | && !get_RECOVERABLE_SPACES( $group_lines[0]->get_indentation() ) ) | ||||
| 20071 | { | ||||
| 20072 | |||||
| 20073 | # flush the current group if it has some aligned columns.. | ||||
| 20074 | if ( $group_lines[0]->get_jmax() > 1 ) { my_flush() } | ||||
| 20075 | |||||
| 20076 | # flush current group if we are just collecting side comments.. | ||||
| 20077 | elsif ( | ||||
| 20078 | |||||
| 20079 | # ...and we haven't seen a comment lately | ||||
| 20080 | ( $zero_count > 3 ) | ||||
| 20081 | |||||
| 20082 | # ..or if this new line doesn't fit to the left of the comments | ||||
| 20083 | || ( ( $leading_space_count + length( $$rfields[0] ) ) > | ||||
| 20084 | $group_lines[0]->get_column(0) ) | ||||
| 20085 | ) | ||||
| 20086 | { | ||||
| 20087 | my_flush(); | ||||
| 20088 | } | ||||
| 20089 | } | ||||
| 20090 | |||||
| 20091 | # patch to start new COMMENT group if this comment may be outdented | ||||
| 20092 | if ( $is_block_comment | ||||
| 20093 | && $outdent_long_lines | ||||
| 20094 | && $maximum_line_index < 0 ) | ||||
| 20095 | { | ||||
| 20096 | $group_type = 'COMMENT'; | ||||
| 20097 | $comment_leading_space_count = $leading_space_count; | ||||
| 20098 | $group_lines[ ++$maximum_line_index ] = $rfields->[0]; | ||||
| 20099 | return; | ||||
| 20100 | } | ||||
| 20101 | |||||
| 20102 | # just write this line directly if no current group, no side comment, | ||||
| 20103 | # and no space recovery is needed. | ||||
| 20104 | if ( $maximum_line_index < 0 && !get_RECOVERABLE_SPACES($indentation) ) | ||||
| 20105 | { | ||||
| 20106 | valign_output_step_B( $leading_space_count, $$rfields[0], 0, | ||||
| 20107 | $outdent_long_lines, $rvertical_tightness_flags, $level ); | ||||
| 20108 | return; | ||||
| 20109 | } | ||||
| 20110 | } | ||||
| 20111 | else { | ||||
| 20112 | $zero_count = 0; | ||||
| 20113 | } | ||||
| 20114 | |||||
| 20115 | # programming check: (shouldn't happen) | ||||
| 20116 | # an error here implies an incorrect call was made | ||||
| 20117 | if ( $jmax > 0 && ( $#{$rtokens} != ( $jmax - 1 ) ) ) { | ||||
| 20118 | warning( | ||||
| 20119 | "Program bug in Perl::Tidy::VerticalAligner - number of tokens = $#{$rtokens} should be one less than number of fields: $#{$rfields})\n" | ||||
| 20120 | ); | ||||
| 20121 | report_definite_bug(); | ||||
| 20122 | } | ||||
| 20123 | |||||
| 20124 | # -------------------------------------------------------------------- | ||||
| 20125 | # create an object to hold this line | ||||
| 20126 | # -------------------------------------------------------------------- | ||||
| 20127 | my $new_line = new Perl::Tidy::VerticalAligner::Line( | ||||
| 20128 | jmax => $jmax, | ||||
| 20129 | jmax_original_line => $jmax, | ||||
| 20130 | rtokens => $rtokens, | ||||
| 20131 | rfields => $rfields, | ||||
| 20132 | rpatterns => $rpatterns, | ||||
| 20133 | indentation => $indentation, | ||||
| 20134 | leading_space_count => $leading_space_count, | ||||
| 20135 | outdent_long_lines => $outdent_long_lines, | ||||
| 20136 | list_type => "", | ||||
| 20137 | is_hanging_side_comment => $is_hanging_side_comment, | ||||
| 20138 | maximum_line_length => maximum_line_length_for_level($level), | ||||
| 20139 | rvertical_tightness_flags => $rvertical_tightness_flags, | ||||
| 20140 | ); | ||||
| 20141 | |||||
| 20142 | # Initialize a global flag saying if the last line of the group should | ||||
| 20143 | # match end of group and also terminate the group. There should be no | ||||
| 20144 | # returns between here and where the flag is handled at the bottom. | ||||
| 20145 | my $col_matching_terminal = 0; | ||||
| 20146 | if ( defined($j_terminal_match) ) { | ||||
| 20147 | |||||
| 20148 | # remember the column of the terminal ? or { to match with | ||||
| 20149 | $col_matching_terminal = $current_line->get_column($j_terminal_match); | ||||
| 20150 | |||||
| 20151 | # set global flag for sub decide_if_aligned | ||||
| 20152 | $is_matching_terminal_line = 1; | ||||
| 20153 | } | ||||
| 20154 | |||||
| 20155 | # -------------------------------------------------------------------- | ||||
| 20156 | # It simplifies things to create a zero length side comment | ||||
| 20157 | # if none exists. | ||||
| 20158 | # -------------------------------------------------------------------- | ||||
| 20159 | make_side_comment( $new_line, $level_end ); | ||||
| 20160 | |||||
| 20161 | # -------------------------------------------------------------------- | ||||
| 20162 | # Decide if this is a simple list of items. | ||||
| 20163 | # There are 3 list types: none, comma, comma-arrow. | ||||
| 20164 | # We use this below to be less restrictive in deciding what to align. | ||||
| 20165 | # -------------------------------------------------------------------- | ||||
| 20166 | if ($is_forced_break) { | ||||
| 20167 | decide_if_list($new_line); | ||||
| 20168 | } | ||||
| 20169 | |||||
| 20170 | if ($current_line) { | ||||
| 20171 | |||||
| 20172 | # -------------------------------------------------------------------- | ||||
| 20173 | # Allow hanging side comment to join current group, if any | ||||
| 20174 | # This will help keep side comments aligned, because otherwise we | ||||
| 20175 | # will have to start a new group, making alignment less likely. | ||||
| 20176 | # -------------------------------------------------------------------- | ||||
| 20177 | join_hanging_comment( $new_line, $current_line ) | ||||
| 20178 | if $is_hanging_side_comment; | ||||
| 20179 | |||||
| 20180 | # -------------------------------------------------------------------- | ||||
| 20181 | # If there is just one previous line, and it has more fields | ||||
| 20182 | # than the new line, try to join fields together to get a match with | ||||
| 20183 | # the new line. At the present time, only a single leading '=' is | ||||
| 20184 | # allowed to be compressed out. This is useful in rare cases where | ||||
| 20185 | # a table is forced to use old breakpoints because of side comments, | ||||
| 20186 | # and the table starts out something like this: | ||||
| 20187 | # my %MonthChars = ('0', 'Jan', # side comment | ||||
| 20188 | # '1', 'Feb', | ||||
| 20189 | # '2', 'Mar', | ||||
| 20190 | # Eliminating the '=' field will allow the remaining fields to line up. | ||||
| 20191 | # This situation does not occur if there are no side comments | ||||
| 20192 | # because scan_list would put a break after the opening '('. | ||||
| 20193 | # -------------------------------------------------------------------- | ||||
| 20194 | eliminate_old_fields( $new_line, $current_line ); | ||||
| 20195 | |||||
| 20196 | # -------------------------------------------------------------------- | ||||
| 20197 | # If the new line has more fields than the current group, | ||||
| 20198 | # see if we can match the first fields and combine the remaining | ||||
| 20199 | # fields of the new line. | ||||
| 20200 | # -------------------------------------------------------------------- | ||||
| 20201 | eliminate_new_fields( $new_line, $current_line ); | ||||
| 20202 | |||||
| 20203 | # -------------------------------------------------------------------- | ||||
| 20204 | # Flush previous group unless all common tokens and patterns match.. | ||||
| 20205 | # -------------------------------------------------------------------- | ||||
| 20206 | check_match( $new_line, $current_line ); | ||||
| 20207 | |||||
| 20208 | # -------------------------------------------------------------------- | ||||
| 20209 | # See if there is space for this line in the current group (if any) | ||||
| 20210 | # -------------------------------------------------------------------- | ||||
| 20211 | if ($current_line) { | ||||
| 20212 | check_fit( $new_line, $current_line ); | ||||
| 20213 | } | ||||
| 20214 | } | ||||
| 20215 | |||||
| 20216 | # -------------------------------------------------------------------- | ||||
| 20217 | # Append this line to the current group (or start new group) | ||||
| 20218 | # -------------------------------------------------------------------- | ||||
| 20219 | add_to_group($new_line); | ||||
| 20220 | |||||
| 20221 | # Future update to allow this to vary: | ||||
| 20222 | $current_line = $new_line if ( $maximum_line_index == 0 ); | ||||
| 20223 | |||||
| 20224 | # output this group if it ends in a terminal else or ternary line | ||||
| 20225 | if ( defined($j_terminal_match) ) { | ||||
| 20226 | |||||
| 20227 | # if there is only one line in the group (maybe due to failure to match | ||||
| 20228 | # perfectly with previous lines), then align the ? or { of this | ||||
| 20229 | # terminal line with the previous one unless that would make the line | ||||
| 20230 | # too long | ||||
| 20231 | if ( $maximum_line_index == 0 ) { | ||||
| 20232 | my $col_now = $current_line->get_column($j_terminal_match); | ||||
| 20233 | my $pad = $col_matching_terminal - $col_now; | ||||
| 20234 | my $padding_available = | ||||
| 20235 | $current_line->get_available_space_on_right(); | ||||
| 20236 | if ( $pad > 0 && $pad <= $padding_available ) { | ||||
| 20237 | $current_line->increase_field_width( $j_terminal_match, $pad ); | ||||
| 20238 | } | ||||
| 20239 | } | ||||
| 20240 | my_flush(); | ||||
| 20241 | $is_matching_terminal_line = 0; | ||||
| 20242 | } | ||||
| 20243 | |||||
| 20244 | # -------------------------------------------------------------------- | ||||
| 20245 | # Step 8. Some old debugging stuff | ||||
| 20246 | # -------------------------------------------------------------------- | ||||
| 20247 | VALIGN_DEBUG_FLAG_APPEND && do { | ||||
| 20248 | print STDOUT "APPEND fields:"; | ||||
| 20249 | dump_array(@$rfields); | ||||
| 20250 | print STDOUT "APPEND tokens:"; | ||||
| 20251 | dump_array(@$rtokens); | ||||
| 20252 | print STDOUT "APPEND patterns:"; | ||||
| 20253 | dump_array(@$rpatterns); | ||||
| 20254 | dump_alignments(); | ||||
| 20255 | }; | ||||
| 20256 | |||||
| 20257 | return; | ||||
| 20258 | } | ||||
| 20259 | |||||
| 20260 | sub join_hanging_comment { | ||||
| 20261 | |||||
| 20262 | my $line = shift; | ||||
| 20263 | my $jmax = $line->get_jmax(); | ||||
| 20264 | return 0 unless $jmax == 1; # must be 2 fields | ||||
| 20265 | my $rtokens = $line->get_rtokens(); | ||||
| 20266 | return 0 unless $$rtokens[0] eq '#'; # the second field is a comment.. | ||||
| 20267 | my $rfields = $line->get_rfields(); | ||||
| 20268 | return 0 unless $$rfields[0] =~ /^\s*$/; # the first field is empty... | ||||
| 20269 | my $old_line = shift; | ||||
| 20270 | my $maximum_field_index = $old_line->get_jmax(); | ||||
| 20271 | return 0 | ||||
| 20272 | unless $maximum_field_index > $jmax; # the current line has more fields | ||||
| 20273 | my $rpatterns = $line->get_rpatterns(); | ||||
| 20274 | |||||
| 20275 | $line->set_is_hanging_side_comment(1); | ||||
| 20276 | $jmax = $maximum_field_index; | ||||
| 20277 | $line->set_jmax($jmax); | ||||
| 20278 | $$rfields[$jmax] = $$rfields[1]; | ||||
| 20279 | $$rtokens[ $jmax - 1 ] = $$rtokens[0]; | ||||
| 20280 | $$rpatterns[ $jmax - 1 ] = $$rpatterns[0]; | ||||
| 20281 | for ( my $j = 1 ; $j < $jmax ; $j++ ) { | ||||
| 20282 | $$rfields[$j] = " "; # NOTE: caused glitch unless 1 blank, why? | ||||
| 20283 | $$rtokens[ $j - 1 ] = ""; | ||||
| 20284 | $$rpatterns[ $j - 1 ] = ""; | ||||
| 20285 | } | ||||
| 20286 | return 1; | ||||
| 20287 | } | ||||
| 20288 | |||||
| 20289 | sub eliminate_old_fields { | ||||
| 20290 | |||||
| 20291 | my $new_line = shift; | ||||
| 20292 | my $jmax = $new_line->get_jmax(); | ||||
| 20293 | if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax } | ||||
| 20294 | if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax } | ||||
| 20295 | |||||
| 20296 | # there must be one previous line | ||||
| 20297 | return unless ( $maximum_line_index == 0 ); | ||||
| 20298 | |||||
| 20299 | my $old_line = shift; | ||||
| 20300 | my $maximum_field_index = $old_line->get_jmax(); | ||||
| 20301 | |||||
| 20302 | ############################################### | ||||
| 20303 | # this line must have fewer fields | ||||
| 20304 | return unless $maximum_field_index > $jmax; | ||||
| 20305 | ############################################### | ||||
| 20306 | |||||
| 20307 | # Identify specific cases where field elimination is allowed: | ||||
| 20308 | # case=1: both lines have comma-separated lists, and the first | ||||
| 20309 | # line has an equals | ||||
| 20310 | # case=2: both lines have leading equals | ||||
| 20311 | |||||
| 20312 | # case 1 is the default | ||||
| 20313 | my $case = 1; | ||||
| 20314 | |||||
| 20315 | # See if case 2: both lines have leading '=' | ||||
| 20316 | # We'll require similar leading patterns in this case | ||||
| 20317 | my $old_rtokens = $old_line->get_rtokens(); | ||||
| 20318 | my $rtokens = $new_line->get_rtokens(); | ||||
| 20319 | my $rpatterns = $new_line->get_rpatterns(); | ||||
| 20320 | my $old_rpatterns = $old_line->get_rpatterns(); | ||||
| 20321 | if ( $rtokens->[0] =~ /^=\d*$/ | ||||
| 20322 | && $old_rtokens->[0] eq $rtokens->[0] | ||||
| 20323 | && $old_rpatterns->[0] eq $rpatterns->[0] ) | ||||
| 20324 | { | ||||
| 20325 | $case = 2; | ||||
| 20326 | } | ||||
| 20327 | |||||
| 20328 | # not too many fewer fields in new line for case 1 | ||||
| 20329 | return unless ( $case != 1 || $maximum_field_index - 2 <= $jmax ); | ||||
| 20330 | |||||
| 20331 | # case 1 must have side comment | ||||
| 20332 | my $old_rfields = $old_line->get_rfields(); | ||||
| 20333 | return | ||||
| 20334 | if ( $case == 1 | ||||
| 20335 | && length( $$old_rfields[$maximum_field_index] ) == 0 ); | ||||
| 20336 | |||||
| 20337 | my $rfields = $new_line->get_rfields(); | ||||
| 20338 | |||||
| 20339 | my $hid_equals = 0; | ||||
| 20340 | |||||
| 20341 | my @new_alignments = (); | ||||
| 20342 | my @new_fields = (); | ||||
| 20343 | my @new_matching_patterns = (); | ||||
| 20344 | my @new_matching_tokens = (); | ||||
| 20345 | |||||
| 20346 | my $j = 0; | ||||
| 20347 | my $k; | ||||
| 20348 | my $current_field = ''; | ||||
| 20349 | my $current_pattern = ''; | ||||
| 20350 | |||||
| 20351 | # loop over all old tokens | ||||
| 20352 | my $in_match = 0; | ||||
| 20353 | for ( $k = 0 ; $k < $maximum_field_index ; $k++ ) { | ||||
| 20354 | $current_field .= $$old_rfields[$k]; | ||||
| 20355 | $current_pattern .= $$old_rpatterns[$k]; | ||||
| 20356 | last if ( $j > $jmax - 1 ); | ||||
| 20357 | |||||
| 20358 | if ( $$old_rtokens[$k] eq $$rtokens[$j] ) { | ||||
| 20359 | $in_match = 1; | ||||
| 20360 | $new_fields[$j] = $current_field; | ||||
| 20361 | $new_matching_patterns[$j] = $current_pattern; | ||||
| 20362 | $current_field = ''; | ||||
| 20363 | $current_pattern = ''; | ||||
| 20364 | $new_matching_tokens[$j] = $$old_rtokens[$k]; | ||||
| 20365 | $new_alignments[$j] = $old_line->get_alignment($k); | ||||
| 20366 | $j++; | ||||
| 20367 | } | ||||
| 20368 | else { | ||||
| 20369 | |||||
| 20370 | if ( $$old_rtokens[$k] =~ /^\=\d*$/ ) { | ||||
| 20371 | last if ( $case == 2 ); # avoid problems with stuff | ||||
| 20372 | # like: $a=$b=$c=$d; | ||||
| 20373 | $hid_equals = 1; | ||||
| 20374 | } | ||||
| 20375 | last | ||||
| 20376 | if ( $in_match && $case == 1 ) | ||||
| 20377 | ; # disallow gaps in matching field types in case 1 | ||||
| 20378 | } | ||||
| 20379 | } | ||||
| 20380 | |||||
| 20381 | # Modify the current state if we are successful. | ||||
| 20382 | # We must exactly reach the ends of both lists for success. | ||||
| 20383 | if ( ( $j == $jmax ) | ||||
| 20384 | && ( $current_field eq '' ) | ||||
| 20385 | && ( $case != 1 || $hid_equals ) ) | ||||
| 20386 | { | ||||
| 20387 | $k = $maximum_field_index; | ||||
| 20388 | $current_field .= $$old_rfields[$k]; | ||||
| 20389 | $current_pattern .= $$old_rpatterns[$k]; | ||||
| 20390 | $new_fields[$j] = $current_field; | ||||
| 20391 | $new_matching_patterns[$j] = $current_pattern; | ||||
| 20392 | |||||
| 20393 | $new_alignments[$j] = $old_line->get_alignment($k); | ||||
| 20394 | $maximum_field_index = $j; | ||||
| 20395 | |||||
| 20396 | $old_line->set_alignments(@new_alignments); | ||||
| 20397 | $old_line->set_jmax($jmax); | ||||
| 20398 | $old_line->set_rtokens( \@new_matching_tokens ); | ||||
| 20399 | $old_line->set_rfields( \@new_fields ); | ||||
| 20400 | $old_line->set_rpatterns( \@$rpatterns ); | ||||
| 20401 | } | ||||
| 20402 | } | ||||
| 20403 | |||||
| 20404 | # create an empty side comment if none exists | ||||
| 20405 | sub make_side_comment { | ||||
| 20406 | my $new_line = shift; | ||||
| 20407 | my $level_end = shift; | ||||
| 20408 | my $jmax = $new_line->get_jmax(); | ||||
| 20409 | my $rtokens = $new_line->get_rtokens(); | ||||
| 20410 | |||||
| 20411 | # if line does not have a side comment... | ||||
| 20412 | if ( ( $jmax == 0 ) || ( $$rtokens[ $jmax - 1 ] ne '#' ) ) { | ||||
| 20413 | my $rfields = $new_line->get_rfields(); | ||||
| 20414 | my $rpatterns = $new_line->get_rpatterns(); | ||||
| 20415 | $$rtokens[$jmax] = '#'; | ||||
| 20416 | $$rfields[ ++$jmax ] = ''; | ||||
| 20417 | $$rpatterns[$jmax] = '#'; | ||||
| 20418 | $new_line->set_jmax($jmax); | ||||
| 20419 | $new_line->set_jmax_original_line($jmax); | ||||
| 20420 | } | ||||
| 20421 | |||||
| 20422 | # line has a side comment.. | ||||
| 20423 | else { | ||||
| 20424 | |||||
| 20425 | # don't remember old side comment location for very long | ||||
| 20426 | my $line_number = $vertical_aligner_self->get_output_line_number(); | ||||
| 20427 | my $rfields = $new_line->get_rfields(); | ||||
| 20428 | if ( | ||||
| 20429 | $line_number - $last_side_comment_line_number > 12 | ||||
| 20430 | |||||
| 20431 | # and don't remember comment location across block level changes | ||||
| 20432 | || ( $level_end < $last_side_comment_level && $$rfields[0] =~ /^}/ ) | ||||
| 20433 | ) | ||||
| 20434 | { | ||||
| 20435 | forget_side_comment(); | ||||
| 20436 | } | ||||
| 20437 | $last_side_comment_line_number = $line_number; | ||||
| 20438 | $last_side_comment_level = $level_end; | ||||
| 20439 | } | ||||
| 20440 | } | ||||
| 20441 | |||||
| 20442 | sub decide_if_list { | ||||
| 20443 | |||||
| 20444 | my $line = shift; | ||||
| 20445 | |||||
| 20446 | # A list will be taken to be a line with a forced break in which all | ||||
| 20447 | # of the field separators are commas or comma-arrows (except for the | ||||
| 20448 | # trailing #) | ||||
| 20449 | |||||
| 20450 | # List separator tokens are things like ',3' or '=>2', | ||||
| 20451 | # where the trailing digit is the nesting depth. Allow braces | ||||
| 20452 | # to allow nested list items. | ||||
| 20453 | my $rtokens = $line->get_rtokens(); | ||||
| 20454 | my $test_token = $$rtokens[0]; | ||||
| 20455 | if ( $test_token =~ /^(\,|=>)/ ) { | ||||
| 20456 | my $list_type = $test_token; | ||||
| 20457 | my $jmax = $line->get_jmax(); | ||||
| 20458 | |||||
| 20459 | foreach ( 1 .. $jmax - 2 ) { | ||||
| 20460 | if ( $$rtokens[$_] !~ /^(\,|=>|\{)/ ) { | ||||
| 20461 | $list_type = ""; | ||||
| 20462 | last; | ||||
| 20463 | } | ||||
| 20464 | } | ||||
| 20465 | $line->set_list_type($list_type); | ||||
| 20466 | } | ||||
| 20467 | } | ||||
| 20468 | |||||
| 20469 | sub eliminate_new_fields { | ||||
| 20470 | |||||
| 20471 | return unless ( $maximum_line_index >= 0 ); | ||||
| 20472 | my ( $new_line, $old_line ) = @_; | ||||
| 20473 | my $jmax = $new_line->get_jmax(); | ||||
| 20474 | |||||
| 20475 | my $old_rtokens = $old_line->get_rtokens(); | ||||
| 20476 | my $rtokens = $new_line->get_rtokens(); | ||||
| 20477 | my $is_assignment = | ||||
| 20478 | ( $rtokens->[0] =~ /^=\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) ); | ||||
| 20479 | |||||
| 20480 | # must be monotonic variation | ||||
| 20481 | return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax ); | ||||
| 20482 | |||||
| 20483 | # must be more fields in the new line | ||||
| 20484 | my $maximum_field_index = $old_line->get_jmax(); | ||||
| 20485 | return unless ( $maximum_field_index < $jmax ); | ||||
| 20486 | |||||
| 20487 | unless ($is_assignment) { | ||||
| 20488 | return | ||||
| 20489 | unless ( $old_line->get_jmax_original_line() == $minimum_jmax_seen ) | ||||
| 20490 | ; # only if monotonic | ||||
| 20491 | |||||
| 20492 | # never combine fields of a comma list | ||||
| 20493 | return | ||||
| 20494 | unless ( $maximum_field_index > 1 ) | ||||
| 20495 | && ( $new_line->get_list_type() !~ /^,/ ); | ||||
| 20496 | } | ||||
| 20497 | |||||
| 20498 | my $rfields = $new_line->get_rfields(); | ||||
| 20499 | my $rpatterns = $new_line->get_rpatterns(); | ||||
| 20500 | my $old_rpatterns = $old_line->get_rpatterns(); | ||||
| 20501 | |||||
| 20502 | # loop over all OLD tokens except comment and check match | ||||
| 20503 | my $match = 1; | ||||
| 20504 | my $k; | ||||
| 20505 | for ( $k = 0 ; $k < $maximum_field_index - 1 ; $k++ ) { | ||||
| 20506 | if ( ( $$old_rtokens[$k] ne $$rtokens[$k] ) | ||||
| 20507 | || ( $$old_rpatterns[$k] ne $$rpatterns[$k] ) ) | ||||
| 20508 | { | ||||
| 20509 | $match = 0; | ||||
| 20510 | last; | ||||
| 20511 | } | ||||
| 20512 | } | ||||
| 20513 | |||||
| 20514 | # first tokens agree, so combine extra new tokens | ||||
| 20515 | if ($match) { | ||||
| 20516 | for $k ( $maximum_field_index .. $jmax - 1 ) { | ||||
| 20517 | |||||
| 20518 | $$rfields[ $maximum_field_index - 1 ] .= $$rfields[$k]; | ||||
| 20519 | $$rfields[$k] = ""; | ||||
| 20520 | $$rpatterns[ $maximum_field_index - 1 ] .= $$rpatterns[$k]; | ||||
| 20521 | $$rpatterns[$k] = ""; | ||||
| 20522 | } | ||||
| 20523 | |||||
| 20524 | $$rtokens[ $maximum_field_index - 1 ] = '#'; | ||||
| 20525 | $$rfields[$maximum_field_index] = $$rfields[$jmax]; | ||||
| 20526 | $$rpatterns[$maximum_field_index] = $$rpatterns[$jmax]; | ||||
| 20527 | $jmax = $maximum_field_index; | ||||
| 20528 | } | ||||
| 20529 | $new_line->set_jmax($jmax); | ||||
| 20530 | } | ||||
| 20531 | |||||
| 20532 | sub fix_terminal_ternary { | ||||
| 20533 | |||||
| 20534 | # Add empty fields as necessary to align a ternary term | ||||
| 20535 | # like this: | ||||
| 20536 | # | ||||
| 20537 | # my $leapyear = | ||||
| 20538 | # $year % 4 ? 0 | ||||
| 20539 | # : $year % 100 ? 1 | ||||
| 20540 | # : $year % 400 ? 0 | ||||
| 20541 | # : 1; | ||||
| 20542 | # | ||||
| 20543 | # returns 1 if the terminal item should be indented | ||||
| 20544 | |||||
| 20545 | my ( $rfields, $rtokens, $rpatterns ) = @_; | ||||
| 20546 | |||||
| 20547 | my $jmax = @{$rfields} - 1; | ||||
| 20548 | my $old_line = $group_lines[$maximum_line_index]; | ||||
| 20549 | my $rfields_old = $old_line->get_rfields(); | ||||
| 20550 | |||||
| 20551 | my $rpatterns_old = $old_line->get_rpatterns(); | ||||
| 20552 | my $rtokens_old = $old_line->get_rtokens(); | ||||
| 20553 | my $maximum_field_index = $old_line->get_jmax(); | ||||
| 20554 | |||||
| 20555 | # look for the question mark after the : | ||||
| 20556 | my ($jquestion); | ||||
| 20557 | my $depth_question; | ||||
| 20558 | my $pad = ""; | ||||
| 20559 | for ( my $j = 0 ; $j < $maximum_field_index ; $j++ ) { | ||||
| 20560 | my $tok = $rtokens_old->[$j]; | ||||
| 20561 | if ( $tok =~ /^\?(\d+)$/ ) { | ||||
| 20562 | $depth_question = $1; | ||||
| 20563 | |||||
| 20564 | # depth must be correct | ||||
| 20565 | next unless ( $depth_question eq $group_level ); | ||||
| 20566 | |||||
| 20567 | $jquestion = $j; | ||||
| 20568 | if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) { | ||||
| 20569 | $pad = " " x length($1); | ||||
| 20570 | } | ||||
| 20571 | else { | ||||
| 20572 | return; # shouldn't happen | ||||
| 20573 | } | ||||
| 20574 | last; | ||||
| 20575 | } | ||||
| 20576 | } | ||||
| 20577 | return unless ( defined($jquestion) ); # shouldn't happen | ||||
| 20578 | |||||
| 20579 | # Now splice the tokens and patterns of the previous line | ||||
| 20580 | # into the else line to insure a match. Add empty fields | ||||
| 20581 | # as necessary. | ||||
| 20582 | my $jadd = $jquestion; | ||||
| 20583 | |||||
| 20584 | # Work on copies of the actual arrays in case we have | ||||
| 20585 | # to return due to an error | ||||
| 20586 | my @fields = @{$rfields}; | ||||
| 20587 | my @patterns = @{$rpatterns}; | ||||
| 20588 | my @tokens = @{$rtokens}; | ||||
| 20589 | |||||
| 20590 | VALIGN_DEBUG_FLAG_TERNARY && do { | ||||
| 20591 | local $" = '><'; | ||||
| 20592 | print STDOUT "CURRENT FIELDS=<@{$rfields_old}>\n"; | ||||
| 20593 | print STDOUT "CURRENT TOKENS=<@{$rtokens_old}>\n"; | ||||
| 20594 | print STDOUT "CURRENT PATTERNS=<@{$rpatterns_old}>\n"; | ||||
| 20595 | print STDOUT "UNMODIFIED FIELDS=<@{$rfields}>\n"; | ||||
| 20596 | print STDOUT "UNMODIFIED TOKENS=<@{$rtokens}>\n"; | ||||
| 20597 | print STDOUT "UNMODIFIED PATTERNS=<@{$rpatterns}>\n"; | ||||
| 20598 | }; | ||||
| 20599 | |||||
| 20600 | # handle cases of leading colon on this line | ||||
| 20601 | if ( $fields[0] =~ /^(:\s*)(.*)$/ ) { | ||||
| 20602 | |||||
| 20603 | my ( $colon, $therest ) = ( $1, $2 ); | ||||
| 20604 | |||||
| 20605 | # Handle sub-case of first field with leading colon plus additional code | ||||
| 20606 | # This is the usual situation as at the '1' below: | ||||
| 20607 | # ... | ||||
| 20608 | # : $year % 400 ? 0 | ||||
| 20609 | # : 1; | ||||
| 20610 | if ($therest) { | ||||
| 20611 | |||||
| 20612 | # Split the first field after the leading colon and insert padding. | ||||
| 20613 | # Note that this padding will remain even if the terminal value goes | ||||
| 20614 | # out on a separate line. This does not seem to look to bad, so no | ||||
| 20615 | # mechanism has been included to undo it. | ||||
| 20616 | my $field1 = shift @fields; | ||||
| 20617 | unshift @fields, ( $colon, $pad . $therest ); | ||||
| 20618 | |||||
| 20619 | # change the leading pattern from : to ? | ||||
| 20620 | return unless ( $patterns[0] =~ s/^\:/?/ ); | ||||
| 20621 | |||||
| 20622 | # install leading tokens and patterns of existing line | ||||
| 20623 | unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] ); | ||||
| 20624 | unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] ); | ||||
| 20625 | |||||
| 20626 | # insert appropriate number of empty fields | ||||
| 20627 | splice( @fields, 1, 0, ('') x $jadd ) if $jadd; | ||||
| 20628 | } | ||||
| 20629 | |||||
| 20630 | # handle sub-case of first field just equal to leading colon. | ||||
| 20631 | # This can happen for example in the example below where | ||||
| 20632 | # the leading '(' would create a new alignment token | ||||
| 20633 | # : ( $name =~ /[]}]$/ ) ? ( $mname = $name ) | ||||
| 20634 | # : ( $mname = $name . '->' ); | ||||
| 20635 | else { | ||||
| 20636 | |||||
| 20637 | return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen | ||||
| 20638 | |||||
| 20639 | # prepend a leading ? onto the second pattern | ||||
| 20640 | $patterns[1] = "?b" . $patterns[1]; | ||||
| 20641 | |||||
| 20642 | # pad the second field | ||||
| 20643 | $fields[1] = $pad . $fields[1]; | ||||
| 20644 | |||||
| 20645 | # install leading tokens and patterns of existing line, replacing | ||||
| 20646 | # leading token and inserting appropriate number of empty fields | ||||
| 20647 | splice( @tokens, 0, 1, @{$rtokens_old}[ 0 .. $jquestion ] ); | ||||
| 20648 | splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] ); | ||||
| 20649 | splice( @fields, 1, 0, ('') x $jadd ) if $jadd; | ||||
| 20650 | } | ||||
| 20651 | } | ||||
| 20652 | |||||
| 20653 | # Handle case of no leading colon on this line. This will | ||||
| 20654 | # be the case when -wba=':' is used. For example, | ||||
| 20655 | # $year % 400 ? 0 : | ||||
| 20656 | # 1; | ||||
| 20657 | else { | ||||
| 20658 | |||||
| 20659 | # install leading tokens and patterns of existing line | ||||
| 20660 | $patterns[0] = '?' . 'b' . $patterns[0]; | ||||
| 20661 | unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] ); | ||||
| 20662 | unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] ); | ||||
| 20663 | |||||
| 20664 | # insert appropriate number of empty fields | ||||
| 20665 | $jadd = $jquestion + 1; | ||||
| 20666 | $fields[0] = $pad . $fields[0]; | ||||
| 20667 | splice( @fields, 0, 0, ('') x $jadd ) if $jadd; | ||||
| 20668 | } | ||||
| 20669 | |||||
| 20670 | VALIGN_DEBUG_FLAG_TERNARY && do { | ||||
| 20671 | local $" = '><'; | ||||
| 20672 | print STDOUT "MODIFIED TOKENS=<@tokens>\n"; | ||||
| 20673 | print STDOUT "MODIFIED PATTERNS=<@patterns>\n"; | ||||
| 20674 | print STDOUT "MODIFIED FIELDS=<@fields>\n"; | ||||
| 20675 | }; | ||||
| 20676 | |||||
| 20677 | # all ok .. update the arrays | ||||
| 20678 | @{$rfields} = @fields; | ||||
| 20679 | @{$rtokens} = @tokens; | ||||
| 20680 | @{$rpatterns} = @patterns; | ||||
| 20681 | |||||
| 20682 | # force a flush after this line | ||||
| 20683 | return $jquestion; | ||||
| 20684 | } | ||||
| 20685 | |||||
| 20686 | sub fix_terminal_else { | ||||
| 20687 | |||||
| 20688 | # Add empty fields as necessary to align a balanced terminal | ||||
| 20689 | # else block to a previous if/elsif/unless block, | ||||
| 20690 | # like this: | ||||
| 20691 | # | ||||
| 20692 | # if ( 1 || $x ) { print "ok 13\n"; } | ||||
| 20693 | # else { print "not ok 13\n"; } | ||||
| 20694 | # | ||||
| 20695 | # returns 1 if the else block should be indented | ||||
| 20696 | # | ||||
| 20697 | my ( $rfields, $rtokens, $rpatterns ) = @_; | ||||
| 20698 | my $jmax = @{$rfields} - 1; | ||||
| 20699 | return unless ( $jmax > 0 ); | ||||
| 20700 | |||||
| 20701 | # check for balanced else block following if/elsif/unless | ||||
| 20702 | my $rfields_old = $current_line->get_rfields(); | ||||
| 20703 | |||||
| 20704 | # TBD: add handling for 'case' | ||||
| 20705 | return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ ); | ||||
| 20706 | |||||
| 20707 | # look for the opening brace after the else, and extract the depth | ||||
| 20708 | my $tok_brace = $rtokens->[0]; | ||||
| 20709 | my $depth_brace; | ||||
| 20710 | if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; } | ||||
| 20711 | |||||
| 20712 | # probably: "else # side_comment" | ||||
| 20713 | else { return } | ||||
| 20714 | |||||
| 20715 | my $rpatterns_old = $current_line->get_rpatterns(); | ||||
| 20716 | my $rtokens_old = $current_line->get_rtokens(); | ||||
| 20717 | my $maximum_field_index = $current_line->get_jmax(); | ||||
| 20718 | |||||
| 20719 | # be sure the previous if/elsif is followed by an opening paren | ||||
| 20720 | my $jparen = 0; | ||||
| 20721 | my $tok_paren = '(' . $depth_brace; | ||||
| 20722 | my $tok_test = $rtokens_old->[$jparen]; | ||||
| 20723 | return unless ( $tok_test eq $tok_paren ); # shouldn't happen | ||||
| 20724 | |||||
| 20725 | # Now find the opening block brace | ||||
| 20726 | my ($jbrace); | ||||
| 20727 | for ( my $j = 1 ; $j < $maximum_field_index ; $j++ ) { | ||||
| 20728 | my $tok = $rtokens_old->[$j]; | ||||
| 20729 | if ( $tok eq $tok_brace ) { | ||||
| 20730 | $jbrace = $j; | ||||
| 20731 | last; | ||||
| 20732 | } | ||||
| 20733 | } | ||||
| 20734 | return unless ( defined($jbrace) ); # shouldn't happen | ||||
| 20735 | |||||
| 20736 | # Now splice the tokens and patterns of the previous line | ||||
| 20737 | # into the else line to insure a match. Add empty fields | ||||
| 20738 | # as necessary. | ||||
| 20739 | my $jadd = $jbrace - $jparen; | ||||
| 20740 | splice( @{$rtokens}, 0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] ); | ||||
| 20741 | splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] ); | ||||
| 20742 | splice( @{$rfields}, 1, 0, ('') x $jadd ); | ||||
| 20743 | |||||
| 20744 | # force a flush after this line if it does not follow a case | ||||
| 20745 | return $jbrace | ||||
| 20746 | unless ( $rfields_old->[0] =~ /^case\s*$/ ); | ||||
| 20747 | } | ||||
| 20748 | |||||
| 20749 | { # sub check_match | ||||
| 20750 | 2 | 200ns | my %is_good_alignment; | ||
| 20751 | |||||
| 20752 | # spent 14µs within Perl::Tidy::VerticalAligner::BEGIN@20752 which was called:
# once (14µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 20759 | ||||
| 20753 | |||||
| 20754 | # Vertically aligning on certain "good" tokens is usually okay | ||||
| 20755 | # so we can be less restrictive in marginal cases. | ||||
| 20756 | 1 | 2µs | @_ = qw( { ? => = ); | ||
| 20757 | 1 | 1µs | push @_, (','); | ||
| 20758 | 1 | 13µs | @is_good_alignment{@_} = (1) x scalar(@_); | ||
| 20759 | 1 | 4.90ms | 1 | 14µs | } # spent 14µs making 1 call to Perl::Tidy::VerticalAligner::BEGIN@20752 |
| 20760 | |||||
| 20761 | sub check_match { | ||||
| 20762 | |||||
| 20763 | # See if the current line matches the current vertical alignment group. | ||||
| 20764 | # If not, flush the current group. | ||||
| 20765 | my $new_line = shift; | ||||
| 20766 | my $old_line = shift; | ||||
| 20767 | |||||
| 20768 | # uses global variables: | ||||
| 20769 | # $previous_minimum_jmax_seen | ||||
| 20770 | # $maximum_jmax_seen | ||||
| 20771 | # $maximum_line_index | ||||
| 20772 | # $marginal_match | ||||
| 20773 | my $jmax = $new_line->get_jmax(); | ||||
| 20774 | my $maximum_field_index = $old_line->get_jmax(); | ||||
| 20775 | |||||
| 20776 | # flush if this line has too many fields | ||||
| 20777 | if ( $jmax > $maximum_field_index ) { goto NO_MATCH } | ||||
| 20778 | |||||
| 20779 | # flush if adding this line would make a non-monotonic field count | ||||
| 20780 | if ( | ||||
| 20781 | ( $maximum_field_index > $jmax ) # this has too few fields | ||||
| 20782 | && ( | ||||
| 20783 | ( $previous_minimum_jmax_seen < | ||||
| 20784 | $jmax ) # and wouldn't be monotonic | ||||
| 20785 | || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen ) | ||||
| 20786 | ) | ||||
| 20787 | ) | ||||
| 20788 | { | ||||
| 20789 | goto NO_MATCH; | ||||
| 20790 | } | ||||
| 20791 | |||||
| 20792 | # otherwise see if this line matches the current group | ||||
| 20793 | my $jmax_original_line = $new_line->get_jmax_original_line(); | ||||
| 20794 | my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment(); | ||||
| 20795 | my $rtokens = $new_line->get_rtokens(); | ||||
| 20796 | my $rfields = $new_line->get_rfields(); | ||||
| 20797 | my $rpatterns = $new_line->get_rpatterns(); | ||||
| 20798 | my $list_type = $new_line->get_list_type(); | ||||
| 20799 | |||||
| 20800 | my $group_list_type = $old_line->get_list_type(); | ||||
| 20801 | my $old_rpatterns = $old_line->get_rpatterns(); | ||||
| 20802 | my $old_rtokens = $old_line->get_rtokens(); | ||||
| 20803 | |||||
| 20804 | my $jlimit = $jmax - 1; | ||||
| 20805 | if ( $maximum_field_index > $jmax ) { | ||||
| 20806 | $jlimit = $jmax_original_line; | ||||
| 20807 | --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) ); | ||||
| 20808 | } | ||||
| 20809 | |||||
| 20810 | # handle comma-separated lists .. | ||||
| 20811 | if ( $group_list_type && ( $list_type eq $group_list_type ) ) { | ||||
| 20812 | for my $j ( 0 .. $jlimit ) { | ||||
| 20813 | my $old_tok = $$old_rtokens[$j]; | ||||
| 20814 | next unless $old_tok; | ||||
| 20815 | my $new_tok = $$rtokens[$j]; | ||||
| 20816 | next unless $new_tok; | ||||
| 20817 | |||||
| 20818 | # lists always match ... | ||||
| 20819 | # unless they would align any '=>'s with ','s | ||||
| 20820 | goto NO_MATCH | ||||
| 20821 | if ( $old_tok =~ /^=>/ && $new_tok =~ /^,/ | ||||
| 20822 | || $new_tok =~ /^=>/ && $old_tok =~ /^,/ ); | ||||
| 20823 | } | ||||
| 20824 | } | ||||
| 20825 | |||||
| 20826 | # do detailed check for everything else except hanging side comments | ||||
| 20827 | elsif ( !$is_hanging_side_comment ) { | ||||
| 20828 | |||||
| 20829 | my $leading_space_count = $new_line->get_leading_space_count(); | ||||
| 20830 | |||||
| 20831 | my $max_pad = 0; | ||||
| 20832 | my $min_pad = 0; | ||||
| 20833 | my $saw_good_alignment; | ||||
| 20834 | |||||
| 20835 | for my $j ( 0 .. $jlimit ) { | ||||
| 20836 | |||||
| 20837 | my $old_tok = $$old_rtokens[$j]; | ||||
| 20838 | my $new_tok = $$rtokens[$j]; | ||||
| 20839 | |||||
| 20840 | # Note on encoding used for alignment tokens: | ||||
| 20841 | # ------------------------------------------- | ||||
| 20842 | # Tokens are "decorated" with information which can help | ||||
| 20843 | # prevent unwanted alignments. Consider for example the | ||||
| 20844 | # following two lines: | ||||
| 20845 | # local ( $xn, $xd ) = split( '/', &'rnorm(@_) ); | ||||
| 20846 | # local ( $i, $f ) = &'bdiv( $xn, $xd ); | ||||
| 20847 | # There are three alignment tokens in each line, a comma, | ||||
| 20848 | # an =, and a comma. In the first line these three tokens | ||||
| 20849 | # are encoded as: | ||||
| 20850 | # ,4+local-18 =3 ,4+split-7 | ||||
| 20851 | # and in the second line they are encoded as | ||||
| 20852 | # ,4+local-18 =3 ,4+&'bdiv-8 | ||||
| 20853 | # Tokens always at least have token name and nesting | ||||
| 20854 | # depth. So in this example the ='s are at depth 3 and | ||||
| 20855 | # the ,'s are at depth 4. This prevents aligning tokens | ||||
| 20856 | # of different depths. Commas contain additional | ||||
| 20857 | # information, as follows: | ||||
| 20858 | # , {depth} + {container name} - {spaces to opening paren} | ||||
| 20859 | # This allows us to reject matching the rightmost commas | ||||
| 20860 | # in the above two lines, since they are for different | ||||
| 20861 | # function calls. This encoding is done in | ||||
| 20862 | # 'sub send_lines_to_vertical_aligner'. | ||||
| 20863 | |||||
| 20864 | # Pick off actual token. | ||||
| 20865 | # Everything up to the first digit is the actual token. | ||||
| 20866 | my $alignment_token = $new_tok; | ||||
| 20867 | if ( $alignment_token =~ /^([^\d]+)/ ) { $alignment_token = $1 } | ||||
| 20868 | |||||
| 20869 | # see if the decorated tokens match | ||||
| 20870 | my $tokens_match = $new_tok eq $old_tok | ||||
| 20871 | |||||
| 20872 | # Exception for matching terminal : of ternary statement.. | ||||
| 20873 | # consider containers prefixed by ? and : a match | ||||
| 20874 | || ( $new_tok =~ /^,\d*\+\:/ && $old_tok =~ /^,\d*\+\?/ ); | ||||
| 20875 | |||||
| 20876 | # No match if the alignment tokens differ... | ||||
| 20877 | if ( !$tokens_match ) { | ||||
| 20878 | |||||
| 20879 | # ...Unless this is a side comment | ||||
| 20880 | if ( | ||||
| 20881 | $j == $jlimit | ||||
| 20882 | |||||
| 20883 | # and there is either at least one alignment token | ||||
| 20884 | # or this is a single item following a list. This | ||||
| 20885 | # latter rule is required for 'December' to join | ||||
| 20886 | # the following list: | ||||
| 20887 | # my (@months) = ( | ||||
| 20888 | # '', 'January', 'February', 'March', | ||||
| 20889 | # 'April', 'May', 'June', 'July', | ||||
| 20890 | # 'August', 'September', 'October', 'November', | ||||
| 20891 | # 'December' | ||||
| 20892 | # ); | ||||
| 20893 | # If it doesn't then the -lp formatting will fail. | ||||
| 20894 | && ( $j > 0 || $old_tok =~ /^,/ ) | ||||
| 20895 | ) | ||||
| 20896 | { | ||||
| 20897 | $marginal_match = 1 | ||||
| 20898 | if ( $marginal_match == 0 | ||||
| 20899 | && $maximum_line_index == 0 ); | ||||
| 20900 | last; | ||||
| 20901 | } | ||||
| 20902 | |||||
| 20903 | goto NO_MATCH; | ||||
| 20904 | } | ||||
| 20905 | |||||
| 20906 | # Calculate amount of padding required to fit this in. | ||||
| 20907 | # $pad is the number of spaces by which we must increase | ||||
| 20908 | # the current field to squeeze in this field. | ||||
| 20909 | my $pad = | ||||
| 20910 | length( $$rfields[$j] ) - $old_line->current_field_width($j); | ||||
| 20911 | if ( $j == 0 ) { $pad += $leading_space_count; } | ||||
| 20912 | |||||
| 20913 | # remember max pads to limit marginal cases | ||||
| 20914 | if ( $alignment_token ne '#' ) { | ||||
| 20915 | if ( $pad > $max_pad ) { $max_pad = $pad } | ||||
| 20916 | if ( $pad < $min_pad ) { $min_pad = $pad } | ||||
| 20917 | } | ||||
| 20918 | if ( $is_good_alignment{$alignment_token} ) { | ||||
| 20919 | $saw_good_alignment = 1; | ||||
| 20920 | } | ||||
| 20921 | |||||
| 20922 | # If patterns don't match, we have to be careful... | ||||
| 20923 | if ( $$old_rpatterns[$j] ne $$rpatterns[$j] ) { | ||||
| 20924 | |||||
| 20925 | # flag this as a marginal match since patterns differ | ||||
| 20926 | $marginal_match = 1 | ||||
| 20927 | if ( $marginal_match == 0 && $maximum_line_index == 0 ); | ||||
| 20928 | |||||
| 20929 | # We have to be very careful about aligning commas | ||||
| 20930 | # when the pattern's don't match, because it can be | ||||
| 20931 | # worse to create an alignment where none is needed | ||||
| 20932 | # than to omit one. Here's an example where the ','s | ||||
| 20933 | # are not in named containers. The first line below | ||||
| 20934 | # should not match the next two: | ||||
| 20935 | # ( $a, $b ) = ( $b, $r ); | ||||
| 20936 | # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 ); | ||||
| 20937 | # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 ); | ||||
| 20938 | if ( $alignment_token eq ',' ) { | ||||
| 20939 | |||||
| 20940 | # do not align commas unless they are in named containers | ||||
| 20941 | goto NO_MATCH unless ( $new_tok =~ /[A-Za-z]/ ); | ||||
| 20942 | } | ||||
| 20943 | |||||
| 20944 | # do not align parens unless patterns match; | ||||
| 20945 | # large ugly spaces can occur in math expressions. | ||||
| 20946 | elsif ( $alignment_token eq '(' ) { | ||||
| 20947 | |||||
| 20948 | # But we can allow a match if the parens don't | ||||
| 20949 | # require any padding. | ||||
| 20950 | if ( $pad != 0 ) { goto NO_MATCH } | ||||
| 20951 | } | ||||
| 20952 | |||||
| 20953 | # Handle an '=' alignment with different patterns to | ||||
| 20954 | # the left. | ||||
| 20955 | elsif ( $alignment_token eq '=' ) { | ||||
| 20956 | |||||
| 20957 | # It is best to be a little restrictive when | ||||
| 20958 | # aligning '=' tokens. Here is an example of | ||||
| 20959 | # two lines that we will not align: | ||||
| 20960 | # my $variable=6; | ||||
| 20961 | # $bb=4; | ||||
| 20962 | # The problem is that one is a 'my' declaration, | ||||
| 20963 | # and the other isn't, so they're not very similar. | ||||
| 20964 | # We will filter these out by comparing the first | ||||
| 20965 | # letter of the pattern. This is crude, but works | ||||
| 20966 | # well enough. | ||||
| 20967 | if ( | ||||
| 20968 | substr( $$old_rpatterns[$j], 0, 1 ) ne | ||||
| 20969 | substr( $$rpatterns[$j], 0, 1 ) ) | ||||
| 20970 | { | ||||
| 20971 | goto NO_MATCH; | ||||
| 20972 | } | ||||
| 20973 | |||||
| 20974 | # If we pass that test, we'll call it a marginal match. | ||||
| 20975 | # Here is an example of a marginal match: | ||||
| 20976 | # $done{$$op} = 1; | ||||
| 20977 | # $op = compile_bblock($op); | ||||
| 20978 | # The left tokens are both identifiers, but | ||||
| 20979 | # one accesses a hash and the other doesn't. | ||||
| 20980 | # We'll let this be a tentative match and undo | ||||
| 20981 | # it later if we don't find more than 2 lines | ||||
| 20982 | # in the group. | ||||
| 20983 | elsif ( $maximum_line_index == 0 ) { | ||||
| 20984 | $marginal_match = | ||||
| 20985 | 2; # =2 prevents being undone below | ||||
| 20986 | } | ||||
| 20987 | } | ||||
| 20988 | } | ||||
| 20989 | |||||
| 20990 | # Don't let line with fewer fields increase column widths | ||||
| 20991 | # ( align3.t ) | ||||
| 20992 | if ( $maximum_field_index > $jmax ) { | ||||
| 20993 | |||||
| 20994 | # Exception: suspend this rule to allow last lines to join | ||||
| 20995 | if ( $pad > 0 ) { goto NO_MATCH; } | ||||
| 20996 | } | ||||
| 20997 | } ## end for my $j ( 0 .. $jlimit) | ||||
| 20998 | |||||
| 20999 | # Turn off the "marginal match" flag in some cases... | ||||
| 21000 | # A "marginal match" occurs when the alignment tokens agree | ||||
| 21001 | # but there are differences in the other tokens (patterns). | ||||
| 21002 | # If we leave the marginal match flag set, then the rule is that we | ||||
| 21003 | # will align only if there are more than two lines in the group. | ||||
| 21004 | # We will turn of the flag if we almost have a match | ||||
| 21005 | # and either we have seen a good alignment token or we | ||||
| 21006 | # just need a small pad (2 spaces) to fit. These rules are | ||||
| 21007 | # the result of experimentation. Tokens which misaligned by just | ||||
| 21008 | # one or two characters are annoying. On the other hand, | ||||
| 21009 | # large gaps to less important alignment tokens are also annoying. | ||||
| 21010 | if ( $marginal_match == 1 | ||||
| 21011 | && $jmax == $maximum_field_index | ||||
| 21012 | && ( $saw_good_alignment || ( $max_pad < 3 && $min_pad > -3 ) ) | ||||
| 21013 | ) | ||||
| 21014 | { | ||||
| 21015 | $marginal_match = 0; | ||||
| 21016 | } | ||||
| 21017 | ##print "marginal=$marginal_match saw=$saw_good_alignment jmax=$jmax max=$maximum_field_index maxpad=$max_pad minpad=$min_pad\n"; | ||||
| 21018 | } | ||||
| 21019 | |||||
| 21020 | # We have a match (even if marginal). | ||||
| 21021 | # If the current line has fewer fields than the current group | ||||
| 21022 | # but otherwise matches, copy the remaining group fields to | ||||
| 21023 | # make it a perfect match. | ||||
| 21024 | if ( $maximum_field_index > $jmax ) { | ||||
| 21025 | my $comment = $$rfields[$jmax]; | ||||
| 21026 | for $jmax ( $jlimit .. $maximum_field_index ) { | ||||
| 21027 | $$rtokens[$jmax] = $$old_rtokens[$jmax]; | ||||
| 21028 | $$rfields[ ++$jmax ] = ''; | ||||
| 21029 | $$rpatterns[$jmax] = $$old_rpatterns[$jmax]; | ||||
| 21030 | } | ||||
| 21031 | $$rfields[$jmax] = $comment; | ||||
| 21032 | $new_line->set_jmax($jmax); | ||||
| 21033 | } | ||||
| 21034 | return; | ||||
| 21035 | |||||
| 21036 | NO_MATCH: | ||||
| 21037 | ##print "BUBBA: no match jmax=$jmax max=$maximum_field_index $group_list_type lines=$maximum_line_index token=$$old_rtokens[0]\n"; | ||||
| 21038 | my_flush(); | ||||
| 21039 | return; | ||||
| 21040 | } | ||||
| 21041 | } | ||||
| 21042 | |||||
| 21043 | sub check_fit { | ||||
| 21044 | |||||
| 21045 | return unless ( $maximum_line_index >= 0 ); | ||||
| 21046 | my $new_line = shift; | ||||
| 21047 | my $old_line = shift; | ||||
| 21048 | |||||
| 21049 | my $jmax = $new_line->get_jmax(); | ||||
| 21050 | my $leading_space_count = $new_line->get_leading_space_count(); | ||||
| 21051 | my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment(); | ||||
| 21052 | my $rtokens = $new_line->get_rtokens(); | ||||
| 21053 | my $rfields = $new_line->get_rfields(); | ||||
| 21054 | my $rpatterns = $new_line->get_rpatterns(); | ||||
| 21055 | |||||
| 21056 | my $group_list_type = $group_lines[0]->get_list_type(); | ||||
| 21057 | |||||
| 21058 | my $padding_so_far = 0; | ||||
| 21059 | my $padding_available = $old_line->get_available_space_on_right(); | ||||
| 21060 | |||||
| 21061 | # save current columns in case this doesn't work | ||||
| 21062 | save_alignment_columns(); | ||||
| 21063 | |||||
| 21064 | my ( $j, $pad, $eight ); | ||||
| 21065 | my $maximum_field_index = $old_line->get_jmax(); | ||||
| 21066 | for $j ( 0 .. $jmax ) { | ||||
| 21067 | |||||
| 21068 | $pad = length( $$rfields[$j] ) - $old_line->current_field_width($j); | ||||
| 21069 | |||||
| 21070 | if ( $j == 0 ) { | ||||
| 21071 | $pad += $leading_space_count; | ||||
| 21072 | } | ||||
| 21073 | |||||
| 21074 | # remember largest gap of the group, excluding gap to side comment | ||||
| 21075 | if ( $pad < 0 | ||||
| 21076 | && $group_maximum_gap < -$pad | ||||
| 21077 | && $j > 0 | ||||
| 21078 | && $j < $jmax - 1 ) | ||||
| 21079 | { | ||||
| 21080 | $group_maximum_gap = -$pad; | ||||
| 21081 | } | ||||
| 21082 | |||||
| 21083 | next if $pad < 0; | ||||
| 21084 | |||||
| 21085 | ## This patch helps sometimes, but it doesn't check to see if | ||||
| 21086 | ## the line is too long even without the side comment. It needs | ||||
| 21087 | ## to be reworked. | ||||
| 21088 | ##don't let a long token with no trailing side comment push | ||||
| 21089 | ##side comments out, or end a group. (sidecmt1.t) | ||||
| 21090 | ##next if ($j==$jmax-1 && length($$rfields[$jmax])==0); | ||||
| 21091 | |||||
| 21092 | # This line will need space; lets see if we want to accept it.. | ||||
| 21093 | if ( | ||||
| 21094 | |||||
| 21095 | # not if this won't fit | ||||
| 21096 | ( $pad > $padding_available ) | ||||
| 21097 | |||||
| 21098 | # previously, there were upper bounds placed on padding here | ||||
| 21099 | # (maximum_whitespace_columns), but they were not really helpful | ||||
| 21100 | |||||
| 21101 | ) | ||||
| 21102 | { | ||||
| 21103 | |||||
| 21104 | # revert to starting state then flush; things didn't work out | ||||
| 21105 | restore_alignment_columns(); | ||||
| 21106 | my_flush(); | ||||
| 21107 | last; | ||||
| 21108 | } | ||||
| 21109 | |||||
| 21110 | # patch to avoid excessive gaps in previous lines, | ||||
| 21111 | # due to a line of fewer fields. | ||||
| 21112 | # return join( ".", | ||||
| 21113 | # $self->{"dfi"}, $self->{"aa"}, $self->rsvd, $self->{"rd"}, | ||||
| 21114 | # $self->{"area"}, $self->{"id"}, $self->{"sel"} ); | ||||
| 21115 | next if ( $jmax < $maximum_field_index && $j == $jmax - 1 ); | ||||
| 21116 | |||||
| 21117 | # looks ok, squeeze this field in | ||||
| 21118 | $old_line->increase_field_width( $j, $pad ); | ||||
| 21119 | $padding_available -= $pad; | ||||
| 21120 | |||||
| 21121 | # remember largest gap of the group, excluding gap to side comment | ||||
| 21122 | if ( $pad > $group_maximum_gap && $j > 0 && $j < $jmax - 1 ) { | ||||
| 21123 | $group_maximum_gap = $pad; | ||||
| 21124 | } | ||||
| 21125 | } | ||||
| 21126 | } | ||||
| 21127 | |||||
| 21128 | sub add_to_group { | ||||
| 21129 | |||||
| 21130 | # The current line either starts a new alignment group or is | ||||
| 21131 | # accepted into the current alignment group. | ||||
| 21132 | my $new_line = shift; | ||||
| 21133 | $group_lines[ ++$maximum_line_index ] = $new_line; | ||||
| 21134 | |||||
| 21135 | # initialize field lengths if starting new group | ||||
| 21136 | if ( $maximum_line_index == 0 ) { | ||||
| 21137 | |||||
| 21138 | my $jmax = $new_line->get_jmax(); | ||||
| 21139 | my $rfields = $new_line->get_rfields(); | ||||
| 21140 | my $rtokens = $new_line->get_rtokens(); | ||||
| 21141 | my $j; | ||||
| 21142 | my $col = $new_line->get_leading_space_count(); | ||||
| 21143 | |||||
| 21144 | for $j ( 0 .. $jmax ) { | ||||
| 21145 | $col += length( $$rfields[$j] ); | ||||
| 21146 | |||||
| 21147 | # create initial alignments for the new group | ||||
| 21148 | my $token = ""; | ||||
| 21149 | if ( $j < $jmax ) { $token = $$rtokens[$j] } | ||||
| 21150 | my $alignment = make_alignment( $col, $token ); | ||||
| 21151 | $new_line->set_alignment( $j, $alignment ); | ||||
| 21152 | } | ||||
| 21153 | |||||
| 21154 | $maximum_jmax_seen = $jmax; | ||||
| 21155 | $minimum_jmax_seen = $jmax; | ||||
| 21156 | } | ||||
| 21157 | |||||
| 21158 | # use previous alignments otherwise | ||||
| 21159 | else { | ||||
| 21160 | my @new_alignments = | ||||
| 21161 | $group_lines[ $maximum_line_index - 1 ]->get_alignments(); | ||||
| 21162 | $new_line->set_alignments(@new_alignments); | ||||
| 21163 | } | ||||
| 21164 | |||||
| 21165 | # remember group jmax extremes for next call to valign_input | ||||
| 21166 | $previous_minimum_jmax_seen = $minimum_jmax_seen; | ||||
| 21167 | $previous_maximum_jmax_seen = $maximum_jmax_seen; | ||||
| 21168 | } | ||||
| 21169 | |||||
| 21170 | sub dump_array { | ||||
| 21171 | |||||
| 21172 | # debug routine to dump array contents | ||||
| 21173 | local $" = ')('; | ||||
| 21174 | print STDOUT "(@_)\n"; | ||||
| 21175 | } | ||||
| 21176 | |||||
| 21177 | # flush() sends the current Perl::Tidy::VerticalAligner group down the | ||||
| 21178 | # pipeline to Perl::Tidy::FileWriter. | ||||
| 21179 | |||||
| 21180 | # This is the external flush, which also empties the buffer and cache | ||||
| 21181 | sub flush { | ||||
| 21182 | |||||
| 21183 | # the buffer must be emptied first, then any cached text | ||||
| 21184 | dump_valign_buffer(); | ||||
| 21185 | |||||
| 21186 | if ( $maximum_line_index < 0 ) { | ||||
| 21187 | if ($cached_line_type) { | ||||
| 21188 | $seqno_string = $cached_seqno_string; | ||||
| 21189 | valign_output_step_C( $cached_line_text, | ||||
| 21190 | $cached_line_leading_space_count, | ||||
| 21191 | $last_level_written ); | ||||
| 21192 | $cached_line_type = 0; | ||||
| 21193 | $cached_line_text = ""; | ||||
| 21194 | $cached_seqno_string = ""; | ||||
| 21195 | } | ||||
| 21196 | } | ||||
| 21197 | else { | ||||
| 21198 | my_flush(); | ||||
| 21199 | } | ||||
| 21200 | } | ||||
| 21201 | |||||
| 21202 | sub reduce_valign_buffer_indentation { | ||||
| 21203 | |||||
| 21204 | my ($diff) = @_; | ||||
| 21205 | if ( $valign_buffer_filling && $diff ) { | ||||
| 21206 | my $max_valign_buffer = @valign_buffer; | ||||
| 21207 | for ( my $i = 0 ; $i < $max_valign_buffer ; $i++ ) { | ||||
| 21208 | my ( $line, $leading_space_count, $level ) = | ||||
| 21209 | @{ $valign_buffer[$i] }; | ||||
| 21210 | my $ws = substr( $line, 0, $diff ); | ||||
| 21211 | if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) { | ||||
| 21212 | $line = substr( $line, $diff ); | ||||
| 21213 | } | ||||
| 21214 | if ( $leading_space_count >= $diff ) { | ||||
| 21215 | $leading_space_count -= $diff; | ||||
| 21216 | $level = level_change( $leading_space_count, $diff, $level ); | ||||
| 21217 | } | ||||
| 21218 | $valign_buffer[$i] = [ $line, $leading_space_count, $level ]; | ||||
| 21219 | } | ||||
| 21220 | } | ||||
| 21221 | } | ||||
| 21222 | |||||
| 21223 | sub level_change { | ||||
| 21224 | |||||
| 21225 | # compute decrease in level when we remove $diff spaces from the | ||||
| 21226 | # leading spaces | ||||
| 21227 | my ( $leading_space_count, $diff, $level ) = @_; | ||||
| 21228 | if ($rOpts_indent_columns) { | ||||
| 21229 | my $olev = | ||||
| 21230 | int( ( $leading_space_count + $diff ) / $rOpts_indent_columns ); | ||||
| 21231 | my $nlev = int( $leading_space_count / $rOpts_indent_columns ); | ||||
| 21232 | $level -= ( $olev - $nlev ); | ||||
| 21233 | if ( $level < 0 ) { $level = 0 } | ||||
| 21234 | } | ||||
| 21235 | return $level; | ||||
| 21236 | } | ||||
| 21237 | |||||
| 21238 | sub dump_valign_buffer { | ||||
| 21239 | if (@valign_buffer) { | ||||
| 21240 | foreach (@valign_buffer) { | ||||
| 21241 | valign_output_step_D( @{$_} ); | ||||
| 21242 | } | ||||
| 21243 | @valign_buffer = (); | ||||
| 21244 | } | ||||
| 21245 | $valign_buffer_filling = ""; | ||||
| 21246 | } | ||||
| 21247 | |||||
| 21248 | # This is the internal flush, which leaves the cache intact | ||||
| 21249 | sub my_flush { | ||||
| 21250 | |||||
| 21251 | return if ( $maximum_line_index < 0 ); | ||||
| 21252 | |||||
| 21253 | # handle a group of comment lines | ||||
| 21254 | if ( $group_type eq 'COMMENT' ) { | ||||
| 21255 | |||||
| 21256 | VALIGN_DEBUG_FLAG_APPEND0 && do { | ||||
| 21257 | my ( $a, $b, $c ) = caller(); | ||||
| 21258 | print STDOUT | ||||
| 21259 | "APPEND0: Flush called from $a $b $c for COMMENT group: lines=$maximum_line_index \n"; | ||||
| 21260 | |||||
| 21261 | }; | ||||
| 21262 | my $leading_space_count = $comment_leading_space_count; | ||||
| 21263 | my $leading_string = get_leading_string($leading_space_count); | ||||
| 21264 | |||||
| 21265 | # zero leading space count if any lines are too long | ||||
| 21266 | my $max_excess = 0; | ||||
| 21267 | for my $i ( 0 .. $maximum_line_index ) { | ||||
| 21268 | my $str = $group_lines[$i]; | ||||
| 21269 | my $excess = | ||||
| 21270 | length($str) + | ||||
| 21271 | $leading_space_count - | ||||
| 21272 | maximum_line_length_for_level($group_level); | ||||
| 21273 | if ( $excess > $max_excess ) { | ||||
| 21274 | $max_excess = $excess; | ||||
| 21275 | } | ||||
| 21276 | } | ||||
| 21277 | |||||
| 21278 | if ( $max_excess > 0 ) { | ||||
| 21279 | $leading_space_count -= $max_excess; | ||||
| 21280 | if ( $leading_space_count < 0 ) { $leading_space_count = 0 } | ||||
| 21281 | $last_outdented_line_at = | ||||
| 21282 | $file_writer_object->get_output_line_number(); | ||||
| 21283 | unless ($outdented_line_count) { | ||||
| 21284 | $first_outdented_line_at = $last_outdented_line_at; | ||||
| 21285 | } | ||||
| 21286 | $outdented_line_count += ( $maximum_line_index + 1 ); | ||||
| 21287 | } | ||||
| 21288 | |||||
| 21289 | # write the group of lines | ||||
| 21290 | my $outdent_long_lines = 0; | ||||
| 21291 | for my $i ( 0 .. $maximum_line_index ) { | ||||
| 21292 | valign_output_step_B( $leading_space_count, $group_lines[$i], 0, | ||||
| 21293 | $outdent_long_lines, "", $group_level ); | ||||
| 21294 | } | ||||
| 21295 | } | ||||
| 21296 | |||||
| 21297 | # handle a group of code lines | ||||
| 21298 | else { | ||||
| 21299 | |||||
| 21300 | VALIGN_DEBUG_FLAG_APPEND0 && do { | ||||
| 21301 | my $group_list_type = $group_lines[0]->get_list_type(); | ||||
| 21302 | my ( $a, $b, $c ) = caller(); | ||||
| 21303 | my $maximum_field_index = $group_lines[0]->get_jmax(); | ||||
| 21304 | print STDOUT | ||||
| 21305 | "APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n"; | ||||
| 21306 | |||||
| 21307 | }; | ||||
| 21308 | |||||
| 21309 | # some small groups are best left unaligned | ||||
| 21310 | my $do_not_align = decide_if_aligned(); | ||||
| 21311 | |||||
| 21312 | # optimize side comment location | ||||
| 21313 | $do_not_align = adjust_side_comment($do_not_align); | ||||
| 21314 | |||||
| 21315 | # recover spaces for -lp option if possible | ||||
| 21316 | my $extra_leading_spaces = get_extra_leading_spaces(); | ||||
| 21317 | |||||
| 21318 | # all lines of this group have the same basic leading spacing | ||||
| 21319 | my $group_leader_length = $group_lines[0]->get_leading_space_count(); | ||||
| 21320 | |||||
| 21321 | # add extra leading spaces if helpful | ||||
| 21322 | my $min_ci_gap = improve_continuation_indentation( $do_not_align, | ||||
| 21323 | $group_leader_length ); | ||||
| 21324 | |||||
| 21325 | # loop to output all lines | ||||
| 21326 | for my $i ( 0 .. $maximum_line_index ) { | ||||
| 21327 | my $line = $group_lines[$i]; | ||||
| 21328 | valign_output_step_A( $line, $min_ci_gap, $do_not_align, | ||||
| 21329 | $group_leader_length, $extra_leading_spaces ); | ||||
| 21330 | } | ||||
| 21331 | } | ||||
| 21332 | initialize_for_new_group(); | ||||
| 21333 | } | ||||
| 21334 | |||||
| 21335 | sub decide_if_aligned { | ||||
| 21336 | |||||
| 21337 | # Do not try to align two lines which are not really similar | ||||
| 21338 | return unless $maximum_line_index == 1; | ||||
| 21339 | return if ($is_matching_terminal_line); | ||||
| 21340 | |||||
| 21341 | my $group_list_type = $group_lines[0]->get_list_type(); | ||||
| 21342 | |||||
| 21343 | my $do_not_align = ( | ||||
| 21344 | |||||
| 21345 | # always align lists | ||||
| 21346 | !$group_list_type | ||||
| 21347 | |||||
| 21348 | && ( | ||||
| 21349 | |||||
| 21350 | # don't align if it was just a marginal match | ||||
| 21351 | $marginal_match | ||||
| 21352 | |||||
| 21353 | # don't align two lines with big gap | ||||
| 21354 | || $group_maximum_gap > 12 | ||||
| 21355 | |||||
| 21356 | # or lines with differing number of alignment tokens | ||||
| 21357 | # TODO: this could be improved. It occasionally rejects | ||||
| 21358 | # good matches. | ||||
| 21359 | || $previous_maximum_jmax_seen != $previous_minimum_jmax_seen | ||||
| 21360 | ) | ||||
| 21361 | ); | ||||
| 21362 | |||||
| 21363 | # But try to convert them into a simple comment group if the first line | ||||
| 21364 | # a has side comment | ||||
| 21365 | my $rfields = $group_lines[0]->get_rfields(); | ||||
| 21366 | my $maximum_field_index = $group_lines[0]->get_jmax(); | ||||
| 21367 | if ( $do_not_align | ||||
| 21368 | && ( $maximum_line_index > 0 ) | ||||
| 21369 | && ( length( $$rfields[$maximum_field_index] ) > 0 ) ) | ||||
| 21370 | { | ||||
| 21371 | combine_fields(); | ||||
| 21372 | $do_not_align = 0; | ||||
| 21373 | } | ||||
| 21374 | return $do_not_align; | ||||
| 21375 | } | ||||
| 21376 | |||||
| 21377 | sub adjust_side_comment { | ||||
| 21378 | |||||
| 21379 | my $do_not_align = shift; | ||||
| 21380 | |||||
| 21381 | # let's see if we can move the side comment field out a little | ||||
| 21382 | # to improve readability (the last field is always a side comment field) | ||||
| 21383 | my $have_side_comment = 0; | ||||
| 21384 | my $first_side_comment_line = -1; | ||||
| 21385 | my $maximum_field_index = $group_lines[0]->get_jmax(); | ||||
| 21386 | for my $i ( 0 .. $maximum_line_index ) { | ||||
| 21387 | my $line = $group_lines[$i]; | ||||
| 21388 | |||||
| 21389 | if ( length( $line->get_rfields()->[$maximum_field_index] ) ) { | ||||
| 21390 | $have_side_comment = 1; | ||||
| 21391 | $first_side_comment_line = $i; | ||||
| 21392 | last; | ||||
| 21393 | } | ||||
| 21394 | } | ||||
| 21395 | |||||
| 21396 | my $kmax = $maximum_field_index + 1; | ||||
| 21397 | |||||
| 21398 | if ($have_side_comment) { | ||||
| 21399 | |||||
| 21400 | my $line = $group_lines[0]; | ||||
| 21401 | |||||
| 21402 | # the maximum space without exceeding the line length: | ||||
| 21403 | my $avail = $line->get_available_space_on_right(); | ||||
| 21404 | |||||
| 21405 | # try to use the previous comment column | ||||
| 21406 | my $side_comment_column = $line->get_column( $kmax - 2 ); | ||||
| 21407 | my $move = $last_comment_column - $side_comment_column; | ||||
| 21408 | |||||
| 21409 | ## my $sc_line0 = $side_comment_history[0]->[0]; | ||||
| 21410 | ## my $sc_col0 = $side_comment_history[0]->[1]; | ||||
| 21411 | ## my $sc_line1 = $side_comment_history[1]->[0]; | ||||
| 21412 | ## my $sc_col1 = $side_comment_history[1]->[1]; | ||||
| 21413 | ## my $sc_line2 = $side_comment_history[2]->[0]; | ||||
| 21414 | ## my $sc_col2 = $side_comment_history[2]->[1]; | ||||
| 21415 | ## | ||||
| 21416 | ## # FUTURE UPDATES: | ||||
| 21417 | ## # Be sure to ignore 'do not align' and '} # end comments' | ||||
| 21418 | ## # Find first $move > 0 and $move <= $avail as follows: | ||||
| 21419 | ## # 1. try sc_col1 if sc_col1 == sc_col0 && (line-sc_line0) < 12 | ||||
| 21420 | ## # 2. try sc_col2 if (line-sc_line2) < 12 | ||||
| 21421 | ## # 3. try min possible space, plus up to 8, | ||||
| 21422 | ## # 4. try min possible space | ||||
| 21423 | |||||
| 21424 | if ( $kmax > 0 && !$do_not_align ) { | ||||
| 21425 | |||||
| 21426 | # but if this doesn't work, give up and use the minimum space | ||||
| 21427 | if ( $move > $avail ) { | ||||
| 21428 | $move = $rOpts_minimum_space_to_comment - 1; | ||||
| 21429 | } | ||||
| 21430 | |||||
| 21431 | # but we want some minimum space to the comment | ||||
| 21432 | my $min_move = $rOpts_minimum_space_to_comment - 1; | ||||
| 21433 | if ( $move >= 0 | ||||
| 21434 | && $last_side_comment_length > 0 | ||||
| 21435 | && ( $first_side_comment_line == 0 ) | ||||
| 21436 | && $group_level == $last_level_written ) | ||||
| 21437 | { | ||||
| 21438 | $min_move = 0; | ||||
| 21439 | } | ||||
| 21440 | |||||
| 21441 | if ( $move < $min_move ) { | ||||
| 21442 | $move = $min_move; | ||||
| 21443 | } | ||||
| 21444 | |||||
| 21445 | # previously, an upper bound was placed on $move here, | ||||
| 21446 | # (maximum_space_to_comment), but it was not helpful | ||||
| 21447 | |||||
| 21448 | # don't exceed the available space | ||||
| 21449 | if ( $move > $avail ) { $move = $avail } | ||||
| 21450 | |||||
| 21451 | # we can only increase space, never decrease | ||||
| 21452 | if ( $move > 0 ) { | ||||
| 21453 | $line->increase_field_width( $maximum_field_index - 1, $move ); | ||||
| 21454 | } | ||||
| 21455 | |||||
| 21456 | # remember this column for the next group | ||||
| 21457 | $last_comment_column = $line->get_column( $kmax - 2 ); | ||||
| 21458 | } | ||||
| 21459 | else { | ||||
| 21460 | |||||
| 21461 | # try to at least line up the existing side comment location | ||||
| 21462 | if ( $kmax > 0 && $move > 0 && $move < $avail ) { | ||||
| 21463 | $line->increase_field_width( $maximum_field_index - 1, $move ); | ||||
| 21464 | $do_not_align = 0; | ||||
| 21465 | } | ||||
| 21466 | |||||
| 21467 | # reset side comment column if we can't align | ||||
| 21468 | else { | ||||
| 21469 | forget_side_comment(); | ||||
| 21470 | } | ||||
| 21471 | } | ||||
| 21472 | } | ||||
| 21473 | return $do_not_align; | ||||
| 21474 | } | ||||
| 21475 | |||||
| 21476 | sub improve_continuation_indentation { | ||||
| 21477 | my ( $do_not_align, $group_leader_length ) = @_; | ||||
| 21478 | |||||
| 21479 | # See if we can increase the continuation indentation | ||||
| 21480 | # to move all continuation lines closer to the next field | ||||
| 21481 | # (unless it is a comment). | ||||
| 21482 | # | ||||
| 21483 | # '$min_ci_gap'is the extra indentation that we may need to introduce. | ||||
| 21484 | # We will only introduce this to fields which already have some ci. | ||||
| 21485 | # Without this variable, we would occasionally get something like this | ||||
| 21486 | # (Complex.pm): | ||||
| 21487 | # | ||||
| 21488 | # use overload '+' => \&plus, | ||||
| 21489 | # '-' => \&minus, | ||||
| 21490 | # '*' => \&multiply, | ||||
| 21491 | # ... | ||||
| 21492 | # 'tan' => \&tan, | ||||
| 21493 | # 'atan2' => \&atan2, | ||||
| 21494 | # | ||||
| 21495 | # Whereas with this variable, we can shift variables over to get this: | ||||
| 21496 | # | ||||
| 21497 | # use overload '+' => \&plus, | ||||
| 21498 | # '-' => \&minus, | ||||
| 21499 | # '*' => \&multiply, | ||||
| 21500 | # ... | ||||
| 21501 | # 'tan' => \&tan, | ||||
| 21502 | # 'atan2' => \&atan2, | ||||
| 21503 | |||||
| 21504 | ## Deactivated#################### | ||||
| 21505 | # The trouble with this patch is that it may, for example, | ||||
| 21506 | # move in some 'or's or ':'s, and leave some out, so that the | ||||
| 21507 | # left edge alignment suffers. | ||||
| 21508 | return 0; | ||||
| 21509 | ########################################### | ||||
| 21510 | |||||
| 21511 | my $maximum_field_index = $group_lines[0]->get_jmax(); | ||||
| 21512 | |||||
| 21513 | my $min_ci_gap = maximum_line_length_for_level($group_level); | ||||
| 21514 | if ( $maximum_field_index > 1 && !$do_not_align ) { | ||||
| 21515 | |||||
| 21516 | for my $i ( 0 .. $maximum_line_index ) { | ||||
| 21517 | my $line = $group_lines[$i]; | ||||
| 21518 | my $leading_space_count = $line->get_leading_space_count(); | ||||
| 21519 | my $rfields = $line->get_rfields(); | ||||
| 21520 | |||||
| 21521 | my $gap = | ||||
| 21522 | $line->get_column(0) - | ||||
| 21523 | $leading_space_count - | ||||
| 21524 | length( $$rfields[0] ); | ||||
| 21525 | |||||
| 21526 | if ( $leading_space_count > $group_leader_length ) { | ||||
| 21527 | if ( $gap < $min_ci_gap ) { $min_ci_gap = $gap } | ||||
| 21528 | } | ||||
| 21529 | } | ||||
| 21530 | |||||
| 21531 | if ( $min_ci_gap >= maximum_line_length_for_level($group_level) ) { | ||||
| 21532 | $min_ci_gap = 0; | ||||
| 21533 | } | ||||
| 21534 | } | ||||
| 21535 | else { | ||||
| 21536 | $min_ci_gap = 0; | ||||
| 21537 | } | ||||
| 21538 | return $min_ci_gap; | ||||
| 21539 | } | ||||
| 21540 | |||||
| 21541 | sub valign_output_step_A { | ||||
| 21542 | |||||
| 21543 | ############################################################### | ||||
| 21544 | # This is Step A in writing vertically aligned lines. | ||||
| 21545 | # The line is prepared according to the alignments which have | ||||
| 21546 | # been found and shipped to the next step. | ||||
| 21547 | ############################################################### | ||||
| 21548 | |||||
| 21549 | my ( $line, $min_ci_gap, $do_not_align, $group_leader_length, | ||||
| 21550 | $extra_leading_spaces ) | ||||
| 21551 | = @_; | ||||
| 21552 | my $rfields = $line->get_rfields(); | ||||
| 21553 | my $leading_space_count = $line->get_leading_space_count(); | ||||
| 21554 | my $outdent_long_lines = $line->get_outdent_long_lines(); | ||||
| 21555 | my $maximum_field_index = $line->get_jmax(); | ||||
| 21556 | my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags(); | ||||
| 21557 | |||||
| 21558 | # add any extra spaces | ||||
| 21559 | if ( $leading_space_count > $group_leader_length ) { | ||||
| 21560 | $leading_space_count += $min_ci_gap; | ||||
| 21561 | } | ||||
| 21562 | |||||
| 21563 | my $str = $$rfields[0]; | ||||
| 21564 | |||||
| 21565 | # loop to concatenate all fields of this line and needed padding | ||||
| 21566 | my $total_pad_count = 0; | ||||
| 21567 | my ( $j, $pad ); | ||||
| 21568 | for $j ( 1 .. $maximum_field_index ) { | ||||
| 21569 | |||||
| 21570 | # skip zero-length side comments | ||||
| 21571 | last | ||||
| 21572 | if ( ( $j == $maximum_field_index ) | ||||
| 21573 | && ( !defined( $$rfields[$j] ) || ( length( $$rfields[$j] ) == 0 ) ) | ||||
| 21574 | ); | ||||
| 21575 | |||||
| 21576 | # compute spaces of padding before this field | ||||
| 21577 | my $col = $line->get_column( $j - 1 ); | ||||
| 21578 | $pad = $col - ( length($str) + $leading_space_count ); | ||||
| 21579 | |||||
| 21580 | if ($do_not_align) { | ||||
| 21581 | $pad = | ||||
| 21582 | ( $j < $maximum_field_index ) | ||||
| 21583 | ? 0 | ||||
| 21584 | : $rOpts_minimum_space_to_comment - 1; | ||||
| 21585 | } | ||||
| 21586 | |||||
| 21587 | # if the -fpsc flag is set, move the side comment to the selected | ||||
| 21588 | # column if and only if it is possible, ignoring constraints on | ||||
| 21589 | # line length and minimum space to comment | ||||
| 21590 | if ( $rOpts_fixed_position_side_comment && $j == $maximum_field_index ) | ||||
| 21591 | { | ||||
| 21592 | my $newpad = $pad + $rOpts_fixed_position_side_comment - $col - 1; | ||||
| 21593 | if ( $newpad >= 0 ) { $pad = $newpad; } | ||||
| 21594 | } | ||||
| 21595 | |||||
| 21596 | # accumulate the padding | ||||
| 21597 | if ( $pad > 0 ) { $total_pad_count += $pad; } | ||||
| 21598 | |||||
| 21599 | # add this field | ||||
| 21600 | if ( !defined $$rfields[$j] ) { | ||||
| 21601 | write_diagnostics("UNDEFined field at j=$j\n"); | ||||
| 21602 | } | ||||
| 21603 | |||||
| 21604 | # only add padding when we have a finite field; | ||||
| 21605 | # this avoids extra terminal spaces if we have empty fields | ||||
| 21606 | if ( length( $$rfields[$j] ) > 0 ) { | ||||
| 21607 | $str .= ' ' x $total_pad_count; | ||||
| 21608 | $total_pad_count = 0; | ||||
| 21609 | $str .= $$rfields[$j]; | ||||
| 21610 | } | ||||
| 21611 | else { | ||||
| 21612 | $total_pad_count = 0; | ||||
| 21613 | } | ||||
| 21614 | |||||
| 21615 | # update side comment history buffer | ||||
| 21616 | if ( $j == $maximum_field_index ) { | ||||
| 21617 | my $lineno = $file_writer_object->get_output_line_number(); | ||||
| 21618 | shift @side_comment_history; | ||||
| 21619 | push @side_comment_history, [ $lineno, $col ]; | ||||
| 21620 | } | ||||
| 21621 | } | ||||
| 21622 | |||||
| 21623 | my $side_comment_length = ( length( $$rfields[$maximum_field_index] ) ); | ||||
| 21624 | |||||
| 21625 | # ship this line off | ||||
| 21626 | valign_output_step_B( $leading_space_count + $extra_leading_spaces, | ||||
| 21627 | $str, $side_comment_length, $outdent_long_lines, | ||||
| 21628 | $rvertical_tightness_flags, $group_level ); | ||||
| 21629 | } | ||||
| 21630 | |||||
| 21631 | sub get_extra_leading_spaces { | ||||
| 21632 | |||||
| 21633 | #---------------------------------------------------------- | ||||
| 21634 | # Define any extra indentation space (for the -lp option). | ||||
| 21635 | # Here is why: | ||||
| 21636 | # If a list has side comments, sub scan_list must dump the | ||||
| 21637 | # list before it sees everything. When this happens, it sets | ||||
| 21638 | # the indentation to the standard scheme, but notes how | ||||
| 21639 | # many spaces it would have liked to use. We may be able | ||||
| 21640 | # to recover that space here in the event that all of the | ||||
| 21641 | # lines of a list are back together again. | ||||
| 21642 | #---------------------------------------------------------- | ||||
| 21643 | |||||
| 21644 | my $extra_leading_spaces = 0; | ||||
| 21645 | if ($extra_indent_ok) { | ||||
| 21646 | my $object = $group_lines[0]->get_indentation(); | ||||
| 21647 | if ( ref($object) ) { | ||||
| 21648 | my $extra_indentation_spaces_wanted = | ||||
| 21649 | get_RECOVERABLE_SPACES($object); | ||||
| 21650 | |||||
| 21651 | # all indentation objects must be the same | ||||
| 21652 | my $i; | ||||
| 21653 | for $i ( 1 .. $maximum_line_index ) { | ||||
| 21654 | if ( $object != $group_lines[$i]->get_indentation() ) { | ||||
| 21655 | $extra_indentation_spaces_wanted = 0; | ||||
| 21656 | last; | ||||
| 21657 | } | ||||
| 21658 | } | ||||
| 21659 | |||||
| 21660 | if ($extra_indentation_spaces_wanted) { | ||||
| 21661 | |||||
| 21662 | # the maximum space without exceeding the line length: | ||||
| 21663 | my $avail = $group_lines[0]->get_available_space_on_right(); | ||||
| 21664 | $extra_leading_spaces = | ||||
| 21665 | ( $avail > $extra_indentation_spaces_wanted ) | ||||
| 21666 | ? $extra_indentation_spaces_wanted | ||||
| 21667 | : $avail; | ||||
| 21668 | |||||
| 21669 | # update the indentation object because with -icp the terminal | ||||
| 21670 | # ');' will use the same adjustment. | ||||
| 21671 | $object->permanently_decrease_AVAILABLE_SPACES( | ||||
| 21672 | -$extra_leading_spaces ); | ||||
| 21673 | } | ||||
| 21674 | } | ||||
| 21675 | } | ||||
| 21676 | return $extra_leading_spaces; | ||||
| 21677 | } | ||||
| 21678 | |||||
| 21679 | sub combine_fields { | ||||
| 21680 | |||||
| 21681 | # combine all fields except for the comment field ( sidecmt.t ) | ||||
| 21682 | # Uses global variables: | ||||
| 21683 | # @group_lines | ||||
| 21684 | # $maximum_line_index | ||||
| 21685 | my ( $j, $k ); | ||||
| 21686 | my $maximum_field_index = $group_lines[0]->get_jmax(); | ||||
| 21687 | for ( $j = 0 ; $j <= $maximum_line_index ; $j++ ) { | ||||
| 21688 | my $line = $group_lines[$j]; | ||||
| 21689 | my $rfields = $line->get_rfields(); | ||||
| 21690 | foreach ( 1 .. $maximum_field_index - 1 ) { | ||||
| 21691 | $$rfields[0] .= $$rfields[$_]; | ||||
| 21692 | } | ||||
| 21693 | $$rfields[1] = $$rfields[$maximum_field_index]; | ||||
| 21694 | |||||
| 21695 | $line->set_jmax(1); | ||||
| 21696 | $line->set_column( 0, 0 ); | ||||
| 21697 | $line->set_column( 1, 0 ); | ||||
| 21698 | |||||
| 21699 | } | ||||
| 21700 | $maximum_field_index = 1; | ||||
| 21701 | |||||
| 21702 | for $j ( 0 .. $maximum_line_index ) { | ||||
| 21703 | my $line = $group_lines[$j]; | ||||
| 21704 | my $rfields = $line->get_rfields(); | ||||
| 21705 | for $k ( 0 .. $maximum_field_index ) { | ||||
| 21706 | my $pad = length( $$rfields[$k] ) - $line->current_field_width($k); | ||||
| 21707 | if ( $k == 0 ) { | ||||
| 21708 | $pad += $group_lines[$j]->get_leading_space_count(); | ||||
| 21709 | } | ||||
| 21710 | |||||
| 21711 | if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) } | ||||
| 21712 | |||||
| 21713 | } | ||||
| 21714 | } | ||||
| 21715 | } | ||||
| 21716 | |||||
| 21717 | sub get_output_line_number { | ||||
| 21718 | |||||
| 21719 | # the output line number reported to a caller is the number of items | ||||
| 21720 | # written plus the number of items in the buffer | ||||
| 21721 | my $self = shift; | ||||
| 21722 | 1 + $maximum_line_index + $file_writer_object->get_output_line_number(); | ||||
| 21723 | } | ||||
| 21724 | |||||
| 21725 | sub valign_output_step_B { | ||||
| 21726 | |||||
| 21727 | ############################################################### | ||||
| 21728 | # This is Step B in writing vertically aligned lines. | ||||
| 21729 | # Vertical tightness is applied according to preset flags. | ||||
| 21730 | # In particular this routine handles stacking of opening | ||||
| 21731 | # and closing tokens. | ||||
| 21732 | ############################################################### | ||||
| 21733 | |||||
| 21734 | my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines, | ||||
| 21735 | $rvertical_tightness_flags, $level ) | ||||
| 21736 | = @_; | ||||
| 21737 | |||||
| 21738 | # handle outdenting of long lines: | ||||
| 21739 | if ($outdent_long_lines) { | ||||
| 21740 | my $excess = | ||||
| 21741 | length($str) - | ||||
| 21742 | $side_comment_length + | ||||
| 21743 | $leading_space_count - | ||||
| 21744 | maximum_line_length_for_level($level); | ||||
| 21745 | if ( $excess > 0 ) { | ||||
| 21746 | $leading_space_count = 0; | ||||
| 21747 | $last_outdented_line_at = | ||||
| 21748 | $file_writer_object->get_output_line_number(); | ||||
| 21749 | |||||
| 21750 | unless ($outdented_line_count) { | ||||
| 21751 | $first_outdented_line_at = $last_outdented_line_at; | ||||
| 21752 | } | ||||
| 21753 | $outdented_line_count++; | ||||
| 21754 | } | ||||
| 21755 | } | ||||
| 21756 | |||||
| 21757 | # Make preliminary leading whitespace. It could get changed | ||||
| 21758 | # later by entabbing, so we have to keep track of any changes | ||||
| 21759 | # to the leading_space_count from here on. | ||||
| 21760 | my $leading_string = | ||||
| 21761 | $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : ""; | ||||
| 21762 | |||||
| 21763 | # Unpack any recombination data; it was packed by | ||||
| 21764 | # sub send_lines_to_vertical_aligner. Contents: | ||||
| 21765 | # | ||||
| 21766 | # [0] type: 1=opening non-block 2=closing non-block | ||||
| 21767 | # 3=opening block brace 4=closing block brace | ||||
| 21768 | # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok | ||||
| 21769 | # if closing: spaces of padding to use | ||||
| 21770 | # [2] sequence number of container | ||||
| 21771 | # [3] valid flag: do not append if this flag is false | ||||
| 21772 | # | ||||
| 21773 | my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg, | ||||
| 21774 | $seqno_end ); | ||||
| 21775 | if ($rvertical_tightness_flags) { | ||||
| 21776 | ( | ||||
| 21777 | $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg, | ||||
| 21778 | $seqno_end | ||||
| 21779 | ) = @{$rvertical_tightness_flags}; | ||||
| 21780 | } | ||||
| 21781 | |||||
| 21782 | $seqno_string = $seqno_end; | ||||
| 21783 | |||||
| 21784 | # handle any cached line .. | ||||
| 21785 | # either append this line to it or write it out | ||||
| 21786 | if ( length($cached_line_text) ) { | ||||
| 21787 | |||||
| 21788 | # Dump an invalid cached line | ||||
| 21789 | if ( !$cached_line_valid ) { | ||||
| 21790 | valign_output_step_C( $cached_line_text, | ||||
| 21791 | $cached_line_leading_space_count, | ||||
| 21792 | $last_level_written ); | ||||
| 21793 | } | ||||
| 21794 | |||||
| 21795 | # Handle cached line ending in OPENING tokens | ||||
| 21796 | elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) { | ||||
| 21797 | |||||
| 21798 | my $gap = $leading_space_count - length($cached_line_text); | ||||
| 21799 | |||||
| 21800 | # handle option of just one tight opening per line: | ||||
| 21801 | if ( $cached_line_flag == 1 ) { | ||||
| 21802 | if ( defined($open_or_close) && $open_or_close == 1 ) { | ||||
| 21803 | $gap = -1; | ||||
| 21804 | } | ||||
| 21805 | } | ||||
| 21806 | |||||
| 21807 | if ( $gap >= 0 && defined($seqno_beg) ) { | ||||
| 21808 | $leading_string = $cached_line_text . ' ' x $gap; | ||||
| 21809 | $leading_space_count = $cached_line_leading_space_count; | ||||
| 21810 | $seqno_string = $cached_seqno_string . ':' . $seqno_beg; | ||||
| 21811 | $level = $last_level_written; | ||||
| 21812 | } | ||||
| 21813 | else { | ||||
| 21814 | valign_output_step_C( $cached_line_text, | ||||
| 21815 | $cached_line_leading_space_count, | ||||
| 21816 | $last_level_written ); | ||||
| 21817 | } | ||||
| 21818 | } | ||||
| 21819 | |||||
| 21820 | # Handle cached line ending in CLOSING tokens | ||||
| 21821 | else { | ||||
| 21822 | my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str; | ||||
| 21823 | if ( | ||||
| 21824 | |||||
| 21825 | # The new line must start with container | ||||
| 21826 | $seqno_beg | ||||
| 21827 | |||||
| 21828 | # The container combination must be okay.. | ||||
| 21829 | && ( | ||||
| 21830 | |||||
| 21831 | # okay to combine like types | ||||
| 21832 | ( $open_or_close == $cached_line_type ) | ||||
| 21833 | |||||
| 21834 | # closing block brace may append to non-block | ||||
| 21835 | || ( $cached_line_type == 2 && $open_or_close == 4 ) | ||||
| 21836 | |||||
| 21837 | # something like ');' | ||||
| 21838 | || ( !$open_or_close && $cached_line_type == 2 ) | ||||
| 21839 | |||||
| 21840 | ) | ||||
| 21841 | |||||
| 21842 | # The combined line must fit | ||||
| 21843 | && ( | ||||
| 21844 | length($test_line) <= | ||||
| 21845 | maximum_line_length_for_level($last_level_written) ) | ||||
| 21846 | ) | ||||
| 21847 | { | ||||
| 21848 | |||||
| 21849 | $seqno_string = $cached_seqno_string . ':' . $seqno_beg; | ||||
| 21850 | |||||
| 21851 | # Patch to outdent closing tokens ending # in ');' | ||||
| 21852 | # If we are joining a line like ');' to a previous stacked | ||||
| 21853 | # set of closing tokens, then decide if we may outdent the | ||||
| 21854 | # combined stack to the indentation of the ');'. Since we | ||||
| 21855 | # should not normally outdent any of the other tokens more than | ||||
| 21856 | # the indentation of the lines that contained them, we will | ||||
| 21857 | # only do this if all of the corresponding opening | ||||
| 21858 | # tokens were on the same line. This can happen with | ||||
| 21859 | # -sot and -sct. For example, it is ok here: | ||||
| 21860 | # __PACKAGE__->load_components( qw( | ||||
| 21861 | # PK::Auto | ||||
| 21862 | # Core | ||||
| 21863 | # )); | ||||
| 21864 | # | ||||
| 21865 | # But, for example, we do not outdent in this example because | ||||
| 21866 | # that would put the closing sub brace out farther than the | ||||
| 21867 | # opening sub brace: | ||||
| 21868 | # | ||||
| 21869 | # perltidy -sot -sct | ||||
| 21870 | # $c->Tk::bind( | ||||
| 21871 | # '<Control-f>' => sub { | ||||
| 21872 | # my ($c) = @_; | ||||
| 21873 | # my $e = $c->XEvent; | ||||
| 21874 | # itemsUnderArea $c; | ||||
| 21875 | # } ); | ||||
| 21876 | # | ||||
| 21877 | if ( $str =~ /^\);/ && $cached_line_text =~ /^[\)\}\]\s]*$/ ) { | ||||
| 21878 | |||||
| 21879 | # The way to tell this is if the stacked sequence numbers | ||||
| 21880 | # of this output line are the reverse of the stacked | ||||
| 21881 | # sequence numbers of the previous non-blank line of | ||||
| 21882 | # sequence numbers. So we can join if the previous | ||||
| 21883 | # nonblank string of tokens is the mirror image. For | ||||
| 21884 | # example if stack )}] is 13:8:6 then we are looking for a | ||||
| 21885 | # leading stack like [{( which is 6:8:13 We only need to | ||||
| 21886 | # check the two ends, because the intermediate tokens must | ||||
| 21887 | # fall in order. Note on speed: having to split on colons | ||||
| 21888 | # and eliminate multiple colons might appear to be slow, | ||||
| 21889 | # but it's not an issue because we almost never come | ||||
| 21890 | # through here. In a typical file we don't. | ||||
| 21891 | $seqno_string =~ s/^:+//; | ||||
| 21892 | $last_nonblank_seqno_string =~ s/^:+//; | ||||
| 21893 | $seqno_string =~ s/:+/:/g; | ||||
| 21894 | $last_nonblank_seqno_string =~ s/:+/:/g; | ||||
| 21895 | |||||
| 21896 | # how many spaces can we outdent? | ||||
| 21897 | my $diff = | ||||
| 21898 | $cached_line_leading_space_count - $leading_space_count; | ||||
| 21899 | if ( $diff > 0 | ||||
| 21900 | && length($seqno_string) | ||||
| 21901 | && length($last_nonblank_seqno_string) == | ||||
| 21902 | length($seqno_string) ) | ||||
| 21903 | { | ||||
| 21904 | my @seqno_last = | ||||
| 21905 | ( split ':', $last_nonblank_seqno_string ); | ||||
| 21906 | my @seqno_now = ( split ':', $seqno_string ); | ||||
| 21907 | if ( $seqno_now[-1] == $seqno_last[0] | ||||
| 21908 | && $seqno_now[0] == $seqno_last[-1] ) | ||||
| 21909 | { | ||||
| 21910 | |||||
| 21911 | # OK to outdent .. | ||||
| 21912 | # for absolute safety, be sure we only remove | ||||
| 21913 | # whitespace | ||||
| 21914 | my $ws = substr( $test_line, 0, $diff ); | ||||
| 21915 | if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) { | ||||
| 21916 | |||||
| 21917 | $test_line = substr( $test_line, $diff ); | ||||
| 21918 | $cached_line_leading_space_count -= $diff; | ||||
| 21919 | $last_level_written = | ||||
| 21920 | level_change( | ||||
| 21921 | $cached_line_leading_space_count, | ||||
| 21922 | $diff, $last_level_written ); | ||||
| 21923 | reduce_valign_buffer_indentation($diff); | ||||
| 21924 | } | ||||
| 21925 | |||||
| 21926 | # shouldn't happen, but not critical: | ||||
| 21927 | ##else { | ||||
| 21928 | ## ERROR transferring indentation here | ||||
| 21929 | ##} | ||||
| 21930 | } | ||||
| 21931 | } | ||||
| 21932 | } | ||||
| 21933 | |||||
| 21934 | $str = $test_line; | ||||
| 21935 | $leading_string = ""; | ||||
| 21936 | $leading_space_count = $cached_line_leading_space_count; | ||||
| 21937 | $level = $last_level_written; | ||||
| 21938 | } | ||||
| 21939 | else { | ||||
| 21940 | valign_output_step_C( $cached_line_text, | ||||
| 21941 | $cached_line_leading_space_count, | ||||
| 21942 | $last_level_written ); | ||||
| 21943 | } | ||||
| 21944 | } | ||||
| 21945 | } | ||||
| 21946 | $cached_line_type = 0; | ||||
| 21947 | $cached_line_text = ""; | ||||
| 21948 | |||||
| 21949 | # make the line to be written | ||||
| 21950 | my $line = $leading_string . $str; | ||||
| 21951 | |||||
| 21952 | # write or cache this line | ||||
| 21953 | if ( !$open_or_close || $side_comment_length > 0 ) { | ||||
| 21954 | valign_output_step_C( $line, $leading_space_count, $level ); | ||||
| 21955 | } | ||||
| 21956 | else { | ||||
| 21957 | $cached_line_text = $line; | ||||
| 21958 | $cached_line_type = $open_or_close; | ||||
| 21959 | $cached_line_flag = $tightness_flag; | ||||
| 21960 | $cached_seqno = $seqno; | ||||
| 21961 | $cached_line_valid = $valid; | ||||
| 21962 | $cached_line_leading_space_count = $leading_space_count; | ||||
| 21963 | $cached_seqno_string = $seqno_string; | ||||
| 21964 | } | ||||
| 21965 | |||||
| 21966 | $last_level_written = $level; | ||||
| 21967 | $last_side_comment_length = $side_comment_length; | ||||
| 21968 | $extra_indent_ok = 0; | ||||
| 21969 | } | ||||
| 21970 | |||||
| 21971 | sub valign_output_step_C { | ||||
| 21972 | |||||
| 21973 | ############################################################### | ||||
| 21974 | # This is Step C in writing vertically aligned lines. | ||||
| 21975 | # Lines are either stored in a buffer or passed along to the next step. | ||||
| 21976 | # The reason for storing lines is that we may later want to reduce their | ||||
| 21977 | # indentation when -sot and -sct are both used. | ||||
| 21978 | ############################################################### | ||||
| 21979 | my @args = @_; | ||||
| 21980 | |||||
| 21981 | # Dump any saved lines if we see a line with an unbalanced opening or | ||||
| 21982 | # closing token. | ||||
| 21983 | dump_valign_buffer() if ( $seqno_string && $valign_buffer_filling ); | ||||
| 21984 | |||||
| 21985 | # Either store or write this line | ||||
| 21986 | if ($valign_buffer_filling) { | ||||
| 21987 | push @valign_buffer, [@args]; | ||||
| 21988 | } | ||||
| 21989 | else { | ||||
| 21990 | valign_output_step_D(@args); | ||||
| 21991 | } | ||||
| 21992 | |||||
| 21993 | # For lines starting or ending with opening or closing tokens.. | ||||
| 21994 | if ($seqno_string) { | ||||
| 21995 | $last_nonblank_seqno_string = $seqno_string; | ||||
| 21996 | |||||
| 21997 | # Start storing lines when we see a line with multiple stacked opening | ||||
| 21998 | # tokens. | ||||
| 21999 | if ( $args[0] =~ /[\{\(\[]\s*[\{\(\[]$/ ) { | ||||
| 22000 | $valign_buffer_filling = $seqno_string; | ||||
| 22001 | } | ||||
| 22002 | } | ||||
| 22003 | } | ||||
| 22004 | |||||
| 22005 | sub valign_output_step_D { | ||||
| 22006 | |||||
| 22007 | ############################################################### | ||||
| 22008 | # This is Step D in writing vertically aligned lines. | ||||
| 22009 | # Write one vertically aligned line of code to the output object. | ||||
| 22010 | ############################################################### | ||||
| 22011 | |||||
| 22012 | my ( $line, $leading_space_count, $level ) = @_; | ||||
| 22013 | |||||
| 22014 | # The line is currently correct if there is no tabbing (recommended!) | ||||
| 22015 | # We may have to lop off some leading spaces and replace with tabs. | ||||
| 22016 | if ( $leading_space_count > 0 ) { | ||||
| 22017 | |||||
| 22018 | # Nothing to do if no tabs | ||||
| 22019 | if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace ) | ||||
| 22020 | || $rOpts_indent_columns <= 0 ) | ||||
| 22021 | { | ||||
| 22022 | |||||
| 22023 | # nothing to do | ||||
| 22024 | } | ||||
| 22025 | |||||
| 22026 | # Handle entab option | ||||
| 22027 | elsif ($rOpts_entab_leading_whitespace) { | ||||
| 22028 | my $space_count = | ||||
| 22029 | $leading_space_count % $rOpts_entab_leading_whitespace; | ||||
| 22030 | my $tab_count = | ||||
| 22031 | int( $leading_space_count / $rOpts_entab_leading_whitespace ); | ||||
| 22032 | my $leading_string = "\t" x $tab_count . ' ' x $space_count; | ||||
| 22033 | if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) { | ||||
| 22034 | substr( $line, 0, $leading_space_count ) = $leading_string; | ||||
| 22035 | } | ||||
| 22036 | else { | ||||
| 22037 | |||||
| 22038 | # shouldn't happen - program error counting whitespace | ||||
| 22039 | # - skip entabbing | ||||
| 22040 | VALIGN_DEBUG_FLAG_TABS | ||||
| 22041 | && warning( | ||||
| 22042 | "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n" | ||||
| 22043 | ); | ||||
| 22044 | } | ||||
| 22045 | } | ||||
| 22046 | |||||
| 22047 | # Handle option of one tab per level | ||||
| 22048 | else { | ||||
| 22049 | my $leading_string = ( "\t" x $level ); | ||||
| 22050 | my $space_count = | ||||
| 22051 | $leading_space_count - $level * $rOpts_indent_columns; | ||||
| 22052 | |||||
| 22053 | # shouldn't happen: | ||||
| 22054 | if ( $space_count < 0 ) { | ||||
| 22055 | |||||
| 22056 | # But it could be an outdented comment | ||||
| 22057 | if ( $line !~ /^\s*#/ ) { | ||||
| 22058 | VALIGN_DEBUG_FLAG_TABS | ||||
| 22059 | && warning( | ||||
| 22060 | "Error entabbing in valign_output_step_D: for level=$group_level count=$leading_space_count\n" | ||||
| 22061 | ); | ||||
| 22062 | } | ||||
| 22063 | $leading_string = ( ' ' x $leading_space_count ); | ||||
| 22064 | } | ||||
| 22065 | else { | ||||
| 22066 | $leading_string .= ( ' ' x $space_count ); | ||||
| 22067 | } | ||||
| 22068 | if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) { | ||||
| 22069 | substr( $line, 0, $leading_space_count ) = $leading_string; | ||||
| 22070 | } | ||||
| 22071 | else { | ||||
| 22072 | |||||
| 22073 | # shouldn't happen - program error counting whitespace | ||||
| 22074 | # we'll skip entabbing | ||||
| 22075 | VALIGN_DEBUG_FLAG_TABS | ||||
| 22076 | && warning( | ||||
| 22077 | "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n" | ||||
| 22078 | ); | ||||
| 22079 | } | ||||
| 22080 | } | ||||
| 22081 | } | ||||
| 22082 | $file_writer_object->write_code_line( $line . "\n" ); | ||||
| 22083 | } | ||||
| 22084 | |||||
| 22085 | { # begin get_leading_string | ||||
| 22086 | |||||
| 22087 | 2 | 100ns | my @leading_string_cache; | ||
| 22088 | |||||
| 22089 | sub get_leading_string { | ||||
| 22090 | |||||
| 22091 | # define the leading whitespace string for this line.. | ||||
| 22092 | my $leading_whitespace_count = shift; | ||||
| 22093 | |||||
| 22094 | # Handle case of zero whitespace, which includes multi-line quotes | ||||
| 22095 | # (which may have a finite level; this prevents tab problems) | ||||
| 22096 | if ( $leading_whitespace_count <= 0 ) { | ||||
| 22097 | return ""; | ||||
| 22098 | } | ||||
| 22099 | |||||
| 22100 | # look for previous result | ||||
| 22101 | elsif ( $leading_string_cache[$leading_whitespace_count] ) { | ||||
| 22102 | return $leading_string_cache[$leading_whitespace_count]; | ||||
| 22103 | } | ||||
| 22104 | |||||
| 22105 | # must compute a string for this number of spaces | ||||
| 22106 | my $leading_string; | ||||
| 22107 | |||||
| 22108 | # Handle simple case of no tabs | ||||
| 22109 | if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace ) | ||||
| 22110 | || $rOpts_indent_columns <= 0 ) | ||||
| 22111 | { | ||||
| 22112 | $leading_string = ( ' ' x $leading_whitespace_count ); | ||||
| 22113 | } | ||||
| 22114 | |||||
| 22115 | # Handle entab option | ||||
| 22116 | elsif ($rOpts_entab_leading_whitespace) { | ||||
| 22117 | my $space_count = | ||||
| 22118 | $leading_whitespace_count % $rOpts_entab_leading_whitespace; | ||||
| 22119 | my $tab_count = int( | ||||
| 22120 | $leading_whitespace_count / $rOpts_entab_leading_whitespace ); | ||||
| 22121 | $leading_string = "\t" x $tab_count . ' ' x $space_count; | ||||
| 22122 | } | ||||
| 22123 | |||||
| 22124 | # Handle option of one tab per level | ||||
| 22125 | else { | ||||
| 22126 | $leading_string = ( "\t" x $group_level ); | ||||
| 22127 | my $space_count = | ||||
| 22128 | $leading_whitespace_count - $group_level * $rOpts_indent_columns; | ||||
| 22129 | |||||
| 22130 | # shouldn't happen: | ||||
| 22131 | if ( $space_count < 0 ) { | ||||
| 22132 | VALIGN_DEBUG_FLAG_TABS | ||||
| 22133 | && warning( | ||||
| 22134 | "Error in get_leading_string: for level=$group_level count=$leading_whitespace_count\n" | ||||
| 22135 | ); | ||||
| 22136 | |||||
| 22137 | # -- skip entabbing | ||||
| 22138 | $leading_string = ( ' ' x $leading_whitespace_count ); | ||||
| 22139 | } | ||||
| 22140 | else { | ||||
| 22141 | $leading_string .= ( ' ' x $space_count ); | ||||
| 22142 | } | ||||
| 22143 | } | ||||
| 22144 | $leading_string_cache[$leading_whitespace_count] = $leading_string; | ||||
| 22145 | return $leading_string; | ||||
| 22146 | } | ||||
| 22147 | } # end get_leading_string | ||||
| 22148 | |||||
| 22149 | sub report_anything_unusual { | ||||
| 22150 | my $self = shift; | ||||
| 22151 | if ( $outdented_line_count > 0 ) { | ||||
| 22152 | write_logfile_entry( | ||||
| 22153 | "$outdented_line_count long lines were outdented:\n"); | ||||
| 22154 | write_logfile_entry( | ||||
| 22155 | " First at output line $first_outdented_line_at\n"); | ||||
| 22156 | |||||
| 22157 | if ( $outdented_line_count > 1 ) { | ||||
| 22158 | write_logfile_entry( | ||||
| 22159 | " Last at output line $last_outdented_line_at\n"); | ||||
| 22160 | } | ||||
| 22161 | write_logfile_entry( | ||||
| 22162 | " use -noll to prevent outdenting, -l=n to increase line length\n" | ||||
| 22163 | ); | ||||
| 22164 | write_logfile_entry("\n"); | ||||
| 22165 | } | ||||
| 22166 | } | ||||
| 22167 | |||||
| 22168 | ##################################################################### | ||||
| 22169 | # | ||||
| 22170 | # the Perl::Tidy::FileWriter class writes the output file | ||||
| 22171 | # | ||||
| 22172 | ##################################################################### | ||||
| 22173 | |||||
| 22174 | package Perl::Tidy::FileWriter; | ||||
| 22175 | |||||
| 22176 | # Maximum number of little messages; probably need not be changed. | ||||
| 22177 | 2 | 1.68ms | 2 | 126µs | # spent 69µs (12+57) within Perl::Tidy::FileWriter::BEGIN@22177 which was called:
# once (12µs+57µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 22177 # spent 69µs making 1 call to Perl::Tidy::FileWriter::BEGIN@22177
# spent 57µs making 1 call to constant::import |
| 22178 | |||||
| 22179 | sub write_logfile_entry { | ||||
| 22180 | my $self = shift; | ||||
| 22181 | my $logger_object = $self->{_logger_object}; | ||||
| 22182 | if ($logger_object) { | ||||
| 22183 | $logger_object->write_logfile_entry(@_); | ||||
| 22184 | } | ||||
| 22185 | } | ||||
| 22186 | |||||
| 22187 | sub new { | ||||
| 22188 | my $class = shift; | ||||
| 22189 | my ( $line_sink_object, $rOpts, $logger_object ) = @_; | ||||
| 22190 | |||||
| 22191 | bless { | ||||
| 22192 | _line_sink_object => $line_sink_object, | ||||
| 22193 | _logger_object => $logger_object, | ||||
| 22194 | _rOpts => $rOpts, | ||||
| 22195 | _output_line_number => 1, | ||||
| 22196 | _consecutive_blank_lines => 0, | ||||
| 22197 | _consecutive_nonblank_lines => 0, | ||||
| 22198 | _first_line_length_error => 0, | ||||
| 22199 | _max_line_length_error => 0, | ||||
| 22200 | _last_line_length_error => 0, | ||||
| 22201 | _first_line_length_error_at => 0, | ||||
| 22202 | _max_line_length_error_at => 0, | ||||
| 22203 | _last_line_length_error_at => 0, | ||||
| 22204 | _line_length_error_count => 0, | ||||
| 22205 | _max_output_line_length => 0, | ||||
| 22206 | _max_output_line_length_at => 0, | ||||
| 22207 | }, $class; | ||||
| 22208 | } | ||||
| 22209 | |||||
| 22210 | sub tee_on { | ||||
| 22211 | my $self = shift; | ||||
| 22212 | $self->{_line_sink_object}->tee_on(); | ||||
| 22213 | } | ||||
| 22214 | |||||
| 22215 | sub tee_off { | ||||
| 22216 | my $self = shift; | ||||
| 22217 | $self->{_line_sink_object}->tee_off(); | ||||
| 22218 | } | ||||
| 22219 | |||||
| 22220 | sub get_output_line_number { | ||||
| 22221 | my $self = shift; | ||||
| 22222 | return $self->{_output_line_number}; | ||||
| 22223 | } | ||||
| 22224 | |||||
| 22225 | sub decrement_output_line_number { | ||||
| 22226 | my $self = shift; | ||||
| 22227 | $self->{_output_line_number}--; | ||||
| 22228 | } | ||||
| 22229 | |||||
| 22230 | sub get_consecutive_nonblank_lines { | ||||
| 22231 | my $self = shift; | ||||
| 22232 | return $self->{_consecutive_nonblank_lines}; | ||||
| 22233 | } | ||||
| 22234 | |||||
| 22235 | sub reset_consecutive_blank_lines { | ||||
| 22236 | my $self = shift; | ||||
| 22237 | $self->{_consecutive_blank_lines} = 0; | ||||
| 22238 | } | ||||
| 22239 | |||||
| 22240 | sub want_blank_line { | ||||
| 22241 | my $self = shift; | ||||
| 22242 | unless ( $self->{_consecutive_blank_lines} ) { | ||||
| 22243 | $self->write_blank_code_line(); | ||||
| 22244 | } | ||||
| 22245 | } | ||||
| 22246 | |||||
| 22247 | sub require_blank_code_lines { | ||||
| 22248 | |||||
| 22249 | # write out the requested number of blanks regardless of the value of -mbl | ||||
| 22250 | # unless -mbl=0. This allows extra blank lines to be written for subs and | ||||
| 22251 | # packages even with the default -mbl=1 | ||||
| 22252 | my $self = shift; | ||||
| 22253 | my $count = shift; | ||||
| 22254 | my $need = $count - $self->{_consecutive_blank_lines}; | ||||
| 22255 | my $rOpts = $self->{_rOpts}; | ||||
| 22256 | my $forced = $rOpts->{'maximum-consecutive-blank-lines'} > 0; | ||||
| 22257 | for ( my $i = 0 ; $i < $need ; $i++ ) { | ||||
| 22258 | $self->write_blank_code_line($forced); | ||||
| 22259 | } | ||||
| 22260 | } | ||||
| 22261 | |||||
| 22262 | sub write_blank_code_line { | ||||
| 22263 | my $self = shift; | ||||
| 22264 | my $forced = shift; | ||||
| 22265 | my $rOpts = $self->{_rOpts}; | ||||
| 22266 | return | ||||
| 22267 | if (!$forced | ||||
| 22268 | && $self->{_consecutive_blank_lines} >= | ||||
| 22269 | $rOpts->{'maximum-consecutive-blank-lines'} ); | ||||
| 22270 | $self->{_consecutive_blank_lines}++; | ||||
| 22271 | $self->{_consecutive_nonblank_lines} = 0; | ||||
| 22272 | $self->write_line("\n"); | ||||
| 22273 | } | ||||
| 22274 | |||||
| 22275 | sub write_code_line { | ||||
| 22276 | my $self = shift; | ||||
| 22277 | my $a = shift; | ||||
| 22278 | |||||
| 22279 | if ( $a =~ /^\s*$/ ) { | ||||
| 22280 | my $rOpts = $self->{_rOpts}; | ||||
| 22281 | return | ||||
| 22282 | if ( $self->{_consecutive_blank_lines} >= | ||||
| 22283 | $rOpts->{'maximum-consecutive-blank-lines'} ); | ||||
| 22284 | $self->{_consecutive_blank_lines}++; | ||||
| 22285 | $self->{_consecutive_nonblank_lines} = 0; | ||||
| 22286 | } | ||||
| 22287 | else { | ||||
| 22288 | $self->{_consecutive_blank_lines} = 0; | ||||
| 22289 | $self->{_consecutive_nonblank_lines}++; | ||||
| 22290 | } | ||||
| 22291 | $self->write_line($a); | ||||
| 22292 | } | ||||
| 22293 | |||||
| 22294 | sub write_line { | ||||
| 22295 | my $self = shift; | ||||
| 22296 | my $a = shift; | ||||
| 22297 | |||||
| 22298 | # TODO: go through and see if the test is necessary here | ||||
| 22299 | if ( $a =~ /\n$/ ) { $self->{_output_line_number}++; } | ||||
| 22300 | |||||
| 22301 | $self->{_line_sink_object}->write_line($a); | ||||
| 22302 | |||||
| 22303 | # This calculation of excess line length ignores any internal tabs | ||||
| 22304 | my $rOpts = $self->{_rOpts}; | ||||
| 22305 | my $exceed = length($a) - $rOpts->{'maximum-line-length'} - 1; | ||||
| 22306 | if ( $a =~ /^\t+/g ) { | ||||
| 22307 | $exceed += pos($a) * ( $rOpts->{'indent-columns'} - 1 ); | ||||
| 22308 | } | ||||
| 22309 | |||||
| 22310 | # Note that we just incremented output line number to future value | ||||
| 22311 | # so we must subtract 1 for current line number | ||||
| 22312 | if ( length($a) > 1 + $self->{_max_output_line_length} ) { | ||||
| 22313 | $self->{_max_output_line_length} = length($a) - 1; | ||||
| 22314 | $self->{_max_output_line_length_at} = $self->{_output_line_number} - 1; | ||||
| 22315 | } | ||||
| 22316 | |||||
| 22317 | if ( $exceed > 0 ) { | ||||
| 22318 | my $output_line_number = $self->{_output_line_number}; | ||||
| 22319 | $self->{_last_line_length_error} = $exceed; | ||||
| 22320 | $self->{_last_line_length_error_at} = $output_line_number - 1; | ||||
| 22321 | if ( $self->{_line_length_error_count} == 0 ) { | ||||
| 22322 | $self->{_first_line_length_error} = $exceed; | ||||
| 22323 | $self->{_first_line_length_error_at} = $output_line_number - 1; | ||||
| 22324 | } | ||||
| 22325 | |||||
| 22326 | if ( | ||||
| 22327 | $self->{_last_line_length_error} > $self->{_max_line_length_error} ) | ||||
| 22328 | { | ||||
| 22329 | $self->{_max_line_length_error} = $exceed; | ||||
| 22330 | $self->{_max_line_length_error_at} = $output_line_number - 1; | ||||
| 22331 | } | ||||
| 22332 | |||||
| 22333 | if ( $self->{_line_length_error_count} < MAX_NAG_MESSAGES ) { | ||||
| 22334 | $self->write_logfile_entry( | ||||
| 22335 | "Line length exceeded by $exceed characters\n"); | ||||
| 22336 | } | ||||
| 22337 | $self->{_line_length_error_count}++; | ||||
| 22338 | } | ||||
| 22339 | |||||
| 22340 | } | ||||
| 22341 | |||||
| 22342 | sub report_line_length_errors { | ||||
| 22343 | my $self = shift; | ||||
| 22344 | my $rOpts = $self->{_rOpts}; | ||||
| 22345 | my $line_length_error_count = $self->{_line_length_error_count}; | ||||
| 22346 | if ( $line_length_error_count == 0 ) { | ||||
| 22347 | $self->write_logfile_entry( | ||||
| 22348 | "No lines exceeded $rOpts->{'maximum-line-length'} characters\n"); | ||||
| 22349 | my $max_output_line_length = $self->{_max_output_line_length}; | ||||
| 22350 | my $max_output_line_length_at = $self->{_max_output_line_length_at}; | ||||
| 22351 | $self->write_logfile_entry( | ||||
| 22352 | " Maximum output line length was $max_output_line_length at line $max_output_line_length_at\n" | ||||
| 22353 | ); | ||||
| 22354 | |||||
| 22355 | } | ||||
| 22356 | else { | ||||
| 22357 | |||||
| 22358 | my $word = ( $line_length_error_count > 1 ) ? "s" : ""; | ||||
| 22359 | $self->write_logfile_entry( | ||||
| 22360 | "$line_length_error_count output line$word exceeded $rOpts->{'maximum-line-length'} characters:\n" | ||||
| 22361 | ); | ||||
| 22362 | |||||
| 22363 | $word = ( $line_length_error_count > 1 ) ? "First" : ""; | ||||
| 22364 | my $first_line_length_error = $self->{_first_line_length_error}; | ||||
| 22365 | my $first_line_length_error_at = $self->{_first_line_length_error_at}; | ||||
| 22366 | $self->write_logfile_entry( | ||||
| 22367 | " $word at line $first_line_length_error_at by $first_line_length_error characters\n" | ||||
| 22368 | ); | ||||
| 22369 | |||||
| 22370 | if ( $line_length_error_count > 1 ) { | ||||
| 22371 | my $max_line_length_error = $self->{_max_line_length_error}; | ||||
| 22372 | my $max_line_length_error_at = $self->{_max_line_length_error_at}; | ||||
| 22373 | my $last_line_length_error = $self->{_last_line_length_error}; | ||||
| 22374 | my $last_line_length_error_at = $self->{_last_line_length_error_at}; | ||||
| 22375 | $self->write_logfile_entry( | ||||
| 22376 | " Maximum at line $max_line_length_error_at by $max_line_length_error characters\n" | ||||
| 22377 | ); | ||||
| 22378 | $self->write_logfile_entry( | ||||
| 22379 | " Last at line $last_line_length_error_at by $last_line_length_error characters\n" | ||||
| 22380 | ); | ||||
| 22381 | } | ||||
| 22382 | } | ||||
| 22383 | } | ||||
| 22384 | |||||
| 22385 | ##################################################################### | ||||
| 22386 | # | ||||
| 22387 | # The Perl::Tidy::Debugger class shows line tokenization | ||||
| 22388 | # | ||||
| 22389 | ##################################################################### | ||||
| 22390 | |||||
| 22391 | package Perl::Tidy::Debugger; | ||||
| 22392 | |||||
| 22393 | sub new { | ||||
| 22394 | |||||
| 22395 | my ( $class, $filename ) = @_; | ||||
| 22396 | |||||
| 22397 | bless { | ||||
| 22398 | _debug_file => $filename, | ||||
| 22399 | _debug_file_opened => 0, | ||||
| 22400 | _fh => undef, | ||||
| 22401 | }, $class; | ||||
| 22402 | } | ||||
| 22403 | |||||
| 22404 | sub really_open_debug_file { | ||||
| 22405 | |||||
| 22406 | my $self = shift; | ||||
| 22407 | my $debug_file = $self->{_debug_file}; | ||||
| 22408 | my $fh; | ||||
| 22409 | unless ( $fh = IO::File->new("> $debug_file") ) { | ||||
| 22410 | Perl::Tidy::Warn("can't open $debug_file: $!\n"); | ||||
| 22411 | } | ||||
| 22412 | $self->{_debug_file_opened} = 1; | ||||
| 22413 | $self->{_fh} = $fh; | ||||
| 22414 | print $fh | ||||
| 22415 | "Use -dump-token-types (-dtt) to get a list of token type codes\n"; | ||||
| 22416 | } | ||||
| 22417 | |||||
| 22418 | sub close_debug_file { | ||||
| 22419 | |||||
| 22420 | my $self = shift; | ||||
| 22421 | my $fh = $self->{_fh}; | ||||
| 22422 | if ( $self->{_debug_file_opened} ) { | ||||
| 22423 | |||||
| 22424 | eval { $self->{_fh}->close() }; | ||||
| 22425 | } | ||||
| 22426 | } | ||||
| 22427 | |||||
| 22428 | sub write_debug_entry { | ||||
| 22429 | |||||
| 22430 | # This is a debug dump routine which may be modified as necessary | ||||
| 22431 | # to dump tokens on a line-by-line basis. The output will be written | ||||
| 22432 | # to the .DEBUG file when the -D flag is entered. | ||||
| 22433 | my $self = shift; | ||||
| 22434 | my $line_of_tokens = shift; | ||||
| 22435 | |||||
| 22436 | my $input_line = $line_of_tokens->{_line_text}; | ||||
| 22437 | my $rtoken_type = $line_of_tokens->{_rtoken_type}; | ||||
| 22438 | my $rtokens = $line_of_tokens->{_rtokens}; | ||||
| 22439 | my $rlevels = $line_of_tokens->{_rlevels}; | ||||
| 22440 | my $rslevels = $line_of_tokens->{_rslevels}; | ||||
| 22441 | my $rblock_type = $line_of_tokens->{_rblock_type}; | ||||
| 22442 | my $input_line_number = $line_of_tokens->{_line_number}; | ||||
| 22443 | my $line_type = $line_of_tokens->{_line_type}; | ||||
| 22444 | |||||
| 22445 | my ( $j, $num ); | ||||
| 22446 | |||||
| 22447 | my $token_str = "$input_line_number: "; | ||||
| 22448 | my $reconstructed_original = "$input_line_number: "; | ||||
| 22449 | my $block_str = "$input_line_number: "; | ||||
| 22450 | |||||
| 22451 | #$token_str .= "$line_type: "; | ||||
| 22452 | #$reconstructed_original .= "$line_type: "; | ||||
| 22453 | |||||
| 22454 | my $pattern = ""; | ||||
| 22455 | my @next_char = ( '"', '"' ); | ||||
| 22456 | my $i_next = 0; | ||||
| 22457 | unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() } | ||||
| 22458 | my $fh = $self->{_fh}; | ||||
| 22459 | |||||
| 22460 | for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) { | ||||
| 22461 | |||||
| 22462 | # testing patterns | ||||
| 22463 | if ( $$rtoken_type[$j] eq 'k' ) { | ||||
| 22464 | $pattern .= $$rtokens[$j]; | ||||
| 22465 | } | ||||
| 22466 | else { | ||||
| 22467 | $pattern .= $$rtoken_type[$j]; | ||||
| 22468 | } | ||||
| 22469 | $reconstructed_original .= $$rtokens[$j]; | ||||
| 22470 | $block_str .= "($$rblock_type[$j])"; | ||||
| 22471 | $num = length( $$rtokens[$j] ); | ||||
| 22472 | my $type_str = $$rtoken_type[$j]; | ||||
| 22473 | |||||
| 22474 | # be sure there are no blank tokens (shouldn't happen) | ||||
| 22475 | # This can only happen if a programming error has been made | ||||
| 22476 | # because all valid tokens are non-blank | ||||
| 22477 | if ( $type_str eq ' ' ) { | ||||
| 22478 | print $fh "BLANK TOKEN on the next line\n"; | ||||
| 22479 | $type_str = $next_char[$i_next]; | ||||
| 22480 | $i_next = 1 - $i_next; | ||||
| 22481 | } | ||||
| 22482 | |||||
| 22483 | if ( length($type_str) == 1 ) { | ||||
| 22484 | $type_str = $type_str x $num; | ||||
| 22485 | } | ||||
| 22486 | $token_str .= $type_str; | ||||
| 22487 | } | ||||
| 22488 | |||||
| 22489 | # Write what you want here ... | ||||
| 22490 | # print $fh "$input_line\n"; | ||||
| 22491 | # print $fh "$pattern\n"; | ||||
| 22492 | print $fh "$reconstructed_original\n"; | ||||
| 22493 | print $fh "$token_str\n"; | ||||
| 22494 | |||||
| 22495 | #print $fh "$block_str\n"; | ||||
| 22496 | } | ||||
| 22497 | |||||
| 22498 | ##################################################################### | ||||
| 22499 | # | ||||
| 22500 | # The Perl::Tidy::LineBuffer class supplies a 'get_line()' | ||||
| 22501 | # method for returning the next line to be parsed, as well as a | ||||
| 22502 | # 'peek_ahead()' method | ||||
| 22503 | # | ||||
| 22504 | # The input parameter is an object with a 'get_line()' method | ||||
| 22505 | # which returns the next line to be parsed | ||||
| 22506 | # | ||||
| 22507 | ##################################################################### | ||||
| 22508 | |||||
| 22509 | package Perl::Tidy::LineBuffer; | ||||
| 22510 | |||||
| 22511 | sub new { | ||||
| 22512 | |||||
| 22513 | my $class = shift; | ||||
| 22514 | my $line_source_object = shift; | ||||
| 22515 | |||||
| 22516 | return bless { | ||||
| 22517 | _line_source_object => $line_source_object, | ||||
| 22518 | _rlookahead_buffer => [], | ||||
| 22519 | }, $class; | ||||
| 22520 | } | ||||
| 22521 | |||||
| 22522 | sub peek_ahead { | ||||
| 22523 | my $self = shift; | ||||
| 22524 | my $buffer_index = shift; | ||||
| 22525 | my $line = undef; | ||||
| 22526 | my $line_source_object = $self->{_line_source_object}; | ||||
| 22527 | my $rlookahead_buffer = $self->{_rlookahead_buffer}; | ||||
| 22528 | if ( $buffer_index < scalar(@$rlookahead_buffer) ) { | ||||
| 22529 | $line = $$rlookahead_buffer[$buffer_index]; | ||||
| 22530 | } | ||||
| 22531 | else { | ||||
| 22532 | $line = $line_source_object->get_line(); | ||||
| 22533 | push( @$rlookahead_buffer, $line ); | ||||
| 22534 | } | ||||
| 22535 | return $line; | ||||
| 22536 | } | ||||
| 22537 | |||||
| 22538 | sub get_line { | ||||
| 22539 | my $self = shift; | ||||
| 22540 | my $line = undef; | ||||
| 22541 | my $line_source_object = $self->{_line_source_object}; | ||||
| 22542 | my $rlookahead_buffer = $self->{_rlookahead_buffer}; | ||||
| 22543 | |||||
| 22544 | if ( scalar(@$rlookahead_buffer) ) { | ||||
| 22545 | $line = shift @$rlookahead_buffer; | ||||
| 22546 | } | ||||
| 22547 | else { | ||||
| 22548 | $line = $line_source_object->get_line(); | ||||
| 22549 | } | ||||
| 22550 | return $line; | ||||
| 22551 | } | ||||
| 22552 | |||||
| 22553 | ######################################################################## | ||||
| 22554 | # | ||||
| 22555 | # the Perl::Tidy::Tokenizer package is essentially a filter which | ||||
| 22556 | # reads lines of perl source code from a source object and provides | ||||
| 22557 | # corresponding tokenized lines through its get_line() method. Lines | ||||
| 22558 | # flow from the source_object to the caller like this: | ||||
| 22559 | # | ||||
| 22560 | # source_object --> LineBuffer_object --> Tokenizer --> calling routine | ||||
| 22561 | # get_line() get_line() get_line() line_of_tokens | ||||
| 22562 | # | ||||
| 22563 | # The source object can be any object with a get_line() method which | ||||
| 22564 | # supplies one line (a character string) perl call. | ||||
| 22565 | # The LineBuffer object is created by the Tokenizer. | ||||
| 22566 | # The Tokenizer returns a reference to a data structure 'line_of_tokens' | ||||
| 22567 | # containing one tokenized line for each call to its get_line() method. | ||||
| 22568 | # | ||||
| 22569 | # WARNING: This is not a real class yet. Only one tokenizer my be used. | ||||
| 22570 | # | ||||
| 22571 | ######################################################################## | ||||
| 22572 | |||||
| 22573 | package Perl::Tidy::Tokenizer; | ||||
| 22574 | |||||
| 22575 | # spent 8µs within Perl::Tidy::Tokenizer::BEGIN@22575 which was called:
# once (8µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 22596 | ||||
| 22576 | |||||
| 22577 | # Caution: these debug flags produce a lot of output | ||||
| 22578 | # They should all be 0 except when debugging small scripts | ||||
| 22579 | |||||
| 22580 | 2 | 30µs | 2 | 106µs | # spent 58µs (11+47) within Perl::Tidy::Tokenizer::BEGIN@22580 which was called:
# once (11µs+47µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 22580 # spent 58µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@22580
# spent 47µs making 1 call to constant::import |
| 22581 | 2 | 31µs | 2 | 92µs | # spent 50µs (9+41) within Perl::Tidy::Tokenizer::BEGIN@22581 which was called:
# once (9µs+41µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 22581 # spent 50µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@22581
# spent 41µs making 1 call to constant::import |
| 22582 | 2 | 26µs | 2 | 81µs | # spent 44µs (8+36) within Perl::Tidy::Tokenizer::BEGIN@22582 which was called:
# once (8µs+36µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 22582 # spent 44µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@22582
# spent 36µs making 1 call to constant::import |
| 22583 | 2 | 25µs | 2 | 98µs | # spent 53µs (8+45) within Perl::Tidy::Tokenizer::BEGIN@22583 which was called:
# once (8µs+45µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 22583 # spent 53µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@22583
# spent 45µs making 1 call to constant::import |
| 22584 | 2 | 113µs | 2 | 79µs | # spent 43µs (8+35) within Perl::Tidy::Tokenizer::BEGIN@22584 which was called:
# once (8µs+35µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 22584 # spent 43µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@22584
# spent 36µs making 1 call to constant::import |
| 22585 | |||||
| 22586 | my $debug_warning = sub { | ||||
| 22587 | print STDOUT "TOKENIZER_DEBUGGING with key $_[0]\n"; | ||||
| 22588 | 1 | 2µs | }; | ||
| 22589 | |||||
| 22590 | TOKENIZER_DEBUG_FLAG_EXPECT && $debug_warning->('EXPECT'); | ||||
| 22591 | 1 | 0s | TOKENIZER_DEBUG_FLAG_NSCAN && $debug_warning->('NSCAN'); | ||
| 22592 | TOKENIZER_DEBUG_FLAG_QUOTE && $debug_warning->('QUOTE'); | ||||
| 22593 | 1 | 0s | TOKENIZER_DEBUG_FLAG_SCAN_ID && $debug_warning->('SCAN_ID'); | ||
| 22594 | 1 | 10µs | TOKENIZER_DEBUG_FLAG_TOKENIZE && $debug_warning->('TOKENIZE'); | ||
| 22595 | |||||
| 22596 | 1 | 22µs | 1 | 8µs | } # spent 8µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@22575 |
| 22597 | |||||
| 22598 | 2 | 56µs | 2 | 96µs | # spent 53µs (11+42) within Perl::Tidy::Tokenizer::BEGIN@22598 which was called:
# once (11µs+42µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 22598 # spent 53µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@22598
# spent 42µs making 1 call to Exporter::import |
| 22599 | |||||
| 22600 | # PACKAGE VARIABLES for processing an entire FILE. | ||||
| 22601 | 1 | 400ns | # spent 4.38ms (9µs+4.37) within Perl::Tidy::Tokenizer::BEGIN@22601 which was called:
# once (9µs+4.37ms) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 22641 | ||
| 22602 | $tokenizer_self | ||||
| 22603 | |||||
| 22604 | $last_nonblank_token | ||||
| 22605 | $last_nonblank_type | ||||
| 22606 | $last_nonblank_block_type | ||||
| 22607 | $statement_type | ||||
| 22608 | $in_attribute_list | ||||
| 22609 | $current_package | ||||
| 22610 | $context | ||||
| 22611 | |||||
| 22612 | %is_constant | ||||
| 22613 | %is_user_function | ||||
| 22614 | %user_function_prototype | ||||
| 22615 | %is_block_function | ||||
| 22616 | %is_block_list_function | ||||
| 22617 | %saw_function_definition | ||||
| 22618 | |||||
| 22619 | $brace_depth | ||||
| 22620 | $paren_depth | ||||
| 22621 | $square_bracket_depth | ||||
| 22622 | |||||
| 22623 | @current_depth | ||||
| 22624 | @total_depth | ||||
| 22625 | $total_depth | ||||
| 22626 | @nesting_sequence_number | ||||
| 22627 | @current_sequence_number | ||||
| 22628 | @paren_type | ||||
| 22629 | @paren_semicolon_count | ||||
| 22630 | @paren_structural_type | ||||
| 22631 | @brace_type | ||||
| 22632 | @brace_structural_type | ||||
| 22633 | @brace_context | ||||
| 22634 | @brace_package | ||||
| 22635 | @square_bracket_type | ||||
| 22636 | @square_bracket_structural_type | ||||
| 22637 | @depth_array | ||||
| 22638 | @nested_ternary_flag | ||||
| 22639 | @nested_statement_type | ||||
| 22640 | @starting_line_of_current_depth | ||||
| 22641 | 1 | 45µs | 2 | 8.75ms | }; # spent 4.38ms making 1 call to Perl::Tidy::Tokenizer::BEGIN@22601
# spent 4.37ms making 1 call to vars::import |
| 22642 | |||||
| 22643 | # GLOBAL CONSTANTS for routines in this package | ||||
| 22644 | 1 | 400ns | # spent 201µs (8+193) within Perl::Tidy::Tokenizer::BEGIN@22644 which was called:
# once (8µs+193µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 22662 | ||
| 22645 | %is_indirect_object_taker | ||||
| 22646 | %is_block_operator | ||||
| 22647 | %expecting_operator_token | ||||
| 22648 | %expecting_operator_types | ||||
| 22649 | %expecting_term_types | ||||
| 22650 | %expecting_term_token | ||||
| 22651 | %is_digraph | ||||
| 22652 | %is_file_test_operator | ||||
| 22653 | %is_trigraph | ||||
| 22654 | %is_valid_token_type | ||||
| 22655 | %is_keyword | ||||
| 22656 | %is_code_block_token | ||||
| 22657 | %really_want_term | ||||
| 22658 | @opening_brace_names | ||||
| 22659 | @closing_brace_names | ||||
| 22660 | %is_keyword_taking_list | ||||
| 22661 | %is_q_qq_qw_qx_qr_s_y_tr_m | ||||
| 22662 | 1 | 28µs | 2 | 394µs | }; # spent 201µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@22644
# spent 193µs making 1 call to vars::import |
| 22663 | |||||
| 22664 | # possible values of operator_expected() | ||||
| 22665 | 2 | 26µs | 2 | 74µs | # spent 40µs (7+33) within Perl::Tidy::Tokenizer::BEGIN@22665 which was called:
# once (7µs+33µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 22665 # spent 40µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@22665
# spent 34µs making 1 call to constant::import |
| 22666 | 2 | 22µs | 2 | 77µs | # spent 44µs (11+33) within Perl::Tidy::Tokenizer::BEGIN@22666 which was called:
# once (11µs+33µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 22666 # spent 44µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@22666
# spent 33µs making 1 call to constant::import |
| 22667 | 2 | 22µs | 2 | 64µs | # spent 35µs (6+29) within Perl::Tidy::Tokenizer::BEGIN@22667 which was called:
# once (6µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 22667 # spent 35µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@22667
# spent 29µs making 1 call to constant::import |
| 22668 | |||||
| 22669 | # possible values of context | ||||
| 22670 | 2 | 21µs | 2 | 74µs | # spent 40µs (6+34) within Perl::Tidy::Tokenizer::BEGIN@22670 which was called:
# once (6µs+34µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 22670 # spent 40µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@22670
# spent 34µs making 1 call to constant::import |
| 22671 | 2 | 20µs | 2 | 65µs | # spent 36µs (7+29) within Perl::Tidy::Tokenizer::BEGIN@22671 which was called:
# once (7µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 22671 # spent 36µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@22671
# spent 29µs making 1 call to constant::import |
| 22672 | 2 | 21µs | 2 | 72µs | # spent 44µs (15+29) within Perl::Tidy::Tokenizer::BEGIN@22672 which was called:
# once (15µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 22672 # spent 44µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@22672
# spent 29µs making 1 call to constant::import |
| 22673 | |||||
| 22674 | # Maximum number of little messages; probably need not be changed. | ||||
| 22675 | 2 | 2.76ms | 2 | 64µs | # spent 35µs (6+29) within Perl::Tidy::Tokenizer::BEGIN@22675 which was called:
# once (6µs+29µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 22675 # spent 35µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@22675
# spent 29µs making 1 call to constant::import |
| 22676 | |||||
| 22677 | { | ||||
| 22678 | |||||
| 22679 | # methods to count instances | ||||
| 22680 | 2 | 200ns | my $_count = 0; | ||
| 22681 | sub get_count { $_count; } | ||||
| 22682 | sub _increment_count { ++$_count } | ||||
| 22683 | sub _decrement_count { --$_count } | ||||
| 22684 | } | ||||
| 22685 | |||||
| 22686 | sub DESTROY { | ||||
| 22687 | $_[0]->_decrement_count(); | ||||
| 22688 | } | ||||
| 22689 | |||||
| 22690 | sub new { | ||||
| 22691 | |||||
| 22692 | my $class = shift; | ||||
| 22693 | |||||
| 22694 | # Note: 'tabs' and 'indent_columns' are temporary and should be | ||||
| 22695 | # removed asap | ||||
| 22696 | my %defaults = ( | ||||
| 22697 | source_object => undef, | ||||
| 22698 | debugger_object => undef, | ||||
| 22699 | diagnostics_object => undef, | ||||
| 22700 | logger_object => undef, | ||||
| 22701 | starting_level => undef, | ||||
| 22702 | indent_columns => 4, | ||||
| 22703 | tabsize => 8, | ||||
| 22704 | look_for_hash_bang => 0, | ||||
| 22705 | trim_qw => 1, | ||||
| 22706 | look_for_autoloader => 1, | ||||
| 22707 | look_for_selfloader => 1, | ||||
| 22708 | starting_line_number => 1, | ||||
| 22709 | ); | ||||
| 22710 | my %args = ( %defaults, @_ ); | ||||
| 22711 | |||||
| 22712 | # we are given an object with a get_line() method to supply source lines | ||||
| 22713 | my $source_object = $args{source_object}; | ||||
| 22714 | |||||
| 22715 | # we create another object with a get_line() and peek_ahead() method | ||||
| 22716 | my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object); | ||||
| 22717 | |||||
| 22718 | # Tokenizer state data is as follows: | ||||
| 22719 | # _rhere_target_list reference to list of here-doc targets | ||||
| 22720 | # _here_doc_target the target string for a here document | ||||
| 22721 | # _here_quote_character the type of here-doc quoting (" ' ` or none) | ||||
| 22722 | # to determine if interpolation is done | ||||
| 22723 | # _quote_target character we seek if chasing a quote | ||||
| 22724 | # _line_start_quote line where we started looking for a long quote | ||||
| 22725 | # _in_here_doc flag indicating if we are in a here-doc | ||||
| 22726 | # _in_pod flag set if we are in pod documentation | ||||
| 22727 | # _in_error flag set if we saw severe error (binary in script) | ||||
| 22728 | # _in_data flag set if we are in __DATA__ section | ||||
| 22729 | # _in_end flag set if we are in __END__ section | ||||
| 22730 | # _in_format flag set if we are in a format description | ||||
| 22731 | # _in_attribute_list flag telling if we are looking for attributes | ||||
| 22732 | # _in_quote flag telling if we are chasing a quote | ||||
| 22733 | # _starting_level indentation level of first line | ||||
| 22734 | # _line_buffer_object object with get_line() method to supply source code | ||||
| 22735 | # _diagnostics_object place to write debugging information | ||||
| 22736 | # _unexpected_error_count error count used to limit output | ||||
| 22737 | # _lower_case_labels_at line numbers where lower case labels seen | ||||
| 22738 | $tokenizer_self = { | ||||
| 22739 | _rhere_target_list => [], | ||||
| 22740 | _in_here_doc => 0, | ||||
| 22741 | _here_doc_target => "", | ||||
| 22742 | _here_quote_character => "", | ||||
| 22743 | _in_data => 0, | ||||
| 22744 | _in_end => 0, | ||||
| 22745 | _in_format => 0, | ||||
| 22746 | _in_error => 0, | ||||
| 22747 | _in_pod => 0, | ||||
| 22748 | _in_attribute_list => 0, | ||||
| 22749 | _in_quote => 0, | ||||
| 22750 | _quote_target => "", | ||||
| 22751 | _line_start_quote => -1, | ||||
| 22752 | _starting_level => $args{starting_level}, | ||||
| 22753 | _know_starting_level => defined( $args{starting_level} ), | ||||
| 22754 | _tabsize => $args{tabsize}, | ||||
| 22755 | _indent_columns => $args{indent_columns}, | ||||
| 22756 | _look_for_hash_bang => $args{look_for_hash_bang}, | ||||
| 22757 | _trim_qw => $args{trim_qw}, | ||||
| 22758 | _continuation_indentation => $args{continuation_indentation}, | ||||
| 22759 | _outdent_labels => $args{outdent_labels}, | ||||
| 22760 | _last_line_number => $args{starting_line_number} - 1, | ||||
| 22761 | _saw_perl_dash_P => 0, | ||||
| 22762 | _saw_perl_dash_w => 0, | ||||
| 22763 | _saw_use_strict => 0, | ||||
| 22764 | _saw_v_string => 0, | ||||
| 22765 | _look_for_autoloader => $args{look_for_autoloader}, | ||||
| 22766 | _look_for_selfloader => $args{look_for_selfloader}, | ||||
| 22767 | _saw_autoloader => 0, | ||||
| 22768 | _saw_selfloader => 0, | ||||
| 22769 | _saw_hash_bang => 0, | ||||
| 22770 | _saw_end => 0, | ||||
| 22771 | _saw_data => 0, | ||||
| 22772 | _saw_negative_indentation => 0, | ||||
| 22773 | _started_tokenizing => 0, | ||||
| 22774 | _line_buffer_object => $line_buffer_object, | ||||
| 22775 | _debugger_object => $args{debugger_object}, | ||||
| 22776 | _diagnostics_object => $args{diagnostics_object}, | ||||
| 22777 | _logger_object => $args{logger_object}, | ||||
| 22778 | _unexpected_error_count => 0, | ||||
| 22779 | _started_looking_for_here_target_at => 0, | ||||
| 22780 | _nearly_matched_here_target_at => undef, | ||||
| 22781 | _line_text => "", | ||||
| 22782 | _rlower_case_labels_at => undef, | ||||
| 22783 | }; | ||||
| 22784 | |||||
| 22785 | prepare_for_a_new_file(); | ||||
| 22786 | find_starting_indentation_level(); | ||||
| 22787 | |||||
| 22788 | bless $tokenizer_self, $class; | ||||
| 22789 | |||||
| 22790 | # This is not a full class yet, so die if an attempt is made to | ||||
| 22791 | # create more than one object. | ||||
| 22792 | |||||
| 22793 | if ( _increment_count() > 1 ) { | ||||
| 22794 | confess | ||||
| 22795 | "Attempt to create more than 1 object in $class, which is not a true class yet\n"; | ||||
| 22796 | } | ||||
| 22797 | |||||
| 22798 | return $tokenizer_self; | ||||
| 22799 | |||||
| 22800 | } | ||||
| 22801 | |||||
| 22802 | # interface to Perl::Tidy::Logger routines | ||||
| 22803 | sub warning { | ||||
| 22804 | my $logger_object = $tokenizer_self->{_logger_object}; | ||||
| 22805 | if ($logger_object) { | ||||
| 22806 | $logger_object->warning(@_); | ||||
| 22807 | } | ||||
| 22808 | } | ||||
| 22809 | |||||
| 22810 | sub complain { | ||||
| 22811 | my $logger_object = $tokenizer_self->{_logger_object}; | ||||
| 22812 | if ($logger_object) { | ||||
| 22813 | $logger_object->complain(@_); | ||||
| 22814 | } | ||||
| 22815 | } | ||||
| 22816 | |||||
| 22817 | sub write_logfile_entry { | ||||
| 22818 | my $logger_object = $tokenizer_self->{_logger_object}; | ||||
| 22819 | if ($logger_object) { | ||||
| 22820 | $logger_object->write_logfile_entry(@_); | ||||
| 22821 | } | ||||
| 22822 | } | ||||
| 22823 | |||||
| 22824 | sub interrupt_logfile { | ||||
| 22825 | my $logger_object = $tokenizer_self->{_logger_object}; | ||||
| 22826 | if ($logger_object) { | ||||
| 22827 | $logger_object->interrupt_logfile(); | ||||
| 22828 | } | ||||
| 22829 | } | ||||
| 22830 | |||||
| 22831 | sub resume_logfile { | ||||
| 22832 | my $logger_object = $tokenizer_self->{_logger_object}; | ||||
| 22833 | if ($logger_object) { | ||||
| 22834 | $logger_object->resume_logfile(); | ||||
| 22835 | } | ||||
| 22836 | } | ||||
| 22837 | |||||
| 22838 | sub increment_brace_error { | ||||
| 22839 | my $logger_object = $tokenizer_self->{_logger_object}; | ||||
| 22840 | if ($logger_object) { | ||||
| 22841 | $logger_object->increment_brace_error(); | ||||
| 22842 | } | ||||
| 22843 | } | ||||
| 22844 | |||||
| 22845 | sub report_definite_bug { | ||||
| 22846 | my $logger_object = $tokenizer_self->{_logger_object}; | ||||
| 22847 | if ($logger_object) { | ||||
| 22848 | $logger_object->report_definite_bug(); | ||||
| 22849 | } | ||||
| 22850 | } | ||||
| 22851 | |||||
| 22852 | sub brace_warning { | ||||
| 22853 | my $logger_object = $tokenizer_self->{_logger_object}; | ||||
| 22854 | if ($logger_object) { | ||||
| 22855 | $logger_object->brace_warning(@_); | ||||
| 22856 | } | ||||
| 22857 | } | ||||
| 22858 | |||||
| 22859 | sub get_saw_brace_error { | ||||
| 22860 | my $logger_object = $tokenizer_self->{_logger_object}; | ||||
| 22861 | if ($logger_object) { | ||||
| 22862 | $logger_object->get_saw_brace_error(); | ||||
| 22863 | } | ||||
| 22864 | else { | ||||
| 22865 | 0; | ||||
| 22866 | } | ||||
| 22867 | } | ||||
| 22868 | |||||
| 22869 | # interface to Perl::Tidy::Diagnostics routines | ||||
| 22870 | sub write_diagnostics { | ||||
| 22871 | if ( $tokenizer_self->{_diagnostics_object} ) { | ||||
| 22872 | $tokenizer_self->{_diagnostics_object}->write_diagnostics(@_); | ||||
| 22873 | } | ||||
| 22874 | } | ||||
| 22875 | |||||
| 22876 | sub report_tokenization_errors { | ||||
| 22877 | |||||
| 22878 | my $self = shift; | ||||
| 22879 | |||||
| 22880 | my $level = get_indentation_level(); | ||||
| 22881 | if ( $level != $tokenizer_self->{_starting_level} ) { | ||||
| 22882 | warning("final indentation level: $level\n"); | ||||
| 22883 | } | ||||
| 22884 | |||||
| 22885 | check_final_nesting_depths(); | ||||
| 22886 | |||||
| 22887 | if ( $tokenizer_self->{_look_for_hash_bang} | ||||
| 22888 | && !$tokenizer_self->{_saw_hash_bang} ) | ||||
| 22889 | { | ||||
| 22890 | warning( | ||||
| 22891 | "hit EOF without seeing hash-bang line; maybe don't need -x?\n"); | ||||
| 22892 | } | ||||
| 22893 | |||||
| 22894 | if ( $tokenizer_self->{_in_format} ) { | ||||
| 22895 | warning("hit EOF while in format description\n"); | ||||
| 22896 | } | ||||
| 22897 | |||||
| 22898 | if ( $tokenizer_self->{_in_pod} ) { | ||||
| 22899 | |||||
| 22900 | # Just write log entry if this is after __END__ or __DATA__ | ||||
| 22901 | # because this happens to often, and it is not likely to be | ||||
| 22902 | # a parsing error. | ||||
| 22903 | if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) { | ||||
| 22904 | write_logfile_entry( | ||||
| 22905 | "hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n" | ||||
| 22906 | ); | ||||
| 22907 | } | ||||
| 22908 | |||||
| 22909 | else { | ||||
| 22910 | complain( | ||||
| 22911 | "hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n" | ||||
| 22912 | ); | ||||
| 22913 | } | ||||
| 22914 | |||||
| 22915 | } | ||||
| 22916 | |||||
| 22917 | if ( $tokenizer_self->{_in_here_doc} ) { | ||||
| 22918 | my $here_doc_target = $tokenizer_self->{_here_doc_target}; | ||||
| 22919 | my $started_looking_for_here_target_at = | ||||
| 22920 | $tokenizer_self->{_started_looking_for_here_target_at}; | ||||
| 22921 | if ($here_doc_target) { | ||||
| 22922 | warning( | ||||
| 22923 | "hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n" | ||||
| 22924 | ); | ||||
| 22925 | } | ||||
| 22926 | else { | ||||
| 22927 | warning( | ||||
| 22928 | "hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n" | ||||
| 22929 | ); | ||||
| 22930 | } | ||||
| 22931 | my $nearly_matched_here_target_at = | ||||
| 22932 | $tokenizer_self->{_nearly_matched_here_target_at}; | ||||
| 22933 | if ($nearly_matched_here_target_at) { | ||||
| 22934 | warning( | ||||
| 22935 | "NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n" | ||||
| 22936 | ); | ||||
| 22937 | } | ||||
| 22938 | } | ||||
| 22939 | |||||
| 22940 | if ( $tokenizer_self->{_in_quote} ) { | ||||
| 22941 | my $line_start_quote = $tokenizer_self->{_line_start_quote}; | ||||
| 22942 | my $quote_target = $tokenizer_self->{_quote_target}; | ||||
| 22943 | my $what = | ||||
| 22944 | ( $tokenizer_self->{_in_attribute_list} ) | ||||
| 22945 | ? "attribute list" | ||||
| 22946 | : "quote/pattern"; | ||||
| 22947 | warning( | ||||
| 22948 | "hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n" | ||||
| 22949 | ); | ||||
| 22950 | } | ||||
| 22951 | |||||
| 22952 | unless ( $tokenizer_self->{_saw_perl_dash_w} ) { | ||||
| 22953 | if ( $] < 5.006 ) { | ||||
| 22954 | write_logfile_entry("Suggest including '-w parameter'\n"); | ||||
| 22955 | } | ||||
| 22956 | else { | ||||
| 22957 | write_logfile_entry("Suggest including 'use warnings;'\n"); | ||||
| 22958 | } | ||||
| 22959 | } | ||||
| 22960 | |||||
| 22961 | if ( $tokenizer_self->{_saw_perl_dash_P} ) { | ||||
| 22962 | write_logfile_entry("Use of -P parameter for defines is discouraged\n"); | ||||
| 22963 | } | ||||
| 22964 | |||||
| 22965 | unless ( $tokenizer_self->{_saw_use_strict} ) { | ||||
| 22966 | write_logfile_entry("Suggest including 'use strict;'\n"); | ||||
| 22967 | } | ||||
| 22968 | |||||
| 22969 | # it is suggested that labels have at least one upper case character | ||||
| 22970 | # for legibility and to avoid code breakage as new keywords are introduced | ||||
| 22971 | if ( $tokenizer_self->{_rlower_case_labels_at} ) { | ||||
| 22972 | my @lower_case_labels_at = | ||||
| 22973 | @{ $tokenizer_self->{_rlower_case_labels_at} }; | ||||
| 22974 | write_logfile_entry( | ||||
| 22975 | "Suggest using upper case characters in label(s)\n"); | ||||
| 22976 | local $" = ')('; | ||||
| 22977 | write_logfile_entry(" defined at line(s): (@lower_case_labels_at)\n"); | ||||
| 22978 | } | ||||
| 22979 | } | ||||
| 22980 | |||||
| 22981 | sub report_v_string { | ||||
| 22982 | |||||
| 22983 | # warn if this version can't handle v-strings | ||||
| 22984 | my $tok = shift; | ||||
| 22985 | unless ( $tokenizer_self->{_saw_v_string} ) { | ||||
| 22986 | $tokenizer_self->{_saw_v_string} = $tokenizer_self->{_last_line_number}; | ||||
| 22987 | } | ||||
| 22988 | if ( $] < 5.006 ) { | ||||
| 22989 | warning( | ||||
| 22990 | "Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n" | ||||
| 22991 | ); | ||||
| 22992 | } | ||||
| 22993 | } | ||||
| 22994 | |||||
| 22995 | sub get_input_line_number { | ||||
| 22996 | return $tokenizer_self->{_last_line_number}; | ||||
| 22997 | } | ||||
| 22998 | |||||
| 22999 | # returns the next tokenized line | ||||
| 23000 | sub get_line { | ||||
| 23001 | |||||
| 23002 | my $self = shift; | ||||
| 23003 | |||||
| 23004 | # USES GLOBAL VARIABLES: $tokenizer_self, $brace_depth, | ||||
| 23005 | # $square_bracket_depth, $paren_depth | ||||
| 23006 | |||||
| 23007 | my $input_line = $tokenizer_self->{_line_buffer_object}->get_line(); | ||||
| 23008 | $tokenizer_self->{_line_text} = $input_line; | ||||
| 23009 | |||||
| 23010 | return undef unless ($input_line); | ||||
| 23011 | |||||
| 23012 | my $input_line_number = ++$tokenizer_self->{_last_line_number}; | ||||
| 23013 | |||||
| 23014 | # Find and remove what characters terminate this line, including any | ||||
| 23015 | # control r | ||||
| 23016 | my $input_line_separator = ""; | ||||
| 23017 | if ( chomp($input_line) ) { $input_line_separator = $/ } | ||||
| 23018 | |||||
| 23019 | # TODO: what other characters should be included here? | ||||
| 23020 | if ( $input_line =~ s/((\r|\035|\032)+)$// ) { | ||||
| 23021 | $input_line_separator = $2 . $input_line_separator; | ||||
| 23022 | } | ||||
| 23023 | |||||
| 23024 | # for backwards compatibility we keep the line text terminated with | ||||
| 23025 | # a newline character | ||||
| 23026 | $input_line .= "\n"; | ||||
| 23027 | $tokenizer_self->{_line_text} = $input_line; # update | ||||
| 23028 | |||||
| 23029 | # create a data structure describing this line which will be | ||||
| 23030 | # returned to the caller. | ||||
| 23031 | |||||
| 23032 | # _line_type codes are: | ||||
| 23033 | # SYSTEM - system-specific code before hash-bang line | ||||
| 23034 | # CODE - line of perl code (including comments) | ||||
| 23035 | # POD_START - line starting pod, such as '=head' | ||||
| 23036 | # POD - pod documentation text | ||||
| 23037 | # POD_END - last line of pod section, '=cut' | ||||
| 23038 | # HERE - text of here-document | ||||
| 23039 | # HERE_END - last line of here-doc (target word) | ||||
| 23040 | # FORMAT - format section | ||||
| 23041 | # FORMAT_END - last line of format section, '.' | ||||
| 23042 | # DATA_START - __DATA__ line | ||||
| 23043 | # DATA - unidentified text following __DATA__ | ||||
| 23044 | # END_START - __END__ line | ||||
| 23045 | # END - unidentified text following __END__ | ||||
| 23046 | # ERROR - we are in big trouble, probably not a perl script | ||||
| 23047 | |||||
| 23048 | # Other variables: | ||||
| 23049 | # _curly_brace_depth - depth of curly braces at start of line | ||||
| 23050 | # _square_bracket_depth - depth of square brackets at start of line | ||||
| 23051 | # _paren_depth - depth of parens at start of line | ||||
| 23052 | # _starting_in_quote - this line continues a multi-line quote | ||||
| 23053 | # (so don't trim leading blanks!) | ||||
| 23054 | # _ending_in_quote - this line ends in a multi-line quote | ||||
| 23055 | # (so don't trim trailing blanks!) | ||||
| 23056 | my $line_of_tokens = { | ||||
| 23057 | _line_type => 'EOF', | ||||
| 23058 | _line_text => $input_line, | ||||
| 23059 | _line_number => $input_line_number, | ||||
| 23060 | _rtoken_type => undef, | ||||
| 23061 | _rtokens => undef, | ||||
| 23062 | _rlevels => undef, | ||||
| 23063 | _rslevels => undef, | ||||
| 23064 | _rblock_type => undef, | ||||
| 23065 | _rcontainer_type => undef, | ||||
| 23066 | _rcontainer_environment => undef, | ||||
| 23067 | _rtype_sequence => undef, | ||||
| 23068 | _rnesting_tokens => undef, | ||||
| 23069 | _rci_levels => undef, | ||||
| 23070 | _rnesting_blocks => undef, | ||||
| 23071 | _guessed_indentation_level => 0, | ||||
| 23072 | _starting_in_quote => 0, # to be set by subroutine | ||||
| 23073 | _ending_in_quote => 0, | ||||
| 23074 | _curly_brace_depth => $brace_depth, | ||||
| 23075 | _square_bracket_depth => $square_bracket_depth, | ||||
| 23076 | _paren_depth => $paren_depth, | ||||
| 23077 | _quote_character => '', | ||||
| 23078 | }; | ||||
| 23079 | |||||
| 23080 | # must print line unchanged if we are in a here document | ||||
| 23081 | if ( $tokenizer_self->{_in_here_doc} ) { | ||||
| 23082 | |||||
| 23083 | $line_of_tokens->{_line_type} = 'HERE'; | ||||
| 23084 | my $here_doc_target = $tokenizer_self->{_here_doc_target}; | ||||
| 23085 | my $here_quote_character = $tokenizer_self->{_here_quote_character}; | ||||
| 23086 | my $candidate_target = $input_line; | ||||
| 23087 | chomp $candidate_target; | ||||
| 23088 | if ( $candidate_target eq $here_doc_target ) { | ||||
| 23089 | $tokenizer_self->{_nearly_matched_here_target_at} = undef; | ||||
| 23090 | $line_of_tokens->{_line_type} = 'HERE_END'; | ||||
| 23091 | write_logfile_entry("Exiting HERE document $here_doc_target\n"); | ||||
| 23092 | |||||
| 23093 | my $rhere_target_list = $tokenizer_self->{_rhere_target_list}; | ||||
| 23094 | if (@$rhere_target_list) { # there can be multiple here targets | ||||
| 23095 | ( $here_doc_target, $here_quote_character ) = | ||||
| 23096 | @{ shift @$rhere_target_list }; | ||||
| 23097 | $tokenizer_self->{_here_doc_target} = $here_doc_target; | ||||
| 23098 | $tokenizer_self->{_here_quote_character} = | ||||
| 23099 | $here_quote_character; | ||||
| 23100 | write_logfile_entry( | ||||
| 23101 | "Entering HERE document $here_doc_target\n"); | ||||
| 23102 | $tokenizer_self->{_nearly_matched_here_target_at} = undef; | ||||
| 23103 | $tokenizer_self->{_started_looking_for_here_target_at} = | ||||
| 23104 | $input_line_number; | ||||
| 23105 | } | ||||
| 23106 | else { | ||||
| 23107 | $tokenizer_self->{_in_here_doc} = 0; | ||||
| 23108 | $tokenizer_self->{_here_doc_target} = ""; | ||||
| 23109 | $tokenizer_self->{_here_quote_character} = ""; | ||||
| 23110 | } | ||||
| 23111 | } | ||||
| 23112 | |||||
| 23113 | # check for error of extra whitespace | ||||
| 23114 | # note for PERL6: leading whitespace is allowed | ||||
| 23115 | else { | ||||
| 23116 | $candidate_target =~ s/\s*$//; | ||||
| 23117 | $candidate_target =~ s/^\s*//; | ||||
| 23118 | if ( $candidate_target eq $here_doc_target ) { | ||||
| 23119 | $tokenizer_self->{_nearly_matched_here_target_at} = | ||||
| 23120 | $input_line_number; | ||||
| 23121 | } | ||||
| 23122 | } | ||||
| 23123 | return $line_of_tokens; | ||||
| 23124 | } | ||||
| 23125 | |||||
| 23126 | # must print line unchanged if we are in a format section | ||||
| 23127 | elsif ( $tokenizer_self->{_in_format} ) { | ||||
| 23128 | |||||
| 23129 | if ( $input_line =~ /^\.[\s#]*$/ ) { | ||||
| 23130 | write_logfile_entry("Exiting format section\n"); | ||||
| 23131 | $tokenizer_self->{_in_format} = 0; | ||||
| 23132 | $line_of_tokens->{_line_type} = 'FORMAT_END'; | ||||
| 23133 | } | ||||
| 23134 | else { | ||||
| 23135 | $line_of_tokens->{_line_type} = 'FORMAT'; | ||||
| 23136 | } | ||||
| 23137 | return $line_of_tokens; | ||||
| 23138 | } | ||||
| 23139 | |||||
| 23140 | # must print line unchanged if we are in pod documentation | ||||
| 23141 | elsif ( $tokenizer_self->{_in_pod} ) { | ||||
| 23142 | |||||
| 23143 | $line_of_tokens->{_line_type} = 'POD'; | ||||
| 23144 | if ( $input_line =~ /^=cut/ ) { | ||||
| 23145 | $line_of_tokens->{_line_type} = 'POD_END'; | ||||
| 23146 | write_logfile_entry("Exiting POD section\n"); | ||||
| 23147 | $tokenizer_self->{_in_pod} = 0; | ||||
| 23148 | } | ||||
| 23149 | if ( $input_line =~ /^\#\!.*perl\b/ ) { | ||||
| 23150 | warning( | ||||
| 23151 | "Hash-bang in pod can cause older versions of perl to fail! \n" | ||||
| 23152 | ); | ||||
| 23153 | } | ||||
| 23154 | |||||
| 23155 | return $line_of_tokens; | ||||
| 23156 | } | ||||
| 23157 | |||||
| 23158 | # must print line unchanged if we have seen a severe error (i.e., we | ||||
| 23159 | # are seeing illegal tokens and cannot continue. Syntax errors do | ||||
| 23160 | # not pass this route). Calling routine can decide what to do, but | ||||
| 23161 | # the default can be to just pass all lines as if they were after __END__ | ||||
| 23162 | elsif ( $tokenizer_self->{_in_error} ) { | ||||
| 23163 | $line_of_tokens->{_line_type} = 'ERROR'; | ||||
| 23164 | return $line_of_tokens; | ||||
| 23165 | } | ||||
| 23166 | |||||
| 23167 | # print line unchanged if we are __DATA__ section | ||||
| 23168 | elsif ( $tokenizer_self->{_in_data} ) { | ||||
| 23169 | |||||
| 23170 | # ...but look for POD | ||||
| 23171 | # Note that the _in_data and _in_end flags remain set | ||||
| 23172 | # so that we return to that state after seeing the | ||||
| 23173 | # end of a pod section | ||||
| 23174 | if ( $input_line =~ /^=(?!cut)/ ) { | ||||
| 23175 | $line_of_tokens->{_line_type} = 'POD_START'; | ||||
| 23176 | write_logfile_entry("Entering POD section\n"); | ||||
| 23177 | $tokenizer_self->{_in_pod} = 1; | ||||
| 23178 | return $line_of_tokens; | ||||
| 23179 | } | ||||
| 23180 | else { | ||||
| 23181 | $line_of_tokens->{_line_type} = 'DATA'; | ||||
| 23182 | return $line_of_tokens; | ||||
| 23183 | } | ||||
| 23184 | } | ||||
| 23185 | |||||
| 23186 | # print line unchanged if we are in __END__ section | ||||
| 23187 | elsif ( $tokenizer_self->{_in_end} ) { | ||||
| 23188 | |||||
| 23189 | # ...but look for POD | ||||
| 23190 | # Note that the _in_data and _in_end flags remain set | ||||
| 23191 | # so that we return to that state after seeing the | ||||
| 23192 | # end of a pod section | ||||
| 23193 | if ( $input_line =~ /^=(?!cut)/ ) { | ||||
| 23194 | $line_of_tokens->{_line_type} = 'POD_START'; | ||||
| 23195 | write_logfile_entry("Entering POD section\n"); | ||||
| 23196 | $tokenizer_self->{_in_pod} = 1; | ||||
| 23197 | return $line_of_tokens; | ||||
| 23198 | } | ||||
| 23199 | else { | ||||
| 23200 | $line_of_tokens->{_line_type} = 'END'; | ||||
| 23201 | return $line_of_tokens; | ||||
| 23202 | } | ||||
| 23203 | } | ||||
| 23204 | |||||
| 23205 | # check for a hash-bang line if we haven't seen one | ||||
| 23206 | if ( !$tokenizer_self->{_saw_hash_bang} ) { | ||||
| 23207 | if ( $input_line =~ /^\#\!.*perl\b/ ) { | ||||
| 23208 | $tokenizer_self->{_saw_hash_bang} = $input_line_number; | ||||
| 23209 | |||||
| 23210 | # check for -w and -P flags | ||||
| 23211 | if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) { | ||||
| 23212 | $tokenizer_self->{_saw_perl_dash_P} = 1; | ||||
| 23213 | } | ||||
| 23214 | |||||
| 23215 | if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) { | ||||
| 23216 | $tokenizer_self->{_saw_perl_dash_w} = 1; | ||||
| 23217 | } | ||||
| 23218 | |||||
| 23219 | if ( ( $input_line_number > 1 ) | ||||
| 23220 | && ( !$tokenizer_self->{_look_for_hash_bang} ) ) | ||||
| 23221 | { | ||||
| 23222 | |||||
| 23223 | # this is helpful for VMS systems; we may have accidentally | ||||
| 23224 | # tokenized some DCL commands | ||||
| 23225 | if ( $tokenizer_self->{_started_tokenizing} ) { | ||||
| 23226 | warning( | ||||
| 23227 | "There seems to be a hash-bang after line 1; do you need to run with -x ?\n" | ||||
| 23228 | ); | ||||
| 23229 | } | ||||
| 23230 | else { | ||||
| 23231 | complain("Useless hash-bang after line 1\n"); | ||||
| 23232 | } | ||||
| 23233 | } | ||||
| 23234 | |||||
| 23235 | # Report the leading hash-bang as a system line | ||||
| 23236 | # This will prevent -dac from deleting it | ||||
| 23237 | else { | ||||
| 23238 | $line_of_tokens->{_line_type} = 'SYSTEM'; | ||||
| 23239 | return $line_of_tokens; | ||||
| 23240 | } | ||||
| 23241 | } | ||||
| 23242 | } | ||||
| 23243 | |||||
| 23244 | # wait for a hash-bang before parsing if the user invoked us with -x | ||||
| 23245 | if ( $tokenizer_self->{_look_for_hash_bang} | ||||
| 23246 | && !$tokenizer_self->{_saw_hash_bang} ) | ||||
| 23247 | { | ||||
| 23248 | $line_of_tokens->{_line_type} = 'SYSTEM'; | ||||
| 23249 | return $line_of_tokens; | ||||
| 23250 | } | ||||
| 23251 | |||||
| 23252 | # a first line of the form ': #' will be marked as SYSTEM | ||||
| 23253 | # since lines of this form may be used by tcsh | ||||
| 23254 | if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) { | ||||
| 23255 | $line_of_tokens->{_line_type} = 'SYSTEM'; | ||||
| 23256 | return $line_of_tokens; | ||||
| 23257 | } | ||||
| 23258 | |||||
| 23259 | # now we know that it is ok to tokenize the line... | ||||
| 23260 | # the line tokenizer will modify any of these private variables: | ||||
| 23261 | # _rhere_target_list | ||||
| 23262 | # _in_data | ||||
| 23263 | # _in_end | ||||
| 23264 | # _in_format | ||||
| 23265 | # _in_error | ||||
| 23266 | # _in_pod | ||||
| 23267 | # _in_quote | ||||
| 23268 | my $ending_in_quote_last = $tokenizer_self->{_in_quote}; | ||||
| 23269 | tokenize_this_line($line_of_tokens); | ||||
| 23270 | |||||
| 23271 | # Now finish defining the return structure and return it | ||||
| 23272 | $line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote}; | ||||
| 23273 | |||||
| 23274 | # handle severe error (binary data in script) | ||||
| 23275 | if ( $tokenizer_self->{_in_error} ) { | ||||
| 23276 | $tokenizer_self->{_in_quote} = 0; # to avoid any more messages | ||||
| 23277 | warning("Giving up after error\n"); | ||||
| 23278 | $line_of_tokens->{_line_type} = 'ERROR'; | ||||
| 23279 | reset_indentation_level(0); # avoid error messages | ||||
| 23280 | return $line_of_tokens; | ||||
| 23281 | } | ||||
| 23282 | |||||
| 23283 | # handle start of pod documentation | ||||
| 23284 | if ( $tokenizer_self->{_in_pod} ) { | ||||
| 23285 | |||||
| 23286 | # This gets tricky..above a __DATA__ or __END__ section, perl | ||||
| 23287 | # accepts '=cut' as the start of pod section. But afterwards, | ||||
| 23288 | # only pod utilities see it and they may ignore an =cut without | ||||
| 23289 | # leading =head. In any case, this isn't good. | ||||
| 23290 | if ( $input_line =~ /^=cut\b/ ) { | ||||
| 23291 | if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) { | ||||
| 23292 | complain("=cut while not in pod ignored\n"); | ||||
| 23293 | $tokenizer_self->{_in_pod} = 0; | ||||
| 23294 | $line_of_tokens->{_line_type} = 'POD_END'; | ||||
| 23295 | } | ||||
| 23296 | else { | ||||
| 23297 | $line_of_tokens->{_line_type} = 'POD_START'; | ||||
| 23298 | complain( | ||||
| 23299 | "=cut starts a pod section .. this can fool pod utilities.\n" | ||||
| 23300 | ); | ||||
| 23301 | write_logfile_entry("Entering POD section\n"); | ||||
| 23302 | } | ||||
| 23303 | } | ||||
| 23304 | |||||
| 23305 | else { | ||||
| 23306 | $line_of_tokens->{_line_type} = 'POD_START'; | ||||
| 23307 | write_logfile_entry("Entering POD section\n"); | ||||
| 23308 | } | ||||
| 23309 | |||||
| 23310 | return $line_of_tokens; | ||||
| 23311 | } | ||||
| 23312 | |||||
| 23313 | # update indentation levels for log messages | ||||
| 23314 | if ( $input_line !~ /^\s*$/ ) { | ||||
| 23315 | my $rlevels = $line_of_tokens->{_rlevels}; | ||||
| 23316 | $line_of_tokens->{_guessed_indentation_level} = | ||||
| 23317 | guess_old_indentation_level($input_line); | ||||
| 23318 | } | ||||
| 23319 | |||||
| 23320 | # see if this line contains here doc targets | ||||
| 23321 | my $rhere_target_list = $tokenizer_self->{_rhere_target_list}; | ||||
| 23322 | if (@$rhere_target_list) { | ||||
| 23323 | |||||
| 23324 | my ( $here_doc_target, $here_quote_character ) = | ||||
| 23325 | @{ shift @$rhere_target_list }; | ||||
| 23326 | $tokenizer_self->{_in_here_doc} = 1; | ||||
| 23327 | $tokenizer_self->{_here_doc_target} = $here_doc_target; | ||||
| 23328 | $tokenizer_self->{_here_quote_character} = $here_quote_character; | ||||
| 23329 | write_logfile_entry("Entering HERE document $here_doc_target\n"); | ||||
| 23330 | $tokenizer_self->{_started_looking_for_here_target_at} = | ||||
| 23331 | $input_line_number; | ||||
| 23332 | } | ||||
| 23333 | |||||
| 23334 | # NOTE: __END__ and __DATA__ statements are written unformatted | ||||
| 23335 | # because they can theoretically contain additional characters | ||||
| 23336 | # which are not tokenized (and cannot be read with <DATA> either!). | ||||
| 23337 | if ( $tokenizer_self->{_in_data} ) { | ||||
| 23338 | $line_of_tokens->{_line_type} = 'DATA_START'; | ||||
| 23339 | write_logfile_entry("Starting __DATA__ section\n"); | ||||
| 23340 | $tokenizer_self->{_saw_data} = 1; | ||||
| 23341 | |||||
| 23342 | # keep parsing after __DATA__ if use SelfLoader was seen | ||||
| 23343 | if ( $tokenizer_self->{_saw_selfloader} ) { | ||||
| 23344 | $tokenizer_self->{_in_data} = 0; | ||||
| 23345 | write_logfile_entry( | ||||
| 23346 | "SelfLoader seen, continuing; -nlsl deactivates\n"); | ||||
| 23347 | } | ||||
| 23348 | |||||
| 23349 | return $line_of_tokens; | ||||
| 23350 | } | ||||
| 23351 | |||||
| 23352 | elsif ( $tokenizer_self->{_in_end} ) { | ||||
| 23353 | $line_of_tokens->{_line_type} = 'END_START'; | ||||
| 23354 | write_logfile_entry("Starting __END__ section\n"); | ||||
| 23355 | $tokenizer_self->{_saw_end} = 1; | ||||
| 23356 | |||||
| 23357 | # keep parsing after __END__ if use AutoLoader was seen | ||||
| 23358 | if ( $tokenizer_self->{_saw_autoloader} ) { | ||||
| 23359 | $tokenizer_self->{_in_end} = 0; | ||||
| 23360 | write_logfile_entry( | ||||
| 23361 | "AutoLoader seen, continuing; -nlal deactivates\n"); | ||||
| 23362 | } | ||||
| 23363 | return $line_of_tokens; | ||||
| 23364 | } | ||||
| 23365 | |||||
| 23366 | # now, finally, we know that this line is type 'CODE' | ||||
| 23367 | $line_of_tokens->{_line_type} = 'CODE'; | ||||
| 23368 | |||||
| 23369 | # remember if we have seen any real code | ||||
| 23370 | if ( !$tokenizer_self->{_started_tokenizing} | ||||
| 23371 | && $input_line !~ /^\s*$/ | ||||
| 23372 | && $input_line !~ /^\s*#/ ) | ||||
| 23373 | { | ||||
| 23374 | $tokenizer_self->{_started_tokenizing} = 1; | ||||
| 23375 | } | ||||
| 23376 | |||||
| 23377 | if ( $tokenizer_self->{_debugger_object} ) { | ||||
| 23378 | $tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens); | ||||
| 23379 | } | ||||
| 23380 | |||||
| 23381 | # Note: if keyword 'format' occurs in this line code, it is still CODE | ||||
| 23382 | # (keyword 'format' need not start a line) | ||||
| 23383 | if ( $tokenizer_self->{_in_format} ) { | ||||
| 23384 | write_logfile_entry("Entering format section\n"); | ||||
| 23385 | } | ||||
| 23386 | |||||
| 23387 | if ( $tokenizer_self->{_in_quote} | ||||
| 23388 | and ( $tokenizer_self->{_line_start_quote} < 0 ) ) | ||||
| 23389 | { | ||||
| 23390 | |||||
| 23391 | #if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) { | ||||
| 23392 | if ( | ||||
| 23393 | ( my $quote_target = $tokenizer_self->{_quote_target} ) !~ /^\s*$/ ) | ||||
| 23394 | { | ||||
| 23395 | $tokenizer_self->{_line_start_quote} = $input_line_number; | ||||
| 23396 | write_logfile_entry( | ||||
| 23397 | "Start multi-line quote or pattern ending in $quote_target\n"); | ||||
| 23398 | } | ||||
| 23399 | } | ||||
| 23400 | elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 ) | ||||
| 23401 | and !$tokenizer_self->{_in_quote} ) | ||||
| 23402 | { | ||||
| 23403 | $tokenizer_self->{_line_start_quote} = -1; | ||||
| 23404 | write_logfile_entry("End of multi-line quote or pattern\n"); | ||||
| 23405 | } | ||||
| 23406 | |||||
| 23407 | # we are returning a line of CODE | ||||
| 23408 | return $line_of_tokens; | ||||
| 23409 | } | ||||
| 23410 | |||||
| 23411 | sub find_starting_indentation_level { | ||||
| 23412 | |||||
| 23413 | # We need to find the indentation level of the first line of the | ||||
| 23414 | # script being formatted. Often it will be zero for an entire file, | ||||
| 23415 | # but if we are formatting a local block of code (within an editor for | ||||
| 23416 | # example) it may not be zero. The user may specify this with the | ||||
| 23417 | # -sil=n parameter but normally doesn't so we have to guess. | ||||
| 23418 | # | ||||
| 23419 | # USES GLOBAL VARIABLES: $tokenizer_self | ||||
| 23420 | my $starting_level = 0; | ||||
| 23421 | |||||
| 23422 | # use value if given as parameter | ||||
| 23423 | if ( $tokenizer_self->{_know_starting_level} ) { | ||||
| 23424 | $starting_level = $tokenizer_self->{_starting_level}; | ||||
| 23425 | } | ||||
| 23426 | |||||
| 23427 | # if we know there is a hash_bang line, the level must be zero | ||||
| 23428 | elsif ( $tokenizer_self->{_look_for_hash_bang} ) { | ||||
| 23429 | $tokenizer_self->{_know_starting_level} = 1; | ||||
| 23430 | } | ||||
| 23431 | |||||
| 23432 | # otherwise figure it out from the input file | ||||
| 23433 | else { | ||||
| 23434 | my $line; | ||||
| 23435 | my $i = 0; | ||||
| 23436 | |||||
| 23437 | # keep looking at lines until we find a hash bang or piece of code | ||||
| 23438 | my $msg = ""; | ||||
| 23439 | while ( $line = | ||||
| 23440 | $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) ) | ||||
| 23441 | { | ||||
| 23442 | |||||
| 23443 | # if first line is #! then assume starting level is zero | ||||
| 23444 | if ( $i == 1 && $line =~ /^\#\!/ ) { | ||||
| 23445 | $starting_level = 0; | ||||
| 23446 | last; | ||||
| 23447 | } | ||||
| 23448 | next if ( $line =~ /^\s*#/ ); # skip past comments | ||||
| 23449 | next if ( $line =~ /^\s*$/ ); # skip past blank lines | ||||
| 23450 | $starting_level = guess_old_indentation_level($line); | ||||
| 23451 | last; | ||||
| 23452 | } | ||||
| 23453 | $msg = "Line $i implies starting-indentation-level = $starting_level\n"; | ||||
| 23454 | write_logfile_entry("$msg"); | ||||
| 23455 | } | ||||
| 23456 | $tokenizer_self->{_starting_level} = $starting_level; | ||||
| 23457 | reset_indentation_level($starting_level); | ||||
| 23458 | } | ||||
| 23459 | |||||
| 23460 | sub guess_old_indentation_level { | ||||
| 23461 | my ($line) = @_; | ||||
| 23462 | |||||
| 23463 | # Guess the indentation level of an input line. | ||||
| 23464 | # | ||||
| 23465 | # For the first line of code this result will define the starting | ||||
| 23466 | # indentation level. It will mainly be non-zero when perltidy is applied | ||||
| 23467 | # within an editor to a local block of code. | ||||
| 23468 | # | ||||
| 23469 | # This is an impossible task in general because we can't know what tabs | ||||
| 23470 | # meant for the old script and how many spaces were used for one | ||||
| 23471 | # indentation level in the given input script. For example it may have | ||||
| 23472 | # been previously formatted with -i=7 -et=3. But we can at least try to | ||||
| 23473 | # make sure that perltidy guesses correctly if it is applied repeatedly to | ||||
| 23474 | # a block of code within an editor, so that the block stays at the same | ||||
| 23475 | # level when perltidy is applied repeatedly. | ||||
| 23476 | # | ||||
| 23477 | # USES GLOBAL VARIABLES: $tokenizer_self | ||||
| 23478 | my $level = 0; | ||||
| 23479 | |||||
| 23480 | # find leading tabs, spaces, and any statement label | ||||
| 23481 | my $spaces = 0; | ||||
| 23482 | if ( $line =~ /^(\t+)?(\s+)?(\w+:[^:])?/ ) { | ||||
| 23483 | |||||
| 23484 | # If there are leading tabs, we use the tab scheme for this run, if | ||||
| 23485 | # any, so that the code will remain stable when editing. | ||||
| 23486 | if ($1) { $spaces += length($1) * $tokenizer_self->{_tabsize} } | ||||
| 23487 | |||||
| 23488 | if ($2) { $spaces += length($2) } | ||||
| 23489 | |||||
| 23490 | # correct for outdented labels | ||||
| 23491 | if ( $3 && $tokenizer_self->{'_outdent_labels'} ) { | ||||
| 23492 | $spaces += $tokenizer_self->{_continuation_indentation}; | ||||
| 23493 | } | ||||
| 23494 | } | ||||
| 23495 | |||||
| 23496 | # compute indentation using the value of -i for this run. | ||||
| 23497 | # If -i=0 is used for this run (which is possible) it doesn't matter | ||||
| 23498 | # what we do here but we'll guess that the old run used 4 spaces per level. | ||||
| 23499 | my $indent_columns = $tokenizer_self->{_indent_columns}; | ||||
| 23500 | $indent_columns = 4 if ( !$indent_columns ); | ||||
| 23501 | $level = int( $spaces / $indent_columns ); | ||||
| 23502 | return ($level); | ||||
| 23503 | } | ||||
| 23504 | |||||
| 23505 | # This is a currently unused debug routine | ||||
| 23506 | sub dump_functions { | ||||
| 23507 | |||||
| 23508 | my $fh = *STDOUT; | ||||
| 23509 | my ( $pkg, $sub ); | ||||
| 23510 | foreach $pkg ( keys %is_user_function ) { | ||||
| 23511 | print $fh "\nnon-constant subs in package $pkg\n"; | ||||
| 23512 | |||||
| 23513 | foreach $sub ( keys %{ $is_user_function{$pkg} } ) { | ||||
| 23514 | my $msg = ""; | ||||
| 23515 | if ( $is_block_list_function{$pkg}{$sub} ) { | ||||
| 23516 | $msg = 'block_list'; | ||||
| 23517 | } | ||||
| 23518 | |||||
| 23519 | if ( $is_block_function{$pkg}{$sub} ) { | ||||
| 23520 | $msg = 'block'; | ||||
| 23521 | } | ||||
| 23522 | print $fh "$sub $msg\n"; | ||||
| 23523 | } | ||||
| 23524 | } | ||||
| 23525 | |||||
| 23526 | foreach $pkg ( keys %is_constant ) { | ||||
| 23527 | print $fh "\nconstants and constant subs in package $pkg\n"; | ||||
| 23528 | |||||
| 23529 | foreach $sub ( keys %{ $is_constant{$pkg} } ) { | ||||
| 23530 | print $fh "$sub\n"; | ||||
| 23531 | } | ||||
| 23532 | } | ||||
| 23533 | } | ||||
| 23534 | |||||
| 23535 | sub ones_count { | ||||
| 23536 | |||||
| 23537 | # count number of 1's in a string of 1's and 0's | ||||
| 23538 | # example: ones_count("010101010101") gives 6 | ||||
| 23539 | return ( my $cis = $_[0] ) =~ tr/1/0/; | ||||
| 23540 | } | ||||
| 23541 | |||||
| 23542 | sub prepare_for_a_new_file { | ||||
| 23543 | |||||
| 23544 | # previous tokens needed to determine what to expect next | ||||
| 23545 | $last_nonblank_token = ';'; # the only possible starting state which | ||||
| 23546 | $last_nonblank_type = ';'; # will make a leading brace a code block | ||||
| 23547 | $last_nonblank_block_type = ''; | ||||
| 23548 | |||||
| 23549 | # scalars for remembering statement types across multiple lines | ||||
| 23550 | $statement_type = ''; # '' or 'use' or 'sub..' or 'case..' | ||||
| 23551 | $in_attribute_list = 0; | ||||
| 23552 | |||||
| 23553 | # scalars for remembering where we are in the file | ||||
| 23554 | $current_package = "main"; | ||||
| 23555 | $context = UNKNOWN_CONTEXT; | ||||
| 23556 | |||||
| 23557 | # hashes used to remember function information | ||||
| 23558 | %is_constant = (); # user-defined constants | ||||
| 23559 | %is_user_function = (); # user-defined functions | ||||
| 23560 | %user_function_prototype = (); # their prototypes | ||||
| 23561 | %is_block_function = (); | ||||
| 23562 | %is_block_list_function = (); | ||||
| 23563 | %saw_function_definition = (); | ||||
| 23564 | |||||
| 23565 | # variables used to track depths of various containers | ||||
| 23566 | # and report nesting errors | ||||
| 23567 | $paren_depth = 0; | ||||
| 23568 | $brace_depth = 0; | ||||
| 23569 | $square_bracket_depth = 0; | ||||
| 23570 | @current_depth[ 0 .. $#closing_brace_names ] = | ||||
| 23571 | (0) x scalar @closing_brace_names; | ||||
| 23572 | $total_depth = 0; | ||||
| 23573 | @total_depth = (); | ||||
| 23574 | @nesting_sequence_number[ 0 .. $#closing_brace_names ] = | ||||
| 23575 | ( 0 .. $#closing_brace_names ); | ||||
| 23576 | @current_sequence_number = (); | ||||
| 23577 | $paren_type[$paren_depth] = ''; | ||||
| 23578 | $paren_semicolon_count[$paren_depth] = 0; | ||||
| 23579 | $paren_structural_type[$brace_depth] = ''; | ||||
| 23580 | $brace_type[$brace_depth] = ';'; # identify opening brace as code block | ||||
| 23581 | $brace_structural_type[$brace_depth] = ''; | ||||
| 23582 | $brace_context[$brace_depth] = UNKNOWN_CONTEXT; | ||||
| 23583 | $brace_package[$paren_depth] = $current_package; | ||||
| 23584 | $square_bracket_type[$square_bracket_depth] = ''; | ||||
| 23585 | $square_bracket_structural_type[$square_bracket_depth] = ''; | ||||
| 23586 | |||||
| 23587 | initialize_tokenizer_state(); | ||||
| 23588 | } | ||||
| 23589 | |||||
| 23590 | { # begin tokenize_this_line | ||||
| 23591 | |||||
| 23592 | 3 | 29µs | 2 | 81µs | # spent 45µs (9+36) within Perl::Tidy::Tokenizer::BEGIN@23592 which was called:
# once (9µs+36µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 23592 # spent 45µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@23592
# spent 36µs making 1 call to constant::import |
| 23593 | 2 | 26µs | 2 | 78µs | # spent 43µs (7+35) within Perl::Tidy::Tokenizer::BEGIN@23593 which was called:
# once (7µs+35µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 23593 # spent 43µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@23593
# spent 35µs making 1 call to constant::import |
| 23594 | 2 | 22µs | 2 | 66µs | # spent 36µs (6+30) within Perl::Tidy::Tokenizer::BEGIN@23594 which was called:
# once (6µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 23594 # spent 36µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@23594
# spent 30µs making 1 call to constant::import |
| 23595 | 2 | 14.5ms | 2 | 65µs | # spent 36µs (6+30) within Perl::Tidy::Tokenizer::BEGIN@23595 which was called:
# once (6µs+30µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 23595 # spent 36µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@23595
# spent 30µs making 1 call to constant::import |
| 23596 | |||||
| 23597 | # TV1: scalars for processing one LINE. | ||||
| 23598 | # Re-initialized on each entry to sub tokenize_this_line. | ||||
| 23599 | my ( | ||||
| 23600 | 1 | 100ns | $block_type, $container_type, $expecting, | ||
| 23601 | $i, $i_tok, $input_line, | ||||
| 23602 | $input_line_number, $last_nonblank_i, $max_token_index, | ||||
| 23603 | $next_tok, $next_type, $peeked_ahead, | ||||
| 23604 | $prototype, $rhere_target_list, $rtoken_map, | ||||
| 23605 | $rtoken_type, $rtokens, $tok, | ||||
| 23606 | $type, $type_sequence, $indent_flag, | ||||
| 23607 | ); | ||||
| 23608 | |||||
| 23609 | # TV2: refs to ARRAYS for processing one LINE | ||||
| 23610 | # Re-initialized on each call. | ||||
| 23611 | 1 | 200ns | my $routput_token_list = []; # stack of output token indexes | ||
| 23612 | 1 | 100ns | my $routput_token_type = []; # token types | ||
| 23613 | 1 | 100ns | my $routput_block_type = []; # types of code block | ||
| 23614 | 1 | 200ns | my $routput_container_type = []; # paren types, such as if, elsif, .. | ||
| 23615 | 1 | 100ns | my $routput_type_sequence = []; # nesting sequential number | ||
| 23616 | 1 | 100ns | my $routput_indent_flag = []; # | ||
| 23617 | |||||
| 23618 | # TV3: SCALARS for quote variables. These are initialized with a | ||||
| 23619 | # subroutine call and continually updated as lines are processed. | ||||
| 23620 | 1 | 100ns | my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth, | ||
| 23621 | $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, ); | ||||
| 23622 | |||||
| 23623 | # TV4: SCALARS for multi-line identifiers and | ||||
| 23624 | # statements. These are initialized with a subroutine call | ||||
| 23625 | # and continually updated as lines are processed. | ||||
| 23626 | my ( $id_scan_state, $identifier, $want_paren, $indented_if_level ); | ||||
| 23627 | |||||
| 23628 | # TV5: SCALARS for tracking indentation level. | ||||
| 23629 | # Initialized once and continually updated as lines are | ||||
| 23630 | # processed. | ||||
| 23631 | my ( | ||||
| 23632 | $nesting_token_string, $nesting_type_string, | ||||
| 23633 | $nesting_block_string, $nesting_block_flag, | ||||
| 23634 | $nesting_list_string, $nesting_list_flag, | ||||
| 23635 | $ci_string_in_tokenizer, $continuation_string_in_tokenizer, | ||||
| 23636 | $in_statement_continuation, $level_in_tokenizer, | ||||
| 23637 | $slevel_in_tokenizer, $rslevel_stack, | ||||
| 23638 | ); | ||||
| 23639 | |||||
| 23640 | # TV6: SCALARS for remembering several previous | ||||
| 23641 | # tokens. Initialized once and continually updated as | ||||
| 23642 | # lines are processed. | ||||
| 23643 | my ( | ||||
| 23644 | $last_nonblank_container_type, $last_nonblank_type_sequence, | ||||
| 23645 | $last_last_nonblank_token, $last_last_nonblank_type, | ||||
| 23646 | $last_last_nonblank_block_type, $last_last_nonblank_container_type, | ||||
| 23647 | $last_last_nonblank_type_sequence, $last_nonblank_prototype, | ||||
| 23648 | ); | ||||
| 23649 | |||||
| 23650 | # ---------------------------------------------------------------- | ||||
| 23651 | # beginning of tokenizer variable access and manipulation routines | ||||
| 23652 | # ---------------------------------------------------------------- | ||||
| 23653 | |||||
| 23654 | sub initialize_tokenizer_state { | ||||
| 23655 | |||||
| 23656 | # TV1: initialized on each call | ||||
| 23657 | # TV2: initialized on each call | ||||
| 23658 | # TV3: | ||||
| 23659 | $in_quote = 0; | ||||
| 23660 | $quote_type = 'Q'; | ||||
| 23661 | $quote_character = ""; | ||||
| 23662 | $quote_pos = 0; | ||||
| 23663 | $quote_depth = 0; | ||||
| 23664 | $quoted_string_1 = ""; | ||||
| 23665 | $quoted_string_2 = ""; | ||||
| 23666 | $allowed_quote_modifiers = ""; | ||||
| 23667 | |||||
| 23668 | # TV4: | ||||
| 23669 | $id_scan_state = ''; | ||||
| 23670 | $identifier = ''; | ||||
| 23671 | $want_paren = ""; | ||||
| 23672 | $indented_if_level = 0; | ||||
| 23673 | |||||
| 23674 | # TV5: | ||||
| 23675 | $nesting_token_string = ""; | ||||
| 23676 | $nesting_type_string = ""; | ||||
| 23677 | $nesting_block_string = '1'; # initially in a block | ||||
| 23678 | $nesting_block_flag = 1; | ||||
| 23679 | $nesting_list_string = '0'; # initially not in a list | ||||
| 23680 | $nesting_list_flag = 0; # initially not in a list | ||||
| 23681 | $ci_string_in_tokenizer = ""; | ||||
| 23682 | $continuation_string_in_tokenizer = "0"; | ||||
| 23683 | $in_statement_continuation = 0; | ||||
| 23684 | $level_in_tokenizer = 0; | ||||
| 23685 | $slevel_in_tokenizer = 0; | ||||
| 23686 | $rslevel_stack = []; | ||||
| 23687 | |||||
| 23688 | # TV6: | ||||
| 23689 | $last_nonblank_container_type = ''; | ||||
| 23690 | $last_nonblank_type_sequence = ''; | ||||
| 23691 | $last_last_nonblank_token = ';'; | ||||
| 23692 | $last_last_nonblank_type = ';'; | ||||
| 23693 | $last_last_nonblank_block_type = ''; | ||||
| 23694 | $last_last_nonblank_container_type = ''; | ||||
| 23695 | $last_last_nonblank_type_sequence = ''; | ||||
| 23696 | $last_nonblank_prototype = ""; | ||||
| 23697 | } | ||||
| 23698 | |||||
| 23699 | sub save_tokenizer_state { | ||||
| 23700 | |||||
| 23701 | my $rTV1 = [ | ||||
| 23702 | $block_type, $container_type, $expecting, | ||||
| 23703 | $i, $i_tok, $input_line, | ||||
| 23704 | $input_line_number, $last_nonblank_i, $max_token_index, | ||||
| 23705 | $next_tok, $next_type, $peeked_ahead, | ||||
| 23706 | $prototype, $rhere_target_list, $rtoken_map, | ||||
| 23707 | $rtoken_type, $rtokens, $tok, | ||||
| 23708 | $type, $type_sequence, $indent_flag, | ||||
| 23709 | ]; | ||||
| 23710 | |||||
| 23711 | my $rTV2 = [ | ||||
| 23712 | $routput_token_list, $routput_token_type, | ||||
| 23713 | $routput_block_type, $routput_container_type, | ||||
| 23714 | $routput_type_sequence, $routput_indent_flag, | ||||
| 23715 | ]; | ||||
| 23716 | |||||
| 23717 | my $rTV3 = [ | ||||
| 23718 | $in_quote, $quote_type, | ||||
| 23719 | $quote_character, $quote_pos, | ||||
| 23720 | $quote_depth, $quoted_string_1, | ||||
| 23721 | $quoted_string_2, $allowed_quote_modifiers, | ||||
| 23722 | ]; | ||||
| 23723 | |||||
| 23724 | my $rTV4 = | ||||
| 23725 | [ $id_scan_state, $identifier, $want_paren, $indented_if_level ]; | ||||
| 23726 | |||||
| 23727 | my $rTV5 = [ | ||||
| 23728 | $nesting_token_string, $nesting_type_string, | ||||
| 23729 | $nesting_block_string, $nesting_block_flag, | ||||
| 23730 | $nesting_list_string, $nesting_list_flag, | ||||
| 23731 | $ci_string_in_tokenizer, $continuation_string_in_tokenizer, | ||||
| 23732 | $in_statement_continuation, $level_in_tokenizer, | ||||
| 23733 | $slevel_in_tokenizer, $rslevel_stack, | ||||
| 23734 | ]; | ||||
| 23735 | |||||
| 23736 | my $rTV6 = [ | ||||
| 23737 | $last_nonblank_container_type, | ||||
| 23738 | $last_nonblank_type_sequence, | ||||
| 23739 | $last_last_nonblank_token, | ||||
| 23740 | $last_last_nonblank_type, | ||||
| 23741 | $last_last_nonblank_block_type, | ||||
| 23742 | $last_last_nonblank_container_type, | ||||
| 23743 | $last_last_nonblank_type_sequence, | ||||
| 23744 | $last_nonblank_prototype, | ||||
| 23745 | ]; | ||||
| 23746 | return [ $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ]; | ||||
| 23747 | } | ||||
| 23748 | |||||
| 23749 | sub restore_tokenizer_state { | ||||
| 23750 | my ($rstate) = @_; | ||||
| 23751 | my ( $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate}; | ||||
| 23752 | ( | ||||
| 23753 | $block_type, $container_type, $expecting, | ||||
| 23754 | $i, $i_tok, $input_line, | ||||
| 23755 | $input_line_number, $last_nonblank_i, $max_token_index, | ||||
| 23756 | $next_tok, $next_type, $peeked_ahead, | ||||
| 23757 | $prototype, $rhere_target_list, $rtoken_map, | ||||
| 23758 | $rtoken_type, $rtokens, $tok, | ||||
| 23759 | $type, $type_sequence, $indent_flag, | ||||
| 23760 | ) = @{$rTV1}; | ||||
| 23761 | |||||
| 23762 | ( | ||||
| 23763 | $routput_token_list, $routput_token_type, | ||||
| 23764 | $routput_block_type, $routput_container_type, | ||||
| 23765 | $routput_type_sequence, $routput_type_sequence, | ||||
| 23766 | ) = @{$rTV2}; | ||||
| 23767 | |||||
| 23768 | ( | ||||
| 23769 | $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth, | ||||
| 23770 | $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, | ||||
| 23771 | ) = @{$rTV3}; | ||||
| 23772 | |||||
| 23773 | ( $id_scan_state, $identifier, $want_paren, $indented_if_level ) = | ||||
| 23774 | @{$rTV4}; | ||||
| 23775 | |||||
| 23776 | ( | ||||
| 23777 | $nesting_token_string, $nesting_type_string, | ||||
| 23778 | $nesting_block_string, $nesting_block_flag, | ||||
| 23779 | $nesting_list_string, $nesting_list_flag, | ||||
| 23780 | $ci_string_in_tokenizer, $continuation_string_in_tokenizer, | ||||
| 23781 | $in_statement_continuation, $level_in_tokenizer, | ||||
| 23782 | $slevel_in_tokenizer, $rslevel_stack, | ||||
| 23783 | ) = @{$rTV5}; | ||||
| 23784 | |||||
| 23785 | ( | ||||
| 23786 | $last_nonblank_container_type, | ||||
| 23787 | $last_nonblank_type_sequence, | ||||
| 23788 | $last_last_nonblank_token, | ||||
| 23789 | $last_last_nonblank_type, | ||||
| 23790 | $last_last_nonblank_block_type, | ||||
| 23791 | $last_last_nonblank_container_type, | ||||
| 23792 | $last_last_nonblank_type_sequence, | ||||
| 23793 | $last_nonblank_prototype, | ||||
| 23794 | ) = @{$rTV6}; | ||||
| 23795 | } | ||||
| 23796 | |||||
| 23797 | sub get_indentation_level { | ||||
| 23798 | |||||
| 23799 | # patch to avoid reporting error if indented if is not terminated | ||||
| 23800 | if ($indented_if_level) { return $level_in_tokenizer - 1 } | ||||
| 23801 | return $level_in_tokenizer; | ||||
| 23802 | } | ||||
| 23803 | |||||
| 23804 | sub reset_indentation_level { | ||||
| 23805 | $level_in_tokenizer = $_[0]; | ||||
| 23806 | $slevel_in_tokenizer = $_[0]; | ||||
| 23807 | push @{$rslevel_stack}, $slevel_in_tokenizer; | ||||
| 23808 | } | ||||
| 23809 | |||||
| 23810 | sub peeked_ahead { | ||||
| 23811 | $peeked_ahead = defined( $_[0] ) ? $_[0] : $peeked_ahead; | ||||
| 23812 | } | ||||
| 23813 | |||||
| 23814 | # ------------------------------------------------------------ | ||||
| 23815 | # end of tokenizer variable access and manipulation routines | ||||
| 23816 | # ------------------------------------------------------------ | ||||
| 23817 | |||||
| 23818 | # ------------------------------------------------------------ | ||||
| 23819 | # beginning of various scanner interface routines | ||||
| 23820 | # ------------------------------------------------------------ | ||||
| 23821 | sub scan_replacement_text { | ||||
| 23822 | |||||
| 23823 | # check for here-docs in replacement text invoked by | ||||
| 23824 | # a substitution operator with executable modifier 'e'. | ||||
| 23825 | # | ||||
| 23826 | # given: | ||||
| 23827 | # $replacement_text | ||||
| 23828 | # return: | ||||
| 23829 | # $rht = reference to any here-doc targets | ||||
| 23830 | my ($replacement_text) = @_; | ||||
| 23831 | |||||
| 23832 | # quick check | ||||
| 23833 | return undef unless ( $replacement_text =~ /<</ ); | ||||
| 23834 | |||||
| 23835 | write_logfile_entry("scanning replacement text for here-doc targets\n"); | ||||
| 23836 | |||||
| 23837 | # save the logger object for error messages | ||||
| 23838 | my $logger_object = $tokenizer_self->{_logger_object}; | ||||
| 23839 | |||||
| 23840 | # localize all package variables | ||||
| 23841 | local ( | ||||
| 23842 | $tokenizer_self, $last_nonblank_token, | ||||
| 23843 | $last_nonblank_type, $last_nonblank_block_type, | ||||
| 23844 | $statement_type, $in_attribute_list, | ||||
| 23845 | $current_package, $context, | ||||
| 23846 | %is_constant, %is_user_function, | ||||
| 23847 | %user_function_prototype, %is_block_function, | ||||
| 23848 | %is_block_list_function, %saw_function_definition, | ||||
| 23849 | $brace_depth, $paren_depth, | ||||
| 23850 | $square_bracket_depth, @current_depth, | ||||
| 23851 | @total_depth, $total_depth, | ||||
| 23852 | @nesting_sequence_number, @current_sequence_number, | ||||
| 23853 | @paren_type, @paren_semicolon_count, | ||||
| 23854 | @paren_structural_type, @brace_type, | ||||
| 23855 | @brace_structural_type, @brace_context, | ||||
| 23856 | @brace_package, @square_bracket_type, | ||||
| 23857 | @square_bracket_structural_type, @depth_array, | ||||
| 23858 | @starting_line_of_current_depth, @nested_ternary_flag, | ||||
| 23859 | @nested_statement_type, | ||||
| 23860 | ); | ||||
| 23861 | |||||
| 23862 | # save all lexical variables | ||||
| 23863 | my $rstate = save_tokenizer_state(); | ||||
| 23864 | _decrement_count(); # avoid error check for multiple tokenizers | ||||
| 23865 | |||||
| 23866 | # make a new tokenizer | ||||
| 23867 | my $rOpts = {}; | ||||
| 23868 | my $rpending_logfile_message; | ||||
| 23869 | my $source_object = | ||||
| 23870 | Perl::Tidy::LineSource->new( \$replacement_text, $rOpts, | ||||
| 23871 | $rpending_logfile_message ); | ||||
| 23872 | my $tokenizer = Perl::Tidy::Tokenizer->new( | ||||
| 23873 | source_object => $source_object, | ||||
| 23874 | logger_object => $logger_object, | ||||
| 23875 | starting_line_number => $input_line_number, | ||||
| 23876 | ); | ||||
| 23877 | |||||
| 23878 | # scan the replacement text | ||||
| 23879 | 1 while ( $tokenizer->get_line() ); | ||||
| 23880 | |||||
| 23881 | # remove any here doc targets | ||||
| 23882 | my $rht = undef; | ||||
| 23883 | if ( $tokenizer_self->{_in_here_doc} ) { | ||||
| 23884 | $rht = []; | ||||
| 23885 | push @{$rht}, | ||||
| 23886 | [ | ||||
| 23887 | $tokenizer_self->{_here_doc_target}, | ||||
| 23888 | $tokenizer_self->{_here_quote_character} | ||||
| 23889 | ]; | ||||
| 23890 | if ( $tokenizer_self->{_rhere_target_list} ) { | ||||
| 23891 | push @{$rht}, @{ $tokenizer_self->{_rhere_target_list} }; | ||||
| 23892 | $tokenizer_self->{_rhere_target_list} = undef; | ||||
| 23893 | } | ||||
| 23894 | $tokenizer_self->{_in_here_doc} = undef; | ||||
| 23895 | } | ||||
| 23896 | |||||
| 23897 | # now its safe to report errors | ||||
| 23898 | $tokenizer->report_tokenization_errors(); | ||||
| 23899 | |||||
| 23900 | # restore all tokenizer lexical variables | ||||
| 23901 | restore_tokenizer_state($rstate); | ||||
| 23902 | |||||
| 23903 | # return the here doc targets | ||||
| 23904 | return $rht; | ||||
| 23905 | } | ||||
| 23906 | |||||
| 23907 | sub scan_bare_identifier { | ||||
| 23908 | ( $i, $tok, $type, $prototype ) = | ||||
| 23909 | scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype, | ||||
| 23910 | $rtoken_map, $max_token_index ); | ||||
| 23911 | } | ||||
| 23912 | |||||
| 23913 | sub scan_identifier { | ||||
| 23914 | ( $i, $tok, $type, $id_scan_state, $identifier ) = | ||||
| 23915 | scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens, | ||||
| 23916 | $max_token_index, $expecting ); | ||||
| 23917 | } | ||||
| 23918 | |||||
| 23919 | sub scan_id { | ||||
| 23920 | ( $i, $tok, $type, $id_scan_state ) = | ||||
| 23921 | scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map, | ||||
| 23922 | $id_scan_state, $max_token_index ); | ||||
| 23923 | } | ||||
| 23924 | |||||
| 23925 | sub scan_number { | ||||
| 23926 | my $number; | ||||
| 23927 | ( $i, $type, $number ) = | ||||
| 23928 | scan_number_do( $input_line, $i, $rtoken_map, $type, | ||||
| 23929 | $max_token_index ); | ||||
| 23930 | return $number; | ||||
| 23931 | } | ||||
| 23932 | |||||
| 23933 | # a sub to warn if token found where term expected | ||||
| 23934 | sub error_if_expecting_TERM { | ||||
| 23935 | if ( $expecting == TERM ) { | ||||
| 23936 | if ( $really_want_term{$last_nonblank_type} ) { | ||||
| 23937 | unexpected( $tok, "term", $i_tok, $last_nonblank_i, $rtoken_map, | ||||
| 23938 | $rtoken_type, $input_line ); | ||||
| 23939 | 1; | ||||
| 23940 | } | ||||
| 23941 | } | ||||
| 23942 | } | ||||
| 23943 | |||||
| 23944 | # a sub to warn if token found where operator expected | ||||
| 23945 | sub error_if_expecting_OPERATOR { | ||||
| 23946 | if ( $expecting == OPERATOR ) { | ||||
| 23947 | my $thing = defined $_[0] ? $_[0] : $tok; | ||||
| 23948 | unexpected( $thing, "operator", $i_tok, $last_nonblank_i, | ||||
| 23949 | $rtoken_map, $rtoken_type, $input_line ); | ||||
| 23950 | if ( $i_tok == 0 ) { | ||||
| 23951 | interrupt_logfile(); | ||||
| 23952 | warning("Missing ';' above?\n"); | ||||
| 23953 | resume_logfile(); | ||||
| 23954 | } | ||||
| 23955 | 1; | ||||
| 23956 | } | ||||
| 23957 | } | ||||
| 23958 | |||||
| 23959 | # ------------------------------------------------------------ | ||||
| 23960 | # end scanner interfaces | ||||
| 23961 | # ------------------------------------------------------------ | ||||
| 23962 | |||||
| 23963 | my %is_for_foreach; | ||||
| 23964 | 1 | 900ns | @_ = qw(for foreach); | ||
| 23965 | 1 | 2µs | @is_for_foreach{@_} = (1) x scalar(@_); | ||
| 23966 | |||||
| 23967 | 1 | 0s | my %is_my_our; | ||
| 23968 | 1 | 1µs | @_ = qw(my our); | ||
| 23969 | 1 | 1µs | @is_my_our{@_} = (1) x scalar(@_); | ||
| 23970 | |||||
| 23971 | # These keywords may introduce blocks after parenthesized expressions, | ||||
| 23972 | # in the form: | ||||
| 23973 | # keyword ( .... ) { BLOCK } | ||||
| 23974 | # patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when' | ||||
| 23975 | 1 | 0s | my %is_blocktype_with_paren; | ||
| 23976 | 1 | 3µs | @_ = qw(if elsif unless while until for foreach switch case given when); | ||
| 23977 | 1 | 3µs | @is_blocktype_with_paren{@_} = (1) x scalar(@_); | ||
| 23978 | |||||
| 23979 | # ------------------------------------------------------------ | ||||
| 23980 | # begin hash of code for handling most token types | ||||
| 23981 | # ------------------------------------------------------------ | ||||
| 23982 | my $tokenization_code = { | ||||
| 23983 | |||||
| 23984 | # no special code for these types yet, but syntax checks | ||||
| 23985 | # could be added | ||||
| 23986 | |||||
| 23987 | ## '!' => undef, | ||||
| 23988 | ## '!=' => undef, | ||||
| 23989 | ## '!~' => undef, | ||||
| 23990 | ## '%=' => undef, | ||||
| 23991 | ## '&&=' => undef, | ||||
| 23992 | ## '&=' => undef, | ||||
| 23993 | ## '+=' => undef, | ||||
| 23994 | ## '-=' => undef, | ||||
| 23995 | ## '..' => undef, | ||||
| 23996 | ## '..' => undef, | ||||
| 23997 | ## '...' => undef, | ||||
| 23998 | ## '.=' => undef, | ||||
| 23999 | ## '<<=' => undef, | ||||
| 24000 | ## '<=' => undef, | ||||
| 24001 | ## '<=>' => undef, | ||||
| 24002 | ## '<>' => undef, | ||||
| 24003 | ## '=' => undef, | ||||
| 24004 | ## '==' => undef, | ||||
| 24005 | ## '=~' => undef, | ||||
| 24006 | ## '>=' => undef, | ||||
| 24007 | ## '>>' => undef, | ||||
| 24008 | ## '>>=' => undef, | ||||
| 24009 | ## '\\' => undef, | ||||
| 24010 | ## '^=' => undef, | ||||
| 24011 | ## '|=' => undef, | ||||
| 24012 | ## '||=' => undef, | ||||
| 24013 | ## '//=' => undef, | ||||
| 24014 | ## '~' => undef, | ||||
| 24015 | ## '~~' => undef, | ||||
| 24016 | ## '!~~' => undef, | ||||
| 24017 | |||||
| 24018 | '>' => sub { | ||||
| 24019 | error_if_expecting_TERM() | ||||
| 24020 | if ( $expecting == TERM ); | ||||
| 24021 | }, | ||||
| 24022 | '|' => sub { | ||||
| 24023 | error_if_expecting_TERM() | ||||
| 24024 | if ( $expecting == TERM ); | ||||
| 24025 | }, | ||||
| 24026 | '$' => sub { | ||||
| 24027 | |||||
| 24028 | # start looking for a scalar | ||||
| 24029 | error_if_expecting_OPERATOR("Scalar") | ||||
| 24030 | if ( $expecting == OPERATOR ); | ||||
| 24031 | scan_identifier(); | ||||
| 24032 | |||||
| 24033 | if ( $identifier eq '$^W' ) { | ||||
| 24034 | $tokenizer_self->{_saw_perl_dash_w} = 1; | ||||
| 24035 | } | ||||
| 24036 | |||||
| 24037 | # Check for identifier in indirect object slot | ||||
| 24038 | # (vorboard.pl, sort.t). Something like: | ||||
| 24039 | # /^(print|printf|sort|exec|system)$/ | ||||
| 24040 | if ( | ||||
| 24041 | $is_indirect_object_taker{$last_nonblank_token} | ||||
| 24042 | |||||
| 24043 | || ( ( $last_nonblank_token eq '(' ) | ||||
| 24044 | && $is_indirect_object_taker{ $paren_type[$paren_depth] } ) | ||||
| 24045 | || ( $last_nonblank_type =~ /^[Uw]$/ ) # possible object | ||||
| 24046 | ) | ||||
| 24047 | { | ||||
| 24048 | $type = 'Z'; | ||||
| 24049 | } | ||||
| 24050 | }, | ||||
| 24051 | '(' => sub { | ||||
| 24052 | |||||
| 24053 | ++$paren_depth; | ||||
| 24054 | $paren_semicolon_count[$paren_depth] = 0; | ||||
| 24055 | if ($want_paren) { | ||||
| 24056 | $container_type = $want_paren; | ||||
| 24057 | $want_paren = ""; | ||||
| 24058 | } | ||||
| 24059 | else { | ||||
| 24060 | $container_type = $last_nonblank_token; | ||||
| 24061 | |||||
| 24062 | # We can check for a syntax error here of unexpected '(', | ||||
| 24063 | # but this is going to get messy... | ||||
| 24064 | if ( | ||||
| 24065 | $expecting == OPERATOR | ||||
| 24066 | |||||
| 24067 | # be sure this is not a method call of the form | ||||
| 24068 | # &method(...), $method->(..), &{method}(...), | ||||
| 24069 | # $ref[2](list) is ok & short for $ref[2]->(list) | ||||
| 24070 | # NOTE: at present, braces in something like &{ xxx } | ||||
| 24071 | # are not marked as a block, we might have a method call | ||||
| 24072 | && $last_nonblank_token !~ /^([\]\}\&]|\-\>)/ | ||||
| 24073 | |||||
| 24074 | ) | ||||
| 24075 | { | ||||
| 24076 | |||||
| 24077 | # ref: camel 3 p 703. | ||||
| 24078 | if ( $last_last_nonblank_token eq 'do' ) { | ||||
| 24079 | complain( | ||||
| 24080 | "do SUBROUTINE is deprecated; consider & or -> notation\n" | ||||
| 24081 | ); | ||||
| 24082 | } | ||||
| 24083 | else { | ||||
| 24084 | |||||
| 24085 | # if this is an empty list, (), then it is not an | ||||
| 24086 | # error; for example, we might have a constant pi and | ||||
| 24087 | # invoke it with pi() or just pi; | ||||
| 24088 | my ( $next_nonblank_token, $i_next ) = | ||||
| 24089 | find_next_nonblank_token( $i, $rtokens, | ||||
| 24090 | $max_token_index ); | ||||
| 24091 | if ( $next_nonblank_token ne ')' ) { | ||||
| 24092 | my $hint; | ||||
| 24093 | error_if_expecting_OPERATOR('('); | ||||
| 24094 | |||||
| 24095 | if ( $last_nonblank_type eq 'C' ) { | ||||
| 24096 | $hint = | ||||
| 24097 | "$last_nonblank_token has a void prototype\n"; | ||||
| 24098 | } | ||||
| 24099 | elsif ( $last_nonblank_type eq 'i' ) { | ||||
| 24100 | if ( $i_tok > 0 | ||||
| 24101 | && $last_nonblank_token =~ /^\$/ ) | ||||
| 24102 | { | ||||
| 24103 | $hint = | ||||
| 24104 | "Do you mean '$last_nonblank_token->(' ?\n"; | ||||
| 24105 | } | ||||
| 24106 | } | ||||
| 24107 | if ($hint) { | ||||
| 24108 | interrupt_logfile(); | ||||
| 24109 | warning($hint); | ||||
| 24110 | resume_logfile(); | ||||
| 24111 | } | ||||
| 24112 | } ## end if ( $next_nonblank_token... | ||||
| 24113 | } ## end else [ if ( $last_last_nonblank_token... | ||||
| 24114 | } ## end if ( $expecting == OPERATOR... | ||||
| 24115 | } | ||||
| 24116 | $paren_type[$paren_depth] = $container_type; | ||||
| 24117 | ( $type_sequence, $indent_flag ) = | ||||
| 24118 | increase_nesting_depth( PAREN, $$rtoken_map[$i_tok] ); | ||||
| 24119 | |||||
| 24120 | # propagate types down through nested parens | ||||
| 24121 | # for example: the second paren in 'if ((' would be structural | ||||
| 24122 | # since the first is. | ||||
| 24123 | |||||
| 24124 | if ( $last_nonblank_token eq '(' ) { | ||||
| 24125 | $type = $last_nonblank_type; | ||||
| 24126 | } | ||||
| 24127 | |||||
| 24128 | # We exclude parens as structural after a ',' because it | ||||
| 24129 | # causes subtle problems with continuation indentation for | ||||
| 24130 | # something like this, where the first 'or' will not get | ||||
| 24131 | # indented. | ||||
| 24132 | # | ||||
| 24133 | # assert( | ||||
| 24134 | # __LINE__, | ||||
| 24135 | # ( not defined $check ) | ||||
| 24136 | # or ref $check | ||||
| 24137 | # or $check eq "new" | ||||
| 24138 | # or $check eq "old", | ||||
| 24139 | # ); | ||||
| 24140 | # | ||||
| 24141 | # Likewise, we exclude parens where a statement can start | ||||
| 24142 | # because of problems with continuation indentation, like | ||||
| 24143 | # these: | ||||
| 24144 | # | ||||
| 24145 | # ($firstline =~ /^#\!.*perl/) | ||||
| 24146 | # and (print $File::Find::name, "\n") | ||||
| 24147 | # and (return 1); | ||||
| 24148 | # | ||||
| 24149 | # (ref($usage_fref) =~ /CODE/) | ||||
| 24150 | # ? &$usage_fref | ||||
| 24151 | # : (&blast_usage, &blast_params, &blast_general_params); | ||||
| 24152 | |||||
| 24153 | else { | ||||
| 24154 | $type = '{'; | ||||
| 24155 | } | ||||
| 24156 | |||||
| 24157 | if ( $last_nonblank_type eq ')' ) { | ||||
| 24158 | warning( | ||||
| 24159 | "Syntax error? found token '$last_nonblank_type' then '('\n" | ||||
| 24160 | ); | ||||
| 24161 | } | ||||
| 24162 | $paren_structural_type[$paren_depth] = $type; | ||||
| 24163 | |||||
| 24164 | }, | ||||
| 24165 | ')' => sub { | ||||
| 24166 | ( $type_sequence, $indent_flag ) = | ||||
| 24167 | decrease_nesting_depth( PAREN, $$rtoken_map[$i_tok] ); | ||||
| 24168 | |||||
| 24169 | if ( $paren_structural_type[$paren_depth] eq '{' ) { | ||||
| 24170 | $type = '}'; | ||||
| 24171 | } | ||||
| 24172 | |||||
| 24173 | $container_type = $paren_type[$paren_depth]; | ||||
| 24174 | |||||
| 24175 | # /^(for|foreach)$/ | ||||
| 24176 | if ( $is_for_foreach{ $paren_type[$paren_depth] } ) { | ||||
| 24177 | my $num_sc = $paren_semicolon_count[$paren_depth]; | ||||
| 24178 | if ( $num_sc > 0 && $num_sc != 2 ) { | ||||
| 24179 | warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n"); | ||||
| 24180 | } | ||||
| 24181 | } | ||||
| 24182 | |||||
| 24183 | if ( $paren_depth > 0 ) { $paren_depth-- } | ||||
| 24184 | }, | ||||
| 24185 | ',' => sub { | ||||
| 24186 | if ( $last_nonblank_type eq ',' ) { | ||||
| 24187 | complain("Repeated ','s \n"); | ||||
| 24188 | } | ||||
| 24189 | |||||
| 24190 | # patch for operator_expected: note if we are in the list (use.t) | ||||
| 24191 | if ( $statement_type eq 'use' ) { $statement_type = '_use' } | ||||
| 24192 | ## FIXME: need to move this elsewhere, perhaps check after a '(' | ||||
| 24193 | ## elsif ($last_nonblank_token eq '(') { | ||||
| 24194 | ## warning("Leading ','s illegal in some versions of perl\n"); | ||||
| 24195 | ## } | ||||
| 24196 | }, | ||||
| 24197 | ';' => sub { | ||||
| 24198 | $context = UNKNOWN_CONTEXT; | ||||
| 24199 | $statement_type = ''; | ||||
| 24200 | $want_paren = ""; | ||||
| 24201 | |||||
| 24202 | # /^(for|foreach)$/ | ||||
| 24203 | if ( $is_for_foreach{ $paren_type[$paren_depth] } ) | ||||
| 24204 | { # mark ; in for loop | ||||
| 24205 | |||||
| 24206 | # Be careful: we do not want a semicolon such as the | ||||
| 24207 | # following to be included: | ||||
| 24208 | # | ||||
| 24209 | # for (sort {strcoll($a,$b);} keys %investments) { | ||||
| 24210 | |||||
| 24211 | if ( $brace_depth == $depth_array[PAREN][BRACE][$paren_depth] | ||||
| 24212 | && $square_bracket_depth == | ||||
| 24213 | $depth_array[PAREN][SQUARE_BRACKET][$paren_depth] ) | ||||
| 24214 | { | ||||
| 24215 | |||||
| 24216 | $type = 'f'; | ||||
| 24217 | $paren_semicolon_count[$paren_depth]++; | ||||
| 24218 | } | ||||
| 24219 | } | ||||
| 24220 | |||||
| 24221 | }, | ||||
| 24222 | '"' => sub { | ||||
| 24223 | error_if_expecting_OPERATOR("String") | ||||
| 24224 | if ( $expecting == OPERATOR ); | ||||
| 24225 | $in_quote = 1; | ||||
| 24226 | $type = 'Q'; | ||||
| 24227 | $allowed_quote_modifiers = ""; | ||||
| 24228 | }, | ||||
| 24229 | "'" => sub { | ||||
| 24230 | error_if_expecting_OPERATOR("String") | ||||
| 24231 | if ( $expecting == OPERATOR ); | ||||
| 24232 | $in_quote = 1; | ||||
| 24233 | $type = 'Q'; | ||||
| 24234 | $allowed_quote_modifiers = ""; | ||||
| 24235 | }, | ||||
| 24236 | '`' => sub { | ||||
| 24237 | error_if_expecting_OPERATOR("String") | ||||
| 24238 | if ( $expecting == OPERATOR ); | ||||
| 24239 | $in_quote = 1; | ||||
| 24240 | $type = 'Q'; | ||||
| 24241 | $allowed_quote_modifiers = ""; | ||||
| 24242 | }, | ||||
| 24243 | '/' => sub { | ||||
| 24244 | my $is_pattern; | ||||
| 24245 | |||||
| 24246 | if ( $expecting == UNKNOWN ) { # indeterminate, must guess.. | ||||
| 24247 | my $msg; | ||||
| 24248 | ( $is_pattern, $msg ) = | ||||
| 24249 | guess_if_pattern_or_division( $i, $rtokens, $rtoken_map, | ||||
| 24250 | $max_token_index ); | ||||
| 24251 | |||||
| 24252 | if ($msg) { | ||||
| 24253 | write_diagnostics("DIVIDE:$msg\n"); | ||||
| 24254 | write_logfile_entry($msg); | ||||
| 24255 | } | ||||
| 24256 | } | ||||
| 24257 | else { $is_pattern = ( $expecting == TERM ) } | ||||
| 24258 | |||||
| 24259 | if ($is_pattern) { | ||||
| 24260 | $in_quote = 1; | ||||
| 24261 | $type = 'Q'; | ||||
| 24262 | $allowed_quote_modifiers = '[msixpodualgc]'; | ||||
| 24263 | } | ||||
| 24264 | else { # not a pattern; check for a /= token | ||||
| 24265 | |||||
| 24266 | if ( $$rtokens[ $i + 1 ] eq '=' ) { # form token /= | ||||
| 24267 | $i++; | ||||
| 24268 | $tok = '/='; | ||||
| 24269 | $type = $tok; | ||||
| 24270 | } | ||||
| 24271 | |||||
| 24272 | #DEBUG - collecting info on what tokens follow a divide | ||||
| 24273 | # for development of guessing algorithm | ||||
| 24274 | #if ( numerator_expected( $i, $rtokens, $max_token_index ) < 0 ) { | ||||
| 24275 | # #write_diagnostics( "DIVIDE? $input_line\n" ); | ||||
| 24276 | #} | ||||
| 24277 | } | ||||
| 24278 | }, | ||||
| 24279 | '{' => sub { | ||||
| 24280 | |||||
| 24281 | # if we just saw a ')', we will label this block with | ||||
| 24282 | # its type. We need to do this to allow sub | ||||
| 24283 | # code_block_type to determine if this brace starts a | ||||
| 24284 | # code block or anonymous hash. (The type of a paren | ||||
| 24285 | # pair is the preceding token, such as 'if', 'else', | ||||
| 24286 | # etc). | ||||
| 24287 | $container_type = ""; | ||||
| 24288 | |||||
| 24289 | # ATTRS: for a '{' following an attribute list, reset | ||||
| 24290 | # things to look like we just saw the sub name | ||||
| 24291 | if ( $statement_type =~ /^sub/ ) { | ||||
| 24292 | $last_nonblank_token = $statement_type; | ||||
| 24293 | $last_nonblank_type = 'i'; | ||||
| 24294 | $statement_type = ""; | ||||
| 24295 | } | ||||
| 24296 | |||||
| 24297 | # patch for SWITCH/CASE: hide these keywords from an immediately | ||||
| 24298 | # following opening brace | ||||
| 24299 | elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' ) | ||||
| 24300 | && $statement_type eq $last_nonblank_token ) | ||||
| 24301 | { | ||||
| 24302 | $last_nonblank_token = ";"; | ||||
| 24303 | } | ||||
| 24304 | |||||
| 24305 | elsif ( $last_nonblank_token eq ')' ) { | ||||
| 24306 | $last_nonblank_token = $paren_type[ $paren_depth + 1 ]; | ||||
| 24307 | |||||
| 24308 | # defensive move in case of a nesting error (pbug.t) | ||||
| 24309 | # in which this ')' had no previous '(' | ||||
| 24310 | # this nesting error will have been caught | ||||
| 24311 | if ( !defined($last_nonblank_token) ) { | ||||
| 24312 | $last_nonblank_token = 'if'; | ||||
| 24313 | } | ||||
| 24314 | |||||
| 24315 | # check for syntax error here; | ||||
| 24316 | unless ( $is_blocktype_with_paren{$last_nonblank_token} ) { | ||||
| 24317 | my $list = join( ' ', sort keys %is_blocktype_with_paren ); | ||||
| 24318 | warning( | ||||
| 24319 | "syntax error at ') {', didn't see one of: $list\n"); | ||||
| 24320 | } | ||||
| 24321 | } | ||||
| 24322 | |||||
| 24323 | # patch for paren-less for/foreach glitch, part 2. | ||||
| 24324 | # see note below under 'qw' | ||||
| 24325 | elsif ($last_nonblank_token eq 'qw' | ||||
| 24326 | && $is_for_foreach{$want_paren} ) | ||||
| 24327 | { | ||||
| 24328 | $last_nonblank_token = $want_paren; | ||||
| 24329 | if ( $last_last_nonblank_token eq $want_paren ) { | ||||
| 24330 | warning( | ||||
| 24331 | "syntax error at '$want_paren .. {' -- missing \$ loop variable\n" | ||||
| 24332 | ); | ||||
| 24333 | |||||
| 24334 | } | ||||
| 24335 | $want_paren = ""; | ||||
| 24336 | } | ||||
| 24337 | |||||
| 24338 | # now identify which of the three possible types of | ||||
| 24339 | # curly braces we have: hash index container, anonymous | ||||
| 24340 | # hash reference, or code block. | ||||
| 24341 | |||||
| 24342 | # non-structural (hash index) curly brace pair | ||||
| 24343 | # get marked 'L' and 'R' | ||||
| 24344 | if ( is_non_structural_brace() ) { | ||||
| 24345 | $type = 'L'; | ||||
| 24346 | |||||
| 24347 | # patch for SWITCH/CASE: | ||||
| 24348 | # allow paren-less identifier after 'when' | ||||
| 24349 | # if the brace is preceded by a space | ||||
| 24350 | if ( $statement_type eq 'when' | ||||
| 24351 | && $last_nonblank_type eq 'i' | ||||
| 24352 | && $last_last_nonblank_type eq 'k' | ||||
| 24353 | && ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) ) | ||||
| 24354 | { | ||||
| 24355 | $type = '{'; | ||||
| 24356 | $block_type = $statement_type; | ||||
| 24357 | } | ||||
| 24358 | } | ||||
| 24359 | |||||
| 24360 | # code and anonymous hash have the same type, '{', but are | ||||
| 24361 | # distinguished by 'block_type', | ||||
| 24362 | # which will be blank for an anonymous hash | ||||
| 24363 | else { | ||||
| 24364 | |||||
| 24365 | $block_type = code_block_type( $i_tok, $rtokens, $rtoken_type, | ||||
| 24366 | $max_token_index ); | ||||
| 24367 | |||||
| 24368 | # remember a preceding smartmatch operator | ||||
| 24369 | ## SMARTMATCH | ||||
| 24370 | ##if ( $last_nonblank_type eq '~~' ) { | ||||
| 24371 | ## $block_type = $last_nonblank_type; | ||||
| 24372 | ##} | ||||
| 24373 | |||||
| 24374 | # patch to promote bareword type to function taking block | ||||
| 24375 | if ( $block_type | ||||
| 24376 | && $last_nonblank_type eq 'w' | ||||
| 24377 | && $last_nonblank_i >= 0 ) | ||||
| 24378 | { | ||||
| 24379 | if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) { | ||||
| 24380 | $routput_token_type->[$last_nonblank_i] = 'G'; | ||||
| 24381 | } | ||||
| 24382 | } | ||||
| 24383 | |||||
| 24384 | # patch for SWITCH/CASE: if we find a stray opening block brace | ||||
| 24385 | # where we might accept a 'case' or 'when' block, then take it | ||||
| 24386 | if ( $statement_type eq 'case' | ||||
| 24387 | || $statement_type eq 'when' ) | ||||
| 24388 | { | ||||
| 24389 | if ( !$block_type || $block_type eq '}' ) { | ||||
| 24390 | $block_type = $statement_type; | ||||
| 24391 | } | ||||
| 24392 | } | ||||
| 24393 | } | ||||
| 24394 | $brace_type[ ++$brace_depth ] = $block_type; | ||||
| 24395 | $brace_package[$brace_depth] = $current_package; | ||||
| 24396 | $brace_structural_type[$brace_depth] = $type; | ||||
| 24397 | $brace_context[$brace_depth] = $context; | ||||
| 24398 | ( $type_sequence, $indent_flag ) = | ||||
| 24399 | increase_nesting_depth( BRACE, $$rtoken_map[$i_tok] ); | ||||
| 24400 | }, | ||||
| 24401 | '}' => sub { | ||||
| 24402 | $block_type = $brace_type[$brace_depth]; | ||||
| 24403 | if ($block_type) { $statement_type = '' } | ||||
| 24404 | if ( defined( $brace_package[$brace_depth] ) ) { | ||||
| 24405 | $current_package = $brace_package[$brace_depth]; | ||||
| 24406 | } | ||||
| 24407 | |||||
| 24408 | # can happen on brace error (caught elsewhere) | ||||
| 24409 | else { | ||||
| 24410 | } | ||||
| 24411 | ( $type_sequence, $indent_flag ) = | ||||
| 24412 | decrease_nesting_depth( BRACE, $$rtoken_map[$i_tok] ); | ||||
| 24413 | |||||
| 24414 | if ( $brace_structural_type[$brace_depth] eq 'L' ) { | ||||
| 24415 | $type = 'R'; | ||||
| 24416 | } | ||||
| 24417 | |||||
| 24418 | # propagate type information for 'do' and 'eval' blocks, and also | ||||
| 24419 | # for smartmatch operator. This is necessary to enable us to know | ||||
| 24420 | # if an operator or term is expected next. | ||||
| 24421 | ## SMARTMATCH | ||||
| 24422 | ##if ( $is_block_operator{$block_type} || $block_type eq '~~' ) { | ||||
| 24423 | if ( $is_block_operator{$block_type} ) { | ||||
| 24424 | $tok = $block_type; | ||||
| 24425 | } | ||||
| 24426 | |||||
| 24427 | $context = $brace_context[$brace_depth]; | ||||
| 24428 | if ( $brace_depth > 0 ) { $brace_depth--; } | ||||
| 24429 | }, | ||||
| 24430 | '&' => sub { # maybe sub call? start looking | ||||
| 24431 | |||||
| 24432 | # We have to check for sub call unless we are sure we | ||||
| 24433 | # are expecting an operator. This example from s2p | ||||
| 24434 | # got mistaken as a q operator in an early version: | ||||
| 24435 | # print BODY &q(<<'EOT'); | ||||
| 24436 | if ( $expecting != OPERATOR ) { | ||||
| 24437 | |||||
| 24438 | # But only look for a sub call if we are expecting a term or | ||||
| 24439 | # if there is no existing space after the &. | ||||
| 24440 | # For example we probably don't want & as sub call here: | ||||
| 24441 | # Fcntl::S_IRUSR & $mode; | ||||
| 24442 | if ( $expecting == TERM || $next_type ne 'b' ) { | ||||
| 24443 | scan_identifier(); | ||||
| 24444 | } | ||||
| 24445 | } | ||||
| 24446 | else { | ||||
| 24447 | } | ||||
| 24448 | }, | ||||
| 24449 | '<' => sub { # angle operator or less than? | ||||
| 24450 | |||||
| 24451 | if ( $expecting != OPERATOR ) { | ||||
| 24452 | ( $i, $type ) = | ||||
| 24453 | find_angle_operator_termination( $input_line, $i, $rtoken_map, | ||||
| 24454 | $expecting, $max_token_index ); | ||||
| 24455 | |||||
| 24456 | if ( $type eq '<' && $expecting == TERM ) { | ||||
| 24457 | error_if_expecting_TERM(); | ||||
| 24458 | interrupt_logfile(); | ||||
| 24459 | warning("Unterminated <> operator?\n"); | ||||
| 24460 | resume_logfile(); | ||||
| 24461 | } | ||||
| 24462 | } | ||||
| 24463 | else { | ||||
| 24464 | } | ||||
| 24465 | }, | ||||
| 24466 | '?' => sub { # ?: conditional or starting pattern? | ||||
| 24467 | |||||
| 24468 | my $is_pattern; | ||||
| 24469 | |||||
| 24470 | if ( $expecting == UNKNOWN ) { | ||||
| 24471 | |||||
| 24472 | my $msg; | ||||
| 24473 | ( $is_pattern, $msg ) = | ||||
| 24474 | guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map, | ||||
| 24475 | $max_token_index ); | ||||
| 24476 | |||||
| 24477 | if ($msg) { write_logfile_entry($msg) } | ||||
| 24478 | } | ||||
| 24479 | else { $is_pattern = ( $expecting == TERM ) } | ||||
| 24480 | |||||
| 24481 | if ($is_pattern) { | ||||
| 24482 | $in_quote = 1; | ||||
| 24483 | $type = 'Q'; | ||||
| 24484 | $allowed_quote_modifiers = '[msixpodualgc]'; | ||||
| 24485 | } | ||||
| 24486 | else { | ||||
| 24487 | ( $type_sequence, $indent_flag ) = | ||||
| 24488 | increase_nesting_depth( QUESTION_COLON, | ||||
| 24489 | $$rtoken_map[$i_tok] ); | ||||
| 24490 | } | ||||
| 24491 | }, | ||||
| 24492 | '*' => sub { # typeglob, or multiply? | ||||
| 24493 | |||||
| 24494 | if ( $expecting == TERM ) { | ||||
| 24495 | scan_identifier(); | ||||
| 24496 | } | ||||
| 24497 | else { | ||||
| 24498 | |||||
| 24499 | if ( $$rtokens[ $i + 1 ] eq '=' ) { | ||||
| 24500 | $tok = '*='; | ||||
| 24501 | $type = $tok; | ||||
| 24502 | $i++; | ||||
| 24503 | } | ||||
| 24504 | elsif ( $$rtokens[ $i + 1 ] eq '*' ) { | ||||
| 24505 | $tok = '**'; | ||||
| 24506 | $type = $tok; | ||||
| 24507 | $i++; | ||||
| 24508 | if ( $$rtokens[ $i + 1 ] eq '=' ) { | ||||
| 24509 | $tok = '**='; | ||||
| 24510 | $type = $tok; | ||||
| 24511 | $i++; | ||||
| 24512 | } | ||||
| 24513 | } | ||||
| 24514 | } | ||||
| 24515 | }, | ||||
| 24516 | '.' => sub { # what kind of . ? | ||||
| 24517 | |||||
| 24518 | if ( $expecting != OPERATOR ) { | ||||
| 24519 | scan_number(); | ||||
| 24520 | if ( $type eq '.' ) { | ||||
| 24521 | error_if_expecting_TERM() | ||||
| 24522 | if ( $expecting == TERM ); | ||||
| 24523 | } | ||||
| 24524 | } | ||||
| 24525 | else { | ||||
| 24526 | } | ||||
| 24527 | }, | ||||
| 24528 | ':' => sub { | ||||
| 24529 | |||||
| 24530 | # if this is the first nonblank character, call it a label | ||||
| 24531 | # since perl seems to just swallow it | ||||
| 24532 | if ( $input_line_number == 1 && $last_nonblank_i == -1 ) { | ||||
| 24533 | $type = 'J'; | ||||
| 24534 | } | ||||
| 24535 | |||||
| 24536 | # ATTRS: check for a ':' which introduces an attribute list | ||||
| 24537 | # (this might eventually get its own token type) | ||||
| 24538 | elsif ( $statement_type =~ /^sub/ ) { | ||||
| 24539 | $type = 'A'; | ||||
| 24540 | $in_attribute_list = 1; | ||||
| 24541 | } | ||||
| 24542 | |||||
| 24543 | # check for scalar attribute, such as | ||||
| 24544 | # my $foo : shared = 1; | ||||
| 24545 | elsif ($is_my_our{$statement_type} | ||||
| 24546 | && $current_depth[QUESTION_COLON] == 0 ) | ||||
| 24547 | { | ||||
| 24548 | $type = 'A'; | ||||
| 24549 | $in_attribute_list = 1; | ||||
| 24550 | } | ||||
| 24551 | |||||
| 24552 | # otherwise, it should be part of a ?/: operator | ||||
| 24553 | else { | ||||
| 24554 | ( $type_sequence, $indent_flag ) = | ||||
| 24555 | decrease_nesting_depth( QUESTION_COLON, | ||||
| 24556 | $$rtoken_map[$i_tok] ); | ||||
| 24557 | if ( $last_nonblank_token eq '?' ) { | ||||
| 24558 | warning("Syntax error near ? :\n"); | ||||
| 24559 | } | ||||
| 24560 | } | ||||
| 24561 | }, | ||||
| 24562 | '+' => sub { # what kind of plus? | ||||
| 24563 | |||||
| 24564 | if ( $expecting == TERM ) { | ||||
| 24565 | my $number = scan_number(); | ||||
| 24566 | |||||
| 24567 | # unary plus is safest assumption if not a number | ||||
| 24568 | if ( !defined($number) ) { $type = 'p'; } | ||||
| 24569 | } | ||||
| 24570 | elsif ( $expecting == OPERATOR ) { | ||||
| 24571 | } | ||||
| 24572 | else { | ||||
| 24573 | if ( $next_type eq 'w' ) { $type = 'p' } | ||||
| 24574 | } | ||||
| 24575 | }, | ||||
| 24576 | '@' => sub { | ||||
| 24577 | |||||
| 24578 | error_if_expecting_OPERATOR("Array") | ||||
| 24579 | if ( $expecting == OPERATOR ); | ||||
| 24580 | scan_identifier(); | ||||
| 24581 | }, | ||||
| 24582 | '%' => sub { # hash or modulo? | ||||
| 24583 | |||||
| 24584 | # first guess is hash if no following blank | ||||
| 24585 | if ( $expecting == UNKNOWN ) { | ||||
| 24586 | if ( $next_type ne 'b' ) { $expecting = TERM } | ||||
| 24587 | } | ||||
| 24588 | if ( $expecting == TERM ) { | ||||
| 24589 | scan_identifier(); | ||||
| 24590 | } | ||||
| 24591 | }, | ||||
| 24592 | '[' => sub { | ||||
| 24593 | $square_bracket_type[ ++$square_bracket_depth ] = | ||||
| 24594 | $last_nonblank_token; | ||||
| 24595 | ( $type_sequence, $indent_flag ) = | ||||
| 24596 | increase_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] ); | ||||
| 24597 | |||||
| 24598 | # It may seem odd, but structural square brackets have | ||||
| 24599 | # type '{' and '}'. This simplifies the indentation logic. | ||||
| 24600 | if ( !is_non_structural_brace() ) { | ||||
| 24601 | $type = '{'; | ||||
| 24602 | } | ||||
| 24603 | $square_bracket_structural_type[$square_bracket_depth] = $type; | ||||
| 24604 | }, | ||||
| 24605 | ']' => sub { | ||||
| 24606 | ( $type_sequence, $indent_flag ) = | ||||
| 24607 | decrease_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] ); | ||||
| 24608 | |||||
| 24609 | if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' ) | ||||
| 24610 | { | ||||
| 24611 | $type = '}'; | ||||
| 24612 | } | ||||
| 24613 | |||||
| 24614 | # propagate type information for smartmatch operator. This is | ||||
| 24615 | # necessary to enable us to know if an operator or term is expected | ||||
| 24616 | # next. | ||||
| 24617 | if ( $square_bracket_type[$square_bracket_depth] eq '~~' ) { | ||||
| 24618 | $tok = $square_bracket_type[$square_bracket_depth]; | ||||
| 24619 | } | ||||
| 24620 | |||||
| 24621 | if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; } | ||||
| 24622 | }, | ||||
| 24623 | '-' => sub { # what kind of minus? | ||||
| 24624 | |||||
| 24625 | if ( ( $expecting != OPERATOR ) | ||||
| 24626 | && $is_file_test_operator{$next_tok} ) | ||||
| 24627 | { | ||||
| 24628 | my ( $next_nonblank_token, $i_next ) = | ||||
| 24629 | find_next_nonblank_token( $i + 1, $rtokens, | ||||
| 24630 | $max_token_index ); | ||||
| 24631 | |||||
| 24632 | # check for a quoted word like "-w=>xx"; | ||||
| 24633 | # it is sufficient to just check for a following '=' | ||||
| 24634 | if ( $next_nonblank_token eq '=' ) { | ||||
| 24635 | $type = 'm'; | ||||
| 24636 | } | ||||
| 24637 | else { | ||||
| 24638 | $i++; | ||||
| 24639 | $tok .= $next_tok; | ||||
| 24640 | $type = 'F'; | ||||
| 24641 | } | ||||
| 24642 | } | ||||
| 24643 | elsif ( $expecting == TERM ) { | ||||
| 24644 | my $number = scan_number(); | ||||
| 24645 | |||||
| 24646 | # maybe part of bareword token? unary is safest | ||||
| 24647 | if ( !defined($number) ) { $type = 'm'; } | ||||
| 24648 | |||||
| 24649 | } | ||||
| 24650 | elsif ( $expecting == OPERATOR ) { | ||||
| 24651 | } | ||||
| 24652 | else { | ||||
| 24653 | |||||
| 24654 | if ( $next_type eq 'w' ) { | ||||
| 24655 | $type = 'm'; | ||||
| 24656 | } | ||||
| 24657 | } | ||||
| 24658 | }, | ||||
| 24659 | |||||
| 24660 | '^' => sub { | ||||
| 24661 | |||||
| 24662 | # check for special variables like ${^WARNING_BITS} | ||||
| 24663 | if ( $expecting == TERM ) { | ||||
| 24664 | |||||
| 24665 | # FIXME: this should work but will not catch errors | ||||
| 24666 | # because we also have to be sure that previous token is | ||||
| 24667 | # a type character ($,@,%). | ||||
| 24668 | if ( $last_nonblank_token eq '{' | ||||
| 24669 | && ( $next_tok =~ /^[A-Za-z_]/ ) ) | ||||
| 24670 | { | ||||
| 24671 | |||||
| 24672 | if ( $next_tok eq 'W' ) { | ||||
| 24673 | $tokenizer_self->{_saw_perl_dash_w} = 1; | ||||
| 24674 | } | ||||
| 24675 | $tok = $tok . $next_tok; | ||||
| 24676 | $i = $i + 1; | ||||
| 24677 | $type = 'w'; | ||||
| 24678 | } | ||||
| 24679 | |||||
| 24680 | else { | ||||
| 24681 | unless ( error_if_expecting_TERM() ) { | ||||
| 24682 | |||||
| 24683 | # Something like this is valid but strange: | ||||
| 24684 | # undef ^I; | ||||
| 24685 | complain("The '^' seems unusual here\n"); | ||||
| 24686 | } | ||||
| 24687 | } | ||||
| 24688 | } | ||||
| 24689 | }, | ||||
| 24690 | |||||
| 24691 | '::' => sub { # probably a sub call | ||||
| 24692 | scan_bare_identifier(); | ||||
| 24693 | }, | ||||
| 24694 | '<<' => sub { # maybe a here-doc? | ||||
| 24695 | return | ||||
| 24696 | unless ( $i < $max_token_index ) | ||||
| 24697 | ; # here-doc not possible if end of line | ||||
| 24698 | |||||
| 24699 | if ( $expecting != OPERATOR ) { | ||||
| 24700 | my ( $found_target, $here_doc_target, $here_quote_character, | ||||
| 24701 | $saw_error ); | ||||
| 24702 | ( | ||||
| 24703 | $found_target, $here_doc_target, $here_quote_character, $i, | ||||
| 24704 | $saw_error | ||||
| 24705 | ) | ||||
| 24706 | = find_here_doc( $expecting, $i, $rtokens, $rtoken_map, | ||||
| 24707 | $max_token_index ); | ||||
| 24708 | |||||
| 24709 | if ($found_target) { | ||||
| 24710 | push @{$rhere_target_list}, | ||||
| 24711 | [ $here_doc_target, $here_quote_character ]; | ||||
| 24712 | $type = 'h'; | ||||
| 24713 | if ( length($here_doc_target) > 80 ) { | ||||
| 24714 | my $truncated = substr( $here_doc_target, 0, 80 ); | ||||
| 24715 | complain("Long here-target: '$truncated' ...\n"); | ||||
| 24716 | } | ||||
| 24717 | elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) { | ||||
| 24718 | complain( | ||||
| 24719 | "Unconventional here-target: '$here_doc_target'\n" | ||||
| 24720 | ); | ||||
| 24721 | } | ||||
| 24722 | } | ||||
| 24723 | elsif ( $expecting == TERM ) { | ||||
| 24724 | unless ($saw_error) { | ||||
| 24725 | |||||
| 24726 | # shouldn't happen.. | ||||
| 24727 | warning("Program bug; didn't find here doc target\n"); | ||||
| 24728 | report_definite_bug(); | ||||
| 24729 | } | ||||
| 24730 | } | ||||
| 24731 | } | ||||
| 24732 | else { | ||||
| 24733 | } | ||||
| 24734 | }, | ||||
| 24735 | '->' => sub { | ||||
| 24736 | |||||
| 24737 | # if -> points to a bare word, we must scan for an identifier, | ||||
| 24738 | # otherwise something like ->y would look like the y operator | ||||
| 24739 | scan_identifier(); | ||||
| 24740 | }, | ||||
| 24741 | |||||
| 24742 | # type = 'pp' for pre-increment, '++' for post-increment | ||||
| 24743 | '++' => sub { | ||||
| 24744 | if ( $expecting == TERM ) { $type = 'pp' } | ||||
| 24745 | elsif ( $expecting == UNKNOWN ) { | ||||
| 24746 | my ( $next_nonblank_token, $i_next ) = | ||||
| 24747 | find_next_nonblank_token( $i, $rtokens, $max_token_index ); | ||||
| 24748 | if ( $next_nonblank_token eq '$' ) { $type = 'pp' } | ||||
| 24749 | } | ||||
| 24750 | }, | ||||
| 24751 | |||||
| 24752 | '=>' => sub { | ||||
| 24753 | if ( $last_nonblank_type eq $tok ) { | ||||
| 24754 | complain("Repeated '=>'s \n"); | ||||
| 24755 | } | ||||
| 24756 | |||||
| 24757 | # patch for operator_expected: note if we are in the list (use.t) | ||||
| 24758 | # TODO: make version numbers a new token type | ||||
| 24759 | if ( $statement_type eq 'use' ) { $statement_type = '_use' } | ||||
| 24760 | }, | ||||
| 24761 | |||||
| 24762 | # type = 'mm' for pre-decrement, '--' for post-decrement | ||||
| 24763 | '--' => sub { | ||||
| 24764 | |||||
| 24765 | if ( $expecting == TERM ) { $type = 'mm' } | ||||
| 24766 | elsif ( $expecting == UNKNOWN ) { | ||||
| 24767 | my ( $next_nonblank_token, $i_next ) = | ||||
| 24768 | find_next_nonblank_token( $i, $rtokens, $max_token_index ); | ||||
| 24769 | if ( $next_nonblank_token eq '$' ) { $type = 'mm' } | ||||
| 24770 | } | ||||
| 24771 | }, | ||||
| 24772 | |||||
| 24773 | '&&' => sub { | ||||
| 24774 | error_if_expecting_TERM() | ||||
| 24775 | if ( $expecting == TERM ); | ||||
| 24776 | }, | ||||
| 24777 | |||||
| 24778 | '||' => sub { | ||||
| 24779 | error_if_expecting_TERM() | ||||
| 24780 | if ( $expecting == TERM ); | ||||
| 24781 | }, | ||||
| 24782 | |||||
| 24783 | '//' => sub { | ||||
| 24784 | error_if_expecting_TERM() | ||||
| 24785 | if ( $expecting == TERM ); | ||||
| 24786 | }, | ||||
| 24787 | 1 | 46µs | }; | ||
| 24788 | |||||
| 24789 | # ------------------------------------------------------------ | ||||
| 24790 | # end hash of code for handling individual token types | ||||
| 24791 | # ------------------------------------------------------------ | ||||
| 24792 | |||||
| 24793 | 1 | 1µs | my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' ); | ||
| 24794 | |||||
| 24795 | # These block types terminate statements and do not need a trailing | ||||
| 24796 | # semicolon | ||||
| 24797 | # patched for SWITCH/CASE/ | ||||
| 24798 | 1 | 100ns | my %is_zero_continuation_block_type; | ||
| 24799 | 1 | 6µs | @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ; | ||
| 24800 | if elsif else unless while until for foreach switch case given when); | ||||
| 24801 | 1 | 5µs | @is_zero_continuation_block_type{@_} = (1) x scalar(@_); | ||
| 24802 | |||||
| 24803 | 1 | 0s | my %is_not_zero_continuation_block_type; | ||
| 24804 | 1 | 3µs | @_ = qw(sort grep map do eval); | ||
| 24805 | 1 | 1µs | @is_not_zero_continuation_block_type{@_} = (1) x scalar(@_); | ||
| 24806 | |||||
| 24807 | 1 | 0s | my %is_logical_container; | ||
| 24808 | 1 | 3µs | @_ = qw(if elsif unless while and or err not && ! || for foreach); | ||
| 24809 | 1 | 3µs | @is_logical_container{@_} = (1) x scalar(@_); | ||
| 24810 | |||||
| 24811 | 1 | 100ns | my %is_binary_type; | ||
| 24812 | 1 | 2µs | @_ = qw(|| &&); | ||
| 24813 | 1 | 800ns | @is_binary_type{@_} = (1) x scalar(@_); | ||
| 24814 | |||||
| 24815 | 1 | 0s | my %is_binary_keyword; | ||
| 24816 | 1 | 1µs | @_ = qw(and or err eq ne cmp); | ||
| 24817 | 1 | 1µs | @is_binary_keyword{@_} = (1) x scalar(@_); | ||
| 24818 | |||||
| 24819 | # 'L' is token for opening { at hash key | ||||
| 24820 | 1 | 0s | my %is_opening_type; | ||
| 24821 | 1 | 1µs | @_ = qw" L { ( [ "; | ||
| 24822 | 1 | 900ns | @is_opening_type{@_} = (1) x scalar(@_); | ||
| 24823 | |||||
| 24824 | # 'R' is token for closing } at hash key | ||||
| 24825 | 1 | 0s | my %is_closing_type; | ||
| 24826 | 1 | 1µs | @_ = qw" R } ) ] "; | ||
| 24827 | 1 | 900ns | @is_closing_type{@_} = (1) x scalar(@_); | ||
| 24828 | |||||
| 24829 | 1 | 100ns | my %is_redo_last_next_goto; | ||
| 24830 | 1 | 1µs | @_ = qw(redo last next goto); | ||
| 24831 | 1 | 1µs | @is_redo_last_next_goto{@_} = (1) x scalar(@_); | ||
| 24832 | |||||
| 24833 | 1 | 0s | my %is_use_require; | ||
| 24834 | 1 | 800ns | @_ = qw(use require); | ||
| 24835 | 1 | 600ns | @is_use_require{@_} = (1) x scalar(@_); | ||
| 24836 | |||||
| 24837 | 1 | 0s | my %is_sub_package; | ||
| 24838 | 1 | 700ns | @_ = qw(sub package); | ||
| 24839 | 1 | 700ns | @is_sub_package{@_} = (1) x scalar(@_); | ||
| 24840 | |||||
| 24841 | # This hash holds the hash key in $tokenizer_self for these keywords: | ||||
| 24842 | 1 | 2µs | my %is_format_END_DATA = ( | ||
| 24843 | 'format' => '_in_format', | ||||
| 24844 | '__END__' => '_in_end', | ||||
| 24845 | '__DATA__' => '_in_data', | ||||
| 24846 | ); | ||||
| 24847 | |||||
| 24848 | # ref: camel 3 p 147, | ||||
| 24849 | # but perl may accept undocumented flags | ||||
| 24850 | # perl 5.10 adds 'p' (preserve) | ||||
| 24851 | # Perl version 5.16, http://perldoc.perl.org/perlop.html, has these: | ||||
| 24852 | # /PATTERN/msixpodualgc or m?PATTERN?msixpodualgc | ||||
| 24853 | # s/PATTERN/REPLACEMENT/msixpodualgcer | ||||
| 24854 | # y/SEARCHLIST/REPLACEMENTLIST/cdsr | ||||
| 24855 | # tr/SEARCHLIST/REPLACEMENTLIST/cdsr | ||||
| 24856 | # qr/STRING/msixpodual | ||||
| 24857 | 1 | 3µs | my %quote_modifiers = ( | ||
| 24858 | 's' => '[msixpodualgcer]', | ||||
| 24859 | 'y' => '[cdsr]', | ||||
| 24860 | 'tr' => '[cdsr]', | ||||
| 24861 | 'm' => '[msixpodualgc]', | ||||
| 24862 | 'qr' => '[msixpodual]', | ||||
| 24863 | 'q' => "", | ||||
| 24864 | 'qq' => "", | ||||
| 24865 | 'qw' => "", | ||||
| 24866 | 'qx' => "", | ||||
| 24867 | ); | ||||
| 24868 | |||||
| 24869 | # table showing how many quoted things to look for after quote operator.. | ||||
| 24870 | # s, y, tr have 2 (pattern and replacement) | ||||
| 24871 | # others have 1 (pattern only) | ||||
| 24872 | 1 | 3µs | my %quote_items = ( | ||
| 24873 | 's' => 2, | ||||
| 24874 | 'y' => 2, | ||||
| 24875 | 'tr' => 2, | ||||
| 24876 | 'm' => 1, | ||||
| 24877 | 'qr' => 1, | ||||
| 24878 | 'q' => 1, | ||||
| 24879 | 'qq' => 1, | ||||
| 24880 | 'qw' => 1, | ||||
| 24881 | 'qx' => 1, | ||||
| 24882 | ); | ||||
| 24883 | |||||
| 24884 | sub tokenize_this_line { | ||||
| 24885 | |||||
| 24886 | # This routine breaks a line of perl code into tokens which are of use in | ||||
| 24887 | # indentation and reformatting. One of my goals has been to define tokens | ||||
| 24888 | # such that a newline may be inserted between any pair of tokens without | ||||
| 24889 | # changing or invalidating the program. This version comes close to this, | ||||
| 24890 | # although there are necessarily a few exceptions which must be caught by | ||||
| 24891 | # the formatter. Many of these involve the treatment of bare words. | ||||
| 24892 | # | ||||
| 24893 | # The tokens and their types are returned in arrays. See previous | ||||
| 24894 | # routine for their names. | ||||
| 24895 | # | ||||
| 24896 | # See also the array "valid_token_types" in the BEGIN section for an | ||||
| 24897 | # up-to-date list. | ||||
| 24898 | # | ||||
| 24899 | # To simplify things, token types are either a single character, or they | ||||
| 24900 | # are identical to the tokens themselves. | ||||
| 24901 | # | ||||
| 24902 | # As a debugging aid, the -D flag creates a file containing a side-by-side | ||||
| 24903 | # comparison of the input string and its tokenization for each line of a file. | ||||
| 24904 | # This is an invaluable debugging aid. | ||||
| 24905 | # | ||||
| 24906 | # In addition to tokens, and some associated quantities, the tokenizer | ||||
| 24907 | # also returns flags indication any special line types. These include | ||||
| 24908 | # quotes, here_docs, formats. | ||||
| 24909 | # | ||||
| 24910 | # ----------------------------------------------------------------------- | ||||
| 24911 | # | ||||
| 24912 | # How to add NEW_TOKENS: | ||||
| 24913 | # | ||||
| 24914 | # New token types will undoubtedly be needed in the future both to keep up | ||||
| 24915 | # with changes in perl and to help adapt the tokenizer to other applications. | ||||
| 24916 | # | ||||
| 24917 | # Here are some notes on the minimal steps. I wrote these notes while | ||||
| 24918 | # adding the 'v' token type for v-strings, which are things like version | ||||
| 24919 | # numbers 5.6.0, and ip addresses, and will use that as an example. ( You | ||||
| 24920 | # can use your editor to search for the string "NEW_TOKENS" to find the | ||||
| 24921 | # appropriate sections to change): | ||||
| 24922 | # | ||||
| 24923 | # *. Try to talk somebody else into doing it! If not, .. | ||||
| 24924 | # | ||||
| 24925 | # *. Make a backup of your current version in case things don't work out! | ||||
| 24926 | # | ||||
| 24927 | # *. Think of a new, unused character for the token type, and add to | ||||
| 24928 | # the array @valid_token_types in the BEGIN section of this package. | ||||
| 24929 | # For example, I used 'v' for v-strings. | ||||
| 24930 | # | ||||
| 24931 | # *. Implement coding to recognize the $type of the token in this routine. | ||||
| 24932 | # This is the hardest part, and is best done by imitating or modifying | ||||
| 24933 | # some of the existing coding. For example, to recognize v-strings, I | ||||
| 24934 | # patched 'sub scan_bare_identifier' to recognize v-strings beginning with | ||||
| 24935 | # 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'. | ||||
| 24936 | # | ||||
| 24937 | # *. Update sub operator_expected. This update is critically important but | ||||
| 24938 | # the coding is trivial. Look at the comments in that routine for help. | ||||
| 24939 | # For v-strings, which should behave like numbers, I just added 'v' to the | ||||
| 24940 | # regex used to handle numbers and strings (types 'n' and 'Q'). | ||||
| 24941 | # | ||||
| 24942 | # *. Implement a 'bond strength' rule in sub set_bond_strengths in | ||||
| 24943 | # Perl::Tidy::Formatter for breaking lines around this token type. You can | ||||
| 24944 | # skip this step and take the default at first, then adjust later to get | ||||
| 24945 | # desired results. For adding type 'v', I looked at sub bond_strength and | ||||
| 24946 | # saw that number type 'n' was using default strengths, so I didn't do | ||||
| 24947 | # anything. I may tune it up someday if I don't like the way line | ||||
| 24948 | # breaks with v-strings look. | ||||
| 24949 | # | ||||
| 24950 | # *. Implement a 'whitespace' rule in sub set_white_space_flag in | ||||
| 24951 | # Perl::Tidy::Formatter. For adding type 'v', I looked at this routine | ||||
| 24952 | # and saw that type 'n' used spaces on both sides, so I just added 'v' | ||||
| 24953 | # to the array @spaces_both_sides. | ||||
| 24954 | # | ||||
| 24955 | # *. Update HtmlWriter package so that users can colorize the token as | ||||
| 24956 | # desired. This is quite easy; see comments identified by 'NEW_TOKENS' in | ||||
| 24957 | # that package. For v-strings, I initially chose to use a default color | ||||
| 24958 | # equal to the default for numbers, but it might be nice to change that | ||||
| 24959 | # eventually. | ||||
| 24960 | # | ||||
| 24961 | # *. Update comments in Perl::Tidy::Tokenizer::dump_token_types. | ||||
| 24962 | # | ||||
| 24963 | # *. Run lots and lots of debug tests. Start with special files designed | ||||
| 24964 | # to test the new token type. Run with the -D flag to create a .DEBUG | ||||
| 24965 | # file which shows the tokenization. When these work ok, test as many old | ||||
| 24966 | # scripts as possible. Start with all of the '.t' files in the 'test' | ||||
| 24967 | # directory of the distribution file. Compare .tdy output with previous | ||||
| 24968 | # version and updated version to see the differences. Then include as | ||||
| 24969 | # many more files as possible. My own technique has been to collect a huge | ||||
| 24970 | # number of perl scripts (thousands!) into one directory and run perltidy | ||||
| 24971 | # *, then run diff between the output of the previous version and the | ||||
| 24972 | # current version. | ||||
| 24973 | # | ||||
| 24974 | # *. For another example, search for the smartmatch operator '~~' | ||||
| 24975 | # with your editor to see where updates were made for it. | ||||
| 24976 | # | ||||
| 24977 | # ----------------------------------------------------------------------- | ||||
| 24978 | |||||
| 24979 | my $line_of_tokens = shift; | ||||
| 24980 | my ($untrimmed_input_line) = $line_of_tokens->{_line_text}; | ||||
| 24981 | |||||
| 24982 | # patch while coding change is underway | ||||
| 24983 | # make callers private data to allow access | ||||
| 24984 | # $tokenizer_self = $caller_tokenizer_self; | ||||
| 24985 | |||||
| 24986 | # extract line number for use in error messages | ||||
| 24987 | $input_line_number = $line_of_tokens->{_line_number}; | ||||
| 24988 | |||||
| 24989 | # reinitialize for multi-line quote | ||||
| 24990 | $line_of_tokens->{_starting_in_quote} = $in_quote && $quote_type eq 'Q'; | ||||
| 24991 | |||||
| 24992 | # check for pod documentation | ||||
| 24993 | if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) { | ||||
| 24994 | |||||
| 24995 | # must not be in multi-line quote | ||||
| 24996 | # and must not be in an equation | ||||
| 24997 | if ( !$in_quote and ( operator_expected( 'b', '=', 'b' ) == TERM ) ) | ||||
| 24998 | { | ||||
| 24999 | $tokenizer_self->{_in_pod} = 1; | ||||
| 25000 | return; | ||||
| 25001 | } | ||||
| 25002 | } | ||||
| 25003 | |||||
| 25004 | $input_line = $untrimmed_input_line; | ||||
| 25005 | |||||
| 25006 | chomp $input_line; | ||||
| 25007 | |||||
| 25008 | # trim start of this line unless we are continuing a quoted line | ||||
| 25009 | # do not trim end because we might end in a quote (test: deken4.pl) | ||||
| 25010 | # Perl::Tidy::Formatter will delete needless trailing blanks | ||||
| 25011 | unless ( $in_quote && ( $quote_type eq 'Q' ) ) { | ||||
| 25012 | $input_line =~ s/^\s*//; # trim left end | ||||
| 25013 | } | ||||
| 25014 | |||||
| 25015 | # update the copy of the line for use in error messages | ||||
| 25016 | # This must be exactly what we give the pre_tokenizer | ||||
| 25017 | $tokenizer_self->{_line_text} = $input_line; | ||||
| 25018 | |||||
| 25019 | # re-initialize for the main loop | ||||
| 25020 | $routput_token_list = []; # stack of output token indexes | ||||
| 25021 | $routput_token_type = []; # token types | ||||
| 25022 | $routput_block_type = []; # types of code block | ||||
| 25023 | $routput_container_type = []; # paren types, such as if, elsif, .. | ||||
| 25024 | $routput_type_sequence = []; # nesting sequential number | ||||
| 25025 | |||||
| 25026 | $rhere_target_list = []; | ||||
| 25027 | |||||
| 25028 | $tok = $last_nonblank_token; | ||||
| 25029 | $type = $last_nonblank_type; | ||||
| 25030 | $prototype = $last_nonblank_prototype; | ||||
| 25031 | $last_nonblank_i = -1; | ||||
| 25032 | $block_type = $last_nonblank_block_type; | ||||
| 25033 | $container_type = $last_nonblank_container_type; | ||||
| 25034 | $type_sequence = $last_nonblank_type_sequence; | ||||
| 25035 | $indent_flag = 0; | ||||
| 25036 | $peeked_ahead = 0; | ||||
| 25037 | |||||
| 25038 | # tokenization is done in two stages.. | ||||
| 25039 | # stage 1 is a very simple pre-tokenization | ||||
| 25040 | my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens | ||||
| 25041 | |||||
| 25042 | # a little optimization for a full-line comment | ||||
| 25043 | if ( !$in_quote && ( $input_line =~ /^#/ ) ) { | ||||
| 25044 | $max_tokens_wanted = 1 # no use tokenizing a comment | ||||
| 25045 | } | ||||
| 25046 | |||||
| 25047 | # start by breaking the line into pre-tokens | ||||
| 25048 | ( $rtokens, $rtoken_map, $rtoken_type ) = | ||||
| 25049 | pre_tokenize( $input_line, $max_tokens_wanted ); | ||||
| 25050 | |||||
| 25051 | $max_token_index = scalar(@$rtokens) - 1; | ||||
| 25052 | push( @$rtokens, ' ', ' ', ' ' ); # extra whitespace simplifies logic | ||||
| 25053 | push( @$rtoken_map, 0, 0, 0 ); # shouldn't be referenced | ||||
| 25054 | push( @$rtoken_type, 'b', 'b', 'b' ); | ||||
| 25055 | |||||
| 25056 | # initialize for main loop | ||||
| 25057 | for $i ( 0 .. $max_token_index + 3 ) { | ||||
| 25058 | $routput_token_type->[$i] = ""; | ||||
| 25059 | $routput_block_type->[$i] = ""; | ||||
| 25060 | $routput_container_type->[$i] = ""; | ||||
| 25061 | $routput_type_sequence->[$i] = ""; | ||||
| 25062 | $routput_indent_flag->[$i] = 0; | ||||
| 25063 | } | ||||
| 25064 | $i = -1; | ||||
| 25065 | $i_tok = -1; | ||||
| 25066 | |||||
| 25067 | # ------------------------------------------------------------ | ||||
| 25068 | # begin main tokenization loop | ||||
| 25069 | # ------------------------------------------------------------ | ||||
| 25070 | |||||
| 25071 | # we are looking at each pre-token of one line and combining them | ||||
| 25072 | # into tokens | ||||
| 25073 | while ( ++$i <= $max_token_index ) { | ||||
| 25074 | |||||
| 25075 | if ($in_quote) { # continue looking for end of a quote | ||||
| 25076 | $type = $quote_type; | ||||
| 25077 | |||||
| 25078 | unless ( @{$routput_token_list} ) | ||||
| 25079 | { # initialize if continuation line | ||||
| 25080 | push( @{$routput_token_list}, $i ); | ||||
| 25081 | $routput_token_type->[$i] = $type; | ||||
| 25082 | |||||
| 25083 | } | ||||
| 25084 | $tok = $quote_character unless ( $quote_character =~ /^\s*$/ ); | ||||
| 25085 | |||||
| 25086 | # scan for the end of the quote or pattern | ||||
| 25087 | ( | ||||
| 25088 | $i, $in_quote, $quote_character, $quote_pos, $quote_depth, | ||||
| 25089 | $quoted_string_1, $quoted_string_2 | ||||
| 25090 | ) | ||||
| 25091 | = do_quote( | ||||
| 25092 | $i, $in_quote, $quote_character, | ||||
| 25093 | $quote_pos, $quote_depth, $quoted_string_1, | ||||
| 25094 | $quoted_string_2, $rtokens, $rtoken_map, | ||||
| 25095 | $max_token_index | ||||
| 25096 | ); | ||||
| 25097 | |||||
| 25098 | # all done if we didn't find it | ||||
| 25099 | last if ($in_quote); | ||||
| 25100 | |||||
| 25101 | # save pattern and replacement text for rescanning | ||||
| 25102 | my $qs1 = $quoted_string_1; | ||||
| 25103 | my $qs2 = $quoted_string_2; | ||||
| 25104 | |||||
| 25105 | # re-initialize for next search | ||||
| 25106 | $quote_character = ''; | ||||
| 25107 | $quote_pos = 0; | ||||
| 25108 | $quote_type = 'Q'; | ||||
| 25109 | $quoted_string_1 = ""; | ||||
| 25110 | $quoted_string_2 = ""; | ||||
| 25111 | last if ( ++$i > $max_token_index ); | ||||
| 25112 | |||||
| 25113 | # look for any modifiers | ||||
| 25114 | if ($allowed_quote_modifiers) { | ||||
| 25115 | |||||
| 25116 | # check for exact quote modifiers | ||||
| 25117 | if ( $$rtokens[$i] =~ /^[A-Za-z_]/ ) { | ||||
| 25118 | my $str = $$rtokens[$i]; | ||||
| 25119 | my $saw_modifier_e; | ||||
| 25120 | while ( $str =~ /\G$allowed_quote_modifiers/gc ) { | ||||
| 25121 | my $pos = pos($str); | ||||
| 25122 | my $char = substr( $str, $pos - 1, 1 ); | ||||
| 25123 | $saw_modifier_e ||= ( $char eq 'e' ); | ||||
| 25124 | } | ||||
| 25125 | |||||
| 25126 | # For an 'e' quote modifier we must scan the replacement | ||||
| 25127 | # text for here-doc targets. | ||||
| 25128 | if ($saw_modifier_e) { | ||||
| 25129 | |||||
| 25130 | my $rht = scan_replacement_text($qs1); | ||||
| 25131 | |||||
| 25132 | # Change type from 'Q' to 'h' for quotes with | ||||
| 25133 | # here-doc targets so that the formatter (see sub | ||||
| 25134 | # print_line_of_tokens) will not make any line | ||||
| 25135 | # breaks after this point. | ||||
| 25136 | if ($rht) { | ||||
| 25137 | push @{$rhere_target_list}, @{$rht}; | ||||
| 25138 | $type = 'h'; | ||||
| 25139 | if ( $i_tok < 0 ) { | ||||
| 25140 | my $ilast = $routput_token_list->[-1]; | ||||
| 25141 | $routput_token_type->[$ilast] = $type; | ||||
| 25142 | } | ||||
| 25143 | } | ||||
| 25144 | } | ||||
| 25145 | |||||
| 25146 | if ( defined( pos($str) ) ) { | ||||
| 25147 | |||||
| 25148 | # matched | ||||
| 25149 | if ( pos($str) == length($str) ) { | ||||
| 25150 | last if ( ++$i > $max_token_index ); | ||||
| 25151 | } | ||||
| 25152 | |||||
| 25153 | # Looks like a joined quote modifier | ||||
| 25154 | # and keyword, maybe something like | ||||
| 25155 | # s/xxx/yyy/gefor @k=... | ||||
| 25156 | # Example is "galgen.pl". Would have to split | ||||
| 25157 | # the word and insert a new token in the | ||||
| 25158 | # pre-token list. This is so rare that I haven't | ||||
| 25159 | # done it. Will just issue a warning citation. | ||||
| 25160 | |||||
| 25161 | # This error might also be triggered if my quote | ||||
| 25162 | # modifier characters are incomplete | ||||
| 25163 | else { | ||||
| 25164 | warning(<<EOM); | ||||
| 25165 | |||||
| 25166 | Partial match to quote modifier $allowed_quote_modifiers at word: '$str' | ||||
| 25167 | Please put a space between quote modifiers and trailing keywords. | ||||
| 25168 | EOM | ||||
| 25169 | |||||
| 25170 | # print "token $$rtokens[$i]\n"; | ||||
| 25171 | # my $num = length($str) - pos($str); | ||||
| 25172 | # $$rtokens[$i]=substr($$rtokens[$i],pos($str),$num); | ||||
| 25173 | # print "continuing with new token $$rtokens[$i]\n"; | ||||
| 25174 | |||||
| 25175 | # skipping past this token does least damage | ||||
| 25176 | last if ( ++$i > $max_token_index ); | ||||
| 25177 | } | ||||
| 25178 | } | ||||
| 25179 | else { | ||||
| 25180 | |||||
| 25181 | # example file: rokicki4.pl | ||||
| 25182 | # This error might also be triggered if my quote | ||||
| 25183 | # modifier characters are incomplete | ||||
| 25184 | write_logfile_entry( | ||||
| 25185 | "Note: found word $str at quote modifier location\n" | ||||
| 25186 | ); | ||||
| 25187 | } | ||||
| 25188 | } | ||||
| 25189 | |||||
| 25190 | # re-initialize | ||||
| 25191 | $allowed_quote_modifiers = ""; | ||||
| 25192 | } | ||||
| 25193 | } | ||||
| 25194 | |||||
| 25195 | unless ( $tok =~ /^\s*$/ || $tok eq 'CORE::' ) { | ||||
| 25196 | |||||
| 25197 | # try to catch some common errors | ||||
| 25198 | if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) { | ||||
| 25199 | |||||
| 25200 | if ( $last_nonblank_token eq 'eq' ) { | ||||
| 25201 | complain("Should 'eq' be '==' here ?\n"); | ||||
| 25202 | } | ||||
| 25203 | elsif ( $last_nonblank_token eq 'ne' ) { | ||||
| 25204 | complain("Should 'ne' be '!=' here ?\n"); | ||||
| 25205 | } | ||||
| 25206 | } | ||||
| 25207 | |||||
| 25208 | $last_last_nonblank_token = $last_nonblank_token; | ||||
| 25209 | $last_last_nonblank_type = $last_nonblank_type; | ||||
| 25210 | $last_last_nonblank_block_type = $last_nonblank_block_type; | ||||
| 25211 | $last_last_nonblank_container_type = | ||||
| 25212 | $last_nonblank_container_type; | ||||
| 25213 | $last_last_nonblank_type_sequence = | ||||
| 25214 | $last_nonblank_type_sequence; | ||||
| 25215 | $last_nonblank_token = $tok; | ||||
| 25216 | $last_nonblank_type = $type; | ||||
| 25217 | $last_nonblank_prototype = $prototype; | ||||
| 25218 | $last_nonblank_block_type = $block_type; | ||||
| 25219 | $last_nonblank_container_type = $container_type; | ||||
| 25220 | $last_nonblank_type_sequence = $type_sequence; | ||||
| 25221 | $last_nonblank_i = $i_tok; | ||||
| 25222 | } | ||||
| 25223 | |||||
| 25224 | # store previous token type | ||||
| 25225 | if ( $i_tok >= 0 ) { | ||||
| 25226 | $routput_token_type->[$i_tok] = $type; | ||||
| 25227 | $routput_block_type->[$i_tok] = $block_type; | ||||
| 25228 | $routput_container_type->[$i_tok] = $container_type; | ||||
| 25229 | $routput_type_sequence->[$i_tok] = $type_sequence; | ||||
| 25230 | $routput_indent_flag->[$i_tok] = $indent_flag; | ||||
| 25231 | } | ||||
| 25232 | my $pre_tok = $$rtokens[$i]; # get the next pre-token | ||||
| 25233 | my $pre_type = $$rtoken_type[$i]; # and type | ||||
| 25234 | $tok = $pre_tok; | ||||
| 25235 | $type = $pre_type; # to be modified as necessary | ||||
| 25236 | $block_type = ""; # blank for all tokens except code block braces | ||||
| 25237 | $container_type = ""; # blank for all tokens except some parens | ||||
| 25238 | $type_sequence = ""; # blank for all tokens except ?/: | ||||
| 25239 | $indent_flag = 0; | ||||
| 25240 | $prototype = ""; # blank for all tokens except user defined subs | ||||
| 25241 | $i_tok = $i; | ||||
| 25242 | |||||
| 25243 | # this pre-token will start an output token | ||||
| 25244 | push( @{$routput_token_list}, $i_tok ); | ||||
| 25245 | |||||
| 25246 | # continue gathering identifier if necessary | ||||
| 25247 | # but do not start on blanks and comments | ||||
| 25248 | if ( $id_scan_state && $pre_type !~ /[b#]/ ) { | ||||
| 25249 | |||||
| 25250 | if ( $id_scan_state =~ /^(sub|package)/ ) { | ||||
| 25251 | scan_id(); | ||||
| 25252 | } | ||||
| 25253 | else { | ||||
| 25254 | scan_identifier(); | ||||
| 25255 | } | ||||
| 25256 | |||||
| 25257 | last if ($id_scan_state); | ||||
| 25258 | next if ( ( $i > 0 ) || $type ); | ||||
| 25259 | |||||
| 25260 | # didn't find any token; start over | ||||
| 25261 | $type = $pre_type; | ||||
| 25262 | $tok = $pre_tok; | ||||
| 25263 | } | ||||
| 25264 | |||||
| 25265 | # handle whitespace tokens.. | ||||
| 25266 | next if ( $type eq 'b' ); | ||||
| 25267 | my $prev_tok = $i > 0 ? $$rtokens[ $i - 1 ] : ' '; | ||||
| 25268 | my $prev_type = $i > 0 ? $$rtoken_type[ $i - 1 ] : 'b'; | ||||
| 25269 | |||||
| 25270 | # Build larger tokens where possible, since we are not in a quote. | ||||
| 25271 | # | ||||
| 25272 | # First try to assemble digraphs. The following tokens are | ||||
| 25273 | # excluded and handled specially: | ||||
| 25274 | # '/=' is excluded because the / might start a pattern. | ||||
| 25275 | # 'x=' is excluded since it might be $x=, with $ on previous line | ||||
| 25276 | # '**' and *= might be typeglobs of punctuation variables | ||||
| 25277 | # I have allowed tokens starting with <, such as <=, | ||||
| 25278 | # because I don't think these could be valid angle operators. | ||||
| 25279 | # test file: storrs4.pl | ||||
| 25280 | my $test_tok = $tok . $$rtokens[ $i + 1 ]; | ||||
| 25281 | my $combine_ok = $is_digraph{$test_tok}; | ||||
| 25282 | |||||
| 25283 | # check for special cases which cannot be combined | ||||
| 25284 | if ($combine_ok) { | ||||
| 25285 | |||||
| 25286 | # '//' must be defined_or operator if an operator is expected. | ||||
| 25287 | # TODO: Code for other ambiguous digraphs (/=, x=, **, *=) | ||||
| 25288 | # could be migrated here for clarity | ||||
| 25289 | if ( $test_tok eq '//' ) { | ||||
| 25290 | my $next_type = $$rtokens[ $i + 1 ]; | ||||
| 25291 | my $expecting = | ||||
| 25292 | operator_expected( $prev_type, $tok, $next_type ); | ||||
| 25293 | $combine_ok = 0 unless ( $expecting == OPERATOR ); | ||||
| 25294 | } | ||||
| 25295 | } | ||||
| 25296 | |||||
| 25297 | if ( | ||||
| 25298 | $combine_ok | ||||
| 25299 | && ( $test_tok ne '/=' ) # might be pattern | ||||
| 25300 | && ( $test_tok ne 'x=' ) # might be $x | ||||
| 25301 | && ( $test_tok ne '**' ) # typeglob? | ||||
| 25302 | && ( $test_tok ne '*=' ) # typeglob? | ||||
| 25303 | ) | ||||
| 25304 | { | ||||
| 25305 | $tok = $test_tok; | ||||
| 25306 | $i++; | ||||
| 25307 | |||||
| 25308 | # Now try to assemble trigraphs. Note that all possible | ||||
| 25309 | # perl trigraphs can be constructed by appending a character | ||||
| 25310 | # to a digraph. | ||||
| 25311 | $test_tok = $tok . $$rtokens[ $i + 1 ]; | ||||
| 25312 | |||||
| 25313 | if ( $is_trigraph{$test_tok} ) { | ||||
| 25314 | $tok = $test_tok; | ||||
| 25315 | $i++; | ||||
| 25316 | } | ||||
| 25317 | } | ||||
| 25318 | |||||
| 25319 | $type = $tok; | ||||
| 25320 | $next_tok = $$rtokens[ $i + 1 ]; | ||||
| 25321 | $next_type = $$rtoken_type[ $i + 1 ]; | ||||
| 25322 | |||||
| 25323 | TOKENIZER_DEBUG_FLAG_TOKENIZE && do { | ||||
| 25324 | local $" = ')('; | ||||
| 25325 | my @debug_list = ( | ||||
| 25326 | $last_nonblank_token, $tok, | ||||
| 25327 | $next_tok, $brace_depth, | ||||
| 25328 | $brace_type[$brace_depth], $paren_depth, | ||||
| 25329 | $paren_type[$paren_depth] | ||||
| 25330 | ); | ||||
| 25331 | print STDOUT "TOKENIZE:(@debug_list)\n"; | ||||
| 25332 | }; | ||||
| 25333 | |||||
| 25334 | # turn off attribute list on first non-blank, non-bareword | ||||
| 25335 | if ( $pre_type ne 'w' ) { $in_attribute_list = 0 } | ||||
| 25336 | |||||
| 25337 | ############################################################### | ||||
| 25338 | # We have the next token, $tok. | ||||
| 25339 | # Now we have to examine this token and decide what it is | ||||
| 25340 | # and define its $type | ||||
| 25341 | # | ||||
| 25342 | # section 1: bare words | ||||
| 25343 | ############################################################### | ||||
| 25344 | |||||
| 25345 | if ( $pre_type eq 'w' ) { | ||||
| 25346 | $expecting = operator_expected( $prev_type, $tok, $next_type ); | ||||
| 25347 | my ( $next_nonblank_token, $i_next ) = | ||||
| 25348 | find_next_nonblank_token( $i, $rtokens, $max_token_index ); | ||||
| 25349 | |||||
| 25350 | # ATTRS: handle sub and variable attributes | ||||
| 25351 | if ($in_attribute_list) { | ||||
| 25352 | |||||
| 25353 | # treat bare word followed by open paren like qw( | ||||
| 25354 | if ( $next_nonblank_token eq '(' ) { | ||||
| 25355 | $in_quote = $quote_items{'q'}; | ||||
| 25356 | $allowed_quote_modifiers = $quote_modifiers{'q'}; | ||||
| 25357 | $type = 'q'; | ||||
| 25358 | $quote_type = 'q'; | ||||
| 25359 | next; | ||||
| 25360 | } | ||||
| 25361 | |||||
| 25362 | # handle bareword not followed by open paren | ||||
| 25363 | else { | ||||
| 25364 | $type = 'w'; | ||||
| 25365 | next; | ||||
| 25366 | } | ||||
| 25367 | } | ||||
| 25368 | |||||
| 25369 | # quote a word followed by => operator | ||||
| 25370 | if ( $next_nonblank_token eq '=' ) { | ||||
| 25371 | |||||
| 25372 | if ( $$rtokens[ $i_next + 1 ] eq '>' ) { | ||||
| 25373 | if ( $is_constant{$current_package}{$tok} ) { | ||||
| 25374 | $type = 'C'; | ||||
| 25375 | } | ||||
| 25376 | elsif ( $is_user_function{$current_package}{$tok} ) { | ||||
| 25377 | $type = 'U'; | ||||
| 25378 | $prototype = | ||||
| 25379 | $user_function_prototype{$current_package}{$tok}; | ||||
| 25380 | } | ||||
| 25381 | elsif ( $tok =~ /^v\d+$/ ) { | ||||
| 25382 | $type = 'v'; | ||||
| 25383 | report_v_string($tok); | ||||
| 25384 | } | ||||
| 25385 | else { $type = 'w' } | ||||
| 25386 | |||||
| 25387 | next; | ||||
| 25388 | } | ||||
| 25389 | } | ||||
| 25390 | |||||
| 25391 | # quote a bare word within braces..like xxx->{s}; note that we | ||||
| 25392 | # must be sure this is not a structural brace, to avoid | ||||
| 25393 | # mistaking {s} in the following for a quoted bare word: | ||||
| 25394 | # for(@[){s}bla}BLA} | ||||
| 25395 | # Also treat q in something like var{-q} as a bare word, not qoute operator | ||||
| 25396 | if ( | ||||
| 25397 | $next_nonblank_token eq '}' | ||||
| 25398 | && ( | ||||
| 25399 | $last_nonblank_type eq 'L' | ||||
| 25400 | || ( $last_nonblank_type eq 'm' | ||||
| 25401 | && $last_last_nonblank_type eq 'L' ) | ||||
| 25402 | ) | ||||
| 25403 | ) | ||||
| 25404 | { | ||||
| 25405 | $type = 'w'; | ||||
| 25406 | next; | ||||
| 25407 | } | ||||
| 25408 | |||||
| 25409 | # a bare word immediately followed by :: is not a keyword; | ||||
| 25410 | # use $tok_kw when testing for keywords to avoid a mistake | ||||
| 25411 | my $tok_kw = $tok; | ||||
| 25412 | if ( $$rtokens[ $i + 1 ] eq ':' && $$rtokens[ $i + 2 ] eq ':' ) | ||||
| 25413 | { | ||||
| 25414 | $tok_kw .= '::'; | ||||
| 25415 | } | ||||
| 25416 | |||||
| 25417 | # handle operator x (now we know it isn't $x=) | ||||
| 25418 | if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) { | ||||
| 25419 | if ( $tok eq 'x' ) { | ||||
| 25420 | |||||
| 25421 | if ( $$rtokens[ $i + 1 ] eq '=' ) { # x= | ||||
| 25422 | $tok = 'x='; | ||||
| 25423 | $type = $tok; | ||||
| 25424 | $i++; | ||||
| 25425 | } | ||||
| 25426 | else { | ||||
| 25427 | $type = 'x'; | ||||
| 25428 | } | ||||
| 25429 | } | ||||
| 25430 | |||||
| 25431 | # FIXME: Patch: mark something like x4 as an integer for now | ||||
| 25432 | # It gets fixed downstream. This is easier than | ||||
| 25433 | # splitting the pretoken. | ||||
| 25434 | else { | ||||
| 25435 | $type = 'n'; | ||||
| 25436 | } | ||||
| 25437 | } | ||||
| 25438 | elsif ( $tok_kw eq 'CORE::' ) { | ||||
| 25439 | $type = $tok = $tok_kw; | ||||
| 25440 | $i += 2; | ||||
| 25441 | } | ||||
| 25442 | elsif ( ( $tok eq 'strict' ) | ||||
| 25443 | and ( $last_nonblank_token eq 'use' ) ) | ||||
| 25444 | { | ||||
| 25445 | $tokenizer_self->{_saw_use_strict} = 1; | ||||
| 25446 | scan_bare_identifier(); | ||||
| 25447 | } | ||||
| 25448 | |||||
| 25449 | elsif ( ( $tok eq 'warnings' ) | ||||
| 25450 | and ( $last_nonblank_token eq 'use' ) ) | ||||
| 25451 | { | ||||
| 25452 | $tokenizer_self->{_saw_perl_dash_w} = 1; | ||||
| 25453 | |||||
| 25454 | # scan as identifier, so that we pick up something like: | ||||
| 25455 | # use warnings::register | ||||
| 25456 | scan_bare_identifier(); | ||||
| 25457 | } | ||||
| 25458 | |||||
| 25459 | elsif ( | ||||
| 25460 | $tok eq 'AutoLoader' | ||||
| 25461 | && $tokenizer_self->{_look_for_autoloader} | ||||
| 25462 | && ( | ||||
| 25463 | $last_nonblank_token eq 'use' | ||||
| 25464 | |||||
| 25465 | # these regexes are from AutoSplit.pm, which we want | ||||
| 25466 | # to mimic | ||||
| 25467 | || $input_line =~ /^\s*(use|require)\s+AutoLoader\b/ | ||||
| 25468 | || $input_line =~ /\bISA\s*=.*\bAutoLoader\b/ | ||||
| 25469 | ) | ||||
| 25470 | ) | ||||
| 25471 | { | ||||
| 25472 | write_logfile_entry("AutoLoader seen, -nlal deactivates\n"); | ||||
| 25473 | $tokenizer_self->{_saw_autoloader} = 1; | ||||
| 25474 | $tokenizer_self->{_look_for_autoloader} = 0; | ||||
| 25475 | scan_bare_identifier(); | ||||
| 25476 | } | ||||
| 25477 | |||||
| 25478 | elsif ( | ||||
| 25479 | $tok eq 'SelfLoader' | ||||
| 25480 | && $tokenizer_self->{_look_for_selfloader} | ||||
| 25481 | && ( $last_nonblank_token eq 'use' | ||||
| 25482 | || $input_line =~ /^\s*(use|require)\s+SelfLoader\b/ | ||||
| 25483 | || $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ ) | ||||
| 25484 | ) | ||||
| 25485 | { | ||||
| 25486 | write_logfile_entry("SelfLoader seen, -nlsl deactivates\n"); | ||||
| 25487 | $tokenizer_self->{_saw_selfloader} = 1; | ||||
| 25488 | $tokenizer_self->{_look_for_selfloader} = 0; | ||||
| 25489 | scan_bare_identifier(); | ||||
| 25490 | } | ||||
| 25491 | |||||
| 25492 | elsif ( ( $tok eq 'constant' ) | ||||
| 25493 | and ( $last_nonblank_token eq 'use' ) ) | ||||
| 25494 | { | ||||
| 25495 | scan_bare_identifier(); | ||||
| 25496 | my ( $next_nonblank_token, $i_next ) = | ||||
| 25497 | find_next_nonblank_token( $i, $rtokens, | ||||
| 25498 | $max_token_index ); | ||||
| 25499 | |||||
| 25500 | if ($next_nonblank_token) { | ||||
| 25501 | |||||
| 25502 | if ( $is_keyword{$next_nonblank_token} ) { | ||||
| 25503 | |||||
| 25504 | # Assume qw is used as a quote and okay, as in: | ||||
| 25505 | # use constant qw{ DEBUG 0 }; | ||||
| 25506 | # Not worth trying to parse for just a warning | ||||
| 25507 | |||||
| 25508 | # NOTE: This warning is deactivated because recent | ||||
| 25509 | # versions of perl do not complain here, but | ||||
| 25510 | # the coding is retained for reference. | ||||
| 25511 | if ( 0 && $next_nonblank_token ne 'qw' ) { | ||||
| 25512 | warning( | ||||
| 25513 | "Attempting to define constant '$next_nonblank_token' which is a perl keyword\n" | ||||
| 25514 | ); | ||||
| 25515 | } | ||||
| 25516 | } | ||||
| 25517 | |||||
| 25518 | # FIXME: could check for error in which next token is | ||||
| 25519 | # not a word (number, punctuation, ..) | ||||
| 25520 | else { | ||||
| 25521 | $is_constant{$current_package}{$next_nonblank_token} | ||||
| 25522 | = 1; | ||||
| 25523 | } | ||||
| 25524 | } | ||||
| 25525 | } | ||||
| 25526 | |||||
| 25527 | # various quote operators | ||||
| 25528 | elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) { | ||||
| 25529 | ##NICOL PATCH | ||||
| 25530 | if ( $expecting == OPERATOR ) { | ||||
| 25531 | |||||
| 25532 | # Be careful not to call an error for a qw quote | ||||
| 25533 | # where a parenthesized list is allowed. For example, | ||||
| 25534 | # it could also be a for/foreach construct such as | ||||
| 25535 | # | ||||
| 25536 | # foreach my $key qw\Uno Due Tres Quadro\ { | ||||
| 25537 | # print "Set $key\n"; | ||||
| 25538 | # } | ||||
| 25539 | # | ||||
| 25540 | |||||
| 25541 | # Or it could be a function call. | ||||
| 25542 | # NOTE: Braces in something like &{ xxx } are not | ||||
| 25543 | # marked as a block, we might have a method call. | ||||
| 25544 | # &method(...), $method->(..), &{method}(...), | ||||
| 25545 | # $ref[2](list) is ok & short for $ref[2]->(list) | ||||
| 25546 | # | ||||
| 25547 | # See notes in 'sub code_block_type' and | ||||
| 25548 | # 'sub is_non_structural_brace' | ||||
| 25549 | |||||
| 25550 | unless ( | ||||
| 25551 | $tok eq 'qw' | ||||
| 25552 | && ( $last_nonblank_token =~ /^([\]\}\&]|\-\>)/ | ||||
| 25553 | || $is_for_foreach{$want_paren} ) | ||||
| 25554 | ) | ||||
| 25555 | { | ||||
| 25556 | error_if_expecting_OPERATOR(); | ||||
| 25557 | } | ||||
| 25558 | } | ||||
| 25559 | $in_quote = $quote_items{$tok}; | ||||
| 25560 | $allowed_quote_modifiers = $quote_modifiers{$tok}; | ||||
| 25561 | |||||
| 25562 | # All quote types are 'Q' except possibly qw quotes. | ||||
| 25563 | # qw quotes are special in that they may generally be trimmed | ||||
| 25564 | # of leading and trailing whitespace. So they are given a | ||||
| 25565 | # separate type, 'q', unless requested otherwise. | ||||
| 25566 | $type = | ||||
| 25567 | ( $tok eq 'qw' && $tokenizer_self->{_trim_qw} ) | ||||
| 25568 | ? 'q' | ||||
| 25569 | : 'Q'; | ||||
| 25570 | $quote_type = $type; | ||||
| 25571 | } | ||||
| 25572 | |||||
| 25573 | # check for a statement label | ||||
| 25574 | elsif ( | ||||
| 25575 | ( $next_nonblank_token eq ':' ) | ||||
| 25576 | && ( $$rtokens[ $i_next + 1 ] ne ':' ) | ||||
| 25577 | && ( $i_next <= $max_token_index ) # colon on same line | ||||
| 25578 | && label_ok() | ||||
| 25579 | ) | ||||
| 25580 | { | ||||
| 25581 | if ( $tok !~ /[A-Z]/ ) { | ||||
| 25582 | push @{ $tokenizer_self->{_rlower_case_labels_at} }, | ||||
| 25583 | $input_line_number; | ||||
| 25584 | } | ||||
| 25585 | $type = 'J'; | ||||
| 25586 | $tok .= ':'; | ||||
| 25587 | $i = $i_next; | ||||
| 25588 | next; | ||||
| 25589 | } | ||||
| 25590 | |||||
| 25591 | # 'sub' || 'package' | ||||
| 25592 | elsif ( $is_sub_package{$tok_kw} ) { | ||||
| 25593 | error_if_expecting_OPERATOR() | ||||
| 25594 | if ( $expecting == OPERATOR ); | ||||
| 25595 | scan_id(); | ||||
| 25596 | } | ||||
| 25597 | |||||
| 25598 | # Note on token types for format, __DATA__, __END__: | ||||
| 25599 | # It simplifies things to give these type ';', so that when we | ||||
| 25600 | # start rescanning we will be expecting a token of type TERM. | ||||
| 25601 | # We will switch to type 'k' before outputting the tokens. | ||||
| 25602 | elsif ( $is_format_END_DATA{$tok_kw} ) { | ||||
| 25603 | $type = ';'; # make tokenizer look for TERM next | ||||
| 25604 | $tokenizer_self->{ $is_format_END_DATA{$tok_kw} } = 1; | ||||
| 25605 | last; | ||||
| 25606 | } | ||||
| 25607 | |||||
| 25608 | elsif ( $is_keyword{$tok_kw} ) { | ||||
| 25609 | $type = 'k'; | ||||
| 25610 | |||||
| 25611 | # Since for and foreach may not be followed immediately | ||||
| 25612 | # by an opening paren, we have to remember which keyword | ||||
| 25613 | # is associated with the next '(' | ||||
| 25614 | if ( $is_for_foreach{$tok} ) { | ||||
| 25615 | if ( new_statement_ok() ) { | ||||
| 25616 | $want_paren = $tok; | ||||
| 25617 | } | ||||
| 25618 | } | ||||
| 25619 | |||||
| 25620 | # recognize 'use' statements, which are special | ||||
| 25621 | elsif ( $is_use_require{$tok} ) { | ||||
| 25622 | $statement_type = $tok; | ||||
| 25623 | error_if_expecting_OPERATOR() | ||||
| 25624 | if ( $expecting == OPERATOR ); | ||||
| 25625 | } | ||||
| 25626 | |||||
| 25627 | # remember my and our to check for trailing ": shared" | ||||
| 25628 | elsif ( $is_my_our{$tok} ) { | ||||
| 25629 | $statement_type = $tok; | ||||
| 25630 | } | ||||
| 25631 | |||||
| 25632 | # Check for misplaced 'elsif' and 'else', but allow isolated | ||||
| 25633 | # else or elsif blocks to be formatted. This is indicated | ||||
| 25634 | # by a last noblank token of ';' | ||||
| 25635 | elsif ( $tok eq 'elsif' ) { | ||||
| 25636 | if ( $last_nonblank_token ne ';' | ||||
| 25637 | && $last_nonblank_block_type !~ | ||||
| 25638 | /^(if|elsif|unless)$/ ) | ||||
| 25639 | { | ||||
| 25640 | warning( | ||||
| 25641 | "expecting '$tok' to follow one of 'if|elsif|unless'\n" | ||||
| 25642 | ); | ||||
| 25643 | } | ||||
| 25644 | } | ||||
| 25645 | elsif ( $tok eq 'else' ) { | ||||
| 25646 | |||||
| 25647 | # patched for SWITCH/CASE | ||||
| 25648 | if ( $last_nonblank_token ne ';' | ||||
| 25649 | && $last_nonblank_block_type !~ | ||||
| 25650 | /^(if|elsif|unless|case|when)$/ ) | ||||
| 25651 | { | ||||
| 25652 | warning( | ||||
| 25653 | "expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n" | ||||
| 25654 | ); | ||||
| 25655 | } | ||||
| 25656 | } | ||||
| 25657 | elsif ( $tok eq 'continue' ) { | ||||
| 25658 | if ( $last_nonblank_token ne ';' | ||||
| 25659 | && $last_nonblank_block_type !~ | ||||
| 25660 | /(^(\{|\}|;|while|until|for|foreach)|:$)/ ) | ||||
| 25661 | { | ||||
| 25662 | |||||
| 25663 | # note: ';' '{' and '}' in list above | ||||
| 25664 | # because continues can follow bare blocks; | ||||
| 25665 | # ':' is labeled block | ||||
| 25666 | # | ||||
| 25667 | ############################################ | ||||
| 25668 | # NOTE: This check has been deactivated because | ||||
| 25669 | # continue has an alternative usage for given/when | ||||
| 25670 | # blocks in perl 5.10 | ||||
| 25671 | ## warning("'$tok' should follow a block\n"); | ||||
| 25672 | ############################################ | ||||
| 25673 | } | ||||
| 25674 | } | ||||
| 25675 | |||||
| 25676 | # patch for SWITCH/CASE if 'case' and 'when are | ||||
| 25677 | # treated as keywords. | ||||
| 25678 | elsif ( $tok eq 'when' || $tok eq 'case' ) { | ||||
| 25679 | $statement_type = $tok; # next '{' is block | ||||
| 25680 | } | ||||
| 25681 | |||||
| 25682 | # | ||||
| 25683 | # indent trailing if/unless/while/until | ||||
| 25684 | # outdenting will be handled by later indentation loop | ||||
| 25685 | ## DEACTIVATED: unfortunately this can cause some unwanted indentation like: | ||||
| 25686 | ##$opt_o = 1 | ||||
| 25687 | ## if !( | ||||
| 25688 | ## $opt_b | ||||
| 25689 | ## || $opt_c | ||||
| 25690 | ## || $opt_d | ||||
| 25691 | ## || $opt_f | ||||
| 25692 | ## || $opt_i | ||||
| 25693 | ## || $opt_l | ||||
| 25694 | ## || $opt_o | ||||
| 25695 | ## || $opt_x | ||||
| 25696 | ## ); | ||||
| 25697 | ## if ( $tok =~ /^(if|unless|while|until)$/ | ||||
| 25698 | ## && $next_nonblank_token ne '(' ) | ||||
| 25699 | ## { | ||||
| 25700 | ## $indent_flag = 1; | ||||
| 25701 | ## } | ||||
| 25702 | } | ||||
| 25703 | |||||
| 25704 | # check for inline label following | ||||
| 25705 | # /^(redo|last|next|goto)$/ | ||||
| 25706 | elsif (( $last_nonblank_type eq 'k' ) | ||||
| 25707 | && ( $is_redo_last_next_goto{$last_nonblank_token} ) ) | ||||
| 25708 | { | ||||
| 25709 | $type = 'j'; | ||||
| 25710 | next; | ||||
| 25711 | } | ||||
| 25712 | |||||
| 25713 | # something else -- | ||||
| 25714 | else { | ||||
| 25715 | |||||
| 25716 | scan_bare_identifier(); | ||||
| 25717 | if ( $type eq 'w' ) { | ||||
| 25718 | |||||
| 25719 | if ( $expecting == OPERATOR ) { | ||||
| 25720 | |||||
| 25721 | # don't complain about possible indirect object | ||||
| 25722 | # notation. | ||||
| 25723 | # For example: | ||||
| 25724 | # package main; | ||||
| 25725 | # sub new($) { ... } | ||||
| 25726 | # $b = new A::; # calls A::new | ||||
| 25727 | # $c = new A; # same thing but suspicious | ||||
| 25728 | # This will call A::new but we have a 'new' in | ||||
| 25729 | # main:: which looks like a constant. | ||||
| 25730 | # | ||||
| 25731 | if ( $last_nonblank_type eq 'C' ) { | ||||
| 25732 | if ( $tok !~ /::$/ ) { | ||||
| 25733 | complain(<<EOM); | ||||
| 25734 | Expecting operator after '$last_nonblank_token' but found bare word '$tok' | ||||
| 25735 | Maybe indirectet object notation? | ||||
| 25736 | EOM | ||||
| 25737 | } | ||||
| 25738 | } | ||||
| 25739 | else { | ||||
| 25740 | error_if_expecting_OPERATOR("bareword"); | ||||
| 25741 | } | ||||
| 25742 | } | ||||
| 25743 | |||||
| 25744 | # mark bare words immediately followed by a paren as | ||||
| 25745 | # functions | ||||
| 25746 | $next_tok = $$rtokens[ $i + 1 ]; | ||||
| 25747 | if ( $next_tok eq '(' ) { | ||||
| 25748 | $type = 'U'; | ||||
| 25749 | } | ||||
| 25750 | |||||
| 25751 | # underscore after file test operator is file handle | ||||
| 25752 | if ( $tok eq '_' && $last_nonblank_type eq 'F' ) { | ||||
| 25753 | $type = 'Z'; | ||||
| 25754 | } | ||||
| 25755 | |||||
| 25756 | # patch for SWITCH/CASE if 'case' and 'when are | ||||
| 25757 | # not treated as keywords: | ||||
| 25758 | if ( | ||||
| 25759 | ( | ||||
| 25760 | $tok eq 'case' | ||||
| 25761 | && $brace_type[$brace_depth] eq 'switch' | ||||
| 25762 | ) | ||||
| 25763 | || ( $tok eq 'when' | ||||
| 25764 | && $brace_type[$brace_depth] eq 'given' ) | ||||
| 25765 | ) | ||||
| 25766 | { | ||||
| 25767 | $statement_type = $tok; # next '{' is block | ||||
| 25768 | $type = 'k'; # for keyword syntax coloring | ||||
| 25769 | } | ||||
| 25770 | |||||
| 25771 | # patch for SWITCH/CASE if switch and given not keywords | ||||
| 25772 | # Switch is not a perl 5 keyword, but we will gamble | ||||
| 25773 | # and mark switch followed by paren as a keyword. This | ||||
| 25774 | # is only necessary to get html syntax coloring nice, | ||||
| 25775 | # and does not commit this as being a switch/case. | ||||
| 25776 | if ( $next_nonblank_token eq '(' | ||||
| 25777 | && ( $tok eq 'switch' || $tok eq 'given' ) ) | ||||
| 25778 | { | ||||
| 25779 | $type = 'k'; # for keyword syntax coloring | ||||
| 25780 | } | ||||
| 25781 | } | ||||
| 25782 | } | ||||
| 25783 | } | ||||
| 25784 | |||||
| 25785 | ############################################################### | ||||
| 25786 | # section 2: strings of digits | ||||
| 25787 | ############################################################### | ||||
| 25788 | elsif ( $pre_type eq 'd' ) { | ||||
| 25789 | $expecting = operator_expected( $prev_type, $tok, $next_type ); | ||||
| 25790 | error_if_expecting_OPERATOR("Number") | ||||
| 25791 | if ( $expecting == OPERATOR ); | ||||
| 25792 | my $number = scan_number(); | ||||
| 25793 | if ( !defined($number) ) { | ||||
| 25794 | |||||
| 25795 | # shouldn't happen - we should always get a number | ||||
| 25796 | warning("non-number beginning with digit--program bug\n"); | ||||
| 25797 | report_definite_bug(); | ||||
| 25798 | } | ||||
| 25799 | } | ||||
| 25800 | |||||
| 25801 | ############################################################### | ||||
| 25802 | # section 3: all other tokens | ||||
| 25803 | ############################################################### | ||||
| 25804 | |||||
| 25805 | else { | ||||
| 25806 | last if ( $tok eq '#' ); | ||||
| 25807 | my $code = $tokenization_code->{$tok}; | ||||
| 25808 | if ($code) { | ||||
| 25809 | $expecting = | ||||
| 25810 | operator_expected( $prev_type, $tok, $next_type ); | ||||
| 25811 | $code->(); | ||||
| 25812 | redo if $in_quote; | ||||
| 25813 | } | ||||
| 25814 | } | ||||
| 25815 | } | ||||
| 25816 | |||||
| 25817 | # ----------------------------- | ||||
| 25818 | # end of main tokenization loop | ||||
| 25819 | # ----------------------------- | ||||
| 25820 | |||||
| 25821 | if ( $i_tok >= 0 ) { | ||||
| 25822 | $routput_token_type->[$i_tok] = $type; | ||||
| 25823 | $routput_block_type->[$i_tok] = $block_type; | ||||
| 25824 | $routput_container_type->[$i_tok] = $container_type; | ||||
| 25825 | $routput_type_sequence->[$i_tok] = $type_sequence; | ||||
| 25826 | $routput_indent_flag->[$i_tok] = $indent_flag; | ||||
| 25827 | } | ||||
| 25828 | |||||
| 25829 | unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) { | ||||
| 25830 | $last_last_nonblank_token = $last_nonblank_token; | ||||
| 25831 | $last_last_nonblank_type = $last_nonblank_type; | ||||
| 25832 | $last_last_nonblank_block_type = $last_nonblank_block_type; | ||||
| 25833 | $last_last_nonblank_container_type = $last_nonblank_container_type; | ||||
| 25834 | $last_last_nonblank_type_sequence = $last_nonblank_type_sequence; | ||||
| 25835 | $last_nonblank_token = $tok; | ||||
| 25836 | $last_nonblank_type = $type; | ||||
| 25837 | $last_nonblank_block_type = $block_type; | ||||
| 25838 | $last_nonblank_container_type = $container_type; | ||||
| 25839 | $last_nonblank_type_sequence = $type_sequence; | ||||
| 25840 | $last_nonblank_prototype = $prototype; | ||||
| 25841 | } | ||||
| 25842 | |||||
| 25843 | # reset indentation level if necessary at a sub or package | ||||
| 25844 | # in an attempt to recover from a nesting error | ||||
| 25845 | if ( $level_in_tokenizer < 0 ) { | ||||
| 25846 | if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) { | ||||
| 25847 | reset_indentation_level(0); | ||||
| 25848 | brace_warning("resetting level to 0 at $1 $2\n"); | ||||
| 25849 | } | ||||
| 25850 | } | ||||
| 25851 | |||||
| 25852 | # all done tokenizing this line ... | ||||
| 25853 | # now prepare the final list of tokens and types | ||||
| 25854 | |||||
| 25855 | my @token_type = (); # stack of output token types | ||||
| 25856 | my @block_type = (); # stack of output code block types | ||||
| 25857 | my @container_type = (); # stack of output code container types | ||||
| 25858 | my @type_sequence = (); # stack of output type sequence numbers | ||||
| 25859 | my @tokens = (); # output tokens | ||||
| 25860 | my @levels = (); # structural brace levels of output tokens | ||||
| 25861 | my @slevels = (); # secondary nesting levels of output tokens | ||||
| 25862 | my @nesting_tokens = (); # string of tokens leading to this depth | ||||
| 25863 | my @nesting_types = (); # string of token types leading to this depth | ||||
| 25864 | my @nesting_blocks = (); # string of block types leading to this depth | ||||
| 25865 | my @nesting_lists = (); # string of list types leading to this depth | ||||
| 25866 | my @ci_string = (); # string needed to compute continuation indentation | ||||
| 25867 | my @container_environment = (); # BLOCK or LIST | ||||
| 25868 | my $container_environment = ''; | ||||
| 25869 | my $im = -1; # previous $i value | ||||
| 25870 | my $num; | ||||
| 25871 | my $ci_string_sum = ones_count($ci_string_in_tokenizer); | ||||
| 25872 | |||||
| 25873 | # Computing Token Indentation | ||||
| 25874 | # | ||||
| 25875 | # The final section of the tokenizer forms tokens and also computes | ||||
| 25876 | # parameters needed to find indentation. It is much easier to do it | ||||
| 25877 | # in the tokenizer than elsewhere. Here is a brief description of how | ||||
| 25878 | # indentation is computed. Perl::Tidy computes indentation as the sum | ||||
| 25879 | # of 2 terms: | ||||
| 25880 | # | ||||
| 25881 | # (1) structural indentation, such as if/else/elsif blocks | ||||
| 25882 | # (2) continuation indentation, such as long parameter call lists. | ||||
| 25883 | # | ||||
| 25884 | # These are occasionally called primary and secondary indentation. | ||||
| 25885 | # | ||||
| 25886 | # Structural indentation is introduced by tokens of type '{', although | ||||
| 25887 | # the actual tokens might be '{', '(', or '['. Structural indentation | ||||
| 25888 | # is of two types: BLOCK and non-BLOCK. Default structural indentation | ||||
| 25889 | # is 4 characters if the standard indentation scheme is used. | ||||
| 25890 | # | ||||
| 25891 | # Continuation indentation is introduced whenever a line at BLOCK level | ||||
| 25892 | # is broken before its termination. Default continuation indentation | ||||
| 25893 | # is 2 characters in the standard indentation scheme. | ||||
| 25894 | # | ||||
| 25895 | # Both types of indentation may be nested arbitrarily deep and | ||||
| 25896 | # interlaced. The distinction between the two is somewhat arbitrary. | ||||
| 25897 | # | ||||
| 25898 | # For each token, we will define two variables which would apply if | ||||
| 25899 | # the current statement were broken just before that token, so that | ||||
| 25900 | # that token started a new line: | ||||
| 25901 | # | ||||
| 25902 | # $level = the structural indentation level, | ||||
| 25903 | # $ci_level = the continuation indentation level | ||||
| 25904 | # | ||||
| 25905 | # The total indentation will be $level * (4 spaces) + $ci_level * (2 spaces), | ||||
| 25906 | # assuming defaults. However, in some special cases it is customary | ||||
| 25907 | # to modify $ci_level from this strict value. | ||||
| 25908 | # | ||||
| 25909 | # The total structural indentation is easy to compute by adding and | ||||
| 25910 | # subtracting 1 from a saved value as types '{' and '}' are seen. The | ||||
| 25911 | # running value of this variable is $level_in_tokenizer. | ||||
| 25912 | # | ||||
| 25913 | # The total continuation is much more difficult to compute, and requires | ||||
| 25914 | # several variables. These variables are: | ||||
| 25915 | # | ||||
| 25916 | # $ci_string_in_tokenizer = a string of 1's and 0's indicating, for | ||||
| 25917 | # each indentation level, if there are intervening open secondary | ||||
| 25918 | # structures just prior to that level. | ||||
| 25919 | # $continuation_string_in_tokenizer = a string of 1's and 0's indicating | ||||
| 25920 | # if the last token at that level is "continued", meaning that it | ||||
| 25921 | # is not the first token of an expression. | ||||
| 25922 | # $nesting_block_string = a string of 1's and 0's indicating, for each | ||||
| 25923 | # indentation level, if the level is of type BLOCK or not. | ||||
| 25924 | # $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string | ||||
| 25925 | # $nesting_list_string = a string of 1's and 0's indicating, for each | ||||
| 25926 | # indentation level, if it is appropriate for list formatting. | ||||
| 25927 | # If so, continuation indentation is used to indent long list items. | ||||
| 25928 | # $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string | ||||
| 25929 | # @{$rslevel_stack} = a stack of total nesting depths at each | ||||
| 25930 | # structural indentation level, where "total nesting depth" means | ||||
| 25931 | # the nesting depth that would occur if every nesting token -- '{', '[', | ||||
| 25932 | # and '(' -- , regardless of context, is used to compute a nesting | ||||
| 25933 | # depth. | ||||
| 25934 | |||||
| 25935 | #my $nesting_block_flag = ($nesting_block_string =~ /1$/); | ||||
| 25936 | #my $nesting_list_flag = ($nesting_list_string =~ /1$/); | ||||
| 25937 | |||||
| 25938 | my ( $ci_string_i, $level_i, $nesting_block_string_i, | ||||
| 25939 | $nesting_list_string_i, $nesting_token_string_i, | ||||
| 25940 | $nesting_type_string_i, ); | ||||
| 25941 | |||||
| 25942 | foreach $i ( @{$routput_token_list} ) | ||||
| 25943 | { # scan the list of pre-tokens indexes | ||||
| 25944 | |||||
| 25945 | # self-checking for valid token types | ||||
| 25946 | my $type = $routput_token_type->[$i]; | ||||
| 25947 | my $forced_indentation_flag = $routput_indent_flag->[$i]; | ||||
| 25948 | |||||
| 25949 | # See if we should undo the $forced_indentation_flag. | ||||
| 25950 | # Forced indentation after 'if', 'unless', 'while' and 'until' | ||||
| 25951 | # expressions without trailing parens is optional and doesn't | ||||
| 25952 | # always look good. It is usually okay for a trailing logical | ||||
| 25953 | # expression, but if the expression is a function call, code block, | ||||
| 25954 | # or some kind of list it puts in an unwanted extra indentation | ||||
| 25955 | # level which is hard to remove. | ||||
| 25956 | # | ||||
| 25957 | # Example where extra indentation looks ok: | ||||
| 25958 | # return 1 | ||||
| 25959 | # if $det_a < 0 and $det_b > 0 | ||||
| 25960 | # or $det_a > 0 and $det_b < 0; | ||||
| 25961 | # | ||||
| 25962 | # Example where extra indentation is not needed because | ||||
| 25963 | # the eval brace also provides indentation: | ||||
| 25964 | # print "not " if defined eval { | ||||
| 25965 | # reduce { die if $b > 2; $a + $b } 0, 1, 2, 3, 4; | ||||
| 25966 | # }; | ||||
| 25967 | # | ||||
| 25968 | # The following rule works fairly well: | ||||
| 25969 | # Undo the flag if the end of this line, or start of the next | ||||
| 25970 | # line, is an opening container token or a comma. | ||||
| 25971 | # This almost always works, but if not after another pass it will | ||||
| 25972 | # be stable. | ||||
| 25973 | if ( $forced_indentation_flag && $type eq 'k' ) { | ||||
| 25974 | my $ixlast = -1; | ||||
| 25975 | my $ilast = $routput_token_list->[$ixlast]; | ||||
| 25976 | my $toklast = $routput_token_type->[$ilast]; | ||||
| 25977 | if ( $toklast eq '#' ) { | ||||
| 25978 | $ixlast--; | ||||
| 25979 | $ilast = $routput_token_list->[$ixlast]; | ||||
| 25980 | $toklast = $routput_token_type->[$ilast]; | ||||
| 25981 | } | ||||
| 25982 | if ( $toklast eq 'b' ) { | ||||
| 25983 | $ixlast--; | ||||
| 25984 | $ilast = $routput_token_list->[$ixlast]; | ||||
| 25985 | $toklast = $routput_token_type->[$ilast]; | ||||
| 25986 | } | ||||
| 25987 | if ( $toklast =~ /^[\{,]$/ ) { | ||||
| 25988 | $forced_indentation_flag = 0; | ||||
| 25989 | } | ||||
| 25990 | else { | ||||
| 25991 | ( $toklast, my $i_next ) = | ||||
| 25992 | find_next_nonblank_token( $max_token_index, $rtokens, | ||||
| 25993 | $max_token_index ); | ||||
| 25994 | if ( $toklast =~ /^[\{,]$/ ) { | ||||
| 25995 | $forced_indentation_flag = 0; | ||||
| 25996 | } | ||||
| 25997 | } | ||||
| 25998 | } | ||||
| 25999 | |||||
| 26000 | # if we are already in an indented if, see if we should outdent | ||||
| 26001 | if ($indented_if_level) { | ||||
| 26002 | |||||
| 26003 | # don't try to nest trailing if's - shouldn't happen | ||||
| 26004 | if ( $type eq 'k' ) { | ||||
| 26005 | $forced_indentation_flag = 0; | ||||
| 26006 | } | ||||
| 26007 | |||||
| 26008 | # check for the normal case - outdenting at next ';' | ||||
| 26009 | elsif ( $type eq ';' ) { | ||||
| 26010 | if ( $level_in_tokenizer == $indented_if_level ) { | ||||
| 26011 | $forced_indentation_flag = -1; | ||||
| 26012 | $indented_if_level = 0; | ||||
| 26013 | } | ||||
| 26014 | } | ||||
| 26015 | |||||
| 26016 | # handle case of missing semicolon | ||||
| 26017 | elsif ( $type eq '}' ) { | ||||
| 26018 | if ( $level_in_tokenizer == $indented_if_level ) { | ||||
| 26019 | $indented_if_level = 0; | ||||
| 26020 | |||||
| 26021 | # TBD: This could be a subroutine call | ||||
| 26022 | $level_in_tokenizer--; | ||||
| 26023 | if ( @{$rslevel_stack} > 1 ) { | ||||
| 26024 | pop( @{$rslevel_stack} ); | ||||
| 26025 | } | ||||
| 26026 | if ( length($nesting_block_string) > 1 ) | ||||
| 26027 | { # true for valid script | ||||
| 26028 | chop $nesting_block_string; | ||||
| 26029 | chop $nesting_list_string; | ||||
| 26030 | } | ||||
| 26031 | |||||
| 26032 | } | ||||
| 26033 | } | ||||
| 26034 | } | ||||
| 26035 | |||||
| 26036 | my $tok = $$rtokens[$i]; # the token, but ONLY if same as pretoken | ||||
| 26037 | $level_i = $level_in_tokenizer; | ||||
| 26038 | |||||
| 26039 | # This can happen by running perltidy on non-scripts | ||||
| 26040 | # although it could also be bug introduced by programming change. | ||||
| 26041 | # Perl silently accepts a 032 (^Z) and takes it as the end | ||||
| 26042 | if ( !$is_valid_token_type{$type} ) { | ||||
| 26043 | my $val = ord($type); | ||||
| 26044 | warning( | ||||
| 26045 | "unexpected character decimal $val ($type) in script\n"); | ||||
| 26046 | $tokenizer_self->{_in_error} = 1; | ||||
| 26047 | } | ||||
| 26048 | |||||
| 26049 | # ---------------------------------------------------------------- | ||||
| 26050 | # TOKEN TYPE PATCHES | ||||
| 26051 | # output __END__, __DATA__, and format as type 'k' instead of ';' | ||||
| 26052 | # to make html colors correct, etc. | ||||
| 26053 | my $fix_type = $type; | ||||
| 26054 | if ( $type eq ';' && $tok =~ /\w/ ) { $fix_type = 'k' } | ||||
| 26055 | |||||
| 26056 | # output anonymous 'sub' as keyword | ||||
| 26057 | if ( $type eq 't' && $tok eq 'sub' ) { $fix_type = 'k' } | ||||
| 26058 | |||||
| 26059 | # ----------------------------------------------------------------- | ||||
| 26060 | |||||
| 26061 | $nesting_token_string_i = $nesting_token_string; | ||||
| 26062 | $nesting_type_string_i = $nesting_type_string; | ||||
| 26063 | $nesting_block_string_i = $nesting_block_string; | ||||
| 26064 | $nesting_list_string_i = $nesting_list_string; | ||||
| 26065 | |||||
| 26066 | # set primary indentation levels based on structural braces | ||||
| 26067 | # Note: these are set so that the leading braces have a HIGHER | ||||
| 26068 | # level than their CONTENTS, which is convenient for indentation | ||||
| 26069 | # Also, define continuation indentation for each token. | ||||
| 26070 | if ( $type eq '{' || $type eq 'L' || $forced_indentation_flag > 0 ) | ||||
| 26071 | { | ||||
| 26072 | |||||
| 26073 | # use environment before updating | ||||
| 26074 | $container_environment = | ||||
| 26075 | $nesting_block_flag ? 'BLOCK' | ||||
| 26076 | : $nesting_list_flag ? 'LIST' | ||||
| 26077 | : ""; | ||||
| 26078 | |||||
| 26079 | # if the difference between total nesting levels is not 1, | ||||
| 26080 | # there are intervening non-structural nesting types between | ||||
| 26081 | # this '{' and the previous unclosed '{' | ||||
| 26082 | my $intervening_secondary_structure = 0; | ||||
| 26083 | if ( @{$rslevel_stack} ) { | ||||
| 26084 | $intervening_secondary_structure = | ||||
| 26085 | $slevel_in_tokenizer - $rslevel_stack->[-1]; | ||||
| 26086 | } | ||||
| 26087 | |||||
| 26088 | # Continuation Indentation | ||||
| 26089 | # | ||||
| 26090 | # Having tried setting continuation indentation both in the formatter and | ||||
| 26091 | # in the tokenizer, I can say that setting it in the tokenizer is much, | ||||
| 26092 | # much easier. The formatter already has too much to do, and can't | ||||
| 26093 | # make decisions on line breaks without knowing what 'ci' will be at | ||||
| 26094 | # arbitrary locations. | ||||
| 26095 | # | ||||
| 26096 | # But a problem with setting the continuation indentation (ci) here | ||||
| 26097 | # in the tokenizer is that we do not know where line breaks will actually | ||||
| 26098 | # be. As a result, we don't know if we should propagate continuation | ||||
| 26099 | # indentation to higher levels of structure. | ||||
| 26100 | # | ||||
| 26101 | # For nesting of only structural indentation, we never need to do this. | ||||
| 26102 | # For example, in a long if statement, like this | ||||
| 26103 | # | ||||
| 26104 | # if ( !$output_block_type[$i] | ||||
| 26105 | # && ($in_statement_continuation) ) | ||||
| 26106 | # { <--outdented | ||||
| 26107 | # do_something(); | ||||
| 26108 | # } | ||||
| 26109 | # | ||||
| 26110 | # the second line has ci but we do normally give the lines within the BLOCK | ||||
| 26111 | # any ci. This would be true if we had blocks nested arbitrarily deeply. | ||||
| 26112 | # | ||||
| 26113 | # But consider something like this, where we have created a break after | ||||
| 26114 | # an opening paren on line 1, and the paren is not (currently) a | ||||
| 26115 | # structural indentation token: | ||||
| 26116 | # | ||||
| 26117 | # my $file = $menubar->Menubutton( | ||||
| 26118 | # qw/-text File -underline 0 -menuitems/ => [ | ||||
| 26119 | # [ | ||||
| 26120 | # Cascade => '~View', | ||||
| 26121 | # -menuitems => [ | ||||
| 26122 | # ... | ||||
| 26123 | # | ||||
| 26124 | # The second line has ci, so it would seem reasonable to propagate it | ||||
| 26125 | # down, giving the third line 1 ci + 1 indentation. This suggests the | ||||
| 26126 | # following rule, which is currently used to propagating ci down: if there | ||||
| 26127 | # are any non-structural opening parens (or brackets, or braces), before | ||||
| 26128 | # an opening structural brace, then ci is propagated down, and otherwise | ||||
| 26129 | # not. The variable $intervening_secondary_structure contains this | ||||
| 26130 | # information for the current token, and the string | ||||
| 26131 | # "$ci_string_in_tokenizer" is a stack of previous values of this | ||||
| 26132 | # variable. | ||||
| 26133 | |||||
| 26134 | # save the current states | ||||
| 26135 | push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer ); | ||||
| 26136 | $level_in_tokenizer++; | ||||
| 26137 | |||||
| 26138 | if ($forced_indentation_flag) { | ||||
| 26139 | |||||
| 26140 | # break BEFORE '?' when there is forced indentation | ||||
| 26141 | if ( $type eq '?' ) { $level_i = $level_in_tokenizer; } | ||||
| 26142 | if ( $type eq 'k' ) { | ||||
| 26143 | $indented_if_level = $level_in_tokenizer; | ||||
| 26144 | } | ||||
| 26145 | |||||
| 26146 | # do not change container environment here if we are not | ||||
| 26147 | # at a real list. Adding this check prevents "blinkers" | ||||
| 26148 | # often near 'unless" clauses, such as in the following | ||||
| 26149 | # code: | ||||
| 26150 | ## next | ||||
| 26151 | ## unless -e ( | ||||
| 26152 | ## $archive = | ||||
| 26153 | ## File::Spec->catdir( $_, "auto", $root, "$sub$lib_ext" ) | ||||
| 26154 | ## ); | ||||
| 26155 | |||||
| 26156 | $nesting_block_string .= "$nesting_block_flag"; | ||||
| 26157 | } | ||||
| 26158 | else { | ||||
| 26159 | |||||
| 26160 | if ( $routput_block_type->[$i] ) { | ||||
| 26161 | $nesting_block_flag = 1; | ||||
| 26162 | $nesting_block_string .= '1'; | ||||
| 26163 | } | ||||
| 26164 | else { | ||||
| 26165 | $nesting_block_flag = 0; | ||||
| 26166 | $nesting_block_string .= '0'; | ||||
| 26167 | } | ||||
| 26168 | } | ||||
| 26169 | |||||
| 26170 | # we will use continuation indentation within containers | ||||
| 26171 | # which are not blocks and not logical expressions | ||||
| 26172 | my $bit = 0; | ||||
| 26173 | if ( !$routput_block_type->[$i] ) { | ||||
| 26174 | |||||
| 26175 | # propagate flag down at nested open parens | ||||
| 26176 | if ( $routput_container_type->[$i] eq '(' ) { | ||||
| 26177 | $bit = 1 if $nesting_list_flag; | ||||
| 26178 | } | ||||
| 26179 | |||||
| 26180 | # use list continuation if not a logical grouping | ||||
| 26181 | # /^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/ | ||||
| 26182 | else { | ||||
| 26183 | $bit = 1 | ||||
| 26184 | unless | ||||
| 26185 | $is_logical_container{ $routput_container_type->[$i] | ||||
| 26186 | }; | ||||
| 26187 | } | ||||
| 26188 | } | ||||
| 26189 | $nesting_list_string .= $bit; | ||||
| 26190 | $nesting_list_flag = $bit; | ||||
| 26191 | |||||
| 26192 | $ci_string_in_tokenizer .= | ||||
| 26193 | ( $intervening_secondary_structure != 0 ) ? '1' : '0'; | ||||
| 26194 | $ci_string_sum = ones_count($ci_string_in_tokenizer); | ||||
| 26195 | $continuation_string_in_tokenizer .= | ||||
| 26196 | ( $in_statement_continuation > 0 ) ? '1' : '0'; | ||||
| 26197 | |||||
| 26198 | # Sometimes we want to give an opening brace continuation indentation, | ||||
| 26199 | # and sometimes not. For code blocks, we don't do it, so that the leading | ||||
| 26200 | # '{' gets outdented, like this: | ||||
| 26201 | # | ||||
| 26202 | # if ( !$output_block_type[$i] | ||||
| 26203 | # && ($in_statement_continuation) ) | ||||
| 26204 | # { <--outdented | ||||
| 26205 | # | ||||
| 26206 | # For other types, we will give them continuation indentation. For example, | ||||
| 26207 | # here is how a list looks with the opening paren indented: | ||||
| 26208 | # | ||||
| 26209 | # @LoL = | ||||
| 26210 | # ( [ "fred", "barney" ], [ "george", "jane", "elroy" ], | ||||
| 26211 | # [ "homer", "marge", "bart" ], ); | ||||
| 26212 | # | ||||
| 26213 | # This looks best when 'ci' is one-half of the indentation (i.e., 2 and 4) | ||||
| 26214 | |||||
| 26215 | my $total_ci = $ci_string_sum; | ||||
| 26216 | if ( | ||||
| 26217 | !$routput_block_type->[$i] # patch: skip for BLOCK | ||||
| 26218 | && ($in_statement_continuation) | ||||
| 26219 | && !( $forced_indentation_flag && $type eq ':' ) | ||||
| 26220 | ) | ||||
| 26221 | { | ||||
| 26222 | $total_ci += $in_statement_continuation | ||||
| 26223 | unless ( $ci_string_in_tokenizer =~ /1$/ ); | ||||
| 26224 | } | ||||
| 26225 | |||||
| 26226 | $ci_string_i = $total_ci; | ||||
| 26227 | $in_statement_continuation = 0; | ||||
| 26228 | } | ||||
| 26229 | |||||
| 26230 | elsif ($type eq '}' | ||||
| 26231 | || $type eq 'R' | ||||
| 26232 | || $forced_indentation_flag < 0 ) | ||||
| 26233 | { | ||||
| 26234 | |||||
| 26235 | # only a nesting error in the script would prevent popping here | ||||
| 26236 | if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); } | ||||
| 26237 | |||||
| 26238 | $level_i = --$level_in_tokenizer; | ||||
| 26239 | |||||
| 26240 | # restore previous level values | ||||
| 26241 | if ( length($nesting_block_string) > 1 ) | ||||
| 26242 | { # true for valid script | ||||
| 26243 | chop $nesting_block_string; | ||||
| 26244 | $nesting_block_flag = ( $nesting_block_string =~ /1$/ ); | ||||
| 26245 | chop $nesting_list_string; | ||||
| 26246 | $nesting_list_flag = ( $nesting_list_string =~ /1$/ ); | ||||
| 26247 | |||||
| 26248 | chop $ci_string_in_tokenizer; | ||||
| 26249 | $ci_string_sum = ones_count($ci_string_in_tokenizer); | ||||
| 26250 | |||||
| 26251 | $in_statement_continuation = | ||||
| 26252 | chop $continuation_string_in_tokenizer; | ||||
| 26253 | |||||
| 26254 | # zero continuation flag at terminal BLOCK '}' which | ||||
| 26255 | # ends a statement. | ||||
| 26256 | if ( $routput_block_type->[$i] ) { | ||||
| 26257 | |||||
| 26258 | # ...These include non-anonymous subs | ||||
| 26259 | # note: could be sub ::abc { or sub 'abc | ||||
| 26260 | if ( $routput_block_type->[$i] =~ m/^sub\s*/gc ) { | ||||
| 26261 | |||||
| 26262 | # note: older versions of perl require the /gc modifier | ||||
| 26263 | # here or else the \G does not work. | ||||
| 26264 | if ( $routput_block_type->[$i] =~ /\G('|::|\w)/gc ) | ||||
| 26265 | { | ||||
| 26266 | $in_statement_continuation = 0; | ||||
| 26267 | } | ||||
| 26268 | } | ||||
| 26269 | |||||
| 26270 | # ...and include all block types except user subs with | ||||
| 26271 | # block prototypes and these: (sort|grep|map|do|eval) | ||||
| 26272 | # /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|;|if|elsif|else|unless|while|until|for|foreach)$/ | ||||
| 26273 | elsif ( | ||||
| 26274 | $is_zero_continuation_block_type{ | ||||
| 26275 | $routput_block_type->[$i] | ||||
| 26276 | } ) | ||||
| 26277 | { | ||||
| 26278 | $in_statement_continuation = 0; | ||||
| 26279 | } | ||||
| 26280 | |||||
| 26281 | # ..but these are not terminal types: | ||||
| 26282 | # /^(sort|grep|map|do|eval)$/ ) | ||||
| 26283 | elsif ( | ||||
| 26284 | $is_not_zero_continuation_block_type{ | ||||
| 26285 | $routput_block_type->[$i] | ||||
| 26286 | } ) | ||||
| 26287 | { | ||||
| 26288 | } | ||||
| 26289 | |||||
| 26290 | # ..and a block introduced by a label | ||||
| 26291 | # /^\w+\s*:$/gc ) { | ||||
| 26292 | elsif ( $routput_block_type->[$i] =~ /:$/ ) { | ||||
| 26293 | $in_statement_continuation = 0; | ||||
| 26294 | } | ||||
| 26295 | |||||
| 26296 | # user function with block prototype | ||||
| 26297 | else { | ||||
| 26298 | $in_statement_continuation = 0; | ||||
| 26299 | } | ||||
| 26300 | } | ||||
| 26301 | |||||
| 26302 | # If we are in a list, then | ||||
| 26303 | # we must set continuation indentation at the closing | ||||
| 26304 | # paren of something like this (paren after $check): | ||||
| 26305 | # assert( | ||||
| 26306 | # __LINE__, | ||||
| 26307 | # ( not defined $check ) | ||||
| 26308 | # or ref $check | ||||
| 26309 | # or $check eq "new" | ||||
| 26310 | # or $check eq "old", | ||||
| 26311 | # ); | ||||
| 26312 | elsif ( $tok eq ')' ) { | ||||
| 26313 | $in_statement_continuation = 1 | ||||
| 26314 | if $routput_container_type->[$i] =~ /^[;,\{\}]$/; | ||||
| 26315 | } | ||||
| 26316 | |||||
| 26317 | elsif ( $tok eq ';' ) { $in_statement_continuation = 0 } | ||||
| 26318 | } | ||||
| 26319 | |||||
| 26320 | # use environment after updating | ||||
| 26321 | $container_environment = | ||||
| 26322 | $nesting_block_flag ? 'BLOCK' | ||||
| 26323 | : $nesting_list_flag ? 'LIST' | ||||
| 26324 | : ""; | ||||
| 26325 | $ci_string_i = $ci_string_sum + $in_statement_continuation; | ||||
| 26326 | $nesting_block_string_i = $nesting_block_string; | ||||
| 26327 | $nesting_list_string_i = $nesting_list_string; | ||||
| 26328 | } | ||||
| 26329 | |||||
| 26330 | # not a structural indentation type.. | ||||
| 26331 | else { | ||||
| 26332 | |||||
| 26333 | $container_environment = | ||||
| 26334 | $nesting_block_flag ? 'BLOCK' | ||||
| 26335 | : $nesting_list_flag ? 'LIST' | ||||
| 26336 | : ""; | ||||
| 26337 | |||||
| 26338 | # zero the continuation indentation at certain tokens so | ||||
| 26339 | # that they will be at the same level as its container. For | ||||
| 26340 | # commas, this simplifies the -lp indentation logic, which | ||||
| 26341 | # counts commas. For ?: it makes them stand out. | ||||
| 26342 | if ($nesting_list_flag) { | ||||
| 26343 | if ( $type =~ /^[,\?\:]$/ ) { | ||||
| 26344 | $in_statement_continuation = 0; | ||||
| 26345 | } | ||||
| 26346 | } | ||||
| 26347 | |||||
| 26348 | # be sure binary operators get continuation indentation | ||||
| 26349 | if ( | ||||
| 26350 | $container_environment | ||||
| 26351 | && ( $type eq 'k' && $is_binary_keyword{$tok} | ||||
| 26352 | || $is_binary_type{$type} ) | ||||
| 26353 | ) | ||||
| 26354 | { | ||||
| 26355 | $in_statement_continuation = 1; | ||||
| 26356 | } | ||||
| 26357 | |||||
| 26358 | # continuation indentation is sum of any open ci from previous | ||||
| 26359 | # levels plus the current level | ||||
| 26360 | $ci_string_i = $ci_string_sum + $in_statement_continuation; | ||||
| 26361 | |||||
| 26362 | # update continuation flag ... | ||||
| 26363 | # if this isn't a blank or comment.. | ||||
| 26364 | if ( $type ne 'b' && $type ne '#' ) { | ||||
| 26365 | |||||
| 26366 | # and we are in a BLOCK | ||||
| 26367 | if ($nesting_block_flag) { | ||||
| 26368 | |||||
| 26369 | # the next token after a ';' and label starts a new stmt | ||||
| 26370 | if ( $type eq ';' || $type eq 'J' ) { | ||||
| 26371 | $in_statement_continuation = 0; | ||||
| 26372 | } | ||||
| 26373 | |||||
| 26374 | # otherwise, we are continuing the current statement | ||||
| 26375 | else { | ||||
| 26376 | $in_statement_continuation = 1; | ||||
| 26377 | } | ||||
| 26378 | } | ||||
| 26379 | |||||
| 26380 | # if we are not in a BLOCK.. | ||||
| 26381 | else { | ||||
| 26382 | |||||
| 26383 | # do not use continuation indentation if not list | ||||
| 26384 | # environment (could be within if/elsif clause) | ||||
| 26385 | if ( !$nesting_list_flag ) { | ||||
| 26386 | $in_statement_continuation = 0; | ||||
| 26387 | } | ||||
| 26388 | |||||
| 26389 | # otherwise, the next token after a ',' starts a new term | ||||
| 26390 | elsif ( $type eq ',' ) { | ||||
| 26391 | $in_statement_continuation = 0; | ||||
| 26392 | } | ||||
| 26393 | |||||
| 26394 | # otherwise, we are continuing the current term | ||||
| 26395 | else { | ||||
| 26396 | $in_statement_continuation = 1; | ||||
| 26397 | } | ||||
| 26398 | } | ||||
| 26399 | } | ||||
| 26400 | } | ||||
| 26401 | |||||
| 26402 | if ( $level_in_tokenizer < 0 ) { | ||||
| 26403 | unless ( $tokenizer_self->{_saw_negative_indentation} ) { | ||||
| 26404 | $tokenizer_self->{_saw_negative_indentation} = 1; | ||||
| 26405 | warning("Starting negative indentation\n"); | ||||
| 26406 | } | ||||
| 26407 | } | ||||
| 26408 | |||||
| 26409 | # set secondary nesting levels based on all containment token types | ||||
| 26410 | # Note: these are set so that the nesting depth is the depth | ||||
| 26411 | # of the PREVIOUS TOKEN, which is convenient for setting | ||||
| 26412 | # the strength of token bonds | ||||
| 26413 | my $slevel_i = $slevel_in_tokenizer; | ||||
| 26414 | |||||
| 26415 | # /^[L\{\(\[]$/ | ||||
| 26416 | if ( $is_opening_type{$type} ) { | ||||
| 26417 | $slevel_in_tokenizer++; | ||||
| 26418 | $nesting_token_string .= $tok; | ||||
| 26419 | $nesting_type_string .= $type; | ||||
| 26420 | } | ||||
| 26421 | |||||
| 26422 | # /^[R\}\)\]]$/ | ||||
| 26423 | elsif ( $is_closing_type{$type} ) { | ||||
| 26424 | $slevel_in_tokenizer--; | ||||
| 26425 | my $char = chop $nesting_token_string; | ||||
| 26426 | |||||
| 26427 | if ( $char ne $matching_start_token{$tok} ) { | ||||
| 26428 | $nesting_token_string .= $char . $tok; | ||||
| 26429 | $nesting_type_string .= $type; | ||||
| 26430 | } | ||||
| 26431 | else { | ||||
| 26432 | chop $nesting_type_string; | ||||
| 26433 | } | ||||
| 26434 | } | ||||
| 26435 | |||||
| 26436 | push( @block_type, $routput_block_type->[$i] ); | ||||
| 26437 | push( @ci_string, $ci_string_i ); | ||||
| 26438 | push( @container_environment, $container_environment ); | ||||
| 26439 | push( @container_type, $routput_container_type->[$i] ); | ||||
| 26440 | push( @levels, $level_i ); | ||||
| 26441 | push( @nesting_tokens, $nesting_token_string_i ); | ||||
| 26442 | push( @nesting_types, $nesting_type_string_i ); | ||||
| 26443 | push( @slevels, $slevel_i ); | ||||
| 26444 | push( @token_type, $fix_type ); | ||||
| 26445 | push( @type_sequence, $routput_type_sequence->[$i] ); | ||||
| 26446 | push( @nesting_blocks, $nesting_block_string ); | ||||
| 26447 | push( @nesting_lists, $nesting_list_string ); | ||||
| 26448 | |||||
| 26449 | # now form the previous token | ||||
| 26450 | if ( $im >= 0 ) { | ||||
| 26451 | $num = | ||||
| 26452 | $$rtoken_map[$i] - $$rtoken_map[$im]; # how many characters | ||||
| 26453 | |||||
| 26454 | if ( $num > 0 ) { | ||||
| 26455 | push( @tokens, | ||||
| 26456 | substr( $input_line, $$rtoken_map[$im], $num ) ); | ||||
| 26457 | } | ||||
| 26458 | } | ||||
| 26459 | $im = $i; | ||||
| 26460 | } | ||||
| 26461 | |||||
| 26462 | $num = length($input_line) - $$rtoken_map[$im]; # make the last token | ||||
| 26463 | if ( $num > 0 ) { | ||||
| 26464 | push( @tokens, substr( $input_line, $$rtoken_map[$im], $num ) ); | ||||
| 26465 | } | ||||
| 26466 | |||||
| 26467 | $tokenizer_self->{_in_attribute_list} = $in_attribute_list; | ||||
| 26468 | $tokenizer_self->{_in_quote} = $in_quote; | ||||
| 26469 | $tokenizer_self->{_quote_target} = | ||||
| 26470 | $in_quote ? matching_end_token($quote_character) : ""; | ||||
| 26471 | $tokenizer_self->{_rhere_target_list} = $rhere_target_list; | ||||
| 26472 | |||||
| 26473 | $line_of_tokens->{_rtoken_type} = \@token_type; | ||||
| 26474 | $line_of_tokens->{_rtokens} = \@tokens; | ||||
| 26475 | $line_of_tokens->{_rblock_type} = \@block_type; | ||||
| 26476 | $line_of_tokens->{_rcontainer_type} = \@container_type; | ||||
| 26477 | $line_of_tokens->{_rcontainer_environment} = \@container_environment; | ||||
| 26478 | $line_of_tokens->{_rtype_sequence} = \@type_sequence; | ||||
| 26479 | $line_of_tokens->{_rlevels} = \@levels; | ||||
| 26480 | $line_of_tokens->{_rslevels} = \@slevels; | ||||
| 26481 | $line_of_tokens->{_rnesting_tokens} = \@nesting_tokens; | ||||
| 26482 | $line_of_tokens->{_rci_levels} = \@ci_string; | ||||
| 26483 | $line_of_tokens->{_rnesting_blocks} = \@nesting_blocks; | ||||
| 26484 | |||||
| 26485 | return; | ||||
| 26486 | } | ||||
| 26487 | } # end tokenize_this_line | ||||
| 26488 | |||||
| 26489 | #########i############################################################# | ||||
| 26490 | # Tokenizer routines which assist in identifying token types | ||||
| 26491 | ####################################################################### | ||||
| 26492 | |||||
| 26493 | sub operator_expected { | ||||
| 26494 | |||||
| 26495 | # Many perl symbols have two or more meanings. For example, '<<' | ||||
| 26496 | # can be a shift operator or a here-doc operator. The | ||||
| 26497 | # interpretation of these symbols depends on the current state of | ||||
| 26498 | # the tokenizer, which may either be expecting a term or an | ||||
| 26499 | # operator. For this example, a << would be a shift if an operator | ||||
| 26500 | # is expected, and a here-doc if a term is expected. This routine | ||||
| 26501 | # is called to make this decision for any current token. It returns | ||||
| 26502 | # one of three possible values: | ||||
| 26503 | # | ||||
| 26504 | # OPERATOR - operator expected (or at least, not a term) | ||||
| 26505 | # UNKNOWN - can't tell | ||||
| 26506 | # TERM - a term is expected (or at least, not an operator) | ||||
| 26507 | # | ||||
| 26508 | # The decision is based on what has been seen so far. This | ||||
| 26509 | # information is stored in the "$last_nonblank_type" and | ||||
| 26510 | # "$last_nonblank_token" variables. For example, if the | ||||
| 26511 | # $last_nonblank_type is '=~', then we are expecting a TERM, whereas | ||||
| 26512 | # if $last_nonblank_type is 'n' (numeric), we are expecting an | ||||
| 26513 | # OPERATOR. | ||||
| 26514 | # | ||||
| 26515 | # If a UNKNOWN is returned, the calling routine must guess. A major | ||||
| 26516 | # goal of this tokenizer is to minimize the possibility of returning | ||||
| 26517 | # UNKNOWN, because a wrong guess can spoil the formatting of a | ||||
| 26518 | # script. | ||||
| 26519 | # | ||||
| 26520 | # adding NEW_TOKENS: it is critically important that this routine be | ||||
| 26521 | # updated to allow it to determine if an operator or term is to be | ||||
| 26522 | # expected after the new token. Doing this simply involves adding | ||||
| 26523 | # the new token character to one of the regexes in this routine or | ||||
| 26524 | # to one of the hash lists | ||||
| 26525 | # that it uses, which are initialized in the BEGIN section. | ||||
| 26526 | # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token, | ||||
| 26527 | # $statement_type | ||||
| 26528 | |||||
| 26529 | my ( $prev_type, $tok, $next_type ) = @_; | ||||
| 26530 | |||||
| 26531 | my $op_expected = UNKNOWN; | ||||
| 26532 | |||||
| 26533 | ##print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n"; | ||||
| 26534 | |||||
| 26535 | # Note: function prototype is available for token type 'U' for future | ||||
| 26536 | # program development. It contains the leading and trailing parens, | ||||
| 26537 | # and no blanks. It might be used to eliminate token type 'C', for | ||||
| 26538 | # example (prototype = '()'). Thus: | ||||
| 26539 | # if ($last_nonblank_type eq 'U') { | ||||
| 26540 | # print "previous token=$last_nonblank_token type=$last_nonblank_type prototype=$last_nonblank_prototype\n"; | ||||
| 26541 | # } | ||||
| 26542 | |||||
| 26543 | # A possible filehandle (or object) requires some care... | ||||
| 26544 | if ( $last_nonblank_type eq 'Z' ) { | ||||
| 26545 | |||||
| 26546 | # angle.t | ||||
| 26547 | if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) { | ||||
| 26548 | $op_expected = UNKNOWN; | ||||
| 26549 | } | ||||
| 26550 | |||||
| 26551 | # For possible file handle like "$a", Perl uses weird parsing rules. | ||||
| 26552 | # For example: | ||||
| 26553 | # print $a/2,"/hi"; - division | ||||
| 26554 | # print $a / 2,"/hi"; - division | ||||
| 26555 | # print $a/ 2,"/hi"; - division | ||||
| 26556 | # print $a /2,"/hi"; - pattern (and error)! | ||||
| 26557 | elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) { | ||||
| 26558 | $op_expected = TERM; | ||||
| 26559 | } | ||||
| 26560 | |||||
| 26561 | # Note when an operation is being done where a | ||||
| 26562 | # filehandle might be expected, since a change in whitespace | ||||
| 26563 | # could change the interpretation of the statement. | ||||
| 26564 | else { | ||||
| 26565 | if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) { | ||||
| 26566 | complain("operator in print statement not recommended\n"); | ||||
| 26567 | $op_expected = OPERATOR; | ||||
| 26568 | } | ||||
| 26569 | } | ||||
| 26570 | } | ||||
| 26571 | |||||
| 26572 | # Check for smartmatch operator before preceding brace or square bracket. | ||||
| 26573 | # For example, at the ? after the ] in the following expressions we are | ||||
| 26574 | # expecting an operator: | ||||
| 26575 | # | ||||
| 26576 | # qr/3/ ~~ ['1234'] ? 1 : 0; | ||||
| 26577 | # map { $_ ~~ [ '0', '1' ] ? 'x' : 'o' } @a; | ||||
| 26578 | elsif ( $last_nonblank_type eq '}' && $last_nonblank_token eq '~~' ) { | ||||
| 26579 | $op_expected = OPERATOR; | ||||
| 26580 | } | ||||
| 26581 | |||||
| 26582 | # handle something after 'do' and 'eval' | ||||
| 26583 | elsif ( $is_block_operator{$last_nonblank_token} ) { | ||||
| 26584 | |||||
| 26585 | # something like $a = eval "expression"; | ||||
| 26586 | # ^ | ||||
| 26587 | if ( $last_nonblank_type eq 'k' ) { | ||||
| 26588 | $op_expected = TERM; # expression or list mode following keyword | ||||
| 26589 | } | ||||
| 26590 | |||||
| 26591 | # something like $a = do { BLOCK } / 2; | ||||
| 26592 | # or this ? after a smartmatch anonynmous hash or array reference: | ||||
| 26593 | # qr/3/ ~~ ['1234'] ? 1 : 0; | ||||
| 26594 | # ^ | ||||
| 26595 | else { | ||||
| 26596 | $op_expected = OPERATOR; # block mode following } | ||||
| 26597 | } | ||||
| 26598 | } | ||||
| 26599 | |||||
| 26600 | # handle bare word.. | ||||
| 26601 | elsif ( $last_nonblank_type eq 'w' ) { | ||||
| 26602 | |||||
| 26603 | # unfortunately, we can't tell what type of token to expect next | ||||
| 26604 | # after most bare words | ||||
| 26605 | $op_expected = UNKNOWN; | ||||
| 26606 | } | ||||
| 26607 | |||||
| 26608 | # operator, but not term possible after these types | ||||
| 26609 | # Note: moved ')' from type to token because parens in list context | ||||
| 26610 | # get marked as '{' '}' now. This is a minor glitch in the following: | ||||
| 26611 | # my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : (); | ||||
| 26612 | # | ||||
| 26613 | elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ ) | ||||
| 26614 | || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) ) | ||||
| 26615 | { | ||||
| 26616 | $op_expected = OPERATOR; | ||||
| 26617 | |||||
| 26618 | # in a 'use' statement, numbers and v-strings are not true | ||||
| 26619 | # numbers, so to avoid incorrect error messages, we will | ||||
| 26620 | # mark them as unknown for now (use.t) | ||||
| 26621 | # TODO: it would be much nicer to create a new token V for VERSION | ||||
| 26622 | # number in a use statement. Then this could be a check on type V | ||||
| 26623 | # and related patches which change $statement_type for '=>' | ||||
| 26624 | # and ',' could be removed. Further, it would clean things up to | ||||
| 26625 | # scan the 'use' statement with a separate subroutine. | ||||
| 26626 | if ( ( $statement_type eq 'use' ) | ||||
| 26627 | && ( $last_nonblank_type =~ /^[nv]$/ ) ) | ||||
| 26628 | { | ||||
| 26629 | $op_expected = UNKNOWN; | ||||
| 26630 | } | ||||
| 26631 | |||||
| 26632 | # expecting VERSION or {} after package NAMESPACE | ||||
| 26633 | elsif ($statement_type =~ /^package\b/ | ||||
| 26634 | && $last_nonblank_token =~ /^package\b/ ) | ||||
| 26635 | { | ||||
| 26636 | $op_expected = TERM; | ||||
| 26637 | } | ||||
| 26638 | } | ||||
| 26639 | |||||
| 26640 | # no operator after many keywords, such as "die", "warn", etc | ||||
| 26641 | elsif ( $expecting_term_token{$last_nonblank_token} ) { | ||||
| 26642 | |||||
| 26643 | # patch for dor.t (defined or). | ||||
| 26644 | # perl functions which may be unary operators | ||||
| 26645 | # TODO: This list is incomplete, and these should be put | ||||
| 26646 | # into a hash. | ||||
| 26647 | if ( $tok eq '/' | ||||
| 26648 | && $next_type eq '/' | ||||
| 26649 | && $last_nonblank_type eq 'k' | ||||
| 26650 | && $last_nonblank_token =~ /^eof|undef|shift|pop$/ ) | ||||
| 26651 | { | ||||
| 26652 | $op_expected = OPERATOR; | ||||
| 26653 | } | ||||
| 26654 | else { | ||||
| 26655 | $op_expected = TERM; | ||||
| 26656 | } | ||||
| 26657 | } | ||||
| 26658 | |||||
| 26659 | # no operator after things like + - ** (i.e., other operators) | ||||
| 26660 | elsif ( $expecting_term_types{$last_nonblank_type} ) { | ||||
| 26661 | $op_expected = TERM; | ||||
| 26662 | } | ||||
| 26663 | |||||
| 26664 | # a few operators, like "time", have an empty prototype () and so | ||||
| 26665 | # take no parameters but produce a value to operate on | ||||
| 26666 | elsif ( $expecting_operator_token{$last_nonblank_token} ) { | ||||
| 26667 | $op_expected = OPERATOR; | ||||
| 26668 | } | ||||
| 26669 | |||||
| 26670 | # post-increment and decrement produce values to be operated on | ||||
| 26671 | elsif ( $expecting_operator_types{$last_nonblank_type} ) { | ||||
| 26672 | $op_expected = OPERATOR; | ||||
| 26673 | } | ||||
| 26674 | |||||
| 26675 | # no value to operate on after sub block | ||||
| 26676 | elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; } | ||||
| 26677 | |||||
| 26678 | # a right brace here indicates the end of a simple block. | ||||
| 26679 | # all non-structural right braces have type 'R' | ||||
| 26680 | # all braces associated with block operator keywords have been given those | ||||
| 26681 | # keywords as "last_nonblank_token" and caught above. | ||||
| 26682 | # (This statement is order dependent, and must come after checking | ||||
| 26683 | # $last_nonblank_token). | ||||
| 26684 | elsif ( $last_nonblank_type eq '}' ) { | ||||
| 26685 | |||||
| 26686 | # patch for dor.t (defined or). | ||||
| 26687 | if ( $tok eq '/' | ||||
| 26688 | && $next_type eq '/' | ||||
| 26689 | && $last_nonblank_token eq ']' ) | ||||
| 26690 | { | ||||
| 26691 | $op_expected = OPERATOR; | ||||
| 26692 | } | ||||
| 26693 | else { | ||||
| 26694 | $op_expected = TERM; | ||||
| 26695 | } | ||||
| 26696 | } | ||||
| 26697 | |||||
| 26698 | # something else..what did I forget? | ||||
| 26699 | else { | ||||
| 26700 | |||||
| 26701 | # collecting diagnostics on unknown operator types..see what was missed | ||||
| 26702 | $op_expected = UNKNOWN; | ||||
| 26703 | write_diagnostics( | ||||
| 26704 | "OP: unknown after type=$last_nonblank_type token=$last_nonblank_token\n" | ||||
| 26705 | ); | ||||
| 26706 | } | ||||
| 26707 | |||||
| 26708 | TOKENIZER_DEBUG_FLAG_EXPECT && do { | ||||
| 26709 | print STDOUT | ||||
| 26710 | "EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n"; | ||||
| 26711 | }; | ||||
| 26712 | return $op_expected; | ||||
| 26713 | } | ||||
| 26714 | |||||
| 26715 | sub new_statement_ok { | ||||
| 26716 | |||||
| 26717 | # return true if the current token can start a new statement | ||||
| 26718 | # USES GLOBAL VARIABLES: $last_nonblank_type | ||||
| 26719 | |||||
| 26720 | return label_ok() # a label would be ok here | ||||
| 26721 | |||||
| 26722 | || $last_nonblank_type eq 'J'; # or we follow a label | ||||
| 26723 | |||||
| 26724 | } | ||||
| 26725 | |||||
| 26726 | sub label_ok { | ||||
| 26727 | |||||
| 26728 | # Decide if a bare word followed by a colon here is a label | ||||
| 26729 | # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type, | ||||
| 26730 | # $brace_depth, @brace_type | ||||
| 26731 | |||||
| 26732 | # if it follows an opening or closing code block curly brace.. | ||||
| 26733 | if ( ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' ) | ||||
| 26734 | && $last_nonblank_type eq $last_nonblank_token ) | ||||
| 26735 | { | ||||
| 26736 | |||||
| 26737 | # it is a label if and only if the curly encloses a code block | ||||
| 26738 | return $brace_type[$brace_depth]; | ||||
| 26739 | } | ||||
| 26740 | |||||
| 26741 | # otherwise, it is a label if and only if it follows a ';' (real or fake) | ||||
| 26742 | # or another label | ||||
| 26743 | else { | ||||
| 26744 | return ( $last_nonblank_type eq ';' || $last_nonblank_type eq 'J' ); | ||||
| 26745 | } | ||||
| 26746 | } | ||||
| 26747 | |||||
| 26748 | sub code_block_type { | ||||
| 26749 | |||||
| 26750 | # Decide if this is a block of code, and its type. | ||||
| 26751 | # Must be called only when $type = $token = '{' | ||||
| 26752 | # The problem is to distinguish between the start of a block of code | ||||
| 26753 | # and the start of an anonymous hash reference | ||||
| 26754 | # Returns "" if not code block, otherwise returns 'last_nonblank_token' | ||||
| 26755 | # to indicate the type of code block. (For example, 'last_nonblank_token' | ||||
| 26756 | # might be 'if' for an if block, 'else' for an else block, etc). | ||||
| 26757 | # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type, | ||||
| 26758 | # $last_nonblank_block_type, $brace_depth, @brace_type | ||||
| 26759 | |||||
| 26760 | # handle case of multiple '{'s | ||||
| 26761 | |||||
| 26762 | # print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n"; | ||||
| 26763 | |||||
| 26764 | my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_; | ||||
| 26765 | if ( $last_nonblank_token eq '{' | ||||
| 26766 | && $last_nonblank_type eq $last_nonblank_token ) | ||||
| 26767 | { | ||||
| 26768 | |||||
| 26769 | # opening brace where a statement may appear is probably | ||||
| 26770 | # a code block but might be and anonymous hash reference | ||||
| 26771 | if ( $brace_type[$brace_depth] ) { | ||||
| 26772 | return decide_if_code_block( $i, $rtokens, $rtoken_type, | ||||
| 26773 | $max_token_index ); | ||||
| 26774 | } | ||||
| 26775 | |||||
| 26776 | # cannot start a code block within an anonymous hash | ||||
| 26777 | else { | ||||
| 26778 | return ""; | ||||
| 26779 | } | ||||
| 26780 | } | ||||
| 26781 | |||||
| 26782 | elsif ( $last_nonblank_token eq ';' ) { | ||||
| 26783 | |||||
| 26784 | # an opening brace where a statement may appear is probably | ||||
| 26785 | # a code block but might be and anonymous hash reference | ||||
| 26786 | return decide_if_code_block( $i, $rtokens, $rtoken_type, | ||||
| 26787 | $max_token_index ); | ||||
| 26788 | } | ||||
| 26789 | |||||
| 26790 | # handle case of '}{' | ||||
| 26791 | elsif ($last_nonblank_token eq '}' | ||||
| 26792 | && $last_nonblank_type eq $last_nonblank_token ) | ||||
| 26793 | { | ||||
| 26794 | |||||
| 26795 | # a } { situation ... | ||||
| 26796 | # could be hash reference after code block..(blktype1.t) | ||||
| 26797 | if ($last_nonblank_block_type) { | ||||
| 26798 | return decide_if_code_block( $i, $rtokens, $rtoken_type, | ||||
| 26799 | $max_token_index ); | ||||
| 26800 | } | ||||
| 26801 | |||||
| 26802 | # must be a block if it follows a closing hash reference | ||||
| 26803 | else { | ||||
| 26804 | return $last_nonblank_token; | ||||
| 26805 | } | ||||
| 26806 | } | ||||
| 26807 | |||||
| 26808 | # NOTE: braces after type characters start code blocks, but for | ||||
| 26809 | # simplicity these are not identified as such. See also | ||||
| 26810 | # sub is_non_structural_brace. | ||||
| 26811 | |||||
| 26812 | ## elsif ( $last_nonblank_type eq 't' ) { | ||||
| 26813 | ## return $last_nonblank_token; | ||||
| 26814 | ## } | ||||
| 26815 | |||||
| 26816 | # brace after label: | ||||
| 26817 | elsif ( $last_nonblank_type eq 'J' ) { | ||||
| 26818 | return $last_nonblank_token; | ||||
| 26819 | } | ||||
| 26820 | |||||
| 26821 | # otherwise, look at previous token. This must be a code block if | ||||
| 26822 | # it follows any of these: | ||||
| 26823 | # /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/ | ||||
| 26824 | elsif ( $is_code_block_token{$last_nonblank_token} ) { | ||||
| 26825 | |||||
| 26826 | # Bug Patch: Note that the opening brace after the 'if' in the following | ||||
| 26827 | # snippet is an anonymous hash ref and not a code block! | ||||
| 26828 | # print 'hi' if { x => 1, }->{x}; | ||||
| 26829 | # We can identify this situation because the last nonblank type | ||||
| 26830 | # will be a keyword (instead of a closing peren) | ||||
| 26831 | if ( $last_nonblank_token =~ /^(if|unless)$/ | ||||
| 26832 | && $last_nonblank_type eq 'k' ) | ||||
| 26833 | { | ||||
| 26834 | return ""; | ||||
| 26835 | } | ||||
| 26836 | else { | ||||
| 26837 | return $last_nonblank_token; | ||||
| 26838 | } | ||||
| 26839 | } | ||||
| 26840 | |||||
| 26841 | # or a sub or package BLOCK | ||||
| 26842 | elsif ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' ) | ||||
| 26843 | && $last_nonblank_token =~ /^(sub|package)\b/ ) | ||||
| 26844 | { | ||||
| 26845 | return $last_nonblank_token; | ||||
| 26846 | } | ||||
| 26847 | |||||
| 26848 | elsif ( $statement_type =~ /^(sub|package)\b/ ) { | ||||
| 26849 | return $statement_type; | ||||
| 26850 | } | ||||
| 26851 | |||||
| 26852 | # user-defined subs with block parameters (like grep/map/eval) | ||||
| 26853 | elsif ( $last_nonblank_type eq 'G' ) { | ||||
| 26854 | return $last_nonblank_token; | ||||
| 26855 | } | ||||
| 26856 | |||||
| 26857 | # check bareword | ||||
| 26858 | elsif ( $last_nonblank_type eq 'w' ) { | ||||
| 26859 | return decide_if_code_block( $i, $rtokens, $rtoken_type, | ||||
| 26860 | $max_token_index ); | ||||
| 26861 | } | ||||
| 26862 | |||||
| 26863 | # anything else must be anonymous hash reference | ||||
| 26864 | else { | ||||
| 26865 | return ""; | ||||
| 26866 | } | ||||
| 26867 | } | ||||
| 26868 | |||||
| 26869 | sub decide_if_code_block { | ||||
| 26870 | |||||
| 26871 | # USES GLOBAL VARIABLES: $last_nonblank_token | ||||
| 26872 | my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_; | ||||
| 26873 | my ( $next_nonblank_token, $i_next ) = | ||||
| 26874 | find_next_nonblank_token( $i, $rtokens, $max_token_index ); | ||||
| 26875 | |||||
| 26876 | # we are at a '{' where a statement may appear. | ||||
| 26877 | # We must decide if this brace starts an anonymous hash or a code | ||||
| 26878 | # block. | ||||
| 26879 | # return "" if anonymous hash, and $last_nonblank_token otherwise | ||||
| 26880 | |||||
| 26881 | # initialize to be code BLOCK | ||||
| 26882 | my $code_block_type = $last_nonblank_token; | ||||
| 26883 | |||||
| 26884 | # Check for the common case of an empty anonymous hash reference: | ||||
| 26885 | # Maybe something like sub { { } } | ||||
| 26886 | if ( $next_nonblank_token eq '}' ) { | ||||
| 26887 | $code_block_type = ""; | ||||
| 26888 | } | ||||
| 26889 | |||||
| 26890 | else { | ||||
| 26891 | |||||
| 26892 | # To guess if this '{' is an anonymous hash reference, look ahead | ||||
| 26893 | # and test as follows: | ||||
| 26894 | # | ||||
| 26895 | # it is a hash reference if next come: | ||||
| 26896 | # - a string or digit followed by a comma or => | ||||
| 26897 | # - bareword followed by => | ||||
| 26898 | # otherwise it is a code block | ||||
| 26899 | # | ||||
| 26900 | # Examples of anonymous hash ref: | ||||
| 26901 | # {'aa',}; | ||||
| 26902 | # {1,2} | ||||
| 26903 | # | ||||
| 26904 | # Examples of code blocks: | ||||
| 26905 | # {1; print "hello\n", 1;} | ||||
| 26906 | # {$a,1}; | ||||
| 26907 | |||||
| 26908 | # We are only going to look ahead one more (nonblank/comment) line. | ||||
| 26909 | # Strange formatting could cause a bad guess, but that's unlikely. | ||||
| 26910 | my @pre_types = @$rtoken_type[ $i + 1 .. $max_token_index ]; | ||||
| 26911 | my @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ]; | ||||
| 26912 | my ( $rpre_tokens, $rpre_types ) = | ||||
| 26913 | peek_ahead_for_n_nonblank_pre_tokens(20); # 20 is arbitrary but | ||||
| 26914 | # generous, and prevents | ||||
| 26915 | # wasting lots of | ||||
| 26916 | # time in mangled files | ||||
| 26917 | if ( defined($rpre_types) && @$rpre_types ) { | ||||
| 26918 | push @pre_types, @$rpre_types; | ||||
| 26919 | push @pre_tokens, @$rpre_tokens; | ||||
| 26920 | } | ||||
| 26921 | |||||
| 26922 | # put a sentinel token to simplify stopping the search | ||||
| 26923 | push @pre_types, '}'; | ||||
| 26924 | |||||
| 26925 | my $jbeg = 0; | ||||
| 26926 | $jbeg = 1 if $pre_types[0] eq 'b'; | ||||
| 26927 | |||||
| 26928 | # first look for one of these | ||||
| 26929 | # - bareword | ||||
| 26930 | # - bareword with leading - | ||||
| 26931 | # - digit | ||||
| 26932 | # - quoted string | ||||
| 26933 | my $j = $jbeg; | ||||
| 26934 | if ( $pre_types[$j] =~ /^[\'\"]/ ) { | ||||
| 26935 | |||||
| 26936 | # find the closing quote; don't worry about escapes | ||||
| 26937 | my $quote_mark = $pre_types[$j]; | ||||
| 26938 | for ( my $k = $j + 1 ; $k < $#pre_types ; $k++ ) { | ||||
| 26939 | if ( $pre_types[$k] eq $quote_mark ) { | ||||
| 26940 | $j = $k + 1; | ||||
| 26941 | my $next = $pre_types[$j]; | ||||
| 26942 | last; | ||||
| 26943 | } | ||||
| 26944 | } | ||||
| 26945 | } | ||||
| 26946 | elsif ( $pre_types[$j] eq 'd' ) { | ||||
| 26947 | $j++; | ||||
| 26948 | } | ||||
| 26949 | elsif ( $pre_types[$j] eq 'w' ) { | ||||
| 26950 | unless ( $is_keyword{ $pre_tokens[$j] } ) { | ||||
| 26951 | $j++; | ||||
| 26952 | } | ||||
| 26953 | } | ||||
| 26954 | elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) { | ||||
| 26955 | $j++; | ||||
| 26956 | } | ||||
| 26957 | if ( $j > $jbeg ) { | ||||
| 26958 | |||||
| 26959 | $j++ if $pre_types[$j] eq 'b'; | ||||
| 26960 | |||||
| 26961 | # it's a hash ref if a comma or => follow next | ||||
| 26962 | if ( $pre_types[$j] eq ',' | ||||
| 26963 | || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) ) | ||||
| 26964 | { | ||||
| 26965 | $code_block_type = ""; | ||||
| 26966 | } | ||||
| 26967 | } | ||||
| 26968 | } | ||||
| 26969 | |||||
| 26970 | return $code_block_type; | ||||
| 26971 | } | ||||
| 26972 | |||||
| 26973 | sub unexpected { | ||||
| 26974 | |||||
| 26975 | # report unexpected token type and show where it is | ||||
| 26976 | # USES GLOBAL VARIABLES: $tokenizer_self | ||||
| 26977 | my ( $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map, | ||||
| 26978 | $rpretoken_type, $input_line ) | ||||
| 26979 | = @_; | ||||
| 26980 | |||||
| 26981 | if ( ++$tokenizer_self->{_unexpected_error_count} <= MAX_NAG_MESSAGES ) { | ||||
| 26982 | my $msg = "found $found where $expecting expected"; | ||||
| 26983 | my $pos = $$rpretoken_map[$i_tok]; | ||||
| 26984 | interrupt_logfile(); | ||||
| 26985 | my $input_line_number = $tokenizer_self->{_last_line_number}; | ||||
| 26986 | my ( $offset, $numbered_line, $underline ) = | ||||
| 26987 | make_numbered_line( $input_line_number, $input_line, $pos ); | ||||
| 26988 | $underline = write_on_underline( $underline, $pos - $offset, '^' ); | ||||
| 26989 | |||||
| 26990 | my $trailer = ""; | ||||
| 26991 | if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) { | ||||
| 26992 | my $pos_prev = $$rpretoken_map[$last_nonblank_i]; | ||||
| 26993 | my $num; | ||||
| 26994 | if ( $$rpretoken_type[ $i_tok - 1 ] eq 'b' ) { | ||||
| 26995 | $num = $$rpretoken_map[ $i_tok - 1 ] - $pos_prev; | ||||
| 26996 | } | ||||
| 26997 | else { | ||||
| 26998 | $num = $pos - $pos_prev; | ||||
| 26999 | } | ||||
| 27000 | if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; } | ||||
| 27001 | |||||
| 27002 | $underline = | ||||
| 27003 | write_on_underline( $underline, $pos_prev - $offset, '-' x $num ); | ||||
| 27004 | $trailer = " (previous token underlined)"; | ||||
| 27005 | } | ||||
| 27006 | warning( $numbered_line . "\n" ); | ||||
| 27007 | warning( $underline . "\n" ); | ||||
| 27008 | warning( $msg . $trailer . "\n" ); | ||||
| 27009 | resume_logfile(); | ||||
| 27010 | } | ||||
| 27011 | } | ||||
| 27012 | |||||
| 27013 | sub is_non_structural_brace { | ||||
| 27014 | |||||
| 27015 | # Decide if a brace or bracket is structural or non-structural | ||||
| 27016 | # by looking at the previous token and type | ||||
| 27017 | # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token | ||||
| 27018 | |||||
| 27019 | # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting. | ||||
| 27020 | # Tentatively deactivated because it caused the wrong operator expectation | ||||
| 27021 | # for this code: | ||||
| 27022 | # $user = @vars[1] / 100; | ||||
| 27023 | # Must update sub operator_expected before re-implementing. | ||||
| 27024 | # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) { | ||||
| 27025 | # return 0; | ||||
| 27026 | # } | ||||
| 27027 | |||||
| 27028 | # NOTE: braces after type characters start code blocks, but for | ||||
| 27029 | # simplicity these are not identified as such. See also | ||||
| 27030 | # sub code_block_type | ||||
| 27031 | |||||
| 27032 | ##if ($last_nonblank_type eq 't') {return 0} | ||||
| 27033 | |||||
| 27034 | # otherwise, it is non-structural if it is decorated | ||||
| 27035 | # by type information. | ||||
| 27036 | # For example, the '{' here is non-structural: ${xxx} | ||||
| 27037 | ( | ||||
| 27038 | $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/ | ||||
| 27039 | |||||
| 27040 | # or if we follow a hash or array closing curly brace or bracket | ||||
| 27041 | # For example, the second '{' in this is non-structural: $a{'x'}{'y'} | ||||
| 27042 | # because the first '}' would have been given type 'R' | ||||
| 27043 | || $last_nonblank_type =~ /^([R\]])$/ | ||||
| 27044 | ); | ||||
| 27045 | } | ||||
| 27046 | |||||
| 27047 | #########i############################################################# | ||||
| 27048 | # Tokenizer routines for tracking container nesting depths | ||||
| 27049 | ####################################################################### | ||||
| 27050 | |||||
| 27051 | # The following routines keep track of nesting depths of the nesting | ||||
| 27052 | # types, ( [ { and ?. This is necessary for determining the indentation | ||||
| 27053 | # level, and also for debugging programs. Not only do they keep track of | ||||
| 27054 | # nesting depths of the individual brace types, but they check that each | ||||
| 27055 | # of the other brace types is balanced within matching pairs. For | ||||
| 27056 | # example, if the program sees this sequence: | ||||
| 27057 | # | ||||
| 27058 | # { ( ( ) } | ||||
| 27059 | # | ||||
| 27060 | # then it can determine that there is an extra left paren somewhere | ||||
| 27061 | # between the { and the }. And so on with every other possible | ||||
| 27062 | # combination of outer and inner brace types. For another | ||||
| 27063 | # example: | ||||
| 27064 | # | ||||
| 27065 | # ( [ ..... ] ] ) | ||||
| 27066 | # | ||||
| 27067 | # which has an extra ] within the parens. | ||||
| 27068 | # | ||||
| 27069 | # The brace types have indexes 0 .. 3 which are indexes into | ||||
| 27070 | # the matrices. | ||||
| 27071 | # | ||||
| 27072 | # The pair ? : are treated as just another nesting type, with ? acting | ||||
| 27073 | # as the opening brace and : acting as the closing brace. | ||||
| 27074 | # | ||||
| 27075 | # The matrix | ||||
| 27076 | # | ||||
| 27077 | # $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b]; | ||||
| 27078 | # | ||||
| 27079 | # saves the nesting depth of brace type $b (where $b is either of the other | ||||
| 27080 | # nesting types) when brace type $a enters a new depth. When this depth | ||||
| 27081 | # decreases, a check is made that the current depth of brace types $b is | ||||
| 27082 | # unchanged, or otherwise there must have been an error. This can | ||||
| 27083 | # be very useful for localizing errors, particularly when perl runs to | ||||
| 27084 | # the end of a large file (such as this one) and announces that there | ||||
| 27085 | # is a problem somewhere. | ||||
| 27086 | # | ||||
| 27087 | # A numerical sequence number is maintained for every nesting type, | ||||
| 27088 | # so that each matching pair can be uniquely identified in a simple | ||||
| 27089 | # way. | ||||
| 27090 | |||||
| 27091 | sub increase_nesting_depth { | ||||
| 27092 | my ( $aa, $pos ) = @_; | ||||
| 27093 | |||||
| 27094 | # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth, | ||||
| 27095 | # @current_sequence_number, @depth_array, @starting_line_of_current_depth, | ||||
| 27096 | # $statement_type | ||||
| 27097 | my $bb; | ||||
| 27098 | $current_depth[$aa]++; | ||||
| 27099 | $total_depth++; | ||||
| 27100 | $total_depth[$aa][ $current_depth[$aa] ] = $total_depth; | ||||
| 27101 | my $input_line_number = $tokenizer_self->{_last_line_number}; | ||||
| 27102 | my $input_line = $tokenizer_self->{_line_text}; | ||||
| 27103 | |||||
| 27104 | # Sequence numbers increment by number of items. This keeps | ||||
| 27105 | # a unique set of numbers but still allows the relative location | ||||
| 27106 | # of any type to be determined. | ||||
| 27107 | $nesting_sequence_number[$aa] += scalar(@closing_brace_names); | ||||
| 27108 | my $seqno = $nesting_sequence_number[$aa]; | ||||
| 27109 | $current_sequence_number[$aa][ $current_depth[$aa] ] = $seqno; | ||||
| 27110 | |||||
| 27111 | $starting_line_of_current_depth[$aa][ $current_depth[$aa] ] = | ||||
| 27112 | [ $input_line_number, $input_line, $pos ]; | ||||
| 27113 | |||||
| 27114 | for $bb ( 0 .. $#closing_brace_names ) { | ||||
| 27115 | next if ( $bb == $aa ); | ||||
| 27116 | $depth_array[$aa][$bb][ $current_depth[$aa] ] = $current_depth[$bb]; | ||||
| 27117 | } | ||||
| 27118 | |||||
| 27119 | # set a flag for indenting a nested ternary statement | ||||
| 27120 | my $indent = 0; | ||||
| 27121 | if ( $aa == QUESTION_COLON ) { | ||||
| 27122 | $nested_ternary_flag[ $current_depth[$aa] ] = 0; | ||||
| 27123 | if ( $current_depth[$aa] > 1 ) { | ||||
| 27124 | if ( $nested_ternary_flag[ $current_depth[$aa] - 1 ] == 0 ) { | ||||
| 27125 | my $pdepth = $total_depth[$aa][ $current_depth[$aa] - 1 ]; | ||||
| 27126 | if ( $pdepth == $total_depth - 1 ) { | ||||
| 27127 | $indent = 1; | ||||
| 27128 | $nested_ternary_flag[ $current_depth[$aa] - 1 ] = -1; | ||||
| 27129 | } | ||||
| 27130 | } | ||||
| 27131 | } | ||||
| 27132 | } | ||||
| 27133 | $nested_statement_type[$aa][ $current_depth[$aa] ] = $statement_type; | ||||
| 27134 | $statement_type = ""; | ||||
| 27135 | return ( $seqno, $indent ); | ||||
| 27136 | } | ||||
| 27137 | |||||
| 27138 | sub decrease_nesting_depth { | ||||
| 27139 | |||||
| 27140 | my ( $aa, $pos ) = @_; | ||||
| 27141 | |||||
| 27142 | # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth, | ||||
| 27143 | # @current_sequence_number, @depth_array, @starting_line_of_current_depth | ||||
| 27144 | # $statement_type | ||||
| 27145 | my $bb; | ||||
| 27146 | my $seqno = 0; | ||||
| 27147 | my $input_line_number = $tokenizer_self->{_last_line_number}; | ||||
| 27148 | my $input_line = $tokenizer_self->{_line_text}; | ||||
| 27149 | |||||
| 27150 | my $outdent = 0; | ||||
| 27151 | $total_depth--; | ||||
| 27152 | if ( $current_depth[$aa] > 0 ) { | ||||
| 27153 | |||||
| 27154 | # set a flag for un-indenting after seeing a nested ternary statement | ||||
| 27155 | $seqno = $current_sequence_number[$aa][ $current_depth[$aa] ]; | ||||
| 27156 | if ( $aa == QUESTION_COLON ) { | ||||
| 27157 | $outdent = $nested_ternary_flag[ $current_depth[$aa] ]; | ||||
| 27158 | } | ||||
| 27159 | $statement_type = $nested_statement_type[$aa][ $current_depth[$aa] ]; | ||||
| 27160 | |||||
| 27161 | # check that any brace types $bb contained within are balanced | ||||
| 27162 | for $bb ( 0 .. $#closing_brace_names ) { | ||||
| 27163 | next if ( $bb == $aa ); | ||||
| 27164 | |||||
| 27165 | unless ( $depth_array[$aa][$bb][ $current_depth[$aa] ] == | ||||
| 27166 | $current_depth[$bb] ) | ||||
| 27167 | { | ||||
| 27168 | my $diff = | ||||
| 27169 | $current_depth[$bb] - | ||||
| 27170 | $depth_array[$aa][$bb][ $current_depth[$aa] ]; | ||||
| 27171 | |||||
| 27172 | # don't whine too many times | ||||
| 27173 | my $saw_brace_error = get_saw_brace_error(); | ||||
| 27174 | if ( | ||||
| 27175 | $saw_brace_error <= MAX_NAG_MESSAGES | ||||
| 27176 | |||||
| 27177 | # if too many closing types have occurred, we probably | ||||
| 27178 | # already caught this error | ||||
| 27179 | && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) ) | ||||
| 27180 | ) | ||||
| 27181 | { | ||||
| 27182 | interrupt_logfile(); | ||||
| 27183 | my $rsl = | ||||
| 27184 | $starting_line_of_current_depth[$aa] | ||||
| 27185 | [ $current_depth[$aa] ]; | ||||
| 27186 | my $sl = $$rsl[0]; | ||||
| 27187 | my $rel = [ $input_line_number, $input_line, $pos ]; | ||||
| 27188 | my $el = $$rel[0]; | ||||
| 27189 | my ($ess); | ||||
| 27190 | |||||
| 27191 | if ( $diff == 1 || $diff == -1 ) { | ||||
| 27192 | $ess = ''; | ||||
| 27193 | } | ||||
| 27194 | else { | ||||
| 27195 | $ess = 's'; | ||||
| 27196 | } | ||||
| 27197 | my $bname = | ||||
| 27198 | ( $diff > 0 ) | ||||
| 27199 | ? $opening_brace_names[$bb] | ||||
| 27200 | : $closing_brace_names[$bb]; | ||||
| 27201 | write_error_indicator_pair( @$rsl, '^' ); | ||||
| 27202 | my $msg = <<"EOM"; | ||||
| 27203 | Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el | ||||
| 27204 | EOM | ||||
| 27205 | |||||
| 27206 | if ( $diff > 0 ) { | ||||
| 27207 | my $rml = | ||||
| 27208 | $starting_line_of_current_depth[$bb] | ||||
| 27209 | [ $current_depth[$bb] ]; | ||||
| 27210 | my $ml = $$rml[0]; | ||||
| 27211 | $msg .= | ||||
| 27212 | " The most recent un-matched $bname is on line $ml\n"; | ||||
| 27213 | write_error_indicator_pair( @$rml, '^' ); | ||||
| 27214 | } | ||||
| 27215 | write_error_indicator_pair( @$rel, '^' ); | ||||
| 27216 | warning($msg); | ||||
| 27217 | resume_logfile(); | ||||
| 27218 | } | ||||
| 27219 | increment_brace_error(); | ||||
| 27220 | } | ||||
| 27221 | } | ||||
| 27222 | $current_depth[$aa]--; | ||||
| 27223 | } | ||||
| 27224 | else { | ||||
| 27225 | |||||
| 27226 | my $saw_brace_error = get_saw_brace_error(); | ||||
| 27227 | if ( $saw_brace_error <= MAX_NAG_MESSAGES ) { | ||||
| 27228 | my $msg = <<"EOM"; | ||||
| 27229 | There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number | ||||
| 27230 | EOM | ||||
| 27231 | indicate_error( $msg, $input_line_number, $input_line, $pos, '^' ); | ||||
| 27232 | } | ||||
| 27233 | increment_brace_error(); | ||||
| 27234 | } | ||||
| 27235 | return ( $seqno, $outdent ); | ||||
| 27236 | } | ||||
| 27237 | |||||
| 27238 | sub check_final_nesting_depths { | ||||
| 27239 | my ($aa); | ||||
| 27240 | |||||
| 27241 | # USES GLOBAL VARIABLES: @current_depth, @starting_line_of_current_depth | ||||
| 27242 | |||||
| 27243 | for $aa ( 0 .. $#closing_brace_names ) { | ||||
| 27244 | |||||
| 27245 | if ( $current_depth[$aa] ) { | ||||
| 27246 | my $rsl = | ||||
| 27247 | $starting_line_of_current_depth[$aa][ $current_depth[$aa] ]; | ||||
| 27248 | my $sl = $$rsl[0]; | ||||
| 27249 | my $msg = <<"EOM"; | ||||
| 27250 | Final nesting depth of $opening_brace_names[$aa]s is $current_depth[$aa] | ||||
| 27251 | The most recent un-matched $opening_brace_names[$aa] is on line $sl | ||||
| 27252 | EOM | ||||
| 27253 | indicate_error( $msg, @$rsl, '^' ); | ||||
| 27254 | increment_brace_error(); | ||||
| 27255 | } | ||||
| 27256 | } | ||||
| 27257 | } | ||||
| 27258 | |||||
| 27259 | #########i############################################################# | ||||
| 27260 | # Tokenizer routines for looking ahead in input stream | ||||
| 27261 | ####################################################################### | ||||
| 27262 | |||||
| 27263 | sub peek_ahead_for_n_nonblank_pre_tokens { | ||||
| 27264 | |||||
| 27265 | # returns next n pretokens if they exist | ||||
| 27266 | # returns undef's if hits eof without seeing any pretokens | ||||
| 27267 | # USES GLOBAL VARIABLES: $tokenizer_self | ||||
| 27268 | my $max_pretokens = shift; | ||||
| 27269 | my $line; | ||||
| 27270 | my $i = 0; | ||||
| 27271 | my ( $rpre_tokens, $rmap, $rpre_types ); | ||||
| 27272 | |||||
| 27273 | while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) ) | ||||
| 27274 | { | ||||
| 27275 | $line =~ s/^\s*//; # trim leading blanks | ||||
| 27276 | next if ( length($line) <= 0 ); # skip blank | ||||
| 27277 | next if ( $line =~ /^#/ ); # skip comment | ||||
| 27278 | ( $rpre_tokens, $rmap, $rpre_types ) = | ||||
| 27279 | pre_tokenize( $line, $max_pretokens ); | ||||
| 27280 | last; | ||||
| 27281 | } | ||||
| 27282 | return ( $rpre_tokens, $rpre_types ); | ||||
| 27283 | } | ||||
| 27284 | |||||
| 27285 | # look ahead for next non-blank, non-comment line of code | ||||
| 27286 | sub peek_ahead_for_nonblank_token { | ||||
| 27287 | |||||
| 27288 | # USES GLOBAL VARIABLES: $tokenizer_self | ||||
| 27289 | my ( $rtokens, $max_token_index ) = @_; | ||||
| 27290 | my $line; | ||||
| 27291 | my $i = 0; | ||||
| 27292 | |||||
| 27293 | while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) ) | ||||
| 27294 | { | ||||
| 27295 | $line =~ s/^\s*//; # trim leading blanks | ||||
| 27296 | next if ( length($line) <= 0 ); # skip blank | ||||
| 27297 | next if ( $line =~ /^#/ ); # skip comment | ||||
| 27298 | my ( $rtok, $rmap, $rtype ) = | ||||
| 27299 | pre_tokenize( $line, 2 ); # only need 2 pre-tokens | ||||
| 27300 | my $j = $max_token_index + 1; | ||||
| 27301 | my $tok; | ||||
| 27302 | |||||
| 27303 | foreach $tok (@$rtok) { | ||||
| 27304 | last if ( $tok =~ "\n" ); | ||||
| 27305 | $$rtokens[ ++$j ] = $tok; | ||||
| 27306 | } | ||||
| 27307 | last; | ||||
| 27308 | } | ||||
| 27309 | return $rtokens; | ||||
| 27310 | } | ||||
| 27311 | |||||
| 27312 | #########i############################################################# | ||||
| 27313 | # Tokenizer guessing routines for ambiguous situations | ||||
| 27314 | ####################################################################### | ||||
| 27315 | |||||
| 27316 | sub guess_if_pattern_or_conditional { | ||||
| 27317 | |||||
| 27318 | # this routine is called when we have encountered a ? following an | ||||
| 27319 | # unknown bareword, and we must decide if it starts a pattern or not | ||||
| 27320 | # input parameters: | ||||
| 27321 | # $i - token index of the ? starting possible pattern | ||||
| 27322 | # output parameters: | ||||
| 27323 | # $is_pattern = 0 if probably not pattern, =1 if probably a pattern | ||||
| 27324 | # msg = a warning or diagnostic message | ||||
| 27325 | # USES GLOBAL VARIABLES: $last_nonblank_token | ||||
| 27326 | my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_; | ||||
| 27327 | my $is_pattern = 0; | ||||
| 27328 | my $msg = "guessing that ? after $last_nonblank_token starts a "; | ||||
| 27329 | |||||
| 27330 | if ( $i >= $max_token_index ) { | ||||
| 27331 | $msg .= "conditional (no end to pattern found on the line)\n"; | ||||
| 27332 | } | ||||
| 27333 | else { | ||||
| 27334 | my $ibeg = $i; | ||||
| 27335 | $i = $ibeg + 1; | ||||
| 27336 | my $next_token = $$rtokens[$i]; # first token after ? | ||||
| 27337 | |||||
| 27338 | # look for a possible ending ? on this line.. | ||||
| 27339 | my $in_quote = 1; | ||||
| 27340 | my $quote_depth = 0; | ||||
| 27341 | my $quote_character = ''; | ||||
| 27342 | my $quote_pos = 0; | ||||
| 27343 | my $quoted_string; | ||||
| 27344 | ( | ||||
| 27345 | $i, $in_quote, $quote_character, $quote_pos, $quote_depth, | ||||
| 27346 | $quoted_string | ||||
| 27347 | ) | ||||
| 27348 | = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character, | ||||
| 27349 | $quote_pos, $quote_depth, $max_token_index ); | ||||
| 27350 | |||||
| 27351 | if ($in_quote) { | ||||
| 27352 | |||||
| 27353 | # we didn't find an ending ? on this line, | ||||
| 27354 | # so we bias towards conditional | ||||
| 27355 | $is_pattern = 0; | ||||
| 27356 | $msg .= "conditional (no ending ? on this line)\n"; | ||||
| 27357 | |||||
| 27358 | # we found an ending ?, so we bias towards a pattern | ||||
| 27359 | } | ||||
| 27360 | else { | ||||
| 27361 | |||||
| 27362 | if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) { | ||||
| 27363 | $is_pattern = 1; | ||||
| 27364 | $msg .= "pattern (found ending ? and pattern expected)\n"; | ||||
| 27365 | } | ||||
| 27366 | else { | ||||
| 27367 | $msg .= "pattern (uncertain, but found ending ?)\n"; | ||||
| 27368 | } | ||||
| 27369 | } | ||||
| 27370 | } | ||||
| 27371 | return ( $is_pattern, $msg ); | ||||
| 27372 | } | ||||
| 27373 | |||||
| 27374 | sub guess_if_pattern_or_division { | ||||
| 27375 | |||||
| 27376 | # this routine is called when we have encountered a / following an | ||||
| 27377 | # unknown bareword, and we must decide if it starts a pattern or is a | ||||
| 27378 | # division | ||||
| 27379 | # input parameters: | ||||
| 27380 | # $i - token index of the / starting possible pattern | ||||
| 27381 | # output parameters: | ||||
| 27382 | # $is_pattern = 0 if probably division, =1 if probably a pattern | ||||
| 27383 | # msg = a warning or diagnostic message | ||||
| 27384 | # USES GLOBAL VARIABLES: $last_nonblank_token | ||||
| 27385 | my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_; | ||||
| 27386 | my $is_pattern = 0; | ||||
| 27387 | my $msg = "guessing that / after $last_nonblank_token starts a "; | ||||
| 27388 | |||||
| 27389 | if ( $i >= $max_token_index ) { | ||||
| 27390 | $msg .= "division (no end to pattern found on the line)\n"; | ||||
| 27391 | } | ||||
| 27392 | else { | ||||
| 27393 | my $ibeg = $i; | ||||
| 27394 | my $divide_expected = | ||||
| 27395 | numerator_expected( $i, $rtokens, $max_token_index ); | ||||
| 27396 | $i = $ibeg + 1; | ||||
| 27397 | my $next_token = $$rtokens[$i]; # first token after slash | ||||
| 27398 | |||||
| 27399 | # look for a possible ending / on this line.. | ||||
| 27400 | my $in_quote = 1; | ||||
| 27401 | my $quote_depth = 0; | ||||
| 27402 | my $quote_character = ''; | ||||
| 27403 | my $quote_pos = 0; | ||||
| 27404 | my $quoted_string; | ||||
| 27405 | ( | ||||
| 27406 | $i, $in_quote, $quote_character, $quote_pos, $quote_depth, | ||||
| 27407 | $quoted_string | ||||
| 27408 | ) | ||||
| 27409 | = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character, | ||||
| 27410 | $quote_pos, $quote_depth, $max_token_index ); | ||||
| 27411 | |||||
| 27412 | if ($in_quote) { | ||||
| 27413 | |||||
| 27414 | # we didn't find an ending / on this line, | ||||
| 27415 | # so we bias towards division | ||||
| 27416 | if ( $divide_expected >= 0 ) { | ||||
| 27417 | $is_pattern = 0; | ||||
| 27418 | $msg .= "division (no ending / on this line)\n"; | ||||
| 27419 | } | ||||
| 27420 | else { | ||||
| 27421 | $msg = "multi-line pattern (division not possible)\n"; | ||||
| 27422 | $is_pattern = 1; | ||||
| 27423 | } | ||||
| 27424 | |||||
| 27425 | } | ||||
| 27426 | |||||
| 27427 | # we found an ending /, so we bias towards a pattern | ||||
| 27428 | else { | ||||
| 27429 | |||||
| 27430 | if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) { | ||||
| 27431 | |||||
| 27432 | if ( $divide_expected >= 0 ) { | ||||
| 27433 | |||||
| 27434 | if ( $i - $ibeg > 60 ) { | ||||
| 27435 | $msg .= "division (matching / too distant)\n"; | ||||
| 27436 | $is_pattern = 0; | ||||
| 27437 | } | ||||
| 27438 | else { | ||||
| 27439 | $msg .= "pattern (but division possible too)\n"; | ||||
| 27440 | $is_pattern = 1; | ||||
| 27441 | } | ||||
| 27442 | } | ||||
| 27443 | else { | ||||
| 27444 | $is_pattern = 1; | ||||
| 27445 | $msg .= "pattern (division not possible)\n"; | ||||
| 27446 | } | ||||
| 27447 | } | ||||
| 27448 | else { | ||||
| 27449 | |||||
| 27450 | if ( $divide_expected >= 0 ) { | ||||
| 27451 | $is_pattern = 0; | ||||
| 27452 | $msg .= "division (pattern not possible)\n"; | ||||
| 27453 | } | ||||
| 27454 | else { | ||||
| 27455 | $is_pattern = 1; | ||||
| 27456 | $msg .= | ||||
| 27457 | "pattern (uncertain, but division would not work here)\n"; | ||||
| 27458 | } | ||||
| 27459 | } | ||||
| 27460 | } | ||||
| 27461 | } | ||||
| 27462 | return ( $is_pattern, $msg ); | ||||
| 27463 | } | ||||
| 27464 | |||||
| 27465 | # try to resolve here-doc vs. shift by looking ahead for | ||||
| 27466 | # non-code or the end token (currently only looks for end token) | ||||
| 27467 | # returns 1 if it is probably a here doc, 0 if not | ||||
| 27468 | sub guess_if_here_doc { | ||||
| 27469 | |||||
| 27470 | # This is how many lines we will search for a target as part of the | ||||
| 27471 | # guessing strategy. It is a constant because there is probably | ||||
| 27472 | # little reason to change it. | ||||
| 27473 | # USES GLOBAL VARIABLES: $tokenizer_self, $current_package | ||||
| 27474 | # %is_constant, | ||||
| 27475 | 2 | 7.55ms | 2 | 156µs | # spent 86µs (16+70) within Perl::Tidy::Tokenizer::BEGIN@27475 which was called:
# once (16µs+70µs) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 27475 # spent 86µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@27475
# spent 70µs making 1 call to constant::import |
| 27476 | |||||
| 27477 | my $next_token = shift; | ||||
| 27478 | my $here_doc_expected = 0; | ||||
| 27479 | my $line; | ||||
| 27480 | my $k = 0; | ||||
| 27481 | my $msg = "checking <<"; | ||||
| 27482 | |||||
| 27483 | while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) ) | ||||
| 27484 | { | ||||
| 27485 | chomp $line; | ||||
| 27486 | |||||
| 27487 | if ( $line =~ /^$next_token$/ ) { | ||||
| 27488 | $msg .= " -- found target $next_token ahead $k lines\n"; | ||||
| 27489 | $here_doc_expected = 1; # got it | ||||
| 27490 | last; | ||||
| 27491 | } | ||||
| 27492 | last if ( $k >= HERE_DOC_WINDOW ); | ||||
| 27493 | } | ||||
| 27494 | |||||
| 27495 | unless ($here_doc_expected) { | ||||
| 27496 | |||||
| 27497 | if ( !defined($line) ) { | ||||
| 27498 | $here_doc_expected = -1; # hit eof without seeing target | ||||
| 27499 | $msg .= " -- must be shift; target $next_token not in file\n"; | ||||
| 27500 | |||||
| 27501 | } | ||||
| 27502 | else { # still unsure..taking a wild guess | ||||
| 27503 | |||||
| 27504 | if ( !$is_constant{$current_package}{$next_token} ) { | ||||
| 27505 | $here_doc_expected = 1; | ||||
| 27506 | $msg .= | ||||
| 27507 | " -- guessing it's a here-doc ($next_token not a constant)\n"; | ||||
| 27508 | } | ||||
| 27509 | else { | ||||
| 27510 | $msg .= | ||||
| 27511 | " -- guessing it's a shift ($next_token is a constant)\n"; | ||||
| 27512 | } | ||||
| 27513 | } | ||||
| 27514 | } | ||||
| 27515 | write_logfile_entry($msg); | ||||
| 27516 | return $here_doc_expected; | ||||
| 27517 | } | ||||
| 27518 | |||||
| 27519 | #########i############################################################# | ||||
| 27520 | # Tokenizer Routines for scanning identifiers and related items | ||||
| 27521 | ####################################################################### | ||||
| 27522 | |||||
| 27523 | sub scan_bare_identifier_do { | ||||
| 27524 | |||||
| 27525 | # this routine is called to scan a token starting with an alphanumeric | ||||
| 27526 | # variable or package separator, :: or '. | ||||
| 27527 | # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, | ||||
| 27528 | # $last_nonblank_type,@paren_type, $paren_depth | ||||
| 27529 | |||||
| 27530 | my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map, | ||||
| 27531 | $max_token_index ) | ||||
| 27532 | = @_; | ||||
| 27533 | my $i_begin = $i; | ||||
| 27534 | my $package = undef; | ||||
| 27535 | |||||
| 27536 | my $i_beg = $i; | ||||
| 27537 | |||||
| 27538 | # we have to back up one pretoken at a :: since each : is one pretoken | ||||
| 27539 | if ( $tok eq '::' ) { $i_beg-- } | ||||
| 27540 | if ( $tok eq '->' ) { $i_beg-- } | ||||
| 27541 | my $pos_beg = $$rtoken_map[$i_beg]; | ||||
| 27542 | pos($input_line) = $pos_beg; | ||||
| 27543 | |||||
| 27544 | # Examples: | ||||
| 27545 | # A::B::C | ||||
| 27546 | # A:: | ||||
| 27547 | # ::A | ||||
| 27548 | # A'B | ||||
| 27549 | if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) { | ||||
| 27550 | |||||
| 27551 | my $pos = pos($input_line); | ||||
| 27552 | my $numc = $pos - $pos_beg; | ||||
| 27553 | $tok = substr( $input_line, $pos_beg, $numc ); | ||||
| 27554 | |||||
| 27555 | # type 'w' includes anything without leading type info | ||||
| 27556 | # ($,%,@,*) including something like abc::def::ghi | ||||
| 27557 | $type = 'w'; | ||||
| 27558 | |||||
| 27559 | my $sub_name = ""; | ||||
| 27560 | if ( defined($2) ) { $sub_name = $2; } | ||||
| 27561 | if ( defined($1) ) { | ||||
| 27562 | $package = $1; | ||||
| 27563 | |||||
| 27564 | # patch: don't allow isolated package name which just ends | ||||
| 27565 | # in the old style package separator (single quote). Example: | ||||
| 27566 | # use CGI':all'; | ||||
| 27567 | if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) { | ||||
| 27568 | $pos--; | ||||
| 27569 | } | ||||
| 27570 | |||||
| 27571 | $package =~ s/\'/::/g; | ||||
| 27572 | if ( $package =~ /^\:/ ) { $package = 'main' . $package } | ||||
| 27573 | $package =~ s/::$//; | ||||
| 27574 | } | ||||
| 27575 | else { | ||||
| 27576 | $package = $current_package; | ||||
| 27577 | |||||
| 27578 | if ( $is_keyword{$tok} ) { | ||||
| 27579 | $type = 'k'; | ||||
| 27580 | } | ||||
| 27581 | } | ||||
| 27582 | |||||
| 27583 | # if it is a bareword.. | ||||
| 27584 | if ( $type eq 'w' ) { | ||||
| 27585 | |||||
| 27586 | # check for v-string with leading 'v' type character | ||||
| 27587 | # (This seems to have precedence over filehandle, type 'Y') | ||||
| 27588 | if ( $tok =~ /^v\d[_\d]*$/ ) { | ||||
| 27589 | |||||
| 27590 | # we only have the first part - something like 'v101' - | ||||
| 27591 | # look for more | ||||
| 27592 | if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) { | ||||
| 27593 | $pos = pos($input_line); | ||||
| 27594 | $numc = $pos - $pos_beg; | ||||
| 27595 | $tok = substr( $input_line, $pos_beg, $numc ); | ||||
| 27596 | } | ||||
| 27597 | $type = 'v'; | ||||
| 27598 | |||||
| 27599 | # warn if this version can't handle v-strings | ||||
| 27600 | report_v_string($tok); | ||||
| 27601 | } | ||||
| 27602 | |||||
| 27603 | elsif ( $is_constant{$package}{$sub_name} ) { | ||||
| 27604 | $type = 'C'; | ||||
| 27605 | } | ||||
| 27606 | |||||
| 27607 | # bareword after sort has implied empty prototype; for example: | ||||
| 27608 | # @sorted = sort numerically ( 53, 29, 11, 32, 7 ); | ||||
| 27609 | # This has priority over whatever the user has specified. | ||||
| 27610 | elsif ($last_nonblank_token eq 'sort' | ||||
| 27611 | && $last_nonblank_type eq 'k' ) | ||||
| 27612 | { | ||||
| 27613 | $type = 'Z'; | ||||
| 27614 | } | ||||
| 27615 | |||||
| 27616 | # Note: strangely, perl does not seem to really let you create | ||||
| 27617 | # functions which act like eval and do, in the sense that eval | ||||
| 27618 | # and do may have operators following the final }, but any operators | ||||
| 27619 | # that you create with prototype (&) apparently do not allow | ||||
| 27620 | # trailing operators, only terms. This seems strange. | ||||
| 27621 | # If this ever changes, here is the update | ||||
| 27622 | # to make perltidy behave accordingly: | ||||
| 27623 | |||||
| 27624 | # elsif ( $is_block_function{$package}{$tok} ) { | ||||
| 27625 | # $tok='eval'; # patch to do braces like eval - doesn't work | ||||
| 27626 | # $type = 'k'; | ||||
| 27627 | #} | ||||
| 27628 | # FIXME: This could become a separate type to allow for different | ||||
| 27629 | # future behavior: | ||||
| 27630 | elsif ( $is_block_function{$package}{$sub_name} ) { | ||||
| 27631 | $type = 'G'; | ||||
| 27632 | } | ||||
| 27633 | |||||
| 27634 | elsif ( $is_block_list_function{$package}{$sub_name} ) { | ||||
| 27635 | $type = 'G'; | ||||
| 27636 | } | ||||
| 27637 | elsif ( $is_user_function{$package}{$sub_name} ) { | ||||
| 27638 | $type = 'U'; | ||||
| 27639 | $prototype = $user_function_prototype{$package}{$sub_name}; | ||||
| 27640 | } | ||||
| 27641 | |||||
| 27642 | # check for indirect object | ||||
| 27643 | elsif ( | ||||
| 27644 | |||||
| 27645 | # added 2001-03-27: must not be followed immediately by '(' | ||||
| 27646 | # see fhandle.t | ||||
| 27647 | ( $input_line !~ m/\G\(/gc ) | ||||
| 27648 | |||||
| 27649 | # and | ||||
| 27650 | && ( | ||||
| 27651 | |||||
| 27652 | # preceded by keyword like 'print', 'printf' and friends | ||||
| 27653 | $is_indirect_object_taker{$last_nonblank_token} | ||||
| 27654 | |||||
| 27655 | # or preceded by something like 'print(' or 'printf(' | ||||
| 27656 | || ( | ||||
| 27657 | ( $last_nonblank_token eq '(' ) | ||||
| 27658 | && $is_indirect_object_taker{ $paren_type[$paren_depth] | ||||
| 27659 | } | ||||
| 27660 | |||||
| 27661 | ) | ||||
| 27662 | ) | ||||
| 27663 | ) | ||||
| 27664 | { | ||||
| 27665 | |||||
| 27666 | # may not be indirect object unless followed by a space | ||||
| 27667 | if ( $input_line =~ m/\G\s+/gc ) { | ||||
| 27668 | $type = 'Y'; | ||||
| 27669 | |||||
| 27670 | # Abandon Hope ... | ||||
| 27671 | # Perl's indirect object notation is a very bad | ||||
| 27672 | # thing and can cause subtle bugs, especially for | ||||
| 27673 | # beginning programmers. And I haven't even been | ||||
| 27674 | # able to figure out a sane warning scheme which | ||||
| 27675 | # doesn't get in the way of good scripts. | ||||
| 27676 | |||||
| 27677 | # Complain if a filehandle has any lower case | ||||
| 27678 | # letters. This is suggested good practice. | ||||
| 27679 | # Use 'sub_name' because something like | ||||
| 27680 | # main::MYHANDLE is ok for filehandle | ||||
| 27681 | if ( $sub_name =~ /[a-z]/ ) { | ||||
| 27682 | |||||
| 27683 | # could be bug caused by older perltidy if | ||||
| 27684 | # followed by '(' | ||||
| 27685 | if ( $input_line =~ m/\G\s*\(/gc ) { | ||||
| 27686 | complain( | ||||
| 27687 | "Caution: unknown word '$tok' in indirect object slot\n" | ||||
| 27688 | ); | ||||
| 27689 | } | ||||
| 27690 | } | ||||
| 27691 | } | ||||
| 27692 | |||||
| 27693 | # bareword not followed by a space -- may not be filehandle | ||||
| 27694 | # (may be function call defined in a 'use' statement) | ||||
| 27695 | else { | ||||
| 27696 | $type = 'Z'; | ||||
| 27697 | } | ||||
| 27698 | } | ||||
| 27699 | } | ||||
| 27700 | |||||
| 27701 | # Now we must convert back from character position | ||||
| 27702 | # to pre_token index. | ||||
| 27703 | # I don't think an error flag can occur here ..but who knows | ||||
| 27704 | my $error; | ||||
| 27705 | ( $i, $error ) = | ||||
| 27706 | inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); | ||||
| 27707 | if ($error) { | ||||
| 27708 | warning("scan_bare_identifier: Possibly invalid tokenization\n"); | ||||
| 27709 | } | ||||
| 27710 | } | ||||
| 27711 | |||||
| 27712 | # no match but line not blank - could be syntax error | ||||
| 27713 | # perl will take '::' alone without complaint | ||||
| 27714 | else { | ||||
| 27715 | $type = 'w'; | ||||
| 27716 | |||||
| 27717 | # change this warning to log message if it becomes annoying | ||||
| 27718 | warning("didn't find identifier after leading ::\n"); | ||||
| 27719 | } | ||||
| 27720 | return ( $i, $tok, $type, $prototype ); | ||||
| 27721 | } | ||||
| 27722 | |||||
| 27723 | sub scan_id_do { | ||||
| 27724 | |||||
| 27725 | # This is the new scanner and will eventually replace scan_identifier. | ||||
| 27726 | # Only type 'sub' and 'package' are implemented. | ||||
| 27727 | # Token types $ * % @ & -> are not yet implemented. | ||||
| 27728 | # | ||||
| 27729 | # Scan identifier following a type token. | ||||
| 27730 | # The type of call depends on $id_scan_state: $id_scan_state = '' | ||||
| 27731 | # for starting call, in which case $tok must be the token defining | ||||
| 27732 | # the type. | ||||
| 27733 | # | ||||
| 27734 | # If the type token is the last nonblank token on the line, a value | ||||
| 27735 | # of $id_scan_state = $tok is returned, indicating that further | ||||
| 27736 | # calls must be made to get the identifier. If the type token is | ||||
| 27737 | # not the last nonblank token on the line, the identifier is | ||||
| 27738 | # scanned and handled and a value of '' is returned. | ||||
| 27739 | # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, $in_attribute_list, | ||||
| 27740 | # $statement_type, $tokenizer_self | ||||
| 27741 | |||||
| 27742 | my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state, | ||||
| 27743 | $max_token_index ) | ||||
| 27744 | = @_; | ||||
| 27745 | my $type = ''; | ||||
| 27746 | my ( $i_beg, $pos_beg ); | ||||
| 27747 | |||||
| 27748 | #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n"; | ||||
| 27749 | #my ($a,$b,$c) = caller; | ||||
| 27750 | #print "NSCAN: scan_id called with tok=$tok $a $b $c\n"; | ||||
| 27751 | |||||
| 27752 | # on re-entry, start scanning at first token on the line | ||||
| 27753 | if ($id_scan_state) { | ||||
| 27754 | $i_beg = $i; | ||||
| 27755 | $type = ''; | ||||
| 27756 | } | ||||
| 27757 | |||||
| 27758 | # on initial entry, start scanning just after type token | ||||
| 27759 | else { | ||||
| 27760 | $i_beg = $i + 1; | ||||
| 27761 | $id_scan_state = $tok; | ||||
| 27762 | $type = 't'; | ||||
| 27763 | } | ||||
| 27764 | |||||
| 27765 | # find $i_beg = index of next nonblank token, | ||||
| 27766 | # and handle empty lines | ||||
| 27767 | my $blank_line = 0; | ||||
| 27768 | my $next_nonblank_token = $$rtokens[$i_beg]; | ||||
| 27769 | if ( $i_beg > $max_token_index ) { | ||||
| 27770 | $blank_line = 1; | ||||
| 27771 | } | ||||
| 27772 | else { | ||||
| 27773 | |||||
| 27774 | # only a '#' immediately after a '$' is not a comment | ||||
| 27775 | if ( $next_nonblank_token eq '#' ) { | ||||
| 27776 | unless ( $tok eq '$' ) { | ||||
| 27777 | $blank_line = 1; | ||||
| 27778 | } | ||||
| 27779 | } | ||||
| 27780 | |||||
| 27781 | if ( $next_nonblank_token =~ /^\s/ ) { | ||||
| 27782 | ( $next_nonblank_token, $i_beg ) = | ||||
| 27783 | find_next_nonblank_token_on_this_line( $i_beg, $rtokens, | ||||
| 27784 | $max_token_index ); | ||||
| 27785 | if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) { | ||||
| 27786 | $blank_line = 1; | ||||
| 27787 | } | ||||
| 27788 | } | ||||
| 27789 | } | ||||
| 27790 | |||||
| 27791 | # handle non-blank line; identifier, if any, must follow | ||||
| 27792 | unless ($blank_line) { | ||||
| 27793 | |||||
| 27794 | if ( $id_scan_state eq 'sub' ) { | ||||
| 27795 | ( $i, $tok, $type, $id_scan_state ) = do_scan_sub( | ||||
| 27796 | $input_line, $i, $i_beg, | ||||
| 27797 | $tok, $type, $rtokens, | ||||
| 27798 | $rtoken_map, $id_scan_state, $max_token_index | ||||
| 27799 | ); | ||||
| 27800 | } | ||||
| 27801 | |||||
| 27802 | elsif ( $id_scan_state eq 'package' ) { | ||||
| 27803 | ( $i, $tok, $type ) = | ||||
| 27804 | do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens, | ||||
| 27805 | $rtoken_map, $max_token_index ); | ||||
| 27806 | $id_scan_state = ''; | ||||
| 27807 | } | ||||
| 27808 | |||||
| 27809 | else { | ||||
| 27810 | warning("invalid token in scan_id: $tok\n"); | ||||
| 27811 | $id_scan_state = ''; | ||||
| 27812 | } | ||||
| 27813 | } | ||||
| 27814 | |||||
| 27815 | if ( $id_scan_state && ( !defined($type) || !$type ) ) { | ||||
| 27816 | |||||
| 27817 | # shouldn't happen: | ||||
| 27818 | warning( | ||||
| 27819 | "Program bug in scan_id: undefined type but scan_state=$id_scan_state\n" | ||||
| 27820 | ); | ||||
| 27821 | report_definite_bug(); | ||||
| 27822 | } | ||||
| 27823 | |||||
| 27824 | TOKENIZER_DEBUG_FLAG_NSCAN && do { | ||||
| 27825 | print STDOUT | ||||
| 27826 | "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n"; | ||||
| 27827 | }; | ||||
| 27828 | return ( $i, $tok, $type, $id_scan_state ); | ||||
| 27829 | } | ||||
| 27830 | |||||
| 27831 | sub check_prototype { | ||||
| 27832 | my ( $proto, $package, $subname ) = @_; | ||||
| 27833 | return unless ( defined($package) && defined($subname) ); | ||||
| 27834 | if ( defined($proto) ) { | ||||
| 27835 | $proto =~ s/^\s*\(\s*//; | ||||
| 27836 | $proto =~ s/\s*\)$//; | ||||
| 27837 | if ($proto) { | ||||
| 27838 | $is_user_function{$package}{$subname} = 1; | ||||
| 27839 | $user_function_prototype{$package}{$subname} = "($proto)"; | ||||
| 27840 | |||||
| 27841 | # prototypes containing '&' must be treated specially.. | ||||
| 27842 | if ( $proto =~ /\&/ ) { | ||||
| 27843 | |||||
| 27844 | # right curly braces of prototypes ending in | ||||
| 27845 | # '&' may be followed by an operator | ||||
| 27846 | if ( $proto =~ /\&$/ ) { | ||||
| 27847 | $is_block_function{$package}{$subname} = 1; | ||||
| 27848 | } | ||||
| 27849 | |||||
| 27850 | # right curly braces of prototypes NOT ending in | ||||
| 27851 | # '&' may NOT be followed by an operator | ||||
| 27852 | elsif ( $proto !~ /\&$/ ) { | ||||
| 27853 | $is_block_list_function{$package}{$subname} = 1; | ||||
| 27854 | } | ||||
| 27855 | } | ||||
| 27856 | } | ||||
| 27857 | else { | ||||
| 27858 | $is_constant{$package}{$subname} = 1; | ||||
| 27859 | } | ||||
| 27860 | } | ||||
| 27861 | else { | ||||
| 27862 | $is_user_function{$package}{$subname} = 1; | ||||
| 27863 | } | ||||
| 27864 | } | ||||
| 27865 | |||||
| 27866 | sub do_scan_package { | ||||
| 27867 | |||||
| 27868 | # do_scan_package parses a package name | ||||
| 27869 | # it is called with $i_beg equal to the index of the first nonblank | ||||
| 27870 | # token following a 'package' token. | ||||
| 27871 | # USES GLOBAL VARIABLES: $current_package, | ||||
| 27872 | |||||
| 27873 | # package NAMESPACE | ||||
| 27874 | # package NAMESPACE VERSION | ||||
| 27875 | # package NAMESPACE BLOCK | ||||
| 27876 | # package NAMESPACE VERSION BLOCK | ||||
| 27877 | # | ||||
| 27878 | # If VERSION is provided, package sets the $VERSION variable in the given | ||||
| 27879 | # namespace to a version object with the VERSION provided. VERSION must be | ||||
| 27880 | # a "strict" style version number as defined by the version module: a | ||||
| 27881 | # positive decimal number (integer or decimal-fraction) without | ||||
| 27882 | # exponentiation or else a dotted-decimal v-string with a leading 'v' | ||||
| 27883 | # character and at least three components. | ||||
| 27884 | # reference http://perldoc.perl.org/functions/package.html | ||||
| 27885 | |||||
| 27886 | my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map, | ||||
| 27887 | $max_token_index ) | ||||
| 27888 | = @_; | ||||
| 27889 | my $package = undef; | ||||
| 27890 | my $pos_beg = $$rtoken_map[$i_beg]; | ||||
| 27891 | pos($input_line) = $pos_beg; | ||||
| 27892 | |||||
| 27893 | # handle non-blank line; package name, if any, must follow | ||||
| 27894 | if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) { | ||||
| 27895 | $package = $1; | ||||
| 27896 | $package = ( defined($1) && $1 ) ? $1 : 'main'; | ||||
| 27897 | $package =~ s/\'/::/g; | ||||
| 27898 | if ( $package =~ /^\:/ ) { $package = 'main' . $package } | ||||
| 27899 | $package =~ s/::$//; | ||||
| 27900 | my $pos = pos($input_line); | ||||
| 27901 | my $numc = $pos - $pos_beg; | ||||
| 27902 | $tok = 'package ' . substr( $input_line, $pos_beg, $numc ); | ||||
| 27903 | $type = 'i'; | ||||
| 27904 | |||||
| 27905 | # Now we must convert back from character position | ||||
| 27906 | # to pre_token index. | ||||
| 27907 | # I don't think an error flag can occur here ..but ? | ||||
| 27908 | my $error; | ||||
| 27909 | ( $i, $error ) = | ||||
| 27910 | inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); | ||||
| 27911 | if ($error) { warning("Possibly invalid package\n") } | ||||
| 27912 | $current_package = $package; | ||||
| 27913 | |||||
| 27914 | # we should now have package NAMESPACE | ||||
| 27915 | # now expecting VERSION, BLOCK, or ; to follow ... | ||||
| 27916 | # package NAMESPACE VERSION | ||||
| 27917 | # package NAMESPACE BLOCK | ||||
| 27918 | # package NAMESPACE VERSION BLOCK | ||||
| 27919 | my ( $next_nonblank_token, $i_next ) = | ||||
| 27920 | find_next_nonblank_token( $i, $rtokens, $max_token_index ); | ||||
| 27921 | |||||
| 27922 | # check that something recognizable follows, but do not parse. | ||||
| 27923 | # A VERSION number will be parsed later as a number or v-string in the | ||||
| 27924 | # normal way. What is important is to set the statement type if | ||||
| 27925 | # everything looks okay so that the operator_expected() routine | ||||
| 27926 | # knows that the number is in a package statement. | ||||
| 27927 | # Examples of valid primitive tokens that might follow are: | ||||
| 27928 | # 1235 . ; { } v3 v | ||||
| 27929 | if ( $next_nonblank_token =~ /^([v\.\d;\{\}])|v\d|\d+$/ ) { | ||||
| 27930 | $statement_type = $tok; | ||||
| 27931 | } | ||||
| 27932 | else { | ||||
| 27933 | warning( | ||||
| 27934 | "Unexpected '$next_nonblank_token' after package name '$tok'\n" | ||||
| 27935 | ); | ||||
| 27936 | } | ||||
| 27937 | } | ||||
| 27938 | |||||
| 27939 | # no match but line not blank -- | ||||
| 27940 | # could be a label with name package, like package: , for example. | ||||
| 27941 | else { | ||||
| 27942 | $type = 'k'; | ||||
| 27943 | } | ||||
| 27944 | |||||
| 27945 | return ( $i, $tok, $type ); | ||||
| 27946 | } | ||||
| 27947 | |||||
| 27948 | sub scan_identifier_do { | ||||
| 27949 | |||||
| 27950 | # This routine assembles tokens into identifiers. It maintains a | ||||
| 27951 | # scan state, id_scan_state. It updates id_scan_state based upon | ||||
| 27952 | # current id_scan_state and token, and returns an updated | ||||
| 27953 | # id_scan_state and the next index after the identifier. | ||||
| 27954 | # USES GLOBAL VARIABLES: $context, $last_nonblank_token, | ||||
| 27955 | # $last_nonblank_type | ||||
| 27956 | |||||
| 27957 | my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index, | ||||
| 27958 | $expecting ) | ||||
| 27959 | = @_; | ||||
| 27960 | my $i_begin = $i; | ||||
| 27961 | my $type = ''; | ||||
| 27962 | my $tok_begin = $$rtokens[$i_begin]; | ||||
| 27963 | if ( $tok_begin eq ':' ) { $tok_begin = '::' } | ||||
| 27964 | my $id_scan_state_begin = $id_scan_state; | ||||
| 27965 | my $identifier_begin = $identifier; | ||||
| 27966 | my $tok = $tok_begin; | ||||
| 27967 | my $message = ""; | ||||
| 27968 | |||||
| 27969 | # these flags will be used to help figure out the type: | ||||
| 27970 | my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ ); | ||||
| 27971 | my $saw_type; | ||||
| 27972 | |||||
| 27973 | # allow old package separator (') except in 'use' statement | ||||
| 27974 | my $allow_tick = ( $last_nonblank_token ne 'use' ); | ||||
| 27975 | |||||
| 27976 | # get started by defining a type and a state if necessary | ||||
| 27977 | unless ($id_scan_state) { | ||||
| 27978 | $context = UNKNOWN_CONTEXT; | ||||
| 27979 | |||||
| 27980 | # fixup for digraph | ||||
| 27981 | if ( $tok eq '>' ) { | ||||
| 27982 | $tok = '->'; | ||||
| 27983 | $tok_begin = $tok; | ||||
| 27984 | } | ||||
| 27985 | $identifier = $tok; | ||||
| 27986 | |||||
| 27987 | if ( $tok eq '$' || $tok eq '*' ) { | ||||
| 27988 | $id_scan_state = '$'; | ||||
| 27989 | $context = SCALAR_CONTEXT; | ||||
| 27990 | } | ||||
| 27991 | elsif ( $tok eq '%' || $tok eq '@' ) { | ||||
| 27992 | $id_scan_state = '$'; | ||||
| 27993 | $context = LIST_CONTEXT; | ||||
| 27994 | } | ||||
| 27995 | elsif ( $tok eq '&' ) { | ||||
| 27996 | $id_scan_state = '&'; | ||||
| 27997 | } | ||||
| 27998 | elsif ( $tok eq 'sub' or $tok eq 'package' ) { | ||||
| 27999 | $saw_alpha = 0; # 'sub' is considered type info here | ||||
| 28000 | $id_scan_state = '$'; | ||||
| 28001 | $identifier .= ' '; # need a space to separate sub from sub name | ||||
| 28002 | } | ||||
| 28003 | elsif ( $tok eq '::' ) { | ||||
| 28004 | $id_scan_state = 'A'; | ||||
| 28005 | } | ||||
| 28006 | elsif ( $tok =~ /^[A-Za-z_]/ ) { | ||||
| 28007 | $id_scan_state = ':'; | ||||
| 28008 | } | ||||
| 28009 | elsif ( $tok eq '->' ) { | ||||
| 28010 | $id_scan_state = '$'; | ||||
| 28011 | } | ||||
| 28012 | else { | ||||
| 28013 | |||||
| 28014 | # shouldn't happen | ||||
| 28015 | my ( $a, $b, $c ) = caller; | ||||
| 28016 | warning("Program Bug: scan_identifier given bad token = $tok \n"); | ||||
| 28017 | warning(" called from sub $a line: $c\n"); | ||||
| 28018 | report_definite_bug(); | ||||
| 28019 | } | ||||
| 28020 | $saw_type = !$saw_alpha; | ||||
| 28021 | } | ||||
| 28022 | else { | ||||
| 28023 | $i--; | ||||
| 28024 | $saw_type = ( $tok =~ /([\$\%\@\*\&])/ ); | ||||
| 28025 | } | ||||
| 28026 | |||||
| 28027 | # now loop to gather the identifier | ||||
| 28028 | my $i_save = $i; | ||||
| 28029 | |||||
| 28030 | while ( $i < $max_token_index ) { | ||||
| 28031 | $i_save = $i unless ( $tok =~ /^\s*$/ ); | ||||
| 28032 | $tok = $$rtokens[ ++$i ]; | ||||
| 28033 | |||||
| 28034 | if ( ( $tok eq ':' ) && ( $$rtokens[ $i + 1 ] eq ':' ) ) { | ||||
| 28035 | $tok = '::'; | ||||
| 28036 | $i++; | ||||
| 28037 | } | ||||
| 28038 | |||||
| 28039 | if ( $id_scan_state eq '$' ) { # starting variable name | ||||
| 28040 | |||||
| 28041 | if ( $tok eq '$' ) { | ||||
| 28042 | |||||
| 28043 | $identifier .= $tok; | ||||
| 28044 | |||||
| 28045 | # we've got a punctuation variable if end of line (punct.t) | ||||
| 28046 | if ( $i == $max_token_index ) { | ||||
| 28047 | $type = 'i'; | ||||
| 28048 | $id_scan_state = ''; | ||||
| 28049 | last; | ||||
| 28050 | } | ||||
| 28051 | } | ||||
| 28052 | elsif ( $tok =~ /^[A-Za-z_]/ ) { # alphanumeric .. | ||||
| 28053 | $saw_alpha = 1; | ||||
| 28054 | $id_scan_state = ':'; # now need :: | ||||
| 28055 | $identifier .= $tok; | ||||
| 28056 | } | ||||
| 28057 | elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric .. | ||||
| 28058 | $saw_alpha = 1; | ||||
| 28059 | $id_scan_state = ':'; # now need :: | ||||
| 28060 | $identifier .= $tok; | ||||
| 28061 | |||||
| 28062 | # Perl will accept leading digits in identifiers, | ||||
| 28063 | # although they may not always produce useful results. | ||||
| 28064 | # Something like $main::0 is ok. But this also works: | ||||
| 28065 | # | ||||
| 28066 | # sub howdy::123::bubba{ print "bubba $54321!\n" } | ||||
| 28067 | # howdy::123::bubba(); | ||||
| 28068 | # | ||||
| 28069 | } | ||||
| 28070 | elsif ( $tok =~ /^[0-9]/ ) { # numeric | ||||
| 28071 | $saw_alpha = 1; | ||||
| 28072 | $id_scan_state = ':'; # now need :: | ||||
| 28073 | $identifier .= $tok; | ||||
| 28074 | } | ||||
| 28075 | elsif ( $tok eq '::' ) { | ||||
| 28076 | $id_scan_state = 'A'; | ||||
| 28077 | $identifier .= $tok; | ||||
| 28078 | } | ||||
| 28079 | elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) { # $#array | ||||
| 28080 | $identifier .= $tok; # keep same state, a $ could follow | ||||
| 28081 | } | ||||
| 28082 | elsif ( $tok eq '{' ) { | ||||
| 28083 | |||||
| 28084 | # check for something like ${#} or ${©} | ||||
| 28085 | ##if ( $identifier eq '$' | ||||
| 28086 | if ( | ||||
| 28087 | ( | ||||
| 28088 | $identifier eq '$' | ||||
| 28089 | || $identifier eq '@' | ||||
| 28090 | || $identifier eq '$#' | ||||
| 28091 | ) | ||||
| 28092 | && $i + 2 <= $max_token_index | ||||
| 28093 | && $$rtokens[ $i + 2 ] eq '}' | ||||
| 28094 | && $$rtokens[ $i + 1 ] !~ /[\s\w]/ | ||||
| 28095 | ) | ||||
| 28096 | { | ||||
| 28097 | my $next2 = $$rtokens[ $i + 2 ]; | ||||
| 28098 | my $next1 = $$rtokens[ $i + 1 ]; | ||||
| 28099 | $identifier .= $tok . $next1 . $next2; | ||||
| 28100 | $i += 2; | ||||
| 28101 | $id_scan_state = ''; | ||||
| 28102 | last; | ||||
| 28103 | } | ||||
| 28104 | |||||
| 28105 | # skip something like ${xxx} or ->{ | ||||
| 28106 | $id_scan_state = ''; | ||||
| 28107 | |||||
| 28108 | # if this is the first token of a line, any tokens for this | ||||
| 28109 | # identifier have already been accumulated | ||||
| 28110 | if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; } | ||||
| 28111 | $i = $i_save; | ||||
| 28112 | last; | ||||
| 28113 | } | ||||
| 28114 | |||||
| 28115 | # space ok after leading $ % * & @ | ||||
| 28116 | elsif ( $tok =~ /^\s*$/ ) { | ||||
| 28117 | |||||
| 28118 | if ( $identifier =~ /^[\$\%\*\&\@]/ ) { | ||||
| 28119 | |||||
| 28120 | if ( length($identifier) > 1 ) { | ||||
| 28121 | $id_scan_state = ''; | ||||
| 28122 | $i = $i_save; | ||||
| 28123 | $type = 'i'; # probably punctuation variable | ||||
| 28124 | last; | ||||
| 28125 | } | ||||
| 28126 | else { | ||||
| 28127 | |||||
| 28128 | # spaces after $'s are common, and space after @ | ||||
| 28129 | # is harmless, so only complain about space | ||||
| 28130 | # after other type characters. Space after $ and | ||||
| 28131 | # @ will be removed in formatting. Report space | ||||
| 28132 | # after % and * because they might indicate a | ||||
| 28133 | # parsing error. In other words '% ' might be a | ||||
| 28134 | # modulo operator. Delete this warning if it | ||||
| 28135 | # gets annoying. | ||||
| 28136 | if ( $identifier !~ /^[\@\$]$/ ) { | ||||
| 28137 | $message = | ||||
| 28138 | "Space in identifier, following $identifier\n"; | ||||
| 28139 | } | ||||
| 28140 | } | ||||
| 28141 | } | ||||
| 28142 | |||||
| 28143 | # else: | ||||
| 28144 | # space after '->' is ok | ||||
| 28145 | } | ||||
| 28146 | elsif ( $tok eq '^' ) { | ||||
| 28147 | |||||
| 28148 | # check for some special variables like $^W | ||||
| 28149 | if ( $identifier =~ /^[\$\*\@\%]$/ ) { | ||||
| 28150 | $identifier .= $tok; | ||||
| 28151 | $id_scan_state = 'A'; | ||||
| 28152 | |||||
| 28153 | # Perl accepts '$^]' or '@^]', but | ||||
| 28154 | # there must not be a space before the ']'. | ||||
| 28155 | my $next1 = $$rtokens[ $i + 1 ]; | ||||
| 28156 | if ( $next1 eq ']' ) { | ||||
| 28157 | $i++; | ||||
| 28158 | $identifier .= $next1; | ||||
| 28159 | $id_scan_state = ""; | ||||
| 28160 | last; | ||||
| 28161 | } | ||||
| 28162 | } | ||||
| 28163 | else { | ||||
| 28164 | $id_scan_state = ''; | ||||
| 28165 | } | ||||
| 28166 | } | ||||
| 28167 | else { # something else | ||||
| 28168 | |||||
| 28169 | # check for various punctuation variables | ||||
| 28170 | if ( $identifier =~ /^[\$\*\@\%]$/ ) { | ||||
| 28171 | $identifier .= $tok; | ||||
| 28172 | } | ||||
| 28173 | |||||
| 28174 | elsif ( $identifier eq '$#' ) { | ||||
| 28175 | |||||
| 28176 | if ( $tok eq '{' ) { $type = 'i'; $i = $i_save } | ||||
| 28177 | |||||
| 28178 | # perl seems to allow just these: $#: $#- $#+ | ||||
| 28179 | elsif ( $tok =~ /^[\:\-\+]$/ ) { | ||||
| 28180 | $type = 'i'; | ||||
| 28181 | $identifier .= $tok; | ||||
| 28182 | } | ||||
| 28183 | else { | ||||
| 28184 | $i = $i_save; | ||||
| 28185 | write_logfile_entry( 'Use of $# is deprecated' . "\n" ); | ||||
| 28186 | } | ||||
| 28187 | } | ||||
| 28188 | elsif ( $identifier eq '$$' ) { | ||||
| 28189 | |||||
| 28190 | # perl does not allow references to punctuation | ||||
| 28191 | # variables without braces. For example, this | ||||
| 28192 | # won't work: | ||||
| 28193 | # $:=\4; | ||||
| 28194 | # $a = $$:; | ||||
| 28195 | # You would have to use | ||||
| 28196 | # $a = ${$:}; | ||||
| 28197 | |||||
| 28198 | $i = $i_save; | ||||
| 28199 | if ( $tok eq '{' ) { $type = 't' } | ||||
| 28200 | else { $type = 'i' } | ||||
| 28201 | } | ||||
| 28202 | elsif ( $identifier eq '->' ) { | ||||
| 28203 | $i = $i_save; | ||||
| 28204 | } | ||||
| 28205 | else { | ||||
| 28206 | $i = $i_save; | ||||
| 28207 | if ( length($identifier) == 1 ) { $identifier = ''; } | ||||
| 28208 | } | ||||
| 28209 | $id_scan_state = ''; | ||||
| 28210 | last; | ||||
| 28211 | } | ||||
| 28212 | } | ||||
| 28213 | elsif ( $id_scan_state eq '&' ) { # starting sub call? | ||||
| 28214 | |||||
| 28215 | if ( $tok =~ /^[\$A-Za-z_]/ ) { # alphanumeric .. | ||||
| 28216 | $id_scan_state = ':'; # now need :: | ||||
| 28217 | $saw_alpha = 1; | ||||
| 28218 | $identifier .= $tok; | ||||
| 28219 | } | ||||
| 28220 | elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric .. | ||||
| 28221 | $id_scan_state = ':'; # now need :: | ||||
| 28222 | $saw_alpha = 1; | ||||
| 28223 | $identifier .= $tok; | ||||
| 28224 | } | ||||
| 28225 | elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above | ||||
| 28226 | $id_scan_state = ':'; # now need :: | ||||
| 28227 | $saw_alpha = 1; | ||||
| 28228 | $identifier .= $tok; | ||||
| 28229 | } | ||||
| 28230 | elsif ( $tok =~ /^\s*$/ ) { # allow space | ||||
| 28231 | } | ||||
| 28232 | elsif ( $tok eq '::' ) { # leading :: | ||||
| 28233 | $id_scan_state = 'A'; # accept alpha next | ||||
| 28234 | $identifier .= $tok; | ||||
| 28235 | } | ||||
| 28236 | elsif ( $tok eq '{' ) { | ||||
| 28237 | if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; } | ||||
| 28238 | $i = $i_save; | ||||
| 28239 | $id_scan_state = ''; | ||||
| 28240 | last; | ||||
| 28241 | } | ||||
| 28242 | else { | ||||
| 28243 | |||||
| 28244 | # punctuation variable? | ||||
| 28245 | # testfile: cunningham4.pl | ||||
| 28246 | # | ||||
| 28247 | # We have to be careful here. If we are in an unknown state, | ||||
| 28248 | # we will reject the punctuation variable. In the following | ||||
| 28249 | # example the '&' is a binary operator but we are in an unknown | ||||
| 28250 | # state because there is no sigil on 'Prima', so we don't | ||||
| 28251 | # know what it is. But it is a bad guess that | ||||
| 28252 | # '&~' is a function variable. | ||||
| 28253 | # $self->{text}->{colorMap}->[ | ||||
| 28254 | # Prima::PodView::COLOR_CODE_FOREGROUND | ||||
| 28255 | # & ~tb::COLOR_INDEX ] = | ||||
| 28256 | # $sec->{ColorCode} | ||||
| 28257 | if ( $identifier eq '&' && $expecting ) { | ||||
| 28258 | $identifier .= $tok; | ||||
| 28259 | } | ||||
| 28260 | else { | ||||
| 28261 | $identifier = ''; | ||||
| 28262 | $i = $i_save; | ||||
| 28263 | $type = '&'; | ||||
| 28264 | } | ||||
| 28265 | $id_scan_state = ''; | ||||
| 28266 | last; | ||||
| 28267 | } | ||||
| 28268 | } | ||||
| 28269 | elsif ( $id_scan_state eq 'A' ) { # looking for alpha (after ::) | ||||
| 28270 | |||||
| 28271 | if ( $tok =~ /^[A-Za-z_]/ ) { # found it | ||||
| 28272 | $identifier .= $tok; | ||||
| 28273 | $id_scan_state = ':'; # now need :: | ||||
| 28274 | $saw_alpha = 1; | ||||
| 28275 | } | ||||
| 28276 | elsif ( $tok eq "'" && $allow_tick ) { | ||||
| 28277 | $identifier .= $tok; | ||||
| 28278 | $id_scan_state = ':'; # now need :: | ||||
| 28279 | $saw_alpha = 1; | ||||
| 28280 | } | ||||
| 28281 | elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above | ||||
| 28282 | $identifier .= $tok; | ||||
| 28283 | $id_scan_state = ':'; # now need :: | ||||
| 28284 | $saw_alpha = 1; | ||||
| 28285 | } | ||||
| 28286 | elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) { | ||||
| 28287 | $id_scan_state = '('; | ||||
| 28288 | $identifier .= $tok; | ||||
| 28289 | } | ||||
| 28290 | elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) { | ||||
| 28291 | $id_scan_state = ')'; | ||||
| 28292 | $identifier .= $tok; | ||||
| 28293 | } | ||||
| 28294 | else { | ||||
| 28295 | $id_scan_state = ''; | ||||
| 28296 | $i = $i_save; | ||||
| 28297 | last; | ||||
| 28298 | } | ||||
| 28299 | } | ||||
| 28300 | elsif ( $id_scan_state eq ':' ) { # looking for :: after alpha | ||||
| 28301 | |||||
| 28302 | if ( $tok eq '::' ) { # got it | ||||
| 28303 | $identifier .= $tok; | ||||
| 28304 | $id_scan_state = 'A'; # now require alpha | ||||
| 28305 | } | ||||
| 28306 | elsif ( $tok =~ /^[A-Za-z_]/ ) { # more alphanumeric is ok here | ||||
| 28307 | $identifier .= $tok; | ||||
| 28308 | $id_scan_state = ':'; # now need :: | ||||
| 28309 | $saw_alpha = 1; | ||||
| 28310 | } | ||||
| 28311 | elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above | ||||
| 28312 | $identifier .= $tok; | ||||
| 28313 | $id_scan_state = ':'; # now need :: | ||||
| 28314 | $saw_alpha = 1; | ||||
| 28315 | } | ||||
| 28316 | elsif ( $tok eq "'" && $allow_tick ) { # tick | ||||
| 28317 | |||||
| 28318 | if ( $is_keyword{$identifier} ) { | ||||
| 28319 | $id_scan_state = ''; # that's all | ||||
| 28320 | $i = $i_save; | ||||
| 28321 | } | ||||
| 28322 | else { | ||||
| 28323 | $identifier .= $tok; | ||||
| 28324 | } | ||||
| 28325 | } | ||||
| 28326 | elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) { | ||||
| 28327 | $id_scan_state = '('; | ||||
| 28328 | $identifier .= $tok; | ||||
| 28329 | } | ||||
| 28330 | elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) { | ||||
| 28331 | $id_scan_state = ')'; | ||||
| 28332 | $identifier .= $tok; | ||||
| 28333 | } | ||||
| 28334 | else { | ||||
| 28335 | $id_scan_state = ''; # that's all | ||||
| 28336 | $i = $i_save; | ||||
| 28337 | last; | ||||
| 28338 | } | ||||
| 28339 | } | ||||
| 28340 | elsif ( $id_scan_state eq '(' ) { # looking for ( of prototype | ||||
| 28341 | |||||
| 28342 | if ( $tok eq '(' ) { # got it | ||||
| 28343 | $identifier .= $tok; | ||||
| 28344 | $id_scan_state = ')'; # now find the end of it | ||||
| 28345 | } | ||||
| 28346 | elsif ( $tok =~ /^\s*$/ ) { # blank - keep going | ||||
| 28347 | $identifier .= $tok; | ||||
| 28348 | } | ||||
| 28349 | else { | ||||
| 28350 | $id_scan_state = ''; # that's all - no prototype | ||||
| 28351 | $i = $i_save; | ||||
| 28352 | last; | ||||
| 28353 | } | ||||
| 28354 | } | ||||
| 28355 | elsif ( $id_scan_state eq ')' ) { # looking for ) to end | ||||
| 28356 | |||||
| 28357 | if ( $tok eq ')' ) { # got it | ||||
| 28358 | $identifier .= $tok; | ||||
| 28359 | $id_scan_state = ''; # all done | ||||
| 28360 | last; | ||||
| 28361 | } | ||||
| 28362 | elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) { | ||||
| 28363 | $identifier .= $tok; | ||||
| 28364 | } | ||||
| 28365 | else { # probable error in script, but keep going | ||||
| 28366 | warning("Unexpected '$tok' while seeking end of prototype\n"); | ||||
| 28367 | $identifier .= $tok; | ||||
| 28368 | } | ||||
| 28369 | } | ||||
| 28370 | else { # can get here due to error in initialization | ||||
| 28371 | $id_scan_state = ''; | ||||
| 28372 | $i = $i_save; | ||||
| 28373 | last; | ||||
| 28374 | } | ||||
| 28375 | } | ||||
| 28376 | |||||
| 28377 | if ( $id_scan_state eq ')' ) { | ||||
| 28378 | warning("Hit end of line while seeking ) to end prototype\n"); | ||||
| 28379 | } | ||||
| 28380 | |||||
| 28381 | # once we enter the actual identifier, it may not extend beyond | ||||
| 28382 | # the end of the current line | ||||
| 28383 | if ( $id_scan_state =~ /^[A\:\(\)]/ ) { | ||||
| 28384 | $id_scan_state = ''; | ||||
| 28385 | } | ||||
| 28386 | if ( $i < 0 ) { $i = 0 } | ||||
| 28387 | |||||
| 28388 | unless ($type) { | ||||
| 28389 | |||||
| 28390 | if ($saw_type) { | ||||
| 28391 | |||||
| 28392 | if ($saw_alpha) { | ||||
| 28393 | if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) { | ||||
| 28394 | $type = 'w'; | ||||
| 28395 | } | ||||
| 28396 | else { $type = 'i' } | ||||
| 28397 | } | ||||
| 28398 | elsif ( $identifier eq '->' ) { | ||||
| 28399 | $type = '->'; | ||||
| 28400 | } | ||||
| 28401 | elsif ( | ||||
| 28402 | ( length($identifier) > 1 ) | ||||
| 28403 | |||||
| 28404 | # In something like '@$=' we have an identifier '@$' | ||||
| 28405 | # In something like '$${' we have type '$$' (and only | ||||
| 28406 | # part of an identifier) | ||||
| 28407 | && !( $identifier =~ /\$$/ && $tok eq '{' ) | ||||
| 28408 | && ( $identifier !~ /^(sub |package )$/ ) | ||||
| 28409 | ) | ||||
| 28410 | { | ||||
| 28411 | $type = 'i'; | ||||
| 28412 | } | ||||
| 28413 | else { $type = 't' } | ||||
| 28414 | } | ||||
| 28415 | elsif ($saw_alpha) { | ||||
| 28416 | |||||
| 28417 | # type 'w' includes anything without leading type info | ||||
| 28418 | # ($,%,@,*) including something like abc::def::ghi | ||||
| 28419 | $type = 'w'; | ||||
| 28420 | } | ||||
| 28421 | else { | ||||
| 28422 | $type = ''; | ||||
| 28423 | } # this can happen on a restart | ||||
| 28424 | } | ||||
| 28425 | |||||
| 28426 | if ($identifier) { | ||||
| 28427 | $tok = $identifier; | ||||
| 28428 | if ($message) { write_logfile_entry($message) } | ||||
| 28429 | } | ||||
| 28430 | else { | ||||
| 28431 | $tok = $tok_begin; | ||||
| 28432 | $i = $i_begin; | ||||
| 28433 | } | ||||
| 28434 | |||||
| 28435 | TOKENIZER_DEBUG_FLAG_SCAN_ID && do { | ||||
| 28436 | my ( $a, $b, $c ) = caller; | ||||
| 28437 | print STDOUT | ||||
| 28438 | "SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n"; | ||||
| 28439 | print STDOUT | ||||
| 28440 | "SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n"; | ||||
| 28441 | }; | ||||
| 28442 | return ( $i, $tok, $type, $id_scan_state, $identifier ); | ||||
| 28443 | } | ||||
| 28444 | |||||
| 28445 | { | ||||
| 28446 | |||||
| 28447 | # saved package and subnames in case prototype is on separate line | ||||
| 28448 | 2 | 300ns | my ( $package_saved, $subname_saved ); | ||
| 28449 | |||||
| 28450 | sub do_scan_sub { | ||||
| 28451 | |||||
| 28452 | # do_scan_sub parses a sub name and prototype | ||||
| 28453 | # it is called with $i_beg equal to the index of the first nonblank | ||||
| 28454 | # token following a 'sub' token. | ||||
| 28455 | |||||
| 28456 | # TODO: add future error checks to be sure we have a valid | ||||
| 28457 | # sub name. For example, 'sub &doit' is wrong. Also, be sure | ||||
| 28458 | # a name is given if and only if a non-anonymous sub is | ||||
| 28459 | # appropriate. | ||||
| 28460 | # USES GLOBAL VARS: $current_package, $last_nonblank_token, | ||||
| 28461 | # $in_attribute_list, %saw_function_definition, | ||||
| 28462 | # $statement_type | ||||
| 28463 | |||||
| 28464 | my ( | ||||
| 28465 | $input_line, $i, $i_beg, | ||||
| 28466 | $tok, $type, $rtokens, | ||||
| 28467 | $rtoken_map, $id_scan_state, $max_token_index | ||||
| 28468 | ) = @_; | ||||
| 28469 | $id_scan_state = ""; # normally we get everything in one call | ||||
| 28470 | my $subname = undef; | ||||
| 28471 | my $package = undef; | ||||
| 28472 | my $proto = undef; | ||||
| 28473 | my $attrs = undef; | ||||
| 28474 | my $match; | ||||
| 28475 | |||||
| 28476 | my $pos_beg = $$rtoken_map[$i_beg]; | ||||
| 28477 | pos($input_line) = $pos_beg; | ||||
| 28478 | |||||
| 28479 | # sub NAME PROTO ATTRS | ||||
| 28480 | if ( | ||||
| 28481 | $input_line =~ m/\G\s* | ||||
| 28482 | ((?:\w*(?:'|::))*) # package - something that ends in :: or ' | ||||
| 28483 | (\w+) # NAME - required | ||||
| 28484 | (\s*\([^){]*\))? # PROTO - something in parens | ||||
| 28485 | (\s*:)? # ATTRS - leading : of attribute list | ||||
| 28486 | /gcx | ||||
| 28487 | ) | ||||
| 28488 | { | ||||
| 28489 | $match = 1; | ||||
| 28490 | $subname = $2; | ||||
| 28491 | $proto = $3; | ||||
| 28492 | $attrs = $4; | ||||
| 28493 | |||||
| 28494 | $package = ( defined($1) && $1 ) ? $1 : $current_package; | ||||
| 28495 | $package =~ s/\'/::/g; | ||||
| 28496 | if ( $package =~ /^\:/ ) { $package = 'main' . $package } | ||||
| 28497 | $package =~ s/::$//; | ||||
| 28498 | my $pos = pos($input_line); | ||||
| 28499 | my $numc = $pos - $pos_beg; | ||||
| 28500 | $tok = 'sub ' . substr( $input_line, $pos_beg, $numc ); | ||||
| 28501 | $type = 'i'; | ||||
| 28502 | } | ||||
| 28503 | |||||
| 28504 | # Look for prototype/attributes not preceded on this line by subname; | ||||
| 28505 | # This might be an anonymous sub with attributes, | ||||
| 28506 | # or a prototype on a separate line from its sub name | ||||
| 28507 | elsif ( | ||||
| 28508 | $input_line =~ m/\G(\s*\([^){]*\))? # PROTO | ||||
| 28509 | (\s*:)? # ATTRS leading ':' | ||||
| 28510 | /gcx | ||||
| 28511 | && ( $1 || $2 ) | ||||
| 28512 | ) | ||||
| 28513 | { | ||||
| 28514 | $match = 1; | ||||
| 28515 | $proto = $1; | ||||
| 28516 | $attrs = $2; | ||||
| 28517 | |||||
| 28518 | # Handle prototype on separate line from subname | ||||
| 28519 | if ($subname_saved) { | ||||
| 28520 | $package = $package_saved; | ||||
| 28521 | $subname = $subname_saved; | ||||
| 28522 | $tok = $last_nonblank_token; | ||||
| 28523 | } | ||||
| 28524 | $type = 'i'; | ||||
| 28525 | } | ||||
| 28526 | |||||
| 28527 | if ($match) { | ||||
| 28528 | |||||
| 28529 | # ATTRS: if there are attributes, back up and let the ':' be | ||||
| 28530 | # found later by the scanner. | ||||
| 28531 | my $pos = pos($input_line); | ||||
| 28532 | if ($attrs) { | ||||
| 28533 | $pos -= length($attrs); | ||||
| 28534 | } | ||||
| 28535 | |||||
| 28536 | my $next_nonblank_token = $tok; | ||||
| 28537 | |||||
| 28538 | # catch case of line with leading ATTR ':' after anonymous sub | ||||
| 28539 | if ( $pos == $pos_beg && $tok eq ':' ) { | ||||
| 28540 | $type = 'A'; | ||||
| 28541 | $in_attribute_list = 1; | ||||
| 28542 | } | ||||
| 28543 | |||||
| 28544 | # We must convert back from character position | ||||
| 28545 | # to pre_token index. | ||||
| 28546 | else { | ||||
| 28547 | |||||
| 28548 | # I don't think an error flag can occur here ..but ? | ||||
| 28549 | my $error; | ||||
| 28550 | ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map, | ||||
| 28551 | $max_token_index ); | ||||
| 28552 | if ($error) { warning("Possibly invalid sub\n") } | ||||
| 28553 | |||||
| 28554 | # check for multiple definitions of a sub | ||||
| 28555 | ( $next_nonblank_token, my $i_next ) = | ||||
| 28556 | find_next_nonblank_token_on_this_line( $i, $rtokens, | ||||
| 28557 | $max_token_index ); | ||||
| 28558 | } | ||||
| 28559 | |||||
| 28560 | if ( $next_nonblank_token =~ /^(\s*|#)$/ ) | ||||
| 28561 | { # skip blank or side comment | ||||
| 28562 | my ( $rpre_tokens, $rpre_types ) = | ||||
| 28563 | peek_ahead_for_n_nonblank_pre_tokens(1); | ||||
| 28564 | if ( defined($rpre_tokens) && @$rpre_tokens ) { | ||||
| 28565 | $next_nonblank_token = $rpre_tokens->[0]; | ||||
| 28566 | } | ||||
| 28567 | else { | ||||
| 28568 | $next_nonblank_token = '}'; | ||||
| 28569 | } | ||||
| 28570 | } | ||||
| 28571 | $package_saved = ""; | ||||
| 28572 | $subname_saved = ""; | ||||
| 28573 | if ( $next_nonblank_token eq '{' ) { | ||||
| 28574 | if ($subname) { | ||||
| 28575 | |||||
| 28576 | # Check for multiple definitions of a sub, but | ||||
| 28577 | # it is ok to have multiple sub BEGIN, etc, | ||||
| 28578 | # so we do not complain if name is all caps | ||||
| 28579 | if ( $saw_function_definition{$package}{$subname} | ||||
| 28580 | && $subname !~ /^[A-Z]+$/ ) | ||||
| 28581 | { | ||||
| 28582 | my $lno = $saw_function_definition{$package}{$subname}; | ||||
| 28583 | warning( | ||||
| 28584 | "already saw definition of 'sub $subname' in package '$package' at line $lno\n" | ||||
| 28585 | ); | ||||
| 28586 | } | ||||
| 28587 | $saw_function_definition{$package}{$subname} = | ||||
| 28588 | $tokenizer_self->{_last_line_number}; | ||||
| 28589 | } | ||||
| 28590 | } | ||||
| 28591 | elsif ( $next_nonblank_token eq ';' ) { | ||||
| 28592 | } | ||||
| 28593 | elsif ( $next_nonblank_token eq '}' ) { | ||||
| 28594 | } | ||||
| 28595 | |||||
| 28596 | # ATTRS - if an attribute list follows, remember the name | ||||
| 28597 | # of the sub so the next opening brace can be labeled. | ||||
| 28598 | # Setting 'statement_type' causes any ':'s to introduce | ||||
| 28599 | # attributes. | ||||
| 28600 | elsif ( $next_nonblank_token eq ':' ) { | ||||
| 28601 | $statement_type = $tok; | ||||
| 28602 | } | ||||
| 28603 | |||||
| 28604 | # see if PROTO follows on another line: | ||||
| 28605 | elsif ( $next_nonblank_token eq '(' ) { | ||||
| 28606 | if ( $attrs || $proto ) { | ||||
| 28607 | warning( | ||||
| 28608 | "unexpected '(' after definition or declaration of sub '$subname'\n" | ||||
| 28609 | ); | ||||
| 28610 | } | ||||
| 28611 | else { | ||||
| 28612 | $id_scan_state = 'sub'; # we must come back to get proto | ||||
| 28613 | $statement_type = $tok; | ||||
| 28614 | $package_saved = $package; | ||||
| 28615 | $subname_saved = $subname; | ||||
| 28616 | } | ||||
| 28617 | } | ||||
| 28618 | elsif ($next_nonblank_token) { # EOF technically ok | ||||
| 28619 | warning( | ||||
| 28620 | "expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n" | ||||
| 28621 | ); | ||||
| 28622 | } | ||||
| 28623 | check_prototype( $proto, $package, $subname ); | ||||
| 28624 | } | ||||
| 28625 | |||||
| 28626 | # no match but line not blank | ||||
| 28627 | else { | ||||
| 28628 | } | ||||
| 28629 | return ( $i, $tok, $type, $id_scan_state ); | ||||
| 28630 | } | ||||
| 28631 | } | ||||
| 28632 | |||||
| 28633 | #########i############################################################### | ||||
| 28634 | # Tokenizer utility routines which may use CONSTANTS but no other GLOBALS | ||||
| 28635 | ######################################################################### | ||||
| 28636 | |||||
| 28637 | sub find_next_nonblank_token { | ||||
| 28638 | my ( $i, $rtokens, $max_token_index ) = @_; | ||||
| 28639 | |||||
| 28640 | if ( $i >= $max_token_index ) { | ||||
| 28641 | if ( !peeked_ahead() ) { | ||||
| 28642 | peeked_ahead(1); | ||||
| 28643 | $rtokens = | ||||
| 28644 | peek_ahead_for_nonblank_token( $rtokens, $max_token_index ); | ||||
| 28645 | } | ||||
| 28646 | } | ||||
| 28647 | my $next_nonblank_token = $$rtokens[ ++$i ]; | ||||
| 28648 | |||||
| 28649 | if ( $next_nonblank_token =~ /^\s*$/ ) { | ||||
| 28650 | $next_nonblank_token = $$rtokens[ ++$i ]; | ||||
| 28651 | } | ||||
| 28652 | return ( $next_nonblank_token, $i ); | ||||
| 28653 | } | ||||
| 28654 | |||||
| 28655 | sub numerator_expected { | ||||
| 28656 | |||||
| 28657 | # this is a filter for a possible numerator, in support of guessing | ||||
| 28658 | # for the / pattern delimiter token. | ||||
| 28659 | # returns - | ||||
| 28660 | # 1 - yes | ||||
| 28661 | # 0 - can't tell | ||||
| 28662 | # -1 - no | ||||
| 28663 | # Note: I am using the convention that variables ending in | ||||
| 28664 | # _expected have these 3 possible values. | ||||
| 28665 | my ( $i, $rtokens, $max_token_index ) = @_; | ||||
| 28666 | my $next_token = $$rtokens[ $i + 1 ]; | ||||
| 28667 | if ( $next_token eq '=' ) { $i++; } # handle /= | ||||
| 28668 | my ( $next_nonblank_token, $i_next ) = | ||||
| 28669 | find_next_nonblank_token( $i, $rtokens, $max_token_index ); | ||||
| 28670 | |||||
| 28671 | if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) { | ||||
| 28672 | 1; | ||||
| 28673 | } | ||||
| 28674 | else { | ||||
| 28675 | |||||
| 28676 | if ( $next_nonblank_token =~ /^\s*$/ ) { | ||||
| 28677 | 0; | ||||
| 28678 | } | ||||
| 28679 | else { | ||||
| 28680 | -1; | ||||
| 28681 | } | ||||
| 28682 | } | ||||
| 28683 | } | ||||
| 28684 | |||||
| 28685 | sub pattern_expected { | ||||
| 28686 | |||||
| 28687 | # This is the start of a filter for a possible pattern. | ||||
| 28688 | # It looks at the token after a possible pattern and tries to | ||||
| 28689 | # determine if that token could end a pattern. | ||||
| 28690 | # returns - | ||||
| 28691 | # 1 - yes | ||||
| 28692 | # 0 - can't tell | ||||
| 28693 | # -1 - no | ||||
| 28694 | my ( $i, $rtokens, $max_token_index ) = @_; | ||||
| 28695 | my $next_token = $$rtokens[ $i + 1 ]; | ||||
| 28696 | if ( $next_token =~ /^[msixpodualgc]/ ) { $i++; } # skip possible modifier | ||||
| 28697 | my ( $next_nonblank_token, $i_next ) = | ||||
| 28698 | find_next_nonblank_token( $i, $rtokens, $max_token_index ); | ||||
| 28699 | |||||
| 28700 | # list of tokens which may follow a pattern | ||||
| 28701 | # (can probably be expanded) | ||||
| 28702 | if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ ) | ||||
| 28703 | { | ||||
| 28704 | 1; | ||||
| 28705 | } | ||||
| 28706 | else { | ||||
| 28707 | |||||
| 28708 | if ( $next_nonblank_token =~ /^\s*$/ ) { | ||||
| 28709 | 0; | ||||
| 28710 | } | ||||
| 28711 | else { | ||||
| 28712 | -1; | ||||
| 28713 | } | ||||
| 28714 | } | ||||
| 28715 | } | ||||
| 28716 | |||||
| 28717 | sub find_next_nonblank_token_on_this_line { | ||||
| 28718 | my ( $i, $rtokens, $max_token_index ) = @_; | ||||
| 28719 | my $next_nonblank_token; | ||||
| 28720 | |||||
| 28721 | if ( $i < $max_token_index ) { | ||||
| 28722 | $next_nonblank_token = $$rtokens[ ++$i ]; | ||||
| 28723 | |||||
| 28724 | if ( $next_nonblank_token =~ /^\s*$/ ) { | ||||
| 28725 | |||||
| 28726 | if ( $i < $max_token_index ) { | ||||
| 28727 | $next_nonblank_token = $$rtokens[ ++$i ]; | ||||
| 28728 | } | ||||
| 28729 | } | ||||
| 28730 | } | ||||
| 28731 | else { | ||||
| 28732 | $next_nonblank_token = ""; | ||||
| 28733 | } | ||||
| 28734 | return ( $next_nonblank_token, $i ); | ||||
| 28735 | } | ||||
| 28736 | |||||
| 28737 | sub find_angle_operator_termination { | ||||
| 28738 | |||||
| 28739 | # We are looking at a '<' and want to know if it is an angle operator. | ||||
| 28740 | # We are to return: | ||||
| 28741 | # $i = pretoken index of ending '>' if found, current $i otherwise | ||||
| 28742 | # $type = 'Q' if found, '>' otherwise | ||||
| 28743 | my ( $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) = @_; | ||||
| 28744 | my $i = $i_beg; | ||||
| 28745 | my $type = '<'; | ||||
| 28746 | pos($input_line) = 1 + $$rtoken_map[$i]; | ||||
| 28747 | |||||
| 28748 | my $filter; | ||||
| 28749 | |||||
| 28750 | # we just have to find the next '>' if a term is expected | ||||
| 28751 | if ( $expecting == TERM ) { $filter = '[\>]' } | ||||
| 28752 | |||||
| 28753 | # we have to guess if we don't know what is expected | ||||
| 28754 | elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' } | ||||
| 28755 | |||||
| 28756 | # shouldn't happen - we shouldn't be here if operator is expected | ||||
| 28757 | else { warning("Program Bug in find_angle_operator_termination\n") } | ||||
| 28758 | |||||
| 28759 | # To illustrate what we might be looking at, in case we are | ||||
| 28760 | # guessing, here are some examples of valid angle operators | ||||
| 28761 | # (or file globs): | ||||
| 28762 | # <tmp_imp/*> | ||||
| 28763 | # <FH> | ||||
| 28764 | # <$fh> | ||||
| 28765 | # <*.c *.h> | ||||
| 28766 | # <_> | ||||
| 28767 | # <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t) | ||||
| 28768 | # <${PREFIX}*img*.$IMAGE_TYPE> | ||||
| 28769 | # <img*.$IMAGE_TYPE> | ||||
| 28770 | # <Timg*.$IMAGE_TYPE> | ||||
| 28771 | # <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl> | ||||
| 28772 | # | ||||
| 28773 | # Here are some examples of lines which do not have angle operators: | ||||
| 28774 | # return undef unless $self->[2]++ < $#{$self->[1]}; | ||||
| 28775 | # < 2 || @$t > | ||||
| 28776 | # | ||||
| 28777 | # the following line from dlister.pl caused trouble: | ||||
| 28778 | # print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n"; | ||||
| 28779 | # | ||||
| 28780 | # If the '<' starts an angle operator, it must end on this line and | ||||
| 28781 | # it must not have certain characters like ';' and '=' in it. I use | ||||
| 28782 | # this to limit the testing. This filter should be improved if | ||||
| 28783 | # possible. | ||||
| 28784 | |||||
| 28785 | if ( $input_line =~ /($filter)/g ) { | ||||
| 28786 | |||||
| 28787 | if ( $1 eq '>' ) { | ||||
| 28788 | |||||
| 28789 | # We MAY have found an angle operator termination if we get | ||||
| 28790 | # here, but we need to do more to be sure we haven't been | ||||
| 28791 | # fooled. | ||||
| 28792 | my $pos = pos($input_line); | ||||
| 28793 | |||||
| 28794 | my $pos_beg = $$rtoken_map[$i]; | ||||
| 28795 | my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) ); | ||||
| 28796 | |||||
| 28797 | # Reject if the closing '>' follows a '-' as in: | ||||
| 28798 | # if ( VERSION < 5.009 && $op-> name eq 'assign' ) { } | ||||
| 28799 | if ( $expecting eq UNKNOWN ) { | ||||
| 28800 | my $check = substr( $input_line, $pos - 2, 1 ); | ||||
| 28801 | if ( $check eq '-' ) { | ||||
| 28802 | return ( $i, $type ); | ||||
| 28803 | } | ||||
| 28804 | } | ||||
| 28805 | |||||
| 28806 | ######################################debug##### | ||||
| 28807 | #write_diagnostics( "ANGLE? :$str\n"); | ||||
| 28808 | #print "ANGLE: found $1 at pos=$pos str=$str check=$check\n"; | ||||
| 28809 | ######################################debug##### | ||||
| 28810 | $type = 'Q'; | ||||
| 28811 | my $error; | ||||
| 28812 | ( $i, $error ) = | ||||
| 28813 | inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); | ||||
| 28814 | |||||
| 28815 | # It may be possible that a quote ends midway in a pretoken. | ||||
| 28816 | # If this happens, it may be necessary to split the pretoken. | ||||
| 28817 | if ($error) { | ||||
| 28818 | warning( | ||||
| 28819 | "Possible tokinization error..please check this line\n"); | ||||
| 28820 | report_possible_bug(); | ||||
| 28821 | } | ||||
| 28822 | |||||
| 28823 | # Now let's see where we stand.... | ||||
| 28824 | # OK if math op not possible | ||||
| 28825 | if ( $expecting == TERM ) { | ||||
| 28826 | } | ||||
| 28827 | |||||
| 28828 | # OK if there are no more than 2 pre-tokens inside | ||||
| 28829 | # (not possible to write 2 token math between < and >) | ||||
| 28830 | # This catches most common cases | ||||
| 28831 | elsif ( $i <= $i_beg + 3 ) { | ||||
| 28832 | write_diagnostics("ANGLE(1 or 2 tokens): $str\n"); | ||||
| 28833 | } | ||||
| 28834 | |||||
| 28835 | # Not sure.. | ||||
| 28836 | else { | ||||
| 28837 | |||||
| 28838 | # Let's try a Brace Test: any braces inside must balance | ||||
| 28839 | my $br = 0; | ||||
| 28840 | while ( $str =~ /\{/g ) { $br++ } | ||||
| 28841 | while ( $str =~ /\}/g ) { $br-- } | ||||
| 28842 | my $sb = 0; | ||||
| 28843 | while ( $str =~ /\[/g ) { $sb++ } | ||||
| 28844 | while ( $str =~ /\]/g ) { $sb-- } | ||||
| 28845 | my $pr = 0; | ||||
| 28846 | while ( $str =~ /\(/g ) { $pr++ } | ||||
| 28847 | while ( $str =~ /\)/g ) { $pr-- } | ||||
| 28848 | |||||
| 28849 | # if braces do not balance - not angle operator | ||||
| 28850 | if ( $br || $sb || $pr ) { | ||||
| 28851 | $i = $i_beg; | ||||
| 28852 | $type = '<'; | ||||
| 28853 | write_diagnostics( | ||||
| 28854 | "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n"); | ||||
| 28855 | } | ||||
| 28856 | |||||
| 28857 | # we should keep doing more checks here...to be continued | ||||
| 28858 | # Tentatively accepting this as a valid angle operator. | ||||
| 28859 | # There are lots more things that can be checked. | ||||
| 28860 | else { | ||||
| 28861 | write_diagnostics( | ||||
| 28862 | "ANGLE-Guessing yes: $str expecting=$expecting\n"); | ||||
| 28863 | write_logfile_entry("Guessing angle operator here: $str\n"); | ||||
| 28864 | } | ||||
| 28865 | } | ||||
| 28866 | } | ||||
| 28867 | |||||
| 28868 | # didn't find ending > | ||||
| 28869 | else { | ||||
| 28870 | if ( $expecting == TERM ) { | ||||
| 28871 | warning("No ending > for angle operator\n"); | ||||
| 28872 | } | ||||
| 28873 | } | ||||
| 28874 | } | ||||
| 28875 | return ( $i, $type ); | ||||
| 28876 | } | ||||
| 28877 | |||||
| 28878 | sub scan_number_do { | ||||
| 28879 | |||||
| 28880 | # scan a number in any of the formats that Perl accepts | ||||
| 28881 | # Underbars (_) are allowed in decimal numbers. | ||||
| 28882 | # input parameters - | ||||
| 28883 | # $input_line - the string to scan | ||||
| 28884 | # $i - pre_token index to start scanning | ||||
| 28885 | # $rtoken_map - reference to the pre_token map giving starting | ||||
| 28886 | # character position in $input_line of token $i | ||||
| 28887 | # output parameters - | ||||
| 28888 | # $i - last pre_token index of the number just scanned | ||||
| 28889 | # number - the number (characters); or undef if not a number | ||||
| 28890 | |||||
| 28891 | my ( $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = @_; | ||||
| 28892 | my $pos_beg = $$rtoken_map[$i]; | ||||
| 28893 | my $pos; | ||||
| 28894 | my $i_begin = $i; | ||||
| 28895 | my $number = undef; | ||||
| 28896 | my $type = $input_type; | ||||
| 28897 | |||||
| 28898 | my $first_char = substr( $input_line, $pos_beg, 1 ); | ||||
| 28899 | |||||
| 28900 | # Look for bad starting characters; Shouldn't happen.. | ||||
| 28901 | if ( $first_char !~ /[\d\.\+\-Ee]/ ) { | ||||
| 28902 | warning("Program bug - scan_number given character $first_char\n"); | ||||
| 28903 | report_definite_bug(); | ||||
| 28904 | return ( $i, $type, $number ); | ||||
| 28905 | } | ||||
| 28906 | |||||
| 28907 | # handle v-string without leading 'v' character ('Two Dot' rule) | ||||
| 28908 | # (vstring.t) | ||||
| 28909 | # TODO: v-strings may contain underscores | ||||
| 28910 | pos($input_line) = $pos_beg; | ||||
| 28911 | if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) { | ||||
| 28912 | $pos = pos($input_line); | ||||
| 28913 | my $numc = $pos - $pos_beg; | ||||
| 28914 | $number = substr( $input_line, $pos_beg, $numc ); | ||||
| 28915 | $type = 'v'; | ||||
| 28916 | report_v_string($number); | ||||
| 28917 | } | ||||
| 28918 | |||||
| 28919 | # handle octal, hex, binary | ||||
| 28920 | if ( !defined($number) ) { | ||||
| 28921 | pos($input_line) = $pos_beg; | ||||
| 28922 | if ( $input_line =~ | ||||
| 28923 | /\G[+-]?0(([xX][0-9a-fA-F_]+)|([0-7_]+)|([bB][01_]+))/g ) | ||||
| 28924 | { | ||||
| 28925 | $pos = pos($input_line); | ||||
| 28926 | my $numc = $pos - $pos_beg; | ||||
| 28927 | $number = substr( $input_line, $pos_beg, $numc ); | ||||
| 28928 | $type = 'n'; | ||||
| 28929 | } | ||||
| 28930 | } | ||||
| 28931 | |||||
| 28932 | # handle decimal | ||||
| 28933 | if ( !defined($number) ) { | ||||
| 28934 | pos($input_line) = $pos_beg; | ||||
| 28935 | |||||
| 28936 | if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) { | ||||
| 28937 | $pos = pos($input_line); | ||||
| 28938 | |||||
| 28939 | # watch out for things like 0..40 which would give 0. by this; | ||||
| 28940 | if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' ) | ||||
| 28941 | && ( substr( $input_line, $pos, 1 ) eq '.' ) ) | ||||
| 28942 | { | ||||
| 28943 | $pos--; | ||||
| 28944 | } | ||||
| 28945 | my $numc = $pos - $pos_beg; | ||||
| 28946 | $number = substr( $input_line, $pos_beg, $numc ); | ||||
| 28947 | $type = 'n'; | ||||
| 28948 | } | ||||
| 28949 | } | ||||
| 28950 | |||||
| 28951 | # filter out non-numbers like e + - . e2 .e3 +e6 | ||||
| 28952 | # the rule: at least one digit, and any 'e' must be preceded by a digit | ||||
| 28953 | if ( | ||||
| 28954 | $number !~ /\d/ # no digits | ||||
| 28955 | || ( $number =~ /^(.*)[eE]/ | ||||
| 28956 | && $1 !~ /\d/ ) # or no digits before the 'e' | ||||
| 28957 | ) | ||||
| 28958 | { | ||||
| 28959 | $number = undef; | ||||
| 28960 | $type = $input_type; | ||||
| 28961 | return ( $i, $type, $number ); | ||||
| 28962 | } | ||||
| 28963 | |||||
| 28964 | # Found a number; now we must convert back from character position | ||||
| 28965 | # to pre_token index. An error here implies user syntax error. | ||||
| 28966 | # An example would be an invalid octal number like '009'. | ||||
| 28967 | my $error; | ||||
| 28968 | ( $i, $error ) = | ||||
| 28969 | inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); | ||||
| 28970 | if ($error) { warning("Possibly invalid number\n") } | ||||
| 28971 | |||||
| 28972 | return ( $i, $type, $number ); | ||||
| 28973 | } | ||||
| 28974 | |||||
| 28975 | sub inverse_pretoken_map { | ||||
| 28976 | |||||
| 28977 | # Starting with the current pre_token index $i, scan forward until | ||||
| 28978 | # finding the index of the next pre_token whose position is $pos. | ||||
| 28979 | my ( $i, $pos, $rtoken_map, $max_token_index ) = @_; | ||||
| 28980 | my $error = 0; | ||||
| 28981 | |||||
| 28982 | while ( ++$i <= $max_token_index ) { | ||||
| 28983 | |||||
| 28984 | if ( $pos <= $$rtoken_map[$i] ) { | ||||
| 28985 | |||||
| 28986 | # Let the calling routine handle errors in which we do not | ||||
| 28987 | # land on a pre-token boundary. It can happen by running | ||||
| 28988 | # perltidy on some non-perl scripts, for example. | ||||
| 28989 | if ( $pos < $$rtoken_map[$i] ) { $error = 1 } | ||||
| 28990 | $i--; | ||||
| 28991 | last; | ||||
| 28992 | } | ||||
| 28993 | } | ||||
| 28994 | return ( $i, $error ); | ||||
| 28995 | } | ||||
| 28996 | |||||
| 28997 | sub find_here_doc { | ||||
| 28998 | |||||
| 28999 | # find the target of a here document, if any | ||||
| 29000 | # input parameters: | ||||
| 29001 | # $i - token index of the second < of << | ||||
| 29002 | # ($i must be less than the last token index if this is called) | ||||
| 29003 | # output parameters: | ||||
| 29004 | # $found_target = 0 didn't find target; =1 found target | ||||
| 29005 | # HERE_TARGET - the target string (may be empty string) | ||||
| 29006 | # $i - unchanged if not here doc, | ||||
| 29007 | # or index of the last token of the here target | ||||
| 29008 | # $saw_error - flag noting unbalanced quote on here target | ||||
| 29009 | my ( $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_; | ||||
| 29010 | my $ibeg = $i; | ||||
| 29011 | my $found_target = 0; | ||||
| 29012 | my $here_doc_target = ''; | ||||
| 29013 | my $here_quote_character = ''; | ||||
| 29014 | my $saw_error = 0; | ||||
| 29015 | my ( $next_nonblank_token, $i_next_nonblank, $next_token ); | ||||
| 29016 | $next_token = $$rtokens[ $i + 1 ]; | ||||
| 29017 | |||||
| 29018 | # perl allows a backslash before the target string (heredoc.t) | ||||
| 29019 | my $backslash = 0; | ||||
| 29020 | if ( $next_token eq '\\' ) { | ||||
| 29021 | $backslash = 1; | ||||
| 29022 | $next_token = $$rtokens[ $i + 2 ]; | ||||
| 29023 | } | ||||
| 29024 | |||||
| 29025 | ( $next_nonblank_token, $i_next_nonblank ) = | ||||
| 29026 | find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index ); | ||||
| 29027 | |||||
| 29028 | if ( $next_nonblank_token =~ /[\'\"\`]/ ) { | ||||
| 29029 | |||||
| 29030 | my $in_quote = 1; | ||||
| 29031 | my $quote_depth = 0; | ||||
| 29032 | my $quote_pos = 0; | ||||
| 29033 | my $quoted_string; | ||||
| 29034 | |||||
| 29035 | ( | ||||
| 29036 | $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth, | ||||
| 29037 | $quoted_string | ||||
| 29038 | ) | ||||
| 29039 | = follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens, | ||||
| 29040 | $here_quote_character, $quote_pos, $quote_depth, $max_token_index ); | ||||
| 29041 | |||||
| 29042 | if ($in_quote) { # didn't find end of quote, so no target found | ||||
| 29043 | $i = $ibeg; | ||||
| 29044 | if ( $expecting == TERM ) { | ||||
| 29045 | warning( | ||||
| 29046 | "Did not find here-doc string terminator ($here_quote_character) before end of line \n" | ||||
| 29047 | ); | ||||
| 29048 | $saw_error = 1; | ||||
| 29049 | } | ||||
| 29050 | } | ||||
| 29051 | else { # found ending quote | ||||
| 29052 | my $j; | ||||
| 29053 | $found_target = 1; | ||||
| 29054 | |||||
| 29055 | my $tokj; | ||||
| 29056 | for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) { | ||||
| 29057 | $tokj = $$rtokens[$j]; | ||||
| 29058 | |||||
| 29059 | # we have to remove any backslash before the quote character | ||||
| 29060 | # so that the here-doc-target exactly matches this string | ||||
| 29061 | next | ||||
| 29062 | if ( $tokj eq "\\" | ||||
| 29063 | && $j < $i - 1 | ||||
| 29064 | && $$rtokens[ $j + 1 ] eq $here_quote_character ); | ||||
| 29065 | $here_doc_target .= $tokj; | ||||
| 29066 | } | ||||
| 29067 | } | ||||
| 29068 | } | ||||
| 29069 | |||||
| 29070 | elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) { | ||||
| 29071 | $found_target = 1; | ||||
| 29072 | write_logfile_entry( | ||||
| 29073 | "found blank here-target after <<; suggest using \"\"\n"); | ||||
| 29074 | $i = $ibeg; | ||||
| 29075 | } | ||||
| 29076 | elsif ( $next_token =~ /^\w/ ) { # simple bareword or integer after << | ||||
| 29077 | |||||
| 29078 | my $here_doc_expected; | ||||
| 29079 | if ( $expecting == UNKNOWN ) { | ||||
| 29080 | $here_doc_expected = guess_if_here_doc($next_token); | ||||
| 29081 | } | ||||
| 29082 | else { | ||||
| 29083 | $here_doc_expected = 1; | ||||
| 29084 | } | ||||
| 29085 | |||||
| 29086 | if ($here_doc_expected) { | ||||
| 29087 | $found_target = 1; | ||||
| 29088 | $here_doc_target = $next_token; | ||||
| 29089 | $i = $ibeg + 1; | ||||
| 29090 | } | ||||
| 29091 | |||||
| 29092 | } | ||||
| 29093 | else { | ||||
| 29094 | |||||
| 29095 | if ( $expecting == TERM ) { | ||||
| 29096 | $found_target = 1; | ||||
| 29097 | write_logfile_entry("Note: bare here-doc operator <<\n"); | ||||
| 29098 | } | ||||
| 29099 | else { | ||||
| 29100 | $i = $ibeg; | ||||
| 29101 | } | ||||
| 29102 | } | ||||
| 29103 | |||||
| 29104 | # patch to neglect any prepended backslash | ||||
| 29105 | if ( $found_target && $backslash ) { $i++ } | ||||
| 29106 | |||||
| 29107 | return ( $found_target, $here_doc_target, $here_quote_character, $i, | ||||
| 29108 | $saw_error ); | ||||
| 29109 | } | ||||
| 29110 | |||||
| 29111 | sub do_quote { | ||||
| 29112 | |||||
| 29113 | # follow (or continue following) quoted string(s) | ||||
| 29114 | # $in_quote return code: | ||||
| 29115 | # 0 - ok, found end | ||||
| 29116 | # 1 - still must find end of quote whose target is $quote_character | ||||
| 29117 | # 2 - still looking for end of first of two quotes | ||||
| 29118 | # | ||||
| 29119 | # Returns updated strings: | ||||
| 29120 | # $quoted_string_1 = quoted string seen while in_quote=1 | ||||
| 29121 | # $quoted_string_2 = quoted string seen while in_quote=2 | ||||
| 29122 | my ( | ||||
| 29123 | $i, $in_quote, $quote_character, | ||||
| 29124 | $quote_pos, $quote_depth, $quoted_string_1, | ||||
| 29125 | $quoted_string_2, $rtokens, $rtoken_map, | ||||
| 29126 | $max_token_index | ||||
| 29127 | ) = @_; | ||||
| 29128 | |||||
| 29129 | my $in_quote_starting = $in_quote; | ||||
| 29130 | |||||
| 29131 | my $quoted_string; | ||||
| 29132 | if ( $in_quote == 2 ) { # two quotes/quoted_string_1s to follow | ||||
| 29133 | my $ibeg = $i; | ||||
| 29134 | ( | ||||
| 29135 | $i, $in_quote, $quote_character, $quote_pos, $quote_depth, | ||||
| 29136 | $quoted_string | ||||
| 29137 | ) | ||||
| 29138 | = follow_quoted_string( $i, $in_quote, $rtokens, $quote_character, | ||||
| 29139 | $quote_pos, $quote_depth, $max_token_index ); | ||||
| 29140 | $quoted_string_2 .= $quoted_string; | ||||
| 29141 | if ( $in_quote == 1 ) { | ||||
| 29142 | if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; } | ||||
| 29143 | $quote_character = ''; | ||||
| 29144 | } | ||||
| 29145 | else { | ||||
| 29146 | $quoted_string_2 .= "\n"; | ||||
| 29147 | } | ||||
| 29148 | } | ||||
| 29149 | |||||
| 29150 | if ( $in_quote == 1 ) { # one (more) quote to follow | ||||
| 29151 | my $ibeg = $i; | ||||
| 29152 | ( | ||||
| 29153 | $i, $in_quote, $quote_character, $quote_pos, $quote_depth, | ||||
| 29154 | $quoted_string | ||||
| 29155 | ) | ||||
| 29156 | = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character, | ||||
| 29157 | $quote_pos, $quote_depth, $max_token_index ); | ||||
| 29158 | $quoted_string_1 .= $quoted_string; | ||||
| 29159 | if ( $in_quote == 1 ) { | ||||
| 29160 | $quoted_string_1 .= "\n"; | ||||
| 29161 | } | ||||
| 29162 | } | ||||
| 29163 | return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth, | ||||
| 29164 | $quoted_string_1, $quoted_string_2 ); | ||||
| 29165 | } | ||||
| 29166 | |||||
| 29167 | sub follow_quoted_string { | ||||
| 29168 | |||||
| 29169 | # scan for a specific token, skipping escaped characters | ||||
| 29170 | # if the quote character is blank, use the first non-blank character | ||||
| 29171 | # input parameters: | ||||
| 29172 | # $rtokens = reference to the array of tokens | ||||
| 29173 | # $i = the token index of the first character to search | ||||
| 29174 | # $in_quote = number of quoted strings being followed | ||||
| 29175 | # $beginning_tok = the starting quote character | ||||
| 29176 | # $quote_pos = index to check next for alphanumeric delimiter | ||||
| 29177 | # output parameters: | ||||
| 29178 | # $i = the token index of the ending quote character | ||||
| 29179 | # $in_quote = decremented if found end, unchanged if not | ||||
| 29180 | # $beginning_tok = the starting quote character | ||||
| 29181 | # $quote_pos = index to check next for alphanumeric delimiter | ||||
| 29182 | # $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested. | ||||
| 29183 | # $quoted_string = the text of the quote (without quotation tokens) | ||||
| 29184 | my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth, | ||||
| 29185 | $max_token_index ) | ||||
| 29186 | = @_; | ||||
| 29187 | my ( $tok, $end_tok ); | ||||
| 29188 | my $i = $i_beg - 1; | ||||
| 29189 | my $quoted_string = ""; | ||||
| 29190 | |||||
| 29191 | TOKENIZER_DEBUG_FLAG_QUOTE && do { | ||||
| 29192 | print STDOUT | ||||
| 29193 | "QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n"; | ||||
| 29194 | }; | ||||
| 29195 | |||||
| 29196 | # get the corresponding end token | ||||
| 29197 | if ( $beginning_tok !~ /^\s*$/ ) { | ||||
| 29198 | $end_tok = matching_end_token($beginning_tok); | ||||
| 29199 | } | ||||
| 29200 | |||||
| 29201 | # a blank token means we must find and use the first non-blank one | ||||
| 29202 | else { | ||||
| 29203 | my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr> | ||||
| 29204 | |||||
| 29205 | while ( $i < $max_token_index ) { | ||||
| 29206 | $tok = $$rtokens[ ++$i ]; | ||||
| 29207 | |||||
| 29208 | if ( $tok !~ /^\s*$/ ) { | ||||
| 29209 | |||||
| 29210 | if ( ( $tok eq '#' ) && ($allow_quote_comments) ) { | ||||
| 29211 | $i = $max_token_index; | ||||
| 29212 | } | ||||
| 29213 | else { | ||||
| 29214 | |||||
| 29215 | if ( length($tok) > 1 ) { | ||||
| 29216 | if ( $quote_pos <= 0 ) { $quote_pos = 1 } | ||||
| 29217 | $beginning_tok = substr( $tok, $quote_pos - 1, 1 ); | ||||
| 29218 | } | ||||
| 29219 | else { | ||||
| 29220 | $beginning_tok = $tok; | ||||
| 29221 | $quote_pos = 0; | ||||
| 29222 | } | ||||
| 29223 | $end_tok = matching_end_token($beginning_tok); | ||||
| 29224 | $quote_depth = 1; | ||||
| 29225 | last; | ||||
| 29226 | } | ||||
| 29227 | } | ||||
| 29228 | else { | ||||
| 29229 | $allow_quote_comments = 1; | ||||
| 29230 | } | ||||
| 29231 | } | ||||
| 29232 | } | ||||
| 29233 | |||||
| 29234 | # There are two different loops which search for the ending quote | ||||
| 29235 | # character. In the rare case of an alphanumeric quote delimiter, we | ||||
| 29236 | # have to look through alphanumeric tokens character-by-character, since | ||||
| 29237 | # the pre-tokenization process combines multiple alphanumeric | ||||
| 29238 | # characters, whereas for a non-alphanumeric delimiter, only tokens of | ||||
| 29239 | # length 1 can match. | ||||
| 29240 | |||||
| 29241 | ################################################################### | ||||
| 29242 | # Case 1 (rare): loop for case of alphanumeric quote delimiter.. | ||||
| 29243 | # "quote_pos" is the position the current word to begin searching | ||||
| 29244 | ################################################################### | ||||
| 29245 | if ( $beginning_tok =~ /\w/ ) { | ||||
| 29246 | |||||
| 29247 | # Note this because it is not recommended practice except | ||||
| 29248 | # for obfuscated perl contests | ||||
| 29249 | if ( $in_quote == 1 ) { | ||||
| 29250 | write_logfile_entry( | ||||
| 29251 | "Note: alphanumeric quote delimiter ($beginning_tok) \n"); | ||||
| 29252 | } | ||||
| 29253 | |||||
| 29254 | while ( $i < $max_token_index ) { | ||||
| 29255 | |||||
| 29256 | if ( $quote_pos == 0 || ( $i < 0 ) ) { | ||||
| 29257 | $tok = $$rtokens[ ++$i ]; | ||||
| 29258 | |||||
| 29259 | if ( $tok eq '\\' ) { | ||||
| 29260 | |||||
| 29261 | # retain backslash unless it hides the end token | ||||
| 29262 | $quoted_string .= $tok | ||||
| 29263 | unless $$rtokens[ $i + 1 ] eq $end_tok; | ||||
| 29264 | $quote_pos++; | ||||
| 29265 | last if ( $i >= $max_token_index ); | ||||
| 29266 | $tok = $$rtokens[ ++$i ]; | ||||
| 29267 | } | ||||
| 29268 | } | ||||
| 29269 | my $old_pos = $quote_pos; | ||||
| 29270 | |||||
| 29271 | unless ( defined($tok) && defined($end_tok) && defined($quote_pos) ) | ||||
| 29272 | { | ||||
| 29273 | |||||
| 29274 | } | ||||
| 29275 | $quote_pos = 1 + index( $tok, $end_tok, $quote_pos ); | ||||
| 29276 | |||||
| 29277 | if ( $quote_pos > 0 ) { | ||||
| 29278 | |||||
| 29279 | $quoted_string .= | ||||
| 29280 | substr( $tok, $old_pos, $quote_pos - $old_pos - 1 ); | ||||
| 29281 | |||||
| 29282 | $quote_depth--; | ||||
| 29283 | |||||
| 29284 | if ( $quote_depth == 0 ) { | ||||
| 29285 | $in_quote--; | ||||
| 29286 | last; | ||||
| 29287 | } | ||||
| 29288 | } | ||||
| 29289 | else { | ||||
| 29290 | $quoted_string .= substr( $tok, $old_pos ); | ||||
| 29291 | } | ||||
| 29292 | } | ||||
| 29293 | } | ||||
| 29294 | |||||
| 29295 | ######################################################################## | ||||
| 29296 | # Case 2 (normal): loop for case of a non-alphanumeric quote delimiter.. | ||||
| 29297 | ######################################################################## | ||||
| 29298 | else { | ||||
| 29299 | |||||
| 29300 | while ( $i < $max_token_index ) { | ||||
| 29301 | $tok = $$rtokens[ ++$i ]; | ||||
| 29302 | |||||
| 29303 | if ( $tok eq $end_tok ) { | ||||
| 29304 | $quote_depth--; | ||||
| 29305 | |||||
| 29306 | if ( $quote_depth == 0 ) { | ||||
| 29307 | $in_quote--; | ||||
| 29308 | last; | ||||
| 29309 | } | ||||
| 29310 | } | ||||
| 29311 | elsif ( $tok eq $beginning_tok ) { | ||||
| 29312 | $quote_depth++; | ||||
| 29313 | } | ||||
| 29314 | elsif ( $tok eq '\\' ) { | ||||
| 29315 | |||||
| 29316 | # retain backslash unless it hides the beginning or end token | ||||
| 29317 | $tok = $$rtokens[ ++$i ]; | ||||
| 29318 | $quoted_string .= '\\' | ||||
| 29319 | unless ( $tok eq $end_tok || $tok eq $beginning_tok ); | ||||
| 29320 | } | ||||
| 29321 | $quoted_string .= $tok; | ||||
| 29322 | } | ||||
| 29323 | } | ||||
| 29324 | if ( $i > $max_token_index ) { $i = $max_token_index } | ||||
| 29325 | return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth, | ||||
| 29326 | $quoted_string ); | ||||
| 29327 | } | ||||
| 29328 | |||||
| 29329 | sub indicate_error { | ||||
| 29330 | my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_; | ||||
| 29331 | interrupt_logfile(); | ||||
| 29332 | warning($msg); | ||||
| 29333 | write_error_indicator_pair( $line_number, $input_line, $pos, $carrat ); | ||||
| 29334 | resume_logfile(); | ||||
| 29335 | } | ||||
| 29336 | |||||
| 29337 | sub write_error_indicator_pair { | ||||
| 29338 | my ( $line_number, $input_line, $pos, $carrat ) = @_; | ||||
| 29339 | my ( $offset, $numbered_line, $underline ) = | ||||
| 29340 | make_numbered_line( $line_number, $input_line, $pos ); | ||||
| 29341 | $underline = write_on_underline( $underline, $pos - $offset, $carrat ); | ||||
| 29342 | warning( $numbered_line . "\n" ); | ||||
| 29343 | $underline =~ s/\s*$//; | ||||
| 29344 | warning( $underline . "\n" ); | ||||
| 29345 | } | ||||
| 29346 | |||||
| 29347 | sub make_numbered_line { | ||||
| 29348 | |||||
| 29349 | # Given an input line, its line number, and a character position of | ||||
| 29350 | # interest, create a string not longer than 80 characters of the form | ||||
| 29351 | # $lineno: sub_string | ||||
| 29352 | # such that the sub_string of $str contains the position of interest | ||||
| 29353 | # | ||||
| 29354 | # Here is an example of what we want, in this case we add trailing | ||||
| 29355 | # '...' because the line is long. | ||||
| 29356 | # | ||||
| 29357 | # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ... | ||||
| 29358 | # | ||||
| 29359 | # Here is another example, this time in which we used leading '...' | ||||
| 29360 | # because of excessive length: | ||||
| 29361 | # | ||||
| 29362 | # 2: ... er of the World Wide Web Consortium's | ||||
| 29363 | # | ||||
| 29364 | # input parameters are: | ||||
| 29365 | # $lineno = line number | ||||
| 29366 | # $str = the text of the line | ||||
| 29367 | # $pos = position of interest (the error) : 0 = first character | ||||
| 29368 | # | ||||
| 29369 | # We return : | ||||
| 29370 | # - $offset = an offset which corrects the position in case we only | ||||
| 29371 | # display part of a line, such that $pos-$offset is the effective | ||||
| 29372 | # position from the start of the displayed line. | ||||
| 29373 | # - $numbered_line = the numbered line as above, | ||||
| 29374 | # - $underline = a blank 'underline' which is all spaces with the same | ||||
| 29375 | # number of characters as the numbered line. | ||||
| 29376 | |||||
| 29377 | my ( $lineno, $str, $pos ) = @_; | ||||
| 29378 | my $offset = ( $pos < 60 ) ? 0 : $pos - 40; | ||||
| 29379 | my $excess = length($str) - $offset - 68; | ||||
| 29380 | my $numc = ( $excess > 0 ) ? 68 : undef; | ||||
| 29381 | |||||
| 29382 | if ( defined($numc) ) { | ||||
| 29383 | if ( $offset == 0 ) { | ||||
| 29384 | $str = substr( $str, $offset, $numc - 4 ) . " ..."; | ||||
| 29385 | } | ||||
| 29386 | else { | ||||
| 29387 | $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ..."; | ||||
| 29388 | } | ||||
| 29389 | } | ||||
| 29390 | else { | ||||
| 29391 | |||||
| 29392 | if ( $offset == 0 ) { | ||||
| 29393 | } | ||||
| 29394 | else { | ||||
| 29395 | $str = "... " . substr( $str, $offset + 4 ); | ||||
| 29396 | } | ||||
| 29397 | } | ||||
| 29398 | |||||
| 29399 | my $numbered_line = sprintf( "%d: ", $lineno ); | ||||
| 29400 | $offset -= length($numbered_line); | ||||
| 29401 | $numbered_line .= $str; | ||||
| 29402 | my $underline = " " x length($numbered_line); | ||||
| 29403 | return ( $offset, $numbered_line, $underline ); | ||||
| 29404 | } | ||||
| 29405 | |||||
| 29406 | sub write_on_underline { | ||||
| 29407 | |||||
| 29408 | # The "underline" is a string that shows where an error is; it starts | ||||
| 29409 | # out as a string of blanks with the same length as the numbered line of | ||||
| 29410 | # code above it, and we have to add marking to show where an error is. | ||||
| 29411 | # In the example below, we want to write the string '--^' just below | ||||
| 29412 | # the line of bad code: | ||||
| 29413 | # | ||||
| 29414 | # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ... | ||||
| 29415 | # ---^ | ||||
| 29416 | # We are given the current underline string, plus a position and a | ||||
| 29417 | # string to write on it. | ||||
| 29418 | # | ||||
| 29419 | # In the above example, there will be 2 calls to do this: | ||||
| 29420 | # First call: $pos=19, pos_chr=^ | ||||
| 29421 | # Second call: $pos=16, pos_chr=--- | ||||
| 29422 | # | ||||
| 29423 | # This is a trivial thing to do with substr, but there is some | ||||
| 29424 | # checking to do. | ||||
| 29425 | |||||
| 29426 | my ( $underline, $pos, $pos_chr ) = @_; | ||||
| 29427 | |||||
| 29428 | # check for error..shouldn't happen | ||||
| 29429 | unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) { | ||||
| 29430 | return $underline; | ||||
| 29431 | } | ||||
| 29432 | my $excess = length($pos_chr) + $pos - length($underline); | ||||
| 29433 | if ( $excess > 0 ) { | ||||
| 29434 | $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess ); | ||||
| 29435 | } | ||||
| 29436 | substr( $underline, $pos, length($pos_chr) ) = $pos_chr; | ||||
| 29437 | return ($underline); | ||||
| 29438 | } | ||||
| 29439 | |||||
| 29440 | sub pre_tokenize { | ||||
| 29441 | |||||
| 29442 | # Break a string, $str, into a sequence of preliminary tokens. We | ||||
| 29443 | # are interested in these types of tokens: | ||||
| 29444 | # words (type='w'), example: 'max_tokens_wanted' | ||||
| 29445 | # digits (type = 'd'), example: '0755' | ||||
| 29446 | # whitespace (type = 'b'), example: ' ' | ||||
| 29447 | # any other single character (i.e. punct; type = the character itself). | ||||
| 29448 | # We cannot do better than this yet because we might be in a quoted | ||||
| 29449 | # string or pattern. Caller sets $max_tokens_wanted to 0 to get all | ||||
| 29450 | # tokens. | ||||
| 29451 | my ( $str, $max_tokens_wanted ) = @_; | ||||
| 29452 | |||||
| 29453 | # we return references to these 3 arrays: | ||||
| 29454 | my @tokens = (); # array of the tokens themselves | ||||
| 29455 | my @token_map = (0); # string position of start of each token | ||||
| 29456 | my @type = (); # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct | ||||
| 29457 | |||||
| 29458 | do { | ||||
| 29459 | |||||
| 29460 | # whitespace | ||||
| 29461 | if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; } | ||||
| 29462 | |||||
| 29463 | # numbers | ||||
| 29464 | # note that this must come before words! | ||||
| 29465 | elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; } | ||||
| 29466 | |||||
| 29467 | # words | ||||
| 29468 | elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; } | ||||
| 29469 | |||||
| 29470 | # single-character punctuation | ||||
| 29471 | elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; } | ||||
| 29472 | |||||
| 29473 | # that's all.. | ||||
| 29474 | else { | ||||
| 29475 | return ( \@tokens, \@token_map, \@type ); | ||||
| 29476 | } | ||||
| 29477 | |||||
| 29478 | push @tokens, $1; | ||||
| 29479 | push @token_map, pos($str); | ||||
| 29480 | |||||
| 29481 | } while ( --$max_tokens_wanted != 0 ); | ||||
| 29482 | |||||
| 29483 | return ( \@tokens, \@token_map, \@type ); | ||||
| 29484 | } | ||||
| 29485 | |||||
| 29486 | sub show_tokens { | ||||
| 29487 | |||||
| 29488 | # this is an old debug routine | ||||
| 29489 | my ( $rtokens, $rtoken_map ) = @_; | ||||
| 29490 | my $num = scalar(@$rtokens); | ||||
| 29491 | my $i; | ||||
| 29492 | |||||
| 29493 | for ( $i = 0 ; $i < $num ; $i++ ) { | ||||
| 29494 | my $len = length( $$rtokens[$i] ); | ||||
| 29495 | print STDOUT "$i:$len:$$rtoken_map[$i]:$$rtokens[$i]:\n"; | ||||
| 29496 | } | ||||
| 29497 | } | ||||
| 29498 | |||||
| 29499 | sub matching_end_token { | ||||
| 29500 | |||||
| 29501 | # find closing character for a pattern | ||||
| 29502 | my $beginning_token = shift; | ||||
| 29503 | |||||
| 29504 | if ( $beginning_token eq '{' ) { | ||||
| 29505 | '}'; | ||||
| 29506 | } | ||||
| 29507 | elsif ( $beginning_token eq '[' ) { | ||||
| 29508 | ']'; | ||||
| 29509 | } | ||||
| 29510 | elsif ( $beginning_token eq '<' ) { | ||||
| 29511 | '>'; | ||||
| 29512 | } | ||||
| 29513 | elsif ( $beginning_token eq '(' ) { | ||||
| 29514 | ')'; | ||||
| 29515 | } | ||||
| 29516 | else { | ||||
| 29517 | $beginning_token; | ||||
| 29518 | } | ||||
| 29519 | } | ||||
| 29520 | |||||
| 29521 | sub dump_token_types { | ||||
| 29522 | my $class = shift; | ||||
| 29523 | my $fh = shift; | ||||
| 29524 | |||||
| 29525 | # This should be the latest list of token types in use | ||||
| 29526 | # adding NEW_TOKENS: add a comment here | ||||
| 29527 | print $fh <<'END_OF_LIST'; | ||||
| 29528 | |||||
| 29529 | Here is a list of the token types currently used for lines of type 'CODE'. | ||||
| 29530 | For the following tokens, the "type" of a token is just the token itself. | ||||
| 29531 | |||||
| 29532 | .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <> | ||||
| 29533 | ( ) <= >= == =~ !~ != ++ -- /= x= | ||||
| 29534 | ... **= <<= >>= &&= ||= //= <=> | ||||
| 29535 | , + - / * | % ! x ~ = \ ? : . < > ^ & | ||||
| 29536 | |||||
| 29537 | The following additional token types are defined: | ||||
| 29538 | |||||
| 29539 | type meaning | ||||
| 29540 | b blank (white space) | ||||
| 29541 | { indent: opening structural curly brace or square bracket or paren | ||||
| 29542 | (code block, anonymous hash reference, or anonymous array reference) | ||||
| 29543 | } outdent: right structural curly brace or square bracket or paren | ||||
| 29544 | [ left non-structural square bracket (enclosing an array index) | ||||
| 29545 | ] right non-structural square bracket | ||||
| 29546 | ( left non-structural paren (all but a list right of an =) | ||||
| 29547 | ) right non-structural paren | ||||
| 29548 | L left non-structural curly brace (enclosing a key) | ||||
| 29549 | R right non-structural curly brace | ||||
| 29550 | ; terminal semicolon | ||||
| 29551 | f indicates a semicolon in a "for" statement | ||||
| 29552 | h here_doc operator << | ||||
| 29553 | # a comment | ||||
| 29554 | Q indicates a quote or pattern | ||||
| 29555 | q indicates a qw quote block | ||||
| 29556 | k a perl keyword | ||||
| 29557 | C user-defined constant or constant function (with void prototype = ()) | ||||
| 29558 | U user-defined function taking parameters | ||||
| 29559 | G user-defined function taking block parameter (like grep/map/eval) | ||||
| 29560 | M (unused, but reserved for subroutine definition name) | ||||
| 29561 | P (unused, but -html uses it to label pod text) | ||||
| 29562 | t type indicater such as %,$,@,*,&,sub | ||||
| 29563 | w bare word (perhaps a subroutine call) | ||||
| 29564 | i identifier of some type (with leading %, $, @, *, &, sub, -> ) | ||||
| 29565 | n a number | ||||
| 29566 | v a v-string | ||||
| 29567 | F a file test operator (like -e) | ||||
| 29568 | Y File handle | ||||
| 29569 | Z identifier in indirect object slot: may be file handle, object | ||||
| 29570 | J LABEL: code block label | ||||
| 29571 | j LABEL after next, last, redo, goto | ||||
| 29572 | p unary + | ||||
| 29573 | m unary - | ||||
| 29574 | pp pre-increment operator ++ | ||||
| 29575 | mm pre-decrement operator -- | ||||
| 29576 | A : used as attribute separator | ||||
| 29577 | |||||
| 29578 | Here are the '_line_type' codes used internally: | ||||
| 29579 | SYSTEM - system-specific code before hash-bang line | ||||
| 29580 | CODE - line of perl code (including comments) | ||||
| 29581 | POD_START - line starting pod, such as '=head' | ||||
| 29582 | POD - pod documentation text | ||||
| 29583 | POD_END - last line of pod section, '=cut' | ||||
| 29584 | HERE - text of here-document | ||||
| 29585 | HERE_END - last line of here-doc (target word) | ||||
| 29586 | FORMAT - format section | ||||
| 29587 | FORMAT_END - last line of format section, '.' | ||||
| 29588 | DATA_START - __DATA__ line | ||||
| 29589 | DATA - unidentified text following __DATA__ | ||||
| 29590 | END_START - __END__ line | ||||
| 29591 | END - unidentified text following __END__ | ||||
| 29592 | ERROR - we are in big trouble, probably not a perl script | ||||
| 29593 | END_OF_LIST | ||||
| 29594 | } | ||||
| 29595 | |||||
| 29596 | # spent 484µs within Perl::Tidy::Tokenizer::BEGIN@29596 which was called:
# once (484µs+0s) by Perl::Critic::Policy::CodeLayout::RequireTidyCode::BEGIN@18 at line 30087 | ||||
| 29597 | |||||
| 29598 | # These names are used in error messages | ||||
| 29599 | 1 | 2µs | @opening_brace_names = qw# '{' '[' '(' '?' #; | ||
| 29600 | 1 | 4µs | @closing_brace_names = qw# '}' ']' ')' ':' #; | ||
| 29601 | |||||
| 29602 | 1 | 4µs | my @digraphs = qw( | ||
| 29603 | .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <> | ||||
| 29604 | <= >= == =~ !~ != ++ -- /= x= ~~ | ||||
| 29605 | ); | ||||
| 29606 | 1 | 13µs | @is_digraph{@digraphs} = (1) x scalar(@digraphs); | ||
| 29607 | |||||
| 29608 | 1 | 1µs | my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ ); | ||
| 29609 | 1 | 4µs | @is_trigraph{@trigraphs} = (1) x scalar(@trigraphs); | ||
| 29610 | |||||
| 29611 | # make a hash of all valid token types for self-checking the tokenizer | ||||
| 29612 | # (adding NEW_TOKENS : select a new character and add to this list) | ||||
| 29613 | 1 | 6µs | my @valid_token_types = qw# | ||
| 29614 | A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v | ||||
| 29615 | { } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ & | ||||
| 29616 | #; | ||||
| 29617 | 1 | 9µs | push( @valid_token_types, @digraphs ); | ||
| 29618 | 1 | 1µs | push( @valid_token_types, @trigraphs ); | ||
| 29619 | 1 | 600ns | push( @valid_token_types, ( '#', ',', 'CORE::' ) ); | ||
| 29620 | 1 | 28µs | @is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types); | ||
| 29621 | |||||
| 29622 | # a list of file test letters, as in -e (Table 3-4 of 'camel 3') | ||||
| 29623 | 1 | 4µs | my @file_test_operators = | ||
| 29624 | qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z); | ||||
| 29625 | 1 | 10µs | @is_file_test_operator{@file_test_operators} = | ||
| 29626 | (1) x scalar(@file_test_operators); | ||||
| 29627 | |||||
| 29628 | # these functions have prototypes of the form (&), so when they are | ||||
| 29629 | # followed by a block, that block MAY BE followed by an operator. | ||||
| 29630 | # Smartmatch operator ~~ may be followed by anonymous hash or array ref | ||||
| 29631 | 1 | 800ns | @_ = qw( do eval ); | ||
| 29632 | 1 | 1µs | @is_block_operator{@_} = (1) x scalar(@_); | ||
| 29633 | |||||
| 29634 | # these functions allow an identifier in the indirect object slot | ||||
| 29635 | 1 | 2µs | @_ = qw( print printf sort exec system say); | ||
| 29636 | 1 | 3µs | @is_indirect_object_taker{@_} = (1) x scalar(@_); | ||
| 29637 | |||||
| 29638 | # These tokens may precede a code block | ||||
| 29639 | # patched for SWITCH/CASE | ||||
| 29640 | 1 | 5µs | @_ = | ||
| 29641 | qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else | ||||
| 29642 | unless do while until eval for foreach map grep sort | ||||
| 29643 | switch case given when); | ||||
| 29644 | 1 | 9µs | @is_code_block_token{@_} = (1) x scalar(@_); | ||
| 29645 | |||||
| 29646 | # I'll build the list of keywords incrementally | ||||
| 29647 | 1 | 200ns | my @Keywords = (); | ||
| 29648 | |||||
| 29649 | # keywords and tokens after which a value or pattern is expected, | ||||
| 29650 | # but not an operator. In other words, these should consume terms | ||||
| 29651 | # to their right, or at least they are not expected to be followed | ||||
| 29652 | # immediately by operators. | ||||
| 29653 | 1 | 31µs | my @value_requestor = qw( | ||
| 29654 | AUTOLOAD | ||||
| 29655 | BEGIN | ||||
| 29656 | CHECK | ||||
| 29657 | DESTROY | ||||
| 29658 | END | ||||
| 29659 | EQ | ||||
| 29660 | GE | ||||
| 29661 | GT | ||||
| 29662 | INIT | ||||
| 29663 | LE | ||||
| 29664 | LT | ||||
| 29665 | NE | ||||
| 29666 | UNITCHECK | ||||
| 29667 | abs | ||||
| 29668 | accept | ||||
| 29669 | alarm | ||||
| 29670 | and | ||||
| 29671 | atan2 | ||||
| 29672 | bind | ||||
| 29673 | binmode | ||||
| 29674 | bless | ||||
| 29675 | break | ||||
| 29676 | caller | ||||
| 29677 | chdir | ||||
| 29678 | chmod | ||||
| 29679 | chomp | ||||
| 29680 | chop | ||||
| 29681 | chown | ||||
| 29682 | chr | ||||
| 29683 | chroot | ||||
| 29684 | close | ||||
| 29685 | closedir | ||||
| 29686 | cmp | ||||
| 29687 | connect | ||||
| 29688 | continue | ||||
| 29689 | cos | ||||
| 29690 | crypt | ||||
| 29691 | dbmclose | ||||
| 29692 | dbmopen | ||||
| 29693 | defined | ||||
| 29694 | delete | ||||
| 29695 | die | ||||
| 29696 | dump | ||||
| 29697 | each | ||||
| 29698 | else | ||||
| 29699 | elsif | ||||
| 29700 | eof | ||||
| 29701 | eq | ||||
| 29702 | exec | ||||
| 29703 | exists | ||||
| 29704 | exit | ||||
| 29705 | exp | ||||
| 29706 | fcntl | ||||
| 29707 | fileno | ||||
| 29708 | flock | ||||
| 29709 | for | ||||
| 29710 | foreach | ||||
| 29711 | formline | ||||
| 29712 | ge | ||||
| 29713 | getc | ||||
| 29714 | getgrgid | ||||
| 29715 | getgrnam | ||||
| 29716 | gethostbyaddr | ||||
| 29717 | gethostbyname | ||||
| 29718 | getnetbyaddr | ||||
| 29719 | getnetbyname | ||||
| 29720 | getpeername | ||||
| 29721 | getpgrp | ||||
| 29722 | getpriority | ||||
| 29723 | getprotobyname | ||||
| 29724 | getprotobynumber | ||||
| 29725 | getpwnam | ||||
| 29726 | getpwuid | ||||
| 29727 | getservbyname | ||||
| 29728 | getservbyport | ||||
| 29729 | getsockname | ||||
| 29730 | getsockopt | ||||
| 29731 | glob | ||||
| 29732 | gmtime | ||||
| 29733 | goto | ||||
| 29734 | grep | ||||
| 29735 | gt | ||||
| 29736 | hex | ||||
| 29737 | if | ||||
| 29738 | index | ||||
| 29739 | int | ||||
| 29740 | ioctl | ||||
| 29741 | join | ||||
| 29742 | keys | ||||
| 29743 | kill | ||||
| 29744 | last | ||||
| 29745 | lc | ||||
| 29746 | lcfirst | ||||
| 29747 | le | ||||
| 29748 | length | ||||
| 29749 | link | ||||
| 29750 | listen | ||||
| 29751 | local | ||||
| 29752 | localtime | ||||
| 29753 | lock | ||||
| 29754 | log | ||||
| 29755 | lstat | ||||
| 29756 | lt | ||||
| 29757 | map | ||||
| 29758 | mkdir | ||||
| 29759 | msgctl | ||||
| 29760 | msgget | ||||
| 29761 | msgrcv | ||||
| 29762 | msgsnd | ||||
| 29763 | my | ||||
| 29764 | ne | ||||
| 29765 | next | ||||
| 29766 | no | ||||
| 29767 | not | ||||
| 29768 | oct | ||||
| 29769 | open | ||||
| 29770 | opendir | ||||
| 29771 | or | ||||
| 29772 | ord | ||||
| 29773 | our | ||||
| 29774 | pack | ||||
| 29775 | pipe | ||||
| 29776 | pop | ||||
| 29777 | pos | ||||
| 29778 | |||||
| 29779 | printf | ||||
| 29780 | prototype | ||||
| 29781 | push | ||||
| 29782 | quotemeta | ||||
| 29783 | rand | ||||
| 29784 | read | ||||
| 29785 | readdir | ||||
| 29786 | readlink | ||||
| 29787 | readline | ||||
| 29788 | readpipe | ||||
| 29789 | recv | ||||
| 29790 | redo | ||||
| 29791 | ref | ||||
| 29792 | rename | ||||
| 29793 | require | ||||
| 29794 | reset | ||||
| 29795 | return | ||||
| 29796 | reverse | ||||
| 29797 | rewinddir | ||||
| 29798 | rindex | ||||
| 29799 | rmdir | ||||
| 29800 | scalar | ||||
| 29801 | seek | ||||
| 29802 | seekdir | ||||
| 29803 | select | ||||
| 29804 | semctl | ||||
| 29805 | semget | ||||
| 29806 | semop | ||||
| 29807 | send | ||||
| 29808 | sethostent | ||||
| 29809 | setnetent | ||||
| 29810 | setpgrp | ||||
| 29811 | setpriority | ||||
| 29812 | setprotoent | ||||
| 29813 | setservent | ||||
| 29814 | setsockopt | ||||
| 29815 | shift | ||||
| 29816 | shmctl | ||||
| 29817 | shmget | ||||
| 29818 | shmread | ||||
| 29819 | shmwrite | ||||
| 29820 | shutdown | ||||
| 29821 | sin | ||||
| 29822 | sleep | ||||
| 29823 | socket | ||||
| 29824 | socketpair | ||||
| 29825 | sort | ||||
| 29826 | splice | ||||
| 29827 | split | ||||
| 29828 | sprintf | ||||
| 29829 | sqrt | ||||
| 29830 | srand | ||||
| 29831 | stat | ||||
| 29832 | study | ||||
| 29833 | substr | ||||
| 29834 | symlink | ||||
| 29835 | syscall | ||||
| 29836 | sysopen | ||||
| 29837 | sysread | ||||
| 29838 | sysseek | ||||
| 29839 | system | ||||
| 29840 | syswrite | ||||
| 29841 | tell | ||||
| 29842 | telldir | ||||
| 29843 | tie | ||||
| 29844 | tied | ||||
| 29845 | truncate | ||||
| 29846 | uc | ||||
| 29847 | ucfirst | ||||
| 29848 | umask | ||||
| 29849 | undef | ||||
| 29850 | unless | ||||
| 29851 | unlink | ||||
| 29852 | unpack | ||||
| 29853 | unshift | ||||
| 29854 | untie | ||||
| 29855 | until | ||||
| 29856 | use | ||||
| 29857 | utime | ||||
| 29858 | values | ||||
| 29859 | vec | ||||
| 29860 | waitpid | ||||
| 29861 | warn | ||||
| 29862 | while | ||||
| 29863 | write | ||||
| 29864 | xor | ||||
| 29865 | |||||
| 29866 | switch | ||||
| 29867 | case | ||||
| 29868 | given | ||||
| 29869 | when | ||||
| 29870 | err | ||||
| 29871 | say | ||||
| 29872 | ); | ||||
| 29873 | |||||
| 29874 | # patched above for SWITCH/CASE given/when err say | ||||
| 29875 | # 'err' is a fairly safe addition. | ||||
| 29876 | # TODO: 'default' still needed if appropriate | ||||
| 29877 | # 'use feature' seen, but perltidy works ok without it. | ||||
| 29878 | # Concerned that 'default' could break code. | ||||
| 29879 | 1 | 40µs | push( @Keywords, @value_requestor ); | ||
| 29880 | |||||
| 29881 | # These are treated the same but are not keywords: | ||||
| 29882 | 1 | 600ns | my @extra_vr = qw( | ||
| 29883 | constant | ||||
| 29884 | vars | ||||
| 29885 | ); | ||||
| 29886 | 1 | 500ns | push( @value_requestor, @extra_vr ); | ||
| 29887 | |||||
| 29888 | 1 | 75µs | @expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor); | ||
| 29889 | |||||
| 29890 | # this list contains keywords which do not look for arguments, | ||||
| 29891 | # so that they might be followed by an operator, or at least | ||||
| 29892 | # not a term. | ||||
| 29893 | 1 | 3µs | my @operator_requestor = qw( | ||
| 29894 | endgrent | ||||
| 29895 | endhostent | ||||
| 29896 | endnetent | ||||
| 29897 | endprotoent | ||||
| 29898 | endpwent | ||||
| 29899 | endservent | ||||
| 29900 | fork | ||||
| 29901 | getgrent | ||||
| 29902 | gethostent | ||||
| 29903 | getlogin | ||||
| 29904 | getnetent | ||||
| 29905 | getppid | ||||
| 29906 | getprotoent | ||||
| 29907 | getpwent | ||||
| 29908 | getservent | ||||
| 29909 | setgrent | ||||
| 29910 | setpwent | ||||
| 29911 | time | ||||
| 29912 | times | ||||
| 29913 | wait | ||||
| 29914 | wantarray | ||||
| 29915 | ); | ||||
| 29916 | |||||
| 29917 | 1 | 3µs | push( @Keywords, @operator_requestor ); | ||
| 29918 | |||||
| 29919 | # These are treated the same but are not considered keywords: | ||||
| 29920 | 1 | 500ns | my @extra_or = qw( | ||
| 29921 | STDERR | ||||
| 29922 | STDIN | ||||
| 29923 | STDOUT | ||||
| 29924 | ); | ||||
| 29925 | |||||
| 29926 | 1 | 800ns | push( @operator_requestor, @extra_or ); | ||
| 29927 | |||||
| 29928 | 1 | 9µs | @expecting_operator_token{@operator_requestor} = | ||
| 29929 | (1) x scalar(@operator_requestor); | ||||
| 29930 | |||||
| 29931 | # these token TYPES expect trailing operator but not a term | ||||
| 29932 | # note: ++ and -- are post-increment and decrement, 'C' = constant | ||||
| 29933 | 1 | 1µs | my @operator_requestor_types = qw( ++ -- C <> q ); | ||
| 29934 | 1 | 1µs | @expecting_operator_types{@operator_requestor_types} = | ||
| 29935 | (1) x scalar(@operator_requestor_types); | ||||
| 29936 | |||||
| 29937 | # these token TYPES consume values (terms) | ||||
| 29938 | # note: pp and mm are pre-increment and decrement | ||||
| 29939 | # f=semicolon in for, F=file test operator | ||||
| 29940 | 1 | 8µs | my @value_requestor_type = qw# | ||
| 29941 | L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x | ||||
| 29942 | **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //= | ||||
| 29943 | <= >= == != => \ > < % * / ? & | ** <=> ~~ !~~ | ||||
| 29944 | f F pp mm Y p m U J G j >> << ^ t | ||||
| 29945 | #; | ||||
| 29946 | 1 | 2µs | push( @value_requestor_type, ',' ) | ||
| 29947 | ; # (perl doesn't like a ',' in a qw block) | ||||
| 29948 | 1 | 17µs | @expecting_term_types{@value_requestor_type} = | ||
| 29949 | (1) x scalar(@value_requestor_type); | ||||
| 29950 | |||||
| 29951 | # Note: the following valid token types are not assigned here to | ||||
| 29952 | # hashes requesting to be followed by values or terms, but are | ||||
| 29953 | # instead currently hard-coded into sub operator_expected: | ||||
| 29954 | # ) -> :: Q R Z ] b h i k n v w } # | ||||
| 29955 | |||||
| 29956 | # For simple syntax checking, it is nice to have a list of operators which | ||||
| 29957 | # will really be unhappy if not followed by a term. This includes most | ||||
| 29958 | # of the above... | ||||
| 29959 | 1 | 14µs | %really_want_term = %expecting_term_types; | ||
| 29960 | |||||
| 29961 | # with these exceptions... | ||||
| 29962 | 1 | 600ns | delete $really_want_term{'U'}; # user sub, depends on prototype | ||
| 29963 | 1 | 200ns | delete $really_want_term{'F'}; # file test works on $_ if no following term | ||
| 29964 | 1 | 100ns | delete $really_want_term{'Y'}; # indirect object, too risky to check syntax; | ||
| 29965 | # let perl do it | ||||
| 29966 | |||||
| 29967 | 1 | 4µs | @_ = qw(q qq qw qx qr s y tr m); | ||
| 29968 | 1 | 3µs | @is_q_qq_qw_qx_qr_s_y_tr_m{@_} = (1) x scalar(@_); | ||
| 29969 | |||||
| 29970 | # These keywords are handled specially in the tokenizer code: | ||||
| 29971 | 1 | 2µs | my @special_keywords = qw( | ||
| 29972 | do | ||||
| 29973 | eval | ||||
| 29974 | format | ||||
| 29975 | m | ||||
| 29976 | package | ||||
| 29977 | q | ||||
| 29978 | |||||
| 29979 | qr | ||||
| 29980 | qw | ||||
| 29981 | qx | ||||
| 29982 | s | ||||
| 29983 | sub | ||||
| 29984 | tr | ||||
| 29985 | y | ||||
| 29986 | ); | ||||
| 29987 | 1 | 2µs | push( @Keywords, @special_keywords ); | ||
| 29988 | |||||
| 29989 | # Keywords after which list formatting may be used | ||||
| 29990 | # WARNING: do not include |map|grep|eval or perl may die on | ||||
| 29991 | # syntax errors (map1.t). | ||||
| 29992 | 1 | 9µs | my @keyword_taking_list = qw( | ||
| 29993 | and | ||||
| 29994 | chmod | ||||
| 29995 | chomp | ||||
| 29996 | chop | ||||
| 29997 | chown | ||||
| 29998 | dbmopen | ||||
| 29999 | die | ||||
| 30000 | elsif | ||||
| 30001 | exec | ||||
| 30002 | fcntl | ||||
| 30003 | for | ||||
| 30004 | foreach | ||||
| 30005 | formline | ||||
| 30006 | getsockopt | ||||
| 30007 | if | ||||
| 30008 | index | ||||
| 30009 | ioctl | ||||
| 30010 | join | ||||
| 30011 | kill | ||||
| 30012 | local | ||||
| 30013 | msgctl | ||||
| 30014 | msgrcv | ||||
| 30015 | msgsnd | ||||
| 30016 | my | ||||
| 30017 | open | ||||
| 30018 | or | ||||
| 30019 | our | ||||
| 30020 | pack | ||||
| 30021 | |||||
| 30022 | printf | ||||
| 30023 | push | ||||
| 30024 | read | ||||
| 30025 | readpipe | ||||
| 30026 | recv | ||||
| 30027 | return | ||||
| 30028 | reverse | ||||
| 30029 | rindex | ||||
| 30030 | seek | ||||
| 30031 | select | ||||
| 30032 | semctl | ||||
| 30033 | semget | ||||
| 30034 | send | ||||
| 30035 | setpriority | ||||
| 30036 | setsockopt | ||||
| 30037 | shmctl | ||||
| 30038 | shmget | ||||
| 30039 | shmread | ||||
| 30040 | shmwrite | ||||
| 30041 | socket | ||||
| 30042 | socketpair | ||||
| 30043 | sort | ||||
| 30044 | splice | ||||
| 30045 | split | ||||
| 30046 | sprintf | ||||
| 30047 | substr | ||||
| 30048 | syscall | ||||
| 30049 | sysopen | ||||
| 30050 | sysread | ||||
| 30051 | sysseek | ||||
| 30052 | system | ||||
| 30053 | syswrite | ||||
| 30054 | tie | ||||
| 30055 | unless | ||||
| 30056 | unlink | ||||
| 30057 | unpack | ||||
| 30058 | unshift | ||||
| 30059 | until | ||||
| 30060 | vec | ||||
| 30061 | warn | ||||
| 30062 | while | ||||
| 30063 | given | ||||
| 30064 | when | ||||
| 30065 | ); | ||||
| 30066 | 1 | 20µs | @is_keyword_taking_list{@keyword_taking_list} = | ||
| 30067 | (1) x scalar(@keyword_taking_list); | ||||
| 30068 | |||||
| 30069 | # These are not used in any way yet | ||||
| 30070 | # my @unused_keywords = qw( | ||||
| 30071 | # __FILE__ | ||||
| 30072 | # __LINE__ | ||||
| 30073 | # __PACKAGE__ | ||||
| 30074 | # ); | ||||
| 30075 | |||||
| 30076 | # The list of keywords was originally extracted from function 'keyword' in | ||||
| 30077 | # perl file toke.c version 5.005.03, using this utility, plus a | ||||
| 30078 | # little editing: (file getkwd.pl): | ||||
| 30079 | # while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } } | ||||
| 30080 | # Add 'get' prefix where necessary, then split into the above lists. | ||||
| 30081 | # This list should be updated as necessary. | ||||
| 30082 | # The list should not contain these special variables: | ||||
| 30083 | # ARGV DATA ENV SIG STDERR STDIN STDOUT | ||||
| 30084 | # __DATA__ __END__ | ||||
| 30085 | |||||
| 30086 | 1 | 131µs | @is_keyword{@Keywords} = (1) x scalar(@Keywords); | ||
| 30087 | 1 | 416µs | 1 | 484µs | } # spent 484µs making 1 call to Perl::Tidy::Tokenizer::BEGIN@29596 |
| 30088 | 1 | 42µs | 1; | ||
| 30089 | __END__ | ||||
# spent 6µs within Perl::Tidy::CORE:subst which was called:
# once (6µs+0s) by Perl::Tidy::BEGIN@81 at line 82 | |||||
# spent 1µs within Perl::Tidy::CORE:substcont which was called 2 times, avg 650ns/call:
# 2 times (1µs+0s) by Perl::Tidy::BEGIN@81 at line 82, avg 650ns/call |