1 ###############################################################################
3 # Class: NaturalDocs::Languages::Perl
5 ###############################################################################
7 # A subclass to handle the language variations of Perl.
10 # Topic: Language Support
15 # - Inheritance via "use base" and "@ISA =".
23 ###############################################################################
25 # This file is part of Natural Docs, which is Copyright © 2003-2010 Greg Valure
26 # Natural Docs is licensed under version 3 of the GNU Affero General Public License (AGPL)
27 # Refer to License.txt for the complete details
32 package NaturalDocs::Languages::Perl;
34 use base 'NaturalDocs::Languages::Advanced';
38 # array: hereDocTerminators
39 # An array of active Here Doc terminators, or an empty array if not active. Each entry is an arrayref of tokens. The entries
40 # are kept in the order in which they must appear in the source.
42 my @hereDocTerminators;
46 ###############################################################################
47 # Group: Interface Functions
51 # Function: PackageSeparator
52 # Returns the package separator symbol.
58 # Function: EnumValues
59 # Returns the <EnumValuesType> that describes how the language handles enums.
62 { return ::ENUM_GLOBAL(); };
68 # Parses the passed source file, sending comments acceptable for documentation to <NaturalDocs::Parser->OnComment()>.
72 # sourceFile - The name of the source file to parse.
73 # topicsList - A reference to the list of <NaturalDocs::Parser::ParsedTopics> being built by the file.
77 # The array ( autoTopics, scopeRecord ).
79 # autoTopics - An arrayref of automatically generated topics from the file, or undef if none.
80 # scopeRecord - An arrayref of <NaturalDocs::Languages::Advanced::ScopeChanges>, or undef if none.
82 sub ParseFile #(sourceFile, topicsList)
84 my ($self, $sourceFile, $topicsList) = @_;
86 @hereDocTerminators = ( );
88 # The regular block comment symbols are undef because they're all potentially JavaDoc comments. PreprocessFile() will
89 # handle translating things like =begin naturaldocs and =begin javadoc to =begin nd.
90 $self->ParseForCommentsAndTokens($sourceFile, [ '#' ], undef, [ '##' ], [ '=begin nd', '=end nd' ]);
92 my $tokens = $self->Tokens();
96 while ($index < scalar @$tokens)
98 if ($self->TryToSkipWhitespace(\$index, \$lineNumber) ||
99 $self->TryToGetPackage(\$index, \$lineNumber) ||
100 $self->TryToGetBase(\$index, \$lineNumber) ||
101 $self->TryToGetFunction(\$index, \$lineNumber) ||
102 $self->TryToGetVariable(\$index, \$lineNumber) )
104 # The functions above will handle everything.
107 elsif ($tokens->[$index] eq '{')
109 $self->StartScope('}', $lineNumber, undef);
113 elsif ($tokens->[$index] eq '}')
115 if ($self->ClosingScopeSymbol() eq '}')
116 { $self->EndScope($lineNumber); };
121 elsif (lc($tokens->[$index]) eq 'eval')
123 # We want to skip the token in this case instead of letting it fall to SkipRestOfStatement. This allows evals with braces
124 # to be treated like normal floating braces.
130 $self->SkipRestOfStatement(\$index, \$lineNumber);
135 # Don't need to keep these around.
136 $self->ClearTokens();
138 return ( $self->AutoTopics(), $self->ScopeRecord() );
143 # Function: PreprocessFile
145 # Overridden to support "=begin nd" and similar.
147 # - "=begin [nd|naturaldocs|natural docs|jd|javadoc|java doc]" all translate to "=begin nd".
148 # - "=[nd|naturaldocs|natural docs]" also translate to "=begin nd".
149 # - "=end [nd|naturaldocs|natural docs|jd|javadoc]" all translate to "=end nd".
150 # - "=cut" from a ND block translates into "=end nd", but the next line will be altered to begin with "(NDPODBREAK)". This is
151 # so if there is POD leading into ND which ends with a cut, the parser can still end the original POD because the end ND line
152 # would have been removed. Remember, <NaturalDocs::Languages::Advanced->ParseForCommentsAndTokens()> removes
153 # Natural Docs-worthy comments to save parsing time.
154 # - "=pod begin nd" and "=pod end nd" are supported for compatibility with ND 1.32 and earlier, even though the syntax is a
156 # - It also supports the wrong plural forms, so naturaldoc/natural doc/javadocs/java docs will work.
158 sub PreprocessFile #(lines)
# Rewrites POD directive lines in place.  $lines is an arrayref of source lines; each recognized opener is replaced with
# exactly "=begin nd" and each recognized closer with exactly "=end nd".
160 my ($self, $lines) = @_;
# Tested by the last branch of the loop to decide whether a line gets the "(NDPODBREAK)" prefix.  Where it is set to true is
# not visible in this listing.
163 my $mustBreakPOD = 0;
165 for (my $i = 0; $i < scalar @$lines; $i++)
# Opener: "=", an optional "pod", an optional "begin", then any accepted spelling of nd / natural docs / jd / java docs.
# The match is case-insensitive and only spaces or tabs may follow the keyword.
167 if ($lines->[$i] =~ /^\=(?:(?:pod[ \t]+)?begin[ \t]+)?(?:nd|natural[ \t]*docs?|jd|java[ \t]*docs?)[ \t]*$/i)
169 $lines->[$i] = '=begin nd';
# Closer.
# NOTE(review): unlike the opener pattern above, the "pod" group here has no trailing "?", so as written this only matches
# "=pod end ..." lines.  A plain "=end naturaldocs" or "=end jd" falls through untranslated even though the function header
# documents those forms.  "(?:pod[ \t]+)?" looks like the intent - confirm.
173 elsif ($lines->[$i] =~ /^\=(?:pod[ \t]+)end[ \t]+(?:nd|natural[ \t]*docs?|jd|javadocs?)[ \t]*$/i)
175 $lines->[$i] = '=end nd';
# A bare "=cut" line.  The function header says only a "=cut" that closes a Natural Docs block is rewritten; the condition
# guarding the assignment below is not visible in this listing.
179 elsif ($lines->[$i] =~ /^\=cut[ \t]*$/i)
183 $lines->[$i] = '=end nd';
# Marks this line so the parser can still end the surrounding POD (see the function header).
188 elsif ($mustBreakPOD)
190 $lines->[$i] = '(NDPODBREAK)' . $lines->[$i];
198 ###############################################################################
199 # Group: Statement Parsing Functions
200 # All functions here assume that the current position is at the beginning of a statement.
202 # Note for developers: I am well aware that the code in these functions does not check if we're past the end of the tokens as
203 # often as it should. We're making use of the fact that Perl will always return undef in these cases to keep the code simpler.
207 # Function: TryToGetPackage
209 # Determines whether the position is at a package declaration statement, and if so, generates a topic for it, skips it, and
212 sub TryToGetPackage #(indexRef, lineNumberRef)
214 my ($self, $indexRef, $lineNumberRef) = @_;
215 my $tokens = $self->Tokens();
217 if (lc($tokens->[$$indexRef]) eq 'package')
219 my $index = $$indexRef + 1;
220 my $lineNumber = $$lineNumberRef;
222 if (!$self->TryToSkipWhitespace(\$index, \$lineNumber))
227 while ($tokens->[$index] =~ /^[a-z_\:]/i)
229 $name .= $tokens->[$index];
236 my $autoTopic = NaturalDocs::Parser::ParsedTopic->New(::TOPIC_CLASS(), $name,
239 undef, undef, $$lineNumberRef);
240 $self->AddAutoTopic($autoTopic);
242 NaturalDocs::Parser->OnClass($autoTopic->Symbol());
244 $self->SetPackage($autoTopic->Symbol(), $$lineNumberRef);
247 $$lineNumberRef = $lineNumber;
248 $self->SkipRestOfStatement($indexRef, $lineNumberRef);
258 # Function: TryToGetBase
260 # Determines whether the position is at a package base declaration statement, and if so, calls
261 # <NaturalDocs::Parser->OnClassParent()>.
263 # Supported Syntaxes:
265 # > use base [list of strings]
266 # > @ISA = [list of strings]
267 # > @[package]::ISA = [list of strings]
268 # > our @ISA = [list of strings]
270 sub TryToGetBase #(indexRef, lineNumberRef)
272 my ($self, $indexRef, $lineNumberRef) = @_;
273 my $tokens = $self->Tokens();
275 my ($index, $lineNumber, $class, $parents);
277 if (lc($tokens->[$$indexRef]) eq 'use')
279 $index = $$indexRef + 1;
280 $lineNumber = $$lineNumberRef;
282 if (!$self->TryToSkipWhitespace(\$index, \$lineNumber) ||
283 lc($tokens->[$index]) ne 'base')
287 $self->TryToSkipWhitespace(\$index, \$lineNumber);
289 $parents = $self->TryToGetListOfStrings(\$index, \$lineNumber);
295 $lineNumber = $$lineNumberRef;
297 if (lc($tokens->[$index]) eq 'our')
300 $self->TryToSkipWhitespace(\$index, \$lineNumber);
303 if ($tokens->[$index] eq '@')
307 while ($index < scalar @$tokens)
309 if ($tokens->[$index] eq 'ISA')
312 $self->TryToSkipWhitespace(\$index, \$lineNumber);
314 if ($tokens->[$index] eq '=')
317 $self->TryToSkipWhitespace(\$index, \$lineNumber);
319 $parents = $self->TryToGetListOfStrings(\$index, \$lineNumber);
325 # If token isn't ISA...
326 elsif ($tokens->[$index] =~ /^[a-z0-9_:]/i)
328 $class .= $tokens->[$index];
337 if (defined $parents)
342 my @classIdentifiers = split(/::/, $class);
343 $class = NaturalDocs::SymbolString->Join(@classIdentifiers);
346 { $class = $self->CurrentScope(); };
348 foreach my $parent (@$parents)
350 my @parentIdentifiers = split(/::/, $parent);
351 my $parentSymbol = NaturalDocs::SymbolString->Join(@parentIdentifiers);
353 NaturalDocs::Parser->OnClassParent($class, $parentSymbol, undef, undef, ::RESOLVE_ABSOLUTE());
357 $$lineNumberRef = $lineNumber;
358 $self->SkipRestOfStatement($indexRef, $lineNumberRef);
368 # Function: TryToGetFunction
370 # Determines whether the position is at a function declaration statement, and if so, generates a topic for it, skips it, and
373 sub TryToGetFunction #(indexRef, lineNumberRef)
375 my ($self, $indexRef, $lineNumberRef) = @_;
376 my $tokens = $self->Tokens();
378 if ( lc($tokens->[$$indexRef]) eq 'sub')
380 my $prototypeStart = $$indexRef;
381 my $prototypeStartLine = $$lineNumberRef;
382 my $prototypeEnd = $$indexRef + 1;
383 my $prototypeEndLine = $$lineNumberRef;
385 if ( !$self->TryToSkipWhitespace(\$prototypeEnd, \$prototypeEndLine) ||
386 $tokens->[$prototypeEnd] !~ /^[a-z_]/i )
389 my $name = $tokens->[$prototypeEnd];
392 # We parsed 'sub [name]'. Now keep going until we find a semicolon or a brace.
396 if ($prototypeEnd >= scalar @$tokens)
399 # End if we find a semicolon, since it means we found a predeclaration rather than an actual function.
400 elsif ($tokens->[$prototypeEnd] eq ';')
403 elsif ($tokens->[$prototypeEnd] eq '{')
407 my $prototype = $self->NormalizePrototype( $self->CreateString($prototypeStart, $prototypeEnd) );
409 $self->AddAutoTopic(NaturalDocs::Parser::ParsedTopic->New(::TOPIC_FUNCTION(), $name,
410 $self->CurrentScope(), undef,
412 undef, undef, $prototypeStartLine));
414 $$indexRef = $prototypeEnd;
415 $$lineNumberRef = $prototypeEndLine;
417 $self->SkipRestOfStatement($indexRef, $lineNumberRef);
423 { $self->GenericSkip(\$prototypeEnd, \$prototypeEndLine, 0, 1); };
432 # Function: TryToGetVariable
434 # Determines if the position is at a variable declaration statement, and if so, generates a topic for it, skips it, and returns
437 # Supported Syntaxes:
439 # - Supports variables declared with "my", "our", and "local".
440 # - Supports multiple declarations in one statement, such as "my ($x, $y);".
441 # - Supports types and attributes.
443 sub TryToGetVariable #(indexRef, lineNumberRef)
445 my ($self, $indexRef, $lineNumberRef) = @_;
446 my $tokens = $self->Tokens();
448 my $firstToken = lc( $tokens->[$$indexRef] );
450 if ($firstToken eq 'my' || $firstToken eq 'our' || $firstToken eq 'local')
452 my $prototypeStart = $$indexRef;
453 my $prototypeStartLine = $$lineNumberRef;
454 my $prototypeEnd = $$indexRef + 1;
455 my $prototypeEndLine = $$lineNumberRef;
457 $self->TryToSkipWhitespace(\$prototypeEnd, \$prototypeEndLine);
460 # Get the type if present.
464 if ($tokens->[$prototypeEnd] =~ /^[a-z\:]/i)
468 $type .= $tokens->[$prototypeEnd];
471 while ($tokens->[$prototypeEnd] =~ /^[a-z\:]/i);
473 if (!$self->TryToSkipWhitespace(\$prototypeEnd, \$prototypeEndLine))
478 # Get the name, or possibly names.
480 if ($tokens->[$prototypeEnd] eq '(')
482 # If there's multiple variables, we'll need to build a custom prototype for each one. $firstToken already has the
483 # declaring word. We're going to store each name in @names, and we're going to use $prototypeStart and
484 # $prototypeEnd to capture any properties appearing after the list.
490 $prototypeStart = $prototypeEnd + 1;
491 $prototypeStartLine = $prototypeEndLine;
495 $self->TryToSkipWhitespace(\$prototypeStart, \$prototypeStartLine);
497 $name = $self->TryToGetVariableName(\$prototypeStart, \$prototypeStartLine);
504 $self->TryToSkipWhitespace(\$prototypeStart, \$prototypeStartLine);
506 # We can have multiple commas in a row. We can also have trailing commas. However, the parenthesis must
507 # not start with a comma or be empty, hence this logic does not appear earlier.
508 while ($tokens->[$prototypeStart] eq ',')
511 $self->TryToSkipWhitespace(\$prototypeStart, \$prototypeStartLine);
516 if ($tokens->[$prototypeStart] eq ')')
526 # Now find the end of the prototype.
528 $prototypeEnd = $prototypeStart;
529 $prototypeEndLine = $prototypeStartLine;
531 while ($prototypeEnd < scalar @$tokens &&
532 $tokens->[$prototypeEnd] !~ /^[\;\=]/)
538 my $prototypePrefix = $firstToken . ' ';
540 { $prototypePrefix .= $type . ' '; };
542 my $prototypeSuffix = ' ' . $self->CreateString($prototypeStart, $prototypeEnd);
544 foreach $name (@names)
546 my $prototype = $self->NormalizePrototype( $prototypePrefix . $name . $prototypeSuffix );
548 $self->AddAutoTopic(NaturalDocs::Parser::ParsedTopic->New(::TOPIC_VARIABLE(), $name,
549 $self->CurrentScope(), undef,
551 undef, undef, $prototypeStartLine));
554 $self->SkipRestOfStatement(\$prototypeEnd, \$prototypeEndLine);
556 $$indexRef = $prototypeEnd;
557 $$lineNumberRef = $prototypeEndLine;
560 else # no parenthesis
562 my $name = $self->TryToGetVariableName(\$prototypeEnd, \$prototypeEndLine);
567 while ($prototypeEnd < scalar @$tokens &&
568 $tokens->[$prototypeEnd] !~ /^[\;\=]/)
573 my $prototype = $self->NormalizePrototype( $self->CreateString($prototypeStart, $prototypeEnd) );
575 $self->AddAutoTopic(NaturalDocs::Parser::ParsedTopic->New(::TOPIC_VARIABLE(), $name,
576 $self->CurrentScope(), undef,
578 undef, undef, $prototypeStartLine));
580 $self->SkipRestOfStatement(\$prototypeEnd, \$prototypeEndLine);
582 $$indexRef = $prototypeEnd;
583 $$lineNumberRef = $prototypeEndLine;
594 # Function: TryToGetVariableName
596 # Determines if the position is at a variable name, and if so, skips it and returns the name.
598 sub TryToGetVariableName #(indexRef, lineNumberRef)
600 my ($self, $indexRef, $lineNumberRef) = @_;
601 my $tokens = $self->Tokens();
605 if ($tokens->[$$indexRef] =~ /^[\$\@\%\*]/)
607 $name .= $tokens->[$$indexRef];
610 $self->TryToSkipWhitespace($indexRef, $lineNumberRef);
612 if ($tokens->[$$indexRef] =~ /^[a-z_]/i)
614 $name .= $tokens->[$$indexRef];
626 # Function: TryToGetListOfStrings
628 # Attempts to retrieve a list of strings from the current position. Returns an arrayref of them if any are found, or undef if none.
629 # It stops the moment it reaches a non-string, so "string1, variable, string2" will only return string1.
631 # Supported Syntaxes:
633 # - Supports parenthesis.
634 # - Supports all string forms supported by <TryToSkipString()>.
635 # - Supports qw() string arrays.
637 sub TryToGetListOfStrings #(indexRef, lineNumberRef)
639 my ($self, $indexRef, $lineNumberRef) = @_;
640 my $tokens = $self->Tokens();
645 while ($$indexRef < scalar @$tokens)
647 # We'll tolerate parenthesis.
648 if ($tokens->[$$indexRef] eq '(')
653 elsif ($tokens->[$$indexRef] eq ')')
655 if ($parenthesis == 0)
661 elsif ($tokens->[$$indexRef] eq ',')
667 my ($startContent, $endContent);
668 my $symbolIndex = $$indexRef;
670 if ($self->TryToSkipString($indexRef, $lineNumberRef, \$startContent, \$endContent))
672 my $content = $self->CreateString($startContent, $endContent);
674 if (!defined $strings)
677 if (lc($tokens->[$symbolIndex]) eq 'qw')
679 $content =~ tr/ \t\n/ /s;
682 my @qwStrings = split(/ /, $content);
684 push @$strings, @qwStrings;
688 push @$strings, $content;
695 $self->TryToSkipWhitespace($indexRef, $lineNumberRef);
702 ###############################################################################
703 # Group: Low Level Parsing Functions
707 # Function: GenericSkip
709 # Advances the position one place through general code.
711 # - If the position is on a comment or string, it will skip it completely.
712 # - If the position is on an opening symbol, it will skip until past the closing symbol.
713 # - If the position is on a regexp or quote-like operator, it will skip it completely.
714 # - If the position is on a backslash, it will skip it and the following token.
715 # - If the position is on whitespace (including comments), it will skip it completely.
716 # - Otherwise it skips one token.
720 # indexRef - A reference to the current index.
721 # lineNumberRef - A reference to the current line number.
722 # noRegExps - If set, does not test for regular expressions.
724 sub GenericSkip #(indexRef, lineNumberRef, noRegExps, allowStringedClosingParens)
# allowStringedClosingParens - Optional.  Passed through to <GenericSkipUntilAfter()>.  When set, the parenthesis branch
# below does not rescan after stopping on a closing parenthesis that is preceded by a dollar sign.
726 my ($self, $indexRef, $lineNumberRef, $noRegExps, $allowStringedClosingParens) = @_;
727 my $tokens = $self->Tokens();
# A backslash followed by anything other than a line break.
729 if ($tokens->[$$indexRef] eq "\\" && $$indexRef + 1 < scalar @$tokens && $tokens->[$$indexRef+1] ne "\n")
732 # Note that we don't want to count backslashed ()[]{} since they could be in regexps. Also, ()[] are valid variable names
733 # when preceded by a string (dollar sign) character.
735 # We can ignore the scope stack because we're just skipping everything without parsing, and we need recursion anyway.
736 elsif ($tokens->[$$indexRef] eq '{' && !$self->IsBackslashed($$indexRef))
739 $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, '}', $noRegExps, $allowStringedClosingParens);
741 elsif ($tokens->[$$indexRef] eq '(' && !$self->IsBackslashed($$indexRef) && !$self->IsStringed($$indexRef))
743 # Temporarily allow stringed closing parenthesis if it looks like we're in an anonymous function declaration with Perl's
744 # cheap version of prototypes, such as "my $_declare = sub($) {}".
745 my $tempAllowStringedClosingParens = $allowStringedClosingParens;
746 if (!$allowStringedClosingParens)
# Look at the token before the parenthesis, stepping over one whitespace token if present, to see whether it is "sub".
748 my $tempIndex = $$indexRef - 1;
749 if ($tempIndex >= 0 && $tokens->[$tempIndex] =~ /^[ \t]/)
751 if ($tempIndex >= 0 && $tokens->[$tempIndex] eq 'sub')
752 { $tempAllowStringedClosingParens = 1; }
# Keep skipping while the ")" we stopped after was preceded by a dollar sign, unless stringed closers are being allowed.
758 { $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, ')', $noRegExps, $tempAllowStringedClosingParens); }
759 while ($$indexRef < scalar @$tokens && $self->IsStringed($$indexRef - 1) && !$tempAllowStringedClosingParens);
761 elsif ($tokens->[$$indexRef] eq '[' && !$self->IsBackslashed($$indexRef) && !$self->IsStringed($$indexRef))
# Same rescan rule as for parentheses: a "]" preceded by a dollar sign does not end the bracket group.
766 { $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, ']', $noRegExps, $allowStringedClosingParens); }
767 while ($$indexRef < scalar @$tokens && $self->IsStringed($$indexRef - 1));
# Whitespace, strings, Here Doc declarations and (unless noRegExps is set) regexps advance the position themselves.
770 elsif ($self->TryToSkipWhitespace($indexRef, $lineNumberRef) ||
771 $self->TryToSkipString($indexRef, $lineNumberRef) ||
772 $self->TryToSkipHereDocDeclaration($indexRef, $lineNumberRef) ||
773 (!$noRegExps && $self->TryToSkipRegexp($indexRef, $lineNumberRef) ) )
783 # Function: GenericSkipUntilAfter
785 # Advances the position via <GenericSkip()> until a specific token is reached and passed.
787 sub GenericSkipUntilAfter #(indexRef, lineNumberRef, token, noRegExps, allowStringedClosingParens)
789 my ($self, $indexRef, $lineNumberRef, $token, $noRegExps, $allowStringedClosingParens) = @_;
790 my $tokens = $self->Tokens();
792 while ($$indexRef < scalar @$tokens && $tokens->[$$indexRef] ne $token)
793 { $self->GenericSkip($indexRef, $lineNumberRef, $noRegExps, $allowStringedClosingParens); };
795 if ($tokens->[$$indexRef] eq "\n")
796 { $$lineNumberRef++; };
802 # Function: GenericRegexpSkip
804 # Advances the position one place through regexp code.
806 # - If the position is on an opening symbol, it will skip until past the closing symbol.
807 # - If the position is on a backslash, it will skip it and the following token.
808 # - If the position is on whitespace (not including comments), it will skip it completely.
809 # - Otherwise it skips one token.
811 # Also differs from <GenericSkip()> in that the parenthesis in $( and $) do count against the scope, where they wouldn't
816 # indexRef - A reference to the current index.
817 # lineNumberRef - A reference to the current line number.
818 # inBrackets - Whether we're in brackets or not. If true, we don't care about matching braces and parenthesis.
820 sub GenericRegexpSkip #(indexRef, lineNumberRef, inBrackets)
822 my ($self, $indexRef, $lineNumberRef, $inBrackets) = @_;
823 my $tokens = $self->Tokens();
825 if ($tokens->[$$indexRef] eq "\\" && $$indexRef + 1 < scalar @$tokens && $tokens->[$$indexRef+1] ne "\n")
828 # We can ignore the scope stack because we're just skipping everything without parsing, and we need recursion anyway.
829 elsif ($tokens->[$$indexRef] eq '{' && !$self->IsBackslashed($$indexRef) && !$inBrackets)
832 $self->GenericRegexpSkipUntilAfter($indexRef, $lineNumberRef, '}');
834 elsif ($tokens->[$$indexRef] eq '(' && !$self->IsBackslashed($$indexRef) && !$inBrackets)
837 $self->GenericRegexpSkipUntilAfter($indexRef, $lineNumberRef, ')');
839 elsif ($tokens->[$$indexRef] eq '[' && !$self->IsBackslashed($$indexRef) && !$self->IsStringed($$indexRef))
844 { $self->GenericRegexpSkipUntilAfter($indexRef, $lineNumberRef, ']'); }
845 while ($$indexRef < scalar @$tokens && $self->IsStringed($$indexRef - 1));
848 elsif ($tokens->[$$indexRef] eq "\n")
860 # Function: GenericRegexpSkipUntilAfter
862 # Advances the position via <GenericRegexpSkip()> until a specific token is reached and passed.
864 sub GenericRegexpSkipUntilAfter #(indexRef, lineNumberRef, token)
866 my ($self, $indexRef, $lineNumberRef, $token) = @_;
867 my $tokens = $self->Tokens();
869 my $inBrackets = ( $token eq ']' );
871 while ($$indexRef < scalar @$tokens && $tokens->[$$indexRef] ne $token)
872 { $self->GenericRegexpSkip($indexRef, $lineNumberRef, $inBrackets); };
874 if ($tokens->[$$indexRef] eq "\n")
875 { $$lineNumberRef++; };
881 # Function: SkipRestOfStatement
883 # Advances the position via <GenericSkip()> until after the end of the current statement, which is defined as a semicolon or
884 # a brace group. Of course, either of those appearing inside parenthesis, a nested brace group, etc. don't count.
886 sub SkipRestOfStatement #(indexRef, lineNumberRef)
888 my ($self, $indexRef, $lineNumberRef) = @_;
889 my $tokens = $self->Tokens();
891 while ($$indexRef < scalar @$tokens &&
892 $tokens->[$$indexRef] ne ';' &&
893 !($tokens->[$$indexRef] eq '{' && !$self->IsStringed($$indexRef)) )
895 $self->GenericSkip($indexRef, $lineNumberRef);
898 if ($tokens->[$$indexRef] eq ';')
900 elsif ($tokens->[$$indexRef] eq '{')
901 { $self->GenericSkip($indexRef, $lineNumberRef); };
906 # Function: TryToSkipWhitespace
908 # If the current position is on whitespace it skips them and returns true. If there are a number of these in a row, it skips them
915 # - All comment forms supported by <TryToSkipComment()>
918 sub TryToSkipWhitespace #(indexRef, lineNumberRef)
920 my ($self, $indexRef, $lineNumberRef) = @_;
921 my $tokens = $self->Tokens();
925 while ($$indexRef < scalar @$tokens)
927 if ($self->TryToSkipHereDocContent($indexRef, $lineNumberRef) ||
928 $self->TryToSkipComment($indexRef, $lineNumberRef))
932 elsif ($tokens->[$$indexRef] =~ /^[ \t]/)
937 elsif ($tokens->[$$indexRef] eq "\n")
952 # Function: TryToSkipComment
953 # If the current position is on a comment, skip past it and return true.
955 sub TryToSkipComment #(indexRef, lineNumberRef)
957 my ($self, $indexRef, $lineNumberRef) = @_;
959 return ( $self->TryToSkipLineComment($indexRef, $lineNumberRef) ||
960 $self->TryToSkipPODComment($indexRef, $lineNumberRef) );
965 # Function: TryToSkipLineComment
966 # If the current position is on a line comment symbol, skip past it and return true.
968 sub TryToSkipLineComment #(indexRef, lineNumberRef)
970 my ($self, $indexRef, $lineNumberRef) = @_;
971 my $tokens = $self->Tokens();
973 # Note that $#var is not a comment.
974 if ($tokens->[$$indexRef] eq '#' && !$self->IsStringed($$indexRef))
976 $self->SkipRestOfLine($indexRef, $lineNumberRef);
985 # Function: TryToSkipPODComment
986 # If the current position is on a POD comment symbol, skip past it and return true.
988 sub TryToSkipPODComment #(indexRef, lineNumberRef)
990 my ($self, $indexRef, $lineNumberRef) = @_;
991 my $tokens = $self->Tokens();
993 # Note that whitespace is not allowed before the equals sign. It must directly start a line.
994 if ($tokens->[$$indexRef] eq '=' &&
995 ( $$indexRef == 0 || $tokens->[$$indexRef - 1] eq "\n" ) &&
996 $tokens->[$$indexRef + 1] =~ /^[a-z]/i )
998 # Skip until =cut or (NDPODBREAK). Note that it's theoretically possible for =cut to appear without a prior POD directive.
1002 if ($tokens->[$$indexRef] eq '=' && lc( $tokens->[$$indexRef + 1] ) eq 'cut')
1004 $self->SkipRestOfLine($indexRef, $lineNumberRef);
1007 elsif ($tokens->[$$indexRef] eq '(' && $$indexRef + 2 < scalar @$tokens &&
1008 $tokens->[$$indexRef+1] eq 'NDPODBREAK' && $tokens->[$$indexRef+2] eq ')')
1015 $self->SkipRestOfLine($indexRef, $lineNumberRef);
1018 while ($$indexRef < scalar @$tokens);
1023 # It's also possible that (NDPODBREAK) will appear without any opening pod statement because "=begin nd" and "=cut" will
1024 # still result in one. We need to pick off the stray (NDPODBREAK).
1025 elsif ($tokens->[$$indexRef] eq '(' && $$indexRef + 2 < scalar @$tokens &&
1026 $tokens->[$$indexRef+1] eq 'NDPODBREAK' && $tokens->[$$indexRef+2] eq ')')
1038 # Function: TryToSkipString
1039 # If the current position is on a string delimiter, skip past the string and return true.
1043 # indexRef - A reference to the index of the position to start at.
1044 # lineNumberRef - A reference to the line number of the position.
1045 # startContentIndexRef - A reference to the variable in which to store the index of the first content token. May be undef.
1046 # endContentIndexRef - A reference to the variable in which to store the index of the end of the content, which is one past
1047 # the last content token. May be undef.
1051 # Whether the position was at a string. The index, line number, and content index variables will only be changed if true.
1055 # - Supports quotes, apostrophes, backticks, q(), qq(), qx(), and qw().
1056 # - All symbols are supported for the letter forms.
1058 sub TryToSkipString #(indexRef, lineNumberRef, startContentIndexRef, endContentIndexRef)
1060 my ($self, $indexRef, $lineNumberRef, $startContentIndexRef, $endContentIndexRef) = @_;
1061 my $tokens = $self->Tokens();
1063 # The three string delimiters. All three are Perl variables when preceded by a dollar sign.
1064 if (!$self->IsStringed($$indexRef) &&
1065 ( $self->SUPER::TryToSkipString($indexRef, $lineNumberRef, '\'', '\'', $startContentIndexRef, $endContentIndexRef) ||
1066 $self->SUPER::TryToSkipString($indexRef, $lineNumberRef, '"', '"', $startContentIndexRef, $endContentIndexRef) ||
1067 $self->SUPER::TryToSkipString($indexRef, $lineNumberRef, '`', '`', $startContentIndexRef, $endContentIndexRef) ) )
1071 elsif ($tokens->[$$indexRef] =~ /^(?:q|qq|qx|qw)$/i &&
1072 ($$indexRef == 0 || $tokens->[$$indexRef - 1] !~ /^[\$\%\@\*]$/))
1076 $self->TryToSkipWhitespace($indexRef, $lineNumberRef);
1078 my $openingSymbol = $tokens->[$$indexRef];
1081 if ($openingSymbol eq '{')
1082 { $closingSymbol = '}'; }
1083 elsif ($openingSymbol eq '(')
1084 { $closingSymbol = ')'; }
1085 elsif ($openingSymbol eq '[')
1086 { $closingSymbol = ']'; }
1087 elsif ($openingSymbol eq '<')
1088 { $closingSymbol = '>'; }
1090 { $closingSymbol = $openingSymbol; };
1092 $self->SUPER::TryToSkipString($indexRef, $lineNumberRef, $openingSymbol, $closingSymbol,
1093 $startContentIndexRef, $endContentIndexRef);
1103 # Function: TryToSkipHereDocDeclaration
1105 # If the current position is on a Here Doc declaration, add its terminators to <hereDocTerminators> and skip it.
1110 # - Supports << "String" with all string forms supported by <TryToSkipString()>.
1112 sub TryToSkipHereDocDeclaration #(indexRef, lineNumberRef)
1114 my ($self, $indexRef, $lineNumberRef) = @_;
1115 my $tokens = $self->Tokens();
1117 my $index = $$indexRef;
1118 my $lineNumber = $$lineNumberRef;
1120 if ($tokens->[$index] eq '<' && $tokens->[$index + 1] eq '<')
1125 # No whitespace allowed with the bare word.
1126 if ($tokens->[$index] =~ /^[a-z0-9_]/i)
1128 push @hereDocTerminators, [ $tokens->[$index] ];
1134 $self->TryToSkipWhitespace(\$index, \$lineNumber);
1136 my ($contentStart, $contentEnd);
1137 if ($self->TryToSkipString(\$index, \$lineNumber, \$contentStart, \$contentEnd))
1139 push @hereDocTerminators, [ @{$tokens}[$contentStart..$contentEnd - 1] ];
1146 $$indexRef = $index;
1147 $$lineNumberRef = $lineNumber;
1158 # Function: TryToSkipHereDocContent
1160 # If the current position is at the beginning of a line and there are entries in <hereDocTerminators>, skips lines until all the
1161 # terminators are exhausted or we reach the end of the file.
1165 # Whether the position was on Here Doc content.
1167 sub TryToSkipHereDocContent #(indexRef, lineNumberRef)
1169 my ($self, $indexRef, $lineNumberRef) = @_;
1170 my $tokens = $self->Tokens();
1172 # We don't use IsFirstLineToken() because it really needs to be the first line token. Whitespace is not allowed.
1173 if ($$indexRef > 0 && $tokens->[$$indexRef - 1] eq "\n")
1175 my $success = (scalar @hereDocTerminators > 0);
1177 while (scalar @hereDocTerminators && $$indexRef < scalar @$tokens)
1179 my $terminatorIndex = 0;
1181 while ($hereDocTerminators[0]->[$terminatorIndex] eq $tokens->[$$indexRef])
1187 if ($terminatorIndex == scalar @{$hereDocTerminators[0]} &&
1188 ($tokens->[$$indexRef] eq "\n" || ($tokens->[$$indexRef] =~ /^[ \t]/ && $tokens->[$$indexRef + 1] eq "\n")) )
1190 shift @hereDocTerminators;
1195 { $self->SkipRestOfLine($indexRef, $lineNumberRef); };
1207 # Function: TryToSkipRegexp
1208 # If the current position is on a regular expression or a quote-like operator, skip past it and return true.
1212 # - Supports //, m//, qr//, s///, tr///, and y///.
1213 # - All symbols are supported for the letter forms.
1214 # - ?? is *not* supported because it could cause problems with ?: statements. The generic parser has a good chance of
1215 # successfully stumbling through a regex, whereas the regex code will almost certainly see the rest of the file as part of it.
1217 sub TryToSkipRegexp #(indexRef, lineNumberRef)
# indexRef and lineNumberRef are references to the caller's position.  They are written only after the first delimited
# section has been skipped; until then the function works on local copies.
1219 my ($self, $indexRef, $lineNumberRef) = @_;
1220 my $tokens = $self->Tokens();
1224 # If it's a supported character sequence that's not a variable (ex $qr) or package (ex a::tr)...
1225 if ($tokens->[$$indexRef] =~ /^(?:m|qr|s|tr|y)$/i &&
1226 ($$indexRef == 0 || $tokens->[$$indexRef - 1] !~ /^[\$\%\@\*\-\>\:]$/) )
1229 elsif ($tokens->[$$indexRef] eq '/' && !$self->IsStringed($$indexRef))
1231 # This is a bit of a hack. If we find a random slash, it could be a divide operator or a bare regexp. Find the first previous
1232 # non-whitespace token and if it's text, a closing brace, or a string, assume it's a divide operator. (Strings don't make
1233 # much practical sense there but a regexp would be impossible.) Otherwise assume it's a regexp.
1235 # We make a special consideration for split() appearing without parenthesis. If the previous token is split and it's not a
1236 # variable, assume it is a regexp even though it fails the above test.
1238 my $index = $$indexRef - 1;
1240 while ($index >= 0 && $tokens->[$index] =~ /^(?: |\t|\n)/)
# NOTE(review): in /^split|grep$/ below the anchors bind tighter than the alternation, so it matches any token that starts
# with "split" or ends with "grep", not just those two exact words.  /^(?:split|grep)$/ looks like the intent - confirm.
# The test also covers grep, although the comment above only mentions split.
1243 if ($index < 0 || $tokens->[$index] !~ /^[a-zA-Z0-9_\)\]\}\'\"\`]/ ||
1244 ($tokens->[$index] =~ /^split|grep$/ && $index > 0 && $tokens->[$index-1] !~ /^[\$\%\@\*]$/) )
# Work on local copies so the caller's position is untouched until a regexp has actually been skipped.
1250 my $operator = lc($tokens->[$$indexRef]);
1251 my $index = $$indexRef;
1252 my $lineNumber = $$lineNumberRef;
# A bare delimiter is treated the same as m.
1254 if ($operator =~ /^[\?\/]/)
1255 { $operator = 'm'; }
1260 # Believe it or not, s#...# is allowed. We can't pass over number signs here.
1261 if ($tokens->[$index] ne '#')
1262 { $self->TryToSkipWhitespace(\$index, \$lineNumber); };
1265 if ($tokens->[$index] =~ /^\w/)
1267 if ($tokens->[$index] eq '=' && $tokens->[$index+1] eq '>')
# Bracket-style opening delimiters close with their mirror symbol; any other delimiter closes with itself.
1270 my $openingSymbol = $tokens->[$index];
1273 if ($openingSymbol eq '{')
1274 { $closingSymbol = '}'; }
1275 elsif ($openingSymbol eq '(')
1276 { $closingSymbol = ')'; }
1277 elsif ($openingSymbol eq '[')
1278 { $closingSymbol = ']'; }
1279 elsif ($openingSymbol eq '<')
1280 { $closingSymbol = '>'; }
1282 { $closingSymbol = $openingSymbol; };
1286 $self->GenericRegexpSkipUntilAfter(\$index, \$lineNumber, $closingSymbol);
# Commit the local position to the caller's variables.
1288 $$indexRef = $index;
1289 $$lineNumberRef = $lineNumber;
# s, tr and y take a second delimited section.
1291 if ($operator =~ /^(?:s|tr|y)$/)
# With mirrored delimiters the second section has its own opening symbol, possibly after whitespace.
1293 if ($openingSymbol ne $closingSymbol)
1295 $self->TryToSkipWhitespace($indexRef, $lineNumberRef);
# NOTE(review): the whitespace skip just above advances $$indexRef, not the local $index, so $index can still point at
# whitespace between the two bracketed halves (as in "s{a} {b}") when it is read here.  $tokens->[$$indexRef] looks like
# the intended lookup - confirm that no elided line resynchronizes $index.
1297 $openingSymbol = $tokens->[$index];
1299 if ($openingSymbol eq '{')
1300 { $closingSymbol = '}'; }
1301 elsif ($openingSymbol eq '(')
1302 { $closingSymbol = ')'; }
1303 elsif ($openingSymbol eq '[')
1304 { $closingSymbol = ']'; }
1305 elsif ($openingSymbol eq '<')
1306 { $closingSymbol = '>'; }
1308 { $closingSymbol = $openingSymbol; };
# The replacement half of s is skipped as general code with regexp detection turned off (the trailing 1 is noRegExps).
1313 if ($operator eq 's')
1315 $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, $closingSymbol, 1);
1317 else # ($operator eq 'tr' || $operator eq 'y')
# Scan token by token for an unescaped closing symbol, counting line breaks along the way.
1319 while ($$indexRef < scalar @$tokens &&
1320 ($tokens->[$$indexRef] ne $closingSymbol || $self->IsBackslashed($$indexRef)) )
1322 if ($tokens->[$$indexRef] eq "\n")
1323 { $$lineNumberRef++; };
1331 # We want to skip any letters after the regexp. Otherwise something like tr/a/b/s; could have the trailing s; interpreted
1332 # as another regexp. Whitespace is not allowed between the closing symbol and the letters.
1334 if ($tokens->[$$indexRef] =~ /^[a-z]/i)
1345 ###############################################################################
1346 # Group: Support Functions
1350 # Function: IsStringed
1352 # Returns whether the position is after a string (dollar sign) character. Returns false if it's preceded by two dollar signs so
1353 # "if ($x == $$)" doesn't skip the closing parenthesis as stringed.
1357 # index - The index of the position.
1359 sub IsStringed #(index)
1361 my ($self, $index) = @_;
1362 my $tokens = $self->Tokens();
1364 if ($index > 0 && $tokens->[$index - 1] eq '$' && !($index > 1 && $tokens->[$index - 2] eq '$'))