1 ###############################################################################
3 # Class: NaturalDocs::Languages::Simple
5 ###############################################################################
7 # A class containing the characteristics of a particular programming language for basic support within Natural Docs.
8 # Also serves as a base class for languages that break from general conventions, such as not having parameter lists use
9 # parentheses and commas.
11 ###############################################################################
13 # This file is part of Natural Docs, which is Copyright © 2003-2010 Greg Valure
14 # Natural Docs is licensed under version 3 of the GNU Affero General Public License (AGPL)
15 # Refer to License.txt for the complete details
20 package NaturalDocs::Languages::Simple;
22 use base 'NaturalDocs::Languages::Base';
# The exported names are the ENDER_* result-code constants defined with `use constant` later in this file.
25 our @EXPORT = ( 'ENDER_ACCEPT', 'ENDER_IGNORE', 'ENDER_ACCEPT_AND_CONTINUE', 'ENDER_REVERT_TO_ACCEPTED' );
# Declares the member constants used to index into the object (e.g. $self->[PACKAGE_SEPARATOR]) together with the
# names of their accessor functions.  SetPackageSeparator() and SetEnumValues() are not listed here because they are
# written by hand in this file so that they can also set the matching *_WAS_SET member.
# NOTE(review): the 'duparrayref' suffix presumably makes the generated setter store a copy of the passed arrayref --
# confirm against NaturalDocs::DefineMembers.
28 use NaturalDocs::DefineMembers 'LINE_COMMENT_SYMBOLS', 'LineCommentSymbols()', 'SetLineCommentSymbols() duparrayref',
29 'BLOCK_COMMENT_SYMBOLS', 'BlockCommentSymbols()',
30 'SetBlockCommentSymbols() duparrayref',
32 'LINE_EXTENDER', 'LineExtender()', 'SetLineExtender()',
33 'PACKAGE_SEPARATOR', 'PackageSeparator()',
34 'PACKAGE_SEPARATOR_WAS_SET', 'PackageSeparatorWasSet()',
35 'ENUM_VALUES', 'EnumValues()',
36 'ENUM_VALUES_WAS_SET', 'EnumValuesWasSet()';
41 # Creates and returns a new object.
45 # name - The name of the language.
49 my ($selfPackage, $name) = @_;
# Construction is delegated to the parent class; only the defaults specific to this class are filled in afterwards.
51 my $object = $selfPackage->SUPER::New($name);
# Defaults: enum handling is ::ENUM_GLOBAL() and the package separator is '.'.  The matching *_WAS_SET members are
# not assigned here; in this file they are only set by SetEnumValues() and SetPackageSeparator().
53 $object->[ENUM_VALUES] = ::ENUM_GLOBAL();
54 $object->[PACKAGE_SEPARATOR] = '.';
63 # LineCommentSymbols - Returns an arrayref of symbols that start a line comment, or undef if none.
64 # SetLineCommentSymbols - Replaces the arrayref of symbols that start a line comment.
65 # BlockCommentSymbols - Returns an arrayref of start/end symbol pairs that specify a block comment, or undef if none. Pairs
66 # are specified with two consecutive array entries.
67 # SetBlockCommentSymbols - Replaces the arrayref of start/end symbol pairs that specify a block comment. Pairs are
68 # specified with two consecutive array entries.
69 # LineExtender - Returns the symbol to ignore a line break in languages where line breaks are significant.
70 # SetLineExtender - Replaces the symbol to ignore a line break in languages where line breaks are significant.
71 # PackageSeparator - Returns the package separator symbol.
72 # PackageSeparatorWasSet - Returns whether the package separator symbol was ever changed from the default.
76 # Function: SetPackageSeparator
77 # Replaces the language's package separator string.
# separator - The new package separator string.
79 sub SetPackageSeparator #(separator)
81 my ($self, $separator) = @_;
82 $self->[PACKAGE_SEPARATOR] = $separator;
# Record that the separator was explicitly set, even if the new value happens to equal the default.
83 $self->[PACKAGE_SEPARATOR_WAS_SET] = 1;
90 # EnumValues - Returns the <EnumValuesType> that describes how the language handles enums.
91 # EnumValuesWasSet - Returns whether <EnumValues> was ever changed from the default.
95 # Function: SetEnumValues
96 # Replaces the <EnumValuesType> that describes how the language handles enums.
# newBehavior - The <EnumValuesType> to store.
98 sub SetEnumValues #(EnumValuesType newBehavior)
100 my ($self, $behavior) = @_;
101 $self->[ENUM_VALUES] = $behavior;
# Record that the behavior was explicitly set, even if the new value happens to equal the default.
102 $self->[ENUM_VALUES_WAS_SET] = 1;
107 # Function: PrototypeEndersFor
109 # Returns an arrayref of prototype ender symbols for the passed <TopicType>, or undef if none.
# type - The <TopicType> to look up.
# NOTE(review): PROTOTYPE_ENDERS is not declared in this file's DefineMembers list -- presumably it comes from the
# base class; confirm.
111 sub PrototypeEndersFor #(type)
113 my ($self, $type) = @_;
# The type-to-enders hash only exists once SetPrototypeEndersFor() has been called, so check before dereferencing.
# A type with no entry in the hash yields undef from the lookup itself.
115 if (defined $self->[PROTOTYPE_ENDERS])
116 { return $self->[PROTOTYPE_ENDERS]->{$type}; }
123 # Function: SetPrototypeEndersFor
125 # Replaces the arrayref of prototype ender symbols for the passed <TopicType>.
# type - The <TopicType> whose enders are being replaced.
# enders - An arrayref of ender symbols, or undef to remove the entry for the type.
127 sub SetPrototypeEndersFor #(type, enders)
129 my ($self, $type, $enders) = @_;
# Create the type-to-enders hash on first use.
131 if (!defined $self->[PROTOTYPE_ENDERS])
132 { $self->[PROTOTYPE_ENDERS] = { }; };
# Passing undef removes whatever enders were stored for the type.
134 if (!defined $enders)
135 { delete $self->[PROTOTYPE_ENDERS]->{$type}; }
# Otherwise store a new array holding the same elements instead of keeping the caller's arrayref.
# NOTE(review): the `else` tying this assignment to the check above is not visible in this listing -- presumably
# present, since @$enders would otherwise be evaluated for undef; confirm.
138 $self->[PROTOTYPE_ENDERS]->{$type} = [ @$enders ];
145 ###############################################################################
146 # Group: Parsing Functions
150 # Function: ParseFile
152 # Parses the passed source file, sending comments acceptable for documentation to <NaturalDocs::Parser->OnComment()>
153 # and all other sections to <OnCode()>.
157 # sourceFile - The <FileName> of the source file to parse.
158 # topicsList - A reference to the list of <NaturalDocs::Parser::ParsedTopics> being built by the file.
162 # Since this class cannot automatically document the code or generate a scope record, it always returns ( undef, undef ).
# Dies with "Couldn't open input file ..." if the source file cannot be opened.
164 sub ParseFile #(sourceFile, topicsList)
166 my ($self, $sourceFile, $topicsList) = @_;
# NOTE(review): this is a two-argument open on a bareword handle, with the mode concatenated onto the file name.
# A three-argument open with a lexical handle would keep unusual file names from being read as part of the mode --
# confirm before changing, since the line reader below is handed a reference to this glob.
168 open(SOURCEFILEHANDLE, '<' . $sourceFile)
169 or die "Couldn't open input file " . $sourceFile . "\n";
171 my $lineReader = NaturalDocs::LineReader->New(\*SOURCEFILEHANDLE);
# Number of topics produced by the most recent OnComment() call; handed to OnCode() for the code that follows it.
175 my $lastCommentTopicCount = 0;
# Files of the 'Text File' language are treated as one single comment that starts on line 1.
177 if ($self->Name() eq 'Text File')
179 @commentLines = $lineReader->GetAll();
180 NaturalDocs::Parser->OnComment(\@commentLines, 1);
# For every other language the file is read line by line and split into comment runs and code runs.
185 my $line = $lineReader->Get();
188 while (defined $line)
# NOTE(review): $originalLine is not referenced anywhere in the visible remainder of this function.
190 my $originalLine = $line;
193 # Retrieve multiline comments. This leaves $line at the next line.
194 # We check for multiline comments before single line comments because in Lua the symbols are --[[ and --.
196 if (my $closingSymbol = $self->StripOpeningBlockSymbols(\$line, $self->BlockCommentSymbols()))
198 # Note that it is possible for a multiline comment to start correctly but not end so. We want those comments to stay in
199 # the code. For example, look at this prototype with this splint annotation:
201 # int get_array(integer_t id,
202 # /*@out@*/ array_t array);
204 # The annotation starts correctly but doesn't end so because it is followed by code on the same line.
# $lineRemainder stays undefined until StripClosingSymbol() finds the closing symbol on a line.
210 $lineRemainder = $self->StripClosingSymbol(\$line, $closingSymbol);
212 push @commentLines, $line;
214 # If we found an end comment symbol...
215 if (defined $lineRemainder)
218 $line = $lineReader->Get();
# Anything other than spaces and tabs after the closing symbol disqualifies the block as a documentation comment.
224 if ($lineRemainder !~ /^[ \t]*$/)
226 # If there was something past the closing symbol this wasn't an acceptable comment, so move the lines to code.
227 push @codeLines, @commentLines;
231 $line = $lineReader->Get();
235 # Retrieve single line comments. This leaves $line at the next line.
237 elsif ($self->StripOpeningSymbols(\$line, $self->LineCommentSymbols()))
# Consecutive line comments are gathered into a single comment block.
241 push @commentLines, $line;
242 $line = $lineReader->Get();
247 while ($self->StripOpeningSymbols(\$line, $self->LineCommentSymbols()));
# NOTE(review): this label presumably serves as the jump target for leaving the do-while above when the input runs
# out; the jump itself is not visible in this listing -- confirm.
249 EndDo: # I hate Perl sometimes.
253 # Otherwise just add it to the code.
257 push @codeLines, $line;
258 $line = $lineReader->Get();
262 # If there were comments, send them to Parser->OnComment().
264 if (scalar @commentLines)
266 # First process any code lines before the comment.
267 if (scalar @codeLines)
# $lineNumber is advanced by the size of each run after it is reported, so every callback receives the line number
# of the first line of the run it is given.
269 $self->OnCode(\@codeLines, $lineNumber, $topicsList, $lastCommentTopicCount);
270 $lineNumber += scalar @codeLines;
274 $lastCommentTopicCount = NaturalDocs::Parser->OnComment(\@commentLines, $lineNumber);
275 $lineNumber += scalar @commentLines;
279 }; # while (defined $line)
282 # Clean up any remaining code.
283 if (scalar @codeLines)
285 $self->OnCode(\@codeLines, $lineNumber, $topicsList, $lastCommentTopicCount);
291 close(SOURCEFILEHANDLE);
293 return ( undef, undef );
300 # Called whenever a section of code is encountered by the parser. Is used to find the prototype of the last topic created.
304 # codeLines - The source code as an arrayref of lines.
305 # codeLineNumber - The line number of the first line of code.
306 # topicList - A reference to the list of <NaturalDocs::Parser::ParsedTopics> being built by the file.
307 # lastCommentTopicCount - The number of Natural Docs topics that were created by the last comment.
309 sub OnCode #(codeLines, codeLineNumber, topicList, lastCommentTopicCount)
311 my ($self, $codeLines, $codeLineNumber, $topicList, $lastCommentTopicCount) = @_;
# A prototype is only searched for when the preceding comment created at least one topic and the type of the last
# topic in the list has prototype enders defined.
313 if ($lastCommentTopicCount && defined $self->PrototypeEndersFor($topicList->[-1]->Type()))
318 # Skip all blank lines before a prototype.
319 while ($lineIndex < scalar @$codeLines && $codeLines->[$lineIndex] =~ /^[ \t]*$/)
326 my $enders = $self->PrototypeEndersFor($topicList->[-1]->Type());
328 # Add prototype lines until we reach the end of the prototype or the end of the code lines.
329 while ($lineIndex < scalar @$codeLines)
# The line break is added back before the line extender check so that RemoveLineExtender() can test for it.
331 my $line = $self->RemoveLineExtender($codeLines->[$lineIndex] . "\n");
# Tokenize: each bracket character ( ) [ ] { } < > becomes its own token and every run of other characters becomes
# one token.
333 push @tokens, $line =~ /([^\(\)\[\]\{\}\<\>]+|.)/g;
335 while ($tokenIndex < scalar @tokens)
337 # If we're not inside brackets, check for ender symbols.
338 if (!scalar @brackets)
# $startingIndex is the offset within the current token from which the next ender search begins.
340 my $startingIndex = 0;
# An $enderIndex of -1 means no ender symbol was found in the rest of the token.
345 my ($enderIndex, $ender) = ::FindFirstSymbol($tokens[$tokenIndex], $enders, $startingIndex);
347 if ($enderIndex == -1)
351 # Seed $testPrototype from $prototype lazily, only while it is still undefined, so the prototype isn't copied for every token.
353 if (!defined $testPrototype)
354 { $testPrototype = $prototype; };
# Append the text between the search start and the ender; the ender itself is only appended if it gets rejected or
# parsing continues past it.
356 $testPrototype .= substr($tokens[$tokenIndex], $startingIndex, $enderIndex - $startingIndex);
360 # If the ender is all text and the character preceding or following it is as well, ignore it.
361 if ($ender =~ /^[a-z0-9]+$/i &&
362 ( ($enderIndex > 0 && substr($tokens[$tokenIndex], $enderIndex - 1, 1) =~ /^[a-z0-9_]$/i) ||
363 substr($tokens[$tokenIndex], $enderIndex + length($ender), 1) =~ /^[a-z0-9_]$/i ) )
364 { $enderResult = ENDER_IGNORE(); }
# Otherwise OnPrototypeEnd() decides what happens with this ender.
366 { $enderResult = $self->OnPrototypeEnd($topicList->[-1]->Type(), \$testPrototype, $ender); }
# A rejected ender becomes part of the prototype text and the search resumes right after it.
368 if ($enderResult == ENDER_IGNORE())
370 $testPrototype .= $ender;
371 $startingIndex = $enderIndex + length($ender);
373 elsif ($enderResult == ENDER_REVERT_TO_ACCEPTED())
377 else # ENDER_ACCEPT || ENDER_ACCEPT_AND_CONTINUE
379 my $titleInPrototype = $topicList->[-1]->Title();
381 # Strip parentheses so Function(2) and Function(int, int) will still match Function(anything).
382 $titleInPrototype =~ s/[\t ]*\([^\(]*$//;
# The prototype is only stored when it contains the topic's title as a substring.
384 if (index($testPrototype, $titleInPrototype) != -1)
386 $topicList->[-1]->SetPrototype( $self->NormalizePrototype($testPrototype) );
389 if ($enderResult == ENDER_ACCEPT())
391 else # ENDER_ACCEPT_AND_CONTINUE
393 $testPrototype .= $ender;
394 $startingIndex = $enderIndex + length($ender);
401 # If we are inside brackets, check for closing symbols.
# A closing bracket only counts when it matches the innermost open bracket, i.e. the last entry of @brackets.
# NOTE(review): the handling of a match is not visible in this listing -- presumably it pops @brackets; confirm.
402 elsif ( ($tokens[$tokenIndex] eq ')' && $brackets[-1] eq '(') ||
403 ($tokens[$tokenIndex] eq ']' && $brackets[-1] eq '[') ||
404 ($tokens[$tokenIndex] eq '}' && $brackets[-1] eq '{') ||
405 ($tokens[$tokenIndex] eq '>' && $brackets[-1] eq '<') )
410 # Check for opening brackets.
# A '<' whose preceding token ends in "operator" (optionally followed by spaces or tabs) is not treated as an
# opening bracket.
# NOTE(review): when $tokenIndex is 0, $tokens[$tokenIndex-1] is $tokens[-1], which is the last token in @tokens
# rather than a preceding one -- confirm this is harmless.
411 if ($tokens[$tokenIndex] =~ /^[\(\[\{]$/ ||
412 ($tokens[$tokenIndex] eq "<" && $tokens[$tokenIndex-1] !~ /operator[ \t]*$/) )
414 push @brackets, $tokens[$tokenIndex];
# The token's text is accumulated into the running prototype.
417 $prototype .= $tokens[$tokenIndex];
424 # If we got out of that while loop by running out of lines, there was no prototype.
# Result codes returned by OnPrototypeEnd() and interpreted by OnCode().  They are exported through @EXPORT at the
# top of this file; their meanings are described in the header of OnPrototypeEnd().
429 use constant ENDER_ACCEPT => 1;
430 use constant ENDER_IGNORE => 2;
431 use constant ENDER_ACCEPT_AND_CONTINUE => 3;
432 use constant ENDER_REVERT_TO_ACCEPTED => 4;
435 # Function: OnPrototypeEnd
437 # Called whenever the end of a prototype is found so that there's a chance for derived classes to mark false positives.
# Parameters:
441 # type - The <TopicType> of the prototype.
442 # prototypeRef - A reference to the prototype so far, minus the ender in dispute.
443 # ender - The ender symbol.
# Returns one of:
447 # ENDER_ACCEPT - The ender is accepted and the prototype is finished.
448 # ENDER_IGNORE - The ender is rejected and parsing should continue. Note that the prototype will be rejected as a whole
449 # if all enders are ignored before reaching the end of the code.
450 # ENDER_ACCEPT_AND_CONTINUE - The ender is accepted so the prototype may stand as is. However, the prototype might
451 # also continue on so continue parsing. If there is no accepted ender between here and
452 # the end of the code this version will be accepted instead.
453 # ENDER_REVERT_TO_ACCEPTED - The expedition from ENDER_ACCEPT_AND_CONTINUE failed. Use the last accepted
454 # version and end parsing.
# This implementation does not look at its arguments and accepts every ender.
456 sub OnPrototypeEnd #(type, prototypeRef, ender)
458 return ENDER_ACCEPT();
463 # Function: RemoveLineExtender
465 # If the passed line has a line extender, returns it without the extender or the line break that follows. If it doesn't, or there are
466 # no line extenders defined, returns the passed line unchanged.
# line - The line to check, including its trailing line break.
468 sub RemoveLineExtender #(line)
470 my ($self, $line) = @_;
472 if (defined $self->LineExtender())
# Only the last occurrence of the extender on the line is considered.
474 my $lineExtenderIndex = rindex($line, $self->LineExtender());
# It only counts as a line extender when nothing but spaces or tabs separates it from the line break that ends the
# string.
476 if ($lineExtenderIndex != -1 &&
477 substr($line, $lineExtenderIndex + length($self->LineExtender())) =~ /^[ \t]*\n$/)
# Everything from the extender onwards, including the line break, is replaced with a single space.
479 $line = substr($line, 0, $lineExtenderIndex) . ' ';