GPL_bin_fullset/NaturalDocs/Modules/NaturalDocs/Languages/Simple.pm

   1 ###############################################################################
   2 #
   3 #   Class: NaturalDocs::Languages::Simple
   4 #
   5 ###############################################################################
   6 #
   7 #   A class containing the characteristics of a particular programming language for basic support within Natural Docs.
#   Also serves as a base class for languages that break from general conventions, such as not having parameter lists use
#   parentheses and commas.
  10 #
  11 ###############################################################################
  12
  13 # This file is part of Natural Docs, which is Copyright © 2003-2010 Greg Valure
  14 # Natural Docs is licensed under version 3 of the GNU Affero General Public License (AGPL)
  15 # Refer to License.txt for the complete details
  16
  17 use strict;
  18 use integer;
  19
  20 package NaturalDocs::Languages::Simple;
  21
  22 use base 'NaturalDocs::Languages::Base';
  23 use base 'Exporter';
  24
  25 our @EXPORT = ( 'ENDER_ACCEPT', 'ENDER_IGNORE', 'ENDER_ACCEPT_AND_CONTINUE', 'ENDER_REVERT_TO_ACCEPTED' );
  26
  27
  28 use NaturalDocs::DefineMembers 'LINE_COMMENT_SYMBOLS', 'LineCommentSymbols()', 'SetLineCommentSymbols() duparrayref',
  29                                                  'BLOCK_COMMENT_SYMBOLS', 'BlockCommentSymbols()',
  30                                                                                               'SetBlockCommentSymbols() duparrayref',
  31                                                  'PROTOTYPE_ENDERS',
  32                                                  'LINE_EXTENDER', 'LineExtender()', 'SetLineExtender()',
  33                                                  'PACKAGE_SEPARATOR', 'PackageSeparator()',
  34                                                  'PACKAGE_SEPARATOR_WAS_SET', 'PackageSeparatorWasSet()',
  35                                                  'ENUM_VALUES', 'EnumValues()',
  36                                                  'ENUM_VALUES_WAS_SET', 'EnumValuesWasSet()';
  37
  38 #
  39 #   Function: New
  40 #
  41 #   Creates and returns a new object.
  42 #
  43 #   Parameters:
  44 #
  45 #       name - The name of the language.
  46 #
  47 sub New #(name)
  48     {
  49     my ($selfPackage, $name) = @_;
  50
  51     my $object = $selfPackage->SUPER::New($name);
  52
  53     $object->[ENUM_VALUES] = ::ENUM_GLOBAL();
  54     $object->[PACKAGE_SEPARATOR] = '.';
  55
  56     return $object;
  57     };
  58
  59
  60 #
  61 #   Functions: Members
  62 #
  63 #   LineCommentSymbols - Returns an arrayref of symbols that start a line comment, or undef if none.
  64 #   SetLineCommentSymbols - Replaces the arrayref of symbols that start a line comment.
  65 #   BlockCommentSymbols - Returns an arrayref of start/end symbol pairs that specify a block comment, or undef if none.  Pairs
  66 #                                        are specified with two consecutive array entries.
  67 #   SetBlockCommentSymbols - Replaces the arrayref of start/end symbol pairs that specify a block comment.  Pairs are
  68 #                                             specified with two consecutive array entries.
  69 #   LineExtender - Returns the symbol to ignore a line break in languages where line breaks are significant.
  70 #   SetLineExtender - Replaces the symbol to ignore a line break in languages where line breaks are significant.
  71 #   PackageSeparator - Returns the package separator symbol.
  72 #   PackageSeparatorWasSet - Returns whether the package separator symbol was ever changed from the default.
  73 #
  74
  75 #
  76 #   Function: SetPackageSeparator
  77 #   Replaces the language's package separator string.
  78 #
  79 sub SetPackageSeparator #(separator)
  80     {
  81     my ($self, $separator) = @_;
  82     $self->[PACKAGE_SEPARATOR] = $separator;
  83     $self->[PACKAGE_SEPARATOR_WAS_SET] = 1;
  84     };
  85
  86
  87 #
  88 #   Functions: Members
  89 #
  90 #   EnumValues - Returns the <EnumValuesType> that describes how the language handles enums.
  91 #   EnumValuesWasSet - Returns whether <EnumValues> was ever changed from the default.
  92
  93
  94 #
  95 #   Function: SetEnumValues
  96 #   Replaces the <EnumValuesType> that describes how the language handles enums.
  97 #
  98 sub SetEnumValues #(EnumValuesType newBehavior)
  99     {
 100     my ($self, $behavior) = @_;
 101     $self->[ENUM_VALUES] = $behavior;
 102     $self->[ENUM_VALUES_WAS_SET] = 1;
 103     };
 104
 105
 106 #
 107 #   Function: PrototypeEndersFor
 108 #
 109 #   Returns an arrayref of prototype ender symbols for the passed <TopicType>, or undef if none.
 110 #
 111 sub PrototypeEndersFor #(type)
 112     {
 113     my ($self, $type) = @_;
 114
 115     if (defined $self->[PROTOTYPE_ENDERS])
 116         {  return $self->[PROTOTYPE_ENDERS]->{$type};  }
 117     else
 118         {  return undef;  };
 119     };
 120
 121
 122 #
 123 #   Function: SetPrototypeEndersFor
 124 #
 125 #   Replaces the arrayref of prototype ender symbols for the passed <TopicType>.
 126 #
 127 sub SetPrototypeEndersFor #(type, enders)
 128     {
 129     my ($self, $type, $enders) = @_;
 130
 131     if (!defined $self->[PROTOTYPE_ENDERS])
 132         {  $self->[PROTOTYPE_ENDERS] = { };  };
 133
 134     if (!defined $enders)
 135         {  delete $self->[PROTOTYPE_ENDERS]->{$type};  }
 136     else
 137         {
 138         $self->[PROTOTYPE_ENDERS]->{$type} = [ @$enders ];
 139         };
 140     };
 141
 142
 143
 144
 145 ###############################################################################
 146 # Group: Parsing Functions
 147
 148
 149 #
 150 #   Function: ParseFile
 151 #
 152 #   Parses the passed source file, sending comments acceptable for documentation to <NaturalDocs::Parser->OnComment()>
 153 #   and all other sections to <OnCode()>.
 154 #
 155 #   Parameters:
 156 #
 157 #       sourceFile - The <FileName> of the source file to parse.
 158 #       topicList - A reference to the list of <NaturalDocs::Parser::ParsedTopics> being built by the file.
 159 #
 160 #   Returns:
 161 #
 162 #       Since this class cannot automatically document the code or generate a scope record, it always returns ( undef, undef ).
 163 #
 164 sub ParseFile #(sourceFile, topicsList)
 165     {
 166     my ($self, $sourceFile, $topicsList) = @_;
 167
 168     open(SOURCEFILEHANDLE, '<' . $sourceFile)
 169         or die "Couldn't open input file " . $sourceFile . "\n";
 170
 171     my $lineReader = NaturalDocs::LineReader->New(\*SOURCEFILEHANDLE);
 172
 173     my @commentLines;
 174     my @codeLines;
 175     my $lastCommentTopicCount = 0;
 176
 177     if ($self->Name() eq 'Text File')
 178         {
 179         @commentLines = $lineReader->GetAll();
 180         NaturalDocs::Parser->OnComment(\@commentLines, 1);
 181         }
 182
 183     else
 184         {
 185         my $line = $lineReader->Get();
 186         my $lineNumber = 1;
 187
 188         while (defined $line)
 189             {
 190             my $originalLine = $line;
 191
 192
 193             # Retrieve multiline comments.  This leaves $line at the next line.
 194             # We check for multiline comments before single line comments because in Lua the symbols are --[[ and --.
 195
 196             if (my $closingSymbol = $self->StripOpeningBlockSymbols(\$line, $self->BlockCommentSymbols()))
 197                 {
 198                 # Note that it is possible for a multiline comment to start correctly but not end so.  We want those comments to stay in
 199                 # the code.  For example, look at this prototype with this splint annotation:
 200                 #
 201                 # int get_array(integer_t id,
 202                 #                    /*@out@*/ array_t array);
 203                 #
 204                 # The annotation starts correctly but doesn't end so because it is followed by code on the same line.
 205
 206                 my $lineRemainder;
 207
 208                 for (;;)
 209                     {
 210                     $lineRemainder = $self->StripClosingSymbol(\$line, $closingSymbol);
 211
 212                     push @commentLines, $line;
 213
 214                     #  If we found an end comment symbol...
 215                     if (defined $lineRemainder)
 216                         {  last;  };
 217
 218                     $line = $lineReader->Get();
 219
 220                     if (!defined $line)
 221                         {  last;  };
 222                     };
 223
 224                 if ($lineRemainder !~ /^[ \t]*$/)
 225                     {
 226                     # If there was something past the closing symbol this wasn't an acceptable comment, so move the lines to code.
 227                     push @codeLines, @commentLines;
 228                     @commentLines = ( );
 229                     };
 230
 231                 $line = $lineReader->Get();
 232                 }
 233
 234
 235             # Retrieve single line comments.  This leaves $line at the next line.
 236
 237             elsif ($self->StripOpeningSymbols(\$line, $self->LineCommentSymbols()))
 238                 {
 239                 do
 240                     {
 241                     push @commentLines, $line;
 242                     $line = $lineReader->Get();
 243
 244                     if (!defined $line)
 245                         {  goto EndDo;  };
 246                     }
 247                 while ($self->StripOpeningSymbols(\$line, $self->LineCommentSymbols()));
 248
 249                 EndDo:  # I hate Perl sometimes.
 250                 }
 251
 252
 253             # Otherwise just add it to the code.
 254
 255             else
 256                 {
 257                 push @codeLines, $line;
 258                 $line = $lineReader->Get();
 259                 };
 260
 261
 262             # If there were comments, send them to Parser->OnComment().
 263
 264             if (scalar @commentLines)
 265                 {
 266                 # First process any code lines before the comment.
 267                 if (scalar @codeLines)
 268                     {
 269                     $self->OnCode(\@codeLines, $lineNumber, $topicsList, $lastCommentTopicCount);
 270                     $lineNumber += scalar @codeLines;
 271                     @codeLines = ( );
 272                     };
 273
 274                 $lastCommentTopicCount = NaturalDocs::Parser->OnComment(\@commentLines, $lineNumber);
 275                 $lineNumber += scalar @commentLines;
 276                 @commentLines = ( );
 277                 };
 278
 279             };  # while (defined $line)
 280
 281
 282         # Clean up any remaining code.
 283         if (scalar @codeLines)
 284             {
 285             $self->OnCode(\@codeLines, $lineNumber, $topicsList, $lastCommentTopicCount);
 286             @codeLines = ( );
 287             };
 288
 289         };
 290
 291     close(SOURCEFILEHANDLE);
 292
 293     return ( undef, undef );
 294     };
 295
 296
 297 #
 298 #   Function: OnCode
 299 #
 300 #   Called whenever a section of code is encountered by the parser.  Is used to find the prototype of the last topic created.
 301 #
 302 #   Parameters:
 303 #
 304 #       codeLines - The source code as an arrayref of lines.
 305 #       codeLineNumber - The line number of the first line of code.
 306 #       topicList - A reference to the list of <NaturalDocs::Parser::ParsedTopics> being built by the file.
 307 #       lastCommentTopicCount - The number of Natural Docs topics that were created by the last comment.
 308 #
 309 sub OnCode #(codeLines, codeLineNumber, topicList, lastCommentTopicCount)
 310     {
 311     my ($self, $codeLines, $codeLineNumber, $topicList, $lastCommentTopicCount) = @_;
 312
 313     if ($lastCommentTopicCount && defined $self->PrototypeEndersFor($topicList->[-1]->Type()))
 314         {
 315         my $lineIndex = 0;
 316         my $prototype;
 317
 318         # Skip all blank lines before a prototype.
 319         while ($lineIndex < scalar @$codeLines && $codeLines->[$lineIndex] =~ /^[ \t]*$/)
 320             {  $lineIndex++;  };
 321
 322         my @tokens;
 323         my $tokenIndex = 0;
 324
 325         my @brackets;
 326         my $enders = $self->PrototypeEndersFor($topicList->[-1]->Type());
 327
 328         # Add prototype lines until we reach the end of the prototype or the end of the code lines.
 329         while ($lineIndex < scalar @$codeLines)
 330             {
 331             my $line = $self->RemoveLineExtender($codeLines->[$lineIndex] . "\n");
 332
 333             push @tokens, $line =~ /([^\(\)\[\]\{\}\<\>]+|.)/g;
 334
 335             while ($tokenIndex < scalar @tokens)
 336                 {
 337                 # If we're not inside brackets, check for ender symbols.
 338                 if (!scalar @brackets)
 339                     {
 340                     my $startingIndex = 0;
 341                     my $testPrototype;
 342
 343                     for (;;)
 344                         {
 345                         my ($enderIndex, $ender) = ::FindFirstSymbol($tokens[$tokenIndex], $enders, $startingIndex);
 346
 347                         if ($enderIndex == -1)
 348                             {  last;  }
 349                         else
 350                             {
 351                             # We do this here so we don't duplicate prototype for every single token.  Just the first time an ender symbol
 352                             # is found in one.
 353                             if (!defined $testPrototype)
 354                                 {  $testPrototype = $prototype;  };
 355
 356                             $testPrototype .= substr($tokens[$tokenIndex], $startingIndex, $enderIndex - $startingIndex);
 357
 358                             my $enderResult;
 359
 360                             # If the ender is all text and the character preceding or following it is as well, ignore it.
 361                             if ($ender =~ /^[a-z0-9]+$/i &&
 362                                 ( ($enderIndex > 0 && substr($tokens[$tokenIndex], $enderIndex - 1, 1) =~ /^[a-z0-9_]$/i) ||
 363                                    substr($tokens[$tokenIndex], $enderIndex + length($ender), 1) =~ /^[a-z0-9_]$/i ) )
 364                                 {  $enderResult = ENDER_IGNORE();  }
 365                             else
 366                                 {  $enderResult = $self->OnPrototypeEnd($topicList->[-1]->Type(), \$testPrototype, $ender);  }
 367
 368                             if ($enderResult == ENDER_IGNORE())
 369                                 {
 370                                 $testPrototype .= $ender;
 371                                 $startingIndex = $enderIndex + length($ender);
 372                                 }
 373                             elsif ($enderResult == ENDER_REVERT_TO_ACCEPTED())
 374                                 {
 375                                 return;
 376                                 }
 377                             else # ENDER_ACCEPT || ENDER_ACCEPT_AND_CONTINUE
 378                                 {
 379                                 my $titleInPrototype = $topicList->[-1]->Title();
 380
 381                                 # Strip parenthesis so Function(2) and Function(int, int) will still match Function(anything).
 382                                 $titleInPrototype =~ s/[\t ]*\([^\(]*$//;
 383
 384                                 if (index($testPrototype, $titleInPrototype) != -1)
 385                                     {
 386                                     $topicList->[-1]->SetPrototype( $self->NormalizePrototype($testPrototype) );
 387                                     };
 388
 389                                 if ($enderResult == ENDER_ACCEPT())
 390                                     {  return;  }
 391                                 else # ENDER_ACCEPT_AND_CONTINUE
 392                                     {
 393                                     $testPrototype .= $ender;
 394                                     $startingIndex = $enderIndex + length($ender);
 395                                     };
 396                                 };
 397                             };
 398                         };
 399                     }
 400
 401                 # If we are inside brackets, check for closing symbols.
 402                 elsif ( ($tokens[$tokenIndex] eq ')' && $brackets[-1] eq '(') ||
 403                          ($tokens[$tokenIndex] eq ']' && $brackets[-1] eq '[') ||
 404                          ($tokens[$tokenIndex] eq '}' && $brackets[-1] eq '{') ||
 405                          ($tokens[$tokenIndex] eq '>' && $brackets[-1] eq '<') )
 406                     {
 407                     pop @brackets;
 408                     };
 409
 410                 # Check for opening brackets.
 411                                 if ($tokens[$tokenIndex] =~ /^[\(\[\{]$/ ||
 412                                     ($tokens[$tokenIndex] eq "<" && $tokens[$tokenIndex-1] !~ /operator[ \t]*$/) )
 413                         {
 414                     push @brackets, $tokens[$tokenIndex];
 415                     };
 416
 417                 $prototype .= $tokens[$tokenIndex];
 418                 $tokenIndex++;
 419                 };
 420
 421             $lineIndex++;
 422             };
 423
 424         # If we got out of that while loop by running out of lines, there was no prototype.
 425         };
 426     };
 427
 428
 429 use constant ENDER_ACCEPT => 1;
 430 use constant ENDER_IGNORE => 2;
 431 use constant ENDER_ACCEPT_AND_CONTINUE => 3;
 432 use constant ENDER_REVERT_TO_ACCEPTED => 4;
 433
 434 #
 435 #   Function: OnPrototypeEnd
 436 #
 437 #   Called whenever the end of a prototype is found so that there's a chance for derived classes to mark false positives.
 438 #
 439 #   Parameters:
 440 #
 441 #       type - The <TopicType> of the prototype.
 442 #       prototypeRef - A reference to the prototype so far, minus the ender in dispute.
 443 #       ender - The ender symbol.
 444 #
 445 #   Returns:
 446 #
 447 #       ENDER_ACCEPT - The ender is accepted and the prototype is finished.
 448 #       ENDER_IGNORE - The ender is rejected and parsing should continue.  Note that the prototype will be rejected as a whole
 449 #                                  if all enders are ignored before reaching the end of the code.
 450 #       ENDER_ACCEPT_AND_CONTINUE - The ender is accepted so the prototype may stand as is.  However, the prototype might
 451 #                                                          also continue on so continue parsing.  If there is no accepted ender between here and
 452 #                                                          the end of the code this version will be accepted instead.
 453 #       ENDER_REVERT_TO_ACCEPTED - The expedition from ENDER_ACCEPT_AND_CONTINUE failed.  Use the last accepted
 454 #                                                        version and end parsing.
 455 #
 456 sub OnPrototypeEnd #(type, prototypeRef, ender)
 457     {
 458     return ENDER_ACCEPT();
 459     };
 460
 461
 462 #
 463 #   Function: RemoveLineExtender
 464 #
 465 #   If the passed line has a line extender, returns it without the extender or the line break that follows.  If it doesn't, or there are
 466 #   no line extenders defined, returns the passed line unchanged.
 467 #
 468 sub RemoveLineExtender #(line)
 469     {
 470     my ($self, $line) = @_;
 471
 472     if (defined $self->LineExtender())
 473         {
 474         my $lineExtenderIndex = rindex($line, $self->LineExtender());
 475
 476         if ($lineExtenderIndex != -1 &&
 477             substr($line, $lineExtenderIndex + length($self->LineExtender())) =~ /^[ \t]*\n$/)
 478             {
 479             $line = substr($line, 0, $lineExtenderIndex) . ' ';
 480             };
 481         };
 482
 483     return $line;
 484     };
 485
 486
 487 1;