1 ###############################################################################
3 # Package: NaturalDocs::Parser
5 ###############################################################################
7 # A package that coordinates source file parsing between the <NaturalDocs::Languages::Base>-derived objects and its own
8 # sub-packages such as <NaturalDocs::Parser::Native>. Also handles sending symbols to <NaturalDocs::SymbolTable> and
9 # other generic topic processing.
11 # Usage and Dependencies:
13 # - Prior to use, <NaturalDocs::Settings>, <NaturalDocs::Languages>, <NaturalDocs::Project>, <NaturalDocs::SymbolTable>,
14 # and <NaturalDocs::ClassHierarchy> must be initialized. <NaturalDocs::SymbolTable> and <NaturalDocs::ClassHierarchy>
15 # do not have to be fully resolved.
17 # - Aside from that, the package is ready to use right away. It does not have its own initialization function.
19 ###############################################################################
21 # This file is part of Natural Docs, which is Copyright © 2003-2010 Greg Valure
22 # Natural Docs is licensed under version 3 of the GNU Affero General Public License (AGPL)
23 # Refer to License.txt for the complete details
25 use NaturalDocs::Parser::ParsedTopic;
26 use NaturalDocs::Parser::Native;
27 use NaturalDocs::Parser::JavaDoc;
32 package NaturalDocs::Parser;
36 ###############################################################################
43 # The source <FileName> currently being parsed.
50 # The language object for the file, derived from <NaturalDocs::Languages::Base>.
57 # An array of <NaturalDocs::Parser::ParsedTopic> objects.
63 # bool: parsingForInformation
64 # Whether <ParseForInformation()> was called. If false, then <ParseForBuild()> was called.
66 my $parsingForInformation;
70 ###############################################################################
74 # Function: ParseForInformation
76 # Parses the input file for information. Will update the information about the file in <NaturalDocs::SymbolTable> and
77 # <NaturalDocs::Project>.
# It also registers image references with <NaturalDocs::ImageReferenceTable> and has <NaturalDocs::ClassHierarchy> and
# <NaturalDocs::SourceDB> analyze the changes made while the file was being watched.
81 # file - The <FileName> to parse.
83 sub ParseForInformation #(file)
85 my ($self, $file) = @_;
88 $parsingForInformation = 1;
90 # Watch this parse so we detect any changes.
91 NaturalDocs::SymbolTable->WatchFileForChanges($sourceFile);
92 NaturalDocs::ClassHierarchy->WatchFileForChanges($sourceFile);
93 NaturalDocs::SourceDB->WatchFileForChanges($sourceFile);
95 my $defaultMenuTitle = $self->Parse();
97 foreach my $topic (@parsedFile)
99 # Add a symbol for the topic.
# Enumeration topics are entered into the symbol table as types.
101 my $type = $topic->Type();
102 if ($type eq ::TOPIC_ENUMERATION())
103 { $type = ::TOPIC_TYPE(); };
105 NaturalDocs::SymbolTable->AddSymbol($topic->Symbol(), $sourceFile, $type,
106 $topic->Prototype(), $topic->Summary());
109 # A while loop with a //g regex can't be run directly on the function call. The body has to sit in a variable to work.
110 my $body = $topic->Body();
113 # If it's a list or enum topic, add a symbol for each description list entry.
115 if ($topic->IsList() || $topic->Type() eq ::TOPIC_ENUMERATION())
117 # We'll hijack the enum constants to apply to non-enum behavior too.
120 if ($topic->Type() eq ::TOPIC_ENUMERATION())
122 $type = ::TOPIC_CONSTANT();
123 $behavior = $language->EnumValues();
125 elsif (NaturalDocs::Topics->TypeInfo($topic->Type())->Scope() == ::SCOPE_ALWAYS_GLOBAL())
127 $behavior = ::ENUM_GLOBAL();
131 $behavior = ::ENUM_UNDER_PARENT();
134 while ($body =~ /<ds>([^<]+)<\/ds><dd>(.*?)<\/dd>/g)
136 my ($listTextSymbol, $listSummary) = ($1, $2);
138 $listTextSymbol = NaturalDocs::NDMarkup->RestoreAmpChars($listTextSymbol);
139 my $listSymbol = NaturalDocs::SymbolString->FromText($listTextSymbol);
# The entry is joined to the topic's package or to the topic's own symbol depending on the behavior. With any other
# behavior the entry symbol is used as is.
141 if ($behavior == ::ENUM_UNDER_PARENT())
142 { $listSymbol = NaturalDocs::SymbolString->Join($topic->Package(), $listSymbol); }
143 elsif ($behavior == ::ENUM_UNDER_TYPE())
144 { $listSymbol = NaturalDocs::SymbolString->Join($topic->Symbol(), $listSymbol); };
146 NaturalDocs::SymbolTable->AddSymbol($listSymbol, $sourceFile, $type, undef,
147 $self->GetSummaryFromDescriptionList($listSummary));
152 # Add references in the topic.
154 while ($body =~ /<link target=\"([^\"]*)\" name=\"[^\"]*\" original=\"[^\"]*\">/g)
156 my $linkText = NaturalDocs::NDMarkup->RestoreAmpChars($1);
157 my $linkSymbol = NaturalDocs::SymbolString->FromText($linkText);
159 NaturalDocs::SymbolTable->AddReference(::REFERENCE_TEXT(), $linkSymbol,
160 $topic->Package(), $topic->Using(), $sourceFile);
164 # Add images in the topic.
166 while ($body =~ /<img mode=\"[^\"]*\" target=\"([^\"]+)\" original=\"[^\"]*\">/g)
168 my $target = NaturalDocs::NDMarkup->RestoreAmpChars($1);
169 NaturalDocs::ImageReferenceTable->AddReference($sourceFile, $target);
173 # Handle any changes to the file.
174 NaturalDocs::ClassHierarchy->AnalyzeChanges();
175 NaturalDocs::SymbolTable->AnalyzeChanges();
176 NaturalDocs::SourceDB->AnalyzeWatchedFileChanges();
178 # Update project on the file's characteristics.
# The file has content if parsing produced at least one topic.
179 my $hasContent = (scalar @parsedFile > 0);
181 NaturalDocs::Project->SetHasContent($sourceFile, $hasContent);
183 { NaturalDocs::Project->SetDefaultMenuTitle($sourceFile, $defaultMenuTitle); };
185 # We don't need to keep this around.
191 # Function: ParseForBuild
193 # Parses the input file for building, returning it as a <NaturalDocs::Parser::ParsedTopic> arrayref.
195 # Note that all new and changed files should be parsed for symbols via <ParseForInformation()> before calling this function on
196 # *any* file. The reason is that <NaturalDocs::SymbolTable> needs to know about all the symbol definitions and references to
197 # resolve them properly.
201 # file - The <FileName> to parse for building.
205 # An arrayref of the source file as <NaturalDocs::Parser::ParsedTopic> objects.
207 sub ParseForBuild #(file)
209 my ($self, $file) = @_;
# With the flag cleared, <OnClass()> and <OnClassParent()> won't add anything to <NaturalDocs::ClassHierarchy> during this parse.
212 $parsingForInformation = undef;
222 ###############################################################################
223 # Group: Interface Functions
227 # Function: OnComment
229 # The function called by <NaturalDocs::Languages::Base>-derived objects when their parsers encounter a comment
230 # suitable for documentation.
234 # commentLines - An arrayref of the comment's lines. The language's comment symbols should be converted to spaces,
235 # and there should be no line break characters at the end of each line. *The original memory will be
237 # lineNumber - The line number of the first of the comment lines.
238 # isJavaDoc - Whether the comment is in JavaDoc format.
242 # The number of topics created by this comment, or zero if none.
244 sub OnComment #(string[] commentLines, int lineNumber, bool isJavaDoc)
246 my ($self, $commentLines, $lineNumber, $isJavaDoc) = @_;
# The lines are cleaned in place, so both parsers below are handed the cleaned array.
248 $self->CleanComment($commentLines);
250 # We check if it's definitely Natural Docs content first. This overrides all else, since it's possible that a comment could start
251 # with a topic line yet have something that looks like a JavaDoc tag. Natural Docs wins in this case.
252 if (NaturalDocs::Parser::Native->IsMine($commentLines, $isJavaDoc))
253 { return NaturalDocs::Parser::Native->ParseComment($commentLines, $isJavaDoc, $lineNumber, \@parsedFile); }
255 elsif (NaturalDocs::Parser::JavaDoc->IsMine($commentLines, $isJavaDoc))
256 { return NaturalDocs::Parser::JavaDoc->ParseComment($commentLines, $isJavaDoc, $lineNumber, \@parsedFile); }
258 # If the content is ambiguous and it's a JavaDoc-styled comment, treat it as Natural Docs content.
260 { return NaturalDocs::Parser::Native->ParseComment($commentLines, $isJavaDoc, $lineNumber, \@parsedFile); }
267 # A function called by <NaturalDocs::Languages::Base>-derived objects when their parsers encounter a class declaration.
271 # class - The <SymbolString> of the class encountered.
275 my ($self, $class) = @_;
# Classes are only recorded in <NaturalDocs::ClassHierarchy> when the file is being parsed for information.
277 if ($parsingForInformation)
278 { NaturalDocs::ClassHierarchy->AddClass($sourceFile, $class); };
283 # Function: OnClassParent
285 # A function called by <NaturalDocs::Languages::Base>-derived objects when their parsers encounter a declaration of
290 # class - The <SymbolString> of the class we're in.
291 # parent - The <SymbolString> of the class it inherits.
292 # scope - The package <SymbolString> that the reference appeared in.
293 # using - An arrayref of package <SymbolStrings> that the reference has access to via "using" statements.
294 # resolvingFlags - Any <Resolving Flags> to be used when resolving the reference. <RESOLVE_NOPLURAL> is added
295 # automatically since that would never apply to source code.
297 sub OnClassParent #(class, parent, scope, using, resolvingFlags)
299 my ($self, $class, $parent, $scope, $using, $resolvingFlags) = @_;
# Parent references are only recorded in <NaturalDocs::ClassHierarchy> when the file is being parsed for information.
301 if ($parsingForInformation)
303 NaturalDocs::ClassHierarchy->AddParentReference($sourceFile, $class, $parent, $scope, $using,
304 $resolvingFlags | ::RESOLVE_NOPLURAL());
310 ###############################################################################
311 # Group: Support Functions
316 # Opens the source file and runs the parsing process. Most of the actual parsing is done in <NaturalDocs::Languages::Base->ParseFile()>
317 # and <OnComment()>, though.
319 # *Do not call externally.* Rather, call <ParseForInformation()> or <ParseForBuild()>.
323 # The default menu title of the file. Will be the <FileName> if nothing better is found.
329 NaturalDocs::Error->OnStartParsing($sourceFile);
331 $language = NaturalDocs::Languages->LanguageOf($sourceFile);
332 NaturalDocs::Parser::Native->Start();
# ParseFile() fills @parsedFile through the reference and may also return auto-topics and a scope record.
335 my ($autoTopics, $scopeRecord) = $language->ParseFile($sourceFile, \@parsedFile);
338 $self->AddToClassHierarchy();
342 if (defined $autoTopics)
# Packages are repaired before merging, as <RepairPackages()> requires.
344 if (defined $scopeRecord)
345 { $self->RepairPackages($autoTopics, $scopeRecord); };
347 $self->MergeAutoTopics($language, $autoTopics);
350 $self->RemoveRemainingHeaderlessTopics();
353 # We don't need to do this if there aren't any auto-topics because the only package changes would be implied by the comments.
354 if (defined $autoTopics)
355 { $self->AddPackageDelineators(); };
357 if (!NaturalDocs::Settings->NoAutoGroup())
358 { $self->MakeAutoGroups($autoTopics); };
361 # Set the menu title.
# The file name is the fallback title. It may be replaced by the first topic's title below.
363 my $defaultMenuTitle = $sourceFile;
365 if (scalar @parsedFile)
369 if (NaturalDocs::Settings->OnlyFileTitles())
371 # We still want to use the title from the topics if the first one is a file.
372 if ($parsedFile[0]->Type() eq ::TOPIC_FILE())
373 { $addFileTitle = 0; }
375 { $addFileTitle = 1; };
377 elsif (scalar @parsedFile == 1 || NaturalDocs::Topics->TypeInfo( $parsedFile[0]->Type() )->PageTitleIfFirst())
378 { $addFileTitle = 0; }
380 { $addFileTitle = 1; };
384 $defaultMenuTitle = $parsedFile[0]->Title();
388 # If the title ended up being the file name, add a leading section for it.
391 NaturalDocs::Parser::ParsedTopic->New(::TOPIC_FILE(), (NaturalDocs::File->SplitPath($sourceFile))[2],
392 undef, undef, undef, undef, undef, 1, undef);
396 NaturalDocs::Error->OnEndParsing($sourceFile);
398 return $defaultMenuTitle;
403 # Function: CleanComment
405 # Removes any extraneous formatting and whitespace from the comment. Eliminates comment boxes, horizontal lines, trailing
406 # whitespace from lines, and expands all tab characters. It keeps leading whitespace, though, since it may be needed for
407 # example code, and blank lines, since the original line numbers are needed.
411 # commentLines - An arrayref of the comment lines to clean. *The original memory will be changed.* Lines should have the
412 # language's comment symbols replaced by spaces and not have a trailing line break.
414 sub CleanComment #(commentLines)
416 my ($self, $commentLines) = @_;
# States used to track whether the left and right edges of the comment form a vertical line.
418 use constant DONT_KNOW => 0;
419 use constant IS_UNIFORM => 1;
420 use constant IS_UNIFORM_IF_AT_END => 2;
421 use constant IS_NOT_UNIFORM => 3;
423 my $leftSide = DONT_KNOW;
424 my $rightSide = DONT_KNOW;
429 my $tabLength = NaturalDocs::Settings->TabLength();
431 while ($index < scalar @$commentLines)
433 # Strip trailing whitespace from the original.
435 $commentLines->[$index] =~ s/[ \t]+$//;
438 # Expand tabs in the original. This method is almost six times faster than Text::Tabs' method.
440 my $tabIndex = index($commentLines->[$index], "\t");
442 while ($tabIndex != -1)
# Each tab is replaced by enough spaces to reach the next multiple of $tabLength.
444 substr( $commentLines->[$index], $tabIndex, 1, ' ' x ($tabLength - ($tabIndex % $tabLength)) );
445 $tabIndex = index($commentLines->[$index], "\t", $tabIndex);
449 # Make a working copy and strip leading whitespace as well. This has to be done after tabs are expanded because
450 # stripping indentation could change how far tabs are expanded.
452 my $line = $commentLines->[$index];
455 # If the line is blank...
458 # If we have a potential vertical line, this is only acceptable if it's at the end of the comment.
459 if ($leftSide == IS_UNIFORM)
460 { $leftSide = IS_UNIFORM_IF_AT_END; };
461 if ($rightSide == IS_UNIFORM)
462 { $rightSide = IS_UNIFORM_IF_AT_END; };
465 # If there are at least four symbols in a row, it's a horizontal line. The second regex supports differing edge characters. It
466 # doesn't matter if any of this matches the left and right side symbols. The length < 256 is a sanity check, because that
467 # regexp has caused the perl regexp engine to choke on an insane line someone sent me from an automatically generated
468 # file. It had over 10k characters on the first line, and most of them were 0x00.
469 elsif ($line =~ /^([^a-zA-Z0-9 ])\1{3,}$/ ||
470 (length $line < 256 && $line =~ /^([^a-zA-Z0-9 ])\1*([^a-zA-Z0-9 ])\2{3,}([^a-zA-Z0-9 ])\3*$/) )
472 # Ignore it. This has no effect on the vertical line detection. We want to keep it in the output though in case it was
476 # If the line is not blank or a horizontal line...
479 # More content means any previous blank lines are no longer tolerated in vertical line detection. They are only
480 # acceptable at the end of the comment.
482 if ($leftSide == IS_UNIFORM_IF_AT_END)
483 { $leftSide = IS_NOT_UNIFORM; };
484 if ($rightSide == IS_UNIFORM_IF_AT_END)
485 { $rightSide = IS_NOT_UNIFORM; };
488 # Detect vertical lines. Lines are only lines if they are followed by whitespace or a connected horizontal line.
489 # Otherwise we may accidentally detect lines from short comments that just happen to have every first or last
490 # character the same.
492 if ($leftSide != IS_NOT_UNIFORM)
494 if ($line =~ /^([^a-zA-Z0-9])\1*(?: |$)/)
496 if ($leftSide == DONT_KNOW)
498 $leftSide = IS_UNIFORM;
501 else # ($leftSide == IS_UNIFORM) Other choices already ruled out.
503 if ($leftSideChar ne $1)
504 { $leftSide = IS_NOT_UNIFORM; };
507 # We'll tolerate the lack of symbols on the left on the first line, because it may be a
508 # /* Function: Whatever
511 # comment which would have the leading /* blanked out.
514 $leftSide = IS_NOT_UNIFORM;
518 if ($rightSide != IS_NOT_UNIFORM)
520 if ($line =~ / ([^a-zA-Z0-9])\1*$/)
522 if ($rightSide == DONT_KNOW)
524 $rightSide = IS_UNIFORM;
527 else # ($rightSide == IS_UNIFORM) Other choices already ruled out.
529 if ($rightSideChar ne $1)
530 { $rightSide = IS_NOT_UNIFORM; };
535 $rightSide = IS_NOT_UNIFORM;
539 # We'll remove vertical lines later if they're uniform throughout the entire comment.
# Blank lines were only seen at the end of the comment if we're still in this state, so the side counts as uniform.
546 if ($leftSide == IS_UNIFORM_IF_AT_END)
547 { $leftSide = IS_UNIFORM; };
548 if ($rightSide == IS_UNIFORM_IF_AT_END)
549 { $rightSide = IS_UNIFORM; };
553 my $inCodeSection = 0;
555 while ($index < scalar @$commentLines)
557 # Clear horizontal lines only if we're not in a code section.
558 if ($commentLines->[$index] =~ /^ *([^a-zA-Z0-9 ])\1{3,}$/ ||
559 ( length $commentLines->[$index] < 256 &&
560 $commentLines->[$index] =~ /^ *([^a-zA-Z0-9 ])\1*([^a-zA-Z0-9 ])\2{3,}([^a-zA-Z0-9 ])\3*$/ ) )
563 { $commentLines->[$index] = ''; }
568 # Clear vertical lines.
570 if ($leftSide == IS_UNIFORM)
572 # This works because every line should either start this way, be blank, or be the first line that doesn't start with a
574 $commentLines->[$index] =~ s/^ *([^a-zA-Z0-9 ])\1*//;
577 if ($rightSide == IS_UNIFORM)
579 $commentLines->[$index] =~ s/ *([^a-zA-Z0-9 ])\1*$//;
583 # Clear horizontal lines again if there were vertical lines. This catches lines that were separated from the verticals by
586 if (($leftSide == IS_UNIFORM || $rightSide == IS_UNIFORM) && !$inCodeSection)
588 $commentLines->[$index] =~ s/^ *([^a-zA-Z0-9 ])\1{3,}$//;
589 $commentLines->[$index] =~ s/^ *([^a-zA-Z0-9 ])\1*([^a-zA-Z0-9 ])\2{3,}([^a-zA-Z0-9 ])\3*$//;
593 # Check for the start and end of code sections. Note that this doesn't affect vertical line removal.
595 if (!$inCodeSection &&
596 $commentLines->[$index] =~ /^ *\( *(?:(?:start|begin)? +)?(?:table|code|example|diagram) *\)$/i )
600 elsif ($inCodeSection &&
601 $commentLines->[$index] =~ /^ *\( *(?:end|finish|done)(?: +(?:table|code|example|diagram))? *\)$/i)
615 ###############################################################################
616 # Group: Processing Functions
620 # Function: RepairPackages
622 # Recalculates the packages for all comment topics using the auto-topics and the scope record. Call this *before* calling
623 # <MergeAutoTopics()>.
627 # autoTopics - A reference to the list of automatically generated <NaturalDocs::Parser::ParsedTopics>.
628 # scopeRecord - A reference to an array of <NaturalDocs::Languages::Advanced::ScopeChanges>.
630 sub RepairPackages #(autoTopics, scopeRecord)
632 my ($self, $autoTopics, $scopeRecord) = @_;
635 my $autoTopicIndex = 0;
# Start with the first entry of each list. Each iteration below compares the line numbers of the current entries.
638 my $topic = $parsedFile[0];
639 my $autoTopic = $autoTopics->[0];
640 my $scopeChange = $scopeRecord->[0];
645 while (defined $topic)
647 # First update the scope via the record if it's defined and has the lowest line number.
648 if (defined $scopeChange &&
649 $scopeChange->LineNumber() <= $topic->LineNumber() &&
650 (!defined $autoTopic || $scopeChange->LineNumber() <= $autoTopic->LineNumber()) )
652 $currentPackage = $scopeChange->Scope();
654 $scopeChange = $scopeRecord->[$scopeIndex]; # Will be undef when past end.
655 $inFakePackage = undef;
658 # Next try to end a fake scope with an auto topic if it's defined and has the lowest line number.
659 elsif (defined $autoTopic &&
660 $autoTopic->LineNumber() <= $topic->LineNumber())
664 $currentPackage = $autoTopic->Package();
665 $inFakePackage = undef;
669 $autoTopic = $autoTopics->[$autoTopicIndex]; # Will be undef when past end.
673 # Finally try to handle the topic, since it has the lowest line number. Check for Type() because headerless topics won't have
679 { $scope = NaturalDocs::Topics->TypeInfo($topic->Type())->Scope(); }
681 { $scope = ::SCOPE_NORMAL(); };
683 if ($scope == ::SCOPE_START() || $scope == ::SCOPE_END())
685 # They should already have the correct class and scope.
686 $currentPackage = $topic->Package();
691 # Fix the package of everything else.
693 # Note that the first function or variable topic to appear in a fake package will assume that package even if it turns out
694 # to be incorrect in the actual code, since the topic will come before the auto-topic. This will be corrected in
697 $topic->SetPackage($currentPackage);
701 $topic = $parsedFile[$topicIndex]; # Will be undef when past end.
709 # Function: MergeAutoTopics
711 # Merges the automatically generated topics into the file. If an auto-topic matches an existing topic, it will have its prototype
712 # and package transferred. If it doesn't, the auto-topic will be inserted into the list unless
713 # <NaturalDocs::Settings->DocumentedOnly()> is set. If an existing topic doesn't have a title, it's assumed to be a headerless
714 # comment and will be merged with the next auto-topic or discarded.
718 # language - The <NaturalDocs::Languages::Base>-derived class for the file.
719 # autoTopics - A reference to the list of automatically generated topics.
721 sub MergeAutoTopics #(language, autoTopics)
723 my ($self, $language, $autoTopics) = @_;
726 my $autoTopicIndex = 0;
728 # Keys are topic types, values are existence hashrefs of titles.
731 while ($topicIndex < scalar @parsedFile && $autoTopicIndex < scalar @$autoTopics)
733 my $topic = $parsedFile[$topicIndex];
734 my $autoTopic = $autoTopics->[$autoTopicIndex];
# The clean title drops everything from the last opening parenthesis onward, plus any whitespace before it.
736 my $cleanTitle = $topic->Title();
737 $cleanTitle =~ s/[\t ]*\([^\(]*$//;
739 # Add the auto-topic if it's higher in the file than the current topic.
740 if ($autoTopic->LineNumber() < $topic->LineNumber())
742 if (exists $topicsInLists{$autoTopic->Type()} &&
743 exists $topicsInLists{$autoTopic->Type()}->{$autoTopic->Title()})
745 # Remove it from the list so a second one with the same name will be added.
746 delete $topicsInLists{$autoTopic->Type()}->{$autoTopic->Title()};
748 elsif (!NaturalDocs::Settings->DocumentedOnly())
750 splice(@parsedFile, $topicIndex, 0, $autoTopic);
757 # Remove a headerless topic if there's another topic between it and the next auto-topic.
758 elsif (!$topic->Title() && $topicIndex + 1 < scalar @parsedFile &&
759 $parsedFile[$topicIndex+1]->LineNumber() < $autoTopic->LineNumber())
761 splice(@parsedFile, $topicIndex, 1);
764 # Transfer information if we have a match or a headerless topic.
# A match is an identical symbol, or the same type with one title containing the other.
# NOTE(review): the types are compared with == here while the rest of this file compares topic types with eq. If types are
# strings, == compares them numerically - confirm which is intended.
765 elsif ( !$topic->Title() ||
766 $topic->Symbol() eq $autoTopic->Symbol() ||
767 ( $topic->Type() == $autoTopic->Type() &&
768 ( index($autoTopic->Title(), $cleanTitle) != -1 || index($cleanTitle, $autoTopic->Title()) != -1 ) ) )
770 $topic->SetType($autoTopic->Type());
771 $topic->SetPrototype($autoTopic->Prototype());
772 $topic->SetUsing($autoTopic->Using());
774 if (!$topic->Title())
775 { $topic->SetTitle($autoTopic->Title()); };
777 if (NaturalDocs::Topics->TypeInfo($topic->Type())->Scope() != ::SCOPE_START())
778 { $topic->SetPackage($autoTopic->Package()); }
779 elsif ($autoTopic->Package() ne $topic->Package())
781 my @autoPackageIdentifiers = NaturalDocs::SymbolString->IdentifiersOf($autoTopic->Package());
782 my @packageIdentifiers = NaturalDocs::SymbolString->IdentifiersOf($topic->Package());
# Strip the trailing identifiers the two packages have in common. Whatever is left of the auto-topic's package becomes
# the topic's package.
784 while (scalar @autoPackageIdentifiers && $autoPackageIdentifiers[-1] eq $packageIdentifiers[-1])
786 pop @autoPackageIdentifiers;
787 pop @packageIdentifiers;
790 if (scalar @autoPackageIdentifiers)
791 { $topic->SetPackage( NaturalDocs::SymbolString->Join(@autoPackageIdentifiers) ); };
798 # Extract topics in lists.
799 elsif ($topic->IsList())
801 if (!exists $topicsInLists{$topic->Type()})
802 { $topicsInLists{$topic->Type()} = { }; };
804 my $body = $topic->Body();
806 while ($body =~ /<ds>([^<]+)<\/ds>/g)
807 { $topicsInLists{$topic->Type()}->{NaturalDocs::NDMarkup->RestoreAmpChars($1)} = 1; };
812 # Otherwise there's no match. Skip the topic. The auto-topic will be added later.
819 # Add any auto-topics remaining.
820 if (!NaturalDocs::Settings->DocumentedOnly())
822 while ($autoTopicIndex < scalar @$autoTopics)
824 my $autoTopic = $autoTopics->[$autoTopicIndex];
826 if (exists $topicsInLists{$autoTopic->Type()} &&
827 exists $topicsInLists{$autoTopic->Type()}->{$autoTopic->Title()})
829 # Remove it from the list so a second one with the same name will be added.
830 delete $topicsInLists{$autoTopic->Type()}->{$autoTopic->Title()};
834 push(@parsedFile, $autoTopic);
844 # Function: RemoveRemainingHeaderlessTopics
846 # After <MergeAutoTopics()> is done, this function removes any remaining headerless topics from the file. If they don't merge
847 # into anything, they're not valid topics.
849 sub RemoveRemainingHeaderlessTopics
854 while ($index < scalar @parsedFile)
# A topic is headerless if it has no title. Those are the ones spliced out of @parsedFile.
856 if ($parsedFile[$index]->Title())
859 { splice(@parsedFile, $index, 1); };
865 # Function: MakeAutoGroups
867 # Creates group topics for files that do not have them.
873 # No groups if there's only one topic.
874 if (scalar @parsedFile < 2)
878 my $startStretch = 0;
880 # Skip the first entry if it's the page title.
881 if (NaturalDocs::Topics->TypeInfo( $parsedFile[0]->Type() )->PageTitleIfFirst())
887 # Make auto-groups for each stretch between scope-altering topics.
888 while ($index < scalar @parsedFile)
890 my $scope = NaturalDocs::Topics->TypeInfo($parsedFile[$index]->Type())->Scope();
892 if ($scope == ::SCOPE_START() || $scope == ::SCOPE_END())
# The index is advanced by the return value of <MakeAutoGroupsFor()> to account for the group topics it inserted.
894 if ($index > $startStretch)
895 { $index += $self->MakeAutoGroupsFor($startStretch, $index); };
897 $startStretch = $index + 1;
# Handle the final stretch, which runs to the end of the file.
903 if ($index > $startStretch)
904 { $self->MakeAutoGroupsFor($startStretch, $index); };
909 # Function: MakeAutoGroupsFor
911 # Creates group topics for sections of files that do not have them. A support function for <MakeAutoGroups()>.
915 # startIndex - The index to start at.
916 # endIndex - The index to end at. Not inclusive.
920 # The number of group topics added.
922 sub MakeAutoGroupsFor #(startIndex, endIndex)
924 my ($self, $startIndex, $endIndex) = @_;
926 # No groups if any are defined already.
927 for (my $i = $startIndex; $i < $endIndex; $i++)
929 if ($parsedFile[$i]->Type() eq ::TOPIC_GROUP())
# Offsets of the fields within each group record in @groups, and the number of slots per record.
934 use constant COUNT => 0;
935 use constant TYPE => 1;
936 use constant SECOND_TYPE => 2;
937 use constant SIZE => 3;
939 # This is an array of ( count, type, secondType ) triples. Count and Type will always be filled in; count is the number of
940 # consecutive topics of type. On the second pass, if small groups are combined secondType will be filled in. There will not be
941 # more than two types per group.
946 # First pass: Determine all the groups.
951 while ($i < $endIndex)
# A new group starts on the first topic and whenever the type changes, except that generic topics stay in the current group.
953 if (!defined $currentType || ($parsedFile[$i]->Type() ne $currentType && $parsedFile[$i]->Type() ne ::TOPIC_GENERIC()) )
955 if (defined $currentType)
956 { $groupIndex += SIZE; };
958 $currentType = $parsedFile[$i]->Type();
960 $groups[$groupIndex + COUNT] = 1;
961 $groups[$groupIndex + TYPE] = $currentType;
964 { $groups[$groupIndex + COUNT]++; };
970 # Second pass: Combine groups based on "noise". Noise means types go from A to B to A at least once, and there are at least
971 # two groups in a row with three or fewer, and at least one of those groups is two or fewer. So 3, 3, 3 doesn't count as noise, but
976 # While there are at least three groups left...
977 while ($groupIndex < scalar @groups - (2 * SIZE))
979 # If the group two places in front of this one has the same type...
980 if ($groups[$groupIndex + (2 * SIZE) + TYPE] eq $groups[$groupIndex + TYPE])
982 # It means we went from A to B to A, which partially qualifies as noise.
984 my $firstType = $groups[$groupIndex + TYPE];
985 my $secondType = $groups[$groupIndex + SIZE + TYPE];
987 if (NaturalDocs::Topics->TypeInfo($firstType)->CanGroupWith($secondType) ||
988 NaturalDocs::Topics->TypeInfo($secondType)->CanGroupWith($firstType))
# Note that this $endIndex is an index into @groups and shadows the parameter of the same name.
995 my $endIndex = $groupIndex;
997 while ($endIndex < scalar @groups &&
998 ($groups[$endIndex + TYPE] eq $firstType || $groups[$endIndex + TYPE] eq $secondType))
1000 if ($groups[$endIndex + COUNT] > 3)
1002 # They must be consecutive to count.
1006 elsif ($groups[$endIndex + COUNT] == 3)
1015 if ($hasThrees || $hasTwosOrOnes)
1026 $groupIndex = $endIndex - SIZE;
1030 $groups[$groupIndex + SECOND_TYPE] = $secondType;
# Fold the counts of the noise groups into the first one, then remove their records.
1032 for (my $noiseIndex = $groupIndex + SIZE; $noiseIndex < $endIndex; $noiseIndex += SIZE)
1034 $groups[$groupIndex + COUNT] += $groups[$noiseIndex + COUNT];
1037 splice(@groups, $groupIndex + SIZE, $endIndex - $groupIndex - SIZE);
1039 $groupIndex += SIZE;
1043 else # They can't group together
1045 $groupIndex += SIZE;
1050 { $groupIndex += SIZE; };
1054 # Finally, create group topics for the parsed file.
1059 while ($groupIndex < scalar @groups)
1061 if ($groups[$groupIndex + TYPE] ne ::TOPIC_GENERIC())
# The new group topic takes its package, using list and line number from the first topic of the group.
1063 my $topic = $parsedFile[$i];
1064 my $title = NaturalDocs::Topics->NameOfType($groups[$groupIndex + TYPE], 1);
1066 if (defined $groups[$groupIndex + SECOND_TYPE])
1067 { $title .= ' and ' . NaturalDocs::Topics->NameOfType($groups[$groupIndex + SECOND_TYPE], 1); };
1069 splice(@parsedFile, $i, 0, NaturalDocs::Parser::ParsedTopic->New(::TOPIC_GROUP(),
1071 $topic->Package(), $topic->Using(),
1072 undef, undef, undef,
1073 $topic->LineNumber()) );
1077 $i += $groups[$groupIndex + COUNT];
1078 $groupIndex += SIZE;
# NOTE(review): this counts every group record, including any whose type is TOPIC_GENERIC, for which no group topic is
# inserted above. Confirm it always equals the number of topics actually added, since <MakeAutoGroups()> advances its
# index by this value.
1081 return (scalar @groups / SIZE);
1086 # Function: AddToClassHierarchy
1088 # Adds any class topics to the class hierarchy, since they may not have been called with <OnClass()> if they didn't match up to
1091 sub AddToClassHierarchy
1095 foreach my $topic (@parsedFile)
# Only topics whose type takes part in the class hierarchy are considered. Topics without a type are skipped.
1097 if ($topic->Type() && NaturalDocs::Topics->TypeInfo( $topic->Type() )->ClassHierarchy())
1099 if ($topic->IsList())
1101 my $body = $topic->Body();
# Every entry in a list topic is reported as its own class.
1103 while ($body =~ /<ds>([^<]+)<\/ds>/g)
1105 $self->OnClass( NaturalDocs::SymbolString->FromText( NaturalDocs::NDMarkup->RestoreAmpChars($1) ) );
# Here the topic's package is what gets reported as the class.
1110 $self->OnClass($topic->Package());
1118 # Function: AddPackageDelineators
1120 # Adds section and class topics to make sure the package is correctly represented in the documentation. Should be called last in
1123 sub AddPackageDelineators
1130 # Values are the arrayref [ title, type ];
1133 while ($index < scalar @parsedFile)
1135 my $topic = $parsedFile[$index];
# A delineator is only considered where the package changes from one topic to the next.
1137 if ($topic->Package() ne $currentPackage)
1139 $currentPackage = $topic->Package();
1140 my $scopeType = NaturalDocs::Topics->TypeInfo($topic->Type())->Scope();
1142 if ($scopeType == ::SCOPE_START())
1144 $usedPackages{$currentPackage} = [ $topic->Title(), $topic->Type() ];
1146 elsif ($scopeType == ::SCOPE_END())
# An undefined package means we're back in the global scope, which gets a 'Global' section topic.
1150 if (!defined $currentPackage)
1152 $newTopic = NaturalDocs::Parser::ParsedTopic->New(::TOPIC_SECTION(), 'Global',
1154 undef, undef, undef,
1155 $topic->LineNumber(), undef);
1159 my ($title, $body, $summary, $type);
1160 my @packageIdentifiers = NaturalDocs::SymbolString->IdentifiersOf($currentPackage);
# A package that was already used is reintroduced under its recorded title and type, marked as continued.
1162 if (exists $usedPackages{$currentPackage})
1164 $title = $usedPackages{$currentPackage}->[0];
1165 $type = $usedPackages{$currentPackage}->[1];
1166 $body = '<p>(continued)</p>';
1167 $summary = '(continued)';
1171 $title = join($language->PackageSeparator(), @packageIdentifiers);
1172 $type = ::TOPIC_CLASS();
1174 # Body and summary stay undef.
# NOTE(review): this stores the title as a plain string, whereas the other assignment to %usedPackages stores
# [ title, type ] and the lookup above indexes the value with ->[0] and ->[1]. Confirm whether this should be
# [ $title, $type ].
1176 $usedPackages{$currentPackage} = $title;
# Remove as many trailing identifiers from the package as the title has, leaving the package the new topic sits in.
1179 my @titleIdentifiers = NaturalDocs::SymbolString->IdentifiersOf( NaturalDocs::SymbolString->FromText($title) );
1180 for (my $i = 0; $i < scalar @titleIdentifiers; $i++)
1181 { pop @packageIdentifiers; };
1183 $newTopic = NaturalDocs::Parser::ParsedTopic->New($type, $title,
1184 NaturalDocs::SymbolString->Join(@packageIdentifiers), undef,
1185 undef, $summary, $body,
1186 $topic->LineNumber(), undef);
# The delineator is inserted immediately before the topic that changed the package.
1189 splice(@parsedFile, $index, 0, $newTopic);
1200 # Function: BreakLists
1202 # Breaks list topics into individual topics.
1210 while ($index < scalar @parsedFile)
1212 my $topic = $parsedFile[$index];
# Only list topics whose type is set to break lists are split up.
1214 if ($topic->IsList() && NaturalDocs::Topics->TypeInfo( $topic->Type() )->BreakLists())
1216 my $body = $topic->Body();
1225 my $startList = index($body, '<dl>', $bodyIndex);
1227 if ($startList == -1)
# Text before the list is carried over into the new body.
1230 $newBody .= substr($body, $bodyIndex, $startList - $bodyIndex);
1232 my $endList = index($body, '</dl>', $startList);
1233 my $listBody = substr($body, $startList, $endList - $startList);
# Each description list entry becomes a topic of the same type, with the description as its body.
1235 while ($listBody =~ /<ds>([^<]+)<\/ds><dd>(.*?)<\/dd>/g)
1237 my ($symbol, $description) = ($1, $2);
1239 push @newTopics, NaturalDocs::Parser::ParsedTopic->New( $topic->Type(), $symbol, $topic->Package(),
1240 $topic->Using(), undef,
1241 $self->GetSummaryFromDescriptionList($description),
1242 '<p>' . $description . '</p>', $topic->LineNumber(),
# 5 is the length of '</dl>', so scanning resumes just past the end of this list.
1246 $bodyIndex = $endList + 5;
1249 $newBody .= substr($body, $bodyIndex);
1251 # Remove trailing headings.
1252 $newBody =~ s/(?:<h>[^<]+<\/h>)+$//;
1254 # Remove empty headings.
1255 $newBody =~ s/(?:<h>[^<]+<\/h>)+(<h>[^<]+<\/h>)/$1/g;
# What's left of the original body goes into a group topic placed ahead of the broken-out topics.
1259 unshift @newTopics, NaturalDocs::Parser::ParsedTopic->New( ::TOPIC_GROUP(), $topic->Title(), $topic->Package(),
1260 $topic->Using(), undef,
1261 $self->GetSummaryFromBody($newBody), $newBody,
1262 $topic->LineNumber(), undef );
# The list topic is replaced by its new topics, which are then skipped over.
1265 splice(@parsedFile, $index, 1, @newTopics);
1267 $index += scalar @newTopics;
1277 # Function: GetSummaryFromBody
1279 # Returns the summary text from the topic body.
1283 # body - The complete topic body, in <NDMarkup>.
1287 # The topic summary, or undef if none.
1289 sub GetSummaryFromBody #(body)
1291 my ($self, $body) = @_;
1295 # Extract the first sentence from the leading paragraph, if any. We'll tolerate a single header beforehand, but nothing else.
# The first capture is the sentence text. The second is either the closing paragraph tag or the sentence-ending
# punctuation together with the character that follows it.
1297 if ($body =~ /^(?:<h>[^<]*<\/h>)?<p>(.*?)(<\/p>|[\.\!\?](?:[\)\}\'\ ]|"|>))/x)
1302 { $summary .= $2; };
1310 # Function: GetSummaryFromDescriptionList
1312 # Returns the summary text from a description list entry.
1316 # description - The description in <NDMarkup>. Should be the content between the <dd></dd> tags only.
1320 # The description summary, or undef if none.
1322 sub GetSummaryFromDescriptionList #(description)
1324 my ($self, $description) = @_;
# Take the text up to and including the end of the first sentence, or up to the end of the text if no sentence end is found.
1328 if ($description =~ /^(.*?)($|[\.\!\?](?:[\)\}\'\ ]|"|>))/)
1329 { $summary = $1 . $2; };