1 ###############################################################################
3 # Package: NaturalDocs::SymbolString
5 ###############################################################################
7 # A package to manage <SymbolString> handling throughout the program.
9 ###############################################################################
11 # This file is part of Natural Docs, which is Copyright © 2003-2010 Greg Valure
12 # Natural Docs is licensed under version 3 of the GNU Affero General Public License (AGPL)
13 # Refer to License.txt for the complete details
18 package NaturalDocs::SymbolString;
20 use Encode qw(encode_utf8 decode_utf8);
#
#   Function: FromText
#
#   Extracts and returns a <SymbolString> from plain text.
#
#   This should be the only way to get a <SymbolString> from plain text, as the splitting and normalization must be consistent
#   throughout the application.
#
#   Parameters:
#
#       textSymbol - The plain text symbol, such as "Package::Function()".
#
#   Returns:
#
#       The <SymbolString>, or undef if the text was undef or contained no identifiers.
#
sub FromText #(string textSymbol)
{
    my ($self, $textSymbol) = @_;

    # There is nothing to extract from an undefined value.
    if (!defined $textSymbol) {
        return undef;
    }

    # The internal format of a symbol is all the normalized identifiers separated by 0x1F characters.

    # Convert whitespace and reserved characters to spaces, and condense multiple consecutive ones.
    $textSymbol =~ tr/ \t\r\n\x1C\x1D\x1E\x1F/ /s;

    # DEPENDENCY: ReferenceString->MakeFrom() assumes all 0x1E characters were removed.
    # DEPENDENCY: ReferenceString->MakeFrom() assumes this encoding doesn't use 0x1E characters.

    # Remove spaces unless they're separating two alphanumeric/underscore characters.
    $textSymbol =~ s/^ //;
    $textSymbol =~ s/ $//;
    $textSymbol =~ s/(\W) /$1/g;
    $textSymbol =~ s/ (\W)/$1/g;

    # Remove trailing empty parenthesis, so Function and Function() are equivalent.
    $textSymbol =~ s/\(\)$//;

    # Split the string into pieces.  The capturing group keeps the separators in the list so they can be told apart from
    # the identifiers below.
    my @pieces = split(/(\.|::|->)/, $textSymbol);

    my $symbolString;

    # Starts true so that a leading separator doesn't produce an empty first identifier.
    my $lastWasSeparator = 1;

    foreach my $piece (@pieces) {
        if ($piece =~ /^(?:\.|::|->)$/) {
            # Consecutive separators collapse into a single 0x1F.
            if (!$lastWasSeparator) {
                $symbolString .= "\x1F";
                $lastWasSeparator = 1;
            }
        }
        elsif (length $piece) {
            $symbolString .= $piece;
            $lastWasSeparator = 0;
        }
        # Empty pieces, which split() emits in front of a leading separator, are ignored.
    }

    # A trailing separator leaves a dangling 0x1F behind.  $symbolString is still undef if no identifier was found.
    if (defined $symbolString) {
        $symbolString =~ s/\x1F$//;
    }

    return $symbolString;
}
#
#   Function: ToText
#
#   Converts a <SymbolString> to text, using the passed separator.
#
#   Parameters:
#
#       symbolString - The <SymbolString> to convert.
#       separator - The string to place between the identifiers, such as "." or "::".
#
#   Returns:
#
#       The identifiers of the symbol joined by the separator.
#
sub ToText #(SymbolString symbolString, string separator)
{
    my ($self, $symbolString, $separator) = @_;

    my @identifiers = $self->IdentifiersOf($symbolString);

    return join($separator, @identifiers);
}
#
#   Function: ToBinaryFile
#
#   Writes a <SymbolString> to the passed filehandle.  Can also encode an undef.
#
#   Parameters:
#
#       fileHandle - The filehandle to write to.
#       symbol - The <SymbolString> to write, or undef.
#
#   Format:
#
#       > [UInt8: number of identifiers]
#       > [UString16: identifier] [UString16: identifier] ...
#
#       Undef is represented by a zero for the number of identifiers.
#
#   Errors:
#
#       Dies before writing anything if the symbol has more than 255 identifiers or if any identifier is longer than 65535
#       bytes once encoded as UTF-8, since neither fits in the format above.
#
sub ToBinaryFile #(FileHandle fileHandle, SymbolString symbol)
{
    my ($self, $fileHandle, $symbol) = @_;

    my @identifiers;

    if (defined $symbol) {
        @identifiers = $self->IdentifiersOf($symbol);
    }

    # The count is stored in a single byte.  pack('C') would silently wrap a larger value and corrupt the file.
    if (scalar @identifiers > 255) {
        die "SymbolString has " . scalar(@identifiers) . " identifiers, but the binary format allows at most 255.\n";
    }

    # Encode everything up front so that nothing is written if any identifier is too long.
    my @encodedIdentifiers;

    foreach my $identifier (@identifiers) {
        my $uIdentifier = encode_utf8($identifier);

        # The length prefix is a 16-bit integer, and pack('n') would silently wrap a larger value.
        if (length($uIdentifier) > 65535) {
            die "SymbolString identifier is " . length($uIdentifier)
                . " bytes, but the binary format allows at most 65535.\n";
        }

        push @encodedIdentifiers, $uIdentifier;
    }

    print {$fileHandle} pack('C', scalar @encodedIdentifiers);

    foreach my $uIdentifier (@encodedIdentifiers) {
        print {$fileHandle} pack('na*', length($uIdentifier), $uIdentifier);
    }
}
#
#   Function: FromBinaryFile
#
#   Loads a <SymbolString> or undef from the filehandle and returns it.
#
#   Parameters:
#
#       fileHandle - The filehandle to read from.
#
#   Returns:
#
#       The <SymbolString> or undef.  Undef is also returned if the identifier count can't be read, such as at the end of
#       the file.
#
#   Errors:
#
#       Dies if the file ends in the middle of an identifier record.
#
#   See Also:
#
#       See <ToBinaryFile()> for format and dependencies.
#
sub FromBinaryFile #(FileHandle fileHandle)
{
    my ($self, $fileHandle) = @_;

    my $raw;

    # [UInt8: number of identifiers or 0 if none]

    my $bytesRead = read($fileHandle, $raw, 1);

    # read() returns 0 at the end of the file and undef on error.  Either way there is no symbol to return.
    if (!$bytesRead) {
        return undef;
    }

    my $identifierCount = unpack('C', $raw);
    my @identifiers;

    while ($identifierCount) {
        # [UString16: identifier] [UString16: identifier] ...

        $bytesRead = read($fileHandle, $raw, 2);

        if (!defined $bytesRead || $bytesRead != 2) {
            die "Unexpected end of file while reading a SymbolString identifier length.\n";
        }

        my $identifierLength = unpack('n', $raw);

        my $identifier;
        $bytesRead = read($fileHandle, $identifier, $identifierLength);

        # A short read would otherwise be decoded as if it were the complete identifier.
        if (!defined $bytesRead || $bytesRead != $identifierLength) {
            die "Unexpected end of file while reading a SymbolString identifier.\n";
        }

        push @identifiers, decode_utf8($identifier);

        $identifierCount--;
    }

    if (scalar @identifiers) {
        return $self->Join(@identifiers);
    }

    return undef;
}
#
#   Function: IdentifiersOf
#
#   Returns the <SymbolString> as an array of identifiers.
#
#   Parameters:
#
#       symbol - The <SymbolString> to split.  Undef is treated as a symbol with no identifiers.
#
#   Returns:
#
#       The list of identifiers, which is empty if the symbol was undef or an empty string.
#
sub IdentifiersOf #(SymbolString symbol)
{
    my ($self, $symbol) = @_;

    # Substitute an empty string for undef so split() is never handed an undefined value.
    if (!defined $symbol) {
        $symbol = '';
    }

    return split(/\x1F/, $symbol);
}
#
#   Function: Join
#
#   Takes a list of identifiers and/or <SymbolStrings> and returns it as a new <SymbolString>.
#
#   Parameters:
#
#       pieces - Any number of identifiers and/or <SymbolStrings>.  Undefined entries are skipped.
#
#   Returns:
#
#       The new <SymbolString>, or undef if there were no defined pieces.
#
sub Join #(string/SymbolString identifier/symbol, string/SymbolString identifier/symbol, ...)
{
    my ($self, @pieces) = @_;

    # Can't have undefs screwing everything up.  They're dropped wherever they appear, since an undef in the middle of
    # the list would otherwise become an empty identifier.
    my @definedPieces = grep { defined $_ } @pieces;

    # We need to test the list first because joining on an empty array returns an empty string rather than undef.
    if (scalar @definedPieces) {
        return join("\x1F", @definedPieces);
    }

    return undef;
}