2 // Radegast Metaverse Client
3 // Copyright (c) 2009-2013, Radegast Development Team
4 // All rights reserved.
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions are met:
9 // * Redistributions of source code must retain the above copyright notice,
10 // this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above copyright
12 // notice, this list of conditions and the following disclaimer in the
13 // documentation and/or other materials provided with the distribution.
14 // * Neither the name of the application "Radegast", nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 /********************************************************8
32 * Author: Andrew Deren
34 * http://www.adersoftware.com
36 * StringTokenizer class. You can use this class in any way you want
37 * as long as this header remains in this file.
39 **********************************************************/
65 public Token(TokenKind kind, string value, int line, int column) // NOTE(review): body not visible here; presumably stores the four arguments in the fields returned by the getters below
75 get { return this.column; } // returns the stored column
80 get { return this.kind; } // returns the stored token kind
85 get { return this.line; } // returns the stored line
90 get { return this.value; } // returns the stored token text
95 /// StringTokenizer tokenizes a string (or stream) into tokens.
97 public class StringTokenizer
99 const char EOF = (char)0; // sentinel character with value 0; ReadComment tests characters against it
103 int pos; // current read position (index) within data
107 bool ignoreWhiteSpace; // backing field for the IgnoreWhiteSpace property
114 public StringTokenizer(TextReader reader) // creates a tokenizer over the text supplied by reader
117 throw new ArgumentNullException("reader"); // NOTE(review): the guarding condition is not visible here; presumably a null check on reader
119 data = reader.ReadToEnd(); // reads everything remaining in the reader into data up front
124 public StringTokenizer(string data) // creates a tokenizer directly from a string
127 throw new ArgumentNullException("data"); // NOTE(review): the guarding condition is not visible here; presumably a null check on data
135 /// Gets or sets which characters are part of TokenKind.Symbol.
137 public char[] SymbolChars
139 get { return this.symbolChars; } // hands out the array itself, not a copy
140 set { this.symbolChars = value; } // keeps the caller's array reference as-is
144 /// If set to true, white space characters will be ignored,
145 /// but EOL and whitespace inside of a string will still be tokenized.
147 public bool IgnoreWhiteSpace
149 get { return this.ignoreWhiteSpace; } // returns the current flag value
150 set { this.ignoreWhiteSpace = value; } // stores the new flag value
155 this.ignoreWhiteSpace = false;
156 this.symbolChars = new char[]{'=', '+', '-', '/', ',', '.', '*', '~', '!', '@', '#', '$', '%', '^', '&', '(', ')', '{', '}', '[', ']', ':', ';', '<', '>', '?', '|', '\\'};
163 protected char LA(int count) // look-ahead helper: reads the character count positions past pos
165 if (pos + count >= data.Length) // requested index is at or beyond the end of data (NOTE(review): result for this case is not visible here; presumably EOF)
168 return data[pos+count]; // in range: return the character at that index
171 protected char Consume() // NOTE(review): only the first statement is visible; presumably advances pos and returns ret
173 char ret = data[pos]; // capture the character at the current position
180 protected Token CreateToken(TokenKind kind, string value) // builds a token whose text is supplied by the caller
182 return new Token(kind, value, line, column); // position comes from the line and column fields
185 protected Token CreateToken(TokenKind kind) // builds a token from the text between the saved position and pos
187 string tokenData = data.Substring(savePos, pos-savePos); // text from savePos up to, but not including, pos
188 return new Token(kind, tokenData, saveLine, saveCol); // position comes from the saved line and column
199 return CreateToken(TokenKind.EOF, string.Empty);
204 if (this.ignoreWhiteSpace)
210 return ReadWhitespace();
229 Consume(); // on DOS/Windows we have \r\n for new line
234 return CreateToken(TokenKind.EOL);
243 return CreateToken(TokenKind.EOL);
255 return ReadComment();
257 else if (LA(1) == '*')
259 return ReadStarComment();
265 return CreateToken(TokenKind.Symbol);
271 if (Char.IsLetter(ch) || ch == '_')
273 else if (IsSymbol(ch))
277 return CreateToken(TokenKind.Symbol);
283 return CreateToken(TokenKind.Unknown);
291 /// Saves the read point positions so that CreateToken can use them.
293 private void StartRead() // NOTE(review): body not visible here; presumably sets savePos, saveLine and saveCol, which CreateToken(TokenKind) reads
301 /// Reads all whitespace characters (does not include newline).
303 /// <returns>A token of kind TokenKind.WhiteSpace.</returns>
304 protected Token ReadWhitespace()
308 Consume(); // consume the looked-ahead whitespace char
313 if (ch == '\t' || ch == ' ') // only tab and space are tested for here
319 return CreateToken(TokenKind.WhiteSpace); // token text is whatever was read since the saved position
324 /// Reads a number. Number is: DIGIT+ ("." DIGIT*)?
326 /// <returns>A token of kind TokenKind.Number.</returns>
327 protected Token ReadNumber()
333 Consume(); // read first digit
338 if (Char.IsDigit(ch)) // another digit (DIGIT in the grammar above)
340 else if (ch == '.' && !hadDot) // a dot is only considered while hadDot is false, matching the single optional "." in the grammar
349 return CreateToken(TokenKind.Number); // token text is whatever was read since the saved position
353 /// Reads a word. A word contains any alpha character or _
355 protected Token ReadWord()
359 Consume(); // consume first character of the word
364 if (Char.IsLetter(ch) || ch == '_') // letters and underscore are the word characters tested for
370 return CreateToken(TokenKind.Word); // token text is whatever was read since the saved position
374 /// Reads the rest of the line in a // comment
376 protected Token ReadComment()
380 Consume(); // consume first character of the comment
385 if (ch != EOF && ch != '\n' && ch != '\r') // true while ch is neither the EOF sentinel nor a line-break character
391 return CreateToken(TokenKind.Comment); // token text is whatever was read since the saved position
395 /// Reads C-style comments /* */
397 protected Token ReadStarComment()
401 Consume(); // consume first character of the comment
410 else if (ch == '*' && LA(1) == '/') // star with a slash one position ahead: the closing marker of the comment
422 return CreateToken(TokenKind.Comment); // token text is whatever was read since the saved position
426 /// Reads all characters until the next " is found.
427 /// If "" (2 quotes) are found, then they are consumed as
428 /// part of the string.
430 /// <returns>A token of kind TokenKind.QuotedString.</returns>
431 protected Token ReadString()
442 else if (ch == '\r') // handle CR in strings
445 if (LA(0) == '\n') // for DOS & Windows: CR is followed by LF
451 else if (ch == '\n') // new line in quoted string
462 break; // done reading; this quote is not an escape for a following quote
464 Consume(); // consume second ", because first was just an escape
470 return CreateToken(TokenKind.QuotedString); // token text is whatever was read since the saved position
474 /// checks whether c is a symbol character.
476 protected bool IsSymbol(char c)
478 for (int i=0; i<symbolChars.Length; i++)
479 if (symbolChars[i] == c)