libcore/luni/src/main/java/java/util/StringTokenizer.java

   1 /*
   2  *  Licensed to the Apache Software Foundation (ASF) under one or more
   3  *  contributor license agreements.  See the NOTICE file distributed with
   4  *  this work for additional information regarding copyright ownership.
   5  *  The ASF licenses this file to You under the Apache License, Version 2.0
   6  *  (the "License"); you may not use this file except in compliance with
   7  *  the License.  You may obtain a copy of the License at
   8  *
   9  *     http://www.apache.org/licenses/LICENSE-2.0
  10  *
  11  *  Unless required by applicable law or agreed to in writing, software
  12  *  distributed under the License is distributed on an "AS IS" BASIS,
  13  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14  *  See the License for the specific language governing permissions and
  15  *  limitations under the License.
  16  */
  17
  18 package java.util;
  19
  20 /**
  21  * The {@code StringTokenizer} class allows an application to break a string
  22  * into tokens by performing code point comparison. The {@code StringTokenizer}
  23  * methods do not distinguish among identifiers, numbers, and quoted strings,
  24  * nor do they recognize and skip comments.
  25  * <p>
  26  * The set of delimiters (the codepoints that separate tokens) may be specified
  27  * either at creation time or on a per-token basis.
  28  * <p>
  29  * An instance of {@code StringTokenizer} behaves in one of three ways,
  30  * depending on whether it was created with the {@code returnDelimiters} flag
  31  * having the value {@code true} or {@code false}:
  32  * <ul>
  33  * <li>If returnDelims is {@code false}, delimiter code points serve to separate
  34  * tokens. A token is a maximal sequence of consecutive code points that are not
  35  * delimiters.
  36  * <li>If returnDelims is {@code true}, delimiter code points are themselves
  37  * considered to be tokens. In this case a token will be received for each
  38  * delimiter code point.
  39  * </ul>
  40  * <p>
  41  * A token is thus either one delimiter code point, or a maximal sequence of
  42  * consecutive code points that are not delimiters.
  43  * <p>
  44  * A {@code StringTokenizer} object internally maintains a current position
  45  * within the string to be tokenized. Some operations advance this current
  46  * position past the code point processed.
  47  * <p>
  48  * A token is returned by taking a substring of the string that was used to
  49  * create the {@code StringTokenizer} object.
  50  * <p>
  51  * Here's an example of the use of the default delimiter {@code StringTokenizer}
  52  * : <blockquote>
  53  *
  54  * <pre>
  55  * StringTokenizer st = new StringTokenizer(&quot;this is a test&quot;);
  56  * while (st.hasMoreTokens()) {
  57  *     println(st.nextToken());
  58  * }
  59  * </pre>
  60  *
  61  * </blockquote>
  62  * <p>
  63  * This prints the following output: <blockquote>
  64  *
  65  * <pre>
  66  *     this
  67  *     is
  68  *     a
  69  *     test
  70  * </pre>
  71  *
  72  * </blockquote>
  73  * <p>
  74  * Here's an example of how to use a {@code StringTokenizer} with a user
  75  * specified delimiter: <blockquote>
  76  *
  77  * <pre>
  78  * StringTokenizer st = new StringTokenizer(
  79  *         &quot;this is a test with supplementary characters \ud800\ud800\udc00\udc00&quot;,
  80  *         &quot; \ud800\udc00&quot;);
  81  * while (st.hasMoreTokens()) {
  82  *     println(st.nextToken());
  83  * }
  84  * </pre>
  85  *
  86  * </blockquote>
  87  * <p>
  88  * This prints the following output: <blockquote>
  89  *
  90  * <pre>
  91  *     this
  92  *     is
  93  *     a
  94  *     test
  95  *     with
  96  *     supplementary
  97  *     characters
  98  *     \ud800
  99  *     \udc00
 100  * </pre>
 101  *
 102  * </blockquote>
 103  */
 104 public class StringTokenizer implements Enumeration<Object> {
 105
 106     private String string;
 107
 108     private String delimiters;
 109
 110     private boolean returnDelimiters;
 111
 112     private int position;
 113
 114     /**
 115      * Constructs a new {@code StringTokenizer} for the parameter string using
 116      * whitespace as the delimiter. The {@code returnDelimiters} flag is set to
 117      * {@code false}.
 118      *
 119      * @param string
 120      *            the string to be tokenized.
 121      */
 122     public StringTokenizer(String string) {
 123         this(string, " \t\n\r\f", false);
 124     }
 125
 126     /**
 127      * Constructs a new {@code StringTokenizer} for the parameter string using
 128      * the specified delimiters. The {@code returnDelimiters} flag is set to
 129      * {@code false}. If {@code delimiters} is {@code null}, this constructor
 130      * doesn't throw an {@code Exception}, but later calls to some methods might
 131      * throw a {@code NullPointerException}.
 132      *
 133      * @param string
 134      *            the string to be tokenized.
 135      * @param delimiters
 136      *            the delimiters to use.
 137      */
 138     public StringTokenizer(String string, String delimiters) {
 139         this(string, delimiters, false);
 140     }
 141
 142     /**
 143      * Constructs a new {@code StringTokenizer} for the parameter string using
 144      * the specified delimiters, returning the delimiters as tokens if the
 145      * parameter {@code returnDelimiters} is {@code true}. If {@code delimiters}
 146      * is null this constructor doesn't throw an {@code Exception}, but later
 147      * calls to some methods might throw a {@code NullPointerException}.
 148      *
 149      * @param string
 150      *            the string to be tokenized.
 151      * @param delimiters
 152      *            the delimiters to use.
 153      * @param returnDelimiters
 154      *            {@code true} to return each delimiter as a token.
 155      */
 156     public StringTokenizer(String string, String delimiters,
 157             boolean returnDelimiters) {
 158         if (string != null) {
 159             this.string = string;
 160             this.delimiters = delimiters;
 161             this.returnDelimiters = returnDelimiters;
 162             this.position = 0;
 163         } else
 164             throw new NullPointerException();
 165     }
 166
 167     /**
 168      * Returns the number of unprocessed tokens remaining in the string.
 169      *
 170      * @return number of tokens that can be retreived before an {@code
 171      *         Exception} will result from a call to {@code nextToken()}.
 172      */
 173     public int countTokens() {
 174         int count = 0;
 175         boolean inToken = false;
 176         for (int i = position, length = string.length(); i < length; i++) {
 177             if (delimiters.indexOf(string.charAt(i), 0) >= 0) {
 178                 if (returnDelimiters)
 179                     count++;
 180                 if (inToken) {
 181                     count++;
 182                     inToken = false;
 183                 }
 184             } else {
 185                 inToken = true;
 186             }
 187         }
 188         if (inToken)
 189             count++;
 190         return count;
 191     }
 192
 193     /**
 194      * Returns {@code true} if unprocessed tokens remain. This method is
 195      * implemented in order to satisfy the {@code Enumeration} interface.
 196      *
 197      * @return {@code true} if unprocessed tokens remain.
 198      */
 199     public boolean hasMoreElements() {
 200         return hasMoreTokens();
 201     }
 202
 203     /**
 204      * Returns {@code true} if unprocessed tokens remain.
 205      *
 206      * @return {@code true} if unprocessed tokens remain.
 207      */
 208     public boolean hasMoreTokens() {
 209         if (delimiters == null) {
 210             throw new NullPointerException();
 211         }
 212         int length = string.length();
 213         if (position < length) {
 214             if (returnDelimiters)
 215                 return true; // there is at least one character and even if
 216             // it is a delimiter it is a token
 217
 218             // otherwise find a character which is not a delimiter
 219             for (int i = position; i < length; i++)
 220                 if (delimiters.indexOf(string.charAt(i), 0) == -1)
 221                     return true;
 222         }
 223         return false;
 224     }
 225
 226     /**
 227      * Returns the next token in the string as an {@code Object}. This method is
 228      * implemented in order to satisfy the {@code Enumeration} interface.
 229      *
 230      * @return next token in the string as an {@code Object}
 231      * @throws NoSuchElementException
 232      *                if no tokens remain.
 233      */
 234     public Object nextElement() {
 235         return nextToken();
 236     }
 237
 238     /**
 239      * Returns the next token in the string as a {@code String}.
 240      *
 241      * @return next token in the string as a {@code String}.
 242      * @throws NoSuchElementException
 243      *                if no tokens remain.
 244      */
 245     public String nextToken() {
 246         if (delimiters == null) {
 247             throw new NullPointerException();
 248         }
 249         int i = position;
 250         int length = string.length();
 251
 252         if (i < length) {
 253             if (returnDelimiters) {
 254                 if (delimiters.indexOf(string.charAt(position), 0) >= 0)
 255                     return String.valueOf(string.charAt(position++));
 256                 for (position++; position < length; position++)
 257                     if (delimiters.indexOf(string.charAt(position), 0) >= 0)
 258                         return string.substring(i, position);
 259                 return string.substring(i);
 260             }
 261
 262             while (i < length && delimiters.indexOf(string.charAt(i), 0) >= 0)
 263                 i++;
 264             position = i;
 265             if (i < length) {
 266                 for (position++; position < length; position++)
 267                     if (delimiters.indexOf(string.charAt(position), 0) >= 0)
 268                         return string.substring(i, position);
 269                 return string.substring(i);
 270             }
 271         }
 272         throw new NoSuchElementException();
 273     }
 274
 275     /**
 276      * Returns the next token in the string as a {@code String}. The delimiters
 277      * used are changed to the specified delimiters.
 278      *
 279      * @param delims
 280      *            the new delimiters to use.
 281      * @return next token in the string as a {@code String}.
 282      * @throws NoSuchElementException
 283      *                if no tokens remain.
 284      */
 285     public String nextToken(String delims) {
 286         this.delimiters = delims;
 287         return nextToken();
 288     }
 289 }