From: leo Date: Mon, 1 Jun 2009 09:43:22 +0000 (+0000) Subject: JSON construction using JSONPullParser X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=fd680674419806dbb7c450e91fce91d694472504;p=xerial%2Fxerial-core.git JSON construction using JSONPullParser git-svn-id: http://www.xerial.org/svn/project/XerialJ/trunk/xerial-core@3347 ae02f08e-27ec-0310-ae8c-8ba02fe2eafd --- diff --git a/src/main/java/org/xerial/json/JSONArray.java b/src/main/java/org/xerial/json/JSONArray.java index 98e8f5c..8c6d2ce 100644 --- a/src/main/java/org/xerial/json/JSONArray.java +++ b/src/main/java/org/xerial/json/JSONArray.java @@ -39,19 +39,108 @@ import org.xerial.json.impl.JSONTokenizer; public class JSONArray extends JSONValueBase implements Iterable { - private ArrayList _array = new ArrayList(); + private final ArrayList _array; public JSONArray() - {} + { + _array = new ArrayList(); + } public JSONArray(List elemList) { + _array = new ArrayList(elemList.size()); for (JSONValue v : elemList) _array.add(v); } + JSONArray(JSONPullParser parser) throws JSONException + { + this._array = new ArrayList(); + + JSONEvent e = parser.next(); + if (e != JSONEvent.StartArray) + throw new JSONException(JSONErrorCode.ParseError, "expected [, but " + e); + + parseArray(this, parser); + } + + private static JSONArray parseArray(JSONArray array, JSONPullParser parser) throws JSONException + { + JSONEvent e; + + while ((e = parser.next()) != JSONEvent.EndJSON) + { + switch (e) + { + case Integer: + case Double: + case Boolean: + case Null: + case String: + array.add(parser.getValue()); + break; + case StartObject: + array.add(parseObject(parser)); + break; + case EndObject: + break; + case StartArray: + array.add(parseArray(new JSONArray(), parser)); + break; + case EndArray: + return array; + } + + } + + return array; + + } + + private static JSONObject parseObject(JSONPullParser parser) throws JSONException + { + JSONEvent e = parser.next(); + + JSONObject obj = new JSONObject(); + + while ((e = parser.next()) != JSONEvent.EndJSON) + { + switch (e) + { + case Integer: + case Double: + case Boolean: + case Null: + case String: + String key = parser.getKeyName(); + + // if first child element is value attribute + if (key != null) + { + obj.put(key, parser.getValue()); + } + break; + case StartObject: + obj.put(parser.getKeyName(), parseObject(parser)); + break; + case EndObject: + return obj; + case StartArray: + obj.put(parser.getKeyName(), parseArray(new JSONArray(), parser)); + break; + case EndArray: + break; + } + + } + return obj; + + } + public JSONArray(JSONTokenizer tokenizer) throws JSONException { + _array = new ArrayList(); + char c = tokenizer.nextClean(); char q; if (c == '[') diff --git a/src/main/java/org/xerial/json/JSONEventHandler.java b/src/main/java/org/xerial/json/JSONEventHandler.java new file mode 100644 index 0000000..2a20648 --- /dev/null +++ b/src/main/java/org/xerial/json/JSONEventHandler.java @@ -0,0 +1,36 @@ +/*-------------------------------------------------------------------------- + * Copyright 2009 Taro L. Saito + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *--------------------------------------------------------------------------*/ +//-------------------------------------- +// XerialJ +// +// JSONEventHandler.java +// Since: Jun 1, 2009 5:51:31 PM +// +// $URL$ +// $Author$ +//-------------------------------------- +package org.xerial.json; + +/** + * {@link JSONEvent} handler for JSONPushParser + * + * @author leo + * + */ +public interface JSONEventHandler +{ + public void handle(JSONEvent e) throws Exception; +} diff --git a/src/main/java/org/xerial/json/JSONPushParser.java b/src/main/java/org/xerial/json/JSONPushParser.java new file mode 100644 index 0000000..e1792d7 --- /dev/null +++ b/src/main/java/org/xerial/json/JSONPushParser.java @@ -0,0 +1,95 @@ +/*-------------------------------------------------------------------------- + * Copyright 2009 Taro L. Saito + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *--------------------------------------------------------------------------*/ +//-------------------------------------- +// XerialJ +// +// JSONPushParser.java +// Since: Jun 1, 2009 5:52:05 PM +// +// $URL$ +// $Author$ +//-------------------------------------- +package org.xerial.json; + +import static org.xerial.json.impl.JSONLexer.*; + +import java.io.IOException; +import java.io.Reader; + +import org.antlr.runtime.ANTLRReaderStream; +import org.antlr.runtime.ANTLRStringStream; +import org.antlr.runtime.Token; +import org.xerial.json.impl.JSONLexer; + +/** + * Push-style JSON parser + * + * @author leo + * + */ +public class JSONPushParser +{ + private JSONLexer lexer; + + public JSONPushParser(String json) + { + lexer = new JSONLexer(new ANTLRStringStream(json)); + } + + public JSONPushParser(Reader input) throws IOException + { + lexer = new JSONLexer(new ANTLRReaderStream(input)); + } + + private JSONEventHandler handler; + + public void parse(JSONEventHandler handler) + { + this.handler = handler; + + Token t = lexer.nextToken(); + + switch (t.getType()) + { + case Integer: + break; + case Double: + break; + case String: + break; + case LBracket: + break; + case RBracket: + break; + case LBrace: + break; + case RBrace: + break; + case Comma: + break; + case Colon: + break; + case TRUE: + break; + case FALSE: + break; + case NULL: + break; + } + + } + +} diff --git a/src/main/java/org/xerial/silk/SilkParser.java b/src/main/java/org/xerial/silk/SilkParser.java index a8791b6..8f529a6 100644 --- a/src/main/java/org/xerial/silk/SilkParser.java +++ b/src/main/java/org/xerial/silk/SilkParser.java @@ -689,6 +689,9 @@ public class SilkParser implements SilkEventHandler // TODO this part is the bottle neck of the parsing JSONArray array = new JSONArray(columnData); + // JSONPullParser parser = new JSONPullParser(columnData); + // while (parser.next() != JSONEvent.EndJSON) + // {} // 10000 lines/sec @@ -715,6 +718,75 @@ public class SilkParser implements SilkEventHandler } + // private class EvalJSON + // { + // JSONPullParser parser; + // + // EvalJSON(String json) + // { + // this.parser = new JSONPullParser(json); + // } + // + // void evalJSONArray(SilkNode schemaNode) + // { + // if (schemaNode.hasManyOccurrences()) + // { + // if (schemaNode.hasChildren()) + // { + // // e.g., exon(start, name)* + // // multiple occurrences: [[start, end], [start, end], ... ] + // + // JSONEvent e = parser.next(); + // if(e != JSONEvent.StartArray) + // throw new XerialException(XerialErrorCode.PARSE_ERROR, "expected [ but " + e); + // + // + // for (int i = 0; i < value.size(); i++) + // { + // JSONArray eachElement = value.getJSONArray(i); + // if (eachElement == null) + // continue; + // + // visit(schemaNode.getName(), null); + // int index = 0; + // for (SilkNode eachSubSchema : schemaNode.getChildNodes()) + // { + // evalJSONArray(eachSubSchema); + // } + // leave(schemaNode.getName()); + // } + // } + // else + // { + // // e.g. QV*: [20, 50, 50] + // for (int i = 0; i < value.size(); i++) + // { + // visit(schemaNode.getName(), value.get(i).toString()); + // leave(schemaNode.getName()); + // } + // } + // } + // else + // { + // // [e1, e2, ...] + // visit(schemaNode.getName(), null); + // int index = 0; + // if (schemaNode.getChildNodes().size() != value.size()) + // { + // throw new XerialException(XerialErrorCode.INVALID_INPUT, String.format( + // "data format doesn't match: schema=%s, value=%s", schemaNode, value)); + // } + // for (SilkNode each : schemaNode.getChildNodes()) + // { + // walkMicroFormatElement(each, value.get(index++)); + // } + // leave(schemaNode.getName()); + // } + // + // } + // + // } + private void evalColumnData(SilkNode node, String columnData) throws Exception { try diff --git a/src/test/java/org/xerial/json/JSONArrayTest.java b/src/test/java/org/xerial/json/JSONArrayTest.java index aa78c7a..8e87b15 100644 --- a/src/test/java/org/xerial/json/JSONArrayTest.java +++ b/src/test/java/org/xerial/json/JSONArrayTest.java @@ -94,4 +94,35 @@ public class JSONArrayTest } + @Test + public void testParseANTLRLexer() throws JSONException + { + // generate a sample JSON array + StringBuilder sample = new StringBuilder(); + sample.append("["); + int i = 0; + final int N = 5000; + for (; i < N - 1; i++) + { + sample.append(i); + sample.append(","); + } + sample.append(i); + sample.append("]"); + + String json = sample.toString(); + + StopWatch timer = new StopWatch(); + for (int n = 0; n < 500; n++) + { + JSONArray array = new JSONArray(new JSONPullParser(json)); + assertEquals(N, array.size()); + } + _logger.info("time: " + timer.getElapsedTime()); + + // i:1000, n:100 time=18.4 sec (2009.4.23 using ANTLR JSON.g) + // i:1000, n:100 time=2.248 (2009. 4.23 using JSONTokener) + + } + } diff --git a/src/test/java/org/xerial/json/JSONStreamReaderTest.java b/src/test/java/org/xerial/json/JSONStreamReaderTest.java index b9485a5..692b059 100644 --- a/src/test/java/org/xerial/json/JSONStreamReaderTest.java +++ b/src/test/java/org/xerial/json/JSONStreamReaderTest.java @@ -26,9 +26,13 @@ package org.xerial.json; import static org.junit.Assert.*; +import org.antlr.runtime.ANTLRStringStream; +import org.antlr.runtime.Token; import org.junit.After; import org.junit.Before; import org.junit.Test; +import org.xerial.json.impl.JSONLexer; +import org.xerial.json.impl.JSONTokenizer; import org.xerial.util.FileResource; import org.xerial.util.HashedArrayList; import org.xerial.util.StopWatch; @@ -109,10 +113,9 @@ public class JSONStreamReaderTest StopWatch timer = new StopWatch(); - JSONPullParser parser = new JSONPullParser(json); for (int n = 0; n < 500; n++) { - parser.reset(json); + JSONPullParser parser = new JSONPullParser(json); JSONEvent e; while ((e = parser.next()) != JSONEvent.EndJSON) @@ -121,9 +124,133 @@ public class JSONStreamReaderTest } _logger.info("time: " + timer.getElapsedTime()); + } + + @Test + public void testLexerPerformance() throws Exception + { + + // generate a sample JSON array + StringBuilder sample = new StringBuilder(); + sample.append("["); + int i = 0; + final int N = 5000; + for (; i < N; i++) + { + sample.append(i); + sample.append(","); + } + sample.append(i); + sample.append("]"); + + String json = sample.toString(); + + StopWatch timer = new StopWatch(); + + for (int n = 0; n < 500; n++) + { + JSONLexer lexer = new JSONLexer(new ANTLRStringStream(json)); + + Token t; + while ((t = lexer.nextToken()).getType() != Token.EOF) + {} + + } + _logger.info("time: " + timer.getElapsedTime()); + + } + + @Test + public void testJSONTokenerPeformance() throws Exception + { + + // generate a sample JSON array + StringBuilder sample = new StringBuilder(); + sample.append("["); + int i = 0; + final int N = 5000; + for (; i < N; i++) + { + sample.append(i); + sample.append(","); + } + sample.append(i); + sample.append("]"); + + String json = sample.toString(); + + StopWatch timer = new StopWatch(); + + for (int n = 0; n < 500; n++) + { + JSONTokenizer tokenizer = new JSONTokenizer(json); + + parseArray(tokenizer); + + } + _logger.info("time: " + timer.getElapsedTime()); + // i:1000, n:100 time=18.4 sec (2009.4.23 using ANTLR JSON.g) // i:1000, n:100 time=2.248 (2009. 4.23 using JSONTokener) } + public void parseArray(JSONTokenizer tokenizer) throws JSONException + { + char c = tokenizer.nextClean(); + char q; + if (c == '[') + { + q = ']'; + } + else if (c == '(') + { + q = ')'; + } + else + { + throw tokenizer.syntaxError("A JSONArray text must start with '['"); + } + if (tokenizer.nextClean() == ']') + { + return; + } + tokenizer.back(); + for (;;) + { + if (tokenizer.nextClean() == ',') + { + tokenizer.back(); + //_array.add(null); + } + else + { + tokenizer.back(); + tokenizer.nextValue(); + //_array.add(tokenizer.nextValue()); + } + c = tokenizer.nextClean(); + switch (c) + { + case ';': + case ',': + if (tokenizer.nextClean() == ']') + { + return; + } + tokenizer.back(); + break; + case ']': + case ')': + if (q != c) + { + throw tokenizer.syntaxError("Expected a '" + new Character(q) + "'"); + } + return; + default: + throw tokenizer.syntaxError("Expected a ',' or ']'"); + } + } + } + }