OSDN Git Service

JSON construction using JSONPullParser
authorleo <leo@ae02f08e-27ec-0310-ae8c-8ba02fe2eafd>
Mon, 1 Jun 2009 09:43:22 +0000 (09:43 +0000)
committerleo <leo@ae02f08e-27ec-0310-ae8c-8ba02fe2eafd>
Mon, 1 Jun 2009 09:43:22 +0000 (09:43 +0000)
git-svn-id: http://www.xerial.org/svn/project/XerialJ/trunk/xerial-core@3347 ae02f08e-27ec-0310-ae8c-8ba02fe2eafd

src/main/java/org/xerial/json/JSONArray.java
src/main/java/org/xerial/json/JSONEventHandler.java [new file with mode: 0644]
src/main/java/org/xerial/json/JSONPushParser.java [new file with mode: 0644]
src/main/java/org/xerial/silk/SilkParser.java
src/test/java/org/xerial/json/JSONArrayTest.java
src/test/java/org/xerial/json/JSONStreamReaderTest.java

index 98e8f5c..8c6d2ce 100644 (file)
@@ -39,19 +39,108 @@ import org.xerial.json.impl.JSONTokenizer;
 public class JSONArray extends JSONValueBase implements Iterable<JSONValue>\r
 {\r
 \r
-    private ArrayList<JSONValue> _array = new ArrayList<JSONValue>();\r
+    private final ArrayList<JSONValue> _array;\r
 \r
+    /**\r
+     * Creates an empty JSON array.\r
+     */\r
     public JSONArray()\r
-    {}\r
+    {\r
+        _array = new ArrayList<JSONValue>();\r
+    }\r
 \r
+    /**\r
+     * Creates a JSON array containing the elements of the given list, added in the list's\r
+     * iteration order. The backing list is pre-sized to the number of elements.\r
+     *\r
+     * @param elemList\r
+     *            the values to copy into this array\r
+     */\r
     public JSONArray(List<JSONValue> elemList)\r
     {\r
+        _array = new ArrayList<JSONValue>(elemList.size());\r
         for (JSONValue v : elemList)\r
             _array.add(v);\r
     }\r
 \r
+    /**\r
+     * Builds this array from the events of a pull parser. The first event must be\r
+     * {@link JSONEvent#StartArray}; the events that follow are consumed by\r
+     * {@link #parseArray(JSONArray, JSONPullParser)}, which fills this instance.\r
+     *\r
+     * @param parser\r
+     *            the event source\r
+     * @throws JSONException\r
+     *             with {@link JSONErrorCode#ParseError} when the first event is not StartArray\r
+     */\r
+    JSONArray(JSONPullParser parser) throws JSONException\r
+    {\r
+        _array = new ArrayList<JSONValue>();\r
+\r
+        final JSONEvent firstEvent = parser.next();\r
+        if (firstEvent != JSONEvent.StartArray)\r
+        {\r
+            throw new JSONException(JSONErrorCode.ParseError, "expected [, but " + firstEvent);\r
+        }\r
+\r
+        parseArray(this, parser);\r
+    }\r
+\r
+    /**\r
+     * Appends the values described by the parser's events to the given array. The caller must\r
+     * already have consumed the StartArray event of that array. Reading stops at the first\r
+     * EndArray event, or at EndJSON when the input ends first.\r
+     *\r
+     * @param array\r
+     *            the array to fill\r
+     * @param parser\r
+     *            the event source\r
+     * @return the same array instance that was passed in\r
+     * @throws JSONException\r
+     *             propagated from the parser or from nested parsing\r
+     */\r
+    private static JSONArray parseArray(JSONArray array, JSONPullParser parser) throws JSONException\r
+    {\r
+        for (JSONEvent event = parser.next(); event != JSONEvent.EndJSON; event = parser.next())\r
+        {\r
+            switch (event)\r
+            {\r
+            case EndArray:\r
+                // closing bracket of the array being filled\r
+                return array;\r
+            case StartArray:\r
+                // nested array: fill a fresh instance recursively\r
+                array.add(parseArray(new JSONArray(), parser));\r
+                break;\r
+            case StartObject:\r
+                array.add(parseObject(parser));\r
+                break;\r
+            case EndObject:\r
+                // nothing to do here\r
+                break;\r
+            case Integer:\r
+            case Double:\r
+            case Boolean:\r
+            case Null:\r
+            case String:\r
+                // primitive element\r
+                array.add(parser.getValue());\r
+                break;\r
+            default:\r
+                // any other event is ignored\r
+                break;\r
+            }\r
+        }\r
+\r
+        // input ended before an EndArray event was seen\r
+        return array;\r
+    }\r
+\r
+    /**\r
+     * Reads the members of one JSON object from the parser. The caller must already have\r
+     * consumed the StartObject event, so the first event read here is the object's first\r
+     * member (or EndObject for an empty object).\r
+     *\r
+     * @param parser\r
+     *            the event source, positioned just after StartObject\r
+     * @return the object holding every keyed member read up to the matching EndObject, or up\r
+     *         to EndJSON when the input ends first\r
+     * @throws JSONException\r
+     *             propagated from the parser or from nested parsing\r
+     */\r
+    private static JSONObject parseObject(JSONPullParser parser) throws JSONException\r
+    {\r
+        JSONObject obj = new JSONObject();\r
+\r
+        // No parser.next() before the loop: both call sites invoke this method right after\r
+        // receiving StartObject, so an extra read would discard the object's first member.\r
+        JSONEvent e;\r
+        while ((e = parser.next()) != JSONEvent.EndJSON)\r
+        {\r
+            switch (e)\r
+            {\r
+            case Integer:\r
+            case Double:\r
+            case Boolean:\r
+            case Null:\r
+            case String:\r
+            {\r
+                String key = parser.getKeyName();\r
+\r
+                // a primitive event without a key name cannot be stored as a member; skip it\r
+                if (key != null)\r
+                {\r
+                    obj.put(key, parser.getValue());\r
+                }\r
+                break;\r
+            }\r
+            case StartObject:\r
+                obj.put(parser.getKeyName(), parseObject(parser));\r
+                break;\r
+            case EndObject:\r
+                // closing brace of the object being built\r
+                return obj;\r
+            case StartArray:\r
+                obj.put(parser.getKeyName(), parseArray(new JSONArray(), parser));\r
+                break;\r
+            case EndArray:\r
+                break;\r
+            }\r
+\r
+        }\r
+\r
+        // input ended before an EndObject event was seen\r
+        return obj;\r
+\r
+    }\r
+\r
     public JSONArray(JSONTokenizer tokenizer) throws JSONException\r
     {\r
+        _array = new ArrayList<JSONValue>();\r
+\r
         char c = tokenizer.nextClean();\r
         char q;\r
         if (c == '[')\r
diff --git a/src/main/java/org/xerial/json/JSONEventHandler.java b/src/main/java/org/xerial/json/JSONEventHandler.java
new file mode 100644 (file)
index 0000000..2a20648
--- /dev/null
@@ -0,0 +1,36 @@
+/*--------------------------------------------------------------------------
+ *  Copyright 2009 Taro L. Saito
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *--------------------------------------------------------------------------*/
+//--------------------------------------
+// XerialJ
+//
+// JSONEventHandler.java
+// Since: Jun 1, 2009 5:51:31 PM
+//
+// $URL$
+// $Author$
+//--------------------------------------
+package org.xerial.json;
+
+/**
+ * Callback that receives {@link JSONEvent}s; intended for use with JSONPushParser.
+ * 
+ * @author leo
+ * 
+ */
+public interface JSONEventHandler
+{
+    /**
+     * Handles a single JSON event.
+     * 
+     * @param e
+     *            the event to handle
+     * @throws Exception
+     *             when the handler fails to process the event
+     */
+    void handle(JSONEvent e) throws Exception;
+}
diff --git a/src/main/java/org/xerial/json/JSONPushParser.java b/src/main/java/org/xerial/json/JSONPushParser.java
new file mode 100644 (file)
index 0000000..e1792d7
--- /dev/null
@@ -0,0 +1,95 @@
+/*--------------------------------------------------------------------------
+ *  Copyright 2009 Taro L. Saito
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *--------------------------------------------------------------------------*/
+//--------------------------------------
+// XerialJ
+//
+// JSONPushParser.java
+// Since: Jun 1, 2009 5:52:05 PM
+//
+// $URL$
+// $Author$
+//--------------------------------------
+package org.xerial.json;
+
+import static org.xerial.json.impl.JSONLexer.*;
+
+import java.io.IOException;
+import java.io.Reader;
+
+import org.antlr.runtime.ANTLRReaderStream;
+import org.antlr.runtime.ANTLRStringStream;
+import org.antlr.runtime.Token;
+import org.xerial.json.impl.JSONLexer;
+
+/**
+ * Push-style JSON parser. This class is a skeleton: {@link #parse(JSONEventHandler)} reads a
+ * single token from the lexer and does not dispatch any event to the handler yet.
+ * 
+ * @author leo
+ * 
+ */
+public class JSONPushParser
+{
+    private final JSONLexer  lexer;
+    private JSONEventHandler handler;
+
+    /**
+     * Creates a parser that tokenizes the given JSON text.
+     * 
+     * @param json
+     *            the JSON text
+     */
+    public JSONPushParser(String json)
+    {
+        this.lexer = new JSONLexer(new ANTLRStringStream(json));
+    }
+
+    /**
+     * Creates a parser that tokenizes the characters supplied by the given reader.
+     * 
+     * @param input
+     *            the source of JSON text
+     * @throws IOException
+     *             when the reader-backed stream cannot be created
+     */
+    public JSONPushParser(Reader input) throws IOException
+    {
+        this.lexer = new JSONLexer(new ANTLRReaderStream(input));
+    }
+
+    /**
+     * Remembers the handler and reads one token from the lexer. No token type triggers any
+     * action yet.
+     * 
+     * @param handler
+     *            the handler to remember for event dispatch
+     */
+    public void parse(JSONEventHandler handler)
+    {
+        this.handler = handler;
+
+        final Token token = lexer.nextToken();
+
+        switch (token.getType())
+        {
+        case Integer:
+        case Double:
+        case String:
+        case LBracket:
+        case RBracket:
+        case LBrace:
+        case RBrace:
+        case Comma:
+        case Colon:
+        case TRUE:
+        case FALSE:
+        case NULL:
+            // every recognized token type is currently a no-op
+            break;
+        }
+
+    }
+
+}
index a8791b6..8f529a6 100644 (file)
@@ -689,6 +689,9 @@ public class SilkParser implements SilkEventHandler
 
                 // TODO this part is the bottleneck of the parsing
                 JSONArray array = new JSONArray(columnData);
+                //                JSONPullParser parser = new JSONPullParser(columnData);
+                //                while (parser.next() != JSONEvent.EndJSON)
+                //                {}
 
                 // 10000 lines/sec
 
@@ -715,6 +718,75 @@ public class SilkParser implements SilkEventHandler
 
     }
 
+    //    private class EvalJSON 
+    //    {
+    //        JSONPullParser parser;
+    //        
+    //        EvalJSON(String json)
+    //        {
+    //            this.parser = new JSONPullParser(json);
+    //        }
+    //        
+    //        void evalJSONArray(SilkNode schemaNode)
+    //        {
+    //            if (schemaNode.hasManyOccurrences())
+    //            {
+    //                if (schemaNode.hasChildren())
+    //                {
+    //                    // e.g., exon(start, name)*
+    //                    // multiple occurrences: [[start, end], [start, end], ... ]
+    //                    
+    //                    JSONEvent e = parser.next();                    
+    //                    if(e != JSONEvent.StartArray)
+    //                        throw new XerialException(XerialErrorCode.PARSE_ERROR, "expected [ but " + e);
+    //                    
+    //                    
+    //                    for (int i = 0; i < value.size(); i++)
+    //                    {
+    //                        JSONArray eachElement = value.getJSONArray(i);
+    //                        if (eachElement == null)
+    //                            continue;
+    //
+    //                        visit(schemaNode.getName(), null);
+    //                        int index = 0;
+    //                        for (SilkNode eachSubSchema : schemaNode.getChildNodes())
+    //                        {
+    //                            evalJSONArray(eachSubSchema);
+    //                        }
+    //                        leave(schemaNode.getName());
+    //                    }
+    //                }
+    //                else
+    //                {
+    //                    // e.g. QV*: [20, 50, 50]
+    //                    for (int i = 0; i < value.size(); i++)
+    //                    {
+    //                        visit(schemaNode.getName(), value.get(i).toString());
+    //                        leave(schemaNode.getName());
+    //                    }
+    //                }
+    //            }
+    //            else
+    //            {
+    //                // [e1, e2, ...]
+    //                visit(schemaNode.getName(), null);
+    //                int index = 0;
+    //                if (schemaNode.getChildNodes().size() != value.size())
+    //                {
+    //                    throw new XerialException(XerialErrorCode.INVALID_INPUT, String.format(
+    //                            "data format doesn't match: schema=%s, value=%s", schemaNode, value));
+    //                }
+    //                for (SilkNode each : schemaNode.getChildNodes())
+    //                {
+    //                    walkMicroFormatElement(each, value.get(index++));
+    //                }
+    //                leave(schemaNode.getName());
+    //            }
+    //   
+    //        }
+    //        
+    //    }
+
     private void evalColumnData(SilkNode node, String columnData) throws Exception
     {
         try
index aa78c7a..8e87b15 100644 (file)
@@ -94,4 +94,35 @@ public class JSONArrayTest
 
     }
 
+    /**
+     * Parses a generated array of N integers 500 times through JSONPullParser, checks the
+     * element count each time, and logs the elapsed time.
+     */
+    @Test
+    public void testParseANTLRLexer() throws JSONException
+    {
+        // build the sample text "[0,1,...,N-1]"
+        final int N = 5000;
+        StringBuilder buf = new StringBuilder();
+        buf.append("[");
+        for (int value = 0; value < N; value++)
+        {
+            if (value > 0)
+                buf.append(",");
+            buf.append(value);
+        }
+        buf.append("]");
+
+        final String json = buf.toString();
+
+        StopWatch timer = new StopWatch();
+        for (int round = 0; round < 500; round++)
+        {
+            JSONArray parsed = new JSONArray(new JSONPullParser(json));
+            assertEquals(N, parsed.size());
+        }
+        _logger.info("time: " + timer.getElapsedTime());
+
+        // i:1000, n:100   time=18.4 sec (2009.4.23 using ANTLR JSON.g)
+        // i:1000, n:100   time=2.248 (2009. 4.23 using JSONTokener)
+
+    }
+
 }
index b9485a5..692b059 100644 (file)
@@ -26,9 +26,13 @@ package org.xerial.json;
 
 import static org.junit.Assert.*;
 
+import org.antlr.runtime.ANTLRStringStream;
+import org.antlr.runtime.Token;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
+import org.xerial.json.impl.JSONLexer;
+import org.xerial.json.impl.JSONTokenizer;
 import org.xerial.util.FileResource;
 import org.xerial.util.HashedArrayList;
 import org.xerial.util.StopWatch;
@@ -109,10 +113,9 @@ public class JSONStreamReaderTest
 
         StopWatch timer = new StopWatch();
 
-        JSONPullParser parser = new JSONPullParser(json);
         for (int n = 0; n < 500; n++)
         {
-            parser.reset(json);
+            JSONPullParser parser = new JSONPullParser(json);
 
             JSONEvent e;
             while ((e = parser.next()) != JSONEvent.EndJSON)
@@ -121,9 +124,133 @@ public class JSONStreamReaderTest
         }
         _logger.info("time: " + timer.getElapsedTime());
 
+    }
+
+    /**
+     * Runs the ANTLR-generated JSONLexer over a generated integer array 500 times, draining
+     * every token up to EOF, and logs the elapsed time.
+     */
+    @Test
+    public void testLexerPerformance() throws Exception
+    {
+
+        // build the sample text "[0,1,...,N]" (N + 1 elements)
+        final int N = 5000;
+        StringBuilder buf = new StringBuilder();
+        buf.append("[");
+        for (int value = 0; value <= N; value++)
+        {
+            if (value > 0)
+                buf.append(",");
+            buf.append(value);
+        }
+        buf.append("]");
+
+        final String json = buf.toString();
+
+        StopWatch timer = new StopWatch();
+
+        for (int round = 0; round < 500; round++)
+        {
+            JSONLexer lexer = new JSONLexer(new ANTLRStringStream(json));
+
+            // consume tokens until the lexer reports end of input
+            while (lexer.nextToken().getType() != Token.EOF)
+            {}
+
+        }
+        _logger.info("time: " + timer.getElapsedTime());
+
+    }
+
+    /**
+     * Scans a generated integer array 500 times with JSONTokenizer via
+     * {@link #parseArray(JSONTokenizer)} and logs the elapsed time.
+     */
+    @Test
+    public void testJSONTokenerPeformance() throws Exception
+    {
+
+        // build the sample text "[0,1,...,N]" (N + 1 elements)
+        final int N = 5000;
+        StringBuilder buf = new StringBuilder();
+        buf.append("[");
+        for (int value = 0; value <= N; value++)
+        {
+            if (value > 0)
+                buf.append(",");
+            buf.append(value);
+        }
+        buf.append("]");
+
+        final String json = buf.toString();
+
+        StopWatch timer = new StopWatch();
+
+        for (int round = 0; round < 500; round++)
+        {
+            // a fresh tokenizer per round, so each scan starts at the beginning of the text
+            parseArray(new JSONTokenizer(json));
+
+        }
+        _logger.info("time: " + timer.getElapsedTime());
+
         // i:1000, n:100   time=18.4 sec (2009.4.23 using ANTLR JSON.g)
         // i:1000, n:100   time=2.248 (2009. 4.23 using JSONTokener)
 
     }
 
+    /**
+     * Scans one JSON array from the tokenizer without storing any element. The array may be
+     * delimited by '[' ... ']' or '(' ... ')'; elements are separated by ',' or ';', and a
+     * trailing separator before the closing delimiter is accepted.
+     * 
+     * @param tokenizer
+     *            the tokenizer positioned at the opening delimiter
+     * @throws JSONException
+     *             on a missing opening delimiter, a mismatched closing delimiter, or an
+     *             unexpected character after an element
+     */
+    public void parseArray(JSONTokenizer tokenizer) throws JSONException
+    {
+        char c = tokenizer.nextClean();
+        final char q; // closing delimiter that matches the opening one
+        if (c == '[')
+        {
+            q = ']';
+        }
+        else if (c == '(')
+        {
+            q = ')';
+        }
+        else
+        {
+            throw tokenizer.syntaxError("A JSONArray text must start with '['");
+        }
+
+        // empty array: compare against the matching delimiter so "()" is accepted like "[]"
+        if (tokenizer.nextClean() == q)
+        {
+            return;
+        }
+        tokenizer.back();
+
+        for (;;)
+        {
+            // peek at the next character; a ',' here is an empty slot with no value to read
+            char next = tokenizer.nextClean();
+            tokenizer.back();
+            if (next != ',')
+            {
+                tokenizer.nextValue();
+            }
+
+            c = tokenizer.nextClean();
+            switch (c)
+            {
+            case ';':
+            case ',':
+                // trailing separator directly before the closing delimiter
+                if (tokenizer.nextClean() == q)
+                {
+                    return;
+                }
+                tokenizer.back();
+                break;
+            case ']':
+            case ')':
+                if (q != c)
+                {
+                    throw tokenizer.syntaxError("Expected a '" + q + "'");
+                }
+                return;
+            default:
+                throw tokenizer.syntaxError("Expected a ',' or ']'");
+            }
+        }
+    }
+
 }