OSDN Git Service

Changed the implementation of SilkPullParser: it now uses SilkPushParser internally
authorleo <leo@ae02f08e-27ec-0310-ae8c-8ba02fe2eafd>
Thu, 23 Apr 2009 04:15:53 +0000 (04:15 +0000)
committerleo <leo@ae02f08e-27ec-0310-ae8c-8ba02fe2eafd>
Thu, 23 Apr 2009 04:15:53 +0000 (04:15 +0000)
git-svn-id: http://www.xerial.org/svn/project/XerialJ/trunk/xerial-core@3255 ae02f08e-27ec-0310-ae8c-8ba02fe2eafd

src/main/java/org/xerial/silk/SilkEventHandler.java
src/main/java/org/xerial/silk/SilkPullParser.java
src/main/java/org/xerial/silk/SilkPushParser.java
src/main/java/org/xerial/silk/SilkStreamReader.java
src/test/java/org/xerial/silk/SilkStreamReaderTest.java

index 6d26ac2..13f9805 100644 (file)
@@ -24,6 +24,8 @@
 //--------------------------------------
 package org.xerial.silk;
 
+import org.xerial.core.XerialException;
+
 /**
  * {@link SilkEvent} handler for push-style Silk parser: {@link SilkPushParser}.
  * 
@@ -32,5 +34,5 @@ package org.xerial.silk;
  */
 public interface SilkEventHandler
 {
-    public void handle(SilkEvent event);
+    public void handle(SilkEvent event) throws XerialException;
 }
index dff6685..5350652 100644 (file)
 //--------------------------------------
 package org.xerial.silk;
 
-import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.Reader;
 import java.util.concurrent.ArrayBlockingQueue;
+import java.util.concurrent.Callable;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
 
-import org.antlr.runtime.ANTLRStringStream;
-import org.antlr.runtime.CommonTokenStream;
-import org.antlr.runtime.RecognitionException;
-import org.antlr.runtime.tree.Tree;
-import org.xerial.core.XerialError;
-import org.xerial.core.XerialErrorCode;
 import org.xerial.core.XerialException;
-import org.xerial.silk.impl.SilkDataLine;
 import org.xerial.silk.impl.SilkElement;
-import org.xerial.silk.impl.SilkFunction;
-import org.xerial.silk.impl.SilkLexer;
-import org.xerial.silk.impl.SilkNode;
-import org.xerial.silk.impl.SilkParser;
-import org.xerial.silk.impl.SilkPreamble;
-import org.xerial.silk.impl.SilkParser.silkLine_return;
 import org.xerial.util.ArrayDeque;
-import org.xerial.util.bean.impl.BeanUtilImpl;
 import org.xerial.util.log.Logger;
 
 /**
@@ -71,7 +56,7 @@ public class SilkPullParser
     private int lineCount = 0;
 
     private final int eventQueueMax = 10000;
-    private boolean foundEOF = false;
+    volatile private boolean foundEOF = false;
 
     private ArrayBlockingQueue<SilkEvent> eventQueue = new ArrayBlockingQueue<SilkEvent>(eventQueueMax);
 
@@ -99,7 +84,9 @@ public class SilkPullParser
         }
     }
 
+    private SilkPushParser parser;
     private ExecutorService threadPool;
+    private Future<Boolean> future;
 
     public SilkPullParser(InputStream input) throws IOException
     {
@@ -108,180 +95,35 @@ public class SilkPullParser
 
     public SilkPullParser(Reader input) throws IOException
     {
-        threadPool = Executors.newFixedThreadPool(2);
-        threadPool.execute(new SilkEventProducer(input));
+        threadPool = Executors.newFixedThreadPool(1);
+
+        parser = new SilkPushParser(input);
+        future = threadPool.submit(new SilkEventProducer());
     }
 
-    protected class SilkEventProducer implements Runnable
+    private class SilkEventProducer implements Callable<Boolean>, SilkEventHandler
     {
-        private final SilkLexer lexer;
-        private final SilkParser parser;
-        private final BufferedReader buffer;
-
-        public SilkEventProducer(Reader input)
-        {
-            buffer = new BufferedReader(input, 1024 * 1024); // use 1MB buffer size
-            lexer = new SilkLexer();
-            parser = new SilkParser(null);
-        }
+        public SilkEventProducer()
+        {}
 
-        public void push(SilkEvent e)
+        public void handle(SilkEvent event) throws XerialException
         {
             try
             {
-                eventQueue.put(e);
+                //if (!Thread.currentThread().isInterrupted())
+                eventQueue.put(event);
             }
-            catch (InterruptedException e1)
+            catch (InterruptedException e)
             {
-                e1.printStackTrace();
-            }
-        }
 
-        public void run()
-        {
-            while (!foundEOF)
-            {
-                readNext();
             }
         }
 
-        public void readNext()
+        public Boolean call() throws Exception
         {
-            // read next line
-            String line = null;
-            try
-            {
-                // line without newline characters, '\n' and '\r' 
-                line = buffer.readLine();
-                lineCount++;
-            }
-            catch (IOException e)
-            {
-                throw new XerialError(XerialErrorCode.IO_EXCEPTION, String.format("line=%d: %s", lineCount, e
-                        .getMessage()));
-            }
-
-            if (line == null)
-            {
-                // EOF
-                push(EOFEvent);
-                foundEOF = true;
-                return;
-            }
-
-            // 40000 lines/sec
-
-            //        // dummy
-            //        if (true)
-            //        {
-            //            SilkNode node = new SilkNode();
-            //            node.setName("dummy");
-            //            node.setNodeIndent("-");
-            //            push(new SilkEvent(SilkEventType.NODE, node));
-            //            //push(BlankLineEvent);
-            //            return;
-            //        }
-
-            if (line.length() <= 0)
-            {
-                push(BlankLineEvent);
-                return;
-            }
-
-            if (line.startsWith("%"))
-            {
-                push(new SilkEvent(SilkEventType.PREAMBLE, new SilkPreamble(line)));
-                return;
-            }
-            else if (line.startsWith("--"))
-            {
-                push(new SilkEvent(SilkEventType.MULTILINE_SEPARATOR, null));
-                return;
-            }
-            else if (line.startsWith(">>"))
-            {
-                push(new SilkEvent(SilkEventType.MULTILINE_ENTRY_SEPARATOR, null));
-                return;
-            }
-
-            // 39000 lines/sec
-
-            // remove leading and trailing white spaces (' ') 
-            String trimmedLine = line.trim();
-            if (trimmedLine.length() <= 0)
-            {
-                push(BlankLineEvent);
-                return;
-            }
-
-            // comment line 
-            if (trimmedLine.startsWith("#"))
-            {
-                // ignore the comment line
-                return;
-            }
-
-            // 36000 lines / sec
-
-            if (!(trimmedLine.startsWith("-") || trimmedLine.startsWith("@")))
-            {
-                SilkDataLine dataLine = new SilkDataLine(sanitizeDataLine(trimmedLine));
-                push(new SilkEvent(SilkEventType.DATA_LINE, dataLine));
-                return;
-            }
-
-            // 17000 lines/sec
-
-            // lexical analysis
-            lexer.resetContext();
-            lexer.setCharStream(new ANTLRStringStream(line));
-
-            // 17500 lines/sec
-
-            try
-            {
-                CommonTokenStream tokenStream = new CommonTokenStream(lexer);
-                parser.setTokenStream(tokenStream);
-
-                // 17000 lines/sec 
-
-                silkLine_return ret = parser.silkLine();
-                Tree t = (Tree) ret.getTree();
-
-                // 8500 -> 12000 lines/sec
-
-                switch (t.getType())
-                {
-                case SilkParser.Function:
-                {
-                    SilkFunction func = BeanUtilImpl.createBeanFromParseTree(SilkFunction.class, t,
-                            SilkParser.tokenNames);
-                    push(new SilkEvent(SilkEventType.FUNCTION, func));
-                    return;
-                }
-                case SilkParser.SilkNode:
-                {
-                    SilkNode node = BeanUtilImpl.createBeanFromParseTree(SilkNode.class, t, SilkParser.tokenNames);
-                    push(new SilkEvent(SilkEventType.NODE, node));
-                    return;
-                }
-                default:
-                    throw new XerialError(XerialErrorCode.INVALID_INPUT, String.format(
-                            "line=%d: invalid data type: %s", lineCount, parser.getTokenNames()[t.getType()]));
-                }
-
-                // 1500 lines/sec
-            }
-            catch (RecognitionException e)
-            {
-                throw new XerialError(XerialErrorCode.INVALID_INPUT, String.format("parse error line=%d: %s",
-                        lineCount, e.getMessage()));
-            }
-            catch (XerialException e)
-            {
-                throw new XerialError(e.getErrorCode(), e);
-            }
-
+            parser.parse(this);
+            foundEOF = true;
+            return true;
         }
 
     }
@@ -296,20 +138,8 @@ public class SilkPullParser
         if (foundEOF)
             return !eventQueue.isEmpty();
 
-        try
-        {
-            SilkEvent e = null;
-            while (!foundEOF && (e = eventQueue.poll(1, TimeUnit.MILLISECONDS)) == null)
-            {
+        fetchNext();
 
-            }
-            if (e != null)
-                prefetchedEventQueue.addLast(e);
-        }
-        catch (InterruptedException e)
-        {
-            foundEOF = true;
-        }
         return hasNext();
     }
 
@@ -321,56 +151,31 @@ public class SilkPullParser
         if (foundEOF)
             return eventQueue.poll();
 
+        fetchNext();
+
+        return next();
+    }
+
+    private void fetchNext() throws XerialException
+    {
         try
         {
-
             SilkEvent e = null;
             while (!foundEOF && (e = eventQueue.poll(1, TimeUnit.MILLISECONDS)) == null)
-            {
-
-            }
+            {}
             if (e != null)
                 prefetchedEventQueue.addLast(e);
         }
-        catch (InterruptedException e1)
+        catch (InterruptedException e)
         {
             foundEOF = true;
         }
-
-        return next();
+        return;
     }
 
-    public static String sanitizeDataLine(String line)
-    {
-        if (line.startsWith("\\"))
-            return removeLineComment(line.substring(1));
-        else
-            return removeLineComment(line);
-    }
-
-    private static Pattern lineCommentPattern = Pattern.compile("[^\"]*(\\\"[^\"]*\\\")*[^\"]*(#.*)");
-
-    public static String removeLineComment(String line)
-    {
-        if (!line.contains("#"))
-            return line;
-
-        Matcher m = lineCommentPattern.matcher(line);
-        if (m.matches())
-        {
-            int lineCommentStart = m.start(2);
-            if (lineCommentStart != -1)
-                line = line.substring(0, lineCommentStart);
-        }
-        return line;
-    }
-
-    protected void fillQueue() throws XerialException
-    {}
-
-    public int getLine()
+    public int getNumReadLine()
     {
-        return lineCount;
+        return parser.getNumReadLine();
     }
 
 }
index 3f68095..c689375 100644 (file)
@@ -27,6 +27,7 @@ package org.xerial.silk;
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStreamReader;
+import java.io.Reader;
 import java.net.URL;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -66,12 +67,21 @@ public class SilkPushParser
 
     public SilkPushParser(URL resourceURL) throws IOException
     {
-        buffer = new BufferedReader(new InputStreamReader(resourceURL.openStream())); // use 1MB buffer size
+        this(new InputStreamReader(resourceURL.openStream()));
+    }
+
+    /**
+     * Creates a push parser that reads Silk lines from the given reader.
+     * A reader that already is a {@link BufferedReader} (including any
+     * subclass) is used as is; any other reader is wrapped in a new
+     * {@link BufferedReader}.
+     * 
+     * @param reader
+     *            the source of the Silk text
+     */
+    public SilkPushParser(Reader reader)
+    {
+        // instanceof also accepts BufferedReader subclasses, which avoids buffering them twice
+        if (reader instanceof BufferedReader)
+            buffer = (BufferedReader) reader;
+        else
+            buffer = new BufferedReader(reader);
+
         lexer = new SilkLexer();
         parser = new SilkParser(null);
     }
 
-    private void push(SilkEvent e)
+    private void push(SilkEvent e) throws XerialException
     {
         handler.handle(e);
     }
index 619a3b6..b6b49cb 100644 (file)
@@ -615,7 +615,8 @@ public class SilkStreamReader implements TreeStreamReader
         }\r
         catch (JSONException e)\r
         {\r
-            throw new XerialException(e.getErrorCode(), String.format("line=%d: %s", parser.getLine(), e.getMessage()));\r
+            throw new XerialException(e.getErrorCode(), String.format("line=%d: %s", parser.getNumReadLine(), e\r
+                    .getMessage()));\r
         }\r
 \r
     }\r
@@ -652,7 +653,7 @@ public class SilkStreamReader implements TreeStreamReader
 \r
         SilkEvent currentEvent = parser.next();\r
 \r
-        numReadLine = parser.getLine();\r
+        numReadLine = parser.getNumReadLine();\r
 \r
         if (_logger.isTraceEnabled())\r
         {\r
index f59c11b..ebee20b 100644 (file)
@@ -73,6 +73,8 @@ public class SilkStreamReaderTest
         _logger.info(String.format("time=%s", timer.getElapsedTime()));\r
 \r
         // best time: 60,000 lines/sec\r
+        // 80000 lines/sec (Xeon 3.0 * dual) \r
+\r
     }\r
 \r
     @Test\r
@@ -99,6 +101,7 @@ public class SilkStreamReaderTest
 \r
         // best time: 4200 lines/sec (2009 Apr. 23)\r
         // 6585 lines/sec (after threading SilkPullParser)\r
+        // 7738 lines/sec (PullParser implementation using PushParser)\r
     }\r
 \r
     @Test\r
@@ -113,7 +116,7 @@ public class SilkStreamReaderTest
             count++;\r
             if (count % 100000 == 0)\r
             {\r
-                int line = reader.getLine();\r
+                int line = reader.getNumReadLine();\r
                 double percentage = (line / 10145176.0) * 100;\r
                 double time = timer.getElapsedTime();\r
                 double speed = line / time;\r
@@ -127,6 +130,7 @@ public class SilkStreamReaderTest
         // 12500 lines/sec (after threading SilkPullParser)\r
 \r
         // 18500 lines/sec (Xeon 3.0 * dual) \r
+        // 17411 lines/sec (Xeon 3.0 * dual) (PullParser implementation using PushParser)\r
     }\r
 \r
     @Test\r
@@ -153,6 +157,8 @@ public class SilkStreamReaderTest
             }\r
         });\r
         _logger.info(String.format("time=%s", timer.getElapsedTime()));\r
+\r
+        // 20000 lines/sec (Xeon 3.0 * dual) \r
     }\r
 \r
 }\r