OSDN Git Service

implemented table notation
author: Set <set.minami@gmail.com>
Mon, 24 Jun 2013 07:18:17 +0000 (16:18 +0900)
committer: Set <set.minami@gmail.com>
Mon, 24 Jun 2013 07:18:17 +0000 (16:18 +0900)
.cache
mdTest/jsTest2.html
mdTest/test10.html [new file with mode: 0644]
mdTest/test11.bq
mdTest/test13.bq
mdTest/test14.bq [new file with mode: 0644]
planAndproceed.png
src/main/scala/org/blackquill/engine/BQParser.scala
src/main/scala/org/blackquill/engine/HTMLMap.scala
src/main/scala/org/blackquill/main/BlackQuill.scala

diff --git a/.cache b/.cache
index d7137c6..9f9d54e 100644 (file)
Binary files a/.cache and b/.cache differ
index 52f7be8..7441f49 100644 (file)
@@ -4,11 +4,13 @@
 </head>
 <html>
 <body>
-aaaaa<a href="http://example.example.com">http://example.example.com</a> bbbbb
+<p>aaaaa<a href="http://example.example.com">http://example.example.com</a> bbbbb
 aaaa<script type="text/javascript">
 document.write('<a href=\"mailto:example')
 document.write("@")
-document.write("example.com\">MailMe!</a>") </script>bbb
+document.write("example.com\">MailMe!</a>") </script>bbb</p><p>
 
+</p><p>*
+</p>
 </body>
-</html>
\ No newline at end of file
+</html>
diff --git a/mdTest/test10.html b/mdTest/test10.html
new file mode 100644 (file)
index 0000000..3df7dae
--- /dev/null
@@ -0,0 +1,16 @@
+<!DOCTYPE html>
+<head>
+<title>NO TITLE</title>
+</head>
+<html>
+<body>
+<p>
+<h2>foo</h2></p>
+
+<p><h2>- - - </h2></p>
+
+<p><hr />
+<hr />
+</p>
+</body>
+</html>
index 9f50a12..e7b2f0f 100644 (file)
@@ -1,9 +1,9 @@
-
 aaaaa
 [hoge](http://hoge.com/)
 [hoge](http://hoge.com "Title!")
 bbb
 
+
 aa`aaaabbb`ddddd
 ``aaa`bbb``ccc
 ``aaa`bbb`ccc``
@@ -14,6 +14,7 @@ aa`aaaabbb`ddddd
 [2]:http://bbb.com "TITLE"
 [3]:http://ccc.com
 
+
 ddd[aaa][2]
 eee
 [Google][]
index 6150fa7..3df1a58 100644 (file)
@@ -1,4 +1,10 @@
 aaaaa<http://example.example.com>bbbbb
 aaaa<example@example.com>bbb
 
+aaa
+---
+<div>
+aaaa
+bbbb
+</div>
 \*
\ No newline at end of file
diff --git a/mdTest/test14.bq b/mdTest/test14.bq
new file mode 100644 (file)
index 0000000..3c6e096
--- /dev/null
@@ -0,0 +1,16 @@
+aaaaaaaaaaaaaaaaaaxxxxxxxxxxxxxxxxxxxxxxcccccccccccccccccccccccccc
+wwwwwwwwwwwwwwwwwwwwwwww
+
+aaaa|bbbb|cccc
+:---|---:|:---:
+ddd|eee|fff
+ggg|hhh|iii
+
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+
+|111|222|333|444|
+|:---|---|:---:|:---|
+|555|666|777|888|
+|999|111|222|333|
+
+qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq
index aae5c8d..e13c626 100644 (file)
Binary files a/planAndproceed.png and b/planAndproceed.png differ
index 3a89baf..16bbd1c 100644 (file)
@@ -13,6 +13,7 @@ import scala.collection.mutable.Stack
 import scala.collection.mutable.ListMap
 import scala.collection.SortedSet
 import scala.util.matching.Regex
+import scala.util.control.Breaks.{break,breakable}
 import scala.xml._
 
 import org.blackquill.engine._
@@ -27,8 +28,10 @@ class BQParser {
        private val Syntax = LinkedHashMap(
        //STRONG
        "^(.*?)`(.*)" -> ("code",surroundByCodeTAG _),
+       """^(.*?)\\,\\,((?:\|?.+?\|?)+?)\\,((?:\|?:?\-{3,}:?\|?)+?)\\,((?:\|?.+?\|?\\,)+?)\\,(.*)$$"""
+       -> ("table",surroundTableTAG _),
        "^(.*)<([\\w\\d\\.\\-\\_\\+]+?)@([\\w\\d\\.\\-\\_\\+]+?)>(.*)" -> ("a", autoMailLink _),
-       "^(.*)<(https?:\\/\\/[\\w\\d\\.\\/]+?)>(.*)$$" -> ("a",autoURLLink _),
+       "^(.*)<((?:https?|ftp):\\/\\/[\\w\\d\\.\\/]+?)>(.*)$$" -> ("a",autoURLLink _),
        "^(.*)!\\[(.+?)\\]\\[(.*?)\\](?:\\{(.+?)\\})?(.*)$$" -> ("img",referenceExpander _),
        "^(.*)\\[(.+?)\\]\\[(.*?)\\](?:\\{(.+?)\\})?(.*)$$" -> ("a",referenceExpander _),
        "^(.*?)!\\[(.*?)\\]\\((.+?)\\x20*?(?:\"(.+?)\")?(?:\\x20+?(\\d+?%?)?x(\\d+?%?)?)?\\)(?:\\{(.+?)\\})?(.*)$$"
@@ -43,10 +46,96 @@ class BQParser {
        "^(.*\\\\,)((?:\\-|\\*){3,}|(?:(?:\\-|\\*)\\x20){3,})(.*?)$$" -> ("hr",putHrTAG _),
        "^(.*?)\\*\\*(.+?)\\*\\*(.*?)$$" -> ("strong",surroundByGeneralTAG _),
        "^(.*?)\\*(.+?)\\*(.*?)$$" -> ("em",surroundByGeneralTAG _)
+//     "^(.*?)([^(?:\\\\,\\\\,).]+)(\\\\,\\,.*?)?$$" -> ("p",surroundByGeneralTAG _)
        //WEAK
        //"^(.*?)(\\\\,.+?\\\\,)(.*?)$$" -> ("p",surroundByAbstructTAG _)
        )
 
+       /**
+        * Converts pipe-delimited table notation found in `doc` into an HTML `<table>`.
+        *
+        * `regex` has to expose five capture groups, in this order: text before the table,
+        * header row, separator row (`:---`, `---:`, `:---:`), body rows, text after the table.
+        * The text before and after the match is processed recursively with the same pattern.
+        *
+        * @param doc   document text whose lines are joined by the marker `\,` (backslash, comma)
+        * @param regex table-detection pattern (the "table" entry of `Syntax`)
+        * @param TAG   tag name taken from `Syntax`; only forwarded to the recursive calls
+        * @return `doc` with tables rewritten as HTML, or `doc` itself when nothing matches
+        */
+       private def surroundTableTAG(doc:String, regex:String, TAG:String):String = {
+               // Drops one leading and one trailing pipe so "|a|b|" and "a|b" yield the same cells.
+               def _normalize(text:String):String = {
+                       val noHead = if(text.startsWith("|")){text.tail}else{text}
+                       if(noHead.endsWith("|")){noHead.init}else{noHead}
+               }
+
+               // Alignment attribute of column i, or "" when the separator row has no such column.
+               def _getAlign(alignList:List[String],i:Int):String = {
+                       if(i >= alignList.size){""}else{alignList(i)}
+               }
+
+               if(doc == ""){return ""}
+
+               log debug "***" + doc
+               val p = new Regex(regex, "before","headSeq","separatorSeq","bodySeq","following")
+
+               p.findFirstMatchIn(doc) match {
+                       case Some(m) if m.group("separatorSeq") != null =>
+                               val bef = m.group("before")
+                               val fol = m.group("following")
+                               val sep = m.group("separatorSeq")
+                               val body = m.group("bodySeq")
+
+                               // One alignment attribute per separator cell, in column order.
+                               val pSep = """((?:\|)?(:?-{3,}?:?)(?:\|)?)+?""".r
+                               val alignList = pSep.findAllMatchIn(sep).map{ mS =>
+                                       val align = mS.group(2)
+                                       if(align.startsWith(":") && align.endsWith(":")){
+                                               """align=\"center\" """
+                                       }else if(align.startsWith(":")){
+                                               """align=\"left\" """
+                                       }else if(align.endsWith(":")){
+                                               """align=\"right\" """
+                                       }else{
+                                               ""
+                                       }
+                               }.toList
+
+                               val head = _normalize(m.group("headSeq"))
+                               log info head
+                               val headList = (for((h,i) <- head.split("\\|").zipWithIndex)
+                                       yield(s"""<th ${_getAlign(alignList,i)}>$h</th>\\,""")).toList
+                               if(headList.size != alignList.size){
+                                       log error "Table header is wrong.:" + headList
+                                       // Same fatal behaviour as before; Predef.exit is deprecated in favour of sys.exit.
+                                       sys.exit(-1)
+                               }
+
+                               log debug headList
+                               log debug alignList
+
+                               // Every body row ends with the line marker `\,`, so split on it. The previous
+                               // all-lazy, all-optional row regex only produced empty or one-character matches.
+                               val bodyList = body.split("""\\,""").toList.filter(_ != "").map{ line =>
+                                       // _getAlign tolerates rows that are wider than the separator row.
+                                       val cells = for((c,j) <- _normalize(line).split("\\|").zipWithIndex)
+                                               yield(s"""<td ${_getAlign(alignList,j)}>$c</td>\\,""")
+                                       "<tr>\\\\," + cells.mkString("") + "</tr>\\\\,"
+                               }
+                               log debug bodyList
+
+                               surroundTableTAG(bef, regex, TAG) +
+                                       "\\\\,<table><thead>\\\\," + s"<tr>${headList.mkString("")}</tr></thead>\\\\," +
+                                       s"<tbody>${bodyList.mkString("")}</tbody></table>\\\\," +
+                                       surroundTableTAG(fol, regex, TAG)
+
+                       case _ => doc
+               }
+       }
+
        private def autoMailLink(doc:String, regex:String, TAG:String):String = {
                if(doc == ""){return ""}
 
@@ -604,8 +693,8 @@ class BQParser {
        }
 
        private def surroundByGeneralTAG(doc:String, regex:String, TAG:String):String = {
-         if(doc == ""){return doc}
-         log debug doc
+         if(doc == ""||Option(doc) == None){return ""}
+         log info doc
          val p = new Regex(regex,"before","inTAG","following")
          val m = p findFirstMatchIn(doc)
          if(m != None){
@@ -648,16 +737,55 @@ class BQParser {
                }
 
                md = backslashEscape(md)
+               md = paragraphize(md)
                log info urlDefMap
                val header = constructHEADER(markdown)
                s"${docType}\n${header}\n<${htmlTAG}>\n<${bodyTAG}>\n${md.replaceAll("\\\\,","\n")}\n</${bodyTAG}>\n</${htmlTAG}>"
        }
 
+       /**
+        * Wraps every line of `doc` that is not part of an HTML block element in `<p>...</p>`.
+        *
+        * Lines are separated by the marker `\,` (backslash, comma). A line that opens a block
+        * element (see `HTMLMap.BLOCKTags`) switches "block mode" on until a line ends with a
+        * closing block tag; lines seen in block mode are copied through untouched. Empty lines
+        * outside a block are dropped.
+        *
+        * @param doc document text with lines joined by `\,`
+        * @return the rewritten text, every emitted line followed by `\,`
+        */
+       private def paragraphize(doc:String):String = {
+               val delimiter = """\,"""
+               val BlockElements = new HTMLMap().BLOCKTags
+               // Block elements without a closing tag: they must not switch block mode on,
+               // otherwise nothing ever switches it off again.
+               val voidBlocks = Set("hr")
+
+               // True when `line` starts with exactly the tag `tag` ("<p" must not match "<pre").
+               def opensWith(line:String, tag:String):Boolean = {
+                       val open = "<" + tag
+                       line.startsWith(open) &&
+                               (line.length == open.length || !line.charAt(open.length).isLetterOrDigit)
+               }
+               def closesWith(line:String, tag:String):Boolean = line.endsWith("</" + tag + ">")
+
+               var isBlock = false
+               val text = new StringBuilder
+
+               for(l <- doc.split("\\" + delimiter)){
+                       log debug l
+                       val isOneLineBlock = BlockElements.find(opensWith(l, _)) match {
+                               case Some(e) if voidBlocks.contains(e) || closesWith(l, e) => true
+                               case Some(_) =>
+                                       isBlock = true
+                                       false
+                               case None =>
+                                       // A closing block tag ends block mode; the closing line itself is not wrapped.
+                                       val closed = BlockElements.exists(closesWith(l, _))
+                                       if(closed){isBlock = false}
+                                       closed
+                       }
+                       log debug ">>>>" + l + "::" + isBlock + "|" + isOneLineBlock
+                       if(isBlock || isOneLineBlock){
+                               text ++= l + delimiter
+                       }else if(l != ""){
+                               text ++= "<p>" + l + "</p>" + delimiter
+                       }
+               }
+               text.toString
+               //var text = "<p>" + doc.replaceAll("\\\\,\\\\,","</p>\\\\,\\\\,<p>") + "</p>"
+               //text.replaceAll("<p></p>","")
+       }
+
        private def backslashEscape(doc:String):String = {
-               val escapeCharSet = Set("\\","`","*","_","{","}","[","]","(",")","#","+","-","!")
+               val escapeCharSet = Set("\\","`","*","_","{","}","[","]","(",")","#","+","-","!",":","|")
                var bef = ""
                for(e <- doc){
-                       if(escapeCharSet.contains(e.toString) && bef.reverse.head.toString == "\\"){
+                       if(bef.size > 2 && escapeCharSet.contains(e.toString) && bef.reverse.head.toString == "\\"){
                                bef = bef.init + e
                        }else{
                                bef += e
index eb312ea..5dde468 100644 (file)
@@ -9,9 +9,18 @@ import org.apache.commons.logging._
 import scala.util.matching.Regex
 
 class HTMLMap{
-  
+
   private val log:Log = LogFactory.getLog(classOf[HTMLMap])
 
+  val INLINETags = Set(
+    "a","abbr","acronym","b","basefont","bdo","big","br","cite","code","dfn",
+"em","font","i","img","input","kbd","label","q","s","samp","select",
+"small","span","strike","strong","sub","sup","textarea","tt","u","var")
+
+  val BLOCKTags = Set(
+    "address","blockquote","center","div","dl","fieldset","form","h1","h2","h3","h4","h5","h6",
+    "hr","noframes","noscript","ol","p","pre","table","ul")
+
   private val HTMLTag = LinkedHashMap(
     "hr"->Tuple2((None),passThrough _),
     "br"->Tuple2((None),passThrough _),
@@ -102,9 +111,9 @@ class HTMLMap{
     """\(c\)""".r -> "&copy;","""\(R\)""".r ->"&reg;","""\(SS\)""".r -> "&sect;","""\(TM\)""".r -> "&trade;",
     """!in""".r -> "&notin;", """\\<""".r->"&lt;","""\\>""".r->"&gt;","""\\&""".r->"&amp;")
 
-  
+
   private def passThrough(text:String):String = {text}
-  
+
   def specialCharConvert(text:String):String = {
     var str = text
     for(elem <- specialChar.keys){
@@ -112,37 +121,37 @@ class HTMLMap{
     }
     str
   }
-  
+
 /*
   def ampConverter(text:String):String = {
     val index = text.indexWhere(_ == '&',0)
-    
+
     val headStr = text.slice(0,index)
     val subStr = text.slice(index,text.size)
-    
+
     val amp : Regex = """^(.*?)(&(.+?)(;|\\,))(.*?)$$""".r
     subStr match{
       case amp(v1,v2,v3,v4,v5) =>
         if("\\,".equals(v4)){
           return headStr + (v1 + v2).replaceAll("&","&amp;") + ampConverter(v5)
         }else if(";".equals(v4)){
-          return headStr + v1.replaceAll("&","&amp;") + v2 + ampConverter(v5)      
+          return headStr + v1.replaceAll("&","&amp;") + v2 + ampConverter(v5)
         }
       case _ => return text
     }
     text
   }
-  
+
   def gtConverter(text:String):String = {
    if(text.contains(">")){
      val index = text.indexWhere(_ == '>',0)
      if(index < 2){return text}
-     
-     val headStr = text.slice(0,index -2)     
+
+     val headStr = text.slice(0,index -2)
      val subStr = text.slice(index - 2,text.size)
 
-     val gtSeq : Regex = """(.*?)(>+)(.*?)""".r 
-     subStr match{    
+     val gtSeq : Regex = """(.*?)(>+)(.*?)""".r
+     subStr match{
        case  gtSeq(v1,v2,v3) =>
          log debug subStr
          log debug v1
@@ -157,16 +166,16 @@ class HTMLMap{
      }
    }else{return text}
  }
-  
+
   def ltConverter(text:String):String ={
        text.replaceAll("<","&lt;")
   }
  */
+
   def htmlTAGFilter(doc:String):String = {
     if(doc == ""){return ""}
     val node = new BQParser
-    
+
 
     val NORMALIZE: Regex = """(?i)(.+)""".r
 
@@ -175,8 +184,8 @@ class HTMLMap{
       log debug HTMLTag(elem)._1
       val tmp = "</" + elem + ">"
       val endTag = HTMLTag(elem)._1
-      
-      for (eT <- Iterator(HTMLTag(elem) _1)){ 
+
+      for (eT <- Iterator(HTMLTag(elem) _1)){
        eT match{
        case None =>
                val p = new Regex(s"""(?i)^(.*?)<${elem}\\s*[>|\\/>](.*?)$$""","before","following")
@@ -184,21 +193,21 @@ class HTMLMap{
                if(m != None){
                        return htmlTAGFilter(m.get.group("before")) + "<" +
                                        elem + " />" + htmlTAGFilter(m.get.group("following"))
-               }       
+               }
        case NORMALIZE("-->") =>
          val p = new Regex(s"""^(.*?)<${elem}.*?${endTag}(.*?)$$""","before","following")
          val m = p findFirstMatchIn(doc)
          if(m != None){
                return htmlTAGFilter(m.get.group("before")) + htmlTAGFilter(m.get.group("following"))
          }
-       case NORMALIZE(">") => 
+       case NORMALIZE(">") =>
                val p = new Regex(s"""(?i)^(.*?)<${elem}\\s((\\w+=\\"(.*?)\\"\\s??)+)\\/??>(.*?)$$""",
                                "before","attributes","attribute","contents","following")
                val m = p findFirstMatchIn(doc)
                if(m != None){
                  return htmlTAGFilter(m.get.group("before")) + "<" + elem + " " + m.get.group("attributes") + ">" +
                                  htmlTAGFilter(m.get.group("following"))
-                                               
+
                }
         case NORMALIZE(tmp) =>
                val p = new Regex(
@@ -207,23 +216,23 @@ class HTMLMap{
                val m = p findFirstMatchIn(doc)
                if(m != None){
                        log debug "[" + elem + "]"
-                       return htmlTAGFilter(m.get.group("before")) + 
-                                       "<" + elem + m.get.group("attribute") + ">" + 
-                                       HTMLTag(elem)._2(m.get.group("inTAG")) + endTag + 
+                       return htmlTAGFilter(m.get.group("before")) +
+                                       "<" + elem + m.get.group("attribute") + ">" +
+                                       HTMLTag(elem)._2(m.get.group("inTAG")) + endTag +
                                        htmlTAGFilter(m.get.group("following"))
                }
-        case NORMALIZE("---") => 
+        case NORMALIZE("---") =>
                val p = new Regex("""^(.*?)$$""","plain")
                val m = p findFirstMatchIn(doc)
                if(m != None){return node.toHTML(HTMLTag(elem)._2(m.get.group("plain"))).toString}
-        case _ => return specialCharConvert(doc)            
-        //case NORMALIZE("""\\/>""") => 
+        case _ => return specialCharConvert(doc)
+        //case NORMALIZE("""\\/>""") =>
         //     log info "###"
        }
-      }        
+      }
     }
     specialCharConvert(doc)
-    
+
   }
 
 }
\ No newline at end of file
index b9ff598..cec3126 100644 (file)
@@ -121,11 +121,11 @@ object BlackQuill{
   }
 
   def blackquill(lines:List[String]):List[String] = {
-    val str = new HTMLMap htmlTAGFilter lines.mkString("\\,")
+    val str = new HTMLMap htmlTAGFilter lines.mkString("""\,""")
     log info str
     val parsed = new BQParser
     log info parsed.toHTML(str)
-    str split """\\,""" toList
+    str split """\,""" toList
   }
 
 }