import scala.collection.mutable.ListMap
import scala.collection.SortedSet
import scala.util.matching.Regex
+import scala.util.control.Breaks.{break,breakable}
import scala.xml._
import org.blackquill.engine._
private val Syntax = LinkedHashMap(
//STRONG
"^(.*?)`(.*)" -> ("code",surroundByCodeTAG _),
+ """^(.*?)\\,\\,((?:\|?.+?\|?)+?)\\,((?:\|?:?\-{3,}:?\|?)+?)\\,((?:\|?.+?\|?\\,)+?)\\,(.*)$$"""
+ -> ("table",surroundTableTAG _),
"^(.*)<([\\w\\d\\.\\-\\_\\+]+?)@([\\w\\d\\.\\-\\_\\+]+?)>(.*)" -> ("a", autoMailLink _),
- "^(.*)<(https?:\\/\\/[\\w\\d\\.\\/]+?)>(.*)$$" -> ("a",autoURLLink _),
+ "^(.*)<((?:https?|ftp):\\/\\/[\\w\\d\\.\\/]+?)>(.*)$$" -> ("a",autoURLLink _),
"^(.*)!\\[(.+?)\\]\\[(.*?)\\](?:\\{(.+?)\\})?(.*)$$" -> ("img",referenceExpander _),
"^(.*)\\[(.+?)\\]\\[(.*?)\\](?:\\{(.+?)\\})?(.*)$$" -> ("a",referenceExpander _),
"^(.*?)!\\[(.*?)\\]\\((.+?)\\x20*?(?:\"(.+?)\")?(?:\\x20+?(\\d+?%?)?x(\\d+?%?)?)?\\)(?:\\{(.+?)\\})?(.*)$$"
"^(.*\\\\,)((?:\\-|\\*){3,}|(?:(?:\\-|\\*)\\x20){3,})(.*?)$$" -> ("hr",putHrTAG _),
"^(.*?)\\*\\*(.+?)\\*\\*(.*?)$$" -> ("strong",surroundByGeneralTAG _),
"^(.*?)\\*(.+?)\\*(.*?)$$" -> ("em",surroundByGeneralTAG _)
+// "^(.*?)([^(?:\\\\,\\\\,).]+)(\\\\,\\,.*?)?$$" -> ("p",surroundByGeneralTAG _)
//WEAK
//"^(.*?)(\\\\,.+?\\\\,)(.*?)$$" -> ("p",surroundByAbstructTAG _)
)
+ /**
+  * Rewrites the first pipe-delimited table found in `doc` as
+  * `<table><thead>…</thead><tbody>…</tbody></table>` markup and recurses
+  * into the text before and after the match.
+  *
+  * @param doc   text to scan; an empty string yields an empty string
+  * @param regex pattern whose five groups are, in order: before, headSeq,
+  *              separatorSeq, bodySeq, following
+  * @param TAG   not read here; only forwarded to the recursive calls
+  * @return the converted text, or `doc` unchanged when the pattern does not
+  *         match or the separator group did not participate
+  *
+  * NOTE(review): a header/separator column-count mismatch still terminates
+  * the process via `exit(-1)`, exactly as before — confirm that is intended.
+  */
+ private def surroundTableTAG(doc:String, regex:String, TAG:String):String = {
+   // Drops at most one leading and one trailing pipe from a row.
+   def _normalize(text:String):String = text.stripPrefix("|").stripSuffix("|")
+
+   // Alignment attribute for column i, or "" when the separator row is shorter.
+   def _getAlign(alignList:List[String],i:Int):String = {
+     if(i >= alignList.size){""}else{alignList(i)}
+   }
+
+   if(doc == ""){return ""}
+
+   log debug "***" + doc
+   val p = new Regex(regex, "before","headSeq","separatorSeq","bodySeq","following")
+
+   p.findFirstMatchIn(doc) match {
+     case Some(m) =>
+       // group(...) returns null for a group that did not participate.
+       Option(m.group("separatorSeq")) match {
+         case Some(sep) =>
+           val bef = m.group("before")
+           val fol = m.group("following")
+           val body = m.group("bodySeq")
+
+           // One alignment attribute per separator cell: ":---:" / ":---" / "---:" / "---".
+           val pSep = """((?:\|)?(:?-{3,}?:?)(?:\|)?)+?""".r
+           val alignList = pSep.findAllMatchIn(sep).map{ mS =>
+             val align = mS.group(2)
+             if(align.startsWith(":") && align.endsWith(":")){
+               """align=\"center\" """
+             }else if(align.startsWith(":")){
+               """align=\"left\" """
+             }else if(align.endsWith(":")){
+               """align=\"right\" """
+             }else{
+               ""
+             }
+           }.toList
+
+           val head = _normalize(m.group("headSeq"))
+           log info head
+           val headList =
+             (for((h,i) <- head.split("\\|").zipWithIndex)
+               yield(s"""<th ${_getAlign(alignList,i)}>$h</th>\\,""")).toList
+           if(headList.size != alignList.size){
+             log error "Table header is wrong.:" + headList
+             exit(-1)
+           }
+
+           log debug headList
+           log debug alignList
+
+           val pTBody = """((((\|)?(.*?)(\|)?)+?)\\,?)+?""".r
+           val bodyList = pTBody.findAllMatchIn(body).map{ mTBS =>
+             val row = _normalize(mTBS.group(2)).split("\\|")
+             // Use the bounds-safe lookup: a row may hold more cells than the
+             // header, and indexing alignList directly would throw.
+             val cells =
+               for((c,j) <- row.zipWithIndex)
+                 yield(s"""<td ${_getAlign(alignList,j)}>$c</td>\\,""")
+             "<tr>\\\\," + cells.mkString("") + "</tr>\\\\,"
+           }.toList
+
+           log debug bodyList
+           surroundTableTAG(bef, regex, TAG) +
+             "\\\\,<table><thead>\\\\," + s"<tr>${headList.mkString("")}</tr></thead>\\\\," +
+             s"<tbody>${bodyList.mkString("")}</tbody></table>\\\\," +
+             surroundTableTAG(fol, regex, TAG)
+
+         case None => doc
+       }
+     case None => doc
+   }
+ }
+
private def autoMailLink(doc:String, regex:String, TAG:String):String = {
if(doc == ""){return ""}
}
private def surroundByGeneralTAG(doc:String, regex:String, TAG:String):String = {
- if(doc == ""){return doc}
- log debug doc
+ if(doc == ""||Option(doc) == None){return ""}
+ log info doc
val p = new Regex(regex,"before","inTAG","following")
val m = p findFirstMatchIn(doc)
if(m != None){
}
md = backslashEscape(md)
+ md = paragraphize(md)
log info urlDefMap
val header = constructHEADER(markdown)
s"${docType}\n${header}\n<${htmlTAG}>\n<${bodyTAG}>\n${md.replaceAll("\\\\,","\n")}\n</${bodyTAG}>\n</${htmlTAG}>"
}
+ /**
+  * Wraps every non-empty line that lies outside a block-level element in
+  * `<p>…</p>`. Lines are the pieces of `doc` separated by a backslash
+  * followed by a comma, and every emitted line is terminated with that
+  * same two-character delimiter.
+  *
+  * A line is passed through unwrapped when it opens and/or closes one of
+  * `HTMLMap.BLOCKTags`, or when it sits between such an opening line and
+  * the next closing line.
+  *
+  * @param doc delimiter-separated text
+  * @return the text with paragraph tags inserted
+  */
+ private def paragraphize(doc:String):String = {
+   val delimiter = """\,"""
+   val BlockElements = new HTMLMap().BLOCKTags
+   var isBlock = false
+   val text = new StringBuilder
+
+   // True when `l` opens with the tag `e` itself, not merely a longer tag
+   // name sharing the prefix (e.g. "<pre>" must not count as "<p").
+   def opensWith(l:String, e:String):Boolean = {
+     val open = "<" + e
+     l.startsWith(open) &&
+       (l.length == open.length || !l.charAt(open.length).isLetterOrDigit)
+   }
+
+   // Prefixing a backslash turns the delimiter into a regex for the literal "\,".
+   for(l <- doc.split("\\" + delimiter)){
+     var isOneLineBlock = false
+     log debug l
+     breakable{
+       for(e <- BlockElements){
+         log debug e
+         val closes = l.endsWith("</" + e + ">")
+         if(opensWith(l, e) && closes){
+           isOneLineBlock = true;break;
+         }else if(opensWith(l, e)){
+           isBlock = true;break;
+         }else if(closes){
+           // The closing line still belongs to the block, so emit it unwrapped.
+           isBlock = false;isOneLineBlock = true;break;
+         }
+       }
+     }
+     log debug ">>>>" + l + "::" + isBlock + "|" + isOneLineBlock
+     if(isBlock || isOneLineBlock){
+       text.append(l).append(delimiter)
+     }else if(l != ""){
+       text.append("<p>").append(l).append("</p>").append(delimiter)
+     }
+   }
+   text.toString
+ }
+
private def backslashEscape(doc:String):String = {
- val escapeCharSet = Set("\\","`","*","_","{","}","[","]","(",")","#","+","-","!")
+ val escapeCharSet = Set("\\","`","*","_","{","}","[","]","(",")","#","+","-","!",":","|")
var bef = ""
for(e <- doc){
- if(escapeCharSet.contains(e.toString) && bef.reverse.head.toString == "\\"){
+ if(bef.size > 2 && escapeCharSet.contains(e.toString) && bef.reverse.head.toString == "\\"){
bef = bef.init + e
}else{
bef += e
import scala.util.matching.Regex
class HTMLMap{
-
+
private val log:Log = LogFactory.getLog(classOf[HTMLMap])
+ val INLINETags = Set(
+ "a","abbr","acronym","b","basefont","bdo","big","br","cite","code","dfn",
+"em","font","i","img","input","kbd","label","q","s","samp","select",
+"small","span","strike","strong","sub","sup","textarea","tt","u","var")
+
+ val BLOCKTags = Set(
+ "address","blockquote","center","div","dl","fieldset","form","h1","h2","h3","h4","h5","h6",
+ "hr","noframes","noscript","ol","p","pre","table","ul")
+
private val HTMLTag = LinkedHashMap(
"hr"->Tuple2((None),passThrough _),
"br"->Tuple2((None),passThrough _),
"""\(c\)""".r -> "©","""\(R\)""".r ->"®","""\(SS\)""".r -> "§","""\(TM\)""".r -> "™",
"""!in""".r -> "∉", """\\<""".r->"<","""\\>""".r->">","""\\&""".r->"&")
-
+
private def passThrough(text:String):String = {text}
-
+
def specialCharConvert(text:String):String = {
var str = text
for(elem <- specialChar.keys){
}
str
}
-
+
/*
def ampConverter(text:String):String = {
val index = text.indexWhere(_ == '&',0)
-
+
val headStr = text.slice(0,index)
val subStr = text.slice(index,text.size)
-
+
val amp : Regex = """^(.*?)(&(.+?)(;|\\,))(.*?)$$""".r
subStr match{
case amp(v1,v2,v3,v4,v5) =>
if("\\,".equals(v4)){
return headStr + (v1 + v2).replaceAll("&","&") + ampConverter(v5)
}else if(";".equals(v4)){
- return headStr + v1.replaceAll("&","&") + v2 + ampConverter(v5)
+ return headStr + v1.replaceAll("&","&") + v2 + ampConverter(v5)
}
case _ => return text
}
text
}
-
+
def gtConverter(text:String):String = {
if(text.contains(">")){
val index = text.indexWhere(_ == '>',0)
if(index < 2){return text}
-
- val headStr = text.slice(0,index -2)
+
+ val headStr = text.slice(0,index -2)
val subStr = text.slice(index - 2,text.size)
- val gtSeq : Regex = """(.*?)(>+)(.*?)""".r
- subStr match{
+ val gtSeq : Regex = """(.*?)(>+)(.*?)""".r
+ subStr match{
case gtSeq(v1,v2,v3) =>
log debug subStr
log debug v1
}
}else{return text}
}
-
+
def ltConverter(text:String):String ={
text.replaceAll("<","<")
}
*/
-
+
def htmlTAGFilter(doc:String):String = {
if(doc == ""){return ""}
val node = new BQParser
-
+
val NORMALIZE: Regex = """(?i)(.+)""".r
log debug HTMLTag(elem)._1
val tmp = "</" + elem + ">"
val endTag = HTMLTag(elem)._1
-
- for (eT <- Iterator(HTMLTag(elem) _1)){
+
+ for (eT <- Iterator(HTMLTag(elem) _1)){
eT match{
case None =>
val p = new Regex(s"""(?i)^(.*?)<${elem}\\s*[>|\\/>](.*?)$$""","before","following")
if(m != None){
return htmlTAGFilter(m.get.group("before")) + "<" +
elem + " />" + htmlTAGFilter(m.get.group("following"))
- }
+ }
case NORMALIZE("-->") =>
val p = new Regex(s"""^(.*?)<${elem}.*?${endTag}(.*?)$$""","before","following")
val m = p findFirstMatchIn(doc)
if(m != None){
return htmlTAGFilter(m.get.group("before")) + htmlTAGFilter(m.get.group("following"))
}
- case NORMALIZE(">") =>
+ case NORMALIZE(">") =>
val p = new Regex(s"""(?i)^(.*?)<${elem}\\s((\\w+=\\"(.*?)\\"\\s??)+)\\/??>(.*?)$$""",
"before","attributes","attribute","contents","following")
val m = p findFirstMatchIn(doc)
if(m != None){
return htmlTAGFilter(m.get.group("before")) + "<" + elem + " " + m.get.group("attributes") + ">" +
htmlTAGFilter(m.get.group("following"))
-
+
}
case NORMALIZE(tmp) =>
val p = new Regex(
val m = p findFirstMatchIn(doc)
if(m != None){
log debug "[" + elem + "]"
- return htmlTAGFilter(m.get.group("before")) +
- "<" + elem + m.get.group("attribute") + ">" +
- HTMLTag(elem)._2(m.get.group("inTAG")) + endTag +
+ return htmlTAGFilter(m.get.group("before")) +
+ "<" + elem + m.get.group("attribute") + ">" +
+ HTMLTag(elem)._2(m.get.group("inTAG")) + endTag +
htmlTAGFilter(m.get.group("following"))
}
- case NORMALIZE("---") =>
+ case NORMALIZE("---") =>
val p = new Regex("""^(.*?)$$""","plain")
val m = p findFirstMatchIn(doc)
if(m != None){return node.toHTML(HTMLTag(elem)._2(m.get.group("plain"))).toString}
- case _ => return specialCharConvert(doc)
- //case NORMALIZE("""\\/>""") =>
+ case _ => return specialCharConvert(doc)
+ //case NORMALIZE("""\\/>""") =>
// log info "###"
}
- }
+ }
}
specialCharConvert(doc)
-
+
}
}
\ No newline at end of file