1 package com.yuji.ec.utility;
\r
3 import java.io.IOException;
\r
4 import java.io.StringReader;
\r
6 import org.ccil.cowan.tagsoup.Parser;
\r
7 import org.xml.sax.Attributes;
\r
8 import org.xml.sax.ContentHandler;
\r
9 import org.xml.sax.InputSource;
\r
10 import org.xml.sax.Locator;
\r
11 import org.xml.sax.SAXException;
\r
12 import org.xml.sax.SAXNotRecognizedException;
\r
13 import org.xml.sax.SAXNotSupportedException;
\r
15 import com.yuji.ec.common.CommonUtil;
\r
17 public class HtmlParser implements ContentHandler {
\r
18 // http://java.sun.com/javase/ja/6/docs/ja/api/index.html?org/xml/sax/ContentHandler.html
\r
19 // http://www35.atwiki.jp/nikoban/pages/23.html
\r
20 private final static String NL = System.getProperty("line.separator");
\r
21 private StringBuffer sb = null;
\r
23 public boolean parse(String str){
\r
24 // String str = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
\r
25 // +"<!DOCTYPE en-note SYSTEM \"http://xml.evernote.com/pub/enml2.dtd\">"
\r
26 // +"<en-note><div><br clear=\"none\"/></div><hr /><h3>2012年4月5日 13:08</h3><hr /><div><div>あかさたなたな</div><div><br clear='none'/></div><div><br clear='none'/></div><div><br clear='none'/></div><div>あた</div></div><hr /><h3>2012年4月5日 16:17</h3><hr /><div><div>テスト</div></div><hr /><h3>2012年4月5日 20:24</h3><hr /><div><div>あかさたな</div><div>はなまやりわ</div></div></en-note>";
\r
28 boolean result = false;
\r
30 sb = new StringBuffer();
\r
32 StringReader sr = new StringReader(str);
\r
33 InputSource is = new InputSource(sr);
\r
35 Parser parser = new Parser();
\r
36 parser.setContentHandler(this);
\r
38 parser.setFeature(Parser.namespacesFeature, false);
\r
42 } catch (SAXNotRecognizedException e) {
\r
43 Debug.d(this, null, e);
\r
44 } catch (SAXNotSupportedException e) {
\r
45 Debug.d(this, null, e);
\r
46 } catch (IOException e) {
\r
47 Debug.d(this, null, e);
\r
48 } catch (SAXException e) {
\r
49 Debug.d(this, null, e);
\r
54 public String getText(){
\r
55 return (sb != null)? sb.toString() : null;
\r
58 public void characters(char[] ch, int start, int length) throws SAXException {
\r
59 if (sb.length() > 0){
\r
62 String str = new String(ch, start, length);
\r
63 str = CommonUtil.replaceString(str, "\\n", "");
\r
67 public void endDocument() throws SAXException {
\r
71 public void endElement(String uri, String localName, String qName)
\r
72 throws SAXException {
\r
76 public void endPrefixMapping(String prefix) throws SAXException {
\r
80 public void ignorableWhitespace(char[] arg0, int arg1, int arg2)
\r
81 throws SAXException {
\r
85 public void processingInstruction(String arg0, String arg1)
\r
86 throws SAXException {
\r
90 public void setDocumentLocator(Locator arg0) {
\r
94 public void skippedEntity(String arg0) throws SAXException {
\r
98 public void startDocument() throws SAXException {
\r
102 public void startElement(String arg0, String arg1, String arg2,
\r
103 Attributes arg3) throws SAXException {
\r
107 public void startPrefixMapping(String arg0, String arg1)
\r
108 throws SAXException {
\r