OSDN Git Service
(root)
/
htmltree-py
/
htmltree.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
| inline |
side by side
(parent:
52b33a3
)
fix for unicode type input
author
Hiromichi MATSUSHIMA
<hirom@office-sv.osdn.jp>
Tue, 28 Jun 2011 11:25:56 +0000
(20:25 +0900)
committer
Hiromichi MATSUSHIMA
<hirom@office-sv.osdn.jp>
Tue, 28 Jun 2011 11:25:56 +0000
(20:25 +0900)
htmltree.py
patch
|
blob
|
history
diff --git
a/htmltree.py
b/htmltree.py
index
597da01
..
2bcb1da
100644
(file)
--- a/
htmltree.py
+++ b/
htmltree.py
@@
-356,9
+356,17
@@
class HTMLTree(HTMLParser.HTMLParser):
elem = HTMLElement(HTMLElement.TEXT)
elem._parent = self._cursor
+
+ # text encode check and convert.
+ # if charset is given, convert text to unicode type.
if self.charset:
- elem._text = unicode(data, self.charset).encode("utf-8")
+ try:
+ elem._text = unicode(data, self.charset)
+ except TypeError:
+ # data is already a unicode object (unicode() raises TypeError when asked to decode one); use it as-is.
+ elem._text = data
else:
+ # treat as unicode input
elem._text = data
self._cursor.append(elem)