OSDN Git Service

fix for unicode type input
author Hiromichi MATSUSHIMA <hirom@office-sv.osdn.jp>
Tue, 28 Jun 2011 11:25:56 +0000 (20:25 +0900)
committer Hiromichi MATSUSHIMA <hirom@office-sv.osdn.jp>
Tue, 28 Jun 2011 11:25:56 +0000 (20:25 +0900)
htmltree.py

index 597da01..2bcb1da 100644 (file)
@@ -356,9 +356,17 @@ class HTMLTree(HTMLParser.HTMLParser):
 
         elem = HTMLElement(HTMLElement.TEXT)
         elem._parent = self._cursor
+
+        # Check the text encoding and convert if needed.
+        # If a charset is given, decode the text into a unicode object.
         if self.charset:
-            elem._text = unicode(data, self.charset).encode("utf-8")
+            try:
+                elem._text = unicode(data, self.charset)
+            except TypeError:
+                # data is already a unicode object (decoding unicode raises TypeError); use it as-is.
+                elem._text = data
         else:
+            # treat as unicode input
             elem._text = data
         self._cursor.append(elem)