From: Hiromichi MATSUSHIMA
Date: Tue, 28 Jun 2011 11:25:56 +0000 (+0900)
Subject: fix for unicode type input
X-Git-Url: http://git.osdn.net/view?p=htmltree-py%2Fhtmltree.git;a=commitdiff_plain;h=e0668f7f20adea4f0a795dec8769b754e87f228d

fix for unicode type input
---

diff --git a/htmltree.py b/htmltree.py
index 597da01..2bcb1da 100644
--- a/htmltree.py
+++ b/htmltree.py
@@ -356,9 +356,17 @@ class HTMLTree(HTMLParser.HTMLParser):
 
         elem = HTMLElement(HTMLElement.TEXT)
         elem._parent = self._cursor
+
+        # text encode check and convert.
+        # if charset is given, convert text to unicode type.
         if self.charset:
-            elem._text = unicode(data, self.charset).encode("utf-8")
+            try:
+                elem._text = unicode(data, self.charset)
+            except TypeError:
+                # self.charset is utf-8.
+                elem._text = data
         else:
+            # treat as unicode input
             elem._text = data
 
         self._cursor.append(elem)