From 4391955c894b3b17bbf70418507c0633a08205b8 Mon Sep 17 00:00:00 2001
From: Hiromichi MATSUSHIMA
Date: Thu, 28 Jul 2011 20:27:56 +0900
Subject: [PATCH] append TEXTRenderer and HTMLElement.inner_text()

---
 htmltree.py | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/htmltree.py b/htmltree.py
index a9e57b8..fea5a7b 100644
--- a/htmltree.py
+++ b/htmltree.py
@@ -63,6 +63,34 @@ class HTMLRenderer(Renderer):
         elif elem.is_decl():
             texts.append("")
 
+
+class TEXTRenderer(Renderer):
+    """Render HTMLElement as TEXT."""
+    # TODO: check tags not need to close more strict...
+    UNCLOSABLE_TAGS = ["br", "link", "meta", "img"]
+
+    def render_inner(self, elem):
+        texts = []
+        for child in elem:
+            self._recursive(child, texts)
+        return "".join(texts)
+
+    def render(self, elem):
+        texts = []
+        self._recursive(elem, texts)
+        return "".join(texts)
+
+    def _recursive(self, elem, texts):
+        if elem.is_tag():
+            for child in elem:
+                self._recursive(child, texts)
+        elif elem.is_text():
+            if elem.text():
+                texts.append(elem.text())
+        elif elem.is_root():
+            for child in elem:
+                self._recursive(child, texts)
+
 class HTMLElement(list):
     """HTML element object to use as tree nodes."""
     ROOT = 0
@@ -135,6 +163,11 @@ class HTMLElement(list):
         rn = HTMLRenderer()
         return rn.render_inner(self)
 
+    def inner_text(self):
+        "returns inner text"
+        rn = TEXTRenderer()
+        return rn.render_inner(self)
+
     # navigation functions
     def parent(self):
         """returns tag's parent element."""
-- 
2.11.0