X-Git-Url: http://git.osdn.net/view?p=htmltree-py%2Fhtmltree.git;a=blobdiff_plain;f=htmltree.py;h=fea5a7be3a70d83df0dbdfa406dc681150230419;hp=a9e57b87a649901dc29fce2d7bee15831d5614d2;hb=4391955c894b3b17bbf70418507c0633a08205b8;hpb=ec0ba778324d7a92b215c990692fa6d08263a187

diff --git a/htmltree.py b/htmltree.py
index a9e57b8..fea5a7b 100644
--- a/htmltree.py
+++ b/htmltree.py
@@ -63,6 +63,34 @@ class HTMLRenderer(Renderer):
         elif elem.is_decl():
             texts.append("")
 
+
+class TEXTRenderer(Renderer):
+    """Render an HTMLElement tree as text: only text nodes are emitted, joined with no separator."""
+    # TODO: check more strictly which tags need no closing tag.
+    UNCLOSABLE_TAGS = ["br", "link", "meta", "img"]  # not referenced by the methods of this class
+
+    def render_inner(self, elem):  # text of elem's children only; elem itself is not visited
+        texts = []
+        for child in elem:
+            self._recursive(child, texts)
+        return "".join(texts)
+
+    def render(self, elem):  # text of elem and everything below it
+        texts = []
+        self._recursive(elem, texts)
+        return "".join(texts)
+
+    def _recursive(self, elem, texts):  # depth-first walk appending text fragments to `texts` in place
+        if elem.is_tag():  # a tag emits nothing itself; only its children are visited
+            for child in elem:
+                self._recursive(child, texts)
+        elif elem.is_text():
+            if elem.text():  # skip text nodes whose text() is falsy (e.g. empty)
+                texts.append(elem.text())
+        elif elem.is_root():  # root is walked exactly like a tag; any other node kind emits nothing
+            for child in elem:
+                self._recursive(child, texts)
+
 class HTMLElement(list):
     """HTML element object to use as tree nodes."""
     ROOT = 0
@@ -135,6 +163,11 @@ class HTMLElement(list):
         rn = HTMLRenderer()
         return rn.render_inner(self)
 
+    def inner_text(self):
+        """Return this element's inner text, as rendered by TEXTRenderer.render_inner()."""
+        rn = TEXTRenderer()
+        return rn.render_inner(self)
+
     # navigation functions
     def parent(self):
         """returns tag's parent element."""