# Tag-matching patterns. Each one captures a quoted attribute value in
# group 1 (non-greedy); the opening and closing quote characters are not
# required to be the same kind of quote.
rex_imgtag = re.compile(r"""<img\s+src=["'](.*?)["'].*?>""")  # src must be the first attribute of <img>
rex_atag = re.compile(r"""<a\s+href=["'](.*?)["'].*?>""")  # href must be the first attribute of <a>
rex_alt = re.compile(r"""alt=["'](.*?)["']""")  # value of an alt attribute, anywhere in the text
+rex_figuretag = re.compile(r"""<figure\s+style=["'](.*?)["'].*?>""")  # style must be the first attribute of <figure>
try:
in_f = codecs.open(sys.argv[1], "r", "utf_8" )
return line.replace(tagstr, new_tag_str)
+def replace_figure_tag(line, tagstr, path):
+ attrs = htmltaglib.parse_attributes(tagstr)
+ attrs['style'] = "width:480px;"
+ new_tag_str = htmltaglib.build_tag('figure', attrs)
+
+ return line.replace(tagstr, new_tag_str)
+
# Rewrite the input one line at a time and write each result to out_f.
# NOTE(review): this excerpt appears to omit statements between the visible
# ones (e.g. a guard on the IMG match and the assignment of tagstr before
# replace_a_tag is called) -- confirm against the full file.
for line in in_f:
# proc for IMG tag
match = rex_imgtag.search(line)
path = match.group(1)  # captured src value of the <img> tag
line = replace_a_tag(line, tagstr, path)
+ # proc for FIGURE tag (only matched when style is the tag's first attribute)
+ match = rex_figuretag.search(line)
+ if match:
+ tagstr = match.group(0)  # full text of the opening <figure ...> tag
+ style= match.group(1)  # current value of its style attribute
+ line = replace_figure_tag(line, tagstr, style)  # style arrives as the unused "path" parameter
+
print >> out_f, line,  # trailing comma: print does not append its own newline