--- /dev/null
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+#
+# wp_imgswap.py : WordPress image tag swapper
+#
+
+import sys
+import codecs
+import re
+import os.path
+import hashlib
+import math
+
+import htmltaglib
+import deterfile
+import getjpggeom
+
+# Usage text handed to sys.exit() when a required positional argument is missing.
+# NOTE(review): max_image_width / max_image_height are advertised here but are
+# not read anywhere in this file as shown.
+usage = """usage: %s <target file> <output_file> <image_dir> [link_prefix] [max_image_width] [max_image_height]""" % (os.path.basename(sys.argv[0]),)
+
+# Tag patterns.  Group 1 is the quoted value of the attribute that directly
+# follows the tag name (src / href / style); a tag whose first attribute is
+# something else is not matched.
+rex_imgtag = re.compile(r"""<img\s+src=["'](.*?)["'].*?>""")
+rex_atag = re.compile(r"""<a\s+href=["'](.*?)["'].*?>""")
+# Quoted value of an alt attribute, searched for inside a matched <img> tag.
+rex_alt = re.compile(r"""alt=["'](.*?)["']""")
+rex_figuretag = re.compile(r"""<figure\s+style=["'](.*?)["'].*?>""")
+
+# Read every required argument BEFORE opening anything: opening the output
+# file in "w" mode truncates it, so a missing <image_dir> has to be detected
+# first or a bad invocation would wipe an existing output file.
+try:
+    _target_path = sys.argv[1]
+    _output_path = sys.argv[2]
+    image_dir = sys.argv[3]
+except IndexError:
+    sys.exit(usage)
+
+in_f = codecs.open(_target_path, "r", "utf_8")
+out_f = codecs.open(_output_path, "w", "utf_8")
+
+# Optional 4th argument: prefix used when rewriting <a href>; defaults to "".
+try:
+    link_prefix = sys.argv[4]
+except IndexError:
+    link_prefix = ""
+
+def _get_png_geom(filepath):
+    """Return the pixel size of the image at *filepath* as ``(width, height)``.
+
+    Files whose extension is ``.jpg`` or ``.jpeg`` (any letter case) are
+    measured with ``getjpggeom.get_jpeg_geometory``.  Every other file is
+    passed to ``deterfile.file`` and the size is parsed from a leading
+    ``<digits> x <digits>`` in element 1 of the value it returns.
+
+    Returns ``None`` when element 1 does not start with such a size.
+    Raises ``Exception`` when the deterfile result has no element 1.
+    """
+    # splitext only looks at the final path component, so a dot in a directory
+    # name or a dotless file that happens to be called "jpg" is not mistaken
+    # for a JPEG extension.
+    ext = os.path.splitext(filepath)[1].lower()
+    if ext in ('.jpg', '.jpeg'):
+        (w, h) = getjpggeom.get_jpeg_geometory(filepath)
+        return (w, h)
+
+    desc = deterfile.file(filepath)
+    try:
+        size_field = desc[1]
+    except IndexError:
+        err = ", ".join(desc)
+        raise Exception("deterfile error: %s, file: %s . " % (err, filepath))
+
+    m = re.match(r"([0-9]+)\s*x\s*([0-9]+)", size_field)
+    if m is None:
+        return None
+    return (int(m.group(1)), int(m.group(2)))
+
+def replace_img_tag(line, tagstr, path):
+    """Return *line* with the ``<img>`` tag *tagstr* rewritten.
+
+    line   -- the text containing the tag
+    tagstr -- the exact tag text as it appears in *line*
+    path   -- the tag's current ``src`` value
+
+    When *path* is not an existing local file, *line* is returned untouched.
+    Otherwise ``src`` becomes ``<image_dir>/<basename of path>`` and
+    ``width``/``height`` are set from the image file.  If the size cannot be
+    determined (``_get_png_geom`` returns ``None``), ``width``/``height`` are
+    left as parsed instead of failing on the tuple unpack.
+    """
+    if not os.path.isfile(path):
+        return line
+
+    attrs = htmltaglib.parse_attributes(tagstr)
+
+    geom = _get_png_geom(path)
+    if geom is not None:
+        (w, h) = geom
+        attrs['height'] = str(h)
+        attrs['width'] = str(w)
+
+    attrs['src'] = '''%s/%s''' % (image_dir, os.path.basename(path))
+
+    # An alt value found directly in the raw tag text takes precedence over
+    # whatever parse_attributes produced for it.
+    alt_match = rex_alt.search(tagstr)
+    if alt_match:
+        attrs['alt'] = alt_match.group(1)
+
+    new_tag_str = htmltaglib.build_tag('img', attrs)
+    return line.replace(tagstr, new_tag_str)
+
+def replace_a_tag(line, tagstr, path):
+    """Return *line* with the ``<a>`` tag *tagstr* re-targeted.
+
+    If *path* (the tag's current ``href``) names an existing local file, the
+    ``href`` is replaced by ``link_prefix + "/" + <basename of path>`` and the
+    tag is rebuilt; in every other case *line* comes back unchanged.
+    """
+    if os.path.isfile(path):
+        tag_attrs = htmltaglib.parse_attributes(tagstr)
+        tag_attrs['href'] = link_prefix + "/" + os.path.basename(path)
+        rebuilt = htmltaglib.build_tag('a', tag_attrs)
+        line = line.replace(tagstr, rebuilt)
+    return line
+
+def replace_figure_tag(line, tagstr, path):
+    """Return *line* with the ``style`` attribute removed from a ``<figure>`` tag.
+
+    line   -- the text containing the tag
+    tagstr -- the exact tag text as it appears in *line*
+    path   -- unused; the caller in this file passes the tag's style value
+
+    An earlier variant that clamped the width inside ``style`` was disabled;
+    the attribute is now dropped altogether.
+    """
+    attrs = htmltaglib.parse_attributes(tagstr)
+    # Guard the deletion so a tag whose parsed attributes carry no 'style'
+    # key is rebuilt as-is instead of raising KeyError.
+    if 'style' in attrs:
+        del attrs['style']
+    new_tag_str = htmltaglib.build_tag('figure', attrs)
+
+    return line.replace(tagstr, new_tag_str)
+
+# Copy the input to the output line by line, rewriting IMG, A and FIGURE tags.
+try:
+    for line in in_f:
+        # finditer() walks the string object it was handed, so rebinding
+        # `line` inside each loop does not disturb the iteration; every tag
+        # on the line is processed, not only the first one.
+
+        # proc for IMG tag
+        for match in rex_imgtag.finditer(line):
+            line = replace_img_tag(line, match.group(0), match.group(1))
+
+        # proc for A tag (searched in the line as rewritten above)
+        for match in rex_atag.finditer(line):
+            line = replace_a_tag(line, match.group(0), match.group(1))
+
+        # proc for FIGURE tag; group 1 is the style value
+        for match in rex_figuretag.finditer(line):
+            line = replace_figure_tag(line, match.group(0), match.group(1))
+
+        # write() emits the line exactly as-is (it already carries its own
+        # line ending) without relying on the print statement's softspace
+        # handling.
+        out_f.write(line)
+finally:
+    # Close explicitly so the output is flushed even if a line fails.
+    try:
+        in_f.close()
+    finally:
+        out_f.close()