doc/xml2po.py

   1 #!/usr/bin/python -u\r
   2 # Copyright (c) 2004 Danilo Segan <danilo@kvota.net>.\r
   3 #\r
   4 # This file is part of xml2po.\r
   5 #\r
   6 # xml2po is free software; you can redistribute it and/or modify\r
   7 # it under the terms of the GNU General Public License as published by\r
   8 # the Free Software Foundation; either version 2 of the License, or\r
   9 # (at your option) any later version.\r
  10 #\r
  11 # xml2po is distributed in the hope that it will be useful,\r
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of\r
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\r
  14 # GNU General Public License for more details.\r
  15 #\r
  16 # You should have received a copy of the GNU General Public License\r
  17 # along with xml2po; if not, write to the Free Software Foundation, Inc.,\r
  18 # 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA\r
  19 #\r
  20 \r
  21 # slightly modified to work on Windows for TortoiseSVN.\r
  22 \r
# xml2po -- translate XML documents
# Version string of this script; it is what the -v/--version option prints.
VERSION = "1.0.5"

# Versioning scheme (I have used it for a long time, so let me explain it
# for those who are used to the Linux versioning scheme):
#   1.0.* are unstable development versions
#   1.1 will be the first stable release (release 1), and 1.1.* its bugfix releases
#   2.0.* will be the unstable feature-development stage (milestone 1)
#   2.1.* are unstable development betas (milestone 2)
#   2.2 is the second stable release (release 2), and 2.2.* its bugfix releases
#   ...
#
  35 import sys\r
  36 import libxml2\r
  37 import gettext\r
  38 import os\r
  39 import re\r
  40 \r
  41 class MessageOutput:\r
  42     def __init__(self, with_translations = 0):\r
  43         self.messages = []\r
  44         self.comments = {}\r
  45         self.linenos = {}\r
  46         self.nowrap = {}\r
  47         if with_translations:\r
  48             self.translations = []\r
  49         self.do_translations = with_translations\r
  50         self.output_msgstr = 0 # this is msgid mode for outputMessage; 1 is for msgstr mode\r
  51 \r
  52     def translationsFollow(self):\r
  53         """Indicate that what follows are translations."""\r
  54         self.output_msgstr = 1\r
  55 \r
  56     def setFilename(self, filename):\r
  57         self.filename = filename\r
  58 \r
  59     def outputMessage(self, text, lineno = 0, comment = None, spacepreserve = 0, tag = None):\r
  60         """Adds a string to the list of messages."""\r
  61         if (text.strip() != ''):\r
  62             t = escapePoString(normalizeString(text, not spacepreserve))\r
  63             if self.output_msgstr:\r
  64                 self.translations.append(t)\r
  65                 return\r
  66             \r
  67             if self.do_translations or (not t in self.messages):\r
  68                 self.messages.append(t)\r
  69                 if spacepreserve:\r
  70                     self.nowrap[t] = 1\r
  71                 if t in self.linenos.keys():\r
  72                     self.linenos[t].append((self.filename, tag, lineno))\r
  73                 else:\r
  74                     self.linenos[t] = [ (self.filename, tag, lineno) ]\r
  75                 if (not self.do_translations) and comment and not t in self.comments:\r
  76                     self.comments[t] = comment\r
  77             else:\r
  78                 if t in self.linenos.keys():\r
  79                     self.linenos[t].append((self.filename, tag, lineno))\r
  80                 else:\r
  81                     self.linenos[t] = [ (self.filename, tag, lineno) ]\r
  82                 if comment and not t in self.comments:\r
  83                     self.comments[t] = comment\r
  84 \r
  85     def outputHeader(self, out):\r
  86         from time import gmtime, strftime\r
  87         tstamp = strftime("%Y-%m-%d %H:%M +0000", gmtime())\r
  88         tmp = """msgid ""\r
  89 msgstr ""\r
  90 "Project-Id-Version: PACKAGE VERSION\\n"\r
  91 "POT-Creation-Date: %s\\n"\r
  92 "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"\r
  93 "Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"\r
  94 "Language-Team: LANGUAGE <LL@li.org>\\n"\r
  95 "MIME-Version: 1.0\\n"\r
  96 "Content-Type: text/plain; charset=UTF-8\\n"\r
  97 "Content-Transfer-Encoding: 8bit\\n"\r
  98 \r
  99 """ % (tstamp)\r
 100 \r
 101         out.write(tmp.encode('utf-8'))\r
 102 \r
 103     def outputAll(self, out):\r
 104         self.outputHeader(out)\r
 105         \r
 106         for k in self.messages:\r
 107             if k in self.comments:\r
 108                 out.write("#. %s\n" % (self.comments[k].replace("\n","\n#. ")))\r
 109             references = ""\r
 110             for reference in self.linenos[k]:\r
 111                 references += "#: %s:%d\n#.(%s)\n" % (reference[0], reference[2], reference[1])\r
 112             out.write("%s" % (references))\r
 113             if k in self.nowrap and self.nowrap[k]:\r
 114                 out.write("#, no-wrap\n")\r
 115             out.write("msgid \"%s\"\n" % (k))\r
 116             translation = ""\r
 117             if self.do_translations:\r
 118                 if len(self.translations)>0:\r
 119                     translation = self.translations.pop(0)\r
 120             out.write("msgstr \"%s\"\n\n" % (translation))\r
 121 \r
 122 \r
 123 def normalizeNode(node):\r
 124     if not node:\r
 125         return\r
 126     elif isSpacePreserveNode(node):\r
 127         return\r
 128     elif node.isText():\r
 129         if node.isBlankNode():\r
 130             node.setContent('')\r
 131         else:\r
 132             node.setContent(re.sub('\s+',' ', node.content))\r
 133 \r
 134     elif node.children and node.type == 'element':\r
 135         child = node.children\r
 136         while child:\r
 137             normalizeNode(child)\r
 138             child = child.next\r
 139 \r
 140 def normalizeString(text, ignorewhitespace = 1):\r
 141     """Normalizes string to be used as key for gettext lookup.\r
 142 \r
 143     Removes all unnecessary whitespace."""\r
 144     if not ignorewhitespace:\r
 145         return text\r
 146     try:\r
 147         # Lets add document DTD so entities are resolved\r
 148         dtd = doc.intSubset()\r
 149         tmp = dtd.serialize('utf-8')\r
 150         tmp = tmp + '<norm>%s</norm>' % text\r
 151     except:\r
 152         tmp = '<norm>%s</norm>' % text\r
 153 \r
 154     try:\r
 155         ctxt = libxml2.createDocParserCtxt(tmp)\r
 156         if expand_entities:\r
 157             ctxt.replaceEntities(1)\r
 158         ctxt.parseDocument()\r
 159         tree = ctxt.doc()\r
 160         newnode = tree.getRootElement()\r
 161     except:\r
 162         print >> sys.stderr, """Error while normalizing string as XML:\n"%s"\n""" % (text)\r
 163         return text\r
 164 \r
 165     normalizeNode(newnode)\r
 166 \r
 167     result = ''\r
 168     child = newnode.children\r
 169     while child:\r
 170         result += child.serialize('utf-8')\r
 171         child = child.next\r
 172 \r
 173     result = re.sub('^ ','', result)\r
 174     result = re.sub(' $','', result)\r
 175     \r
 176     return result\r
 177 \r
 178 def stringForEntity(node):\r
 179     """Replaces entities in the node."""\r
 180     text = node.serialize('utf-8')\r
 181     try:\r
 182         # Lets add document DTD so entities are resolved\r
 183         dtd = node.doc.intSubset()\r
 184         tmp = dtd.serialize('utf-8') + '<norm>%s</norm>' % text\r
 185         next = 1\r
 186     except:\r
 187         tmp = '<norm>%s</norm>' % text\r
 188         next = 0\r
 189 \r
 190     ctxt = libxml2.createDocParserCtxt(tmp)\r
 191     if expand_entities:\r
 192         ctxt.replaceEntities(1)\r
 193     ctxt.parseDocument()\r
 194     tree = ctxt.doc()\r
 195     if next:\r
 196         newnode = tree.children.next\r
 197     else:\r
 198         newnode = tree.children\r
 199 \r
 200     result = ''\r
 201     child = newnode.children\r
 202     while child:\r
 203         result += child.serialize('utf-8')\r
 204         child = child.next\r
 205 \r
 206     return result\r
 207 \r
 208 \r
 209 def escapePoString(text):\r
 210     return text.replace('\\','\\\\').replace('"', "\\\"").replace("\n","\\n").replace("\t","\\t")\r
 211 \r
 212 def unEscapePoString(text):\r
 213     return text.replace('\\"', '"').replace('\\\\','\\')\r
 214 \r
 215 def getTranslation(text, spacepreserve = 0):\r
 216     """Returns a translation via gettext for specified snippet.\r
 217 \r
 218     text should be a string to look for, spacepreserve set to 1\r
 219     when spaces should be preserved.\r
 220     """\r
 221     text = normalizeString(text, not spacepreserve)\r
 222     if (text.strip() == ''):\r
 223         return text\r
 224     file = open(mofile, "rb")\r
 225     if file:\r
 226         gt = gettext.GNUTranslations(file)\r
 227         if gt:\r
 228             return gt.ugettext(text.decode('utf-8'))\r
 229     return text\r
 230 \r
 231 def startTagForNode(node):\r
 232     if not node:\r
 233         return 0\r
 234 \r
 235     result = node.name\r
 236     params = ''\r
 237     if node.properties:\r
 238         for p in node.properties:\r
 239             if p.type == 'attribute':\r
 240                 # FIXME: This part sucks\r
 241                 params += p.serialize('utf-8')\r
 242     return result+params\r
 243         \r
 244 def endTagForNode(node):\r
 245     if not node:\r
 246         return 0\r
 247 \r
 248     result = node.name\r
 249     return result\r
 250         \r
 251 def isFinalNode(node):\r
 252     if automatic:\r
 253         auto = autoNodeIsFinal(node)\r
 254         # Check if any of the parents is also autoNodeIsFinal,\r
 255         # and if it is, don't consider this node a final one\r
 256         parent = node.parent\r
 257         while parent and auto:\r
 258             auto = not autoNodeIsFinal(parent)\r
 259             parent = parent.parent\r
 260         return auto\r
 261     #node.type =='text' or not node.children or\r
 262     if node.type == 'element' and node.name in ultimate_tags:\r
 263         return 1\r
 264     elif node.children:\r
 265         final_children = 1\r
 266         child = node.children\r
 267         while child and final_children:\r
 268             if not isFinalNode(child):\r
 269                 final_children = 0\r
 270             child = child.next\r
 271         if final_children:\r
 272             return 1\r
 273     return 0\r
 274 \r
 275 def ignoreNode(node):\r
 276     if automatic:\r
 277         if node.type in ('dtd', 'comment'):\r
 278             return 1\r
 279         else:\r
 280             return 0\r
 281     else:\r
 282         if isFinalNode(node):\r
 283             return 0\r
 284         if node.name in ignored_tags or node.type in ('dtd', 'comment'):\r
 285             return 1\r
 286         return 0\r
 287 \r
 288 def isSpacePreserveNode(node):\r
 289     pres = node.getSpacePreserve()\r
 290     if pres == 1:\r
 291         return 1\r
 292     else:\r
 293         if CurrentXmlMode and (node.name in CurrentXmlMode.getSpacePreserveTags()):\r
 294             return 1\r
 295         else:\r
 296             return 0\r
 297 \r
 298 def getCommentForNode(node):\r
 299     """Walk through previous siblings until a comment is found, or other element.\r
 300 \r
 301     Only whitespace is allowed between comment and current node."""\r
 302     prev = node.prev\r
 303     while prev and prev.type == 'text' and prev.content.strip() == '':\r
 304         prev = prev.prev\r
 305     if prev and prev.type == 'comment':\r
 306         return prev.content.strip()\r
 307     else:\r
 308         return None\r
 309 \r
 310 \r
 311 def replaceNodeContentsWithText(node,text):\r
 312     """Replaces all subnodes of a node with contents of text treated as XML."""\r
 313     if node.children:\r
 314         starttag = node.name #startTagForNode(node)\r
 315         endtag = endTagForNode(node)\r
 316         try:\r
 317             # Lets add document DTD so entities are resolved\r
 318             dtd = doc.intSubset()\r
 319             tmp = ''\r
 320             if expand_entities: # FIXME: we get a "Segmentation fault" in libxml2.parseMemory() when we include DTD otherwise\r
 321                 tmp = dtd.serialize('utf-8')\r
 322             tmp = tmp + '<%s>%s</%s>' % (starttag, text, endtag)\r
 323         except:\r
 324             tmp = '<%s>%s</%s>' % (starttag, text, endtag)\r
 325 \r
 326         try:\r
 327             ctxt = libxml2.createDocParserCtxt(tmp.encode('utf-8'))\r
 328             ctxt.replaceEntities(0)\r
 329             ctxt.parseDocument()\r
 330             newnode = ctxt.doc()\r
 331         except:\r
 332             print >> sys.stderr, """Error while parsing translation as XML:\n"%s"\n""" % (text.encode('utf-8'))\r
 333             return\r
 334 \r
 335         newelem = newnode.getRootElement()\r
 336         if newelem and newelem.children:\r
 337             free = node.children\r
 338             while free:\r
 339                 next = free.next\r
 340                 free.unlinkNode()\r
 341                 free = next\r
 342 \r
 343             node.addChildList(newelem.children)\r
 344         else:\r
 345             # In practice, this happens with tags such as "<para>    </para>" (only whitespace in between)\r
 346             pass\r
 347     else:\r
 348         node.setContent(text)\r
 349 \r
 350 def autoNodeIsFinal(node):\r
 351     """Returns 1 if node is text node, contains non-whitespace text nodes or entities."""\r
 352     final = 0\r
 353     if node.isText() and node.content.strip()!='':\r
 354         return 1\r
 355     child = node.children\r
 356     while child:\r
 357         if child.type in ['text'] and  child.content.strip()!='':\r
 358             final = 1\r
 359             break\r
 360         child = child.next\r
 361 \r
 362     return final\r
 363 \r
 364 \r
 365 def worthOutputting(node):\r
 366     """Returns 1 if node is "worth outputting", otherwise 0.\r
 367 \r
 368     Node is "worth outputting", if none of the parents\r
 369     isFinalNode, and it contains non-blank text and entities.\r
 370     """\r
 371     worth = 1\r
 372     parent = node.parent\r
 373     final = isFinalNode(node) and node.name not in ignored_tags\r
 374     while not final and parent:\r
 375         if isFinalNode(parent):\r
 376             final = 1 # reset if we've got to one final tag\r
 377         if final and (parent.name not in ignored_tags) and worthOutputting(parent):\r
 378             worth = 0\r
 379             break\r
 380         parent = parent.parent\r
 381     if not worth:\r
 382         return 0\r
 383 \r
 384     return autoNodeIsFinal(node)\r
 385     \r
 386 def processElementTag(node, replacements, restart = 0):\r
 387     """Process node with node.type == 'element'."""\r
 388     if node.type == 'element':\r
 389         outtxt = ''\r
 390         if restart:\r
 391             myrepl = []\r
 392         else:\r
 393             myrepl = replacements\r
 394 \r
 395         submsgs = []\r
 396 \r
 397         child = node.children\r
 398         while child:\r
 399             if (isFinalNode(child)) or (child.type == 'element' and worthOutputting(child)):\r
 400                 myrepl.append(processElementTag(child, myrepl, 1))\r
 401                 outtxt += '<placeholder-%d/>' % (len(myrepl))\r
 402             else:\r
 403                 if child.type == 'element':\r
 404                     (starttag, content, endtag, translation) = processElementTag(child, myrepl, 0)\r
 405                     outtxt += '<%s>%s</%s>' % (starttag, content, endtag)\r
 406                 else:\r
 407                     outtxt += doSerialize(child)\r
 408 \r
 409             child = child.next\r
 410 \r
 411         if mode == 'merge':\r
 412             translation = getTranslation(outtxt, isSpacePreserveNode(node))\r
 413         else:\r
 414             translation = outtxt\r
 415         starttag = startTagForNode(node)\r
 416         endtag = endTagForNode(node)\r
 417 \r
 418         if restart or worthOutputting(node):\r
 419             i = 0\r
 420             while i < len(myrepl):\r
 421                 replacement = '<%s>%s</%s>' % (myrepl[i][0], myrepl[i][3], myrepl[i][2])\r
 422                 i += 1\r
 423                 translation = translation.replace('<placeholder-%d/>' % (i), replacement)\r
 424 \r
 425             if worthOutputting(node):\r
 426                 if mode == 'merge':\r
 427                     replaceNodeContentsWithText(node, translation)\r
 428                 else:\r
 429                     msg.outputMessage(outtxt, node.lineNo(), getCommentForNode(node), isSpacePreserveNode(node), tag = node.name)\r
 430 \r
 431         return (starttag, outtxt, endtag, translation)\r
 432     else:\r
 433         raise Exception("You must pass node with node.type=='element'.")\r
 434 \r
 435 \r
 436 def isExternalGeneralParsedEntity(node):\r
 437     if (node and node.type=='entity_ref'):\r
 438         try:\r
 439             # it would be nice if debugDumpNode could use StringIO, but it apparently cannot\r
 440             tmp = file(".xml2po-entitychecking","w+")\r
 441             node.debugDumpNode(tmp,0)\r
 442             tmp.seek(0)\r
 443             tmpstr = tmp.read()\r
 444             tmp.close()\r
 445             os.remove(".xml2po-entitychecking")\r
 446         except:\r
 447             # We fail silently, and replace all entities if we cannot\r
 448             # write .xml2po-entitychecking\r
 449             # !!! This is not very nice thing to do, but I don't know if\r
 450             #     raising an exception is any better\r
 451             return 0\r
 452         if tmpstr.find('EXTERNAL_GENERAL_PARSED_ENTITY') != -1:\r
 453             return 1\r
 454         else:\r
 455             return 0\r
 456     else:\r
 457         return 0\r
 458 \r
 459 def doSerialize(node):\r
 460     """Serializes a node and its children, emitting PO messages along the way.\r
 461 \r
 462     node is the node to serialize, first indicates whether surrounding\r
 463     tags should be emitted as well.\r
 464     """\r
 465 \r
 466     if ignoreNode(node):\r
 467         return ''\r
 468     elif not node.children:\r
 469         return node.serialize("utf-8")\r
 470     elif node.type == 'entity_ref':\r
 471         if isExternalGeneralParsedEntity(node):\r
 472             return node.serialize('utf-8')\r
 473         else:\r
 474             return stringForEntity(node) #content #content #serialize("utf-8")\r
 475     elif node.type == 'entity_decl':\r
 476         return node.serialize('utf-8') #'<%s>%s</%s>' % (startTagForNode(node), node.content, node.name)\r
 477     elif node.type == 'text':\r
 478         return node.serialize('utf-8')\r
 479     elif node.type == 'element':\r
 480         repl = []\r
 481         (starttag, content, endtag, translation) = processElementTag(node, repl, 1)\r
 482         return '<%s>%s</%s>' % (starttag, content, endtag)\r
 483     else:\r
 484         child = node.children\r
 485         outtxt = ''\r
 486         while child:\r
 487             outtxt += doSerialize(child)\r
 488             child = child.next\r
 489         return outtxt\r
 490 \r
 491     \r
 492 def read_finaltags(filelist):\r
 493     if CurrentXmlMode:\r
 494         return CurrentXmlMode.getFinalTags()\r
 495     else:\r
 496         defaults = ['para', 'title', 'releaseinfo', 'revnumber',\r
 497                     'date', 'itemizedlist', 'orderedlist',\r
 498                     'variablelist', 'varlistentry', 'term' ]\r
 499         return defaults\r
 500 \r
 501 def read_ignoredtags(filelist):\r
 502     if CurrentXmlMode:\r
 503         return CurrentXmlMode.getIgnoredTags()\r
 504     else:\r
 505         defaults = ['itemizedlist', 'orderedlist', 'variablelist',\r
 506                     'varlistentry' ]\r
 507         return defaults\r
 508 \r
 509 def tryToUpdate(allargs, lang):\r
 510     # Remove "-u" and "--update-translation"\r
 511     command = allargs[0]\r
 512     args = allargs[1:]\r
 513     opts, args = getopt.getopt(args, 'avhmket:o:p:u:',\r
 514                                ['automatic-tags','version', 'help', 'keep-entities', 'extract-all-entities', 'merge', 'translation=',\r
 515                                 'output=', 'po-file=', 'update-translation=' ])\r
 516     for opt, arg in opts:\r
 517         if opt in ('-a', '--automatic-tags'):\r
 518             command += " -a"\r
 519         elif opt in ('-k', '--keep-entities'):\r
 520             command += " -k"\r
 521         elif opt in ('-e', '--extract-all-entities'):\r
 522             command += " -e"\r
 523         elif opt in ('-m', '--mode'):\r
 524             command += " -m %s" % arg\r
 525         elif opt in ('-o', '--output'):\r
 526             sys.stderr.write("Error: Option '-o' is not yet supported when updating translations directly.\n")\r
 527             sys.exit(8)\r
 528         elif opt in ('-v', '--version'):\r
 529             print VERSION\r
 530             sys.exit(0)\r
 531         elif opt in ('-h', '--help'):\r
 532             sys.stderr.write("Error: If you want help, please use `%s --help' without '-u' option.\n" % (allargs[0]))\r
 533             sys.exit(9)\r
 534         elif opt in ('-u', '--update-translation'):\r
 535             pass\r
 536         else:\r
 537             sys.stderr.write("Error: Option `%s' is not supported with option `-u'.\n" % (opt))\r
 538             sys.exit(9)\r
 539 \r
 540     while args:\r
 541         command += " " + args.pop()\r
 542 \r
 543     file = lang\r
 544 \r
 545     sys.stderr.write("Merging translations for %s: " % (lang))\r
 546     result = os.system("%s | msgmerge -o .tmp.%s.po %s -" % (command, lang, file))\r
 547     if result:\r
 548         sys.exit(10)\r
 549     else:\r
 550         result = os.system("mv .tmp.%s.po %s" % (lang, file))\r
 551         if result:\r
 552             sys.stderr.write("Error: cannot rename file.\n")\r
 553             sys.exit(11)\r
 554         else:\r
 555             os.system("msgfmt -cv -o NUL %s" % (file))\r
 556             sys.exit(0)\r
 557 \r
 558 def load_mode(modename):\r
 559     #import imp\r
 560     #found = imp.find_module(modename, submodes_path)\r
 561     #module = imp.load_module(modename, found[0], found[1], found[2])\r
 562     try:\r
 563         sys.path.append(submodes_path)\r
 564         module = __import__(modename)\r
 565         modeModule = '%sXmlMode' % modename\r
 566         return getattr(module, modeModule)\r
 567     except:\r
 568         return None\r
 569 \r
 570 def xml_error_handler(arg, ctxt):\r
 571     pass\r
 572 \r
# Register the do-nothing xml_error_handler (with None as its context
# argument) as the libxml2 error handler, so error reports are dropped.
libxml2.registerErrorHandler(xml_error_handler, None)
 574 \r
 575 \r
# Main program start
# This file works as a script only: importing it as a module is refused.
if __name__ != '__main__': raise NotImplementedError

# Parameters
# Directory that load_mode() appends to sys.path to find mode modules.
submodes_path = "xml2po-modes"
# Name of the mode module to load; replaced by the argument of -m/--mode.
default_mode = 'docbook'

filename = ''
# Translated XML file given with -r/--reuse ('' when the option is absent).
origxml = ''
# MO file used for lookups when merging (set by -t, or by -p after msgfmt).
mofile = ''
# Passed to read_finaltags() and read_ignoredtags(), which do not use them.
ultimate = [ ]
ignored = [ ]
# XML input files taken from the remaining command-line arguments.
filenames = [ ]

mode = 'pot' # 'pot' or 'merge'
# Set to 1 by -a/--automatic-tags, or when the mode module cannot be loaded.
automatic = 0
# Cleared by -k/--keep-entities.
expand_entities = 1
# Set to 1 by -e/--expand-all-entities.
expand_all_entities = 0

output  = '-' # this means to stdout
 596 \r
 597 import getopt, fileinput\r
 598 \r
 599 def usage (with_help = False):\r
 600         print >> sys.stderr, "Usage:  %s [OPTIONS] [XMLFILE]..." % (sys.argv[0])\r
 601         if (with_help):\r
 602                 print >> sys.stderr, """\r
 603 OPTIONS may be some of:\r
 604     -a    --automatic-tags     Automatically decides if tags are to be considered\r
 605                                  "final" or not\r
 606     -k    --keep-entities      Don't expand entities\r
 607     -e    --expand-all-entities  Expand ALL entities (including SYSTEM ones)\r
 608     -m    --mode=TYPE          Treat tags as type TYPE (default: docbook)\r
 609     -o    --output=FILE        Print resulting text (XML or POT) to FILE\r
 610     -p    --po-file=FILE       Specify PO file containing translation, and merge\r
 611                                  Overwrites temporary file .xml2po.mo.\r
 612     -r    --reuse=FILE         Specify translated XML file with the same structure\r
 613     -t    --translation=FILE   Specify MO file containing translation, and merge\r
 614     -u    --update-translation=LANG.po   Updates a PO file using msgmerge program\r
 615     -v    --version            Output version of the xml2po program\r
 616 \r
 617     -h    --help               Output this message\r
 618 \r
 619 EXAMPLES:\r
 620     To create a POTemplate book.pot from input files chapter1.xml and\r
 621     chapter2.xml, run the following:\r
 622         %s -o book.pot chapter1.xml chapter2.xml\r
 623 \r
 624     After translating book.pot into de.po, merge the translations back,\r
 625     using -p option for each XML file:\r
 626         %s -p de.po chapter1.xml > chapter1.de.xml\r
 627         %s -p de.po chapter2.xml > chapter2.de.xml\r
 628 """ % (sys.argv[0], sys.argv[0], sys.argv[0])\r
 629         sys.exit(0)\r
 630 \r
 631 if len(sys.argv) < 2: usage()\r
 632 \r
 633 args = sys.argv[1:]\r
 634 try: opts, args = getopt.getopt(args, 'avhkem:t:o:p:u:r:',\r
 635                            ['automatic-tags','version', 'help', 'keep-entities', 'expand-all-entities', 'mode=', 'translation=',\r
 636                             'output=', 'po-file=', 'update-translation=', 'reuse=' ])\r
 637 except getopt.GetoptError: usage(True)\r
 638 \r
 639 for opt, arg in opts:\r
 640     if opt in ('-m', '--mode'):\r
 641         default_mode = arg\r
 642     if opt in ('-a', '--automatic-tags'):\r
 643         automatic = 1\r
 644     elif opt in ('-k', '--keep-entities'):\r
 645         expand_entities = 0\r
 646     elif opt in ('-e', '--expand-all-entities'):\r
 647         expand_all_entities = 1\r
 648     elif opt in ('-t', '--translation'):\r
 649         mofile = arg\r
 650         mode = 'merge'\r
 651         translationlanguage = os.path.splitext(mofile)[0]\r
 652     elif opt in ('-r', '--reuse'):\r
 653         origxml = arg\r
 654     elif opt in ('-u', '--update-translation'):\r
 655         tryToUpdate(sys.argv, arg)\r
 656     elif opt in ('-p', '--po-file'):\r
 657         mofile = ".xml2po.mo"\r
 658         pofile = arg\r
 659         translationlanguage = os.path.splitext(pofile)[0]\r
 660         os.system("msgfmt -o %s %s >NUL" % (mofile, pofile)) and sys.exit(7)\r
 661         mode = 'merge'\r
 662     elif opt in ('-o', '--output'):\r
 663         output = arg\r
 664     elif opt in ('-v', '--version'):\r
 665         print VERSION\r
 666         sys.exit(0)\r
 667     elif opt in ('-h', '--help'):\r
 668         usage(True)\r
 669 \r
 670 # Treat remaining arguments as XML files\r
 671 while args:\r
 672     filenames.append(args.pop())\r
 673 \r
 674 if len(filenames) > 1 and mode=='merge':\r
 675     print  >> sys.stderr, "Error: You can merge translations with only one XML file at a time."\r
 676     sys.exit(2)\r
 677 \r
 678 try:\r
 679     CurrentXmlMode = load_mode(default_mode)()\r
 680 except:\r
 681     CurrentXmlMode = None\r
 682     print >> sys.stderr, "Warning: cannot load module '%s', using automatic detection (-a)." % (default_mode)\r
 683     automatic = 1\r
 684 \r
 685 if mode=='merge' and mofile=='':\r
 686     print >> sys.stderr, "Error: You must specify MO file when merging translations."\r
 687     sys.exit(3)\r
 688 \r
 689 ultimate_tags = read_finaltags(ultimate)\r
 690 ignored_tags = read_ignoredtags(ignored)\r
 691 \r
 692 # I'm not particularly happy about making any of these global,\r
 693 # but I don't want to bother too much with it right now\r
 694 semitrans = {}\r
 695 PlaceHolder = 0\r
 696 if origxml == '':\r
 697     msg = MessageOutput()\r
 698 else:\r
 699     filenames.append(origxml)\r
 700     msg = MessageOutput(1)\r
 701 \r
 702 for filename in filenames:\r
 703     try:\r
 704         if filename == origxml:\r
 705             msg.translationsFollow()\r
 706         ctxt = libxml2.createFileParserCtxt(filename)\r
 707         ctxt.lineNumbers(1)\r
 708         if expand_all_entities:\r
 709             ctxt.replaceEntities(1)\r
 710         ctxt.parseDocument()\r
 711         doc = ctxt.doc()\r
 712     except:\r
 713         print >> sys.stderr, "Error: cannot open file '%s'." % (filename)\r
 714         sys.exit(1)\r
 715 \r
 716     msg.setFilename(filename)\r
 717     if CurrentXmlMode and origxml=='':\r
 718         CurrentXmlMode.preProcessXml(doc,msg)\r
 719     doSerialize(doc)\r
 720 \r
 721 if output == '-':\r
 722     out = sys.stdout\r
 723 else:\r
 724     try:\r
 725         out = file(output, 'w')\r
 726     except:\r
 727         print >> sys.stderr, "Error: cannot open file %s for writing." % (output)\r
 728         sys.exit(5)\r
 729 \r
 730 if mode != 'merge':\r
 731     if CurrentXmlMode:\r
 732         tcmsg = CurrentXmlMode.getStringForTranslators()\r
 733         tccom = CurrentXmlMode.getCommentForTranslators()\r
 734         if tcmsg:\r
 735             msg.outputMessage(tcmsg, 0, tccom)\r
 736     if CurrentXmlMode:\r
 737         tcmsg = CurrentXmlMode.getStringForTranslation()\r
 738         tccom = CurrentXmlMode.getCommentForTranslation()\r
 739         if tcmsg:\r
 740             msg.outputMessage(tcmsg, 0, tccom)\r
 741 \r
 742     msg.outputAll(out)\r
 743 else:\r
 744     if CurrentXmlMode:\r
 745         tcmsg = CurrentXmlMode.getStringForTranslators()\r
 746         if tcmsg:\r
 747             tnames = getTranslation(tcmsg)\r
 748         else:\r
 749             tnames = ''\r
 750         tcmsg = CurrentXmlMode.getStringForTranslation()\r
 751         if tcmsg:\r
 752             tstring = getTranslation(tcmsg)\r
 753         else:\r
 754             tstring = ''\r
 755 \r
 756         CurrentXmlMode.postProcessXmlTranslation(doc, translationlanguage, tnames, tstring)\r
 757     out.write(doc.serialize('utf-8', 1))\r