txt2tags.py

   1 #!/usr/bin/env python
   2 # txt2tags - generic text conversion tool
   3 # http://txt2tags.sf.net
   4 #
   5 # Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 Aurelio Jargas
   6 #
   7 #   This program is free software; you can redistribute it and/or modify
   8 #   it under the terms of the GNU General Public License as published by
   9 #   the Free Software Foundation, version 2.
  10 #
  11 #   This program is distributed in the hope that it will be useful,
  12 #   but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 #   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 #   GNU General Public License for more details.
  15 #
  16 #   You have received a copy of the GNU General Public License along
  17 #   with this program, on the COPYING file.
  18 #
  19 ########################################################################
  20 #
  21 #   BORING CODE EXPLANATION AHEAD
  22 #
  23 # Just read it if you wish to understand how the txt2tags code works.
  24 #
  25 ########################################################################
  26 #
  27 # The code that [1] parses the marked text is separated from the
  28 # code that [2] inserts the target tags.
  29 #
  30 #   [1] made by: def convert()
  31 #   [2] made by: class BlockMaster
  32 #
  33 # The structures of the marked text are identified and its contents are
  34 # extracted into a data holder (Python lists and dictionaries).
  35 #
  36 # When parsing the source file, the blocks (para, lists, quote, table)
  37 # are opened with BlockMaster, right when found. Then their contents,
  38 # which span several lines, are fed into a special holder on the
  39 # BlockMaster instance. Only when the block is closed are the target tags
  40 # inserted for the full block as a whole, in one pass. This way, we
  41 # have better control over blocks. Much better than the previous line by
  42 # line approach.
  43 #
  44 # In other words, whenever inside a block, the parser *holds* the tag
  45 # insertion process, waiting until the full block is read. That was
  46 # needed primarily to close paragraphs for the XHTML target, but
  47 # proved to be a very good addition, improving much other processing.
  48 #
  49 # -------------------------------------------------------------------
  50 #
  51 # These important classes are all documented:
  52 # CommandLine, SourceDocument, ConfigMaster, ConfigLines.
  53 #
  54 # There is a RAW Config format and all kinds of configuration are first
  55 # converted to this format. Then a generic method parses it.
  56 #
  57 # These functions get information about the input file(s) and take
  58 # care of the init processing:
  59 # get_infiles_config(), process_source_file() and convert_this_files()
  60 #
  61 ########################################################################
  62
  63 #XXX Python coding warning
  64 # Avoid common mistakes:
  65 # - do NOT use newlist=list instead of newlist=list[:]
  66 # - do NOT use newdic=dic   instead of newdic=dic.copy()
  67 # - do NOT use dic[key]     instead of dic.get(key)
  68 # - do NOT use del dic[key] without has_key() before
  69
  70 #XXX Smart Image Align doesn't work if the image is a link
  71 # Can't fix that because the image is expanded together with the
  72 # link, at the linkbank filling moment. Only the image is passed
  73 # to parse_images(), not the full line, so it is always 'middle'.
  74
  75 #XXX Paragraph separation not valid inside Quote
  76 # Quote will not have <p></p> inside; instead it will close and open
  77 # the <blockquote> again. This is really bad for CSS, when defining a
  78 # different background color. Still don't know how to fix it.
  79
  80 #XXX TODO (maybe)
  81 # New mark or macro which expands to an anchor full title.
  82 # It is necessary to parse the full document in this order:
  83 #  DONE  1st scan: HEAD: get all settings, including %!includeconf
  84 #  DONE  2nd scan: BODY: expand includes & apply %!preproc
  85 #        3rd scan: BODY: read titles and compose TOC info
  86 #        4th scan: BODY: full parsing, expanding [#anchor] 1st
  87 # Steps 2 and 3 can be made together, with no tag adding.
  88 # Two complete body scans will be *slow*, don't know if it is worth it.
  89 # One solution may be to add the titles as postproc rules
  90
  91
  92 ##############################################################################
  93
  94 # User config (1=ON, 0=OFF)
  95
  96 USE_I18N    = 1   # use gettext for i18ned messages?        (default is 1)
  97 COLOR_DEBUG = 1   # show debug messages in colors?          (default is 1)
  98 BG_LIGHT    = 0   # your terminal background color is light (default is 0)
  99 HTML_LOWER  = 0   # use lowercased HTML tags instead upper? (default is 0)
 100
 101 ##############################################################################
 102
 103
 104 # These are all the core Python modules used by txt2tags (KISS!)
 105 import re, string, os, sys, time, getopt
 106
 107 # Program information
 108 my_url = 'http://txt2tags.sf.net'
 109 my_name = 'txt2tags'
 110 my_email = 'verde@aurelio.net'
 111 my_version = '2.5'
 112
 113 # i18n - just use if available
 114 if USE_I18N:
 115         try:
 116                 import gettext
 117                 # If your locale dir is different, change it here
 118                 cat = gettext.Catalog('txt2tags',localedir='/usr/share/locale/')
 119                 _ = cat.gettext
 120         except:
 121                 _ = lambda x:x
 122 else:
 123         _ = lambda x:x
 124
 125 # FLAGS   : the conversion related flags  , may be used in %!options
 126 # OPTIONS : the conversion related options, may be used in %!options
 127 # ACTIONS : the other behavior modifiers, valid on command line only
 128 # MACROS  : the valid macros with their default values for formatting
 129 # SETTINGS: global miscellaneous settings, valid on RC file only
 130 # NO_TARGET: actions that don't require a target specification
 131 # NO_MULTI_INPUT: actions that don't accept more than one input file
 132 # CONFIG_KEYWORDS: the valid %!key:val keywords
 133 #
 134 # FLAGS and OPTIONS are configs that affect the converted document.
 135 # They usually have also a --no-<option> to turn them OFF.
 136 #
 137 # ACTIONS are needed because when handling multiple input files, strange
 138 # behavior would occur, such as using the command line interface for the
 139 # first file and the GUI for the second. There is no --no-<action>.
 140 # --version and --help inside %!options would also be odd.
 141 #
 142 TARGETS  = 'html xhtml sgml tex lout man mgp wiki gwiki doku moin pm6 txt'.split()
 143
 144 FLAGS    = {'headers'    :1 , 'enum-title' :0 , 'mask-email' :0 ,
 145             'toc-only'   :0 , 'toc'        :0 , 'rc'         :1 ,
 146             'css-sugar'  :0 , 'css-suggar' :0 , 'css-inside' :0 ,
 147             'quiet'      :0 }
 148 OPTIONS  = {'target'     :'', 'toc-level'  :3 , 'style'      :'',
 149             'infile'     :'', 'outfile'    :'', 'encoding'   :'',
 150             'config-file':'', 'split'      :0 , 'lang'       :'',
 151             'show-config-value':'' }
 152 ACTIONS  = {'help'       :0 , 'version'    :0 , 'gui'        :0 ,
 153             'verbose'    :0 , 'debug'      :0 , 'dump-config':0 ,
 154             'dump-source':0 }
 155 MACROS   = {'date' : '%Y%m%d',  'infile': '%f',
 156             'mtime': '%Y%m%d', 'outfile': '%f'}
 157 SETTINGS = {}         # for future use
 158 NO_TARGET = ['help', 'version', 'gui', 'toc-only', 'dump-config', 'dump-source']
 159 NO_MULTI_INPUT = ['gui','dump-config','dump-source']
 160 CONFIG_KEYWORDS = [
 161             'target', 'encoding', 'style', 'options', 'preproc','postproc',
 162             'guicolors']
 163
 164 TARGET_NAMES = {
 165   'html' : _('HTML page'),
 166   'xhtml': _('XHTML page'),
 167   'sgml' : _('SGML document'),
 168   'tex'  : _('LaTeX document'),
 169   'lout' : _('Lout document'),
 170   'man'  : _('UNIX Manual page'),
 171   'mgp'  : _('MagicPoint presentation'),
 172   'wiki' : _('Wikipedia page'),
 173   'gwiki': _('Google Wiki page'),
 174   'doku' : _('DokuWiki page'),
 175   'moin' : _('MoinMoin page'),
 176   'pm6'  : _('PageMaker document'),
 177   'txt'  : _('Plain Text'),
 178 }
 179
 180 DEBUG = 0     # do not edit here, please use --debug
 181 VERBOSE = 0   # do not edit here, please use -v, -vv or -vvv
 182 QUIET = 0     # do not edit here, please use --quiet
 183 GUI = 0       # do not edit here, please use --gui
 184 AUTOTOC = 1   # do not edit here, please use --no-toc or %%toc
 185
 186 RC_RAW = []
 187 CMDLINE_RAW = []
 188 CONF = {}
 189 BLOCK = None
 190 regex = {}
 191 TAGS = {}
 192 rules = {}
 193
 194 lang = 'english'
 195 TARGET = ''
 196
 197 STDIN = STDOUT = '-'
 198 MODULEIN = MODULEOUT = '-module-'
 199 ESCCHAR   = '\x00'
 200 SEPARATOR = '\x01'
 201 LISTNAMES = {'-':'list', '+':'numlist', ':':'deflist'}
 202 LINEBREAK = {'default':'\n', 'win':'\r\n', 'mac':'\r'}
 203
 204 # Platform specific settings
 205 LB = LINEBREAK.get(sys.platform[:3]) or LINEBREAK['default']
 206
 207 VERSIONSTR = _("%s version %s <%s>")%(my_name,my_version,my_url)
 208
 209 USAGE = string.join([
 210 '',
 211 _("Usage: %s [OPTIONS] [infile.t2t ...]") % my_name,
 212 '',
 213 _("  -t, --target=TYPE   set target document type. currently supported:"),
 214 '                      %s,' % string.join(TARGETS[:8], ', '),
 215 '                      %s'  % string.join(TARGETS[8:], ', '),
 216 _("  -i, --infile=FILE   set FILE as the input file name ('-' for STDIN)"),
 217 _("  -o, --outfile=FILE  set FILE as the output file name ('-' for STDOUT)"),
 218 _("  -H, --no-headers    suppress header, title and footer contents"),
 219 _("      --headers       show header, title and footer contents (default ON)"),
 220 _("      --encoding=ENC  set target file encoding (utf-8, iso-8859-1, etc)"),
 221 _("      --style=FILE    use FILE as the document style (like HTML CSS)"),
 222 _("      --css-sugar     insert CSS-friendly tags for HTML and XHTML targets"),
 223 _("      --css-inside    insert CSS file contents inside HTML/XHTML headers"),
 224 _("      --mask-email    hide email from spam robots. x@y.z turns <x (a) y z>"),
 225 _("      --toc           add TOC (Table of Contents) to target document"),
 226 _("      --toc-only      print document TOC and exit"),
 227 _("      --toc-level=N   set maximum TOC level (depth) to N"),
 228 _("  -n, --enum-title    enumerate all titles as 1, 1.1, 1.1.1, etc"),
 229 _("  -C, --config-file=F read config from file F"),
 230 _("      --rc            read user config file ~/.txt2tagsrc (default ON)"),
 231 _("      --gui           invoke Graphical Tk Interface"),
 232 _("  -q, --quiet         quiet mode, suppress all output (except errors)"),
 233 _("  -v, --verbose       print informative messages during conversion"),
 234 _("  -h, --help          print this help information and exit"),
 235 _("  -V, --version       print program version and exit"),
 236 _("      --dump-config   print all the config found and exit"),
 237 _("      --dump-source   print the document source, with includes expanded"),
 238 '',
 239 _("Turn OFF options:"),
 240 "     --no-outfile, --no-infile, --no-style, --no-encoding, --no-headers",
 241 "     --no-toc, --no-toc-only, --no-mask-email, --no-enum-title, --no-rc",
 242 "     --no-css-sugar, --no-css-inside, --no-quiet, --no-dump-config",
 243 "     --no-dump-source",
 244 '',
 245 _("Example:\n     %s -t html --toc myfile.t2t") % my_name,
 246 '',
 247 _("By default, converted output is saved to 'infile.<target>'."),
 248 _("Use --outfile to force an output file name."),
 249 _("If  input file is '-', reads from STDIN."),
 250 _("If output file is '-', dumps output to STDOUT."),
 251 '',
 252 'http://txt2tags.sourceforge.net',
 253 ''
 254 ], '\n')
 255
 256
 257 ##############################################################################
 258
 259
 260 # Here is all the target's templates
 261 # You may edit them to fit your needs
 262 #  - the %(HEADERn)s strings represent the Header lines
 263 #  - the %(STYLE)s string is changed by --style contents
 264 #  - the %(ENCODING)s string is changed by --encoding contents
 265 #  - if any of the above is empty, the full line is removed
 266 #  - use %% to represent a literal %
 267 #
 268 HEADER_TEMPLATE = {
 269   'txt': """\
 270 %(HEADER1)s
 271 %(HEADER2)s
 272 %(HEADER3)s
 273 """,
 274
 275   'sgml': """\
 276 <!doctype linuxdoc system>
 277 <article>
 278 <title>%(HEADER1)s
 279 <author>%(HEADER2)s
 280 <date>%(HEADER3)s
 281 """,
 282
 283   'html': """\
 284 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
 285 <HTML>
 286 <HEAD>
 287 <META NAME="generator" CONTENT="http://txt2tags.sf.net">
 288 <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%(ENCODING)s">
 289 <LINK REL="stylesheet" TYPE="text/css" HREF="%(STYLE)s">
 290 <TITLE>%(HEADER1)s</TITLE>
 291 </HEAD><BODY BGCOLOR="white" TEXT="black">
 292 <P ALIGN="center"><CENTER><H1>%(HEADER1)s</H1>
 293 <FONT SIZE="4">
 294 <I>%(HEADER2)s</I><BR>
 295 %(HEADER3)s
 296 </FONT></CENTER>
 297 """,
 298
 299   'htmlcss': """\
 300 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
 301 <HTML>
 302 <HEAD>
 303 <META NAME="generator" CONTENT="http://txt2tags.sf.net">
 304 <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%(ENCODING)s">
 305 <LINK REL="stylesheet" TYPE="text/css" HREF="%(STYLE)s">
 306 <TITLE>%(HEADER1)s</TITLE>
 307 </HEAD>
 308 <BODY>
 309
 310 <DIV CLASS="header" ID="header">
 311 <H1>%(HEADER1)s</H1>
 312 <H2>%(HEADER2)s</H2>
 313 <H3>%(HEADER3)s</H3>
 314 </DIV>
 315 """,
 316
 317   'xhtml': """\
 318 <?xml version="1.0"
 319       encoding="%(ENCODING)s"
 320 ?>
 321 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\
 322  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 323 <html xmlns="http://www.w3.org/1999/xhtml">
 324 <head>
 325 <title>%(HEADER1)s</title>
 326 <meta name="generator" content="http://txt2tags.sf.net" />
 327 <link rel="stylesheet" type="text/css" href="%(STYLE)s" />
 328 </head>
 329 <body bgcolor="white" text="black">
 330 <div align="center">
 331 <h1>%(HEADER1)s</h1>
 332 <h2>%(HEADER2)s</h2>
 333 <h3>%(HEADER3)s</h3>
 334 </div>
 335 """,
 336
 337   'xhtmlcss': """\
 338 <?xml version="1.0"
 339       encoding="%(ENCODING)s"
 340 ?>
 341 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\
 342  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 343 <html xmlns="http://www.w3.org/1999/xhtml">
 344 <head>
 345 <title>%(HEADER1)s</title>
 346 <meta name="generator" content="http://txt2tags.sf.net" />
 347 <link rel="stylesheet" type="text/css" href="%(STYLE)s" />
 348 </head>
 349 <body>
 350
 351 <div class="header" id="header">
 352 <h1>%(HEADER1)s</h1>
 353 <h2>%(HEADER2)s</h2>
 354 <h3>%(HEADER3)s</h3>
 355 </div>
 356 """,
 357
 358   'man': """\
 359 .TH "%(HEADER1)s" 1 "%(HEADER3)s" "%(HEADER2)s"
 360 """,
 361
 362 # TODO style to <HR>
 363   'pm6': """\
 364 <PMTags1.0 win><C-COLORTABLE ("Preto" 1 0 0 0)
 365 ><@Normal=
 366   <FONT "Times New Roman"><CCOLOR "Preto"><SIZE 11>
 367   <HORIZONTAL 100><LETTERSPACE 0><CTRACK 127><CSSIZE 70><C+SIZE 58.3>
 368   <C-POSITION 33.3><C+POSITION 33.3><P><CBASELINE 0><CNOBREAK 0><CLEADING -0.05>
 369   <GGRID 0><GLEFT 7.2><GRIGHT 0><GFIRST 0><G+BEFORE 7.2><G+AFTER 0>
 370   <GALIGNMENT "justify"><GMETHOD "proportional"><G& "ENGLISH">
 371   <GPAIRS 12><G%% 120><GKNEXT 0><GKWIDOW 0><GKORPHAN 0><GTABS $>
 372   <GHYPHENATION 2 34 0><GWORDSPACE 75 100 150><GSPACE -5 0 25>
 373 ><@Bullet=<@-PARENT "Normal"><FONT "Abadi MT Condensed Light">
 374   <GLEFT 14.4><G+BEFORE 2.15><G%% 110><GTABS(25.2 l "")>
 375 ><@PreFormat=<@-PARENT "Normal"><FONT "Lucida Console"><SIZE 8><CTRACK 0>
 376   <GLEFT 0><G+BEFORE 0><GALIGNMENT "left"><GWORDSPACE 100 100 100><GSPACE 0 0 0>
 377 ><@Title1=<@-PARENT "Normal"><FONT "Arial"><SIZE 14><B>
 378   <GCONTENTS><GLEFT 0><G+BEFORE 0><GALIGNMENT "left">
 379 ><@Title2=<@-PARENT "Title1"><SIZE 12><G+BEFORE 3.6>
 380 ><@Title3=<@-PARENT "Title1"><SIZE 10><GLEFT 7.2><G+BEFORE 7.2>
 381 ><@Title4=<@-PARENT "Title3">
 382 ><@Title5=<@-PARENT "Title3">
 383 ><@Quote=<@-PARENT "Normal"><SIZE 10><I>>
 384
 385 %(HEADER1)s
 386 %(HEADER2)s
 387 %(HEADER3)s
 388 """,
 389
 390   'mgp': """\
 391 #!/usr/X11R6/bin/mgp -t 90
 392 %%deffont "normal"    xfont  "utopia-medium-r", charset "iso8859-1"
 393 %%deffont "normal-i"  xfont  "utopia-medium-i", charset "iso8859-1"
 394 %%deffont "normal-b"  xfont  "utopia-bold-r"  , charset "iso8859-1"
 395 %%deffont "normal-bi" xfont  "utopia-bold-i"  , charset "iso8859-1"
 396 %%deffont "mono"      xfont "courier-medium-r", charset "iso8859-1"
 397 %%default 1 size 5
 398 %%default 2 size 8, fore "yellow", font "normal-b", center
 399 %%default 3 size 5, fore "white",  font "normal", left, prefix "  "
 400 %%tab 1 size 4, vgap 30, prefix "     ", icon arc "red" 40, leftfill
 401 %%tab 2 prefix "            ", icon arc "orange" 40, leftfill
 402 %%tab 3 prefix "                   ", icon arc "brown" 40, leftfill
 403 %%tab 4 prefix "                          ", icon arc "darkmagenta" 40, leftfill
 404 %%tab 5 prefix "                                ", icon arc "magenta" 40, leftfill
 405 %%%%------------------------- end of headers -----------------------------
 406 %%page
 407
 408
 409
 410
 411
 412 %%size 10, center, fore "yellow"
 413 %(HEADER1)s
 414
 415 %%font "normal-i", size 6, fore "white", center
 416 %(HEADER2)s
 417
 418 %%font "mono", size 7, center
 419 %(HEADER3)s
 420 """,
 421
 422   'moin': """\
 423 '''%(HEADER1)s'''
 424
 425 ''%(HEADER2)s''
 426
 427 %(HEADER3)s
 428 """,
 429
 430   'gwiki': """\
 431 *%(HEADER1)s*
 432
 433 %(HEADER2)s
 434
 435 _%(HEADER3)s_
 436
 437 """,
 438
 439   'doku': """\
 440 ===== %(HEADER1)s =====
 441
 442 **//%(HEADER2)s//**
 443
 444 //%(HEADER3)s//
 445 """,
 446
 447   'wiki': """\
 448 '''%(HEADER1)s'''
 449
 450 %(HEADER2)s
 451
 452 ''%(HEADER3)s''
 453
 454 """,
 455
 456   'tex': \
 457 r"""\documentclass{article}
 458 \usepackage{graphicx}
 459 \usepackage[normalem]{ulem} %% needed by strike
 460 \usepackage[urlcolor=blue,colorlinks=true]{hyperref}
 461 \usepackage[%(ENCODING)s]{inputenc}  %% char encoding
 462 \usepackage{%(STYLE)s}  %% user defined
 463
 464 \title{%(HEADER1)s}
 465 \author{%(HEADER2)s}
 466 \begin{document}
 467 \date{%(HEADER3)s}
 468 \maketitle
 469 \clearpage
 470 """,
 471
 472   'lout': """\
 473 @SysInclude { doc }
 474 @Document
 475   @InitialFont { Times Base 12p }  # Times, Courier, Helvetica, ...
 476   @PageOrientation { Portrait }    # Portrait, Landscape
 477   @ColumnNumber { 1 }              # Number of columns (2, 3, ...)
 478   @PageHeaders { Simple }          # None, Simple, Titles, NoTitles
 479   @InitialLanguage { English }     # German, French, Portuguese, ...
 480   @OptimizePages { Yes }           # Yes/No smart page break feature
 481 //
 482 @Text @Begin
 483 @Display @Heading { %(HEADER1)s }
 484 @Display @I { %(HEADER2)s }
 485 @Display { %(HEADER3)s }
 486 #@NP                               # Break page after Headers
 487 """
 488 # @SysInclude { tbl }                   # Tables support
 489 # setup: @MakeContents { Yes }          # show TOC
 490 # setup: @SectionGap                    # break page at each section
 491 }
 492
 493
 494 ##############################################################################
 495
 496
 497 def getTags(config):
 498         "Returns all the known tags for the specified target"
 499
 500         keys = """
 501         title1              numtitle1
 502         title2              numtitle2
 503         title3              numtitle3
 504         title4              numtitle4
 505         title5              numtitle5
 506         title1Open          title1Close
 507         title2Open          title2Close
 508         title3Open          title3Close
 509         title4Open          title4Close
 510         title5Open          title5Close
 511         blocktitle1Open     blocktitle1Close
 512         blocktitle2Open     blocktitle2Close
 513         blocktitle3Open     blocktitle3Close
 514
 515         paragraphOpen       paragraphClose
 516         blockVerbOpen       blockVerbClose
 517         blockQuoteOpen      blockQuoteClose blockQuoteLine
 518         blockCommentOpen    blockCommentClose
 519
 520         fontMonoOpen        fontMonoClose
 521         fontBoldOpen        fontBoldClose
 522         fontItalicOpen      fontItalicClose
 523         fontUnderlineOpen   fontUnderlineClose
 524         fontStrikeOpen      fontStrikeClose
 525
 526         listOpen            listClose
 527         listItemOpen        listItemClose     listItemLine
 528         numlistOpen         numlistClose
 529         numlistItemOpen     numlistItemClose  numlistItemLine
 530         deflistOpen         deflistClose
 531         deflistItem1Open    deflistItem1Close
 532         deflistItem2Open    deflistItem2Close deflistItem2LinePrefix
 533
 534         bar1                bar2
 535         url                 urlMark
 536         email               emailMark
 537         img                 imgAlignLeft  imgAlignRight  imgAlignCenter
 538                            _imgAlignLeft _imgAlignRight _imgAlignCenter
 539
 540         tableOpen           tableClose
 541         _tableBorder        _tableAlignLeft      _tableAlignCenter
 542         tableRowOpen        tableRowClose        tableRowSep
 543         tableTitleRowOpen   tableTitleRowClose
 544         tableCellOpen       tableCellClose       tableCellSep
 545         tableTitleCellOpen  tableTitleCellClose  tableTitleCellSep
 546         _tableColAlignLeft  _tableColAlignRight  _tableColAlignCenter
 547         _tableCellAlignLeft _tableCellAlignRight _tableCellAlignCenter
 548         _tableCellColSpan   tableColAlignSep
 549
 550         bodyOpen            bodyClose
 551         cssOpen             cssClose
 552         tocOpen             tocClose             TOC
 553         anchor
 554         comment
 555         pageBreak
 556         EOD
 557         """.split()
 558
 559         # TIP: \a represents the current text on the mark
 560         # TIP: ~A~, ~B~ and ~C~ are expanded to other tags parts
 561
 562         alltags = {
 563
 564         'txt': {
 565            'title1'              : '  \a'      ,
 566            'title2'              : '\t\a'      ,
 567            'title3'              : '\t\t\a'    ,
 568            'title4'              : '\t\t\t\a'  ,
 569            'title5'              : '\t\t\t\t\a',
 570            'blockQuoteLine'      : '\t'        ,
 571            'listItemOpen'        : '- '        ,
 572            'numlistItemOpen'     : '\a. '      ,
 573            'bar1'                : '\a'        ,
 574            'url'                 : '\a'        ,
 575            'urlMark'             : '\a (\a)'   ,
 576            'email'               : '\a'        ,
 577            'emailMark'           : '\a (\a)'   ,
 578            'img'                 : '[\a]'      ,
 579         },
 580
 581         'html': {
 582            'paragraphOpen'        : '<P>'            ,
 583            'paragraphClose'       : '</P>'           ,
 584            'title1'               : '~A~<H1>\a</H1>' ,
 585            'title2'               : '~A~<H2>\a</H2>' ,
 586            'title3'               : '~A~<H3>\a</H3>' ,
 587            'title4'               : '~A~<H4>\a</H4>' ,
 588            'title5'               : '~A~<H5>\a</H5>' ,
 589            'anchor'               : '<A NAME="\a"></A>\n',
 590            'blockVerbOpen'        : '<PRE>'          ,
 591            'blockVerbClose'       : '</PRE>'         ,
 592            'blockQuoteOpen'       : '<BLOCKQUOTE>'   ,
 593            'blockQuoteClose'      : '</BLOCKQUOTE>'  ,
 594            'fontMonoOpen'         : '<CODE>'         ,
 595            'fontMonoClose'        : '</CODE>'        ,
 596            'fontBoldOpen'         : '<B>'            ,
 597            'fontBoldClose'        : '</B>'           ,
 598            'fontItalicOpen'       : '<I>'            ,
 599            'fontItalicClose'      : '</I>'           ,
 600            'fontUnderlineOpen'    : '<U>'            ,
 601            'fontUnderlineClose'   : '</U>'           ,
 602            'fontStrikeOpen'       : '<S>'            ,
 603            'fontStrikeClose'      : '</S>'           ,
 604            'listOpen'             : '<UL>'           ,
 605            'listClose'            : '</UL>'          ,
 606            'listItemOpen'         : '<LI>'           ,
 607            'numlistOpen'          : '<OL>'           ,
 608            'numlistClose'         : '</OL>'          ,
 609            'numlistItemOpen'      : '<LI>'           ,
 610            'deflistOpen'          : '<DL>'           ,
 611            'deflistClose'         : '</DL>'          ,
 612            'deflistItem1Open'     : '<DT>'           ,
 613            'deflistItem1Close'    : '</DT>'          ,
 614            'deflistItem2Open'     : '<DD>'           ,
 615            'bar1'                 : '<HR NOSHADE SIZE=1>'        ,
 616            'bar2'                 : '<HR NOSHADE SIZE=5>'        ,
 617            'url'                  : '<A HREF="\a">\a</A>'        ,
 618            'urlMark'              : '<A HREF="\a">\a</A>'        ,
 619            'email'                : '<A HREF="mailto:\a">\a</A>' ,
 620            'emailMark'            : '<A HREF="mailto:\a">\a</A>' ,
 621            'img'                  : '<IMG~A~ SRC="\a" BORDER="0" ALT="">',
 622            '_imgAlignLeft'        : ' ALIGN="left"'  ,
 623            '_imgAlignCenter'      : ' ALIGN="middle"',
 624            '_imgAlignRight'       : ' ALIGN="right"' ,
 625            'tableOpen'            : '<TABLE~A~~B~ CELLPADDING="4">',
 626            'tableClose'           : '</TABLE>'       ,
 627            'tableRowOpen'         : '<TR>'           ,
 628            'tableRowClose'        : '</TR>'          ,
 629            'tableCellOpen'        : '<TD~A~~S~>'     ,
 630            'tableCellClose'       : '</TD>'          ,
 631            'tableTitleCellOpen'   : '<TH~S~>'        ,
 632            'tableTitleCellClose'  : '</TH>'          ,
 633            '_tableBorder'         : ' BORDER="1"'    ,
 634            '_tableAlignCenter'    : ' ALIGN="center"',
 635            '_tableCellAlignRight' : ' ALIGN="right"' ,
 636            '_tableCellAlignCenter': ' ALIGN="center"',
 637            '_tableCellColSpan'    : ' COLSPAN="\a"'  ,
 638            'cssOpen'              : '<STYLE TYPE="text/css">',
 639            'cssClose'             : '</STYLE>'       ,
 640            'comment'              : '<!-- \a -->'    ,
 641            'EOD'                  : '</BODY></HTML>'
 642         },
 643
 644         #TIP xhtml inherits all HTML definitions (lowercased)
 645         #TIP http://www.w3.org/TR/xhtml1/#guidelines
 646         #TIP http://www.htmlref.com/samples/Chapt17/17_08.htm
 647         'xhtml': {
 648            'listItemClose'       : '</li>'          ,
 649            'numlistItemClose'    : '</li>'          ,
 650            'deflistItem2Close'   : '</dd>'          ,
 651            'bar1'                : '<hr class="light" />',
 652            'bar2'                : '<hr class="heavy" />',
 653            'anchor'              : '<a id="\a" name="\a"></a>\n',
 654            'img'                 : '<img~A~ src="\a" border="0" alt=""/>',
 655         },
 656
 657         'sgml': {
 658            'paragraphOpen'       : '<p>'                ,
 659            'title1'              : '<sect>\a~A~<p>'     ,
 660            'title2'              : '<sect1>\a~A~<p>'    ,
 661            'title3'              : '<sect2>\a~A~<p>'    ,
 662            'title4'              : '<sect3>\a~A~<p>'    ,
 663            'title5'              : '<sect4>\a~A~<p>'    ,
 664            'anchor'              : '<label id="\a">'    ,
 665            'blockVerbOpen'       : '<tscreen><verb>'    ,
 666            'blockVerbClose'      : '</verb></tscreen>'  ,
 667            'blockQuoteOpen'      : '<quote>'            ,
 668            'blockQuoteClose'     : '</quote>'           ,
 669            'fontMonoOpen'        : '<tt>'               ,
 670            'fontMonoClose'       : '</tt>'              ,
 671            'fontBoldOpen'        : '<bf>'               ,
 672            'fontBoldClose'       : '</bf>'              ,
 673            'fontItalicOpen'      : '<em>'               ,
 674            'fontItalicClose'     : '</em>'              ,
 675            'fontUnderlineOpen'   : '<bf><em>'           ,
 676            'fontUnderlineClose'  : '</em></bf>'         ,
 677            'listOpen'            : '<itemize>'          ,
 678            'listClose'           : '</itemize>'         ,
 679            'listItemOpen'        : '<item>'             ,
 680            'numlistOpen'         : '<enum>'             ,
 681            'numlistClose'        : '</enum>'            ,
 682            'numlistItemOpen'     : '<item>'             ,
 683            'deflistOpen'         : '<descrip>'          ,
 684            'deflistClose'        : '</descrip>'         ,
 685            'deflistItem1Open'    : '<tag>'              ,
 686            'deflistItem1Close'   : '</tag>'             ,
 687            'bar1'                : '<!-- \a -->'        ,
 688            'url'                 : '<htmlurl url="\a" name="\a">'        ,
 689            'urlMark'             : '<htmlurl url="\a" name="\a">'        ,
 690            'email'               : '<htmlurl url="mailto:\a" name="\a">' ,
 691            'emailMark'           : '<htmlurl url="mailto:\a" name="\a">' ,
 692            'img'                 : '<figure><ph vspace=""><img src="\a">'+\
 693                                    '</figure>'                           ,
 694            'tableOpen'           : '<table><tabular ca="~C~">'           ,
 695            'tableClose'          : '</tabular></table>' ,
 696            'tableRowSep'         : '<rowsep>'           ,
 697            'tableCellSep'        : '<colsep>'           ,
 698            '_tableColAlignLeft'  : 'l'                  ,
 699            '_tableColAlignRight' : 'r'                  ,
 700            '_tableColAlignCenter': 'c'                  ,
 701            'comment'             : '<!-- \a -->'        ,
 702            'TOC'                 : '<toc>'              ,
 703            'EOD'                 : '</article>'
 704         },
 705
 706         'tex': {
 707            'title1'              : '\n~A~\section*{\a}'     ,
 708            'title2'              : '~A~\\subsection*{\a}'   ,
 709            'title3'              : '~A~\\subsubsection*{\a}',
 710            # title 4/5: DIRTY: para+BF+\\+\n
 711            'title4'              : '~A~\\paragraph{}\\textbf{\a}\\\\\n',
 712            'title5'              : '~A~\\paragraph{}\\textbf{\a}\\\\\n',
 713            'numtitle1'           : '\n~A~\section{\a}'      ,
 714            'numtitle2'           : '~A~\\subsection{\a}'    ,
 715            'numtitle3'           : '~A~\\subsubsection{\a}' ,
 716            'anchor'              : '\\hypertarget{\a}{}\n'  ,
 717            'blockVerbOpen'       : '\\begin{verbatim}'   ,
 718            'blockVerbClose'      : '\\end{verbatim}'     ,
 719            'blockQuoteOpen'      : '\\begin{quotation}'  ,
 720            'blockQuoteClose'     : '\\end{quotation}'    ,
 721            'fontMonoOpen'        : '\\texttt{'           ,
 722            'fontMonoClose'       : '}'                   ,
 723            'fontBoldOpen'        : '\\textbf{'           ,
 724            'fontBoldClose'       : '}'                   ,
 725            'fontItalicOpen'      : '\\textit{'           ,
 726            'fontItalicClose'     : '}'                   ,
 727            'fontUnderlineOpen'   : '\\underline{'        ,
 728            'fontUnderlineClose'  : '}'                   ,
 729            'fontStrikeOpen'      : '\\sout{'             ,
 730            'fontStrikeClose'     : '}'                   ,
 731            'listOpen'            : '\\begin{itemize}'    ,
 732            'listClose'           : '\\end{itemize}'      ,
 733            'listItemOpen'        : '\\item '             ,
 734            'numlistOpen'         : '\\begin{enumerate}'  ,
 735            'numlistClose'        : '\\end{enumerate}'    ,
 736            'numlistItemOpen'     : '\\item '             ,
 737            'deflistOpen'         : '\\begin{description}',
 738            'deflistClose'        : '\\end{description}'  ,
 739            'deflistItem1Open'    : '\\item['             ,
 740            'deflistItem1Close'   : ']'                   ,
 741            'bar1'                : '\n\\hrulefill{}\n'   ,
 742            'bar2'                : '\n\\rule{\linewidth}{1mm}\n',
 743            'url'                 : '\\htmladdnormallink{\a}{\a}',
 744            'urlMark'             : '\\htmladdnormallink{\a}{\a}',
 745            'email'               : '\\htmladdnormallink{\a}{mailto:\a}',
 746            'emailMark'           : '\\htmladdnormallink{\a}{mailto:\a}',
 747            'img'                 : '\\includegraphics{\a}',
 748            'tableOpen'           : '\\begin{center}\\begin{tabular}{|~C~|}',
 749            'tableClose'          : '\\end{tabular}\\end{center}',
 750            'tableRowOpen'        : '\\hline ' ,
 751            'tableRowClose'       : ' \\\\'    ,
 752            'tableCellSep'        : ' & '      ,
 753            '_tableColAlignLeft'  : 'l'        ,
 754            '_tableColAlignRight' : 'r'        ,
 755            '_tableColAlignCenter': 'c'        ,
 756            'tableColAlignSep'    : '|'        ,
 757            'comment'             : '% \a'     ,
 758            'TOC'                 : '\\tableofcontents',
 759            'pageBreak'           : '\\clearpage',
 760            'EOD'                 : '\\end{document}'
 761         },
 762
 763         'lout': {
 764            'paragraphOpen'       : '@LP'                     ,
 765            'blockTitle1Open'     : '@BeginSections'          ,
 766            'blockTitle1Close'    : '@EndSections'            ,
 767            'blockTitle2Open'     : ' @BeginSubSections'      ,
 768            'blockTitle2Close'    : ' @EndSubSections'        ,
 769            'blockTitle3Open'     : '  @BeginSubSubSections'  ,
 770            'blockTitle3Close'    : '  @EndSubSubSections'    ,
 771            'title1Open'          : '\n~A~@Section @Title { \a } @Begin',
 772            'title1Close'         : '@End @Section'           ,
 773            'title2Open'          : '\n~A~ @SubSection @Title { \a } @Begin',
 774            'title2Close'         : ' @End @SubSection'       ,
 775            'title3Open'          : '\n~A~  @SubSubSection @Title { \a } @Begin',
 776            'title3Close'         : '  @End @SubSubSection'   ,
 777            'title4Open'          : '\n~A~@LP @LeftDisplay @B { \a }',
 778            'title5Open'          : '\n~A~@LP @LeftDisplay @B { \a }',
 779            'anchor'              : '@Tag { \a }\n'       ,
 780            'blockVerbOpen'       : '@LP @ID @F @RawVerbatim @Begin',
 781            'blockVerbClose'      : '@End @RawVerbatim'   ,
 782            'blockQuoteOpen'      : '@QD {'               ,
 783            'blockQuoteClose'     : '}'                   ,
 784            # enclosed inside {} to deal with joined**words**
 785            'fontMonoOpen'        : '{@F {'               ,
 786            'fontMonoClose'       : '}}'                  ,
 787            'fontBoldOpen'        : '{@B {'               ,
 788            'fontBoldClose'       : '}}'                  ,
 789            'fontItalicOpen'      : '{@II {'              ,
 790            'fontItalicClose'     : '}}'                  ,
 791            'fontUnderlineOpen'   : '{@Underline{'        ,
 792            'fontUnderlineClose'  : '}}'                  ,
 793            # the full form is more readable, but could be BL EL LI NL TL DTI
 794            'listOpen'            : '@BulletList'         ,
 795            'listClose'           : '@EndList'            ,
 796            'listItemOpen'        : '@ListItem{'          ,
 797            'listItemClose'       : '}'                   ,
 798            'numlistOpen'         : '@NumberedList'       ,
 799            'numlistClose'        : '@EndList'            ,
 800            'numlistItemOpen'     : '@ListItem{'          ,
 801            'numlistItemClose'    : '}'                   ,
 802            'deflistOpen'         : '@TaggedList'         ,
 803            'deflistClose'        : '@EndList'            ,
 804            'deflistItem1Open'    : '@DropTagItem {'      ,
 805            'deflistItem1Close'   : '}'                   ,
 806            'deflistItem2Open'    : '{'                   ,
 807            'deflistItem2Close'   : '}'                   ,
 808            'bar1'                : '\n@DP @FullWidthRule\n'     ,
 809            'url'                 : '{blue @Colour { \a }}'      ,
 810            'urlMark'             : '\a ({blue @Colour { \a }})' ,
 811            'email'               : '{blue @Colour { \a }}'      ,
 812            'emailMark'           : '\a ({blue Colour{ \a }})'   ,
 813            'img'                 : '~A~@IncludeGraphic { \a }'  ,  # eps only!
 814            '_imgAlignLeft'       : '@LeftDisplay '              ,
 815            '_imgAlignRight'      : '@RightDisplay '             ,
 816            '_imgAlignCenter'     : '@CentredDisplay '           ,
 817            # lout tables are *way* complicated, no support for now
 818            #'tableOpen'          : '~A~@Tbl~B~\naformat{ @Cell A | @Cell B } {',
 819            #'tableClose'         : '}'     ,
 820            #'tableRowOpen'       : '@Rowa\n'       ,
 821            #'tableTitleRowOpen'  : '@HeaderRowa'       ,
 822            #'tableCenterAlign'   : '@CentredDisplay '         ,
 823            #'tableCellOpen'      : '\a {'                     ,  # A, B, ...
 824            #'tableCellClose'     : '}'                        ,
 825            #'_tableBorder'       : '\nrule {yes}'             ,
 826            'comment'             : '# \a'                     ,
 827            # @MakeContents must be on the config file
 828            'TOC'                 : '@DP @ContentsGoesHere @DP',
 829            'pageBreak'           : '\n@NP\n'                  ,
 830            'EOD'                 : '@End @Text'
 831         },
 832
 833         # http://moinmo.in/SyntaxReference
 834         'moin': {
 835            'title1'                : '= \a ='        ,
 836            'title2'                : '== \a =='      ,
 837            'title3'                : '=== \a ==='    ,
 838            'title4'                : '==== \a ===='  ,
 839            'title5'                : '===== \a =====',
 840            'blockVerbOpen'         : '{{{'           ,
 841            'blockVerbClose'        : '}}}'           ,
 842            'blockQuoteLine'        : '  '            ,
 843            'fontMonoOpen'          : '{{{'           ,
 844            'fontMonoClose'         : '}}}'           ,
 845            'fontBoldOpen'          : "'''"           ,
 846            'fontBoldClose'         : "'''"           ,
 847            'fontItalicOpen'        : "''"            ,
 848            'fontItalicClose'       : "''"            ,
 849            'fontUnderlineOpen'     : '__'            ,
 850            'fontUnderlineClose'    : '__'            ,
 851            'fontStrikeOpen'        : '--('           ,
 852            'fontStrikeClose'       : ')--'           ,
 853            'listItemOpen'          : ' * '           ,
 854            'numlistItemOpen'       : ' \a. '         ,
 855            'deflistItem1Open'      : ' '             ,
 856            'deflistItem1Close'     : '::'            ,
 857            'deflistItem2LinePrefix': ' :: '          ,
 858            'bar1'                  : '----'          ,
 859            'bar2'                  : '--------'      ,
 860            'url'                   : '[\a]'          ,
 861            'urlMark'               : '[\a \a]'       ,
 862            'email'                 : '[\a]'          ,
 863            'emailMark'             : '[\a \a]'       ,
 864            'img'                   : '[\a]'          ,
 865            'tableRowOpen'          : '||'            ,
 866            'tableCellOpen'         : '~A~'           ,
 867            'tableCellClose'        : '||'            ,
 868            'tableTitleCellClose'   : '||'            ,
 869            '_tableCellAlignRight'  : '<)>'           ,
 870            '_tableCellAlignCenter' : '<:>'           ,
 871            'comment'               : '/* \a */'      ,
 872            'TOC'                   : '[[TableOfContents]]'
 873         },
 874
 875         # http://code.google.com/p/support/wiki/WikiSyntax
 876         'gwiki': {
 877            'title1'              : '= \a ='        ,
 878            'title2'              : '== \a =='      ,
 879            'title3'              : '=== \a ==='    ,
 880            'title4'              : '==== \a ===='  ,
 881            'title5'              : '===== \a =====',
 882            'blockVerbOpen'       : '{{{'           ,
 883            'blockVerbClose'      : '}}}'           ,
 884            'blockQuoteLine'      : '  '            ,
 885            'fontMonoOpen'        : '{{{'           ,
 886            'fontMonoClose'       : '}}}'           ,
 887            'fontBoldOpen'        : '*'             ,
 888            'fontBoldClose'       : '*'             ,
 889            'fontItalicOpen'      : '_'             , # underline == italic
 890            'fontItalicClose'     : '_'             ,
 891            'fontStrikeOpen'      : '~~'            ,
 892            'fontStrikeClose'     : '~~'            ,
 893            'listItemOpen'        : ' * '           ,
 894            'numlistItemOpen'     : ' # '           ,
 895            'url'                 : '\a'            ,
 896            'urlMark'             : '[\a \a]'       ,
 897            'email'               : 'mailto:\a'     ,
 898            'emailMark'           : '[mailto:\a \a]',
 899            'img'                 : '[\a]'          ,
 900            'tableRowOpen'        : '|| '           ,
 901            'tableRowClose'       : ' ||'           ,
 902            'tableCellSep'        : ' || '          ,
 903         },
 904
 905         # http://wiki.splitbrain.org/wiki:syntax
 906         # Hint: <br> is \\ $
 907         # Hint: You can add footnotes ((This is a footnote))
 908         'doku': {
 909            'title1'              : '===== \a =====',
 910            'title2'              : '==== \a ===='  ,
 911            'title3'              : '=== \a ==='    ,
 912            'title4'              : '== \a =='      ,
 913            'title5'              : '= \a ='        ,
 914            # DokuWiki uses '  ' indentation to mark verb blocks (see indentverbblock)
 915            'blockQuoteLine'      : '>'             ,
 916            'fontMonoOpen'        : "''"            ,
 917            'fontMonoClose'       : "''"            ,
 918            'fontBoldOpen'        : "**"            ,
 919            'fontBoldClose'       : "**"            ,
 920            'fontItalicOpen'      : "//"            ,
 921            'fontItalicClose'     : "//"            ,
 922            'fontUnderlineOpen'   : "__"            ,
 923            'fontUnderlineClose'  : "__"            ,
 924            'fontStrikeOpen'      : '<del>'         ,
 925            'fontStrikeClose'     : '</del>'        ,
 926            'listItemOpen'        : '  * '          ,
 927            'numlistItemOpen'     : '  - '          ,
 928            'bar1'                : '----'          ,
 929            'url'                 : '[[\a]]'        ,
 930            'urlMark'             : '[[\a|\a]]'     ,
 931            'email'               : '[[\a]]'        ,
 932            'emailMark'           : '[[\a|\a]]'     ,
 933            'img'                 : '{{\a}}'        ,
 934            'imgAlignLeft'        : '{{\a }}'       ,
 935            'imgAlignRight'       : '{{ \a}}'       ,
 936            'imgAlignCenter'      : '{{ \a }}'      ,
 937            'tableTitleRowOpen'   : '^ '            ,
 938            'tableTitleRowClose'  : ' ^'            ,
 939            'tableTitleCellSep'   : ' ^ '           ,
 940            'tableRowOpen'        : '| '            ,
 941            'tableRowClose'       : ' |'            ,
 942            'tableCellSep'        : ' | '           ,
 943 # DokuWiki has no attributes. The content must be aligned!
 944 #          '_tableCellAlignRight' : '<)>'           , # ??
 945 #          '_tableCellAlignCenter': '<:>'           , # ??
 946 # DokuWiki colspan is the same as txt2tags' with multiple |||
 947            # 'comment'             : '## \a'         , # ??
 948            # TOC is automatic
 949         },
 950
 951         # http://en.wikipedia.org/wiki/Help:Editing
 952         'wiki': {
 953            'title1'              : '== \a =='        ,
 954            'title2'              : '=== \a ==='      ,
 955            'title3'              : '==== \a ===='    ,
 956            'title4'              : '===== \a ====='  ,
 957            'title5'              : '====== \a ======',
 958            'blockVerbOpen'       : '<pre>'           ,
 959            'blockVerbClose'      : '</pre>'          ,
 960            'blockQuoteOpen'      : '<blockquote>'    ,
 961            'blockQuoteClose'     : '</blockquote>'   ,
 962            'fontMonoOpen'        : '<tt>'            ,
 963            'fontMonoClose'       : '</tt>'           ,
 964            'fontBoldOpen'        : "'''"             ,
 965            'fontBoldClose'       : "'''"             ,
 966            'fontItalicOpen'      : "''"              ,
 967            'fontItalicClose'     : "''"              ,
 968            'fontUnderlineOpen'   : '<u>'             ,
 969            'fontUnderlineClose'  : '</u>'            ,
 970            'fontStrikeOpen'      : '<s>'             ,
 971            'fontStrikeClose'     : '</s>'            ,
 972                 #XXX Mixed lists not working: *#* list inside numlist inside list
 973            'listItemLine'        : '*'               ,
 974            'numlistItemLine'     : '#'               ,
 975            'deflistItem1Open'    : '; '              ,
 976            'deflistItem2LinePrefix': ': '            ,
 977            'bar1'                : '----'            ,
 978            'url'                 : '[\a]'            ,
 979            'urlMark'             : '[\a \a]'         ,
 980            'email'               : 'mailto:\a'       ,
 981            'emailMark'           : '[mailto:\a \a]'  ,
 982            # [[Image:foo.png|right|Optional alt/caption text]] (right, left, center, none)
 983            'img'                 : '[[Image:\a~A~]]' ,
 984            '_imgAlignLeft'       : '|left'           ,
 985            '_imgAlignCenter'     : '|center'         ,
 986            '_imgAlignRight'      : '|right'          ,
 987                 # {| border="1" cellspacing="0" cellpadding="4" align="center"
 988            'tableOpen'           : '{|~A~~B~ cellpadding="4"',
 989            'tableClose'          : '|}'              ,
 990            'tableRowOpen'        : '|-\n| '          ,
 991            'tableTitleRowOpen'   : '|-\n! '          ,
 992            'tableCellSep'        : ' || '            ,
 993            'tableTitleCellSep'   : ' !! '            ,
 994            '_tableBorder'        : ' border="1"'     ,
 995            '_tableAlignCenter'   : ' align="center"' ,
 996            'comment'             : '<!-- \a -->'     ,
 997            'TOC'                 : '__TOC__'         ,
 998         },
 999
1000         # http://www.inference.phy.cam.ac.uk/mackay/mgp/SYNTAX
1001         # http://en.wikipedia.org/wiki/MagicPoint
1002         'mgp': {
1003            'paragraphOpen'       : '%font "normal", size 5'     ,
1004            'title1'              : '%page\n\n\a\n'              ,
1005            'title2'              : '%page\n\n\a\n'              ,
1006            'title3'              : '%page\n\n\a\n'              ,
1007            'title4'              : '%page\n\n\a\n'              ,
1008            'title5'              : '%page\n\n\a\n'              ,
1009            'blockVerbOpen'       : '%font "mono"'               ,
1010            'blockVerbClose'      : '%font "normal"'             ,
1011            'blockQuoteOpen'      : '%prefix "       "'          ,
1012            'blockQuoteClose'     : '%prefix "  "'               ,
1013            'fontMonoOpen'        : '\n%cont, font "mono"\n'     ,
1014            'fontMonoClose'       : '\n%cont, font "normal"\n'   ,
1015            'fontBoldOpen'        : '\n%cont, font "normal-b"\n' ,
1016            'fontBoldClose'       : '\n%cont, font "normal"\n'   ,
1017            'fontItalicOpen'      : '\n%cont, font "normal-i"\n' ,
1018            'fontItalicClose'     : '\n%cont, font "normal"\n'   ,
1019            'fontUnderlineOpen'   : '\n%cont, fore "cyan"\n'     ,
1020            'fontUnderlineClose'  : '\n%cont, fore "white"\n'    ,
1021            'listItemLine'        : '\t'                         ,
1022            'numlistItemLine'     : '\t'                         ,
1023            'numlistItemOpen'     : '\a. '                       ,
1024            'deflistItem1Open'    : '\t\n%cont, font "normal-b"\n',
1025            'deflistItem1Close'   : '\n%cont, font "normal"\n'   ,
1026            'bar1'                : '%bar "white" 5'             ,
1027            'bar2'                : '%pause'                     ,
1028            'url'                 : '\n%cont, fore "cyan"\n\a'   +\
1029                                    '\n%cont, fore "white"\n'    ,
1030            'urlMark'             : '\a \n%cont, fore "cyan"\n\a'+\
1031                                    '\n%cont, fore "white"\n'    ,
1032            'email'               : '\n%cont, fore "cyan"\n\a'   +\
1033                                    '\n%cont, fore "white"\n'    ,
1034            'emailMark'           : '\a \n%cont, fore "cyan"\n\a'+\
1035                                    '\n%cont, fore "white"\n'    ,
1036            'img'                 : '~A~\n%newimage "\a"\n%left\n',
1037            '_imgAlignLeft'       : '\n%left'                    ,
1038            '_imgAlignRight'      : '\n%right'                   ,
1039            '_imgAlignCenter'     : '\n%center'                  ,
1040            'comment'             : '%% \a'                      ,
1041            'pageBreak'           : '%page\n\n\n'                ,
1042            'EOD'                 : '%%EOD'
1043         },
1044
1045         # man groff_man ; man 7 groff
1046         'man': {
1047            'paragraphOpen'       : '.P'     ,
1048            'title1'              : '.SH \a' ,
1049            'title2'              : '.SS \a' ,
1050            'title3'              : '.SS \a' ,
1051            'title4'              : '.SS \a' ,
1052            'title5'              : '.SS \a' ,
1053            'blockVerbOpen'       : '.nf'    ,
1054            'blockVerbClose'      : '.fi\n'  ,
1055            'blockQuoteOpen'      : '.RS'    ,
1056            'blockQuoteClose'     : '.RE'    ,
1057            'fontBoldOpen'        : '\\fB'   ,
1058            'fontBoldClose'       : '\\fR'   ,
1059            'fontItalicOpen'      : '\\fI'   ,
1060            'fontItalicClose'     : '\\fR'   ,
1061            'listOpen'            : '.RS'    ,
1062            'listItemOpen'        : '.IP \(bu 3\n',
1063            'listClose'           : '.RE'    ,
1064            'numlistOpen'         : '.RS'    ,
1065            'numlistItemOpen'     : '.IP \a. 3\n',
1066            'numlistClose'        : '.RE'    ,
1067            'deflistItem1Open'    : '.TP\n'  ,
1068            'bar1'                : '\n\n'   ,
1069            'url'                 : '\a'     ,
1070            'urlMark'             : '\a (\a)',
1071            'email'               : '\a'     ,
1072            'emailMark'           : '\a (\a)',
1073            'img'                 : '\a'     ,
1074            'tableOpen'           : '.TS\n~A~~B~tab(^); ~C~.',
1075            'tableClose'          : '.TE'     ,
1076            'tableRowOpen'        : ' '       ,
1077            'tableCellSep'        : '^'       ,
1078            '_tableAlignCenter'   : 'center, ',
1079            '_tableBorder'        : 'allbox, ',
1080            '_tableColAlignLeft'  : 'l'       ,
1081            '_tableColAlignRight' : 'r'       ,
1082            '_tableColAlignCenter': 'c'       ,
1083            'comment'             : '.\\" \a'
1084         },
1085
1086         'pm6': {
1087            'paragraphOpen'       : '<@Normal:>'    ,
1088            'title1'              : '\n<@Title1:>\a',
1089            'title2'              : '\n<@Title2:>\a',
1090            'title3'              : '\n<@Title3:>\a',
1091            'title4'              : '\n<@Title4:>\a',
1092            'title5'              : '\n<@Title5:>\a',
1093            'blockVerbOpen'       : '<@PreFormat:>' ,
1094            'blockQuoteLine'      : '<@Quote:>'     ,
1095            'fontMonoOpen'        : '<FONT "Lucida Console"><SIZE 9>' ,
1096            'fontMonoClose'       : '<SIZE$><FONT$>',
1097            'fontBoldOpen'        : '<B>'           ,
1098            'fontBoldClose'       : '<P>'           ,
1099            'fontItalicOpen'      : '<I>'           ,
1100            'fontItalicClose'     : '<P>'           ,
1101            'fontUnderlineOpen'   : '<U>'           ,
1102            'fontUnderlineClose'  : '<P>'           ,
1103            'listOpen'            : '<@Bullet:>'    ,
1104            'listItemOpen'        : '\x95\t'        ,  # \x95 == ~U
1105            'numlistOpen'         : '<@Bullet:>'    ,
1106            'numlistItemOpen'     : '\x95\t'        ,
1107            'bar1'                : '\a'            ,
1108            'url'                 : '<U>\a<P>'      ,  # underline
1109            'urlMark'             : '\a <U>\a<P>'   ,
1110            'email'               : '\a'            ,
1111            'emailMark'           : '\a \a'         ,
1112            'img'                 : '\a'
1113         }
1114         }
1115
1116         # Exceptions for --css-sugar
1117         if config['css-sugar'] and config['target'] in ('html','xhtml'):
1118                 # Change just HTML because XHTML inherits it
1119                 htmltags = alltags['html']
1120                 # Table with no cellpadding
1121                 htmltags['tableOpen'] = string.replace(
1122                         htmltags['tableOpen'], ' CELLPADDING="4"', '')
1123                 # DIVs
1124                 htmltags['tocOpen' ] = '<DIV CLASS="toc" ID="toc">'
1125                 htmltags['tocClose'] = '</DIV>'
1126                 htmltags['bodyOpen'] = '<DIV CLASS="body" ID="body">'
1127                 htmltags['bodyClose']= '</DIV>'
1128
1129         # Make the HTML -> XHTML inheritance
1130         xhtml = alltags['html'].copy()
1131         for key in xhtml.keys(): xhtml[key] = string.lower(xhtml[key])
1132         # Some like HTML tags as lowercase, some don't... (headers out)
1133         if HTML_LOWER: alltags['html'] = xhtml.copy()
1134         xhtml.update(alltags['xhtml'])
1135         alltags['xhtml'] = xhtml.copy()
1136
1137         # Compose the target tags dictionary
1138         tags = {}
1139         target_tags = alltags[config['target']].copy()
1140
1141         for key in keys: tags[key] = ''     # create empty keys
1142         for key in target_tags.keys():
1143                 tags[key] = maskEscapeChar(target_tags[key]) # populate
1144
1145         # Map strong line to separator if not defined
1146         if not tags['bar2'] and tags['bar1']:
1147                 tags['bar2'] = tags['bar1']
1148
1149         return tags
1150
1151
1152 ##############################################################################
1153
1154
1155 def getRules(config):
1156         "Returns all the target-specific syntax rules"
1157
1158         ret = {}
1159         allrules = [
1160
1161          # target rules (ON/OFF)
1162           'linkable',             # target supports external links
1163           'tableable',            # target supports tables
1164           'imglinkable',          # target supports images as links
1165           'imgalignable',         # target supports image alignment
1166           'imgasdefterm',         # target supports image as definition term
1167           'autonumberlist',       # target supports numbered lists natively
1168           'autonumbertitle',      # target supports numbered titles natively
1169           'stylable',             # target supports external style files
1170           'parainsidelist',       # list items support paragraphs
1171           'spacedlistitem',       # lists support blank lines between items
1172           'listnotnested',        # lists cannot be nested
1173           'quotenotnested',       # quotes cannot be nested
1174           'verbblocknotescaped',  # don't escape specials in verb block
1175           'verbblockfinalescape', # do final escapes in verb block
1176           'escapeurl',            # escape special in link URL
1177           'onelinepara',          # dump paragraph as a single long line
1178           'tabletitlerowinbold',  # manually bold any cell on table titles
1179           'tablecellstrip',       # strip extra spaces from each table cell
1180           'tablecellspannable',   # the table cells can have span attribute
1181           'barinsidequote',       # bars are allowed inside quote blocks
1182           'finalescapetitle',     # perform final escapes on title lines
1183           'autotocnewpagebefore', # break page before automatic TOC
1184           'autotocnewpageafter',  # break page after automatic TOC
1185           'autotocwithbars',      # automatic TOC surrounded by bars
1186           'mapbar2pagebreak',     # map the strong bar to a page break
1187           'titleblocks',          # titles must be on open/close section blocks
1188
1189         # Target code beautify (ON/OFF)
1190           'indentverbblock',      # add leading spaces to verb block lines
1191           'breaktablecell',       # break lines after any table cell
1192           'breaktablelineopen',   # break line after opening table line
1193           'breaktitleopen',       # break line after any title
1194           'notbreaklistopen',     # don't break line after opening a new list
1195           'notbreakparaopen',     # don't break line after opening a new para
1196           'keepquoteindent',      # don't remove the leading TABs on quotes
1197           'keeplistindent',       # don't remove the leading spaces on lists
1198           'blankendmotherlist',   # append a blank line at the mother list end
1199           'blankendtable',        # append a blank line at the table end
1200           'blankendautotoc',      # append a blank line at the auto TOC end
1201           'tagnotindentable',     # tags must be placed at the line beginning
1202           'spacedlistitemopen',   # append a space after the list item open tag
1203           'spacednumlistitemopen',# append a space after the numlist item open tag
1204           'deflisttextstrip',     # strip the contents of the deflist text
1205
1206         # Value settings
1207           'listmaxdepth',         # maximum depth for lists
1208           'quotemaxdepth',        # maximum depth for quotes
1209           'tablecellaligntype',   # type of table cell align: cell, column
1210         ]
1211
1212         rules_bank = {
1213           'txt' : {
1214             'indentverbblock':1,
1215             'spacedlistitem':1,
1216             'parainsidelist':1,
1217             'keeplistindent':1,
1218             'barinsidequote':1,
1219             'autotocwithbars':1,
1220             'blankendmotherlist':1,
1221             },
1222           'html': {
1223             'indentverbblock':1,
1224             'linkable':1,
1225             'stylable':1,
1226             'escapeurl':1,
1227             'imglinkable':1,
1228             'imgalignable':1,
1229             'imgasdefterm':1,
1230             'autonumberlist':1,
1231             'spacedlistitem':1,
1232             'parainsidelist':1,
1233             'blankendmotherlist':1,
1234             'tableable':1,
1235             'tablecellstrip':1,
1236             'blankendtable':1,
1237             'breaktablecell':1,
1238             'breaktablelineopen':1,
1239             'keeplistindent':1,
1240             'keepquoteindent':1,
1241             'barinsidequote':1,
1242             'autotocwithbars':1,
1243             'tablecellspannable':1,
1244             'tablecellaligntype':'cell',
1245             },
1246           #TIP xhtml inherits all HTML rules
1247           'xhtml': {
1248             },
1249           'sgml': {
1250             'linkable':1,
1251             'escapeurl':1,
1252             'autonumberlist':1,
1253             'spacedlistitem':1,
1254             'blankendmotherlist':1,
1255             'tableable':1,
1256             'tablecellstrip':1,
1257             'blankendtable':1,
1258             'blankendautotoc':1,
1259             'quotenotnested':1,
1260             'keeplistindent':1,
1261             'keepquoteindent':1,
1262             'barinsidequote':1,
1263             'finalescapetitle':1,
1264             'tablecellaligntype':'column',
1265             },
1266           'mgp' : {
1267             'blankendmotherlist':1,
1268             'tagnotindentable':1,
1269             'spacedlistitem':1,
1270             'imgalignable':1,
1271             'autotocnewpagebefore':1,
1272             },
1273           'tex' : {
1274             'stylable':1,
1275             'escapeurl':1,
1276             'autonumberlist':1,
1277             'autonumbertitle':1,
1278             'spacedlistitem':1,
1279             'blankendmotherlist':1,
1280             'tableable':1,
1281             'tablecellstrip':1,
1282             'tabletitlerowinbold':1,
1283             'blankendtable':1,
1284             'verbblocknotescaped':1,
1285             'keeplistindent':1,
1286             'listmaxdepth':4,  # deflist is 6
1287             'quotemaxdepth':6,
1288             'barinsidequote':1,
1289             'finalescapetitle':1,
1290             'autotocnewpageafter':1,
1291             'mapbar2pagebreak':1,
1292             'tablecellaligntype':'column',
1293             },
1294           'lout': {
1295             'keepquoteindent':1,
1296             'keeplistindent':1,
1297             'deflisttextstrip':1,
1298             'escapeurl':1,
1299             'verbblocknotescaped':1,
1300             'imgalignable':1,
1301             'mapbar2pagebreak':1,
1302             'titleblocks':1,
1303             'autonumberlist':1,
1304             'notbreakparaopen':1,
1305             },
1306           'moin': {
1307             'spacedlistitem':1,
1308             'linkable':1,
1309             'blankendmotherlist':1,
1310             'keeplistindent':1,
1311             'tableable':1,
1312             'barinsidequote':1,
1313             'blankendtable':1,
1314             'tabletitlerowinbold':1,
1315             'tablecellstrip':1,
1316             'autotocwithbars':1,
1317             'tablecellaligntype':'cell',
1318             'deflisttextstrip':1,
1319             },
1320           'gwiki': {
1321             'spacedlistitem':1,
1322             'linkable':1,
1323             'blankendmotherlist':1,
1324             'keeplistindent':1,
1325             'tableable':1,
1326             'tabletitlerowinbold':1,
1327             'tablecellstrip':1,
1328             'autonumberlist':1,
1329             'breaktitleopen':1,
1330             },
1331           'doku': {
1332             'indentverbblock':1,           # DokuWiki uses '  ' to mark verb blocks
1333             'spacedlistitem':1,
1334             'linkable':1,
1335             'blankendmotherlist':1,
1336             'keeplistindent':1,
1337             'tableable':1,
1338             'barinsidequote':1,
1339             'blankendtable':1,
1340             'tablecellstrip':1,
1341             'autotocwithbars':1,
1342             'autonumberlist':1,
1343             'imgalignable':1,
1344             'tablecellaligntype':'cell',
1345             },
1346           'wiki': {
1347             'linkable':1,
1348             'blankendmotherlist':1,
1349             'tableable':1,
1350             'blankendtable':1,
1351             'tablecellstrip':1,
1352             'autotocwithbars':1,
1353             'spacedlistitemopen':1,
1354             'spacednumlistitemopen':1,
1355             'deflisttextstrip':1,
1356             'autonumberlist':1,
1357             'imgalignable':1,
1358             },
1359           'man' : {
1360             'spacedlistitem':1,
1361             'indentverbblock':1,
1362             'blankendmotherlist':1,
1363             'tagnotindentable':1,
1364             'tableable':1,
1365             'tablecellaligntype':'column',
1366             'tabletitlerowinbold':1,
1367             'tablecellstrip':1,
1368             'blankendtable':1,
1369             'barinsidequote':1,
1370             'parainsidelist':0,
1371             },
1372           'pm6' : {
1373             'keeplistindent':1,
1374             'verbblockfinalescape':1,
1375             #TODO add support for these - maybe set a JOINNEXT char and
1376             #     do it on addLineBreaks()
1377             'notbreaklistopen':1,
1378             'notbreakparaopen':1,
1379             'barinsidequote':1,
1380             'autotocwithbars':1,
1381             'onelinepara':1,
1382             }
1383         }
1384
1385         # Exceptions for --css-sugar
1386         if config['css-sugar'] and config['target'] in ('html','xhtml'):
1387                 rules_bank['html']['indentverbblock'] = 0
1388                 rules_bank['html']['autotocwithbars'] = 0
1389
1390         # Get the target specific rules
1391         if config['target'] == 'xhtml':
1392                 myrules = rules_bank['html'].copy()   # inheritance
1393                 myrules.update(rules_bank['xhtml'])   # get XHTML specific
1394         else:
1395                 myrules = rules_bank[config['target']].copy()
1396
1397         # Populate return dictionary
1398         for key in allrules: ret[key] = 0        # reset all
1399         ret.update(myrules)                      # get rules
1400
1401         return ret
1402
1403
1404 ##############################################################################
1405
1406
def getRegexes():
        """Returns all the regexes used to find the t2t marks

        The return value is a dictionary which maps a mark name to its
        compiled regex. One key is not a regex: '_urlskel' holds the
        dictionary with the plain text pieces used to compose the URL
        patterns. The 'macros' regex is composed using the keys of the
        MACROS global.
        """

        bank = {
        # Block delimiters: the mark must be alone on the line.
        # The Open and Close patterns of each block are the same.
        'blockVerbOpen':
                re.compile(r'^```\s*$'),
        'blockVerbClose':
                re.compile(r'^```\s*$'),
        'blockRawOpen':
                re.compile(r'^"""\s*$'),
        'blockRawClose':
                re.compile(r'^"""\s*$'),
        'blockCommentOpen':
                re.compile(r'^%%%\s*$'),
        'blockCommentClose':
                re.compile(r'^%%%\s*$'),
        # One or more TABs at the line start
        'quote':
                re.compile(r'^\t+'),
        # One line marks: mark + space + at least one char of contents
        '1lineVerb':
                re.compile(r'^``` (?=.)'),
        '1lineRaw':
                re.compile(r'^""" (?=.)'),
        # mono, raw, bold, italic, underline, strike:
        # - marks must be glued with the contents, no boundary spaces
        # - they are greedy, so in ****bold****, turns to <b>**bold**</b>
        'fontMono':
                re.compile(  r'``([^\s](|.*?[^\s])`*)``'),
        'raw':
                re.compile(  r'""([^\s](|.*?[^\s])"*)""'),
        'fontBold':
                re.compile(r'\*\*([^\s](|.*?[^\s])\**)\*\*'),
        'fontItalic':
                re.compile(  r'//([^\s](|.*?[^\s])/*)//'),
        'fontUnderline':
                re.compile(  r'__([^\s](|.*?[^\s])_*)__'),
        'fontStrike':
                re.compile(  r'--([^\s](|.*?[^\s])-*)--'),
        # List items: group 1 is the indent (spaces), group 2 the mark.
        # The mark must be followed by one space and a non-space char.
        'list':
                re.compile(r'^( *)(-) (?=[^ ])'),
        'numlist':
                re.compile(r'^( *)(\+) (?=[^ ])'),
        'deflist':
                re.compile(r'^( *)(:) (.*)$'),
        # A list mark alone on the line (no contents)
        'listclose':
                re.compile(r'^( *)([-+:])\s*$'),
        # A line composed by 20 or more chars of _, = or -
        'bar':
                re.compile(r'^(\s*)([_=-]{20,})\s*$'),
        # Optional spaces, then | or ||, then one space
        'table':
                re.compile(r'^ *\|\|? '),
        'blankline':
                re.compile(r'^\s*$'),
        # Any line which starts with %
        'comment':
                re.compile(r'^%'),

        # Auxiliary tag regexes
        # They match the ~X~ masks, ignoring case (re.I)
        '_imgAlign'        : re.compile(r'~A~', re.I),
        '_tableAlign'      : re.compile(r'~A~', re.I),
        '_anchor'          : re.compile(r'~A~', re.I),
        '_tableBorder'     : re.compile(r'~B~', re.I),
        '_tableColAlign'   : re.compile(r'~C~', re.I),
        '_tableCellColSpan': re.compile(r'~S~', re.I),
        '_tableCellAlign'  : re.compile(r'~A~', re.I),
        }

        # Special char to place data on TAGs contents  (\a == bell)
        bank['x'] = re.compile('\a')

        # %%macroname [ (formatting) ]
        # Note: the pattern passes through the % operator, so %%%% turns
        # to %% on the final regex. The names come from the MACROS keys.
        bank['macros'] = re.compile(r'%%%%(?P<name>%s)\b(\((?P<fmt>.*?)\))?'%(
                                    string.join(MACROS.keys(), '|')), re.I)

        # %%TOC special macro for TOC positioning
        bank['toc'] = re.compile(r'^ *%%toc\s*$', re.I)

        # Almost complicated title regexes ;)
        # Skeleton: the first %s is the title mark, the second is the
        # title text. The \1 demands a closing mark equal to the opening
        # one (group 1 is 'id'). An optional [label] may follow it.
        titskel = r'^ *(?P<id>%s)(?P<txt>%s)\1(\[(?P<label>[\w-]*)\])?\s*$'
        bank[   'title'] = re.compile(titskel%('[=]{1,5}','[^=](|.*[^=])'))
        bank['numtitle'] = re.compile(titskel%('[+]{1,5}','[^+](|.*[^+])'))

        ### Complicated regexes begin here ;)
        #
        # Textual descriptions on --help's style: [...] is optional, | is OR


        ### First, some auxiliary variables
        #

        # [image.EXT]
        patt_img = r'\[([\w_,.+%$#@!?+~/-]+\.(png|jpe?g|gif|eps|bmp))\]'

        # Link things
        # http://www.gbiv.com/protocols/uri/rfc/rfc3986.html
        # pchar: A-Za-z._~- / %FF / !$&'()*+,;= / :@
        # Recomended order: scheme://user:pass@domain/path?query=foo#anchor
        # Also works      : scheme://user:pass@domain/path#anchor?query=foo
        # TODO form: !'():
        # Note: most of the values are the contents of a char class, they
        # are placed inside [...] when the patterns below are composed
        urlskel = {
          'proto' : r'(https?|ftp|news|telnet|gopher|wais)://',
          'guess' : r'(www[23]?|ftp)\.',         # w/out proto, try to guess
          'login' : r'A-Za-z0-9_.-',             # for ftp://login@domain.com
          'pass'  : r'[^ @]*',                   # for ftp://login:pass@dom.com
          'chars' : r'A-Za-z0-9%._/~:,=$@&+-',   # %20(space), :80(port), D&D
          'anchor': r'A-Za-z0-9%._-',            # %nn(encoded)
          'form'  : r'A-Za-z0-9/%&=+;.,$@*_-',   # .,@*_-(as is)
          'punct' : r'.,;:!?'
        }

        # username [ :password ] @
        patt_url_login = r'([%s]+(:%s)?@)?'%(urlskel['login'],urlskel['pass'])

        # [ http:// ] [ username:password@ ] domain.com [ / ]
        #     [ #anchor | ?form=data ]
        retxt_url = r'\b(%s%s|%s)[%s]+\b/*(\?[%s]+)?(#[%s]*)?'%(
                     urlskel['proto'],patt_url_login, urlskel['guess'],
                     urlskel['chars'],urlskel['form'],urlskel['anchor'])

        # filename | [ filename ] #anchor
        retxt_url_local = r'[%s]+|[%s]*(#[%s]*)'%(
                     urlskel['chars'],urlskel['chars'],urlskel['anchor'])

        # user@domain [ ?form=data ]
        patt_email = r'\b[%s]+@([A-Za-z0-9_-]+\.)+[A-Za-z]{2,4}\b(\?[%s]+)?'%(
                     urlskel['login'],urlskel['form'])

        # Saving for future use
        bank['_urlskel'] = urlskel

        ### And now the real regexes
        #

        bank['email'] = re.compile(patt_email,re.I)

        # email | url
        bank['link'] = re.compile(r'%s|%s'%(retxt_url,patt_email), re.I)

        # \[ label | imagetag    url | email | filename \]
        bank['linkmark'] = re.compile(
                r'\[(?P<label>%s|[^]]+) (?P<link>%s|%s|%s)\]'%(
                  patt_img, retxt_url, patt_email, retxt_url_local),
                re.L+re.I)

        # Image
        bank['img'] = re.compile(patt_img, re.L+re.I)

        # Special things
        # The %! prefix (and the following blanks) at the line start
        bank['special'] = re.compile(r'^%!\s*')
        return bank
1554 ### END OF regex nightmares
1555
1556
1557 ##############################################################################
1558
class error(Exception):
        "The txt2tags exception, raised by Error() to report a failure"
def echo(msg):   # for quick debug
        "Print MSG surrounded by the ANSI escapes 32;1 (color) and reset"
        colored = '\033[32;1m%s\033[m'%msg
        print(colored)
def Quit(msg=''):
        "Print MSG (when it is not empty) and exit with the status code 0"
        if msg:
                print(msg)
        sys.exit(0)
def Error(msg):
        "Raise the 'error' exception with MSG prefixed by 'program: Error: '"
        prefix = _("%s: Error: ")%my_name
        raise error(prefix + msg)
def getTraceback():
        """Return the traceback of the exception being handled, as a string

        The text is composed by traceback.format_exception(), using the
        sys.exc_info() data. This is a best-effort helper for error
        reports: if the traceback text cannot be composed, None is
        returned instead of raising a new exception.
        """
        try:
                from traceback import format_exception
                etype, value, tb = sys.exc_info()
                return ''.join(format_exception(etype, value, tb))
        except Exception:
                # Do not hide the original problem behind a new error.
                # SystemExit and KeyboardInterrupt are not swallowed.
                return None
def getUnknownErrorMessage():
        "Compose the 'unknown error' message: apology, request and traceback"
        apology = _('Sorry! Txt2tags aborted by an unknown error.')
        request = _('Please send the following Error Traceback to the author')
        template = '%s\n%s (%s):\n\n%s'
        return template%(apology, request, my_email, getTraceback())
def Message(msg,level):
        "Print MSG if LEVEL is not greater than VERBOSE and QUIET is off"
        if QUIET or level > VERBOSE: return
        # The prefix is 5 dashes, repeated LEVEL times
        dashes = '-----'*level
        print("%s %s"%(dashes, msg))
def Debug(msg,id=0,linenr=None):
        """Show debug messages, categorized (colored or not)

        msg    - the message text
        id     - the message category, a number from 0 to 7 which
                 selects the label (and the color) of the message.
                 Any other number falls back to 0.
        linenr - when informed, 'LINE nnnn: ' is prefixed to the message

        Nothing is shown when the QUIET global is on or DEBUG is off.
        The colors are used only when the COLOR_DEBUG global is on, and
        BG_LIGHT chooses between the two color lists.
        """
        if QUIET or not DEBUG: return
        # Use the integer form of the category from now on: it is the
        # index for the lists below, and a value such as '3' or 3.0
        # passes the range test but is not a valid list index
        id = int(id)
        if id not in range(8): id = 0
        # 0:black 1:red 2:green 3:yellow 4:blue 5:pink 6:cyan 7:white ;1:light
        ids            = ['INI','CFG','SRC','BLK','HLD','GUI','OUT','DET']
        colors_bgdark  = ['7;1','1;1','3;1','6;1','4;1','5;1','2;1','7;1']
        colors_bglight = ['0'  ,'1'  ,'3'  ,'6'  ,'4'  ,'5'  ,'2'  ,'0'  ]
        if linenr is not None: msg = "LINE %04d: %s"%(linenr,msg)
        if COLOR_DEBUG:
                if BG_LIGHT: color = colors_bglight[id]
                else       : color = colors_bgdark[id]
                msg = '\033[3%sm%s\033[m'%(color,msg)
        print("++ %s: %s"%(ids[id],msg))
def Readfile(file, remove_linebreaks=0, ignore_error=0):
        """Read a file (or STDIN) and return its contents as a list of lines

        file              - the path of the file, or '-' to read STDIN
        remove_linebreaks - if true, the trailing \\n and \\r chars of
                            every line are removed
        ignore_error      - if true, a read problem is not reported and
                            the lines read so far (maybe none) are
                            returned

        When the data cannot be read and ignore_error is off, Error()
        is called to raise the 'error' exception.
        """
        data = []
        if file == '-':
                # NOTE(review): the catch-all here also gets a Ctrl-C typed
                # while waiting for STDIN data - it looks deliberate, so
                # it was kept as is. Confirm before narrowing it.
                try: data = sys.stdin.readlines()
                except:
                        if not ignore_error:
                                Error(_('You must feed me with data on STDIN!'))
        else:
                try:
                        f = open(file)
                        # Always close the file, even if the reading fails
                        try:
                                data = f.readlines()
                        finally:
                                f.close()
                except Exception:
                        if not ignore_error:
                                Error(_("Cannot read file:")+" %s"%file)
        if remove_linebreaks:
                # A real list is needed: len(data) is used right below
                data = [re.sub('[\n\r]+$','',x) for x in data]
        Message(_("File read (%d lines): %s")%(len(data),file),2)
        return data
def Savefile(file, contents):
        """Write CONTENTS to FILE, which is opened on the binary mode

        file     - the path of the file to be written (overwritten)
        contents - a single string, or a list of strings. No line
                   break is added, the data is written as is.

        When the file cannot be opened, Error() is called to raise the
        'error' exception. A failure while writing is not handled here,
        but the file is always closed.
        """
        try:
                f = open(file, 'wb')
        except Exception:
                Error(_("Cannot open file for writing:")+" %s"%file)
        # Close the file even if the writing fails
        try:
                if isinstance(contents, list):
                        f.writelines(contents)
                else:
                        f.write(contents)
        finally:
                f.close()
1621
def showdic(dic):
        "Print the DIC contents, one 'key : value' line for each key"
        line = "%15s : %s"   # the key is right aligned on 15 columns
        for key in dic.keys():
                print(line % (key, dic[key]))
def dotted_spaces(txt=''):
        "Return TXT with every space char replaced by a dot"
        # The string method is used instead of the string module
        # function, which is deprecated
        return txt.replace(' ', '.')
1626
1627 # TIP: win env vars http://www.winnetmag.com/Article/ArticleID/23873/23873.html
def get_rc_path():
        """Return the full path for the users' RC file

        If the T2TCONFIG environment variable is set, its value is
        returned as is. Otherwise the path is composed by joining the
        user directory (the first non-empty of the HOME and HOMEPATH
        environment variables) with the RC file name: '_t2trc' on
        Windows, '.txt2tagsrc' elsewhere. On Windows, the HOMEDRIVE
        contents (if any) is prefixed to the path.

        The path is just composed: no check is made to see if the file
        really exists. An empty string is returned when the user
        directory cannot be found.
        """
        # Try to get the path from an env var. if yes, we're done
        user_defined = os.environ.get('T2TCONFIG')
        if user_defined: return user_defined
        # Env var not found, so perform automatic path composing
        # Set default filename according system platform
        on_windows = (sys.platform[:3] == 'win')
        if on_windows: rc_file = '_t2trc'
        else         : rc_file = '.txt2tagsrc'
        # The file must be on the user directory, but where is this dir?
        rc_dir = None
        for var in ('HOME', 'HOMEPATH'):
                rc_dir = os.environ.get(var)
                if rc_dir: break
        # Sorry, not found
        if not rc_dir: return ''
        # rc dir found, now we must join dir+file to compose the full path
        rc_path = os.path.join(rc_dir, rc_file)
        # On windows, prefix with the drive (%homedrive%: 2k/XP/NT)
        if on_windows:
                rc_drive = os.environ.get('HOMEDRIVE')
                # The variable may be unset, and joining None is an error
                if rc_drive:
                        rc_path = os.path.join(rc_drive, rc_path)
        return rc_path
1653
1654
1655
1656 ##############################################################################
1657
class CommandLine:
        """
        Command Line class - Masters command line

        Checks the provided command line and extracts its data.
        The valid --long names come from the global OPTIONS, FLAGS and
        ACTIONS dictionaries. The short options are registered here,
        each one mapped to its long equivalent.

        METHODS:
          _compose_short_opts() -> str
          _compose_long_opts() -> list
              Compose the valid short and long options, on the format
              expected by 'getopt'.

          parse() -> (opts, args)
              Call getopt to check and parse the command line.
              The command line must be a list, and must not include
              the program name (sys.argv[1:]).

          get_raw_config() -> [RAW config]
              Scan the command line and convert its data to the RAW
              config format (see the ConfigMaster class for the RAW
              format description). The optional 'ignore' and 'filter'
              arguments select which keys are kept.

          compose_cmdline(dict) -> [Command line]
              Compose a command line list from an already parsed
              config dictionary. Default values are not included.

        The get_raw_config() calls parse(), so the typical use of this
        class is:

            raw = CommandLine().get_raw_config(sys.argv[1:])
        """
        def __init__(self):
                self.all_options = OPTIONS.keys()
                self.all_flags   = FLAGS.keys()
                self.all_actions = ACTIONS.keys()

                # short:long options equivalence
                self.short_long = {
                  'h':'help',
                  'V':'version',
                  'n':'enum-title',
                  'i':'infile',
                  'H':'no-headers',
                  'o':'outfile',
                  'v':'verbose',
                  't':'target',
                  'q':'quiet',
                  'C':'config-file'
                }

                # Compose valid short and long options data for getopt
                self.short_opts = self._compose_short_opts()
                self.long_opts  = self._compose_long_opts()

        def _compose_short_opts(self):
                "Returns a string like 'hVt:o' with all short options/flags"
                chars = []
                for short, longname in self.short_long.items():
                        # An option has a parameter (:), a flag has not
                        if longname in self.all_options:
                                short = short+':'
                        chars.append(short)
                return ''.join(chars)

        def _compose_long_opts(self):
                "Returns a list with all the valid long options/flags"
                # option= (has a parameter), flag (turn ON), action
                names = [opt+'=' for opt in self.all_options]
                names.extend(self.all_flags)
                names.extend(self.all_actions)
                # no-flag and friends (turn OFF)
                names.extend(['no-'+flag for flag in self.all_flags])
                names.extend([
                  'no-style', 'no-encoding',
                  'no-outfile', 'no-infile',
                  'no-dump-config', 'no-dump-source'])
                return names

        def _tokenize(self, cmd_string=''):
                "Convert a command line string to a list"
                #TODO protect quotes contents -- Don't use it, pass cmdline as list
                return cmd_string.split()

        def parse(self, cmdline=[]):
                "Check/Parse a command line list     TIP: no program name!"
                try:
                        parsed = getopt.getopt(
                                 cmdline, self.short_opts, self.long_opts)
                except getopt.error:
                        # Get the exception instance using exc_info(), so
                        # no version specific 'except' syntax is needed
                        errmsg = sys.exc_info()[1]
                        Error(_("%s (try --help)")%errmsg)
                opts, args = parsed
                return (opts, args)

        def get_raw_config(self, cmdline=[], ignore=[], filter=[], relative=0):
                "Returns the options/arguments found as RAW config"
                if not cmdline: return []
                # We need lists, not strings
                if type(cmdline) in (type(''), type(u'')):
                        cmdline = self._tokenize(cmdline)
                opts, args = self.parse(cmdline[:])
                # Alias to old misspelled 'suGGar'
                aliases = {
                  'css-suggar'   : 'css-sugar',
                  'no-css-suggar': 'no-css-sugar'}
                # Parse all options
                found = []
                for name, value in opts:
                        # Remove leading - and --
                        name = re.sub('^--?', '', name)
                        name = aliases.get(name, name)
                        # Translate short opt to long
                        if len(name) == 1:
                                name = self.short_long.get(name)
                        if name == 'config-file':
                                # config-file inclusion, path relative to PWD
                                configs = ConfigLines().include_config_file(
                                          value)
                                # Remove the 'target' item of all configs
                                found.extend([[c[1],c[2]] for c in configs])
                                continue
                        # Outfile exception: path relative to PWD
                        if name == 'outfile' and relative:
                                if value not in [STDOUT, MODULEOUT]:
                                        value = os.path.abspath(value)
                        # Save it
                        found.append([name, value])
                # Get infile, if any
                for infile in args:
                        found.append(['infile', infile])
                # Apply 'ignore' and 'filter' rules (filter is stronger)
                unrestricted = not (filter or ignore)
                ret = []
                for name, value in found:
                        if unrestricted \
                           or (filter and name in filter) \
                           or (ignore and name not in ignore):
                                ret.append(['all', name, value])
                # Add the original command line string as 'realcmdline'
                ret.append(['all', 'realcmdline', cmdline])
                return ret

        def compose_cmdline(self, conf={}, no_check=0):
                "compose a full (and diet) command line from CONF dict"
                if not conf: return []
                args = []
                dft_options = OPTIONS.copy()
                cfg = conf.copy()
                valid_opts = self.all_options + self.all_flags
                use_short = {'no-headers':'H', 'enum-title':'n'}
                # Remove useless options
                if not no_check and cfg.get('toc-only'):
                        # outfile: defaults to STDOUT
                        for useless in ('no-headers', 'outfile'):
                                if useless in cfg:
                                        del cfg[useless]
                        if cfg.get('target') == 'txt':
                                del cfg['target']       # already default
                        args.append('--toc-only')  # must be the first
                        del cfg['toc-only']
                # Add target type
                if 'target' in cfg:
                        args.append('-t '+cfg['target'])
                        del cfg['target']
                # Add other options
                for key, val in cfg.items():
                        # The files are added later.
                        # An unknown key may be a %!setting.
                        if key in ('outfile', 'infile') \
                           or key not in valid_opts:
                                continue
                        # Empty and default values are useless on cmdline
                        if not val or val == dft_options.get(key):
                                continue
                        if key in use_short:            # -short format
                                arg = '-'+use_short[key]
                        elif key in self.all_flags:     # --option
                                arg = '--'+key
                        else:                           # --option=value
                                arg = '--%s=%s'%(key,val)
                        args.append(arg)
                # The outfile using -o
                if 'outfile' in cfg:
                        if cfg['outfile'] != dft_options.get('outfile'):
                                args.append('-o '+cfg['outfile'])
                # Place input file(s) always at the end
                if 'infile' in cfg:
                        args.append(' '.join(cfg['infile']))
                # Return as a nice list
                Debug("Diet command line: %s"%' '.join(args), 1)
                return args
1847
1848 ##############################################################################
1849
class SourceDocument:
        """
        SourceDocument class - scan document structure, extract data

        It knows about full files. It reads a file and identifies where
        each area (Head, Conf, Body) begins. With this info it can
        extract each area contents.
        Note: the original line break is removed.

        DATA:
          self.arearef - Save Head, Conf, Body init line number
                         (zero means that the area was not found)
          self.areas   - Store the area names which are not empty
                         (the name of a missing area is set to '')
          self.buffer  - The full file contents (with NO \\r, \\n),
                         with an extra empty item on the position 0,
                         so the first text line is at the position 1

        METHODS:
          get()   - Access the contents of an Area. Example:
                    config = SourceDocument(file).get('conf')

          split() - Get all the document Areas at once. Example:
                    head, conf, body = SourceDocument(file).split()

        RULES:
            * The document parts are sequential: Head, Conf and Body.
            * One ends when the next begins.
            * The Conf Area is optional, so a document can have just
              Head and Body Areas.

            These are the Areas limits:
              - Head Area: the first three lines
              - Body Area: from the first valid text line to the end
              - Conf Area: the comments between Head and Body Areas

            Exception: If the first line is blank, this means no
            header info, so the Head Area is just the first line.
        """
        def __init__(self, filename='', contents=[]):
                "Scan the FILENAME file or, if not informed, the CONTENTS list"
                self.areas = ['head','conf','body']
                self.arearef = []
                self.areas_fancy = ''
                self.filename = filename
                self.buffer = []
                if filename:
                        self.scan_file(filename)
                elif contents:
                        self.scan(contents)

        def split(self):
                "Returns all document parts, split into lists."
                return self.get('head'), self.get('conf'), self.get('body')

        def get(self, areaname):
                "Returns head|conf|body contents from self.buffer"
                # Sanity
                if areaname not in self.areas: return []
                if not self.buffer           : return []
                # Go get it
                # An area ends where the next found area begins. A zero
                # reference means "area not found", so it is skipped.
                bufini = 1
                bufend = len(self.buffer)
                if   areaname == 'head':
                        ini = bufini
                        end = self.arearef[1] or self.arearef[2] or bufend
                elif areaname == 'conf':
                        ini = self.arearef[1]
                        end = self.arearef[2] or bufend
                elif areaname == 'body':
                        ini = self.arearef[2]
                        end = bufend
                else:
                        Error("Unknown Area name '%s'"%areaname)
                lines = self.buffer[ini:end]
                # Make sure head will always have 3 lines
                while areaname == 'head' and len(lines) < 3:
                        lines.append('')
                return lines

        def scan_file(self, filename):
                "Read the FILENAME file (line breaks removed) and scan it"
                Debug("source file: %s"%filename)
                Message(_("Loading source document"),1)
                buf = Readfile(filename, remove_linebreaks=1)
                self.scan(buf)

        def scan(self, lines):
                """Run through source file and identify head/conf/body areas

                lines - the list with the document lines, which is not
                        changed by this method

                The results are saved on self.arearef, self.areas,
                self.buffer and self.areas_fancy. Error() is called
                when there are no lines to scan.
                """
                if len(lines) == 0:
                        Error(_('The input file is empty: %s')%self.filename)
                cfg_parser = ConfigLines().parse_line
                # Work on a copy, to not change the list of the caller.
                # The empty first item makes the text start at pos 1.
                buf = [''] + list(lines)
                ref = [1,4,0]
                if not string.strip(buf[1]):              # no header
                        ref[0] = 0 ; ref[1] = 2
                rgx = getRegexes()
                on_comment_block = 0
                for i in xrange(ref[1],len(buf)):         # find body init:
                        # Handle comment blocks inside config area
                        if not on_comment_block \
                           and rgx['blockCommentOpen'].search(buf[i]):
                                on_comment_block = 1
                                continue
                        if on_comment_block \
                           and rgx['blockCommentClose'].search(buf[i]):
                                on_comment_block = 0
                                continue
                        if on_comment_block: continue

                        if string.strip(buf[i]) and (     # ... not blank and
                           buf[i][0] != '%' or            # ... not comment or
                           rgx['macros'].match(buf[i]) or # ... %%macro
                           rgx['toc'].match(buf[i])    or # ... %%toc
                           cfg_parser(buf[i],'include')[1]): # ... %!include
                                ref[2] = i ; break
                if ref[1] == ref[2]: ref[1] = 0           # no conf area
                for i in 0,1,2:                           # del !existent
                        if ref[i] >= len(buf): ref[i] = 0 # title-only
                        if not ref[i]: self.areas[i] = ''
                Debug('Head,Conf,Body start line: %s'%ref)
                self.arearef = ref                        # save results
                self.buffer  = buf
                # Fancyness sample: head conf body (1 4 8)
                self.areas_fancy = "%s (%s)"%(
                     string.join(self.areas),
                     string.join(map(str, map(lambda x:x or '', ref))))
                Message(_("Areas found: %s")%self.areas_fancy, 2)

        def get_raw_config(self):
                "Handy method to get the CONF area RAW config (if any)"
                if not self.areas.count('conf'): return []
                Message(_("Scanning source document CONF area"),1)
                raw = ConfigLines(
                      file=self.filename, lines=self.get('conf'),
                      first_line=self.arearef[1]).get_raw_config()
                Debug("document raw config: %s"%raw, 1)
                return raw
1983
1984 ##############################################################################
1985
class ConfigMaster:
        """
        ConfigMaster class - the configuration wizard

        This class is the configuration master. It knows how to handle
        the RAW and PARSED config formats. It also performs the sanity
        checking for a given configuration.

        DATA:
          self.raw         - Stores the config on the RAW format
          self.parsed      - Stores the config on the PARSED format
          self.defaults    - Stores the default values for all keys
          self.off         - Stores the OFF values for all keys
          self.multi       - List of keys which can have multiple values
          self.numeric     - List of keys which value must be a number
          self.incremental - List of keys which are incremental

        RAW FORMAT:
          The RAW format is a list of lists, being each mother list item
          a full configuration entry. Any entry is a 3 item list, on
          the following format: [ TARGET, KEY, VALUE ]
          Being a list, the order is preserved, so it's easy to use
          different kinds of configs, as CONF area and command line,
          respecting the precedence.
          The special target 'all' is used when no specific target was
          defined on the original config.

        PARSED FORMAT:
          The PARSED format is a dictionary, with all the 'key : value'
          found by reading the RAW config. The self.target contents
          matters, so this dictionary only contains the target's
          config. The configs of other targets are ignored.

        The CommandLine and ConfigLines classes have the get_raw_config()
        method which convert the configuration found to the RAW format.
        Just feed it to parse() and get a brand-new ready-to-use config
        dictionary. Example:

            >>> raw = CommandLine().get_raw_config(['-n', '-H'])
            >>> print raw
            [['all', 'enum-title', ''], ['all', 'no-headers', '']]
            >>> parsed = ConfigMaster(raw).parse()
            >>> print parsed
            {'enum-title': 1, 'headers': 0}
        """
        def __init__(self, raw=[], target=''):
                """
                raw    - the config on the RAW format (see class help)
                target - the target name; when empty, it is searched
                         on the RAW config the first time it is needed
                """
                # NOTE: 'raw' is stored by reference and this class never
                # changes it, so the shared default list stays empty.
                self.raw          = raw
                self.target       = target
                self.parsed       = {}
                self.dft_options  = OPTIONS.copy()
                self.dft_flags    = FLAGS.copy()
                self.dft_actions  = ACTIONS.copy()
                self.dft_settings = SETTINGS.copy()
                self.defaults     = self._get_defaults()
                self.off          = self._get_off()
                self.incremental  = ['verbose']
                self.numeric      = ['toc-level','split']
                self.multi        = ['infile', 'preproc', 'postproc',
                                     'options', 'style']

        def _get_defaults(self):
                "Get the default values for all config/options/flags"
                empty = {}
                # Every config keyword defaults to the empty string...
                for kw in CONFIG_KEYWORDS: empty[kw] = ''
                # ...then the options/flags/actions/settings defaults
                # are merged on top of it, in this order
                empty.update(self.dft_options)
                empty.update(self.dft_flags)
                empty.update(self.dft_actions)
                empty.update(self.dft_settings)
                empty['realcmdline'] = ''  # internal use only
                empty['sourcefile']  = ''  # internal use only
                return empty

        def _get_off(self):
                """
                Turns OFF all the config/options/flags

                Returns a dictionary with the OFF value for every key of
                self.defaults, chosen by the type of the default value:
                0 for numbers, '' for strings and [] for lists.
                Any other type is reported by Error().
                """
                off = {}
                for key in self.defaults.keys():
                        kind = type(self.defaults[key])
                        if kind == type(9):
                                off[key] = 0
                        elif kind == type('') or kind == type(u''):
                                off[key] = ''
                        elif kind == type([]):
                                off[key] = []
                        else:
                                # The key name must be interpolated into the
                                # message, not appended to the template
                                Error('ConfigMaster: %s: Unknown type'%key)
                return off

        def _check_target(self):
                "Checks if the target is already defined. If not, do it"
                if not self.target:
                        self.target = self.find_value('target')

        def get_target_raw(self):
                "Returns the raw config for self.target or 'all'"
                ret = []
                self._check_target()
                for entry in self.raw:
                        if entry[0] == self.target or entry[0] == 'all':
                                ret.append(entry)
                return ret

        def add(self, key, val):
                """
                Adds the key:value pair to the config dictionary (if needed)

                Bogus keys are ignored. A value equal to the default one
                removes any previous setting of that key from self.parsed.
                """
                # %!options
                if key == 'options':
                        # The value is a command line: parse it and add each
                        # option found, ignoring the target and the actions
                        # (except the two dump ones)
                        ignoreme = list(self.dft_actions.keys()) + ['target']
                        ignoreme.remove('dump-config')
                        ignoreme.remove('dump-source')
                        raw_opts = CommandLine().get_raw_config(
                                     val, ignore=ignoreme)
                        for opt_target, opt_key, opt_val in raw_opts:
                                self.add(opt_key, opt_val)
                        return
                # The no- prefix turns OFF this key
                if key[:3] == 'no-':
                        key = key[3:]              # remove prefix
                        val = self.off.get(key)    # turn key OFF
                # Is this key valid?
                if key not in self.defaults:
                        Debug('Bogus Config %s:%s'%(key,val),1)
                        return
                # Is this value the default one?
                if val == self.defaults.get(key):
                        # If default value, remove previous key:val
                        if key in self.parsed:
                                del self.parsed[key]
                        # Nothing more to do
                        return
                # Flags ON comes empty. we'll add the 1 value now
                if val == '' and (
                   key in self.dft_flags or
                   key in self.dft_actions):
                        val = 1
                # Multi value or single?
                if key in self.multi:
                        # First one? start new list
                        if key not in self.parsed:
                                self.parsed[key] = []
                        self.parsed[key].append(val)
                # Incremental value? so let's add it
                elif key in self.incremental:
                        self.parsed[key] = (self.parsed.get(key) or 0) + val
                else:
                        self.parsed[key] = val
                fancykey = dotted_spaces("%12s"%key)
                Message(_("Added config %s : %s")%(fancykey,val),3)

        def get_outfile_name(self, config={}):
                """
                Dirname is the same for {in,out}file

                Composes the output file name from config['sourcefile'],
                config['outfile'] and config['target']. A relative outfile
                is placed on the infile directory. With no outfile set,
                STDIN/MODULEIN input goes to STDOUT/MODULEOUT and a named
                infile gets its .txt/.t2t extension changed to the target.
                """
                infile, outfile = config['sourcefile'], config['outfile']
                if outfile and outfile not in (STDOUT, MODULEOUT) \
                   and not os.path.isabs(outfile):
                        outfile = os.path.join(os.path.dirname(infile), outfile)
                if infile == STDIN    and not outfile: outfile = STDOUT
                if infile == MODULEIN and not outfile: outfile = MODULEOUT
                if not outfile and (infile and config.get('target')):
                        basename = re.sub(r'\.(txt|t2t)$','',infile)
                        outfile = "%s.%s"%(basename, config['target'])
                Debug(" infile: '%s'"%infile , 1)
                Debug("outfile: '%s'"%outfile, 1)
                return outfile

        def sanity(self, config, gui=0):
                """
                Basic config sanity checking

                Returns a new config dictionary with all the keys present,
                the numeric values converted to integers and the final
                output file name set. An empty config returns {}.
                Problems are reported by Error(). When 'gui' is on, the
                target/infile checkings are skipped.
                """
                if not config: return {}
                target = config.get('target')
                # Some actions don't require target specification
                if not target:
                        for action in NO_TARGET:
                                if config.get(action):
                                        target = 'txt'
                                        break
                # On GUI, some checking are skipped
                if not gui:
                        # We *need* a target
                        if not target:
                                Error(_('No target specified (try --help)') +
                                      '\n\n' +
                                      _('Maybe trying to convert an old v1.x file?'))
                        # And of course, an infile also
                        if not config.get('infile'):
                                Error(_('Missing input file (try --help)'))
                        # Is the target valid?
                        if not TARGETS.count(target):
                                Error(_("Invalid target '%s' (try --help)")%
                                     target)
                # Ensure all keys are present
                empty = self.defaults.copy() ; empty.update(config)
                config = empty.copy()
                # Check integers options
                for key in config.keys():
                        if key in self.numeric:
                                try:
                                        config[key] = int(config[key])
                                # Only the int() conversion failures are
                                # a user mistake, let anything else pass
                                except (ValueError, TypeError):
                                        Error(_('--%s value must be a number'
                                                )%key)
                # Check split level value
                if config['split'] not in (0,1,2):
                        Error(_('Option --split must be 0, 1 or 2'))
                # --toc-only is stronger than others
                if config['toc-only']:
                        config['headers'] = 0
                        config['toc']     = 0
                        config['split']   = 0
                        config['gui']     = 0
                        config['outfile'] = config['outfile'] or STDOUT
                # Splitting is disable for now (future: HTML only, no STDOUT)
                config['split'] = 0
                # Restore target
                config['target'] = target
                # Set output file name
                config['outfile'] = self.get_outfile_name(config)
                # Checking suicide
                if config['sourcefile'] == config['outfile'] and \
                   config['outfile'] not in [STDOUT,MODULEOUT] and not gui:
                        Error(_("Input and Output files are the same: %s")%(
                        config['outfile']))
                return config

        def parse(self):
                "Returns the parsed config for the current target"
                raw = self.get_target_raw()
                for target, key, value in raw:
                        self.add(key, value)
                Message(_("Added the following keys: %s")%', '.join(
                         self.parsed.keys()),2)
                # A copy is returned, so the caller can change it freely
                return self.parsed.copy()

        def find_value(self, key='', target=''):
                """
                Scans ALL raw config to find the desired key

                Only the entries of the given target (or 'all') are used.
                Returns '' if the key was not found, the list of all the
                values found for multi value keys, or the last value found.
                """
                ret = []
                # Scan and save all values found
                for targ, k, val in self.raw:
                        if k == key and (targ == target or targ == 'all'):
                                ret.append(val)
                if not ret: return ''
                # If not multi value, return only the last found
                if key in self.multi: return ret
                else                : return ret[-1]
2224
2225 ########################################################################
2226
class ConfigLines:
        """
        ConfigLines class - the config file data extractor

        This class reads and parse the config lines on the %!key:val
        format, converting it to RAW config. It deals with user
        config file (RC file), source document CONF area and
        %!includeconf directives.

        Call it passing a file name or feed the desired config lines.
        Then just call the get_raw_config() method and wait to
        receive the full config data on the RAW format. This method
        also follows the possible %!includeconf directives found on
        the config lines. Example:

            raw = ConfigLines(file=".txt2tagsrc").get_raw_config()

        The parse_line() method is also useful to be used alone,
        to identify and tokenize a single config line. For example,
        to get the %!include command components, on the source
        document BODY:

            target, key, value = ConfigLines().parse_line(body_line)
        """
        # Placeholder saved on self.file when no file name was informed
        _NOFILE = 'NOFILE'

        # Splits the two values of %!preproc and %!postproc
        _prepost_rgx = re.compile(r"""
                                        # ---[ PATTERN ]---
                  ^( "([^"]*)"          # "double quoted" or
                   | '([^']*)'          # 'single quoted' or
                   | ([^\s]+)           # single_word
                   )
                    \s+                 # separated by spaces

                                        # ---[ REPLACE ]---
                       ( "([^"]*)"      # "double quoted" or
                       | '([^']*)'      # 'single quoted' or
                       | (.*)           # anything
                           )
                            \s*$
                  """, re.VERBOSE)

        # The %!guicolors value must have (at least) 4 tokens
        _guicolors_rgx = re.compile(r"^([^\s]+\s+){3}[^\s]+")

        def __init__(self, file='', lines=[], first_line=1):
                """
                file       - the config file name (optional if lines is set)
                lines      - the config lines, if already read
                first_line - number of the first line, used on messages
                """
                self.file = file or self._NOFILE
                self.lines = lines
                self.first_line = first_line

        def load_lines(self):
                "Make sure we've loaded the file contents into buffer"
                if self.lines:
                        return
                # self.file is never empty (see __init__), so the "nothing
                # was informed" case is detected by the placeholder name
                if self.file == self._NOFILE:
                        Error("ConfigLines: No file or lines provided")
                self.lines = self.read_config_file(self.file)

        def read_config_file(self, filename=''):
                "Read a Config File contents, aborting on invalid line"
                if not filename: return []
                errormsg = _("Invalid CONFIG line on %s")+"\n%03d:%s"
                lines = Readfile(filename, remove_linebreaks=1)
                # Sanity: try to find invalid config lines
                for i in range(len(lines)):
                        line = lines[i].rstrip()
                        if not line: continue  # empty
                        # Anything that is not a comment/config is an error
                        if line[0] != '%': Error(errormsg%(filename,i+1,line))
                return lines

        def include_config_file(self, file=''):
                "Perform the %!includeconf action, returning RAW config"
                if not file: return []
                # Current dir relative to the current file (self.file)
                current_dir = os.path.dirname(self.file)
                file = os.path.join(current_dir, file)
                # Read and parse included config file contents
                lines = self.read_config_file(file)
                return ConfigLines(file=file, lines=lines).get_raw_config()

        def get_raw_config(self):
                "Scan buffer and extract all config as RAW (including includes)"
                ret = []
                self.load_lines()
                first = self.first_line
                for i in range(len(self.lines)):
                        line = self.lines[i]
                        Message(_("Processing line %03d: %s")%(first+i,line),2)
                        target, key, val = self.parse_line(line)
                        if not key: continue    # no config on this line
                        if key == 'includeconf':
                                err = _('A file cannot include itself (loop!)')
                                # The included name is relative to this file
                                # dir, so check the resolved path also
                                resolved = os.path.join(
                                        os.path.dirname(self.file), val)
                                if val == self.file or resolved == self.file:
                                        Error("%s: %%!includeconf: %s"%(
                                                err, self.file))
                                more_raw = self.include_config_file(val)
                                ret.extend(more_raw)
                                Message(_("Finished Config file inclusion: %s"
                                          )%(val),2)
                        else:
                                ret.append([target, key, val])
                                Message(_("Added %s")%key,3)
                return ret

        def parse_line(self, line='', keyname='', target=''):
                """
                Detects %!key:val config lines and extract data from it

                line    - the line to be parsed
                keyname - regex for the accepted key names (default: any)
                target  - regex for the accepted targets (default: any)

                Returns the [target, key, value] list, or ['', '', ''] if
                the line is not a (valid) config line. The value is a
                string, except for guicolors (list of tokens) and
                preproc/postproc (the (pattern, replace) tuple).
                """
                empty = ['', '', '']
                if not line: return empty
                no_target = ['target', 'includeconf']
                re_name   = keyname or '[a-z]+'
                re_target = target  or '[a-z]*'
                cfgregex  = re.compile(r"""
                  ^%%!\s*               # leading id with opt spaces
                  (?P<name>%s)\s*       # config name
                  (\((?P<target>%s)\))? # optional target spec inside ()
                  \s*:\s*               # key:value delimiter with opt spaces
                  (?P<value>\S.*?)      # config value (one char or more)
                  \s*$                  # rstrip() spaces and hit EOL
                  """%(re_name,re_target), re.I+re.VERBOSE)
                match = cfgregex.match(line)
                if not match: return empty

                name   = (match.group('name') or '').lower()
                target = (match.group('target') or 'all').lower()
                value  = match.group('value')

                # NO target keywords: force all targets
                if name in no_target: target = 'all'

                # Special config for GUI colors
                if name == 'guicolors':
                        valmatch = self._guicolors_rgx.search(value)
                        if not valmatch: return empty
                        value = re.split(r'\s+', value)

                # Special config with two quoted values (%!preproc: "foo" 'bar')
                if name == 'preproc' or name == 'postproc':
                        valmatch = self._prepost_rgx.search(value)
                        if not valmatch: return empty
                        getval = valmatch.group
                        patt   = getval(2) or getval(3) or getval(4) or ''
                        repl   = getval(6) or getval(7) or getval(8) or ''
                        value  = (patt, repl)
                return [target, name, value]
2366
2367 ##############################################################################
2368
class MaskMaster:
        """
        (Un)Protect important structures from escaping and formatting

        mask() swaps raw text, monospaced text, macros, the TOC mark
        and links by placeholders, saving the original contents on
        the banks. undo() puts the (tagged) contents back on the line.
        """
        def __init__(self):
                # One placeholder for each kind of protected structure
                self.linkmask  = 'vvvLINKvvv'
                self.monomask  = 'vvvMONOvvv'
                self.macromask = 'vvvMACROvvv'
                self.rawmask   = 'vvvRAWvvv'
                self.tocmask   = 'vvvTOCvvv'
                self.macroman  = MacroMaster()
                self.reset()

        def reset(self):
                "Empties all the banks"
                self.linkbank  = []
                self.monobank  = []
                self.macrobank = []
                self.rawbank   = []

        def mask(self, line=''):
                "Returns the line with the important structures masked"
                global AUTOTOC

                # Raw text: save it escaped
                raw_rgx = regex['raw']
                found = raw_rgx.search(line)
                while found:
                        self.rawbank.append(doEscape(TARGET, found.group(1)))
                        line = raw_rgx.sub(self.rawmask, line, 1)
                        found = raw_rgx.search(line)

                # Pre-formatted font text: save it escaped
                mono_rgx = regex['fontMono']
                found = mono_rgx.search(line)
                while found:
                        self.monobank.append(doEscape(TARGET, found.group(1)))
                        line = mono_rgx.sub(self.monomask, line, 1)
                        found = mono_rgx.search(line)

                # Macros: save the full match, it is expanded on undo()
                macro_rgx = regex['macros']
                found = macro_rgx.search(line)
                while found:
                        self.macrobank.append(found.group())
                        line = macro_rgx.sub(self.macromask, line, 1)
                        found = macro_rgx.search(line)

                # TOC location: when the mark is found, automatic TOC is off
                while regex['toc'].search(line):
                        line = regex['toc'].sub(self.tocmask, line)
                        AUTOTOC = 0

                # URLs and emails, as plain or named links
                named_rgx = regex['linkmark']
                plain_rgx = regex['link']
                while 1:
                        plain = plain_rgx.search(line)
                        named = named_rgx.search(line)
                        if not plain and not named:
                                break

                        # The named link wins only when it is the single
                        # one found or when it starts before the plain one
                        if named and (not plain or
                                      named.start() < plain.start()):
                                link  = named.group('link')
                                label = named.group('label').rstrip()
                                line  = named_rgx.sub(self.linkmask, line, 1)
                        else:
                                link  = plain.group()
                                label = ''
                                line  = plain_rgx.sub(self.linkmask, line, 1)

                        # Save link data to the link bank
                        self.linkbank.append((label, link))
                return line

        def undo(self, line):
                "Returns the line with the masks swapped by the saved data"

                # Links first, already tagged
                for label, url in self.linkbank:
                        tagged = get_tagged_link(label, url)
                        line = line.replace(self.linkmask, tagged, 1)

                # Then the macros, expanded now
                for macro in self.macrobank:
                        expanded = self.macroman.expand(macro)
                        line = line.replace(self.macromask, expanded, 1)

                # Monospaced text gets its tags around
                for mono in self.monobank:
                        tagged = (TAGS['fontMonoOpen'] + mono +
                                  TAGS['fontMonoClose'])
                        line = line.replace(self.monomask, tagged, 1)

                # Raw text returns as it was saved
                for raw in self.rawbank:
                        line = line.replace(self.rawmask, raw, 1)

                return line
2470
2471
2472 ##############################################################################
2473
2474
2475 class TitleMaster:
2476         "Title things"
2477         def __init__(self):
2478                 self.count = ['',0,0,0,0,0]
2479                 self.toc   = []
2480                 self.level = 0
2481                 self.kind  = ''
2482                 self.txt   = ''
2483                 self.label = ''
2484                 self.tag   = ''
2485                 self.tag_hold = []
2486                 self.last_level = 0
2487                 self.count_id = ''
2488                 self.user_labels = {}
2489                 self.anchor_count = 0
2490                 self.anchor_prefix = 'toc'
2491
2492         def _open_close_blocks(self):
2493                 "Open new title blocks, closing the previous (if any)"
2494                 if not rules['titleblocks']: return
2495                 tag = ''
2496                 last = self.last_level
2497                 curr = self.level
2498
2499                 # Same level, just close the previous
2500                 if curr == last:
2501                         tag = TAGS.get('title%dClose'%last)
2502                         if tag: self.tag_hold.append(tag)
2503
2504                 # Section -> subsection, more depth
2505                 while curr > last:
2506                         last = last + 1
2507
2508                         # Open the new block of subsections
2509                         tag = TAGS.get('blockTitle%dOpen'%last)
2510                         if tag: self.tag_hold.append(tag)
2511
2512                         # Jump from title1 to title3 or more
2513                         # Fill the gap with an empty section
2514                         if curr - last > 0:
2515                                 tag = TAGS.get('title%dOpen'%last)
2516                                 tag = regex['x'].sub('', tag)      # del \a
2517                                 if tag: self.tag_hold.append(tag)
2518
2519                 # Section <- subsection, less depth
2520                 while curr < last:
2521                         # Close the current opened subsection
2522                         tag = TAGS.get('title%dClose'%last)
2523                         if tag: self.tag_hold.append(tag)
2524
2525                         # Close the current opened block of subsections
2526                         tag = TAGS.get('blockTitle%dClose'%last)
2527                         if tag: self.tag_hold.append(tag)
2528
2529                         last = last - 1
2530
2531                         # Close the previous section of the same level
2532                         # The subsections were under it
2533                         if curr == last:
2534                                 tag = TAGS.get('title%dClose'%last)
2535                                 if tag: self.tag_hold.append(tag)
2536
2537         def add(self, line):
2538                 "Parses a new title line."
2539                 if not line: return
2540                 self._set_prop(line)
2541                 self._open_close_blocks()
2542                 self._set_count_id()
2543                 self._set_label()
2544                 self._save_toc_info()
2545
2546         def close_all(self):
2547                 "Closes all opened title blocks"
2548                 ret = []
2549                 ret.extend(self.tag_hold)
2550                 while self.level:
2551                         tag = TAGS.get('title%dClose'%self.level)
2552                         if tag: ret.append(tag)
2553                         tag = TAGS.get('blockTitle%dClose'%self.level)
2554                         if tag: ret.append(tag)
2555                         self.level = self.level - 1
2556                 return ret
2557
2558         def _save_toc_info(self):
2559                 "Save TOC info, used by self.dump_marked_toc()"
2560                 self.toc.append((self.level, self.count_id,
2561                                  self.txt  , self.label   ))
2562
2563         def _set_prop(self, line=''):
2564                 "Extract info from original line and set data holders."
2565                 # Detect title type (numbered or not)
2566                 id = string.lstrip(line)[0]
2567                 if   id == '=': kind = 'title'
2568                 elif id == '+': kind = 'numtitle'
2569                 else: Error("Unknown Title ID '%s'"%id)
2570                 # Extract line info
2571                 match = regex[kind].search(line)
2572                 level = len(match.group('id'))
2573                 txt   = string.strip(match.group('txt'))
2574                 label = match.group('label')
2575                 # Parse info & save
2576                 if CONF['enum-title']: kind = 'numtitle'  # force
2577                 if rules['titleblocks']:
2578                         self.tag = TAGS.get('%s%dOpen'%(kind,level)) or \
2579                                    TAGS.get('title%dOpen'%level)
2580                 else:
2581                         self.tag = TAGS.get(kind+`level`) or \
2582                                    TAGS.get('title'+`level`)
2583                 self.last_level = self.level
2584                 self.kind  = kind
2585                 self.level = level
2586                 self.txt   = txt
2587                 self.label = label
2588
2589         def _set_count_id(self):
2590                 "Compose and save the title count identifier (if needed)."
2591                 count_id = ''
2592                 if self.kind == 'numtitle' and not rules['autonumbertitle']:
2593                         # Manually increase title count
2594                         self.count[self.level] = self.count[self.level] +1
2595                         # Reset sublevels count (if any)
2596                         max_levels = len(self.count)
2597                         if self.level < max_levels-1:
2598                                 for i in xrange(self.level+1, max_levels):
2599                                         self.count[i] = 0
2600                         # Compose count id from hierarchy
2601                         for i in xrange(self.level):
2602                                 count_id= "%s%d."%(count_id, self.count[i+1])
2603                 self.count_id = count_id
2604
2605         def _set_label(self):
2606                 "Compose and save title label, used by anchors."
2607                 # Remove invalid chars from label set by user
2608                 self.label = re.sub('[^A-Za-z0-9_-]', '', self.label or '')
2609                 # Generate name as 15 first :alnum: chars
2610                 #TODO how to translate safely accented chars to plain?
2611                 #self.label = re.sub('[^A-Za-z0-9]', '', self.txt)[:15]
2612                 # 'tocN' label - sequential count, ignoring 'toc-level'
2613                 #self.label = self.anchor_prefix + str(len(self.toc)+1)
2614
2615         def _get_tagged_anchor(self):
2616                 "Return anchor if user defined a label, or TOC is on."
2617                 ret = ''
2618                 label = self.label
2619                 if CONF['toc'] and self.level <= CONF['toc-level']:
2620                         # This count is needed bcos self.toc stores all
2621                         # titles, regardless of the 'toc-level' setting,
2622                         # so we can't use self.toc length to number anchors
2623                         self.anchor_count = self.anchor_count + 1
2624                         # Autonumber label (if needed)
2625                         label = label or '%s%s'%(
2626                                 self.anchor_prefix, self.anchor_count)
2627                 if label and TAGS['anchor']:
2628                         ret = regex['x'].sub(label,TAGS['anchor'])
2629                 return ret
2630
2631         def _get_full_title_text(self):
2632                 "Returns the full title contents, already escaped."
2633                 ret = self.txt
2634                 # Insert count_id (if any) before text
2635                 if self.count_id:
2636                         ret = '%s %s'%(self.count_id, ret)
2637                 # Escape specials
2638                 ret = doEscape(TARGET, ret)
2639                 # Same targets needs final escapes on title lines
2640                 # It's here because there is a 'continue' after title
2641                 if rules['finalescapetitle']:
2642                         ret = doFinalEscape(TARGET, ret)
2643                 return ret
2644
2645         def get(self):
2646                 "Returns the tagged title as a list."
2647                 ret = []
2648
2649                 # Maybe some anchoring before?
2650                 anchor = self._get_tagged_anchor()
2651                 self.tag = regex['_anchor'].sub(anchor, self.tag)
2652
2653                 ### Compose & escape title text (TOC uses unescaped)
2654                 full_title = self._get_full_title_text()
2655
2656                 # Close previous section area
2657                 ret.extend(self.tag_hold)
2658                 self.tag_hold = []
2659
2660                 tagged = regex['x'].sub(full_title, self.tag)
2661
2662                 if rules['breaktitleopen']:
2663                         tagged = tagged + LB
2664
2665                 # Adds "underline" on TXT target
2666                 if TARGET == 'txt':
2667                         ret.append('') # blank line before
2668                         ret.append(tagged)
2669                         # Get the right letter count for UTF
2670                         if string.lower(CONF['encoding']) == 'utf-8':
2671                                 i = len(full_title.decode('utf-8'))
2672                         else:
2673                                 i = len(full_title)
2674                         ret.append(regex['x'].sub('='*i, self.tag))
2675                         ret.append('') # blank line after
2676                 else:
2677                         ret.append(tagged)
2678                 return ret
2679
2680         def dump_marked_toc(self, max_level=99):
2681                 "Dumps all toc itens as a valid t2t markup list"
2682                 #TODO maybe use quote+linebreaks instead lists
2683                 ret = []
2684                 toc_count = 1
2685                 for level, count_id, txt, label in self.toc:
2686                         if level > max_level: continue   # ignore
2687                         indent = '  '*level
2688                         id_txt = string.lstrip('%s %s'%(count_id, txt))
2689                         label = label or self.anchor_prefix+`toc_count`
2690                         toc_count = toc_count + 1
2691                         # TOC will have links
2692                         if TAGS['anchor']:
2693                                 # TOC is more readable with master topics
2694                                 # not linked at number. This is a stoled
2695                                 # idea from Windows .CHM help files
2696                                 if CONF['enum-title'] and level == 1:
2697                                         tocitem = '%s+ [""%s"" #%s]'%(
2698                                                   indent, txt, label)
2699                                 else:
2700                                         tocitem = '%s- [""%s"" #%s]'%(
2701                                                   indent, id_txt, label)
2702                         # No links on TOC, just text
2703                         else:
2704                                 # man don't reformat TOC lines, cool!
2705                                 if TARGET == 'txt' or TARGET == 'man':
2706                                         tocitem = '%s""%s""' %(
2707                                                   indent, id_txt)
2708                                 else:
2709                                         tocitem = '%s- ""%s""'%(
2710                                                   indent, id_txt)
2711                         ret.append(tocitem)
2712                 return ret
2713
2714
2715 ##############################################################################
2716
2717 #TODO check all this table mess
# Handles TABLE lines, using the properties returned by parse_row
# The table() method of BLOCK replaces the cells with the parsed ones
class TableMaster:
        """Parses table source lines and dumps the table with target tags.

        Rows are stored (add_row) as the dictionaries returned by
        parse_row(). The table wide properties (border, align, cellalign,
        cellspan) come from the line given to the constructor, if any.
        """
        def __init__(self, line=''):
                self.rows      = []
                self.border    = 0
                self.align     = 'Left'
                self.cellalign = []
                self.cellspan  = []
                if line:
                        prop = self.parse_row(line)
                        self.border    = prop['border']
                        self.align     = prop['align']
                        self.cellalign = prop['cellalign']
                        self.cellspan  = prop['cellspan']

        def _get_open_tag(self):
                "Returns the table open tag with align/border/columns filled."
                topen     = TAGS['tableOpen']
                tborder   = TAGS['_tableBorder']
                talign    = TAGS['_tableAlign'+self.align]
                calignsep = TAGS['tableColAlignSep']
                calign    = ''

                # The first line defines if table has border or not
                if not self.border: tborder = ''
                # Set the columns alignment
                if rules['tablecellaligntype'] == 'column':
                        calign = calignsep.join(
                                [TAGS['_tableColAlign%s'%x]
                                 for x in self.cellalign])
                # Align full table, set border and Column align (if any)
                topen = regex['_tableAlign'   ].sub(talign , topen)
                topen = regex['_tableBorder'  ].sub(tborder, topen)
                topen = regex['_tableColAlign'].sub(calign , topen)
                # Tex table spec, border or not: {|l|c|r|} , {lcr}
                if calignsep and not self.border:
                        # Remove cell align separator
                        topen = topen.replace(calignsep, '')
                return topen

        def _get_cell_align(self, cells):
                """Returns the align name ('Left', 'Center', 'Right') per cell.

                Spaces on both edges mean Center, a leading space only means
                Right. Blank cells and everything else are Left.
                """
                ret = []
                for cell in cells:
                        align = 'Left'
                        if cell.strip():
                                if cell[0] == ' ' and cell[-1] == ' ':
                                        align = 'Center'
                                elif cell[0] == ' ':
                                        align = 'Right'
                        ret.append(align)
                return ret

        def _get_cell_span(self, cells):
                """Returns the column span of each cell (0 means no span).

                A span is flagged by parse_row() as a BEL char followed by
                pipes at the end of the cell: N pipes mean a span of N+1.
                """
                ret = []
                for cell in cells:
                        span = 0
                        m = re.search('\a(\\|+)$', cell)
                        if m: span = len(m.group(1))+1
                        ret.append(span)
                return ret

        def _tag_cells(self, rowdata):
                """Returns one row as a string, with all its cells tagged.

                rowdata is a parse_row() dictionary. Its 'cells' may hold
                more items than 'cellalign' and 'cellspan'; the missing
                align/span values are taken as empty.
                """
                row = []
                cells     = rowdata['cells']
                cellopen  = TAGS['tableCellOpen']
                cellclose = TAGS['tableCellClose']
                sep       = TAGS['tableCellSep']
                calign = [TAGS['_tableCellAlign'+x]
                          for x in rowdata['cellalign']]
                # Populate the span tag
                cspan = []
                for i in rowdata['cellspan']:
                        if i > 0:
                                cspan.append(regex['x'].sub(
                                        str(i), TAGS['_tableCellColSpan']))
                        else:
                                cspan.append('')

                # Maybe is it a title row?
                if rowdata['title']:
                        cellopen  = TAGS['tableTitleCellOpen']  or cellopen
                        cellclose = TAGS['tableTitleCellClose'] or cellclose
                        sep       = TAGS['tableTitleCellSep']   or sep

                # Should we break the line on *each* table cell?
                if rules['breaktablecell']: cellclose = cellclose+'\n'

                # Cells pre processing
                if rules['tablecellstrip']:
                        cells = [x.strip() for x in cells]
                if rowdata['title'] and rules['tabletitlerowinbold']:
                        cells = [enclose_me('fontBold', x) for x in cells]

                # Add cell BEGIN/END tags
                for i in range(len(cells)):
                        copen = cellopen
                        # The align/span lists may be shorter than the
                        # cells list. Fixes empty line bug '| |'
                        this_align = this_span = ''
                        if i < len(calign): this_align = calign[i]
                        if i < len(cspan) : this_span  = cspan[i]
                        # Insert cell align into open tag (if cell is alignable)
                        if rules['tablecellaligntype'] == 'cell':
                                copen = regex['_tableCellAlign'].sub(
                                        this_align, copen)
                        if rules['tablecellspannable']:
                                copen = regex['_tableCellColSpan'].sub(
                                        this_span, copen)
                        row.append(copen + cells[i] + cellclose)

                # Maybe there are cell separators?
                return sep.join(row)

        def add_row(self, cells):
                "Appends one row (a parse_row() dictionary) to the table."
                self.rows.append(cells)

        def parse_row(self, line):
                """Parses a table source line and returns its properties.

                The returned dictionary has the keys: border, title, align,
                cells, cellalign and cellspan. An empty or very short line
                is accepted and just gets the default properties.
                """
                # Default table properties
                ret = {'border':0,'title':0,'align':'Left',
                       'cells':[],'cellalign':[], 'cellspan':[]}
                # Detect table align (and remove spaces mark)
                # Slices are used instead of indexes, so short lines
                # don't raise IndexError
                if line[:1] == ' ': ret['align'] = 'Center'
                line = line.lstrip()
                # Detect title mark
                if line[1:2] == '|': ret['title'] = 1
                # Detect border mark and normalize the EOL
                m = re.search(' (\\|+) *$', line)
                if m: line = line+' ' ; ret['border'] = 1
                else: line = line+' | '
                # Delete table mark
                line = regex['table'].sub('', line)
                # Detect colspan  | foo | bar baz |||
                # The extra pipes are kept on the cell, flagged by a BEL
                line = re.sub(' (\\|+)\\| ', '\a\\1 | ', line)
                # Split cells (the last is fake)
                ret['cells'] = line.split(' | ')[:-1]
                # Find cells span
                ret['cellspan'] = self._get_cell_span(ret['cells'])
                # Remove span ID
                ret['cells'] = [re.sub('\a\\|+$', '', x)
                                for x in ret['cells']]
                # Find cells align
                ret['cellalign'] = self._get_cell_align(ret['cells'])
                # Hooray!
                Debug('Table Prop: %s' % ret, 7)
                return ret

        def dump(self):
                """Returns the full tagged table as a list of lines.

                The list is: open tag, one item per row, close tag, plus a
                trailing empty item if the 'blankendtable' rule is on.
                """
                tableopen  = self._get_open_tag()
                rows       = self.rows
                tableclose = TAGS['tableClose']

                rowopen     = TAGS['tableRowOpen']
                rowclose    = TAGS['tableRowClose']
                rowsep      = TAGS['tableRowSep']
                titrowopen  = TAGS['tableTitleRowOpen']  or rowopen
                titrowclose = TAGS['tableTitleRowClose'] or rowclose

                if rules['breaktablelineopen']:
                        rowopen = rowopen + '\n'
                        titrowopen = titrowopen + '\n'

                # Tex gotchas
                if TARGET == 'tex':
                        if not self.border:
                                rowopen = titrowopen = ''
                        else:
                                tableclose = rowopen + tableclose

                # Now we tag all the table cells on each row
                tagged_cells = [self._tag_cells(rowdata) for rowdata in rows]

                # Add row separator tags between lines
                tagged_rows = []
                if rowsep:
                        tagged_rows = [x + rowsep for x in tagged_cells]
                        # Remove last rowsep, because the table is over
                        # (guarded: a table with no rows has no last row)
                        # NOTE(review): this removes every rowsep found on
                        # the last row, not only the trailing one
                        if tagged_rows:
                                tagged_rows[-1] = tagged_rows[-1].replace(
                                        rowsep, '')
                # Add row BEGIN/END tags for each line
                else:
                        for i in range(len(rows)):
                                if rows[i]['title']:
                                        o,c = titrowopen, titrowclose
                                else:
                                        o,c = rowopen, rowclose
                                tagged_rows.append(o + tagged_cells[i] + c)

                fulltable = [tableopen] + tagged_rows + [tableclose]

                if rules['blankendtable']: fulltable.append('')
                return fulltable
2914
2915
2916 ##############################################################################
2917
2918
class BlockMaster:
        """Keeps the stack of open blocks and tags them when they close.

        TIP: use blockin/out to add/del holders

        Three parallel stacks are kept, the innermost block being last:
        BLK holds the block names, HLD holds the lines of each block and
        PRP holds a dictionary of properties for each block. When a block
        is closed, the method named after it (para, verb, list, ...)
        composes its tagged lines.
        """
        def __init__(self):
                self.BLK = []
                self.HLD = []
                self.PRP = []
                self.depth = 0
                self.last = ''
                self.tableparser = None
                # Which blocks may be opened inside each block
                self.contains = {
                  'para'    :['comment','raw'],
                  'verb'    :[],
                  'table'   :['comment'],
                  'raw'     :[],
                  'tagged'  :[],
                  'comment' :[],
                  'quote'   :['quote','comment','raw'],
                  'list'    :['list' ,'numlist' ,'deflist','para','verb',
                              'comment', 'raw'],
                  'numlist' :['list' ,'numlist' ,'deflist','para','verb',
                              'comment', 'raw'],
                  'deflist' :['list' ,'numlist' ,'deflist','para','verb',
                              'comment', 'raw']
                }
                self.allblocks = list(self.contains.keys())
                # If one found inside another, ignore the marks
                self.exclusive = ['comment','verb','raw']

        def block(self):
                "Returns the name of the innermost open block ('' if none)."
                if not self.BLK: return ''
                return self.BLK[-1]

        def isblock(self, name=''):
                "Tells if the innermost open block is the named one."
                return self.block() == name

        def prop(self, key):
                "Returns a property of the innermost block ('' if unset)."
                if not self.PRP: return ''
                return self.PRP[-1].get(key) or ''

        def propset(self, key, val):
                "Sets a property on the innermost block."
                self.PRP[-1][key] = val
                #Debug('BLOCK prop ++: %s->%s'%(key,repr(val)), 1)
                #Debug('BLOCK props: %s'%(repr(self.PRP)), 1)

        def hold(self):
                "Returns the holder of the innermost block ([] if none)."
                if not self.HLD: return []
                return self.HLD[-1]

        def holdadd(self, line):
                "Adds a line to the holder (on lists, as a new item)."
                if self.block()[-4:] == 'list': line = [line]
                self.HLD[-1].append(line)
                Debug('HOLD add: %s'%repr(line), 4)
                Debug('FULL HOLD: %s'%self.HLD, 4)

        def holdaddsub(self, line):
                "Adds a line to the last item of the holder."
                self.HLD[-1][-1].append(line)
                Debug('HOLD addsub: %s'%repr(line), 4)
                Debug('FULL HOLD: %s'%self.HLD, 4)

        def holdextend(self, lines):
                "Adds many lines to the holder (on lists, as one new item)."
                if self.block()[-4:] == 'list': lines = [lines]
                self.HLD[-1].extend(lines)
                Debug('HOLD extend: %s'%repr(lines), 4)
                Debug('FULL HOLD: %s'%self.HLD, 4)

        def blockin(self, block):
                """Opens a new block, closing first the ones that can't hold it.

                Returns the tagged lines of the blocks closed on the way.
                """
                ret = []
                if block not in self.allblocks:
                        Error("Invalid block '%s'"%block)
                # First, let's close other possible open blocks
                while self.block() and block not in self.contains[self.block()]:
                        ret.extend(self.blockout())
                # Now we can gladly add this new one
                self.BLK.append(block)
                self.HLD.append([])
                self.PRP.append({})
                if block == 'table': self.tableparser = TableMaster()
                # Deeper and deeper
                self.depth = len(self.BLK)
                Debug('block ++ (%s): %s' % (block,self.BLK), 3)
                return ret

        def blockout(self):
                """Closes the innermost block and returns its tagged lines.

                If there is still a block open, the result is saved on its
                holder (comment blocks are dropped) and [] is returned.
                """
                if not self.BLK: Error('No block to pop')
                self.last = self.BLK.pop()
                # The name is popped first, but the holder, the properties
                # and self.depth still belong to the closing block while
                # its method runs
                result = getattr(self, self.last)()
                parsed = self.HLD.pop()
                self.PRP.pop()
                self.depth = len(self.BLK)
                # Reset to the __init__ state, instead of deleting the
                # attribute, so it can always be read
                if self.last == 'table': self.tableparser = None
                # Inserting a nested block into mother
                if self.block():
                        if self.last != 'comment': # ignore comment blocks
                                if self.block()[-4:] == 'list':
                                        self.HLD[-1][-1].append(result)
                                else:
                                        self.HLD[-1].append(result)
                        # Reset now. Mother block will have it all
                        result = []
                Debug('block -- (%s): %s' % (self.last,self.BLK), 3)
                Debug('RELEASED (%s): %s' % (self.last,parsed), 3)
                if result: Debug('BLOCK: %s'%result, 6)
                return result

        def _last_escapes(self, line):
                "Applies the final escapes of the target to the line."
                return doFinalEscape(TARGET, line)

        def _get_escaped_hold(self):
                """Returns the holder as a flat list of lines.

                The strings get the final escapes. The lists (already
                tagged nested blocks) are inserted as they are.
                """
                ret = []
                for line in self.hold():
                        if isinstance(line, (type(''), type(u''))):
                                ret.append(self._last_escapes(line))
                        elif isinstance(line, type([])):
                                ret.extend(line)
                        else:
                                Error("BlockMaster: Unknown HOLD item type:"
                                      " %s"%type(line))
                return ret

        def _remove_twoblanks(self, lastitem):
                "Returns the list without its two trailing '' items, if any."
                if len(lastitem) > 1 and lastitem[-2:] == ['','']:
                        return lastitem[:-2]
                return lastitem

        def tagged(self):
                "Tagged lines are returned untouched."
                return self.hold()

        def comment(self):
                "Comments produce no output."
                return ''

        def raw(self):
                "Raw lines are just escaped."
                return [doEscape(TARGET, x) for x in self.hold()]

        def para(self):
                "Returns the paragraph lines, with the open/close tags."
                result = []
                popen  = TAGS['paragraphOpen']
                pclose = TAGS['paragraphClose']
                lines = self._get_escaped_hold()
                # Open (or not) paragraph
                # An empty tag after another para is skipped, to avoid
                # multiple blank lines
                # NOTE(review): blockout() sets self.last to the closing
                # block before calling this method, so when called from
                # there the second test can never be true
                if popen+pclose or self.last != 'para':
                        result.append(popen)
                # Pagemaker likes a paragraph as a single long line
                if rules['onelinepara']:
                        result.append(' '.join(lines))
                # Others are normal :)
                else:
                        result.extend(lines)
                result.append(pclose)

                # Very very very very very very very very very UGLY fix
                # Needed because <center> can't appear inside <p>
                try:
                        if len(lines) == 1 and \
                           TARGET in ('html', 'xhtml') and \
                           re.match(r'^\s*<center>.*</center>\s*$', lines[0]):
                                result = [lines[0]]
                except TypeError:
                        # The single item is not a string, so it can't be
                        # a <center> line: keep the paragraph as is
                        pass

                return result

        def verb(self):
                "Verbatim lines are not masked, so there's no need to unmask"
                result = []
                result.append(TAGS['blockVerbOpen'])
                for line in self.hold():
                        if self.prop('mapped') == 'table':
                                line = MacroMaster().expand(line)
                        if not rules['verbblocknotescaped']:
                                line = doEscape(TARGET,line)
                        if rules['indentverbblock']:
                                line = '  '+line
                        if rules['verbblockfinalescape']:
                                line = doFinalEscape(TARGET, line)
                        result.append(line)
                #TODO maybe use if not TAGS['blockVerbClose']
                if TARGET != 'pm6':
                        result.append(TAGS['blockVerbClose'])
                return result

        def table(self):
                "Returns the tagged table, composed by the table parser."
                # Rewrite all table cells by the unmasked and escaped data
                lines = self._get_escaped_hold()
                for i in range(len(lines)):
                        cells = lines[i].split(SEPARATOR)
                        self.tableparser.rows[i]['cells'] = cells

                return self.tableparser.dump()

        def quote(self):
                "Returns the quoted lines, with block and/or line tags."
                result = []
                myre   = regex['quote']
                qopen  = TAGS['blockQuoteOpen']            # block based
                qclose = TAGS['blockQuoteClose']
                qline  = TAGS['blockQuoteLine']            # line based
                indent = tagindent = '\t'*self.depth
                if rules['tagnotindentable']: tagindent = ''
                if not rules['keepquoteindent']: indent = ''

                if qopen: result.append(tagindent+qopen)   # open block
                for item in self.hold():
                        if isinstance(item, type([])):
                                result.extend(item)        # subquotes
                        else:
                                item = myre.sub('', item)  # del TABs
                                if rules['barinsidequote']:
                                        item = get_tagged_bar(item)
                                item = self._last_escapes(item)
                                item = qline*self.depth + item
                                result.append(indent+item) # quote line
                if qclose: result.append(tagindent+qclose) # close block
                return result

        def deflist(self): return self.list('deflist')
        def numlist(self): return self.list('numlist')
        def list(self, name='list'):
                """Returns the tagged lines of a list, numlist or deflist.

                Each item of the holder is a list: the item line first,
                then its next lines and/or already tagged nested blocks.
                The items are changed in place while being tagged.
                """
                result    = []
                items     = self.hold()
                indent    = self.prop('indent')
                tagindent = indent
                listopen  = TAGS.get(name+'Open')
                listclose = TAGS.get(name+'Close')
                listline  = TAGS.get(name+'ItemLine')
                itemcount = 0
                if rules['tagnotindentable']: tagindent = ''
                if not rules['keeplistindent']: indent = tagindent = ''

                if name == 'deflist':
                        itemopen  = TAGS[name+'Item1Open']
                        itemclose = TAGS[name+'Item2Close']
                        itemsep   = TAGS[name+'Item1Close']+\
                                    TAGS[name+'Item2Open']
                else:
                        itemopen  = TAGS[name+'ItemOpen']
                        itemclose = TAGS[name+'ItemClose']
                        itemsep   = ''

                # ItemLine: number of leading chars identifies list depth
                if listline:
                        itemopen  = listline*self.depth + itemopen

                # Adds trailing space on opening tags
                if (name == 'list'    and rules['spacedlistitemopen']) or \
                   (name == 'numlist' and rules['spacednumlistitemopen']):
                        itemopen = itemopen + ' '

                # Remove two-blanks from list ending mark, to avoid <p>
                # (guarded: a list with no items has no last item)
                if items:
                        items[-1] = self._remove_twoblanks(items[-1])

                # Open list (not nestable lists are only opened at mother)
                if listopen and not \
                   (rules['listnotnested'] and BLOCK.depth != 1):
                        result.append(tagindent+listopen)

                # Tag each list item (multiline items)
                itemopenorig = itemopen
                for item in items:

                        # Add "manual" item count for noautonum targets
                        itemcount = itemcount + 1
                        if name == 'numlist' and not rules['autonumberlist']:
                                itemopen = regex['x'].sub(
                                        str(itemcount), itemopenorig)

                        # Tag it
                        item[0] = self._last_escapes(item[0])
                        if name == 'deflist':
                                z,term,rest = item[0].split(SEPARATOR, 2)
                                item[0] = rest
                                if not item[0]: del item[0]      # to avoid <p>
                                result.append(tagindent+itemopen+term+itemsep)
                        else:
                                fullitem = tagindent+itemopen
                                result.append(
                                        item[0].replace(SEPARATOR, fullitem))
                                del item[0]

                        # Process next lines for this item (if any)
                        for line in item:
                                if isinstance(line, type([])): # sublist inside
                                        result.extend(line)
                                else:
                                        line = self._last_escapes(line)

                                        # Blank lines turns to <p>
                                        if not line and rules['parainsidelist']:
                                                line = (indent +
                                                        TAGS['paragraphOpen'] +
                                                        TAGS['paragraphClose']
                                                       ).rstrip()

                                        # Some targets don't like indentation
                                        # here (wiki)
                                        if not rules['keeplistindent'] or \
                                           (name == 'deflist' and
                                            rules['deflisttextstrip']):
                                                line = line.lstrip()

                                        # Maybe we have a line prefix to add?
                                        # (wiki)
                                        if name == 'deflist' and \
                                           TAGS['deflistItem2LinePrefix']:
                                                line = TAGS['deflistItem2LinePrefix'] + line

                                        result.append(line)

                        # Close item (if needed)
                        if itemclose: result.append(tagindent+itemclose)

                # Close list (not nestable lists are only closed at mother)
                if listclose and not \
                   (rules['listnotnested'] and BLOCK.depth != 1):
                        result.append(tagindent+listclose)

                if rules['blankendmotherlist'] and BLOCK.depth == 1:
                        result.append('')

                return result
3235
3236
3237 ##############################################################################
3238
3239
class MacroMaster:
    """Expands the date, mtime, infile and outfile macros found on a line.

    The macro regex comes from the global 'regex' holder (or from
    getRegexes() when it is not there) and the per-macro default format
    strings come from MACROS.
    """
    def __init__(self, config=None):
        # config -- config dictionary; the global CONF is used when empty
        self.name     = ''
        self.config   = config or CONF
        self.infile   = self.config['sourcefile']
        self.outfile  = self.config['outfile']
        self.currdate = time.localtime(time.time())
        self.rgx      = regex.get('macros') or getRegexes()['macros']
        self.fileinfo = { 'infile': None, 'outfile': None }
        self.dft_fmt  = MACROS

    def walk_file_format(self, fmt):
        "Walks the %%{in/out}file format string, expanding the % flags"
        pieces = []
        size = len(fmt)
        i = 0
        while i < size:                         # char by char
            c = fmt[i]; i = i + 1
            if c != '%':                        # common char
                pieces.append(c)
            elif i == size:                     # lone % at the end, keep it
                pieces.append(c)
            else:                               # % plus its flag char
                pieces.append(self.expand_file_flag(fmt[i]))
                i = i + 1
        return ''.join(pieces)

    def expand_file_flag(self, flag):
        """Returns the text for a single file format flag.

        %f: filename          %F: filename (w/o extension)
        %d: dirname           %D: dirname (only parent dir)
        %p: file path         %e: extension
        %%: a literal %       Any other flag is returned untouched (%flag).

        The data comes from self.fileinfo[self.name], so set_file_info()
        must have been called for the current macro.
        """
        info = self.fileinfo[self.name]         # get dict
        if flag == '%': return '%'              # %% -> %
        if flag == 'f': return info['name']
        if flag == 'F': return re.sub(r'\.[^.]*$', '', info['name'])
        if flag == 'd': return info['dir']
        if flag == 'D': return os.path.split(info['dir'])[-1]
        if flag == 'p': return info['path']
        if flag == 'e':
            # No match at all (empty name) means no extension
            m = re.search(r'.(\.([^.]+))?$', info['name'])
            return (m and m.group(2)) or ''
        return '%' + flag                       # false alarm

    def set_file_info(self, macroname):
        "Stores path, dir and name of the file referred by the macro (once)"
        if self.fileinfo.get(macroname): return   # already done
        # The attribute is picked by the requested macro name, so the
        # stored data always matches the key it is saved under.
        filename = getattr(self, macroname)       # self.infile/self.outfile
        if filename == STDOUT or filename == MODULEOUT:
            dirname = ''; path = name = filename
        else:
            path    = os.path.abspath(filename)
            dirname = os.path.dirname(path)
            name    = os.path.basename(path)
        self.fileinfo[macroname] = {'path':path, 'dir':dirname, 'name':name}

    def expand(self, line=''):
        "Expand all macros found on the line"
        m = self.rgx.search(line)
        while m:
            name = self.name = m.group('name').lower()
            fmt = m.group('fmt') or self.dft_fmt.get(name)
            if name == 'date':
                txt = time.strftime(fmt, self.currdate)
            elif name == 'mtime':
                if self.infile in (STDIN, MODULEIN):
                    fdate = self.currdate
                else:
                    mtime = os.path.getmtime(self.infile)
                    fdate = time.localtime(mtime)
                txt = time.strftime(fmt, fdate)
            elif name == 'infile' or name == 'outfile':
                self.set_file_info(name)
                txt = self.walk_file_format(fmt)
            else:
                # NOTE(review): presumably Error() aborts; if it ever
                # returns, 'txt' is undefined/stale here - confirm.
                Error("Unknown macro name '%s'"%name)
            # Splice the expanded text in place of the macro. The text is
            # inserted literally: using it as a regex replacement template
            # would interpret its backslashes (think C:\new\file paths).
            line = line[:m.start()] + txt + line[m.end():]
            m = self.rgx.search(line)
        return line
3316
3317
3318 ##############################################################################
3319
3320
def dumpConfig(source_raw, parsed_config):
    "Prints the RAW config of each origin, the full parsed one and the filters"
    onoff = {1:_('ON'), 0:_('OFF')}
    origins = [
        (_('RC file')        , RC_RAW     ),
        (_('source document'), source_raw ),
        (_('command line')   , CMDLINE_RAW)
    ]
    # First show all RAW data found
    for label, cfg in origins:
        print(_('RAW config for %s') % label)
        for target, key, val in cfg:
            print('  %-8s %s: %s' % (
                '(%s)' % target,
                dotted_spaces("%-14s" % key),
                val or _('ON')))
        print('')
    # Then the parsed results of all of them
    print(_('Full PARSED config'))
    names = list(parsed_config.keys())
    names.sort()
    for key in names:
        # Filters are the last
        if key in ('preproc', 'postproc'):
            continue
        val = parsed_config[key]
        # Flag beautifier
        if key in FLAGS or key in ACTIONS:
            val = onoff.get(val) or val
        # List beautifier
        if type(val) is list:
            sep = ', '
            if key == 'options': sep = ' '
            val = sep.join(val)
        print("%25s: %s" % (dotted_spaces("%-14s" % key), val))
    print('')
    print(_('Active filters'))
    for filtername in ('preproc', 'postproc'):
        for rule in parsed_config.get(filtername) or []:
            print("%25s: %s  ->  %s" % (
                dotted_spaces("%-14s" % filtername), rule[0], rule[1]))
3360
3361
def get_file_body(file):
    "Returns all the document BODY lines"
    # No config parsing is requested (noconf), just the document parts
    parts = process_source_file(file, noconf=1)[1]
    return parts[2]
3365
3366
def finish_him(outlist, config):
    """Writing output to screen or file

    Returns the output lines when the outfile is MODULEOUT, a
    (lines, config) pair when it is STDOUT under the GUI, nothing otherwise.
    """
    outfile = config['outfile']
    outlist = expandLineBreaks(unmaskEscapeChar(outlist))

    # Apply PostProc filters
    if config['postproc']:
        filters = compile_filters(config['postproc'],
                                  _('Invalid PostProc filter regex'))
        errmsg = _('Invalid PostProc filter replacement')
        filtered = []
        for line in outlist:
            for rgx, repl in filters:
                try: line = rgx.sub(repl, line)
                except: Error("%s: '%s'"%(errmsg, repl))
            filtered.append(line)
        outlist = filtered

    if outfile == MODULEOUT:
        return outlist

    if outfile == STDOUT:
        if GUI:
            return outlist, config
        for line in outlist:
            print(line)
    else:
        Savefile(outfile, addLineBreaks(outlist))
        if not (GUI or QUIET):
            print(_('%s wrote %s') % (my_name, outfile))

    if config['split']:
        if not QUIET: print("--- html...")
        sgml2html = 'sgml2html -s %s -l %s %s'%(
                    config['split'],config['lang'] or lang,outfile)
        if not QUIET: print("Running system command: %s" % sgml2html)
        os.system(sgml2html)
3404
3405
def toc_inside_body(body, toc, config):
    "Expands the TOC mark lines found on the body with the TOC contents"
    if AUTOTOC: return body                     # nothing to expand
    toc_mark = MaskMaster().tocmask
    expanded = []
    for line in body:
        if toc_mark not in line:
            expanded.append(line)               # common line
        elif config['toc']:
            expanded.extend(toc)                # include if --toc
        # else: the %%toc line is just removed
    return expanded
3420
def toc_tagger(toc, config):
    "Convert t2t-marked TOC (it is a list) to target-tagged TOC"
    # Tag if TOC-only TOC "by hand" (target don't have a TOC tag)
    if config['toc-only'] or (config['toc'] and not TAGS['TOC']):
        fakeconf = config.copy()
        # Everything that is not plain tagging is turned off
        for key in ('headers', 'toc-only', 'mask-email', 'css-sugar'):
            fakeconf[key] = 0
        for key in ('preproc', 'postproc'):
            fakeconf[key] = []
        tagged, foo = convert(toc, fakeconf)
        set_global_config(config)   # restore config
        return tagged
    # Target TOC is a tag
    if config['toc'] and TAGS['TOC']:
        return [TAGS['TOC']]
    return []
3439
def toc_formatter(toc, config):
    """Formats TOC for automatic placement between headers and body

    toc    -- list with the already tagged TOC lines
    config -- config dictionary ('toc-only' and 'toc' are read)

    Returns the TOC untouched when 'toc-only' is on, an empty list when
    'toc' is off, otherwise a new list with the TOC lines wrapped by the
    target open/close tags and the AUTOTOC extras. The received list is
    never changed.
    """
    if config['toc-only']: return toc              # no formatting needed
    if not config['toc'] : return []               # TOC disabled
    ret = list(toc)                                # don't touch caller's list
    # TOC open/close tags (if any)
    if TAGS['tocOpen' ]: ret.insert(0, TAGS['tocOpen'])
    if TAGS['tocClose']: ret.append(TAGS['tocClose'])
    # Autotoc specific formatting
    if AUTOTOC:
        if rules['autotocwithbars']:           # TOC between bars
            para = TAGS['paragraphOpen']+TAGS['paragraphClose']
            bar  = regex['x'].sub('-'*72,TAGS['bar1'])
            tocbar = [para, bar, para]
            ret = tocbar + ret + tocbar
        if rules['blankendautotoc']:           # blank line after TOC
            ret.append('')
        if rules['autotocnewpagebefore']:      # page break before TOC
            ret.insert(0,TAGS['pageBreak'])
        if rules['autotocnewpageafter']:       # page break after TOC
            ret.append(TAGS['pageBreak'])
    return ret
3462
3463
def doHeader(headers, config):
    """Composes the target document headers from the target's template.

    headers -- the three source header lines (three empty lines are
               assumed when nothing is given)
    config  -- config dictionary; the keys read here are: headers, target,
               css-sugar, css-inside, style, encoding and sourcefile

    Returns the header as a list of lines, or an empty list when
    config['headers'] is off. The config contents are not changed.
    """
    if not config['headers']: return []
    if not headers: headers = ['','','']
    target = config['target']
    if target not in HEADER_TEMPLATE:
        Error("doheader: Unknow target '%s'"%target)

    is_html = target in ('html','xhtml')
    if is_html and config.get('css-sugar'):
        template = HEADER_TEMPLATE[target+'css'].split('\n')
    else:
        template = HEADER_TEMPLATE[target].split('\n')

    # Work on a private copy of the style list, config must stay untouched
    styles = list(config.get('style') or [])
    # Remove .sty extension from each style filename (freaking tex)
    # XXX Can't handle --style foo.sty,bar.sty
    if target == 'tex':
        styles = [re.sub(r'(?i)\.sty$', '', x) for x in styles]
    head_data = {
        'STYLE'   : styles,
        'ENCODING': get_encoding_string(config.get('encoding'), target)
    }
    # Parse header contents
    for i in 0,1,2:
        # Expand macros
        contents = MacroMaster(config=config).expand(headers[i])
        # Escapes - on tex, just do it if any \tag{} present
        if target != 'tex' or re.search(r'\\\w+{', contents):
            contents = doEscape(target, contents)
        if target == 'lout':
            contents = doFinalEscape(target, contents)
        head_data['HEADER%d'%(i+1)] = contents
    # css-inside removes STYLE line
    #XXX In tex, this also removes the modules call (%!style:amsfonts)
    css_inside = is_html and config.get('css-inside') and config.get('style')
    if css_inside:
        head_data['STYLE'] = []
    Debug("Header Data: %s"%head_data, 1)
    # Template lines whose only key reference is an empty key are removed.
    # A new list is composed, because removing items from the very same
    # list being walked makes the loop skip the line after each removal.
    empty_marks = ['%%(%s)s'%key
                   for key in head_data.keys() if not head_data.get(key)]
    any_key = re.compile(r'%\([A-Z0-9]+\)s')
    kept = []
    for line in template:
        useless = 0
        for mark in empty_marks:
            if mark in line and not any_key.search(line.replace(mark, '')):
                useless = 1
                break
        if not useless: kept.append(line)
    template = kept
    # Style is a multiple tag.
    # - If none or just one, use default template
    # - If two or more, repeat the template STYLE line for each style
    styles = head_data['STYLE']
    if len(styles) == 1:
        head_data['STYLE'] = styles[0]
    elif len(styles) > 1:
        style_mark = '%(STYLE)s'
        for i in range(len(template)):
            if style_mark in template[i]:
                # The style name becomes part of the template itself, so
                # any % on it is doubled to survive the dict expansion
                template[i:i+1] = [
                    template[i].replace(style_mark, x.replace('%', '%%'))
                    for x in styles]
                break
    # Populate template with data (dict expansion)
    template = '\n'.join(template) % head_data

    # Adding CSS contents into template (for --css-inside)
    if css_inside:
        set_global_config(config) # usually on convert(), needed here
        for cssfile in config['style']:
            if not os.path.isabs(cssfile):
                infile = config.get('sourcefile')
                cssfile = os.path.join(os.path.dirname(infile), cssfile)
            # Best effort: any failure here turns into a comment on the
            # output. The catch-all is kept on purpose, what Readfile()
            # raises is not known from here.
            try:
                contents = Readfile(cssfile, 1)
                css = "\n%s\n%s\n%s\n%s\n" % (
                    doCommentLine("Included %s" % cssfile),
                    TAGS['cssOpen'],
                    '\n'.join(contents),
                    TAGS['cssClose'])
            except:
                errmsg = "CSS include failed for %s" % cssfile
                css = "\n%s\n" % (doCommentLine(errmsg))
            # Insert this CSS file contents on the template.
            # A function is used as the replacement, so the backslashes
            # found on the CSS are inserted as is (a replacement string
            # would parse them as escapes and group references).
            template = re.sub('(?i)(</HEAD>)',
                              lambda m, css=css: css + m.group(1), template)

        # The last blank line to keep everything separated
        template = re.sub('(?i)(</HEAD>)', '\n'+r'\1', template)

    return template.split('\n')
3569
def doCommentLine(txt):
    "Returns the text inside the target comment tag ('' when no such tag)"
    txt = maskEscapeChar(txt)
    comment_tag = TAGS['comment']
    # The -- string ends a (h|sg|xht)ml comment :(
    if '--' in comment_tag and '--' in txt:
        txt = re.sub('-(?=-)', r'-\\', txt)
    if not comment_tag:
        return ''
    return regex['x'].sub(txt, comment_tag)
3580
def doFooter(config):
    "Returns the document footer lines ([] when headers are disabled)"
    if not config['headers']: return []
    target = config['target']
    if target == 'tex': typename = 'LaTeX2e'
    else              : typename = target
    ppgd = '%s code generated by %s %s (%s)'%(
           typename,my_name,my_version,my_url)
    cmdline = 'cmdline: %s %s'%(my_name, ' '.join(config['realcmdline']))
    return [
        '',
        doCommentLine(ppgd),
        doCommentLine(cmdline),
        TAGS['EOD']
    ]
3596
def doEscape(target,txt):
    "Target-specific special escapes. Apply *before* insert any tag."
    tmpmask = 'vvvvThisEscapingSuxvvvv'
    if target in ('html','sgml','xhtml'):
        for char, entity in (('&','&amp;'), ('<','&lt;'), ('>','&gt;')):
            txt = txt.replace(char, entity)
        if target == 'sgml':
            txt = re.sub('\xff','&yuml;',txt)  # "+y
    elif target == 'pm6':
        txt = txt.replace('<', '<\\#60>')
    elif target == 'mgp':
        # add leading blank to avoid parse
        if txt.startswith('%'): txt = ' ' + txt
    elif target == 'man':
        if txt[:1] in ('.', "'"):               # command ID
            txt = '\\&' + txt
        txt = txt.replace(ESCCHAR, ESCCHAR+'e') # \e
    elif target == 'lout':
        # TIP: / moved to FinalEscape to avoid //italic//
        # TIP: these are also converted by lout:  ...  ---  --
        txt = txt.replace(ESCCHAR, tmpmask)             # \
        txt = txt.replace('"', '"%s""'%ESCCHAR)         # "\""
        txt = re.sub('([|&{}@#^~])', '"\\1"',txt)       # "@"
        txt = txt.replace(tmpmask, '"%s"'%(ESCCHAR*2))  # "\\"
    elif target == 'tex':
        # Mark literal \ to be changed to $\backslash$ later
        txt = txt.replace(ESCCHAR, tmpmask)
        for chars, how in (('([#$&%{}])', ESCCHAR+r'\1'  ),   # \%
                           ('([~^])'    , ESCCHAR+r'\1{}'),   # \~{}
                           ('([<|>])'   ,         r'$\1$')):  # $>$
            txt = re.sub(chars, how, txt)
        txt = txt.replace(tmpmask, maskEscapeChar(r'$\backslash$'))
        # TIP the _ is escaped at the end
    return txt
3630
3631 # TODO man: where - really needs to be escaped?
def doFinalEscape(target, txt):
    "Last escapes of each line"
    # Targets solved by a single plain replacement
    simple = {
        'man' : ('-', r'\-'),
        'sgml': ('[', '&lsqb;'),
        'lout': ('/', '"/"')
    }
    if target == 'pm6':
        return txt.replace(ESCCHAR+'<', r'<\#92><')
    if target == 'tex':
        txt = txt.replace('_', r'\_')
        return txt.replace('vvvvTexUndervvvv', '_')  # shame!
    if target in simple:
        old, new = simple[target]
        return txt.replace(old, new)
    return txt
3642
def EscapeCharHandler(action, data):
    "Mask/Unmask the Escape Char on the given string"
    # Blank data is returned as is, even before the action is checked
    if not data.strip():
        return data
    if action not in ('mask','unmask'):
        Error("EscapeCharHandler: Invalid action '%s'"%action)
    if action == 'mask':
        return data.replace('\\', ESCCHAR)
    return data.replace(ESCCHAR, '\\')
3650
def maskEscapeChar(data):
    "Replace any Escape Char (backslash) with a text mask (Input: str or list)"
    if type(data) is list:
        return [EscapeCharHandler('mask', item) for item in data]
    return EscapeCharHandler('mask', data)
3656
def unmaskEscapeChar(data):
    "Undo the Escape Char (backslash) masking (Input: str or list)"
    if type(data) is list:
        return [EscapeCharHandler('unmask', item) for item in data]
    return EscapeCharHandler('unmask', data)
3662
def addLineBreaks(mylist):
    "use LB to respect sys.platform"
    # Embedded \n's are changed to LB and a final LB is added to each line
    return [line.replace('\n', LB) + LB for line in mylist]
3670
3671 # Convert ['foo\nbar'] to ['foo', 'bar']
def expandLineBreaks(mylist):
    "Splits on \\n every item of the list, returning one flat list"
    return [piece for line in mylist for piece in line.split('\n')]
3677
3678 def compile_filters(filters, errmsg='Filter'):
3679         if filters:
3680                 for i in xrange(len(filters)):
3681                         patt,repl = filters[i]
3682                         try: rgx = re.compile(patt)
3683                         except: Error("%s: '%s'"%(errmsg, patt))
3684                         filters[i] = (rgx,repl)
3685         return filters
3686
def enclose_me(tagname, txt):
    "Returns the text between the Open and Close tags of the given name"
    tag_open  = TAGS.get(tagname+'Open')
    tag_close = TAGS.get(tagname+'Close')
    return tag_open + txt + tag_close
3689
def beautify_me(name, line):
    "where name is: bold, italic, underline or strike"

    # Exception: Doesn't parse an horizontal bar as strike
    if name == 'strike' and regex['bar'].search(line):
        return line

    font = 'font%s' % name.capitalize()
    tag_open  = TAGS['%sOpen'%font]
    tag_close = TAGS['%sClose'%font]
    return regex[font].sub(r'%s\1%s'%(tag_open, tag_close), line)
3702
def get_tagged_link(label, url):
    "Returns the link (label and URL) tagged for the current target"
    target   = CONF['target']
    image_re = regex['img']

    # Set link type
    linktype = 'url'
    if regex['email'].match(url):
        linktype = 'email'

    # Escape specials from TEXT parts
    label = doEscape(target, label)

    # Escape specials from link URL
    if not rules['linkable'] or rules['escapeurl']:
        url = doEscape(target, url)

    # Adding protocol to guessed link
    guessurl = ''
    if linktype == 'url' and \
       re.match('(?i)'+regex['_urlskel']['guess'], url):
        if url[0] in 'Ww': protocol = 'http://'
        else             : protocol = 'ftp://'
        # Not link aware targets -> protocol is useless
        if rules['linkable']:
            guessurl = protocol + url

    # Simple link (not guessed)
    if not label and not guessurl:
        if CONF['mask-email'] and linktype == 'email':
            # Do the email mask feature (no TAGs, just text)
            masked = url.replace('@', ' (a) ').replace('.', ' ')
            masked = "<%s>" % masked
            if rules['linkable']:
                masked = doEscape(target, masked)
            return masked
        # Just add link data to tag
        return regex['x'].sub(url, TAGS[linktype])

    # Named link or guessed simple link

    # Adjusts for guessed link
    if not label: label = url         # no   protocol
    if guessurl : url   = guessurl    # with protocol

    # Image inside link!
    if image_re.match(label):
        if rules['imglinkable']:      # get image tag
            label = parse_images(label)
        else:                         #  img@link !supported
            label = "(%s)"%image_re.match(label).group(1)

    # Putting data on the right appearance order
    if rules['linkable']:
        urlorder = [url, label]       # link before label
    else:
        urlorder = [label, url]       # label before link

    # Add link data to tag (replace \a's)
    tagged = TAGS["%sMark"%linktype]
    for data in urlorder:
        tagged = regex['x'].sub(data, tagged, 1)
    return tagged
3770
3771
def parse_deflist_term(line):
    "Extract and parse definition list term contents"
    img_re = regex['img']
    term   = regex['deflist'].search(line).group(3)

    # Mask image inside term as (image.jpg), where not supported
    if not rules['imgasdefterm']:
        found = img_re.search(term)
        while found:
            term  = img_re.sub('(%s)'%found.group(1), term, 1)
            found = img_re.search(term)

    #TODO tex: escape ] on term. \], \rbrack{} and \verb!]! don't work :(
    return term
3785
3786
def get_tagged_bar(line):
    "Returns the bar line tagged (or the line itself when it is not a bar)"
    m = regex['bar'].search(line)
    if not m:
        return line
    txt = m.group(2)

    # Map strong bar to pagebreak
    if rules['mapbar2pagebreak'] and TAGS['pageBreak']:
        TAGS['bar2'] = TAGS['pageBreak']

    # Set bar type
    barname = 'bar1'
    if txt[0] == '=':
        barname = 'bar2'
    bar = TAGS[barname]

    # To avoid comment tag confusion like <!-- ------ -->
    if '--' in TAGS['comment']:
        txt = txt.replace('--', '__')

    # Tag line
    return regex['x'].sub(txt, bar)
3806
3807
def get_image_align(line):
    "Return the image (first found) align for the given line"

    # First clear marks that can mess align detection:
    # the deflist separator, the list separator and the quote mark
    for mark in (SEPARATOR+'$', '^'+SEPARATOR, '^[\t]+'):
        line = re.sub(mark, '', line)

    # Get image position on the line
    m = regex['img'].search(line)
    at_head = (m.start() == 0)
    at_tail = (m.end() == len(line))

    # The align detection algorithm
    if at_head and not at_tail:
        align = 'left'                 # ^img + text$
    elif at_tail and not at_head:
        align = 'right'                # ^text + img$
    else:
        align = 'center'               # default align

    # Some special cases
    if BLOCK.isblock('table'):
        align = 'center'               # ignore when table

    return align
3831
3832
3833 # Reference: http://www.iana.org/assignments/character-sets
3834 # http://www.drclue.net/F1.cgi/HTML/META/META.html
def get_encoding_string(enc, target):
    "Returns the encoding name normalized and translated to the target"
    if not enc: return ''
    # Target specific translation table
    translate = {
      'tex': {
        # missing: ansinew , applemac , cp437 , cp437de , cp865
        'us-ascii'    : 'ascii',
        'windows-1250': 'cp1250',
        'windows-1252': 'cp1252',
        'ibm850'      : 'cp850',
        'ibm852'      : 'cp852',
        'iso-8859-1'  : 'latin1',
        'iso-8859-2'  : 'latin2',
        'iso-8859-3'  : 'latin3',
        'iso-8859-4'  : 'latin4',
        'iso-8859-5'  : 'latin5',
        'iso-8859-9'  : 'latin9',
        'koi8-r'      : 'koi8-r'
      }
    }
    # Normalization, the rules are applied in this very order
    normalize = (
        ('(?i)(us[-_]?)?ascii|us|ibm367', 'us-ascii'  ),
        ('(?i)(ibm|cp)?85([02])'        , 'ibm85\\2'  ),
        ('(?i)(iso[_-]?)?8859[_-]?'     , 'iso-8859-' ),
        ('iso-8859-($|[^1-9]).*'        , 'iso-8859-1')
    )
    for patt, repl in normalize:
        enc = re.sub(patt, repl, enc)
    # Apply translation table (when there is one for the target)
    return translate.get(target, {}).get(enc.lower(), enc)
3864
3865
3866 ##############################################################################
3867 ##MerryChristmas,IdontwanttofighttonightwithyouImissyourbodyandIneedyourlove##
3868 ##############################################################################
3869
3870
def process_source_file(file='', noconf=0, contents=[]):
    """
    Find and Join all the configuration available for a source file.
    No sanity checking is done on this step.
    It also extracts the source document parts into separate holders.

    The source is taken from 'contents' when it is given, otherwise
    from 'file'. When 'noconf' is on, no config is searched at all and
    the returned config dictionary is empty.

    The config scan order is:
       1. The user configuration file (i.e. $HOME/.txt2tagsrc)
       2. The source document's CONF area
       3. The command line options

    The return data is a tuple of two items:
       1. The parsed config dictionary
       2. The document's parts, as a (head, conf, body) tuple

    All the conversion process will be based on the data and
    configuration returned by this function.
    The source files is read on this step only.
    """
    if contents:
        source = SourceDocument(contents=contents)
    else:
        source = SourceDocument(file)
    head, conf, body = source.split()
    Message(_("Source document contents stored"),2)
    parts = (head, conf, body)

    if noconf:
        return {}, parts

    # Read document config
    source_raw = source.get_raw_config()
    # Join all the config directives found, then parse it
    full_raw = RC_RAW + source_raw + CMDLINE_RAW
    Message(_("Parsing and saving all config found (%03d items)")%(
            len(full_raw)),1)
    full_parsed = ConfigMaster(full_raw).parse()

    # Add manually the filename to the conf dic
    if contents:
        full_parsed['sourcefile'] = MODULEIN
        full_parsed['infile']     = MODULEIN
        full_parsed['outfile']    = MODULEOUT
    else:
        full_parsed['sourcefile'] = file

    # Maybe should we dump the config found?
    if full_parsed.get('dump-config'):
        dumpConfig(source_raw, full_parsed)
        Quit()
    # The user just want to know a single config value (hidden feature)
    #TODO pick a better name than --show-config-value
    elif full_parsed.get('show-config-value'):
        config_value = full_parsed.get(full_parsed['show-config-value'])
        if config_value:
            if type(config_value) is list:
                print('\n'.join(config_value))
            else:
                print(config_value)
        Quit()

    # Okay, all done
    Debug("FULL config for this file: %s"%full_parsed, 1)
    return full_parsed, parts
3930
def get_infiles_config(infiles):
        """
        Gather the configuration available for every input file.

        Runs process_source_file() once for each item of 'infiles' and
        returns the results as a list, in the same order as the input.
        This function is supposed to be the very first one to be called,
        before any processing.
        """
        return [process_source_file(infile) for infile in infiles]
3938
def convert_this_files(configs):
        """
        Convert every (config, document) pair found on 'configs'.

        Each document is a (head, conf, body) tuple of source lines.
        For each pair the target headers, body, TOC and footer are
        composed and joined into the final document.

        Returns:
          - None right after printing the source, when 'dump-source' is on
          - (outlist, config) for the first file when running on GUI
          - (finish_him() result, config) for the first file when the
            'outfile' config is MODULEOUT
          - None otherwise, after finish_him() has handled every file
        """
        global CONF
        for raw_conf, parts in configs:            # multifile support
                source_head, source_conf, source_body = parts
                myconf = ConfigMaster().sanity(raw_conf)

                # Target Headers
                #TODO escape line before?
                #TODO see exceptions by tex and mgp
                Message(_("Composing target Headers"),1)
                target_head = doHeader(source_head, myconf)

                # Target Body: the full marked body turns into tagged text.
                # The headers area counts as one line when it is empty.
                first_body_line = (len(source_head) or 1)+ len(source_conf) + 1
                Message(_("Composing target Body"),1)
                target_body, marked_toc = convert(
                        source_body, myconf, firstlinenr=first_body_line)

                # On dump-source we just show the lines and leave
                if myconf['dump-source']:
                        for line in source_head+source_conf+target_body:
                                print(line)
                        return

                # Target TOC (if needed)
                Message(_("Composing target TOC"),1)
                tagged_toc  = toc_tagger(marked_toc, myconf)
                target_toc  = toc_formatter(tagged_toc, myconf)
                target_body = toc_inside_body(target_body, target_toc, myconf)
                if not (AUTOTOC or myconf['toc-only']):
                        target_toc = []

                # Target Footer
                Message(_("Composing target Footer"),1)
                target_foot = doFooter(myconf)

                # Finally, we have our document
                outlist = target_head + target_toc + target_body + target_foot

                # GUI: abort before finish_him
                if GUI:
                        return outlist, myconf

                # Module: return finish_him as list
                if myconf.get('outfile') == MODULEOUT:
                        return finish_him(outlist, myconf), myconf

                # Command line: write results to file or STDOUT
                Message(_("Saving results to the output file"),1)
                finish_him(outlist, myconf)
3984
3985
def parse_images(line):
        """
        Tag all the images found on 'line' and return the new line.

        One image mark is handled per loop turn: the mark is replaced by
        the target image tag, then the image text is placed into that tag
        (first regex['x'] match). Nothing is done when the target image
        tag is the '[\\a]' placeholder.
        """
        while 1:
                found = regex['img'].search(line)
                if not found or TAGS['img'] == '[\a]':
                        break

                txt = found.group(1)
                tag = TAGS['img']

                # If target supports image alignment, here we go
                if rules['imgalignable']:

                        align = get_image_align(line)    # right
                        align_name = align.capitalize()  # Right

                        # The align is a full tag, or part of the image tag (~A~)
                        full_align_tag = TAGS['imgAlign'+align_name]
                        if full_align_tag:
                                tag = full_align_tag
                        else:
                                part_align_tag = TAGS['_imgAlign'+align_name]
                                tag = regex['_imgAlign'].sub(part_align_tag, tag, 1)

                        # Dirty fix to allow centered solo images
                        if align == 'center' and TARGET in ('html','xhtml'):
                                leftover = regex['img'].sub('',line,1)
                                if re.match(r'^\s+$', leftover):
                                        tag = "<center>%s</center>" %tag

                if TARGET == 'tex':
                        # The tag is used as a regex replacement: protect its \b
                        tag = re.sub(r'\\b',r'\\\\b',tag)
                        txt = txt.replace('_', 'vvvvTexUndervvvv')

                line = regex['img'].sub(tag,line,1)
                line = regex['x'].sub(txt,line,1)
        return line
4018
4019
def add_inline_tags(line):
        """
        Apply the inline marks to 'line': first the beautifiers (bold,
        italic, underline, strike), then the images. Returns the new line.
        """
        # Beautifiers: call beautify_me() only when the mark is on the line
        for name in ('bold', 'italic', 'underline', 'strike'):
                font_regex = regex['font' + name.capitalize()]
                if font_regex.search(line):
                        line = beautify_me(name, line)

        return parse_images(line)
4028
4029
def get_include_contents(file, path=''):
        """
        Parses %!include: value and extract file contents.

        The include type is told by doubled marks around the file name:
            ``file``  verb
            ""file""  raw
            ''file''  tagged
            file      t2t (default, no marks)

        Arguments:
          file - the %!include: value (file name, maybe with marks)
          path - folder to be joined before the file name

        Returns the (type, lines) tuple. For the t2t type, 'lines' is the
        result of get_file_body() with an extra '%%INCLUDED' comment line
        at the top. For the other types it is the result of Readfile()
        with line breaks removed.
        """
        ids = {'`':'verb', '"':'raw', "'":'tagged' }
        inctype = 't2t'
        # Set include type and remove identifier marks
        # Slicing (not indexing) so an empty value doesn't raise IndexError
        mark = file[:1]
        if mark in ids.keys():
                if file[:2] == file[-2:] == mark*2:
                        inctype = ids[mark]  # set type
                        file = file[2:-2]    # remove marks
        # Handle remote dir execution
        filepath = os.path.join(path, file)
        if inctype == 't2t':
                # Default txt2tags marked text, just BODY matters.
                # get_file_body() receives the path, so the file is not read
                # here with Readfile() just to have the result thrown away.
                lines = get_file_body(filepath)
                lines.insert(0, '%%INCLUDED(%s) starts here: %s'%(inctype,file))
                # This appears when included hit EOF with verbatim area open
                #lines.append('%%INCLUDED(%s) ends here: %s'%(inctype,file))
        else:
                # Verb, raw and tagged: the file contents as is
                lines = Readfile(filepath, remove_linebreaks=1)
        return inctype, lines
4051
4052
def set_global_config(config):
        """
        Publish 'config' and the data derived from it as module globals:
        CONF (the config itself), TAGS, rules, regex and TARGET.
        """
        global CONF, TAGS, regex, rules, TARGET
        CONF = config
        # Keep this order, the getters may read the globals already set
        TAGS = getTags(config)
        rules = getRules(config)
        regex = getRegexes()
        # save for buggy functions that need global
        TARGET = config['target']
4060
4061
def convert(bodylines, config, firstlinenr=1):
        """
        Parse the marked body lines and return them tagged for the target.

        Arguments:
          bodylines   - list with the source document BODY lines
          config      - the parsed config dictionary. It is made global
                        by set_global_config() and read here as CONF.
          firstlinenr - the number of the first body line on the source
                        file, used to show line numbers on Debug messages

        Returns the (lines, marked_toc) tuple:
          lines      - the tagged lines. With 'toc-only' it is an empty
                       list, and with 'dump-source' it is the untouched
                       source lines (t2t includes expanded).
          marked_toc - the result of TITLE.dump_marked_toc()

        Every line goes through the checks below, in this order. The
        order matters: most checks end with 'continue', so the later
        ones are never reached by lines already handled.

        Side effects: sets the BLOCK global and all the globals set by
        set_global_config().
        """
        global BLOCK

        set_global_config(config)

        target = config['target']
        BLOCK = BlockMaster()
        MASK  =  MaskMaster()
        TITLE = TitleMaster()

        ret = []
        dump_source = []
        f_lastwasblank = 0

        # Compiling all PreProc regexes
        pre_filter = compile_filters(
                CONF['preproc'], _('Invalid PreProc filter regex'))

        # Compiling the list item identifier regex just once.
        # The list marks are escaped because they are placed inside a
        # char class, where an unescaped '-' between two other marks
        # would be taken as a range.
        listids = re.escape(string.join(LISTNAMES.keys(), ''))
        listmark_rgx = re.compile('^( *)([%s]) '%listids)

        # Let's mark it up!
        linenr = firstlinenr-1
        lineref = 0
        while lineref < len(bodylines):
                # Defaults
                MASK.reset()
                results_box = ''

                untouchedline = bodylines[lineref]
                dump_source.append(untouchedline)

                line = re.sub('[\n\r]+$','',untouchedline)   # del line break

                # Apply PreProc filters
                if pre_filter:
                        errmsg = _('Invalid PreProc filter replacement')
                        for rgx,repl in pre_filter:
                                # Not a bare except: Ctrl-C and exit
                                # requests must not be reported as a
                                # bad replacement
                                try:
                                        line = rgx.sub(repl, line)
                                except Exception:
                                        Error("%s: '%s'"%(errmsg, repl))

                line = maskEscapeChar(line)                  # protect \ char
                linenr  = linenr  +1
                lineref = lineref +1

                Debug(repr(line), 2, linenr)  # heavy debug: show each line

                #------------------[ Comment Block ]------------------------

                # We're already on a comment block
                if BLOCK.block() == 'comment':

                        # Closing comment
                        if regex['blockCommentClose'].search(line):
                                ret.extend(BLOCK.blockout() or [])
                                continue

                        # Normal comment-inside line. Ignore it.
                        continue

                # Detecting comment block init
                if regex['blockCommentOpen'].search(line) \
                   and BLOCK.block() not in BLOCK.exclusive:
                        ret.extend(BLOCK.blockin('comment'))
                        continue

                #-------------------------[ Raw Text ]----------------------

                # We're already on a raw block
                if BLOCK.block() == 'raw':

                        # Closing raw
                        if regex['blockRawClose'].search(line):
                                ret.extend(BLOCK.blockout())
                                continue

                        # Normal raw-inside line
                        BLOCK.holdadd(line)
                        continue

                # Detecting raw block init
                if regex['blockRawOpen'].search(line) \
                   and BLOCK.block() not in BLOCK.exclusive:
                        ret.extend(BLOCK.blockin('raw'))
                        continue

                # One line raw text
                if regex['1lineRaw'].search(line) \
                   and BLOCK.block() not in BLOCK.exclusive:
                        ret.extend(BLOCK.blockin('raw'))
                        line = regex['1lineRaw'].sub('',line)
                        BLOCK.holdadd(line)
                        ret.extend(BLOCK.blockout())
                        continue

                #------------------------[ Verbatim  ]----------------------

                #TIP We'll never support beautifiers inside verbatim

                # Closing table mapped to verb
                if BLOCK.block() == 'verb' \
                   and BLOCK.prop('mapped') == 'table' \
                   and not regex['table'].search(line):
                        ret.extend(BLOCK.blockout())

                # We're already on a verb block
                if BLOCK.block() == 'verb':

                        # Closing verb
                        if regex['blockVerbClose'].search(line):
                                ret.extend(BLOCK.blockout())
                                continue

                        # Normal verb-inside line
                        BLOCK.holdadd(line)
                        continue

                # Detecting verb block init
                if regex['blockVerbOpen'].search(line) \
                   and BLOCK.block() not in BLOCK.exclusive:
                        ret.extend(BLOCK.blockin('verb'))
                        f_lastwasblank = 0
                        continue

                # One line verb-formatted text
                if regex['1lineVerb'].search(line) \
                   and BLOCK.block() not in BLOCK.exclusive:
                        ret.extend(BLOCK.blockin('verb'))
                        line = regex['1lineVerb'].sub('',line)
                        BLOCK.holdadd(line)
                        ret.extend(BLOCK.blockout())
                        f_lastwasblank = 0
                        continue

                # Tables are mapped to verb when target is not table-aware
                if not rules['tableable'] and regex['table'].search(line):
                        if not BLOCK.isblock('verb'):
                                ret.extend(BLOCK.blockin('verb'))
                                BLOCK.propset('mapped', 'table')
                                BLOCK.holdadd(line)
                                continue

                #---------------------[ blank lines ]-----------------------

                if regex['blankline'].search(line):

                        # Close open paragraph
                        if BLOCK.isblock('para'):
                                ret.extend(BLOCK.blockout())
                                f_lastwasblank = 1
                                continue

                        # Close all open tables
                        if BLOCK.isblock('table'):
                                ret.extend(BLOCK.blockout())
                                f_lastwasblank = 1
                                continue

                        # Close all open quotes
                        while BLOCK.isblock('quote'):
                                ret.extend(BLOCK.blockout())

                        # Closing all open lists
                        if f_lastwasblank:          # 2nd consecutive blank
                                if BLOCK.block()[-4:] == 'list':
                                        BLOCK.holdaddsub('')   # helps parser
                                while BLOCK.depth:  # closes list (if any)
                                        ret.extend(BLOCK.blockout())
                                continue            # ignore consecutive blanks

                        # Paragraph (if any) is wanted inside lists also
                        if BLOCK.block()[-4:] == 'list':
                                BLOCK.holdaddsub('')
                        else:
                                # html: show blank line (needs tag)
                                if target in ('html','xhtml'):
                                        ret.append(TAGS['paragraphOpen']+\
                                                   TAGS['paragraphClose'])
                                # Otherwise we just show a blank line
                                else:
                                        ret.append('')

                        f_lastwasblank = 1
                        continue


                #---------------------[ special ]---------------------------

                if regex['special'].search(line):
                        # Include command
                        targ, key, val = ConfigLines().parse_line(
                                           line, 'include', target)
                        if key:
                                Debug("Found config '%s', value '%s'"%(
                                       key,val),1,linenr)

                                incpath = os.path.dirname(CONF['sourcefile'])
                                incfile = val
                                err = _('A file cannot include itself (loop!)')
                                if CONF['sourcefile'] == incfile:
                                        Error("%s: %s"%(err,incfile))
                                inctype, inclines = get_include_contents(
                                                      incfile, incpath)
                                # Verb, raw and tagged are easy
                                if inctype != 't2t':
                                        ret.extend(BLOCK.blockin(inctype))
                                        BLOCK.holdextend(inclines)
                                        ret.extend(BLOCK.blockout())
                                else:
                                        # Insert include lines into body
                                        # (lineref already points to the
                                        # next line, so they come next)
                                        #TODO include maxdepth limit
                                        bodylines = bodylines[:lineref] \
                                                   +inclines \
                                                   +bodylines[lineref:]
                                        #TODO fix path if include@include
                                        # Remove %!include call
                                        if CONF['dump-source']:
                                                dump_source.pop()
                                continue
                        else:
                                Debug('Bogus Special Line',1,linenr)

                #---------------------[ dump-source ]-----------------------

                # We don't need to go any further
                if CONF['dump-source']:
                        continue

                #---------------------[ Comments ]--------------------------

                # Just skip them (if not macro)
                if regex['comment'].search(line) and not \
                   regex['macros'].match(line) and not \
                   regex['toc'].match(line):
                        continue

                #---------------------[ Triggers ]--------------------------

                # Valid line, reset blank status
                f_lastwasblank = 0

                # Any NOT quote line closes all open quotes
                if BLOCK.isblock('quote') and not regex['quote'].search(line):
                        while BLOCK.isblock('quote'):
                                ret.extend(BLOCK.blockout())

                # Any NOT table line closes an open table
                if BLOCK.isblock('table') and not regex['table'].search(line):
                        ret.extend(BLOCK.blockout())


                #---------------------[ Horizontal Bar ]--------------------

                if regex['bar'].search(line):

                        # A bar closes a paragraph
                        if BLOCK.isblock('para'):
                                ret.extend(BLOCK.blockout())

                        # We need to close all opened quote blocks
                        # if bar isn't allowed inside or if not a quote line
                        if BLOCK.isblock('quote'):
                                if not rules['barinsidequote'] or \
                                   not regex['quote'].search(line):
                                        while BLOCK.isblock('quote'):
                                                ret.extend(BLOCK.blockout())

                        # Quote + bar: continue processing for quoting
                        if rules['barinsidequote'] and \
                           regex['quote'].search(line):
                                pass

                        # Just bar: save tagged line and we're done
                        else:
                                line = get_tagged_bar(line)
                                if BLOCK.block()[-4:] == 'list':
                                        BLOCK.holdaddsub(line)
                                elif BLOCK.block():
                                        BLOCK.holdadd(line)
                                else:
                                        ret.append(line)
                                        Debug("BAR: %s"%line, 6)
                                continue

                #---------------------[ Title ]-----------------------------

                #TODO set next blank and set f_lastwasblank or f_lasttitle
                if (regex['title'].search(line) or
                    regex['numtitle'].search(line)) and \
                    BLOCK.block()[-4:] != 'list':

                        # A title closes a paragraph
                        if BLOCK.isblock('para'):
                                ret.extend(BLOCK.blockout())

                        TITLE.add(line)
                        tagged_title = TITLE.get()
                        ret.extend(tagged_title)
                        Debug("TITLE: %s"%tagged_title, 6)

                        f_lastwasblank = 1
                        continue

                #---------------------[ %%toc ]-----------------------

                # %%toc line closes paragraph
                if BLOCK.block() == 'para' and regex['toc'].search(line):
                        ret.extend(BLOCK.blockout())

                #---------------------[ apply masks ]-----------------------

                line = MASK.mask(line)

                #XXX from here, only block-inside lines will pass

                #---------------------[ Quote ]-----------------------------

                if regex['quote'].search(line):

                        # Store number of leading TABS
                        quotedepth = len(regex['quote'].search(line).group(0))

                        # SGML doesn't support nested quotes
                        if rules['quotenotnested']: quotedepth = 1

                        # Don't cross depth limit
                        maxdepth = rules['quotemaxdepth']
                        if maxdepth and quotedepth > maxdepth:
                                quotedepth = maxdepth

                        # New quote
                        if not BLOCK.isblock('quote'):
                                ret.extend(BLOCK.blockin('quote'))

                        # New subquotes
                        while BLOCK.depth < quotedepth:
                                BLOCK.blockin('quote')

                        # Closing quotes
                        while quotedepth < BLOCK.depth:
                                ret.extend(BLOCK.blockout())

                #---------------------[ Lists ]-----------------------------

                # An empty item also closes the current list
                if BLOCK.block()[-4:] == 'list':
                        m = regex['listclose'].match(line)
                        if m:
                                listindent = m.group(1)
                                listtype = m.group(2)
                                currlisttype = BLOCK.prop('type')
                                currlistindent = BLOCK.prop('indent')
                                if listindent == currlistindent and \
                                   listtype == currlisttype:
                                        ret.extend(BLOCK.blockout())
                                        continue

                if   regex['list'].search(line) or \
                  regex['numlist'].search(line) or \
                  regex['deflist'].search(line):

                        listindent = BLOCK.prop('indent')
                        m = listmark_rgx.match(line)
                        listitemindent = m.group(1)
                        listtype = m.group(2)
                        listname = LISTNAMES[listtype]
                        results_box = BLOCK.holdadd

                        # Del list ID (and separate term from definition)
                        if listname == 'deflist':
                                term = parse_deflist_term(line)
                                line = regex['deflist'].sub(
                                        SEPARATOR+term+SEPARATOR,line)
                        else:
                                line = regex[listname].sub(SEPARATOR,line)

                        # Don't cross depth limit
                        maxdepth = rules['listmaxdepth']
                        if maxdepth and BLOCK.depth == maxdepth:
                                if len(listitemindent) > len(listindent):
                                        listitemindent = listindent

                        # List bumping (same indent, diff mark)
                        # Close the currently open list to clear the mess
                        if BLOCK.block()[-4:] == 'list' \
                           and listname != BLOCK.block() \
                           and len(listitemindent) == len(listindent):
                                ret.extend(BLOCK.blockout())
                                listindent = BLOCK.prop('indent')

                        # Open mother list or sublist
                        if BLOCK.block()[-4:] != 'list' or \
                           len(listitemindent) > len(listindent):
                                ret.extend(BLOCK.blockin(listname))
                                BLOCK.propset('indent',listitemindent)
                                BLOCK.propset('type',listtype)

                        # Closing sublists
                        while len(listitemindent) < len(BLOCK.prop('indent')):
                                ret.extend(BLOCK.blockout())

                        # O-oh, sublist before list ("\n\n  - foo\n- foo")
                        # Fix: close sublist (as mother), open another list
                        if BLOCK.block()[-4:] != 'list':
                                ret.extend(BLOCK.blockin(listname))
                                BLOCK.propset('indent',listitemindent)
                                BLOCK.propset('type',listtype)

                #---------------------[ Table ]-----------------------------

                #TODO escape undesired format inside table
                #TODO add pm6 target
                if regex['table'].search(line):

                        if not BLOCK.isblock('table'):   # first table line!
                                ret.extend(BLOCK.blockin('table'))
                                BLOCK.tableparser.__init__(line)

                        tablerow = TableMaster().parse_row(line)
                        BLOCK.tableparser.add_row(tablerow)     # save config

                        # Maintain line to unmask and inlines
                        # XXX Bug: | **bo | ld** | turns **bo\x01ld** and gets converted :(
                        # TODO isolate unmask+inlines parsing to use here
                        line = string.join(tablerow['cells'], SEPARATOR)

                #---------------------[ Paragraph ]-------------------------

                if not BLOCK.block() and \
                   not string.count(line, MASK.tocmask): # new para!
                        ret.extend(BLOCK.blockin('para'))


                ############################################################
                ############################################################
                ############################################################


                #---------------------[ Final Parses ]----------------------

                # The target-specific special char escapes for body lines
                line = doEscape(target,line)

                line = add_inline_tags(line)
                line = MASK.undo(line)


                #---------------------[ Hold or Return? ]-------------------

                ### Now we must choose where to put the parsed line
                # (the Lists section may have already chosen it)
                if not results_box:
                        # List item extra lines
                        if BLOCK.block()[-4:] == 'list':
                                results_box = BLOCK.holdaddsub
                        # Other blocks
                        elif BLOCK.block():
                                results_box = BLOCK.holdadd
                        # No blocks
                        else:
                                line = doFinalEscape(target, line)
                                results_box = ret.append

                results_box(line)

        # EOF: close any open para/verb/lists/table/quotes
        Debug('EOF',7)
        while BLOCK.block():
                ret.extend(BLOCK.blockout())

        # Maybe close some opened title area?
        if rules['titleblocks']:
                ret.extend(TITLE.close_all())

        # Maybe a major tag to enclose body? (like DIV for CSS)
        if TAGS['bodyOpen' ]: ret.insert(0, TAGS['bodyOpen'])
        if TAGS['bodyClose']: ret.append(TAGS['bodyClose'])

        if CONF['toc-only']: ret = []
        marked_toc = TITLE.dump_marked_toc(CONF['toc-level'])

        # If dump-source, all parsing is ignored
        if CONF['dump-source']: ret = dump_source[:]

        return ret, marked_toc
4545
4546
4547
4548 ##############################################################################
4549 ################################### GUI ######################################
4550 ##############################################################################
4551 #
4552 # Tk help: http://python.org/topics/tkinter/
4553 #    Tuto: http://ibiblio.org/obp/py4fun/gui/tkPhone.html
4554 #          /usr/lib/python*/lib-tk/Tkinter.py
4555 #
4556 # grid table : row=0, column=0, columnspan=2, rowspan=2
4557 # grid align : sticky='n,s,e,w' (North, South, East, West)
4558 # pack place : side='top,bottom,right,left'
4559 # pack fill  : fill='x,y,both,none', expand=1
4560 # pack align : anchor='n,s,e,w' (North, South, East, West)
4561 # padding    : padx=10, pady=10, ipadx=10, ipady=10 (internal)
4562 # checkbox   : offvalue is returned if the _user_ deselected the box
4563 # label align: justify=left,right,center
4564
def load_GUI_resources():
        """
        Load all extra modules and methods used by GUI.

        The Tk names are imported here, and not at the top of the file,
        then saved as globals: askopenfilename, showinfo, showwarning,
        showerror and the Tkinter module itself.
        """
        global askopenfilename, showinfo, showwarning, showerror, Tkinter
        # File chooser dialog
        from tkFileDialog import askopenfilename
        # Message popups
        from tkMessageBox import showinfo
        from tkMessageBox import showwarning
        from tkMessageBox import showerror
        # The Tk toolkit itself
        import Tkinter
4571
class Gui:
        """Graphical Tk interface.

        Builds one Tk window where the user informs the source file,
        chooses the target type and turns some options on/off. The
        "Convert!" button composes the equivalent command line and
        runs the conversion (see runprogram()).

        The Tkinter module and the dialog functions are expected to be
        already loaded as globals (load_GUI_resources() does that).
        """
        def __init__(self, conf=None):
                """Create the root window and all the Tk variables.

                conf: optional config dictionary. If informed (and not
                      empty), it is loaded right away by load_config().
                """
                self.root = Tkinter.Tk()    # mother window, come to butthead
                self.root.title(my_name)    # window title bar text
                self.window = self.root     # variable "focus" for inclusion
                self.row = 0                # row count for grid()

                self.action_length = 150    # left column length (pixel)
                self.frame_margin  = 10     # frame margin size  (pixel)
                self.frame_border  = 6      # frame border size  (pixel)

                # The default Gui colors, can be changed by %!guicolors
                # The order is: bg1, fg1, bg2, fg2 (see load_config)
                self.dft_gui_colors = ['#6c6','white','#cf9','#030']
                self.gui_colors = []
                self.bg1 = self.fg1 = self.bg2 = self.fg2 = ''

                # On Tk, vars need to be set/get using setvar()/get()
                self.infile  = self.setvar('')
                self.target  = self.setvar('')
                self.target_name = self.setvar('')

                # The checks appearance order
                self.checks  = [
                  'headers','enum-title','toc','mask-email',
                  'toc-only','stdout']

                # Creating variables for all checks (self.f_<check name>)
                for check in self.checks:
                        setattr(self, 'f_'+check, self.setvar(''))

                # Load RC config
                self.conf = {}
                if conf: self.load_config(conf)

        def load_config(self, conf):
                """Save the config dictionary and apply the GUI colors.

                The colors come from conf['guicolors'], or the defaults
                if it is not set. They must be exactly four items, in
                the order: bg1, fg1, bg2, fg2.
                """
                self.conf = conf
                self.gui_colors = conf.get('guicolors') or self.dft_gui_colors
                self.bg1, self.fg1, self.bg2, self.fg2 = self.gui_colors
                self.root.config(bd=15,bg=self.bg1)

        ### Widget makers, all of them create the widget on self.window
        ### Config is passed as a dic for python 1.5 compat
        ### (**opts don't work :( )
        def entry(self, **opts): return Tkinter.Entry(self.window, opts)
        def label(self, txt='', bg=None, **opts):
                "Text label, the default background color is bg1"
                opts.update({'text':txt,'bg':bg or self.bg1})
                return Tkinter.Label(self.window, opts)
        def button(self,name,cmd,**opts):
                "Button with the 'name' text, which calls cmd when pressed"
                opts.update({'text':name,'command':cmd})
                return Tkinter.Button(self.window, opts)
        def check(self,name,checked=0,**opts):
                "Checkbox (bg2/fg2 colors) already placed on grid, no return"
                bg, fg = self.bg2, self.fg2
                opts.update({
                  'text':name, 'onvalue':1, 'offvalue':0,
                  'activeforeground':fg, 'fg':fg,
                  'activebackground':bg, 'bg':bg,
                  'highlightbackground':bg, 'anchor':'w'
                })
                chk = Tkinter.Checkbutton(self.window, opts)
                if checked: chk.select()
                chk.grid(columnspan=2, sticky='w', padx=0)
        def menu(self,sel,items):
                "Option menu for 'items', the selection is stored on 'sel'"
                return apply(Tkinter.OptionMenu,(self.window,sel)+tuple(items))

        # Handy auxiliary functions
        def action(self, txt):
                "Place the 'txt' label on the left column of current row"
                self.label(txt, fg=self.fg1, bg=self.bg1,
                     wraplength=self.action_length).grid(column=0,row=self.row)
        def frame_open(self):
                "Create a new frame, the next widgets will be placed inside it"
                self.window = Tkinter.Frame(self.root,bg=self.bg2,
                     borderwidth=self.frame_border)
        def frame_close(self):
                "Place the current frame on the right column, back to root"
                self.window.grid(column=1, row=self.row, sticky='w',
                     padx=self.frame_margin)
                self.window = self.root
                self.label('').grid()
                self.row = self.row + 2   # update row count
        def target_name2key(self):
                "Set self.target with the key of the target name on the menu"
                name = self.target_name.get()
                target = filter(lambda x: TARGET_NAMES[x] == name, TARGETS)
                # No match (i.e. nothing was selected yet), key is empty
                try   : key = target[0]
                except IndexError: key = ''
                self.target = self.setvar(key)
        def target_key2name(self):
                "Set self.target_name with the name of the self.target key"
                key = self.target.get()
                # Unknown keys are shown as is
                name = TARGET_NAMES.get(key) or key
                self.target_name = self.setvar(name)

        def exit(self): self.root.destroy()
        def setvar(self, val): z = Tkinter.StringVar() ; z.set(val) ; return z

        def askfile(self):
                """Open the file chooser dialog and load the selected file.

                The config found for the new file is used to compose a
                brand new window, the current one is destroyed.
                """
                ftypes= [(_('txt2tags files'),('*.t2t','*.txt')),
                         (_('All files'),'*')]
                newfile = askopenfilename(filetypes=ftypes)
                if newfile:
                        self.infile.set(newfile)
                        newconf = process_source_file(newfile)[0]
                        newconf = ConfigMaster().sanity(newconf, gui=1)
                        # Restate all checkboxes after file selection
                        #TODO how to make a refresh without killing it?
                        self.root.destroy()
                        self.__init__(newconf)
                        self.mainwindow()

        def _as_lines(self, txt):
                "Return txt as a list of lines, a lone string is one line"
                if type(txt) == type(''): return [txt]
                return txt

        def scrollwindow(self, txt='no text!', title=''):
                """Show the text on a new window, with scrollbar and Close.

                txt  : list of lines. A single string is also accepted
                       and shown as is (it was splitted char by char).
                title: the new window title
                """
                # Create components
                win    = Tkinter.Toplevel() ; win.title(title)
                frame  = Tkinter.Frame(win)
                scroll = Tkinter.Scrollbar(frame)
                text   = Tkinter.Text(frame,yscrollcommand=scroll.set)
                button = Tkinter.Button(win)
                # Config
                text.insert(Tkinter.END, string.join(self._as_lines(txt),'\n'))
                scroll.config(command=text.yview)
                button.config(text=_('Close'), command=win.destroy)
                button.focus_set()
                # Packing
                text.pack(side='left', fill='both', expand=1)
                scroll.pack(side='right', fill='y')
                frame.pack(fill='both', expand=1)
                button.pack(ipadx=30)

        def runprogram(self):
                """Convert the source file using the options set on GUI.

                The window state is translated to a command line, which
                temporarily replaces the real one (CMDLINE_RAW) while
                the file is converted. The result is shown on a new
                window, or a "file saved" message box appears. The
                original CMDLINE_RAW is always restored at the end.
                """
                global CMDLINE_RAW
                # Prepare
                self.target_name2key()
                infile, target = self.infile.get(), self.target.get()
                # Sanity
                if not target:
                        showwarning(my_name,_("You must select a target type!"))
                        return
                if not infile:
                        showwarning(my_name,
                           _("You must provide the source file location!"))
                        return
                # Compose cmdline
                # The real cmdline is reused, except infile and target,
                # which now come from the GUI
                guiflags = []
                real_cmdline_conf = ConfigMaster(CMDLINE_RAW).parse()
                if real_cmdline_conf.has_key('infile'):
                        del real_cmdline_conf['infile']
                if real_cmdline_conf.has_key('target'):
                        del real_cmdline_conf['target']
                real_cmdline = CommandLine().compose_cmdline(real_cmdline_conf)
                default_outfile = ConfigMaster().get_outfile_name(
                     {'sourcefile':infile, 'outfile':'', 'target':target})
                for opt in self.checks:
                        val = int(getattr(self, 'f_%s'%opt).get() or "0")
                        # The stdout check is in fact the outfile config
                        if opt == 'stdout': opt = 'outfile'
                        on_config  = self.conf.get(opt) or 0
                        on_cmdline = real_cmdline_conf.get(opt) or 0
                        # For outfile, ON means "it is STDOUT"
                        if opt == 'outfile':
                                if on_config  == STDOUT: on_config = 1
                                else: on_config = 0
                                if on_cmdline == STDOUT: on_cmdline = 1
                                else: on_cmdline = 0
                        # A flag is added when the check differs from the
                        # loaded config, or when all agree and the option
                        # was explicitly set on the real cmdline
                        if val != on_config or (
                          val == on_config == on_cmdline and
                          real_cmdline_conf.has_key(opt)):
                                if val:
                                        # Was not set, but user selected on GUI
                                        Debug("user turned  ON: %s"%opt)
                                        if opt == 'outfile': opt = '-o-'
                                        else: opt = '--%s'%opt
                                else:
                                        # Was set, but user deselected on GUI
                                        Debug("user turned OFF: %s"%opt)
                                        if opt == 'outfile':
                                                opt = "-o%s"%default_outfile
                                        else: opt = '--no-%s'%opt
                                guiflags.append(opt)
                cmdline = [my_name, '-t', target] +real_cmdline \
                          +guiflags +[infile]
                Debug('Gui/Tk cmdline: %s'%cmdline,5)
                # Run!
                cmdline_raw_orig = CMDLINE_RAW
                # Nested try blocks: the finally restores the real
                # cmdline even if the error handlers themselves fail
                try:
                        try:
                                # Fake the GUI cmdline as the real one, and parse file
                                CMDLINE_RAW = CommandLine().get_raw_config(cmdline[1:])
                                data = process_source_file(infile)
                                # On GUI, convert_* returns the data, not finish_him()
                                outlist, config = convert_this_files([data])
                                # On GUI and STDOUT, finish_him() returns the data
                                result = finish_him(outlist, config)
                                # Show outlist in a nice new window
                                if result:
                                        outlist, config = result
                                        title = _('%s: %s converted to %s')%(
                                          my_name, os.path.basename(infile),
                                          string.upper(config['target']))
                                        self.scrollwindow(outlist, title)
                                # Show the "file saved" message
                                else:
                                        msg = "%s\n\n  %s\n%s\n\n  %s\n%s"%(
                                              _('Conversion done!'),
                                              _('FROM:'), infile,
                                              _('TO:'), config['outfile'])
                                        showinfo(my_name, msg)
                        except error:         # common error (windowed), not quit
                                pass
                        except:               # fatal error (windowed and printed)
                                errormsg = getUnknownErrorMessage()
                                sys.stdout.write("%s\n"%(errormsg,))
                                showerror(_('%s FATAL ERROR!')%my_name,errormsg)
                                self.exit()
                finally:
                        CMDLINE_RAW = cmdline_raw_orig

        def mainwindow(self):
                """Compose the full window and start the Tk main loop.

                The widgets initial state come from self.conf. Note that
                the 'stdout' and 'headers' keys may be set on self.conf.
                This method only returns when the main loop ends.
                """
                self.infile.set(self.conf.get('sourcefile') or '')
                self.target.set(self.conf.get('target') or \
                              _('-- select one --'))
                outfile = self.conf.get('outfile')
                if outfile == STDOUT:                  # map -o-
                        self.conf['stdout'] = 1
                if self.conf.get('headers') == None:
                        self.conf['headers'] = 1       # map default

                action1 = _("Enter the source file location:")
                action2 = _("Choose the target document type:")
                action3 = _("Some options you may check:")
                action4 = _("Some extra options:")
                checks_txt = {
                  'headers'   : _("Include headers on output"),
                  'enum-title': _("Number titles (1, 1.1, 1.1.1, etc)"),
                  'toc'       : _("Do TOC also (Table of Contents)"),
                  'mask-email': _("Hide e-mails from SPAM robots"),

                  'toc-only'  : _("Just do TOC, nothing more"),
                  'stdout'    : _("Dump to screen (Don't save target file)")
                }
                targets_menu = map(lambda x: TARGET_NAMES[x], TARGETS)

                # Header
                self.label("%s %s"%(string.upper(my_name), my_version),
                     bg=self.bg2, fg=self.fg2).grid(columnspan=2, ipadx=10)
                self.label(_("ONE source, MULTI targets")+'\n%s\n'%my_url,
                     bg=self.bg1, fg=self.fg1).grid(columnspan=2)
                self.row = 2
                # Choose input file
                self.action(action1) ; self.frame_open()
                e_infile = self.entry(textvariable=self.infile,width=25)
                e_infile.grid(row=self.row, column=0, sticky='e')
                if not self.infile.get(): e_infile.focus_set()
                self.button(_("Browse"), self.askfile).grid(
                    row=self.row, column=1, sticky='w', padx=10)
                # Show outfile name, style and encoding (if any)
                txt = ''
                if outfile:
                        txt = outfile
                        if outfile == STDOUT: txt = _('<screen>')
                        l_output = self.label(_('Output: ')+txt,
                                        fg=self.fg2,bg=self.bg2)
                        l_output.grid(columnspan=2, sticky='w')
                for setting in ['style','encoding']:
                        if self.conf.get(setting):
                                name = string.capitalize(setting)
                                val  = self.conf[setting]
                                self.label('%s: %s'%(name, val),
                                     fg=self.fg2, bg=self.bg2).grid(
                                     columnspan=2, sticky='w')
                # Choose target
                self.frame_close() ; self.action(action2)
                self.frame_open()
                self.target_key2name()
                self.menu(self.target_name, targets_menu).grid(
                     columnspan=2, sticky='w')
                # Options checkboxes label
                self.frame_close() ; self.action(action3)
                self.frame_open()
                # Compose options check boxes, example:
                # self.check(checks_txt['toc'],1,variable=self.f_toc)
                for check in self.checks:
                        # Extra options label
                        if check == 'toc-only':
                                self.frame_close() ; self.action(action4)
                                self.frame_open()
                        txt = checks_txt[check]
                        var = getattr(self, 'f_'+check)
                        checked = self.conf.get(check)
                        self.check(txt,checked,variable=var)
                self.frame_close()
                # Spacer and buttons
                self.label('').grid() ; self.row = self.row + 1
                b_quit = self.button(_("Quit"), self.exit)
                b_quit.grid(row=self.row, column=0, sticky='w', padx=30)
                b_conv = self.button(_("Convert!"), self.runprogram)
                b_conv.grid(row=self.row, column=1, sticky='e', padx=30)
                # Ready to convert? Then focus the Convert button.
                # The config is tested because self.target is never
                # empty, it holds the "select one" text when no target
                if self.conf.get('target') and self.infile.get():
                        b_conv.focus_set()

                # As documentation told me
                if sys.platform[:3] == 'win':
                        self.root.iconify()
                        self.root.update()
                        self.root.deiconify()

                self.root.mainloop()
4868
4869
4870 ##############################################################################
4871 ##############################################################################
4872
def exec_command_line(user_cmdline=None):
        """Parse the command line and run the program, on GUI or console.

        user_cmdline: optional list with the command line arguments.
                      If empty or not informed, sys.argv[1:] is used.

        The command line is parsed and the module globals CMDLINE_RAW,
        DEBUG, VERBOSE, QUIET and GUI are set (RC_RAW also, if the user
        configuration file is loaded). Then the Tk interface is started
        if --gui was used or if there are no input files (and the GUI
        can be loaded). Otherwise the input files are converted.

        Note that on GUI mode the global Error() function is redefined
        to show a message box and raise the 'error' exception.
        """
        global CMDLINE_RAW, RC_RAW, DEBUG, VERBOSE, QUIET, GUI, Error

        # Extract command line data
        cmdline_data = user_cmdline or sys.argv[1:]
        CMDLINE_RAW = CommandLine().get_raw_config(cmdline_data, relative=1)
        cmdline_parsed = ConfigMaster(CMDLINE_RAW).parse()
        DEBUG   = cmdline_parsed.get('debug'  ) or 0
        VERBOSE = cmdline_parsed.get('verbose') or 0
        QUIET   = cmdline_parsed.get('quiet'  ) or 0
        GUI     = cmdline_parsed.get('gui'    ) or 0
        infiles = cmdline_parsed.get('infile' ) or []

        Message(_("Txt2tags %s processing begins")%my_version,1)

        # The easy ones
        if cmdline_parsed.get('help'   ): Quit(USAGE)
        if cmdline_parsed.get('version'): Quit(VERSIONSTR)

        # Multifile haters
        if len(infiles) > 1:
                errmsg=_("Option --%s can't be used with multiple input files")
                for option in NO_MULTI_INPUT:
                        if cmdline_parsed.get(option):
                                Error(errmsg%option)

        Debug("system platform: %s"%sys.platform)
        Debug("python version: %s"%(string.split(sys.version,'(')[0]))
        Debug("line break char: %s"%repr(LB))
        # Show the command line really in use: the informed one, or
        # the full sys.argv when running from the real command line
        Debug("command line: %s"%((user_cmdline or sys.argv),))
        Debug("command line raw config: %s"%CMDLINE_RAW,1)

        # Extract RC file config
        if cmdline_parsed.get('rc') == 0:
                Message(_("Ignoring user configuration file"),1)
        else:
                rc_file = get_rc_path()
                if os.path.isfile(rc_file):
                        Message(_("Loading user configuration file"),1)
                        RC_RAW = ConfigLines(file=rc_file).get_raw_config()

                Debug("rc file: %s"%rc_file)
                Debug("rc file raw config: %s"%RC_RAW,1)

        # Get all infiles config (if any)
        infiles_config = get_infiles_config(infiles)

        # Is GUI available?
        # Try to load and start GUI interface for --gui
        # If program was called with no arguments, try GUI also
        if GUI or not infiles:
                try:
                        load_GUI_resources()
                        Debug("GUI resources OK (Tk module is installed)")
                        winbox = Gui()
                        Debug("GUI display OK")
                        GUI = 1
                except:
                        # Any failure here means "no GUI", not a fatal error
                        Debug("GUI Error: no Tk module or no DISPLAY")
                        GUI = 0

        # User forced --gui, but it's not available
        if cmdline_parsed.get('gui') and not GUI:
                # Traceback of the GUI loading failure, plus a blank line
                sys.stdout.write("%s\n\n"%(getTraceback(),))
                Error("Sorry, I can't run my Graphical Interface - GUI\n"
                      "- Check if Python Tcl/Tk module is installed (Tkinter)\n"
                      "- Make sure you are in a graphical environment (like X)")

        # Okay, we will use GUI
        if GUI:
                Message(_("We are on GUI interface"),1)

                # Redefine Error function to raise exception instead sys.exit()
                # (it replaces the global one, see the global statement)
                def Error(msg):
                        showerror(_('txt2tags ERROR!'), msg)
                        raise error

                # If no input file, get RC+cmdline config, else full config
                if not infiles:
                        gui_conf = ConfigMaster(RC_RAW+CMDLINE_RAW).parse()
                else:
                        try   : gui_conf = infiles_config[0][0]
                        except: gui_conf = {}

                # Sanity is needed to set outfile and other things
                gui_conf = ConfigMaster().sanity(gui_conf, gui=1)
                Debug("GUI config: %s"%gui_conf,5)

                # Insert config and populate the nice window!
                winbox.load_config(gui_conf)
                winbox.mainwindow()

        # Console mode rocks forever!
        else:
                Message(_("We are on Command Line interface"),1)

                # Called with no arguments, show error
                if not infiles: Error(_('Missing input file (try --help)'))

                convert_this_files(infiles_config)

        # NOTE: the typos are kept, this string is a translation key
        Message(_("Txt2tags finished sucessfuly"),1)
4975
if __name__ == '__main__':
        # Running as a script: do the job and report failures on STDERR
        try:
                exec_command_line()
        except error:
                # Program error: show just its message, exit status 1
                sys.stderr.write("%s\n"%sys.exc_info()[1])
                sys.stderr.flush()
                sys.exit(1)
        except SystemExit:
                # An exit request is not an error, go on to the Quit() below
                pass
        except:
                # Unexpected error: show the unknown error message, status 1
                sys.stderr.write(getUnknownErrorMessage())
                sys.stderr.flush()
                sys.exit(1)
        Quit()
4990
4991 # The End.