2 # txt2tags - generic text conversion tool
3 # http://txt2tags.sf.net
5 # Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 Aurelio Jargas
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, version 2.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You have received a copy of the GNU General Public License along
17 # with this program, on the COPYING file.
19 ########################################################################
21 # BORING CODE EXPLANATION AHEAD
23 # Just read it if you wish to understand how the txt2tags code works.
25 ########################################################################
27 # The code that [1] parses the marked text is separated from the
28 # code that [2] inserts the target tags.
30 # [1] made by: def convert()
31 # [2] made by: class BlockMaster
33 # The structures of the marked text are identified and its contents are
34 # extracted into a data holder (Python lists and dictionaries).
36 # When parsing the source file, the blocks (para, lists, quote, table)
37 # are opened with BlockMaster, right when found. Then their contents,
38 # which span several lines, are fed into a special holder on the
39 # BlockMaster instance. Just when the block is closed, the target tags
40 # are inserted for the full block as a whole, in one pass. This way, we
41 # have a better control on blocks. Much better than the previous line by
44 # In other words, whenever inside a block, the parser *holds* the tag
45 # insertion process, waiting until the full block is read. That was
46 # needed primarily to close paragraphs for the XHTML target, but
47 # proved to be a very good addition, improving many other processing steps.
49 # -------------------------------------------------------------------
51 # These important classes are all documented:
52 # CommandLine, SourceDocument, ConfigMaster, ConfigLines.
54 # There is a RAW Config format and all kind of configuration is first
55 # converted to this format. Then a generic method parses it.
57 # These functions get information about the input file(s) and take
58 # care of the init processing:
59 # get_infiles_config(), process_source_file() and convert_this_files()
61 ########################################################################
63 #XXX Python coding warning
64 # Avoid common mistakes:
65 # - do NOT use newlist=list instead of newlist=list[:]
66 # - do NOT use newdic=dic instead of newdic=dic.copy()
67 # - do NOT use dic[key] instead of dic.get(key)
68 # - do NOT use del dic[key] without checking has_key() first
70 #XXX Smart Image Align doesn't work if the image is a link
71 # Can't fix that because the image is expanded together with the
72 # link, at the linkbank filling moment. Only the image is passed
73 # to parse_images(), not the full line, so it is always 'middle'.
75 #XXX Paragraph separation not valid inside Quote
76 # Quote will not have <p></p> inside, instead will close and open
77 # again the <blockquote>. This really sux in CSS, when defining a
78 # different background color. Still don't know how to fix it.
81 # New mark or macro which expands to an anchor full title.
82 # It is necessary to parse the full document in this order:
83 # DONE 1st scan: HEAD: get all settings, including %!includeconf
84 # DONE 2nd scan: BODY: expand includes & apply %!preproc
85 # 3rd scan: BODY: read titles and compose TOC info
86 # 4th scan: BODY: full parsing, expanding [#anchor] 1st
87 # Steps 2 and 3 can be made together, with no tag adding.
88 # Two complete body scans will be *slow*, don't know if it is worth it.
89 # One solution may be to add the titles as postproc rules
92 ##############################################################################
94 # User config (1=ON, 0=OFF)
96 USE_I18N = 1 # use gettext for i18ned messages? (default is 1)
97 COLOR_DEBUG = 1 # show debug messages in colors? (default is 1)
98 BG_LIGHT = 0 # is your terminal background color light? (default is 0)
99 HTML_LOWER = 0 # use lowercase HTML tags instead of uppercase? (default is 0)
101 ##############################################################################
104 # These are all the core Python modules used by txt2tags (KISS!)
105 import re, string, os, sys, time, getopt
107 # Program information
108 my_url = 'http://txt2tags.sf.net'
110 my_email = 'verde@aurelio.net'
113 # i18n - just use if available
117 # If your locale dir is different, change it here
118 cat = gettext.Catalog('txt2tags',localedir='/usr/share/locale/')
125 # FLAGS : the conversion related flags , may be used in %!options
126 # OPTIONS : the conversion related options, may be used in %!options
127 # ACTIONS : the other behavior modifiers, valid on command line only
128 # MACROS : the valid macros with their default values for formatting
129 # SETTINGS: global miscellaneous settings, valid on RC file only
130 # NO_TARGET: actions that don't require a target specification
131 # NO_MULTI_INPUT: actions that don't accept more than one input file
132 # CONFIG_KEYWORDS: the valid %!key:val keywords
134 # FLAGS and OPTIONS are configs that affect the converted document.
135 # They usually also have a --no-<option> to turn them OFF.
137 # ACTIONS are needed because when doing multiple input files, strange
138 # behavior would be found, such as using the command line interface for
139 # the first file and the gui for the second. There is no --no-<action>.
140 # --version and --help inside %!options are also odd
142 TARGETS = 'html xhtml sgml tex lout man mgp wiki gwiki doku moin pm6 txt'.split()
144 FLAGS = {'headers' :1 , 'enum-title' :0 , 'mask-email' :0 ,
145 'toc-only' :0 , 'toc' :0 , 'rc' :1 ,
146 'css-sugar' :0 , 'css-suggar' :0 , 'css-inside' :0 ,
148 OPTIONS = {'target' :'', 'toc-level' :3 , 'style' :'',
149 'infile' :'', 'outfile' :'', 'encoding' :'',
150 'config-file':'', 'split' :0 , 'lang' :'',
151 'show-config-value':'' }
152 ACTIONS = {'help' :0 , 'version' :0 , 'gui' :0 ,
153 'verbose' :0 , 'debug' :0 , 'dump-config':0 ,
155 MACROS = {'date' : '%Y%m%d', 'infile': '%f',
156 'mtime': '%Y%m%d', 'outfile': '%f'}
157 SETTINGS = {} # for future use
158 NO_TARGET = ['help', 'version', 'gui', 'toc-only', 'dump-config', 'dump-source']
159 NO_MULTI_INPUT = ['gui','dump-config','dump-source']
161 'target', 'encoding', 'style', 'options', 'preproc','postproc',
165 'html' : _('HTML page'),
166 'xhtml': _('XHTML page'),
167 'sgml' : _('SGML document'),
168 'tex' : _('LaTeX document'),
169 'lout' : _('Lout document'),
170 'man' : _('UNIX Manual page'),
171 'mgp' : _('MagicPoint presentation'),
172 'wiki' : _('Wikipedia page'),
173 'gwiki': _('Google Wiki page'),
174 'doku' : _('DokuWiki page'),
175 'moin' : _('MoinMoin page'),
176 'pm6' : _('PageMaker document'),
177 'txt' : _('Plain Text'),
180 DEBUG = 0 # do not edit here, please use --debug
181 VERBOSE = 0 # do not edit here, please use -v, -vv or -vvv
182 QUIET = 0 # do not edit here, please use --quiet
183 GUI = 0 # do not edit here, please use --gui
184 AUTOTOC = 1 # do not edit here, please use --no-toc or %%toc
198 MODULEIN = MODULEOUT = '-module-'
201 LISTNAMES = {'-':'list', '+':'numlist', ':':'deflist'}
202 LINEBREAK = {'default':'\n', 'win':'\r\n', 'mac':'\r'}
204 # Platform specific settings: line break looked up by sys.platform[:3], else 'default'
205 LB = LINEBREAK.get(sys.platform[:3]) or LINEBREAK['default']
207 VERSIONSTR = _("%s version %s <%s>")%(my_name,my_version,my_url)
209 USAGE = string.join([
211 _("Usage: %s [OPTIONS] [infile.t2t ...]") % my_name,
213 _(" -t, --target=TYPE set target document type. currently supported:"),
214 ' %s,' % string.join(TARGETS[:8], ', '),
215 ' %s' % string.join(TARGETS[8:], ', '),
216 _(" -i, --infile=FILE set FILE as the input file name ('-' for STDIN)"),
217 _(" -o, --outfile=FILE set FILE as the output file name ('-' for STDOUT)"),
218 _(" -H, --no-headers suppress header, title and footer contents"),
219 _(" --headers show header, title and footer contents (default ON)"),
220 _(" --encoding=ENC set target file encoding (utf-8, iso-8859-1, etc)"),
221 _(" --style=FILE use FILE as the document style (like HTML CSS)"),
222 _(" --css-sugar insert CSS-friendly tags for HTML and XHTML targets"),
223 _(" --css-inside insert CSS file contents inside HTML/XHTML headers"),
224 _(" --mask-email hide email from spam robots. x@y.z turns <x (a) y z>"),
225 _(" --toc add TOC (Table of Contents) to target document"),
226 _(" --toc-only print document TOC and exit"),
227 _(" --toc-level=N set maximum TOC level (depth) to N"),
228 _(" -n, --enum-title enumerate all titles as 1, 1.1, 1.1.1, etc"),
229 _(" -C, --config-file=F read config from file F"),
230 _(" --rc read user config file ~/.txt2tagsrc (default ON)"),
231 _(" --gui invoke Graphical Tk Interface"),
232 _(" -q, --quiet quiet mode, suppress all output (except errors)"),
233 _(" -v, --verbose print informative messages during conversion"),
234 _(" -h, --help print this help information and exit"),
235 _(" -V, --version print program version and exit"),
236 _(" --dump-config print all the config found and exit"),
237 _(" --dump-source print the document source, with includes expanded"),
239 _("Turn OFF options:"),
240 " --no-outfile, --no-infile, --no-style, --no-encoding, --no-headers",
241 " --no-toc, --no-toc-only, --no-mask-email, --no-enum-title, --no-rc",
242 " --no-css-sugar, --no-css-inside, --no-quiet, --no-dump-config",
245 _("Example:\n %s -t html --toc myfile.t2t") % my_name,
247 _("By default, converted output is saved to 'infile.<target>'."),
248 _("Use --outfile to force an output file name."),
249 _("If input file is '-', reads from STDIN."),
250 _("If output file is '-', dumps output to STDOUT."),
252 'http://txt2tags.sourceforge.net',
257 ##############################################################################
260 # Here are all the targets' templates
261 # You may edit them to fit your needs
262 # - the %(HEADERn)s strings represent the Header lines
263 # - the %(STYLE)s string is changed by --style contents
264 # - the %(ENCODING)s string is changed by --encoding contents
265 # - if any of the above is empty, the full line is removed
266 # - use %% to represent a literal %
276 <!doctype linuxdoc system>
284 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
287 <META NAME="generator" CONTENT="http://txt2tags.sf.net">
288 <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%(ENCODING)s">
289 <LINK REL="stylesheet" TYPE="text/css" HREF="%(STYLE)s">
290 <TITLE>%(HEADER1)s</TITLE>
291 </HEAD><BODY BGCOLOR="white" TEXT="black">
292 <P ALIGN="center"><CENTER><H1>%(HEADER1)s</H1>
294 <I>%(HEADER2)s</I><BR>
300 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
303 <META NAME="generator" CONTENT="http://txt2tags.sf.net">
304 <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%(ENCODING)s">
305 <LINK REL="stylesheet" TYPE="text/css" HREF="%(STYLE)s">
306 <TITLE>%(HEADER1)s</TITLE>
310 <DIV CLASS="header" ID="header">
319 encoding="%(ENCODING)s"
321 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\
322 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
323 <html xmlns="http://www.w3.org/1999/xhtml">
325 <title>%(HEADER1)s</title>
326 <meta name="generator" content="http://txt2tags.sf.net" />
327 <link rel="stylesheet" type="text/css" href="%(STYLE)s" />
329 <body bgcolor="white" text="black">
339 encoding="%(ENCODING)s"
341 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\
342 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
343 <html xmlns="http://www.w3.org/1999/xhtml">
345 <title>%(HEADER1)s</title>
346 <meta name="generator" content="http://txt2tags.sf.net" />
347 <link rel="stylesheet" type="text/css" href="%(STYLE)s" />
351 <div class="header" id="header">
359 .TH "%(HEADER1)s" 1 "%(HEADER3)s" "%(HEADER2)s"
364 <PMTags1.0 win><C-COLORTABLE ("Preto" 1 0 0 0)
366 <FONT "Times New Roman"><CCOLOR "Preto"><SIZE 11>
367 <HORIZONTAL 100><LETTERSPACE 0><CTRACK 127><CSSIZE 70><C+SIZE 58.3>
368 <C-POSITION 33.3><C+POSITION 33.3><P><CBASELINE 0><CNOBREAK 0><CLEADING -0.05>
369 <GGRID 0><GLEFT 7.2><GRIGHT 0><GFIRST 0><G+BEFORE 7.2><G+AFTER 0>
370 <GALIGNMENT "justify"><GMETHOD "proportional"><G& "ENGLISH">
371 <GPAIRS 12><G%% 120><GKNEXT 0><GKWIDOW 0><GKORPHAN 0><GTABS $>
372 <GHYPHENATION 2 34 0><GWORDSPACE 75 100 150><GSPACE -5 0 25>
373 ><@Bullet=<@-PARENT "Normal"><FONT "Abadi MT Condensed Light">
374 <GLEFT 14.4><G+BEFORE 2.15><G%% 110><GTABS(25.2 l "")>
375 ><@PreFormat=<@-PARENT "Normal"><FONT "Lucida Console"><SIZE 8><CTRACK 0>
376 <GLEFT 0><G+BEFORE 0><GALIGNMENT "left"><GWORDSPACE 100 100 100><GSPACE 0 0 0>
377 ><@Title1=<@-PARENT "Normal"><FONT "Arial"><SIZE 14><B>
378 <GCONTENTS><GLEFT 0><G+BEFORE 0><GALIGNMENT "left">
379 ><@Title2=<@-PARENT "Title1"><SIZE 12><G+BEFORE 3.6>
380 ><@Title3=<@-PARENT "Title1"><SIZE 10><GLEFT 7.2><G+BEFORE 7.2>
381 ><@Title4=<@-PARENT "Title3">
382 ><@Title5=<@-PARENT "Title3">
383 ><@Quote=<@-PARENT "Normal"><SIZE 10><I>>
391 #!/usr/X11R6/bin/mgp -t 90
392 %%deffont "normal" xfont "utopia-medium-r", charset "iso8859-1"
393 %%deffont "normal-i" xfont "utopia-medium-i", charset "iso8859-1"
394 %%deffont "normal-b" xfont "utopia-bold-r" , charset "iso8859-1"
395 %%deffont "normal-bi" xfont "utopia-bold-i" , charset "iso8859-1"
396 %%deffont "mono" xfont "courier-medium-r", charset "iso8859-1"
398 %%default 2 size 8, fore "yellow", font "normal-b", center
399 %%default 3 size 5, fore "white", font "normal", left, prefix " "
400 %%tab 1 size 4, vgap 30, prefix " ", icon arc "red" 40, leftfill
401 %%tab 2 prefix " ", icon arc "orange" 40, leftfill
402 %%tab 3 prefix " ", icon arc "brown" 40, leftfill
403 %%tab 4 prefix " ", icon arc "darkmagenta" 40, leftfill
404 %%tab 5 prefix " ", icon arc "magenta" 40, leftfill
405 %%%%------------------------- end of headers -----------------------------
412 %%size 10, center, fore "yellow"
415 %%font "normal-i", size 6, fore "white", center
418 %%font "mono", size 7, center
440 ===== %(HEADER1)s =====
457 r"""\documentclass{article}
458 \usepackage{graphicx}
459 \usepackage[normalem]{ulem} %% needed by strike
460 \usepackage[urlcolor=blue,colorlinks=true]{hyperref}
461 \usepackage[%(ENCODING)s]{inputenc} %% char encoding
462 \usepackage{%(STYLE)s} %% user defined
475 @InitialFont { Times Base 12p } # Times, Courier, Helvetica, ...
476 @PageOrientation { Portrait } # Portrait, Landscape
477 @ColumnNumber { 1 } # Number of columns (2, 3, ...)
478 @PageHeaders { Simple } # None, Simple, Titles, NoTitles
479 @InitialLanguage { English } # German, French, Portuguese, ...
480 @OptimizePages { Yes } # Yes/No smart page break feature
483 @Display @Heading { %(HEADER1)s }
484 @Display @I { %(HEADER2)s }
485 @Display { %(HEADER3)s }
486 #@NP # Break page after Headers
488 # @SysInclude { tbl } # Tables support
489 # setup: @MakeContents { Yes } # show TOC
490 # setup: @SectionGap # break page at each section
494 ##############################################################################
498 "Returns all the known tags for the specified target"
506 title1Open title1Close
507 title2Open title2Close
508 title3Open title3Close
509 title4Open title4Close
510 title5Open title5Close
511 blocktitle1Open blocktitle1Close
512 blocktitle2Open blocktitle2Close
513 blocktitle3Open blocktitle3Close
515 paragraphOpen paragraphClose
516 blockVerbOpen blockVerbClose
517 blockQuoteOpen blockQuoteClose blockQuoteLine
518 blockCommentOpen blockCommentClose
520 fontMonoOpen fontMonoClose
521 fontBoldOpen fontBoldClose
522 fontItalicOpen fontItalicClose
523 fontUnderlineOpen fontUnderlineClose
524 fontStrikeOpen fontStrikeClose
527 listItemOpen listItemClose listItemLine
528 numlistOpen numlistClose
529 numlistItemOpen numlistItemClose numlistItemLine
530 deflistOpen deflistClose
531 deflistItem1Open deflistItem1Close
532 deflistItem2Open deflistItem2Close deflistItem2LinePrefix
537 img imgAlignLeft imgAlignRight imgAlignCenter
538 _imgAlignLeft _imgAlignRight _imgAlignCenter
541 _tableBorder _tableAlignLeft _tableAlignCenter
542 tableRowOpen tableRowClose tableRowSep
543 tableTitleRowOpen tableTitleRowClose
544 tableCellOpen tableCellClose tableCellSep
545 tableTitleCellOpen tableTitleCellClose tableTitleCellSep
546 _tableColAlignLeft _tableColAlignRight _tableColAlignCenter
547 _tableCellAlignLeft _tableCellAlignRight _tableCellAlignCenter
548 _tableCellColSpan tableColAlignSep
559 # TIP: \a represents the current text on the mark
560 # TIP: ~A~, ~B~ and ~C~ are expanded to other tags parts
567 'title3' : '\t\t\a' ,
568 'title4' : '\t\t\t\a' ,
569 'title5' : '\t\t\t\t\a',
570 'blockQuoteLine' : '\t' ,
571 'listItemOpen' : '- ' ,
572 'numlistItemOpen' : '\a. ' ,
575 'urlMark' : '\a (\a)' ,
577 'emailMark' : '\a (\a)' ,
582 'paragraphOpen' : '<P>' ,
583 'paragraphClose' : '</P>' ,
584 'title1' : '~A~<H1>\a</H1>' ,
585 'title2' : '~A~<H2>\a</H2>' ,
586 'title3' : '~A~<H3>\a</H3>' ,
587 'title4' : '~A~<H4>\a</H4>' ,
588 'title5' : '~A~<H5>\a</H5>' ,
589 'anchor' : '<A NAME="\a"></A>\n',
590 'blockVerbOpen' : '<PRE>' ,
591 'blockVerbClose' : '</PRE>' ,
592 'blockQuoteOpen' : '<BLOCKQUOTE>' ,
593 'blockQuoteClose' : '</BLOCKQUOTE>' ,
594 'fontMonoOpen' : '<CODE>' ,
595 'fontMonoClose' : '</CODE>' ,
596 'fontBoldOpen' : '<B>' ,
597 'fontBoldClose' : '</B>' ,
598 'fontItalicOpen' : '<I>' ,
599 'fontItalicClose' : '</I>' ,
600 'fontUnderlineOpen' : '<U>' ,
601 'fontUnderlineClose' : '</U>' ,
602 'fontStrikeOpen' : '<S>' ,
603 'fontStrikeClose' : '</S>' ,
604 'listOpen' : '<UL>' ,
605 'listClose' : '</UL>' ,
606 'listItemOpen' : '<LI>' ,
607 'numlistOpen' : '<OL>' ,
608 'numlistClose' : '</OL>' ,
609 'numlistItemOpen' : '<LI>' ,
610 'deflistOpen' : '<DL>' ,
611 'deflistClose' : '</DL>' ,
612 'deflistItem1Open' : '<DT>' ,
613 'deflistItem1Close' : '</DT>' ,
614 'deflistItem2Open' : '<DD>' ,
615 'bar1' : '<HR NOSHADE SIZE=1>' ,
616 'bar2' : '<HR NOSHADE SIZE=5>' ,
617 'url' : '<A HREF="\a">\a</A>' ,
618 'urlMark' : '<A HREF="\a">\a</A>' ,
619 'email' : '<A HREF="mailto:\a">\a</A>' ,
620 'emailMark' : '<A HREF="mailto:\a">\a</A>' ,
621 'img' : '<IMG~A~ SRC="\a" BORDER="0" ALT="">',
622 '_imgAlignLeft' : ' ALIGN="left"' ,
623 '_imgAlignCenter' : ' ALIGN="middle"',
624 '_imgAlignRight' : ' ALIGN="right"' ,
625 'tableOpen' : '<TABLE~A~~B~ CELLPADDING="4">',
626 'tableClose' : '</TABLE>' ,
627 'tableRowOpen' : '<TR>' ,
628 'tableRowClose' : '</TR>' ,
629 'tableCellOpen' : '<TD~A~~S~>' ,
630 'tableCellClose' : '</TD>' ,
631 'tableTitleCellOpen' : '<TH~S~>' ,
632 'tableTitleCellClose' : '</TH>' ,
633 '_tableBorder' : ' BORDER="1"' ,
634 '_tableAlignCenter' : ' ALIGN="center"',
635 '_tableCellAlignRight' : ' ALIGN="right"' ,
636 '_tableCellAlignCenter': ' ALIGN="center"',
637 '_tableCellColSpan' : ' COLSPAN="\a"' ,
638 'cssOpen' : '<STYLE TYPE="text/css">',
639 'cssClose' : '</STYLE>' ,
640 'comment' : '<!-- \a -->' ,
641 'EOD' : '</BODY></HTML>'
644 #TIP xhtml inherits all HTML definitions (lowercased)
645 #TIP http://www.w3.org/TR/xhtml1/#guidelines
646 #TIP http://www.htmlref.com/samples/Chapt17/17_08.htm
648 'listItemClose' : '</li>' ,
649 'numlistItemClose' : '</li>' ,
650 'deflistItem2Close' : '</dd>' ,
651 'bar1' : '<hr class="light" />',
652 'bar2' : '<hr class="heavy" />',
653 'anchor' : '<a id="\a" name="\a"></a>\n',
654 'img' : '<img~A~ src="\a" border="0" alt=""/>',
658 'paragraphOpen' : '<p>' ,
659 'title1' : '<sect>\a~A~<p>' ,
660 'title2' : '<sect1>\a~A~<p>' ,
661 'title3' : '<sect2>\a~A~<p>' ,
662 'title4' : '<sect3>\a~A~<p>' ,
663 'title5' : '<sect4>\a~A~<p>' ,
664 'anchor' : '<label id="\a">' ,
665 'blockVerbOpen' : '<tscreen><verb>' ,
666 'blockVerbClose' : '</verb></tscreen>' ,
667 'blockQuoteOpen' : '<quote>' ,
668 'blockQuoteClose' : '</quote>' ,
669 'fontMonoOpen' : '<tt>' ,
670 'fontMonoClose' : '</tt>' ,
671 'fontBoldOpen' : '<bf>' ,
672 'fontBoldClose' : '</bf>' ,
673 'fontItalicOpen' : '<em>' ,
674 'fontItalicClose' : '</em>' ,
675 'fontUnderlineOpen' : '<bf><em>' ,
676 'fontUnderlineClose' : '</em></bf>' ,
677 'listOpen' : '<itemize>' ,
678 'listClose' : '</itemize>' ,
679 'listItemOpen' : '<item>' ,
680 'numlistOpen' : '<enum>' ,
681 'numlistClose' : '</enum>' ,
682 'numlistItemOpen' : '<item>' ,
683 'deflistOpen' : '<descrip>' ,
684 'deflistClose' : '</descrip>' ,
685 'deflistItem1Open' : '<tag>' ,
686 'deflistItem1Close' : '</tag>' ,
687 'bar1' : '<!-- \a -->' ,
688 'url' : '<htmlurl url="\a" name="\a">' ,
689 'urlMark' : '<htmlurl url="\a" name="\a">' ,
690 'email' : '<htmlurl url="mailto:\a" name="\a">' ,
691 'emailMark' : '<htmlurl url="mailto:\a" name="\a">' ,
692 'img' : '<figure><ph vspace=""><img src="\a">'+\
694 'tableOpen' : '<table><tabular ca="~C~">' ,
695 'tableClose' : '</tabular></table>' ,
696 'tableRowSep' : '<rowsep>' ,
697 'tableCellSep' : '<colsep>' ,
698 '_tableColAlignLeft' : 'l' ,
699 '_tableColAlignRight' : 'r' ,
700 '_tableColAlignCenter': 'c' ,
701 'comment' : '<!-- \a -->' ,
707 'title1' : '\n~A~\section*{\a}' ,
708 'title2' : '~A~\\subsection*{\a}' ,
709 'title3' : '~A~\\subsubsection*{\a}',
710 # title 4/5: DIRTY: para+BF+\\+\n
711 'title4' : '~A~\\paragraph{}\\textbf{\a}\\\\\n',
712 'title5' : '~A~\\paragraph{}\\textbf{\a}\\\\\n',
713 'numtitle1' : '\n~A~\section{\a}' ,
714 'numtitle2' : '~A~\\subsection{\a}' ,
715 'numtitle3' : '~A~\\subsubsection{\a}' ,
716 'anchor' : '\\hypertarget{\a}{}\n' ,
717 'blockVerbOpen' : '\\begin{verbatim}' ,
718 'blockVerbClose' : '\\end{verbatim}' ,
719 'blockQuoteOpen' : '\\begin{quotation}' ,
720 'blockQuoteClose' : '\\end{quotation}' ,
721 'fontMonoOpen' : '\\texttt{' ,
722 'fontMonoClose' : '}' ,
723 'fontBoldOpen' : '\\textbf{' ,
724 'fontBoldClose' : '}' ,
725 'fontItalicOpen' : '\\textit{' ,
726 'fontItalicClose' : '}' ,
727 'fontUnderlineOpen' : '\\underline{' ,
728 'fontUnderlineClose' : '}' ,
729 'fontStrikeOpen' : '\\sout{' ,
730 'fontStrikeClose' : '}' ,
731 'listOpen' : '\\begin{itemize}' ,
732 'listClose' : '\\end{itemize}' ,
733 'listItemOpen' : '\\item ' ,
734 'numlistOpen' : '\\begin{enumerate}' ,
735 'numlistClose' : '\\end{enumerate}' ,
736 'numlistItemOpen' : '\\item ' ,
737 'deflistOpen' : '\\begin{description}',
738 'deflistClose' : '\\end{description}' ,
739 'deflistItem1Open' : '\\item[' ,
740 'deflistItem1Close' : ']' ,
741 'bar1' : '\n\\hrulefill{}\n' ,
742 'bar2' : '\n\\rule{\linewidth}{1mm}\n',
743 'url' : '\\htmladdnormallink{\a}{\a}',
744 'urlMark' : '\\htmladdnormallink{\a}{\a}',
745 'email' : '\\htmladdnormallink{\a}{mailto:\a}',
746 'emailMark' : '\\htmladdnormallink{\a}{mailto:\a}',
747 'img' : '\\includegraphics{\a}',
748 'tableOpen' : '\\begin{center}\\begin{tabular}{|~C~|}',
749 'tableClose' : '\\end{tabular}\\end{center}',
750 'tableRowOpen' : '\\hline ' ,
751 'tableRowClose' : ' \\\\' ,
752 'tableCellSep' : ' & ' ,
753 '_tableColAlignLeft' : 'l' ,
754 '_tableColAlignRight' : 'r' ,
755 '_tableColAlignCenter': 'c' ,
756 'tableColAlignSep' : '|' ,
758 'TOC' : '\\tableofcontents',
759 'pageBreak' : '\\clearpage',
760 'EOD' : '\\end{document}'
764 'paragraphOpen' : '@LP' ,
765 'blockTitle1Open' : '@BeginSections' ,
766 'blockTitle1Close' : '@EndSections' ,
767 'blockTitle2Open' : ' @BeginSubSections' ,
768 'blockTitle2Close' : ' @EndSubSections' ,
769 'blockTitle3Open' : ' @BeginSubSubSections' ,
770 'blockTitle3Close' : ' @EndSubSubSections' ,
771 'title1Open' : '\n~A~@Section @Title { \a } @Begin',
772 'title1Close' : '@End @Section' ,
773 'title2Open' : '\n~A~ @SubSection @Title { \a } @Begin',
774 'title2Close' : ' @End @SubSection' ,
775 'title3Open' : '\n~A~ @SubSubSection @Title { \a } @Begin',
776 'title3Close' : ' @End @SubSubSection' ,
777 'title4Open' : '\n~A~@LP @LeftDisplay @B { \a }',
778 'title5Open' : '\n~A~@LP @LeftDisplay @B { \a }',
779 'anchor' : '@Tag { \a }\n' ,
780 'blockVerbOpen' : '@LP @ID @F @RawVerbatim @Begin',
781 'blockVerbClose' : '@End @RawVerbatim' ,
782 'blockQuoteOpen' : '@QD {' ,
783 'blockQuoteClose' : '}' ,
784 # enclosed inside {} to deal with joined**words**
785 'fontMonoOpen' : '{@F {' ,
786 'fontMonoClose' : '}}' ,
787 'fontBoldOpen' : '{@B {' ,
788 'fontBoldClose' : '}}' ,
789 'fontItalicOpen' : '{@II {' ,
790 'fontItalicClose' : '}}' ,
791 'fontUnderlineOpen' : '{@Underline{' ,
792 'fontUnderlineClose' : '}}' ,
793 # the full form is more readable, but could be BL EL LI NL TL DTI
794 'listOpen' : '@BulletList' ,
795 'listClose' : '@EndList' ,
796 'listItemOpen' : '@ListItem{' ,
797 'listItemClose' : '}' ,
798 'numlistOpen' : '@NumberedList' ,
799 'numlistClose' : '@EndList' ,
800 'numlistItemOpen' : '@ListItem{' ,
801 'numlistItemClose' : '}' ,
802 'deflistOpen' : '@TaggedList' ,
803 'deflistClose' : '@EndList' ,
804 'deflistItem1Open' : '@DropTagItem {' ,
805 'deflistItem1Close' : '}' ,
806 'deflistItem2Open' : '{' ,
807 'deflistItem2Close' : '}' ,
808 'bar1' : '\n@DP @FullWidthRule\n' ,
809 'url' : '{blue @Colour { \a }}' ,
810 'urlMark' : '\a ({blue @Colour { \a }})' ,
811 'email' : '{blue @Colour { \a }}' ,
812 'emailMark' : '\a ({blue Colour{ \a }})' ,
813 'img' : '~A~@IncludeGraphic { \a }' , # eps only!
814 '_imgAlignLeft' : '@LeftDisplay ' ,
815 '_imgAlignRight' : '@RightDisplay ' ,
816 '_imgAlignCenter' : '@CentredDisplay ' ,
817 # lout tables are *way* complicated, no support for now
818 #'tableOpen' : '~A~@Tbl~B~\naformat{ @Cell A | @Cell B } {',
819 #'tableClose' : '}' ,
820 #'tableRowOpen' : '@Rowa\n' ,
821 #'tableTitleRowOpen' : '@HeaderRowa' ,
822 #'tableCenterAlign' : '@CentredDisplay ' ,
823 #'tableCellOpen' : '\a {' , # A, B, ...
824 #'tableCellClose' : '}' ,
825 #'_tableBorder' : '\nrule {yes}' ,
827 # @MakeContents must be on the config file
828 'TOC' : '@DP @ContentsGoesHere @DP',
829 'pageBreak' : '\n@NP\n' ,
833 # http://moinmo.in/SyntaxReference
835 'title1' : '= \a =' ,
836 'title2' : '== \a ==' ,
837 'title3' : '=== \a ===' ,
838 'title4' : '==== \a ====' ,
839 'title5' : '===== \a =====',
840 'blockVerbOpen' : '{{{' ,
841 'blockVerbClose' : '}}}' ,
842 'blockQuoteLine' : ' ' ,
843 'fontMonoOpen' : '{{{' ,
844 'fontMonoClose' : '}}}' ,
845 'fontBoldOpen' : "'''" ,
846 'fontBoldClose' : "'''" ,
847 'fontItalicOpen' : "''" ,
848 'fontItalicClose' : "''" ,
849 'fontUnderlineOpen' : '__' ,
850 'fontUnderlineClose' : '__' ,
851 'fontStrikeOpen' : '--(' ,
852 'fontStrikeClose' : ')--' ,
853 'listItemOpen' : ' * ' ,
854 'numlistItemOpen' : ' \a. ' ,
855 'deflistItem1Open' : ' ' ,
856 'deflistItem1Close' : '::' ,
857 'deflistItem2LinePrefix': ' :: ' ,
859 'bar2' : '--------' ,
861 'urlMark' : '[\a \a]' ,
863 'emailMark' : '[\a \a]' ,
865 'tableRowOpen' : '||' ,
866 'tableCellOpen' : '~A~' ,
867 'tableCellClose' : '||' ,
868 'tableTitleCellClose' : '||' ,
869 '_tableCellAlignRight' : '<)>' ,
870 '_tableCellAlignCenter' : '<:>' ,
871 'comment' : '/* \a */' ,
872 'TOC' : '[[TableOfContents]]'
875 # http://code.google.com/p/support/wiki/WikiSyntax
877 'title1' : '= \a =' ,
878 'title2' : '== \a ==' ,
879 'title3' : '=== \a ===' ,
880 'title4' : '==== \a ====' ,
881 'title5' : '===== \a =====',
882 'blockVerbOpen' : '{{{' ,
883 'blockVerbClose' : '}}}' ,
884 'blockQuoteLine' : ' ' ,
885 'fontMonoOpen' : '{{{' ,
886 'fontMonoClose' : '}}}' ,
887 'fontBoldOpen' : '*' ,
888 'fontBoldClose' : '*' ,
889 'fontItalicOpen' : '_' , # underline == italic
890 'fontItalicClose' : '_' ,
891 'fontStrikeOpen' : '~~' ,
892 'fontStrikeClose' : '~~' ,
893 'listItemOpen' : ' * ' ,
894 'numlistItemOpen' : ' # ' ,
896 'urlMark' : '[\a \a]' ,
897 'email' : 'mailto:\a' ,
898 'emailMark' : '[mailto:\a \a]',
900 'tableRowOpen' : '|| ' ,
901 'tableRowClose' : ' ||' ,
902 'tableCellSep' : ' || ' ,
905 # http://wiki.splitbrain.org/wiki:syntax
907 # Hint: You can add footnotes ((This is a footnote))
909 'title1' : '===== \a =====',
910 'title2' : '==== \a ====' ,
911 'title3' : '=== \a ===' ,
912 'title4' : '== \a ==' ,
913 'title5' : '= \a =' ,
914 # DokuWiki uses ' ' indentation to mark verb blocks (see indentverbblock)
915 'blockQuoteLine' : '>' ,
916 'fontMonoOpen' : "''" ,
917 'fontMonoClose' : "''" ,
918 'fontBoldOpen' : "**" ,
919 'fontBoldClose' : "**" ,
920 'fontItalicOpen' : "//" ,
921 'fontItalicClose' : "//" ,
922 'fontUnderlineOpen' : "__" ,
923 'fontUnderlineClose' : "__" ,
924 'fontStrikeOpen' : '<del>' ,
925 'fontStrikeClose' : '</del>' ,
926 'listItemOpen' : ' * ' ,
927 'numlistItemOpen' : ' - ' ,
930 'urlMark' : '[[\a|\a]]' ,
932 'emailMark' : '[[\a|\a]]' ,
934 'imgAlignLeft' : '{{\a }}' ,
935 'imgAlignRight' : '{{ \a}}' ,
936 'imgAlignCenter' : '{{ \a }}' ,
937 'tableTitleRowOpen' : '^ ' ,
938 'tableTitleRowClose' : ' ^' ,
939 'tableTitleCellSep' : ' ^ ' ,
940 'tableRowOpen' : '| ' ,
941 'tableRowClose' : ' |' ,
942 'tableCellSep' : ' | ' ,
943 # DokuWiki has no attributes. The content must be aligned!
944 # '_tableCellAlignRight' : '<)>' , # ??
945 # '_tableCellAlignCenter': '<:>' , # ??
946 # DokuWiki colspan is the same as txt2tags' with multiple |||
947 # 'comment' : '## \a' , # ??
951 # http://en.wikipedia.org/wiki/Help:Editing
953 'title1' : '== \a ==' ,
954 'title2' : '=== \a ===' ,
955 'title3' : '==== \a ====' ,
956 'title4' : '===== \a =====' ,
957 'title5' : '====== \a ======',
958 'blockVerbOpen' : '<pre>' ,
959 'blockVerbClose' : '</pre>' ,
960 'blockQuoteOpen' : '<blockquote>' ,
961 'blockQuoteClose' : '</blockquote>' ,
962 'fontMonoOpen' : '<tt>' ,
963 'fontMonoClose' : '</tt>' ,
964 'fontBoldOpen' : "'''" ,
965 'fontBoldClose' : "'''" ,
966 'fontItalicOpen' : "''" ,
967 'fontItalicClose' : "''" ,
968 'fontUnderlineOpen' : '<u>' ,
969 'fontUnderlineClose' : '</u>' ,
970 'fontStrikeOpen' : '<s>' ,
971 'fontStrikeClose' : '</s>' ,
972 #XXX Mixed lists not working: *#* list inside numlist inside list
973 'listItemLine' : '*' ,
974 'numlistItemLine' : '#' ,
975 'deflistItem1Open' : '; ' ,
976 'deflistItem2LinePrefix': ': ' ,
979 'urlMark' : '[\a \a]' ,
980 'email' : 'mailto:\a' ,
981 'emailMark' : '[mailto:\a \a]' ,
982 # [[Image:foo.png|right|Optional alt/caption text]] (right, left, center, none)
983 'img' : '[[Image:\a~A~]]' ,
984 '_imgAlignLeft' : '|left' ,
985 '_imgAlignCenter' : '|center' ,
986 '_imgAlignRight' : '|right' ,
987 # {| border="1" cellspacing="0" cellpadding="4" align="center"
988 'tableOpen' : '{|~A~~B~ cellpadding="4"',
989 'tableClose' : '|}' ,
990 'tableRowOpen' : '|-\n| ' ,
991 'tableTitleRowOpen' : '|-\n! ' ,
992 'tableCellSep' : ' || ' ,
993 'tableTitleCellSep' : ' !! ' ,
994 '_tableBorder' : ' border="1"' ,
995 '_tableAlignCenter' : ' align="center"' ,
996 'comment' : '<!-- \a -->' ,
1000 # http://www.inference.phy.cam.ac.uk/mackay/mgp/SYNTAX
1001 # http://en.wikipedia.org/wiki/MagicPoint
1003 'paragraphOpen' : '%font "normal", size 5' ,
1004 'title1' : '%page\n\n\a\n' ,
1005 'title2' : '%page\n\n\a\n' ,
1006 'title3' : '%page\n\n\a\n' ,
1007 'title4' : '%page\n\n\a\n' ,
1008 'title5' : '%page\n\n\a\n' ,
1009 'blockVerbOpen' : '%font "mono"' ,
1010 'blockVerbClose' : '%font "normal"' ,
1011 'blockQuoteOpen' : '%prefix " "' ,
1012 'blockQuoteClose' : '%prefix " "' ,
1013 'fontMonoOpen' : '\n%cont, font "mono"\n' ,
1014 'fontMonoClose' : '\n%cont, font "normal"\n' ,
1015 'fontBoldOpen' : '\n%cont, font "normal-b"\n' ,
1016 'fontBoldClose' : '\n%cont, font "normal"\n' ,
1017 'fontItalicOpen' : '\n%cont, font "normal-i"\n' ,
1018 'fontItalicClose' : '\n%cont, font "normal"\n' ,
1019 'fontUnderlineOpen' : '\n%cont, fore "cyan"\n' ,
1020 'fontUnderlineClose' : '\n%cont, fore "white"\n' ,
1021 'listItemLine' : '\t' ,
1022 'numlistItemLine' : '\t' ,
1023 'numlistItemOpen' : '\a. ' ,
1024 'deflistItem1Open' : '\t\n%cont, font "normal-b"\n',
1025 'deflistItem1Close' : '\n%cont, font "normal"\n' ,
1026 'bar1' : '%bar "white" 5' ,
1028 'url' : '\n%cont, fore "cyan"\n\a' +\
1029 '\n%cont, fore "white"\n' ,
1030 'urlMark' : '\a \n%cont, fore "cyan"\n\a'+\
1031 '\n%cont, fore "white"\n' ,
1032 'email' : '\n%cont, fore "cyan"\n\a' +\
1033 '\n%cont, fore "white"\n' ,
1034 'emailMark' : '\a \n%cont, fore "cyan"\n\a'+\
1035 '\n%cont, fore "white"\n' ,
1036 'img' : '~A~\n%newimage "\a"\n%left\n',
1037 '_imgAlignLeft' : '\n%left' ,
1038 '_imgAlignRight' : '\n%right' ,
1039 '_imgAlignCenter' : '\n%center' ,
1040 'comment' : '%% \a' ,
1041 'pageBreak' : '%page\n\n\n' ,
1045 # man groff_man ; man 7 groff
1047 'paragraphOpen' : '.P' ,
1048 'title1' : '.SH \a' ,
1049 'title2' : '.SS \a' ,
1050 'title3' : '.SS \a' ,
1051 'title4' : '.SS \a' ,
1052 'title5' : '.SS \a' ,
1053 'blockVerbOpen' : '.nf' ,
1054 'blockVerbClose' : '.fi\n' ,
1055 'blockQuoteOpen' : '.RS' ,
1056 'blockQuoteClose' : '.RE' ,
1057 'fontBoldOpen' : '\\fB' ,
1058 'fontBoldClose' : '\\fR' ,
1059 'fontItalicOpen' : '\\fI' ,
1060 'fontItalicClose' : '\\fR' ,
1061 'listOpen' : '.RS' ,
1062 'listItemOpen' : '.IP \(bu 3\n',
1063 'listClose' : '.RE' ,
1064 'numlistOpen' : '.RS' ,
1065 'numlistItemOpen' : '.IP \a. 3\n',
1066 'numlistClose' : '.RE' ,
1067 'deflistItem1Open' : '.TP\n' ,
1070 'urlMark' : '\a (\a)',
1072 'emailMark' : '\a (\a)',
1074 'tableOpen' : '.TS\n~A~~B~tab(^); ~C~.',
1075 'tableClose' : '.TE' ,
1076 'tableRowOpen' : ' ' ,
1077 'tableCellSep' : '^' ,
1078 '_tableAlignCenter' : 'center, ',
1079 '_tableBorder' : 'allbox, ',
1080 '_tableColAlignLeft' : 'l' ,
1081 '_tableColAlignRight' : 'r' ,
1082 '_tableColAlignCenter': 'c' ,
1083 'comment' : '.\\" \a'
1087 'paragraphOpen' : '<@Normal:>' ,
1088 'title1' : '\n<@Title1:>\a',
1089 'title2' : '\n<@Title2:>\a',
1090 'title3' : '\n<@Title3:>\a',
1091 'title4' : '\n<@Title4:>\a',
1092 'title5' : '\n<@Title5:>\a',
1093 'blockVerbOpen' : '<@PreFormat:>' ,
1094 'blockQuoteLine' : '<@Quote:>' ,
1095 'fontMonoOpen' : '<FONT "Lucida Console"><SIZE 9>' ,
1096 'fontMonoClose' : '<SIZE$><FONT$>',
1097 'fontBoldOpen' : '<B>' ,
1098 'fontBoldClose' : '<P>' ,
1099 'fontItalicOpen' : '<I>' ,
1100 'fontItalicClose' : '<P>' ,
1101 'fontUnderlineOpen' : '<U>' ,
1102 'fontUnderlineClose' : '<P>' ,
1103 'listOpen' : '<@Bullet:>' ,
1104 'listItemOpen' : '\x95\t' , # \x95 == ~U
1105 'numlistOpen' : '<@Bullet:>' ,
1106 'numlistItemOpen' : '\x95\t' ,
1108 'url' : '<U>\a<P>' , # underline
1109 'urlMark' : '\a <U>\a<P>' ,
1111 'emailMark' : '\a \a' ,
1116 # Exceptions for --css-sugar
1117 if config['css-sugar'] and config['target'] in ('html','xhtml'):
1118 # Change just HTML because XHTML inherits it
1119 htmltags = alltags['html']
1120 # Table with no cellpadding
1121 htmltags['tableOpen'] = string.replace(
1122 htmltags['tableOpen'], ' CELLPADDING="4"', '')
1124 htmltags['tocOpen' ] = '<DIV CLASS="toc" ID="toc">'
1125 htmltags['tocClose'] = '</DIV>'
1126 htmltags['bodyOpen'] = '<DIV CLASS="body" ID="body">'
1127 htmltags['bodyClose']= '</DIV>'
1129 # Make the HTML -> XHTML inheritance
1130 xhtml = alltags['html'].copy()
1131 for key in xhtml.keys(): xhtml[key] = string.lower(xhtml[key])
1132 # Some like HTML tags as lowercase, some don't... (headers out)
1133 if HTML_LOWER: alltags['html'] = xhtml.copy()
1134 xhtml.update(alltags['xhtml'])
1135 alltags['xhtml'] = xhtml.copy()
1137 # Compose the target tags dictionary
1139 target_tags = alltags[config['target']].copy()
1141 for key in keys: tags[key] = '' # create empty keys
1142 for key in target_tags.keys():
1143 tags[key] = maskEscapeChar(target_tags[key]) # populate
1145 # Map strong line to separator if not defined
1146 if not tags['bar2'] and tags['bar1']:
1147 tags['bar2'] = tags['bar1']
1152 ##############################################################################
1155 def getRules(config):
1156 "Returns all the target-specific syntax rules"
1161 # target rules (ON/OFF)
1162 'linkable', # target supports external links
1163 'tableable', # target supports tables
1164 'imglinkable', # target supports images as links
1165 'imgalignable', # target supports image alignment
1166 'imgasdefterm', # target supports image as definition term
1167 'autonumberlist', # target supports numbered lists natively
1168 'autonumbertitle', # target supports numbered titles natively
1169 'stylable', # target supports external style files
1170 'parainsidelist', # lists items supports paragraph
1171 'spacedlistitem', # lists support blank lines between items
1172 'listnotnested', # lists cannot be nested
1173 'quotenotnested', # quotes cannot be nested
1174 'verbblocknotescaped', # don't escape specials in verb block
1175 'verbblockfinalescape', # do final escapes in verb block
1176 'escapeurl', # escape special in link URL
1177 'onelinepara', # dump paragraph as a single long line
1178 'tabletitlerowinbold', # manually bold any cell on table titles
1179 'tablecellstrip', # strip extra spaces from each table cell
1180 'tablecellspannable', # the table cells can have span attribute
1181 'barinsidequote', # bars are allowed inside quote blocks
1182 'finalescapetitle', # perform final escapes on title lines
1183 'autotocnewpagebefore', # break page before automatic TOC
1184 'autotocnewpageafter', # break page after automatic TOC
1185 'autotocwithbars', # automatic TOC surrounded by bars
1186 'mapbar2pagebreak', # map the strong bar to a page break
1187 'titleblocks', # titles must be on open/close section blocks
1189 # Target code beautify (ON/OFF)
1190 'indentverbblock', # add leading spaces to verb block lines
1191 'breaktablecell', # break lines after any table cell
1192 'breaktablelineopen', # break line after opening table line
1193 'breaktitleopen', # break line after any title
1194 'notbreaklistopen', # don't break line after opening a new list
1195 'notbreakparaopen', # don't break line after opening a new para
1196 'keepquoteindent', # don't remove the leading TABs on quotes
1197 'keeplistindent', # don't remove the leading spaces on lists
1198 'blankendmotherlist', # append a blank line at the mother list end
1199 'blankendtable', # append a blank line at the table end
1200 'blankendautotoc', # append a blank line at the auto TOC end
1201 'tagnotindentable', # tags must be placed at the line begining
1202 'spacedlistitemopen', # append a space after the list item open tag
1203 'spacednumlistitemopen',# append a space after the numlist item open tag
1204 'deflisttextstrip', # strip the contents of the deflist text
1207 'listmaxdepth', # maximum depth for lists
1208 'quotemaxdepth', # maximum depth for quotes
1209 'tablecellaligntype', # type of table cell align: cell, column
1214 'indentverbblock':1,
1219 'autotocwithbars':1,
1220 'blankendmotherlist':1,
1223 'indentverbblock':1,
1233 'blankendmotherlist':1,
1238 'breaktablelineopen':1,
1240 'keepquoteindent':1,
1242 'autotocwithbars':1,
1243 'tablecellspannable':1,
1244 'tablecellaligntype':'cell',
1246 #TIP xhtml inherits all HTML rules
1254 'blankendmotherlist':1,
1258 'blankendautotoc':1,
1261 'keepquoteindent':1,
1263 'finalescapetitle':1,
1264 'tablecellaligntype':'column',
1267 'blankendmotherlist':1,
1268 'tagnotindentable':1,
1271 'autotocnewpagebefore':1,
1277 'autonumbertitle':1,
1279 'blankendmotherlist':1,
1282 'tabletitlerowinbold':1,
1284 'verbblocknotescaped':1,
1286 'listmaxdepth':4, # deflist is 6
1289 'finalescapetitle':1,
1290 'autotocnewpageafter':1,
1291 'mapbar2pagebreak':1,
1292 'tablecellaligntype':'column',
1295 'keepquoteindent':1,
1297 'deflisttextstrip':1,
1299 'verbblocknotescaped':1,
1301 'mapbar2pagebreak':1,
1304 'notbreakparaopen':1,
1309 'blankendmotherlist':1,
1314 'tabletitlerowinbold':1,
1316 'autotocwithbars':1,
1317 'tablecellaligntype':'cell',
1318 'deflisttextstrip':1,
1323 'blankendmotherlist':1,
1326 'tabletitlerowinbold':1,
1332 'indentverbblock':1, # DokuWiki uses ' ' to mark verb blocks
1335 'blankendmotherlist':1,
1341 'autotocwithbars':1,
1344 'tablecellaligntype':'cell',
1348 'blankendmotherlist':1,
1352 'autotocwithbars':1,
1353 'spacedlistitemopen':1,
1354 'spacednumlistitemopen':1,
1355 'deflisttextstrip':1,
1361 'indentverbblock':1,
1362 'blankendmotherlist':1,
1363 'tagnotindentable':1,
1365 'tablecellaligntype':'column',
1366 'tabletitlerowinbold':1,
1374 'verbblockfinalescape':1,
1375 #TODO add support for these - maybe set a JOINNEXT char and
1376 # do it on addLineBreaks()
1377 'notbreaklistopen':1,
1378 'notbreakparaopen':1,
1380 'autotocwithbars':1,
1385 # Exceptions for --css-sugar
1386 if config['css-sugar'] and config['target'] in ('html','xhtml'):
1387 rules_bank['html']['indentverbblock'] = 0
1388 rules_bank['html']['autotocwithbars'] = 0
1390 # Get the target specific rules
1391 if config['target'] == 'xhtml':
1392 myrules = rules_bank['html'].copy() # inheritance
1393 myrules.update(rules_bank['xhtml']) # get XHTML specific
1395 myrules = rules_bank[config['target']].copy()
1397 # Populate return dictionary
1398 for key in allrules: ret[key] = 0 # reset all
1399 ret.update(myrules) # get rules
1404 ##############################################################################
1408 "Returns all the regexes used to find the t2t marks"
1412 re.compile(r'^```\s*$'),
1414 re.compile(r'^```\s*$'),
1416 re.compile(r'^"""\s*$'),
1418 re.compile(r'^"""\s*$'),
1420 re.compile(r'^%%%\s*$'),
1421 'blockCommentClose':
1422 re.compile(r'^%%%\s*$'),
1424 re.compile(r'^\t+'),
1426 re.compile(r'^``` (?=.)'),
1428 re.compile(r'^""" (?=.)'),
1429 # mono, raw, bold, italic, underline:
1430 # - marks must be glued with the contents, no boundary spaces
1431 # - they are greedy, so in ****bold****, turns to <b>**bold**</b>
1433 re.compile( r'``([^\s](|.*?[^\s])`*)``'),
1435 re.compile( r'""([^\s](|.*?[^\s])"*)""'),
1437 re.compile(r'\*\*([^\s](|.*?[^\s])\**)\*\*'),
1439 re.compile( r'//([^\s](|.*?[^\s])/*)//'),
1441 re.compile( r'__([^\s](|.*?[^\s])_*)__'),
1443 re.compile( r'--([^\s](|.*?[^\s])-*)--'),
1445 re.compile(r'^( *)(-) (?=[^ ])'),
1447 re.compile(r'^( *)(\+) (?=[^ ])'),
1449 re.compile(r'^( *)(:) (.*)$'),
1451 re.compile(r'^( *)([-+:])\s*$'),
1453 re.compile(r'^(\s*)([_=-]{20,})\s*$'),
1455 re.compile(r'^ *\|\|? '),
1457 re.compile(r'^\s*$'),
1461 # Auxiliary tag regexes
1462 '_imgAlign' : re.compile(r'~A~', re.I),
1463 '_tableAlign' : re.compile(r'~A~', re.I),
1464 '_anchor' : re.compile(r'~A~', re.I),
1465 '_tableBorder' : re.compile(r'~B~', re.I),
1466 '_tableColAlign' : re.compile(r'~C~', re.I),
1467 '_tableCellColSpan': re.compile(r'~S~', re.I),
1468 '_tableCellAlign' : re.compile(r'~A~', re.I),
1471 # Special char to place data on TAGs contents (\a == bell)
1472 bank['x'] = re.compile('\a')
1474 # %%macroname [ (formatting) ]
1475 bank['macros'] = re.compile(r'%%%%(?P<name>%s)\b(\((?P<fmt>.*?)\))?'%(
1476 string.join(MACROS.keys(), '|')), re.I)
1478 # %%TOC special macro for TOC positioning
1479 bank['toc'] = re.compile(r'^ *%%toc\s*$', re.I)
1481 # Almost complicated title regexes ;)
1482 titskel = r'^ *(?P<id>%s)(?P<txt>%s)\1(\[(?P<label>[\w-]*)\])?\s*$'
1483 bank[ 'title'] = re.compile(titskel%('[=]{1,5}','[^=](|.*[^=])'))
1484 bank['numtitle'] = re.compile(titskel%('[+]{1,5}','[^+](|.*[^+])'))
1486 ### Complicated regexes begin here ;)
1488 # Textual descriptions on --help's style: [...] is optional, | is OR
1491 ### First, some auxiliary variables
1495 patt_img = r'\[([\w_,.+%$#@!?+~/-]+\.(png|jpe?g|gif|eps|bmp))\]'
1498 # http://www.gbiv.com/protocols/uri/rfc/rfc3986.html
1499 # pchar: A-Za-z._~- / %FF / !$&'()*+,;= / :@
1500 # Recommended order: scheme://user:pass@domain/path?query=foo#anchor
1501 # Also works : scheme://user:pass@domain/path#anchor?query=foo
1504 'proto' : r'(https?|ftp|news|telnet|gopher|wais)://',
1505 'guess' : r'(www[23]?|ftp)\.', # w/out proto, try to guess
1506 'login' : r'A-Za-z0-9_.-', # for ftp://login@domain.com
1507 'pass' : r'[^ @]*', # for ftp://login:pass@dom.com
1508 'chars' : r'A-Za-z0-9%._/~:,=$@&+-', # %20(space), :80(port), D&D
1509 'anchor': r'A-Za-z0-9%._-', # %nn(encoded)
1510 'form' : r'A-Za-z0-9/%&=+;.,$@*_-', # .,@*_-(as is)
1514 # username [ :password ] @
1515 patt_url_login = r'([%s]+(:%s)?@)?'%(urlskel['login'],urlskel['pass'])
1517 # [ http:// ] [ username:password@ ] domain.com [ / ]
1518 # [ #anchor | ?form=data ]
1519 retxt_url = r'\b(%s%s|%s)[%s]+\b/*(\?[%s]+)?(#[%s]*)?'%(
1520 urlskel['proto'],patt_url_login, urlskel['guess'],
1521 urlskel['chars'],urlskel['form'],urlskel['anchor'])
1523 # filename | [ filename ] #anchor
1524 retxt_url_local = r'[%s]+|[%s]*(#[%s]*)'%(
1525 urlskel['chars'],urlskel['chars'],urlskel['anchor'])
1527 # user@domain [ ?form=data ]
1528 patt_email = r'\b[%s]+@([A-Za-z0-9_-]+\.)+[A-Za-z]{2,4}\b(\?[%s]+)?'%(
1529 urlskel['login'],urlskel['form'])
1531 # Saving for future use
1532 bank['_urlskel'] = urlskel
1534 ### And now the real regexes
1537 bank['email'] = re.compile(patt_email,re.I)
1540 bank['link'] = re.compile(r'%s|%s'%(retxt_url,patt_email), re.I)
1542 # \[ label | imagetag url | email | filename \]
1543 bank['linkmark'] = re.compile(
1544 r'\[(?P<label>%s|[^]]+) (?P<link>%s|%s|%s)\]'%(
1545 patt_img, retxt_url, patt_email, retxt_url_local),
1549 bank['img'] = re.compile(patt_img, re.L+re.I)
1552 bank['special'] = re.compile(r'^%!\s*')
1554 ### END OF regex nightmares
1557 ##############################################################################
1559 class error(Exception):
def echo(msg):
    "Quick debug helper: prints msg wrapped in the ANSI '32;1' color escape"
    template = '\033[32;1m%s\033[m'
    # Single-argument parenthesized form prints exactly the same text
    print(template % msg)
1567 msg = _("%s: Error: ")%my_name + msg
1571 from traceback import format_exception
1572 etype, value, tb = sys.exc_info()
1573 return string.join(format_exception(etype, value, tb), '')
def getUnknownErrorMessage():
    """Compose the message shown when txt2tags aborts by an unknown error.

    The text is made of the translated apology, the translated request to
    report the problem, the author e-mail (my_email) and the traceback
    text given by getTraceback().

    Returns the composed message as a single string.
    """
    msg = '%s\n%s (%s):\n\n%s'%(
        _('Sorry! Txt2tags aborted by an unknown error.'),
        _('Please send the following Error Traceback to the author'),
        my_email, getTraceback())
    # Hand the composed text back to the caller (it was being discarded)
    return msg
1581 def Message(msg,level):
1582 if level <= VERBOSE and not QUIET:
1584 print "%s %s"%(prefix*level, msg)
1585 def Debug(msg,id=0,linenr=None):
1586 "Show debug messages, categorized (colored or not)"
1587 if QUIET or not DEBUG: return
1588 if int(id) not in range(8): id = 0
1589 # 0:black 1:red 2:green 3:yellow 4:blue 5:pink 6:cyan 7:white ;1:light
1590 ids = ['INI','CFG','SRC','BLK','HLD','GUI','OUT','DET']
1591 colors_bgdark = ['7;1','1;1','3;1','6;1','4;1','5;1','2;1','7;1']
1592 colors_bglight = ['0' ,'1' ,'3' ,'6' ,'4' ,'5' ,'2' ,'0' ]
1593 if linenr is not None: msg = "LINE %04d: %s"%(linenr,msg)
1595 if BG_LIGHT: color = colors_bglight[id]
1596 else : color = colors_bgdark[id]
1597 msg = '\033[3%sm%s\033[m'%(color,msg)
1598 print "++ %s: %s"%(ids[id],msg)
1599 def Readfile(file, remove_linebreaks=0, ignore_error=0):
1602 try: data = sys.stdin.readlines()
1604 if not ignore_error:
1605 Error(_('You must feed me with data on STDIN!'))
1607 try: f = open(file); data = f.readlines() ; f.close()
1609 if not ignore_error:
1610 Error(_("Cannot read file:")+" %s"%file)
1611 if remove_linebreaks:
1612 data = map(lambda x:re.sub('[\n\r]+$','',x), data)
1613 Message(_("File read (%d lines): %s")%(len(data),file),2)
def Savefile(file, contents):
    """Write contents to file, which is opened in binary mode ('wb').

    file     - path of the file to be (over)written
    contents - a list of lines (saved with writelines) or a single
               string (saved with write). No line break is added.

    When the file cannot be opened, Error() is called with a
    "Cannot open file for writing" message.
    """
    try:
        f = open(file, 'wb')
    # Only real I/O failures are reported as "cannot open"; things like
    # KeyboardInterrupt are not swallowed anymore
    except (IOError, OSError):
        Error(_("Cannot open file for writing:")+" %s"%file)
    # try/finally: the file handle is always closed, even if the write fails
    try:
        if isinstance(contents, list):
            f.writelines(contents)
        else:
            f.write(contents)
    finally:
        f.close()
1623 for k in dic.keys(): print "%15s : %s" % (k,dic[k])
def dotted_spaces(txt=''):
    "Returns txt with every space char replaced by a dot"
    # String method instead of the deprecated string.replace() function
    return txt.replace(' ', '.')
1627 # TIP: win env vars http://www.winnetmag.com/Article/ArticleID/23873/23873.html
1629 "Return the full path for the users' RC file"
1630 # Try to get the path from an env var. if yes, we're done
1631 user_defined = os.environ.get('T2TCONFIG')
1632 if user_defined: return user_defined
1633 # Env var not found, so perform automatic path composing
1634 # Set default filename according system platform
1635 rc_names = {'default':'.txt2tagsrc', 'win':'_t2trc'}
1636 rc_file = rc_names.get(sys.platform[:3]) or rc_names['default']
1637 # The file must be on the user directory, but where is this dir?
1638 rc_dir_search = ['HOME', 'HOMEPATH']
1639 for var in rc_dir_search:
1640 rc_dir = os.environ.get(var)
1642 # rc dir found, now we must join dir+file to compose the full path
1644 # Compose path and return it if the file exists
1645 rc_path = os.path.join(rc_dir, rc_file)
1646 # On windows, prefix with the drive (%homedrive%: 2k/XP/NT)
1647 if sys.platform[:3] == 'win':
1648 rc_drive = os.environ.get('HOMEDRIVE')
1649 rc_path = os.path.join(rc_drive,rc_path)
1656 ##############################################################################
1660 Command Line class - Masters command line
1662 This class checks and extract data from the provided command line.
1663 The --long options and flags are taken from the global OPTIONS,
1664 FLAGS and ACTIONS dictionaries. The short options are registered
1665 here, and also their equivalence to the long ones.
1668 _compose_short_opts() -> str
1669 _compose_long_opts() -> list
1670 Compose the valid short and long options list, on the
1673 parse() -> (opts, args)
1674 Call getopt to check and parse the command line.
1675 It expects to receive the command line as a list, and
1676 without the program name (sys.argv[1:]).
1678 get_raw_config() -> [RAW config]
1679 Scans command line and convert the data to the RAW config
1680 format. See ConfigMaster class to the RAW format description.
1681 Optional 'ignore' and 'filter' arguments are used to filter
1682 in or out specified keys.
1684 compose_cmdline(dict) -> [Command line]
1685 Compose a command line list from an already parsed config
1686 dictionary, generated from RAW by ConfigMaster(). Use
1687 this to compose an optimal command line for a group of
1690 The get_raw_config() calls parse(), so the tipical use of this
1693 raw = CommandLine().get_raw_config(sys.argv[1:])
1696 self.all_options = OPTIONS.keys()
1697 self.all_flags = FLAGS.keys()
1698 self.all_actions = ACTIONS.keys()
1700 # short:long options equivalence
1702 'h':'help' , 'V':'version',
1703 'n':'enum-title', 'i':'infile' ,
1704 'H':'no-headers', 'o':'outfile',
1705 'v':'verbose' , 't':'target' ,
1706 'q':'quiet' , 'C':'config-file'
1709 # Compose valid short and long options data for getopt
1710 self.short_opts = self._compose_short_opts()
1711 self.long_opts = self._compose_long_opts()
def _compose_short_opts(self):
    """Returns a string like 'hVt:o' with all short options/flags

    Every key of self.short_long is a short option letter. When its
    long name is listed on self.all_options, the letter is followed
    by ':' (getopt notation for "requires a parameter").
    """
    ret = []
    for opt in self.short_long.keys():
        long_name = self.short_long[opt]
        if long_name in self.all_options:     # is flag or option?
            opt = opt+':'                     # option: have param
        ret.append(opt)
    #Debug('Valid SHORT options: %s'%ret)
    return ''.join(ret)
def _compose_long_opts(self):
    """Returns a list with all the valid long options/flags

    The list is on the getopt format: options that require a value
    (self.all_options) get a trailing '=', flags and actions are added
    as is, and every flag also gets its 'no-' counterpart.
    """
    ret = [opt+'=' for opt in self.all_options]           # add =
    ret.extend(self.all_flags)                            # flag ON
    ret.extend(self.all_actions)                          # acts
    ret.extend(['no-'+flag for flag in self.all_flags])   # add no-*
    ret.extend(['no-style','no-encoding'])                # turn OFF
    ret.extend(['no-outfile','no-infile'])                # turn OFF
    ret.extend(['no-dump-config', 'no-dump-source'])      # turn OFF
    #Debug('Valid LONG options: %s'%ret)
    return ret
def _tokenize(self, cmd_string=''):
    """Convert a command line string to a list

    The string is split on any run of whitespace, so quoted
    arguments are NOT kept together.
    """
    #TODO protect quotes contents -- Don't use it, pass cmdline as list
    # String method instead of the deprecated string.split() function
    return cmd_string.split()
def parse(self, cmdline=[]):
    """Check/Parse a command line list   TIP: no program name!

    cmdline - list of command line arguments (it is only read here,
              never changed, so the shared default list is harmless)

    Returns the (opts, args) pair given by getopt.getopt().
    On an invalid command line, Error() is called with the getopt
    message plus a "try --help" tip.
    """
    # Get the valid options
    short, long_opts = self.short_opts, self.long_opts
    try:
        opts, args = getopt.getopt(cmdline, short, long_opts)
    except getopt.error:
        # sys.exc_info() works on any Python version, no matter the
        # syntax it uses to name the caught exception
        errmsg = sys.exc_info()[1]
        Error(_("%s (try --help)")%errmsg)
    return (opts, args)
1752 def get_raw_config(self, cmdline=[], ignore=[], filter=[], relative=0):
1753 "Returns the options/arguments found as RAW config"
1754 if not cmdline: return []
1756 # We need lists, not strings
1757 if type(cmdline) in (type(''), type(u'')):
1758 cmdline = self._tokenize(cmdline)
1759 opts, args = self.parse(cmdline[:])
1761 for name,value in opts:
1762 # Remove leading - and --
1763 name = re.sub('^--?', '', name)
1764 # Alias to old misspelled 'suGGar'
1765 if name == 'css-suggar': name = 'css-sugar'
1766 elif name == 'no-css-suggar': name = 'no-css-sugar'
1767 # Translate short opt to long
1768 if len(name) == 1: name = self.short_long.get(name)
1769 # Outfile exception: path relative to PWD
1770 if name == 'outfile' and relative \
1771 and value not in [STDOUT, MODULEOUT]:
1772 value = os.path.abspath(value)
1773 # config-file inclusion, path relative to PWD
1774 if name == 'config-file':
1775 configs = ConfigLines().include_config_file(
1777 # Remove the 'target' item of all configs
1778 configs = map(lambda c: [c[1],c[2]], configs)
1782 ret.append([name, value])
1783 # Get infile, if any
1785 infile = args.pop(0)
1786 ret.append(['infile', infile])
1787 # Apply 'ignore' and 'filter' rules (filter is stronger)
1788 temp = ret[:] ; ret = []
1789 for name,value in temp:
1790 if (not filter and not ignore) or \
1791 (filter and name in filter) or \
1792 (ignore and name not in ignore):
1793 ret.append( ['all', name, value] )
1794 # Add the original command line string as 'realcmdline'
1795 ret.append( ['all', 'realcmdline', cmdline] )
1798 def compose_cmdline(self, conf={}, no_check=0):
1799 "compose a full (and diet) command line from CONF dict"
1800 if not conf: return []
1802 dft_options = OPTIONS.copy()
1804 valid_opts = self.all_options + self.all_flags
1805 use_short = {'no-headers':'H', 'enum-title':'n'}
1806 # Remove useless options
1807 if not no_check and cfg.get('toc-only'):
1808 if cfg.has_key('no-headers'):
1809 del cfg['no-headers']
1810 if cfg.has_key('outfile'):
1811 del cfg['outfile'] # defaults to STDOUT
1812 if cfg.get('target') == 'txt':
1813 del cfg['target'] # already default
1814 args.append('--toc-only') # must be the first
1817 if cfg.has_key('target'):
1818 args.append('-t '+cfg['target'])
1821 for key in cfg.keys():
1822 if key not in valid_opts: continue # may be a %!setting
1823 if key == 'outfile' or key == 'infile': continue # later
1825 if not val: continue
1826 # Default values are useless on cmdline
1827 if val == dft_options.get(key): continue
1829 if key in use_short.keys():
1830 args.append('-'+use_short[key])
1833 if key in self.all_flags: # add --option
1834 args.append('--'+key)
1835 else: # add --option=value
1836 args.append('--%s=%s'%(key,val))
1837 # The outfile using -o
1838 if cfg.has_key('outfile') and \
1839 cfg['outfile'] != dft_options.get('outfile'):
1840 args.append('-o '+cfg['outfile'])
1841 # Place input file(s) always at the end
1842 if cfg.has_key('infile'):
1843 args.append(string.join(cfg['infile'],' '))
1844 # Return as a nice list
1845 Debug("Diet command line: %s"%string.join(args,' '), 1)
1848 ##############################################################################
1850 class SourceDocument:
1852 SourceDocument class - scan document structure, extract data
1854 It knows about full files. It reads a file and identify all
1855 the areas begining (Head,Conf,Body). With this info it can
1856 extract each area contents.
1857 Note: the original line break is removed.
1860 self.arearef - Save Head, Conf, Body init line number
1861 self.areas - Store the area names which are not empty
1862 self.buffer - The full file contents (with NO \\r, \\n)
1865 get() - Access the contents of an Area. Example:
1866 config = SourceDocument(file).get('conf')
1868 split() - Get all the document Areas at once. Example:
1869 head, conf, body = SourceDocument(file).split()
1872 * The document parts are sequential: Head, Conf and Body.
1873 * One ends when the next begins.
1874 * The Conf Area is optional, so a document can have just
1875 Head and Body Areas.
1877 These are the Areas limits:
1878 - Head Area: the first three lines
1879 - Body Area: from the first valid text line to the end
1880 - Conf Area: the comments between Head and Body Areas
1882 Exception: If the first line is blank, this means no
1883 header info, so the Head Area is just the first line.
1885 def __init__(self, filename='', contents=[]):
1886 self.areas = ['head','conf','body']
1888 self.areas_fancy = ''
1889 self.filename = filename
1892 self.scan_file(filename)
1897 "Returns all document parts, splitted into lists."
1898 return self.get('head'), self.get('conf'), self.get('body')
1900 def get(self, areaname):
1901 "Returns head|conf|body contents from self.buffer"
1903 if areaname not in self.areas: return []
1904 if not self.buffer : return []
1907 bufend = len(self.buffer)
1908 if areaname == 'head':
1910 end = self.arearef[1] or self.arearef[2] or bufend
1911 elif areaname == 'conf':
1912 ini = self.arearef[1]
1913 end = self.arearef[2] or bufend
1914 elif areaname == 'body':
1915 ini = self.arearef[2]
1918 Error("Unknown Area name '%s'"%areaname)
1919 lines = self.buffer[ini:end]
1920 # Make sure head will always have 3 lines
1921 while areaname == 'head' and len(lines) < 3:
def scan_file(self, filename):
    "Reads the source file (line breaks removed) and scans its lines"
    Debug("source file: %s"%filename)
    Message(_("Loading source document"),1)
    buf = Readfile(filename, remove_linebreaks=1)
    # Feed the lines to scan(), which identifies the head/conf/body areas
    self.scan(buf)
1931 def scan(self, lines):
1932 "Run through source file and identify head/conf/body areas"
1935 Error(_('The input file is empty: %s')%self.filename)
1936 cfg_parser = ConfigLines().parse_line
1937 buf.insert(0, '') # text start at pos 1
1939 if not string.strip(buf[1]): # no header
1940 ref[0] = 0 ; ref[1] = 2
1942 on_comment_block = 0
1943 for i in xrange(ref[1],len(buf)): # find body init:
1944 # Handle comment blocks inside config area
1945 if not on_comment_block \
1946 and rgx['blockCommentOpen'].search(buf[i]):
1947 on_comment_block = 1
1949 if on_comment_block \
1950 and rgx['blockCommentOpen'].search(buf[i]):
1951 on_comment_block = 0
1953 if on_comment_block: continue
1955 if string.strip(buf[i]) and ( # ... not blank and
1956 buf[i][0] != '%' or # ... not comment or
1957 rgx['macros'].match(buf[i]) or # ... %%macro
1958 rgx['toc'].match(buf[i]) or # ... %%toc
1959 cfg_parser(buf[i],'include')[1]): # ... %!include
1961 if ref[1] == ref[2]: ref[1] = 0 # no conf area
1962 for i in 0,1,2: # del !existent
1963 if ref[i] >= len(buf): ref[i] = 0 # title-only
1964 if not ref[i]: self.areas[i] = ''
1965 Debug('Head,Conf,Body start line: %s'%ref)
1966 self.arearef = ref # save results
1968 # Fancyness sample: head conf body (1 4 8)
1969 self.areas_fancy = "%s (%s)"%(
1970 string.join(self.areas),
1971 string.join(map(str, map(lambda x:x or '', ref))))
1972 Message(_("Areas found: %s")%self.areas_fancy, 2)
1974 def get_raw_config(self):
1975 "Handy method to get the CONF area RAW config (if any)"
1976 if not self.areas.count('conf'): return []
1977 Message(_("Scanning source document CONF area"),1)
1979 file=self.filename, lines=self.get('conf'),
1980 first_line=self.arearef[1]).get_raw_config()
1981 Debug("document raw config: %s"%raw, 1)
1984 ##############################################################################
1988 ConfigMaster class - the configuration wizard
1990 This class is the configuration master. It knows how to handle
1991 the RAW and PARSED config format. It also performs the sanity
1992 checking for a given configuration.
1995 self.raw - Stores the config on the RAW format
1996 self.parsed - Stores the config on the PARSED format
1997 self.defaults - Stores the default values for all keys
1998 self.off - Stores the OFF values for all keys
1999 self.multi - List of keys which can have multiple values
2000 self.numeric - List of keys which value must be a number
2001 self.incremental - List of keys which are incremental
2004 The RAW format is a list of lists, being each mother list item
2005 a full configuration entry. Any entry is a 3 item list, on
2006 the following format: [ TARGET, KEY, VALUE ]
2007 Being a list, the order is preserved, so it's easy to use
2008 different kinds of configs, as CONF area and command line,
2009 respecting the precedence.
2010 The special target 'all' is used when no specific target was
2011 defined on the original config.
2014 The PARSED format is a dictionary, with all the 'key : value'
2015 found by reading the RAW config. The self.target contents
2016 matters, so this dictionary only contains the target's
2017 config. The configs of other targets are ignored.
2019 The CommandLine and ConfigLines classes have the get_raw_config()
2020 method which convert the configuration found to the RAW format.
2021 Just feed it to parse() and get a brand-new ready-to-use config
2022 dictionary. Example:
2024 >>> raw = CommandLine().get_raw_config(['-n', '-H'])
2026 [['all', 'enum-title', ''], ['all', 'no-headers', '']]
2027 >>> parsed = ConfigMaster(raw).parse()
2029 {'enum-title': 1, 'headers': 0}
2031 def __init__(self, raw=[], target=''):
2033 self.target = target
2035 self.dft_options = OPTIONS.copy()
2036 self.dft_flags = FLAGS.copy()
2037 self.dft_actions = ACTIONS.copy()
2038 self.dft_settings = SETTINGS.copy()
2039 self.defaults = self._get_defaults()
2040 self.off = self._get_off()
2041 self.incremental = ['verbose']
2042 self.numeric = ['toc-level','split']
2043 self.multi = ['infile', 'preproc', 'postproc',
def _get_defaults(self):
    """Get the default values for all config/options/flags

    Returns a new dictionary with: every CONFIG_KEYWORDS name set to
    the empty string, then the default options, flags, actions and
    settings (later ones win on repeated keys), plus the internal
    'realcmdline' and 'sourcefile' keys.
    """
    empty = {}
    for kw in CONFIG_KEYWORDS: empty[kw] = ''
    empty.update(self.dft_options)
    empty.update(self.dft_flags)
    empty.update(self.dft_actions)
    empty.update(self.dft_settings)
    empty['realcmdline'] = ''   # internal use only
    empty['sourcefile']  = ''   # internal use only
    return empty
2059 "Turns OFF all the config/options/flags"
2061 for key in self.defaults.keys():
2062 kind = type(self.defaults[key])
2065 elif kind == type('') or kind == type(u''):
2067 elif kind == type([]):
2070 Error('ConfigMaster: %s: Unknown type'+key)
def _check_target(self):
    "Checks if the target is already defined. If not, do it"
    # An already known target must not be overwritten by the search
    if not self.target:
        self.target = self.find_value('target')
2078 def get_target_raw(self):
2079 "Returns the raw config for self.target or 'all'"
2081 self._check_target()
2082 for entry in self.raw:
2083 if entry[0] == self.target or entry[0] == 'all':
2087 def add(self, key, val):
2088 "Adds the key:value pair to the config dictionary (if needed)"
2090 if key == 'options':
2091 ignoreme = self.dft_actions.keys() + ['target']
2092 ignoreme.remove('dump-config')
2093 ignoreme.remove('dump-source')
2094 raw_opts = CommandLine().get_raw_config(
2095 val, ignore=ignoreme)
2096 for target, key, val in raw_opts:
2099 # The no- prefix turns OFF this key
2100 if key[:3] == 'no-':
2101 key = key[3:] # remove prefix
2102 val = self.off.get(key) # turn key OFF
2103 # Is this key valid?
2104 if key not in self.defaults.keys():
2105 Debug('Bogus Config %s:%s'%(key,val),1)
2107 # Is this value the default one?
2108 if val == self.defaults.get(key):
2109 # If default value, remove previous key:val
2110 if self.parsed.has_key(key):
2111 del self.parsed[key]
2112 # Nothing more to do
2114 # Flags ON comes empty. we'll add the 1 value now
2116 key in self.dft_flags.keys() or
2117 key in self.dft_actions.keys()):
2119 # Multi value or single?
2120 if key in self.multi:
2121 # First one? start new list
2122 if not self.parsed.has_key(key):
2123 self.parsed[key] = []
2124 self.parsed[key].append(val)
2125 # Incremental value? so let's add it
2126 elif key in self.incremental:
2127 self.parsed[key] = (self.parsed.get(key) or 0) + val
2129 self.parsed[key] = val
2130 fancykey = dotted_spaces("%12s"%key)
2131 Message(_("Added config %s : %s")%(fancykey,val),3)
def get_outfile_name(self, config={}):
    """Compose the output file name. Dirname is the same for {in,out}file

    config - dictionary with the 'sourcefile' and 'outfile' keys
             (required) and the optional 'target' key. It is only
             read, never changed.

    Rules, in order:
      - a relative outfile (not STDOUT nor MODULEOUT) is placed on the
        source file directory
      - with no outfile, STDIN goes to STDOUT and MODULEIN to MODULEOUT
      - still with no outfile, it is the source file name with the
        .txt/.t2t extension changed to the target name

    Returns the resulting output file name.
    """
    infile, outfile = config['sourcefile'], config['outfile']
    if outfile and outfile not in (STDOUT, MODULEOUT) \
       and not os.path.isabs(outfile):
        outfile = os.path.join(os.path.dirname(infile), outfile)
    if infile == STDIN    and not outfile: outfile = STDOUT
    if infile == MODULEIN and not outfile: outfile = MODULEOUT
    if not outfile and (infile and config.get('target')):
        # Raw string: the backslash belongs to the regex, not to Python
        basename = re.sub(r'\.(txt|t2t)$','',infile)
        outfile = "%s.%s"%(basename, config['target'])
    Debug(" infile: '%s'"%infile , 1)
    Debug("outfile: '%s'"%outfile, 1)
    # The callers use the composed name, so it must be handed back
    return outfile
2148 def sanity(self, config, gui=0):
2149 "Basic config sanity checking"
2150 if not config: return {}
2151 target = config.get('target')
2152 # Some actions don't require target specification
2154 for action in NO_TARGET:
2155 if config.get(action):
2158 # On GUI, some checking are skipped
2160 # We *need* a target
2162 Error(_('No target specified (try --help)')+\
2164 _('Maybe trying to convert an old v1.x file?'))
2165 # And of course, an infile also
2166 if not config.get('infile'):
2167 Error(_('Missing input file (try --help)'))
2168 # Is the target valid?
2169 if not TARGETS.count(target):
2170 Error(_("Invalid target '%s' (try --help)")%\
2172 # Ensure all keys are present
2173 empty = self.defaults.copy() ; empty.update(config)
2174 config = empty.copy()
2175 # Check integers options
2176 for key in config.keys():
2177 if key in self.numeric:
2178 try: config[key] = int(config[key])
2179 except: Error(_('--%s value must be a number'
2181 # Check split level value
2182 if config['split'] not in (0,1,2):
2183 Error(_('Option --split must be 0, 1 or 2'))
2184 # --toc-only is stronger than others
2185 if config['toc-only']:
2186 config['headers'] = 0
2190 config['outfile'] = config['outfile'] or STDOUT
2191 # Splitting is disable for now (future: HTML only, no STDOUT)
2194 config['target'] = target
2195 # Set output file name
2196 config['outfile'] = self.get_outfile_name(config)
2198 if config['sourcefile'] == config['outfile'] and \
2199 config['outfile'] not in [STDOUT,MODULEOUT] and not gui:
2200 Error(_("Input and Output files are the same: %s")%(
2205 "Returns the parsed config for the current target"
2206 raw = self.get_target_raw()
2207 for target, key, value in raw:
2208 self.add(key, value)
2209 Message(_("Added the following keys: %s")%string.join(
2210 self.parsed.keys(),', '),2)
2211 return self.parsed.copy()
def find_value(self, key='', target=''):
    """Scan ALL raw config entries looking for the desired key.

    key    -- config key name to search for
    target -- target name; entries set for 'all' targets match as well

    Returns '' when the key was not found. For keys listed in
    self.multi, returns the list of every value found (in order);
    for any other key, returns only the last value found.
    """
    ret = []
    # Scan and save all values found
    for targ, k, val in self.raw:
        if k == key and (targ == target or targ == 'all'):
            ret.append(val)
    if not ret: return ''
    # If not multi value, return only the last found
    if key in self.multi: return ret
    return ret[-1]
2225 ########################################################################
2229 ConfigLines class - the config file data extractor
This class reads and parses config lines in the %!key:val
format, converting them to RAW config. It deals with the user
config file (RC file), the source document CONF area and
%!includeconf directives.
2236 Call it passing a file name or feed the desired config lines.
2237 Then just call the get_raw_config() method and wait to
2238 receive the full config data on the RAW format. This method
2239 also follows the possible %!includeconf directives found on
2240 the config lines. Example:
2242 raw = ConfigLines(file=".txt2tagsrc").get_raw_config()
2244 The parse_line() method is also useful to be used alone,
2245 to identify and tokenize a single config line. For example,
2246 to get the %!include command components, on the source
2249 target, key, value = ConfigLines().parse_line(body_line)
def __init__(self, file='', lines=None, first_line=1):
    """Set up the extractor for a config file and/or preloaded lines.

    file       -- config file name; 'NOFILE' is stored when empty
    lines      -- config lines already in memory (optional)
    first_line -- number of the first line, added to the line index
                  when get_raw_config() reports the line being processed
    """
    self.file = file or 'NOFILE'
    # load_lines() and get_raw_config() read self.lines, so it must
    # always exist. A fresh list is made per instance when none is given,
    # instead of sharing one mutable default between all instances.
    if lines is None: lines = []
    self.lines = lines
    self.first_line = first_line
def load_lines(self):
    """Make sure the config lines are loaded into the self.lines buffer.

    Lines already present in the buffer are kept untouched; the file
    named by self.file is read only when the buffer is empty.
    Calls Error() when neither lines nor a file name are available.
    """
    if not self.lines and not self.file:
        Error("ConfigLines: No file or lines provided")
    # Lines fed by the caller win; the file is just the fallback source
    if not self.lines:
        self.lines = self.read_config_file(self.file)
def read_config_file(self, filename=''):
    """Read the contents of a config file, checking every line.

    filename -- path of the config file; when empty nothing is read

    Returns the list of lines read (linebreaks removed), or [] when no
    filename was given. Any non-blank line that does not start with '%'
    is reported through Error(), with file name and line number.
    """
    if not filename: return []
    errormsg = _("Invalid CONFIG line on %s")+"\n%03d:%s"
    lines = Readfile(filename, remove_linebreaks=1)
    # Sanity: try to find invalid config lines
    for i in xrange(len(lines)):
        line = string.rstrip(lines[i])
        if not line: continue     # empty
        if line[0] != '%': Error(errormsg%(filename,i+1,line))
    # Callers (load_lines, include_config_file) use the lines read
    return lines
def include_config_file(self, file=''):
    "Load the file named by %!includeconf, giving back its RAW config"
    if file:
        # The included path is taken relative to the including file
        included = os.path.join(os.path.dirname(self.file), file)
        # Read (and validate) the lines here, then let a brand new
        # parser instance extract the config from them
        parser = ConfigLines(file=included,
                             lines=self.read_config_file(included))
        return parser.get_raw_config()
    # No file name, nothing to include
    return []
2285 def get_raw_config(self):
2286 "Scan buffer and extract all config as RAW (including includes)"
2289 first = self.first_line
2290 for i in xrange(len(self.lines)):
2291 line = self.lines[i]
2292 Message(_("Processing line %03d: %s")%(first+i,line),2)
2293 target, key, val = self.parse_line(line)
2294 if not key: continue # no config on this line
2295 if key == 'includeconf':
2296 err = _('A file cannot include itself (loop!)')
2297 if val == self.file:
2298 Error("%s: %%!includeconf: %s"%(
2300 more_raw = self.include_config_file(val)
2301 ret.extend(more_raw)
2302 Message(_("Finished Config file inclusion: %s"
2305 ret.append([target, key, val])
2306 Message(_("Added %s")%key,3)
2309 def parse_line(self, line='', keyname='', target=''):
2310 "Detects %!key:val config lines and extract data from it"
2311 empty = ['', '', '']
2312 if not line: return empty
2313 no_target = ['target', 'includeconf']
2314 re_name = keyname or '[a-z]+'
2315 re_target = target or '[a-z]*'
2316 # XXX TODO <value>\S.+? requires TWO chars, breaks %!include:a
2317 cfgregex = re.compile("""
2318 ^%%!\s* # leading id with opt spaces
2319 (?P<name>%s)\s* # config name
2320 (\((?P<target>%s)\))? # optional target spec inside ()
2321 \s*:\s* # key:value delimiter with opt spaces
2322 (?P<value>\S.+?) # config value
2323 \s*$ # rstrip() spaces and hit EOL
2324 """%(re_name,re_target), re.I+re.VERBOSE)
2325 prepostregex = re.compile("""
2327 ^( "([^"]*)" # "double quoted" or
2328 | '([^']*)' # 'single quoted' or
2329 | ([^\s]+) # single_word
2331 \s+ # separated by spaces
2334 ( "([^"]*)" # "double quoted" or
2335 | '([^']*)' # 'single quoted' or
2340 guicolors = re.compile("^([^\s]+\s+){3}[^\s]+") # 4 tokens
2341 match = cfgregex.match(line)
2342 if not match: return empty
2344 name = string.lower(match.group('name') or '')
2345 target = string.lower(match.group('target') or 'all')
2346 value = match.group('value')
2348 # NO target keywords: force all targets
2349 if name in no_target: target = 'all'
2351 # Special config for GUI colors
2352 if name == 'guicolors':
2353 valmatch = guicolors.search(value)
2354 if not valmatch: return empty
2355 value = re.split('\s+', value)
2357 # Special config with two quoted values (%!preproc: "foo" 'bar')
2358 if name == 'preproc' or name == 'postproc':
2359 valmatch = prepostregex.search(value)
2360 if not valmatch: return empty
2361 getval = valmatch.group
2362 patt = getval(2) or getval(3) or getval(4) or ''
2363 repl = getval(6) or getval(7) or getval(8) or ''
2364 value = (patt, repl)
2365 return [target, name, value]
2367 ##############################################################################
2370 "(Un)Protect important structures from escaping and formatting"
2372 self.linkmask = 'vvvLINKvvv'
2373 self.monomask = 'vvvMONOvvv'
2374 self.macromask = 'vvvMACROvvv'
2375 self.rawmask = 'vvvRAWvvv'
2376 self.tocmask = 'vvvTOCvvv'
2377 self.macroman = MacroMaster()
2386 def mask(self, line=''):
2390 while regex['raw'].search(line):
2391 txt = regex['raw'].search(line).group(1)
2392 txt = doEscape(TARGET,txt)
2393 self.rawbank.append(txt)
2394 line = regex['raw'].sub(self.rawmask,line,1)
2396 # Protect pre-formatted font text
2397 while regex['fontMono'].search(line):
2398 txt = regex['fontMono'].search(line).group(1)
2399 txt = doEscape(TARGET,txt)
2400 self.monobank.append(txt)
2401 line = regex['fontMono'].sub(self.monomask,line,1)
2404 while regex['macros'].search(line):
2405 txt = regex['macros'].search(line).group()
2406 self.macrobank.append(txt)
2407 line = regex['macros'].sub(self.macromask,line,1)
2409 # Protect TOC location
2410 while regex['toc'].search(line):
2411 line = regex['toc'].sub(self.tocmask,line)
2414 # Protect URLs and emails
2415 while regex['linkmark'].search(line) or \
2416 regex['link' ].search(line):
2418 # Try to match plain or named links
2419 match_link = regex['link'].search(line)
2420 match_named = regex['linkmark'].search(line)
2422 # Define the current match
2423 if match_link and match_named:
2424 # Both types found, which is the first?
2426 if match_named.start() < match_link.start():
2429 # Just one type found, we're fine
2430 m = match_link or match_named
2432 # Extract link data and apply mask
2433 if m == match_link: # plain link
2436 link_re = regex['link']
2438 link = m.group('link')
2439 label = string.rstrip(m.group('label'))
2440 link_re = regex['linkmark']
2441 line = link_re.sub(self.linkmask,line,1)
2443 # Save link data to the link bank
2444 self.linkbank.append((label, link))
2447 def undo(self, line):
2450 for label,url in self.linkbank:
2451 link = get_tagged_link(label, url)
2452 line = string.replace(line, self.linkmask, link, 1)
2455 for macro in self.macrobank:
2456 macro = self.macroman.expand(macro)
2457 line = string.replace(line, self.macromask, macro, 1)
2460 for mono in self.monobank:
2461 open,close = TAGS['fontMonoOpen'],TAGS['fontMonoClose']
2462 tagged = open+mono+close
2463 line = string.replace(line, self.monomask, tagged, 1)
2466 for raw in self.rawbank:
2467 line = string.replace(line, self.rawmask, raw, 1)
2472 ##############################################################################
2478 self.count = ['',0,0,0,0,0]
2488 self.user_labels = {}
2489 self.anchor_count = 0
2490 self.anchor_prefix = 'toc'
2492 def _open_close_blocks(self):
2493 "Open new title blocks, closing the previous (if any)"
2494 if not rules['titleblocks']: return
2496 last = self.last_level
2499 # Same level, just close the previous
2501 tag = TAGS.get('title%dClose'%last)
2502 if tag: self.tag_hold.append(tag)
2504 # Section -> subsection, more depth
2508 # Open the new block of subsections
2509 tag = TAGS.get('blockTitle%dOpen'%last)
2510 if tag: self.tag_hold.append(tag)
2512 # Jump from title1 to title3 or more
2513 # Fill the gap with an empty section
2515 tag = TAGS.get('title%dOpen'%last)
2516 tag = regex['x'].sub('', tag) # del \a
2517 if tag: self.tag_hold.append(tag)
2519 # Section <- subsection, less depth
2521 # Close the current opened subsection
2522 tag = TAGS.get('title%dClose'%last)
2523 if tag: self.tag_hold.append(tag)
2525 # Close the current opened block of subsections
2526 tag = TAGS.get('blockTitle%dClose'%last)
2527 if tag: self.tag_hold.append(tag)
2531 # Close the previous section of the same level
2532 # The subsections were under it
2534 tag = TAGS.get('title%dClose'%last)
2535 if tag: self.tag_hold.append(tag)
2537 def add(self, line):
2538 "Parses a new title line."
2540 self._set_prop(line)
2541 self._open_close_blocks()
2542 self._set_count_id()
2544 self._save_toc_info()
2546 def close_all(self):
2547 "Closes all opened title blocks"
2549 ret.extend(self.tag_hold)
2551 tag = TAGS.get('title%dClose'%self.level)
2552 if tag: ret.append(tag)
2553 tag = TAGS.get('blockTitle%dClose'%self.level)
2554 if tag: ret.append(tag)
2555 self.level = self.level - 1
def _save_toc_info(self):
    "Record the current title as a TOC entry, for self.dump_marked_toc()"
    # Entry layout: (level, count_id, txt, label)
    entry = (self.level, self.count_id, self.txt, self.label)
    self.toc.append(entry)
2563 def _set_prop(self, line=''):
2564 "Extract info from original line and set data holders."
2565 # Detect title type (numbered or not)
2566 id = string.lstrip(line)[0]
2567 if id == '=': kind = 'title'
2568 elif id == '+': kind = 'numtitle'
2569 else: Error("Unknown Title ID '%s'"%id)
2571 match = regex[kind].search(line)
2572 level = len(match.group('id'))
2573 txt = string.strip(match.group('txt'))
2574 label = match.group('label')
2576 if CONF['enum-title']: kind = 'numtitle' # force
2577 if rules['titleblocks']:
2578 self.tag = TAGS.get('%s%dOpen'%(kind,level)) or \
2579 TAGS.get('title%dOpen'%level)
2581 self.tag = TAGS.get(kind+`level`) or \
2582 TAGS.get('title'+`level`)
2583 self.last_level = self.level
2589 def _set_count_id(self):
2590 "Compose and save the title count identifier (if needed)."
2592 if self.kind == 'numtitle' and not rules['autonumbertitle']:
2593 # Manually increase title count
2594 self.count[self.level] = self.count[self.level] +1
2595 # Reset sublevels count (if any)
2596 max_levels = len(self.count)
2597 if self.level < max_levels-1:
2598 for i in xrange(self.level+1, max_levels):
2600 # Compose count id from hierarchy
2601 for i in xrange(self.level):
2602 count_id= "%s%d."%(count_id, self.count[i+1])
2603 self.count_id = count_id
def _set_label(self):
    "Clean up the user-defined title label, which is used by anchors."
    user_label = self.label or ''
    # Only letters, digits, underscore and hyphen survive
    self.label = re.sub('[^A-Za-z0-9_-]', '', user_label)
    # Alternatives considered for automatic labels (both disabled):
    #TODO how to translate safely accented chars to plain?
    #   first 15 :alnum: chars of the title text:
    #self.label = re.sub('[^A-Za-z0-9]', '', self.txt)[:15]
    #   'tocN' label - sequential count, ignoring 'toc-level':
    #self.label = self.anchor_prefix + str(len(self.toc)+1)
2615 def _get_tagged_anchor(self):
2616 "Return anchor if user defined a label, or TOC is on."
2619 if CONF['toc'] and self.level <= CONF['toc-level']:
2620 # This count is needed bcos self.toc stores all
2621 # titles, regardless of the 'toc-level' setting,
2622 # so we can't use self.toc length to number anchors
2623 self.anchor_count = self.anchor_count + 1
2624 # Autonumber label (if needed)
2625 label = label or '%s%s'%(
2626 self.anchor_prefix, self.anchor_count)
2627 if label and TAGS['anchor']:
2628 ret = regex['x'].sub(label,TAGS['anchor'])
2631 def _get_full_title_text(self):
2632 "Returns the full title contents, already escaped."
2634 # Insert count_id (if any) before text
2636 ret = '%s %s'%(self.count_id, ret)
2638 ret = doEscape(TARGET, ret)
2639 # Same targets needs final escapes on title lines
2640 # It's here because there is a 'continue' after title
2641 if rules['finalescapetitle']:
2642 ret = doFinalEscape(TARGET, ret)
2646 "Returns the tagged title as a list."
2649 # Maybe some anchoring before?
2650 anchor = self._get_tagged_anchor()
2651 self.tag = regex['_anchor'].sub(anchor, self.tag)
2653 ### Compose & escape title text (TOC uses unescaped)
2654 full_title = self._get_full_title_text()
2656 # Close previous section area
2657 ret.extend(self.tag_hold)
2660 tagged = regex['x'].sub(full_title, self.tag)
2662 if rules['breaktitleopen']:
2663 tagged = tagged + LB
2665 # Adds "underline" on TXT target
2667 ret.append('') # blank line before
2669 # Get the right letter count for UTF
2670 if string.lower(CONF['encoding']) == 'utf-8':
2671 i = len(full_title.decode('utf-8'))
2674 ret.append(regex['x'].sub('='*i, self.tag))
2675 ret.append('') # blank line after
2680 def dump_marked_toc(self, max_level=99):
2681 "Dumps all toc itens as a valid t2t markup list"
2682 #TODO maybe use quote+linebreaks instead lists
2685 for level, count_id, txt, label in self.toc:
2686 if level > max_level: continue # ignore
2688 id_txt = string.lstrip('%s %s'%(count_id, txt))
2689 label = label or self.anchor_prefix+`toc_count`
2690 toc_count = toc_count + 1
2691 # TOC will have links
2693 # TOC is more readable with master topics
2694 # not linked at number. This is a stoled
2695 # idea from Windows .CHM help files
2696 if CONF['enum-title'] and level == 1:
2697 tocitem = '%s+ [""%s"" #%s]'%(
2700 tocitem = '%s- [""%s"" #%s]'%(
2701 indent, id_txt, label)
2702 # No links on TOC, just text
2704 # man don't reformat TOC lines, cool!
2705 if TARGET == 'txt' or TARGET == 'man':
2706 tocitem = '%s""%s""' %(
2709 tocitem = '%s- ""%s""'%(
2715 ##############################################################################
2717 #TODO check all this table mess
# Handles TABLE lines, using the properties given by parse_row
# The BLOCK table() method kludges in, replacing the cells by the parsed ones
2721 def __init__(self, line=''):
2728 prop = self.parse_row(line)
2729 self.border = prop['border']
2730 self.align = prop['align']
2731 self.cellalign = prop['cellalign']
2732 self.cellspan = prop['cellspan']
2734 def _get_open_tag(self):
2735 topen = TAGS['tableOpen']
2736 tborder = TAGS['_tableBorder']
2737 talign = TAGS['_tableAlign'+self.align]
2738 calignsep = TAGS['tableColAlignSep']
2741 # The first line defines if table has border or not
2742 if not self.border: tborder = ''
2743 # Set the columns alignment
2744 if rules['tablecellaligntype'] == 'column':
2745 calign = map(lambda x: TAGS['_tableColAlign%s'%x],
2747 calign = string.join(calign, calignsep)
2748 # Align full table, set border and Column align (if any)
2749 topen = regex['_tableAlign' ].sub(talign , topen)
2750 topen = regex['_tableBorder' ].sub(tborder, topen)
2751 topen = regex['_tableColAlign'].sub(calign , topen)
2752 # Tex table spec, border or not: {|l|c|r|} , {lcr}
2753 if calignsep and not self.border:
2754 # Remove cell align separator
2755 topen = string.replace(topen, calignsep, '')
2758 def _get_cell_align(self, cells):
2762 if string.strip(cell):
2763 if cell[0] == ' ' and cell[-1] == ' ':
2765 elif cell[0] == ' ':
2770 def _get_cell_span(self, cells):
2774 m = re.search('\a(\|+)$', cell)
2775 if m: span = len(m.group(1))+1
2779 def _tag_cells(self, rowdata):
2781 cells = rowdata['cells']
2782 open = TAGS['tableCellOpen']
2783 close = TAGS['tableCellClose']
2784 sep = TAGS['tableCellSep']
2785 calign = map(lambda x: TAGS['_tableCellAlign'+x],
2786 rowdata['cellalign'])
2787 # Populate the span tag
2789 for i in rowdata['cellspan']:
2791 cspan.append(regex['x'].sub(
2792 str(i), TAGS['_tableCellColSpan']))
2796 # Maybe is it a title row?
2797 if rowdata['title']:
2798 open = TAGS['tableTitleCellOpen'] or open
2799 close = TAGS['tableTitleCellClose'] or close
2800 sep = TAGS['tableTitleCellSep'] or sep
2802 # Should we break the line on *each* table cell?
2803 if rules['breaktablecell']: close = close+'\n'
2805 # Cells pre processing
2806 if rules['tablecellstrip']:
2807 cells = map(lambda x: string.strip(x), cells)
2808 if rowdata['title'] and rules['tabletitlerowinbold']:
2809 cells = map(lambda x: enclose_me('fontBold',x), cells)
2811 # Add cell BEGIN/END tags
2814 # Make sure we will pop from some filled lists
2815 # Fixes empty line bug '| |'
2816 this_align = this_span = ''
2817 if calign: this_align = calign.pop(0)
2818 if cspan : this_span = cspan.pop(0)
2819 # Insert cell align into open tag (if cell is alignable)
2820 if rules['tablecellaligntype'] == 'cell':
2821 copen = regex['_tableCellAlign'].sub(
2823 if rules['tablecellspannable']:
2824 copen = regex['_tableCellColSpan'].sub(
2826 row.append(copen + cell + close)
2828 # Maybe there are cell separators?
2829 return string.join(row, sep)
def add_row(self, cells):
    "Append one more row to the rows collected for this table"
    # NOTE(review): BlockMaster.table() indexes rows[i]['cells'], so each
    # entry is presumably a parse_row() result -- confirm at the callers
    collected = self.rows
    collected.append(cells)
2834 def parse_row(self, line):
2835 # Default table properties
2836 ret = {'border':0,'title':0,'align':'Left',
2837 'cells':[],'cellalign':[], 'cellspan':[]}
2838 # Detect table align (and remove spaces mark)
2839 if line[0] == ' ': ret['align'] = 'Center'
2840 line = string.lstrip(line)
2842 if line[1] == '|': ret['title'] = 1
2843 # Detect border mark and normalize the EOL
2844 m = re.search(' (\|+) *$', line)
2845 if m: line = line+' ' ; ret['border'] = 1
2846 else: line = line+' | '
2848 line = regex['table'].sub('', line)
2849 # Detect colspan | foo | bar baz |||
2850 line = re.sub(' (\|+)\| ', '\a\\1 | ', line)
2851 # Split cells (the last is fake)
2852 ret['cells'] = string.split(line, ' | ')[:-1]
2854 ret['cellspan'] = self._get_cell_span(ret['cells'])
2856 ret['cells'] = map(lambda x:re.sub('\a\|+$','',x),ret['cells'])
2858 ret['cellalign'] = self._get_cell_align(ret['cells'])
2860 Debug('Table Prop: %s' % ret, 7)
2864 open = self._get_open_tag()
2866 close = TAGS['tableClose']
2868 rowopen = TAGS['tableRowOpen']
2869 rowclose = TAGS['tableRowClose']
2870 rowsep = TAGS['tableRowSep']
2871 titrowopen = TAGS['tableTitleRowOpen'] or rowopen
2872 titrowclose = TAGS['tableTitleRowClose'] or rowclose
2874 if rules['breaktablelineopen']:
2875 rowopen = rowopen + '\n'
2876 titrowopen = titrowopen + '\n'
2881 rowopen = titrowopen = ''
2883 close = rowopen + close
2885 # Now we tag all the table cells on each row
2886 #tagged_cells = map(lambda x: self._tag_cells(x), rows) #!py15
2888 for cell in rows: tagged_cells.append(self._tag_cells(cell))
2890 # Add row separator tags between lines
2894 #tagged_rows = map(lambda x:x+rowsep, tagged_cells)
2895 for cell in tagged_cells:
2896 tagged_rows.append(cell+rowsep)
2897 # Remove last rowsep, because the table is over
2898 tagged_rows[-1] = string.replace(
2899 tagged_rows[-1], rowsep, '')
2900 # Add row BEGIN/END tags for each line
2902 for rowdata in rows:
2903 if rowdata['title']:
2904 o,c = titrowopen, titrowclose
2906 o,c = rowopen, rowclose
2907 row = tagged_cells.pop(0)
2908 tagged_rows.append(o + row + c)
2910 fulltable = [open] + tagged_rows + [close]
2912 if rules['blankendtable']: fulltable.append('')
2916 ##############################################################################
2920 "TIP: use blockin/out to add/del holders"
2927 self.tableparser = None
2929 'para' :['comment','raw'],
2931 'table' :['comment'],
2935 'quote' :['quote','comment','raw'],
2936 'list' :['list' ,'numlist' ,'deflist','para','verb',
2938 'numlist' :['list' ,'numlist' ,'deflist','para','verb',
2940 'deflist' :['list' ,'numlist' ,'deflist','para','verb',
2943 self.allblocks = self.contains.keys()
2944 # If one found inside another, ignore the marks
2945 self.exclusive = ['comment','verb','raw']
2948 if not self.BLK: return ''
def isblock(self, name=''):
    "Tell whether the block reported by self.block() is the named one"
    current = self.block()
    return current == name
def prop(self, key):
    "Read a property from the last entry of self.PRP ('' if unset/false)"
    if self.PRP:
        newest = self.PRP[-1]
        return newest.get(key) or ''
    # No properties stored at all
    return ''
def propset(self, key, val):
    "Store a property on the last entry of self.PRP"
    newest = self.PRP[-1]
    newest[key] = val
    #Debug('BLOCK prop ++: %s->%s'%(key,repr(val)), 1)
    #Debug('BLOCK props: %s'%(repr(self.PRP)), 1)
2964 if not self.HLD: return []
def holdadd(self, line):
    "Add one line to the newest hold (the last entry of self.HLD)"
    entry = line
    # For the *list blocks each entry is a list on its own, so that
    # holdaddsub() can later append more data to that same entry
    if self.block().endswith('list'):
        entry = [line]
    self.HLD[-1].append(entry)
    Debug('HOLD add: %s'%repr(entry), 4)
    Debug('FULL HOLD: %s'%self.HLD, 4)
def holdaddsub(self, line):
    "Add one line to the last entry of the newest hold"
    last_entry = self.HLD[-1][-1]
    last_entry.append(line)
    Debug('HOLD addsub: %s'%repr(line), 4)
    Debug('FULL HOLD: %s'%self.HLD, 4)
def holdextend(self, lines):
    "Add several lines at once to the newest hold"
    chunk = lines
    # For the *list blocks the whole group enters as one single entry
    if self.block().endswith('list'):
        chunk = [lines]
    self.HLD[-1].extend(chunk)
    Debug('HOLD extend: %s'%repr(chunk), 4)
    Debug('FULL HOLD: %s'%self.HLD, 4)
2984 def blockin(self, block):
2986 if block not in self.allblocks:
2987 Error("Invalid block '%s'"%block)
2988 # First, let's close other possible open blocks
2989 while self.block() and block not in self.contains[self.block()]:
2990 ret.extend(self.blockout())
2991 # Now we can gladly add this new one
2992 self.BLK.append(block)
2995 if block == 'table': self.tableparser = TableMaster()
2997 self.depth = len(self.BLK)
2998 Debug('block ++ (%s): %s' % (block,self.BLK), 3)
3002 if not self.BLK: Error('No block to pop')
3003 self.last = self.BLK.pop()
3004 result = getattr(self, self.last)()
3005 parsed = self.HLD.pop()
3007 self.depth = len(self.BLK)
3008 if self.last == 'table': del self.tableparser
3009 # Inserting a nested block into mother
3011 if self.last != 'comment': # ignore comment blocks
3012 if self.block()[-4:] == 'list':
3013 self.HLD[-1][-1].append(result)
3015 self.HLD[-1].append(result)
3016 # Reset now. Mother block will have it all
3018 Debug('block -- (%s): %s' % (self.last,self.BLK), 3)
3019 Debug('RELEASED (%s): %s' % (self.last,parsed), 3)
3020 if result: Debug('BLOCK: %s'%result, 6)
def _last_escapes(self, line):
    "Run doFinalEscape() for the current TARGET on a single line"
    escaped = doFinalEscape(TARGET, line)
    return escaped
3026 def _get_escaped_hold(self):
3028 for line in self.hold():
3029 linetype = type(line)
3030 if linetype == type('') or linetype == type(u''):
3031 ret.append(self._last_escapes(line))
3032 elif linetype == type([]):
3035 Error("BlockMaster: Unknown HOLD item type:"
def _remove_twoblanks(self, lastitem):
    """Strip the two trailing blank lines from a list item, if present.

    lastitem -- list holding the lines of the item

    Returns a copy without the final ['', ''] pair, or the very same
    list when it does not end with two blanks. A list is always
    returned, because the caller stores the result back as the item.
    """
    if len(lastitem) > 1 and lastitem[-2:] == ['','']:
        return lastitem[:-2]
    return lastitem
3052 return map(lambda x: doEscape(TARGET, x), lines)
3056 open = TAGS['paragraphOpen']
3057 close = TAGS['paragraphClose']
3058 lines = self._get_escaped_hold()
3059 # Open (or not) paragraph
3060 if not open+close and self.last == 'para':
3061 pass # avoids multiple blank lines
3064 # Pagemaker likes a paragraph as a single long line
3065 if rules['onelinepara']:
3066 result.append(string.join(lines,' '))
3067 # Others are normal :)
3069 result.extend(lines)
3070 result.append(close)
3072 # Very very very very very very very very very UGLY fix
3073 # Needed because <center> can't appear inside <p>
3075 if len(lines) == 1 and \
3076 TARGET in ('html', 'xhtml') and \
3077 re.match('^\s*<center>.*</center>\s*$', lines[0]):
3084 "Verbatim lines are not masked, so there's no need to unmask"
3086 result.append(TAGS['blockVerbOpen'])
3087 for line in self.hold():
3088 if self.prop('mapped') == 'table':
3089 line = MacroMaster().expand(line)
3090 if not rules['verbblocknotescaped']:
3091 line = doEscape(TARGET,line)
3092 if rules['indentverbblock']:
3094 if rules['verbblockfinalescape']:
3095 line = doFinalEscape(TARGET, line)
3097 #TODO maybe use if not TAGS['blockVerbClose']
3099 result.append(TAGS['blockVerbClose'])
3103 # Rewrite all table cells by the unmasked and escaped data
3104 lines = self._get_escaped_hold()
3105 for i in xrange(len(lines)):
3106 cells = string.split(lines[i], SEPARATOR)
3107 self.tableparser.rows[i]['cells'] = cells
3109 return self.tableparser.dump()
3113 myre = regex['quote']
3114 open = TAGS['blockQuoteOpen'] # block based
3115 close = TAGS['blockQuoteClose']
3116 qline = TAGS['blockQuoteLine'] # line based
3117 indent = tagindent = '\t'*self.depth
3118 if rules['tagnotindentable']: tagindent = ''
3119 if not rules['keepquoteindent']: indent = ''
3121 if open: result.append(tagindent+open) # open block
3122 for item in self.hold():
3123 if type(item) == type([]):
3124 result.extend(item) # subquotes
3126 item = myre.sub('', item) # del TABs
3127 if rules['barinsidequote']:
3128 item = get_tagged_bar(item)
3129 item = self._last_escapes(item)
3130 item = qline*self.depth + item
3131 result.append(indent+item) # quote line
3132 if close: result.append(tagindent+close) # close block
def deflist(self):
    "Definition list: handled by self.list(), using the 'deflist' name"
    kind = 'deflist'
    return self.list(kind)
def numlist(self):
    "Numbered list: handled by self.list(), using the 'numlist' name"
    kind = 'numlist'
    return self.list(kind)
3137 def list(self, name='list'):
3140 indent = self.prop('indent')
3142 listopen = TAGS.get(name+'Open')
3143 listclose = TAGS.get(name+'Close')
3144 listline = TAGS.get(name+'ItemLine')
3146 if rules['tagnotindentable']: tagindent = ''
3147 if not rules['keeplistindent']: indent = tagindent = ''
3149 if name == 'deflist':
3150 itemopen = TAGS[name+'Item1Open']
3151 itemclose = TAGS[name+'Item2Close']
3152 itemsep = TAGS[name+'Item1Close']+\
3153 TAGS[name+'Item2Open']
3155 itemopen = TAGS[name+'ItemOpen']
3156 itemclose = TAGS[name+'ItemClose']
3159 # ItemLine: number of leading chars identifies list depth
3161 itemopen = listline*self.depth + itemopen
3163 # Adds trailing space on opening tags
3164 if (name == 'list' and rules['spacedlistitemopen']) or \
3165 (name == 'numlist' and rules['spacednumlistitemopen']):
3166 itemopen = itemopen + ' '
3168 # Remove two-blanks from list ending mark, to avoid <p>
3169 items[-1] = self._remove_twoblanks(items[-1])
3171 # Open list (not nestable lists are only opened at mother)
3172 if listopen and not \
3173 (rules['listnotnested'] and BLOCK.depth != 1):
3174 result.append(tagindent+listopen)
3176 # Tag each list item (multiline items)
3177 itemopenorig = itemopen
3180 # Add "manual" item count for noautonum targets
3181 itemcount = itemcount + 1
3182 if name == 'numlist' and not rules['autonumberlist']:
3184 itemopen = regex['x'].sub(n, itemopenorig)
3188 item[0] = self._last_escapes(item[0])
3189 if name == 'deflist':
3190 z,term,rest = string.split(item[0],SEPARATOR,2)
3192 if not item[0]: del item[0] # to avoid <p>
3193 result.append(tagindent+itemopen+term+itemsep)
3195 fullitem = tagindent+itemopen
3196 result.append(string.replace(
3197 item[0], SEPARATOR, fullitem))
3200 # Process next lines for this item (if any)
3202 if type(line) == type([]): # sublist inside
3205 line = self._last_escapes(line)
3207 # Blank lines turns to <p>
3208 if not line and rules['parainsidelist']:
3209 line = string.rstrip(indent +\
3210 TAGS['paragraphOpen']+\
3211 TAGS['paragraphClose'])
3213 # Some targets don't like identation here (wiki)
3214 if not rules['keeplistindent'] or (name == 'deflist' and rules['deflisttextstrip']):
3215 line = string.lstrip(line)
3217 # Maybe we have a line prefix to add? (wiki)
3218 if name == 'deflist' and TAGS['deflistItem2LinePrefix']:
3219 line = TAGS['deflistItem2LinePrefix'] + line
3223 # Close item (if needed)
3224 if itemclose: result.append(tagindent+itemclose)
3226 # Close list (not nestable lists are only closed at mother)
3227 if listclose and not \
3228 (rules['listnotnested'] and BLOCK.depth != 1):
3229 result.append(tagindent+listclose)
3231 if rules['blankendmotherlist'] and BLOCK.depth == 1:
3237 ##############################################################################
3241 def __init__(self, config={}):
3243 self.config = config or CONF
3244 self.infile = self.config['sourcefile']
3245 self.outfile = self.config['outfile']
3246 self.currdate = time.localtime(time.time())
3247 self.rgx = regex.get('macros') or getRegexes()['macros']
3248 self.fileinfo = { 'infile': None, 'outfile': None }
3249 self.dft_fmt = MACROS
3251 def walk_file_format(self, fmt):
3252 "Walks the %%{in/out}file format string, expanding the % flags"
3253 i = 0; ret = '' # counter/hold
3254 while i < len(fmt): # char by char
3255 c = fmt[i]; i = i + 1
3256 if c == '%': # hot char!
3257 if i == len(fmt): # % at the end
3260 c = fmt[i]; i = i + 1 # read next
3261 ret = ret + self.expand_file_flag(c)
3263 ret = ret +c # common char
3266 def expand_file_flag(self, flag):
3267 "%f: filename %F: filename (w/o extension)"
3268 "%d: dirname %D: dirname (only parent dir)"
3269 "%p: file path %e: extension"
3270 info = self.fileinfo[self.name] # get dict
3271 if flag == '%': x = '%' # %% -> %
3272 elif flag == 'f': x = info['name']
3273 elif flag == 'F': x = re.sub('\.[^.]*$','',info['name'])
3274 elif flag == 'd': x = info['dir']
3275 elif flag == 'D': x = os.path.split(info['dir'])[-1]
3276 elif flag == 'p': x = info['path']
3277 elif flag == 'e': x = re.search('.(\.([^.]+))?$',info['name']
3279 #TODO simpler way for %e ?
3280 else : x = '%'+flag # false alarm
def set_file_info(self, macroname):
    """Cache the path components of the file behind a file macro.

    macroname -- key used to store the data on self.fileinfo

    The file name is taken from the attribute named by self.name
    (self.infile or self.outfile). The stored dict has the 'path',
    'dir' and 'name' keys. Nothing is done if the data is already there.
    """
    if self.fileinfo.get(macroname): return     # already done
    fname = getattr(self, self.name)            # self.infile
    if fname == STDOUT or fname == MODULEOUT:
        # Not a real file: keep the marker as is, with no directory
        dirname = ''; path = name = fname
    else:
        # Real file: split its absolute path
        path    = os.path.abspath(fname)
        dirname = os.path.dirname(path)
        name    = os.path.basename(path)
    self.fileinfo[macroname] = {'path':path,'dir':dirname,'name':name}
3294 def expand(self, line=''):
3295 "Expand all macros found on the line"
3296 while self.rgx.search(line):
3297 m = self.rgx.search(line)
3298 name = self.name = string.lower(m.group('name'))
3299 fmt = m.group('fmt') or self.dft_fmt.get(name)
3301 txt = time.strftime(fmt,self.currdate)
3302 elif name == 'mtime':
3303 if self.infile in (STDIN, MODULEIN):
3304 fdate = self.currdate
3306 mtime = os.path.getmtime(self.infile)
3307 fdate = time.localtime(mtime)
3308 txt = time.strftime(fmt,fdate)
3309 elif name == 'infile' or name == 'outfile':
3310 self.set_file_info(name)
3311 txt = self.walk_file_format(fmt)
3313 Error("Unknown macro name '%s'"%name)
3314 line = self.rgx.sub(txt,line,1)
3318 ##############################################################################
def dumpConfig(source_raw, parsed_config):
    """Print all the config found: the raw items, then the parsed result.

    source_raw is the raw config of the source document, a list of
    (target, key, value) items just like RC_RAW and CMDLINE_RAW.
    parsed_config is the final config dictionary. The PreProc and
    PostProc filters are shown apart, at the end.
    """
    switch_names = {1: _('ON'), 0: _('OFF')}
    raw_sources = [
        (_('RC file'), RC_RAW),
        (_('source document'), source_raw),
        (_('command line'), CMDLINE_RAW),
    ]
    # First show all RAW data found
    for label, cfg in raw_sources:
        print(_('RAW config for %s') % label)
        for target, key, val in cfg:
            target = '(%s)' % target
            key = dotted_spaces("%-14s" % key)
            val = val or _('ON')
            print(' %-8s %s: %s' % (target, key, val))
        print('')
    # Then the parsed results of all of them
    print(_('Full PARSED config'))
    names = list(parsed_config.keys())
    names.sort()
    for key in names:
        val = parsed_config[key]
        # Filters are the last
        if key in ('preproc', 'postproc'):
            continue
        # Flag beautifier
        if key in FLAGS or key in ACTIONS:
            val = switch_names.get(val) or val
        # List beautifier
        if isinstance(val, list):
            if key == 'options':
                sep = ' '
            else:
                sep = ', '
            val = sep.join(val)
        print("%25s: %s" % (dotted_spaces("%-14s" % key), val))
    print('')
    print(_('Active filters'))
    for filter_name in ('preproc', 'postproc'):
        for rule in parsed_config.get(filter_name) or []:
            print("%25s: %s -> %s" % (
                dotted_spaces("%-14s" % filter_name), rule[0], rule[1]))
def get_file_body(file):
    "Returns all the document BODY lines"
    # No config parsing is asked for (noconf), only the parts matter
    _config, parts = process_source_file(file, noconf=1)
    _head, _conf, body = parts
    return body
def finish_him(outlist, config):
    """Write the output to screen or file.

    The escape char is unmasked, embedded line breaks are expanded and
    the PostProc filters (if any) are applied to every line. Then the
    destination is chosen by config['outfile']:

    - MODULEOUT: the lines are returned
    - STDOUT: the lines are printed, or returned together with config
      when running under the GUI
    - otherwise: the lines are saved to that file

    When config['split'] is set, sgml2html is run on the outfile.
    """
    outfile = config['outfile']
    outlist = unmaskEscapeChar(outlist)
    outlist = expandLineBreaks(outlist)

    # Apply PostProc filters
    if config['postproc']:
        filters = compile_filters(config['postproc'],
                                  _('Invalid PostProc filter regex'))
        errmsg = _('Invalid PostProc filter replacement')
        postoutlist = []
        for line in outlist:
            for rgx, repl in filters:
                try:
                    line = rgx.sub(repl, line)
                except Exception:
                    # A bare except would also trap Ctrl-C here
                    Error("%s: '%s'" % (errmsg, repl))
            postoutlist.append(line)
        outlist = postoutlist

    if outfile == MODULEOUT:
        return outlist
    elif outfile == STDOUT:
        if GUI:
            return outlist, config
        for line in outlist:
            print(line)
    else:
        Savefile(outfile, addLineBreaks(outlist))
        if not GUI and not QUIET:
            print(_('%s wrote %s') % (my_name, outfile))

    if config['split']:
        if not QUIET:
            print("--- html...")
        # NOTE(review): 'lang' is not defined in this function, confirm
        # that a global with this name exists.
        # NOTE(review): the command is a shell string, so an outfile
        # name with blanks or shell specials will break it.
        sgml2html = 'sgml2html -s %s -l %s %s' % (
            config['split'], config['lang'] or lang, outfile)
        if not QUIET:
            print("Running system command: %s" % sgml2html)
        os.system(sgml2html)
def toc_inside_body(body, toc, config):
    """Expand the TOC mark found on the body lines.

    With AUTOTOC set there is nothing to expand and body is returned
    as is. Otherwise a new list is returned, where every line holding
    the TOC mark gives place to the toc lines when config['toc'] is
    on, or is just removed when it is off.
    """
    if AUTOTOC:                          # nothing to expand
        return body
    toc_mark = MaskMaster().tocmask
    expanded = []
    # Expand toc mark with TOC contents
    for line in body:
        if toc_mark not in line:         # common line
            expanded.append(line)
        elif config['toc']:              # include if --toc
            expanded.extend(toc)
        # otherwise the %%toc line is removed
    return expanded
def toc_tagger(toc, config):
    "Convert t2t-marked TOC (it is a list) to target-tagged TOC"
    has_toc_tag = TAGS['TOC']
    # Tag if TOC-only TOC "by hand" (target don't have a TOC tag)
    if config['toc-only'] or (config['toc'] and not has_toc_tag):
        # Convert the TOC as a tiny document, with the extras off
        fakeconf = config.copy()
        fakeconf.update({
            'headers': 0,
            'toc-only': 0,
            'mask-email': 0,
            'preproc': [],
            'postproc': [],
            'css-sugar': 0,
        })
        tagged, _unused = convert(toc, fakeconf)
        set_global_config(config)        # restore config
        return tagged
    # Target TOC is a tag
    if config['toc'] and has_toc_tag:
        return [TAGS['TOC']]
    return []
def toc_formatter(toc, config):
    """Formats TOC for automatic placement between headers and body.

    Returns toc itself when config['toc-only'] is on and an empty list
    when config['toc'] is off. Otherwise returns a new list: the TOC
    lines wrapped by the tocOpen/tocClose tags (if any) plus, under
    AUTOTOC, the bars, blank line and page breaks asked by the rules.
    """
    if config['toc-only']:               # no formatting needed
        return toc
    if not config['toc']:                # TOC disabled
        return []
    # Work on a copy, the caller's list must not grow tags
    ret = list(toc)
    # TOC open/close tags (if any)
    if TAGS['tocOpen']:
        ret.insert(0, TAGS['tocOpen'])
    if TAGS['tocClose']:
        ret.append(TAGS['tocClose'])
    # Autotoc specific formatting
    if AUTOTOC:
        if rules['autotocwithbars']:           # TOC between bars
            para = TAGS['paragraphOpen'] + TAGS['paragraphClose']
            bar = regex['x'].sub('-' * 72, TAGS['bar1'])
            tocbar = [para, bar, para]
            ret = tocbar + ret + tocbar
        if rules['blankendautotoc']:           # blank line after TOC
            ret.append('')
        if rules['autotocnewpagebefore']:      # page break before TOC
            ret.insert(0, TAGS['pageBreak'])
        if rules['autotocnewpageafter']:       # page break after TOC
            ret.append(TAGS['pageBreak'])
    return ret
def doHeader(headers, config):
    """Compose the target document headers from the target template.

    headers holds the (up to) three header lines of the source, and
    config is the parsed config dictionary. The HEADER_TEMPLATE of the
    target is filled with the header lines (macros expanded, specials
    escaped), the style(s) and the encoding. Template lines whose only
    reference is to an empty value are removed. With css-inside on
    html/xhtml, the contents of the CSS files are embedded before the
    closing HEAD tag.

    Returns the header as a list of lines, empty when
    config['headers'] is off.
    """
    if not config['headers']:
        return []
    if not headers:
        headers = ['', '', '']
    target = config['target']
    if target not in HEADER_TEMPLATE:
        Error("doheader: Unknow target '%s'" % target)

    if target in ('html', 'xhtml') and config.get('css-sugar'):
        template = HEADER_TEMPLATE[target + 'css'].split('\n')
    else:
        template = HEADER_TEMPLATE[target].split('\n')

    head_data = {'STYLE': [], 'ENCODING': ''}
    for key in list(head_data.keys()):
        val = config.get(key.lower())
        if key == 'STYLE':
            # Private copy: the list is consumed below and it must not
            # be the very same object stored on config['style']
            val = list(val or [])
            # Remove .sty extension from each style filename (freaking tex)
            # XXX Can't handle --style foo.sty,bar.sty
            if target == 'tex':
                val = [re.sub(r'(?i)\.sty$', '', x) for x in val]
        if key == 'ENCODING':
            val = get_encoding_string(val, target)
        head_data[key] = val
    # Parse header contents
    for i in range(3):
        # Expand macros
        contents = MacroMaster(config=config).expand(headers[i])
        # Escapes - on tex, just do it if any \tag{} present
        if target != 'tex' or re.search(r'\\\w+{', contents):
            contents = doEscape(target, contents)
        if target == 'lout':
            contents = doFinalEscape(target, contents)
        head_data['HEADER%d' % (i + 1)] = contents
    # css-inside removes STYLE line
    #XXX In tex, this also removes the modules call (%!style:amsfonts)
    css_inside = (target in ('html', 'xhtml') and
                  config.get('css-inside') and config.get('style'))
    if css_inside:
        head_data['STYLE'] = []
    Debug("Header Data: %s" % head_data, 1)
    # Scan for empty dictionary keys. A template line that references
    # such key and no other one is removed. The template is rebuilt
    # instead of changed while being looped, which skips lines.
    #TODO loop by template line > key
    for key in list(head_data.keys()):
        if head_data.get(key):
            continue
        mark = '%%(%s)s' % key
        kept = []
        for line in template:
            if mark in line and not re.search(
                    r'%\([A-Z0-9]+\)s', line.replace(mark, '')):
                continue
            kept.append(line)
        template = kept
    # Style is a multiple tag.
    # - If none or just one, use default template
    # - If two or more, the first template line holding the style mark
    #   is repeated, once for each style (in order)
    styles = head_data['STYLE']
    if len(styles) == 1:
        head_data['STYLE'] = styles[0]
    elif len(styles) > 1:
        style_mark = '%(STYLE)s'
        for i in range(len(template)):
            if style_mark in template[i]:
                template[i:i + 1] = [
                    template[i].replace(style_mark, style)
                    for style in styles]
                break
        # All the styles are placed now, nothing left to expand
        head_data['STYLE'] = []
    # Populate template with data (dict expansion)
    template = '\n'.join(template) % head_data

    # Adding CSS contents into template (for --css-inside)
    if css_inside:
        set_global_config(config)  # usually on convert(), needed here
        for cssfile in config['style']:
            if not os.path.isabs(cssfile):
                infile = config.get('sourcefile')
                cssfile = os.path.join(os.path.dirname(infile), cssfile)
            try:
                contents = Readfile(cssfile, 1)
                css = "\n%s\n%s\n%s\n%s\n" % (
                    doCommentLine("Included %s" % cssfile),
                    TAGS['cssOpen'],
                    '\n'.join(contents),
                    TAGS['cssClose'])
                # Style now is content, needs escaping (tex)
                #css = maskEscapeChar(css)
            except (Exception, SystemExit):
                # Best effort on purpose: a failed include just turns
                # into a comment, whatever way the reader gave up
                errmsg = "CSS include failed for %s" % cssfile
                css = "\n%s\n" % (doCommentLine(errmsg))
            # Insert this CSS file contents on the template. The
            # callable keeps the CSS literal (it may hold backslashes).
            template = re.sub('(?i)(</HEAD>)',
                              lambda m, css=css: css + m.group(1),
                              template)
            # template = re.sub(r'(?i)(\\begin{document})',
            #         css+'\n'+r'\1', template) # tex

        # The last blank line to keep everything separated
        template = re.sub('(?i)(</HEAD>)', '\n' + r'\1', template)

    return template.split('\n')
def doCommentLine(txt):
    """Return txt placed inside the comment tag of the target.

    Returns an empty string when TAGS['comment'] is empty.
    """
    txt = maskEscapeChar(txt)
    comment_tag = TAGS['comment']
    # The -- string ends a (h|sg|xht)ml comment :(
    if '--' in comment_tag and '--' in txt:
        txt = re.sub('-(?=-)', r'-\\', txt)
    if not comment_tag:
        return ''
    return regex['x'].sub(txt, comment_tag)
def doFooter(config):
    """Compose the footer lines of the target document.

    The footer is a blank line, two comment lines (the generator
    signature and the command line used) and the TAGS['EOD'] tag.
    Returns an empty list when config['headers'] is off.
    """
    if not config['headers']:
        return []
    target = config['target']
    if target == 'tex':
        typename = 'LaTeX2e'
    else:
        typename = target
    ppgd = '%s code generated by %s %s (%s)' % (
        typename, my_name, my_version, my_url)
    cmdline = 'cmdline: %s %s' % (my_name, ' '.join(config['realcmdline']))
    return [
        '',
        doCommentLine(ppgd),
        doCommentLine(cmdline),
        TAGS['EOD'],
    ]
def doEscape(target,txt):
    """Target-specific special escapes. Apply *before* insert any tag.

    Returns txt with the chars that are special to the target
    protected. A target not listed here gets txt back untouched.
    """
    tmpmask = 'vvvvThisEscapingSuxvvvv'
    if target in ('html', 'sgml', 'xhtml'):
        # The & goes first, or it would mangle the entities added next
        txt = txt.replace('&', '&amp;')
        txt = txt.replace('<', '&lt;')
        txt = txt.replace('>', '&gt;')
        if target == 'sgml':
            txt = txt.replace('\xff', '&yuml;')        # "+y
    elif target == 'pm6':
        txt = txt.replace('<', r'<\#60>')
    elif target == 'mgp':
        txt = re.sub('^%', ' %', txt)  # add leading blank to avoid parse
    elif target == 'man':
        txt = re.sub("^([.'])", r'\\&\1', txt)         # command ID
        txt = txt.replace(ESCCHAR, ESCCHAR + 'e')      # \e
    elif target == 'lout':
        # TIP: / moved to FinalEscape to avoid //italic//
        # TIP: these are also converted by lout:  ...  ---  --
        txt = txt.replace(ESCCHAR, tmpmask)            # \
        txt = txt.replace('"', '"%s""' % ESCCHAR)      # "\""
        txt = re.sub('([|&{}@#^~])', r'"\1"', txt)     # "@"
        txt = txt.replace(tmpmask, '"%s"' % (ESCCHAR * 2))  # "\\"
    elif target == 'tex':
        # Mark literal \ to be changed to $\backslash$ later
        txt = txt.replace(ESCCHAR, tmpmask)
        txt = re.sub('([#$&%{}])', ESCCHAR + r'\1', txt)    # \%
        txt = re.sub('([~^])', ESCCHAR + r'\1{}', txt)      # \~{}
        txt = re.sub('([<|>])', r'$\1$', txt)               # $>$
        txt = txt.replace(tmpmask, maskEscapeChar(r'$\backslash$'))
        # TIP the _ is escaped at the end
    return txt
3631 # TODO man: where - really needs to be escaped?
def doFinalEscape(target, txt):
    """Last escapes of each line.

    Returns txt with the final target-specific escapes applied. A
    target not listed here gets txt back untouched.
    """
    if target == 'pm6':
        txt = txt.replace(ESCCHAR + '<', r'<\#92><')
    elif target == 'man':
        txt = txt.replace('-', r'\-')
    elif target == 'sgml':
        txt = txt.replace('[', '&lsqb;')
    elif target == 'lout':
        txt = txt.replace('/', '"/"')
    elif target == 'tex':
        txt = txt.replace('_', r'\_')
        # The mask stands for an underscore that must stay unescaped
        txt = txt.replace('vvvvTexUndervvvv', '_')  # shame!
    return txt
def EscapeCharHandler(action, data):
    """Mask/Unmask the Escape Char on the given string.

    action is 'mask' (every backslash turns into ESCCHAR) or 'unmask'
    (the way back). Blank data is returned as is, before the action is
    even checked.
    """
    if not data.strip():
        return data
    if action not in ('mask', 'unmask'):
        Error("EscapeCharHandler: Invalid action '%s'" % action)
    if action == 'mask':
        before, after = '\\', ESCCHAR
    else:
        before, after = ESCCHAR, '\\'
    return data.replace(before, after)
def maskEscapeChar(data):
    r"Replace any Escape Char \ with a text mask (Input: str or list)"
    if isinstance(data, list):
        # A list is masked item by item, and a list is returned
        return [EscapeCharHandler('mask', item) for item in data]
    return EscapeCharHandler('mask', data)
def unmaskEscapeChar(data):
    r"Undo the Escape char \ masking (Input: str or list)"
    if isinstance(data, list):
        # A list is unmasked item by item, and a list is returned
        return [EscapeCharHandler('unmask', item) for item in data]
    return EscapeCharHandler('unmask', data)
def addLineBreaks(mylist):
    """Use LB to respect sys.platform.

    Returns a new list where every line has its embedded \\n's
    changed to LB and gains a final LB.
    """
    return [line.replace('\n', LB) + LB for line in mylist]
3671 # Convert ['foo\nbar'] to ['foo', 'bar']
def expandLineBreaks(mylist):
    "Split every item on its line breaks, returning one flat list"
    return [piece for line in mylist for piece in line.split('\n')]
def compile_filters(filters, errmsg='Filter'):
    """Compile the regex of each (pattern, replacement) filter.

    The filters list is changed in place, each pattern giving place to
    its compiled regex, and the very same list is returned. An empty
    (or None) filters is returned as is. A pattern that does not
    compile is reported by Error(), prefixed by errmsg.
    """
    if filters:
        for i, (patt, repl) in enumerate(filters):
            try:
                rgx = re.compile(patt)
            except Exception:
                Error("%s: '%s'" % (errmsg, patt))
            else:
                # Saved only on success, so a failed pattern never
                # gets the regex of the previous filter
                filters[i] = (rgx, repl)
    return filters
def enclose_me(tagname, txt):
    "Return txt between the Open and Close tags of tagname"
    opener = TAGS.get(tagname + 'Open')
    closer = TAGS.get(tagname + 'Close')
    return opener + txt + closer
def beautify_me(name, line):
    """Tag the beautifier found on the line.

    Where name is: bold, italic, underline or strike. The text matched
    by the first group of the beautifier regex is placed between the
    Open and Close tags of that font.
    """
    # Exception: Doesn't parse an horizontal bar as strike
    if name == 'strike' and regex['bar'].search(line):
        return line
    font = 'font' + name.capitalize()
    tagged = r'%s\1%s' % (TAGS[font + 'Open'], TAGS[font + 'Close'])
    return regex[font].sub(tagged, line)
def get_tagged_link(label, url):
    """Return the target tag for a link, with label and url in place.

    label is the link text (may be empty, or an image mark) and url
    is the address, e-mail included. A guessed address (www... or
    ftp...) gains its protocol when the target is link aware.
    """
    target = CONF['target']
    image_re = regex['img']
    linkable = rules['linkable']

    # Set link type
    if regex['email'].match(url):
        linktype = 'email'
    else:
        linktype = 'url'

    # Escape specials from TEXT parts
    label = doEscape(target, label)

    # Escape specials from link URL
    if not linkable or rules['escapeurl']:
        url = doEscape(target, url)

    # Adding protocol to guessed link
    guessurl = ''
    if linktype == 'url' and \
       re.match('(?i)' + regex['_urlskel']['guess'], url):
        if url[0] in 'Ww':
            guessurl = 'http://' + url
        else:
            guessurl = 'ftp://' + url
        # Not link aware targets -> protocol is useless
        if not linkable:
            guessurl = ''

    # Simple link (not guessed)
    if not label and not guessurl:
        if CONF['mask-email'] and linktype == 'email':
            # Do the email mask feature (no TAGs, just text)
            url = url.replace('@', ' (a) ')
            url = url.replace('.', ' ')
            # NOTE(review): the line below is not on the reviewed
            # extract of this function, confirm against the full file
            url = "<%s>" % url
            if linkable:
                url = doEscape(target, url)
            return url
        # Just add link data to tag
        return regex['x'].sub(url, TAGS[linktype])

    # Named link or guessed simple link
    # Adjusts for guessed link
    if not label:
        label = url                      # no protocol
    if guessurl:
        url = guessurl                   # with protocol

    # Image inside link!
    if image_re.match(label):
        if rules['imglinkable']:         # get image tag
            label = parse_images(label)
        else:                            # img@link !supported
            label = "(%s)" % image_re.match(label).group(1)

    # Putting data on the right appearance order
    if linkable:
        urlorder = [url, label]          # link before label
    else:
        urlorder = [label, url]          # label before link

    # Add link data to tag (replace \a's)
    tagged = TAGS["%sMark" % linktype]
    for data in urlorder:
        tagged = regex['x'].sub(data, tagged, 1)
    return tagged
def parse_deflist_term(line):
    "Extract and parse definition list term contents"
    image = regex['img']
    term = regex['deflist'].search(line).group(3)

    # Mask image inside term as (image.jpg), where not supported
    if not rules['imgasdefterm']:
        found = image.search(term)
        while found:
            term = image.sub('(%s)' % found.group(1), term, 1)
            found = image.search(term)

    #TODO tex: escape ] on term. \], \rbrack{} and \verb!]! don't work :(
    return term
def get_tagged_bar(line):
    """Return the target tag for the horizontal bar found on the line.

    A line with no bar is returned untouched. A bar starting with '='
    uses TAGS['bar2'], any other uses TAGS['bar1'].
    """
    found = regex['bar'].search(line)
    if not found:
        return line
    # NOTE(review): the group number is not on the reviewed extract of
    # this function, confirm it is the one holding the bar chars
    bar_chars = found.group(2)

    # Map strong bar to pagebreak
    if rules['mapbar2pagebreak'] and TAGS['pageBreak']:
        TAGS['bar2'] = TAGS['pageBreak']

    # Set bar type
    if bar_chars[0] == '=':
        bar_tag = TAGS['bar2']
    else:
        bar_tag = TAGS['bar1']

    # To avoid comment tag confusion like <!-- ------ -->
    if '--' in TAGS['comment']:
        bar_chars = bar_chars.replace('--', '__')

    # Tag line
    return regex['x'].sub(bar_chars, bar_tag)
def get_image_align(line):
    """Return the image (first found) align for the given line.

    The answer is 'left', 'right' or 'center'. The line must hold an
    image mark, there is no check for that here.
    """
    # First clear marks that can mess align detection:
    # the deflist separator, the list separator and the quote mark
    for mark in (SEPARATOR + '$', '^' + SEPARATOR, '^[\t]+'):
        line = re.sub(mark, '', line)

    # Get image position on the line
    found = regex['img'].search(line)
    opens_line = found.start() == 0
    closes_line = found.end() == len(line)

    # The align detection algorithm
    if opens_line and not closes_line:       # ^img + text$
        align = 'left'
    elif closes_line and not opens_line:     # ^text + img$
        align = 'right'
    else:                                    # default align
        align = 'center'

    # Some special cases
    if BLOCK.isblock('table'):               # ignore when table
        align = 'center'
    return align
3833 # Reference: http://www.iana.org/assignments/character-sets
3834 # http://www.drclue.net/F1.cgi/HTML/META/META.html
def get_encoding_string(enc, target):
    """Return the encoding name the way the target wants it.

    The name is first normalized (us-ascii, ibm85N, iso-8859-N) and
    then translated by the table of the target, if there is one. A
    name missing on the table is returned just normalized. An empty
    enc gives an empty string.
    """
    if not enc:
        return ''
    # Target specific translation table
    translate = {
        'tex': {
            # missing: ansinew , applemac , cp437 , cp437de , cp865
            'us-ascii': 'ascii',
            'windows-1250': 'cp1250',
            'windows-1252': 'cp1252',
            # NOTE(review): the ibm85N and koi8-r items are not on the
            # reviewed extract of this table, confirm against the file
            'ibm850': 'cp850',
            'ibm852': 'cp852',
            'iso-8859-1': 'latin1',
            'iso-8859-2': 'latin2',
            'iso-8859-3': 'latin3',
            'iso-8859-4': 'latin4',
            # For inputenc, latin5 is ISO-8859-9 and latin9 is
            # ISO-8859-15. ISO-8859-5 (cyrillic) is named iso88595.
            'iso-8859-5': 'iso88595',
            'iso-8859-9': 'latin5',
            'iso-8859-15': 'latin9',
            'koi8-r': 'koi8-r',
        }
    }
    # Normalization
    enc = re.sub('(?i)(us[-_]?)?ascii|us|ibm367', 'us-ascii', enc)
    enc = re.sub('(?i)(ibm|cp)?85([02])', r'ibm85\2', enc)
    enc = re.sub('(?i)(iso[_-]?)?8859[_-]?', 'iso-8859-', enc)
    enc = re.sub('iso-8859-($|[^1-9]).*', 'iso-8859-1', enc)
    # Apply translation table
    try:
        enc = translate[target][enc.lower()]
    except KeyError:
        pass            # no table for the target, or name not there
    return enc
3866 ##############################################################################
3867 ##MerryChristmas,IdontwanttofighttonightwithyouImissyourbodyandIneedyourlove##
3868 ##############################################################################
def process_source_file(file='', noconf=0, contents=[]):
    """
    Read a source document and gather all the config available for it.
    No sanity checking is done on this step.
    The source document is also split into its parts.

    The document comes from contents (a list of lines) when given,
    otherwise it is read from file. The source is read on this step
    only.

    The config scan order is:
        1. The user configuration file (i.e. $HOME/.txt2tagsrc)
        2. The source document's CONF area
        3. The command line options

    The return data is a tuple of two items:
        1. The parsed config dictionary (empty when noconf is on)
        2. The document's parts, as a (head, conf, body) tuple

    When the config asks for dump-config or show-config-value, the
    info is printed and Quit() is called.
    """
    if contents:
        source = SourceDocument(contents=contents)
    else:
        source = SourceDocument(file)
    head, conf, body = source.split()
    Message(_("Source document contents stored"), 2)

    if noconf:
        return {}, (head, conf, body)

    # Read document config
    source_raw = source.get_raw_config()
    # Join all the config directives found, then parse it
    full_raw = RC_RAW + source_raw + CMDLINE_RAW
    Message(_("Parsing and saving all config found (%03d items)") % (
        len(full_raw)), 1)
    full_parsed = ConfigMaster(full_raw).parse()
    # Add manually the filename to the conf dic
    if contents:
        full_parsed['sourcefile'] = MODULEIN
        full_parsed['infile'] = MODULEIN
        full_parsed['outfile'] = MODULEOUT
    else:
        full_parsed['sourcefile'] = file
    # Maybe should we dump the config found?
    if full_parsed.get('dump-config'):
        dumpConfig(source_raw, full_parsed)
        Quit()
    # The user just want to know a single config value (hidden feature)
    #TODO pick a better name than --show-config-value
    elif full_parsed.get('show-config-value'):
        config_value = full_parsed.get(full_parsed['show-config-value'])
        if config_value:
            if isinstance(config_value, list):
                print('\n'.join(config_value))
            else:
                print(config_value)
        Quit()
    # Okay, all done
    Debug("FULL config for this file: %s" % full_parsed, 1)
    return full_parsed, (head, conf, body)
def get_infiles_config(infiles):
    """
    Gather, for each input file, its config and document parts.
    This function is supposed to be the very first one to be called,
    before any processing.

    Returns a list with the process_source_file() result for each
    file, in the same order.
    """
    return [process_source_file(infile) for infile in infiles]
def convert_this_files(configs):
    """Convert each source document, as told by its own config.

    configs is a list of (config, (head, conf, body)) pairs, one for
    each source document. For each one the target headers, body, TOC
    and footer are composed, joined, and sent to finish_him().

    Under the GUI, or when the outfile is MODULEOUT, the function
    returns on the first document: (lines, config) on the GUI and
    (finish_him() result, config) for MODULEOUT. With dump-source on,
    the source is printed and the function returns None.
    """
    global CONF
    for myconf, doc in configs:                 # multifile support
        source_head, source_conf, source_body = doc
        myconf = ConfigMaster().sanity(myconf)
        # Compose the target file Headers
        #TODO escape line before?
        #TODO see exceptions by tex and mgp
        Message(_("Composing target Headers"), 1)
        target_head = doHeader(source_head, myconf)
        # Parse the full marked body into tagged target
        first_body_line = (len(source_head) or 1) + len(source_conf) + 1
        Message(_("Composing target Body"), 1)
        target_body, marked_toc = convert(
            source_body, myconf, firstlinenr=first_body_line)
        # If dump-source, we're done
        if myconf['dump-source']:
            for line in source_head + source_conf + target_body:
                print(line)
            return
        # Make TOC (if needed)
        Message(_("Composing target TOC"), 1)
        tagged_toc = toc_tagger(marked_toc, myconf)
        target_toc = toc_formatter(tagged_toc, myconf)
        target_body = toc_inside_body(target_body, target_toc, myconf)
        if not AUTOTOC and not myconf['toc-only']:
            target_toc = []
        # Compose the target file Footer
        Message(_("Composing target Footer"), 1)
        target_foot = doFooter(myconf)
        # Finally, we have our document
        outlist = target_head + target_toc + target_body + target_foot
        # If on GUI, abort before finish_him
        if GUI:
            return outlist, myconf
        # If module, return finish_him as list
        if myconf.get('outfile') == MODULEOUT:
            return finish_him(outlist, myconf), myconf
        # Else, write results to file or STDOUT
        Message(_("Saving results to the output file"), 1)
        finish_him(outlist, myconf)
def parse_images(line):
    """Tag all images found.

    Each image mark of the line gives place to the image tag of the
    target, aligned when the target supports it. Nothing is done when
    the image tag is just '[\\a]'.
    """
    image = regex['img']
    while image.search(line) and TAGS['img'] != '[\a]':
        filename = image.search(line).group(1)
        tag = TAGS['img']

        # If target supports image alignment, here we go
        if rules['imgalignable']:
            align = get_image_align(line)         # right
            align_name = align.capitalize()       # Right

            # The align is a full tag, or part of the image tag (~A~)
            full_tag = TAGS['imgAlign' + align_name]
            if full_tag:
                tag = full_tag
            else:
                align_tag = TAGS['_imgAlign' + align_name]
                tag = regex['_imgAlign'].sub(align_tag, tag, 1)

            # Dirty fix to allow centered solo images
            if align == 'center' and TARGET in ('html', 'xhtml'):
                rest = image.sub('', line, 1)
                if re.match(r'^\s+$', rest):
                    tag = "<center>%s</center>" % tag

        if TARGET == 'tex':
            tag = re.sub(r'\\b', r'\\\\b', tag)
            # The mask turns back into _ on doFinalEscape()
            filename = filename.replace('_', 'vvvvTexUndervvvv')

        # First the tag, then the filename inside the tag
        line = image.sub(tag, line, 1)
        line = regex['x'].sub(filename, line, 1)
    return line
def add_inline_tags(line):
    "Tag the beautifiers and the images found on the line"
    # Beautifiers
    for beauti in ('bold', 'italic', 'underline', 'strike'):
        font_regex = regex['font%s' % beauti.capitalize()]
        if font_regex.search(line):
            line = beautify_me(beauti, line)
    # Images
    return parse_images(line)
def get_include_contents(file, path=''):
    """Parses %!include: value and extract file contents.

    The value is a file name, maybe enclosed by a doubled mark that
    sets the include type: ``file`` is verb, ""file"" is raw and
    ''file'' is tagged. With no marks the type is t2t, and just the
    BODY of the included document is used.

    path is the folder to be joined to the file name.
    Returns the (type, lines) tuple.
    """
    ids = {'`': 'verb', '"': 'raw', "'": 'tagged'}
    inc_type = 't2t'
    # Set include type and remove identifier marks
    mark = file[:1]                # slice: an empty value has no mark
    if mark in ids:
        if file[:2] == file[-2:] == mark * 2:
            inc_type = ids[mark]   # set type
            file = file[2:-2]      # remove marks
    # Handle remote dir execution
    filepath = os.path.join(path, file)
    # Read included file contents
    lines = Readfile(filepath, remove_linebreaks=1)
    # Default txt2tags marked text, just BODY matters
    if inc_type == 't2t':
        lines = get_file_body(filepath)
        lines.insert(0, '%%INCLUDED(%s) starts here: %s' % (inc_type, file))
        # This appears when included hit EOF with verbatim area open
        #lines.append('%%INCLUDED(%s) ends here: %s'%(id,file))
    return inc_type, lines
def set_global_config(config):
    """Make config the active one, by (re)setting the globals.

    Sets CONF to config, rebuilds TAGS, rules and regex, and saves
    the target name on TARGET.
    """
    global CONF, TAGS, regex, rules, TARGET
    CONF = config
    TAGS = getTags(config)
    rules = getRules(config)
    regex = getRegexes()
    # save for buggy functions that need global
    TARGET = config['target']
4062 def convert(bodylines, config, firstlinenr=1):
4065 set_global_config(config)
4067 target = config['target']
4068 BLOCK = BlockMaster()
4070 TITLE = TitleMaster()
4076 # Compiling all PreProc regexes
4077 pre_filter = compile_filters(
4078 CONF['preproc'], _('Invalid PreProc filter regex'))
4081 linenr = firstlinenr-1
4083 while lineref < len(bodylines):
4088 untouchedline = bodylines[lineref]
4089 dump_source.append(untouchedline)
4091 line = re.sub('[\n\r]+$','',untouchedline) # del line break
4093 # Apply PreProc filters
4095 errmsg = _('Invalid PreProc filter replacement')
4096 for rgx,repl in pre_filter:
4097 try: line = rgx.sub(repl, line)
4098 except: Error("%s: '%s'"%(errmsg, repl))
4100 line = maskEscapeChar(line) # protect \ char
4102 lineref = lineref +1
4104 Debug(repr(line), 2, linenr) # heavy debug: show each line
4106 #------------------[ Comment Block ]------------------------
4108 # We're already on a comment block
4109 if BLOCK.block() == 'comment':
4112 if regex['blockCommentClose'].search(line):
4113 ret.extend(BLOCK.blockout() or [])
4116 # Normal comment-inside line. Ignore it.
4119 # Detecting comment block init
4120 if regex['blockCommentOpen'].search(line) \
4121 and BLOCK.block() not in BLOCK.exclusive:
4122 ret.extend(BLOCK.blockin('comment'))
4125 #-------------------------[ Raw Text ]----------------------
4127 # We're already on a raw block
4128 if BLOCK.block() == 'raw':
4131 if regex['blockRawClose'].search(line):
4132 ret.extend(BLOCK.blockout())
4135 # Normal raw-inside line
4139 # Detecting raw block init
4140 if regex['blockRawOpen'].search(line) \
4141 and BLOCK.block() not in BLOCK.exclusive:
4142 ret.extend(BLOCK.blockin('raw'))
4146 if regex['1lineRaw'].search(line) \
4147 and BLOCK.block() not in BLOCK.exclusive:
4148 ret.extend(BLOCK.blockin('raw'))
4149 line = regex['1lineRaw'].sub('',line)
4151 ret.extend(BLOCK.blockout())
4154 #------------------------[ Verbatim ]----------------------
4156 #TIP We'll never support beautifiers inside verbatim
4158 # Closing table mapped to verb
4159 if BLOCK.block() == 'verb' \
4160 and BLOCK.prop('mapped') == 'table' \
4161 and not regex['table'].search(line):
4162 ret.extend(BLOCK.blockout())
4164 # We're already on a verb block
4165 if BLOCK.block() == 'verb':
4168 if regex['blockVerbClose'].search(line):
4169 ret.extend(BLOCK.blockout())
4172 # Normal verb-inside line
4176 # Detecting verb block init
4177 if regex['blockVerbOpen'].search(line) \
4178 and BLOCK.block() not in BLOCK.exclusive:
4179 ret.extend(BLOCK.blockin('verb'))
4183 # One line verb-formatted text
4184 if regex['1lineVerb'].search(line) \
4185 and BLOCK.block() not in BLOCK.exclusive:
4186 ret.extend(BLOCK.blockin('verb'))
4187 line = regex['1lineVerb'].sub('',line)
4189 ret.extend(BLOCK.blockout())
4193 # Tables are mapped to verb when target is not table-aware
4194 if not rules['tableable'] and regex['table'].search(line):
4195 if not BLOCK.isblock('verb'):
4196 ret.extend(BLOCK.blockin('verb'))
4197 BLOCK.propset('mapped', 'table')
4201 #---------------------[ blank lines ]-----------------------
4203 if regex['blankline'].search(line):
4205 # Close open paragraph
4206 if BLOCK.isblock('para'):
4207 ret.extend(BLOCK.blockout())
4211 # Close all open tables
4212 if BLOCK.isblock('table'):
4213 ret.extend(BLOCK.blockout())
4217 # Close all open quotes
4218 while BLOCK.isblock('quote'):
4219 ret.extend(BLOCK.blockout())
4221 # Closing all open lists
4222 if f_lastwasblank: # 2nd consecutive blank
4223 if BLOCK.block()[-4:] == 'list':
4224 BLOCK.holdaddsub('') # helps parser
4225 while BLOCK.depth: # closes list (if any)
4226 ret.extend(BLOCK.blockout())
4227 continue # ignore consecutive blanks
4229 # Paragraph (if any) is wanted inside lists also
4230 if BLOCK.block()[-4:] == 'list':
4231 BLOCK.holdaddsub('')
4233 # html: show blank line (needs tag)
4234 if target in ('html','xhtml'):
4235 ret.append(TAGS['paragraphOpen']+\
4236 TAGS['paragraphClose'])
4237 # Otherwise we just show a blank line
4245 #---------------------[ special ]---------------------------
4247 if regex['special'].search(line):
4249 targ, key, val = ConfigLines().parse_line(
4250 line, 'include', target)
4252 Debug("Found config '%s', value '%s'"%(
4255 incpath = os.path.dirname(CONF['sourcefile'])
4257 err = _('A file cannot include itself (loop!)')
4258 if CONF['sourcefile'] == incfile:
4259 Error("%s: %s"%(err,incfile))
4260 inctype, inclines = get_include_contents(
4262 # Verb, raw and tagged are easy
4263 if inctype != 't2t':
4264 ret.extend(BLOCK.blockin(inctype))
4265 BLOCK.holdextend(inclines)
4266 ret.extend(BLOCK.blockout())
4268 # Insert include lines into body
4269 #TODO include maxdepth limit
4270 bodylines = bodylines[:lineref] \
4272 +bodylines[lineref:]
4273 #TODO fix path if include@include
4274 # Remove %!include call
4275 if CONF['dump-source']:
4279 Debug('Bogus Special Line',1,linenr)
4281 #---------------------[ dump-source ]-----------------------
4283 # We don't need to go any further
4284 if CONF['dump-source']:
4287 #---------------------[ Comments ]--------------------------
4289 # Just skip them (if not macro)
4290 if regex['comment'].search(line) and not \
4291 regex['macros'].match(line) and not \
4292 regex['toc'].match(line):
4295 #---------------------[ Triggers ]--------------------------
4297 # Valid line, reset blank status
4300 # Any NOT quote line closes all open quotes
4301 if BLOCK.isblock('quote') and not regex['quote'].search(line):
4302 while BLOCK.isblock('quote'):
4303 ret.extend(BLOCK.blockout())
4305 # Any NOT table line closes an open table
4306 if BLOCK.isblock('table') and not regex['table'].search(line):
4307 ret.extend(BLOCK.blockout())
4310 #---------------------[ Horizontal Bar ]--------------------
4312 if regex['bar'].search(line):
4314 # A bar closes a paragraph
4315 if BLOCK.isblock('para'):
4316 ret.extend(BLOCK.blockout())
4318 # We need to close all opened quote blocks
4319 # if bar isn't allowed inside or if not a quote line
4320 if BLOCK.isblock('quote'):
4321 if not rules['barinsidequote'] or \
4322 not regex['quote'].search(line):
4323 while BLOCK.isblock('quote'):
4324 ret.extend(BLOCK.blockout())
4326 # Quote + bar: continue processing for quoting
4327 if rules['barinsidequote'] and \
4328 regex['quote'].search(line):
4331 # Just bar: save tagged line and we're done
4333 line = get_tagged_bar(line)
4334 if BLOCK.block()[-4:] == 'list':
4335 BLOCK.holdaddsub(line)
4340 Debug("BAR: %s"%line, 6)
4343 #---------------------[ Title ]-----------------------------
4345 #TODO set next blank and set f_lastwasblank or f_lasttitle
4346 if (regex['title'].search(line) or
4347 regex['numtitle'].search(line)) and \
4348 BLOCK.block()[-4:] != 'list':
4350 # A title closes a paragraph
4351 if BLOCK.isblock('para'):
4352 ret.extend(BLOCK.blockout())
4355 tagged_title = TITLE.get()
4356 ret.extend(tagged_title)
4357 Debug("TITLE: %s"%tagged_title, 6)
4362 #---------------------[ %%toc ]-----------------------
4364 # %%toc line closes paragraph
4365 if BLOCK.block() == 'para' and regex['toc'].search(line):
4366 ret.extend(BLOCK.blockout())
4368 #---------------------[ apply masks ]-----------------------
4370 line = MASK.mask(line)
4372 #XXX from here, only block-inside lines will pass
4374 #---------------------[ Quote ]-----------------------------
4376 if regex['quote'].search(line):
4378 # Store number of leading TABS
4379 quotedepth = len(regex['quote'].search(line).group(0))
4381 # SGML doesn't support nested quotes
4382 if rules['quotenotnested']: quotedepth = 1
4384 # Don't cross depth limit
4385 maxdepth = rules['quotemaxdepth']
4386 if maxdepth and quotedepth > maxdepth:
4387 quotedepth = maxdepth
4390 if not BLOCK.isblock('quote'):
4391 ret.extend(BLOCK.blockin('quote'))
4394 while BLOCK.depth < quotedepth:
4395 BLOCK.blockin('quote')
4398 while quotedepth < BLOCK.depth:
4399 ret.extend(BLOCK.blockout())
4401 #---------------------[ Lists ]-----------------------------
4403 # An empty item also closes the current list
4404 if BLOCK.block()[-4:] == 'list':
4405 m = regex['listclose'].match(line)
4407 listindent = m.group(1)
4408 listtype = m.group(2)
4409 currlisttype = BLOCK.prop('type')
4410 currlistindent = BLOCK.prop('indent')
4411 if listindent == currlistindent and \
4412 listtype == currlisttype:
4413 ret.extend(BLOCK.blockout())
4416 if regex['list'].search(line) or \
4417 regex['numlist'].search(line) or \
4418 regex['deflist'].search(line):
4420 listindent = BLOCK.prop('indent')
4421 listids = string.join(LISTNAMES.keys(), '')
4422 m = re.match('^( *)([%s]) '%listids, line)
4423 listitemindent = m.group(1)
4424 listtype = m.group(2)
4425 listname = LISTNAMES[listtype]
4426 results_box = BLOCK.holdadd
4428 # Del list ID (and separate term from definition)
4429 if listname == 'deflist':
4430 term = parse_deflist_term(line)
4431 line = regex['deflist'].sub(
4432 SEPARATOR+term+SEPARATOR,line)
4434 line = regex[listname].sub(SEPARATOR,line)
4436 # Don't cross depth limit
4437 maxdepth = rules['listmaxdepth']
4438 if maxdepth and BLOCK.depth == maxdepth:
4439 if len(listitemindent) > len(listindent):
4440 listitemindent = listindent
4442 # List bumping (same indent, diff mark)
4443 # Close the currently open list to clear the mess
4444 if BLOCK.block()[-4:] == 'list' \
4445 and listname != BLOCK.block() \
4446 and len(listitemindent) == len(listindent):
4447 ret.extend(BLOCK.blockout())
4448 listindent = BLOCK.prop('indent')
4450 # Open mother list or sublist
4451 if BLOCK.block()[-4:] != 'list' or \
4452 len(listitemindent) > len(listindent):
4453 ret.extend(BLOCK.blockin(listname))
4454 BLOCK.propset('indent',listitemindent)
4455 BLOCK.propset('type',listtype)
4458 while len(listitemindent) < len(BLOCK.prop('indent')):
4459 ret.extend(BLOCK.blockout())
4461 # O-oh, sublist before list ("\n\n - foo\n- foo")
4462 # Fix: close sublist (as mother), open another list
4463 if BLOCK.block()[-4:] != 'list':
4464 ret.extend(BLOCK.blockin(listname))
4465 BLOCK.propset('indent',listitemindent)
4466 BLOCK.propset('type',listtype)
4468 #---------------------[ Table ]-----------------------------
4470 #TODO escape undesired format inside table
4471 #TODO add pm6 target
4472 if regex['table'].search(line):
4474 if not BLOCK.isblock('table'): # first table line!
4475 ret.extend(BLOCK.blockin('table'))
4476 BLOCK.tableparser.__init__(line)
4478 tablerow = TableMaster().parse_row(line)
4479 BLOCK.tableparser.add_row(tablerow) # save config
4481 # Maintain line to unmask and inlines
4482 # XXX Bug: | **bo | ld** | turns **bo\x01ld** and gets converted :(
4483 # TODO isolate unmask+inlines parsing to use here
4484 line = string.join(tablerow['cells'], SEPARATOR)
4486 #---------------------[ Paragraph ]-------------------------
4488 if not BLOCK.block() and \
4489 not string.count(line, MASK.tocmask): # new para!
4490 ret.extend(BLOCK.blockin('para'))
4493 ############################################################
4494 ############################################################
4495 ############################################################
4498 #---------------------[ Final Parses ]----------------------
4500 # The target-specific special char escapes for body lines
4501 line = doEscape(target,line)
4503 line = add_inline_tags(line)
4504 line = MASK.undo(line)
4507 #---------------------[ Hold or Return? ]-------------------
4509 ### Now we must choose where to put the parsed line
4512 # List item extra lines
4513 if BLOCK.block()[-4:] == 'list':
4514 results_box = BLOCK.holdaddsub
4517 results_box = BLOCK.holdadd
4520 line = doFinalEscape(target, line)
4521 results_box = ret.append
4525 # EOF: close any open para/verb/lists/table/quotes
4527 while BLOCK.block():
4528 ret.extend(BLOCK.blockout())
4530 # Maybe close some opened title area?
4531 if rules['titleblocks']:
4532 ret.extend(TITLE.close_all())
4534 # Maybe a major tag to enclose body? (like DIV for CSS)
4535 if TAGS['bodyOpen' ]: ret.insert(0, TAGS['bodyOpen'])
4536 if TAGS['bodyClose']: ret.append(TAGS['bodyClose'])
4538 if CONF['toc-only']: ret = []
4539 marked_toc = TITLE.dump_marked_toc(CONF['toc-level'])
4541 # If dump-source, all parsing is ignored
4542 if CONF['dump-source']: ret = dump_source[:]
4544 return ret, marked_toc
4548 ##############################################################################
4549 ################################### GUI ######################################
4550 ##############################################################################
4552 # Tk help: http://python.org/topics/tkinter/
4553 # Tuto: http://ibiblio.org/obp/py4fun/gui/tkPhone.html
4554 # /usr/lib/python*/lib-tk/Tkinter.py
4556 # grid table : row=0, column=0, columnspan=2, rowspan=2
4557 # grid align : sticky='n,s,e,w' (North, South, East, West)
4558 # pack place : side='top,bottom,right,left'
4559 # pack fill : fill='x,y,both,none', expand=1
4560 # pack align : anchor='n,s,e,w' (North, South, East, West)
4561 # padding : padx=10, pady=10, ipadx=10, ipady=10 (internal)
4562 # checkbox : offvalue is returned if the _user_ deselected the box
4563 # label align: justify=left,right,center
4565 def load_GUI_resources():
4566 "Load all extra modules and methods used by GUI"
# The names are declared global so that the GUI code elsewhere in this
# file can use them once this function has been called.
# NOTE(review): Tkinter is declared global here, but the statement that
# binds it is not visible in this excerpt -- confirm it is imported here.
4567 global askopenfilename, showinfo, showwarning, showerror, Tkinter
# File-open dialog, used to pick the source file
4568 from tkFileDialog import askopenfilename
# Message boxes used for the info, warning and error reports
4569 from tkMessageBox import showinfo,showwarning,showerror
4573 "Graphical Tk Interface"
4574 def __init__(self, conf={}):
# Create the root Tk window and all the state the GUI needs, then apply
# `conf` (a config dictionary) through load_config() when it is not empty.
# The default {} is only tested for truth here, never changed.
4575 self.root = Tkinter.Tk() # mother window, come to butthead
4576 self.root.title(my_name) # window title bar text
4577 self.window = self.root # variable "focus" for inclusion
4578 self.row = 0 # row count for grid()
4580 self.action_length = 150 # left column length (pixel)
4581 self.frame_margin = 10 # frame margin size (pixel)
4582 self.frame_border = 6 # frame border size (pixel)
4584 # The default Gui colors, can be changed by %!guicolors
# Same order as the unpacking done in load_config(): bg1, fg1, bg2, fg2
4585 self.dft_gui_colors = ['#6c6','white','#cf9','#030']
4586 self.gui_colors = []
4587 self.bg1 = self.fg1 = self.bg2 = self.fg2 = ''
4589 # On Tk, vars need to be set/get using setvar()/get()
4590 self.infile = self.setvar('')
4591 self.target = self.setvar('')
4592 self.target_name = self.setvar('')
4594 # The checks appearance order
# NOTE(review): the line that opens this list is not visible in this
# excerpt; presumably it is the self.checks assignment iterated below.
4596 'headers','enum-title','toc','mask-email',
4597 'toc-only','stdout']
4599 # Creating variables for all checks
# Each option gets its own Tk variable, stored as attribute f_<option>
4600 for check in self.checks:
4601 setattr(self, 'f_'+check, self.setvar(''))
# An empty/false conf is ignored, so load_config() is not called for it
4605 if conf: self.load_config(conf)
4607 def load_config(self, conf):
# Apply the config dictionary `conf` to the window colors.
# NOTE(review): a line of this method is not visible in this excerpt.
# Colors come from conf['guicolors'], falling back to the defaults
4609 self.gui_colors = conf.get('guicolors') or self.dft_gui_colors
# The unpacking requires exactly four colors
4610 self.bg1, self.fg1, self.bg2, self.fg2 = self.gui_colors
# Paint the root window border/background with the first color
4611 self.root.config(bd=15,bg=self.bg1)
4613 ### Config passed as a dict for Python 1.5 compat (**opts doesn't work :( )
def entry(self, **opts):
    "Return a new Tkinter.Entry placed inside the current container"
    # The keyword options are handed to Tk as a single config dictionary
    container = self.window
    return Tkinter.Entry(container, opts)
def label(self, txt='', bg=None, **opts):
    "Return a new Tkinter.Label showing txt inside the current container"
    # When no background is given, fall back to the first GUI color
    background = bg or self.bg1
    opts['text'] = txt
    opts['bg'] = background
    # The options are handed to Tk as a single config dictionary
    return Tkinter.Label(self.window, opts)
def button(self, name, cmd, **opts):
    "Return a new Tkinter.Button labelled name that runs cmd when pressed"
    opts['text'] = name
    opts['command'] = cmd
    # The options are handed to Tk as a single config dictionary
    widget = Tkinter.Button(self.window, opts)
    return widget
4621 def check(self,name,checked=0,**opts):
# Create a checkbox labelled `name` in the current container, select it
# when `checked` is true and place it on the grid. Nothing is returned.
# The checkbox uses the second pair of GUI colors
4622 bg, fg = self.bg2, self.fg2
# NOTE(review): the statement that opens (and the one that closes) the
# options dictionary below is not visible in this excerpt; the resulting
# `opts` is what gets passed to Tkinter.Checkbutton.
4624 'text':name, 'onvalue':1, 'offvalue':0,
4625 'activeforeground':fg, 'fg':fg,
4626 'activebackground':bg, 'bg':bg,
4627 'highlightbackground':bg, 'anchor':'w'
4629 chk = Tkinter.Checkbutton(self.window, opts)
4630 if checked: chk.select()
# Left-aligned, spanning both grid columns
4631 chk.grid(columnspan=2, sticky='w', padx=0)
def menu(self, sel, items):
    "Return a new Tkinter.OptionMenu bound to sel, offering every item"
    widget_class = Tkinter.OptionMenu
    # OptionMenu takes the choices as separate positional arguments
    call_args = (self.window, sel) + tuple(items)
    return apply(widget_class, call_args)
4635 # Handy auxiliary functions
def action(self, txt):
    "Show the txt label on the left column (0) of the current row"
    caption = self.label(
        txt,
        fg=self.fg1,
        bg=self.bg1,
        wraplength=self.action_length)
    caption.grid(column=0, row=self.row)
def frame_open(self):
    "Start a new frame: next widgets are created inside it"
    new_frame = Tkinter.Frame(
        self.root,
        bg=self.bg2,
        borderwidth=self.frame_border)
    # The frame becomes the container used by entry(), label(), etc
    self.window = new_frame
def frame_close(self):
    "Place the current frame on the right column and go back to root"
    finished_frame = self.window
    finished_frame.grid(
        column=1,
        row=self.row,
        sticky='w',
        padx=self.frame_margin)
    # From now on, widgets are created on the root window again
    self.window = self.root
    # Empty label, placed on the root window
    spacer = self.label('')
    spacer.grid()
    # update row count
    self.row = self.row + 2
4648 def target_name2key(self):
# Translate the target name held by self.target_name into its key on
# TARGETS and store that key on self.target, as a new Tk variable.
4649 name = self.target_name.get()
# Keep only the TARGETS keys whose TARGET_NAMES entry is this name
4650 target = filter(lambda x: TARGET_NAMES[x] == name, TARGETS)
# NOTE(review): the handler for this try (the no-match case) is not
# visible in this excerpt -- confirm which value `key` gets there.
4651 try : key = target[0]
4653 self.target = self.setvar(key)
def target_key2name(self):
    "Store on self.target_name the display name of the current target"
    key = self.target.get()
    name = TARGET_NAMES.get(key)
    # Unknown (or empty-named) key: show the key text itself
    if not name:
        name = key
    self.target_name = self.setvar(name)
def exit(self):
    "Destroy the root window"
    self.root.destroy()
def setvar(self, val):
    "Return a new Tkinter.StringVar already set to val"
    holder = Tkinter.StringVar()
    holder.set(val)
    return holder
# NOTE(review): the `def` line of this method is not visible in this
# excerpt; presumably this is the body of askfile(), the callback that
# mainwindow() binds to the "Browse" button -- confirm.
# File type filters offered by the file-open dialog
4663 ftypes= [(_('txt2tags files'),('*.t2t','*.txt')),
4664 (_('All files'),'*')]
4665 newfile = askopenfilename(filetypes=ftypes)
# NOTE(review): a line between the dialog call and the lines below is not
# visible here; presumably a guard for a cancelled dialog -- confirm.
4667 self.infile.set(newfile)
# The first item returned by process_source_file() is taken as the
# config of the chosen file, then checked by sanity() in GUI mode
4668 newconf = process_source_file(newfile)[0]
4669 newconf = ConfigMaster().sanity(newconf, gui=1)
4670 # Restate all checkboxes after file selection
4671 #TODO how to make a refresh without killing it?
# The constructor is run again on this same object, with the new config
4673 self.__init__(newconf)
4676 def scrollwindow(self, txt='no text!', title=''):
# Open a new top-level window, titled `title`, that shows `txt` on a text
# area with a vertical scrollbar, plus a "Close" button.
# NOTE(review): `txt` is joined with string.join() below, so a list of
# lines is expected; the string default would be joined char by char.
# NOTE(review): some lines of this method are not visible in this excerpt.
4678 win = Tkinter.Toplevel() ; win.title(title)
4679 frame = Tkinter.Frame(win)
4680 scroll = Tkinter.Scrollbar(frame)
# Text area and scrollbar are tied in both directions (here and below)
4681 text = Tkinter.Text(frame,yscrollcommand=scroll.set)
4682 button = Tkinter.Button(win)
4684 text.insert(Tkinter.END, string.join(txt,'\n'))
4685 scroll.config(command=text.yview)
# The button closes this window only
4686 button.config(text=_('Close'), command=win.destroy)
# Text on the left taking all spare room, scrollbar on the right
4689 text.pack(side='left', fill='both', expand=1)
4690 scroll.pack(side='right', fill='y')
4691 frame.pack(fill='both', expand=1)
4692 button.pack(ipadx=30)
4694 def runprogram(self):
# Callback of the "Convert!" button (see mainwindow): compose a command
# line from the GUI state, convert the source file with it and show the
# result on a window (text dump or "file saved" message).
# NOTE(review): several lines of this method (try:, else:, some if tests
# and assignments) are not visible in this excerpt; the comments below
# describe only the visible statements.
# Update self.target from the name selected on the targets menu
4697 self.target_name2key()
4698 infile, target = self.infile.get(), self.target.get()
# NOTE(review): the tests that guard the two warnings below are not
# visible in this excerpt.
4701 showwarning(my_name,_("You must select a target type!"))
4704 showwarning(my_name,
4705 _("You must provide the source file location!"))
# Parse the real command line config again, removing infile and target
# from it: the values selected on the GUI are used instead
4709 real_cmdline_conf = ConfigMaster(CMDLINE_RAW).parse()
4710 if real_cmdline_conf.has_key('infile'):
4711 del real_cmdline_conf['infile']
4712 if real_cmdline_conf.has_key('target'):
4713 del real_cmdline_conf['target']
4714 real_cmdline = CommandLine().compose_cmdline(real_cmdline_conf)
# Outfile name used when the user deselects "stdout" on the GUI
4715 default_outfile = ConfigMaster().get_outfile_name(
4716 {'sourcefile':infile, 'outfile':'', 'target':target})
# Compare each checkbox with the loaded config and the real command line,
# to compose the flags for what the user has changed on the GUI
4717 for opt in self.checks:
# An empty Tk variable counts as 0 (unchecked)
4718 val = int(getattr(self, 'f_%s'%opt).get() or "0")
# The 'stdout' checkbox is looked up as 'outfile' on the configs
4719 if opt == 'stdout': opt = 'outfile'
4720 on_config = self.conf.get(opt) or 0
4721 on_cmdline = real_cmdline_conf.get(opt) or 0
# For outfile, the value is reduced to a flag: is it STDOUT or not?
4722 if opt == 'outfile':
4723 if on_config == STDOUT: on_config = 1
4725 if on_cmdline == STDOUT: on_cmdline = 1
4726 else: on_cmdline = 0
# A flag is composed when the checkbox differs from the config, or when
# all agree but the option was explicitly set on the real command line
4727 if val != on_config or (
4728 val == on_config == on_cmdline and
4729 real_cmdline_conf.has_key(opt)):
4731 # Was not set, but user selected on GUI
4732 Debug("user turned ON: %s"%opt)
4733 if opt == 'outfile': opt = '-o-'
4734 else: opt = '--%s'%opt
4736 # Was set, but user deselected on GUI
4737 Debug("user turned OFF: %s"%opt)
4738 if opt == 'outfile':
4739 opt = "-o%s"%default_outfile
4740 else: opt = '--no-%s'%opt
# NOTE(review): the creation of guiflags is not visible in this excerpt
4741 guiflags.append(opt)
4742 cmdline = [my_name, '-t', target] +real_cmdline \
4744 Debug('Gui/Tk cmdline: %s'%cmdline,5)
# Save the real raw command line, it is restored at the end
4746 cmdline_raw_orig = CMDLINE_RAW
4748 # Fake the GUI cmdline as the real one, and parse file
4749 CMDLINE_RAW = CommandLine().get_raw_config(cmdline[1:])
4750 data = process_source_file(infile)
4751 # On GUI, convert_* returns the data, not finish_him()
4752 outlist, config = convert_this_files([data])
4753 # On GUI and STDOUT, finish_him() returns the data
4754 result = finish_him(outlist, config)
4755 # Show outlist in a nice new window
4757 outlist, config = result
4758 title = _('%s: %s converted to %s')%(
4759 my_name, os.path.basename(infile),
4760 string.upper(config['target']))
4761 self.scrollwindow(outlist, title)
4762 # Show the "file saved" message
4764 msg = "%s\n\n %s\n%s\n\n %s\n%s"%(
4765 _('Conversion done!'),
4767 _('TO:'), config['outfile'])
4768 showinfo(my_name, msg)
4769 except error: # common error (windowed), not quit
4771 except: # fatal error (windowed and printed)
4772 errormsg = getUnknownErrorMessage()
4774 showerror(_('%s FATAL ERROR!')%my_name,errormsg)
# Put back the raw command line config saved before the conversion
4776 CMDLINE_RAW = cmdline_raw_orig
4778 def mainwindow(self):
# Compose the whole main window from self.conf (source file entry, targets
# menu, options checkboxes, Quit/Convert! buttons) and enter the Tk loop.
# NOTE(review): some lines of this method are not visible in this excerpt;
# the comments below describe only the visible statements.
# Fill the Tk variables with the config values (or placeholders)
4779 self.infile.set(self.conf.get('sourcefile') or '')
4780 self.target.set(self.conf.get('target') or \
4781 _('-- select one --'))
4782 outfile = self.conf.get('outfile')
4783 if outfile == STDOUT: # map -o-
4784 self.conf['stdout'] = 1
4785 if self.conf.get('headers') == None:
4786 self.conf['headers'] = 1 # map default
# The labels shown on the left column, one for each step
4788 action1 = _("Enter the source file location:")
4789 action2 = _("Choose the target document type:")
4790 action3 = _("Some options you may check:")
4791 action4 = _("Some extra options:")
# Texts for the checkboxes, keyed by option name.
# NOTE(review): the line that opens this dictionary is not visible in this
# excerpt; presumably it is the checks_txt assignment read below.
4793 'headers' : _("Include headers on output"),
4794 'enum-title': _("Number titles (1, 1.1, 1.1.1, etc)"),
4795 'toc' : _("Do TOC also (Table of Contents)"),
4796 'mask-email': _("Hide e-mails from SPAM robots"),
4798 'toc-only' : _("Just do TOC, nothing more"),
4799 'stdout' : _("Dump to screen (Don't save target file)")
# The targets menu shows the target names, not the keys
4801 targets_menu = map(lambda x: TARGET_NAMES[x], TARGETS)
# Header: program name and version, then the slogan and URL
4804 self.label("%s %s"%(string.upper(my_name), my_version),
4805 bg=self.bg2, fg=self.fg2).grid(columnspan=2, ipadx=10)
4806 self.label(_("ONE source, MULTI targets")+'\n%s\n'%my_url,
4807 bg=self.bg1, fg=self.fg1).grid(columnspan=2)
# Source file frame: entry field plus the "Browse" button
4810 self.action(action1) ; self.frame_open()
4811 e_infile = self.entry(textvariable=self.infile,width=25)
4812 e_infile.grid(row=self.row, column=0, sticky='e')
# With no source file yet, the keyboard focus goes to the entry field
4813 if not self.infile.get(): e_infile.focus_set()
4814 self.button(_("Browse"), self.askfile).grid(
4815 row=self.row, column=1, sticky='w', padx=10)
4816 # Show outfile name, style and encoding (if any)
# NOTE(review): the lines that first set `txt` are not visible here
4820 if outfile == STDOUT: txt = _('<screen>')
4821 l_output = self.label(_('Output: ')+txt,
4822 fg=self.fg2,bg=self.bg2)
4823 l_output.grid(columnspan=2, sticky='w')
4824 for setting in ['style','encoding']:
4825 if self.conf.get(setting):
4826 name = string.capitalize(setting)
4827 val = self.conf[setting]
4828 self.label('%s: %s'%(name, val),
4829 fg=self.fg2, bg=self.bg2).grid(
4830 columnspan=2, sticky='w')
# Close the source file frame, show the label for the target menu
4832 self.frame_close() ; self.action(action2)
# Update self.target_name from the key, it is the menu variable
4834 self.target_key2name()
4835 self.menu(self.target_name, targets_menu).grid(
4836 columnspan=2, sticky='w')
4837 # Options checkboxes label
4838 self.frame_close() ; self.action(action3)
4840 # Compose options check boxes, example:
4841 # self.check(checks_txt['toc'],1,variable=self.f_toc)
4842 for check in self.checks:
4843 # Extra options label
4844 if check == 'toc-only':
4845 self.frame_close() ; self.action(action4)
4847 txt = checks_txt[check]
4848 var = getattr(self, 'f_'+check)
4849 checked = self.conf.get(check)
4850 self.check(txt,checked,variable=var)
4852 # Spacer and buttons
4853 self.label('').grid() ; self.row = self.row + 1
4854 b_quit = self.button(_("Quit"), self.exit)
4855 b_quit.grid(row=self.row, column=0, sticky='w', padx=30)
4856 b_conv = self.button(_("Convert!"), self.runprogram)
4857 b_conv.grid(row=self.row, column=1, sticky='e', padx=30)
# NOTE(review): the bodies of the two tests below are not fully visible
# in this excerpt.
4858 if self.target.get() and self.infile.get():
4861 # As documentation told me
4862 if sys.platform[:3] == 'win':
4865 self.root.deiconify()
# Hand the control over to Tk
4867 self.root.mainloop()
4870 ##############################################################################
4871 ##############################################################################
4873 def exec_command_line(user_cmdline=[]):
# Main driver: parse the command line (`user_cmdline`, or sys.argv[1:]
# when it is empty), load the user RC file, then run the GUI or the
# command line conversion of the input files.
# It sets the globals listed below; the default [] is only read.
# NOTE(review): several lines of this function (try:, except:, else: and
# some statements) are not visible in this excerpt; the comments below
# describe only the visible statements.
4874 global CMDLINE_RAW, RC_RAW, DEBUG, VERBOSE, QUIET, GUI, Error
4876 # Extract command line data
4877 cmdline_data = user_cmdline or sys.argv[1:]
4878 CMDLINE_RAW = CommandLine().get_raw_config(cmdline_data, relative=1)
4879 cmdline_parsed = ConfigMaster(CMDLINE_RAW).parse()
# Missing options become 0 (or an empty list, for the input files)
4880 DEBUG = cmdline_parsed.get('debug' ) or 0
4881 VERBOSE = cmdline_parsed.get('verbose') or 0
4882 QUIET = cmdline_parsed.get('quiet' ) or 0
4883 GUI = cmdline_parsed.get('gui' ) or 0
4884 infiles = cmdline_parsed.get('infile' ) or []
4886 Message(_("Txt2tags %s processing begins")%my_version,1)
# --help and --version are handed to Quit() with the text to show
4889 if cmdline_parsed.get('help' ): Quit(USAGE)
4890 if cmdline_parsed.get('version'): Quit(VERSIONSTR)
# The options listed on NO_MULTI_INPUT are refused for multiple infiles
4893 if len(infiles) > 1:
4894 errmsg=_("Option --%s can't be used with multiple input files")
4895 for option in NO_MULTI_INPUT:
4896 if cmdline_parsed.get(option):
4897 Error(errmsg%option)
4899 Debug("system platform: %s"%sys.platform)
4900 Debug("python version: %s"%(string.split(sys.version,'(')[0]))
4901 Debug("line break char: %s"%repr(LB))
4902 Debug("command line: %s"%sys.argv)
4903 Debug("command line raw config: %s"%CMDLINE_RAW,1)
4905 # Extract RC file config
# Only an explicit 0 for 'rc' turns the RC file off (unset is not 0)
4906 if cmdline_parsed.get('rc') == 0:
4907 Message(_("Ignoring user configuration file"),1)
4909 rc_file = get_rc_path()
4910 if os.path.isfile(rc_file):
4911 Message(_("Loading user configuration file"),1)
4912 RC_RAW = ConfigLines(file=rc_file).get_raw_config()
4914 Debug("rc file: %s"%rc_file)
4915 Debug("rc file raw config: %s"%RC_RAW,1)
4917 # Get all infiles config (if any)
4918 infiles_config = get_infiles_config(infiles)
4921 # Try to load and start GUI interface for --gui
4922 # If program was called with no arguments, try GUI also
4923 if GUI or not infiles:
4925 load_GUI_resources()
4926 Debug("GUI resources OK (Tk module is installed)")
# NOTE(review): the creation of `winbox` (used below) and the setting of
# GUI on success/failure are not visible in this excerpt.
4928 Debug("GUI display OK")
4931 Debug("GUI Error: no Tk module or no DISPLAY")
4934 # User forced --gui, but it's not available
4935 if cmdline_parsed.get('gui') and not GUI:
4936 print getTraceback(); print
4937 Error("Sorry, I can't run my Graphical Interface - GUI\n"
4938 "- Check if Python Tcl/Tk module is installed (Tkinter)\n"
4939 "- Make sure you are in a graphical environment (like X)")
4941 # Okay, we will use GUI
4943 Message(_("We are on GUI interface"),1)
4945 # Redefine Error function to raise exception instead sys.exit()
# NOTE(review): only the message box call of the new Error function is
# visible in this excerpt.
4947 showerror(_('txt2tags ERROR!'), msg)
4950 # If no input file, get RC+cmdline config, else full config
4952 gui_conf = ConfigMaster(RC_RAW+CMDLINE_RAW).parse()
# On any failure reading the first infile config, use an empty one
4954 try : gui_conf = infiles_config[0][0]
4955 except: gui_conf = {}
4957 # Sanity is needed to set outfile and other things
4958 gui_conf = ConfigMaster().sanity(gui_conf, gui=1)
4959 Debug("GUI config: %s"%gui_conf,5)
4961 # Insert config and populate the nice window!
4962 winbox.load_config(gui_conf)
4965 # Console mode rocks forever!
4967 Message(_("We are on Command Line interface"),1)
4969 # Called with no arguments, show error
4970 if not infiles: Error(_('Missing input file (try --help)'))
4972 convert_this_files(infiles_config)
4974 Message(_("Txt2tags finished sucessfuly"),1)
4976 if __name__ == '__main__':
# Script entry point.
# NOTE(review): most of this block is not visible in this excerpt. The two
# visible statements write an error message to stderr: `msg` in one case,
# the text from getUnknownErrorMessage() in the other.
4980 sys.stderr.write("%s\n"%msg)
4986 sys.stderr.write(getUnknownErrorMessage())