docs/prettify.js

   1 // Copyright (C) 2006 Google Inc.
   2 //
   3 // Licensed under the Apache License, Version 2.0 (the "License");
   4 // you may not use this file except in compliance with the License.
   5 // You may obtain a copy of the License at
   6 //
   7 //      http://www.apache.org/licenses/LICENSE-2.0
   8 //
   9 // Unless required by applicable law or agreed to in writing, software
  10 // distributed under the License is distributed on an "AS IS" BASIS,
  11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 // See the License for the specific language governing permissions and
  13 // limitations under the License.
  14
  15
  16 /**
  17  * @fileoverview
  18  * some functions for browser-side pretty printing of code contained in html.
  19  *
  20  * The lexer should work on a number of languages including C and friends,
  21  * Java, Python, Bash, SQL, HTML, XML, CSS, Javascript, and Makefiles.
  22  * It works passably on Ruby, PHP and Awk and a decent subset of Perl, but,
  23  * because of commenting conventions, doesn't work on Smalltalk, Lisp-like, or
  24  * CAML-like languages.
  25  *
  26  * If there's a language not mentioned here, then I don't know it, and don't
  27  * know whether it works.  If it has a C-like, Bash-like, or XML-like syntax
  28  * then it should work passably.
  29  *
  30  * Usage:
  31  * 1) include this source file in an html page via
  32  * <script type="text/javascript" src="/path/to/prettify.js"></script>
  33  * 2) define style rules.  See the example page for examples.
  34  * 3) mark the <pre> and <code> tags in your source with class=prettyprint.
  35  *    You can also use the (html deprecated) <xmp> tag, but the pretty printer
  36  *    needs to do more substantial DOM manipulations to support that, so some
  37  *    css styles may not be preserved.
  38  * That's it.  I wanted to keep the API as simple as possible, so there's no
  39  * need to specify which language the code is in.
  40  *
  41  * Change log:
  42  * cbeust, 2006/08/22
  43  *   Java annotations (start with "@") are now captured as literals ("lit")
  44  */
  45
  46 // JSLint declarations
  47 /*global console, document, navigator, setTimeout, window */
  48
/**
 * Split {@code prettyPrint} into multiple timeouts so as not to interfere with
 * UI events.
 * If set to {@code false}, {@code prettyPrint()} is synchronous.
 * @type {boolean}
 */
var PR_SHOULD_USE_CONTINUATION = true;

/**
 * The number of characters between tab columns.
 * @type {number}
 */
var PR_TAB_WIDTH = 8;

/** Walks the DOM returning a properly escaped version of innerHTML.
  * Only declared here, without a value.
  * NOTE(review): presumably assigned later in this file -- confirm.
  * @param {Node} node
  * @param {Array.<string>} out output buffer that receives chunks of HTML.
  */
var PR_normalizedHtml;

/** Contains functions for creating and registering new language handlers.
  * Only declared here, without a value.
  * @type {Object}
  */
var PR;

/** Pretty print a chunk of code.
  * Only declared here, without a value.
  *
  * @param {string} sourceCodeHtml code as html
  * @return {string} code as html, but prettier
  */
var prettyPrintOne;
/** Find all the <pre> and <code> tags in the DOM with class=prettyprint
  * and prettify them.
  * Only declared here, without a value.
  * @param {Function} opt_whenDone if specified, called when the last entry
  *     has been finished.
  */
var prettyPrint;
  82
  83 /** browser detection. @extern */
  84 function _pr_isIE6() {
  85   var isIE6 = navigator && navigator.userAgent &&
  86       /\bMSIE 6\./.test(navigator.userAgent);
  87   _pr_isIE6 = function () { return isIE6; };
  88   return isIE6;
  89 }
  90
  91
  92 (function () {
  /**
   * Builds a lookup object from a space separated word list.
   *
   * Every non-empty word becomes a key of the result.  The stored value is
   * always null, so only the presence of a key is meaningful.
   *
   * @param {string} words words separated by single spaces; empty parts (from
   *     leading, trailing or doubled spaces) are ignored.
   * @return {Object} an object with one key per distinct word.
   */
  function wordSet(words) {
    var parts = words.split(' ');
    var result = {};
    // Walk from the last word to the first, as keys are added in that order.
    for (var idx = parts.length - 1; idx >= 0; idx--) {
      var part = parts[idx];
      if (part !== '') {
        result[part] = null;
      }
    }
    return result;
  }
 105
 106   // Keyword lists for various languages.
 107   var FLOW_CONTROL_KEYWORDS =
 108       "break continue do else for if return while ";
 109   var C_KEYWORDS = FLOW_CONTROL_KEYWORDS + "auto case char const default " +
 110       "double enum extern float goto int long register short signed sizeof " +
 111       "static struct switch typedef union unsigned void volatile ";
 112   var COMMON_KEYWORDS = C_KEYWORDS + "catch class delete false import " +
 113       "new operator private protected public this throw true try ";
 114   var CPP_KEYWORDS = COMMON_KEYWORDS + "alignof align_union asm axiom bool " +
 115       "concept concept_map const_cast constexpr decltype " +
 116       "dynamic_cast explicit export friend inline late_check " +
 117       "mutable namespace nullptr reinterpret_cast static_assert static_cast " +
 118       "template typeid typename typeof using virtual wchar_t where ";
 119   var JAVA_KEYWORDS = COMMON_KEYWORDS +
 120       "boolean byte extends final finally implements import instanceof null " +
 121       "native package strictfp super synchronized throws transient ";
 122   var CSHARP_KEYWORDS = JAVA_KEYWORDS +
 123       "as base by checked decimal delegate descending event " +
 124       "fixed foreach from group implicit in interface internal into is lock " +
 125       "object out override orderby params readonly ref sbyte sealed " +
 126       "stackalloc string select uint ulong unchecked unsafe ushort var ";
 127   var JSCRIPT_KEYWORDS = COMMON_KEYWORDS +
 128       "debugger eval export function get null set undefined var with " +
 129       "Infinity NaN ";
 130   var PERL_KEYWORDS = "caller delete die do dump elsif eval exit foreach for " +
 131       "goto if import last local my next no our print package redo require " +
 132       "sub undef unless until use wantarray while BEGIN END ";
 133   var PYTHON_KEYWORDS = FLOW_CONTROL_KEYWORDS + "and as assert class def del " +
 134       "elif except exec finally from global import in is lambda " +
 135       "nonlocal not or pass print raise try with yield " +
 136       "False True None ";
 137   var RUBY_KEYWORDS = FLOW_CONTROL_KEYWORDS + "alias and begin case class def" +
 138       " defined elsif end ensure false in module next nil not or redo rescue " +
 139       "retry self super then true undef unless until when yield BEGIN END ";
 140   var SH_KEYWORDS = FLOW_CONTROL_KEYWORDS + "case done elif esac eval fi " +
 141       "function in local set then until ";
 142   var ALL_KEYWORDS = (
 143       CPP_KEYWORDS + CSHARP_KEYWORDS + JSCRIPT_KEYWORDS + PERL_KEYWORDS +
 144       PYTHON_KEYWORDS + RUBY_KEYWORDS + SH_KEYWORDS);
 145
  // Token style names.  These correspond to css classes.
  /** token style for a string literal */
  var PR_STRING = 'str';
  /** token style for a keyword */
  var PR_KEYWORD = 'kwd';
  /** token style for a comment */
  var PR_COMMENT = 'com';
  /** token style for a type */
  var PR_TYPE = 'typ';
  /** token style for a literal value.  e.g. 1, null, true. */
  var PR_LITERAL = 'lit';
  /** token style for a punctuation string. */
  var PR_PUNCTUATION = 'pun';
  /**
   * token style for plain text, i.e. text that has been given no more
   * specific style.
   */
  var PR_PLAIN = 'pln';

  /** token style for an sgml tag. */
  var PR_TAG = 'tag';
  /** token style for a markup declaration such as a DOCTYPE. */
  var PR_DECLARATION = 'dec';
  /** token style for embedded source. */
  var PR_SOURCE = 'src';
  /** token style for an sgml attribute name. */
  var PR_ATTRIB_NAME = 'atn';
  /** token style for an sgml attribute value. */
  var PR_ATTRIB_VALUE = 'atv';

  /**
   * A class that indicates a section of markup that is not code, e.g. to allow
   * embedding of line numbers within code listings.
   */
  var PR_NOCODE = 'nocode';
 178
 179   function isWordChar(ch) {
 180     return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
 181   }
 182
 183   /** Splice one array into another.
 184     * Like the python <code>
 185     * container[containerPosition:containerPosition + countReplaced] = inserted
 186     * </code>
 187     * @param {Array} inserted
 188     * @param {Array} container modified in place
 189     * @param {Number} containerPosition
 190     * @param {Number} countReplaced
 191     */
 192   function spliceArrayInto(
 193       inserted, container, containerPosition, countReplaced) {
 194     inserted.unshift(containerPosition, countReplaced || 0);
 195     try {
 196       container.splice.apply(container, inserted);
 197     } finally {
 198       inserted.splice(0, 2);
 199     }
 200   }
 201
 202   /** A set of tokens that can precede a regular expression literal in
 203     * javascript.
 204     * http://www.mozilla.org/js/language/js20/rationale/syntax.html has the full
 205     * list, but I've removed ones that might be problematic when seen in
 206     * languages that don't support regular expression literals.
 207     *
 208     * <p>Specifically, I've removed any keywords that can't precede a regexp
 209     * literal in a syntactically legal javascript program, and I've removed the
 210     * "in" keyword since it's not a keyword in many languages, and might be used
 211     * as a count of inches.
 212     * @private
 213     */
 214   var REGEXP_PRECEDER_PATTERN = function () {
 215       var preceders = [
 216           "!", "!=", "!==", "#", "%", "%=", "&", "&&", "&&=",
 217           "&=", "(", "*", "*=", /* "+", */ "+=", ",", /* "-", */ "-=",
 218           "->", /*".", "..", "...", handled below */ "/", "/=", ":", "::", ";",
 219           "<", "<<", "<<=", "<=", "=", "==", "===", ">",
 220           ">=", ">>", ">>=", ">>>", ">>>=", "?", "@", "[",
 221           "^", "^=", "^^", "^^=", "{", "|", "|=", "||",
 222           "||=", "~" /* handles =~ and !~ */,
 223           "break", "case", "continue", "delete",
 224           "do", "else", "finally", "instanceof",
 225           "return", "throw", "try", "typeof"
 226           ];
 227       var pattern = '(?:' +
 228           '(?:(?:^|[^0-9.])\\.{1,3})|' +  // a dot that's not part of a number
 229           '(?:(?:^|[^\\+])\\+)|' +  // allow + but not ++
 230           '(?:(?:^|[^\\-])-)';  // allow - but not --
 231       for (var i = 0; i < preceders.length; ++i) {
 232         var preceder = preceders[i];
 233         if (isWordChar(preceder.charAt(0))) {
 234           pattern += '|\\b' + preceder;
 235         } else {
 236           pattern += '|' + preceder.replace(/([^=<>:&])/g, '\\$1');
 237         }
 238       }
 239       pattern += '|^)\\s*$';  // matches at end, and matches empty string
 240       return new RegExp(pattern);
 241       // CAVEAT: this does not properly handle the case where a regular
 242       // expression immediately follows another since a regular expression may
 243       // have flags for case-sensitivity and the like.  Having regexp tokens
 244       // adjacent is not
 245       // valid in any language I'm aware of, so I'm punting.
 246       // TODO: maybe style special characters inside a regexp as punctuation.
 247     }();
 248
 249   // Define regexps here so that the interpreter doesn't have to create an
 250   // object each time the function containing them is called.
 251   // The language spec requires a new object created even if you don't access
 252   // the $1 members.
 253   var pr_amp = /&/g;
 254   var pr_lt = /</g;
 255   var pr_gt = />/g;
 256   var pr_quot = /\"/g;
 257   /** like textToHtml but escapes double quotes to be attribute safe. */
 258   function attribToHtml(str) {
 259     return str.replace(pr_amp, '&amp;')
 260         .replace(pr_lt, '&lt;')
 261         .replace(pr_gt, '&gt;')
 262         .replace(pr_quot, '&quot;');
 263   }
 264
 265   /** escapest html special characters to html. */
 266   function textToHtml(str) {
 267     return str.replace(pr_amp, '&amp;')
 268         .replace(pr_lt, '&lt;')
 269         .replace(pr_gt, '&gt;');
 270   }
 271
 272
 273   var pr_ltEnt = /&lt;/g;
 274   var pr_gtEnt = /&gt;/g;
 275   var pr_aposEnt = /&apos;/g;
 276   var pr_quotEnt = /&quot;/g;
 277   var pr_ampEnt = /&amp;/g;
 278   var pr_nbspEnt = /&nbsp;/g;
 279   /** unescapes html to plain text. */
 280   function htmlToText(html) {
 281     var pos = html.indexOf('&');
 282     if (pos < 0) { return html; }
 283     // Handle numeric entities specially.  We can't use functional substitution
 284     // since that doesn't work in older versions of Safari.
 285     // These should be rare since most browsers convert them to normal chars.
 286     for (--pos; (pos = html.indexOf('&#', pos + 1)) >= 0;) {
 287       var end = html.indexOf(';', pos);
 288       if (end >= 0) {
 289         var num = html.substring(pos + 3, end);
 290         var radix = 10;
 291         if (num && num.charAt(0) === 'x') {
 292           num = num.substring(1);
 293           radix = 16;
 294         }
 295         var codePoint = parseInt(num, radix);
 296         if (!isNaN(codePoint)) {
 297           html = (html.substring(0, pos) + String.fromCharCode(codePoint) +
 298                   html.substring(end + 1));
 299         }
 300       }
 301     }
 302
 303     return html.replace(pr_ltEnt, '<')
 304         .replace(pr_gtEnt, '>')
 305         .replace(pr_aposEnt, "'")
 306         .replace(pr_quotEnt, '"')
 307         .replace(pr_ampEnt, '&')
 308         .replace(pr_nbspEnt, ' ');
 309   }
 310
 311   /** is the given node's innerHTML normally unescaped? */
 312   function isRawContent(node) {
 313     return 'XMP' === node.tagName;
 314   }
 315
 316   function normalizedHtml(node, out) {
 317     switch (node.nodeType) {
 318       case 1:  // an element
 319         var name = node.tagName.toLowerCase();
 320         out.push('<', name);
 321         for (var i = 0; i < node.attributes.length; ++i) {
 322           var attr = node.attributes[i];
 323           if (!attr.specified) { continue; }
 324           out.push(' ');
 325           normalizedHtml(attr, out);
 326         }
 327         out.push('>');
 328         for (var child = node.firstChild; child; child = child.nextSibling) {
 329           normalizedHtml(child, out);
 330         }
 331         if (node.firstChild || !/^(?:br|link|img)$/.test(name)) {
 332           out.push('<\/', name, '>');
 333         }
 334         break;
 335       case 2: // an attribute
 336         out.push(node.name.toLowerCase(), '="', attribToHtml(node.value), '"');
 337         break;
 338       case 3: case 4: // text
 339         out.push(textToHtml(node.nodeValue));
 340         break;
 341     }
 342   }
 343
 344   var PR_innerHtmlWorks = null;
 345   function getInnerHtml(node) {
 346     // inner html is hopelessly broken in Safari 2.0.4 when the content is
 347     // an html description of well formed XML and the containing tag is a PRE
 348     // tag, so we detect that case and emulate innerHTML.
 349     if (null === PR_innerHtmlWorks) {
 350       var testNode = document.createElement('PRE');
 351       testNode.appendChild(
 352           document.createTextNode('<!DOCTYPE foo PUBLIC "foo bar">\n<foo />'));
 353       PR_innerHtmlWorks = !/</.test(testNode.innerHTML);
 354     }
 355
 356     if (PR_innerHtmlWorks) {
 357       var content = node.innerHTML;
 358       // XMP tags contain unescaped entities so require special handling.
 359       if (isRawContent(node)) {
 360         content = textToHtml(content);
 361       }
 362       return content;
 363     }
 364
 365     var out = [];
 366     for (var child = node.firstChild; child; child = child.nextSibling) {
 367       normalizedHtml(child, out);
 368     }
 369     return out.join('');
 370   }
 371
 372   /** returns a function that expand tabs to spaces.  This function can be fed
 373     * successive chunks of text, and will maintain its own internal state to
 374     * keep track of how tabs are expanded.
 375     * @return {function (string) : string} a function that takes
 376     *   plain text and return the text with tabs expanded.
 377     * @private
 378     */
 379   function makeTabExpander(tabWidth) {
 380     var SPACES = '                ';
 381     var charInLine = 0;
 382
 383     return function (plainText) {
 384       // walk over each character looking for tabs and newlines.
 385       // On tabs, expand them.  On newlines, reset charInLine.
 386       // Otherwise increment charInLine
 387       var out = null;
 388       var pos = 0;
 389       for (var i = 0, n = plainText.length; i < n; ++i) {
 390         var ch = plainText.charAt(i);
 391
 392         switch (ch) {
 393           case '\t':
 394             if (!out) { out = []; }
 395             out.push(plainText.substring(pos, i));
 396             // calculate how much space we need in front of this part
 397             // nSpaces is the amount of padding -- the number of spaces needed
 398             // to move us to the next column, where columns occur at factors of
 399             // tabWidth.
 400             var nSpaces = tabWidth - (charInLine % tabWidth);
 401             charInLine += nSpaces;
 402             for (; nSpaces >= 0; nSpaces -= SPACES.length) {
 403               out.push(SPACES.substring(0, nSpaces));
 404             }
 405             pos = i + 1;
 406             break;
 407           case '\n':
 408             charInLine = 0;
 409             break;
 410           default:
 411             ++charInLine;
 412         }
 413       }
 414       if (!out) { return plainText; }
 415       out.push(plainText.substring(pos));
 416       return out.join('');
 417     };
 418   }
 419
 420   // The below pattern matches one of the following
 421   // (1) /[^<]+/ : A run of characters other than '<'
 422   // (2) /<!--.*?-->/: an HTML comment
 423   // (3) /<!\[CDATA\[.*?\]\]>/: a cdata section
 424   // (3) /<\/?[a-zA-Z][^>]*>/ : A probably tag that should not be highlighted
 425   // (4) /</ : A '<' that does not begin a larger chunk.  Treated as 1
 426   var pr_chunkPattern =
 427   /(?:[^<]+|<!--[\s\S]*?-->|<!\[CDATA\[([\s\S]*?)\]\]>|<\/?[a-zA-Z][^>]*>|<)/g;
 428   var pr_commentPrefix = /^<!--/;
 429   var pr_cdataPrefix = /^<\[CDATA\[/;
 430   var pr_brPrefix = /^<br\b/i;
 431   var pr_tagNameRe = /^<(\/?)([a-zA-Z]+)/;
 432
 433   /** split markup into chunks of html tags (style null) and
 434     * plain text (style {@link #PR_PLAIN}), converting tags which are
 435     * significant for tokenization (<br>) into their textual equivalent.
 436     *
 437     * @param {string} s html where whitespace is considered significant.
 438     * @return {Object} source code and extracted tags.
 439     * @private
 440     */
 441   function extractTags(s) {
 442     // since the pattern has the 'g' modifier and defines no capturing groups,
 443     // this will return a list of all chunks which we then classify and wrap as
 444     // PR_Tokens
 445     var matches = s.match(pr_chunkPattern);
 446     var sourceBuf = [];
 447     var sourceBufLen = 0;
 448     var extractedTags = [];
 449     if (matches) {
 450       for (var i = 0, n = matches.length; i < n; ++i) {
 451         var match = matches[i];
 452         if (match.length > 1 && match.charAt(0) === '<') {
 453           if (pr_commentPrefix.test(match)) { continue; }
 454           if (pr_cdataPrefix.test(match)) {
 455             // strip CDATA prefix and suffix.  Don't unescape since it's CDATA
 456             sourceBuf.push(match.substring(9, match.length - 3));
 457             sourceBufLen += match.length - 12;
 458           } else if (pr_brPrefix.test(match)) {
 459             // <br> tags are lexically significant so convert them to text.
 460             // This is undone later.
 461             sourceBuf.push('\n');
 462             ++sourceBufLen;
 463           } else {
 464             if (match.indexOf(PR_NOCODE) >= 0 && isNoCodeTag(match)) {
 465               // A <span class="nocode"> will start a section that should be
 466               // ignored.  Continue walking the list until we see a matching end
 467               // tag.
 468               var name = match.match(pr_tagNameRe)[2];
 469               var depth = 1;
 470               end_tag_loop:
 471               for (var j = i + 1; j < n; ++j) {
 472                 var name2 = matches[j].match(pr_tagNameRe);
 473                 if (name2 && name2[2] === name) {
 474                   if (name2[1] === '/') {
 475                     if (--depth === 0) { break end_tag_loop; }
 476                   } else {
 477                     ++depth;
 478                   }
 479                 }
 480               }
 481               if (j < n) {
 482                 extractedTags.push(
 483                     sourceBufLen, matches.slice(i, j + 1).join(''));
 484                 i = j;
 485               } else {  // Ignore unclosed sections.
 486                 extractedTags.push(sourceBufLen, match);
 487               }
 488             } else {
 489               extractedTags.push(sourceBufLen, match);
 490             }
 491           }
 492         } else {
 493           var literalText = htmlToText(match);
 494           sourceBuf.push(literalText);
 495           sourceBufLen += literalText.length;
 496         }
 497       }
 498     }
 499     return { source: sourceBuf.join(''), tags: extractedTags };
 500   }
 501
 502   /** True if the given tag contains a class attribute with the nocode class. */
 503   function isNoCodeTag(tag) {
 504     return !!tag
 505         // First canonicalize the representation of attributes
 506         .replace(/\s(\w+)\s*=\s*(?:\"([^\"]*)\"|'([^\']*)'|(\S+))/g,
 507                  ' $1="$2$3$4"')
 508         // Then look for the attribute we want.
 509         .match(/[cC][lL][aA][sS][sS]=\"[^\"]*\bnocode\b/);
 510   }
 511
 512   /** Given triples of [style, pattern, context] returns a lexing function,
 513     * The lexing function interprets the patterns to find token boundaries and
 514     * returns a decoration list of the form
 515     * [index_0, style_0, index_1, style_1, ..., index_n, style_n]
 516     * where index_n is an index into the sourceCode, and style_n is a style
 517     * constant like PR_PLAIN.  index_n-1 <= index_n, and style_n-1 applies to
 518     * all characters in sourceCode[index_n-1:index_n].
 519     *
 520     * The stylePatterns is a list whose elements have the form
 521     * [style : string, pattern : RegExp, context : RegExp, shortcut : string].
 522     &
 523     * Style is a style constant like PR_PLAIN.
 524     *
 525     * Pattern must only match prefixes, and if it matches a prefix and context
 526     * is null or matches the last non-comment token parsed, then that match is
 527     * considered a token with the same style.
 528     *
 529     * Context is applied to the last non-whitespace, non-comment token
 530     * recognized.
 531     *
 532     * Shortcut is an optional string of characters, any of which, if the first
 533     * character, gurantee that this pattern and only this pattern matches.
 534     *
 535     * @param {Array} shortcutStylePatterns patterns that always start with
 536     *   a known character.  Must have a shortcut string.
 537     * @param {Array} fallthroughStylePatterns patterns that will be tried in
 538     *   order if the shortcut ones fail.  May have shortcuts.
 539     *
 540     * @return {function (string, number?) : Array.<number|string>} a
 541     *   function that takes source code and returns a list of decorations.
 542     */
 543   function createSimpleLexer(shortcutStylePatterns,
 544                              fallthroughStylePatterns) {
 545     var shortcuts = {};
 546     (function () {
 547       var allPatterns = shortcutStylePatterns.concat(fallthroughStylePatterns);
 548       for (var i = allPatterns.length; --i >= 0;) {
 549         var patternParts = allPatterns[i];
 550         var shortcutChars = patternParts[3];
 551         if (shortcutChars) {
 552           for (var c = shortcutChars.length; --c >= 0;) {
 553             shortcuts[shortcutChars.charAt(c)] = patternParts;
 554           }
 555         }
 556       }
 557     })();
 558
 559     var nPatterns = fallthroughStylePatterns.length;
 560     var notWs = /\S/;
 561
 562     return function (sourceCode, opt_basePos) {
 563       opt_basePos = opt_basePos || 0;
 564       var decorations = [opt_basePos, PR_PLAIN];
 565       var lastToken = '';
 566       var pos = 0;  // index into sourceCode
 567       var tail = sourceCode;
 568
 569       while (tail.length) {
 570         var style;
 571         var token = null;
 572         var match;
 573
 574         var patternParts = shortcuts[tail.charAt(0)];
 575         if (patternParts) {
 576           match = tail.match(patternParts[1]);
 577           token = match[0];
 578           style = patternParts[0];
 579         } else {
 580           for (var i = 0; i < nPatterns; ++i) {
 581             patternParts = fallthroughStylePatterns[i];
 582             var contextPattern = patternParts[2];
 583             if (contextPattern && !contextPattern.test(lastToken)) {
 584               // rule can't be used
 585               continue;
 586             }
 587             match = tail.match(patternParts[1]);
 588             if (match) {
 589               token = match[0];
 590               style = patternParts[0];
 591               break;
 592             }
 593           }
 594
 595           if (!token) {  // make sure that we make progress
 596             style = PR_PLAIN;
 597             token = tail.substring(0, 1);
 598           }
 599         }
 600
 601         decorations.push(opt_basePos + pos, style);
 602         pos += token.length;
 603         tail = tail.substring(token.length);
 604         if (style !== PR_COMMENT && notWs.test(token)) { lastToken = token; }
 605       }
 606       return decorations;
 607     };
 608   }
 609
  // A lexer that cuts markup into top level chunks.  It has no shortcut
  // patterns, and none of its patterns has a context, so the patterns are
  // simply tried in the order listed.
  var PR_MARKUP_LEXER = createSimpleLexer([], [
      // A run of text containing no '<'.
      [PR_PLAIN,       /^[^<]+/, null],
      // "<!" followed by a word character, up to the next '>' or the end.
      [PR_DECLARATION, /^<!\w[^>]*(?:>|$)/, null],
      // An html comment, closed by "-->" or running to the end of input.
      [PR_COMMENT,     /^<!--[\s\S]*?(?:-->|$)/, null],
      // A "<? ... ?>" section, or an unclosed one running to the end.
      [PR_SOURCE,      /^<\?[\s\S]*?(?:\?>|$)/, null],
      // A "<% ... %>" section, or an unclosed one running to the end.
      [PR_SOURCE,      /^<%[\s\S]*?(?:%>|$)/, null],
      [PR_SOURCE,
       // Tags whose content is not escaped, and which contain source code.
       // The start tag, the content and the matching end tag form one token.
       /^<(script|style|xmp)\b[^>]*>[\s\S]*?<\/\1\b[^>]*>/i, null],
      // Any other start or end tag.
      [PR_TAG,         /^<\/?\w[^<>]*>/, null]
      ]);
  // Splits any of the script|style|xmp entries above into a start tag
  // (group 1), source content (group 2), and end tag (group 3).
  var PR_SOURCE_CHUNK_PARTS = /^(<[^>]*>)([\s\S]*)(<\/[^>]*>)$/;
 624   /** split markup on tags, comments, application directives, and other top
 625     * level constructs.  Tags are returned as a single token - attributes are
 626     * not yet broken out.
 627     * @private
 628     */
 629   function tokenizeMarkup(source) {
 630     var decorations = PR_MARKUP_LEXER(source);
 631     for (var i = 0; i < decorations.length; i += 2) {
 632       if (decorations[i + 1] === PR_SOURCE) {
 633         var start, end;
 634         start = decorations[i];
 635         end = i + 2 < decorations.length ? decorations[i + 2] : source.length;
 636         // Split out start and end script tags as actual tags, and leave the
 637         // body with style SCRIPT.
 638         var sourceChunk = source.substring(start, end);
 639         var match = sourceChunk.match(PR_SOURCE_CHUNK_PARTS);
 640         if (match) {
 641           decorations.splice(
 642               i, 2,
 643               start, PR_TAG,  // the open chunk
 644               start + match[1].length, PR_SOURCE,
 645               start + match[1].length + (match[2] || '').length, PR_TAG);
 646         }
 647       }
 648     }
 649     return decorations;
 650   }
 651
  // A lexer for the text of a single tag.
  // The first list holds the shortcut patterns, each chosen by the first
  // character of the remaining text.  The second list holds the fallthrough
  // patterns, which are tried in order.
  var PR_TAG_LEXER = createSimpleLexer([
      // A single quoted value, closed by a quote or running to the end.
      [PR_ATTRIB_VALUE, /^\'[^\']*(?:\'|$)/, null, "'"],
      // A double quoted value, closed by a quote or running to the end.
      [PR_ATTRIB_VALUE, /^\"[^\"]*(?:\"|$)/, null, '"'],
      // A run of the characters < > / =
      [PR_PUNCTUATION,  /^[<>\/=]+/, null, '<>/=']
      ], [
      // A name counts as the tag name when the previous token starts with '<'.
      [PR_TAG,          /^[\w:\-]+/, /^</],
      // An unquoted value: used when the previous token starts with '='.
      [PR_ATTRIB_VALUE, /^[\w\-]+/, /^=/],
      // Any other name is an attribute name.
      [PR_ATTRIB_NAME,  /^[\w:\-]+/, null],
      // Whitespace between the parts of the tag.
      [PR_PLAIN,        /^\s+/, null, ' \t\r\n']
      ]);
 662   /** split tags attributes and their values out from the tag name, and
 663     * recursively lex source chunks.
 664     * @private
 665     */
 666   function splitTagAttributes(source, decorations) {
 667     for (var i = 0; i < decorations.length; i += 2) {
 668       var style = decorations[i + 1];
 669       if (style === PR_TAG) {
 670         var start, end;
 671         start = decorations[i];
 672         end = i + 2 < decorations.length ? decorations[i + 2] : source.length;
 673         var chunk = source.substring(start, end);
 674         var subDecorations = PR_TAG_LEXER(chunk, start);
 675         spliceArrayInto(subDecorations, decorations, i, 2);
 676         i += subDecorations.length - 2;
 677       }
 678     }
 679     return decorations;
 680   }
 681
 682   /** returns a function that produces a list of decorations from source text.
 683     *
 684     * This code treats ", ', and ` as string delimiters, and \ as a string
 685     * escape.  It does not recognize perl's qq() style strings.
 686     * It has no special handling for double delimiter escapes as in basic, or
 687     * the tripled delimiters used in python, but should work on those regardless
 688     * although in those cases a single string literal may be broken up into
 689     * multiple adjacent string literals.
 690     *
 691     * It recognizes C, C++, and shell style comments.
 692     *
 693     * @param {Object} options a set of optional parameters.
 694     * @return {function (string) : Array.<string|number>} a
 695     *     decorator that takes sourceCode as plain text and that returns a
 696     *     decoration list
 697     */
 698   function sourceDecorator(options) {
 699     var shortcutStylePatterns = [], fallthroughStylePatterns = [];
 700     if (options.tripleQuotedStrings) {
 701       // '''multi-line-string''', 'single-line-string', and double-quoted
 702       shortcutStylePatterns.push(
 703           [PR_STRING,  /^(?:\'\'\'(?:[^\'\\]|\\[\s\S]|\'{1,2}(?=[^\']))*(?:\'\'\'|$)|\"\"\"(?:[^\"\\]|\\[\s\S]|\"{1,2}(?=[^\"]))*(?:\"\"\"|$)|\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$))/,
 704            null, '\'"']);
 705     } else if (options.multiLineStrings) {
 706       // 'multi-line-string', "multi-line-string"
 707       shortcutStylePatterns.push(
 708           [PR_STRING,  /^(?:\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$)|\`(?:[^\\\`]|\\[\s\S])*(?:\`|$))/,
 709            null, '\'"`']);
 710     } else {
 711       // 'single-line-string', "single-line-string"
 712       shortcutStylePatterns.push(
 713           [PR_STRING,
 714            /^(?:\'(?:[^\\\'\r\n]|\\.)*(?:\'|$)|\"(?:[^\\\"\r\n]|\\.)*(?:\"|$))/,
 715            null, '"\'']);
 716     }
 717     fallthroughStylePatterns.push(
 718         [PR_PLAIN,   /^(?:[^\'\"\`\/\#]+)/, null, ' \r\n']);
 719     if (options.hashComments) {
 720       shortcutStylePatterns.push([PR_COMMENT, /^#[^\r\n]*/, null, '#']);
 721     }
 722     if (options.cStyleComments) {
 723       fallthroughStylePatterns.push([PR_COMMENT, /^\/\/[^\r\n]*/, null]);
 724       fallthroughStylePatterns.push(
 725           [PR_COMMENT, /^\/\*[\s\S]*?(?:\*\/|$)/, null]);
 726     }
 727     if (options.regexLiterals) {
 728       var REGEX_LITERAL = (
 729           // A regular expression literal starts with a slash that is
 730           // not followed by * or / so that it is not confused with
 731           // comments.
 732           '^/(?=[^/*])'
 733           // and then contains any number of raw characters,
 734           + '(?:[^/\\x5B\\x5C]'
 735           // escape sequences (\x5C),
 736           +    '|\\x5C[\\s\\S]'
 737           // or non-nesting character sets (\x5B\x5D);
 738           +    '|\\x5B(?:[^\\x5C\\x5D]|\\x5C[\\s\\S])*(?:\\x5D|$))+'
 739           // finally closed by a /.
 740           + '(?:/|$)');
 741       fallthroughStylePatterns.push(
 742           [PR_STRING, new RegExp(REGEX_LITERAL), REGEXP_PRECEDER_PATTERN]);
 743     }
 744
 745     var keywords = wordSet(options.keywords);
 746
 747     options = null;
 748
 749     /** splits the given string into comment, string, and "other" tokens.
 750       * @param {string} sourceCode as plain text
 751       * @return {Array.<number|string>} a decoration list.
 752       * @private
 753       */
 754     var splitStringAndCommentTokens = createSimpleLexer(
 755         shortcutStylePatterns, fallthroughStylePatterns);
 756
 757     var styleLiteralIdentifierPuncRecognizer = createSimpleLexer([], [
 758         [PR_PLAIN,       /^\s+/, null, ' \r\n'],
 759         // TODO(mikesamuel): recognize non-latin letters and numerals in idents
 760         [PR_PLAIN,       /^[a-z_$@][a-z_$@0-9]*/i, null],
 761         // A hex number
 762         [PR_LITERAL,     /^0x[a-f0-9]+[a-z]/i, null],
 763         // An octal or decimal number, possibly in scientific notation
 764         [PR_LITERAL,
 765          /^(?:\d(?:_\d+)*\d*(?:\.\d*)?|\.\d+)(?:e[+\-]?\d+)?[a-z]*/i,
 766          null, '123456789'],
 767         [PR_PUNCTUATION, /^[^\s\w\.$@]+/, null]
 768         // Fallback will handle decimal points not adjacent to a digit
 769       ]);
 770
 771     /** splits plain text tokens into more specific tokens, and then tries to
 772       * recognize keywords, and types.
 773       * @private
 774       */
 775     function splitNonStringNonCommentTokens(source, decorations) {
 776       for (var i = 0; i < decorations.length; i += 2) {
 777         var style = decorations[i + 1];
 778         if (style === PR_PLAIN) {
 779           var start, end, chunk, subDecs;
 780           start = decorations[i];
 781           end = i + 2 < decorations.length ? decorations[i + 2] : source.length;
 782           chunk = source.substring(start, end);
 783           subDecs = styleLiteralIdentifierPuncRecognizer(chunk, start);
 784           for (var j = 0, m = subDecs.length; j < m; j += 2) {
 785             var subStyle = subDecs[j + 1];
 786             if (subStyle === PR_PLAIN) {
 787               var subStart = subDecs[j];
 788               var subEnd = j + 2 < m ? subDecs[j + 2] : chunk.length;
 789               var token = source.substring(subStart, subEnd);
 790               if (token === '.') {
 791                 subDecs[j + 1] = PR_PUNCTUATION;
 792               } else if (token in keywords) {
 793                 subDecs[j + 1] = PR_KEYWORD;
 794               } else if (/^@?[A-Z][A-Z$]*[a-z][A-Za-z$]*$/.test(token)) {
 795                 // classify types and annotations using Java's style conventions
 796                 subDecs[j + 1] = token.charAt(0) === '@' ? PR_LITERAL : PR_TYPE;
 797               }
 798             }
 799           }
 800           spliceArrayInto(subDecs, decorations, i, 2);
 801           i += subDecs.length - 2;
 802         }
 803       }
 804       return decorations;
 805     }
 806
 807     return function (sourceCode) {
 808       // Split into strings, comments, and other.
 809       // We do this because strings and comments are easily recognizable and can
 810       // contain stuff that looks like other tokens, so we want to mark those
 811       // early so we don't recurse into them.
 812       var decorations = splitStringAndCommentTokens(sourceCode);
 813
 814       // Split non comment|string tokens on whitespace and word boundaries
 815       decorations = splitNonStringNonCommentTokens(sourceCode, decorations);
 816
 817       return decorations;
 818     };
 819   }
 820
 821   var decorateSource = sourceDecorator({
 822         keywords: ALL_KEYWORDS,
 823         hashComments: true,
 824         cStyleComments: true,
 825         multiLineStrings: true,
 826         regexLiterals: true
 827       });
 828
 829   /** identify regions of markup that are really source code, and recursivley
 830     * lex them.
 831     * @private
 832     */
 833   function splitSourceNodes(source, decorations) {
 834     for (var i = 0; i < decorations.length; i += 2) {
 835       var style = decorations[i + 1];
 836       if (style === PR_SOURCE) {
 837         // Recurse using the non-markup lexer
 838         var start, end;
 839         start = decorations[i];
 840         end = i + 2 < decorations.length ? decorations[i + 2] : source.length;
 841         var subDecorations = decorateSource(source.substring(start, end));
 842         for (var j = 0, m = subDecorations.length; j < m; j += 2) {
 843           subDecorations[j] += start;
 844         }
 845         spliceArrayInto(subDecorations, decorations, i, 2);
 846         i += subDecorations.length - 2;
 847       }
 848     }
 849     return decorations;
 850   }
 851
 852   /** identify attribute values that really contain source code and recursively
 853     * lex them.
 854     * @private
 855     */
 856   function splitSourceAttributes(source, decorations) {
 857     var nextValueIsSource = false;
 858     for (var i = 0; i < decorations.length; i += 2) {
 859       var style = decorations[i + 1];
 860       var start, end;
 861       if (style === PR_ATTRIB_NAME) {
 862         start = decorations[i];
 863         end = i + 2 < decorations.length ? decorations[i + 2] : source.length;
 864         nextValueIsSource = /^on|^style$/i.test(source.substring(start, end));
 865       } else if (style === PR_ATTRIB_VALUE) {
 866         if (nextValueIsSource) {
 867           start = decorations[i];
 868           end = i + 2 < decorations.length ? decorations[i + 2] : source.length;
 869           var attribValue = source.substring(start, end);
 870           var attribLen = attribValue.length;
 871           var quoted =
 872               (attribLen >= 2 && /^[\"\']/.test(attribValue) &&
 873                attribValue.charAt(0) === attribValue.charAt(attribLen - 1));
 874
 875           var attribSource;
 876           var attribSourceStart;
 877           var attribSourceEnd;
 878           if (quoted) {
 879             attribSourceStart = start + 1;
 880             attribSourceEnd = end - 1;
 881             attribSource = attribValue;
 882           } else {
 883             attribSourceStart = start + 1;
 884             attribSourceEnd = end - 1;
 885             attribSource = attribValue.substring(1, attribValue.length - 1);
 886           }
 887
 888           var attribSourceDecorations = decorateSource(attribSource);
 889           for (var j = 0, m = attribSourceDecorations.length; j < m; j += 2) {
 890             attribSourceDecorations[j] += attribSourceStart;
 891           }
 892
 893           if (quoted) {
 894             attribSourceDecorations.push(attribSourceEnd, PR_ATTRIB_VALUE);
 895             spliceArrayInto(attribSourceDecorations, decorations, i + 2, 0);
 896           } else {
 897             spliceArrayInto(attribSourceDecorations, decorations, i, 2);
 898           }
 899         }
 900         nextValueIsSource = false;
 901       }
 902     }
 903     return decorations;
 904   }
 905
 906   /** returns a decoration list given a string of markup.
 907     *
 908     * This code recognizes a number of constructs.
 909     * <!-- ... --> comment
 910     * <!\w ... >   declaration
 911     * <\w ... >    tag
 912     * </\w ... >   tag
 913     * <?...?>      embedded source
 914     * <%...%>      embedded source
 915     * &[#\w]...;   entity
 916     *
 917     * It does not recognizes %foo; doctype entities from  .
 918     *
 919     * It will recurse into any <style>, <script>, and on* attributes using
 920     * PR_lexSource.
 921     */
 922   function decorateMarkup(sourceCode) {
 923     // This function works as follows:
 924     // 1) Start by splitting the markup into text and tag chunks
 925     //    Input:  string s
 926     //    Output: List<PR_Token> where style in (PR_PLAIN, null)
 927     // 2) Then split the text chunks further into comments, declarations,
 928     //    tags, etc.
 929     //    After each split, consider whether the token is the start of an
 930     //    embedded source section, i.e. is an open <script> tag.  If it is, find
 931     //    the corresponding close token, and don't bother to lex in between.
 932     //    Input:  List<string>
 933     //    Output: List<PR_Token> with style in
 934     //            (PR_TAG, PR_PLAIN, PR_SOURCE, null)
 935     // 3) Finally go over each tag token and split out attribute names and
 936     //    values.
 937     //    Input:  List<PR_Token>
 938     //    Output: List<PR_Token> where style in
 939     //            (PR_TAG, PR_PLAIN, PR_SOURCE, NAME, VALUE, null)
 940     var decorations = tokenizeMarkup(sourceCode);
 941     decorations = splitTagAttributes(sourceCode, decorations);
 942     decorations = splitSourceNodes(sourceCode, decorations);
 943     decorations = splitSourceAttributes(sourceCode, decorations);
 944     return decorations;
 945   }
 946
 947   /**
 948     * @param {string} sourceText plain text
 949     * @param {Array.<number|string>} extractedTags chunks of raw html preceded
 950     *   by their position in sourceText in order.
 951     * @param {Array.<number|string>} decorations style classes preceded by their
 952     *   position in sourceText in order.
 953     * @return {string} html
 954     * @private
 955     */
 956   function recombineTagsAndDecorations(sourceText, extractedTags, decorations) {
 957     var html = [];
 958     // index past the last char in sourceText written to html
 959     var outputIdx = 0;
 960
 961     var openDecoration = null;
 962     var currentDecoration = null;
 963     var tagPos = 0;  // index into extractedTags
 964     var decPos = 0;  // index into decorations
 965     var tabExpander = makeTabExpander(PR_TAB_WIDTH);
 966
 967     var adjacentSpaceRe = /([\r\n ]) /g;
 968     var startOrSpaceRe = /(^| ) /gm;
 969     var newlineRe = /\r\n?|\n/g;
 970     var trailingSpaceRe = /[ \r\n]$/;
 971     var lastWasSpace = true;  // the last text chunk emitted ended with a space.
 972
 973     // A helper function that is responsible for opening sections of decoration
 974     // and outputing properly escaped chunks of source
 975     function emitTextUpTo(sourceIdx) {
 976       if (sourceIdx > outputIdx) {
 977         if (openDecoration && openDecoration !== currentDecoration) {
 978           // Close the current decoration
 979           html.push('</span>');
 980           openDecoration = null;
 981         }
 982         if (!openDecoration && currentDecoration) {
 983           openDecoration = currentDecoration;
 984           html.push('<span class="', openDecoration, '">');
 985         }
 986         // This interacts badly with some wikis which introduces paragraph tags
 987         // into pre blocks for some strange reason.
 988         // It's necessary for IE though which seems to lose the preformattedness
 989         // of <pre> tags when their innerHTML is assigned.
 990         // http://stud3.tuwien.ac.at/~e0226430/innerHtmlQuirk.html
 991         // and it serves to undo the conversion of <br>s to newlines done in
 992         // chunkify.
 993         var htmlChunk = textToHtml(
 994             tabExpander(sourceText.substring(outputIdx, sourceIdx)))
 995             .replace(lastWasSpace
 996                      ? startOrSpaceRe
 997                      : adjacentSpaceRe, '$1&nbsp;');
 998         // Keep track of whether we need to escape space at the beginning of the
 999         // next chunk.
1000         lastWasSpace = trailingSpaceRe.test(htmlChunk);
1001         html.push(htmlChunk.replace(newlineRe, '<br />'));
1002         outputIdx = sourceIdx;
1003       }
1004     }
1005
1006     while (true) {
1007       // Determine if we're going to consume a tag this time around.  Otherwise
1008       // we consume a decoration or exit.
1009       var outputTag;
1010       if (tagPos < extractedTags.length) {
1011         if (decPos < decorations.length) {
1012           // Pick one giving preference to extractedTags since we shouldn't open
1013           // a new style that we're going to have to immediately close in order
1014           // to output a tag.
1015           outputTag = extractedTags[tagPos] <= decorations[decPos];
1016         } else {
1017           outputTag = true;
1018         }
1019       } else {
1020         outputTag = false;
1021       }
1022       // Consume either a decoration or a tag or exit.
1023       if (outputTag) {
1024         emitTextUpTo(extractedTags[tagPos]);
1025         if (openDecoration) {
1026           // Close the current decoration
1027           html.push('</span>');
1028           openDecoration = null;
1029         }
1030         html.push(extractedTags[tagPos + 1]);
1031         tagPos += 2;
1032       } else if (decPos < decorations.length) {
1033         emitTextUpTo(decorations[decPos]);
1034         currentDecoration = decorations[decPos + 1];
1035         decPos += 2;
1036       } else {
1037         break;
1038       }
1039     }
1040     emitTextUpTo(sourceText.length);
1041     if (openDecoration) {
1042       html.push('</span>');
1043     }
1044
1045     return html.join('');
1046   }
1047
1048   /** Maps language-specific file extensions to handlers. */
1049   var langHandlerRegistry = {};
1050   /** Register a language handler for the given file extensions.
1051     * @param {function (string) : Array.<number|string>} handler
1052     *     a function from source code to a list of decorations.
1053     * @param {Array.<string>} fileExtensions
1054     */
1055   function registerLangHandler(handler, fileExtensions) {
1056     for (var i = fileExtensions.length; --i >= 0;) {
1057       var ext = fileExtensions[i];
1058       if (!langHandlerRegistry.hasOwnProperty(ext)) {
1059         langHandlerRegistry[ext] = handler;
1060       } else if ('console' in window) {
1061         console.log('cannot override language handler %s', ext);
1062       }
1063     }
1064   }
1065   registerLangHandler(decorateSource, ['default-code']);
1066   registerLangHandler(decorateMarkup,
1067                       ['default-markup', 'html', 'htm', 'xhtml', 'xml', 'xsl']);
1068   registerLangHandler(sourceDecorator({
1069           keywords: CPP_KEYWORDS,
1070           hashComments: true,
1071           cStyleComments: true
1072         }), ['c', 'cc', 'cpp', 'cxx', 'cyc']);
1073   registerLangHandler(sourceDecorator({
1074           keywords: CSHARP_KEYWORDS,
1075           hashComments: true,
1076           cStyleComments: true
1077         }), ['cs']);
1078   registerLangHandler(sourceDecorator({
1079           keywords: JAVA_KEYWORDS,
1080           cStyleComments: true
1081         }), ['java']);
1082   registerLangHandler(sourceDecorator({
1083           keywords: SH_KEYWORDS,
1084           hashComments: true,
1085           multiLineStrings: true
1086         }), ['bsh', 'csh', 'sh']);
1087   registerLangHandler(sourceDecorator({
1088           keywords: PYTHON_KEYWORDS,
1089           hashComments: true,
1090           multiLineStrings: true,
1091           tripleQuotedStrings: true
1092         }), ['cv', 'py']);
1093   registerLangHandler(sourceDecorator({
1094           keywords: PERL_KEYWORDS,
1095           hashComments: true,
1096           multiLineStrings: true,
1097           regexLiterals: true
1098         }), ['perl', 'pl', 'pm']);
1099   registerLangHandler(sourceDecorator({
1100           keywords: RUBY_KEYWORDS,
1101           hashComments: true,
1102           multiLineStrings: true,
1103           regexLiterals: true
1104         }), ['rb']);
1105   registerLangHandler(sourceDecorator({
1106           keywords: JSCRIPT_KEYWORDS,
1107           cStyleComments: true,
1108           regexLiterals: true
1109         }), ['js']);
1110
1111   function prettyPrintOne(sourceCodeHtml, opt_langExtension) {
1112     try {
1113       // Extract tags, and convert the source code to plain text.
1114       var sourceAndExtractedTags = extractTags(sourceCodeHtml);
1115       /** Plain text. @type {string} */
1116       var source = sourceAndExtractedTags.source;
1117
1118       /** Even entries are positions in source in ascending order.  Odd entries
1119         * are tags that were extracted at that position.
1120         * @type {Array.<number|string>}
1121         */
1122       var extractedTags = sourceAndExtractedTags.tags;
1123
1124       // Pick a lexer and apply it.
1125       if (!langHandlerRegistry.hasOwnProperty(opt_langExtension)) {
1126         // Treat it as markup if the first non whitespace character is a < and
1127         // the last non-whitespace character is a >.
1128         opt_langExtension =
1129             /^\s*</.test(source) ? 'default-markup' : 'default-code';
1130       }
1131
1132       /** Even entries are positions in source in ascending order.  Odd enties
1133         * are style markers (e.g., PR_COMMENT) that run from that position until
1134         * the end.
1135         * @type {Array.<number|string>}
1136         */
1137       var decorations = langHandlerRegistry[opt_langExtension].call({}, source);
1138
1139       // Integrate the decorations and tags back into the source code to produce
1140       // a decorated html string.
1141       return recombineTagsAndDecorations(source, extractedTags, decorations);
1142     } catch (e) {
1143       if ('console' in window) {
1144         console.log(e);
1145         console.trace();
1146       }
1147       return sourceCodeHtml;
1148     }
1149   }
1150
1151   function prettyPrint(opt_whenDone) {
1152     var isIE6 = _pr_isIE6();
1153
1154     // fetch a list of nodes to rewrite
1155     var codeSegments = [
1156         document.getElementsByTagName('pre'),
1157         document.getElementsByTagName('code'),
1158         document.getElementsByTagName('xmp') ];
1159     var elements = [];
1160     for (var i = 0; i < codeSegments.length; ++i) {
1161       for (var j = 0; j < codeSegments[i].length; ++j) {
1162         elements.push(codeSegments[i][j]);
1163       }
1164     }
1165     codeSegments = null;
1166
1167     // the loop is broken into a series of continuations to make sure that we
1168     // don't make the browser unresponsive when rewriting a large page.
1169     var k = 0;
1170
1171     function doWork() {
1172       var endTime = (PR_SHOULD_USE_CONTINUATION ?
1173                      new Date().getTime() + 250 /* ms */ :
1174                      Infinity);
1175       for (; k < elements.length && new Date().getTime() < endTime; k++) {
1176         var cs = elements[k];
1177         if (cs.className && cs.className.indexOf('prettyprint') >= 0) {
1178           // If the classes includes a language extensions, use it.
1179           // Language extensions can be specified like
1180           //     <pre class="prettyprint lang-cpp">
1181           // the language extension "cpp" is used to find a language handler as
1182           // passed to PR_registerLangHandler.
1183           var langExtension = cs.className.match(/\blang-(\w+)\b/);
1184           if (langExtension) { langExtension = langExtension[1]; }
1185
1186           // make sure this is not nested in an already prettified element
1187           var nested = false;
1188           for (var p = cs.parentNode; p; p = p.parentNode) {
1189             if ((p.tagName === 'pre' || p.tagName === 'code' ||
1190                  p.tagName === 'xmp') &&
1191                 p.className && p.className.indexOf('prettyprint') >= 0) {
1192               nested = true;
1193               break;
1194             }
1195           }
1196           if (!nested) {
1197             // fetch the content as a snippet of properly escaped HTML.
1198             // Firefox adds newlines at the end.
1199             var content = getInnerHtml(cs);
1200             content = content.replace(/(?:\r\n?|\n)$/, '');
1201
1202             // do the pretty printing
1203             var newContent = prettyPrintOne(content, langExtension);
1204
1205             // push the prettified html back into the tag.
1206             if (!isRawContent(cs)) {
1207               // just replace the old html with the new
1208               cs.innerHTML = newContent;
1209             } else {
1210               // we need to change the tag to a <pre> since <xmp>s do not allow
1211               // embedded tags such as the span tags used to attach styles to
1212               // sections of source code.
1213               var pre = document.createElement('PRE');
1214               for (var i = 0; i < cs.attributes.length; ++i) {
1215                 var a = cs.attributes[i];
1216                 if (a.specified) {
1217                   var aname = a.name.toLowerCase();
1218                   if (aname === 'class') {
1219                     pre.className = a.value;  // For IE 6
1220                   } else {
1221                     pre.setAttribute(a.name, a.value);
1222                   }
1223                 }
1224               }
1225               pre.innerHTML = newContent;
1226
1227               // remove the old
1228               cs.parentNode.replaceChild(pre, cs);
1229               cs = pre;
1230             }
1231
1232             // Replace <br>s with line-feeds so that copying and pasting works
1233             // on IE 6.
1234             // Doing this on other browsers breaks lots of stuff since \r\n is
1235             // treated as two newlines on Firefox, and doing this also slows
1236             // down rendering.
1237             if (isIE6 && cs.tagName === 'PRE') {
1238               var lineBreaks = cs.getElementsByTagName('br');
1239               for (var j = lineBreaks.length; --j >= 0;) {
1240                 var lineBreak = lineBreaks[j];
1241                 lineBreak.parentNode.replaceChild(
1242                     document.createTextNode('\r\n'), lineBreak);
1243               }
1244             }
1245           }
1246         }
1247       }
1248       if (k < elements.length) {
1249         // finish up in a continuation
1250         setTimeout(doWork, 250);
1251       } else if (opt_whenDone) {
1252         opt_whenDone();
1253       }
1254     }
1255
1256     doWork();
1257   }
1258
1259   window['PR_normalizedHtml'] = normalizedHtml;
1260   window['prettyPrintOne'] = prettyPrintOne;
1261   window['prettyPrint'] = prettyPrint;
1262   window['PR'] = {
1263         'createSimpleLexer': createSimpleLexer,
1264         'registerLangHandler': registerLangHandler,
1265         'sourceDecorator': sourceDecorator,
1266         'PR_ATTRIB_NAME': PR_ATTRIB_NAME,
1267         'PR_ATTRIB_VALUE': PR_ATTRIB_VALUE,
1268         'PR_COMMENT': PR_COMMENT,
1269         'PR_DECLARATION': PR_DECLARATION,
1270         'PR_KEYWORD': PR_KEYWORD,
1271         'PR_LITERAL': PR_LITERAL,
1272         'PR_NOCODE': PR_NOCODE,
1273         'PR_PLAIN': PR_PLAIN,
1274         'PR_PUNCTUATION': PR_PUNCTUATION,
1275         'PR_SOURCE': PR_SOURCE,
1276         'PR_STRING': PR_STRING,
1277         'PR_TAG': PR_TAG,
1278         'PR_TYPE': PR_TYPE
1279       };
1280 })();