Zanurkuj w Pythonie/Źródła/colorize.py

Z Wikibooks, biblioteki wolnych podręczników.
"""
Colorize Python program listings embedded in HTML pages

This script is used during the build process of "Dive Into Python"
(http://diveintopython.org/) to recreate syntax highlighting of the Python
program listings and code examples embedded in the HTML pages by wrapping
Python keywords in <span> or <font> tags.  The following
tags are presumed to contain Python code:
  <pre class="programlisting">...</pre>
  <span class="userinput">...</span>
(These tags are generated automatically by the DocBook XSL stylesheets when
the book is transformed from XML to HTML.)

Looks for 2 arguments on the command line.  The first argument is a file or directory.
If a file, the file is processed; if a directory, all .html files in the directory
are processed.

The second argument, if given, is a flag for the type of tags to wrap around
keywords.
  0 (default) - use <span class="pyxxx"> tags, where xxx in
        ('comment', 'string', 'keyword', 'function', 'class').  Actual
        syntax highlighting must be defined in a <style> definition
        elsewhere in the document, or in an external style sheet.
  1 - use <font> tags.  See ColorizeParser.fontDataMap for the color values.
  
If no arguments are given, a usage message is printed.  (The test() function
can be called from Python to colorize a file into a separate output file and
open that file in a web browser locally.)

Not safe to run on the same file(s) more than once, since it does not check for
existing <span> or <font> tags in the program listings.
"""

# Module metadata.  The $Revision$ and $Date$ values look like version-control
# keyword expansions -- presumably maintained by the VCS rather than by hand.
__author__ = "Mark Pilgrim (mark@diveintopython.org)"
__version__ = "$Revision: 1.3 $"
__date__ = "$Date: 2004/05/05 21:57:19 $"
__copyright__ = "Copyright (c) 2001 Mark Pilgrim"
__license__ = "Python"

import sys
import os
from BaseHTMLProcessor import BaseHTMLProcessor
import pyfontify

class ColorizeParser(BaseHTMLProcessor):
  """HTML processor that adds syntax-highlighting markup to Python code.

  Text inside <pre class="programlisting"> and <span class="userinput">
  elements is passed through pyfontify.fontify, and every token it reports
  is wrapped either in <span class='py...'> tags or in the <font> tags
  listed in fontDataMap.

  State kept while parsing:
    usefonts   -- true to emit <font> tags, false to emit <span> tags
    needcolor  -- true while inside an element whose text gets colorized
    colorindex -- index into self.pieces (a list this class inherits rather
                  than defines) at which the not-yet-colorized text starts;
                  0 means nothing is pending

  end_pre and end_span clear needcolor unconditionally, so colorizing also
  stops at a closing tag that belongs to an element nested in a listing.
  """

  # Opening and closing markup per token type, used when usefonts is true.
  fontDataMap = {
    "comment": ("<font color='green'><i>", "</i></font>"),
    "string": ("<font color='olive'>", "</font>"),
    "keyword": ("<font color='navy'><b>", "</b></font>"),
    "function": ("<font color='teal'><b>", "</b></font>"),
    "class": ("<font color='blue'><b>", "</b></font>"),
  }

  def __init__(self, usefonts=0):
    """Create a parser; a true usefonts selects <font> over <span> tags."""
    BaseHTMLProcessor.__init__(self)
    self.usefonts = usefonts

  def reset(self):
    """Reset the base processor and clear the colorizing state."""
    BaseHTMLProcessor.reset(self)
    self.needcolor = 0
    self.colorindex = 0

  def HTMLfontify(self, text):
    """Return text with highlighting markup around each fontify token."""
    tokens = pyfontify.fontify(text)
    # Go through the tokens back to front so that (assuming fontify reports
    # them in text order) inserted markup never shifts the offsets of the
    # tokens still to be handled.
    tokens.reverse()
    for kind, begin, stop, unused in tokens:
      if self.usefonts:
        markup = self.fontDataMap[kind]
        opener, closer = markup[0], markup[1]
      else:
        opener, closer = "<span class='py%s'>" % (kind,), "</span>"
      text = text[:begin] + opener + text[begin:stop] + closer + text[stop:]
    return text

  def flushcolor(self):
    """Replace the pending pieces with their colorized form, if any."""
    if not self.colorindex:
      return
    mark = self.colorindex
    pending = "".join(self.pieces[mark:])
    self.pieces = self.pieces[:mark]
    # Reset before handing the text back so nothing is treated as pending.
    self.colorindex = 0
    BaseHTMLProcessor.handle_data(self, self.HTMLfontify(pending))

  def _mark_pending_start(self):
    """While colorizing, record that pending text starts after the pieces so far."""
    if self.needcolor:
      self.colorindex = len(self.pieces)

  def unknown_starttag(self, tag, attrs):
    """Colorize pending text, emit the start tag, then resume collecting."""
    self.flushcolor()
    BaseHTMLProcessor.unknown_starttag(self, tag, attrs)
    self._mark_pending_start()

  def unknown_endtag(self, tag):
    """Colorize pending text, emit the end tag, then resume collecting."""
    self.flushcolor()
    BaseHTMLProcessor.unknown_endtag(self, tag)
    self._mark_pending_start()

  def _open_code_element(self, tag, attrs, cssclass):
    """Emit a start tag and begin colorizing if it carries class=cssclass."""
    self.unknown_starttag(tag, attrs)
    if ("class", cssclass) in attrs:
      self.needcolor = 1
      self.colorindex = len(self.pieces)

  def _close_code_element(self, tag):
    """Stop colorizing and emit the end tag."""
    # needcolor is cleared first: unknown_endtag then still flushes the
    # pending text but does not start collecting again afterwards.
    self.needcolor = 0
    self.unknown_endtag(tag)

  def start_pre(self, attrs):
    """Handle <pre>; class="programlisting" turns colorizing on."""
    self._open_code_element("pre", attrs, "programlisting")

  def end_pre(self):
    """Handle </pre>; turns colorizing off."""
    self._close_code_element("pre")

  def start_span(self, attrs):
    """Handle <span>; class="userinput" turns colorizing on."""
    self._open_code_element("span", attrs, "userinput")

  def end_span(self):
    """Handle </span>; turns colorizing off."""
    self._close_code_element("span")
    
def process(filename, usefonts=0, outfile=None):
  """Colorize the Python code in one HTML file and write the result.

  filename -- path of the HTML file to read
  usefonts -- passed to ColorizeParser: true selects <font> tags, false
              (the default) selects <span class='py...'> tags
  outfile  -- path to write the result to; when omitted (or otherwise
              false) filename itself is overwritten

  Returns the colorized HTML as a string.  Errors from opening, reading,
  parsing or writing propagate to the caller; the files are closed either way.
  """
  if not outfile:
    outfile = filename
  # Read the whole input and close it before parsing, so the handle is
  # released even if the parser raises and before outfile (possibly the very
  # same path) is opened for writing.
  with open(filename, "r") as infile:
    source = infile.read()
  parser = ColorizeParser(usefonts)
  parser.feed(source)
  output = parser.output()
  with open(outfile, "w") as sink:
    sink.write(output)
  return output

def test(filename, usefonts=0, outfile="c:\\out.html"):
  """Colorize filename into outfile, then open outfile in a web browser.

  filename and usefonts are handed to process() unchanged; outfile defaults
  to a fixed path on the C: drive.  Nothing is returned.
  """
  process(filename, usefonts=usefonts, outfile=outfile)
  from webbrowser import open as show_in_browser
  show_in_browser(outfile)

if __name__ == "__main__":
  # Command line: colorize.py directory-or-file [usefonts]
  if sys.argv[1:]:
    filedir = sys.argv[1]
    # The flag arrives as a string and the string "0" is true, so it has to
    # be converted to a number; otherwise an explicit 0 would select <font>
    # tags instead of the default <span> tags.
    usefonts = 0
    if sys.argv[2:]:
      try:
        usefonts = int(sys.argv[2])
      except ValueError:
        # Non-numeric flags keep their previous meaning: any non-empty
        # value enables <font> tags.
        usefonts = int(bool(sys.argv[2]))
    if os.path.isdir(filedir):
      import glob
      # Match the .html files directly inside filedir.  A '**' path component
      # is not recursive in a plain glob.glob() call (it behaves like '*'),
      # so it only found files one directory down and skipped these.
      for f in glob.glob(os.path.join(filedir, '*.html')):
        print("Colorizing %s" % os.path.basename(f))
        process(f, usefonts)
    else:
      print("Colorizing %s" % os.path.basename(filedir))
      process(filedir, usefonts)
  else:
    print('usage: colorize.py directory-or-file')