"""Convert Cyrillic from iso-8859-1 Unicode-encoded to KOI8-R-encoded
This script is used during the build process of the Russian translation
of "Dive Into Python" (http://diveintopython.org/).
It takes one argument, which can be either an HTML file or a directory.
If a file, it converts the file in place; if a directory, it converts
every HTML file in the immediate directory (but not recursively).
Safe but pointless to run more than once on the same file or directory.
"""
# Module metadata.
# NOTE(review): the "$Revision: ... $" / "$Date: ... $" values look like
# version-control keyword expansions -- presumably rewritten by the VCS on
# commit; confirm before editing them by hand.
__author__ = "Mark Pilgrim (mark@diveintopython.org)"
__version__ = "$Revision: 1.2 $"
__date__ = "$Date: 2004/05/05 21:57:19 $"
__copyright__ = "Copyright (c) 2001 Mark Pilgrim"
__license__ = "Python"
import os
import sys
import re
# Lookup table: HTML numeric character reference -> single KOI8-R byte value.
#
# Keys are the exact text matched by the `&#[0-9]{4,4};` pattern used with this
# table (the decimal Unicode code point of a Russian letter); a key in any
# other form can never be found by the lookup.  Values are one-character
# strings whose ordinal is the KOI8-R byte for that letter.
unicodeToKOI8R = {
    # Capital IO (U+0401)
    '&#1025;': '\xb3',
    # Capital letters U+0410..U+042F
    '&#1040;': '\xe1',
    '&#1041;': '\xe2',
    '&#1042;': '\xf7',
    '&#1043;': '\xe7',
    '&#1044;': '\xe4',
    '&#1045;': '\xe5',
    '&#1046;': '\xf6',
    '&#1047;': '\xfa',
    '&#1048;': '\xe9',
    '&#1049;': '\xea',
    '&#1050;': '\xeb',
    '&#1051;': '\xec',
    '&#1052;': '\xed',
    '&#1053;': '\xee',
    '&#1054;': '\xef',
    '&#1055;': '\xf0',
    '&#1056;': '\xf2',
    '&#1057;': '\xf3',
    '&#1058;': '\xf4',
    '&#1059;': '\xf5',
    '&#1060;': '\xe6',
    '&#1061;': '\xe8',
    '&#1062;': '\xe3',
    '&#1063;': '\xfe',
    '&#1064;': '\xfb',
    '&#1065;': '\xfd',
    '&#1066;': '\xff',
    '&#1067;': '\xf9',
    '&#1068;': '\xf8',
    '&#1069;': '\xfc',
    '&#1070;': '\xe0',
    '&#1071;': '\xf1',
    # Small letters U+0430..U+044F
    '&#1072;': '\xc1',
    '&#1073;': '\xc2',
    '&#1074;': '\xd7',
    '&#1075;': '\xc7',
    '&#1076;': '\xc4',
    '&#1077;': '\xc5',
    '&#1078;': '\xd6',
    '&#1079;': '\xda',
    '&#1080;': '\xc9',
    '&#1081;': '\xca',
    '&#1082;': '\xcb',
    '&#1083;': '\xcc',
    '&#1084;': '\xcd',
    '&#1085;': '\xce',
    '&#1086;': '\xcf',
    '&#1087;': '\xd0',
    '&#1088;': '\xd2',
    '&#1089;': '\xd3',
    '&#1090;': '\xd4',
    '&#1091;': '\xd5',
    '&#1092;': '\xc6',
    '&#1093;': '\xc8',
    '&#1094;': '\xc3',
    '&#1095;': '\xde',
    '&#1096;': '\xdb',
    '&#1097;': '\xdd',
    '&#1098;': '\xdf',
    '&#1099;': '\xd9',
    '&#1100;': '\xd8',
    '&#1101;': '\xdc',
    '&#1102;': '\xc0',
    '&#1103;': '\xd1',
    # Small io (U+0451)
    '&#1105;': '\xa3',
}
# Numeric character references with exactly four decimal digits, e.g. "&#1040;".
unicodePattern = re.compile(
    r'&#[0-9]{4,4};'
)
# Any spelling (upper/lower/mixed case) of the source charset label.
charsetPattern = re.compile(
    r'ISO-8859-1',
    re.I,
)
def translateMatch(match):
    """Return the replacement text for one regular-expression match.

    Intended as the replacement callback for ``unicodePattern.sub``.

    match -- a match object; its whole matched text (group 0) is looked up
             in the ``unicodeToKOI8R`` table.

    Returns the table entry for the matched text, or the matched text itself,
    unchanged, when the table has no entry for it.
    """
    entity = match.group(0)
    # dict.get() performs the lookup-with-fallback in one step; the former
    # dict.has_key() call does not exist on Python 3.
    return unicodeToKOI8R.get(entity, entity)
def translateBuffer(buffer):
    """Return a converted copy of the text in *buffer*.

    Two substitutions are applied, in this order:
    1. every match of ``unicodePattern`` is passed through ``translateMatch``;
    2. every match of ``charsetPattern`` is replaced by the literal 'KOI8-R'.
    """
    withLettersConverted = unicodePattern.sub(translateMatch, buffer)
    return charsetPattern.sub('KOI8-R', withLettersConverted)
def translateFile(filename, outfilename=None):
    """Convert one file with ``translateBuffer`` and write the result.

    filename    -- path of the file to read.
    outfilename -- path to write the converted text to; when omitted (or
                   otherwise false) the input file is overwritten in place.

    The whole file is read into memory before the output is opened, so
    converting in place is safe.  Errors from opening, reading or writing the
    files propagate to the caller.
    """
    if not outfilename:
        outfilename = filename
    # Read raw bytes and decode as Latin-1: every byte maps to exactly one
    # character, so nothing is lost and the result does not depend on the
    # platform's default encoding.
    with open(filename, 'rb') as fsock:
        text = fsock.read().decode('latin-1')
    text = translateBuffer(text)
    # Encoding back as Latin-1 turns each character (including the
    # '\xNN' replacement characters) into the single byte with that value,
    # which is what the binary-mode output file needs.
    with open(outfilename, 'wb') as fsock:
        fsock.write(text.encode('latin-1'))
def htmlFilter(filename):
    """Return True when *filename* has the extension '.html' (exact, case-sensitive)."""
    _root, extension = os.path.splitext(filename)
    return extension == '.html'
def translateDirectory(directoryname, filterFunc=htmlFilter):
    """Convert, in place, the selected entries directly inside a directory.

    directoryname -- directory whose immediate entries are considered
                     (subdirectories are not descended into).
    filterFunc    -- predicate called with each entry's joined path; entries
                     for which it returns a true value are passed to
                     ``translateFile``.  Defaults to ``htmlFilter``.

    Returns None.
    """
    candidates = [os.path.join(directoryname, entry)
                  for entry in os.listdir(directoryname)]
    # An explicit loop guarantees the conversions actually run: map() is lazy
    # on Python 3, so calling it only for its side effects does nothing there.
    for path in filter(filterFunc, candidates):
        translateFile(path)
if __name__ == "__main__":
    # A path argument is required; report usage instead of failing with an
    # IndexError traceback when it is missing.  Extra arguments are ignored.
    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s <html-file-or-directory>\n" % sys.argv[0])
        sys.exit(2)
    name = sys.argv[1]
    # A directory has its immediate entries converted; anything else is
    # treated as a single file to convert in place.
    if os.path.isdir(name):
        translateDirectory(name)
    else:
        translateFile(name)