# Provenance (commit header of the patch that introduced this file):
#   From: david-sarah
#   Date: Sun, 25 Jul 2010 09:28:49 +0000 (-0700)
#   Subject: Add missing windows/fixups.py (for setting up Unicode args and
#            output on Windows).
#   X-Git-Tag: allmydata-tahoe-1.8.0b2~58
#   X-Git-Url: https://git.rkrishnan.org/Site/Content/Exhibitors/module-simplejson.tests.html?a=commitdiff_plain;h=9d04b2a317c2ecf4a8138cca93b66d043ad79a6a;p=tahoe-lafs%2Ftahoe-lafs.git
#   New file: src/allmydata/windows/fixups.py (mode 100644, index 00000000..6a60a664)

# Module-level guard: set to True the first time initialize() gets past its
# platform check, so that later calls return early.
done = False

def initialize():
    """Apply the Windows-only Unicode fixups for console output and sys.argv.

    On win32, the first call does the following, in this order:

    1. registers a codec search function mapping 'cp65001' to the UTF-8 codec;
    2. replaces sys.stdout / sys.stderr with UnicodeOutput wrappers when they
       are still attached to file descriptors 1 / 2, or otherwise marks the
       existing stream objects as UTF-8 (best effort: any exception in this
       step is logged and swallowed);
    3. rebuilds sys.argv from the wide-character process command line,
       undoing the "\\x7f<hex>;" escaping of non-ASCII characters and
       re-encoding each argument as UTF-8;
    4. strips a trailing '.pyscript' from sys.argv[0].

    Returns True immediately, without doing anything, when the platform is
    not win32 or when the fixups have already been applied.  After actually
    applying them, the function falls off the end and so returns None.

    Any exception raised while unmangling the arguments is reported on
    sys.stderr and then re-raised.
    """
    global done
    import sys
    if sys.platform != "win32" or done:
        return True
    done = True

    # Imported lazily so that nothing below is touched on other platforms.
    import codecs, re
    from ctypes import WINFUNCTYPE, windll, CFUNCTYPE, cdll, POINTER, byref, \
        c_wchar_p, c_char_p, c_void_p, c_int, c_size_t
    from allmydata.util import log
    from allmydata.util.encodingutil import canonical_encoding

    # Work around a Python issue (the link that followed here was lost from
    # this copy of the source): make the codec name 'cp65001' resolve to the
    # UTF-8 codec.  The search function returns the UTF-8 CodecInfo for
    # 'cp65001' and None for every other name.
    # NOTE(review): presumably Python issue 6058 (missing cp65001 alias) --
    # confirm against upstream.
    codecs.register(lambda name: name == 'cp65001' and codecs.lookup('utf-8') or None)

    # Make Unicode console output work independently of the current code page.
    # This also fixes another Python issue (link lost from this copy).
    # Credit to Michael Kaplan and TZOmegaTZIOY (their links were likewise
    # lost from this copy).
    try:
        STDOUT_FILENO = 1
        STDERR_FILENO = 2
        # "Real" means the stream object still reports the standard file
        # descriptor number; a stream without fileno(), or with a different
        # descriptor, is not replaced below.
        real_stdout = hasattr(sys.stdout, 'fileno') and sys.stdout.fileno() == STDOUT_FILENO
        real_stderr = hasattr(sys.stderr, 'fileno') and sys.stderr.fileno() == STDERR_FILENO

        def force_utf8(stream, name):
            """Mark `stream` as UTF-8, logging when it declared another encoding.

            `name` is only used in the log message.
            """
            if hasattr(stream, 'encoding') and canonical_encoding(stream.encoding) != 'utf-8':
                log.msg("%s (%r) had encoding %r, but we're going to write UTF-8 to it" %
                        (name, stream, stream.encoding), level=log.CURIOUS)
            # NOTE(review): indentation was lost in this copy of the source;
            # this assignment is assumed to run unconditionally (outside the
            # `if`) -- confirm against upstream.
            stream.encoding = 'utf-8'

        if not real_stdout:
            force_utf8(sys.stdout, "sys.stdout")

        if not real_stderr:
            force_utf8(sys.stderr, "sys.stderr")

        if real_stdout or real_stderr:
            # C declarations of the MSVCRT functions and constants used below:
            #
            # FILE * _fdopen(int fd, const char *mode);
            # #define _IOLBF 0x0040
            # int setvbuf(FILE *stream, char *buffer, int mode, size_t size);
            # #define _O_U8TEXT 0x40000
            # int _setmode(int fd, int mode);
            # int fputws(const wchar_t *ws, FILE *stream);
            # int fflush(FILE *stream);

            # ctypes prototypes for the above, looked up by name in msvcrt.
            # FILE* values are passed around as opaque c_void_p.
            c_runtime = cdll.msvcrt
            NULL = None
            _fdopen = CFUNCTYPE(c_void_p, c_int, c_char_p)(("_fdopen", c_runtime))
            _IOLBF = 0x0040
            setvbuf = CFUNCTYPE(c_int, c_void_p, c_char_p, c_int, c_size_t)(("setvbuf", c_runtime))
            _O_U8TEXT = 0x40000
            _setmode = CFUNCTYPE(c_int, c_int, c_int)(("_setmode", c_runtime))
            fputws = CFUNCTYPE(c_int, c_wchar_p, c_void_p)(("fputws", c_runtime));
            fflush = CFUNCTYPE(c_int, c_void_p)(("fflush", c_runtime));

            # Maximum number of characters handed to fputws in a single call;
            # the stdio buffer below is sized from this value.
            buffer_chars = 1024

            class UnicodeOutput:
                """Write-only file-like object that emits text through MSVCRT's fputws.

                Used as a replacement for sys.stdout / sys.stderr.
                """
                def __init__(self, fileno, name):
                    """Open a C stdio stream on descriptor `fileno`.

                    `name` is stored as the object's `name` attribute.
                    """
                    self._stream = _fdopen(fileno, "w")
                    assert self._stream is not NULL

                    # Deep magic. MSVCRT supports writing wide-oriented output to stdout/stderr
                    # to the console using the Unicode APIs, but it does the conversion in the
                    # stdio buffer, so you need that buffer to be as large as the maximum amount
                    # you're going to write in a single call (in bytes, not characters).
                    # The size is in bytes: buffer_chars*4 plus 100 extra
                    # (presumably up to 4 bytes per character plus slack).
                    setvbuf(self._stream, NULL, _IOLBF, buffer_chars*4 + 100)
                    _setmode(fileno, _O_U8TEXT)

                    # Attributes of a Python 2 file object that callers may
                    # inspect.
                    self._fileno = fileno
                    self.closed = False
                    self.softspace = False
                    self.mode = 'w'
                    self.encoding = 'utf-8'
                    self.name = name

                def isatty(self):
                    """Always report False."""
                    return False
                def close(self):
                    """Mark the object closed and flush; the C stream itself is not closed."""
                    self.closed = True
                    self.flush()
                def fileno(self):
                    """Return the descriptor number this object was created with."""
                    return self._fileno
                def flush(self):
                    """Flush the underlying C stdio stream."""
                    fflush(self._stream)

                def write(self, text):
                    """Write `text` to the stream.

                    Anything that is not already a unicode object is passed
                    through str() and decoded as UTF-8 first.
                    """
                    if not isinstance(text, unicode):
                        text = str(text).decode('utf-8')
                    # Emit at most buffer_chars characters per fputws call so
                    # that a single call fits the stdio buffer set up in
                    # __init__.
                    for i in xrange(0, len(text), buffer_chars):
                        fputws(text[i:(i+buffer_chars)], self._stream)
                        # NOTE(review): indentation was lost in this copy of
                        # the source; the flush is assumed to happen after
                        # every chunk (inside the loop) -- confirm against
                        # upstream.
                        fflush(self._stream)

                def writelines(self, lines):
                    """Write each item of `lines` in turn via write()."""
                    for line in lines:
                        self.write(line)

            # NOTE(review): the name arguments below are empty strings in this
            # copy; an angle-bracketed placeholder name may have been stripped
            # along with the URLs in the comments -- confirm against upstream.
            if real_stdout:
                sys.stdout = UnicodeOutput(STDOUT_FILENO, '')

            if real_stderr:
                sys.stderr = UnicodeOutput(STDERR_FILENO, '')
    except Exception, e:
        # Best effort: a failure to fix up the output streams is logged and
        # does not prevent the argument handling below from running.
        log.msg("exception %r while fixing up sys.stdout and sys.stderr" % (e,), level=log.WEIRD)

    # Unmangle command-line arguments.
    # Fetch the process command line as wide characters and have shell32
    # split it into an argument array; argc receives the element count.
    GetCommandLineW = WINFUNCTYPE(c_wchar_p)(("GetCommandLineW", windll.kernel32))
    CommandLineToArgvW = WINFUNCTYPE(POINTER(c_wchar_p), c_wchar_p, POINTER(c_int)) \
                            (("CommandLineToArgvW", windll.shell32))

    argc = c_int(0)
    argv_unicode = CommandLineToArgvW(GetCommandLineW(), byref(argc))

    def unmangle(s):
        """Replace each "\\x7f<hex digits>;" escape in `s` with that code point's character."""
        # NOTE(review): the pattern also matches an escape with zero hex
        # digits, in which case int('', 16) raises ValueError -- confirm
        # whether that input can occur.
        return re.sub(ur'\x7f[0-9a-fA-F]*\;', lambda m: unichr(int(m.group(0)[1:-1], 16)), s)

    try:
        # Element 0 of the Windows command line is skipped, so the new
        # sys.argv[0] is the second command-line token (presumably the script
        # path following the interpreter -- verify against how this is
        # launched).
        sys.argv = [unmangle(argv_unicode[i]).encode('utf-8') for i in xrange(1, argc.value)]
    except Exception, e:
        print >>sys.stderr, "%s: could not unmangle Unicode arguments" % (sys.argv[0],)
        print >>sys.stderr, [argv_unicode[i] for i in xrange(1, argc.value)]
        raise

    # NOTE(review): if argc.value is 1 or less, sys.argv is now an empty list
    # and the sys.argv[0] lookup below raises IndexError -- confirm whether
    # that can happen in practice.
    # Drop the 9-character '.pyscript' suffix from the script name.
    if sys.argv[0].endswith('.pyscript'):
        sys.argv[0] = sys.argv[0][:-9]