]> git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blob - src/allmydata/windows/fixups.py
Add missing windows/fixups.py (for setting up Unicode args and output on Windows).
[tahoe-lafs/tahoe-lafs.git] / src / allmydata / windows / fixups.py
1
# Module-level guard: initialize() sets this to True the first time it runs on
# win32, and returns early on later calls once it is set.
done = False
3
def initialize():
    """Apply one-time Unicode fixups for console output and sys.argv on Windows.

    On win32, the first call does three things:

    1. Registers 'cp65001' as an alias for the UTF-8 codec.
    2. Replaces sys.stdout / sys.stderr with wide-character writers when they
       are the real process streams (file descriptors 1 and 2); otherwise the
       existing stream object is marked as UTF-8. This step is best-effort:
       any exception is logged and otherwise ignored.
    3. Rebuilds sys.argv (as UTF-8 byte strings) from the Unicode command
       line returned by the Win32 API, undoing the "\\x7f<hex>;" escaping,
       and strips a trailing '.pyscript' from sys.argv[0].

    Returns True if nothing was done (not running on win32, or already
    initialized); otherwise falls off the end and returns None.

    Raises whatever unmangling the arguments raises, after writing a
    diagnostic to sys.stderr.
    """
    global done
    import sys
    if sys.platform != "win32" or done:
        return True
    done = True

    import codecs
    import re
    from ctypes import WINFUNCTYPE, windll, CFUNCTYPE, cdll, POINTER, byref, \
        c_wchar_p, c_char_p, c_void_p, c_int, c_size_t
    from allmydata.util import log
    from allmydata.util.encodingutil import canonical_encoding

    # Work around <http://bugs.python.org/issue6058>.
    def lookup_cp65001(name):
        if name == 'cp65001':
            return codecs.lookup('utf-8')
        return None
    codecs.register(lookup_cp65001)

    # Make Unicode console output work independently of the current code page.
    # This also fixes <http://bugs.python.org/issue1602>.
    # Credit to Michael Kaplan <http://blogs.msdn.com/b/michkap/archive/2008/03/18/8306597.aspx>
    # and TZOmegaTZIOY
    # <http://stackoverflow.com/questions/878972/windows-cmd-encoding-change-causes-python-crash/1432462#1432462>.
    try:
        STDOUT_FILENO = 1
        STDERR_FILENO = 2
        real_stdout = hasattr(sys.stdout, 'fileno') and sys.stdout.fileno() == STDOUT_FILENO
        real_stderr = hasattr(sys.stderr, 'fileno') and sys.stderr.fileno() == STDERR_FILENO

        def force_utf8(stream, name):
            # Used for streams that are not the real process stdout/stderr:
            # note a differing declared encoding, then declare UTF-8 on it.
            if hasattr(stream, 'encoding') and canonical_encoding(stream.encoding) != 'utf-8':
                log.msg("%s (%r) had encoding %r, but we're going to write UTF-8 to it" %
                        (name, stream, stream.encoding), level=log.CURIOUS)
            stream.encoding = 'utf-8'

        if not real_stdout:
            force_utf8(sys.stdout, "sys.stdout")

        if not real_stderr:
            force_utf8(sys.stderr, "sys.stderr")

        if real_stdout or real_stderr:
            # FILE * _fdopen(int fd, const char *mode);
            # #define _IOLBF 0x0040
            # int setvbuf(FILE *stream, char *buffer, int mode, size_t size);
            # #define _O_U8TEXT 0x40000
            # int _setmode(int fd, int mode);
            # int fputws(const wchar_t *ws, FILE *stream);
            # int fflush(FILE *stream);

            c_runtime = cdll.msvcrt
            NULL = None
            _fdopen = CFUNCTYPE(c_void_p, c_int, c_char_p)(("_fdopen", c_runtime))
            _IOLBF = 0x0040
            setvbuf = CFUNCTYPE(c_int, c_void_p, c_char_p, c_int, c_size_t)(("setvbuf", c_runtime))
            _O_U8TEXT = 0x40000
            _setmode = CFUNCTYPE(c_int, c_int, c_int)(("_setmode", c_runtime))
            fputws = CFUNCTYPE(c_int, c_wchar_p, c_void_p)(("fputws", c_runtime))
            fflush = CFUNCTYPE(c_int, c_void_p)(("fflush", c_runtime))

            # Maximum number of characters handed to fputws in one call.
            buffer_chars = 1024

            class UnicodeOutput:
                """File-like writer that sends text to a C runtime stream via fputws."""

                def __init__(self, fileno, name):
                    self._stream = _fdopen(fileno, "w")
                    # An explicit check rather than an assert, so that it still
                    # runs under -O; a NULL stream must never reach setvbuf.
                    if self._stream is NULL:
                        raise IOError("_fdopen(%r, 'w') failed for %s" % (fileno, name))

                    # Deep magic. MSVCRT supports writing wide-oriented output to stdout/stderr
                    # to the console using the Unicode APIs, but it does the conversion in the
                    # stdio buffer, so you need that buffer to be as large as the maximum amount
                    # you're going to write in a single call (in bytes, not characters).
                    setvbuf(self._stream, NULL, _IOLBF, buffer_chars*4 + 100)
                    _setmode(fileno, _O_U8TEXT)

                    self._fileno = fileno
                    self.closed = False
                    self.softspace = False
                    self.mode = 'w'
                    self.encoding = 'utf-8'
                    self.name = name

                def isatty(self):
                    return False

                def close(self):
                    # Only marks the wrapper closed and flushes; the underlying
                    # C stream is not closed here.
                    self.closed = True
                    self.flush()

                def fileno(self):
                    return self._fileno

                def flush(self):
                    fflush(self._stream)

                def write(self, text):
                    # Byte strings are taken to be UTF-8.
                    if not isinstance(text, unicode):
                        text = str(text).decode('utf-8')
                    # Write in chunks of at most buffer_chars characters so that
                    # each call fits in the stdio buffer sized in __init__.
                    for i in xrange(0, len(text), buffer_chars):
                        fputws(text[i:(i+buffer_chars)], self._stream)
                        fflush(self._stream)

                def writelines(self, lines):
                    for line in lines:
                        self.write(line)

            if real_stdout:
                sys.stdout = UnicodeOutput(STDOUT_FILENO, '<Unicode stdout>')

            if real_stderr:
                sys.stderr = UnicodeOutput(STDERR_FILENO, '<Unicode stderr>')
    except Exception:
        # Best effort: output fixups must not prevent startup.
        # sys.exc_info() is used so this handler is spelled the same way on
        # every Python version.
        e = sys.exc_info()[1]
        log.msg("exception %r while fixing up sys.stdout and sys.stderr" % (e,), level=log.WEIRD)

    # Unmangle command-line arguments.
    GetCommandLineW = WINFUNCTYPE(c_wchar_p)(("GetCommandLineW", windll.kernel32))
    CommandLineToArgvW = WINFUNCTYPE(POINTER(c_wchar_p), c_wchar_p, POINTER(c_int)) \
                            (("CommandLineToArgvW", windll.shell32))

    argc = c_int(0)
    argv_unicode = CommandLineToArgvW(GetCommandLineW(), byref(argc))
    raw_args = [argv_unicode[i] for i in xrange(argc.value)]

    def unmangle(s):
        # Replace each "\x7f<hex digits>;" escape with the character it encodes.
        return re.sub(u'\\x7f([0-9a-fA-F]*)\\;', lambda m: unichr(int(m.group(1), 16)), s)

    try:
        argv = [unmangle(arg).encode('utf-8') for arg in raw_args]
    except Exception:
        program = sys.argv[0] if sys.argv else ''
        sys.stderr.write("%s:  could not unmangle Unicode arguments\n" % (program,))
        sys.stderr.write("%s\n" % (raw_args,))
        raise

    if argv:
        # Keep only as many trailing arguments as the existing sys.argv holds.
        # Dropping exactly one leading element is only right when the command
        # line is "<interpreter> <script> args..."; interpreter options,
        # wrapper runners or a frozen executable change how many leading
        # elements do not belong in sys.argv.
        expected = len(sys.argv)
        if expected:
            sys.argv = argv[-expected:]
        else:
            # No existing sys.argv to compare against: drop only the
            # first element, as the historical behaviour did.
            sys.argv = argv[1:]

    if sys.argv and sys.argv[0].endswith('.pyscript'):
        sys.argv[0] = sys.argv[0][:-9]