From: Daira Hopwood
Date: Mon, 28 Dec 2015 20:31:21 +0000 (+0000)
Subject: Add FilePath support functions in encodingutil.py.
X-Git-Url: https://git.rkrishnan.org/?p=tahoe-lafs%2Ftahoe-lafs.git;a=commitdiff_plain;h=be1600f4570d8d7b4129b7d7586b6faad1a40bc6

Add FilePath support functions in encodingutil.py.

Signed-off-by: Daira Hopwood
---

diff --git a/src/allmydata/test/test_encodingutil.py b/src/allmydata/test/test_encodingutil.py
index 9d6fe8f3..0632bea0 100644
--- a/src/allmydata/test/test_encodingutil.py
+++ b/src/allmydata/test/test_encodingutil.py
@@ -61,12 +61,15 @@ import os, sys, locale
 
 from twisted.trial import unittest
 
+from twisted.python.filepath import FilePath
+
 from allmydata.test.common_util import ReallyEqualMixin
 from allmydata.util import encodingutil, fileutil
 from allmydata.util.encodingutil import argv_to_unicode, unicode_to_url, \
     unicode_to_output, quote_output, quote_path, quote_local_unicode_path, \
-    unicode_platform, listdir_unicode, FilenameEncodingError, get_io_encoding, \
-    get_filesystem_encoding, to_str, from_utf8_or_none, _reload
+    quote_filepath, unicode_platform, listdir_unicode, FilenameEncodingError, \
+    get_io_encoding, get_filesystem_encoding, to_str, from_utf8_or_none, _reload, \
+    to_filepath, extend_filepath, unicode_from_filepath, unicode_segments_from
 from allmydata.dirnode import normalize
 
 from twisted.python import usage
@@ -410,6 +413,9 @@ class QuoteOutput(ReallyEqualMixin, unittest.TestCase):
         self.test_quote_output_utf8(None)
 
 
+def win32_other(win32, other):
+    return win32 if sys.platform == "win32" else other
+
 class QuotePaths(ReallyEqualMixin, unittest.TestCase):
     def test_quote_path(self):
         self.failUnlessReallyEqual(quote_path([u'foo', u'bar']), "'foo/bar'")
@@ -419,9 +425,6 @@ class QuotePaths(ReallyEqualMixin, unittest.TestCase):
         self.failUnlessReallyEqual(quote_path([u'foo', u'\nbar'], quotemarks=True), '"foo/\\x0abar"')
         self.failUnlessReallyEqual(quote_path([u'foo', u'\nbar'], quotemarks=False), '"foo/\\x0abar"')
 
-        def win32_other(win32, other):
-            return win32 if sys.platform == "win32" else other
-
         self.failUnlessReallyEqual(quote_local_unicode_path(u"\\\\?\\C:\\foo"),
                                    win32_other("'C:\\foo'", "'\\\\?\\C:\\foo'"))
         self.failUnlessReallyEqual(quote_local_unicode_path(u"\\\\?\\C:\\foo", quotemarks=True),
@@ -435,6 +438,73 @@ class QuotePaths(ReallyEqualMixin, unittest.TestCase):
         self.failUnlessReallyEqual(quote_local_unicode_path(u"\\\\?\\UNC\\foo\\bar", quotemarks=False),
                                    win32_other("\\\\foo\\bar", "\\\\?\\UNC\\foo\\bar"))
 
+    def test_quote_filepath(self):
+        foo_bar_fp = FilePath(win32_other(u'C:\\foo\\bar', u'/foo/bar'))
+        self.failUnlessReallyEqual(quote_filepath(foo_bar_fp),
+                                   win32_other("'C:\\foo\\bar'", "'/foo/bar'"))
+        self.failUnlessReallyEqual(quote_filepath(foo_bar_fp, quotemarks=True),
+                                   win32_other("'C:\\foo\\bar'", "'/foo/bar'"))
+        self.failUnlessReallyEqual(quote_filepath(foo_bar_fp, quotemarks=False),
+                                   win32_other("C:\\foo\\bar", "/foo/bar"))
+
+        if sys.platform == "win32":
+            foo_longfp = FilePath(u'\\\\?\\C:\\foo')
+            self.failUnlessReallyEqual(quote_filepath(foo_longfp),
+                                       "'C:\\foo'")
+            self.failUnlessReallyEqual(quote_filepath(foo_longfp, quotemarks=True),
+                                       "'C:\\foo'")
+            self.failUnlessReallyEqual(quote_filepath(foo_longfp, quotemarks=False),
+                                       "C:\\foo")
+
+
+class FilePaths(ReallyEqualMixin, unittest.TestCase):
+    def test_to_filepath(self):
+        foo_u = win32_other(u'C:\\foo', u'/foo')
+
+        nosep_fp = to_filepath(foo_u)
+        sep_fp = to_filepath(foo_u + os.path.sep)
+
+        for fp in (nosep_fp, sep_fp):
+            self.failUnlessReallyEqual(fp, FilePath(foo_u))
+            if encodingutil.use_unicode_filepath:
+                self.failUnlessReallyEqual(fp.path, foo_u)
+
+        if sys.platform == "win32":
+            long_u = u'\\\\?\\C:\\foo'
+            longfp = to_filepath(long_u + u'\\')
+            self.failUnlessReallyEqual(longfp, FilePath(long_u))
+            self.failUnlessReallyEqual(longfp.path, long_u)
+
+    def test_extend_filepath(self):
+        foo_bfp = FilePath(win32_other(b'C:\\foo', b'/foo'))
+        foo_ufp = FilePath(win32_other(u'C:\\foo', u'/foo'))
+        foo_bar_baz_u = win32_other(u'C:\\foo\\bar\\baz', u'/foo/bar/baz')
+
+        for foo_fp in (foo_bfp, foo_ufp):
+            fp = extend_filepath(foo_fp, [u'bar', u'baz'])
+            self.failUnlessReallyEqual(fp, FilePath(foo_bar_baz_u))
+            if encodingutil.use_unicode_filepath:
+                self.failUnlessReallyEqual(fp.path, foo_bar_baz_u)
+
+    def test_unicode_from_filepath(self):
+        foo_bfp = FilePath(win32_other(b'C:\\foo', b'/foo'))
+        foo_ufp = FilePath(win32_other(u'C:\\foo', u'/foo'))
+        foo_u = win32_other(u'C:\\foo', u'/foo')
+
+        for foo_fp in (foo_bfp, foo_ufp):
+            self.failUnlessReallyEqual(unicode_from_filepath(foo_fp), foo_u)
+
+    def test_unicode_segments_from(self):
+        foo_bfp = FilePath(win32_other(b'C:\\foo', b'/foo'))
+        foo_ufp = FilePath(win32_other(u'C:\\foo', u'/foo'))
+        foo_bar_baz_bfp = FilePath(win32_other(b'C:\\foo\\bar\\baz', b'/foo/bar/baz'))
+        foo_bar_baz_ufp = FilePath(win32_other(u'C:\\foo\\bar\\baz', u'/foo/bar/baz'))
+
+        for foo_fp in (foo_bfp, foo_ufp):
+            for foo_bar_baz_fp in (foo_bar_baz_bfp, foo_bar_baz_ufp):
+                self.failUnlessReallyEqual(unicode_segments_from(foo_bar_baz_fp, foo_fp),
+                                           [u'bar', u'baz'])
+
 
 class UbuntuKarmicUTF8(EncodingUtil, unittest.TestCase):
     uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py
index 61ce23ec..65f5911a 100644
--- a/src/allmydata/util/encodingutil.py
+++ b/src/allmydata/util/encodingutil.py
@@ -6,8 +6,9 @@ unicode and back.
 import sys, os, re, locale
 from types import NoneType
 
-from allmydata.util.assertutil import precondition
+from allmydata.util.assertutil import precondition, _assert
 from twisted.python import usage
+from twisted.python.filepath import FilePath
 from allmydata.util import log
 from allmydata.util.fileutil import abspath_expanduser_unicode
 
@@ -35,9 +36,10 @@ def check_encoding(encoding):
 filesystem_encoding = None
 io_encoding = None
 is_unicode_platform = False
+use_unicode_filepath = False
 
 def _reload():
-    global filesystem_encoding, io_encoding, is_unicode_platform
+    global filesystem_encoding, io_encoding, is_unicode_platform, use_unicode_filepath
 
     filesystem_encoding = canonical_encoding(sys.getfilesystemencoding())
     check_encoding(filesystem_encoding)
@@ -61,6 +63,12 @@ def _reload():
 
     is_unicode_platform = sys.platform in ["win32", "darwin"]
 
+    # Despite the Unicode-mode FilePath support added to Twisted in
+    # <https://twistedmatrix.com/trac/ticket/7805>, we can't yet use
+    # Unicode-mode FilePaths with INotify on non-Windows platforms
+    # due to <https://twistedmatrix.com/trac/ticket/7928>.
+    use_unicode_filepath = sys.platform == "win32"
+
 _reload()
 
 
@@ -249,6 +257,54 @@ def quote_local_unicode_path(path, quotemarks=True):
 
     return quote_output(path, quotemarks=quotemarks, quote_newlines=True)
 
+def quote_filepath(path, quotemarks=True):
+    return quote_local_unicode_path(unicode_from_filepath(path), quotemarks=quotemarks)
+
+def extend_filepath(fp, segments):
+    # We cannot use FilePath.preauthChild, because
+    # * it has the security flaw described in <https://twistedmatrix.com/trac/ticket/6527>;
+    # * it may return a FilePath in the wrong mode.
+
+    for segment in segments:
+        fp = fp.child(segment)
+
+    if isinstance(fp.path, unicode) and not use_unicode_filepath:
+        return FilePath(fp.path.encode(filesystem_encoding))
+    else:
+        return fp
+
+def to_filepath(path):
+    precondition(isinstance(path, unicode if use_unicode_filepath else basestring),
+                 path=path)
+
+    if isinstance(path, unicode) and not use_unicode_filepath:
+        path = path.encode(filesystem_encoding)
+
+    if sys.platform == "win32":
+        _assert(isinstance(path, unicode), path=path)
+        if path.startswith(u"\\\\?\\") and len(path) > 4:
+            # FilePath normally strips trailing path separators, but not in this case.
+            path = path.rstrip(u"\\")
+
+    return FilePath(path)
+
+def _decode(s):
+    precondition(isinstance(s, basestring), s=s)
+
+    if isinstance(s, bytes):
+        return s.decode(filesystem_encoding)
+    else:
+        return s
+
+def unicode_from_filepath(fp):
+    precondition(isinstance(fp, FilePath), fp=fp)
+    return _decode(fp.path)
+
+def unicode_segments_from(base_fp, ancestor_fp):
+    precondition(isinstance(base_fp, FilePath), base_fp=base_fp)
+    precondition(isinstance(ancestor_fp, FilePath), ancestor_fp=ancestor_fp)
+
+    return base_fp.asTextMode().segmentsFrom(ancestor_fp.asTextMode())
 
 def unicode_platform():
     """
@@ -296,3 +352,6 @@ def listdir_unicode(path):
         return os.listdir(path)
     else:
         return listdir_unicode_fallback(path)
+
+def listdir_filepath(fp):
+    return listdir_unicode(unicode_from_filepath(fp))