From: Francois Deppierraz Date: Thu, 20 May 2010 00:43:56 +0000 (-0700) Subject: Fix handling of correctly encoded unicode filenames (#534) X-Git-Url: https://git.rkrishnan.org/...?a=commitdiff_plain;h=496802420ebd25da630d516880e3f4c6c8258e97;p=tahoe-lafs%2Ftahoe-lafs.git Fix handling of correctly encoded unicode filenames (#534) Tahoe CLI commands working on local files, for instance 'tahoe cp' or 'tahoe backup', have been improved to correctly handle filenames containing non-ASCII characters. In the case where Tahoe encounters a filename which cannot be decoded using the system encoding, an error will be returned and the operation will fail. Under Linux, this typically happens when the filesystem contains filenames encoded with another encoding, for instance latin1, than the system locale, for instance UTF-8. In such a case, you'll need to fix your system with tools such as 'convmv' before using Tahoe CLI. All CLI commands have been improved to support non-ASCII parameters such as filenames and aliases on all supported Operating Systems except Windows as of now. --- diff --git a/NEWS b/NEWS index a4c1ee53..867fdd6d 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,26 @@ User visible changes in Tahoe-LAFS. -*- outline -*- +* Release 1.7.0 + +** Bugfixes + +*** Unicode filenames handling + +Tahoe CLI commands working on local files, for instance 'tahoe cp' or 'tahoe +backup', have been improved to correctly handle filenames containing non-ASCII +characters. + +In the case where Tahoe encounters a filename which cannot be decoded using the +system encoding, an error will be returned and the operation will fail. Under +Linux, this typically happens when the filesystem contains filenames encoded +with another encoding, for instance latin1, than the system locale, for +instance UTF-8. In such a case, you'll need to fix your system with tools such +as 'convmv' before using Tahoe CLI. 
+ +All CLI commands have been improved to support non-ASCII parameters such as +filenames and aliases on all supported Operating Systems except Windows as of +now. + * Release 1.6.1 (2010-02-27) ** Bugfixes diff --git a/docs/frontends/CLI.txt b/docs/frontends/CLI.txt index 99fcba5a..bbfdea6c 100644 --- a/docs/frontends/CLI.txt +++ b/docs/frontends/CLI.txt @@ -136,13 +136,13 @@ starting directory provides a different, possibly overlapping perspective on the graph of files and directories. Each tahoe node remembers a list of starting points, named "aliases", -in a file named ~/.tahoe/private/aliases . These aliases are short -strings that stand in for a directory read- or write- cap. If you use -the command line "ls" without any "[STARTING_DIR]:" argument, then it -will use the default alias, which is "tahoe", therefore "tahoe ls" has -the same effect as "tahoe ls tahoe:". The same goes for the other -commands which can reasonably use a default alias: get, put, mkdir, -mv, and rm. +in a file named ~/.tahoe/private/aliases . These aliases are short UTF-8 +encoded strings that stand in for a directory read- or write- cap. If +you use the command line "ls" without any "[STARTING_DIR]:" argument, +then it will use the default alias, which is "tahoe", therefore "tahoe +ls" has the same effect as "tahoe ls tahoe:". The same goes for the +other commands which can reasonably use a default alias: get, put, +mkdir, mv, and rm. 
For backwards compatibility with Tahoe-1.0, if the "tahoe": alias is not found in ~/.tahoe/private/aliases, the CLI will use the contents of diff --git a/src/allmydata/scripts/cli.py b/src/allmydata/scripts/cli.py index 823f7458..80e102b9 100644 --- a/src/allmydata/scripts/cli.py +++ b/src/allmydata/scripts/cli.py @@ -1,6 +1,7 @@ import os.path, re, sys, fnmatch from twisted.python import usage from allmydata.scripts.common import BaseOptions, get_aliases +from allmydata.util.stringutils import argv_to_unicode NODEURL_RE=re.compile("http(s?)://([^:]*)(:([1-9][0-9]*))?") @@ -49,12 +50,12 @@ class VDriveOptions(BaseOptions, usage.Options): class MakeDirectoryOptions(VDriveOptions): def parseArgs(self, where=""): - self.where = where + self.where = argv_to_unicode(where) longdesc = """Create a new directory, either unlinked or as a subdirectory.""" class AddAliasOptions(VDriveOptions): def parseArgs(self, alias, cap): - self.alias = alias + self.alias = argv_to_unicode(alias) self.cap = cap def getSynopsis(self): @@ -64,7 +65,7 @@ class AddAliasOptions(VDriveOptions): class CreateAliasOptions(VDriveOptions): def parseArgs(self, alias): - self.alias = alias + self.alias = argv_to_unicode(alias) def getSynopsis(self): return "%s create-alias ALIAS" % (os.path.basename(sys.argv[0]),) @@ -83,7 +84,7 @@ class ListOptions(VDriveOptions): ("json", None, "Show the raw JSON output"), ] def parseArgs(self, where=""): - self.where = where + self.where = argv_to_unicode(where) longdesc = """ List the contents of some portion of the grid. 
@@ -118,8 +119,13 @@ class GetOptions(VDriveOptions): # tahoe get FOO bar # write to local file # tahoe get tahoe:FOO bar # same - self.from_file = arg1 - self.to_file = arg2 + self.from_file = argv_to_unicode(arg1) + + if arg2: + self.to_file = argv_to_unicode(arg2) + else: + self.to_file = None + if self.to_file == "-": self.to_file = None @@ -151,15 +157,15 @@ class PutOptions(VDriveOptions): # see Examples below if arg1 is not None and arg2 is not None: - self.from_file = arg1 - self.to_file = arg2 + self.from_file = argv_to_unicode(arg1) + self.to_file = argv_to_unicode(arg2) elif arg1 is not None and arg2 is None: - self.from_file = arg1 # might be "-" + self.from_file = argv_to_unicode(arg1) # might be "-" self.to_file = None else: self.from_file = None self.to_file = None - if self.from_file == "-": + if self.from_file == u"-": self.from_file = None def getSynopsis(self): @@ -197,8 +203,8 @@ class CpOptions(VDriveOptions): def parseArgs(self, *args): if len(args) < 2: raise usage.UsageError("cp requires at least two arguments") - self.sources = args[:-1] - self.destination = args[-1] + self.sources = map(argv_to_unicode, args[:-1]) + self.destination = argv_to_unicode(args[-1]) def getSynopsis(self): return "Usage: tahoe [options] cp FROM.. 
TO" longdesc = """ @@ -228,15 +234,15 @@ class CpOptions(VDriveOptions): class RmOptions(VDriveOptions): def parseArgs(self, where): - self.where = where + self.where = argv_to_unicode(where) def getSynopsis(self): return "%s rm REMOTE_FILE" % (os.path.basename(sys.argv[0]),) class MvOptions(VDriveOptions): def parseArgs(self, frompath, topath): - self.from_file = frompath - self.to_file = topath + self.from_file = argv_to_unicode(frompath) + self.to_file = argv_to_unicode(topath) def getSynopsis(self): return "%s mv FROM TO" % (os.path.basename(sys.argv[0]),) @@ -254,8 +260,8 @@ class MvOptions(VDriveOptions): class LnOptions(VDriveOptions): def parseArgs(self, frompath, topath): - self.from_file = frompath - self.to_file = topath + self.from_file = argv_to_unicode(frompath) + self.to_file = argv_to_unicode(topath) def getSynopsis(self): return "%s ln FROM TO" % (os.path.basename(sys.argv[0]),) @@ -279,8 +285,8 @@ class BackupOptions(VDriveOptions): self['exclude'] = set() def parseArgs(self, localdir, topath): - self.from_dir = localdir - self.to_dir = topath + self.from_dir = argv_to_unicode(localdir) + self.to_dir = argv_to_unicode(topath) def getSynopsis(Self): return "%s backup FROM ALIAS:TO" % os.path.basename(sys.argv[0]) @@ -337,7 +343,7 @@ class WebopenOptions(VDriveOptions): ("info", "i", "Open the t=info page for the file"), ] def parseArgs(self, where=''): - self.where = where + self.where = argv_to_unicode(where) def getSynopsis(self): return "%s webopen [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),) @@ -354,7 +360,7 @@ class ManifestOptions(VDriveOptions): ("raw", "r", "Display raw JSON data instead of parsed"), ] def parseArgs(self, where=''): - self.where = where + self.where = argv_to_unicode(where) def getSynopsis(self): return "%s manifest [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),) @@ -367,7 +373,7 @@ class StatsOptions(VDriveOptions): ("raw", "r", "Display raw JSON data instead of parsed"), ] def parseArgs(self, where=''): - self.where 
= where + self.where = argv_to_unicode(where) def getSynopsis(self): return "%s stats [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),) @@ -383,7 +389,7 @@ class CheckOptions(VDriveOptions): ("add-lease", None, "Add/renew lease on all shares"), ] def parseArgs(self, where=''): - self.where = where + self.where = argv_to_unicode(where) def getSynopsis(self): return "%s check [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),) @@ -402,7 +408,7 @@ class DeepCheckOptions(VDriveOptions): ("verbose", "v", "Be noisy about what is happening."), ] def parseArgs(self, where=''): - self.where = where + self.where = argv_to_unicode(where) def getSynopsis(self): return "%s deep-check [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),) diff --git a/src/allmydata/scripts/common.py b/src/allmydata/scripts/common.py index 0ee7a3d0..6f82b7b4 100644 --- a/src/allmydata/scripts/common.py +++ b/src/allmydata/scripts/common.py @@ -1,7 +1,9 @@ import os, sys, urllib +import codecs from twisted.python import usage - +from allmydata.util.stringutils import unicode_to_url +from allmydata.util.assertutil import precondition class BaseOptions: # unit tests can override these to point at StringIO instances @@ -100,14 +102,14 @@ def get_aliases(nodedir): except EnvironmentError: pass try: - f = open(aliasfile, "r") + f = codecs.open(aliasfile, "r", "utf-8") for line in f.readlines(): line = line.strip() if line.startswith("#") or not line: continue name, cap = line.split(":", 1) # normalize it: remove http: prefix, urldecode - cap = cap.strip() + cap = cap.strip().encode('utf-8') aliases[name] = uri.from_string_dirnode(cap).to_string() except EnvironmentError: pass @@ -138,7 +140,7 @@ def get_alias(aliases, path, default): # and default is not found in aliases, an UnknownAliasError is # raised. path = path.strip() - if uri.has_uri_prefix(path): + if uri.has_uri_prefix(path.encode('utf-8')): # We used to require "URI:blah:./foo" in order to get a subpath, # stripping out the ":./" sequence. 
We still allow that for compatibility, # but now also allow just "URI:blah/foo". @@ -180,4 +182,4 @@ def get_alias(aliases, path, default): def escape_path(path): segments = path.split("/") - return "/".join([urllib.quote(s) for s in segments]) + return "/".join([urllib.quote(unicode_to_url(s)) for s in segments]) diff --git a/src/allmydata/scripts/tahoe_add_alias.py b/src/allmydata/scripts/tahoe_add_alias.py index 37c04e9f..7b0752c1 100644 --- a/src/allmydata/scripts/tahoe_add_alias.py +++ b/src/allmydata/scripts/tahoe_add_alias.py @@ -1,16 +1,20 @@ import os.path +import codecs +import sys from allmydata import uri from allmydata.scripts.common_http import do_http, check_http_error from allmydata.scripts.common import get_aliases from allmydata.util.fileutil import move_into_place +from allmydata.util.stringutils import unicode_to_stdout + def add_line_to_aliasfile(aliasfile, alias, cap): # we use os.path.exists, rather than catching EnvironmentError, to avoid # clobbering the valuable alias file in case of spurious or transient # filesystem errors. 
if os.path.exists(aliasfile): - f = open(aliasfile, "r") + f = codecs.open(aliasfile, "r", "utf-8") aliases = f.read() f.close() if not aliases.endswith("\n"): @@ -18,7 +22,7 @@ def add_line_to_aliasfile(aliasfile, alias, cap): else: aliases = "" aliases += "%s: %s\n" % (alias, cap) - f = open(aliasfile+".tmp", "w") + f = codecs.open(aliasfile+".tmp", "w", "utf-8") f.write(aliases) f.close() move_into_place(aliasfile+".tmp", aliasfile) @@ -41,7 +45,7 @@ def add_alias(options): add_line_to_aliasfile(aliasfile, alias, cap) - print >>stdout, "Alias '%s' added" % (alias,) + print >>stdout, "Alias '%s' added" % (unicode_to_stdout(alias),) return 0 def create_alias(options): @@ -74,7 +78,7 @@ def create_alias(options): add_line_to_aliasfile(aliasfile, alias, new_uri) - print >>stdout, "Alias '%s' created" % (alias,) + print >>stdout, "Alias '%s' created" % (unicode_to_stdout(alias),) return 0 def list_aliases(options): diff --git a/src/allmydata/scripts/tahoe_backup.py b/src/allmydata/scripts/tahoe_backup.py index a7b96b14..2b065eb2 100644 --- a/src/allmydata/scripts/tahoe_backup.py +++ b/src/allmydata/scripts/tahoe_backup.py @@ -9,6 +9,11 @@ from allmydata.scripts.common import get_alias, escape_path, DEFAULT_ALIAS, \ from allmydata.scripts.common_http import do_http from allmydata.util import time_format from allmydata.scripts import backupdb +import sys +from allmydata.util.stringutils import unicode_to_stdout, listdir_unicode, open_unicode +from allmydata.util.assertutil import precondition +from twisted.python import usage + class HTTPError(Exception): pass @@ -154,12 +159,16 @@ class BackerUpper: def verboseprint(self, msg): if self.verbosity >= 2: + if isinstance(msg, unicode): + msg = unicode_to_stdout(msg) + print >>self.options.stdout, msg def warn(self, msg): print >>self.options.stderr, msg def process(self, localpath): + precondition(isinstance(localpath, unicode), localpath) # returns newdircap self.verboseprint("processing %s" % localpath) @@ -167,7 +176,7 
@@ class BackerUpper: compare_contents = {} # childname -> rocap try: - children = os.listdir(localpath) + children = listdir_unicode(localpath) except EnvironmentError: self.directories_skipped += 1 self.warn("WARNING: permission denied on directory %s" % localpath) @@ -283,6 +292,8 @@ class BackerUpper: # This function will raise an IOError exception when called on an unreadable file def upload(self, childpath): + precondition(isinstance(childpath, unicode), childpath) + #self.verboseprint("uploading %s.." % childpath) metadata = get_local_metadata(childpath) @@ -291,7 +302,7 @@ class BackerUpper: if must_upload: self.verboseprint("uploading %s.." % childpath) - infileobj = open(os.path.expanduser(childpath), "rb") + infileobj = open_unicode(os.path.expanduser(childpath), "rb") url = self.options['node-url'] + "uri" resp = do_http("PUT", url, infileobj) if resp.status not in (200, 201): diff --git a/src/allmydata/scripts/tahoe_cp.py b/src/allmydata/scripts/tahoe_cp.py index 6e639e0d..479996a9 100644 --- a/src/allmydata/scripts/tahoe_cp.py +++ b/src/allmydata/scripts/tahoe_cp.py @@ -2,12 +2,17 @@ import os.path import urllib import simplejson +import sys from cStringIO import StringIO from twisted.python.failure import Failure from allmydata.scripts.common import get_alias, escape_path, \ DefaultAliasMarker, UnknownAliasError from allmydata.scripts.common_http import do_http from allmydata import uri +from twisted.python import usage +from allmydata.util.stringutils import unicode_to_url, listdir_unicode, open_unicode +from allmydata.util.assertutil import precondition + def ascii_or_none(s): if s is None: @@ -70,6 +75,7 @@ def make_tahoe_subdirectory(nodeurl, parent_writecap, name): class LocalFileSource: def __init__(self, pathname): + precondition(isinstance(pathname, unicode), pathname) self.pathname = pathname def need_to_copy_bytes(self): @@ -80,6 +86,7 @@ class LocalFileSource: class LocalFileTarget: def __init__(self, pathname): + 
precondition(isinstance(pathname, unicode), pathname) self.pathname = pathname def put_file(self, inf): outf = open(self.pathname, "wb") @@ -92,6 +99,7 @@ class LocalFileTarget: class LocalMissingTarget: def __init__(self, pathname): + precondition(isinstance(pathname, unicode), pathname) self.pathname = pathname def put_file(self, inf): @@ -105,6 +113,8 @@ class LocalMissingTarget: class LocalDirectorySource: def __init__(self, progressfunc, pathname): + precondition(isinstance(pathname, unicode), pathname) + self.progressfunc = progressfunc self.pathname = pathname self.children = None @@ -113,7 +123,7 @@ class LocalDirectorySource: if self.children is not None: return self.children = {} - children = os.listdir(self.pathname) + children = listdir_unicode(self.pathname) for i,n in enumerate(children): self.progressfunc("examining %d of %d" % (i, len(children))) pn = os.path.join(self.pathname, n) @@ -130,6 +140,8 @@ class LocalDirectorySource: class LocalDirectoryTarget: def __init__(self, progressfunc, pathname): + precondition(isinstance(pathname, unicode), pathname) + self.progressfunc = progressfunc self.pathname = pathname self.children = None @@ -138,7 +150,7 @@ class LocalDirectoryTarget: if self.children is not None: return self.children = {} - children = os.listdir(self.pathname) + children = listdir_unicode(self.pathname) for i,n in enumerate(children): self.progressfunc("examining %d of %d" % (i, len(children))) pn = os.path.join(self.pathname, n) @@ -161,8 +173,9 @@ class LocalDirectoryTarget: return LocalDirectoryTarget(self.progressfunc, pathname) def put_file(self, name, inf): + precondition(isinstance(name, unicode), name) pathname = os.path.join(self.pathname, name) - outf = open(pathname, "wb") + outf = open_unicode(pathname, "wb") while True: data = inf.read(32768) if not data: @@ -355,7 +368,7 @@ class TahoeDirectoryTarget: if self.writecap: url = self.nodeurl + "/".join(["uri", urllib.quote(self.writecap), - 
urllib.quote(name.encode('utf-8'))]) + urllib.quote(unicode_to_url(name))]) self.children[name] = TahoeFileTarget(self.nodeurl, mutable, writecap, readcap, url) elif data[0] == "dirnode": diff --git a/src/allmydata/scripts/tahoe_ls.py b/src/allmydata/scripts/tahoe_ls.py index bb204ddc..4e42f2fb 100644 --- a/src/allmydata/scripts/tahoe_ls.py +++ b/src/allmydata/scripts/tahoe_ls.py @@ -4,6 +4,7 @@ import simplejson from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \ UnknownAliasError from allmydata.scripts.common_http import do_http +from allmydata.util.stringutils import unicode_to_stdout def list(options): nodeurl = options['node-url'] @@ -130,7 +131,7 @@ def list(options): line.append(ctime_s) if not options["classify"]: classify = "" - line.append(name + classify) + line.append(unicode_to_stdout(name) + classify) if options["uri"]: line.append(uri) if options["readonly-uri"]: diff --git a/src/allmydata/scripts/tahoe_manifest.py b/src/allmydata/scripts/tahoe_manifest.py index 66f05a9b..6d0b9e8d 100644 --- a/src/allmydata/scripts/tahoe_manifest.py +++ b/src/allmydata/scripts/tahoe_manifest.py @@ -85,7 +85,7 @@ class ManifestStreamer(LineOnlyReceiver): try: print >>stdout, d["cap"], "/".join(d["path"]) except UnicodeEncodeError: - print >>stdout, d["cap"], "/".join([p.encode("utf-8") + print >>stdout, d["cap"], "/".join([unicode_to_stdout(p) for p in d["path"]]) def manifest(options): diff --git a/src/allmydata/scripts/tahoe_mkdir.py b/src/allmydata/scripts/tahoe_mkdir.py index 50223dc9..cc4b0688 100644 --- a/src/allmydata/scripts/tahoe_mkdir.py +++ b/src/allmydata/scripts/tahoe_mkdir.py @@ -2,6 +2,7 @@ import urllib from allmydata.scripts.common_http import do_http, check_http_error from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, UnknownAliasError +from allmydata.util.stringutils import unicode_to_url def mkdir(options): nodeurl = options['node-url'] @@ -35,7 +36,7 @@ def mkdir(options): path = path[:-1] # path (in argv) 
must be "/".join([s.encode("utf-8") for s in segments]) url = nodeurl + "uri/%s/%s?t=mkdir" % (urllib.quote(rootcap), - urllib.quote(path)) + urllib.quote(unicode_to_url(path))) resp = do_http("POST", url) check_http_error(resp, stderr) new_uri = resp.read().strip() diff --git a/src/allmydata/test/test_cli.py b/src/allmydata/test/test_cli.py index c65474f8..2451e105 100644 --- a/src/allmydata/test/test_cli.py +++ b/src/allmydata/test/test_cli.py @@ -6,6 +6,7 @@ from cStringIO import StringIO import urllib import re import simplejson +import sys from allmydata.util import fileutil, hashutil, base32 from allmydata import uri @@ -26,6 +27,9 @@ from allmydata.test.no_network import GridTestMixin from twisted.internet import threads # CLI tests use deferToThread from twisted.python import usage +from allmydata.util.stringutils import listdir_unicode, open_unicode, \ + unicode_platform, FilenameEncodingError + timeout = 480 # deep_check takes 360s on Zandr's linksys box, others take > 240s @@ -284,7 +288,7 @@ class CLI(unittest.TestCase): "work": "WA", "c": "CA"} def ga1(path): - return get_alias(aliases, path, "tahoe") + return get_alias(aliases, path, u"tahoe") uses_lettercolon = common.platform_uses_lettercolon_drivename() self.failUnlessEqual(ga1("bare"), ("TA", "bare")) self.failUnlessEqual(ga1("baredir/file"), ("TA", "baredir/file")) @@ -379,7 +383,7 @@ class CLI(unittest.TestCase): # default set to something that isn't in the aliases argument should # raise an UnknownAliasError. 
def ga4(path): - return get_alias(aliases, path, "badddefault:") + return get_alias(aliases, path, u"badddefault:") self.failUnlessRaises(common.UnknownAliasError, ga4, "afile") self.failUnlessRaises(common.UnknownAliasError, ga4, "a/dir/path/") @@ -387,12 +391,44 @@ class CLI(unittest.TestCase): old = common.pretend_platform_uses_lettercolon try: common.pretend_platform_uses_lettercolon = True - retval = get_alias(aliases, path, "baddefault:") + retval = get_alias(aliases, path, u"baddefault:") finally: common.pretend_platform_uses_lettercolon = old return retval self.failUnlessRaises(common.UnknownAliasError, ga5, "C:\\Windows") + def test_listdir_unicode_good(self): + basedir = u"cli/common/listdir_unicode_good" + fileutil.make_dirs(basedir) + + files = (u'Lôzane', u'Bern', u'Genève') + + for file in files: + open(os.path.join(basedir, file), "w").close() + + for file in listdir_unicode(basedir): + self.failUnlessEqual(file in files, True) + + def test_listdir_unicode_bad(self): + if unicode_platform(): + raise unittest.SkipTest("This test doesn't make any sense on architecture which handle filenames natively as Unicode entities.") + + basedir = u"cli/common/listdir_unicode_bad" + fileutil.make_dirs(basedir) + + files = (u'Lôzane', u'Bern', u'Genève') + + # We use a wrong encoding on purpose + if sys.getfilesystemencoding() == 'UTF-8': + encoding = 'latin1' + else: + encoding = 'UTF-8' + + for file in files: + path = os.path.join(basedir, file).encode(encoding) + open(path, "w").close() + + self.failUnlessRaises(FilenameEncodingError, listdir_unicode, basedir) class Help(unittest.TestCase): @@ -592,8 +628,73 @@ class CreateAlias(GridTestMixin, CLITestMixin, unittest.TestCase): self.failUnless(aliases["un-corrupted2"].startswith("URI:DIR2:")) d.addCallback(_check_not_corrupted) - return d + def test_create_unicode(self): + if sys.getfilesystemencoding() not in ('UTF-8', 'mbcs'): + raise unittest.SkipTest("Arbitrary filenames are not supported by this platform") + 
+ if sys.stdout.encoding not in ('UTF-8'): + raise unittest.SkipTest("Arbitrary command-line arguments (argv) are not supported by this platform") + + self.basedir = "cli/CreateAlias/create_unicode" + self.set_up_grid() + aliasfile = os.path.join(self.get_clientdir(), "private", "aliases") + + d = self.do_cli("create-alias", "études") + def _check_create_unicode((rc,stdout,stderr)): + self.failUnlessEqual(rc, 0) + self.failIf(stderr) + + # If stdout only supports ascii, accentuated characters are + # being replaced by '?' + if sys.stdout.encoding == "ANSI_X3.4-1968": + self.failUnless("Alias '?tudes' created" in stdout) + else: + self.failUnless("Alias 'études' created" in stdout) + + aliases = get_aliases(self.get_clientdir()) + self.failUnless(aliases[u"études"].startswith("URI:DIR2:")) + d.addCallback(_check_create_unicode) + + d.addCallback(lambda res: self.do_cli("ls", "études:")) + def _check_ls1((rc, stdout, stderr)): + self.failUnlessEqual(rc, 0) + self.failIf(stderr) + + self.failUnlessEqual(stdout, "") + d.addCallback(_check_ls1) + + d.addCallback(lambda res: self.do_cli("put", "-", "études:uploaded.txt", + stdin="Blah blah blah")) + + d.addCallback(lambda res: self.do_cli("ls", "études:")) + def _check_ls2((rc, stdout, stderr)): + self.failUnlessEqual(rc, 0) + self.failIf(stderr) + + self.failUnlessEqual(stdout, "uploaded.txt\n") + d.addCallback(_check_ls2) + + d.addCallback(lambda res: self.do_cli("get", "études:uploaded.txt")) + def _check_get((rc, stdout, stderr)): + self.failUnlessEqual(rc, 0) + self.failIf(stderr) + self.failUnlessEqual(stdout, "Blah blah blah") + d.addCallback(_check_get) + + # Ensure that an Unicode filename in an Unicode alias works as expected + d.addCallback(lambda res: self.do_cli("put", "-", "études:lumière.txt", + stdin="Let the sunshine In!")) + + d.addCallback(lambda res: self.do_cli("get", + get_aliases(self.get_clientdir())[u"études"] + "/lumière.txt")) + def _check_get((rc, stdout, stderr)): + self.failUnlessEqual(rc, 
0) + self.failIf(stderr) + self.failUnlessEqual(stdout, "Let the sunshine In!") + d.addCallback(_check_get) + + return d class Ln(GridTestMixin, CLITestMixin, unittest.TestCase): def _create_test_file(self): @@ -865,6 +966,40 @@ class Put(GridTestMixin, CLITestMixin, unittest.TestCase): return d + def test_immutable_from_file_unicode(self): + if sys.stdout.encoding not in ('UTF-8'): + raise unittest.SkipTest("Arbitrary command-line arguments (argv) are not supported by this platform") + + # tahoe put file.txt "à trier.txt" + self.basedir = os.path.dirname(self.mktemp()) + self.set_up_grid() + + rel_fn = os.path.join(self.basedir, "DATAFILE") + abs_fn = os.path.abspath(rel_fn) + # we make the file small enough to fit in a LIT file, for speed + DATA = "short file" + f = open(rel_fn, "w") + f.write(DATA) + f.close() + + d = self.do_cli("create-alias", "tahoe") + + d.addCallback(lambda res: + self.do_cli("put", rel_fn, "à trier.txt")) + def _uploaded((rc,stdout,stderr)): + readcap = stdout.strip() + self.failUnless(readcap.startswith("URI:LIT:")) + self.failUnless("201 Created" in stderr, stderr) + self.readcap = readcap + d.addCallback(_uploaded) + + d.addCallback(lambda res: + self.do_cli("get", "tahoe:à trier.txt")) + d.addCallback(lambda (rc,stdout,stderr): + self.failUnlessEqual(stdout, DATA)) + + return d + class List(GridTestMixin, CLITestMixin, unittest.TestCase): def test_list(self): self.basedir = "cli/List/list" @@ -1146,32 +1281,39 @@ class Cp(GridTestMixin, CLITestMixin, unittest.TestCase): o.parseOptions, ["onearg"]) def test_unicode_filename(self): + if sys.getfilesystemencoding() not in ('UTF-8', 'mbcs'): + raise unittest.SkipTest("Arbitrary filenames are not supported by this platform") + + if sys.stdout.encoding not in ('UTF-8'): + raise unittest.SkipTest("Arbitrary command-line arguments (argv) are not supported by this platform") + self.basedir = "cli/Cp/unicode_filename" self.set_up_grid() + d = self.do_cli("create-alias", "tahoe") - fn1 = 
os.path.join(self.basedir, "Ärtonwall") + # Use unicode strings when calling os functions + fn1 = os.path.join(self.basedir, u"Ärtonwall") DATA1 = "unicode file content" fileutil.write(fn1, DATA1) - fn2 = os.path.join(self.basedir, "Metallica") - DATA2 = "non-unicode file content" - fileutil.write(fn2, DATA2) - - # Bug #534 - # Assure that uploading a file whose name contains unicode character - # doesn't prevent further uploads in the same directory - d = self.do_cli("create-alias", "tahoe") - d.addCallback(lambda res: self.do_cli("cp", fn1, "tahoe:")) - d.addCallback(lambda res: self.do_cli("cp", fn2, "tahoe:")) + d.addCallback(lambda res: self.do_cli("cp", fn1.encode('utf-8'), "tahoe:")) d.addCallback(lambda res: self.do_cli("get", "tahoe:Ärtonwall")) d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, DATA1)) + fn2 = os.path.join(self.basedir, u"Metallica") + DATA2 = "non-unicode file content" + fileutil.write(fn2, DATA2) + + d.addCallback(lambda res: self.do_cli("cp", fn2.encode('utf-8'), "tahoe:")) + d.addCallback(lambda res: self.do_cli("get", "tahoe:Metallica")) d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, DATA2)) + d.addCallback(lambda res: self.do_cli("ls", "tahoe:")) + d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, "Metallica\nÄrtonwall\n")) + return d - test_unicode_filename.todo = "This behavior is not yet supported, although it does happen to work (for reasons that are ill-understood) on many platforms. See issue ticket #534." 
def test_dangling_symlink_vs_recursion(self): if not hasattr(os, 'symlink'): @@ -1278,6 +1420,17 @@ class Cp(GridTestMixin, CLITestMixin, unittest.TestCase): return d +class Mkdir(GridTestMixin, CLITestMixin, unittest.TestCase): + def test_unicode_mkdir(self): + self.basedir = os.path.dirname(self.mktemp()) + self.set_up_grid() + + d = self.do_cli("create-alias", "tahoe") + d.addCallback(lambda res: self.do_cli("mkdir", "tahoe:Motörhead")) + + return d + + class Backup(GridTestMixin, CLITestMixin, StallMixin, unittest.TestCase): def writeto(self, path, data):