From 21226cbb82a51c98361e02a9ac3df5c05f9474a8 Mon Sep 17 00:00:00 2001 From: Daira Hopwood Date: Tue, 3 Mar 2015 20:04:57 +0000 Subject: [PATCH] Add from_utf8_or_none and tests. Signed-off-by: Daira Hopwood --- src/allmydata/test/test_encodingutil.py | 17 ++++++++++++++++- src/allmydata/util/encodingutil.py | 13 +++++++++---- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/src/allmydata/test/test_encodingutil.py b/src/allmydata/test/test_encodingutil.py index 98156c6f..5613a1d6 100644 --- a/src/allmydata/test/test_encodingutil.py +++ b/src/allmydata/test/test_encodingutil.py @@ -65,7 +65,7 @@ from allmydata.util import encodingutil, fileutil from allmydata.util.encodingutil import argv_to_unicode, unicode_to_url, \ unicode_to_output, quote_output, quote_path, quote_local_unicode_path, \ unicode_platform, listdir_unicode, FilenameEncodingError, get_io_encoding, \ - get_filesystem_encoding, _reload + get_filesystem_encoding, to_str, from_utf8_or_none, _reload from allmydata.dirnode import normalize from twisted.python import usage @@ -467,3 +467,18 @@ class OpenBSD(EncodingUtil, unittest.TestCase): filesystem_encoding = '646' io_encoding = '646' # Oops, I cannot write filenames containing non-ascii characters + + +class TestToFromStr(ReallyEqualMixin, unittest.TestCase): + def test_to_str(self): + self.failUnlessReallyEqual(to_str("foo"), "foo") + self.failUnlessReallyEqual(to_str("lumi\xc3\xa8re"), "lumi\xc3\xa8re") + self.failUnlessReallyEqual(to_str("\xFF"), "\xFF") # passes through invalid UTF-8 -- is this what we want? + self.failUnlessReallyEqual(to_str(u"lumi\u00E8re"), "lumi\xc3\xa8re") + self.failUnlessReallyEqual(to_str(None), None) + + def test_from_utf8_or_none(self): + self.failUnlessRaises(AssertionError, from_utf8_or_none, u"foo") + self.failUnlessReallyEqual(from_utf8_or_none("lumi\xc3\xa8re"), u"lumi\u00E8re") + self.failUnlessReallyEqual(from_utf8_or_none(None), None) + self.failUnlessRaises(UnicodeDecodeError, from_utf8_or_none, "\xFF") diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index feafd8f5..d14b08f6 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -3,12 +3,11 @@ Functions used to convert inputs from whatever encoding used in the system to unicode and back. """ -import sys -import os -import re +import sys, os, re, locale +from types import NoneType + from allmydata.util.assertutil import precondition from twisted.python import usage -import locale from allmydata.util import log from allmydata.util.fileutil import abspath_expanduser_unicode @@ -127,6 +126,12 @@ def to_str(s): return s return s.encode('utf-8') +def from_utf8_or_none(s): + precondition(isinstance(s, (NoneType, str)), s) + if s is None: + return s + return s.decode('utf-8') + PRINTABLE_ASCII = re.compile(r'^[\n\r\x20-\x7E]*$', re.DOTALL) PRINTABLE_8BIT = re.compile(r'^[\n\r\x20-\x7E\x80-\xFF]*$', re.DOTALL) -- 2.45.2