Add from_utf8_or_none and tests.
authorDaira Hopwood <daira@jacaranda.org>
Tue, 3 Mar 2015 20:04:57 +0000 (20:04 +0000)
committerDaira Hopwood <daira@jacaranda.org>
Tue, 24 Mar 2015 16:46:40 +0000 (16:46 +0000)
Signed-off-by: Daira Hopwood <daira@jacaranda.org>
src/allmydata/test/test_encodingutil.py
src/allmydata/util/encodingutil.py

index 98156c6f19cd9c8204478ba48c7ad3cb36c7dbbf..5613a1d6af721ec018e87d4f272f019ec02d053f 100644 (file)
@@ -65,7 +65,7 @@ from allmydata.util import encodingutil, fileutil
 from allmydata.util.encodingutil import argv_to_unicode, unicode_to_url, \
     unicode_to_output, quote_output, quote_path, quote_local_unicode_path, \
     unicode_platform, listdir_unicode, FilenameEncodingError, get_io_encoding, \
-    get_filesystem_encoding, _reload
+    get_filesystem_encoding, to_str, from_utf8_or_none, _reload
 from allmydata.dirnode import normalize
 
 from twisted.python import usage
@@ -467,3 +467,18 @@ class OpenBSD(EncodingUtil, unittest.TestCase):
     filesystem_encoding = '646'
     io_encoding = '646'
     # Oops, I cannot write filenames containing non-ascii characters
+
+
+class TestToFromStr(ReallyEqualMixin, unittest.TestCase):
+    def test_to_str(self):
+        self.failUnlessReallyEqual(to_str("foo"), "foo")
+        self.failUnlessReallyEqual(to_str("lumi\xc3\xa8re"), "lumi\xc3\xa8re")
+        self.failUnlessReallyEqual(to_str("\xFF"), "\xFF")  # passes through invalid UTF-8 -- is this what we want?
+        self.failUnlessReallyEqual(to_str(u"lumi\u00E8re"), "lumi\xc3\xa8re")
+        self.failUnlessReallyEqual(to_str(None), None)
+
+    def test_from_utf8_or_none(self):
+        self.failUnlessRaises(AssertionError, from_utf8_or_none, u"foo")
+        self.failUnlessReallyEqual(from_utf8_or_none("lumi\xc3\xa8re"), u"lumi\u00E8re")
+        self.failUnlessReallyEqual(from_utf8_or_none(None), None)
+        self.failUnlessRaises(UnicodeDecodeError, from_utf8_or_none, "\xFF")
index feafd8f5c46e6470981c68e14922e1b223af99c2..d14b08f6be448782a454fefd1da22d6d8e9e45eb 100644 (file)
@@ -3,12 +3,11 @@ Functions used to convert inputs from whatever encoding used in the system to
 unicode and back.
 """
 
-import sys
-import os
-import re
+import sys, os, re, locale
+from types import NoneType
+
 from allmydata.util.assertutil import precondition
 from twisted.python import usage
-import locale
 from allmydata.util import log
 from allmydata.util.fileutil import abspath_expanduser_unicode
 
@@ -127,6 +126,12 @@ def to_str(s):
         return s
     return s.encode('utf-8')
 
+def from_utf8_or_none(s):
+    precondition(isinstance(s, (NoneType, str)), s)
+    if s is None:
+        return s
+    return s.decode('utf-8')
+
 PRINTABLE_ASCII = re.compile(r'^[\n\r\x20-\x7E]*$',          re.DOTALL)
 PRINTABLE_8BIT  = re.compile(r'^[\n\r\x20-\x7E\x80-\xFF]*$', re.DOTALL)