From: Daira Hopwood <daira@jacaranda.org>
Date: Tue, 3 Mar 2015 20:04:57 +0000 (+0000)
Subject: Add from_utf8_or_none and tests.
X-Git-Tag: allmydata-tahoe-1.10.1a1~53
X-Git-Url: https://git.rkrishnan.org/simplejson/components/com_hotproperty/install.html?a=commitdiff_plain;h=21226cbb82a51c98361e02a9ac3df5c05f9474a8;p=tahoe-lafs%2Ftahoe-lafs.git

Add from_utf8_or_none and tests.

Signed-off-by: Daira Hopwood <daira@jacaranda.org>
---

diff --git a/src/allmydata/test/test_encodingutil.py b/src/allmydata/test/test_encodingutil.py
index 98156c6f..5613a1d6 100644
--- a/src/allmydata/test/test_encodingutil.py
+++ b/src/allmydata/test/test_encodingutil.py
@@ -65,7 +65,7 @@ from allmydata.util import encodingutil, fileutil
 from allmydata.util.encodingutil import argv_to_unicode, unicode_to_url, \
     unicode_to_output, quote_output, quote_path, quote_local_unicode_path, \
     unicode_platform, listdir_unicode, FilenameEncodingError, get_io_encoding, \
-    get_filesystem_encoding, _reload
+    get_filesystem_encoding, to_str, from_utf8_or_none, _reload
 from allmydata.dirnode import normalize
 
 from twisted.python import usage
@@ -467,3 +467,18 @@ class OpenBSD(EncodingUtil, unittest.TestCase):
     filesystem_encoding = '646'
     io_encoding = '646'
     # Oops, I cannot write filenames containing non-ascii characters
+
+
+class TestToFromStr(ReallyEqualMixin, unittest.TestCase):
+    def test_to_str(self):
+        self.failUnlessReallyEqual(to_str("foo"), "foo")
+        self.failUnlessReallyEqual(to_str("lumi\xc3\xa8re"), "lumi\xc3\xa8re")
+        self.failUnlessReallyEqual(to_str("\xFF"), "\xFF")  # passes through invalid UTF-8 -- is this what we want?
+        self.failUnlessReallyEqual(to_str(u"lumi\u00E8re"), "lumi\xc3\xa8re")
+        self.failUnlessReallyEqual(to_str(None), None)
+
+    def test_from_utf8_or_none(self):
+        self.failUnlessRaises(AssertionError, from_utf8_or_none, u"foo")
+        self.failUnlessReallyEqual(from_utf8_or_none("lumi\xc3\xa8re"), u"lumi\u00E8re")
+        self.failUnlessReallyEqual(from_utf8_or_none(None), None)
+        self.failUnlessRaises(UnicodeDecodeError, from_utf8_or_none, "\xFF")
diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py
index feafd8f5..d14b08f6 100644
--- a/src/allmydata/util/encodingutil.py
+++ b/src/allmydata/util/encodingutil.py
@@ -3,12 +3,11 @@ Functions used to convert inputs from whatever encoding used in the system to
 unicode and back.
 """
 
-import sys
-import os
-import re
+import sys, os, re, locale
+from types import NoneType
+
 from allmydata.util.assertutil import precondition
 from twisted.python import usage
-import locale
 from allmydata.util import log
 from allmydata.util.fileutil import abspath_expanduser_unicode
 
@@ -127,6 +126,12 @@ def to_str(s):
         return s
     return s.encode('utf-8')
 
+def from_utf8_or_none(s):
+    precondition(isinstance(s, (NoneType, str)), s)
+    if s is None:
+        return s
+    return s.decode('utf-8')
+
 PRINTABLE_ASCII = re.compile(r'^[\n\r\x20-\x7E]*$',          re.DOTALL)
 PRINTABLE_8BIT  = re.compile(r'^[\n\r\x20-\x7E\x80-\xFF]*$', re.DOTALL)