From 21226cbb82a51c98361e02a9ac3df5c05f9474a8 Mon Sep 17 00:00:00 2001
From: Daira Hopwood <daira@jacaranda.org>
Date: Tue, 3 Mar 2015 20:04:57 +0000
Subject: [PATCH] Add from_utf8_or_none and tests.

Signed-off-by: Daira Hopwood <daira@jacaranda.org>
---
 src/allmydata/test/test_encodingutil.py | 17 ++++++++++++++++-
 src/allmydata/util/encodingutil.py      | 13 +++++++++----
 2 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/src/allmydata/test/test_encodingutil.py b/src/allmydata/test/test_encodingutil.py
index 98156c6f..5613a1d6 100644
--- a/src/allmydata/test/test_encodingutil.py
+++ b/src/allmydata/test/test_encodingutil.py
@@ -65,7 +65,7 @@ from allmydata.util import encodingutil, fileutil
 from allmydata.util.encodingutil import argv_to_unicode, unicode_to_url, \
     unicode_to_output, quote_output, quote_path, quote_local_unicode_path, \
     unicode_platform, listdir_unicode, FilenameEncodingError, get_io_encoding, \
-    get_filesystem_encoding, _reload
+    get_filesystem_encoding, to_str, from_utf8_or_none, _reload
 from allmydata.dirnode import normalize
 
 from twisted.python import usage
@@ -467,3 +467,18 @@ class OpenBSD(EncodingUtil, unittest.TestCase):
     filesystem_encoding = '646'
     io_encoding = '646'
     # Oops, I cannot write filenames containing non-ascii characters
+
+
+class TestToFromStr(ReallyEqualMixin, unittest.TestCase):
+    """Check to_str and from_utf8_or_none against byte strings, unicode and None."""
+    def test_to_str(self):
+        # NB: "\xFF" passes through as invalid UTF-8 -- is this what we want?
+        for given, want in [("foo", "foo"), ("lumi\xc3\xa8re", "lumi\xc3\xa8re"), ("\xFF", "\xFF"),
+                            (u"lumi\u00E8re", "lumi\xc3\xa8re"), (None, None)]:
+            self.failUnlessReallyEqual(to_str(given), want)
+
+    def test_from_utf8_or_none(self):
+        self.failUnlessRaises(AssertionError, from_utf8_or_none, u"foo")  # unicode input is rejected
+        for given, want in [("lumi\xc3\xa8re", u"lumi\u00E8re"), (None, None)]:
+            self.failUnlessReallyEqual(from_utf8_or_none(given), want)
+        self.failUnlessRaises(UnicodeDecodeError, from_utf8_or_none, "\xFF")  # invalid UTF-8 input
diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py
index feafd8f5..d14b08f6 100644
--- a/src/allmydata/util/encodingutil.py
+++ b/src/allmydata/util/encodingutil.py
@@ -3,12 +3,11 @@ Functions used to convert inputs from whatever encoding used in the system to
 unicode and back.
 """
 
-import sys
-import os
-import re
+import sys, os, re, locale
+from types import NoneType
+
 from allmydata.util.assertutil import precondition
 from twisted.python import usage
-import locale
 from allmydata.util import log
 from allmydata.util.fileutil import abspath_expanduser_unicode
 
@@ -127,6 +126,12 @@ def to_str(s):
         return s
     return s.encode('utf-8')
 
+def from_utf8_or_none(s):
+    """Decode the UTF-8 byte string s to unicode; None is returned unchanged."""
+    # Only None or a byte string is accepted; anything else fails the precondition.
+    precondition(isinstance(s, (NoneType, str)), s)
+    return None if s is None else s.decode('utf-8')
+
 PRINTABLE_ASCII = re.compile(r'^[\n\r\x20-\x7E]*$',          re.DOTALL)
 PRINTABLE_8BIT  = re.compile(r'^[\n\r\x20-\x7E\x80-\xFF]*$', re.DOTALL)
 
-- 
2.45.2