]> git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blobdiff - src/allmydata/util/fileutil.py
On Windows, the user's home directory may be either %USERPROFILE% or %HOMEDRIVE%...
[tahoe-lafs/tahoe-lafs.git] / src / allmydata / util / fileutil.py
index bd9deb43115daee48908d81baa90932edc48c5ac..0349669e22b24670bd4839c1f93af96c6cc9f62d 100644 (file)
@@ -2,10 +2,13 @@
 Futz with files like a pro.
 """
 
-import sys, exceptions, os, stat, tempfile, time
+import sys, exceptions, os, stat, tempfile, time, binascii
 
 from twisted.python import log
 
+from pycryptopp.cipher.aes import AES
+
+
 def rename(src, dst, tries=4, basedelay=0.1):
     """ Here is a superkludge to workaround the fact that occasionally on
     Windows some other process (e.g. an anti-virus scanner, a local search
@@ -112,6 +115,55 @@ class NamedTemporaryDirectory:
         if self.cleanup and hasattr(self, 'name'):
             rm_dir(self.name)
 
+class EncryptedTemporaryFile:
+    # not implemented: next, readline, readlines, xreadlines, writelines
+
+    def __init__(self):
+        self.file = tempfile.TemporaryFile()
+        self.key = os.urandom(16)  # AES-128
+
+    def _crypt(self, offset, data):
+        offset_big = offset // 16
+        offset_small = offset % 16
+        iv = binascii.unhexlify("%032x" % offset_big)
+        cipher = AES(self.key, iv=iv)
+        cipher.process("\x00"*offset_small)
+        return cipher.process(data)
+
+    def close(self):
+        self.file.close()
+
+    def flush(self):
+        self.file.flush()
+
+    def seek(self, offset, whence=0):  # 0 = SEEK_SET
+        self.file.seek(offset, whence)
+
+    def tell(self):
+        offset = self.file.tell()
+        return offset
+
+    def read(self, size=-1):
+        """A read must not follow a write, or vice-versa, without an intervening seek."""
+        index = self.file.tell()
+        ciphertext = self.file.read(size)
+        plaintext = self._crypt(index, ciphertext)
+        return plaintext
+
+    def write(self, plaintext):
+        """A read must not follow a write, or vice-versa, without an intervening seek.
+        If seeking and then writing causes a 'hole' in the file, the contents of the
+        hole are unspecified."""
+        index = self.file.tell()
+        ciphertext = self._crypt(index, plaintext)
+        self.file.write(ciphertext)
+
+    def truncate(self, newsize):
+        """Truncate or extend the file to 'newsize'. If it is extended, the contents after the
+        old end-of-file are unspecified. The file position after this operation is unspecified."""
+        self.file.truncate(newsize)
+
+
 def make_dirs(dirname, mode=0777):
     """
     An idempotent version of os.makedirs().  If the dir already exists, do
@@ -172,12 +224,6 @@ def remove_if_possible(f):
     except:
         pass
 
-def open_or_create(fname, binarymode=True):
-    try:
-        return open(fname, binarymode and "r+b" or "r+")
-    except EnvironmentError:
-        return open(fname, binarymode and "w+b" or "w+")
-
 def du(basedir):
     size = 0
 
@@ -195,8 +241,16 @@ def move_into_place(source, dest):
         remove_if_possible(dest)
     os.rename(source, dest)
 
-def write(path, data):
-    wf = open(path, "wb")
+def write_atomically(target, contents, mode="b"):
+    f = open(target+".tmp", "w"+mode)
+    try:
+        f.write(contents)
+    finally:
+        f.close()
+    move_into_place(target+".tmp", target)
+
+def write(path, data, mode="wb"):
+    wf = open(path, mode)
     try:
         wf.write(data)
     finally:
@@ -209,9 +263,11 @@ def read(path):
     finally:
         rf.close()
 
-def put_file(pathname, inf):
+def put_file(path, inf):
+    precondition_abspath(path)
+
     # TODO: create temporary file and move into place?
-    outf = open_expanduser(pathname, "wb")
+    outf = open(path, "wb")
     try:
         while True:
             data = inf.read(32768)
@@ -221,10 +277,265 @@ def put_file(pathname, inf):
     finally:
         outf.close()
 
-def open_expanduser(path, mode):
-    assert isinstance(path, unicode), path
-    return open(os.path.expanduser(path), mode)
 
-def abspath_expanduser(path):
-    assert isinstance(path, unicode), path
-    return os.path.abspath(os.path.expanduser(path))
+def precondition_abspath(path):
+    if not isinstance(path, unicode):
+        raise AssertionError("an abspath must be a Unicode string")
+
+    if sys.platform == "win32":
+        # This intentionally doesn't view absolute paths starting with a drive specification, or
+        # paths relative to the current drive, as acceptable.
+        if not path.startswith("\\\\"):
+            raise AssertionError("an abspath should be normalized using abspath_expanduser_unicode")
+    else:
+        # This intentionally doesn't view the path '~' or paths starting with '~/' as acceptable.
+        if not os.path.isabs(path):
+            raise AssertionError("an abspath should be normalized using abspath_expanduser_unicode")
+
+# Work around <http://bugs.python.org/issue3426>. This code is adapted from
+# <http://svn.python.org/view/python/trunk/Lib/ntpath.py?revision=78247&view=markup>
+# with some simplifications.
+
+_getfullpathname = None
+try:
+    from nt import _getfullpathname
+except ImportError:
+    pass
+
+def abspath_expanduser_unicode(path, base=None):
+    """
+    Return the absolute version of a path. If 'base' is given and 'path' is relative,
+    the path will be expanded relative to 'base'.
+    'path' must be a Unicode string. 'base', if given, must be a Unicode string
+    corresponding to an absolute path as returned by a previous call to
+    abspath_expanduser_unicode.
+    """
+    if not isinstance(path, unicode):
+        raise AssertionError("paths must be Unicode strings")
+    if base is not None:
+        precondition_abspath(base)
+
+    path = expanduser(path)
+
+    if _getfullpathname:
+        # On Windows, os.path.isabs will incorrectly return True
+        # for paths without a drive letter (that are not UNC paths),
+        # e.g. "\\". See <http://bugs.python.org/issue1669539>.
+        try:
+            if base is None:
+                path = _getfullpathname(path or u".")
+            else:
+                path = _getfullpathname(os.path.join(base, path))
+        except OSError:
+            pass
+
+    if not os.path.isabs(path):
+        if base is None:
+            path = os.path.join(os.getcwdu(), path)
+        else:
+            path = os.path.join(base, path)
+
+    # We won't hit <http://bugs.python.org/issue5827> because
+    # there is always at least one Unicode path component.
+    path = os.path.normpath(path)
+
+    if sys.platform == "win32":
+        path = to_windows_long_path(path)
+
+    return path
+
+def to_windows_long_path(path):
+    # '/' is normally a perfectly valid path component separator in Windows.
+    # However, when using the "\\?\" syntax it is not recognized, so we
+    # replace it with '\' here.
+    path = path.replace(u"/", u"\\")
+
+    # Note that other normalizations such as removing '.' and '..' should
+    # be done outside this function.
+
+    if path.startswith(u"\\\\?\\") or path.startswith(u"\\\\.\\"):
+        return path
+    elif path.startswith(u"\\\\"):
+        return u"\\\\?\\UNC\\" + path[2 :]
+    else:
+        return u"\\\\?\\" + path
+
+
+have_GetDiskFreeSpaceExW = False
+if sys.platform == "win32":
+    from ctypes import WINFUNCTYPE, windll, POINTER, byref, c_ulonglong, create_unicode_buffer
+    from ctypes.wintypes import BOOL, DWORD, LPCWSTR, LPWSTR
+
+    # <http://msdn.microsoft.com/en-us/library/ms679360%28v=VS.85%29.aspx>
+    GetLastError = WINFUNCTYPE(DWORD)(("GetLastError", windll.kernel32))
+
+    # <http://msdn.microsoft.com/en-us/library/windows/desktop/ms683188%28v=vs.85%29.aspx>
+    GetEnvironmentVariableW = WINFUNCTYPE(DWORD, LPCWSTR, LPWSTR, DWORD)(
+        ("GetEnvironmentVariableW", windll.kernel32))
+
+    try:
+        # <http://msdn.microsoft.com/en-us/library/aa383742%28v=VS.85%29.aspx>
+        PULARGE_INTEGER = POINTER(c_ulonglong)
+
+        # <http://msdn.microsoft.com/en-us/library/aa364937%28VS.85%29.aspx>
+        GetDiskFreeSpaceExW = WINFUNCTYPE(BOOL, LPCWSTR, PULARGE_INTEGER, PULARGE_INTEGER, PULARGE_INTEGER)(
+            ("GetDiskFreeSpaceExW", windll.kernel32))
+
+        have_GetDiskFreeSpaceExW = True
+    except Exception:
+        import traceback
+        traceback.print_exc()
+
+def expanduser(path):
+    # os.path.expanduser is hopelessly broken for Unicode paths on Windows (ticket #1674).
+    if sys.platform == "win32":
+        return windows_expanduser(path)
+    else:
+        return os.path.expanduser(path)
+
+def windows_expanduser(path):
+    if not path.startswith('~'):
+        return path
+
+    home_dir = windows_getenv(u'USERPROFILE')
+    if home_dir is None:
+        home_drive = windows_getenv(u'HOMEDRIVE')
+        home_path = windows_getenv(u'HOMEPATH')
+        if home_drive is None or home_path is None:
+            raise OSError("Could not find home directory: neither %USERPROFILE% nor (%HOMEDRIVE% and %HOMEPATH%) are set.")
+        home_dir = os.path.join(home_drive, home_path)
+
+    if path == '~':
+        return home_dir
+    elif path.startswith('~/') or path.startswith('~\\'):
+        return os.path.join(home_dir, path[2 :])
+    else:
+        return path
+
+# <https://msdn.microsoft.com/en-us/library/windows/desktop/ms681382%28v=vs.85%29.aspx>
+ERROR_ENVVAR_NOT_FOUND = 203
+
+def windows_getenv(name):
+    # Based on <http://stackoverflow.com/questions/2608200/problems-with-umlauts-in-python-appdata-environvent-variable/2608368#2608368>,
+    # with improved error handling. Returns None if there is no enivronment variable of the given name.
+    if not isinstance(name, unicode):
+        raise AssertionError("name must be Unicode")
+
+    n = GetEnvironmentVariableW(name, None, 0)
+    # GetEnvironmentVariableW returns DWORD, so n cannot be negative.
+    if n == 0:
+        err = GetLastError()
+        if err == ERROR_ENVVAR_NOT_FOUND:
+            return None
+        raise OSError("Windows error %d attempting to read size of environment variable %r"
+                      % (err, name))
+    if n == 1:
+        # Avoid an ambiguity between a zero-length string and an error in the return value of the
+        # call to GetEnvironmentVariableW below.
+        return u""
+
+    buf = create_unicode_buffer(u'\0'*n)
+    retval = GetEnvironmentVariableW(name, buf, n)
+    if retval == 0:
+        err = GetLastError()
+        if err == ERROR_ENVVAR_NOT_FOUND:
+            return None
+        raise OSError("Windows error %d attempting to read environment variable %r"
+                      % (err, name))
+    if retval >= n:
+        raise OSError("Unexpected result %d (expected less than %d) from GetEnvironmentVariableW attempting to read environment variable %r"
+                      % (retval, n, name))
+
+    return buf.value
+
+def get_disk_stats(whichdir, reserved_space=0):
+    """Return disk statistics for the storage disk, in the form of a dict
+    with the following fields.
+      total:            total bytes on disk
+      free_for_root:    bytes actually free on disk
+      free_for_nonroot: bytes free for "a non-privileged user" [Unix] or
+                          the current user [Windows]; might take into
+                          account quotas depending on platform
+      used:             bytes used on disk
+      avail:            bytes available excluding reserved space
+    An AttributeError can occur if the OS has no API to get disk information.
+    An EnvironmentError can occur if the OS call fails.
+
+    whichdir is a directory on the filesystem in question -- the
+    answer is about the filesystem, not about the directory, so the
+    directory is used only to specify which filesystem.
+
+    reserved_space is how many bytes to subtract from the answer, so
+    you can pass how many bytes you would like to leave unused on this
+    filesystem as reserved_space.
+    """
+
+    if have_GetDiskFreeSpaceExW:
+        # If this is a Windows system and GetDiskFreeSpaceExW is available, use it.
+        # (This might put up an error dialog unless
+        # SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOOPENFILEERRORBOX) has been called,
+        # which we do in allmydata.windows.fixups.initialize().)
+
+        n_free_for_nonroot = c_ulonglong(0)
+        n_total            = c_ulonglong(0)
+        n_free_for_root    = c_ulonglong(0)
+        retval = GetDiskFreeSpaceExW(whichdir, byref(n_free_for_nonroot),
+                                               byref(n_total),
+                                               byref(n_free_for_root))
+        if retval == 0:
+            raise OSError("Windows error %d attempting to get disk statistics for %r"
+                          % (GetLastError(), whichdir))
+        free_for_nonroot = n_free_for_nonroot.value
+        total            = n_total.value
+        free_for_root    = n_free_for_root.value
+    else:
+        # For Unix-like systems.
+        # <http://docs.python.org/library/os.html#os.statvfs>
+        # <http://opengroup.org/onlinepubs/7990989799/xsh/fstatvfs.html>
+        # <http://opengroup.org/onlinepubs/7990989799/xsh/sysstatvfs.h.html>
+        s = os.statvfs(whichdir)
+
+        # on my mac laptop:
+        #  statvfs(2) is a wrapper around statfs(2).
+        #    statvfs.f_frsize = statfs.f_bsize :
+        #     "minimum unit of allocation" (statvfs)
+        #     "fundamental file system block size" (statfs)
+        #    statvfs.f_bsize = statfs.f_iosize = stat.st_blocks : preferred IO size
+        # on an encrypted home directory ("FileVault"), it gets f_blocks
+        # wrong, and s.f_blocks*s.f_frsize is twice the size of my disk,
+        # but s.f_bavail*s.f_frsize is correct
+
+        total = s.f_frsize * s.f_blocks
+        free_for_root = s.f_frsize * s.f_bfree
+        free_for_nonroot = s.f_frsize * s.f_bavail
+
+    # valid for all platforms:
+    used = total - free_for_root
+    avail = max(free_for_nonroot - reserved_space, 0)
+
+    return { 'total': total,
+             'free_for_root': free_for_root,
+             'free_for_nonroot': free_for_nonroot,
+             'used': used,
+             'avail': avail,
+           }
+
+def get_available_space(whichdir, reserved_space):
+    """Returns available space for share storage in bytes, or None if no
+    API to get this information is available.
+
+    whichdir is a directory on the filesystem in question -- the
+    answer is about the filesystem, not about the directory, so the
+    directory is used only to specify which filesystem.
+
+    reserved_space is how many bytes to subtract from the answer, so
+    you can pass how many bytes you would like to leave unused on this
+    filesystem as reserved_space.
+    """
+    try:
+        return get_disk_stats(whichdir, reserved_space)['avail']
+    except AttributeError:
+        return None
+    except EnvironmentError:
+        log.msg("OS call to get disk statistics failed")
+        return 0