--- /dev/null
+from allmydata.util.assertutil import _assert
+
+# These are taken directly from the NTFS-3G table for Windows Vista,
+# with trivial syntax changes.
+# <https://fossies.org/dox/ntfs-3g_ntfsprogs-2015.3.14/unistr_8c_source.html#l01129>
+
+# (start, end, offset) => for c in xrange(start, end): c maps to c+offset
+# `end` is exclusive, so e.g. (0x0061, 0x007b, -32) covers 0x61..0x7a ('a'..'z')
+# and maps them to 0x41..0x5a ('A'..'Z'). The offset is signed: most runs map
+# downwards, but a few (e.g. the 0x1f00 block) map to a higher code point.
+uc_run_table = (
+ (0x0061, 0x007b, -32), (0x00e0, 0x00f7, -32), (0x00f8, 0x00ff, -32),
+ (0x0256, 0x0258, -205), (0x028a, 0x028c, -217), (0x037b, 0x037e, 130),
+ (0x03ac, 0x03ad, -38), (0x03ad, 0x03b0, -37), (0x03b1, 0x03c2, -32),
+ (0x03c2, 0x03c3, -31), (0x03c3, 0x03cc, -32), (0x03cc, 0x03cd, -64),
+ (0x03cd, 0x03cf, -63), (0x0430, 0x0450, -32), (0x0450, 0x0460, -80),
+ (0x0561, 0x0587, -48), (0x1f00, 0x1f08, 8), (0x1f10, 0x1f16, 8),
+ (0x1f20, 0x1f28, 8), (0x1f30, 0x1f38, 8), (0x1f40, 0x1f46, 8),
+ (0x1f51, 0x1f52, 8), (0x1f53, 0x1f54, 8), (0x1f55, 0x1f56, 8),
+ (0x1f57, 0x1f58, 8), (0x1f60, 0x1f68, 8), (0x1f70, 0x1f72, 74),
+ (0x1f72, 0x1f76, 86), (0x1f76, 0x1f78, 100), (0x1f78, 0x1f7a, 128),
+ (0x1f7a, 0x1f7c, 112), (0x1f7c, 0x1f7e, 126), (0x1f80, 0x1f88, 8),
+ (0x1f90, 0x1f98, 8), (0x1fa0, 0x1fa8, 8), (0x1fb0, 0x1fb2, 8),
+ (0x1fb3, 0x1fb4, 9), (0x1fcc, 0x1fcd, -9), (0x1fd0, 0x1fd2, 8),
+ (0x1fe0, 0x1fe2, 8), (0x1fe5, 0x1fe6, 7), (0x1ffc, 0x1ffd, -9),
+ (0x2170, 0x2180, -16), (0x24d0, 0x24ea, -26), (0x2c30, 0x2c5f, -48),
+ (0x2d00, 0x2d26, -7264), (0xff41, 0xff5b, -32),
+)
+
+# (start, end) => for i in xrange(start, end, 2): i+1 maps to i
+# That is, c maps to c-1 for c = start+1, start+3, ... up to AND INCLUDING
+# `end` when end-start is odd. This follows the expansion loop in NTFS-3G's
+# ntfs_upcase_table_build(); single-pair entries such as (0x01f4, 0x01f5)
+# only make sense under this reading (0x01f5 maps to 0x01f4).
+# The entries are not strictly sorted by `start`; the order does not matter
+# because the ranges do not overlap.
+uc_dup_table = (
+ (0x0100, 0x012f), (0x0132, 0x0137), (0x0139, 0x0149), (0x014a, 0x0178),
+ (0x0179, 0x017e), (0x01a0, 0x01a6), (0x01b3, 0x01b7), (0x01cd, 0x01dd),
+ (0x01de, 0x01ef), (0x01f4, 0x01f5), (0x01f8, 0x01f9), (0x01fa, 0x0220),
+ (0x0222, 0x0234), (0x023b, 0x023c), (0x0241, 0x0242), (0x0246, 0x024f),
+ (0x03d8, 0x03ef), (0x03f7, 0x03f8), (0x03fa, 0x03fb), (0x0460, 0x0481),
+ (0x048a, 0x04bf), (0x04c1, 0x04c4), (0x04c5, 0x04c8), (0x04c9, 0x04ce),
+ (0x04ec, 0x04ed), (0x04d0, 0x04eb), (0x04ee, 0x04f5), (0x04f6, 0x0513),
+ (0x1e00, 0x1e95), (0x1ea0, 0x1ef9), (0x2183, 0x2184), (0x2c60, 0x2c61),
+ (0x2c67, 0x2c6c), (0x2c75, 0x2c76), (0x2c80, 0x2ce3),
+)
+
+# (c, v) => c maps to v
+# This is uc_byte_table in NTFS-3G, but that's a poor name.
+# Each entry is an individual code point with its own target; the target may
+# lie above c (e.g. 0x00ff -> 0x0178) or below it (e.g. 0x0183 -> 0x0182).
+uc_singleton_table = (
+ (0x00ff, 0x0178), (0x0180, 0x0243), (0x0183, 0x0182), (0x0185, 0x0184),
+ (0x0188, 0x0187), (0x018c, 0x018b), (0x0192, 0x0191), (0x0195, 0x01f6),
+ (0x0199, 0x0198), (0x019a, 0x023d), (0x019e, 0x0220), (0x01a8, 0x01a7),
+ (0x01ad, 0x01ac), (0x01b0, 0x01af), (0x01b9, 0x01b8), (0x01bd, 0x01bc),
+ (0x01bf, 0x01f7), (0x01c6, 0x01c4), (0x01c9, 0x01c7), (0x01cc, 0x01ca),
+ (0x01dd, 0x018e), (0x01f3, 0x01f1), (0x023a, 0x2c65), (0x023e, 0x2c66),
+ (0x0253, 0x0181), (0x0254, 0x0186), (0x0259, 0x018f), (0x025b, 0x0190),
+ (0x0260, 0x0193), (0x0263, 0x0194), (0x0268, 0x0197), (0x0269, 0x0196),
+ (0x026b, 0x2c62), (0x026f, 0x019c), (0x0272, 0x019d), (0x0275, 0x019f),
+ (0x027d, 0x2c64), (0x0280, 0x01a6), (0x0283, 0x01a9), (0x0288, 0x01ae),
+ (0x0289, 0x0244), (0x028c, 0x0245), (0x0292, 0x01b7), (0x03f2, 0x03f9),
+ (0x04cf, 0x04c0), (0x1d7d, 0x2c63), (0x214e, 0x2132),
+)
+
+# Let's simplify by converting these to a single table.
+# (start, end, step, offset) => for c in xrange(start, end, step): c maps to c+offset
+#
+# The entries are concatenated as runs, then dups, then singletons.
+#
+# Dup ranges need care: NTFS-3G expands (start, end) as
+#   for (i = start; i < end; i += 2) uc[i+1] = i;
+# so the mapped code points are start+1, start+3, ... and they INCLUDE `end`
+# whenever end-start is odd. The exclusive xrange bound must therefore be
+# end+1; using `end` would silently drop mappings such as U+012F -> U+012E
+# and would make single-pair entries like (0x01f4, 0x01f5) expand to nothing.
+# When end-start is even, end+1 and end select the same code points.
+uc_full_table = (
+    [(start, end, 1, offset) for (start, end, offset) in uc_run_table] +
+    [(start+1, end+1, 2, -1) for (start, end) in uc_dup_table] +
+    [(c, c+1, 1, v-c) for (c, v) in uc_singleton_table]
+)
+
+# Now we create an offset_trie such that
+# c maps to c + offset_trie[c >> TABLE_BITS][c & TABLE_MASK].
+# This is memory-efficient because almost all of the subtables
+# are represented by an all-zero array, which can be shared.
+
+TABLE_BITS = 9
+TABLE_SIZE = 1 << TABLE_BITS
+TABLE_MASK = TABLE_SIZE-1
+
+# idmap is the shared all-zero subtable (offset 0 => c maps to itself).
+# Every top-level slot initially aliases this one list object.
+idmap = [0]*TABLE_SIZE
+# Floor division keeps the multiplier an integer regardless of which
+# division semantics are in effect.
+offset_trie = [idmap]*(0x110000 // TABLE_SIZE)
+
+for (start, end, step, offset) in uc_full_table:
+    for c in xrange(start, end, step):
+        high = c >> TABLE_BITS
+        lowmap = offset_trie[high]
+        # Identity, not equality: we only care whether this slot still
+        # aliases the shared subtable. `==` would compare all TABLE_SIZE
+        # elements for every code point.
+        if lowmap is idmap:
+            # clone to avoid aliasing
+            offset_trie[high] = lowmap = idmap[:]
+
+        # Entries are applied in table order, so a later entry for the
+        # same code point overwrites an earlier one.
+        lowmap[c & TABLE_MASK] = offset
+
+def uppercase(s):
+    """Return s with every character replaced by its mapping in offset_trie.
+
+    Each character's code point c is replaced by
+    c + offset_trie[c >> TABLE_BITS][c & TABLE_MASK]; characters whose
+    offset is 0 are left unchanged. The mapped characters are produced with
+    unichr() and joined into a single string.
+
+    s is iterated one character at a time (presumably a unicode string;
+    confirm against callers).
+    """
+    mapped = []
+    for ch in s:
+        code = ord(ch)
+        subtable = offset_trie[code >> TABLE_BITS]
+        mapped.append(unichr(code + subtable[code & TABLE_MASK]))
+    return "".join(mapped)