misc/simulators/hashbasedsig.py

   1 #!python
   2
   3 # range of hash output lengths
   4 range_L_hash = [128]
   5
   6 lg_M = 53                   # lg(required number of signatures before losing security)
   7
   8 limit_bytes = 480000        # limit on signature length
   9 limit_cost = 500            # limit on Mcycles_Sig + weight_ver*Mcycles_ver
  10 weight_ver = 1              # how important verification cost is relative to signature cost
  11                             # (note: setting this too high will just exclude useful candidates)
  12
  13 L_block = 512               # bitlength of hash input blocks
  14 L_pad   = 64                # bitlength of hash padding overhead (for M-D hashes)
  15 L_label = 80                # bitlength of hash position label
  16 L_prf   = 256               # bitlength of hash output when used as a PRF
  17 cycles_per_byte = 15.8      # cost of hash
  18
  19 Mcycles_per_block = cycles_per_byte * L_block / (8 * 1000000.0)
  20
  21
  22 from math import floor, ceil, log, log1p, pow, e, sqrt
  23 from sys import stderr
  24 from gc import collect
  25
  26 def lg(x):
  27     return log(x, 2)
  28 def ln(x):
  29     return log(x, e)
  30 def ceil_log(x, B):
  31     return int(ceil(log(x, B)))
  32 def ceil_div(x, y):
  33     return int(ceil(float(x) / float(y)))
  34 def floor_div(x, y):
  35     return int(floor(float(x) / float(y)))
  36
  37 # number of compression function evaluations to hash k hash-outputs
  38 # we assume that there is a label in each block
  39 def compressions(k):
  40     return ceil_div(k + L_pad, L_block - L_label)
  41
  42 # sum of power series sum([pow(p, i) for i in range(n)])
  43 def sum_powers(p, n):
  44     if p == 1: return n
  45     return (pow(p, n) - 1)/(p - 1)
  46
  47
  48 def make_candidate(B, K, K1, K2, q, T, T_min, L_hash, lg_N, sig_bytes, c_sign, c_ver, c_ver_pm):
  49     Mcycles_sign   = c_sign   * Mcycles_per_block
  50     Mcycles_ver    = c_ver    * Mcycles_per_block
  51     Mcycles_ver_pm = c_ver_pm * Mcycles_per_block
  52     cost = Mcycles_sign + weight_ver*Mcycles_ver
  53
  54     if sig_bytes >= limit_bytes or cost > limit_cost:
  55         return []
  56
  57     return [{
  58         'B': B, 'K': K, 'K1': K1, 'K2': K2, 'q': q, 'T': T,
  59         'T_min': T_min,
  60         'L_hash': L_hash,
  61         'lg_N': lg_N,
  62         'sig_bytes': sig_bytes,
  63         'c_sign': c_sign,
  64         'Mcycles_sign': Mcycles_sign,
  65         'c_ver': c_ver,
  66         'c_ver_pm': c_ver_pm,
  67         'Mcycles_ver': Mcycles_ver,
  68         'Mcycles_ver_pm': Mcycles_ver_pm,
  69         'cost': cost,
  70     }]
  71
  72
  73 # K1 = size of root Merkle tree
  74 # K  = size of middle Merkle trees
  75 # K2 = size of leaf Merkle trees
  76 # q  = number of revealed private keys per signed message
  77
  78 # Winternitz with B < 4 is never optimal. For example, going from B=4 to B=2 halves the
  79 # chain depth, but that is cancelled out by doubling (roughly) the number of digits.
  80 range_B = xrange(4, 33)
  81
  82 M = pow(2, lg_M)
  83
  84 def calculate(K, K1, K2, q_max, L_hash, trees):
  85     candidates = []
  86     lg_K  = lg(K)
  87     lg_K1 = lg(K1)
  88     lg_K2 = lg(K2)
  89
  90     # We want the optimal combination of q and T. That takes too much time and memory
  91     # to search for directly, so we start by calculating the lowest possible value of T
  92     # for any q. Then for potential values of T, we calculate the smallest q such that we
  93     # will have at least L_hash bits of security against forgery using revealed private keys
  94     # (i.e. this method of forgery is no easier than finding a hash preimage), provided
  95     # that fewer than 2^lg_S_min messages are signed.
  96
  97     # min height of certification tree (excluding root and bottom layer)
  98     T_min = ceil_div(lg_M - lg_K1, lg_K)
  99
 100     last_q = None
 101     for T in xrange(T_min, T_min+21):
 102         # lg(total number of leaf private keys)
 103         lg_S = lg_K1 + lg_K*T
 104         lg_N = lg_S + lg_K2
 105
 106         # Suppose that m signatures have been made. The number of times X that a given bucket has
 107         # been chosen follows a binomial distribution B(m, p) where p = 1/S and S is the number of
 108         # buckets. I.e. Pr(X = x) = C(m, x) * p^x * (1-p)^(m-x).
 109         #
 110         # If an attacker picks a random seed and message that falls into a bucket that has been
 111         # chosen x times, then at most q*x private values in that bucket have been revealed, so
 112         # (ignoring the possibility of guessing private keys, which is negligable) the attacker's
 113         # success probability for a forgery using the revealed values is at most min(1, q*x / K2)^q.
 114         #
 115         # Let j = floor(K2/q). Conditioning on x, we have
 116         #
 117         # Pr(forgery) = sum_{x = 0..j}(Pr(X = x) * (q*x / K2)^q) + Pr(x > j)
 118         #             = sum_{x = 1..j}(Pr(X = x) * (q*x / K2)^q) + Pr(x > j)
 119         #
 120         # We lose nothing by approximating (q*x / K2)^q as 1 for x > 4, i.e. ignoring the resistance
 121         # of the HORS scheme to forgery when a bucket has been chosen 5 or more times.
 122         #
 123         # Pr(forgery) < sum_{x = 1..4}(Pr(X = x) * (q*x / K2)^q) + Pr(x > 4)
 124         #
 125         # where Pr(x > 4) = 1 - sum_{x = 0..4}(Pr(X = x))
 126         #
 127         # We use log arithmetic here because values very close to 1 cannot be represented accurately
 128         # in floating point, but their logarithms can (provided we use appropriate functions such as
 129         # log1p).
 130
 131         lg_p = -lg_S
 132         lg_1_p = log1p(-pow(2, lg_p))/ln(2)        # lg(1-p), computed accurately
 133         j = 5
 134         lg_px = [lg_1_p * M]*j
 135
 136         # We approximate lg(M-x) as lg(M)
 137         lg_px_step = lg_M + lg_p - lg_1_p
 138         for x in xrange(1, j):
 139             lg_px[x] = lg_px[x-1] - lg(x) + lg_px_step
 140
 141         def find_min_q():
 142             for q in xrange(1, q_max+1):
 143                 lg_q = lg(q)
 144                 lg_pforge = [lg_px[x] + (lg_q*x - lg_K2)*q for x in xrange(1, j)]
 145                 if max(lg_pforge) < -L_hash + lg(j) and lg_px[j-1] + 1.0 < -L_hash:
 146                     #print "K = %d, K1 = %d, K2 = %d, L_hash = %d, lg_K2 = %.3f, q = %d, lg_pforge_1 = %.3f, lg_pforge_2 = %.3f, lg_pforge_3 = %.3f" \
 147                     #      % (K, K1, K2, L_hash, lg_K2, q, lg_pforge_1, lg_pforge_2, lg_pforge_3)
 148                     return q
 149             return None
 150
 151         q = find_min_q()
 152         if q is None or q == last_q:
 153             # if q hasn't decreased, this will be strictly worse than the previous candidate
 154             continue
 155         last_q = q
 156
 157         # number of compressions to compute the Merkle hashes
 158         (h_M,  c_M,  _) = trees[K]
 159         (h_M1, c_M1, _) = trees[K1]
 160         (h_M2, c_M2, (dau, tri)) = trees[K2]
 161
 162         # B = generalized Winternitz base
 163         for B in range_B:
 164             # n is the number of digits needed to sign the message representative and checksum.
 165             # The representation is base-B, except that we allow the most significant digit
 166             # to be up to 2B-1.
 167             n_L = ceil_div(L_hash-1, lg(B))
 168             firstL_max = floor_div(pow(2, L_hash)-1, pow(B, n_L-1))
 169             C_max = firstL_max + (n_L-1)*(B-1)
 170             n_C = ceil_log(ceil_div(C_max, 2), B)
 171             n = n_L + n_C
 172             firstC_max = floor_div(C_max, pow(B, n_C-1))
 173
 174             # Total depth of Winternitz hash chains. The chains for the most significant
 175             # digit of the message representative and of the checksum may be a different
 176             # length to those for the other digits.
 177             c_D = (n-2)*(B-1) + firstL_max + firstC_max
 178
 179             # number of compressions to hash a Winternitz public key
 180             c_W = compressions(n*L_hash + L_label)
 181
 182             # bitlength of a single Winternitz signature and authentication path
 183             L_MW  = (n + h_M ) * L_hash
 184             L_MW1 = (n + h_M1) * L_hash
 185
 186             # bitlength of the HORS signature and authentication paths
 187             # For all but one of the q authentication paths, one of the sibling elements in
 188             # another path is made redundant where they intersect. This cancels out the hash
 189             # that would otherwise be needed at the bottom of the path, making the total
 190             # length of the signature q*h_M2 + 1 hashes, rather than q*(h_M2 + 1).
 191             L_leaf = (q*h_M2 + 1) * L_hash
 192
 193             # length of the overall GMSS+HORS signature and seeds
 194             sig_bytes = ceil_div(L_MW1 + T*L_MW + L_leaf + L_prf + ceil(lg_N), 8)
 195
 196             c_MW  = K *(c_D + c_W) + c_M  + ceil_div(K *n*L_hash, L_prf)
 197             c_MW1 = K1*(c_D + c_W) + c_M1 + ceil_div(K1*n*L_hash, L_prf)
 198
 199             # For simplicity, c_sign and c_ver don't take into account compressions saved
 200             # as a result of intersecting authentication paths in the HORS signature, so
 201             # are slight overestimates.
 202
 203             c_sign = c_MW1 + T*c_MW + q*(c_M2 + 1) + ceil_div(K2*L_hash, L_prf)
 204
 205             # *expected* number of compressions to verify a signature
 206             c_ver = c_D/2.0 + c_W + c_M1 + T*(c_D/2.0 + c_W + c_M) + q*(c_M2 + 1)
 207             c_ver_pm = (1 + T)*c_D/2.0
 208
 209             candidates += make_candidate(B, K, K1, K2, q, T, T_min, L_hash, lg_N, sig_bytes, c_sign, c_ver, c_ver_pm)
 210
 211     return candidates
 212
 213 def search():
 214     for L_hash in range_L_hash:
 215         print >>stderr, "collecting...   \r",
 216         collect()
 217
 218         print >>stderr, "precomputing... \r",
 219
 220         """
 221         # d/dq (lg(q+1) + L_hash/q) = 1/(ln(2)*(q+1)) - L_hash/q^2
 222         # Therefore lg(q+1) + L_hash/q is at a minimum when 1/(ln(2)*(q+1)) = L_hash/q^2.
 223         # Let alpha = L_hash*ln(2), then from the quadratic formula, the integer q that
 224         # minimizes lg(q+1) + L_hash/q is the floor or ceiling of (alpha + sqrt(alpha^2 - 4*alpha))/2.
 225         # (We don't want the other solution near 0.)
 226
 227         alpha = floor(L_hash*ln(2))  # float
 228         q = floor((alpha + sqrt(alpha*(alpha-4)))/2)
 229         if lg(q+2) + L_hash/(q+1) < lg(q+1) + L_hash/q:
 230             q += 1
 231         lg_S_margin = lg(q+1) + L_hash/q
 232         q_max = int(q)
 233
 234         q = floor(L_hash*ln(2))  # float
 235         if lg(q+1) + L_hash/(q+1) < lg(q) + L_hash/q:
 236             q += 1
 237         lg_S_margin = lg(q) + L_hash/q
 238         q_max = int(q)
 239         """
 240         q_max = 4000
 241
 242         # find optimal Merkle tree shapes for this L_hash and each K
 243         trees = {}
 244         K_max = 50
 245         c2 = compressions(2*L_hash + L_label)
 246         c3 = compressions(3*L_hash + L_label)
 247         for dau in xrange(0, 10):
 248             a = pow(2, dau)
 249             for tri in xrange(0, ceil_log(30-dau, 3)):
 250                 x = int(a*pow(3, tri))
 251                 h = dau + 2*tri
 252                 c_x = int(sum_powers(2, dau)*c2 + a*sum_powers(3, tri)*c3)
 253                 for y in xrange(1, x+1):
 254                     if tri > 0:
 255                         # If the bottom level has arity 3, then for every 2 nodes by which the tree is
 256                         # imperfect, we can save c3 compressions by pruning 3 leaves back to their parent.
 257                         # If the tree is imperfect by an odd number of nodes, we can prune one extra leaf,
 258                         # possibly saving a compression if c2 < c3.
 259                         c_y = c_x - floor_div(x-y, 2)*c3 - ((x-y) % 2)*(c3-c2)
 260                     else:
 261                         # If the bottom level has arity 2, then for each node by which the tree is
 262                         # imperfect, we can save c2 compressions by pruning 2 leaves back to their parent.
 263                         c_y = c_x - (x-y)*c2
 264
 265                     if y not in trees or (h, c_y, (dau, tri)) < trees[y]:
 266                         trees[y] = (h, c_y, (dau, tri))
 267
 268         #for x in xrange(1, K_max+1):
 269         #    print x, trees[x]
 270
 271         candidates = []
 272         progress = 0
 273         fuzz = 0
 274         complete = (K_max-1)*(2200-200)/100
 275         for K in xrange(2, K_max+1):
 276             for K2 in xrange(200, 2200, 100):
 277                 for K1 in xrange(max(2, K-fuzz), min(K_max, K+fuzz)+1):
 278                     candidates += calculate(K, K1, K2, q_max, L_hash, trees)
 279                 progress += 1
 280                 print >>stderr, "searching: %3d %% \r" % (100.0 * progress / complete,),
 281
 282         print >>stderr, "filtering...    \r",
 283         step = 2.0
 284         bins = {}
 285         limit = floor_div(limit_cost, step)
 286         for bin in xrange(0, limit+2):
 287             bins[bin] = []
 288
 289         for c in candidates:
 290             bin = floor_div(c['cost'], step)
 291             bins[bin] += [c]
 292
 293         del candidates
 294
 295         # For each in a range of signing times, find the best candidate.
 296         best = []
 297         for bin in xrange(0, limit):
 298             candidates = bins[bin] + bins[bin+1] + bins[bin+2]
 299             if len(candidates) > 0:
 300                 best += [min(candidates, key=lambda c: c['sig_bytes'])]
 301
 302         def format_candidate(candidate):
 303             return ("%(B)3d  %(K)3d  %(K1)3d  %(K2)5d %(q)4d %(T)4d  "
 304                     "%(L_hash)4d   %(lg_N)5.1f  %(sig_bytes)7d   "
 305                     "%(c_sign)7d (%(Mcycles_sign)7.2f) "
 306                     "%(c_ver)7d +/-%(c_ver_pm)5d (%(Mcycles_ver)5.2f +/-%(Mcycles_ver_pm)5.2f)   "
 307                    ) % candidate
 308
 309         print >>stderr, "                \r",
 310         if len(best) > 0:
 311             print "  B    K   K1     K2    q    T  L_hash  lg_N  sig_bytes  c_sign (Mcycles)        c_ver     (    Mcycles   )"
 312             print "---- ---- ---- ------ ---- ---- ------ ------ --------- ------------------ --------------------------------"
 313
 314             best.sort(key=lambda c: (c['sig_bytes'], c['cost']))
 315             last_sign = None
 316             last_ver = None
 317             for c in best:
 318                 if last_sign is None or c['c_sign'] < last_sign or c['c_ver'] < last_ver:
 319                     print format_candidate(c)
 320                     last_sign = c['c_sign']
 321                     last_ver = c['c_ver']
 322
 323             print
 324         else:
 325             print "No candidates found for L_hash = %d or higher." % (L_hash)
 326             return
 327
 328         del bins
 329         del best
 330
 331 print "Maximum signature size: %d bytes" % (limit_bytes,)
 332 print "Maximum (signing + %d*verification) cost: %.1f Mcycles" % (weight_ver, limit_cost)
 333 print "Hash parameters: %d-bit blocks with %d-bit padding and %d-bit labels, %.2f cycles per byte" \
 334       % (L_block, L_pad, L_label, cycles_per_byte)
 335 print "PRF output size: %d bits" % (L_prf,)
 336 print "Security level given by L_hash is maintained for up to 2^%d signatures.\n" % (lg_M,)
 337
 338 search()