3 """I represent a compressed list of booleans, one per index (an integer).
4 Typically, each index represents an offset into a large string, pointing
5 to a specific byte of a share. In this context, True means that byte has
6 been received, or has been requested.
8 Another way to look at this is maintaining a set of integers, optimized
9 for operations on spans like 'add range to set' and 'is range in set?'.
11 This is a python equivalent of perl's Set::IntSpan module, frequently
12 used to represent .newsrc contents.
14 Rather than storing an actual (large) list or dictionary, I represent my
15 internal state as a sorted list of spans, each with a start and a length.
16 My API is presented in terms of start+length pairs. I provide set
17 arithmetic operators, to efficiently answer questions like 'I want bytes
18 XYZ, I already requested bytes ABC, and I've already received bytes DEF:
19 what bytes should I request now?'.
21 The new downloader will use it to keep track of which bytes we've requested
25 def __init__(self, _span_or_start=None, length=None):
27 if length is not None:
28 self._spans.append( (_span_or_start, length) )
30 for (start,length) in _span_or_start:
31 self.add(start, length)
35 assert sorted(self._spans) == self._spans
38 for (start,length) in self._spans:
39 if prev_end is not None:
40 assert start > prev_end
41 prev_end = start+length
42 except AssertionError:
43 print "BAD:", self.dump()
46 def add(self, start, length):
49 #print " ADD [%d+%d -%d) to %s" % (start, length, start+length, self.dump())
50 first_overlap = last_overlap = None
51 for i,(s_start,s_length) in enumerate(self._spans):
52 #print " (%d+%d)-> overlap=%s adjacent=%s" % (s_start,s_length, overlap(s_start, s_length, start, length), adjacent(s_start, s_length, start, length))
53 if (overlap(s_start, s_length, start, length)
54 or adjacent(s_start, s_length, start, length)):
56 if first_overlap is None:
60 if first_overlap is not None:
62 #print " first_overlap", first_overlap, last_overlap
63 if first_overlap is None:
64 # no overlap, so just insert the span and sort by starting
66 self._spans.insert(0, (start,length))
69 # everything from [first_overlap] to [last_overlap] overlapped
70 first_start,first_length = self._spans[first_overlap]
71 last_start,last_length = self._spans[last_overlap]
72 newspan_start = min(start, first_start)
73 newspan_end = max(start+length, last_start+last_length)
74 newspan_length = newspan_end - newspan_start
75 newspan = (newspan_start, newspan_length)
76 self._spans[first_overlap:last_overlap+1] = [newspan]
77 #print " ADD done: %s" % self.dump()
82 def remove(self, start, length):
85 #print " REMOVE [%d+%d -%d) from %s" % (start, length, start+length, self.dump())
86 first_complete_overlap = last_complete_overlap = None
87 for i,(s_start,s_length) in enumerate(self._spans):
88 s_end = s_start + s_length
89 o = overlap(s_start, s_length, start, length)
92 o_end = o_start+o_length
93 if o_start == s_start and o_end == s_end:
94 # delete this span altogether
95 if first_complete_overlap is None:
96 first_complete_overlap = i
97 last_complete_overlap = i
98 elif o_start == s_start:
99 # we only overlap the left side, so trim the start
106 assert new_start > s_start
107 new_length = new_end - new_start
108 self._spans[i] = (new_start, new_length)
110 # we only overlap the right side
117 assert new_end < s_end
118 new_length = new_end - new_start
119 self._spans[i] = (new_start, new_length)
121 # we overlap the middle, so create a new span. No need to
122 # examine any other spans.
128 left_length = left_end - left_start
131 right_length = right_end - right_start
132 self._spans[i] = (left_start, left_length)
133 self._spans.append( (right_start, right_length) )
136 if first_complete_overlap is not None:
137 del self._spans[first_complete_overlap:last_complete_overlap+1]
138 #print " REMOVE done: %s" % self.dump()
143 return "len=%d: %s" % (self.len(),
144 ",".join(["[%d-%d]" % (start,start+l-1)
145 for (start,l) in self._spans]) )
148 for start, length in self._spans:
149 for i in range(start, start+length):
153 for s in self._spans:
156 def __nonzero__(self): # this gets us bool()
157 return bool(self.len())
160 # guess what! python doesn't allow __len__ to return a long, only an
161 # int. So we stop using len(spans), use spans.len() instead.
162 return sum([length for start,length in self._spans])
164 def __add__(self, other):
165 s = self.__class__(self)
166 for (start, length) in other:
170 def __sub__(self, other):
171 s = self.__class__(self)
172 for (start, length) in other:
173 s.remove(start, length)
176 def __iadd__(self, other):
177 for (start, length) in other:
178 self.add(start, length)
181 def __isub__(self, other):
182 for (start, length) in other:
183 self.remove(start, length)
186 def __and__(self, other):
188 return self.__class__()
189 bounds = self.__class__(self._spans[0][0],
190 self._spans[-1][0]+self._spans[-1][1])
191 not_other = bounds - other
192 return self - not_other
194 def __contains__(self, (start,length)):
195 for span_start,span_length in self._spans:
196 o = overlap(start, length, span_start, span_length)
199 if o_start == start and o_length == length:
203 def overlap(start0, length0, start1, length1):
204 # return start2,length2 of the overlapping region, or None
205 # 00 00 000 0000 00 00 000 00 00 00 00
206 # 11 11 11 11 111 11 11 1111 111 11 11
207 left = max(start0, start1)
208 right = min(start0+length0, start1+length1)
209 # if there is overlap, 'left' will be its start, and right-1 will
212 return (left, right-left)
215 def adjacent(start0, length0, start1, length1):
216 if (start0 < start1) and start0+length0 == start1:
218 elif (start1 < start0) and start1+length1 == start0:
223 """I represent portions of a large string. Equivalently, I can be said to
224 maintain a large array of characters (with gaps of empty elements). I can
225 be used to manage access to a remote share, where some pieces have been
226 retrieved, some have been requested, and others have not been read.
229 def __init__(self, other=None):
230 self.spans = [] # (start, data) tuples, non-overlapping, merged
232 for (start, data) in other.get_chunks():
233 self.add(start, data)
235 def __nonzero__(self): # this gets us bool()
236 return bool(self.len())
239 # return number of bytes we're holding
240 return sum([len(data) for (start,data) in self.spans])
243 # return iterator of sorted list of offsets, one per byte
244 for (start,data) in self.spans:
245 for i in range(start, start+len(data)):
249 return "len=%d: %s" % (self.len(),
250 ",".join(["[%d-%d]" % (start,start+len(data)-1)
251 for (start,data) in self.spans]) )
253 def get_chunks(self):
254 return list(self.spans)
257 """Return a Spans object with a bit set for each byte I hold"""
258 return Spans([(start, len(data)) for (start,data) in self.spans])
260 def assert_invariants(self):
263 prev_start = self.spans[0][0]
264 prev_end = prev_start + len(self.spans[0][1])
265 for start, data in self.spans[1:]:
266 if not start > prev_end:
267 # adjacent or overlapping: bad
268 print "ASSERTION FAILED", self.spans
271 def get(self, start, length):
272 # returns a string of LENGTH, or None
273 #print "get", start, length, self.spans
275 for (s_start,s_data) in self.spans:
276 s_end = s_start+len(s_data)
277 #print " ",s_start,s_end
278 if s_start <= start < s_end:
279 # we want some data from this span. Because we maintain
280 # strictly merged and non-overlapping spans, everything we
281 # want must be in this span.
282 offset = start - s_start
283 if offset + length > len(s_data):
284 #print " None, span falls short"
285 return None # span falls short
286 #print " some", s_data[offset:offset+length]
287 return s_data[offset:offset+length]
289 # we've gone too far: no further spans will overlap
290 #print " None, gone too far"
292 #print " None, ran out of spans"
295 def add(self, start, data):
296 # first: walk through existing spans, find overlap, modify-in-place
297 # create list of new spans
300 # merge adjacent spans
301 #print "add", start, data, self.spans
302 end = start + len(data)
305 #print " loop", start, data, i, len(self.spans), self.spans
306 if i >= len(self.spans):
307 #print " append and done"
309 self.spans.append( (start, data) )
311 (s_start,s_data) = self.spans[i]
313 # a: OLD b:OLDD c1:OLD c2:OLD d1:OLDD d2:OLD e: OLLDD
314 # NEW NEW NEW NEWW NEW NEW NEW
316 # we handle A by inserting a new segment (with "N") and looping,
317 # turning it into B or C. We handle B by replacing a prefix and
318 # terminating. We handle C (both c1 and c2) by replacing the
319 # segment (and, for c2, looping, turning it into A). We handle D
320 # by replacing a suffix (and, for d2, looping, turning it into
321 # A). We handle E by replacing the middle and terminating.
323 # case A: insert a new span, then loop with the remainder
324 #print " insert new span"
325 s_len = s_start-start
326 self.spans.insert(i, (start, data[:s_len]))
332 s_end = s_start+s_len
333 if s_start <= start < s_end:
334 #print " modify this span", s_start, start, s_end
335 # we want to modify some data in this span: a prefix, a
336 # suffix, or the whole thing
339 #print " replace whole segment"
340 # case C: replace this segment
341 self.spans[i] = (s_start, data[:s_len])
345 # C2 is where len(data)>0
347 # case B: modify the prefix, retain the suffix
348 #print " modify prefix"
349 self.spans[i] = (s_start, data + s_data[len(data):])
351 if start > s_start and end < s_end:
352 # case E: modify the middle
353 #print " modify middle"
354 prefix_len = start - s_start # we retain this much
355 suffix_len = s_end - end # and retain this much
356 newdata = s_data[:prefix_len] + data + s_data[-suffix_len:]
357 self.spans[i] = (s_start, newdata)
359 # case D: retain the prefix, modify the suffix
360 #print " modify suffix"
361 prefix_len = start - s_start # we retain this much
362 suffix_len = s_len - prefix_len # we replace this much
363 #print " ", s_data, prefix_len, suffix_len, s_len, data
364 self.spans[i] = (s_start,
365 s_data[:prefix_len] + data[:suffix_len])
368 data = data[suffix_len:]
369 #print " now", start, data
370 # D2 is where len(data)>0
372 # else we're not there yet
373 #print " still looking"
376 # now merge adjacent spans
377 #print " merging", self.spans
379 for (s_start,s_data) in self.spans:
380 if newspans and adjacent(newspans[-1][0], len(newspans[-1][1]),
381 s_start, len(s_data)):
382 newspans[-1] = (newspans[-1][0], newspans[-1][1] + s_data)
384 newspans.append( (s_start, s_data) )
385 self.spans = newspans
386 self.assert_invariants()
387 #print " done", self.spans
389 def remove(self, start, length):
392 #print "remove", start, length, self.spans
393 while i < len(self.spans):
394 (s_start,s_data) = self.spans[i]
396 # this segment is entirely right of the removed region, and
397 # all further segments are even further right. We're done.
400 s_end = s_start + s_len
401 o = overlap(start, length, s_start, s_len)
406 o_end = o_start + o_len
408 # remove the whole segment
411 if o_start == s_start:
412 # remove a prefix, leaving the suffix from o_end to s_end
413 prefix_len = o_end - o_start
414 self.spans[i] = (o_end, s_data[prefix_len:])
418 # remove a suffix, leaving the prefix from s_start to o_start
419 prefix_len = o_start - s_start
420 self.spans[i] = (s_start, s_data[:prefix_len])
423 # remove the middle, creating a new segment
424 # left is s_start:o_start, right is o_end:s_end
425 left_len = o_start - s_start
426 left = s_data[:left_len]
427 right_len = s_end - o_end
428 right = s_data[-right_len:]
429 self.spans[i] = (s_start, left)
430 self.spans.insert(i+1, (o_end, right))
432 #print " done", self.spans
434 def pop(self, start, length):
435 data = self.get(start, length)
437 self.remove(start, length)