2 "Rational" version definition and parsing for DistutilsVersionFight
3 discussion at PyCon 2009.
class IrrationalVersionError(Exception):
    """Raised for a version string that is not a rational version."""
class HugeMajorVersionNumError(IrrationalVersionError):
    """Irrational version whose major version number is huge.

    A huge major number is often the result of using a year or a date as
    the version.  See the `error_on_huge_major_num` option of
    `NormalizedVersion` for details; setting that option to False
    disables this guard.
    """
22 # A marker used in the second and third parts of the `parts` tuple, for
23 # versions that don't have those segments, to sort properly. An example
24 # of versions in sort order ('highest' last):
25 # 1.0b1 ((1,0), ('b',1), ('f',))
26 # 1.0.dev345 ((1,0), ('f',), ('dev', 345))
27 # 1.0 ((1,0), ('f',), ('f',))
28 # 1.0.post256.dev345 ((1,0), ('f',), ('f', 'post', 256, 'dev', 345))
29 # 1.0.post345 ((1,0), ('f',), ('f', 'post', 345, 'f'))
31 # 'b' < 'f' ---------------------/ | |
33 # 'dev' < 'f' < 'post' -------------------/ |
35 # 'dev' < 'f' ----------------------------------------------/
36 # Other letters would do, but 'f' for 'final' is kind of nice.
39 VERSION_RE = re.compile(r'''
41 (?P<version>\d+\.\d+) # minimum 'N.N'
42 (?P<extraversion>(?:\.\d+)*) # any number of extra '.N' segments
44 (?P<prerel>[abc]|rc) # 'a'=alpha, 'b'=beta, 'c'=release candidate
45 # 'rc'= alias for release candidate
46 (?P<prerelversion>\d+(?:\.\d+)*)
48 (?P<postdev>(\.post(?P<post>\d+))?(\.dev(?P<dev>\d+))?)?
51 class NormalizedVersion(object):
52 """A rational version.
55 1.2 # equivalent to "1.2.0"
65 1 # minimum two numbers
66 1.2a # release level must have a release serial
69 def __init__(self, s, error_on_huge_major_num=True):
70 """Create a NormalizedVersion instance from a version string.
72 @param s {str} The version string.
73 @param error_on_huge_major_num {bool} Whether to consider an
74 apparent use of a year or full date as the major version number
75 an error. Default True. One of the observed patterns on PyPI before
76 the introduction of `NormalizedVersion` was version numbers like this:
80 This guard is here to strongly encourage the package author to
81 use an alternate version, because a release deployed into PyPI
82 and, e.g. downstream Linux package managers, will forever remove
83 the possibility of using a version number like "1.0" (i.e.
84 where the major number is less than that huge major number).
86 self._parse(s, error_on_huge_major_num)
def from_parts(cls, version, prerelease=FINAL_MARKER,
               devpost=FINAL_MARKER):
    """Build an instance from version parts instead of a string.

    The three parts are first rendered to a version string with
    `parts_to_str`; that string is then handed to the regular
    constructor, so it goes through the usual parsing.

    @param version The main version segment.
    @param prerelease The pre-release segment. Default FINAL_MARKER.
    @param devpost The post/dev segment. Default FINAL_MARKER.
    @returns A new instance of `cls`.
    """
    parts = (version, prerelease, devpost)
    rendered = cls.parts_to_str(parts)
    return cls(rendered)
93 def _parse(self, s, error_on_huge_major_num=True):
94 """Parses a string version into parts."""
95 match = VERSION_RE.search(s)
97 raise IrrationalVersionError(s)
99 groups = match.groupdict()
103 block = self._parse_numdots(groups['version'], s, False, 2)
104 extraversion = groups.get('extraversion')
105 if extraversion not in ('', None):
106 block += self._parse_numdots(extraversion[1:], s)
107 parts.append(tuple(block))
110 prerel = groups.get('prerel')
111 if prerel is not None:
113 block += self._parse_numdots(groups.get('prerelversion'), s,
115 parts.append(tuple(block))
117 parts.append(FINAL_MARKER)
120 if groups.get('postdev'):
121 post = groups.get('post')
122 dev = groups.get('dev')
125 postdev.extend([FINAL_MARKER[0], 'post', int(post)])
127 postdev.append(FINAL_MARKER[0])
129 postdev.extend(['dev', int(dev)])
130 parts.append(tuple(postdev))
132 parts.append(FINAL_MARKER)
133 self.parts = tuple(parts)
134 if error_on_huge_major_num and self.parts[0][0] > 1980:
135 raise HugeMajorVersionNumError("huge major version number, %r, "
136 "which might cause future problems: %r" % (self.parts[0][0], s))
138 def _parse_numdots(self, s, full_ver_str, drop_trailing_zeros=True,
140 """Parse 'N.N.N' sequences, return a list of ints.
142 @param s {str} 'N.N.N...' sequence to be parsed
143 @param full_ver_str {str} The full version string from which this
144 comes. Used for error strings.
145 @param drop_trailing_zeros {bool} Whether to drop trailing zeros
146 from the returned list. Default True.
147 @param pad_zeros_length {int} The length to which to pad the
148 returned list with zeros, if necessary. Default 0.
151 for n in s.split("."):
152 if len(n) > 1 and n[0] == '0':
153 raise IrrationalVersionError("cannot have leading zero in "
154 "version number segment: '%s' in %r" % (n, full_ver_str))
156 if drop_trailing_zeros:
157 while nums and nums[-1] == 0:
159 while len(nums) < pad_zeros_length:
164 return self.parts_to_str(self.parts)
167 def parts_to_str(cls, parts):
168 """Transforms a version expressed in tuple into its string
170 # XXX This doesn't check for invalid tuples
171 main, prerel, postdev = parts
172 s = '.'.join(str(v) for v in main)
173 if prerel is not FINAL_MARKER:
175 s += '.'.join(str(v) for v in prerel[1:])
176 if postdev and postdev is not FINAL_MARKER:
177 if postdev[0] == 'f':
178 postdev = postdev[1:]
180 while i < len(postdev):
188 return "%s('%s')" % (self.__class__.__name__, self)
190 def _cannot_compare(self, other):
191 raise TypeError("cannot compare %s and %s"
192 % (type(self).__name__, type(other).__name__))
194 def __eq__(self, other):
195 if not isinstance(other, NormalizedVersion):
196 self._cannot_compare(other)
197 return self.parts == other.parts
199 def __lt__(self, other):
200 if not isinstance(other, NormalizedVersion):
201 self._cannot_compare(other)
202 return self.parts < other.parts
204 def __ne__(self, other):
205 return not self.__eq__(other)
207 def __gt__(self, other):
208 return not (self.__lt__(other) or self.__eq__(other))
210 def __le__(self, other):
211 return self.__eq__(other) or self.__lt__(other)
213 def __ge__(self, other):
214 return self.__eq__(other) or self.__gt__(other)
216 def suggest_normalized_version(s):
217 """Suggest a normalized version close to the given version string.
219 If you have a version string that isn't rational (i.e. NormalizedVersion
220 doesn't like it) then you might be able to get an equivalent (or close)
221 rational version from this function.
223 This does a number of simple normalizations to the given string, based
224 on observation of versions currently in use on PyPI. Given a dump of
225 those versions during PyCon 2009, 4287 of them:
226 - 2312 (53.93%) match NormalizedVersion without change
227 - with the automatic suggestion
228 - 3474 (81.04%) match when using this suggestion method
230 @param s {str} An irrational version string.
231 @returns A rational version string, or None, if couldn't determine one.
235 return s # already rational
236 except IrrationalVersionError:
241 # part of this could use maketrans
242 for orig, repl in (('-alpha', 'a'), ('-beta', 'b'), ('alpha', 'a'),
243 ('beta', 'b'), ('rc', 'c'), ('-final', ''),
245 ('-release', ''), ('.release', ''), ('-stable', ''),
246 ('+', '.'), ('_', '.'), (' ', ''), ('.final', ''),
248 rs = rs.replace(orig, repl)
250 # if something ends with dev or pre, we add a 0
251 rs = re.sub(r"pre$", r"pre0", rs)
252 rs = re.sub(r"dev$", r"dev0", rs)
254 # if we have something like "b-2" or "a.2" at the end of the
255 # version, that is probably beta, alpha, etc
256 # let's remove the dash or dot
257 rs = re.sub(r"([abc|rc])[\-\.](\d+)$", r"\1\2", rs)
259 # 1.0-dev-r371 -> 1.0.dev371
260 # 0.1-dev-r79 -> 0.1.dev79
261 rs = re.sub(r"[\-\.](dev)[\-\.]?r?(\d+)$", r".\1\2", rs)
263 # Clean: 2.0.a.3, 2.0.b1, 0.9.0~c1
264 rs = re.sub(r"[.~]?([abc])\.?", r"\1", rs)
267 if rs.startswith('v'):
270 # Clean leading '0's on numbers.
271 #TODO: unintended side-effect on, e.g., "2003.05.09"
272 # PyPI stats: 77 (~2%) better
273 rs = re.sub(r"\b0+(\d+)(?!\d)", r"\1", rs)
275 # Clean a/b/c with no version. E.g. "1.0a" -> "1.0a0". Setuptools infers
277 # PyPI stats: 245 (7.56%) better
278 rs = re.sub(r"(\d+[abc])$", r"\g<1>0", rs)
280 # the 'dev-rNNN' tag is a dev tag
281 rs = re.sub(r"\.?(dev-r|dev\.r)\.?(\d+)$", r".dev\2", rs)
283 # clean the - when used as a pre delimiter
284 rs = re.sub(r"-(a|b|c)(\d+)$", r"\1\2", rs)
286 # a terminal "dev" or "devel" can be changed into ".dev0"
287 rs = re.sub(r"[\.\-](dev|devel)$", r".dev0", rs)
289 # a terminal "dev" can be changed into ".dev0"
290 rs = re.sub(r"(?![\.\-])dev$", r".dev0", rs)
292 # a terminal "final" or "stable" can be removed
293 rs = re.sub(r"(final|stable)$", "", rs)
295 # The 'r' and the '-' tags are post release tags
296 # 0.4a1.r10 -> 0.4a1.post10
297 # 0.9.33-17222 -> 0.9.33.post17222
298 # 0.9.33-r17222 -> 0.9.33.post17222
299 rs = re.sub(r"\.?(r|-|-r)\.?(\d+)$", r".post\2", rs)
301 # Clean 'r' instead of 'dev' usage:
302 # 0.9.33+r17222 -> 0.9.33.dev17222
303 # 1.0dev123 -> 1.0.dev123
304 # 1.0.git123 -> 1.0.dev123
305 # 1.0.bzr123 -> 1.0.dev123
306 # 0.1a0dev.123 -> 0.1a0.dev123
307 # PyPI stats: ~150 (~4%) better
308 rs = re.sub(r"\.?(dev|git|bzr)\.?(\d+)$", r".dev\2", rs)
310 # Clean '.pre' (normalized from '-pre' above) instead of 'c' usage:
313 # 1.0preview123 -> 1.0c123
314 # PyPI stats: ~21 (0.62%) better
315 rs = re.sub(r"\.?(pre|preview|-c)(\d+)$", r"c\g<2>", rs)
318 # Tcl/Tk uses "px" for their post release markers
319 rs = re.sub(r"p(\d+)$", r".post\1", rs)
322 NormalizedVersion(rs)
323 return rs # already rational
324 except IrrationalVersionError: