Search code examples
pythonsortingdistutils

Sort Versions in Python


I'm trying to get it so that 1.7.0 comes after 1.7.0.rc0 but before 1.8.0, as it should if you were sorting versions. I thought the whole point of LooseVersion was that it handled the sorting and comparison of this kind of thing correctly.

>>> from distutils.version import LooseVersion
>>> versions = ["1.7.0", "1.7.0.rc0", "1.8.0"]
>>> lv = [LooseVersion(v) for v in versions]
>>> sorted(lv, reverse=True)
[LooseVersion ('1.8.0'), LooseVersion ('1.7.0.rc0'), LooseVersion ('1.7.0')]

Solution

  • MAJOR EDIT: old answer was too unpythonic. Here are two prettier solutions.

    So, I currently see about three ways of achieving the wished ordering, releases candidates "rc" before actual releases.

    1. my old, imperative-style ordering
    2. use "b" instead of "rc" in order to use StrictVersion, from the same package
    3. extend the Version class to add support for arbitrary tags and tag ordering

    1. Old, imperative-style ordering

    from distutils.version import LooseVersion
    versions = ["1.7.0", "1.7.0.rc0", "1.8.0"]
    lv = [LooseVersion(v) for v in versions]
    lv.sort()
    
    sorted_rc = [v.vstring for v in lv]
    
    import re
    p = re.compile('rc\\d+$')
    
    i = 0
    
    # skip the first RCs
    while i + 1 < len(sorted_rc):
        m = p.search(sorted_rc[i])
        if m:
            i += 1
        else:
            break
    
    while i + 1 < len(sorted_rc):
        tmp = sorted_rc[i]
        m = p.search(sorted_rc[i+1])
        if m and sorted_rc[i+1].startswith(tmp):
            sorted_rc[i] = sorted_rc[i+1]
            sorted_rc[i+1] = tmp
        i += 1
    

    with this I get:

    ['1.7.0rc0', '1.7.0', '1.11.0']
    

    2. Use "b" instead of "rc"

    The package distutils.version also has another class, StrictVersion which does the job, if your 1.7.0.rc0 is allowed to be written as 1.7.0a0 or 1.7.0b0 noting alpha or beta releases.

    That is:

    from distutils.version import StrictVersion
    versions = ["1.7.0", "1.7.0b0", "1.11.0"]
    sorted(versions, key=StrictVersion)
    

    This gives:

    ['1.7.0b0', '1.7.0', '1.11.0']
    

    Translation from one form to another can be done using the re module.

    3. Extend the Version class

    The obvious problem of the previous solution is the lack of flexibility of StrictVersion. Altering the version_re class attribute to use rc instead of a or b, even if it accepts 1.7.1rc0, still prints it as 1.7.1r0 (as of python 2.7.3).

    We can get it right by implementing our own custom version class. This can be done like this, with some unit tests to ensure correctness at least in some cases:

    #!/usr/bin/python
    # file: version2.py
    
    from distutils import version
    import re
    import functools
    
    @functools.total_ordering
    class NumberedVersion(version.Version):
        """
        A more flexible implementation of distutils.version.StrictVersion
    
        This implementation allows to specify:
        - an arbitrary number of version numbers:
            not only '1.2.3' , but also '1.2.3.4.5'
        - the separator between version numbers:
            '1-2-3' is allowed when '-' is specified as separator
        - an arbitrary ordering of pre-release tags:
            1.1alpha3 < 1.1beta2 < 1.1rc1 < 1.1
            when ["alpha", "beta", "rc"] is specified as pre-release tag list
        """
    
        def __init__(self, vstring=None, sep='.', prerel_tags=('a', 'b')):
            version.Version.__init__(self) 
                # super() is better here, but Version is an old-style class
    
            self.sep = sep
            self.prerel_tags = dict(zip(prerel_tags, xrange(len(prerel_tags))))
            self.version_re = self._compile_pattern(sep, self.prerel_tags.keys())
            self.sep_re = re.compile(re.escape(sep))
    
            if vstring:
                self.parse(vstring)
    
    
        _re_prerel_tag = 'rel_tag'
        _re_prerel_num = 'tag_num'
    
        def _compile_pattern(self, sep, prerel_tags):
            sep = re.escape(sep)
            tags = '|'.join(re.escape(tag) for tag in prerel_tags)
    
            if tags:
                release_re = '(?:(?P<{tn}>{tags})(?P<{nn}>\d+))?'\
                    .format(tags=tags, tn=self._re_prerel_tag, nn=self._re_prerel_num)
            else:
                release_re = ''
    
            return re.compile(r'^(\d+)(?:{sep}(\d+))*{rel}$'\
                .format(sep=sep, rel=release_re))
    
        def parse(self, vstring):
            m = self.version_re.match(vstring)
            if not m:
                raise ValueError("invalid version number '{}'".format(vstring))
    
            tag = m.group(self._re_prerel_tag)
            tag_num = m.group(self._re_prerel_num)
    
            if tag is not None and tag_num is not None:
                self.prerelease = (tag, int(tag_num))
                vnum_string = vstring[:-(len(tag) + len(tag_num))]
            else:
                self.prerelease = None
                vnum_string = vstring
    
            self.version = tuple(map(int, self.sep_re.split(vnum_string)))
    
    
        def __repr__(self):
            return "{cls} ('{vstring}', '{sep}', {prerel_tags})"\
                .format(cls=self.__class__.__name__, vstring=str(self),
                    sep=self.sep, prerel_tags = list(self.prerel_tags.keys()))
    
        def __str__(self):
            s = self.sep.join(map(str,self.version))
            if self.prerelease:
                return s + "{}{}".format(*self.prerelease)
            else:
                return s
    
        def __lt__(self, other):
            """
            Fails when  the separator is not the same or when the pre-release tags
            are not the same or do not respect the same order.
            """
            # TODO deal with trailing zeroes: e.g. "1.2.0" == "1.2"
            if self.prerel_tags != other.prerel_tags or self.sep != other.sep:
                raise ValueError("Unable to compare: instances have different"
                    " structures")
    
            if self.version == other.version and self.prerelease is not None and\
                    other.prerelease is not None:
    
                tag_index = self.prerel_tags[self.prerelease[0]]
                other_index = self.prerel_tags[other.prerelease[0]]
                if tag_index == other_index:
                    return self.prerelease[1] < other.prerelease[1]
    
                return tag_index < other_index
    
            elif self.version == other.version:
                return self.prerelease is not None and other.prerelease is None
    
            return self.version < other.version
    
        def __eq__(self, other):
            tag_index = self.prerel_tags[self.prerelease[0]]
            other_index = other.prerel_tags[other.prerelease[0]]
            return self.prerel_tags == other.prerel_tags and self.sep == other.sep\
                and self.version == other.version and tag_index == other_index and\
                    self.prerelease[1] == other.prerelease[1]
    
    
    
    
    import unittest
    
    class TestNumberedVersion(unittest.TestCase):
    
        def setUp(self):
            self.v = NumberedVersion()
    
        def test_compile_pattern(self):
            p = self.v._compile_pattern('.', ['a', 'b'])
            tests = {'1.2.3': True, '1a0': True, '1': True, '1.2.3.4a5': True,
                'b': False, '1c0': False, ' 1': False, '': False}
            for test, result in tests.iteritems():
                self.assertEqual(result, p.match(test) is not None, \
                    "test: {} result: {}".format(test, result))
    
    
        def test_parse(self):
            tests = {"1.2.3.4a5": ((1, 2, 3, 4), ('a', 5))}
            for test, result in tests.iteritems():
                self.v.parse(test)
                self.assertEqual(result, (self.v.version, self.v.prerelease))
    
        def test_str(self):
            tests = (('1.2.3',), ('10-2-42rc12', '-', ['rc']))
            for t in tests:
                self.assertEqual(t[0], str(NumberedVersion(*t)))
    
        def test_repr(self):
            v = NumberedVersion('1,2,3rc4', ',', ['lol', 'rc'])
            expected = "NumberedVersion ('1,2,3rc4', ',', ['lol', 'rc'])"
            self.assertEqual(expected, repr(v))
    
    
        def test_order(self):
            test = ["1.7.0", "1.7.0rc0", "1.11.0"]
            expected = ['1.7.0rc0', '1.7.0', '1.11.0']
            versions = [NumberedVersion(v, '.', ['rc']) for v in test]
            self.assertEqual(expected, list(map(str,sorted(versions))))
    
    
    if __name__ == '__main__':
        unittest.main()
    

    So, it can be used like this:

    import version2
    versions = ["1.7.0", "1.7.0rc2", "1.7.0rc1", "1.7.1", "1.11.0"]
    sorted(versions, key=lambda v: version2.NumberedVersion(v, '.', ['rc']))
    

    output:

    ['1.7.0rc1', '1.7.0rc2', '1.7.0', '1.7.1', '1.11.0']
    

    So, in conclusion, use python's included batteries or roll out your own.

    About this implementation: it could be improved by dealing with the trailing zeroes in the releases, and memoize the compilation of the regular expressions.