# {{{ sorting in natural order

def natorder(item):
    """Return a key for natural order string comparison.

    See :func:`natsorted`.

    *item* is split into maximal runs of digits and non-digits. The returned
    key is a list holding one tuple per run:

    * ``(0, value, -ndigits)`` for a run of digits, where *value* is the run
      converted to :class:`int` and *ndigits* is the length of the run, and
    * ``(1, text)`` for any other run.

    The leading tag makes every pair of keys comparable: without it, a digit
    run in one string lining up with a non-digit run in another (e.g.
    ``"a1"`` vs. ``"1a"``) would compare an :class:`int` against a
    :class:`str`, which raises :exc:`TypeError` on Python 3. Digit runs
    compare smaller than non-digit runs.

    :arg item: the string for which to compute a key.
    :returns: a list of tuples usable as a sort key.

    .. versionadded:: 2020.1
    """
    import re
    result = []
    for (int_val, string_val) in re.findall(r"(\d+)|(\D+)", item):
        if int_val:
            # The last entry is a tie-breaker in case of leading zeros in
            # *int_val*. Longer values compare smaller to preserve order of
            # numbers in decimal notation, e.g., "1.001" < "1.01"
            # (cf. https://github.com/sourcefrog/natsort)
            result.append((0, int(int_val), -len(int_val)))
        else:
            result.append((1, string_val))
    return result


def natsorted(iterable, key=None, reverse=False):
    """Sort using natural order [1]_, as opposed to lexicographic order.

    Example::

        >>> sorted(["_10", "_1", "_9"]) == ["_1", "_10", "_9"]
        True
        >>> natsorted(["_10", "_1", "_9"]) == ["_1", "_9", "_10"]
        True

    Strings that begin with different kinds of characters can be mixed
    freely; runs of digits order before runs of other characters::

        >>> natsorted(["a1", "1a"]) == ["1a", "a1"]
        True

    :arg iterable: an iterable to be sorted. It must only have strings, unless
        *key* is specified.
    :arg key: if provided, a key function that returns strings for ordering
        using natural order.
    :arg reverse: if *True*, sorts in descending order.

    :returns: a sorted list

    .. [1] https://en.wikipedia.org/wiki/Natural_sort_order

    .. versionadded:: 2020.1
    """
    if key is None:
        # Items are already strings: use the natural-order key directly.
        sort_key = natorder
    else:
        def sort_key(value):
            # Map the item to a string first, then to its natural-order key.
            return natorder(key(value))

    return sorted(iterable, key=sort_key, reverse=reverse)

# }}}