diff --git a/CHANGELOG.md b/CHANGELOG.md index 40429d9dc477b7df543b5db451bb09511b537670..51d5f77ea5446a5ef384f4a3e35c0ccd117b4358 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1 +1,3 @@ # v2.14.39 + +* added `merge_list` diff --git a/satella/__init__.py b/satella/__init__.py index 0c50e56ed136efa9e7b011182fa21d800fce3a42..d8819987fe2d6866c97ae44b3a9abda4b6f0879d 100644 --- a/satella/__init__.py +++ b/satella/__init__.py @@ -1 +1 @@ -__version__ = '2.14.39a1' +__version__ = '2.14.39' diff --git a/satella/coding/transforms/__init__.py b/satella/coding/transforms/__init__.py index 429fdfa548be06450625b39ed6053632388fa1b3..93e26ac7fd384215193e1a6e0792c345a38a206f 100644 --- a/satella/coding/transforms/__init__.py +++ b/satella/coding/transforms/__init__.py @@ -6,6 +6,7 @@ import typing as tp from satella.coding.decorators import for_argument from .jsonify import jsonify from .merger import merge_series +from .merge_list import merge_list from .percentile import percentile from .base64 import b64encode from .interpol import linear_interpolate @@ -13,7 +14,8 @@ from .words import hashables_to_int __all__ = ['stringify', 'split_shuffle_and_join', 'one_tuple', 'none_if_false', 'merge_series', 'pad_to_multiple_of_length', 'clip', 'hashables_to_int', - 'jsonify', 'intify', 'percentile', 'b64encode', 'linear_interpolate'] + 'jsonify', 'intify', 'percentile', 'b64encode', 'linear_interpolate', + 'merge_list'] from satella.coding.typing import T, NoArgCallable, Appendable, Number, Predicate diff --git a/satella/coding/transforms/merge_list.py b/satella/coding/transforms/merge_list.py new file mode 100644 index 0000000000000000000000000000000000000000..6b3aa8ba7dc1485e6d523317d8abd2bf77705187 --- /dev/null +++ b/satella/coding/transforms/merge_list.py @@ -0,0 +1,75 @@ +from satella.coding.structures import Heap +from satella.coding.typing import K, V +import typing as tp + + +class merge_list(tp.Iterator[tp.Tuple[K, V]]): + """ + Merge two sorted lists. 
+ + This is an iterator which consumes elements as they are required. + + Each list must consist of 2-tuples, with the first element being the key. + The list has to be sorted by this value, ascending. + + When the algorithm encounters two identical keys, it calls merge_function on their + values and yields the result. + + :param lists: lists to merge + :param merge_function: a callable that accepts the two values sharing a key and returns the merged value + :return: a resulting iterator + """ + __slots__ = ('merge_func', 'its', 'heap', 'available_lists', + 'k', 'v', 'i', 'closed') + + def __iter__(self): + return self + + def __init__(self, *lists: tp.Iterator[tp.Tuple[K, V]], + merge_function: tp.Callable[[V, V], V]): + self.its = [iter(y) for y in lists] + self.merge_func = merge_function + self.available_lists = set() + self.heap = Heap() + self.closed = False + + for i, it in enumerate(self.its): + try: + self.heap.push((*next(it), i)) + self.available_lists.add(i) + except StopIteration: + pass + try: + self.k, self.v, self.i = self.pop() + except IndexError: + self.closed = True + + def __next__(self): + if self.closed: + raise StopIteration() + + try: + k2, v2, i2 = self.pop() + except IndexError: + self.closed = True + return self.k, self.v + + if k2 == self.k: + self.v = self.merge_func(self.v, v2) + return next(self) + + try: + return self.k, self.v + finally: + self.k, self.v, self.i = k2, v2, i2 + + def pop(self): + k, v, i = self.heap.pop() + if i in self.available_lists: + try: + k2, v2 = next(self.its[i]) + self.heap.push((k2, v2, i)) + except StopIteration: + self.available_lists.remove(i) + return k, v, i + diff --git a/tests/test_coding/test_transforms.py b/tests/test_coding/test_transforms.py index f5489d87d48f49f36eeaab599ea2eb19ac980974..cd0dd3c6d0b6d072d925a5c88b32fbc42fffcb53 100644 --- a/tests/test_coding/test_transforms.py +++ b/tests/test_coding/test_transforms.py @@ -1,14 +1,33 @@ import enum +import operator import unittest import base64 from 
satella.coding.transforms import stringify, split_shuffle_and_join, one_tuple, \ merge_series, pad_to_multiple_of_length, clip, b64encode, linear_interpolate, \ - hashables_to_int, none_if_false + hashables_to_int, none_if_false, merge_list class TestTransforms(unittest.TestCase): + def test_merge_list(self): + a = [(1, 1), (2, 2), (3, 3)] + b = [(1, 2), (3, 2), (4, 4)] + + c = merge_list(a, b, merge_function=operator.add) + self.assertEqual(list(c), [(1, 3), (2, 2), (3, 5), (4, 4)]) + + a = [(1, 1), (1, 3), (2, 2), (3, 3)] + b = [(1, 2), (3, 2), (4, 4)] + + c = merge_list(a, b, merge_function=operator.add) + self.assertEqual(list(c), [(1, 6), (2, 2), (3, 5), (4, 4)]) + + a = [] + b = [] + c = merge_list(a, b, merge_function=operator.add) + self.assertEqual(list(c), []) + def test_none_if_false(self): self.assertEqual(none_if_false(1), 1) self.assertEqual(none_if_false(''), None)