diff --git a/CHANGELOG.md b/CHANGELOG.md index d75a0e5355466d45afc51f409e31d45ca9906c21..c362bfe19edda26d6e6303f8b5eaba6a14447e7d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,4 @@ # v2.25.3 * CacheDict will now accept time-like strings +* add list_values_to_indices diff --git a/docs/coding/transforms.rst b/docs/coding/transforms.rst index 4f44c5eef1958dbb3897dd6df09574e251f19553..fa0ad0448a9ad993435c20d2457c06bba003b2fe 100644 --- a/docs/coding/transforms.rst +++ b/docs/coding/transforms.rst @@ -19,6 +19,8 @@ Rudimentary data transforms and algorithms .. autofunction:: satella.coding.transforms.b64encode +.. autofunction:: satella.coding.transforms.list_values_to_indices + pad_to_multiple_of_length ------------------------- diff --git a/satella/__init__.py b/satella/__init__.py index c3efbdc08113a5d2c8d6d5aecd672f6ad408013a..6faf7eb7c689dd92194f30ba4bd38147ac33b98f 100644 --- a/satella/__init__.py +++ b/satella/__init__.py @@ -1,3 +1,3 @@ -__version__ = '2.25.3a2' +__version__ = '2.25.3a3' diff --git a/satella/coding/transforms/__init__.py b/satella/coding/transforms/__init__.py index ec6192641883e9031477b2c3a97de9eddbfa67ea..b87b5a050c37f8cae343ba340b024e4cf3de989c 100644 --- a/satella/coding/transforms/__init__.py +++ b/satella/coding/transforms/__init__.py @@ -12,11 +12,12 @@ from .merger import merge_series from .percentile import percentile from .predicates import is_subset from .words import hashables_to_int +from .misc import list_values_to_indices __all__ = ['stringify', 'split_shuffle_and_join', 'one_tuple', 'none_if_false', 'merge_series', 'pad_to_multiple_of_length', 'clip', 'hashables_to_int', 'jsonify', 'intify', 'percentile', 'b64encode', 'linear_interpolate', - 'merge_list', 'is_subset', 'unpack_dict'] + 'merge_list', 'is_subset', 'unpack_dict', 'list_values_to_indices'] from satella.coding.typing import T, NoArgCallable, Appendable, Number, Predicate, K, V diff --git a/satella/coding/transforms/misc.py b/satella/coding/transforms/misc.py 
def list_values_to_indices(lst: tp.Iterable[V]) -> tp.Dict[V, int]:
    """
    Transform a list of entries into a dict mapping each entry to the index it was found at.

    Example:

    >>> a = ['abc', 'def', 'ghi']
    >>> b = list_values_to_indices(a)
    >>> assert b == {'abc': 0, 'def': 1, 'ghi': 2}

    :param lst: list (or any other iterable) to process. Entries become dict keys, so they
        have to be hashable. Take care for the list to be composed of unique entries.
    :return: a dict mapping every entry to its zero-based position in lst
    :raises ValueError: item was found more than once
    """
    # The signature uses tp.Dict instead of dict[...]: subscripting the builtin dict
    # raises TypeError at definition time on Python versions older than 3.9.
    result = {}
    for idx, val in enumerate(lst):
        # Reject duplicates explicitly instead of silently keeping the last index.
        if val in result:
            raise ValueError(f'Entry {val} found more than once!')
        result[val] = idx
    return result