From 91f57b8f4e74b010b5bb98ed10957bff2c9d7d89 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Piotr=20Ma=C5=9Blanka?= <piotr.maslanka@ericsson.com>
Date: Mon, 15 Apr 2024 08:25:34 +0200
Subject: [PATCH] add list_values_to_indices

---
 CHANGELOG.md                          |  1 +
 docs/coding/transforms.rst            |  2 ++
 satella/__init__.py                   |  2 +-
 satella/coding/transforms/__init__.py |  3 ++-
 satella/coding/transforms/misc.py     | 25 +++++++++++++++++++++++++
 tests/test_coding/test_transforms.py  |  9 ++++++++-
 6 files changed, 39 insertions(+), 3 deletions(-)
 create mode 100644 satella/coding/transforms/misc.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d75a0e53..c362bfe1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,4 @@
 # v2.25.3
 
 * CacheDict will now accept time-like strings
+* add list_values_to_indices
diff --git a/docs/coding/transforms.rst b/docs/coding/transforms.rst
index 4f44c5ee..fa0ad044 100644
--- a/docs/coding/transforms.rst
+++ b/docs/coding/transforms.rst
@@ -19,6 +19,8 @@ Rudimentary data transforms and algorithms
 
 .. autofunction:: satella.coding.transforms.b64encode
 
+.. autofunction:: satella.coding.transforms.list_values_to_indices
+
 pad_to_multiple_of_length
 -------------------------
 
diff --git a/satella/__init__.py b/satella/__init__.py
index c3efbdc0..6faf7eb7 100644
--- a/satella/__init__.py
+++ b/satella/__init__.py
@@ -1,3 +1,3 @@
-__version__ = '2.25.3a2'
+__version__ = '2.25.3a3'
 
 
diff --git a/satella/coding/transforms/__init__.py b/satella/coding/transforms/__init__.py
index ec619264..b87b5a05 100644
--- a/satella/coding/transforms/__init__.py
+++ b/satella/coding/transforms/__init__.py
@@ -12,11 +12,12 @@ from .merger import merge_series
 from .percentile import percentile
 from .predicates import is_subset
 from .words import hashables_to_int
+from .misc import list_values_to_indices
 
 __all__ = ['stringify', 'split_shuffle_and_join', 'one_tuple', 'none_if_false', 'merge_series',
            'pad_to_multiple_of_length', 'clip', 'hashables_to_int', 'jsonify', 'intify',
            'percentile', 'b64encode', 'linear_interpolate',
-           'merge_list', 'is_subset', 'unpack_dict']
+           'merge_list', 'is_subset', 'unpack_dict', 'list_values_to_indices']
 
 from satella.coding.typing import T, NoArgCallable, Appendable, Number, Predicate, K, V
 
diff --git a/satella/coding/transforms/misc.py b/satella/coding/transforms/misc.py
new file mode 100644
index 00000000..a64282be
--- /dev/null
+++ b/satella/coding/transforms/misc.py
@@ -0,0 +1,25 @@
+import typing as tp
+
+from satella.coding.typing import V
+
+
+def list_values_to_indices(lst: tp.List[V]) -> tp.Dict[V, int]:
+    """
+    Transform a list of entries into a dict mapping where given entry can be found.
+
+    Example:
+
+    >>> a = ['abc', 'def', 'ghi']
+    >>> b = list_values_to_indices(a)
+    >>> assert b == {'abc': 0, 'def': 1, 'ghi': 2}
+
+    :param lst: list to process. Take care for the list to be composed of unique entries.
+    :return: a dict mapping each entry of lst to the index at which it occurs
+    :raises ValueError: item was found more than once
+    """
+    result = {}
+    for idx, val in enumerate(lst):
+        if val in result:
+            raise ValueError(f'Entry {val} found more than once!')
+        result[val] = idx
+    return result
diff --git a/tests/test_coding/test_transforms.py b/tests/test_coding/test_transforms.py
index 7f49e282..ddcabc28 100644
--- a/tests/test_coding/test_transforms.py
+++ b/tests/test_coding/test_transforms.py
@@ -6,11 +6,18 @@ import base64
 from satella.coding.predicates import x
 from satella.coding.transforms import stringify, split_shuffle_and_join, one_tuple, \
     merge_series, pad_to_multiple_of_length, clip, b64encode, linear_interpolate, \
-    hashables_to_int, none_if_false, merge_list, is_subset, unpack_dict
+    hashables_to_int, none_if_false, merge_list, is_subset, unpack_dict, list_values_to_indices
 
 
 class TestTransforms(unittest.TestCase):
 
+    def test_list_values_to_indices(self):
+        a = ['abc', 'def', 'ghi']
+        b = list_values_to_indices(a)
+        self.assertEqual(b, {'abc': 0, 'def': 1, 'ghi': 2})
+        c = ['abc', 'abc', 'def']
+        self.assertRaises(ValueError, list_values_to_indices, c)
+
     def test_unpack_dict(self):
         a, b, c = unpack_dict({1: 2, 2: 3, 4: 5}, 1, 2, 4)
         self.assertTrue(a == 2 and b == 3 and c == 5)
-- 
GitLab