From 91f57b8f4e74b010b5bb98ed10957bff2c9d7d89 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Piotr=20Ma=C5=9Blanka?= <piotr.maslanka@ericsson.com>
Date: Mon, 15 Apr 2024 08:25:34 +0200
Subject: [PATCH] add list_values_to_indices

---
 CHANGELOG.md                          |  1 +
 docs/coding/transforms.rst            |  2 ++
 satella/__init__.py                   |  2 +-
 satella/coding/transforms/__init__.py |  3 ++-
 satella/coding/transforms/misc.py     | 24 ++++++++++++++++++++++++
 tests/test_coding/test_transforms.py  |  9 ++++++++-
 6 files changed, 38 insertions(+), 3 deletions(-)
 create mode 100644 satella/coding/transforms/misc.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d75a0e53..c362bfe1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,4 @@
 # v2.25.3
 
 * CacheDict will now accept time-like strings
+* add list_values_to_indices
diff --git a/docs/coding/transforms.rst b/docs/coding/transforms.rst
index 4f44c5ee..fa0ad044 100644
--- a/docs/coding/transforms.rst
+++ b/docs/coding/transforms.rst
@@ -19,6 +19,8 @@ Rudimentary data transforms and algorithms
 
 .. autofunction:: satella.coding.transforms.b64encode
 
+.. autofunction:: satella.coding.transforms.list_values_to_indices
+
 pad_to_multiple_of_length
 -------------------------
 
diff --git a/satella/__init__.py b/satella/__init__.py
index c3efbdc0..6faf7eb7 100644
--- a/satella/__init__.py
+++ b/satella/__init__.py
@@ -1,3 +1,3 @@
-__version__ = '2.25.3a2'
+__version__ = '2.25.3a3'
 
 
diff --git a/satella/coding/transforms/__init__.py b/satella/coding/transforms/__init__.py
index ec619264..b87b5a05 100644
--- a/satella/coding/transforms/__init__.py
+++ b/satella/coding/transforms/__init__.py
@@ -12,11 +12,12 @@ from .merger import merge_series
 from .percentile import percentile
 from .predicates import is_subset
 from .words import hashables_to_int
+from .misc import list_values_to_indices
 
 __all__ = ['stringify', 'split_shuffle_and_join', 'one_tuple', 'none_if_false',
            'merge_series', 'pad_to_multiple_of_length', 'clip', 'hashables_to_int',
            'jsonify', 'intify', 'percentile', 'b64encode', 'linear_interpolate',
-           'merge_list', 'is_subset', 'unpack_dict']
+           'merge_list', 'is_subset', 'unpack_dict', 'list_values_to_indices']
 
 from satella.coding.typing import T, NoArgCallable, Appendable, Number, Predicate, K, V
 
diff --git a/satella/coding/transforms/misc.py b/satella/coding/transforms/misc.py
new file mode 100644
index 00000000..a64282be
--- /dev/null
+++ b/satella/coding/transforms/misc.py
@@ -0,0 +1,24 @@
+import typing as tp
+
+from satella.coding.typing import V
+
+
+def list_values_to_indices(lst: tp.List[V]) -> tp.Dict[V, int]:
+    """
+    Return a dict mapping each entry of a list to the index at which it occurs.
+
+    Example:
+
+    >>> a = ['abc', 'def', 'ghi']
+    >>> b = list_values_to_indices(a)
+    >>> assert b == {'abc': 0, 'def': 1, 'ghi': 2}
+
+    :param lst: list to process. Its entries must be hashable and unique.
+    :raises ValueError: item was found more than once
+    """
+    result = {}     # entry -> index of its (only) occurrence in lst
+    for idx, val in enumerate(lst):
+        if val in result:
+            raise ValueError(f'Entry {val} found more than once!')
+        result[val] = idx
+    return result
diff --git a/tests/test_coding/test_transforms.py b/tests/test_coding/test_transforms.py
index 7f49e282..ddcabc28 100644
--- a/tests/test_coding/test_transforms.py
+++ b/tests/test_coding/test_transforms.py
@@ -6,11 +6,18 @@ import base64
 from satella.coding.predicates import x
 from satella.coding.transforms import stringify, split_shuffle_and_join, one_tuple, \
     merge_series, pad_to_multiple_of_length, clip, b64encode, linear_interpolate, \
-    hashables_to_int, none_if_false, merge_list, is_subset, unpack_dict
+    hashables_to_int, none_if_false, merge_list, is_subset, unpack_dict, list_values_to_indices
 
 
 class TestTransforms(unittest.TestCase):
 
+    def test_list_values_to_indices(self):
+        unique = ['abc', 'def', 'ghi']  # distinct entries map to their positions
+        expected = {'abc': 0, 'def': 1, 'ghi': 2}
+        self.assertEqual(list_values_to_indices(unique), expected)
+        with self.assertRaises(ValueError):  # a repeated entry must be rejected
+            list_values_to_indices(['abc', 'abc', 'def'])
+
     def test_unpack_dict(self):
         a, b, c = unpack_dict({1: 2, 2: 3, 4: 5}, 1, 2, 4)
         self.assertTrue(a == 2 and b == 3 and c == 5)
-- 
GitLab