From a9f2ab3734de06ec32b42e87294570b4e8877d01 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Piotr=20Ma=C5=9Blanka?= <piotr.maslanka@henrietta.com.pl>
Date: Fri, 6 Aug 2021 17:38:48 +0200
Subject: [PATCH] v2.7

---
 CHANGELOG.md           |  4 ++--
 Dockerfile             |  2 ++
 docs/usage.rst         |  9 +++++++++
 minijson.pyx           | 20 ++++++++++++++++----
 setup.cfg              |  2 +-
 tests/test_minijson.py |  5 +++++
 6 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fbf5cf8..5faaf8c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
 Changelog is kept at [GitHub](https://github.com/Dronehub/minijson/releases),
 here's only the changelog for the version in development
 
-# v2.6
+# v2.7
 
-* added support for serializing and unserializing binary values
+* added support for strict ordering
diff --git a/Dockerfile b/Dockerfile
index a6138e8..b455368 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -6,6 +6,8 @@ RUN python -m pip install Cython pytest coverage pytest-cov auditwheel doctor-wh
 WORKDIR /tmp/compile
 ADD . /tmp/compile/
 
+ENV DEBUG=1
+
 RUN python setup.py install && \
     chmod ugo+x /tmp/compile/tests/test.sh
 
diff --git a/docs/usage.rst b/docs/usage.rst
index 2685b86..540aa84 100644
--- a/docs/usage.rst
+++ b/docs/usage.rst
@@ -78,3 +78,12 @@ There's also a class available for encoding. Use it like you would a normal Pyth
 
 .. autoclass:: minijson.MiniJSONEncoder
     :members:
+
+.. warning:: The exact binary content output by :class:`~minijson.MiniJSONEncoder` will
+    depend on the order in which Python iterates over each dictionary. To have equal dicts always
+    produce the same representation, pass :code:`use_strict_order=True` to :class:`~minijson.MiniJSONEncoder`.
+
+:class:`~minijson.MiniJSONEncoder` will then extract the items from the dictionary,
+and sort them before dumping them to binary output.
+
+Only then is strict order guaranteed. Note that your keys must be comparable with each other for the sort to work.
diff --git a/minijson.pyx b/minijson.pyx
index 4de6730..4cc6240 100644
--- a/minijson.pyx
+++ b/minijson.pyx
@@ -323,18 +323,23 @@ cdef class MiniJSONEncoder:
 
     :param default: a default function used
     :param use_double: whether to use doubles instead of floats to represent floating point numbers
-
+    :param use_strict_order: if set to True, dictionaries will be encoded by first
+        extracting their items and sorting them, so that two equal dicts
+        will always be encoded in the same way.
     :ivar use_double: (bool) whether to use doubles instead of floats (used when
         :meth:`~minijson.MiniJSONEncoder.should_double_be_used` is not overrided)
     """
     cdef:
         object _default
         public bint use_double
+        public bint use_strict_order
 
     def __init__(self, default: tp.Optional[None] = None,
-                 bint use_double = False):
+                 bint use_double = False,
+                 bint use_strict_order = False):
         self._default = default
         self.use_double = use_double
+        self.use_strict_order = use_strict_order
 
     def should_double_be_used(self, y) -> bool:
         """
@@ -385,6 +390,7 @@ cdef class MiniJSONEncoder:
             str field_name
             unsigned int length
             bytes b_data
+            list items
         if data is None:
             cio.write(b'\x08')
             return 1
@@ -508,7 +514,10 @@ cdef class MiniJSONEncoder:
                     cio.write(b'\x12')
                     cio.write(STRUCT_L.pack(length))
                     length = 5
-                for field_name, elem in data.items():
+                items = list(data.items())
+                if self.use_strict_order:
+                    items.sort()
+                for field_name, elem in items:
                     cio.write(bytearray([len(field_name)]))
                     cio.write(field_name.encode('utf-8'))
                     length += self.dump(elem, cio)
@@ -529,7 +538,10 @@ cdef class MiniJSONEncoder:
                     cio.write(STRUCT_L.pack(length))
                     offset = 5
 
-                for key, value in data.items():
+                items = list(data.items())
+                if self.use_strict_order:
+                    items.sort()
+                for key, value in items:
                     offset += self.dump(key, cio)
                     offset += self.dump(value, cio)
                 return offset
diff --git a/setup.cfg b/setup.cfg
index 664a727..64b684a 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
 # coding: utf-8
 [metadata]
-version = 2.6
+version = 2.7
 name = minijson
 long_description = file: README.md
 long_description_content_type = text/markdown; charset=UTF-8
diff --git a/tests/test_minijson.py b/tests/test_minijson.py
index 0fdedcb..31f686b 100644
--- a/tests/test_minijson.py
+++ b/tests/test_minijson.py
@@ -6,6 +6,11 @@ from minijson import dumps, loads, dumps_object, loads_object, EncodingError, De
 
 class TestMiniJSON(unittest.TestCase):
 
+    def test_encoder_strict_output(self):
+        enc = MiniJSONEncoder(use_strict_order=True)
+        enc.encode({"test": "2", "value": 2})
+        enc.encode({b"test": "2", b"value": 2})
+
     def test_encoder_overrided_default(self):
         class Encoder(MiniJSONEncoder):
             def default(self, v):
-- 
GitLab