From 9a8f9e4fd6ec897c2b72d489febf6814b32cb4a1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Piotr=20Ma=C5=9Blanka?= <piotr.maslanka@henrietta.com.pl>
Date: Fri, 13 Aug 2021 19:16:13 +0200
Subject: [PATCH] v2.11 major bugfix

---
 docs/changelog.rst     |  7 +++++++
 minijson.pyx           | 15 ++++++++-------
 setup.cfg              |  2 +-
 tests/test_minijson.py | 37 ++++++++++++++++++++++++++++---------
 4 files changed, 44 insertions(+), 17 deletions(-)

diff --git a/docs/changelog.rst b/docs/changelog.rst
index cf27b0a..1c46f06 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -1,6 +1,13 @@
 Changelog
 =========
 
+v2.11
+-----
+
+* fixed a bug with serializing large integers
+* the error raised for an invalid UTF-8 field name now includes the repr of that field name
+* major bugfix: strings containing UTF-8 characters that take more than 1 byte were serialized incorrectly
+
 v2.10
 -----
 
diff --git a/minijson.pyx b/minijson.pyx
index 2b27067..b6f5aa6 100644
--- a/minijson.pyx
+++ b/minijson.pyx
@@ -89,7 +89,7 @@ cdef inline tuple parse_dict(bytes data, int elem_count, int starting_position):
         try:
             s_field_name = b_field_name.decode('utf-8')
         except UnicodeDecodeError as e:
-            raise DecodingError('Invalid UTF-8 field name!') from e
+            raise DecodingError('Invalid UTF-8 field name "%s"!' % (repr(b_field_name), )) from e
         offset += ofs
         ofs, elem = parse_bytes(data, starting_position+offset)
         offset += ofs
@@ -406,24 +406,25 @@ cdef class MiniJSONEncoder:
                 cio.write(b'\x1B' + struct.pack('>L', length))
                 cio.write(data)
         elif isinstance(data, str):
-            length = len(data)
+            b_data = data.encode('utf-8')
+            length = len(b_data)
             if length <= 0x7F:
                 cio.write(bytearray([0x80 | length]))
-                cio.write(data.encode('utf-8'))
+                cio.write(b_data)
                 return 1+length
             elif length <= 0xFF:
                 cio.write(bytearray([0, length]))
-                cio.write(data.encode('utf-8'))
+                cio.write(b_data)
                 return 2+length
             elif length <= 0xFFFF:
                 cio.write(b'\x0D')
                 cio.write(STRUCT_H.pack(length))
-                cio.write(data.encode('utf-8'))
+                cio.write(b_data)
                 return 3+length
             else:       # Python strings cannot grow past 0xFFFFFFFF characters
                 cio.write(b'\x0E')
                 cio.write(STRUCT_L.pack(length))
-                cio.write(data.encode('utf-8'))
+                cio.write(b_data)
                 return 5+length
         elif isinstance(data, int):
             if -0x80 <= data <= 0x7F: # signed char, type 3
@@ -461,7 +462,7 @@ cdef class MiniJSONEncoder:
                         break
                     except OverflowError:
                         length += 1
-                cio.write(bytearray([0x18, length]))
+                cio.write(bytearray([0x18, len(b_data)]))
                 cio.write(b_data)
         elif isinstance(data, float):
             if self.should_double_be_used(data):
diff --git a/setup.cfg b/setup.cfg
index 49efe8b..819c57b 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
 # coding: utf-8
 [metadata]
-version = 2.10
+version = 2.11
 name = minijson
 long_description = file: README.md
 long_description_content_type = text/markdown; charset=UTF-8
diff --git a/tests/test_minijson.py b/tests/test_minijson.py
index 22052ae..de288d6 100644
--- a/tests/test_minijson.py
+++ b/tests/test_minijson.py
@@ -18,7 +18,7 @@ class TestMiniJSON(unittest.TestCase):
                 return v.real, v.imag
 
         e = Encoder()
-        e.encode(2+3j)
+        e.encode(2 + 3j)
 
     def test_bytes(self):
         a = {b'test': b'dupa'}
@@ -27,18 +27,18 @@ class TestMiniJSON(unittest.TestCase):
         self.assertEqual(loads(b), a)
 
     def test_bytes_26(self):
-        a = b'x'*256
+        a = b'x' * 256
         e = MiniJSONEncoder()
         b = e.encode(a)
         self.assertEqual(loads(b), a)
-        self.assertEqual(len(b), 256+3)
+        self.assertEqual(len(b), 256 + 3)
 
     def test_bytes_27(self):
         e = MiniJSONEncoder()
-        a = b'x'*65537
+        a = b'x' * 65537
         b = e.encode(a)
         self.assertEqual(loads(b), a)
-        self.assertEqual(len(b), 65537+5)
+        self.assertEqual(len(b), 65537 + 5)
 
     def test_encoder_given_default(self):
         def encode(v):
@@ -49,7 +49,7 @@ class TestMiniJSON(unittest.TestCase):
 
     def test_encoder_no_default(self):
         e = MiniJSONEncoder()
-        self.assertRaises(EncodingError, lambda: e.encode(2+3j))
+        self.assertRaises(EncodingError, lambda: e.encode(2 + 3j))
 
     def test_accepts_bytearrays(self):
         b = {'test': 'hello'}
@@ -69,13 +69,15 @@ class TestMiniJSON(unittest.TestCase):
 
     def test_default_returns_nonjsonable(self):
         """Assert that if transform returns a non-JSONable value, EncodingError is raised"""
+
         def transform(c):
             return c
 
-        self.assertRaises(EncodingError, lambda: dumps(2+3j, transform))
+        self.assertRaises(EncodingError, lambda: dumps(2 + 3j, transform))
 
     def test_default(self):
         """Assert the default argument works"""
+
         def transform(c):
             return c.real, c.imag
 
@@ -84,6 +86,7 @@ class TestMiniJSON(unittest.TestCase):
 
     def test_subclasses_of_dicts(self):
         """Assert that you can correctly serialize subclasses of dict"""
+
         class Subclass(dict):
             pass
 
@@ -103,6 +106,7 @@ class TestMiniJSON(unittest.TestCase):
 
     def test_subclasses_of_lists(self):
         """Assert that you can correctly serialize subclasses of list"""
+
         class Subclass(list):
             pass
 
@@ -112,6 +116,7 @@ class TestMiniJSON(unittest.TestCase):
 
     def test_subclasses_of_tuples(self):
         """Assert that you can correctly serialize subclasses of tuple"""
+
         class Subclass(tuple):
             pass
 
@@ -145,7 +150,6 @@ class TestMiniJSON(unittest.TestCase):
         self.assertEqual(loads(b), 4.5)
         switch_default_float()
 
-
     def test_minijson_encoder_returns_a_bool_and_a_bytes(self):
         class Encoder(MiniJSONEncoder):
             def default(self, v):
@@ -155,9 +159,23 @@ class TestMiniJSON(unittest.TestCase):
                     return b'test'
 
         e = Encoder()
-        e.encode(3+4j)
+        e.encode(3 + 4j)
         e.encode(object())
 
+    def test_smok_bug(self):
+        a = [{'service': 'gui', 'when': 1628873412444398,
+              'message': "DEBUG:gui.run:Loaded {'aspect_ratio': {'width': 16, 'height': 9}, 'menu': [{'label': 'GBĂłwna', 'id': 'main'}], 'visualizations': [{'label': 'GBĂłwna', 'id': 'main', 'elements': [{'type': 'text', 'zindex': 15, 'text': 'Stan wejsciowy alarmu', 'top': 10, 'left': 10, 'size': 5}, {'type': 'input', 'zindex': 15, 'widget_type': {'type': 'input'}, 'fqts': 'prog wej1', 'readonly': False, 'top': 10, 'left': 20, 'input_size': 5}, {'type': 'text', 'zindex': 15, 'text': 'Stan wyjsciowy alarmu', 'top': 20, 'left': 10, 'size': 5}, {'type': 'input', 'zindex': 15, 'widget_type': {'type': 'input'}, 'fqts': 'prog wyj1', 'readonly': True, 'top': 20, 'left': 20, 'input_size': 5}]}]} as visualizations",
+              'level': 10}]
+        self.assertSameAfterDumpsAndLoads(a)
+        a = [{'service': 'gui', 'when': 1628873412444398,
+              'message': "DEBUG:gui.run:Loaded {'aspect_ratio': {'width': 16, 'height': 9}, 'menu': [{'label': 'GBĂłwna', 'id': 'main'}], 'visualizations': [{'label': 'GBĂłwna', 'id': 'main', 'elements': [{'type': 'text', 'zindex': 15, 'text': 'Stan wejsciowy alarmu', 'top': 10, 'left': 10, 'size': 5}, {'type': 'input', 'zindex': 15, 'widget_type': {'type': 'input'}, 'fqts': 'prog wej1', 'readonly': False, 'top': 10, 'left': 20, 'input_size': 5}, {'type': 'text', 'zindex': 15, 'text': 'Stan wyjsciowy alarmu', 'top': 20, 'left': 10, 'size': 5}, {'type': 'input', 'zindex': 15, 'widget_type': {'type': 'input'}, 'fqts': 'prog wyj1', 'readonly': True, 'top': 20, 'left': 20, 'input_size': 5}]}]} as visualizations",
+              'level': 10}, {'service': 'gui', 'when': 1628873412444961,
+                             'message': 'DEBUG:rapid.channels:Reconnecting leds current amount of users is 0 due_to_failures=False',
+                             'level': 10}, {'service': 'gui', 'when': 1628873412446439,
+                                            'message': 'DEBUG:rapid.channels:Successfully reconnected leds',
+                                            'level': 10}]
+        self.assertSameAfterDumpsAndLoads(a)
+
     def test_booleans(self):
         self.assertSameAfterDumpsAndLoads({'test': True,
                                            'test2': False})
@@ -200,6 +218,7 @@ class TestMiniJSON(unittest.TestCase):
         self.assertSameAfterDumpsAndLoads(0xFFFFFFFFFFFFF)
         self.assertSameAfterDumpsAndLoads(0xFFFFFFFFFFFFFFFFFFFFFFFFFFF)
         self.assertSameAfterDumpsAndLoads(-0xFFFFFFFFFFFFFFFFFFFFFFFFFFFF)
+
     def test_dumps(self):
         self.assertSameAfterDumpsAndLoads({"name": "land", "operator_id": "dupa", "parameters":
             {"lat": 45.22999954223633, "lon": 54.79999923706055, "alt": 234}})
-- 
GitLab