From 81e623ab89cc2c84e2cfd13756f3ec78fac31e51 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Piotr=20Ma=C5=9Blanka?= <piotr.maslanka@henrietta.com.pl>
Date: Sun, 1 Aug 2021 19:18:21 +0200
Subject: [PATCH] v2.6 - added support for byte serialization

---
 CHANGELOG.md           |  1 +
 README.md              |  3 ++-
 docs/specification.rst |  6 ++++++
 minijson.pyx           | 29 ++++++++++++++++++++++++++---
 setup.cfg              |  2 +-
 tests/test_minijson.py | 21 +++++++++++++++++++++
 6 files changed, 57 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a706c3f..fbf5cf8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,3 +3,4 @@ here's only the changelog for the version in development
 
 # v2.6
 
+* added support for serializing and unserializing binary values
diff --git a/README.md b/README.md
index a1fb0f5..7e68f00 100644
--- a/README.md
+++ b/README.md
@@ -12,7 +12,8 @@ MiniJSON
 [![License](https://img.shields.io/pypi/l/minijson)](https://github.com/Dronehub/minijson)
 
 
-MiniJSON is a codec for a compact binary representation of JSON.
+MiniJSON is a codec for a compact binary representation of a superset of JSON (binary values
+are supported).
 
 Usage
 -----
diff --git a/docs/specification.rst b/docs/specification.rst
index cdcc72d..63c1a95 100644
--- a/docs/specification.rst
+++ b/docs/specification.rst
@@ -65,6 +65,12 @@ Type Value consists of:
 * If value is 23, then it's False
 * If value is 24, then next what comes is count of bytes, and then bytes follow. This is to be
     interpreted as a signed integer
+* If value is 25, then next comes an unsigned char denoting the length in bytes, and
+    then that many bytes of binary data follow
+* If value is 26, then next comes an unsigned short denoting the length in bytes, and
+    then that many bytes of binary data follow
+* If value is 27, then next comes an unsigned int denoting the length in bytes, and
+    then that many bytes of binary data follow
 
 Coder **should** encode the value as one having the smallest binary representation, but that is not
 required. Decoder **must** parse any arbitrary valid string.
diff --git a/minijson.pyx b/minijson.pyx
index b223996..4de6730 100644
--- a/minijson.pyx
+++ b/minijson.pyx
@@ -132,8 +132,8 @@ cdef inline bint can_be_encoded_as_a_dict(dct):
 cdef tuple parse_bytes(bytes data, int starting_position):
     cdef:
         int value_type
-        int string_length, elements, i, offset, length
-        unsigned int uint32
+        int string_length, elements, i, offset
+        unsigned int uint32, length
         int sint32
         unsigned short uint16
         short sint16
@@ -264,6 +264,18 @@ cdef tuple parse_bytes(bytes data, int starting_position):
             length = data[starting_position+1]
             byte_data = data[starting_position+2:starting_position+2+length]
             return length+2, int.from_bytes(byte_data, 'big', signed=True)
+        elif value_type == 25:
+            length = data[starting_position+1]
+            byte_data = data[starting_position+2:starting_position+2+length]
+            return length+2, byte_data
+        elif value_type == 26:
+            length, = STRUCT_H.unpack(data[starting_position+1:starting_position+3])
+            byte_data = data[starting_position+3:starting_position+3+length]
+            return length+3, byte_data
+        elif value_type == 27:
+            length, = STRUCT_L.unpack(data[starting_position+1:starting_position+5])
+            byte_data = data[starting_position+5:starting_position+5+length]
+            return length+5, byte_data
         else:
             raise DecodingError('Unknown sequence type %s!' % (value_type, ))
     except (IndexError, struct.error) as e:
@@ -371,7 +383,7 @@ cdef class MiniJSONEncoder:
         """
         cdef:
             str field_name
-            int length
+            unsigned int length
             bytes b_data
         if data is None:
             cio.write(b'\x08')
@@ -382,6 +394,17 @@ cdef class MiniJSONEncoder:
         elif data is False:
             cio.write(b'\x17')
             return 1
+        elif isinstance(data, bytes):
+            length = len(data)
+            if length < 256:
+                cio.write(bytearray([0x19, length]))
+                cio.write(data)
+            elif length < 65536:
+                cio.write(b'\x1A' + struct.pack('>H', length))
+                cio.write(data)
+            else:
+                cio.write(b'\x1B' + struct.pack('>L', length))
+                cio.write(data)
         elif isinstance(data, str):
             length = len(data)
             if length < 128:
diff --git a/setup.cfg b/setup.cfg
index 22b7cb1..664a727 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
 # coding: utf-8
 [metadata]
-version = 2.6a1
+version = 2.6
 name = minijson
 long_description = file: README.md
 long_description_content_type = text/markdown; charset=UTF-8
diff --git a/tests/test_minijson.py b/tests/test_minijson.py
index a4e10d5..0fdedcb 100644
--- a/tests/test_minijson.py
+++ b/tests/test_minijson.py
@@ -1,4 +1,5 @@
 import unittest
+
 from minijson import dumps, loads, dumps_object, loads_object, EncodingError, DecodingError, \
     switch_default_double, switch_default_float, MiniJSONEncoder
 
@@ -13,6 +14,26 @@ class TestMiniJSON(unittest.TestCase):
         e = Encoder()
         e.encode(2+3j)
 
+    def test_bytes(self):  # round-trip of bytes values through encode()/loads()
+        a = {b'test': b'dupa'}  # bytes used both as the dict key and as the value
+        e = MiniJSONEncoder()
+        b = e.encode(a)
+        self.assertEqual(loads(b), a)  # decoding must reproduce the original object
+
+    def test_bytes_26(self):  # bytes payload that needs the type-26 (0x1A) header
+        a = b'x'*256  # 256 is the first length that fails the encoder's `length < 256` check
+        e = MiniJSONEncoder()
+        b = e.encode(a)
+        self.assertEqual(loads(b), a)
+        self.assertEqual(len(b), 256+3)  # payload + 1 type byte + 2-byte ('>H') length
+
+    def test_bytes_27(self):  # bytes payload that needs the type-27 (0x1B) header
+        e = MiniJSONEncoder()
+        a = b'x'*65537  # longer than 65535, so the encoder's `length < 65536` check fails
+        b = e.encode(a)
+        self.assertEqual(loads(b), a)
+        self.assertEqual(len(b), 65537+5)  # payload + 1 type byte + 4-byte ('>L') length
+
     def test_encoder_given_default(self):
         def encode(v):
             return v.real, v.imag
-- 
GitLab