From 81e623ab89cc2c84e2cfd13756f3ec78fac31e51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20Ma=C5=9Blanka?= <piotr.maslanka@henrietta.com.pl> Date: Sun, 1 Aug 2021 19:18:21 +0200 Subject: [PATCH] v2.6 - added support for byte serialization --- CHANGELOG.md | 1 + README.md | 3 ++- docs/specification.rst | 6 ++++++ minijson.pyx | 29 ++++++++++++++++++++++++++--- setup.cfg | 2 +- tests/test_minijson.py | 21 +++++++++++++++++++++ 6 files changed, 57 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a706c3f..fbf5cf8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,3 +3,4 @@ here's only the changelog for the version in development # v2.6 +* added support for serializing and unserializing binary values diff --git a/README.md b/README.md index a1fb0f5..7e68f00 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,8 @@ MiniJSON [](https://github.com/Dronehub/minijson) -MiniJSON is a codec for a compact binary representation of JSON. +MiniJSON is a codec for a compact binary representation of a superset of JSON (binary values +are supported). Usage ----- diff --git a/docs/specification.rst b/docs/specification.rst index cdcc72d..63c1a95 100644 --- a/docs/specification.rst +++ b/docs/specification.rst @@ -65,6 +65,12 @@ Type Value consists of: * If value is 23, then it's False * If value is 24, then next what comes is count of bytes, and then bytes follow. This is to be interpreted as a signed integer +* If value is 25, then next comes an unsigned char denoting the length of the bytes, and + the remainder is binary data +* If value is 26, then next comes an unsigned short denoting the length of the bytes, and + the remainder is binary data +* If value is 27, then next comes an unsigned int denoting the length of the bytes, and + the remainder is binary data Coder **should** encode the value as one having the smallest binary representation, but that is not required. Decoder **must** parse any arbitrary valid string. 
diff --git a/minijson.pyx b/minijson.pyx index b223996..4de6730 100644 --- a/minijson.pyx +++ b/minijson.pyx @@ -132,8 +132,8 @@ cdef inline bint can_be_encoded_as_a_dict(dct): cdef tuple parse_bytes(bytes data, int starting_position): cdef: int value_type - int string_length, elements, i, offset, length - unsigned int uint32 + int string_length, elements, i, offset + unsigned int uint32, length int sint32 unsigned short uint16 short sint16 @@ -264,6 +264,18 @@ cdef tuple parse_bytes(bytes data, int starting_position): length = data[starting_position+1] byte_data = data[starting_position+2:starting_position+2+length] return length+2, int.from_bytes(byte_data, 'big', signed=True) + elif value_type == 25: + length = data[starting_position+1] + byte_data = data[starting_position+2:starting_position+2+length] + return length+2, byte_data + elif value_type == 26: + length, = STRUCT_H.unpack(data[starting_position+1:starting_position+3]) + byte_data = data[starting_position+3:starting_position+3+length] + return length+3, byte_data + elif value_type == 27: + length, = STRUCT_L.unpack(data[starting_position+1:starting_position+5]) + byte_data = data[starting_position+5:starting_position+5+length] + return length+5, byte_data else: raise DecodingError('Unknown sequence type %s!' 
% (value_type, )) except (IndexError, struct.error) as e: @@ -371,7 +383,7 @@ cdef class MiniJSONEncoder: """ cdef: str field_name - int length + unsigned int length bytes b_data if data is None: cio.write(b'\x08') @@ -382,6 +394,17 @@ cdef class MiniJSONEncoder: elif data is False: cio.write(b'\x17') return 1 + elif isinstance(data, bytes): + length = len(data) + if length < 256: + cio.write(bytearray([0x19, length])) + cio.write(data) + elif length < 65536: + cio.write(b'\x1A' + struct.pack('>H', length)) + cio.write(data) + else: + cio.write(b'\x1B' + struct.pack('>L', length)) + cio.write(data) elif isinstance(data, str): length = len(data) if length < 128: diff --git a/setup.cfg b/setup.cfg index 22b7cb1..664a727 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ # coding: utf-8 [metadata] -version = 2.6a1 +version = 2.6 name = minijson long_description = file: README.md long_description_content_type = text/markdown; charset=UTF-8 diff --git a/tests/test_minijson.py b/tests/test_minijson.py index a4e10d5..0fdedcb 100644 --- a/tests/test_minijson.py +++ b/tests/test_minijson.py @@ -1,4 +1,5 @@ import unittest + from minijson import dumps, loads, dumps_object, loads_object, EncodingError, DecodingError, \ switch_default_double, switch_default_float, MiniJSONEncoder @@ -13,6 +14,26 @@ class TestMiniJSON(unittest.TestCase): e = Encoder() e.encode(2+3j) + def test_bytes(self): + a = {b'test': b'dupa'} + e = MiniJSONEncoder() + b = e.encode(a) + self.assertEqual(loads(b), a) + + def test_bytes_26(self): + a = b'x'*256 + e = MiniJSONEncoder() + b = e.encode(a) + self.assertEqual(loads(b), a) + self.assertEqual(len(b), 256+3) + + def test_bytes_27(self): + e = MiniJSONEncoder() + a = b'x'*65537 + b = e.encode(a) + self.assertEqual(loads(b), a) + self.assertEqual(len(b), 65537+5) + def test_encoder_given_default(self): def encode(v): return v.real, v.imag -- GitLab