diff --git a/CHANGELOG.md b/CHANGELOG.md index a706c3f9d1d5b650fe8fc3b3d0e63d90c380a797..fbf5cf84bfceb323a3a71e1e18b3189c6f32f8da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,3 +3,4 @@ here's only the changelog for the version in development # v2.6 +* added support for serializing and unserializing binary values diff --git a/README.md b/README.md index a1fb0f5c0c9e6bab66e62ebbd535a4ad95af672d..7e68f005c9305540af63775f114c441645f97ee2 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,8 @@ MiniJSON [](https://github.com/Dronehub/minijson) -MiniJSON is a codec for a compact binary representation of JSON. +MiniJSON is a codec for a compact binary representation of a superset of JSON (binary values +are supported). Usage ----- diff --git a/docs/specification.rst b/docs/specification.rst index cdcc72db2e93b60604c9a6cf7ba50d49d264deeb..63c1a9569c852d9c18995de757b452e1e599ea42 100644 --- a/docs/specification.rst +++ b/docs/specification.rst @@ -65,6 +65,12 @@ Type Value consists of: * If value is 23, then it's False * If value is 24, then next what comes is count of bytes, and then bytes follow. This is to be interpreted as a signed integer +* If value is 25, then next comes an unsigned char denoting the length of the binary data, and + that many bytes of binary data follow +* If value is 26, then next comes an unsigned short denoting the length of the binary data, and + that many bytes of binary data follow +* If value is 27, then next comes an unsigned int denoting the length of the binary data, and + that many bytes of binary data follow Coder **should** encode the value as one having the smallest binary representation, but that is not required. Decoder **must** parse any arbitrary valid string. 
diff --git a/minijson.pyx b/minijson.pyx index b2239966da71c2c0f03fbd325844e3b1dcda39d5..4de6730829171452e3d54c7ed3603f1c18ce3bb6 100644 --- a/minijson.pyx +++ b/minijson.pyx @@ -132,8 +132,8 @@ cdef inline bint can_be_encoded_as_a_dict(dct): cdef tuple parse_bytes(bytes data, int starting_position): cdef: int value_type - int string_length, elements, i, offset, length - unsigned int uint32 + int string_length, elements, i, offset + unsigned int uint32, length int sint32 unsigned short uint16 short sint16 @@ -264,6 +264,18 @@ cdef tuple parse_bytes(bytes data, int starting_position): length = data[starting_position+1] byte_data = data[starting_position+2:starting_position+2+length] return length+2, int.from_bytes(byte_data, 'big', signed=True) + elif value_type == 25: + length = data[starting_position+1] + byte_data = data[starting_position+2:starting_position+2+length] + return length+2, byte_data + elif value_type == 26: + length, = STRUCT_H.unpack(data[starting_position+1:starting_position+3]) + byte_data = data[starting_position+3:starting_position+3+length] + return length+3, byte_data + elif value_type == 27: + length, = STRUCT_L.unpack(data[starting_position+1:starting_position+5]) + byte_data = data[starting_position+5:starting_position+5+length] + return length+5, byte_data else: raise DecodingError('Unknown sequence type %s!' 
% (value_type, )) except (IndexError, struct.error) as e: @@ -371,7 +383,7 @@ cdef class MiniJSONEncoder: """ cdef: str field_name - int length + unsigned int length bytes b_data if data is None: cio.write(b'\x08') @@ -382,6 +394,17 @@ cdef class MiniJSONEncoder: elif data is False: cio.write(b'\x17') return 1 + elif isinstance(data, bytes): + length = len(data) + if length < 256: + cio.write(bytearray([0x19, length])) + cio.write(data) + elif length < 65536: + cio.write(b'\x1A' + struct.pack('>H', length)) + cio.write(data) + else: + cio.write(b'\x1B' + struct.pack('>L', length)) + cio.write(data) elif isinstance(data, str): length = len(data) if length < 128: diff --git a/setup.cfg b/setup.cfg index 22b7cb19ee924f7040b45a95d97c794c3a907bbc..664a7275dcf5c40e1ba7025c76ac056e26bf0701 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ # coding: utf-8 [metadata] -version = 2.6a1 +version = 2.6 name = minijson long_description = file: README.md long_description_content_type = text/markdown; charset=UTF-8 diff --git a/tests/test_minijson.py b/tests/test_minijson.py index a4e10d58f091201a69b69886d6e721c05f3c4a17..0fdedcb688ddfde92ec0cda47127bf014c6103d1 100644 --- a/tests/test_minijson.py +++ b/tests/test_minijson.py @@ -1,4 +1,5 @@ import unittest + from minijson import dumps, loads, dumps_object, loads_object, EncodingError, DecodingError, \ switch_default_double, switch_default_float, MiniJSONEncoder @@ -13,6 +14,26 @@ class TestMiniJSON(unittest.TestCase): e = Encoder() e.encode(2+3j) + def test_bytes(self): + a = {b'test': b'dupa'} + e = MiniJSONEncoder() + b = e.encode(a) + self.assertEqual(loads(b), a) + + def test_bytes_26(self): + a = b'x'*256 + e = MiniJSONEncoder() + b = e.encode(a) + self.assertEqual(loads(b), a) + self.assertEqual(len(b), 256+3) + + def test_bytes_27(self): + e = MiniJSONEncoder() + a = b'x'*65537 + b = e.encode(a) + self.assertEqual(loads(b), a) + self.assertEqual(len(b), 65537+5) + def test_encoder_given_default(self): def encode(v): 
return v.real, v.imag