diff --git a/CHANGELOG.md b/CHANGELOG.md index b73008e56a26b9f4a87bad4ca0388f3d780e6b03..0d9e211d21600b672dbbc81ecec1da41a6e7b708 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,3 +2,5 @@ Changelog is kept at [GitHub](https://github.com/Dronehub/minijson/releases), here's only the changelog for the version in development # v1.3 + +* object keys don't have to be strings anymore diff --git a/docs/index.rst b/docs/index.rst index 6b4552d493802022017b94a297cdb3236bb8e0db..0c4df873eab48403e4b7d00302e43aadc3a649ae 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -13,6 +13,13 @@ Welcome to MiniJSON's documentation! usage specification +MiniJSON is a space-aware binary format for representing arbitrary JSON. +It is, however, most efficient when dealing with short (fewer than 16 elements) lists and objects, +whose keys are all strings. + +You should avoid objects with keys other than strings, since they will always use a +4-byte length field. This is to be improved in a future release. + Indices and tables ================== diff --git a/docs/specification.rst b/docs/specification.rst index 2eb4a9e6c88c29b6c44781a864b23552f5b0a385..f18f4ad3e66241d1e654cad087302b636df1accd 100644 --- a/docs/specification.rst +++ b/docs/specification.rst @@ -1,10 +1,7 @@ MiniJSON specification ====================== -MiniJSON is a binary encoding for a subset of JSON that: - -* has no keys longer than 255 bytes UTF-8 -* all keys are string +MiniJSON is a space-aware binary encoding for JSON. All data is stored as bigger endian. 
@@ -56,3 +53,5 @@ with len of (value & 0x7F) and then an object follows of that many elements * If value is 18, then next data is a unsigned int, and then an object follows of that many elements +* If value is 19, then next data is an unsigned int, + and then that many pairs of Values (key: value) follow diff --git a/minijson/routines.pyx b/minijson/routines.pyx index dc015d5ad3490b53ca93eb3c734af0233ae88e9f..166dc98f8d2fb699b01064bc4ed734ddaf3aeea4 100644 --- a/minijson/routines.pyx +++ b/minijson/routines.pyx @@ -81,6 +81,38 @@ cdef inline tuple parse_dict(bytes data, int elem_count, int starting_position): dct[s_field_name] = elem return offset, dct +cdef inline tuple parse_sdict(bytes data, int elem_count, int starting_position): + """ + Parse an sdict (a dict whose keys are arbitrary Values, not only strings) with this many elements + + :param data: data to parse as a dict + :param elem_count: count of (key, value) pairs + :param starting_position: starting position + + :return: tuple of (how many bytes the dict occupied, the dict itself) + """ + cdef: + dict dct = {} + bytes b_field_name + str s_field_name + int i, ofs, offset = 0 + for i in range(elem_count): + ofs, key = parse(data, starting_position+offset) + offset += ofs + ofs, elem = parse(data, starting_position+offset) + offset += ofs + dct[key] = elem + return offset, dct + + +cdef bint can_be_encoded_as_a_dict(dict dct): + for key, value in dct.items(): + if not isinstance(key, str): + return False + if len(key) > 255: + return False + return True + cpdef tuple parse(bytes data, int starting_position): """ @@ -192,6 +224,10 @@ cpdef tuple parse(bytes data, int starting_position): elements, = STRUCT_L.unpack(data[starting_position+1:starting_position+5]) offset, e_dict = parse_dict(data, elements, starting_position+5) return offset+5, e_dict + elif value_type == 19: + elements, = STRUCT_L.unpack(data[starting_position+1:starting_position+5]) + offset, e_dict = parse_sdict(data, elements, starting_position+5) + return offset+5, 
e_dict raise DecodingError('Unknown sequence type %s!' % (value_type, )) except IndexError as e: raise DecodingError('String too short!') from e @@ -302,28 +338,37 @@ cpdef int dump(object data, cio: io.BytesIO) except -1: return length elif isinstance(data, dict): length = len(data) - if length < 16: - cio.write(bytearray([0b01010000 | length])) - length = 1 - elif length < 256: - cio.write(bytearray([11, len(data)])) - length = 2 - elif length < 65536: - cio.write(b'\x11') - cio.write(STRUCT_H.pack(length)) - length = 3 - elif length <= 0xFFFFFFFF: - cio.write(b'\x12') + if can_be_encoded_as_a_dict(data): + if length < 16: + cio.write(bytearray([0b01010000 | length])) + length = 1 + elif length < 256: + cio.write(bytearray([11, len(data)])) + length = 2 + elif length < 65536: + cio.write(b'\x11') + cio.write(STRUCT_H.pack(length)) + length = 3 + elif length <= 0xFFFFFFFF: + cio.write(b'\x12') + cio.write(STRUCT_L.pack(length)) + length = 5 + try: + for field_name, elem in data.items(): + cio.write(bytearray([len(field_name)])) + cio.write(field_name.encode('utf-8')) + length += dump(elem, cio) + except TypeError as e: + raise EncodingError('Keys have to be strings!') from e + return length + else: + cio.write(b'\x13') cio.write(STRUCT_L.pack(length)) - length = 5 - try: - for field_name, elem in data.items(): - cio.write(bytearray([len(field_name)])) - cio.write(field_name.encode('utf-8')) - length += dump(elem, cio) - except TypeError as e: - raise EncodingError('Keys have to be strings!') from e - return length + offset = 5 + for key, value in data.items(): + offset += dump(key, cio) + offset += dump(value, cio) + return offset else: raise EncodingError('Unknown value type %s' % (data, )) diff --git a/setup.py b/setup.py index ed1ad9c549406606bbe9ac4d4d7e92f019e70723..16efff6fd109f3e4e3ffb53a4d84653e0fcf82da 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ if 'DEBUG' in os.environ: directives['embedsignature'] = True -setup(version='1.3a1', 
+setup(version='1.3', packages=find_packages(include=['minijson', 'minijson.*']), ext_modules=build([Multibuild('minijson', find_pyx('minijson'), dont_snakehouse=dont_snakehouse), ], diff --git a/tests/test_minijson.py b/tests/test_minijson.py index e485af7d5e44dc45541c9c2d5f116802ba659c84..0b2caccc72ed74682d591fcbc244b813043c4099 100644 --- a/tests/test_minijson.py +++ b/tests/test_minijson.py @@ -34,6 +34,12 @@ class TestMiniJSON(unittest.TestCase): a[str(i)] = i self.assertSameAfterDumpsAndLoads(a) + def test_dicts_not_string_keys(self): + a = {} + for i in range(17): + a[i] = i + self.assertSameAfterDumpsAndLoads(a) + def test_long_dicts_and_lists(self): a = {} for i in range(65535): @@ -60,7 +66,6 @@ class TestMiniJSON(unittest.TestCase): def test_loads_exception(self): b = b'\x1F' self.assertRaises(DecodingError, lambda: loads(b)) - self.assertRaises(EncodingError, lambda: dumps({1: 2})) def test_loads(self): a = loads(b'\x0B\x03\x04name\x84land\x0Boperator_id\x84dupa\x0Aparameters\x0B\x03\x03lat\x09B4\xeb\x85\x03lon\x09B[33\x03alt\x09Cj\x00\x00')