From c9bb59eca263e8c974890f21e775f18cf26d1507 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20Ma=C5=9Blanka?= <piotr.maslanka@henrietta.com.pl> Date: Wed, 26 May 2021 13:34:29 +0200 Subject: [PATCH] files added --- docs/Makefile | 20 +++ docs/conf.py | 55 ++++++++ docs/index.rst | 20 +++ docs/make.bat | 35 +++++ minijson/__init__.py | 3 + minijson/exceptions.pyx | 11 ++ minijson/routines.pxd | 11 ++ minijson/routines.pyx | 293 ++++++++++++++++++++++++++++++++++++++++ tests/test_minijson.py | 43 ++++++ 9 files changed, 491 insertions(+) create mode 100644 docs/Makefile create mode 100644 docs/conf.py create mode 100644 docs/index.rst create mode 100644 docs/make.bat create mode 100644 minijson/__init__.py create mode 100644 minijson/exceptions.pyx create mode 100644 minijson/routines.pxd create mode 100644 minijson/routines.pyx create mode 100644 tests/test_minijson.py diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d4bb2cb --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..d15125a --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,55 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. 
For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) + + +# -- Project information ----------------------------------------------------- + +project = 'MiniJSON' +copyright = '2021, Piotr Maślanka' +author = 'Piotr Maślanka' + +# The full version, including alpha/beta/rc tags +release = '1.0' + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'alabaster' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". 
+html_static_path = ['_static'] \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..7bb946e --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,20 @@ +.. MiniJSON documentation master file, created by + sphinx-quickstart on Wed May 26 13:28:36 2021. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to MiniJSON's documentation! +==================================== + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..922152e --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/minijson/__init__.py b/minijson/__init__.py new file mode 100644 index 0000000..709fcab --- /dev/null +++ b/minijson/__init__.py @@ -0,0 +1,3 @@ +from .routines import dumps, loads, switch_default_double, switch_default_float, \ + dumps_object, loads_object +from .exceptions import MiniJSONError, EncodingError, DecodingError diff --git a/minijson/exceptions.pyx b/minijson/exceptions.pyx new file mode 100644 index 0000000..0a2baac --- /dev/null +++ b/minijson/exceptions.pyx @@ -0,0 +1,11 @@ +class MiniJSONError(ValueError): + """Base class for MiniJSON errors""" + pass + +class EncodingError(MiniJSONError): + """Error during encoding""" + pass + +class DecodingError(MiniJSONError): + """Error during decoding""" + pass diff --git a/minijson/routines.pxd b/minijson/routines.pxd new file mode 100644 index 0000000..ceeb54f --- /dev/null +++ b/minijson/routines.pxd @@ -0,0 +1,11 @@ +import io + +cpdef object loads(bytes data) +cpdef int dump(object data, cio: io.BytesIO) except -1 +cpdef bytes dumps(object data) +cpdef tuple parse(bytes data, int starting_position) +cpdef void switch_default_float() +cpdef void switch_default_double() + +cpdef bytes dumps_object(object data) +cpdef object loads_object(bytes data, object obj_class) diff --git a/minijson/routines.pyx b/minijson/routines.pyx new file mode 100644 index 0000000..73f8fab --- /dev/null +++ b/minijson/routines.pyx @@ -0,0 +1,293 @@ +import typing as tp +import io +import struct + +from minijson.exceptions import DecodingError, EncodingError + +STRUCT_f = struct.Struct('>f') +STRUCT_d = struct.Struct('>d') +STRUCT_b = struct.Struct('>b') +STRUCT_h = struct.Struct('>h') +STRUCT_H = struct.Struct('>H') +STRUCT_l = struct.Struct('>l') +STRUCT_L = 
struct.Struct('>L') + +cdef int coding_mode = 0 # 0 for default FLOAT + # 1 for default DOUBLE + +cpdef void switch_default_float(): + """ + Set default encoding of floats to IEEE 754 single + """ + global coding_mode + coding_mode = 0 + +cpdef void switch_default_double(): + """ + Set default encoding of floats to IEEE 754 double + """ + global coding_mode + coding_mode = 1 + +cdef inline tuple parse_cstring(bytes data, int starting_position): + cdef: + int strlen = data[starting_position] + bytes subdata = data[starting_position+1:starting_position+1+strlen] + return strlen+1, subdata + +cpdef tuple parse(bytes data, int starting_position): + """ + Parse given stream of data starting at a position + and return a tuple of (how many bytes does this piece of data take, the piece of data itself) + + :param data: stream of bytes to examine + :param starting_position: first position in the bytestring at which to look + :return: a tuple of (how many bytes does this piece of data take, the piece of data itself) + :rtype: tp.Tuple[int, tp.Any] + """ + cdef: + int value_type = data[starting_position] + int string_length + unsigned int uint32 + int sint32 + unsigned short uint16 + short sint16 + unsigned char uint8 + char sint8 + list e_list + dict e_dict + int elements, i, offset, length + bytes b_field_name + str s_field_name + if value_type & 0x80: + string_length = value_type & 0x7F + try: + return string_length+1, data[starting_position+1:starting_position+string_length+1].decode('utf-8') + except UnicodeDecodeError as e: + raise DecodingError('Invalid UTF-8') from e + elif value_type & 0xF0 == 0b01000000: + elements = value_type & 0xF + offset = 1 + e_list = [] + for i in range(elements): + length, elem = parse(data, starting_position+offset) + offset += length + e_list.append(elem) + return offset, e_list + elif value_type & 0xF0 == 0b01010000: + e_dict = {} + offset = 1 + elements = value_type & 0xF + for i in range(elements): + length, b_field_name =
parse_cstring(data, starting_position+offset) + s_field_name = b_field_name.decode('utf-8') + offset += length + length, elem = parse(data, starting_position+offset) + offset += length + e_dict[s_field_name] = elem + return offset, e_dict + elif value_type == 0: + string_length = data[starting_position+1] + offset, b_field_name = parse_cstring(data, starting_position+1) + try: + return offset+1, b_field_name.decode('utf-8') + except UnicodeDecodeError as e: + raise DecodingError('Invalid UTF-8') from e + elif value_type in (1, 4): + uint32 = (data[starting_position+1] << 24) | (data[starting_position+2] << 16) | (data[starting_position+3] << 8) | data[starting_position+4] + if value_type == 4: + return 5, uint32 + else: + sint32 = uint32 + return 5, sint32 + elif value_type in (2, 5): + uint16 = (data[starting_position+1] << 8) | data[starting_position+2] + if value_type == 5: + return 3, uint16 + else: + sint16 = uint16 + return 3, sint16 + elif value_type in (3, 6): + uint8 = data[starting_position+1] + if value_type == 6: + return 2, uint8 + else: + sint8 = uint8 + return 2, sint8 + elif value_type == 7: + elements = data[starting_position+1] + e_list = [] + offset = 2 + for i in range(elements): + length, elem = parse(data, starting_position+offset) + offset += length + e_list.append(elem) + return offset, e_list + elif value_type == 8: + return 1, None + elif value_type == 9: + return 5, *STRUCT_f.unpack(data[starting_position+1:starting_position+5]) + elif value_type == 10: + return 9, *STRUCT_d.unpack(data[starting_position+1:starting_position+9]) + elif value_type == 12: + uint32 = (data[starting_position+1] << 16) | (data[starting_position+2] << 8) | data[starting_position+3] + return 4, uint32 + elif value_type == 11: + elements = data[starting_position+1] + e_dict = {} + offset = 2 + + for i in range(elements): + length, b_field_name = parse_cstring(data, starting_position+offset) + s_field_name = b_field_name.decode('utf-8') + offset += length + length, elem =
parse(data, starting_position+offset) + + offset += length + e_dict[s_field_name] = elem + return offset, e_dict + raise DecodingError(f'Unknown sequence type {value_type}!') + + +cpdef object loads(bytes data): + """ + Reconstruct given JSON from a given value + + :param data: MiniJSON value to reconstruct it from + :return: return value + :raises DecodingError: something was wrong with the stream + """ + return parse(data, 0)[1] + + +cpdef int dump(object data, cio: io.BytesIO) except -1: + """ + Write an object to a stream + + :param data: object to write + :param cio: stream to write to + :return: bytes written + """ + cdef: + str field_name + int length + if data is None: + cio.write(b'\x08') + return 1 + elif isinstance(data, str): + length = len(data) + if length > 255: + raise EncodingError('Cannot encode string longer than 255 characters') + if length < 128: + cio.write(bytearray([0x80 | length])) + cio.write(data.encode('utf-8')) + return 1+length + else: + cio.write(bytearray([0, length])) + cio.write(data.encode('utf-8')) + return 2+length + elif isinstance(data, int): + if -128 <= data <= 127: # signed char, type 3 + cio.write(b'\x03') + cio.write(STRUCT_b.pack(data)) + return 2 + elif 0 <= data <= 255: # unsigned char, type 6 + cio.write(bytearray([6, data])) + return 2 + elif -32768 <= data <= 32767: # signed short, type 2 + cio.write(b'\x02') + cio.write(STRUCT_h.pack(data)) + return 3 + elif 0 <= data <= 65535: # unsigned short, type 5 + cio.write(b'\x05') + cio.write(STRUCT_H.pack(data)) + return 3 + elif 0 <= data <= 0xFFFFFF: # unsigned 3byte, type 12 + cio.write(b'\x0C') + cio.write(STRUCT_L.pack(data)[1:]) + return 4 + elif -2147483648 <= data <= 2147483647: # signed int, type 1 + cio.write(b'\x01') + cio.write(STRUCT_l.pack(data)) + return 5 + elif 0 <= data <= 0xFFFFFFFF: # unsigned int, type 4 + cio.write(b'\x04') + cio.write(STRUCT_L.pack(data)) + return 5 + else: + raise EncodingError(f'Too large integer {data}') + elif isinstance(data,
float): + if coding_mode == 0: + cio.write(b'\x09') + cio.write(STRUCT_f.pack(data)) + return 5 + else: + cio.write(b'\x0A') + cio.write(STRUCT_d.pack(data)) + return 9 + elif isinstance(data, (tuple, list)): + length = len(data) + if length > 255: + raise EncodingError('Too long of a list, maximum list length is 255') + if length < 16: + cio.write(bytearray([0b01000000 | length])) + length = 1 + else: + cio.write(bytearray([7, length])) + length = 2 + for elem in data: + length += dump(elem, cio) + return length + elif isinstance(data, dict): + length = len(data) + if length > 255: + raise EncodingError('Too long of a dict, maximum dict length is 255') + if length < 16: + cio.write(bytearray([0b01010000 | length])) + length = 1 + else: + cio.write(bytearray([11, len(data)])) + length = 2 + for field_name, elem in data.items(): + cio.write(bytearray([len(field_name)])) + cio.write(field_name.encode('utf-8')) + length += dump(elem, cio) + return length + else: + raise EncodingError(f'Unknown value type {data}') + +cpdef bytes dumps(object data): + """ + Serialize given data to a MiniJSON representation + + :param data: data to serialize + :return: return MiniJSON representation + :raises EncodingError: object not serializable + """ + cio = io.BytesIO() + dump(data, cio) + return cio.getvalue() + + +cpdef bytes dumps_object(object data): + """ + Dump an object's __dict__ + + :param data: object to dump + :return: resulting bytes + :raises EncodingError: encoding error + """ + return dumps(data.__dict__) + +cpdef object loads_object(bytes data, object obj_class): + """ + Load a dict from a bytestream, unserialize it and use it as a kwargs to instantiate + an object of given class + + :param data: data to unserialize + :param obj_class: class to instantiate + :return: instance of obj_class + :raises DecodingError: decoding error + """ + cdef dict kwargs = loads(data) + return obj_class(**kwargs) diff --git a/tests/test_minijson.py b/tests/test_minijson.py new file
mode 100644 index 0000000..efc64d5 --- /dev/null +++ b/tests/test_minijson.py @@ -0,0 +1,43 @@ +import unittest +from minijson import dumps, loads, dumps_object, loads_object, EncodingError, DecodingError + + +class TestMiniJSON(unittest.TestCase): + def test_exceptions(self): + a = {} + for i in range(65535): + a[i] = i*2 + self.assertRaises(EncodingError, lambda: dumps(a)) + a = [] + for i in range(65535): + a.append(i) + self.assertRaises(EncodingError, lambda: dumps(a)) + + def test_dumps(self): + v = {"name": "land", "operator_id": "dupa", "parameters": + {"lat": 45.22999954223633, "lon": 54.79999923706055, "alt": 234}} + b = dumps(v) + print(f'Serialized {b}') + c = loads(b) + print(f'Recovered {c}') + self.assertEqual(v, c) + + def test_loads_exception(self): + b = b'\x1A' + self.assertRaises(DecodingError, lambda: loads(b)) + + def test_loads(self): + a = loads(b'\x0B\x03\x04name\x84land\x0Boperator_id\x84dupa\x0Aparameters\x0B\x03\x03lat\x09B4\xeb\x85\x03lon\x09B[33\x03alt\x09Cj\x00\x00') + self.assertEqual(a, {"name": "land", "operator_id": "dupa", "parameters": + {"lat": 45.22999954223633, "lon": 54.79999923706055, "alt": 234}}) + + def test_dumps_loads_object(self): + class Test: + def __init__(self, a): + self.a = a + + a = Test(2) + b = dumps_object(a) + c = loads_object(b, Test) + self.assertEqual(a.a, c.a) + self.assertIsInstance(c, Test) -- GitLab