From c9bb59eca263e8c974890f21e775f18cf26d1507 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20Ma=C5=9Blanka?= <piotr.maslanka@henrietta.com.pl> Date: Wed, 26 May 2021 13:34:29 +0200 Subject: [PATCH] files added --- docs/Makefile | 20 +++ docs/conf.py | 55 ++++++++ docs/index.rst | 20 +++ docs/make.bat | 35 +++++ minijson/__init__.py | 3 + minijson/exceptions.pyx | 11 ++ minijson/routines.pxd | 11 ++ minijson/routines.pyx | 293 ++++++++++++++++++++++++++++++++++++++++ tests/test_minijson.py | 43 ++++++ 9 files changed, 491 insertions(+) create mode 100644 docs/Makefile create mode 100644 docs/conf.py create mode 100644 docs/index.rst create mode 100644 docs/make.bat create mode 100644 minijson/__init__.py create mode 100644 minijson/exceptions.pyx create mode 100644 minijson/routines.pxd create mode 100644 minijson/routines.pyx create mode 100644 tests/test_minijson.py diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d4bb2cb --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..d15125a --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,55 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. 
For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) + + +# -- Project information ----------------------------------------------------- + +project = 'MiniJSON' +copyright = '2021, Piotr Maślanka' +author = 'Piotr Maślanka' + +# The full version, including alpha/beta/rc tags +release = '1.0' + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'alabaster' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". 
+html_static_path = ['_static'] \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..7bb946e --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,20 @@ +.. MiniJSON documentation master file, created by + sphinx-quickstart on Wed May 26 13:28:36 2021. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to MiniJSON's documentation! +==================================== + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..922152e --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/minijson/__init__.py b/minijson/__init__.py new file mode 100644 index 0000000..709fcab --- /dev/null +++ b/minijson/__init__.py @@ -0,0 +1,3 @@ +from .routines import dumps, loads, switch_default_double, switch_default_float, \ + dumps_object, loads_object +from .exceptions import MiniJSONError, EncodingError, DecodingError diff --git a/minijson/exceptions.pyx b/minijson/exceptions.pyx new file mode 100644 index 0000000..0a2baac --- /dev/null +++ b/minijson/exceptions.pyx @@ -0,0 +1,11 @@ +class MiniJSONError(ValueError): + """Base class for MiniJSON errors""" + pass + +class EncodingError(MiniJSONError): + """Error during encoding""" + pass + +class DecodingError(MiniJSONError): + """Error during decoding""" + pass diff --git a/minijson/routines.pxd b/minijson/routines.pxd new file mode 100644 index 0000000..ceeb54f --- /dev/null +++ b/minijson/routines.pxd @@ -0,0 +1,11 @@ +import io + +cpdef object loads(bytes data) +cpdef int dump(object data, cio: io.BytesIO) except -1 +cpdef bytes dumps(object data) +cpdef tuple parse(bytes data, int starting_position) +cpdef void switch_default_float() +cpdef void switch_default_double() + +cpdef bytes dumps_object(object data) +cpdef object loads_object(bytes data, object obj_class) diff --git a/minijson/routines.pyx b/minijson/routines.pyx new file mode 100644 index 0000000..73f8fab --- /dev/null +++ b/minijson/routines.pyx @@ -0,0 +1,293 @@ +import typing as tp +import io +import struct + +from minijson.exceptions import DecodingError, EncodingError + +STRUCT_f = struct.Struct('>f') +STRUCT_d = struct.Struct('>d') +STRUCT_b = struct.Struct('>b') +STRUCT_h = struct.Struct('>h') +STRUCT_H = struct.Struct('>H') +STRUCT_l = struct.Struct('>l') +STRUCT_L = 
struct.Struct('>L') + +cdef int coding_mode = 0 # 0 for default FLOAT + # 1 for default DOUBLE + +cpdef void switch_default_float(): + """ + Set default encoding of floats to IEEE 754 single + """ + global coding_mode + coding_mode = 0 + +cpdef void switch_default_double(): + """ + Set default encoding of floats to IEEE 754 double + """ + global coding_mode + coding_mode = 1 + +cdef inline tuple parse_cstring(bytes data, int starting_position): + cdef: + int strlen = data[starting_position] + bytes subdata = data[starting_position+1:starting_position+1+strlen] + return strlen+1, subdata + +cpdef tuple parse(bytes data, int starting_position): + """ + Parse given stream of data starting at a position + and return a tuple of (how many bytes does this piece of data take, the piece of data itself) + + :param data: stream of bytes to examine + :param starting_position: first position in the bytestring at which to look + :return: a tuple of (how many bytes does this piece of data take, the piece of data itself) + :rtype: tp.Tuple[int, tp.Any] + """ + cdef: + int value_type = data[starting_position] + int string_length + unsigned int uint32 + int sint32 + unsigned short uint16 + short sint16 + unsigned char uint8 + char sint8 + list e_list + dict e_dict + int elements, i, offset, length + bytes b_field_name + str s_field_name + if value_type & 0x80: + string_length = value_type & 0x7F + try: + return string_length+1, data[starting_position+1:starting_position+string_length+1].decode('utf-8') + except UnicodeDecodeError as e: + raise DecodingError('Invalid UTF-8') from e + elif value_type & 0xF0 == 0b01000000: + elements = value_type & 0xF + offset = 1 + e_list = [] + for i in range(elements): + length, elem = parse(data, starting_position+offset) + offset += length + e_list.append(elem) + return offset, e_list + elif value_type & 0xF0 == 0b01010000: + e_dict = {} + offset = 1 + elements = value_type & 0xF + for i in range(elements): + length, b_field_name =
parse_cstring(data, starting_position+offset) + s_field_name = b_field_name.decode('utf-8') + offset += length + length, elem = parse(data, starting_position+offset) + offset += length + e_dict[s_field_name] = elem + return offset, e_dict + elif value_type == 0: + string_length = data[starting_position+1] + offset, b_field_name = parse_cstring(data, starting_position+1) + try: + return offset+1, b_field_name.decode('utf-8') + except UnicodeDecodeError as e: + raise DecodingError('Invalid UTF-8') from e + elif value_type in (1, 4): + uint32 = (data[starting_position+1] << 24) | (data[starting_position+2] << 16) | (data[starting_position+3] << 8) | data[starting_position+4] + if value_type == 4: + return 5, uint32 + else: + sint32 = uint32 + return 5, sint32 + elif value_type in (2, 5): + uint16 = (data[starting_position+1] << 8) | data[starting_position+2] + if value_type == 5: + return 3, uint16 + else: + sint16 = uint16 + return 3, sint16 + elif value_type in (3, 6): + uint8 = data[starting_position+1] + if value_type == 6: + return 2, uint8 + else: + sint8 = uint8 + return 2, sint8 + elif value_type == 7: + elements = data[starting_position+1] + e_list = [] + offset = 2 + for i in range(elements): + length, elem = parse(data, starting_position+offset) + offset += length + e_list.append(elem) + return offset, e_list + elif value_type == 8: + return 1, None + elif value_type == 9: + return 5, *STRUCT_f.unpack(data[starting_position+1:starting_position+5]) + elif value_type == 10: + return 9, *STRUCT_d.unpack(data[starting_position+1:starting_position+9]) + elif value_type == 12: + uint32 = (data[starting_position+1] << 16) | (data[starting_position+2] << 8) | data[starting_position+3] + return 4, uint32 + elif value_type == 11: + elements = data[starting_position+1] + e_dict = {} + offset = 2 + + for i in range(elements): + length, b_field_name = parse_cstring(data, starting_position+offset) + s_field_name = b_field_name.decode('utf-8') + offset += length + length, elem =
parse(data, starting_position+offset) + + offset += length + e_dict[s_field_name] = elem + return offset, e_dict + raise DecodingError(f'Unknown sequence type {value_type}!') + + +cpdef object loads(bytes data): + """ + Reconstruct given JSON from a given value + + :param data: MiniJSON value to reconstruct it from + :return: return value + :raises DecodingError: something was wrong with the stream + """ + return parse(data, 0)[1] + + +cpdef int dump(object data, cio: io.BytesIO) except -1: + """ + Write an object to a stream + + :param data: object to write + :param cio: stream to write to + :return: bytes written + """ + cdef: + str field_name + int length + if data is None: + cio.write(b'\x08') + return 1 + elif isinstance(data, str): + length = len(data) + if length > 255: + raise EncodingError('Cannot encode string longer than 255 characters') + if length < 128: + cio.write(bytearray([0x80 | length])) + cio.write(data.encode('utf-8')) + return 1+length + else: + cio.write(bytearray([0, length])) + cio.write(data.encode('utf-8')) + return 2+length + elif isinstance(data, int): + if -128 <= data <= 127: # signed char, type 3 + cio.write(b'\x03') + cio.write(STRUCT_b.pack(data)) + return 2 + elif 0 <= data <= 255: # unsigned char, type 6 + cio.write(bytearray([6, data])) + return 2 + elif -32768 <= data <= 32767: # signed short, type 2 + cio.write(b'\x02') + cio.write(STRUCT_h.pack(data)) + return 3 + elif 0 <= data <= 65535: # unsigned short, type 5 + cio.write(b'\x05') + cio.write(STRUCT_H.pack(data)) + return 3 + elif 0 <= data <= 0xFFFFFF: # unsigned 3byte, type 12 + cio.write(b'\x0C') + cio.write(STRUCT_L.pack(data)[1:]) + return 4 + elif -2147483648 <= data <= 2147483647: # signed int, type 1 + cio.write(b'\x01') + cio.write(STRUCT_l.pack(data)) + return 5 + elif 0 <= data <= 0xFFFFFFFF: # unsigned int, type 4 + cio.write(b'\x04') + cio.write(STRUCT_L.pack(data)) + return 5 + else: + raise EncodingError(f'Too large integer {data}') + elif isinstance(data,
float): + if coding_mode == 0: + cio.write(b'\x09') + cio.write(STRUCT_f.pack(data)) + return 5 + else: + cio.write(b'\x0A') + cio.write(STRUCT_d.pack(data)) + return 9 + elif isinstance(data, (tuple, list)): + length = len(data) + if length > 255: + raise EncodingError('Too long of a list, maximum list length is 255') + if length < 16: + cio.write(bytearray([0b01000000 | length])) + length = 1 + else: + cio.write(bytearray([7, length])) + length = 2 + for elem in data: + length += dump(elem, cio) + return length + elif isinstance(data, dict): + length = len(data) + if length > 255: + raise EncodingError('Too long of a dict, maximum dict length is 255') + if length < 16: + cio.write(bytearray([0b01010000 | length])) + length = 1 + else: + cio.write(bytearray([11, len(data)])) + length = 2 + for field_name, elem in data.items(): + cio.write(bytearray([len(field_name)])) + cio.write(field_name.encode('utf-8')) + length += dump(elem, cio) + return length + else: + raise EncodingError(f'Unknown value type {data}') + +cpdef bytes dumps(object data): + """ + Serialize given data to a MiniJSON representation + + :param data: data to serialize + :return: return MiniJSON representation + :raises EncodingError: object not serializable + """ + cio = io.BytesIO() + dump(data, cio) + return cio.getvalue() + + +cpdef bytes dumps_object(object data): + """ + Dump an object's __dict__ + + :param data: object to dump + :return: resulting bytes + :raises EncodingError: encoding error + """ + return dumps(data.__dict__) + +cpdef object loads_object(bytes data, object obj_class): + """ + Load a dict from a bytestream, unserialize it and use it as a kwargs to instantiate + an object of given class + + :param data: data to unserialize + :param obj_class: class to instantiate + :return: instance of obj_class + :raises DecodingError: decoding error + """ + cdef dict kwargs = loads(data) + return obj_class(**kwargs) diff --git a/tests/test_minijson.py b/tests/test_minijson.py new file
mode 100644 index 0000000..efc64d5 --- /dev/null +++ b/tests/test_minijson.py @@ -0,0 +1,43 @@ +import unittest +from minijson import dumps, loads, dumps_object, loads_object, EncodingError, DecodingError + + +class TestMiniJSON(unittest.TestCase): + def test_exceptions(self): + a = {} + for i in range(65535): + a[i] = i*2 + self.assertRaises(EncodingError, lambda: dumps(a)) + a = [] + for i in range(65535): + a.append(i) + self.assertRaises(EncodingError, lambda: dumps(a)) + + def test_dumps(self): + v = {"name": "land", "operator_id": "dupa", "parameters": + {"lat": 45.22999954223633, "lon": 54.79999923706055, "alt": 234}} + b = dumps(v) + print(f'Serialized {b}') + c = loads(b) + print(f'Recovered {c}') + self.assertEqual(v, c) + + def test_loads_exception(self): + b = b'\x1A' + self.assertRaises(DecodingError, lambda: loads(b)) + + def test_loads(self): + a = loads(b'\x0B\x03\x04name\x84land\x0Boperator_id\x84dupa\x0Aparameters\x0B\x03\x03lat\x09B4\xeb\x85\x03lon\x09B[33\x03alt\x09Cj\x00\x00') + self.assertEqual(a, {"name": "land", "operator_id": "dupa", "parameters": + {"lat": 45.22999954223633, "lon": 54.79999923706055, "alt": 234}}) + + def test_dumps_loads_object(self): + class Test: + def __init__(self, a): + self.a = a + + a = Test(2) + b = dumps_object(a) + c = loads_object(b, Test) + self.assertEqual(a.a, c.a) + self.assertIsInstance(c, Test) -- GitLab