From 9d1c7368a51b83275ae73a32a9be4ee1a7c7fd52 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Piotr=20Ma=C5=9Blanka?= <piotr.maslanka@henrietta.com.pl>
Date: Tue, 15 Jun 2021 20:50:46 +0200
Subject: [PATCH] * added support for storing metadata as minijson     * this
 will be enabled by default if minijson is importable * fixed minor compiler
 warnings

---
 README.md              |  6 +++--
 docs/exceptions.rst    |  2 ++
 requirements.txt       |  1 +
 setup.cfg              | 12 +++++++++
 setup.py               |  7 +----
 tempsdb/database.pxd   |  1 -
 tempsdb/database.pyx   | 20 +++++---------
 tempsdb/exceptions.pyx |  6 +++++
 tempsdb/metadata.pxd   |  8 ++++++
 tempsdb/metadata.pyx   | 60 ++++++++++++++++++++++++++++++++++++++++++
 tempsdb/series.pxd     |  1 +
 tempsdb/series.pyx     | 21 +++++++--------
 tempsdb/varlen.pxd     |  2 +-
 unittest.Dockerfile    |  2 +-
 14 files changed, 114 insertions(+), 35 deletions(-)
 create mode 100644 tempsdb/metadata.pxd
 create mode 100644 tempsdb/metadata.pyx

diff --git a/README.md b/README.md
index f952765..d641c90 100644
--- a/README.md
+++ b/README.md
@@ -52,9 +52,11 @@ Then copy your resulting wheel and install it via pip on the target system.
 
 # Changelog
 
-## v0.5.5
+## v0.6
 
-* _TBA_
+* added support for storing metadata as minijson
+    * this will be enabled by default if minijson is importable
+* fixed minor compiler warnings
 
 ## v0.5.4
 
diff --git a/docs/exceptions.rst b/docs/exceptions.rst
index b8125bf..6001a9f 100644
--- a/docs/exceptions.rst
+++ b/docs/exceptions.rst
@@ -16,3 +16,5 @@ The exceptions that inherit from it are:
 .. autoclass:: tempsdb.exceptions.AlreadyExists
 
 .. autoclass:: tempsdb.exceptions.StillOpen
+
+.. autoclass:: tempsdb.exceptions.EnvironmentError
diff --git a/requirements.txt b/requirements.txt
index 08bdec7..56bdfce 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,4 @@ snakehouse>=1.2.3
 six
 nose2
 coverage
+minijson
diff --git a/setup.cfg b/setup.cfg
index c9fdb14..3fde48a 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,7 @@
 # coding: utf-8
 [metadata]
 name = tempsdb
+version = 0.6a2
 long-description = file: README.md
 long-description-content-type = text/markdown; charset=UTF-8
 license_files = LICENSE
@@ -27,6 +28,17 @@ classifier =
     Topic :: Software Development :: Libraries
     Topic :: Database
 
+[options]
+install_requires =
+    satella>=2.17.7
+python_requires = !=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*
+packages = find:
+zip_safe = True
+
+[options.extras_require]
+FasterJSON = ujson
+FasterMetadata = minijson
+
 [pycodestyle]
 max-line-length = 100
 
diff --git a/setup.py b/setup.py
index ad301cd..45a8f88 100644
--- a/setup.py
+++ b/setup.py
@@ -27,12 +27,7 @@ ext_modules = build([Multibuild('tempsdb', find_pyx('tempsdb'),
                      compiler_directives=directives,
                      **cythonize_kwargs)
 
-setup(name='tempsdb',
-      version='0.5.5a1',
-      packages=find_packages(include=['tempsdb', 'tempsdb.*']),
-      install_requires=['satella>=2.14.24', 'ujson'],
+setup(packages=find_packages(include=['tempsdb', 'tempsdb.*']),
       ext_modules=ext_modules,
-      python_requires='!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*',
       test_suite="tests",
-      zip_safe=True
       )
diff --git a/tempsdb/database.pxd b/tempsdb/database.pxd
index f835ad9..03b94be 100644
--- a/tempsdb/database.pxd
+++ b/tempsdb/database.pxd
@@ -1,7 +1,6 @@
 from .series cimport TimeSeries
 from .varlen cimport VarlenSeries
 
-
 cdef class Database:
     cdef:
         readonly str path
diff --git a/tempsdb/database.pyx b/tempsdb/database.pyx
index 22a6f70..97c0e94 100644
--- a/tempsdb/database.pyx
+++ b/tempsdb/database.pyx
@@ -4,12 +4,11 @@ import threading
 import warnings
 
 from satella.coding import DictDeleter
-from satella.json import read_json_from_file, write_json_to_file
 
 from tempsdb.exceptions import DoesNotExist, AlreadyExists, StillOpen
 from .series cimport TimeSeries, create_series
-from .varlen cimport VarlenSeries, create_varlen_series
-
+from .varlen cimport VarlenSeries
+from .metadata cimport read_meta_at, write_meta_at
 
 cdef class Database:
     """
@@ -48,12 +47,7 @@ cdef class Database:
         
         This will change `metadata` attribute.
         """
-        self.metadata = {}
-        if os.path.isfile(os.path.join(self.path, 'metadata.txt')):
-            try:
-                self.metadata = read_json_from_file(os.path.join(self.path, 'metadata.txt')).get('metadata', {})
-            except ValueError:
-                pass
+        self.metadata = read_meta_at(self.path)
         return 0
 
     cpdef int set_metadata(self, dict metadata) except -1:
@@ -64,7 +58,7 @@ cdef class Database:
         
         :param metadata: new metadata to set
         """
-        write_json_to_file(os.path.join(self.path, 'metadata.txt'), {'metadata': metadata})
+        write_meta_at(self.path, metadata)
         self.metadata = metadata
         return 0
 
@@ -316,7 +310,7 @@ cdef class Database:
         """
         Create a new series.
         
-        Note that series cannot be named "varlen" or "metadata.txt"
+        Note that series cannot be named "varlen" or "metadata.txt" or "metadata.minijson"
         
         :param name: name of the series
         :param block_size: size of the data field
@@ -332,8 +326,8 @@ cdef class Database:
         """
         if block_size > page_size + 8:
             raise ValueError('Invalid block size, pick larger page')
-        if name == 'varlen' or name == 'metadata.txt':
-            raise ValueError('Series cannot be named varlen or metadata.txt')
+        if name == 'varlen' or name == 'metadata.txt' or name == 'metadata.minijson':
+            raise ValueError('Series cannot be named varlen or metadata.txt or metadata.minijson')
         if os.path.isdir(os.path.join(self.path, name)):
             raise AlreadyExists('Series already exists')
         cdef TimeSeries series
diff --git a/tempsdb/exceptions.pyx b/tempsdb/exceptions.pyx
index fd213a3..4c3b0f2 100644
--- a/tempsdb/exceptions.pyx
+++ b/tempsdb/exceptions.pyx
@@ -21,3 +21,9 @@ class AlreadyExists(TempsDBError):
 class StillOpen(TempsDBError):
     """This resource has outstanding references and cannot be closed"""
     ...
+
+class EnvironmentError(TempsDBError):
+    """
+    The environment is misconfigured, e.g. minijson is required but is not installed.
+    """
+    ...
diff --git a/tempsdb/metadata.pxd b/tempsdb/metadata.pxd
new file mode 100644
index 0000000..1e3651f
--- /dev/null
+++ b/tempsdb/metadata.pxd
@@ -0,0 +1,8 @@
+# NOTE(review): presumably metadata format identifiers - unused as far as visible, confirm.
+cdef enum:
+    MDV_JSON = 0
+    MDV_MINIJSON = 1
+
+# NOTE(review): write_meta_at has no `except -1` clause - confirm that its exceptions propagate.
+cdef dict read_meta_at(str path)
+cdef int write_meta_at(str path, dict meta)
diff --git a/tempsdb/metadata.pyx b/tempsdb/metadata.pyx
new file mode 100644
index 0000000..b4aac46
--- /dev/null
+++ b/tempsdb/metadata.pyx
@@ -0,0 +1,60 @@
+import os
+
+from satella.json import read_json_from_file, write_json_to_file
+
+cdef bint minijson_enabled  # True when the optional minijson package could be imported
+
+try:
+    import minijson
+    minijson_enabled = True
+except ImportError:
+    minijson_enabled = False
+
+DEF METADATA_FILE_NAME = 'metadata.txt'  # read and written through satella.json
+DEF METADATA_MINIJSON_FILE_NAME = 'metadata.minijson'  # read and written through minijson
+
+
+cdef dict read_meta_at(str path):
+    # Load the metadata stored in directory `path`; a minijson file wins over a JSON one,
+    # and an empty dict is returned when neither file exists.
+    cdef:
+        str minijson_path = os.path.join(path, METADATA_MINIJSON_FILE_NAME)
+        str json_path = os.path.join(path, METADATA_FILE_NAME)
+    if os.path.exists(minijson_path):
+        if not minijson_enabled:
+            from .exceptions import EnvironmentError  # the builtin of that name is just OSError
+            raise EnvironmentError('minijson required to open this series but not installed')
+        with open(minijson_path, 'rb') as f_in:
+            return minijson.loads(f_in.read())
+    elif os.path.exists(json_path):
+        return read_json_from_file(json_path)
+    return {}
+
+# Serialize `meta` with minijson into the metadata file of directory `path`.
+cdef inline int write_meta_minijson(str path, dict meta):
+    with open(os.path.join(path, METADATA_MINIJSON_FILE_NAME), 'wb') as f_out:
+        f_out.write(minijson.dumps(meta))
+    return 0
+
+# Serialize `meta` as JSON into the metadata file of directory `path`.
+cdef inline int write_meta_json(str path, dict meta):
+    write_json_to_file(os.path.join(path, METADATA_FILE_NAME), meta)
+    return 0
+
+cdef int write_meta_at(str path, dict meta):
+    # Store `meta` in directory `path`, reusing the format that is already on disk.
+    # An existing minijson file takes precedence over a JSON one, as in read_meta_at;
+    # a directory without any metadata gets minijson if it is importable, else JSON.
+    # NOTE(review): none of these writers declares `except -1` - confirm that the
+    # Cython version in use still propagates exceptions raised in here.
+    if os.path.exists(os.path.join(path, METADATA_MINIJSON_FILE_NAME)):
+        if not minijson_enabled:
+            from .exceptions import EnvironmentError  # the builtin of that name is just OSError
+            raise EnvironmentError('minijson required to open this series but not installed')
+        return write_meta_minijson(path, meta)
+    if os.path.exists(os.path.join(path, METADATA_FILE_NAME)):
+        return write_meta_json(path, meta)
+    # Nothing stored yet, so the format is ours to choose
+    if minijson_enabled:
+        return write_meta_minijson(path, meta)
+    return write_meta_json(path, meta)
diff --git a/tempsdb/series.pxd b/tempsdb/series.pxd
index b91210b..98da3b1 100644
--- a/tempsdb/series.pxd
+++ b/tempsdb/series.pxd
@@ -23,6 +23,7 @@ cdef class TimeSeries:
         Chunk last_chunk
         object mpm      # satella.instrumentation.memory.MemoryPressureManager
 
+
     cdef void register_memory_pressure_manager(self, object mpm)
     cpdef int delete(self) except -1
     cdef dict get_metadata(self)
diff --git a/tempsdb/series.pyx b/tempsdb/series.pyx
index bb8c84d..7fad4e4 100644
--- a/tempsdb/series.pyx
+++ b/tempsdb/series.pyx
@@ -1,21 +1,22 @@
+import os
 import typing as tp
 import shutil
 import threading
 import warnings
 
-from satella.json import write_json_to_file, read_json_from_file
-
 from .chunks.base cimport Chunk
 from .chunks.normal cimport NormalChunk
 from .chunks.direct cimport DirectChunk
 from .chunks.maker cimport create_chunk
 from .exceptions import DoesNotExist, Corruption, InvalidState, AlreadyExists
-import os
+from .metadata cimport read_meta_at, write_meta_at
 
-DEF METADATA_FILE_NAME = 'metadata.txt'
 DEF DEFAULT_PAGE_SIZE=4096
 
 
+cdef set metadata_file_names = {'metadata.txt', 'metadata.minijson'}
+
+
 cdef class TimeSeries:
     """
     A single time series. This maps each timestamp (unsigned long long) to a block of data
@@ -108,15 +109,15 @@ cdef class TimeSeries:
             raise DoesNotExist('Chosen time series does not exist')
 
         cdef:
-            dict metadata
+            dict metadata = read_meta_at(self.path)
             str filename
             list files = os.listdir(self.path)
             unsigned long long last_chunk_name
             bint is_direct
             bint is_gzip
+            bytes meta_d
 
         try:
-            metadata = read_json_from_file(os.path.join(self.path, METADATA_FILE_NAME))
             self.block_size = metadata['block_size']
             self.max_entries_per_chunk = metadata['max_entries_per_chunk']
             self.last_entry_synced = metadata['last_entry_synced']
@@ -142,7 +143,7 @@ cdef class TimeSeries:
             self.last_entry_ts = 0
         else:
             for filename in files:
-                if filename == METADATA_FILE_NAME:
+                if filename in metadata_file_names:
                     continue
                 is_gzip = filename.endswith('.gz')
                 if is_gzip:
@@ -357,9 +358,7 @@ cdef class TimeSeries:
         """
         Write the metadata to disk
         """
-        with self.lock:
-            write_json_to_file(os.path.join(self.path, METADATA_FILE_NAME), self.get_metadata())
-        return 0
+        return write_meta_at(self.path, self.get_metadata())
 
     cpdef int sync(self) except -1:
         """
@@ -534,6 +533,6 @@ cpdef TimeSeries create_series(str path, str name, unsigned int block_size,
         meta['page_size'] = page_size
     if gzip_level:
         meta['gzip_level'] = gzip_level
-    write_json_to_file(os.path.join(path, 'metadata.txt'), meta)
+    write_meta_at(path, meta)
     return TimeSeries(path, name,
                       use_descriptor_based_access=use_descriptor_based_access)
diff --git a/tempsdb/varlen.pxd b/tempsdb/varlen.pxd
index 1b12527..04a1381 100644
--- a/tempsdb/varlen.pxd
+++ b/tempsdb/varlen.pxd
@@ -12,7 +12,7 @@ cdef class VarlenSeries:
         TimeSeries root_series
         readonly list series
         readonly list length_profile
-        readonly int max_entries_per_chunk
+        readonly unsigned int max_entries_per_chunk
         int current_maximum_length
         object mpm
         int gzip_level
diff --git a/unittest.Dockerfile b/unittest.Dockerfile
index 1877bb5..e296bc3 100644
--- a/unittest.Dockerfile
+++ b/unittest.Dockerfile
@@ -1,6 +1,6 @@
 FROM python:3.8
 
-RUN pip install satella>=2.14.24 snakehouse>=1.3 nose2 wheel ujson coverage
+RUN pip install "satella>=2.14.24" "snakehouse>=1.3" nose2 wheel coverage
 
 ADD tempsdb /app/tempsdb
 ADD setup.py /app/setup.py
-- 
GitLab