diff --git a/README.md b/README.md
index f9527652d7c330619a5cb29d095c2568cd854d07..d641c902c95c3d0d322a765215a7f14212248d88 100644
--- a/README.md
+++ b/README.md
@@ -52,9 +52,11 @@ Then copy your resulting wheel and install it via pip on the target system.
 
 # Changelog
 
-## v0.5.5
+## v0.6
 
-* _TBA_
+* added support for storing metadata as minijson
+  * this will be enabled by default if minijson is importable
+* fixed minor compiler warnings
 
 ## v0.5.4
 
diff --git a/docs/exceptions.rst b/docs/exceptions.rst
index b8125bfbeb01f2ed90bc95a3d086dd31af840f2c..6001a9fa2ed4e2c8c14076f4e62e26323bc6c717 100644
--- a/docs/exceptions.rst
+++ b/docs/exceptions.rst
@@ -16,3 +16,5 @@ The exceptions that inherit from it are:
 .. autoclass:: tempsdb.exceptions.AlreadyExists
 
 .. autoclass:: tempsdb.exceptions.StillOpen
+
+.. autoclass:: tempsdb.exceptions.EnvironmentError
diff --git a/requirements.txt b/requirements.txt
index 08bdec7a22be249f116398bc7820e950927cb8f8..56bdfce54f121eac01fea056832d1d76d4832e83 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,4 @@ snakehouse>=1.2.3
 six
 nose2
 coverage
+minijson
diff --git a/setup.cfg b/setup.cfg
index c9fdb14cfd3e327a13a6f5620f583084ad59ea31..3fde48a995a33e7366d5d911ccc28b274fbbd4d0 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,7 @@
 # coding: utf-8
 [metadata]
 name = tempsdb
+version = 0.6a2
 long-description = file: README.md
 long-description-content-type = text/markdown; charset=UTF-8
 license_files = LICENSE
@@ -27,6 +28,17 @@ classifier =
     Topic :: Software Development :: Libraries
     Topic :: Database
 
+[options]
+install_requires =
+    satella>=2.17.7
+python_requires = !=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*
+packages = find:
+zip_safe = True
+
+[options.extras_require]
+FasterJSON = ujson
+FasterMetadata = minijson
+
 [pycodestyle]
 max-line-length = 100
 
diff --git a/setup.py b/setup.py
index ad301cd357f40b6919eaaf1f6e316f1b1b6f8d25..45a8f8831cb4b214fba64df102337f98c2e8e287 100644
--- a/setup.py
+++ b/setup.py
@@ -27,12 +27,7 @@ ext_modules = build([Multibuild('tempsdb', find_pyx('tempsdb'),
                     compiler_directives=directives,
                     **cythonize_kwargs)
 
-setup(name='tempsdb',
-      version='0.5.5a1',
-      packages=find_packages(include=['tempsdb', 'tempsdb.*']),
-      install_requires=['satella>=2.14.24', 'ujson'],
+setup(packages=find_packages(include=['tempsdb', 'tempsdb.*']),
       ext_modules=ext_modules,
-      python_requires='!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*',
       test_suite="tests",
-      zip_safe=True
       )
diff --git a/tempsdb/database.pxd b/tempsdb/database.pxd
index f835ad92adec06a30a795bea29d81de4b1c79aa7..03b94beec0d87881a84c6b748f8daac218a4e757 100644
--- a/tempsdb/database.pxd
+++ b/tempsdb/database.pxd
@@ -1,7 +1,6 @@
 from .series cimport TimeSeries
 from .varlen cimport VarlenSeries
 
-
 cdef class Database:
     cdef:
         readonly str path
diff --git a/tempsdb/database.pyx b/tempsdb/database.pyx
index 22a6f70ca916fe9123c8bbe8025f42e9e630e809..97c0e94748240347822b1ff571a5b6caf2302d0b 100644
--- a/tempsdb/database.pyx
+++ b/tempsdb/database.pyx
@@ -4,12 +4,11 @@ import threading
 import warnings
 
 from satella.coding import DictDeleter
-from satella.json import read_json_from_file, write_json_to_file
 
 from tempsdb.exceptions import DoesNotExist, AlreadyExists, StillOpen
 from .series cimport TimeSeries, create_series
-from .varlen cimport VarlenSeries, create_varlen_series
-
+from .varlen cimport VarlenSeries
+from .metadata cimport read_meta_at, write_meta_at
 
 cdef class Database:
     """
@@ -48,12 +47,7 @@ cdef class Database:
 
         This will change `metadata` attribute.
         """
-        self.metadata = {}
-        if os.path.isfile(os.path.join(self.path, 'metadata.txt')):
-            try:
-                self.metadata = read_json_from_file(os.path.join(self.path, 'metadata.txt')).get('metadata', {})
-            except ValueError:
-                pass
+        self.metadata = read_meta_at(self.path)
         return 0
 
     cpdef int set_metadata(self, dict metadata) except -1:
@@ -64,7 +58,7 @@ cdef class Database:
 
         :param metadata: new metadata to set
         """
-        write_json_to_file(os.path.join(self.path, 'metadata.txt'), {'metadata': metadata})
+        write_meta_at(self.path, metadata)
         self.metadata = metadata
         return 0
 
@@ -316,7 +310,7 @@ cdef class Database:
         """
         Create a new series.
 
-        Note that series cannot be named "varlen" or "metadata.txt"
+        Note that a series cannot be named "varlen", "metadata.txt" or "metadata.minijson"
 
         :param name: name of the series
         :param block_size: size of the data field
@@ -332,8 +326,8 @@ cdef class Database:
         """
         if block_size > page_size + 8:
             raise ValueError('Invalid block size, pick larger page')
-        if name == 'varlen' or name == 'metadata.txt':
-            raise ValueError('Series cannot be named varlen or metadata.txt')
+        if name == 'varlen' or name == 'metadata.txt' or name == 'metadata.minijson':
+            raise ValueError('Series cannot be named varlen or metadata.txt or metadata.minijson')
         if os.path.isdir(os.path.join(self.path, name)):
             raise AlreadyExists('Series already exists')
         cdef TimeSeries series
diff --git a/tempsdb/exceptions.pyx b/tempsdb/exceptions.pyx
index fd213a324d9bb927e9a80ffd9742411b37f41b79..4c3b0f222eb8599fa4a06f6dbb6bbcb12d98d15e 100644
--- a/tempsdb/exceptions.pyx
+++ b/tempsdb/exceptions.pyx
@@ -21,3 +21,9 @@ class AlreadyExists(TempsDBError):
 class StillOpen(TempsDBError):
     """This resource has outstanding references and cannot be closed"""
     ...
+
+class EnvironmentError(TempsDBError):
+    """
+    The environment is misconfigured, e.g. minijson is required but not installed
+    """
+    ...
diff --git a/tempsdb/metadata.pxd b/tempsdb/metadata.pxd
new file mode 100644
index 0000000000000000000000000000000000000000..1e3651f9b2d7ead0ee92b02c1ac60b50407ba1b1
--- /dev/null
+++ b/tempsdb/metadata.pxd
@@ -0,0 +1,8 @@
+
+cdef enum:
+    MDV_JSON = 0
+    MDV_MINIJSON = 1
+
+
+cdef dict read_meta_at(str path)
+cdef int write_meta_at(str path, dict meta)
diff --git a/tempsdb/metadata.pyx b/tempsdb/metadata.pyx
new file mode 100644
index 0000000000000000000000000000000000000000..b4aac46836eba34093468e76120ed124484733da
--- /dev/null
+++ b/tempsdb/metadata.pyx
@@ -0,0 +1,60 @@
+import os
+
+from satella.json import read_json_from_file, write_json_to_file
+from .exceptions import EnvironmentError
+
+cdef bint minijson_enabled
+
+try:
+    import minijson
+    minijson_enabled = True
+except ImportError:
+    minijson_enabled = False
+
+DEF METADATA_FILE_NAME = 'metadata.txt'
+DEF METADATA_MINIJSON_FILE_NAME = 'metadata.minijson'
+
+
+cdef dict read_meta_at(str path):
+    cdef:
+        bint exists_minijson = os.path.exists(os.path.join(path, METADATA_MINIJSON_FILE_NAME))
+        bint exists_json = os.path.exists(os.path.join(path, METADATA_FILE_NAME))
+        bytes data
+    if exists_minijson:
+        if not minijson_enabled:
+            raise EnvironmentError('minijson required to open this series but not installed')
+        with open(os.path.join(path, METADATA_MINIJSON_FILE_NAME), 'rb') as f_in:
+            data = bytes(f_in.read())
+        return minijson.loads(data)
+    elif exists_json:
+        return read_json_from_file(os.path.join(path, METADATA_FILE_NAME))
+    else:
+        return {}
+
+cdef inline int write_meta_minijson(str path, dict meta):
+    with open(os.path.join(path, METADATA_MINIJSON_FILE_NAME), 'wb') as f_out:
+        f_out.write(minijson.dumps(meta))
+    return 0
+
+cdef inline int write_meta_json(str path, dict meta):
+    write_json_to_file(os.path.join(path, METADATA_FILE_NAME), meta)
+    return 0
+
+cdef int write_meta_at(str path, dict meta):
+    cdef:
+        bint exists_minijson = os.path.exists(os.path.join(path, METADATA_MINIJSON_FILE_NAME))
+        bint exists_json = os.path.exists(os.path.join(path, METADATA_FILE_NAME))
+    if not exists_minijson and not exists_json:
+        if minijson_enabled:
+            return write_meta_minijson(path, meta)
+        else:
+            return write_meta_json(path, meta)
+    elif exists_minijson and not minijson_enabled:
+        raise EnvironmentError('minijson required to open this series but not installed')
+    elif exists_minijson:
+        return write_meta_minijson(path, meta)
+    elif exists_json:
+        return write_meta_json(path, meta)
+    else:
+        raise EnvironmentError('both metadata files exist!')
+    return 0
diff --git a/tempsdb/series.pxd b/tempsdb/series.pxd
index b91210b28b1a0ad548188012c7e1adc5130e9696..98da3b16bd2f443e9bd29042d38cdf29570442df 100644
--- a/tempsdb/series.pxd
+++ b/tempsdb/series.pxd
@@ -23,6 +23,7 @@ cdef class TimeSeries:
         Chunk last_chunk
         object mpm      # satella.instrumentation.memory.MemoryPressureManager
 
+
     cdef void register_memory_pressure_manager(self, object mpm)
     cpdef int delete(self) except -1
     cdef dict get_metadata(self)
diff --git a/tempsdb/series.pyx b/tempsdb/series.pyx
index bb8c84d69c56d0bb629283bcabc4b329f13d9096..7fad4e459d9aaf528420849fe8ea2b327a4c6352 100644
--- a/tempsdb/series.pyx
+++ b/tempsdb/series.pyx
@@ -1,21 +1,22 @@
+import os
 import typing as tp
 import shutil
 import threading
 import warnings
 
-from satella.json import write_json_to_file, read_json_from_file
-
 from .chunks.base cimport Chunk
 from .chunks.normal cimport NormalChunk
 from .chunks.direct cimport DirectChunk
 from .chunks.maker cimport create_chunk
 from .exceptions import DoesNotExist, Corruption, InvalidState, \
     AlreadyExists
-import os
+from .metadata cimport read_meta_at, write_meta_at
 
-DEF METADATA_FILE_NAME = 'metadata.txt'
 DEF DEFAULT_PAGE_SIZE=4096
 
+cdef set metadata_file_names = {'metadata.txt', 'metadata.minijson'}
+
+
 cdef class TimeSeries:
     """
     A single time series. This maps each timestamp (unsigned long long) to a block of data
@@ -108,15 +109,15 @@ cdef class TimeSeries:
             raise DoesNotExist('Chosen time series does not exist')
 
         cdef:
-            dict metadata
+            dict metadata = read_meta_at(self.path)
             str filename
            list files = os.listdir(self.path)
             unsigned long long last_chunk_name
             bint is_direct
             bint is_gzip
+            bytes meta_d
 
         try:
-            metadata = read_json_from_file(os.path.join(self.path, METADATA_FILE_NAME))
             self.block_size = metadata['block_size']
             self.max_entries_per_chunk = metadata['max_entries_per_chunk']
             self.last_entry_synced = metadata['last_entry_synced']
@@ -142,7 +143,7 @@ cdef class TimeSeries:
             self.last_entry_ts = 0
         else:
             for filename in files:
-                if filename == METADATA_FILE_NAME:
+                if filename in metadata_file_names:
                     continue
                 is_gzip = filename.endswith('.gz')
                 if is_gzip:
@@ -357,9 +358,7 @@ cdef class TimeSeries:
         """
         Write the metadata to disk
         """
-        with self.lock:
-            write_json_to_file(os.path.join(self.path, METADATA_FILE_NAME), self.get_metadata())
-        return 0
+        return write_meta_at(self.path, self.get_metadata())
 
     cpdef int sync(self) except -1:
         """
@@ -534,6 +533,6 @@ cpdef TimeSeries create_series(str path, str name, unsigned int block_size,
         meta['page_size'] = page_size
     if gzip_level:
         meta['gzip_level'] = gzip_level
-    write_json_to_file(os.path.join(path, 'metadata.txt'), meta)
+    write_meta_at(path, meta)
     return TimeSeries(path, name,
                       use_descriptor_based_access=use_descriptor_based_access)
diff --git a/tempsdb/varlen.pxd b/tempsdb/varlen.pxd
index 1b125279e8cebb0849c1506fc6bed0d00c5d96a7..04a1381fac6a6174adea6e0d03b50a78551bf329 100644
--- a/tempsdb/varlen.pxd
+++ b/tempsdb/varlen.pxd
@@ -12,7 +12,7 @@ cdef class VarlenSeries:
         TimeSeries root_series
         readonly list series
         readonly list length_profile
-        readonly int max_entries_per_chunk
+        readonly unsigned int max_entries_per_chunk
         int current_maximum_length
         object mpm
         int gzip_level
diff --git a/unittest.Dockerfile b/unittest.Dockerfile
index 1877bb59be6caac9fea5604b8fd99f351f1a798c..e296bc37485d24833bcada0c7b6eb7f0671f226a 100644
--- a/unittest.Dockerfile
+++ b/unittest.Dockerfile
@@ -1,6 +1,6 @@
 FROM python:3.8
 
-RUN pip install satella>=2.14.24 snakehouse>=1.3 nose2 wheel ujson coverage
+RUN pip install satella>=2.14.24 snakehouse>=1.3 nose2 wheel coverage
 
 ADD tempsdb /app/tempsdb
 ADD setup.py /app/setup.py
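
Illustration only, not part of the patch: the sketch below restates in plain Python the metadata fallback that the new tempsdb/metadata.pyx implements above, so the behaviour is easy to see at a glance. Reads prefer metadata.minijson when it exists and fall back to metadata.txt; brand-new series are written as minijson only when the minijson package is importable. The standard json module stands in here for satella's read_json_from_file/write_json_to_file, and the builtin EnvironmentError stands in for tempsdb.exceptions.EnvironmentError; minijson.loads/minijson.dumps mirror the calls used in the patch.

    import os
    import json

    try:
        import minijson
        MINIJSON_ENABLED = True
    except ImportError:
        MINIJSON_ENABLED = False

    def read_meta_at(path: str) -> dict:
        # Prefer metadata.minijson when it exists, fall back to metadata.txt, else return {}.
        minijson_file = os.path.join(path, 'metadata.minijson')
        json_file = os.path.join(path, 'metadata.txt')
        if os.path.exists(minijson_file):
            if not MINIJSON_ENABLED:
                raise EnvironmentError('minijson required to open this series but not installed')
            with open(minijson_file, 'rb') as f_in:
                return minijson.loads(f_in.read())
        if os.path.exists(json_file):
            with open(json_file, 'r') as f_in:
                return json.load(f_in)
        return {}

    def write_meta_at(path: str, meta: dict) -> None:
        # Existing series keep their current format; new series use minijson when available.
        minijson_file = os.path.join(path, 'metadata.minijson')
        json_file = os.path.join(path, 'metadata.txt')
        if os.path.exists(minijson_file) and not MINIJSON_ENABLED:
            raise EnvironmentError('minijson required to open this series but not installed')
        if os.path.exists(minijson_file) or (MINIJSON_ENABLED and not os.path.exists(json_file)):
            with open(minijson_file, 'wb') as f_out:
                f_out.write(minijson.dumps(meta))
        else:
            with open(json_file, 'w') as f_out:
                json.dump(meta, f_out)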