From 786d4faf7474d62838d3129efa1f62f9548bffc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20Ma=C5=9Blanka?= <piotr.maslanka@henrietta.com.pl> Date: Fri, 11 Dec 2020 19:09:27 +0100 Subject: [PATCH] added an unit test --- docs/varlen.rst | 13 ++++++++-- tempsdb/varlen.pxd | 7 ++++++ tempsdb/varlen.pyx | 59 ++++++++++++++++++++++++++++++++++++-------- tests/test_varlen.py | 17 +++++++++++++ 4 files changed, 84 insertions(+), 12 deletions(-) create mode 100644 tests/test_varlen.py diff --git a/docs/varlen.rst b/docs/varlen.rst index 04579d7..4275eaa 100644 --- a/docs/varlen.rst +++ b/docs/varlen.rst @@ -20,12 +20,17 @@ of 5 normal time series created to accomodate it, with length of: * 255 * 255 -Each entry is also prefixed by it's length. The size of that field is described by an -extra parameter called `size_struct`. +Note that an entry is written to enough series so that it fits. For example, a 8 byte piece of data +would be written to only to the first series. + +Each entry is also prefixed by it's length, so the actual size of the first +series is larger by that. The size of that field is described by an +extra parameter called `size_struct`. It represents an unsigned number. Note that the only valid sizes of `size_struct` are: * 1 for maximum length of 255 * 2 for maximum length of 65535 +* 3 for maximum length of 16777215 * 4 for maximum length of 4294967295 Accessing them @@ -39,6 +44,10 @@ Use methods :meth:`tempsdb.database.Database.create_varlen_series` and :members: +.. autoclass:: tempsdb.varlen.VarlenIterator + :members: + + .. autoclass:: tempsdb.varlen.VarlenEntry :members: diff --git a/tempsdb/varlen.pxd b/tempsdb/varlen.pxd index 7d5baa0..a5b5b34 100644 --- a/tempsdb/varlen.pxd +++ b/tempsdb/varlen.pxd @@ -29,6 +29,13 @@ cdef class VarlenSeries: elif self.size_field == 4: return 0xFFFFFFFF +cdef class VarlenIterator: + cdef: + object parent + unsigned long long start + unsigned long long stop + + cdef class VarlenEntry: cdef: list chunks diff --git a/tempsdb/varlen.pyx b/tempsdb/varlen.pyx index e0ee320..71263bf 100644 --- a/tempsdb/varlen.pyx +++ b/tempsdb/varlen.pyx @@ -72,7 +72,7 @@ cdef class VarlenEntry: :param start: position to start at :param stop: position to stop at :return: a slice of this entry - :raises ValueError: stop was smaller than start + :raises ValueError: stop was smaller than start or indices were invalid """ if stop < start: raise ValueError('stop smaller than start') @@ -96,20 +96,26 @@ cdef class VarlenEntry: pointer += next_chunk_len segment += 1 - cdef: int write_pointer = 0 int chunk_len = self.parent.get_length_for(segment) int len_to_read = self.parent.get_length_for(segment) - start_reading_at Chunk chunk = self.chunks[segment] - while write_pointer < length: - if chunk_len > start_reading_at + length: - ... # We end this right here - - b[write_pointer:write_pointer+len_to_read] = chunk.get_slice_of_piece_at(self.item_no[segment], - start_reading_at, - 0) + bytes temp_data + while write_pointer < length and len(self.chunks) < segment: + if chunk_len-start_reading_at >= + (length - write_pointer): + # We have all the data that we require + b[write_pointer:length] = chunk.get_slice_of_piece_at(self.item_no[segment], + 0, length-write_pointer) + return bytes(b) + + temp_data = chunk.get_slice_of_piece_at(self.item_no[segment], 0, chunk_len) + b[write_pointer:write_pointer+chunk_len] = temp_data + write_pointer += chunk_len + segment += 1 + start_reading_at = 0 + raise ValueError('invalid indices') cpdef bytes to_bytes(self): """ @@ -136,6 +142,31 @@ cdef class VarlenEntry: def __len__(self) -> int: return self.length() +STRUCT_L = struct.Struct('<L') +class ThreeByteStruct: + __slots__ = () + def pack(self, v: int) -> bytes: + return STRUCT_L.pack(v)[0:3] + + def unpack(self, v: bytes) -> tp.Tuple[int]: + return STRUCT_L.unpack(v+b'\x00') + + +cdef class VarlenIterator: + """ + A result of a varlen series query + """ + def __init__(self, parent: VarlenSeries, start: int, stop: int): + self.parent = parent + self.start = start + self.stop = stop + + def __next__(self) -> tp.Tuple[int, VarlenEntry]: + ... + + def __iter__(self): + return self + cdef class VarlenSeries: """ @@ -170,6 +201,8 @@ cdef class VarlenSeries: self.size_struct = struct.Struct('<B') elif self.size_field == 2: self.size_struct = struct.Struct('<H') + elif self.size_field == 3: + self.size_struct = ThreeByteStruct() elif self.size_field == 4: self.size_struct = struct.Struct('<L') else: @@ -286,6 +319,12 @@ cpdef VarlenSeries create_varlen_series(str path, str name, int size_struct, lis """ Create a variable length series + :param path: path where the directory will be placed + :param name: name of the series + :param size_struct: size of the length indicator. Must be one of 1, 2, 3 or 4. + :param length_profile: series' length profile + :param max_entries_per_chunk: maximum entries per a chunk file + :return: newly created VarlenSeries :raises AlreadyExists: directory exists at given path :raises ValueError: invalid length profile or max_entries_per_chunk or size_struct """ @@ -293,7 +332,7 @@ cpdef VarlenSeries create_varlen_series(str path, str name, int size_struct, lis raise AlreadyExists('directory present at paht') if not length_profile or not max_entries_per_chunk: raise ValueError('invalid parameter') - if size_struct not in (1, 2, 4): + if not (1 <= size_struct <= 4): raise ValueError('invalid size_struct') os.mkdir(path) diff --git a/tests/test_varlen.py b/tests/test_varlen.py new file mode 100644 index 0000000..9c64dd2 --- /dev/null +++ b/tests/test_varlen.py @@ -0,0 +1,17 @@ +import os +import unittest + +from tempsdb.varlen import create_varlen_series + + +class TestVarlen(unittest.TestCase): + def test_varlen(self): + varlen = create_varlen_series('test_dir', 'test_dir', 2, [10, 20, 10], 20) + try: + varlen.append(0, b'test skarabeusza') + self.assertEqual(len(os.listdir('test_dir')), 2) + + varlen.append(10, b'test skarabeuszatest skarabeusza') + self.assertEqual(len(os.listdir('test_dir')), 3) + finally: + varlen.close() -- GitLab