diff --git a/docs/varlen.rst b/docs/varlen.rst index 04579d7626ccc53a9b3f81aaf632160d36a8b09f..4275eaaffc6ed43a1dc79e934dfcd08cf1523356 100644 --- a/docs/varlen.rst +++ b/docs/varlen.rst @@ -20,12 +20,17 @@ of 5 normal time series created to accomodate it, with length of: * 255 * 255 -Each entry is also prefixed by it's length. The size of that field is described by an -extra parameter called `size_struct`. +Note that an entry is written only to as many series as it needs to fit. For example, an 8-byte piece of data +would be written only to the first series. + +Each entry is also prefixed by its length, so the actual size of the first +series is larger by the size of that length field. The size of that field is described by an +extra parameter called `size_struct`. The length is stored as an unsigned integer. Note that the only valid sizes of `size_struct` are: * 1 for maximum length of 255 * 2 for maximum length of 65535 +* 3 for maximum length of 16777215 * 4 for maximum length of 4294967295 Accessing them @@ -39,6 +44,10 @@ Use methods :meth:`tempsdb.database.Database.create_varlen_series` and :members: +.. autoclass:: tempsdb.varlen.VarlenIterator + :members: + + ..
autoclass:: tempsdb.varlen.VarlenEntry :members: diff --git a/tempsdb/varlen.pxd b/tempsdb/varlen.pxd index 7d5baa03fdfebda916627dd0290807434dff0d8c..a5b5b347dcfbe942ba68b8c824629ef00d2bae7a 100644 --- a/tempsdb/varlen.pxd +++ b/tempsdb/varlen.pxd @@ -29,6 +29,13 @@ cdef class VarlenSeries: elif self.size_field == 4: return 0xFFFFFFFF +cdef class VarlenIterator: + cdef: + object parent + unsigned long long start + unsigned long long stop + + cdef class VarlenEntry: cdef: list chunks diff --git a/tempsdb/varlen.pyx b/tempsdb/varlen.pyx index e0ee320000e0f2226d749ed1142e445a13a7b636..71263bf6c40cdaac7cfa23cfbc6fd9f327d28983 100644 --- a/tempsdb/varlen.pyx +++ b/tempsdb/varlen.pyx @@ -72,7 +72,7 @@ cdef class VarlenEntry: :param start: position to start at :param stop: position to stop at :return: a slice of this entry - :raises ValueError: stop was smaller than start + :raises ValueError: stop was smaller than start or indices were invalid """ if stop < start: raise ValueError('stop smaller than start') @@ -96,20 +96,26 @@ cdef class VarlenEntry: pointer += next_chunk_len segment += 1 - cdef: int write_pointer = 0 int chunk_len = self.parent.get_length_for(segment) int len_to_read = self.parent.get_length_for(segment) - start_reading_at Chunk chunk = self.chunks[segment] - while write_pointer < length: - if chunk_len > start_reading_at + length: - ... 
# We end this right here - - b[write_pointer:write_pointer+len_to_read] = chunk.get_slice_of_piece_at(self.item_no[segment], - start_reading_at, - 0) + bytes temp_data + while write_pointer < length and len(self.chunks) < segment: + if chunk_len-start_reading_at >= + (length - write_pointer): + # We have all the data that we require + b[write_pointer:length] = chunk.get_slice_of_piece_at(self.item_no[segment], + 0, length-write_pointer) + return bytes(b) + + temp_data = chunk.get_slice_of_piece_at(self.item_no[segment], 0, chunk_len) + b[write_pointer:write_pointer+chunk_len] = temp_data + write_pointer += chunk_len + segment += 1 + start_reading_at = 0 + raise ValueError('invalid indices') cpdef bytes to_bytes(self): """ @@ -136,6 +142,31 @@ cdef class VarlenEntry: def __len__(self) -> int: return self.length() +STRUCT_L = struct.Struct('<L') +class ThreeByteStruct: + __slots__ = () + def pack(self, v: int) -> bytes: + return STRUCT_L.pack(v)[0:3] + + def unpack(self, v: bytes) -> tp.Tuple[int]: + return STRUCT_L.unpack(v+b'\x00') + + +cdef class VarlenIterator: + """ + A result of a varlen series query + """ + def __init__(self, parent: VarlenSeries, start: int, stop: int): + self.parent = parent + self.start = start + self.stop = stop + + def __next__(self) -> tp.Tuple[int, VarlenEntry]: + ... + + def __iter__(self): + return self + cdef class VarlenSeries: """ @@ -170,6 +201,8 @@ cdef class VarlenSeries: self.size_struct = struct.Struct('<B') elif self.size_field == 2: self.size_struct = struct.Struct('<H') + elif self.size_field == 3: + self.size_struct = ThreeByteStruct() elif self.size_field == 4: self.size_struct = struct.Struct('<L') else: @@ -286,6 +319,12 @@ cpdef VarlenSeries create_varlen_series(str path, str name, int size_struct, lis """ Create a variable length series + :param path: path where the directory will be placed + :param name: name of the series + :param size_struct: size of the length indicator. Must be one of 1, 2, 3 or 4. 
+ :param length_profile: series' length profile + :param max_entries_per_chunk: maximum entries per a chunk file + :return: newly created VarlenSeries :raises AlreadyExists: directory exists at given path :raises ValueError: invalid length profile or max_entries_per_chunk or size_struct """ @@ -293,7 +332,7 @@ cpdef VarlenSeries create_varlen_series(str path, str name, int size_struct, lis raise AlreadyExists('directory present at paht') if not length_profile or not max_entries_per_chunk: raise ValueError('invalid parameter') - if size_struct not in (1, 2, 4): + if not (1 <= size_struct <= 4): raise ValueError('invalid size_struct') os.mkdir(path) diff --git a/tests/test_varlen.py b/tests/test_varlen.py new file mode 100644 index 0000000000000000000000000000000000000000..9c64dd2bd6b94a772bfda5a50f6fc8b85205ed0e --- /dev/null +++ b/tests/test_varlen.py @@ -0,0 +1,17 @@ +import os +import unittest + +from tempsdb.varlen import create_varlen_series + + +class TestVarlen(unittest.TestCase): + def test_varlen(self): + varlen = create_varlen_series('test_dir', 'test_dir', 2, [10, 20, 10], 20) + try: + varlen.append(0, b'test skarabeusza') + self.assertEqual(len(os.listdir('test_dir')), 2) + + varlen.append(10, b'test skarabeuszatest skarabeusza') + self.assertEqual(len(os.listdir('test_dir')), 3) + finally: + varlen.close()