diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c69064de9430667129b5fbac7c32ca5511a104d5
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,6 @@
+version: '3.2'
+services:
+  unittest:
+    build:
+      context: .
+      dockerfile: unittest.Dockerfile
diff --git a/setup.py b/setup.py
index bb82f047c69c3c87905017374eeda21dd9849be9..fd459ccef3b67830a4054523199c38fc221613f2 100644
--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@ def find_pyx(*path) -> tp.List[str]:
 
 
 setup(name='tempsdb',
-      version='0.1_a1',
+      version='0.1_a2',
       packages=['tempsdb'],
       install_requires=['satella', 'ujson'],
       ext_modules=build([Multibuild('tempsdb', find_pyx('tempsdb')), ],
diff --git a/tempsdb/chunks.pxd b/tempsdb/chunks.pxd
index 6d3bb0b9de9c020629cb41b26f7731d3e3aea720..145996269502726f06c1a76fe6bad93b7e7bf904 100644
--- a/tempsdb/chunks.pxd
+++ b/tempsdb/chunks.pxd
@@ -1,4 +1,9 @@
 from .series cimport TimeSeries
+import struct
+
+STRUCT_Q = struct.Struct('<Q')
+DEF HEADER_SIZE = 4
+DEF TIMESTAMP_SIZE = 8
 
 cdef class Chunk:
     cdef:
@@ -20,7 +25,33 @@ cdef class Chunk:
     cpdef tuple get_piece_at(self, unsigned int index)
     cpdef int append(self, unsigned long long timestamp, bytes data) except -1
     cpdef int sync(self) except -1
+    cpdef unsigned int find_left(self, unsigned long long timestamp)
+    cpdef unsigned int find_right(self, unsigned long long timestamp)
+
+    cdef inline unsigned long long name(self):
+        """
+        :return: the name of this chunk (its minimum timestamp)
+        :rtype: int
+        """
+        return self.min_ts
+
     cdef inline int length(self):
+        """
+        :return: amount of entries in this chunk
+        :rtype: int
+        """
         return self.entries
 
+    cdef inline unsigned long long get_timestamp_at(self, unsigned int index):
+        """
+        Get timestamp at given entry
+
+        :param index: index of the entry
+        :type index: int
+        :return: timestamp at this entry
+        :rtype: int
+        """
+        cdef unsigned long offset = HEADER_SIZE+index*self.block_size_plus
+        return STRUCT_Q.unpack(self.mmap[offset:offset+TIMESTAMP_SIZE])[0]
+
 cpdef Chunk create_chunk(TimeSeries parent, str path, list data)
diff --git a/tempsdb/chunks.pyx b/tempsdb/chunks.pyx
index a8cbb94e46a33418c475fe41687156331fc04049..4764a5c86db0316c39a8bbf33a51b38b932b33ec 100644
--- a/tempsdb/chunks.pyx
+++ b/tempsdb/chunks.pyx
@@ -6,10 +6,10 @@ import mmap
 from .exceptions import Corruption, InvalidState, AlreadyExists
 from .series cimport TimeSeries
 
-STRUCT_L = struct.Struct('<L')
-STRUCT_Q = struct.Struct('<Q')
 DEF HEADER_SIZE = 4
 DEF TIMESTAMP_SIZE = 8
+STRUCT_Q = struct.Struct('<Q')
+STRUCT_L = struct.Struct('<L')
 
 
 cdef class Chunk:
@@ -60,6 +60,58 @@ cdef class Chunk:
         self.max_ts, = STRUCT_Q.unpack(self.mmap[file_size-self.block_size_plus:file_size-self.block_size])
         self.min_ts, = STRUCT_Q.unpack(self.mmap[HEADER_SIZE:HEADER_SIZE+TIMESTAMP_SIZE])
 
+    cpdef unsigned int find_left(self, unsigned long long timestamp):
+        """
+        Return the smallest index i such that ts[i] >= timestamp
+        (a bisect-left over the timestamps stored in this chunk).
+
+        Used as bound in searches: you start from this index and finish at
+        :meth:`~tempsdb.chunks.Chunk.find_right`.
+
+        :param timestamp: timestamp to look for; if it is larger than the largest
+            element in the chunk, the length of the chunk is returned
+        :type timestamp: int
+        :return: smallest index such that ts[i] >= timestamp, or length of the
+            array if timestamp is larger than largest element in this chunk
+        :rtype: int
+        """
+        cdef:
+            unsigned int hi = self.length()
+            unsigned int lo = 0
+            unsigned int mid
+        while lo < hi:
+            mid = lo+(hi-lo)//2     # same midpoint as (lo+hi)//2, but cannot overflow
+            if self.get_timestamp_at(mid) < timestamp:
+                lo = mid+1
+            else:
+                hi = mid
+        return lo
+
+    cpdef unsigned int find_right(self, unsigned long long timestamp):
+        """
+        Return an index i of position such that ts[i] > timestamp and
+        (ts[i]-timestamp) -> min
+
+        Used as bound in searches: you start from
+        :meth:`~tempsdb.chunks.Chunk.find_left` and finish just before this index.
+
+        :param timestamp: timestamp to look for
+        :type timestamp: int
+        :return: smallest index such that ts[i] > timestamp, or length of the chunk if there is none
+        :rtype: int
+        """
+        cdef:
+            unsigned int hi = self.length()
+            unsigned int lo = 0
+            unsigned int mid
+        while lo < hi:
+            mid = lo+(hi-lo)//2     # same midpoint as (lo+hi)//2, but cannot overflow
+            if timestamp < self.get_timestamp_at(mid):
+                hi = mid
+            else:
+                lo = mid+1
+        return lo
+
     def __getitem__(self, index: tp.Union[int, slice]):
         if isinstance(index, slice):
             return self.iterate_range(index.start, index.stop)
diff --git a/tests/test_db.py b/tests/test_db.py
index 5127d5bb817ceeca577b9d4dceb609c2dee38409..cd603659fd18e5e0bbdd5109085085a017a70939 100644
--- a/tests/test_db.py
+++ b/tests/test_db.py
@@ -21,5 +21,19 @@ class TestDB(unittest.TestCase):
         self.assertEqual(len(chunk), 3)
         self.assertEqual(list(iter(chunk)), data)
         chunk.append(5, b'test')
+        self.assertEqual(chunk.find_left(0), 0)
+        self.assertEqual(chunk.find_left(1), 1)
+        self.assertEqual(chunk.find_left(2), 2)
+        self.assertEqual(chunk.find_left(3), 2)
+        self.assertEqual(chunk.find_left(4), 2)
+        self.assertEqual(chunk.find_left(5), 3)
+        self.assertEqual(chunk.find_left(6), 4)
+        self.assertEqual(chunk.find_right(0), 1)
+        self.assertEqual(chunk.find_right(1), 2)
+        self.assertEqual(chunk.find_right(2), 2)
+        self.assertEqual(chunk.find_right(3), 2)
+        self.assertEqual(chunk.find_right(4), 3)
+        self.assertEqual(chunk.find_right(5), 4)
+        self.assertEqual(chunk.find_right(6), 4)
         chunk.close()
         self.assertEqual(os.path.getsize('chunk.db'), 4+4*12)