From 7cb3f8a6d937e9ce0043d1e1c8c5f85187e04f81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20Ma=C5=9Blanka?= <piotr.maslanka@henrietta.com.pl> Date: Mon, 30 Nov 2020 15:17:53 +0100 Subject: [PATCH] add find_right/left --- docker-compose.yml | 6 +++++ setup.py | 2 +- tempsdb/chunks.pxd | 31 +++++++++++++++++++++++++ tempsdb/chunks.pyx | 56 ++++++++++++++++++++++++++++++++++++++++++++-- tests/test_db.py | 14 ++++++++++++ 5 files changed, 106 insertions(+), 3 deletions(-) create mode 100644 docker-compose.yml diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..c69064d --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,6 @@ +version: '3.2' +services: + unittest: + build: + context: . + dockerfile: unittest.Dockerfile diff --git a/setup.py b/setup.py index bb82f04..fd459cc 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ def find_pyx(*path) -> tp.List[str]: setup(name='tempsdb', - version='0.1_a1', + version='0.1_a2', packages=['tempsdb'], install_requires=['satella', 'ujson'], ext_modules=build([Multibuild('tempsdb', find_pyx('tempsdb')), ], diff --git a/tempsdb/chunks.pxd b/tempsdb/chunks.pxd index 6d3bb0b..1459962 100644 --- a/tempsdb/chunks.pxd +++ b/tempsdb/chunks.pxd @@ -1,4 +1,9 @@ from .series cimport TimeSeries +import struct + +STRUCT_Q = struct.Struct('<Q') +DEF HEADER_SIZE = 4 +DEF TIMESTAMP_SIZE = 8 cdef class Chunk: cdef: @@ -20,7 +25,33 @@ cdef class Chunk: cpdef tuple get_piece_at(self, unsigned int index) cpdef int append(self, unsigned long long timestamp, bytes data) except -1 cpdef int sync(self) except -1 + cpdef unsigned int find_left(self, unsigned long long timestamp) + cpdef unsigned int find_right(self, unsigned long long timestamp) + + cdef unsigned long long name(self): + """ + :return: the name of this chunk + :rtype: int + """ + return self.min_ts + cdef inline int length(self): + """ + :return: amount of entries in this chunk + :rtype: int + """ return self.entries + cdef inline unsigned long 
long get_timestamp_at(self, unsigned int index): + """ + Get timestamp at given entry + + :param index: index of the entry + :type index: int + :return: timestamp at this entry + :rtype: int + """ + cdef unsigned long offset = HEADER_SIZE+index*self.block_size_plus + return STRUCT_Q.unpack(self.mmap[offset:offset+TIMESTAMP_SIZE])[0] + cpdef Chunk create_chunk(TimeSeries parent, str path, list data) diff --git a/tempsdb/chunks.pyx b/tempsdb/chunks.pyx index a8cbb94..4764a5c 100644 --- a/tempsdb/chunks.pyx +++ b/tempsdb/chunks.pyx @@ -6,10 +6,10 @@ import mmap from .exceptions import Corruption, InvalidState, AlreadyExists from .series cimport TimeSeries -STRUCT_L = struct.Struct('<L') -STRUCT_Q = struct.Struct('<Q') DEF HEADER_SIZE = 4 DEF TIMESTAMP_SIZE = 8 +STRUCT_Q = struct.Struct('<Q') +STRUCT_L = struct.Struct('<L') cdef class Chunk: @@ -60,6 +60,58 @@ cdef class Chunk: self.max_ts, = STRUCT_Q.unpack(self.mmap[file_size-self.block_size_plus:file_size-self.block_size]) self.min_ts, = STRUCT_Q.unpack(self.mmap[HEADER_SIZE:HEADER_SIZE+TIMESTAMP_SIZE]) + cpdef unsigned int find_left(self, unsigned long long timestamp): + """ + Return an index i of position such that ts[i] >= timestamp and + (ts[i]-timestamp) -> min. + + Used as bound in searches: you start from this index and finish at + :meth:`~tempsdb.chunks.Chunk.find_right`. 
+ + :param timestamp: timestamp to look for, must be smaller or equal to largest element + in the chunk + :type timestamp: int + :return: index such that ts[i] >= timestamp and (ts[i]-timestamp) -> min, or length of the + array if timestamp is larger than largest element in this chunk + :rtype: int + """ + cdef: + unsigned int hi = self.length() + unsigned int lo = 0 + unsigned int mid + while lo < hi: + mid = (lo+hi)//2 + if self.get_timestamp_at(mid) < timestamp: + lo = mid+1 + else: + hi = mid + return lo + + cpdef unsigned int find_right(self, unsigned long long timestamp): + """ + Return an index i of position such that ts[i] > timestamp and + (ts[i]-timestamp) -> min + + Used as bound in searches: you start from + :meth:`~tempsdb.chunks.Chunk.find_left` and finish at this inclusive. + + :param timestamp: timestamp to look for + :type timestamp: int + :return: index such that ts[i] > timestamp and (ts[i]-timestamp) -> min + :rtype: int + """ + cdef: + unsigned int hi = self.length() + unsigned int lo = 0 + unsigned int mid + while lo < hi: + mid = (lo+hi)//2 + if timestamp < self.get_timestamp_at(mid): + hi = mid + else: + lo = mid+1 + return lo + def __getitem__(self, index: tp.Union[int, slice]): if isinstance(index, slice): return self.iterate_range(index.start, index.stop) diff --git a/tests/test_db.py b/tests/test_db.py index 5127d5b..cd60365 100644 --- a/tests/test_db.py +++ b/tests/test_db.py @@ -21,5 +21,19 @@ class TestDB(unittest.TestCase): self.assertEqual(len(chunk), 3) self.assertEqual(list(iter(chunk)), data) chunk.append(5, b'test') + self.assertEqual(chunk.find_left(0), 0) + self.assertEqual(chunk.find_left(1), 1) + self.assertEqual(chunk.find_left(2), 2) + self.assertEqual(chunk.find_left(3), 2) + self.assertEqual(chunk.find_left(4), 2) + self.assertEqual(chunk.find_left(5), 3) + self.assertEqual(chunk.find_left(6), 4) + self.assertEqual(chunk.find_right(0), 1) + self.assertEqual(chunk.find_right(1), 2) + self.assertEqual(chunk.find_right(2), 
2) + self.assertEqual(chunk.find_right(3), 2) + self.assertEqual(chunk.find_right(4), 3) + self.assertEqual(chunk.find_right(5), 4) + self.assertEqual(chunk.find_right(6), 4) chunk.close() self.assertEqual(os.path.getsize('chunk.db'), 4+4*12) -- GitLab