From 7cb3f8a6d937e9ce0043d1e1c8c5f85187e04f81 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Piotr=20Ma=C5=9Blanka?= <piotr.maslanka@henrietta.com.pl>
Date: Mon, 30 Nov 2020 15:17:53 +0100
Subject: [PATCH] add find_right/left

---
 docker-compose.yml |  6 +++++
 setup.py           |  2 +-
 tempsdb/chunks.pxd | 31 +++++++++++++++++++++++++
 tempsdb/chunks.pyx | 56 ++++++++++++++++++++++++++++++++++++++++++++--
 tests/test_db.py   | 14 ++++++++++++
 5 files changed, 106 insertions(+), 3 deletions(-)
 create mode 100644 docker-compose.yml

diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..c69064d
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,6 @@
+version: '3.2'
+services:
+  unittest:
+    build:
+      context: .
+      dockerfile: unittest.Dockerfile
diff --git a/setup.py b/setup.py
index bb82f04..fd459cc 100644
--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@ def find_pyx(*path) -> tp.List[str]:
 
 
 setup(name='tempsdb',
-      version='0.1_a1',
+      version='0.1_a2',
       packages=['tempsdb'],
       install_requires=['satella', 'ujson'],
       ext_modules=build([Multibuild('tempsdb', find_pyx('tempsdb')), ],
diff --git a/tempsdb/chunks.pxd b/tempsdb/chunks.pxd
index 6d3bb0b..1459962 100644
--- a/tempsdb/chunks.pxd
+++ b/tempsdb/chunks.pxd
@@ -1,4 +1,9 @@
 from .series cimport TimeSeries
+import struct  # NOTE(review): executable statements in a .pxd - confirm Cython accepts this, else keep them only in chunks.pyx
+
+STRUCT_Q = struct.Struct('<Q')  # little-endian unsigned 64-bit integer; used by get_timestamp_at to decode a timestamp
+DEF HEADER_SIZE = 4  # byte offset added before the first entry when computing an entry's position
+DEF TIMESTAMP_SIZE = 8  # number of bytes sliced out of the mmap and unpacked as one timestamp
 
 cdef class Chunk:
     cdef:
@@ -20,7 +25,33 @@ cdef class Chunk:
     cpdef tuple get_piece_at(self, unsigned int index)
     cpdef int append(self, unsigned long long timestamp, bytes data) except -1
     cpdef int sync(self) except -1
+    cpdef unsigned int find_left(self, unsigned long long timestamp)
+    cpdef unsigned int find_right(self, unsigned long long timestamp)
+
+    cdef inline unsigned long long name(self):
+        """
+        :return: the name of this chunk, which is its ``min_ts``
+        :rtype: int
+        """
+        return self.min_ts
+
     cdef inline int length(self):
+        """
+        :return: number of entries in this chunk (the value of ``self.entries``)
+        :rtype: int
+        """
         return self.entries
 
+    cdef inline unsigned long long get_timestamp_at(self, unsigned int index):
+        """
+        Get the timestamp stored at the given entry.
+
+        :param index: index of the entry; it is not bounds-checked here
+        :type index: int
+        :return: timestamp at this entry
+        :rtype: int
+        """
+        cdef unsigned long offset = HEADER_SIZE+index*self.block_size_plus  # byte position of the entry inside the mmap
+        return STRUCT_Q.unpack(self.mmap[offset:offset+TIMESTAMP_SIZE])[0]  # the entry starts with its timestamp
+
 cpdef Chunk create_chunk(TimeSeries parent, str path, list data)
diff --git a/tempsdb/chunks.pyx b/tempsdb/chunks.pyx
index a8cbb94..4764a5c 100644
--- a/tempsdb/chunks.pyx
+++ b/tempsdb/chunks.pyx
@@ -6,10 +6,10 @@ import mmap
 from .exceptions import Corruption, InvalidState, AlreadyExists
 from .series cimport TimeSeries
 
-STRUCT_L = struct.Struct('<L')
-STRUCT_Q = struct.Struct('<Q')
 DEF HEADER_SIZE = 4
 DEF TIMESTAMP_SIZE = 8
+STRUCT_Q = struct.Struct('<Q')
+STRUCT_L = struct.Struct('<L')
 
 
 cdef class Chunk:
@@ -60,6 +60,58 @@ cdef class Chunk:
         self.max_ts, = STRUCT_Q.unpack(self.mmap[file_size-self.block_size_plus:file_size-self.block_size])
         self.min_ts, = STRUCT_Q.unpack(self.mmap[HEADER_SIZE:HEADER_SIZE+TIMESTAMP_SIZE])
 
+    cpdef unsigned int find_left(self, unsigned long long timestamp):
+        """
+        Return the smallest index i such that ts[i] >= timestamp
+        (the leftmost insertion point, same as :func:`bisect.bisect_left`).
+
+        Used as bound in searches: you start from this index and finish at
+        :meth:`~tempsdb.chunks.Chunk.find_right`.
+
+        :param timestamp: timestamp to look for; the binary search assumes that
+            timestamps in the chunk are stored in ascending order
+        :type timestamp: int
+        :return: smallest index i such that ts[i] >= timestamp, or length of the
+            chunk if timestamp is larger than largest element in this chunk
+        :rtype: int
+        """
+        cdef:
+            unsigned int hi = self.length()
+            unsigned int lo = 0
+            unsigned int mid
+        while lo < hi:      # the answer always lies within [lo, hi]
+            mid = (lo+hi)//2
+            if self.get_timestamp_at(mid) < timestamp:
+                lo = mid+1      # entry at mid is too small, so the answer is to its right
+            else:
+                hi = mid        # entry at mid is >= timestamp, so it is still a candidate
+        return lo
+
+    cpdef unsigned int find_right(self, unsigned long long timestamp):
+        """
+        Return the smallest index i such that ts[i] > timestamp
+        (the rightmost insertion point, same as :func:`bisect.bisect_right`).
+
+        Used as bound in searches: you start from
+        :meth:`~tempsdb.chunks.Chunk.find_left` and stop before this index.
+
+        :param timestamp: timestamp to look for
+        :type timestamp: int
+        :return: smallest i such that ts[i] > timestamp, or length of the chunk if there is none
+        :rtype: int
+        """
+        cdef:
+            unsigned int hi = self.length()
+            unsigned int lo = 0
+            unsigned int mid
+        while lo < hi:      # the answer always lies within [lo, hi]
+            mid = (lo+hi)//2
+            if timestamp < self.get_timestamp_at(mid):
+                hi = mid        # entry at mid is > timestamp, so it is still a candidate
+            else:
+                lo = mid+1      # entry at mid is <= timestamp, so the answer is to its right
+        return lo
+
     def __getitem__(self, index: tp.Union[int, slice]):
         if isinstance(index, slice):
             return self.iterate_range(index.start, index.stop)
diff --git a/tests/test_db.py b/tests/test_db.py
index 5127d5b..cd60365 100644
--- a/tests/test_db.py
+++ b/tests/test_db.py
@@ -21,5 +21,19 @@ class TestDB(unittest.TestCase):
         self.assertEqual(len(chunk), 3)
         self.assertEqual(list(iter(chunk)), data)
         chunk.append(5, b'test')
+        self.assertEqual(chunk.find_left(0), 0)
+        self.assertEqual(chunk.find_left(1), 1)
+        self.assertEqual(chunk.find_left(2), 2)
+        self.assertEqual(chunk.find_left(3), 2)
+        self.assertEqual(chunk.find_left(4), 2)
+        self.assertEqual(chunk.find_left(5), 3)
+        self.assertEqual(chunk.find_left(6), 4)
+        self.assertEqual(chunk.find_right(0), 1)
+        self.assertEqual(chunk.find_right(1), 2)
+        self.assertEqual(chunk.find_right(2), 2)
+        self.assertEqual(chunk.find_right(3), 2)
+        self.assertEqual(chunk.find_right(4), 3)
+        self.assertEqual(chunk.find_right(5), 4)
+        self.assertEqual(chunk.find_right(6), 4)
         chunk.close()
         self.assertEqual(os.path.getsize('chunk.db'), 4+4*12)
-- 
GitLab