From 1cb9ec4ee1d5b4e635ca2833f5650cc77c3b4749 Mon Sep 17 00:00:00 2001
From: hofmockel <dreagonfly@gmx.de>
Date: Mon, 28 Apr 2014 20:32:33 +0200
Subject: [PATCH] Allow it to configure the memtable representation

---
 docs/api/options.rst               | 76 ++++++++++++++++++++++++++++++
 docs/changelog.rst                 |  2 +
 rocksdb/_rocksdb.pyx               | 48 +++++++++++++++++++
 rocksdb/cpp/memtable_factories.hpp | 15 ++++++
 rocksdb/memtablerep.pxd            | 12 +++++
 rocksdb/options.pxd                |  3 +-
 6 files changed, 155 insertions(+), 1 deletion(-)
 create mode 100644 rocksdb/cpp/memtable_factories.hpp
 create mode 100644 rocksdb/memtablerep.pxd

diff --git a/docs/api/options.rst b/docs/api/options.rst
index bc9513f..6e066c3 100644
--- a/docs/api/options.rst
+++ b/docs/api/options.rst
@@ -645,6 +645,18 @@ Options object
         | *Type:* ``int``
         | *Default:* ``8``
 
+    .. py:attribute:: memtable_factory
+
+        This is a factory that provides MemTableRep objects.
+        Right now you can assign instances of the following classes:
+
+        * :py:class:`rocksdb.VectorMemtableFactory`
+        * :py:class:`rocksdb.SkipListMemtableFactory`
+        * :py:class:`rocksdb.HashSkipListMemtableFactory`
+        * :py:class:`rocksdb.HashLinkListMemtableFactory`
+
+        *Default:* :py:class:`rocksdb.SkipListMemtableFactory`
+
     .. py:attribute:: table_factory
 
         Factory for the files forming the persisten data storage.
@@ -857,3 +869,67 @@ Tutorial of rocksdb table formats is available here:
 
         :param int index_sparseness:
             Need to build one index record for how many keys for binary search.
+
+
+.. _memtable_factories_label:
+
+MemtableFactories
+=================
+
+RocksDB has different classes to represent the in-memory buffer for the current
+operations. You have to assign instances of the following classes to
+:py:attr:`rocksdb.Options.memtable_factory`.
+This page has a comparison of the most popular ones:
+https://github.com/facebook/rocksdb/wiki/Hash-based-memtable-implementations
+
+.. py:class:: rocksdb.VectorMemtableFactory
+
+    This creates MemTableReps that are backed by an std::vector.
+    On iteration, the vector is sorted. This is useful for workloads where
+    iteration is very rare and writes are generally not issued after reads begin.
+
+    .. py:method:: __init__(count=0)
+
+        :param int count:
+            Passed to the constructor of the underlying std::vector of each
+            VectorRep. On initialization, the underlying array will have at
+            least ``count`` bytes reserved for usage.
+
+.. py:class:: rocksdb.SkipListMemtableFactory
+
+    This uses a skip list to store keys.
+
+    .. py:method:: __init__()
+
+.. py:class:: rocksdb.HashSkipListMemtableFactory
+
+    This class contains a fixed array of buckets, each pointing
+    to a skiplist (null if the bucket is empty).
+
+    .. note::
+
+        :py:attr:`rocksdb.Options.prefix_extractor` must be set, otherwise
+        rocksdb falls back to skip-list.
+
+    .. py:method:: __init__(bucket_count = 1000000, skiplist_height = 4, skiplist_branching_factor = 4)
+
+        :param int bucket_count: number of fixed array buckets
+        :param int skiplist_height: the max height of the skiplist
+        :param int skiplist_branching_factor:
+            probabilistic size ratio between adjacent link lists in the skiplist
+
+.. py:class:: rocksdb.HashLinkListMemtableFactory
+
+    This factory creates memtables backed by a hashed linked list.
+    It contains a fixed array of buckets, each pointing to a sorted singly
+    linked list (null if the bucket is empty).
+
+    .. note::
+
+        :py:attr:`rocksdb.Options.prefix_extractor` must be set, otherwise
+        rocksdb falls back to skip-list.
+
+
+    .. py:method:: __init__(bucket_count=50000)
+
+        :param int bucket_count: number of fixed array buckets
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 88f2f8d..cddae5a 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -30,6 +30,8 @@ Target is to work with the next version of rocksdb (2.8.fb)
   * https://github.com/facebook/rocksdb/wiki/PlainTable-Format
   * https://github.com/facebook/rocksdb/wiki/How-to-persist-in-memory-RocksDB-database%3F
 
+* Add :py:attr:`rocksdb.Options.memtable_factory` option.
+
 Version 0.1
 -----------
 
diff --git a/rocksdb/_rocksdb.pyx b/rocksdb/_rocksdb.pyx
index c1639dd..67eb39b 100644
--- a/rocksdb/_rocksdb.pyx
+++ b/rocksdb/_rocksdb.pyx
@@ -25,6 +25,7 @@ cimport iterator
 cimport backup
 cimport env
 cimport table_factory
+cimport memtablerep
 
 from slice_ cimport Slice
 from status cimport Status
@@ -583,6 +584,41 @@ cdef class TotalOrderPlainTableFactory(PyTableFactory):
                 index_sparseness))
 
 #############################################
+
+### Here are the MemtableFactories
+@cython.internal
+cdef class PyMemtableFactory(object):  # shared base for the memtable factory wrappers; subclasses fill in `factory`
+    cdef shared_ptr[memtablerep.MemTableRepFactory] factory  # owning handle to the C++ factory (empty until a subclass __init__ runs)
+
+    cdef shared_ptr[memtablerep.MemTableRepFactory] get_memtable_factory(self):  # returns a copy of the shared_ptr; called by Options.memtable_factory.__set__
+        return self.factory
+
+cdef class SkipListMemtableFactory(PyMemtableFactory):  # memtable representation that stores keys in a skip list
+    def __init__(self):  # takes no configuration
+        self.factory.reset(memtablerep.NewSkipListFactory())  # shared_ptr takes ownership of the freshly allocated factory
+
+cdef class VectorMemtableFactory(PyMemtableFactory):  # memtable representation backed by a std::vector
+    def __init__(self, count=0):  # count is converted to size_t and forwarded to the VectorRepFactory constructor
+        self.factory.reset(memtablerep.NewVectorRepFactory(count))  # shared_ptr takes ownership of the freshly allocated factory
+
+cdef class HashSkipListMemtableFactory(PyMemtableFactory):  # fixed array of buckets, each pointing to a skiplist
+    def __init__(
+            self,
+            bucket_count=1000000,  # number of fixed array buckets
+            skiplist_height=4,  # the max height of the skiplist
+            skiplist_branching_factor=4):  # probabilistic size ratio between adjacent link lists in the skiplist
+
+        self.factory.reset(  # shared_ptr takes ownership of the pointer returned by RocksDB
+            memtablerep.NewHashSkipListRepFactory(
+                bucket_count,
+                skiplist_height,
+                skiplist_branching_factor))
+
+cdef class HashLinkListMemtableFactory(PyMemtableFactory):  # fixed array of buckets, each pointing to a sorted linked list
+    def __init__(self, bucket_count=50000):  # bucket_count: number of fixed array buckets
+        self.factory.reset(memtablerep.NewHashLinkListRepFactory(bucket_count))  # shared_ptr takes ownership of the returned pointer
+##################################
+
 cdef class CompressionType(object):
     no_compression = u'no_compression'
     snappy_compression = u'snappy_compression'
@@ -598,6 +634,8 @@ cdef class Options(object):
     cdef PyCache py_block_cache_compressed
     cdef PySliceTransform py_prefix_extractor
     cdef PyTableFactory py_table_factory
+    cdef PyMemtableFactory py_memtable_factory
+
     # Used to protect sharing of Options with many DB-objects
     cdef cpp_bool in_use
 
@@ -618,6 +656,7 @@ cdef class Options(object):
         self.py_block_cache_compressed = None
         self.py_prefix_extractor = None
         self.py_table_factory = None
+        self.py_memtable_factory = None
 
         for key, value in kwargs.items():
             setattr(self, key, value)
@@ -1024,8 +1063,17 @@ cdef class Options(object):
             return self.py_table_factory
 
         def __set__(self, PyTableFactory value):
+            self.py_table_factory = value
             self.opts.table_factory = value.get_table_factory()
 
+    property memtable_factory:  # factory that provides the in-memory MemTableRep objects
+        def __get__(self):  # returns the wrapper last assigned, or None if none was assigned
+            return self.py_memtable_factory
+
+        def __set__(self, PyMemtableFactory value not None):  # `not None`: raise TypeError instead of calling a cdef method on None
+            self.py_memtable_factory = value  # keep the Python wrapper so __get__ can hand it back
+            self.opts.memtable_factory = value.get_memtable_factory()
+
     property inplace_update_num_locks:
         def __get__(self):
             return self.opts.inplace_update_num_locks
diff --git a/rocksdb/cpp/memtable_factories.hpp b/rocksdb/cpp/memtable_factories.hpp
new file mode 100644
index 0000000..a0855be
--- /dev/null
+++ b/rocksdb/cpp/memtable_factories.hpp
@@ -0,0 +1,24 @@
+#pragma once
+
+#include "rocksdb/memtablerep.h"
+
+using rocksdb::MemTableRepFactory;
+using rocksdb::VectorRepFactory;
+using rocksdb::SkipListFactory;
+
+// Free-function wrappers around the factory constructors, declared to
+// Cython in rocksdb/memtablerep.pxd. They are `inline` (and the header is
+// guarded) so including this file from more than one translation unit
+// does not produce duplicate-symbol link errors.
+namespace py_rocks {
+    // Returns a new VectorRepFactory; ownership passes to the caller.
+    // `count` is forwarded unchanged to the VectorRepFactory constructor.
+    inline MemTableRepFactory* NewVectorRepFactory(size_t count = 0) {
+        return new VectorRepFactory(count);
+    }
+
+    // Returns a new SkipListFactory; ownership passes to the caller.
+    inline MemTableRepFactory* NewSkipListFactory() {
+        return new SkipListFactory();
+    }
+}
diff --git a/rocksdb/memtablerep.pxd b/rocksdb/memtablerep.pxd
new file mode 100644
index 0000000..cbd5639
--- /dev/null
+++ b/rocksdb/memtablerep.pxd
@@ -0,0 +1,12 @@
+from libc.stdint cimport int32_t
+
+cdef extern from "rocksdb/memtablerep.h" namespace "rocksdb":  # declarations taken directly from the RocksDB header
+    cdef cppclass MemTableRepFactory:  # base factory type; the wrappers store it in a shared_ptr
+        MemTableRepFactory()
+
+    cdef MemTableRepFactory* NewHashSkipListRepFactory(size_t, int32_t, int32_t)  # (bucket_count, skiplist_height, skiplist_branching_factor)
+    cdef MemTableRepFactory* NewHashLinkListRepFactory(size_t)  # (bucket_count)
+
+cdef extern from "cpp/memtable_factories.hpp" namespace "py_rocks":  # helper functions defined in this package's own header
+    cdef MemTableRepFactory* NewVectorRepFactory(size_t)  # (count); returns `new VectorRepFactory(count)`
+    cdef MemTableRepFactory* NewSkipListFactory()  # returns `new SkipListFactory()`
diff --git a/rocksdb/options.pxd b/rocksdb/options.pxd
index b41ac13..90575f4 100644
--- a/rocksdb/options.pxd
+++ b/rocksdb/options.pxd
@@ -12,6 +12,7 @@ from slice_ cimport Slice
 from snapshot cimport Snapshot
 from slice_transform cimport SliceTransform
 from table_factory cimport TableFactory
+from memtablerep cimport MemTableRepFactory
 
 cdef extern from "rocksdb/options.h" namespace "rocksdb":
     ctypedef enum CompressionType:
@@ -104,7 +105,7 @@ cdef extern from "rocksdb/options.h" namespace "rocksdb":
         # TODO: CompactionOptionsUniversal compaction_options_universal
         cpp_bool filter_deletes
         uint64_t max_sequential_skip_in_iterations
-        # TODO: memtable_factory
+        shared_ptr[MemTableRepFactory] memtable_factory
         shared_ptr[TableFactory] table_factory
         # TODO: table_properties_collectors
         cpp_bool inplace_update_support
-- 
GitLab