From 1cb9ec4ee1d5b4e635ca2833f5650cc77c3b4749 Mon Sep 17 00:00:00 2001 From: hofmockel <dreagonfly@gmx.de> Date: Mon, 28 Apr 2014 20:32:33 +0200 Subject: [PATCH] Allow it to configure the memtable representation --- docs/api/options.rst | 76 ++++++++++++++++++++++++++++++ docs/changelog.rst | 2 + rocksdb/_rocksdb.pyx | 48 +++++++++++++++++++ rocksdb/cpp/memtable_factories.hpp | 15 ++++++ rocksdb/memtablerep.pxd | 12 +++++ rocksdb/options.pxd | 3 +- 6 files changed, 155 insertions(+), 1 deletion(-) create mode 100644 rocksdb/cpp/memtable_factories.hpp create mode 100644 rocksdb/memtablerep.pxd diff --git a/docs/api/options.rst b/docs/api/options.rst index bc9513f..6e066c3 100644 --- a/docs/api/options.rst +++ b/docs/api/options.rst @@ -645,6 +645,18 @@ Options object | *Type:* ``int`` | *Default:* ``8`` + .. py:attribute:: memtable_factory + + This is a factory that provides MemTableRep objects. + Right now you can assign instances of the following classes. + + * :py:class:`rocksdb.VectorMemtableFactory` + * :py:class:`rocksdb.SkipListMemtableFactory` + * :py:class:`rocksdb.HashSkipListMemtableFactory` + * :py:class:`rocksdb.HashLinkListMemtableFactory` + + *Default:* :py:class:`rocksdb.SkipListMemtableFactory` + .. py:attribute:: table_factory Factory for the files forming the persisten data storage. @@ -857,3 +869,67 @@ Tutorial of rocksdb table formats is available here: :param int index_sparseness: Need to build one index record for how many keys for binary search. + + +.. _memtable_factories_label: + +MemtableFactories +================= + +RocksDB has different classes to represent the in-memory buffer for the current +operations. You have to assign instances of the following classes to +:py:attr:`rocksdb.Options.memtable_factory`. +This page has a comparison of the most popular ones. +https://github.com/facebook/rocksdb/wiki/Hash-based-memtable-implementations + +..
py:class:: rocksdb.VectorMemtableFactory + + This creates MemTableReps that are backed by an std::vector. + On iteration, the vector is sorted. This is useful for workloads where + iteration is very rare and writes are generally not issued after reads begin. + + .. py:method:: __init__(count=0) + + :param int count: + Passed to the constructor of the underlying std::vector of each + VectorRep. On initialization, the underlying array will be at + least count bytes reserved for usage. + +.. py:class:: rocksdb.SkipListMemtableFactory + + This uses a skip list to store keys. + + .. py:method:: __init__() + +.. py:class:: rocksdb.HashSkipListMemtableFactory + + This class contains a fixed array of buckets, each pointing + to a skiplist (null if the bucket is empty). + + .. note:: + + :py:attr:`rocksdb.Options.prefix_extractor` must be set, otherwise + rocksdb falls back to skip-list. + + .. py:method:: __init__(bucket_count = 1000000, skiplist_height = 4, skiplist_branching_factor = 4) + + :param int bucket_count: number of fixed array buckets + :param int skiplist_height: the max height of the skiplist + :param int skiplist_branching_factor: + probabilistic size ratio between adjacent link lists in the skiplist + +.. py:class:: rocksdb.HashLinkListMemtableFactory + + The factory is to create memtables with a hashed linked list. + It contains a fixed array of buckets, each pointing to a sorted single + linked list (null if the bucket is empty). + + .. note:: + + :py:attr:`rocksdb.Options.prefix_extractor` must be set, otherwise + rocksdb falls back to skip-list. + + + ..
py:method:: __init__(bucket_count=50000) + + :param int bucket_count: number of fixed array buckets diff --git a/docs/changelog.rst b/docs/changelog.rst index 88f2f8d..cddae5a 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -30,6 +30,8 @@ Target is to work with the next version of rocksdb (2.8.fb) * https://github.com/facebook/rocksdb/wiki/PlainTable-Format * https://github.com/facebook/rocksdb/wiki/How-to-persist-in-memory-RocksDB-database%3F +* Add :py:attr:`rocksdb.Options.memtable_factory` option. + Version 0.1 ----------- diff --git a/rocksdb/_rocksdb.pyx b/rocksdb/_rocksdb.pyx index c1639dd..67eb39b 100644 --- a/rocksdb/_rocksdb.pyx +++ b/rocksdb/_rocksdb.pyx @@ -25,6 +25,7 @@ cimport iterator cimport backup cimport env cimport table_factory +cimport memtablerep from slice_ cimport Slice from status cimport Status @@ -583,6 +584,41 @@ cdef class TotalOrderPlainTableFactory(PyTableFactory): index_sparseness)) ############################################# + +### Here are the MemtableFactories +@cython.internal +cdef class PyMemtableFactory(object): + cdef shared_ptr[memtablerep.MemTableRepFactory] factory + + cdef shared_ptr[memtablerep.MemTableRepFactory] get_memtable_factory(self): + return self.factory + +cdef class SkipListMemtableFactory(PyMemtableFactory): + def __init__(self): + self.factory.reset(memtablerep.NewSkipListFactory()) + +cdef class VectorMemtableFactory(PyMemtableFactory): + def __init__(self, count=0): + self.factory.reset(memtablerep.NewVectorRepFactory(count)) + +cdef class HashSkipListMemtableFactory(PyMemtableFactory): + def __init__( + self, + bucket_count=1000000, + skiplist_height=4, + skiplist_branching_factor=4): + + self.factory.reset( + memtablerep.NewHashSkipListRepFactory( + bucket_count, + skiplist_height, + skiplist_branching_factor)) + +cdef class HashLinkListMemtableFactory(PyMemtableFactory): + def __init__(self, bucket_count=50000): + self.factory.reset(memtablerep.NewHashLinkListRepFactory(bucket_count))
+################################## + cdef class CompressionType(object): no_compression = u'no_compression' snappy_compression = u'snappy_compression' @@ -598,6 +634,8 @@ cdef class Options(object): cdef PyCache py_block_cache_compressed cdef PySliceTransform py_prefix_extractor cdef PyTableFactory py_table_factory + cdef PyMemtableFactory py_memtable_factory + # Used to protect sharing of Options with many DB-objects cdef cpp_bool in_use @@ -618,6 +656,7 @@ cdef class Options(object): self.py_block_cache_compressed = None self.py_prefix_extractor = None self.py_table_factory = None + self.py_memtable_factory = None for key, value in kwargs.items(): setattr(self, key, value) @@ -1024,8 +1063,17 @@ cdef class Options(object): return self.py_table_factory def __set__(self, PyTableFactory value): + self.py_table_factory = value self.opts.table_factory = value.get_table_factory() + property memtable_factory: + def __get__(self): + return self.py_memtable_factory + + def __set__(self, PyMemtableFactory value): + self.py_memtable_factory = value + self.opts.memtable_factory = value.get_memtable_factory() + property inplace_update_num_locks: def __get__(self): return self.opts.inplace_update_num_locks diff --git a/rocksdb/cpp/memtable_factories.hpp b/rocksdb/cpp/memtable_factories.hpp new file mode 100644 index 0000000..a0855be --- /dev/null +++ b/rocksdb/cpp/memtable_factories.hpp @@ -0,0 +1,15 @@ +#include "rocksdb/memtablerep.h" + +using rocksdb::MemTableRepFactory; +using rocksdb::VectorRepFactory; +using rocksdb::SkipListFactory; + +namespace py_rocks { + MemTableRepFactory* NewVectorRepFactory(size_t count = 0) { + return new VectorRepFactory(count); + } + + MemTableRepFactory* NewSkipListFactory() { + return new SkipListFactory(); + } +} diff --git a/rocksdb/memtablerep.pxd b/rocksdb/memtablerep.pxd new file mode 100644 index 0000000..cbd5639 --- /dev/null +++ b/rocksdb/memtablerep.pxd @@ -0,0 +1,12 @@ +from libc.stdint cimport int32_t + +cdef extern from 
"rocksdb/memtablerep.h" namespace "rocksdb": + cdef cppclass MemTableRepFactory: + MemTableRepFactory() + + cdef MemTableRepFactory* NewHashSkipListRepFactory(size_t, int32_t, int32_t) + cdef MemTableRepFactory* NewHashLinkListRepFactory(size_t) + +cdef extern from "cpp/memtable_factories.hpp" namespace "py_rocks": + cdef MemTableRepFactory* NewVectorRepFactory(size_t) + cdef MemTableRepFactory* NewSkipListFactory() diff --git a/rocksdb/options.pxd b/rocksdb/options.pxd index b41ac13..90575f4 100644 --- a/rocksdb/options.pxd +++ b/rocksdb/options.pxd @@ -12,6 +12,7 @@ from slice_ cimport Slice from snapshot cimport Snapshot from slice_transform cimport SliceTransform from table_factory cimport TableFactory +from memtablerep cimport MemTableRepFactory cdef extern from "rocksdb/options.h" namespace "rocksdb": ctypedef enum CompressionType: @@ -104,7 +105,7 @@ cdef extern from "rocksdb/options.h" namespace "rocksdb": # TODO: CompactionOptionsUniversal compaction_options_universal cpp_bool filter_deletes uint64_t max_sequential_skip_in_iterations - # TODO: memtable_factory + shared_ptr[MemTableRepFactory] memtable_factory shared_ptr[TableFactory] table_factory # TODO: table_properties_collectors cpp_bool inplace_update_support -- GitLab