From 482379cf94e5db501f9b0cedd2770e0d996497d2 Mon Sep 17 00:00:00 2001 From: hofmockel <dreagonfly@gmx.de> Date: Sun, 27 Apr 2014 19:20:30 +0200 Subject: [PATCH] Add support for the 'PlainTableFactories' --- docs/api/options.rst | 78 +++++++++++++++++++++++++++++++++++ rocksdb/_rocksdb.pyx | 53 +++++++++++++++++++++++- rocksdb/options.pxd | 3 +- rocksdb/table_factory.pxd | 9 ++++ rocksdb/tests/test_options.py | 8 ++++ 5 files changed, 149 insertions(+), 2 deletions(-) create mode 100644 rocksdb/table_factory.pxd diff --git a/docs/api/options.rst b/docs/api/options.rst index 5866cd0..bc9513f 100644 --- a/docs/api/options.rst +++ b/docs/api/options.rst @@ -645,6 +645,18 @@ Options object | *Type:* ``int`` | *Default:* ``8`` + .. py:attribute:: table_factory + + Factory for the files forming the persistent data storage. + Sometimes they are also named SST-Files. Right now you can assign + instances of the following classes + + * :py:class:`rocksdb.BlockBasedTableFactory` + * :py:class:`rocksdb.PlainTableFactory` + * :py:class:`rocksdb.TotalOrderPlainTableFactory` + + *Default:* :py:class:`rocksdb.BlockBasedTableFactory` + .. py:attribute:: inplace_update_support Allows thread-safe inplace updates. Requires Updates if @@ -779,3 +791,69 @@ LRUCache the least-used order. If not enough space is freed, further free the entries in least used order. +TableFactories +============== + +Currently RocksDB supports two types of tables: plain table and block-based table. +Instances of these classes can be assigned to :py:attr:`rocksdb.Options.table_factory` + +* *Block-based table:* This is the default table type that RocksDB inherited from + LevelDB. It was designed for storing data in hard disk or flash device. + +* *Plain table:* It is one of RocksDB's SST file formats, optimized + for low query latency on pure-memory or really low-latency media. + +Tutorial of rocksdb table formats is available here: + https://github.com/facebook/rocksdb/wiki/A-Tutorial-of-RocksDB-SST-formats + +..
py:class:: rocksdb.BlockBasedTableFactory + + Wraps BlockBasedTableFactory of RocksDB. + +.. py:class:: rocksdb.PlainTableFactory + + Plain Table with prefix-only seek. It wraps rocksdb PlainTableFactory. + + For this factory, you need to set :py:attr:`rocksdb.Options.prefix_extractor` + properly to make it work. Look-up will start with prefix hash lookup for + key prefix. Inside the hash bucket found, a binary search is executed for + hash conflicts. Finally, a linear search is used. + + .. py:method:: __init__(user_key_len=0, bloom_bits_per_prefix=10, hash_table_ratio=0.75, index_sparseness=10) + + :param int user_key_len: + Plain table has optimization for fixed-size keys, which can be + specified via user_key_len. + Alternatively, you can pass `0` if your keys have variable lengths. + + :param int bloom_bits_per_prefix: + The number of bits used for bloom filter per prefix. + You may disable it by passing `0`. + + :param float hash_table_ratio: + The desired utilization of the hash table used for prefix hashing. + hash_table_ratio = number of prefixes / #buckets in the hash table. + + :param int index_sparseness: + Inside each prefix, need to build one index record for how + many keys for binary search inside each hash bucket. + +.. py:class:: rocksdb.TotalOrderPlainTableFactory + + This factory of plain table ignores Options.prefix_extractor and assumes no + hashable prefix available to the key structure. Lookup will be based on + binary search index only. Total order seek() can be issued. + + .. py:method:: __init__(user_key_len=0, bloom_bits_per_key=0, index_sparseness=16) + + :param int user_key_len: + Plain table has optimization for fixed-size keys, which can be + specified via user_key_len. + Alternatively, you can pass `0` if your keys have variable lengths. + + :param int bloom_bits_per_key: + The number of bits used for bloom filter per key. + You may disable it by passing a zero.
+ + :param int index_sparseness: + Need to build one index record for how many keys for binary search. diff --git a/rocksdb/_rocksdb.pyx b/rocksdb/_rocksdb.pyx index b98340e..c1639dd 100644 --- a/rocksdb/_rocksdb.pyx +++ b/rocksdb/_rocksdb.pyx @@ -24,6 +24,7 @@ cimport db cimport iterator cimport backup cimport env +cimport table_factory from slice_ cimport Slice from status cimport Status @@ -539,8 +540,49 @@ cdef cpp_bool slice_in_range_callback( tb = traceback.format_exc() logger.Log(log, "Error in slice transfrom callback: %s", <bytes>tb) error_msg.assign(<bytes>str(error)) - ########################################### + +## Here are the TableFactories +@cython.internal +cdef class PyTableFactory(object): + cdef shared_ptr[table_factory.TableFactory] factory + + cdef shared_ptr[table_factory.TableFactory] get_table_factory(self): + return self.factory + +cdef class BlockBasedTableFactory(PyTableFactory): + def __init__(self): + self.factory.reset(table_factory.NewBlockBasedTableFactory()) + +cdef class PlainTableFactory(PyTableFactory): + def __init__( + self, + user_key_len=0, + bloom_bits_per_prefix=10, + hash_table_ratio=0.75, + index_sparseness=10): + + self.factory.reset( + table_factory.NewPlainTableFactory( + user_key_len, + bloom_bits_per_prefix, + hash_table_ratio, + index_sparseness)) + +cdef class TotalOrderPlainTableFactory(PyTableFactory): + def __init__( + self, + user_key_len=0, + bloom_bits_per_key=0, + index_sparseness=16): + + self.factory.reset( + table_factory.NewTotalOrderPlainTableFactory( + user_key_len, + bloom_bits_per_key, + index_sparseness)) + +############################################# cdef class CompressionType(object): no_compression = u'no_compression' snappy_compression = u'snappy_compression' @@ -555,6 +597,7 @@ cdef class Options(object): cdef PyCache py_block_cache cdef PyCache py_block_cache_compressed cdef PySliceTransform py_prefix_extractor + cdef PyTableFactory py_table_factory # Used to protect sharing of Options 
with many DB-objects cdef cpp_bool in_use @@ -574,6 +617,7 @@ cdef class Options(object): self.py_block_cache = None self.py_block_cache_compressed = None self.py_prefix_extractor = None + self.py_table_factory = None for key, value in kwargs.items(): setattr(self, key, value) @@ -975,6 +1019,15 @@ cdef class Options(object): def __set__(self, value): self.opts.inplace_update_support = value + property table_factory: + def __get__(self): + return self.py_table_factory + + def __set__(self, PyTableFactory value): + # Keep the Python wrapper so __get__ returns the factory that was assigned. + self.py_table_factory = value + self.opts.table_factory = value.get_table_factory() + property inplace_update_num_locks: def __get__(self): return self.opts.inplace_update_num_locks diff --git a/rocksdb/options.pxd b/rocksdb/options.pxd index f7f3b4e..b41ac13 100644 --- a/rocksdb/options.pxd +++ b/rocksdb/options.pxd @@ -11,6 +11,7 @@ from logger cimport Logger from slice_ cimport Slice from snapshot cimport Snapshot from slice_transform cimport SliceTransform +from table_factory cimport TableFactory cdef extern from "rocksdb/options.h" namespace "rocksdb": ctypedef enum CompressionType: @@ -104,7 +105,7 @@ cdef extern from "rocksdb/options.h" namespace "rocksdb": cpp_bool filter_deletes uint64_t max_sequential_skip_in_iterations # TODO: memtable_factory - # TODO: table_factory + shared_ptr[TableFactory] table_factory # TODO: table_properties_collectors cpp_bool inplace_update_support size_t inplace_update_num_locks diff --git a/rocksdb/table_factory.pxd b/rocksdb/table_factory.pxd new file mode 100644 index 0000000..418cb9e --- /dev/null +++ b/rocksdb/table_factory.pxd @@ -0,0 +1,9 @@ +from libc.stdint cimport uint32_t + +cdef extern from "rocksdb/table.h" namespace "rocksdb": + cdef cppclass TableFactory: + TableFactory() + + cdef TableFactory* NewBlockBasedTableFactory() + cdef TableFactory* NewPlainTableFactory(uint32_t, int, double, size_t) + cdef TableFactory* NewTotalOrderPlainTableFactory(uint32_t, int, size_t) diff --git a/rocksdb/tests/test_options.py
b/rocksdb/tests/test_options.py index b9fa8f0..bfd8751 100644 --- a/rocksdb/tests/test_options.py +++ b/rocksdb/tests/test_options.py @@ -61,3 +61,11 @@ class TestOptions(unittest.TestCase): self.assertEqual(name, opts.db_log_dir) self.assertEqual(name, opts.wal_dir) + + def test_table_factory(self): + opts = rocksdb.Options() + self.assertIsNone(opts.table_factory) + + opts.table_factory = rocksdb.BlockBasedTableFactory() + opts.table_factory = rocksdb.PlainTableFactory() + opts.table_factory = rocksdb.TotalOrderPlainTableFactory() -- GitLab