diff --git a/README.rst b/README.rst
index 1d409a8fb3a81ef7d993688ee26333e51bf492db..57243ed710182aaf2eea72fa33156c68e9a31e7e 100644
--- a/README.rst
+++ b/README.rst
@@ -1,42 +1,26 @@
 python-rocksdb
 ==============
 
-Python bindings for RocksDB.
-
-See https://rocksdb-tina.readthedocs.io/ for a more comprehensive install and
-usage description.
-
-
-Quick install
--------------
-
-.. code-block:: bash
-
-    $ pip install rocksdb
-
-
-Quick usage guide
------------------
-
-.. code-block:: python
-
-    >>> import rocksdb
-    >>> db = rocksdb.DB('test.db', rocksdb.Options(create_if_missing=True))
-    >>> db.put(b'a', b'data')
-    >>> print(db.get(b'a'))
-    b'data'
-
-
-Acknowledgements
-----------------
-
-This project attempts to collect the efforts put into different forks of the
-`pyrocksdb`_ project that was originally written by `stephan-hof`_, as sadly
-none seems to be actively maintained. In particular, the `python-rocksdb`_ fork
-created by `twmht`_, but it also incorporates changes from other forks and
-unfinished pull requests.
-
-.. _python-rocksdb: https://github.com/twmht/python-rocksdb
-.. _twmht: https://github.com/twmht
-.. _pyrocksdb: https://github.com/stephan-hof/pyrocksdb
-.. _stephan-hof: https://github.com/stephan-hof
+This project is based on https://github.com/NightTsarina/python-rocksdb. It
+strips backup support, filter support, and several options so that it builds
+against RocksDB versions from 5.17 (the version shipped with Ubuntu 20.04)
+through 7.0 (the current latest release, and already the only one published
+in the Homebrew and Alpine repositories).
+
+It isn't intended for general use and should eventually be superseded by the
+project it is based on, once distribution repositories catch up and RocksDB
+7.0 is supported properly (without stripped features).
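+
+For what remains supported, usage matches the upstream project. A minimal
+sketch (``test.db`` is just an example path):
+
+.. code-block:: python
+
+    >>> import rocksdb
+    >>> db = rocksdb.DB('test.db', rocksdb.Options(create_if_missing=True))
+    >>> db.put(b'a', b'data')
+    >>> print(db.get(b'a'))
+    b'data'
+
+Note that ``BlockBasedTableFactory`` no longer accepts a ``filter_policy``
+argument, and backup support (``BackupEngine``) has been removed entirely.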
diff --git a/rocksdb/_rocksdb.pyx b/rocksdb/_rocksdb.pyx
index 9ae4feaf1c47e4f0a458568f3b0851d77a33c57e..bfd47ab74ebed0e26c5b6b3d98cfad12abee7bec 100644
--- a/rocksdb/_rocksdb.pyx
+++ b/rocksdb/_rocksdb.pyx
@@ -16,7 +16,6 @@ from cpython.unicode cimport PyUnicode_Decode
 from .std_memory cimport shared_ptr
 from . cimport options
 from . cimport merge_operator
-from . cimport filter_policy
 from . cimport comparator
 from . cimport slice_transform
 from . cimport cache
@@ -24,7 +23,6 @@ from . cimport logger
 from . cimport snapshot
 from . cimport db
 from . cimport iterator
-from . cimport backup
 from . cimport env
 from . cimport table_factory
 from . cimport memtablerep
@@ -46,7 +44,6 @@ from .status cimport Status
 
 import sys
 from .interfaces import MergeOperator as IMergeOperator
 from .interfaces import AssociativeMergeOperator as IAssociativeMergeOperator
-from .interfaces import FilterPolicy as IFilterPolicy
 from .interfaces import Comparator as IComparator
 from .interfaces import SliceTransform as ISliceTransform
@@ -62,8 +59,6 @@ from .errors import Incomplete
 
 import weakref
 
-ctypedef const filter_policy.FilterPolicy ConstFilterPolicy
-
 cdef extern from "cpp/utils.hpp" namespace "py_rocks":
     cdef const Slice* vector_data(vector[Slice]&)
 
@@ -216,117 +211,6 @@ BytewiseComparator = PyBytewiseComparator
-
-
-## Here comes the stuff for the filter policy
-@cython.internal
-cdef class PyFilterPolicy(object):
-    cdef object get_ob(self):
-        return None
-
-    cdef shared_ptr[ConstFilterPolicy] get_policy(self):
-        return shared_ptr[ConstFilterPolicy]()
-
-    cdef set_info_log(self, shared_ptr[logger.Logger] info_log):
-        pass
-
-@cython.internal
-cdef class PyGenericFilterPolicy(PyFilterPolicy):
-    cdef shared_ptr[filter_policy.FilterPolicyWrapper] policy
-    cdef object ob
-
-    def __cinit__(self, object ob):
-        if not isinstance(ob, IFilterPolicy):
-            raise TypeError("%s is not of type %s" % (ob, IFilterPolicy))
-
-        self.ob = ob
-        self.policy.reset(new filter_policy.FilterPolicyWrapper(
-            bytes_to_string(ob.name()),
-            <void*>ob,
-            create_filter_callback,
-            key_may_match_callback))
-
-    cdef object get_ob(self):
-        return self.ob
-
-    cdef shared_ptr[ConstFilterPolicy] get_policy(self):
-        return <shared_ptr[ConstFilterPolicy]>(self.policy)
-
-    cdef set_info_log(self, shared_ptr[logger.Logger] info_log):
-        self.policy.get().set_info_log(info_log)
-
-
-cdef void create_filter_callback(
-    void* ctx,
-    logger.Logger* log,
-    string& error_msg,
-    const Slice* keys,
-    int n,
-    string* dst) with gil:
-
-    try:
-        ret = (<object>ctx).create_filter(
-            [slice_to_bytes(keys[i]) for i in range(n)])
-        dst.append(bytes_to_string(ret))
-    except BaseException as error:
-        tb = traceback.format_exc()
-        logger.Log(log, "Error in create filter callback: %s", <bytes>tb)
-        error_msg.assign(<bytes>str(error))
-
-cdef cpp_bool key_may_match_callback(
-    void* ctx,
-    logger.Logger* log,
-    string& error_msg,
-    const Slice& key,
-    const Slice& filt) with gil:
-
-    try:
-        return (<object>ctx).key_may_match(
-            slice_to_bytes(key),
-            slice_to_bytes(filt))
-    except BaseException as error:
-        tb = traceback.format_exc()
-        logger.Log(log, "Error in key_mach_match callback: %s", <bytes>tb)
-        error_msg.assign(<bytes>str(error))
-
-@cython.internal
-cdef class PyBloomFilterPolicy(PyFilterPolicy):
-    cdef shared_ptr[ConstFilterPolicy] policy
-
-    def __cinit__(self, int bits_per_key):
-        self.policy.reset(filter_policy.NewBloomFilterPolicy(bits_per_key))
-
-    def name(self):
-        return PyBytes_FromString(self.policy.get().Name())
-
-    def create_filter(self, keys):
-        cdef string dst
-        cdef vector[Slice] c_keys
-
-        for key in keys:
-            c_keys.push_back(bytes_to_slice(key))
-
-        self.policy.get().CreateFilter(
-            vector_data(c_keys),
-            <int>c_keys.size(),
-            cython.address(dst))
-
-        return string_to_bytes(dst)
-
-    def key_may_match(self, key, filter_):
-        return self.policy.get().KeyMayMatch(
-            bytes_to_slice(key),
-            bytes_to_slice(filter_))
-
-    cdef object get_ob(self):
-        return self
-
-    cdef shared_ptr[ConstFilterPolicy] get_policy(self):
-        return self.policy
-
-BloomFilterPolicy = PyBloomFilterPolicy
-#############################################
-
-
-
 ## Here comes the stuff for the merge operator
 @cython.internal
 cdef class PyMergeOperator(object):
@@ -580,15 +464,12 @@ cdef class PyTableFactory(object):
         pass
 
 cdef class BlockBasedTableFactory(PyTableFactory):
-    cdef PyFilterPolicy py_filter_policy
-
     def __init__(self,
            index_type='binary_search',
            py_bool hash_index_allow_collision=True,
            checksum='crc32',
            PyCache block_cache=None,
            PyCache block_cache_compressed=None,
-           filter_policy=None,
            no_block_cache=False,
            block_size=None,
            block_size_deviation=None,
@@ -655,23 +536,10 @@ cdef class BlockBasedTableFactory(PyTableFactory):
         if format_version is not None:
             table_options.format_version = format_version
 
-        # Set the filter_policy
-        self.py_filter_policy = None
-        if filter_policy is not None:
-            if isinstance(filter_policy, PyFilterPolicy):
-                if (<PyFilterPolicy?>filter_policy).get_policy().get() == NULL:
-                    raise Exception("Cannot set filter policy: %s" % filter_policy)
-                self.py_filter_policy = filter_policy
-            else:
-                self.py_filter_policy = PyGenericFilterPolicy(filter_policy)
-
-            table_options.filter_policy = self.py_filter_policy.get_policy()
-
         self.factory.reset(table_factory.NewBlockBasedTableFactory(table_options))
 
     cdef set_info_log(self, shared_ptr[logger.Logger] info_log):
-        if self.py_filter_policy is not None:
-            self.py_filter_policy.set_info_log(info_log)
+        pass
 
 cdef class PlainTableFactory(PyTableFactory):
     def __init__(
@@ -1036,12 +904,6 @@ cdef class ColumnFamilyOptions(object):
         def __set__(self, value):
             self.copts.level0_stop_writes_trigger = value
 
-    property max_mem_compaction_level:
-        def __get__(self):
-            return self.copts.max_mem_compaction_level
-        def __set__(self, value):
-            self.copts.max_mem_compaction_level = value
-
     property target_file_size_base:
         def __get__(self):
             return self.copts.target_file_size_base
@@ -1164,13 +1026,6 @@
         else:
             raise Exception("Unknown compaction style")
 
-    # Deprecate
-    # property filter_deletes:
-    #     def __get__(self):
-    #         return self.copts.filter_deletes
-    #     def __set__(self, value):
-    #         self.copts.filter_deletes = value
-
     property max_sequential_skip_in_iterations:
         def __get__(self):
             return self.copts.max_sequential_skip_in_iterations
@@ -2160,9 +2015,6 @@
            fill_cache=True,
            snapshot=None,
            read_tier="all",
-           total_order_seek=False,
-           iterate_lower_bound=None,
-           iterate_upper_bound=None
            ):
 
        # TODO: Is this really effiencet ?
@@ -2174,9 +2026,6 @@
        if py_opts['snapshot'] is not None:
            opts.snapshot = (<Snapshot?>(py_opts['snapshot'])).ptr
 
-       if py_opts['total_order_seek'] is not None:
-           opts.total_order_seek = py_opts['total_order_seek']
-
        if py_opts['read_tier'] == "all":
            opts.read_tier = options.kReadAllTier
        elif py_opts['read_tier'] == 'cache':
@@ -2193,12 +2042,6 @@
                return str.encode(str(iterate_bound))
            else:
                return None
-       if py_opts['iterate_lower_bound'] is not None:
-           # Calling this new without corresponding delete causes a memory leak.
-           # TODO: Figure out where the object should be destroyed without causing segfaults
-           opts.iterate_lower_bound = bytes_to_new_slice(make_bytes(py_opts['iterate_lower_bound']))
-       if py_opts['iterate_upper_bound'] is not None:
-           opts.iterate_upper_bound = bytes_to_new_slice(make_bytes(py_opts['iterate_upper_bound']))
 
        return opts
 
@@ -2419,106 +2262,3 @@ cdef class ReversedIterator(object):
         self.it.ptr.Prev()
         check_status(self.it.ptr.status())
         return ret
-
-cdef class BackupEngine(object):
-    cdef backup.BackupEngine* engine
-
-    def __cinit__(self, backup_dir):
-        cdef Status st
-        cdef string c_backup_dir
-        self.engine = NULL
-
-        c_backup_dir = path_to_string(backup_dir)
-        st = backup.BackupEngine_Open(
-            env.Env_Default(),
-            backup.BackupableDBOptions(c_backup_dir),
-            cython.address(self.engine))
-
-        check_status(st)
-
-    def __dealloc__(self):
-        if not self.engine == NULL:
-            with nogil:
-                del self.engine
-
-    def create_backup(self, DB db, flush_before_backup=False):
-        cdef Status st
-        cdef cpp_bool c_flush_before_backup
-
-        c_flush_before_backup = flush_before_backup
-
-        with nogil:
-            st = self.engine.CreateNewBackup(db.db, c_flush_before_backup)
-        check_status(st)
-
-    def restore_backup(self, backup_id, db_dir, wal_dir):
-        cdef Status st
-        cdef backup.BackupID c_backup_id
-        cdef string c_db_dir
-        cdef string c_wal_dir
-
-        c_backup_id = backup_id
-        c_db_dir = path_to_string(db_dir)
-        c_wal_dir = path_to_string(wal_dir)
-
-        with nogil:
-            st = self.engine.RestoreDBFromBackup(
-                c_backup_id,
-                c_db_dir,
-                c_wal_dir)
-
-        check_status(st)
-
-    def restore_latest_backup(self, db_dir, wal_dir):
-        cdef Status st
-        cdef string c_db_dir
-        cdef string c_wal_dir
-
-        c_db_dir = path_to_string(db_dir)
-        c_wal_dir = path_to_string(wal_dir)
-
-        with nogil:
-            st = self.engine.RestoreDBFromLatestBackup(c_db_dir, c_wal_dir)
-
-        check_status(st)
-
-    def stop_backup(self):
-        with nogil:
-            self.engine.StopBackup()
-
-    def purge_old_backups(self, num_backups_to_keep):
-        cdef Status st
-        cdef uint32_t c_num_backups_to_keep
-
-        c_num_backups_to_keep = num_backups_to_keep
-
-        with nogil:
-            st = self.engine.PurgeOldBackups(c_num_backups_to_keep)
-        check_status(st)
-
-    def delete_backup(self, backup_id):
-        cdef Status st
-        cdef backup.BackupID c_backup_id
-
-        c_backup_id = backup_id
-
-        with nogil:
-            st = self.engine.DeleteBackup(c_backup_id)
-
-        check_status(st)
-
-    def get_backup_info(self):
-        cdef vector[backup.BackupInfo] backup_info
-
-        with nogil:
-            self.engine.GetBackupInfo(cython.address(backup_info))
-
-        ret = []
-        for ob in backup_info:
-            t = {}
-            t['backup_id'] = ob.backup_id
-            t['timestamp'] = ob.timestamp
-            t['size'] = ob.size
-            ret.append(t)
-
-        return ret
diff --git a/rocksdb/backup.pxd b/rocksdb/backup.pxd
deleted file mode 100644
index 9efbb58b398b98d824b748154f95688299c99dde..0000000000000000000000000000000000000000
--- a/rocksdb/backup.pxd
+++ /dev/null
@@ -1,38 +0,0 @@
-from libcpp cimport bool as cpp_bool
-from libcpp.string cimport string
-from libcpp.vector cimport vector
-from libc.stdint cimport uint32_t
-from libc.stdint cimport int64_t
-from libc.stdint cimport uint64_t
-
-from .status cimport Status
-from .db cimport DB
-from .env cimport Env
-
-# TODO: For rocksdb >= 6.21.0, change to `rocksdb/utilities/backup_engine.h`.
-cdef extern from "rocksdb/utilities/backupable_db.h" namespace "rocksdb":
-    ctypedef uint32_t BackupID
-
-    # TODO: For rocksdb >= 6.21.0, rename to `BackupEngineOptions`.
-    cdef cppclass BackupableDBOptions:
-        BackupableDBOptions(const string& backup_dir)
-
-    cdef struct BackupInfo:
-        BackupID backup_id
-        int64_t timestamp
-        uint64_t size
-
-    cdef cppclass BackupEngine:
-        Status CreateNewBackup(DB*, cpp_bool) nogil except+
-        Status PurgeOldBackups(uint32_t) nogil except+
-        Status DeleteBackup(BackupID) nogil except+
-        void StopBackup() nogil except+
-        void GetBackupInfo(vector[BackupInfo]*) nogil except+
-        Status RestoreDBFromBackup(BackupID, string&, string&) nogil except+
-        Status RestoreDBFromLatestBackup(string&, string&) nogil except+
-
-    # TODO: For rocksdb >= 6.21.0, swap order of first two parameters.
-    cdef Status BackupEngine_Open "rocksdb::BackupEngine::Open"(
-        Env*,
-        BackupableDBOptions&,
-        BackupEngine**)
diff --git a/rocksdb/cpp/filter_policy_wrapper.hpp b/rocksdb/cpp/filter_policy_wrapper.hpp
deleted file mode 100644
index f6e8bef32fd19a07f3b2f1ba584257af96247c1c..0000000000000000000000000000000000000000
--- a/rocksdb/cpp/filter_policy_wrapper.hpp
+++ /dev/null
@@ -1,89 +0,0 @@
-#include "rocksdb/filter_policy.h"
-#include "rocksdb/env.h"
-#include <stdexcept>
-
-using std::string;
-using rocksdb::FilterPolicy;
-using rocksdb::Slice;
-using rocksdb::Logger;
-
-namespace py_rocks {
-    class FilterPolicyWrapper: public FilterPolicy {
-        public:
-            typedef void (*create_filter_func)(
-                void* ctx,
-                Logger*,
-                string&,
-                const Slice* keys,
-                int n,
-                string* dst);
-
-            typedef bool (*key_may_match_func)(
-                void* ctx,
-                Logger*,
-                string&,
-                const Slice& key,
-                const Slice& filter);
-
-            FilterPolicyWrapper(
-                string name,
-                void* ctx,
-                create_filter_func create_filter_callback,
-                key_may_match_func key_may_match_callback):
-                    name(name),
-                    ctx(ctx),
-                    create_filter_callback(create_filter_callback),
-                    key_may_match_callback(key_may_match_callback)
-            {}
-
-            virtual void
-            CreateFilter(const Slice* keys, int n, std::string* dst) const {
-                string error_msg;
-
-                this->create_filter_callback(
-                    this->ctx,
-                    this->info_log.get(),
-                    error_msg,
-                    keys,
-                    n,
-                    dst);
-
-                if (error_msg.size()) {
-                    throw std::runtime_error(error_msg.c_str());
-                }
-            }
-
-            virtual bool
-            KeyMayMatch(const Slice& key, const Slice& filter) const {
-                string error_msg;
-                bool val;
-
-                val = this->key_may_match_callback(
-                    this->ctx,
-                    this->info_log.get(),
-                    error_msg,
-                    key,
-                    filter);
-
-                if (error_msg.size()) {
-                    throw std::runtime_error(error_msg.c_str());
-                }
-                return val;
-            }
-
-            virtual const char* Name() const {
-                return this->name.c_str();
-            }
-
-            void set_info_log(std::shared_ptr<Logger> info_log) {
-                this->info_log = info_log;
-            }
-
-        private:
-            string name;
-            void* ctx;
-            create_filter_func create_filter_callback;
-            key_may_match_func key_may_match_callback;
-            std::shared_ptr<Logger> info_log;
-    };
-}
diff --git a/rocksdb/filter_policy.pxd b/rocksdb/filter_policy.pxd
deleted file mode 100644
index ca62697cc17f2443b5b0e078c4fe6b8e06fbd92a..0000000000000000000000000000000000000000
--- a/rocksdb/filter_policy.pxd
+++ /dev/null
@@ -1,39 +0,0 @@
-from libcpp cimport bool as cpp_bool
-from libcpp.string cimport string
-from libc.string cimport const_char
-from .slice_ cimport Slice
-from .std_memory cimport shared_ptr
-from .logger cimport Logger
-
-cdef extern from "rocksdb/filter_policy.h" namespace "rocksdb":
-    cdef cppclass FilterPolicy:
-        void CreateFilter(const Slice*, int, string*) nogil except+
-        cpp_bool KeyMayMatch(const Slice&, const Slice&) nogil except+
-        const_char* Name() nogil except+
-
-    cdef extern const FilterPolicy* NewBloomFilterPolicy(int) nogil except+
-
-ctypedef void (*create_filter_func)(
-    void*,
-    Logger*,
-    string&,
-    const Slice*,
-    int,
-    string*)
-
-ctypedef cpp_bool (*key_may_match_func)(
-    void*,
-    Logger*,
-    string&,
-    const Slice&,
-    const Slice&)
-
-cdef extern from "cpp/filter_policy_wrapper.hpp" namespace "py_rocks":
-    cdef cppclass FilterPolicyWrapper:
-        FilterPolicyWrapper(
-            string,
-            void*,
-            create_filter_func,
-            key_may_match_func) nogil except+
-
-        void set_info_log(shared_ptr[Logger]) nogil except+
diff --git a/rocksdb/table_factory.pxd b/rocksdb/table_factory.pxd
index 8b3622ec0d5f7c6c7b9c32005e5d305b644203ff..324f3bcf0aa0400e2bb1907f376af5d16c519410 100644
--- a/rocksdb/table_factory.pxd
+++ b/rocksdb/table_factory.pxd
@@ -3,7 +3,6 @@ from libcpp cimport bool as cpp_bool
 from .std_memory cimport shared_ptr
 
 from .cache cimport Cache
-from .filter_policy cimport FilterPolicy
 
 cdef extern from "rocksdb/table.h" namespace "rocksdb":
     cdef cppclass TableFactory:
@@ -29,7 +28,6 @@ cdef extern from "rocksdb/table.h" namespace "rocksdb":
         cpp_bool whole_key_filtering
         shared_ptr[Cache] block_cache
         shared_ptr[Cache] block_cache_compressed
-        shared_ptr[FilterPolicy] filter_policy
         cpp_bool enable_index_compression
         cpp_bool cache_index_and_filter_blocks
         int format_version
diff --git a/rocksdb/tests/test_options.py b/rocksdb/tests/test_options.py
index 337195f34e170cdb8e81ac822f29555ed8aa0ac4..87049131b105872c17b2ea094d1c048dc306f8ce 100644
--- a/rocksdb/tests/test_options.py
+++ b/rocksdb/tests/test_options.py
@@ -109,7 +109,6 @@ class TestOptions(unittest.TestCase):
     def test_block_options(self):
         rocksdb.BlockBasedTableFactory(
             block_size=4096,
-            filter_policy=TestFilterPolicy(),
             block_cache=rocksdb.LRUCache(100))
 
     def test_unicode_path(self):
diff --git a/setup.cfg b/setup.cfg
index 3d5afa77a43349f66ad367edf86f3ec66d9b8f40..333305acb2042be14a56b5f8a5f431d2529cd7cf 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = rocksdb
-version = 0.8.0rc3
+version = 0.9.0
 description = Python bindings for RocksDB
 long_description = file: README.rst
 long_description_content_type = text/x-rst