diff --git a/README.rst b/README.rst index 4030f596800aa87c1b35f89611a41401bcde4ffe..1174f672839751d7bbea99dbab4ab9042eb80c63 100644 --- a/README.rst +++ b/README.rst @@ -2,15 +2,9 @@ Note ========= The original pyrocksdb (https://pypi.python.org/pypi/pyrocksdb/0.4) has not been updated for long time. I update pyrocksdb to support the latest rocksdb. Please open issues in github if you have any problem. -News (2019/04/18) +News (2020/09/03 iFA) ========= -Currently I am refactoring the code, and more features like TTL are coming soon. And the installation with cmake will be much more easily. - -News (2019/04/19) -========= -I have created a branch named `pybind11` which provides the basic functions (`put`, `get` and `delete`) now. And the installtion is much more easily! you can try it if you encounter any installtion issues in the current version of `python-rocksdb`. - -The branch is under development and will be released to PypI after I migrate most of the existing features. +Python version which lower than 3.0 is not supported anymore. pyrocksdb ========= diff --git a/rocksdb/_rocksdb.pyx b/rocksdb/_rocksdb.pyx index edd856ab1aebd809a21ce1c5d10c2356b1b57ab8..910eb1d23b309f4a715134126a926a238d476d45 100644 --- a/rocksdb/_rocksdb.pyx +++ b/rocksdb/_rocksdb.pyx @@ -1,3 +1,4 @@ +#cython: language_level=3 import cython from libcpp.string cimport string from libcpp.deque cimport deque @@ -12,44 +13,52 @@ from cpython.bytes cimport PyBytes_FromString from cpython.bytes cimport PyBytes_FromStringAndSize from cpython.unicode cimport PyUnicode_Decode -from std_memory cimport shared_ptr -cimport options -cimport merge_operator -cimport filter_policy -cimport comparator -cimport slice_transform -cimport cache -cimport logger -cimport snapshot -cimport db -cimport iterator -cimport backup -cimport env -cimport table_factory -cimport memtablerep -cimport universal_compaction +from .std_memory cimport shared_ptr +from . cimport options +from . cimport merge_operator +from . cimport filter_policy +from . cimport comparator +from . cimport slice_transform +from . cimport cache +from . cimport logger +from . cimport snapshot +from . cimport db +from . cimport iterator +from . cimport backup +from . cimport env +from . cimport table_factory +from . cimport memtablerep +from . cimport universal_compaction # Enums are the only exception for direct imports # Their name als already unique enough -from universal_compaction cimport kCompactionStopStyleSimilarSize -from universal_compaction cimport kCompactionStopStyleTotalSize +from .universal_compaction cimport kCompactionStopStyleSimilarSize +from .universal_compaction cimport kCompactionStopStyleTotalSize -from options cimport kCompactionStyleLevel -from options cimport kCompactionStyleUniversal -from options cimport kCompactionStyleFIFO -from options cimport kCompactionStyleNone +from .options cimport kCompactionStyleLevel +from .options cimport kCompactionStyleUniversal +from .options cimport kCompactionStyleFIFO +from .options cimport kCompactionStyleNone -from slice_ cimport Slice -from status cimport Status +from .slice_ cimport Slice +from .status cimport Status import sys -from interfaces import MergeOperator as IMergeOperator -from interfaces import AssociativeMergeOperator as IAssociativeMergeOperator -from interfaces import FilterPolicy as IFilterPolicy -from interfaces import Comparator as IComparator -from interfaces import SliceTransform as ISliceTransform +from .interfaces import MergeOperator as IMergeOperator +from .interfaces import AssociativeMergeOperator as IAssociativeMergeOperator +from .interfaces import FilterPolicy as IFilterPolicy +from .interfaces import Comparator as IComparator +from .interfaces import SliceTransform as ISliceTransform + import traceback -import errors +from .errors import NotFound +from .errors import Corruption +from .errors import NotSupported +from .errors import InvalidArgument +from .errors import RocksIOError +from .errors import MergeInProgress +from .errors import Incomplete + import weakref ctypedef const filter_policy.FilterPolicy ConstFilterPolicy @@ -70,25 +79,25 @@ cdef check_status(const Status& st): return if st.IsNotFound(): - raise errors.NotFound(st.ToString()) + raise NotFound(st.ToString()) if st.IsCorruption(): - raise errors.Corruption(st.ToString()) + raise Corruption(st.ToString()) if st.IsNotSupported(): - raise errors.NotSupported(st.ToString()) + raise NotSupported(st.ToString()) if st.IsInvalidArgument(): - raise errors.InvalidArgument(st.ToString()) + raise InvalidArgument(st.ToString()) if st.IsIOError(): - raise errors.RocksIOError(st.ToString()) + raise RocksIOError(st.ToString()) if st.IsMergeInProgress(): - raise errors.MergeInProgress(st.ToString()) + raise MergeInProgress(st.ToString()) if st.IsIncomplete(): - raise errors.Incomplete(st.ToString()) + raise Incomplete(st.ToString()) raise Exception("Unknown error: %s" % st.ToString()) ###################################################### @@ -580,7 +589,11 @@ cdef class BlockBasedTableFactory(PyTableFactory): block_size=None, block_size_deviation=None, block_restart_interval=None, - whole_key_filtering=None): + whole_key_filtering=None, + enable_index_compression=False, + cache_index_and_filter_blocks=False, + format_version=2, + ): cdef table_factory.BlockBasedTableOptions table_options @@ -596,6 +609,11 @@ cdef class BlockBasedTableFactory(PyTableFactory): else: table_options.hash_index_allow_collision = False + if enable_index_compression: + table_options.enable_index_compression = True + else: + table_options.enable_index_compression = False + if checksum == 'crc32': table_options.checksum = table_factory.kCRC32c elif checksum == 'xxhash': @@ -624,12 +642,21 @@ cdef class BlockBasedTableFactory(PyTableFactory): else: table_options.whole_key_filtering = False + if cache_index_and_filter_blocks is not None: + if cache_index_and_filter_blocks: + table_options.cache_index_and_filter_blocks = True + else: + table_options.cache_index_and_filter_blocks = False + if block_cache is not None: table_options.block_cache = block_cache.get_cache() if block_cache_compressed is not None: table_options.block_cache_compressed = block_cache_compressed.get_cache() + if format_version is not None: + table_options.format_version = format_version + # Set the filter_policy self.py_filter_policy = None if filter_policy is not None: @@ -1234,6 +1261,17 @@ cdef class ColumnFamilyOptions(object): self.py_prefix_extractor = PySliceTransform(value) self.copts.prefix_extractor = self.py_prefix_extractor.get_transformer() + property optimize_filters_for_hits: + def __get__(self): + return self.copts.optimize_filters_for_hits + def __set__(self, value): + self.copts.optimize_filters_for_hits = value + + property paranoid_file_checks: + def __get__(self): + return self.copts.paranoid_file_checks + def __set__(self, value): + self.copts.paranoid_file_checks = value cdef class Options(ColumnFamilyOptions): cdef options.Options* opts @@ -1258,12 +1296,21 @@ cdef class Options(ColumnFamilyOptions): for key, value in kwargs.items(): setattr(self, key, value) + def IncreaseParallelism(self, int total_threads=16): + self.opts.IncreaseParallelism(total_threads) + property create_if_missing: def __get__(self): return self.opts.create_if_missing def __set__(self, value): self.opts.create_if_missing = value + property create_missing_column_families: + def __get__(self): + return self.opts.create_missing_column_families + def __set__(self, value): + self.opts.create_missing_column_families = value + property error_if_exists: def __get__(self): return self.opts.error_if_exists @@ -1312,6 +1359,18 @@ cdef class Options(ColumnFamilyOptions): def __set__(self, value): self.opts.max_background_compactions = value + property stats_history_buffer_size: + def __get__(self): + return self.opts.stats_history_buffer_size + def __set__(self, value): + self.opts.stats_history_buffer_size = value + + property max_background_jobs: + def __get__(self): + return self.opts.max_background_jobs + def __set__(self, value): + self.opts.max_background_jobs = value + property max_background_flushes: def __get__(self): return self.opts.max_background_flushes @@ -1672,23 +1731,30 @@ cdef class DB(object): def __dealloc__(self): self.close() - - def close(self): + + def close(self, safe=True): cdef ColumnFamilyOptions copts - if not self.db == NULL: + cdef cpp_bool c_safe = safe + if hasattr(self, "db"): + # We need stop backround compactions + with nogil: + db.CancelAllBackgroundWork(self.db, c_safe) # We have to make sure we delete the handles so rocksdb doesn't # assert when we delete the db - self.cf_handles.clear() + del self.cf_handles[:] for copts in self.cf_options: if copts: copts.in_use = False - self.cf_options.clear() + del self.cf_options[:] with nogil: + st = self.db.Close() del self.db - if self.opts is not None: - self.opts.in_use = False + if self.opts is not None: + self.opts.in_use = False + + check_status(st) @property def column_families(self): @@ -1799,6 +1865,9 @@ cdef class DB(object): check_status(st) def multi_get(self, keys, *args, **kwargs): + # Remove duplicate keys + keys = list(dict.fromkeys(keys)) + cdef vector[string] values values.resize(len(keys)) @@ -2037,6 +2106,21 @@ cdef class DB(object): return ret + def get_column_family_meta_data(self, ColumnFamilyHandle column_family=None): + cdef db.ColumnFamilyMetaData metadata + + cdef db.ColumnFamilyHandle* cf_handle = self.db.DefaultColumnFamily() + if column_family: + cf_handle = (<ColumnFamilyHandle?>column_family).get_handle() + + with nogil: + self.db.GetColumnFamilyMetaData(cf_handle, cython.address(metadata)) + + return { + "size":metadata.size, + "file_count":metadata.file_count, + } + def compact_range(self, begin=None, end=None, ColumnFamilyHandle column_family=None, **py_options): cdef options.CompactRangeOptions c_options diff --git a/rocksdb/backup.pxd b/rocksdb/backup.pxd index 04b3091990abc3986e119a4b2bb0bad7328fba02..1271c730a61152060074037ad5c92ea4bef0f0c0 100644 --- a/rocksdb/backup.pxd +++ b/rocksdb/backup.pxd @@ -5,9 +5,9 @@ from libc.stdint cimport uint32_t from libc.stdint cimport int64_t from libc.stdint cimport uint64_t -from status cimport Status -from db cimport DB -from env cimport Env +from .status cimport Status +from .db cimport DB +from .env cimport Env cdef extern from "rocksdb/utilities/backupable_db.h" namespace "rocksdb": ctypedef uint32_t BackupID diff --git a/rocksdb/cache.pxd b/rocksdb/cache.pxd index 740101b4b47ea7c27697ec13b3badb90623914cc..b9a5c78ae569882fcb3f4529caa59bc83368831f 100644 --- a/rocksdb/cache.pxd +++ b/rocksdb/cache.pxd @@ -1,4 +1,4 @@ -from std_memory cimport shared_ptr +from .std_memory cimport shared_ptr cdef extern from "rocksdb/cache.h" namespace "rocksdb": cdef cppclass Cache: diff --git a/rocksdb/comparator.pxd b/rocksdb/comparator.pxd index c54c26169ede8657dfacc4fe6a3aa2bc878e4a70..f6bb467d170e12783b5e7fc5f38aff76d92a2ba1 100644 --- a/rocksdb/comparator.pxd +++ b/rocksdb/comparator.pxd @@ -1,7 +1,7 @@ from libcpp.string cimport string -from slice_ cimport Slice -from logger cimport Logger -from std_memory cimport shared_ptr +from .slice_ cimport Slice +from .logger cimport Logger +from .std_memory cimport shared_ptr cdef extern from "rocksdb/comparator.h" namespace "rocksdb": cdef cppclass Comparator: diff --git a/rocksdb/db.pxd b/rocksdb/db.pxd index 357850138b9705d4385ad8ec62ac755530e92377..e5c239cd6b97ffa42c1592988b0bc82e83333524 100644 --- a/rocksdb/db.pxd +++ b/rocksdb/db.pxd @@ -1,12 +1,12 @@ -cimport options +from . cimport options from libc.stdint cimport uint64_t, uint32_t -from status cimport Status +from .status cimport Status from libcpp cimport bool as cpp_bool from libcpp.string cimport string from libcpp.vector cimport vector -from slice_ cimport Slice -from snapshot cimport Snapshot -from iterator cimport Iterator +from .slice_ cimport Slice +from .snapshot cimport Snapshot +from .iterator cimport Iterator cdef extern from "rocksdb/write_batch.h" namespace "rocksdb": cdef cppclass WriteBatch: @@ -51,6 +51,28 @@ cdef extern from "rocksdb/db.h" namespace "rocksdb": SequenceNumber smallest_seqno SequenceNumber largest_seqno + # cdef struct SstFileMetaData: + # uint64_t size + # string name + # uint64_t file_number + # string db_path + # string smallestkey + # string largestkey + # SequenceNumber smallest_seqno + # SequenceNumber largest_seqno + + # cdef struct LevelMetaData: + # int level + # uint64_t size + # string largestkey + # LiveFileMetaData files + + cdef struct ColumnFamilyMetaData: + uint64_t size + uint64_t file_count + # string largestkey + # LevelMetaData levels + cdef cppclass Range: Range(const Slice&, const Slice&) @@ -147,6 +169,7 @@ cdef extern from "rocksdb/db.h" namespace "rocksdb": Status Flush(const options.FlushOptions&, ColumnFamilyHandle*) nogil except+ Status DisableFileDeletions() nogil except+ Status EnableFileDeletions() nogil except+ + Status Close() nogil except+ # TODO: Status GetSortedWalFiles(VectorLogPtr& files) # TODO: SequenceNumber GetLatestSequenceNumber() @@ -156,6 +179,7 @@ cdef extern from "rocksdb/db.h" namespace "rocksdb": Status DeleteFile(string) nogil except+ void GetLiveFilesMetaData(vector[LiveFileMetaData]*) nogil except+ + void GetColumnFamilyMetaData(ColumnFamilyHandle*, ColumnFamilyMetaData*) nogil except+ ColumnFamilyHandle* DefaultColumnFamily() @@ -203,3 +227,6 @@ cdef extern from "rocksdb/db.h" namespace "rocksdb": const options.ColumnFamilyOptions&) nogil except+ string name options.ColumnFamilyOptions options + +cdef extern from "rocksdb/convenience.h" namespace "rocksdb": + void CancelAllBackgroundWork(DB*, cpp_bool) nogil except+ diff --git a/rocksdb/filter_policy.pxd b/rocksdb/filter_policy.pxd index 03face53abe5b46862b2c89e964f0ccec9cf95e1..ca62697cc17f2443b5b0e078c4fe6b8e06fbd92a 100644 --- a/rocksdb/filter_policy.pxd +++ b/rocksdb/filter_policy.pxd @@ -1,9 +1,9 @@ from libcpp cimport bool as cpp_bool from libcpp.string cimport string from libc.string cimport const_char -from slice_ cimport Slice -from std_memory cimport shared_ptr -from logger cimport Logger +from .slice_ cimport Slice +from .std_memory cimport shared_ptr +from .logger cimport Logger cdef extern from "rocksdb/filter_policy.h" namespace "rocksdb": cdef cppclass FilterPolicy: diff --git a/rocksdb/iterator.pxd b/rocksdb/iterator.pxd index 5cd34a8930442d1c03712969f3a137c74ee084ee..748e4db0382e2a41a4ef528e2e7bf4ac10051092 100644 --- a/rocksdb/iterator.pxd +++ b/rocksdb/iterator.pxd @@ -1,6 +1,6 @@ from libcpp cimport bool as cpp_bool -from slice_ cimport Slice -from status cimport Status +from .slice_ cimport Slice +from .status cimport Status cdef extern from "rocksdb/iterator.h" namespace "rocksdb": cdef cppclass Iterator: diff --git a/rocksdb/merge_operator.pxd b/rocksdb/merge_operator.pxd index 5db078b863e5964ce73277ced125f463d17fe083..510d5cd7de20f57af5cce488659b7c3df923b656 100644 --- a/rocksdb/merge_operator.pxd +++ b/rocksdb/merge_operator.pxd @@ -1,9 +1,9 @@ from libcpp.string cimport string from libcpp cimport bool as cpp_bool from libcpp.deque cimport deque -from slice_ cimport Slice -from logger cimport Logger -from std_memory cimport shared_ptr +from .slice_ cimport Slice +from .logger cimport Logger +from .std_memory cimport shared_ptr cdef extern from "rocksdb/merge_operator.h" namespace "rocksdb": cdef cppclass MergeOperator: diff --git a/rocksdb/options.pxd b/rocksdb/options.pxd index 894968df30d283b39aaaa3cce0021c12d2e1eb53..1c5a78816d6cd94c965faf23622ff0dc9efe3db2 100644 --- a/rocksdb/options.pxd +++ b/rocksdb/options.pxd @@ -3,17 +3,18 @@ from libcpp.string cimport string from libcpp.vector cimport vector from libc.stdint cimport uint64_t from libc.stdint cimport uint32_t -from std_memory cimport shared_ptr -from comparator cimport Comparator -from merge_operator cimport MergeOperator -from logger cimport Logger -from slice_ cimport Slice -from snapshot cimport Snapshot -from slice_transform cimport SliceTransform -from table_factory cimport TableFactory -from memtablerep cimport MemTableRepFactory -from universal_compaction cimport CompactionOptionsUniversal -from cache cimport Cache +from .std_memory cimport shared_ptr +from .comparator cimport Comparator +from .merge_operator cimport MergeOperator +from .logger cimport Logger +from .slice_ cimport Slice +from .snapshot cimport Snapshot +from .slice_transform cimport SliceTransform +from .table_factory cimport TableFactory +#from .statistics cimport Statistics +from .memtablerep cimport MemTableRepFactory +from .universal_compaction cimport CompactionOptionsUniversal +from .cache cimport Cache cdef extern from "rocksdb/options.h" namespace "rocksdb": cdef cppclass CompressionOptions: @@ -68,7 +69,7 @@ cdef extern from "rocksdb/options.h" namespace "rocksdb": shared_ptr[Logger] info_log int max_open_files int max_file_opening_threads - # TODO: statistics + #shared_ptr[Statistics] statistics cpp_bool use_fsync string db_log_dir string wal_dir @@ -81,6 +82,7 @@ cdef extern from "rocksdb/options.h" namespace "rocksdb": size_t log_file_time_to_roll size_t keep_log_file_num size_t recycle_log_file_num + size_t stats_history_buffer_size uint64_t max_manifest_file_size int table_cache_numshardbits uint64_t WAL_ttl_seconds @@ -102,6 +104,7 @@ cdef extern from "rocksdb/options.h" namespace "rocksdb": cpp_bool allow_concurrent_memtable_write cpp_bool enable_write_thread_adaptive_yield shared_ptr[Cache] row_cache + void IncreaseParallelism(int) nogil except+ cdef cppclass ColumnFamilyOptions: ColumnFamilyOptions() @@ -152,6 +155,8 @@ cdef extern from "rocksdb/options.h" namespace "rocksdb": # TODO: remove options source_compaction_factor, max_grandparent_overlap_bytes and expanded_compaction_factor from document uint64_t max_compaction_bytes CompressionOptions compression_opts + cpp_bool optimize_filters_for_hits + cpp_bool paranoid_file_checks cdef cppclass Options(DBOptions, ColumnFamilyOptions): pass diff --git a/rocksdb/slice_transform.pxd b/rocksdb/slice_transform.pxd index 34a61f0f00420e4f46a40ee9831da5713901e872..37d7740ab70a23bfe7aae606efd7f0600d0fc54f 100644 --- a/rocksdb/slice_transform.pxd +++ b/rocksdb/slice_transform.pxd @@ -1,8 +1,8 @@ -from slice_ cimport Slice +from .slice_ cimport Slice from libcpp.string cimport string from libcpp cimport bool as cpp_bool -from logger cimport Logger -from std_memory cimport shared_ptr +from .logger cimport Logger +from .std_memory cimport shared_ptr cdef extern from "rocksdb/slice_transform.h" namespace "rocksdb": cdef cppclass SliceTransform: diff --git a/rocksdb/statistics.pxd b/rocksdb/statistics.pxd new file mode 100644 index 0000000000000000000000000000000000000000..1028c8a72797489f640da9a88fbc2174f875e6fd --- /dev/null +++ b/rocksdb/statistics.pxd @@ -0,0 +1,13 @@ +from libc.stdint cimport uint32_t, uint8_t +from .std_memory cimport shared_ptr + +cdef extern from "rocksdb/statistics.h" namespace "rocksdb": + ctypedef enum StatsLevel: + kExceptHistogramOrTimers + kExceptTimers + kExceptDetailedTimers + kExceptTimeForMutex + kAll + + cdef cppclass Statistics: + void set_stats_level(StatsLevel) nogil except+ diff --git a/rocksdb/table_factory.pxd b/rocksdb/table_factory.pxd index 2359292a0f6f00b76a9c2ca0d53123d9f9db6ce3..8b3622ec0d5f7c6c7b9c32005e5d305b644203ff 100644 --- a/rocksdb/table_factory.pxd +++ b/rocksdb/table_factory.pxd @@ -1,9 +1,9 @@ from libc.stdint cimport uint32_t from libcpp cimport bool as cpp_bool -from std_memory cimport shared_ptr +from .std_memory cimport shared_ptr -from cache cimport Cache -from filter_policy cimport FilterPolicy +from .cache cimport Cache +from .filter_policy cimport FilterPolicy cdef extern from "rocksdb/table.h" namespace "rocksdb": cdef cppclass TableFactory: @@ -30,6 +30,9 @@ cdef extern from "rocksdb/table.h" namespace "rocksdb": shared_ptr[Cache] block_cache shared_ptr[Cache] block_cache_compressed shared_ptr[FilterPolicy] filter_policy + cpp_bool enable_index_compression + cpp_bool cache_index_and_filter_blocks + int format_version cdef TableFactory* NewBlockBasedTableFactory(const BlockBasedTableOptions&) diff --git a/setup.py b/setup.py index 6b4dfc4551377e74589c01a8ed54ff4fc0485c54..283285cddc5179685c01540d05944a3c955c8ca6 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -import platform +import platform, sys from setuptools import setup from setuptools import find_packages from setuptools import Extension @@ -17,6 +17,8 @@ extra_compile_args = [ if platform.system() == 'Darwin': extra_compile_args += ['-mmacosx-version-min=10.7', '-stdlib=libc++'] +if sys.version_info < (3 , 0): + raise Exception("pyRocksDB require Python 3.x") setup( name="python-rocksdb",