Is RTree index thread safe?
Hi, I'm trying to use the DuckDB spatial extension in a multi-threaded, single-process pattern with the duckdb-rs Rust client. AFAIU, DuckDB allows concurrency within a single process as long as there are no write conflicts. My flow appends new records (each with a new primary key) to a table that is indexed by an RTree.
I have a single connection, which I share between the threads:
/// Holds the single DuckDB connection that is shared between the threads.
pub struct Database {
// Guarded by a mutex; `insert` locks it only long enough to clone a connection for the call.
db: Mutex<Connection>,
}
impl Database {
.....
/// Runs an INSERT against the RTree-indexed table; called concurrently from several threads.
///
/// Each call clones the shared connection while holding the mutex, then executes
/// the statement on the clone after the mutex guard has gone out of scope.
pub fn insert(....) { // this function is executed concurrently
let connection = {
let g = self.db.lock()?; // lock only for cloning connections in threads
g.try_clone()?
// `g` is dropped at the end of this block, so the mutex is not held during `execute`.
};
let sql = "INSERT INTO ....."; // This is where we append to the table and the index should be updated.
let exec_res = connection.execute(
&sql,
params![....]);
// Handle exec_res Ok or Err
}
From time to time I'm getting segmentation faults with the following stack trace:
#0 in void std::__introsort_loop<duckdb::RTreeEntry*, long, __gnu_cxx::__ops::_Iter_comp_iter<duckdb::RTreeNode::SortEntriesByRowId()::{lambda(duckdb::RTreeEntry const&, duckdb::RTreeEntry const&)#1}> >(duckdb::RTreeEntry*, duckdb::RTreeEntry*, long, __gnu_cxx::__ops::_Iter_comp_iter<duckdb::RTreeNode::SortEntriesByRowId()::{lambda(duckdb::RTreeEntry const&, duckdb::RTreeEntry const&)#1}>) () from /opt/duckdb/extensions/v1.4.0/linux_amd64/spatial.duckdb_extension
#1 in duckdb::RTree::SplitNode(duckdb::RTreeEntry&) const () from /opt/duckdb/extensions/v1.4.0/linux_amd64/spatial.duckdb_extension
#2 in duckdb::RTree::BranchInsert(duckdb::RTreeEntry&, duckdb::RTreeEntry const&) () from /opt/duckdb/extensions/v1.4.0/linux_amd64/spatial.duckdb_extension
#3 in duckdb::RTree::BranchInsert(duckdb::RTreeEntry&, duckdb::RTreeEntry const&) () from /opt/duckdb/extensions/v1.4.0/linux_amd64/spatial.duckdb_extension
#4 in duckdb::RTree::RootInsert(duckdb::RTreeEntry&, duckdb::RTreeEntry const&) () from /opt/duckdb/extensions/v1.4.0/linux_amd64/spatial.duckdb_extension
#5 in duckdb::RTreeIndex::Insert(duckdb::IndexLock&, duckdb::DataChunk&, duckdb::Vector&) () from /opt/duckdb/extensions/v1.4.0/linux_amd64/spatial.duckdb_extension
#6 in duckdb::RTreeIndex::Append(duckdb::IndexLock&, duckdb::DataChunk&, duckdb::Vector&) () from /opt/duckdb/extensions/v1.4.0/linux_amd64/spatial.duckdb_extension
#7 in duckdb::BoundIndex::Append(duckdb::IndexLock&, duckdb::DataChunk&, duckdb::Vector&, duckdb::IndexAppendInfo&) () from /opt/duckdb/extensions/v1.4.0/linux_amd64/spatial.duckdb_extension
#8 in duckdb::BoundIndex::Append(duckdb::DataChunk&, duckdb::Vector&, duckdb::IndexAppendInfo&) ()
#9 in duckdb::DataTable::AppendToIndexes(duckdb::TableIndexList&, duckdb::optional_ptr<duckdb::TableIndexList, true>, duckdb::DataChunk&, duckdb::DataChunk&, duckdb::vector<duckdb::StorageIndex, true> const&, long, duckdb::IndexAppendMode)
#10 in duckdb::DataTable::AppendToIndexes(duckdb::optional_ptr<duckdb::TableIndexList, true>, duckdb::DataChunk&, duckdb::DataChunk&, duckdb::vector<duckdb::StorageIndex, true> const&, long, duckdb::IndexAppendMode) ()
#11 in std::_Function_handler<bool (duckdb::DataChunk&), duckdb::LocalTableStorage::AppendToIndexes(duckdb::DuckTransaction&, duckdb::TableAppendState&, bool)::{lambda(duckdb::DataChunk&)#1}>::_M_invoke(std::_Any_data const&, duckdb::DataChunk&) ()
#12 in duckdb::RowGroupCollection::Scan(duckdb::DuckTransaction&, duckdb::vector<duckdb::StorageIndex, true> const&, std::function<bool (duckdb::DataChunk&)> const&) ()
#13 in duckdb::RowGroupCollection::Scan(duckdb::DuckTransaction&, std::function<bool (duckdb::DataChunk&)> const&) ()
#14 in duckdb::LocalTableStorage::AppendToIndexes(duckdb::DuckTransaction&, duckdb::TableAppendState&, bool) ()
#15 in duckdb::LocalStorage::Flush(duckdb::DataTable&, duckdb::LocalTableStorage&, duckdb::optional_ptr<duckdb::StorageCommitState, true>)
#16 in duckdb::LocalStorage::Commit(duckdb::optional_ptr<duckdb::StorageCommitState, true>) ()
#17 in duckdb::DuckTransaction::WriteToWAL(duckdb::AttachedDatabase&, duckdb::unique_ptr<duckdb::StorageCommitState, std::default_delete<duckdb::StorageCommitState>, true>&) ()
#18 in duckdb::DuckTransactionManager::CommitTransaction(duckdb::ClientContext&, duckdb::Transaction&) ()
#19 in duckdb::MetaTransaction::Commit() ()
#20 in duckdb::TransactionContext::Commit() ()
#21 in duckdb::ClientContext::EndQueryInternal(duckdb::ClientContextLock&, bool, bool, duckdb::optional_ptr<duckdb::ErrorData, true>) ()
#22 in duckdb::ClientContext::CleanupInternal(duckdb::ClientContextLock&, duckdb::BaseQueryResult*, bool) ()
#23 in duckdb::ClientContext::FetchResultInternal(duckdb::ClientContextLock&, duckdb::PendingQueryResult&) ()
#24 in duckdb::PendingQueryResult::ExecuteInternal(duckdb::ClientContextLock&) ()
#25 in duckdb::PendingQueryResult::Execute() ()
#26 in duckdb::PreparedStatement::Execute(std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, duckdb::BoundParameterData, duckdb::CaseInsensitiveStringHashFunction, duckdb::CaseInsensitiveStringEquality, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, duckdb::BoundParameterData> > >&, bool) ()
... my app stack trace here
I want to verify that we are not misusing/abusing DuckDB and, if our usage is valid, whether the RTree index supports this kind of access pattern. Thanks
Hello!
The R-tree index should be thread safe. It's hard to judge from the stack trace, but I recently fixed another issue in the R-tree that triggered after lots of inserts and updates (https://github.com/duckdb/duckdb-spatial/issues/681), which I suspect may be the same one you're seeing. You could try `FORCE INSTALL spatial FROM core_nightly` to pull the latest CI build and test with that; otherwise, DuckDB v1.4.2 is scheduled for next week and should contain the fix as well.