mongo-rust-driver
mongo-rust-driver copied to clipboard
The cursor returned by list_collections fails in deserialize_current on some data.
Environment
Rust: rustc 1.82.0 (f6e511eec 2024-10-15)
OS: Linux 31004c8e 6.10.6+bpo-amd64 #1 SMP PREEMPT_DYNAMIC Debian 6.10.6-1~bpo12+1 (2024-08-26) x86_64 GNU/Linux
mongodb rust package: 3.2.3
bson rust package: 2.14.0
mongodb server: 4.4.18
mongodb topology: localhost replica set with one member
The bug
I expect the Database::list_collections() method to provide a cursor on which .deserialize_current() succeeds for every collection in every database.
In my environment it fails on the local.oplog.rs collection.
At the same time, the list_collections() method from the pymongo Python package works fine with the same MongoDB server, i.e. it can iterate over the basic data of all collections, including the "local.oplog.rs" collection.
Code which fails
use core::time::Duration;
use mongodb::{
bson::doc,
options::{ClientOptions, ServerAddress},
sync::Client,
};
/// Reproduction: list the `oplog.rs` collection of the `local` database and try to
/// deserialize every entry the cursor yields.
///
/// For an entry that fails to deserialize, the raw document is printed first and the
/// deserialization is then repeated with `unwrap()` so the error surfaces as a panic.
fn main() {
    let options = ClientOptions::builder()
        .hosts(vec![ServerAddress::parse("localhost").unwrap()])
        .build();
    let client = Client::with_options(options).unwrap();

    let mut cursor = client
        .database("local")
        .list_collections()
        .filter(doc! {"name": "oplog.rs"})
        .run()
        .unwrap();

    while cursor.advance().unwrap() {
        if cursor.deserialize_current().is_err() {
            // Dump the raw bytes before panicking so the offending field can be inspected.
            println!("Raw document:");
            println!("{:?}", cursor.current());
            // Deserialize again and unwrap to panic with the full error.
            cursor.deserialize_current().unwrap();
        }
    }
}
Output
Raw document:
RawDocument { data: "91000000026e616d6500090000006f706c6f672e7273000274797065000b000000636f6c6c656374696f6e00036f7074696f6e73002a0000000863617070656400010173697a6500000000802392f341086175746f496e646578496400000003696e666f002b00000008726561644f6e6c79000005757569640010000000047f25e931b53642cca6a47030796979a20000" }
thread 'main' panicked at src/bin/test.rs:26:46:
called `Result::unwrap()` on an `Err` value: Error { kind: BsonDeserialization(DeserializationError { message: "invalid type: floating point `5253511168.0`, expected u64" }), labels: {}, wire_version: None, source: None }
stack backtrace:
0: rust_begin_unwind
at /rustc/f6e511eec7342f59a25f7c0534f1dbea00d01b14/library/std/src/panicking.rs:662:5
1: core::panicking::panic_fmt
at /rustc/f6e511eec7342f59a25f7c0534f1dbea00d01b14/library/core/src/panicking.rs:74:14
2: core::result::unwrap_failed
at /rustc/f6e511eec7342f59a25f7c0534f1dbea00d01b14/library/core/src/result.rs:1677:5
3: core::result::Result<T,E>::unwrap
at /rustc/f6e511eec7342f59a25f7c0534f1dbea00d01b14/library/core/src/result.rs:1102:23
4: test::main
at ./src/bin/test.rs:26:17
5: core::ops::function::FnOnce::call_once
at /rustc/f6e511eec7342f59a25f7c0534f1dbea00d01b14/library/core/src/ops/function.rs:250:5
note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose backtrace.
Output of mongodb shell collstats command
> db.runCommand({"collstats": "oplog.rs"})
{
"ns" : "local.oplog.rs",
"size" : 5222406025,
"count" : 11329019,
"avgObjSize" : 460,
"storageSize" : 1785073664,
"freeStorageSize" : 12488704,
"capped" : true,
"max" : -1,
"maxSize" : NumberLong("5253511168"),
"sleepCount" : 0,
"sleepMS" : 0,
... SOME OUTPUT IS TRUNCATED
"nindexes" : 0,
"indexDetails" : {
},
"indexBuilds" : [ ],
"totalIndexSize" : 0,
"totalSize" : 1785073664,
"indexSizes" : {
},
"scaleFactor" : 1,
"ok" : 1,
"$clusterTime" : {
"clusterTime" : Timestamp(1742557269, 1),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
},
"operationTime" : Timestamp(1742557269, 1)
}
Hi @lorien, thank you for this detailed report! I converted the raw document data from your output into the Document type and got the following:
Document({
"name": String(
"oplog.rs",
),
"type": String(
"collection",
),
"options": Document({
"capped": Boolean(
true,
),
"size": Double(
5253511168.0,
),
"autoIndexId": Boolean(
false,
),
}),
"info": Document({
"readOnly": Boolean(
false,
),
"uuid": Binary {
subtype: Uuid,
bytes: [
127,
37,
233,
49,
181,
54,
66,
204,
166,
164,
112,
48,
121,
105,
121,
162,
],
},
}),
})
The culprit here is size: the number returned is a double, but the driver attempts to deserialize this field as the size field on CreateCollectionOptions, which expects an integer. I wasn't able to reproduce this locally, so it seems that the server may return this field as different number types depending upon the value. We can handle this in the driver by adding logic to deserialize from both integers and doubles for the size field. I will discuss this with the team next week.
If you don't need the CollectionSpecification type specifically, a workaround in the meantime would be to convert the bytes into the Document type for a readable version of the output:
// Workaround: skip the typed deserialization and decode each raw entry into a
// generic `Document` instead.
let mut cursor = client
    .database("local")
    .list_collections()
    .filter(doc! { "name": "oplog.rs" })
    .run()?;
while cursor.advance()? {
    // Borrow the current raw document and parse its bytes into a `Document`.
    let raw_bytes = cursor.current().as_bytes();
    let document = Document::from_reader(raw_bytes)?;
    println!("{}", document);
}
tracked by RUST-2184