mongo-rust-driver icon indicating copy to clipboard operation
mongo-rust-driver copied to clipboard

The list_collections method provides a cursor whose deserialize_current method fails on some data.

Open lorien opened this issue 8 months ago • 2 comments

Environment

Rust: rustc 1.82.0 (f6e511eec 2024-10-15) OS: Linux 31004c8e 6.10.6+bpo-amd64 #1 SMP PREEMPT_DYNAMIC Debian 6.10.6-1~bpo12+1 (2024-08-26) x86_64 GNU/Linux mongodb rust package: 3.2.3 bson rust package: 2.14.0 mongodb server: 4.4.18 mongodb topology: localhost replica set with one member

The bug

I expect the Database::list_collections() method to provide a cursor that can call .deserialize_current() for every collection in every database. In my environment it fails on the local.oplog.rs collection. At the same time, the list_collections() method from the pymongo Python package works fine with the same MongoDB server, i.e. it can iterate over the basic data of collections, including the "local.oplog.rs" collection.

Code which fails

use core::time::Duration;
use mongodb::{
    bson::doc,
    options::{ClientOptions, ServerAddress},
    sync::Client,
};

fn main() { // Minimal reproduction: list the "oplog.rs" collection of the "local" database and deserialize each result.
    let client = Client::with_options( // build a sync client from explicit options
        ClientOptions::builder()
            .hosts(vec![ServerAddress::parse("localhost").unwrap()]) // single host: "localhost"
            .build(),
    )
    .unwrap();
    let mut cursor = client
        .database("local")
        .list_collections()
        .filter(doc! {"name": "oplog.rs"}) // only return the collection named "oplog.rs"
        .run()
        .unwrap();
    while cursor.advance().unwrap() { // step to the next result; the loop ends when advance() returns false
        match cursor.deserialize_current() { // try the typed deserialization of the current result
            Err(error) => { // `error` itself is unused; the failure is reproduced by the unwrap below
                println!("Raw document:");
                println!("{:?}", cursor.current()); // dump the raw (undecoded) result for inspection
                cursor.deserialize_current().unwrap(); // deserialize again and unwrap so the error surfaces as a panic
            }
            _ => (), // deserialization succeeded: nothing to report
        };
    }
}

Output

Raw document:
RawDocument { data: "91000000026e616d6500090000006f706c6f672e7273000274797065000b000000636f6c6c656374696f6e00036f7074696f6e73002a0000000863617070656400010173697a6500000000802392f341086175746f496e646578496400000003696e666f002b00000008726561644f6e6c79000005757569640010000000047f25e931b53642cca6a47030796979a20000" }
thread 'main' panicked at src/bin/test.rs:26:46:
called `Result::unwrap()` on an `Err` value: Error { kind: BsonDeserialization(DeserializationError { message: "invalid type: floating point `5253511168.0`, expected u64" }), labels: {}, wire_version: None, source: None }
stack backtrace:
   0: rust_begin_unwind
             at /rustc/f6e511eec7342f59a25f7c0534f1dbea00d01b14/library/std/src/panicking.rs:662:5
   1: core::panicking::panic_fmt
             at /rustc/f6e511eec7342f59a25f7c0534f1dbea00d01b14/library/core/src/panicking.rs:74:14
   2: core::result::unwrap_failed
             at /rustc/f6e511eec7342f59a25f7c0534f1dbea00d01b14/library/core/src/result.rs:1677:5
   3: core::result::Result<T,E>::unwrap
             at /rustc/f6e511eec7342f59a25f7c0534f1dbea00d01b14/library/core/src/result.rs:1102:23
   4: test::main
             at ./src/bin/test.rs:26:17
   5: core::ops::function::FnOnce::call_once
             at /rustc/f6e511eec7342f59a25f7c0534f1dbea00d01b14/library/core/src/ops/function.rs:250:5
note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose backtrace.

Output of mongodb shell collstats command

> db.runCommand({"collstats": "oplog.rs"})
{
        "ns" : "local.oplog.rs",
        "size" : 5222406025,
        "count" : 11329019,
        "avgObjSize" : 460,
        "storageSize" : 1785073664,
        "freeStorageSize" : 12488704,
        "capped" : true,
        "max" : -1,
        "maxSize" : NumberLong("5253511168"),
        "sleepCount" : 0,
        "sleepMS" : 0,

        ... SOME OUTPUT IS TRUNCATED

        "nindexes" : 0,
        "indexDetails" : {

        },
        "indexBuilds" : [ ],
        "totalIndexSize" : 0,
        "totalSize" : 1785073664,
        "indexSizes" : {

        },
        "scaleFactor" : 1,
        "ok" : 1,
        "$clusterTime" : {
                "clusterTime" : Timestamp(1742557269, 1),
                "signature" : {
                        "hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
                        "keyId" : NumberLong(0)
                }
        },
        "operationTime" : Timestamp(1742557269, 1)
}

lorien avatar Mar 21 '25 11:03 lorien

Hi @lorien, thank you for this detailed report! I converted the raw document data from your output into the Document type and got the following:

Document({
    "name": String(
        "oplog.rs",
    ),
    "type": String(
        "collection",
    ),
    "options": Document({
        "capped": Boolean(
            true,
        ),
        "size": Double(
            5253511168.0,
        ),
        "autoIndexId": Boolean(
            false,
        ),
    }),
    "info": Document({
        "readOnly": Boolean(
            false,
        ),
        "uuid": Binary {
            subtype: Uuid,
            bytes: [
                127,
                37,
                233,
                49,
                181,
                54,
                66,
                204,
                166,
                164,
                112,
                48,
                121,
                105,
                121,
                162,
            ],
        },
    }),
})

The culprit here is size: the number returned is a double, but the driver attempts to deserialize this field as the size field on CreateCollectionOptions, which expects an integer. I wasn't able to reproduce this locally, so it seems that the server may return this field as different number types depending upon the value. We can handle this in the driver by adding logic to deserialize from both integers and doubles for the size field. I will discuss this with the team next week.

If you don't need the CollectionSpecification type specifically, a workaround in the meantime would be to convert the bytes into the Document type for a readable version of the output:

// Workaround: bypass the typed deserialization and decode each raw result
// into a generic `Document` instead.
let mut cursor = client
    .database("local")
    .list_collections()
    .filter(doc! { "name": "oplog.rs" })
    .run()?;
loop {
    // `advance` reports whether another result is available.
    if !cursor.advance()? {
        break;
    }
    // Decode straight from the raw bytes of the current result.
    let decoded = Document::from_reader(cursor.current().as_bytes())?;
    println!("{}", decoded);
}

isabelatkinson avatar Mar 21 '25 15:03 isabelatkinson

tracked by RUST-2184

isabelatkinson avatar Mar 25 '25 16:03 isabelatkinson