tantivy icon indicating copy to clipboard operation
tantivy copied to clipboard

Possible perf regression in Query Parser with deeply nested queries

Open cjrh opened this issue 1 year ago • 1 comments

Overview

When upgrading tantivy from 0.21 to 0.22 (used within tantivy-py), we observed a performance regression in query parsing. Rolling back resolved the issue. The issue occurred with queries that were deeply nested.

Reproducer

It's been difficult to get an exact reproducer, but I think I have a small example that demonstrates the issue:

Tag 0.22.0

 depth  leading+      parse time query                         
    20     false         0.64593 ((((((((((((((((((((title:test))))))))))))))))))))
    21     false         1.29425 (((((((((((((((((((((title:test)))))))))))))))))))))
    22     false         2.64207 ((((((((((((((((((((((title:test))))))))))))))))))))))
    23     false         5.31082 (((((((((((((((((((((((title:test)))))))))))))))))))))))
    24     false        10.62795 ((((((((((((((((((((((((title:test))))))))))))))))))))))))
    25     false        21.20079 (((((((((((((((((((((((((title:test)))))))))))))))))))))))))
 depth  leading+      parse time query                         
    20      true         0.68443 +((((((((((((((((((((title:test))))))))))))))))))))
    21      true         1.35804 +(((((((((((((((((((((title:test)))))))))))))))))))))
    22      true         2.70420 +((((((((((((((((((((((title:test))))))))))))))))))))))
    23      true         5.37000 +(((((((((((((((((((((((title:test)))))))))))))))))))))))
    24      true        10.82749 +((((((((((((((((((((((((title:test))))))))))))))))))))))))
    25      true        21.23502 +(((((((((((((((((((((((((title:test)))))))))))))))))))))))))

Tag 0.21.1

 depth  leading+       parse (s) query                         
    20     false         0.59977 ((((((((((((((((((((title:test))))))))))))))))))))
    21     false         1.20016 (((((((((((((((((((((title:test)))))))))))))))))))))
    22     false         2.39481 ((((((((((((((((((((((title:test))))))))))))))))))))))
    23     false         4.79820 (((((((((((((((((((((((title:test)))))))))))))))))))))))
    24     false         9.53619 ((((((((((((((((((((((((title:test))))))))))))))))))))))))
    25     false        19.15066 (((((((((((((((((((((((((title:test)))))))))))))))))))))))))
 depth  leading+       parse (s) query                         
    20      true         0.30958 +((((((((((((((((((((title:test))))))))))))))))))))
    21      true         0.62179 +(((((((((((((((((((((title:test)))))))))))))))))))))
    22      true         1.24262 +((((((((((((((((((((((title:test))))))))))))))))))))))
    23      true         2.49942 +(((((((((((((((((((((((title:test)))))))))))))))))))))))
    24      true         4.94994 +((((((((((((((((((((((((title:test))))))))))))))))))))))))
    25      true         9.89061 +(((((((((((((((((((((((((title:test)))))))))))))))))))))))))

main branch (55b0b52457c87df45250243c3310b4ffad85b088)

 depth  leading+       parse (s) query                         
    20     false         0.52478 ((((((((((((((((((((title:test))))))))))))))))))))
    21     false         1.03610 (((((((((((((((((((((title:test)))))))))))))))))))))
    22     false         2.09806 ((((((((((((((((((((((title:test))))))))))))))))))))))
    23     false         4.16839 (((((((((((((((((((((((title:test)))))))))))))))))))))))
    24     false         8.33352 ((((((((((((((((((((((((title:test))))))))))))))))))))))))
    25     false        16.73920 (((((((((((((((((((((((((title:test)))))))))))))))))))))))))
 depth  leading+       parse (s) query                         
    20      true         0.52754 +((((((((((((((((((((title:test))))))))))))))))))))
    21      true         1.05164 +(((((((((((((((((((((title:test)))))))))))))))))))))
    22      true         2.10110 +((((((((((((((((((((((title:test))))))))))))))))))))))
    23      true         4.19262 +(((((((((((((((((((((((title:test)))))))))))))))))))))))
    24      true         8.38012 +((((((((((((((((((((((((title:test))))))))))))))))))))))))
    25      true        16.75604 +(((((((((((((((((((((((((title:test)))))))))))))))))))))))))

It turns out, on 0.21, we had been benefitting from the faster path that happens when a leading + is in front of the query text. In 0.22, and in main branch, that benefit is gone. With 0.22, we observed some queries that took several minutes to parse. It is somewhat difficult to get those exact queries, but if necessary I can try.

It is also clear from this simple benchmark that the query parsing time is exponential in the depth of nesting, even for the very simple query above. That is its own issue, but my main question is whether the performance profile of 0.21 can be restored in 0.22.x, or main branch. As is, with 0.22 we will be unable to upgrade without additional work on our query generation code.

Here are the files to reproduce the project:

# Cargo.toml
[package]
name = "deepquery"
version = "0.1.0"
edition = "2021"

[dependencies]
zstd-sys = "=2.0.9"
# tantivy = { git = "https://github.com/quickwit-oss/tantivy.git", tag = "0.22.0" }
tantivy = { git = "https://github.com/quickwit-oss/tantivy.git", tag = "0.21.1" }
# tantivy = { git = "https://github.com/quickwit-oss/tantivy.git", tag = "0.21" }
# tantivy = { git = "https://github.com/quickwit-oss/tantivy.git", branch = "main" }
// lib.rs
#[cfg(test)]
mod tests {
    use std::time::Instant;
    use tantivy::query::QueryParser;
    use tantivy::schema::*;

    #[test]
    fn test_parse() -> Result<(), Box<dyn std::error::Error>> {
        let mut schema_builder = SchemaBuilder::new();
        schema_builder.add_u64_field("order", STORED | FAST);
        schema_builder.add_text_field("title", TEXT | STORED);
        let schema = schema_builder.build();
        let title = schema.get_field("title").unwrap();

        let mut index = tantivy::Index::create_in_ram(schema.clone());
        let query_parser = QueryParser::for_index(&index, vec![title]);

        for leading_plus in [false, true].into_iter() {
            println!(
                "{:>6}{:>10}{:>16} {:<30}",
                "depth", "leading+", "parse (s)", "query"
            );
            for depth in 20..=25 {
                let query_text = make_query_text(depth, leading_plus);
                let t0 = Instant::now();
                {
                    let query = query_parser.parse_query(&query_text)?;
                }
                let t1 = Instant::now();
                println!(
                    "{:>6?}{:>10?}{:>16.5} {:<30}",
                    depth,
                    leading_plus,
                    (t1 - t0).as_secs_f64(),
                    query_text
                );
            }
        }

        Ok(())
    }

    fn make_query_text(count: usize, leading_plus: bool) -> String {
        let leading = vec!['('; count].iter().collect::<String>();
        let trailing = vec![')'; count].iter().collect::<String>();
        let front = if leading_plus { "+" } else { "" };
        let q_text = vec![front, &leading, "title:test", &trailing].join("");
        q_text
    }
}

On rust 1.81, linux x86_64, I run the test case like this:

$ cargo test --release -- --nocapture

cjrh avatar Sep 21 '24 19:09 cjrh

Although it got slower with the exponential behavior I can see this easily being a problem in earlier versions too

PSeitz avatar Sep 25 '24 03:09 PSeitz