fuzzy-matcher
fuzzy-matcher copied to clipboard
Unicode Matching Indices are potentially incorrect
/// Reproduction: prints the indices returned by `fuzzy_indices` next to the
/// byte offsets reported by `str::char_indices` for text that contains
/// multi-byte UTF-8 characters, so the two can be compared.
fn main() {
    let matcher = SkimMatcherV2::default();

    // Case 1: one leading ASCII space, then three two-byte characters.
    let text = " üäö ";
    // [(0, ' '), (1, 'ü'), (3, 'ä'), (5, 'ö'), (7, ' ')]
    println!("{:?}", text.char_indices().collect::<Vec<_>>());
    println!("{:?}", matcher.fuzzy_indices(text, "ü")); // -> 1 (good)
    println!("{:?}", matcher.fuzzy_indices(text, "ä")); // -> 2 (bad: byte offset is 3)
    println!("{:?}", matcher.fuzzy_indices(text, "ö")); // -> 3 (bad: byte offset is 5)

    // Case 2: three leading ASCII characters ('2' followed by TWO spaces), so
    // that 'ü' starts at byte offset 3 as listed in the expected output below.
    let text = "2  üäö ";
    // [(0, '2'), (1, ' '), (2, ' '), (3, 'ü'), (5, 'ä'), (7, 'ö'), (9, ' ')]
    println!("{:?}", text.char_indices().collect::<Vec<_>>());
    println!("{:?}", matcher.fuzzy_indices(text, "ü")); // -> 3 (good)
    println!("{:?}", matcher.fuzzy_indices(text, "ä")); // -> 4 (bad: byte offset is 5)
    println!("{:?}", matcher.fuzzy_indices(text, "ö")); // -> 5 (bad: byte offset is 7)
}
I would have expected the returned indices to be byte indices into the string, but they appear to be positions in the `chars()`/`char_indices()` iterator (i.e. character counts) instead.
Is this intended behavior (in which case it should probably be documented), or is it a case of a missing `+ ch.len_utf8()` that is needed to properly advance the index into the string?