hayagriva icon indicating copy to clipboard operation
hayagriva copied to clipboard

Disambiguation year suffixes are not sorted

Open cherryblossom000 opened this issue 8 months ago • 7 comments

When there are multiple entries with the same author and the same year, the disambiguation suffixes (a, b, c…) are not in the correct sorted order. For example, the following code prints

Author. (2025b, January 1). Title 1.
Author. (2025c, January 1). Title 2.
Author. (2025a, January 1). Title 3.
use hayagriva::{
    archive::{locales, ArchivedStyle},
    citationberg::Style,
    io::from_yaml_str,
    BibliographyDriver, BibliographyRequest, CitationItem, CitationRequest,
};

fn main() {
    let style = ArchivedStyle::by_name("apa").unwrap().get();
    let locales = locales();
    let Style::Independent(style) = style else { panic!("dependent style") };

    let lib = from_yaml_str(
        r#"
        test3:
          type: misc
          author: Author
          title: Title 3
          date: 2025-01-01
        test1:
          type: misc
          author: Author
          title: Title 1
          date: 2025-01-01
        test2:
          type: misc
          author: Author
          title: Title 2
          date: 2025-01-01
        "#,
    )
    .unwrap();
    let mut driver = BibliographyDriver::new();
    driver.citation(CitationRequest::from_items(
        lib.iter().map(CitationItem::with_entry).collect(),
        &style,
        &locales,
    ));

    let rendered = driver.finish(BibliographyRequest::new(&style, None, &locales));
    rendered.bibliography.as_ref().unwrap().items.iter().for_each(|item| {
        let mut buf = String::new();
        item.content
            .write_buf(&mut buf, hayagriva::BufWriteFormat::Plain)
            .unwrap();
        println!("{}", buf);
    })
}
Test case that can be added to tests/citeproc.rs
#[test]
fn disambiguation() {
    let style = ArchivedStyle::by_name("apa").unwrap().get();
    let locales = locales();
    let Style::Independent(style) = style else {
        panic!("test has dependent style");
    };

    let lib = from_yaml_str(
        r#"
        test3:
          type: misc
          author: Author
          title: Title 3
          date: 2025-01-01
        test1:
          type: misc
          author: Author
          title: Title 1
          date: 2025-01-01
        test2:
          type: misc
          author: Author
          title: Title 2
          date: 2025-01-01
        "#,
    )
    .unwrap();

    let mut driver: BibliographyDriver<'_, Entry> = BibliographyDriver::new();
    driver.citation(CitationRequest::new(
        lib.iter()
            .map(|e| CitationItem::new(e, None, None, false, None))
            .collect(),
        &style,
        None,
        &locales,
        Some(1),
    ));

    let rendered = driver.finish(BibliographyRequest::new(&style, None, &locales));

    let expected_bibliography_items = vec![
        "Author. (2025a, January 1). Title 1.",
        "Author. (2025b, January 1). Title 2.",
        "Author. (2025c, January 1). Title 3.",
    ];
    rendered
        .bibliography
        .as_ref()
        .unwrap()
        .items
        .iter()
        .zip(expected_bibliography_items)
        .for_each(|(item, expected)| {
            let mut buf = String::new();
            item.content
                .write_buf(&mut buf, hayagriva::BufWriteFormat::Plain)
                .unwrap();
            assert_eq!(buf, expected);
        })
}

If anyone else needs a quick hacky fix, this seems to work for me:

diff --git a/src/csl/mod.rs b/src/csl/mod.rs
index 9515488..041dad7 100644
--- a/src/csl/mod.rs
+++ b/src/csl/mod.rs
@@ -249,9 +249,24 @@ impl<T: EntryLike + Hash + PartialEq + Eq + Debug> BibliographyDriver<'_, T> {
                 }
 
                 // 2c. Disambiguate by year-suffix.
-                disambiguate_year_suffix(&res, group, |entry, state| {
-                    mark(&mut rerender, entry, state)
-                });
+                disambiguate_year_suffix(
+                    &res,
+                    group,
+                    |entry, state| mark(&mut rerender, entry, state),
+                    |mut entries| {
+                        bib_style.sort_::<T, &T>(
+                            &mut entries,
+                            |e| CitationItem::with_entry(e),
+                            bib_style
+                                .csl
+                                .bibliography
+                                .as_ref()
+                                .and_then(|b| b.sort.as_ref()),
+                            request.locale.as_ref(),
+                            |_| 0,
+                        );
+                    },
+                );
             }
 
             if rerender.is_empty() {
@@ -772,6 +787,7 @@ fn disambiguate_year_suffix<F, T>(
     renders: &[SpeculativeCiteRender<'_, '_, T>],
     group: &AmbiguousGroup,
     mut mark: F,
+    sort: impl Fn(&mut Vec<&T>),
 ) where
     T: EntryLike + PartialEq,
     F: FnMut(&T, DisambiguateState),
@@ -817,6 +833,7 @@ fn disambiguate_year_suffix<F, T>(
         }
 
         // Assign year suffixes.
+        sort(&mut entries);
         for (i, entry) in entries.into_iter().enumerate() {
             mark(entry, DisambiguateState::YearSuffix(i as u8));
         }
diff --git a/src/csl/sort.rs b/src/csl/sort.rs
index 2783563..1db40e0 100644
--- a/src/csl/sort.rs
+++ b/src/csl/sort.rs
@@ -172,4 +172,35 @@ impl StyleContext<'_> {
             });
         }
     }
+
+    pub fn sort_<T: EntryLike, U>(
+        &self,
+        cites: &mut [U],
+        f: impl Fn(&U) -> CitationItem<T>,
+        sort: Option<&Sort>,
+        term_locale: Option<&LocaleCode>,
+        citation_number: impl Fn(&T) -> usize,
+    ) {
+        if let Some(sort) = sort {
+            cites.sort_by(|a, b| {
+                let a = &f(a);
+                let b = &f(b);
+                let mut ordering = Ordering::Equal;
+                for key in &sort.keys {
+                    ordering = self.cmp_entries(
+                        a,
+                        citation_number(a.entry),
+                        b,
+                        citation_number(b.entry),
+                        key,
+                        term_locale,
+                    );
+                    if ordering != Ordering::Equal {
+                        break;
+                    }
+                }
+                ordering
+            });
+        }
+    }
 }

cherryblossom000 avatar Jun 08 '25 04:06 cherryblossom000

Good eye, it seems we sort before assigning the suffixes.

Though it appears we also determine some other properties later. I wonder if the sort could influence this, or if we could just move the sort to the very end.

PgBiel avatar Jun 19 '25 01:06 PgBiel

This test already passes.

git clone -b disambiguation https://github.com/Andrew15-5/hayagriva.git
cargo test -F cli --locked -- disambiguation

https://github.com/typst/hayagriva/blob/f056c0ea57fdd1c1878e1e50c63682c010d5dfad/src/main.rs?plain=1#L496-L557

I tried adding it to the citeproc.rs, but tests there don't seem to run via cargo test.

Andrew15-5 avatar Jul 04 '25 02:07 Andrew15-5

I tried adding it to the citeproc.rs, but tests there don't seem to run via cargo test.

You need to enable the csl-json feature (see CI).

PgBiel avatar Jul 04 '25 02:07 PgBiel

Is this issue fixed?

Andrew15-5 avatar Jul 04 '25 04:07 Andrew15-5

No, just naively sorting again probably breaks citation numbers. I'm not sure why this doesn't appear to be explicitly tested at the moment, though it can be manually tested (either through the CLI or by hooking to Typst). Would be great to observe and take notes of what happens then.

PgBiel avatar Jul 04 '25 04:07 PgBiel

it seems we sort before assigning the suffixes.

Yeah, now I see exactly this. The suffixes stay in the order the entries were in the source.

Andrew15-5 avatar Jul 04 '25 06:07 Andrew15-5

Alright, so I had a bit of time on 4th of July to tackle this, and I made a bunch of comments, but they weren't super helpful, but last week I again had a bit of time, so these comments and general idea that I was very close to discovering something led me to making a bit more helpful comments and actually getting a first clue.

https://github.com/typst/hayagriva/blob/6153544608d2d24b6227948b6be8292a1e16072f/src/csl/mod.rs?plain=1#L136-L153

So, at first, bib_style is used for initial sorting of the entries, though they all get initial_idx = 0:

https://github.com/typst/hayagriva/blob/6153544608d2d24b6227948b6be8292a1e16072f/src/csl/mod.rs?plain=1#L100

https://github.com/typst/hayagriva/blob/6153544608d2d24b6227948b6be8292a1e16072f/src/csl/mod.rs?plain=1#L114-L119

And everything is probably going well, until you hit

https://github.com/typst/hayagriva/blob/6153544608d2d24b6227948b6be8292a1e16072f/src/csl/mod.rs?plain=1#L139

As it shows below, the id of both bib_style and style is APA in the new Rust test, which means they should be exactly the same? Didn't test anything else. So why re-create the CSL wrapper struct if one was already used for sorting items? Why not use the existing one?

Now we get to this:

https://github.com/typst/hayagriva/blob/6153544608d2d24b6227948b6be8292a1e16072f/src/csl/mod.rs?plain=1#L146-L152

I think now the indices should be correct, considering this is the function:

https://github.com/typst/hayagriva/blob/6153544608d2d24b6227948b6be8292a1e16072f/src/csl/mod.rs?plain=1#L123-L128

However, instead of using .csl.bibliography.as_ref().and_then(|b| b.sort.as_ref()) (why and_then even needed? doesn't look like a good API for getting bibliography style) now .csl.citation.sort.as_ref() is used, which is the clue I got. A styling for citation items is now used, but indices are sorted for bibliography items, and now style is used over bib_style. I looked at debug info, and it...shows that all keys are always equal, because there is no longer a title-sorting key/macro name/whatever. There is basically only author and date, which are identical.

logs
[src/csl/mod.rs:140:13] "interesting part" = "interesting part"
[src/csl/sort.rs:163:21] key = MacroName {
    name: "author-sort",
    names_min: Some(
        3,
    ),
    names_use_first: Some(
        1,
    ),
    names_use_last: None,
    sort_direction: Ascending,
}
[src/csl/sort.rs:164:21] a.entry.key() = "key2"
[src/csl/sort.rs:165:21] b.entry.key() = "key3"
[src/csl/sort.rs:174:21] ordering = Equal
[src/csl/sort.rs:163:21] key = MacroName {
    name: "date-sort-group",
    names_min: None,
    names_use_first: None,
    names_use_last: None,
    sort_direction: Ascending,
}
[src/csl/sort.rs:164:21] a.entry.key() = "key2"
[src/csl/sort.rs:165:21] b.entry.key() = "key3"
[src/csl/sort.rs:174:21] ordering = Equal
[src/csl/sort.rs:163:21] key = MacroName {
    name: "date-sort",
    names_min: None,
    names_use_first: None,
    names_use_last: None,
    sort_direction: Ascending,
}
[src/csl/sort.rs:164:21] a.entry.key() = "key2"
[src/csl/sort.rs:165:21] b.entry.key() = "key3"
[src/csl/sort.rs:174:21] ordering = Equal
[src/csl/sort.rs:163:21] key = Variable {
    variable: Standard(
        Status,
    ),
    sort_direction: Ascending,
}
[src/csl/sort.rs:164:21] a.entry.key() = "key2"
[src/csl/sort.rs:165:21] b.entry.key() = "key3"
[src/csl/sort.rs:174:21] ordering = Equal
[src/csl/sort.rs:163:21] key = MacroName {
    name: "author-sort",
    names_min: Some(
        3,
    ),
    names_use_first: Some(
        1,
    ),
    names_use_last: None,
    sort_direction: Ascending,
}
[src/csl/sort.rs:164:21] a.entry.key() = "key1"
[src/csl/sort.rs:165:21] b.entry.key() = "key2"
[src/csl/sort.rs:174:21] ordering = Equal
[src/csl/sort.rs:163:21] key = MacroName {
    name: "date-sort-group",
    names_min: None,
    names_use_first: None,
    names_use_last: None,
    sort_direction: Ascending,
}
[src/csl/sort.rs:164:21] a.entry.key() = "key1"
[src/csl/sort.rs:165:21] b.entry.key() = "key2"
[src/csl/sort.rs:174:21] ordering = Equal
[src/csl/sort.rs:163:21] key = MacroName {
    name: "date-sort",
    names_min: None,
    names_use_first: None,
    names_use_last: None,
    sort_direction: Ascending,
}
[src/csl/sort.rs:164:21] a.entry.key() = "key1"
[src/csl/sort.rs:165:21] b.entry.key() = "key2"
[src/csl/sort.rs:174:21] ordering = Equal
[src/csl/sort.rs:163:21] key = Variable {
    variable: Standard(
        Status,
    ),
    sort_direction: Ascending,
}
[src/csl/sort.rs:164:21] a.entry.key() = "key1"
[src/csl/sort.rs:165:21] b.entry.key() = "key2"
[src/csl/sort.rs:174:21] ordering = Equal
[src/csl/mod.rs:149:13] "end of interesting part" = "end of interesting part"

Checking the same output for first sorting, that is actually sorting the entries correctly, there is a "title" key/macro, and it does give something other than ordering = Equal. But this is sorting of bibliography items, not citation items.

So, either the APA is missing title check for citations (CSL style bug), or the Rust code is broken somewhere. Assuming the .csl.citation.sort is correct. Using .csl.bibliography.sort makes the test pass.

Andrew15-5 avatar Sep 05 '25 15:09 Andrew15-5