syntect `syntest` and assertions that extend over the end of the line

While looking at the CI failures from https://github.com/trishume/syntect/pull/166, I identified the reason why syntest reports failures for the Git Rebase syntax:

+FAILED testdata/Packages/Git Formats/syntax_test_git_rebase: 3

The syntax test file contains assertions that extend beyond the \n character. syntest currently compares these assertion scope selectors against the scope stack at the new line character. But in Sublime Text, the following syntax test passes:

# SYNTAX TEST "Git Rebase.sublime-syntax"
# <- text.git.rebase comment.line punctuation.definition.comment
pick d284bb2 Initial commit
# <- meta.commit
#^^^^^^^^^^^^^^^^^^^^^^^^^^^ meta.commit
#                           ^ text.git.rebase comment.line.git.rebase punctuation.definition.comment.git.rebase -meta.commit
#                            ^ comment.line.git - punctuation

which means, in Sublime Text, the assertions wrap onto the next line. Probably we should adapt syntest to also work this way.

Jun 04 '18 07:06 keith-hall

I had a go at this, with keeping the "read and process one line at a time" behavior (as opposed to Sublime's "read the whole syntax test file into memory first" implementation) but my Rust skills aren't good enough yet - I'm fighting the borrow checker it seems. My idea was to queue up any test assertions that belong to a line we haven't read yet. Here's the diff in case it helps someone else patch it up. I'm creating the queue with an initial capacity of 2 - I think no syntax test files reference more than one line below at the moment.

--- syntect/master/examples/syntest.rs
+++ syntect/syntest/examples/syntest.rs
@@ -25,6 +25,7 @@
 use std::cmp::{min, max};
 use std::time::Instant;
 use std::str::FromStr;
+use std::collections::VecDeque;
 
 use getopts::Options;
 use regex::Regex;
@@ -64,6 +65,7 @@
 
 #[derive(Debug)]
 struct AssertionRange<'a> {
+    line_number: usize,
     begin_char: usize,
     end_char: usize,
     scope_selector_text: &'a str,
@@ -84,7 +86,7 @@
     success: bool,
 }
 
-fn get_line_assertion_details<'a>(testtoken_start: &str, testtoken_end: Option<&str>, line: &'a str) -> Option<AssertionRange<'a>> {
+fn get_line_assertion_details<'a>(testtoken_start: &str, testtoken_end: Option<&str>, line: &'a str, line_number: usize) -> Option<AssertionRange<'a>> {
     // if the test start token specified in the test file's header is on the line
     if let Some(index) = line.find(testtoken_start) {
         let (before_token_start, token_and_rest_of_line) = line.split_at(index);
@@ -101,6 +103,7 @@
                 }
             }
             return Some(AssertionRange {
+                line_number: line_number,
                 begin_char: index + if captures.get(2).is_some() { testtoken_start.len() + captures.get(2).unwrap().start() } else { 0 },
                 end_char: index + if captures.get(2).is_some() { testtoken_start.len() + captures.get(2).unwrap().end() } else { 1 },
                 scope_selector_text: sst,
@@ -126,17 +129,6 @@
         };
         results.push(result);
     }
-    // don't ignore assertions after the newline, they should be treated as though they are asserting against the newline
-    let last = test_against_line_scopes.last().unwrap();
-    if last.char_start + last.text_len < assertion.end_char {
-        let match_value = selector.does_match(last.scope.as_slice());
-        let result = RangeTestResult {
-            column_begin: max(last.char_start + last.text_len, assertion.begin_char),
-            column_end: assertion.end_char,
-            success: match_value.is_some()
-        };
-        results.push(result);
-    }
     results
 }
 
@@ -175,7 +167,8 @@
 
     let mut current_line_number = 1;
     let mut test_against_line_number = 1;
-    let mut scopes_on_line_being_tested = Vec::new();
+    let mut scopes_on_line_being_tested : Vec<ScopedText> = Vec::new();
+    let mut assertions : VecDeque<AssertionRange> = VecDeque::with_capacity(2);
     let mut previous_non_assertion_line = line.to_string();
 
     let mut assertion_failures: usize = 0;
@@ -184,7 +177,32 @@
     loop { // over lines of file, starting with the header line
         let mut line_only_has_assertion = false;
         let mut line_has_assertion = false;
-        if let Some(assertion) = get_line_assertion_details(testtoken_start, testtoken_end, &line) {
+        
+        // get assertion details from the current line
+        if let Some(assertion) = get_line_assertion_details(testtoken_start, testtoken_end, &line, current_line_number) {
+            // add them to the end of the queue, we will process any remainders from the previous line(s) first
+            assertions.push_back(assertion);
+        }
+        let mut assertions_left_to_check = assertions.len();
+        loop { // loop over unprocessed assertion ranges
+            if assertions_left_to_check == 0 {
+                break;
+            }
+            let assertion = assertions.pop_front().unwrap();
+            assertions_left_to_check -= 1;
+            
+            // assertions after the newline reference a future line, probably the next one
+            let last = &scopes_on_line_being_tested.last().unwrap();
+            if last.char_start + last.text_len < assertion.end_char {
+                assertions.push_back(AssertionRange {
+                    line_number: assertion.line_number,
+                    begin_char: last.char_start - assertion.begin_char, // adjust the offset of the range to start from position 0 on the next line
+                    end_char: last.char_start - assertion.end_char,
+                    scope_selector_text: assertion.scope_selector_text,
+                    is_pure_assertion_line: false
+                });
+            }
+            
             let result = process_assertions(&assertion, &scopes_on_line_being_tested);
             total_assertions += &assertion.end_char - &assertion.begin_char;
             for failure in result.iter().filter(|r|!r.success) {
@@ -196,15 +214,18 @@
                         (with text {:?}) \
                         has scope {:?}",
                         assertion.scope_selector_text.trim(),
-                        current_line_number, test_against_line_number, failure.column_begin, failure.column_end,
+                        assertion.line_number, test_against_line_number, failure.column_begin, failure.column_end,
                         text,
                         scopes_on_line_being_tested.iter().skip_while(|s|s.char_start + s.text_len <= failure.column_begin).next().unwrap_or(scopes_on_line_being_tested.last().unwrap()).scope
                     );
                 }
                 assertion_failures += failure.column_end - failure.column_begin;
             }
-            line_only_has_assertion = assertion.is_pure_assertion_line;
-            line_has_assertion = true;
+            
+            if assertion.line_number == current_line_number { // if the assertion range is on the line being processed
+                line_only_has_assertion = assertion.is_pure_assertion_line;
+                line_has_assertion = true;
+            }
         }
         if !line_only_has_assertion || parse_test_lines {
             if !line_has_assertion { // ST seems to ignore lines that have assertions when calculating which line the assertion tests against

but it doesn't compile (so there could be lots more problems with my logic that I haven't found yet):

error[E0502]: cannot borrow `line` as mutable because it is also borrowed as immutable
   --> examples\syntest.rs:269:9
    |
182 |         if let Some(assertion) = get_line_assertion_details(testtoken_start, testtoken_end, &line, current_line_number) {
    |
                   ---- immutable borrow occurs here
...
269 |         line.clear();
    |         ^^^^ mutable borrow occurs here
...
292 | }
    | - immutable borrow ends here

error[E0502]: cannot borrow `line` as mutable because it is also borrowed as immutable
   --> examples\syntest.rs:271:34
    |
182 |         if let Some(assertion) = get_line_assertion_details(testtoken_start, testtoken_end, &line, current_line_number) {
    |
                   ---- immutable borrow occurs here
...
271 |         if reader.read_line(&mut line).unwrap() == 0 {
    |                                  ^^^^ mutable borrow occurs here
...
292 | }
    | - immutable borrow ends here

error[E0506]: cannot assign to `line` because it is borrowed
   --> examples\syntest.rs:274:9
    |
182 |         if let Some(assertion) = get_line_assertion_details(testtoken_start, testtoken_end, &line, current_line_number) {
    |
                   ---- borrow of `line` occurs here
...
274 |         line = line.replace("\r", &"");
    |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ assignment to borrowed `line` occurs here

error: aborting due to 3 previous errors

error: Could not compile `syntect`.

To learn more, run the command again with --verbose.

Jun 05 '18 13:06 keith-hall

I think a better approach, and probably the one used in Sublime, is to parse all assertions as referring to a byte range in the document, sort those, then while iterating through the file keep track of the current byte index and iterate through the assertions list with that.

Jun 05 '18 17:06 trishume

syntect syntect copied to clipboard

`syntest` and assertions that extend over the end of the line

syntect
syntect copied to clipboard