compute-engine icon indicating copy to clipboard operation
compute-engine copied to clipboard

LaTeX parsing: Error when parsing subscripts with multiple non-numeric symbols

Open tejashah88 opened this issue 3 months ago • 1 comments

Description

I found a bunch of edge cases for parsing LaTeX expressions, specifically when dealing with variables that have multiple non-numeric symbols in their subscript. They either yield results that are inconsistent with the expected ones or produce a MathJSON "incompatible-type" error. This was tested on the mathlive.io demo website as of 10-10-2025.

Steps to Reproduce

import { ComputeEngine } from "@cortex-js/compute-engine";

// Engine instance used by convertLatexToMathJson() below for both parsing and boxing.
const ce = new ComputeEngine();

/**
 * Build the six LaTeX test expressions for one (possibly subscripted) variable:
 * the bare variable, a sum, an implicit product, an explicit product,
 * a fraction and a power.
 *
 * @param {string} symbol - LaTeX source of the variable, e.g. 'A_{CD}'.
 * @returns {string[]} The six expressions, in the order listed above.
 */
const buildBatch = (symbol) => [
    symbol,
    `${symbol}+1`,
    `2${symbol}`,
    `2\\cdot ${symbol}`,
    `\\frac{${symbol}}{4}`,
    `${symbol}^2`,
];

// Single-letter variable, numeric subscript.
const BATCH_1 = buildBatch('A_1');

// Single-letter variable, single-letter subscript.
const BATCH_2 = buildBatch('A_{B}');

// Single-letter variable, two-letter subscript.
const BATCH_3 = buildBatch('A_{CD}');

// Two letters before a two-letter subscript.
const BATCH_4 = buildBatch('AB_{CD}');

/**
 * Convert a LaTeX string to MathJSON.
 *
 * The string is parsed with `ce.parse()`, the result is passed through
 * `ce.box()` with `{ canonical: true }`, and the `json` property of the
 * boxed expression is returned.
 *
 * @param {string} latex - LaTeX source to convert.
 * @returns {*} The `json` property of the boxed expression.
 */
function convertLatexToMathJson(latex) {
    return ce.box(ce.parse(latex), { canonical: true }).json;
}

// Run testing: print each batch under its own numbered heading, converting
// every expression to MathJSON. A blank line separates consecutive batches.
for (const [index, batch] of [BATCH_1, BATCH_2, BATCH_3, BATCH_4].entries()) {
    if (index > 0) {
        console.log();
    }

    console.log(`Batch ${index + 1} Testing`);
    for (const latex of batch) {
        const pretty = JSON.stringify(convertLatexToMathJson(latex), null, 2);
        console.log(latex, ' => ', pretty);
    }
}

Raw Script Output

Batch 1 Testing (Working)

Image
A_1  =>  "A_1"
A_1+1  =>  [
  "Add",
  "A_1",
  1
]
2A_1  =>  [
  "Multiply",
  2,
  "A_1"
]
2\cdot A_1  =>  [
  "Multiply",
  2,
  "A_1"
]
\frac{A_1}{4}  =>  [
  "Multiply",
  [
    "Rational",
    1,
    4
  ],
  "A_1"
]
A_1^2  =>  [
  "Power",
  "A_1",
  2
]

Batch 2 Testing (Working)

Image
A_{B}  =>  "A_B"
A_{B}+1  =>  [
  "Add",
  "A_B",
  1
]
2A_{B}  =>  [
  "Multiply",
  2,
  "A_B"
]
2\cdot A_{B}  =>  [
  "Multiply",
  2,
  "A_B"
]
\frac{A_{B}}{4}  =>  [
  "Multiply",
  [
    "Rational",
    1,
    4
  ],
  "A_B"
]
A_{B}^2  =>  [
  "Power",
  "A_B",
  2
]

Batch 3 Testing

Image
A_{CD}  =>  [
  "Subscript",
  "A",
  [
    "InvisibleOperator",
    "C",
    "D"
  ]
]
A_{CD}+1  =>  [
  "Add",
  [
    "Error",
    [
      "ErrorCode",
      "'incompatible-type'",
      "number",
      "symbol"
    ]
  ],
  1
]
2A_{CD}  =>  [
  "Tuple",
  2,
  [
    "Subscript",
    "A",
    [
      "InvisibleOperator",
      "C",
      "D"
    ]
  ]
]
2\cdot A_{CD}  =>  [
  "Multiply",
  2,
  [
    "Error",
    [
      "ErrorCode",
      "'incompatible-type'",
      "number",
      "symbol"
    ]
  ]
]
\frac{A_{CD}}{4}  =>  [
  "Divide",
  [
    "Error",
    [
      "ErrorCode",
      "'incompatible-type'",
      "number",
      "symbol"
    ]
  ],
  4
]
A_{CD}^2  =>  [
  "Power",
  [
    "Error",
    [
      "ErrorCode",
      "'incompatible-type'",
      "number",
      "symbol"
    ]
  ],
  2
]

Batch 4 Testing

Image
AB_{CD}  =>  [
  "Tuple",
  "A",
  [
    "Subscript",
    "B",
    [
      "InvisibleOperator",
      "C",
      "D"
    ]
  ]
]
AB_{CD}+1  =>  [
  "Add",
  [
    "Error",
    [
      "ErrorCode",
      "'incompatible-type'",
      "number",
      "'tuple<unknown, symbol>'"
    ]
  ],
  1
]
2AB_{CD}  =>  [
  "Tuple",
  2,
  "A",
  [
    "Subscript",
    "B",
    [
      "InvisibleOperator",
      "C",
      "D"
    ]
  ]
]
2\cdot AB_{CD}  =>  [
  "Multiply",
  2,
  [
    "Error",
    [
      "ErrorCode",
      "'incompatible-type'",
      "number",
      "'tuple<unknown, symbol>'"
    ]
  ]
]
\frac{AB_{CD}}{4}  =>  [
  "Divide",
  [
    "Error",
    [
      "ErrorCode",
      "'incompatible-type'",
      "number",
      "'tuple<unknown, symbol>'"
    ]
  ],
  4
]
AB_{CD}^2  =>  [
  "Tuple",
  "A",
  [
    "Power",
    [
      "Error",
      [
        "ErrorCode",
        "'incompatible-type'",
        "number",
        "symbol"
      ]
    ],
    2
  ]
]

Environment

Screenshots are from the mathlive.io demo website: https://mathlive.io/mathfield/demo/ using compute engine v0.30.2. The testing code was run against Node.js v22.20.0 and NPM v10.9.3 using compute engine v0.30.2.

tejashah88 avatar Oct 11 '25 04:10 tejashah88

One of the challenges is how to handle multi-letter subscripts (and multi-letter variables) when they can be ambiguously interpreted either as an invisible multiplication or as a sequence of letters. For example, is vk_2 the same as \operatorname{vk}_2 or v\operatorname{k}_2? Or is A_{BC} the same as A_{\operatorname{BC}} or A_{\operatorname{B}\operatorname{C}}? Right now, it's possible to clarify by using an explicit grouping command, so for example A_{\mathit{BC}} when "BC" is a single variable, or A_{BC} when "B" and "C" are separate variables. If a raw "BC" was automatically interpreted as \operatorname{BC} then it would no longer be possible to use "BC" to mean implicit multiplication.

arnog avatar Oct 21 '25 15:10 arnog