orgajs
orgajs copied to clipboard
Parsing list with sub elements results in paragraph not part of list
I'd like to be able to parse Org documents in which a list item is followed by indented continuation content on later lines, without the list being split into separate lists.
Tools like Pandoc handle this correctly when generating HTML, and Org itself collapses these sub-elements as part of the list with `org-cycle`.
- Item 1

  Sub element to Item 1

- Item 2
import { readFile } from 'fs/promises';
import { parse } from 'orga';
// Reproduction script: read test.org, parse it with orga, and print the
// resulting tree as JSON with every `parent` property left out.
(async () => {
  const source = (await readFile('test.org')).toString();
  const tree = parse(source);
  console.log(
    JSON.stringify(tree, (key, value) => {
      // Returning undefined from a replacer omits that property from the output.
      if (key === 'parent') return undefined;
      return value;
    }),
  );
})();
{
"type": "document",
"properties": {},
"children": [
{
"type": "list",
"indent": 0,
"ordered": false,
"children": [
{
"type": "list.item",
"indent": 0,
"children": [
{
"type": "list.item.bullet",
"indent": 0,
"ordered": false,
"position": {
"start": {
"line": 1,
"column": 1
},
"end": {
"line": 1,
"column": 2
}
}
},
{
"type": "text.plain",
"value": "Item 1",
"position": {
"start": {
"line": 1,
"column": 3
},
"end": {
"line": 1,
"column": 9
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 1
},
"end": {
"line": 1,
"column": 9
}
}
}
],
"attributes": {},
"position": {
"start": {
"line": 1,
"column": 1
},
"end": {
"line": 1,
"column": 9
}
}
},
{
"type": "paragraph", <--- paragraph splits list into two separate lists
"children": [
{
"type": "text.plain",
"value": "Sub element to Item 1",
"position": {
"start": {
"line": 3,
"column": 3
},
"end": {
"line": 3,
"column": 24
}
}
}
],
"attributes": {},
"position": {
"start": {
"line": 3,
"column": 3
},
"end": {
"line": 5,
"column": 1
}
}
},
{
"type": "list",
"indent": 0,
"ordered": false,
"children": [
{
"type": "list.item",
"indent": 0,
"children": [
{
"type": "list.item.bullet",
"indent": 0,
"ordered": false,
"position": {
"start": {
"line": 5,
"column": 1
},
"end": {
"line": 5,
"column": 2
}
}
},
{
"type": "text.plain",
"value": "Item 2",
"position": {
"start": {
"line": 5,
"column": 3
},
"end": {
"line": 5,
"column": 9
}
}
}
],
"position": {
"start": {
"line": 5,
"column": 1
},
"end": {
"line": 5,
"column": 9
}
}
}
],
"attributes": {},
"position": {
"start": {
"line": 5,
"column": 1
},
"end": {
"line": 5,
"column": 9
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 1
},
"end": {
"line": 5,
"column": 9
}
}
}
This is what I'm using for now. It would definitely need some reworking before it could actually be merged. It keeps the newline tokens inside the list item, which I use later to split the item into its parts. It also includes a change to parse blocks that appear inside a list item.
diff --git a/packages/orga/src/parse/list.ts b/packages/orga/src/parse/list.ts
index cd37386..d88d128 100644
--- a/packages/orga/src/parse/list.ts
+++ b/packages/orga/src/parse/list.ts
@@ -1,9 +1,12 @@
import { push } from '../node'
import { Lexer } from '../tokenize'
import { List, ListItem, ListItemBullet } from '../types'
+import utils from './utils'
+import parseBlock from './block'
export default (lexer: Lexer): List | undefined => {
const { peek, eat } = lexer
+ const { tryTo } = utils(lexer);
const token = peek()
if (!token || token.type !== 'list.item.bullet') return undefined
@@ -20,11 +23,19 @@ export default (lexer: Lexer): List | undefined => {
const parseListItem = (listItem: ListItem): ListItem => {
const token = peek()
- if (!token || token.type === 'newline')
- return listItem
+
+ if (!token) {
+ return listItem;
+ } else if (token.type === 'list.item.bullet') {
+ if (listItem.children.length > 0) return listItem;
+ } else if (token.position.start.column === 1 && token.type !== 'newline') {
+ return listItem;
+ }
if (token.type === 'list.item.tag') {
listItem.tag = token.value
+ } else if (tryTo(parseBlock)(push(listItem))) {
+ return parseListItem(listItem)
} else {
push(listItem)(token)
}