orgajs Parsing list with sub elements results in paragraph not part of list

Parsing list with sub elements results in paragraph not part of list

Open garrett-hopper opened this issue 3 years ago • 1 comment

I'd like to be able to parse org documents in which a list item continues on an indented following line, without the list being split into separate lists.

Tools like Pandoc handle this correctly when generating HTML, and org itself will collapse these sub elements as part of the list with org-cycle.

- Item 1
  Sub element to Item 1
- Item 2

import { readFile } from 'fs/promises';
import { parse } from 'orga';

(async () => {
  const org = await readFile('test.org');
  console.log(JSON.stringify(parse(org.toString()), (k, v) => k === 'parent' ? undefined : v));
})();

{
  "type": "document",
  "properties": {},
  "children": [
    {
      "type": "list",
      "indent": 0,
      "ordered": false,
      "children": [
        {
          "type": "list.item",
          "indent": 0,
          "children": [
            {
              "type": "list.item.bullet",
              "indent": 0,
              "ordered": false,
              "position": {
                "start": {
                  "line": 1,
                  "column": 1
                },
                "end": {
                  "line": 1,
                  "column": 2
                }
              }
            },
            {
              "type": "text.plain",
              "value": "Item 1",
              "position": {
                "start": {
                  "line": 1,
                  "column": 3
                },
                "end": {
                  "line": 1,
                  "column": 9
                }
              }
            }
          ],
          "position": {
            "start": {
              "line": 1,
              "column": 1
            },
            "end": {
              "line": 1,
              "column": 9
            }
          }
        }
      ],
      "attributes": {},
      "position": {
        "start": {
          "line": 1,
          "column": 1
        },
        "end": {
          "line": 1,
          "column": 9
        }
      }
    },
    {
      "type": "paragraph", <--- paragraph splits list into two separate lists
      "children": [
        {
          "type": "text.plain",
          "value": "Sub element to Item 1",
          "position": {
            "start": {
              "line": 3,
              "column": 3
            },
            "end": {
              "line": 3,
              "column": 24
            }
          }
        }
      ],
      "attributes": {},
      "position": {
        "start": {
          "line": 3,
          "column": 3
        },
        "end": {
          "line": 5,
          "column": 1
        }
      }
    },
    {
      "type": "list",
      "indent": 0,
      "ordered": false,
      "children": [
        {
          "type": "list.item",
          "indent": 0,
          "children": [
            {
              "type": "list.item.bullet",
              "indent": 0,
              "ordered": false,
              "position": {
                "start": {
                  "line": 5,
                  "column": 1
                },
                "end": {
                  "line": 5,
                  "column": 2
                }
              }
            },
            {
              "type": "text.plain",
              "value": "Item 2",
              "position": {
                "start": {
                  "line": 5,
                  "column": 3
                },
                "end": {
                  "line": 5,
                  "column": 9
                }
              }
            }
          ],
          "position": {
            "start": {
              "line": 5,
              "column": 1
            },
            "end": {
              "line": 5,
              "column": 9
            }
          }
        }
      ],
      "attributes": {},
      "position": {
        "start": {
          "line": 5,
          "column": 1
        },
        "end": {
          "line": 5,
          "column": 9
        }
      }
    }
  ],
  "position": {
    "start": {
      "line": 1,
      "column": 1
    },
    "end": {
      "line": 5,
      "column": 9
    }
  }
}

Jun 06 '21 01:06 garrett-hopper

This is the workaround I'm using for now. It would definitely need some reworking before it could actually be used, though. It keeps the newline tokens inside the list item, which I use later to split the item into its parts. It also includes a change to parse blocks that appear inside a list item.

diff --git a/packages/orga/src/parse/list.ts b/packages/orga/src/parse/list.ts
index cd37386..d88d128 100644
--- a/packages/orga/src/parse/list.ts
+++ b/packages/orga/src/parse/list.ts
@@ -1,9 +1,12 @@
 import { push } from '../node'
 import { Lexer } from '../tokenize'
 import { List, ListItem, ListItemBullet } from '../types'
+import utils from './utils'
+import parseBlock from './block'
 
 export default (lexer: Lexer): List | undefined => {
   const { peek, eat } = lexer
+  const { tryTo } = utils(lexer);
 
   const token = peek()
   if (!token || token.type !== 'list.item.bullet') return undefined
@@ -20,11 +23,19 @@ export default (lexer: Lexer): List | undefined => {
 
   const parseListItem = (listItem: ListItem): ListItem => {
     const token = peek()
-    if (!token || token.type === 'newline')
-      return listItem
+
+    if (!token) {
+      return listItem;
+    } else if (token.type === 'list.item.bullet') {
+      if (listItem.children.length > 0) return listItem;
+    } else if (token.position.start.column === 1 && token.type !== 'newline') {
+      return listItem;
+    }
 
     if (token.type === 'list.item.tag') {
       listItem.tag = token.value
+    } else if (tryTo(parseBlock)(push(listItem))) {
+      return parseListItem(listItem)
     } else {
       push(listItem)(token)
     }

Jun 07 '21 16:06 garrett-hopper

orgajs orgajs copied to clipboard

Parsing list with sub elements results in paragraph not part of list

orgajs
orgajs copied to clipboard