md4c icon indicating copy to clipboard operation
md4c copied to clipboard

Callbacks returning positive numbers

Open nyankers opened this issue 1 year ago • 3 comments

Per md4c.h, a callback may abort further processing by returning non-zero.

This is partially implemented in the various direct calls to the callback functions, but because all of these functions ultimately just pass along whatever value the callback returns, it'll eventually get caught by a MD_CHECK() call. Unfortunately, MD_CHECK() only aborts on negative values, not positive non-zero values, which can cause some pretty strange behavior.

Some possible fixes include:

  • Change md4c.h to specify negative numbers, and declare positive non-zero numbers as undefined.
  • Change MD_CHECK() from checking ret < 0 to ret != 0. This seems to resolve the issue, but I'm not sure if anything depends on this behavior.
  • Make all the direct handler calls return -1 on abort, either by setting ret to -1 or just returning it directly, where appropriate (there are a few text() calls outside the common macros that just return ret afterward).

nyankers avatar Aug 24 '24 00:08 nyankers

@nyankers, can you please show me a markdown example that shows the issue at work, is it possible? Thank you.

step- avatar Aug 24 '24 09:08 step-

This is an issue with the API, so virtually any markdown can demonstrate it, but the following C code should show what I mean:

#include <stdio.h>
#include <string.h>
#include <md4c.h>

struct return_values {
	int enter_block;
	int leave_block;
	int enter_span;
	int leave_span;
	int text;
};

static int handle_enter_block(MD_BLOCKTYPE type, void *detail, void *userdata) {
	int ret = ((struct return_values*) userdata)->enter_block;
	printf("\tenter_block: %d\n", ret);
	return ret;
}

static int handle_leave_block(MD_BLOCKTYPE type, void *detail, void *userdata) {
	int ret = ((struct return_values*) userdata)->leave_block;
	printf("\tleave_block: %d\n", ret);
	return ret;
}

static int handle_enter_span(MD_SPANTYPE type, void *detail, void *userdata) {
	int ret = ((struct return_values*) userdata)->enter_span;
	printf("\tenter_span: %d\n", ret);
	return ret;
}

static int handle_leave_span(MD_SPANTYPE type, void *detail, void *userdata) {
	int ret = ((struct return_values*) userdata)->leave_span;
	printf("\tleave_span: %d\n", ret);
	return ret;
}

static int handle_text(MD_TEXTTYPE type, const MD_CHAR *text, MD_SIZE size, void *userdata) {
	int ret = ((struct return_values*) userdata)->text;
	printf("\ttext: %d (%.*s)\n", ret, size, text);
	return ret;
}

static int test(MD_PARSER *parser, struct return_values *ret, const char *str, int *n, const char *name) {
	printf("\nTesting %s:\n", name);
	for (*n = -1; *n <= 1; *n += 2) {
		printf("\n%s: %d\n", name, *n);
		if (md_parse(str, strlen(str), parser, ret) != 0) {
			printf("Parser failed or aborted.\n");
		} else {
			printf("Parser succeeded.\n");
		}
	}
	*n = 0;
}

int main(int argc, const char **argv) {
	MD_PARSER parser;
	struct return_values ret = {0,0,0,0,0};
	const char *md = "foo *bar*\n\nfoo **bar**";

	parser.abi_version = 0;
	parser.flags = 0;
	parser.enter_block = &handle_enter_block;
	parser.leave_block = &handle_leave_block;
	parser.enter_span = &handle_enter_span;
	parser.leave_span = &handle_leave_span;
	parser.text = &handle_text;
	parser.debug_log = NULL;
	parser.syntax = NULL;

	test(&parser, &ret, md, &ret.enter_block, "enter_block");
	test(&parser, &ret, md, &ret.leave_block, "leave_block");
	test(&parser, &ret, md, &ret.enter_span, "enter_span");
	test(&parser, &ret, md, &ret.leave_span, "leave_span");
	test(&parser, &ret, md, &ret.text, "text");

	return 0;
}

Given the comments in md4c.h, I would expect returning -1 or 1 to give the same behavior for each parser function, but the output with the latest code is:


Testing enter_block:

enter_block: -1
	enter_block: -1
Parser failed or aborted.

enter_block: 1
	enter_block: 1
Parser failed or aborted.

Testing leave_block:

leave_block: -1
	enter_block: 0
	enter_block: 0
	text: 0 (foo )
	enter_span: 0
	text: 0 (bar)
	leave_span: 0
	leave_block: -1
Parser failed or aborted.

leave_block: 1
	enter_block: 0
	enter_block: 0
	text: 0 (foo )
	enter_span: 0
	text: 0 (bar)
	leave_span: 0
	leave_block: 1
	enter_block: 0
	text: 0 (foo )
	enter_span: 0
	text: 0 (bar)
	leave_span: 0
	leave_block: 1
	leave_block: 1
Parser failed or aborted.

Testing enter_span:

enter_span: -1
	enter_block: 0
	enter_block: 0
	text: 0 (foo )
	enter_span: -1
Parser failed or aborted.

enter_span: 1
	enter_block: 0
	enter_block: 0
	text: 0 (foo )
	enter_span: 1
	leave_block: 0
	enter_block: 0
	text: 0 (foo )
	enter_span: 1
	leave_block: 0
	leave_block: 0
Parser succeeded.

Testing leave_span:

leave_span: -1
	enter_block: 0
	enter_block: 0
	text: 0 (foo )
	enter_span: 0
	text: 0 (bar)
	leave_span: -1
Parser failed or aborted.

leave_span: 1
	enter_block: 0
	enter_block: 0
	text: 0 (foo )
	enter_span: 0
	text: 0 (bar)
	leave_span: 1
	leave_block: 0
	enter_block: 0
	text: 0 (foo )
	enter_span: 0
	text: 0 (bar)
	leave_span: 1
	leave_block: 0
	leave_block: 0
Parser succeeded.

Testing text:

text: -1
	enter_block: 0
	enter_block: 0
	text: -1 (foo )
Parser failed or aborted.

text: 1
	enter_block: 0
	enter_block: 0
	text: 1 (foo )
	leave_block: 0
	enter_block: 0
	text: 1 (foo )
	leave_block: 0
	leave_block: 0
Parser succeeded.

As you can see, aside from enter_block, returning 1 instead of -1 will often cause the parser to keep going after the first attempt to abort (and in fairly unpredictable ways), and may even result md_parse() indicating a success by returning 0.

nyankers avatar Aug 26 '24 13:08 nyankers

I see. Thank you for taking the time to add the demonstration C code.

step- avatar Aug 26 '24 13:08 step-