ponyc icon indicating copy to clipboard operation
ponyc copied to clipboard

Generated documentation default value display bug

Open SeanTAllen opened this issue 4 years ago • 5 comments

You can see this with both Array.slice and String.substring. The docs show call as the value rather than:

USize: -1

and

ISize.max_value()

The fix for this is almost assuredly in docgen.c as that is the documentation generation code. If you know c, you should be able to tackle this one.

Seems like a boundary bug.

Thanks to __red__ from Zulip who got me looking at this issue.

SeanTAllen avatar Oct 06 '19 02:10 SeanTAllen

I believe this is exactly the same as #1262 and is the issue that #1323 is trying to address.

More specifically, the code_block_doc_params function assumes default values are literals (?) and calls ast_get_print which only returns a string for the first token in the AST node. For positive integers, booleans, strings, etc. this works, but fails for complex expressions.

For example:

(seq:scope 1)

(Prints "1")

(seq:scope (call (. (reference (id ISize)) (id max_value)) x x x))

(Prints "call")

In order to have the expected value in the documentation, there needs to be a generic AST->Pony code generator/"unparser". I don't think this exists?

presidentbeef avatar Oct 07 '19 23:10 presidentbeef

Yeah, we don't have a generic "unparser" within ponyc.

Perhaps an easier (but still significant effort) approach would be to update our parser to track not only the start position of the token but also the token length, so that we could render in the docs the exact string that was in the source code rather than having to generate/reconstruct it ourselves.

@sylvanc says (on today's sync call) that if we do this, we should make it a 32-bit integer (rather than size_t) and put it near the end of the token struct (just before bool frozen) to keep the total size <= 64 bytes, meaning that we can add this information without impacting memory usage.

https://github.com/ponylang/ponyc/blob/ddaa792a310eb14d7af956368a0a043479d5cca8/src/libponyc/ast/token.c#L11-L34

jemc avatar Oct 08 '19 16:10 jemc

This is related to:

https://github.com/ponylang/ponyc/issues/3727

SeanTAllen avatar Feb 15 '22 15:02 SeanTAllen

So...

to do this, we need to touch a number of other data structures...

this appears to be the diff to thread everything through:

diff --git a/src/libponyc/ast/ast.c b/src/libponyc/ast/ast.c
index 481e5956..1cfe4cbd 100644
--- a/src/libponyc/ast/ast.c
+++ b/src/libponyc/ast/ast.c
@@ -565,11 +565,11 @@ ast_t* ast_setid(ast_t* ast, token_id id)
   return ast;
 }
 
-void ast_setpos(ast_t* ast, source_t* source, size_t line, size_t pos)
+void ast_setpos(ast_t* ast, source_t* source, size_t line, size_t pos, size_t len)
 {
   pony_assert(ast != NULL);
   pony_assert(!ast->frozen);
-  token_set_pos(ast->t, source, line, pos);
+  token_set_pos(ast->t, source, line, pos, len);
 }
 
 token_id ast_id(ast_t* ast)
@@ -590,6 +590,12 @@ size_t ast_pos(ast_t* ast)
   return token_line_position(ast->t);
 }
 
+size_t ast_len(ast_t* ast)
+{
+  pony_assert(ast != NULL);
+  return token_line_length(ast->t);
+}
+
 source_t* ast_source(ast_t* ast)
 {
   pony_assert(ast != NULL);
diff --git a/src/libponyc/ast/ast.h b/src/libponyc/ast/ast.h
index 0e5157b6..0fb9c488 100644
--- a/src/libponyc/ast/ast.h
+++ b/src/libponyc/ast/ast.h
@@ -68,11 +68,12 @@ bool ast_has_scope(ast_t* ast);
 void ast_set_scope(ast_t* ast, ast_t* scope);
 symtab_t* ast_get_symtab(ast_t* ast);
 ast_t* ast_setid(ast_t* ast, token_id id);
-void ast_setpos(ast_t* ast, source_t* source, size_t line, size_t pos);
+void ast_setpos(ast_t* ast, source_t* source, size_t line, size_t pos, size_t len);
 
 token_id ast_id(ast_t* ast);
 size_t ast_line(ast_t* ast);
 size_t ast_pos(ast_t* ast);
+size_t ast_len(ast_t* ast);
 source_t* ast_source(ast_t* ast);
 
 void* ast_data(ast_t* ast);
@@ -91,6 +92,7 @@ void ast_clearflag(ast_t* ast, uint32_t flag);
 void ast_resetpass(ast_t *ast, uint32_t flag);
 
 const char* ast_get_print(ast_t* ast);
+const char* ast_from_source_print(ast_t* ast);
 const char* ast_name(ast_t* ast);
 const char* ast_nice_name(ast_t* ast);
 size_t ast_name_len(ast_t* ast);
diff --git a/src/libponyc/ast/lexer.c b/src/libponyc/ast/lexer.c
index f6d2771a..02b639c2 100644
--- a/src/libponyc/ast/lexer.c
+++ b/src/libponyc/ast/lexer.c
@@ -27,6 +27,7 @@ struct lexer_t
   // Position of current token
   size_t token_line;
   size_t token_pos;
+  size_t token_len;
 
   // Buffer containing current token text
   char* buffer;
@@ -363,7 +364,7 @@ static void append_to_token(lexer_t* lexer, char c)
 static token_t* make_token(lexer_t* lexer, token_id id)
 {
   token_t* t = token_new(id);
-  token_set_pos(t, lexer->source, lexer->token_line, lexer->token_pos);
+  token_set_pos(t, lexer->source, lexer->token_line, lexer->token_pos, lexer->token_len);
   return t;
 }
 
@@ -1299,6 +1300,7 @@ token_t* lexer_next(lexer_t* lexer)
   {
     lexer->token_line = lexer->line;
     lexer->token_pos = lexer->pos;
+    lexer->token_len = lexer->len;
     lexer->buflen = 0;
 
     if(is_eof(lexer))
diff --git a/src/libponyc/ast/parserapi.c b/src/libponyc/ast/parserapi.c
index 590013c7..a90860c9 100644
--- a/src/libponyc/ast/parserapi.c
+++ b/src/libponyc/ast/parserapi.c
@@ -35,7 +35,7 @@ static void fetch_next_lexer_token(parser_t* parser, bool free_last_token)
   {
     // Use location of last token for EOF to get better error reporting
     token_set_pos(new_token, token_source(old_token),
-      token_line_number(old_token), token_line_position(old_token));
+      token_line_number(old_token), token_line_position(old_token), token_line_length(old_token));
   }
 
   if(old_token != NULL)
@@ -176,7 +176,7 @@ static void process_deferred_ast(parser_t* parser, rule_state_t* state)
   if(state->deferred)
   {
     token_t* deferred_token = token_new(state->deferred_id);
-    token_set_pos(deferred_token, parser->source, state->line, state->pos);
+    token_set_pos(deferred_token, parser->source, state->line, state->pos, state->len);
     state->ast = ast_token(deferred_token);
     state->deferred = false;
   }
@@ -224,6 +224,7 @@ void add_deferrable_ast(parser_t* parser, rule_state_t* state, token_id id,
     state->deferred_id = id;
     state->line = token_line_number(token_for_pos);
     state->pos = token_line_position(token_for_pos);
+    state->len = token_line_length(token_for_pos);
     return;
   }
 
diff --git a/src/libponyc/ast/parserapi.h b/src/libponyc/ast/parserapi.h
index e11b09bd..507c1651 100644
--- a/src/libponyc/ast/parserapi.h
+++ b/src/libponyc/ast/parserapi.h
@@ -91,7 +91,7 @@ typedef struct rule_state_t
   bool scope;           // Is this rule a scope
   bool deferred;        // Do we have a deferred AST node
   token_id deferred_id; // ID of deferred AST node
-  size_t line, pos;     // Location to claim deferred node is from
+  size_t line, pos, len;     // Location to claim deferred node is from
 } rule_state_t;
 
 
@@ -158,7 +158,7 @@ bool parse(ast_t* package, source_t* source, rule_t start, const char* expected,
   { \
     (void)out_builder; \
     rule_state_t state = {#rule, NULL, NULL, rule_desc, NULL, TK_LEX_ERROR, \
-      false, false, false, TK_NONE, 0, 0}
+      false, false, false, TK_NONE, 0, 0, 0}
 
 
 /** Specify a restart point.
diff --git a/src/libponyc/ast/token.c b/src/libponyc/ast/token.c
index a838532b..a63b9337 100644
--- a/src/libponyc/ast/token.c
+++ b/src/libponyc/ast/token.c
@@ -28,6 +28,7 @@ struct token_t
     lexint_t integer;
   };
 
+  size_t len;
 #ifndef PONY_NDEBUG
   bool frozen;
 #endif
@@ -298,6 +299,13 @@ size_t token_line_position(token_t* token)
 }
 
 
+size_t token_line_length(token_t* token)
+{
+  pony_assert(token != NULL);
+  return token->len;
+}
+
+
 // Write accessors
 
 void token_set_id(token_t* token, token_id id)
@@ -341,7 +349,7 @@ void token_set_int(token_t* token, lexint_t* value)
 }
 
 
-void token_set_pos(token_t* token, source_t* source, size_t line, size_t pos)
+void token_set_pos(token_t* token, source_t* source, size_t line, size_t pos, size_t len)
 {
   pony_assert(token != NULL);
   pony_assert(!token->frozen);
@@ -351,6 +359,7 @@ void token_set_pos(token_t* token, source_t* source, size_t line, size_t pos)
 
   token->line = line;
   token->pos = pos;
+  token->len = len;
 }
 
 // Serialisation
diff --git a/src/libponyc/ast/token.h b/src/libponyc/ast/token.h
index 497c9354..7a87953e 100644
--- a/src/libponyc/ast/token.h
+++ b/src/libponyc/ast/token.h
@@ -359,6 +359,9 @@ size_t token_line_number(token_t* token);
 /// Report the position within the line that the given token was found at
 size_t token_line_position(token_t* token);
 
+/// Report the length within the line that the given token takes
+size_t token_line_length(token_t* token);
+
 /// Report whether debug info should be generated.
 bool token_debug(token_t* token);
 
@@ -388,7 +391,7 @@ void token_set_int(token_t* token, lexint_t* value);
 /// Set the given token's position within its source file and optionally the
 /// source file.
 /// Set source to NULL to keep current file.
-void token_set_pos(token_t* token, source_t* source, size_t line, size_t pos);
+void token_set_pos(token_t* token, source_t* source, size_t line, size_t pos, size_t len);
 
 /// Set whether debug info should be generated.
 void token_set_debug(token_t* token, bool state);
diff --git a/src/libponyc/pass/names.c b/src/libponyc/pass/names.c
index b60287de..a54bafba 100644
--- a/src/libponyc/pass/names.c
+++ b/src/libponyc/pass/names.c
@@ -157,7 +157,7 @@ static bool names_typealias(pass_opt_t* opt, ast_t** astp, ast_t* def,
 
   // Maintain the position info of the original reference to aid error
   // reporting.
-  ast_setpos(r_alias, ast_source(ast), ast_line(ast), ast_pos(ast));
+  ast_setpos(r_alias, ast_source(ast), ast_line(ast), ast_pos(ast), ast_len(ast));
 
   // Replace this with the alias.
   ast_replace(astp, r_alias);
diff --git a/src/libponyc/reach/reach.c b/src/libponyc/reach/reach.c
index 62204595..cfb60077 100644
--- a/src/libponyc/reach/reach.c
+++ b/src/libponyc/reach/reach.c
@@ -675,7 +675,7 @@ static void add_fields(reach_t* r, reach_type_t* t, pass_opt_t* opt)
         bool embed = t->fields[index].embed = ast_id(member) == TK_EMBED;
         t->fields[index].ast = reify(ast_type(member), typeparams, typeargs,
           opt, true);
-        ast_setpos(t->fields[index].ast, NULL, ast_line(name), ast_pos(name));
+        ast_setpos(t->fields[index].ast, NULL, ast_line(name), ast_pos(name), ast_len(name));
         t->fields[index].type = add_type(r, type, opt);
 
         if(embed && !has_finaliser && !needs_finaliser)

Note that we probably want to change the various lengths that do exist elsewhere in touched datastructures to not be size_t if we are concerned about the memory usage of adding size_t in place where it needs to go.

This diff doesn't include the logic that would be needed to reach into the source and based on line number, line position and length, grab the correct information.

This was quickly done so our 80 column line limit is violated in places and for a real version, we would want to address that.

Nor does it add the fairly straightforward dropping into location in code_block_doc_params to handle the TK_CALL use.

Nor does it address how the memory that we would need to memcpy in is freed and by whom.

It is purely the diff I needed to quickly get the data in place to consider doing the "use original source" approach.

IMO, we do want to do this.

We should disucuss more and once we have a plan, create a new issue that has the plan and lists the tickets that doing this would allow to be closed with additional work.

SeanTAllen avatar Feb 23 '22 22:02 SeanTAllen

I see no objections to the above diff.

jemc avatar Apr 12 '22 18:04 jemc