ctags icon indicating copy to clipboard operation
ctags copied to clipboard

C/C++: extern "c" will cause parse error

Open chongchai opened this issue 5 years ago • 7 comments

I find that definitions in the #else branch are not parsed when they are inside an extern "C" block.

The sample code:

#ifdef __cplusplus
extern "C" {
#endif

#ifdef MAX
  void testMax1() {}
  #define NB1 11
#else
  void testMax2() {}
  #define NB2 22
#endif

#ifdef __cplusplus
}
#endif

#ifdef MAX
  void testMax3() {}
  #define NB3 33
#else
  void testMax4() {}
  #define NB4 44
#endif

I use following command to parse:

ctags.exe --output-format=json --langmap=c:+.inc --kinds-c=+plz --fields=+nie -o - D:\tmp\test\testIfMacro.c

Result is:

{"_type": "tag", "name": "NB1", "path": "D:\\tmp\\test\\testIfMacro.c", "pattern": "/^  #define NB1 /", "file": true, "line": 7, "kind": "macro", "end": 7}
{"_type": "tag", "name": "NB3", "path": "D:\\tmp\\test\\testIfMacro.c", "pattern": "/^  #define NB3 /", "file": true, "line": 19, "kind": "macro", "end": 19}
{"_type": "tag", "name": "NB4", "path": "D:\\tmp\\test\\testIfMacro.c", "pattern": "/^  #define NB4 /", "file": true, "line": 22, "kind": "macro", "end": 22}
{"_type": "tag", "name": "testMax1", "path": "D:\\tmp\\test\\testIfMacro.c", "pattern": "/^  void testMax1() {}$/", "line": 6, "typeref": "typename:void", "kind": "function", "end": 6}
{"_type": "tag", "name": "testMax3", "path": "D:\\tmp\\test\\testIfMacro.c", "pattern": "/^  void testMax3() {}$/", "line": 18, "typeref": "typename:void", "kind": "function", "end": 18}
{"_type": "tag", "name": "testMax4", "path": "D:\\tmp\\test\\testIfMacro.c", "pattern": "/^  void testMax4() {}$/", "line": 21, "typeref": "typename:void", "kind": "function", "end": 21}

The testMax2 and NB2 are not parsed, while testMax4 and NB4 are parsed. I think extern "C" will cause this error. Can you support this situation?

chongchai avatar Sep 17 '20 03:09 chongchai

I think extern "C" will cause this error. Can you support this situation?

It seems that your guess is correct. The following change ignores the code between "#ifdef __cplusplus" and "#endif". @pragmaware, what do you think about my approach?

$ git diff | cat
diff --git a/parsers/cpreprocessor.c b/parsers/cpreprocessor.c
index 668171f0..4ad6087d 100644
--- a/parsers/cpreprocessor.c
+++ b/parsers/cpreprocessor.c
@@ -953,10 +953,31 @@ static void directivePragma (int c)
 	Cpp.directive.state = DRCTV_NONE;
 }
 
+static bool isDefCondition (const int c, const char *condition)
+{
+	if (*condition == '\0')
+		return true;
+	else if (c == EOF)
+		return false;
+
+	if (*condition != '\0' && c == condition[0])
+	{
+		const int next = cppGetcFromUngetBufferOrFile ();
+		return isDefCondition (next, condition + 1);
+	}
+
+	return false;
+}
+
 static bool directiveIf (const int c)
 {
 	DebugStatement ( const bool ignore0 = isIgnore (); )
-	const bool ignore = pushConditional ((bool) (c != '0'));
+	bool firstBranchChosen = true;
+
+	if (c == '0' || isDefCondition (c, "__cplusplus"))
+		firstBranchChosen = false;
+
+	const bool ignore = pushConditional (firstBranchChosen);
 
 	Cpp.directive.state = DRCTV_NONE;
 	DebugStatement ( debugCppNest (true, Cpp.directive.nestLevel);
$ u-ctags --output-format=json --langmap=c:+.inc --kinds-c=+plz --fields=+nie -o - /tmp/foo.c

u-ctags --output-format=json --langmap=c:+.inc --kinds-c=+plz --fields=+nie -o - /tmp/foo.c

{"_type": "tag", "name": "NB1", "path": "/tmp/foo.c", "pattern": "/^  #define NB1 /", "file": true, "line": 7, "kind": "macro", "end": 7}
{"_type": "tag", "name": "NB2", "path": "/tmp/foo.c", "pattern": "/^  #define NB2 /", "file": true, "line": 10, "kind": "macro", "end": 10}
{"_type": "tag", "name": "NB3", "path": "/tmp/foo.c", "pattern": "/^  #define NB3 /", "file": true, "line": 19, "kind": "macro", "end": 19}
{"_type": "tag", "name": "NB4", "path": "/tmp/foo.c", "pattern": "/^  #define NB4 /", "file": true, "line": 22, "kind": "macro", "end": 22}
{"_type": "tag", "name": "testMax1", "path": "/tmp/foo.c", "pattern": "/^  void testMax1() {}$/", "line": 6, "typeref": "typename:void", "kind": "function", "end": 6}
{"_type": "tag", "name": "testMax2", "path": "/tmp/foo.c", "pattern": "/^  void testMax2() {}$/", "line": 9, "typeref": "typename:void", "kind": "function", "end": 9}
{"_type": "tag", "name": "testMax3", "path": "/tmp/foo.c", "pattern": "/^  void testMax3() {}$/", "line": 18, "typeref": "typename:void", "kind": "function", "end": 18}
{"_type": "tag", "name": "testMax4", "path": "/tmp/foo.c", "pattern": "/^  void testMax4() {}$/", "line": 21, "typeref": "typename:void", "kind": "function", "end": 21}

masatake avatar Sep 17 '20 05:09 masatake

Well, if it fixes the problem and doesn't break tests then it looks good.

However, I don't understand the relation between the #ifdef __cplusplus define and the #ifdef MAX one. They shouldn't influence each other in terms of choosing one or multiple branches... or am I missing something?

pragmaware avatar Sep 17 '20 10:09 pragmaware

My analysis is that it is related to how c parser handles extern "C" { }.

$ cat /tmp/foo.c
cat /tmp/foo.c
extern "C" {

#ifdef MAX
  void testMax1() {}
  #define NB1 11
#else
  void testMax2() {}
  #define NB2 22
#endif

}
$ u-ctags --output-format=xref --kinds-c=+plz --fields=+nie -o - /tmp/foo.c

u-ctags --output-format=xref --kinds-c=+plz --fields=+nie -o - /tmp/foo.c

NB1              macro         5 /tmp/foo.c       #define NB1 11
testMax1         function      4 /tmp/foo.c       void testMax1() {}

$ cat /tmp/bar.c 
cat /tmp/bar.c 
// extern "C" {

#ifdef MAX
  void testMax1() {}
  #define NB1 11
#else
  void testMax2() {}
  #define NB2 22
#endif

// }
$ u-ctags --output-format=xref --kinds-c=+plz --fields=+nie -o - /tmp/bar.c

u-ctags --output-format=xref --kinds-c=+plz --fields=+nie -o - /tmp/bar.c

NB1              macro         5 /tmp/bar.c       #define NB1 11
NB2              macro         8 /tmp/bar.c       #define NB2 22
testMax1         function      4 /tmp/bar.c       void testMax1() {}
testMax2         function      7 /tmp/bar.c       void testMax2() {}

If the cpreprocessor parser suppresses the area between #ifdef __cplusplus and #endif, the c parser doesn't read the lines extern "C" { and }.

I have to add one more condition to the patch (https://github.com/universal-ctags/ctags/issues/2647#issuecomment-693859110). The condition is "if the client parser of cpreprocessor parser is c parser" or "if the client parser of cpreprocessor parser is not c++ parser".

masatake avatar Sep 17 '20 14:09 masatake

Though I still don't understand why. In theory the behaviour of an #ifdef should not be influenced by any previous #ifdef... ... it seems to be more of a bug in the handling of the second #ifdef, not the first one...

pragmaware avatar Sep 18 '20 03:09 pragmaware

I found this has nothing to do with extern "C" { ... }. I can reproduce the behavior with struct s { ... }:

[yamato@control]~/var/ctags-github% cat /tmp/f.c
cat /tmp/f.c
struct s {
#ifdef X
  int i;
#else
  int j;
#endif  
};
[yamato@control]~/var/ctags-github% ./ctags -o - /tmp/f.c
./ctags -o - /tmp/f.c
i	/tmp/f.c	/^  int i;$/;"	m	language:C	struct:s	typeref:typename:int	file:
s	/tmp/f.c	/^struct s {$/;"	s	language:C	file:
[yamato@control]~/var/ctags-github% cat /tmp/g.c
cat /tmp/g.c

#ifdef X
  int i;
#else
  int j;
#endif  

[yamato@control]~/var/ctags-github% ./ctags -o - /tmp/g.c
./ctags -o - /tmp/g.c
i	/tmp/g.c	/^  int i;$/;"	v	language:C	typeref:typename:int
j	/tmp/g.c	/^  int j;$/;"	v	language:C	typeref:typename:int
[yamato@control]~/var/ctags-github% 

masatake avatar Sep 19 '20 06:09 masatake

I can add another test case for this behavior: a local variable in a function body, as follows.

[yklhard@gotpc12]$ cat a.c
void test() {
#ifdef X
  int i = 1;
#else
  int j = 2;
#endif 
}
[yklhard@gotpc12]$ ./ctags-src/ctags --kinds-c=+l -o - a.c
i	a.c	/^  int i = 1;$/;"	l	function:test	typeref:typename:int	file:
test	a.c	/^void test() {$/;"	f	typeref:typename:void
[yklhard@gotpc12]$ cat b.c
#ifdef X
  int i = 1;
#else
  int j = 2;
#endif 
[yklhard@gotpc12]$ ./ctags-src/ctags --kinds-c=+l -o - b.c
i	b.c	/^  int i = 1;$/;"	v	typeref:typename:int
j	b.c	/^  int j = 2;$/;"	v	typeref:typename:int

chongchai avatar Sep 22 '20 06:09 chongchai

Seems that we should not call cppBeginStatement() here https://github.com/universal-ctags/ctags/blob/master/parsers/cxx/cxx_parser_block.c#L262 :

  • the fact that we are inside some code block (function, struct, namespace, extern "C", etc.) is not a good enough reason to skip (resolve) branches.
  • this change does not break any test

ArcsinX avatar Apr 07 '21 08:04 ArcsinX