zookeeper icon indicating copy to clipboard operation
zookeeper copied to clipboard

ZOOKEEPER-4614: Network event can cause C Client to "forget" to SASL-authenticate

Open ztzg opened this issue 2 years ago • 1 comments

Network hiccups can, if they occur at the "right" point during a SASL authentication sequence, cause the client to lose its connection without putting the itself in ZOO_AUTH_FAILED_STATE.

This is not necessarily a problem; if not in ZOO_AUTH_FAILED_STATE, the client simply tries to reconnect and restart the authentication.

Without this patch, however, a situation such as the one of the first paragraph, but occurring during the very last request/response cycle of a Kerberos/GSSAPI negotiation, causes the internal 'is_last_packet' flag not to be properly reset.

The following connection attempt then cuts its authentication sequence short, effectively "forgetting" to authenticate with the server. This generally causes subsequent requests to fail due to either global configuration and/or ACLs.

Note that:

  1. The window is very small, so this bug is not expected to happen very often under normal conditions;

  2. This does not give undue privileges to clients, it just prevents them from accessing protected members and/or nodes.

ztzg avatar Sep 22 '22 10:09 ztzg

Difficult to produce a reasonable test for this change, as it requires Kerberos and control over events. I was able to reproduce and verify the fix "interactively" with a hacked C library and client—see patch below—by:

  1. Successfully connecting and Kerberos-authenticating with a server;
  2. Successfully reading an ACL-protected node;
  3. Issuing repro_hack;
  4. Successfully reconnecting to the server, but observing the skipped authentication;
  5. Failing to read the same ACL-protected node.

diff --git a/zookeeper-client/zookeeper-client-c/include/zookeeper.h b/zookeeper-client/zookeeper-client-c/include/zookeeper.h
index 7d889f605..7b519391e 100644
--- a/zookeeper-client/zookeeper-client-c/include/zookeeper.h
+++ b/zookeeper-client/zookeeper-client-c/include/zookeeper.h
@@ -1669,6 +1669,8 @@ ZOOAPI int zoo_amulti(zhandle_t *zh, int count, const zoo_op_t *ops,
  */
 ZOOAPI const char* zerror(int c);
 
+ZOOAPI void zoo_trigger_repro_hack(zhandle_t *zh);
+
 /**
  * \brief specify application credentials.
  *
diff --git a/zookeeper-client/zookeeper-client-c/src/cli.c b/zookeeper-client/zookeeper-client-c/src/cli.c
index 823ed72a5..da5f40a30 100644
--- a/zookeeper-client/zookeeper-client-c/src/cli.c
+++ b/zookeeper-client/zookeeper-client-c/src/cli.c
@@ -354,6 +354,7 @@ void processline(const char *line) {
       fprintf(stderr, "    reconfig [-file <path> | -members <serverId=host:port1:port2;port3>,... | "
                           " -add <serverId=host:port1:port2;port3>,... | -remove <serverId>,...] [-version <version>]\n");
       fprintf(stderr, "    quit\n");
+      fprintf(stderr, "    repro_hack\n");
       fprintf(stderr, "\n");
       fprintf(stderr, "    prefix the command with the character 'a' to run the command asynchronously.\n");
       fprintf(stderr, "    run the 'verbose' command to toggle verbose logging.\n");
@@ -697,6 +698,8 @@ void processline(const char *line) {
     } else if (startsWith(line, "quit")) {
         fprintf(stderr, "Quitting...\n");
         shutdownThisThing=1;
+    } else if (startsWith(line, "repro_hack")) {
+        zoo_trigger_repro_hack(zh);
     } else if (startsWith(line, "od")) {
         const char val[]="fire off";
         fprintf(stderr, "Overdosing...\n");
diff --git a/zookeeper-client/zookeeper-client-c/src/zookeeper.c b/zookeeper-client/zookeeper-client-c/src/zookeeper.c
index 0dac4c3d0..6bd7bcfdb 100644
--- a/zookeeper-client/zookeeper-client-c/src/zookeeper.c
+++ b/zookeeper-client/zookeeper-client-c/src/zookeeper.c
@@ -2946,6 +2946,8 @@ static int process_sasl_response(zhandle_t *zh, char *buffer, int len)
 
 #endif /* HAVE_CYRUS_SASL_H */
 
+int repro_hack;
+
 static int check_events(zhandle_t *zh, int events)
 {
     if (zh->fd->sock == -1)
@@ -3021,6 +3023,13 @@ static int check_events(zhandle_t *zh, int events)
                     rc = process_sasl_response(zh, zh->input_buffer->buffer, zh->input_buffer->curr_offset);
                     free_buffer(zh->input_buffer);
                     zh->input_buffer = 0;
+                    if (rc == ZOK && zh->sasl_client->is_last_packet && repro_hack) {
+                        rc = repro_hack;
+                        repro_hack = 0;
+                        LOG_ERROR(LOGCALLBACK(zh), "REPRO HACK: Simulating connection fail at last packet.");
+                        handle_error(zh, rc);
+                        return rc;
+                    }
                     if (rc < 0) {
                         zoo_sasl_mark_failed(zh);
                         return rc;
@@ -5698,3 +5707,10 @@ int zoo_aremove_all_watches(zhandle_t *zh, const char *path,
     return aremove_watches(
         zh, path, wtype, NULL, NULL, local, completion, data, 1);
 }
+
+void zoo_trigger_repro_hack(zhandle_t *zh)
+{
+    repro_hack = ZOPERATIONTIMEOUT;
+    LOG_ERROR(LOGCALLBACK(zh), "REPRO HACK: Setup.");
+    handle_error(zh, repro_hack);
+}

ztzg avatar Sep 22 '22 10:09 ztzg