From d2c52dd9fd5d78b7fe18b13715860789f7483f69 Mon Sep 17 00:00:00 2001 From: Noel Power Date: Sat, 29 Feb 2020 15:49:28 +0000 Subject: [PATCH 1/3] ctdb/tcp: initial bug fix, on its own without connection stop/start rework consolidate node_dead handling, tear down incoming connection in just one place e.g. TALLOC_FREE(tnode->in_queue) in node_dead upcall. Bug: https://attachments.samba.org/attachment.cgi?id=15826 --- ctdb/tcp/tcp_init.c | 2 +- ctdb/tcp/tcp_io.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/ctdb/tcp/tcp_init.c b/ctdb/tcp/tcp_init.c index 559ad8691d0..dbf6c4b9bcf 100644 --- a/ctdb/tcp/tcp_init.c +++ b/ctdb/tcp/tcp_init.c @@ -121,7 +121,7 @@ static void ctdb_tcp_restart(struct ctdb_node *node) node->transport_data, struct ctdb_tcp_node); DEBUG(DEBUG_NOTICE,("Tearing down connection to dead node :%d\n", node->pnn)); - + TALLOC_FREE(tnode->in_queue); ctdb_tcp_stop_connection(node); tnode->connect_te = tevent_add_timer(node->ctdb->ev, tnode, diff --git a/ctdb/tcp/tcp_io.c b/ctdb/tcp/tcp_io.c index df9ca02b413..bcb18fbf300 100644 --- a/ctdb/tcp/tcp_io.c +++ b/ctdb/tcp/tcp_io.c @@ -75,7 +75,6 @@ void ctdb_tcp_read_cb(uint8_t *data, size_t cnt, void *args) return; failed: - TALLOC_FREE(tnode->in_queue); node->ctdb->upcalls->node_dead(node); TALLOC_FREE(data); -- 2.16.4 From 455b1e845febdc3f6ce397903bd4b69c301d597b Mon Sep 17 00:00:00 2001 From: Ralph Boehme Date: Sat, 29 Feb 2020 12:13:12 +0100 Subject: [PATCH 2/3] ctdb: always call node_dead() upcall in ctdb_tcp_tnode_cb() ctdb_tcp_tnode_cb() is called when we receive data on the outgoing connection. This can happen when we get an EOF on the connection because the other side has closed. In this case data will be NULL. It would also be called if we received data from the peer. In this case data will not be NULL. 
The latter case is a fatal error, though, and we already call ctdb_tcp_stop_connection() for this case as well. This means that, even though the node is no longer fully connected, NODE_FLAGS_DISCONNECTED will not be set because the node_dead() upcall is not called. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14295 Signed-off-by: Ralph Boehme --- ctdb/tcp/tcp_connect.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c index 559442f14bf..ea98e6126a6 100644 --- a/ctdb/tcp/tcp_connect.c +++ b/ctdb/tcp/tcp_connect.c @@ -65,9 +65,7 @@ void ctdb_tcp_tnode_cb(uint8_t *data, size_t cnt, void *private_data) struct ctdb_tcp_node *tnode = talloc_get_type( node->transport_data, struct ctdb_tcp_node); - if (data == NULL) { - node->ctdb->upcalls->node_dead(node); - } + node->ctdb->upcalls->node_dead(node); ctdb_tcp_stop_connection(node); tnode->connect_te = tevent_add_timer(node->ctdb->ev, tnode, -- 2.16.4 From 5660d78a7d4538b85f9eb9e5f89cbfcd2e2b919d Mon Sep 17 00:00:00 2001 From: Ralph Boehme Date: Sat, 29 Feb 2020 12:26:19 +0100 Subject: [PATCH 3/3] ctdb: ctdb_node_dead: ensure restart() callback is called in half-connected state If NODE_FLAGS_DISCONNECTED is set, the node can be in a half-connected state. With this change we ensure that the transport is restarted in this case. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=14295 Signed-off-by: Ralph Boehme --- ctdb/server/ctdb_server.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/ctdb/server/ctdb_server.c b/ctdb/server/ctdb_server.c index 0d5451d62a8..0bbd3751b51 100644 --- a/ctdb/server/ctdb_server.c +++ b/ctdb/server/ctdb_server.c @@ -301,6 +301,12 @@ done: */ void ctdb_node_dead(struct ctdb_node *node) { + if (node->ctdb->methods == NULL) { + DEBUG(DEBUG_ERR,(__location__ " Can not restart transport while shutting down daemon.\n")); + return; + } + + node->ctdb->methods->restart(node); if (node->flags & NODE_FLAGS_DISCONNECTED) { DEBUG(DEBUG_INFO,("%s: node %s is already marked disconnected: %u connected\n", node->ctdb->name, node->name, @@ -315,13 +321,6 @@ void ctdb_node_dead(struct ctdb_node *node) DEBUG(DEBUG_ERR,("%s: node %s is dead: %u connected\n", node->ctdb->name, node->name, node->ctdb->num_connected)); ctdb_daemon_cancel_controls(node->ctdb, node); - - if (node->ctdb->methods == NULL) { - DEBUG(DEBUG_ERR,(__location__ " Can not restart transport while shutting down daemon.\n")); - return; - } - - node->ctdb->methods->restart(node); } /* -- 2.16.4