From 25632f5ab68cf466e68185f4c62f0277b0f3106c Mon Sep 17 00:00:00 2001 From: Ralph Boehme Date: Sat, 29 Feb 2020 12:26:19 +0100 Subject: [PATCH 1/7] ctdb-daemon: ensure restart() callback is called in half-connected state If NODE_FLAGS_DISCONNECTED is set the node can be in half-connected state. With this change we ensure to restart the transport for this case. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14295 Signed-off-by: Ralph Boehme --- ctdb/server/ctdb_server.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/ctdb/server/ctdb_server.c b/ctdb/server/ctdb_server.c index 0d5451d62a8..0bbd3751b51 100644 --- a/ctdb/server/ctdb_server.c +++ b/ctdb/server/ctdb_server.c @@ -301,6 +301,12 @@ done: */ void ctdb_node_dead(struct ctdb_node *node) { + if (node->ctdb->methods == NULL) { + DEBUG(DEBUG_ERR,(__location__ " Can not restart transport while shutting down daemon.\n")); + return; + } + + node->ctdb->methods->restart(node); if (node->flags & NODE_FLAGS_DISCONNECTED) { DEBUG(DEBUG_INFO,("%s: node %s is already marked disconnected: %u connected\n", node->ctdb->name, node->name, @@ -315,13 +321,6 @@ void ctdb_node_dead(struct ctdb_node *node) DEBUG(DEBUG_ERR,("%s: node %s is dead: %u connected\n", node->ctdb->name, node->name, node->ctdb->num_connected)); ctdb_daemon_cancel_controls(node->ctdb, node); - - if (node->ctdb->methods == NULL) { - DEBUG(DEBUG_ERR,(__location__ " Can not restart transport while shutting down daemon.\n")); - return; - } - - node->ctdb->methods->restart(node); } /* -- 2.25.1 From adfc8ce7030e6f8fa3180a7ca602f36ad3b38e16 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Sun, 1 Mar 2020 16:40:41 +1100 Subject: [PATCH 2/7] SQ: more logical whitespace, debug modernisation Signed-off-by: Martin Schwenke --- ctdb/server/ctdb_server.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ctdb/server/ctdb_server.c b/ctdb/server/ctdb_server.c index 0bbd3751b51..1470b00dba5 100644 --- a/ctdb/server/ctdb_server.c +++ b/ctdb/server/ctdb_server.c @@ -302,11 +302,11 @@ done: void ctdb_node_dead(struct ctdb_node *node) { if (node->ctdb->methods == NULL) { - DEBUG(DEBUG_ERR,(__location__ " Can not restart transport while shutting down daemon.\n")); + DBG_ERR("Can not restart transport while shutting down\n"); return; } - node->ctdb->methods->restart(node); + if (node->flags & NODE_FLAGS_DISCONNECTED) { DEBUG(DEBUG_INFO,("%s: node %s is already marked disconnected: %u connected\n", node->ctdb->name, node->name, -- 2.25.1 From 315cbe933b2332f6c23295b1ab9cd35ba9d841e7 Mon Sep 17 00:00:00 2001 From: Noel Power Date: Sat, 29 Feb 2020 15:49:28 +0000 Subject: [PATCH 3/7] ctdb-tcp: move free of inbound queue to TCP restart Since commit 77deaadca8e8dbc3c92ea16893099c72f6dc874e, a nodeA which had previously accepted a connection from nodeB (where nodeB dies e.g. as as result of fencing) when nodeB attempts to connect again after restarting is always rejected with ctdb_listen_event: Incoming queue active, rejecting connection from w.x.y.z messages. Consolidate dead node handling in the TCP restart handling. BUG: https://attachments.samba.org/attachment.cgi?id=15826 Signed-off-by: Noel Power --- ctdb/tcp/tcp_init.c | 2 +- ctdb/tcp/tcp_io.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/ctdb/tcp/tcp_init.c b/ctdb/tcp/tcp_init.c index 559ad8691d0..dbf6c4b9bcf 100644 --- a/ctdb/tcp/tcp_init.c +++ b/ctdb/tcp/tcp_init.c @@ -121,7 +121,7 @@ static void ctdb_tcp_restart(struct ctdb_node *node) node->transport_data, struct ctdb_tcp_node); DEBUG(DEBUG_NOTICE,("Tearing down connection to dead node :%d\n", node->pnn)); - + TALLOC_FREE(tnode->in_queue); ctdb_tcp_stop_connection(node); tnode->connect_te = tevent_add_timer(node->ctdb->ev, tnode, diff --git a/ctdb/tcp/tcp_io.c b/ctdb/tcp/tcp_io.c index df9ca02b413..bcb18fbf300 100644 --- a/ctdb/tcp/tcp_io.c +++ b/ctdb/tcp/tcp_io.c @@ -75,7 +75,6 @@ void ctdb_tcp_read_cb(uint8_t *data, size_t cnt, void *args) return; failed: - TALLOC_FREE(tnode->in_queue); node->ctdb->upcalls->node_dead(node); TALLOC_FREE(data); -- 2.25.1 From 5ec227f54bf0bebad836cac0f2740bc08c55986b Mon Sep 17 00:00:00 2001 From: Ralph Boehme Date: Sat, 29 Feb 2020 12:13:12 +0100 Subject: [PATCH 4/7] ctdb-tcp: always call node_dead() upcall in ctdb_tcp_tnode_cb() ctdb_tcp_tnode_cb() is called when we receive data on the outgoing connection. This can happen when we get an EOF on the connection because the other side as closed. In this case data will be NULL. It would also be called if we received data from the peer. In this case data will not be NULL. The latter case is a fatal error though and we already call ctdb_tcp_stop_connection() for this case as well, which means even though the node is not fully connected anymore, by not calling the node_dead() upcall NODE_FLAGS_DISCONNECTED will not be set. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14295 Signed-off-by: Ralph Boehme --- ctdb/tcp/tcp_connect.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c index 559442f14bf..ea98e6126a6 100644 --- a/ctdb/tcp/tcp_connect.c +++ b/ctdb/tcp/tcp_connect.c @@ -65,9 +65,7 @@ void ctdb_tcp_tnode_cb(uint8_t *data, size_t cnt, void *private_data) struct ctdb_tcp_node *tnode = talloc_get_type( node->transport_data, struct ctdb_tcp_node); - if (data == NULL) { - node->ctdb->upcalls->node_dead(node); - } + node->ctdb->upcalls->node_dead(node); ctdb_tcp_stop_connection(node); tnode->connect_te = tevent_add_timer(node->ctdb->ev, tnode, -- 2.25.1 From 5a4c860a1e488553dfed2aa9395d80e5a680b90e Mon Sep 17 00:00:00 2001 From: Ralph Boehme Date: Sat, 29 Feb 2020 12:28:20 +0100 Subject: [PATCH 5/7] ctdb-tcp: remove redundant call to ctdb_tcp_stop_connection() This is now reliably handled by the node_dead() upcall. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14295 Signed-off-by: Ralph Boehme --- ctdb/tcp/tcp_connect.c | 1 - 1 file changed, 1 deletion(-) diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c index ea98e6126a6..25efed2ec0b 100644 --- a/ctdb/tcp/tcp_connect.c +++ b/ctdb/tcp/tcp_connect.c @@ -67,7 +67,6 @@ void ctdb_tcp_tnode_cb(uint8_t *data, size_t cnt, void *private_data) node->ctdb->upcalls->node_dead(node); - ctdb_tcp_stop_connection(node); tnode->connect_te = tevent_add_timer(node->ctdb->ev, tnode, timeval_current_ofs(3, 0), ctdb_tcp_node_connect, node); -- 2.25.1 From 6076ab6a29b0468a5b4647037f16daf135561d2b Mon Sep 17 00:00:00 2001 From: Ralph Boehme Date: Fri, 28 Feb 2020 11:36:00 +0100 Subject: [PATCH 6/7] ctdb-tcp: rename ctdb_tcp_stop_connection() to ctdb_tcp_stop_outgoing() No change in behaviour. This makes the code self-documenting. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14295 Signed-off-by: Ralph Boehme --- ctdb/tcp/ctdb_tcp.h | 2 +- ctdb/tcp/tcp_connect.c | 12 ++++++------ ctdb/tcp/tcp_init.c | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/ctdb/tcp/ctdb_tcp.h b/ctdb/tcp/ctdb_tcp.h index daabad74297..095056e8544 100644 --- a/ctdb/tcp/ctdb_tcp.h +++ b/ctdb/tcp/ctdb_tcp.h @@ -48,7 +48,7 @@ void ctdb_tcp_node_connect(struct tevent_context *ev, struct tevent_timer *te, struct timeval t, void *private_data); void ctdb_tcp_read_cb(uint8_t *data, size_t cnt, void *args); void ctdb_tcp_tnode_cb(uint8_t *data, size_t cnt, void *private_data); -void ctdb_tcp_stop_connection(struct ctdb_node *node); +void ctdb_tcp_stop_outgoing(struct ctdb_node *node); #define CTDB_TCP_ALIGNMENT 8 diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c index 25efed2ec0b..582f7e4ccfb 100644 --- a/ctdb/tcp/tcp_connect.c +++ b/ctdb/tcp/tcp_connect.c @@ -38,9 +38,9 @@ #include "ctdb_tcp.h" /* - stop any connecting (established or pending) to a node + stop any outgoing connection (established or pending) to a node */ -void ctdb_tcp_stop_connection(struct ctdb_node *node) +void ctdb_tcp_stop_outgoing(struct ctdb_node *node) { struct ctdb_tcp_node *tnode = talloc_get_type( node->transport_data, struct ctdb_tcp_node); @@ -94,7 +94,7 @@ static void ctdb_node_connect_write(struct tevent_context *ev, ret = getsockopt(tnode->out_fd, SOL_SOCKET, SO_ERROR, &error, &len); if (ret != 0 || error != 0) { - ctdb_tcp_stop_connection(node); + ctdb_tcp_stop_outgoing(node); tnode->connect_te = tevent_add_timer(ctdb->ev, tnode, timeval_current_ofs(1, 0), ctdb_tcp_node_connect, node); @@ -131,7 +131,7 @@ static void ctdb_node_connect_write(struct tevent_context *ev, node->name); if (tnode->out_queue == NULL) { DBG_ERR("Failed to set up outgoing queue\n"); - ctdb_tcp_stop_connection(node); + ctdb_tcp_stop_outgoing(node); tnode->connect_te = tevent_add_timer(ctdb->ev, tnode, timeval_current_ofs(1, 0), @@ -171,7 +171,7 @@ void ctdb_tcp_node_connect(struct tevent_context *ev, struct tevent_timer *te, ctdb_sock_addr sock_out; int ret; - ctdb_tcp_stop_connection(node); + ctdb_tcp_stop_outgoing(node); sock_out = node->address; @@ -255,7 +255,7 @@ void ctdb_tcp_node_connect(struct tevent_context *ev, struct tevent_timer *te, return; failed: - ctdb_tcp_stop_connection(node); + ctdb_tcp_stop_outgoing(node); tnode->connect_te = tevent_add_timer(ctdb->ev, tnode, timeval_current_ofs(1, 0), diff --git a/ctdb/tcp/tcp_init.c b/ctdb/tcp/tcp_init.c index dbf6c4b9bcf..c6acb166807 100644 --- a/ctdb/tcp/tcp_init.c +++ b/ctdb/tcp/tcp_init.c @@ -122,7 +122,7 @@ static void ctdb_tcp_restart(struct ctdb_node *node) DEBUG(DEBUG_NOTICE,("Tearing down connection to dead node :%d\n", node->pnn)); TALLOC_FREE(tnode->in_queue); - ctdb_tcp_stop_connection(node); + ctdb_tcp_stop_outgoing(node); tnode->connect_te = tevent_add_timer(node->ctdb->ev, tnode, timeval_zero(), -- 2.25.1 From cf7ac5f784fcede226852fc5684526e9c299ac63 Mon Sep 17 00:00:00 2001 From: Ralph Boehme Date: Sat, 29 Feb 2020 11:54:51 +0100 Subject: [PATCH 7/7] ctdb-tcp: add ctdb_tcp_stop_incoming() No change in behaviour. This makes the code self-documenting. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14295 Signed-off-by: Ralph Boehme Signed-off-by: Martin Schwenke --- ctdb/tcp/ctdb_tcp.h | 1 + ctdb/tcp/tcp_connect.c | 10 ++++++++++ ctdb/tcp/tcp_init.c | 2 +- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/ctdb/tcp/ctdb_tcp.h b/ctdb/tcp/ctdb_tcp.h index 095056e8544..cb8d66fa5dc 100644 --- a/ctdb/tcp/ctdb_tcp.h +++ b/ctdb/tcp/ctdb_tcp.h @@ -49,6 +49,7 @@ void ctdb_tcp_node_connect(struct tevent_context *ev, struct tevent_timer *te, void ctdb_tcp_read_cb(uint8_t *data, size_t cnt, void *args); void ctdb_tcp_tnode_cb(uint8_t *data, size_t cnt, void *private_data); void ctdb_tcp_stop_outgoing(struct ctdb_node *node); +void ctdb_tcp_stop_incoming(struct ctdb_node *node); #define CTDB_TCP_ALIGNMENT 8 diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c index 582f7e4ccfb..79a093f8b8c 100644 --- a/ctdb/tcp/tcp_connect.c +++ b/ctdb/tcp/tcp_connect.c @@ -54,6 +54,16 @@ void ctdb_tcp_stop_outgoing(struct ctdb_node *node) } } +/* + stop incoming connection to a node + */ +void ctdb_tcp_stop_incoming(struct ctdb_node *node) +{ + struct ctdb_tcp_node *tnode = talloc_get_type( + node->transport_data, struct ctdb_tcp_node); + + TALLOC_FREE(tnode->in_queue); +} /* called when a complete packet has come in - should not happen on this socket diff --git a/ctdb/tcp/tcp_init.c b/ctdb/tcp/tcp_init.c index c6acb166807..97ebe1d887a 100644 --- a/ctdb/tcp/tcp_init.c +++ b/ctdb/tcp/tcp_init.c @@ -121,7 +121,7 @@ static void ctdb_tcp_restart(struct ctdb_node *node) node->transport_data, struct ctdb_tcp_node); DEBUG(DEBUG_NOTICE,("Tearing down connection to dead node :%d\n", node->pnn)); - TALLOC_FREE(tnode->in_queue); + ctdb_tcp_stop_incoming(node); ctdb_tcp_stop_outgoing(node); tnode->connect_te = tevent_add_timer(node->ctdb->ev, tnode, -- 2.25.1