From a16bb2fbd01c2b5c77dde936bf8112cd6ea8c8e9 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Mon, 20 Nov 2017 15:17:15 +1100 Subject: [PATCH 1/3] ctdb-takeover: Refactor code to send tickle lists for all public IPs BUG: https://bugzilla.samba.org/show_bug.cgi?id=13154 Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit 2b253f6b1bc4e765f3fcb614a3b67b14084a625d) --- ctdb/server/ctdb_takeover.c | 44 +++++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index c359d664fc2..d8c27aee766 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -1995,43 +1995,53 @@ static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb, return ret; } - -/* - perform tickle updates if required - */ -static void ctdb_update_tcp_tickles(struct tevent_context *ev, - struct tevent_timer *te, - struct timeval t, void *private_data) +static void ctdb_send_set_tcp_tickles_for_all(struct ctdb_context *ctdb, + bool force) { - struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context); - int ret; struct ctdb_vnn *vnn; + int ret; - for (vnn=ctdb->vnn;vnn;vnn=vnn->next) { - /* we only send out updates for public addresses that + for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) { + /* we only send out updates for public addresses that we have taken over */ if (ctdb->pnn != vnn->pnn) { continue; } + /* We only send out the updates if we need to */ - if (!vnn->tcp_update_needed) { + if (!force && !vnn->tcp_update_needed) { continue; } + ret = ctdb_send_set_tcp_tickles_for_ip(ctdb, &vnn->public_address, vnn->tcp_array); if (ret != 0) { - DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n", - ctdb_addr_to_str(&vnn->public_address))); + D_ERR("Failed to send the tickle update for ip %s\n", + ctdb_addr_to_str(&vnn->public_address)); + vnn->tcp_update_needed = true; } else { - 
DEBUG(DEBUG_INFO, - ("Sent tickle update for public address %s\n", - ctdb_addr_to_str(&vnn->public_address))); + D_INFO("Sent tickle update for ip %s\n", + ctdb_addr_to_str(&vnn->public_address)); vnn->tcp_update_needed = false; } } +} + +/* + perform tickle updates if required + */ +static void ctdb_update_tcp_tickles(struct tevent_context *ev, + struct tevent_timer *te, + struct timeval t, void *private_data) +{ + struct ctdb_context *ctdb = talloc_get_type( + private_data, struct ctdb_context); + + ctdb_send_set_tcp_tickles_for_all(ctdb, false); + tevent_add_timer(ctdb->ev, ctdb->tickle_update_context, timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), ctdb_update_tcp_tickles, ctdb); -- 2.15.0 From 73d6fb030a9dee2d25dc617068bdbdbb74b4b5a9 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Mon, 20 Nov 2017 15:37:39 +1100 Subject: [PATCH 2/3] ctdb-takeover: Send tcp tickles immediately on STARTUP control BUG: https://bugzilla.samba.org/show_bug.cgi?id=13154 Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit 73e261b48c4abc91e00775ac7437752c9640e5bd) --- ctdb/server/ctdb_takeover.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index d8c27aee766..a7aa8db5372 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -1495,24 +1495,23 @@ int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata) } +static void ctdb_send_set_tcp_tickles_for_all(struct ctdb_context *ctdb, + bool force); + /* Called when another daemon starts - causes all tickles for all public addresses we are serving to be sent to the new node on the - next check. This actually causes the next scheduled call to - tdb_update_tcp_tickles() to update all nodes. This is simple and + next check. This actually causes the tickles to be sent to the + other node immediately. 
In case there is an error, the periodic + timer will send the updates on timer event. This is simple and doesn't require careful error handling. */ int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn) { - struct ctdb_vnn *vnn; - DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n", (unsigned long) pnn)); - for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) { - vnn->tcp_update_needed = true; - } - + ctdb_send_set_tcp_tickles_for_all(ctdb, true); return 0; } -- 2.15.0 From 790718eae255728aa311bd19925676e4c714a434 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Mon, 20 Nov 2017 15:27:52 +1100 Subject: [PATCH 3/3] ctdb-daemon: Send STARTUP control after startup event BUG: https://bugzilla.samba.org/show_bug.cgi?id=13154 STARTUP control is primarily used to synchronise tcp tickles from running nodes to a node which has just started up. Earlier, the STARTUP control was sent (using BROADCAST_ALL) after the setup event. Once the other nodes in the cluster connected to this node, the queued up messages would be sent and the tcp tickles would get synchronised. Since the recent fix to drop messages to disconnected or not-yet-connected nodes, the STARTUP control was never sent to the remote nodes and the tcp tickles did not get synchronised. To fix this problem, send the STARTUP control (using BROADCAST_CONNECTED) after the startup event. By this time all the running nodes in the cluster are connected. 
Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke Autobuild-User(master): Martin Schwenke Autobuild-Date(master): Thu Nov 30 15:29:48 CET 2017 on sn-devel-144 (cherry picked from commit d7a5cd589b7b16d625dbc64dac21a1384519e32b) --- ctdb/server/ctdb_daemon.c | 6 ------ ctdb/server/ctdb_monitor.c | 6 ++++++ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ctdb/server/ctdb_daemon.c b/ctdb/server/ctdb_daemon.c index de38542083b..3abbee45eca 100644 --- a/ctdb/server/ctdb_daemon.c +++ b/ctdb/server/ctdb_daemon.c @@ -1081,12 +1081,6 @@ static void ctdb_setup_event_callback(struct ctdb_context *ctdb, int status, } ctdb_run_notification_script(ctdb, "setup"); - /* tell all other nodes we've just started up */ - ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, - 0, CTDB_CONTROL_STARTUP, 0, - CTDB_CTRL_FLAG_NOREPLY, - tdb_null, NULL, NULL); - /* Start the recovery daemon */ if (ctdb_start_recoverd(ctdb) != 0) { DEBUG(DEBUG_ALERT,("Failed to start recovery daemon\n")); diff --git a/ctdb/server/ctdb_monitor.c b/ctdb/server/ctdb_monitor.c index 738acb1167b..1864887fc61 100644 --- a/ctdb/server/ctdb_monitor.c +++ b/ctdb/server/ctdb_monitor.c @@ -243,6 +243,12 @@ static void ctdb_startup_callback(struct ctdb_context *ctdb, int status, void *p ctdb->monitor->monitoring_mode = CTDB_MONITORING_ENABLED; + /* tell all other nodes we've just started up */ + ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, + 0, CTDB_CONTROL_STARTUP, 0, + CTDB_CTRL_FLAG_NOREPLY, + tdb_null, NULL, NULL); + tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context, timeval_current_ofs(ctdb->monitor->next_interval, 0), ctdb_check_health, ctdb); -- 2.15.0