From deac868c2888bbd6ad1c5ec34071267ced2ca5b0 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Mon, 20 Nov 2017 15:17:15 +1100 Subject: [PATCH 1/3] ctdb-takeover: Refactor code to send tickle lists for all public IPs BUG: https://bugzilla.samba.org/show_bug.cgi?id=13154 Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit 2b253f6b1bc4e765f3fcb614a3b67b14084a625d) --- ctdb/server/ctdb_takeover.c | 44 +++++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index 513da63cd38..b75dfb07e90 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -1995,43 +1995,53 @@ static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb, return ret; } - -/* - perform tickle updates if required - */ -static void ctdb_update_tcp_tickles(struct tevent_context *ev, - struct tevent_timer *te, - struct timeval t, void *private_data) +static void ctdb_send_set_tcp_tickles_for_all(struct ctdb_context *ctdb, + bool force) { - struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context); - int ret; struct ctdb_vnn *vnn; + int ret; - for (vnn=ctdb->vnn;vnn;vnn=vnn->next) { - /* we only send out updates for public addresses that + for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) { + /* we only send out updates for public addresses that we have taken over */ if (ctdb->pnn != vnn->pnn) { continue; } + /* We only send out the updates if we need to */ - if (!vnn->tcp_update_needed) { + if (!force && !vnn->tcp_update_needed) { continue; } + ret = ctdb_send_set_tcp_tickles_for_ip(ctdb, &vnn->public_address, vnn->tcp_array); if (ret != 0) { - DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n", - ctdb_addr_to_str(&vnn->public_address))); + D_ERR("Failed to send the tickle update for ip %s\n", + ctdb_addr_to_str(&vnn->public_address)); + vnn->tcp_update_needed = true; } else { - 
DEBUG(DEBUG_INFO, - ("Sent tickle update for public address %s\n", - ctdb_addr_to_str(&vnn->public_address))); + D_INFO("Sent tickle update for ip %s\n", + ctdb_addr_to_str(&vnn->public_address)); vnn->tcp_update_needed = false; } } +} + +/* + perform tickle updates if required + */ +static void ctdb_update_tcp_tickles(struct tevent_context *ev, + struct tevent_timer *te, + struct timeval t, void *private_data) +{ + struct ctdb_context *ctdb = talloc_get_type( + private_data, struct ctdb_context); + + ctdb_send_set_tcp_tickles_for_all(ctdb, false); + tevent_add_timer(ctdb->ev, ctdb->tickle_update_context, timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), ctdb_update_tcp_tickles, ctdb); -- 2.15.0 From e867d0665c12c56e9c792ea084c43165d2ea5719 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Mon, 20 Nov 2017 15:37:39 +1100 Subject: [PATCH 2/3] ctdb-takeover: Send tcp tickles immediately on STARTUP control BUG: https://bugzilla.samba.org/show_bug.cgi?id=13154 Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit 73e261b48c4abc91e00775ac7437752c9640e5bd) --- ctdb/server/ctdb_takeover.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index b75dfb07e90..b7fab0996d1 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -1495,24 +1495,23 @@ int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata) } +static void ctdb_send_set_tcp_tickles_for_all(struct ctdb_context *ctdb, + bool force); + /* Called when another daemon starts - causes all tickles for all public addresses we are serving to be sent to the new node on the - next check. This actually causes the next scheduled call to - tdb_update_tcp_tickles() to update all nodes. This is simple and + next check. This actually causes the tickles to be sent to the + other node immediately. 
In case there is an error, the periodic + timer will send the updates on timer event. This is simple and doesn't require careful error handling. */ int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn) { - struct ctdb_vnn *vnn; - DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n", (unsigned long) pnn)); - for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) { - vnn->tcp_update_needed = true; - } - + ctdb_send_set_tcp_tickles_for_all(ctdb, true); return 0; } -- 2.15.0 From 4b211a1582a1a7ffb547466835a512a6cb12cab3 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Mon, 20 Nov 2017 15:27:52 +1100 Subject: [PATCH 3/3] ctdb-daemon: Send STARTUP control after startup event BUG: https://bugzilla.samba.org/show_bug.cgi?id=13154 The STARTUP control is primarily used to synchronise tcp tickles from running nodes to a node which has just started up. Earlier, the STARTUP control was sent (using BROADCAST_ALL) after the setup event. Once the other nodes in the cluster connected to this node, the queued up messages would be sent and the tcp tickles would get synchronised. With the recent fix to drop messages to disconnected or not-yet-connected nodes, the STARTUP control was never sent to the remote nodes and the tcp tickles did not get synchronised. To fix this problem, send the STARTUP control (using BROADCAST_CONNECTED) after the startup event. By this time all the running nodes in the cluster are connected. 
Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke Autobuild-User(master): Martin Schwenke Autobuild-Date(master): Thu Nov 30 15:29:48 CET 2017 on sn-devel-144 (cherry picked from commit d7a5cd589b7b16d625dbc64dac21a1384519e32b) --- ctdb/server/ctdb_daemon.c | 6 ------ ctdb/server/ctdb_monitor.c | 6 ++++++ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ctdb/server/ctdb_daemon.c b/ctdb/server/ctdb_daemon.c index 4d6987a121e..80f205e3ed5 100644 --- a/ctdb/server/ctdb_daemon.c +++ b/ctdb/server/ctdb_daemon.c @@ -1085,12 +1085,6 @@ static void ctdb_setup_event_callback(struct ctdb_context *ctdb, int status, } ctdb_run_notification_script(ctdb, "setup"); - /* tell all other nodes we've just started up */ - ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, - 0, CTDB_CONTROL_STARTUP, 0, - CTDB_CTRL_FLAG_NOREPLY, - tdb_null, NULL, NULL); - /* Start the recovery daemon */ if (ctdb_start_recoverd(ctdb) != 0) { DEBUG(DEBUG_ALERT,("Failed to start recovery daemon\n")); diff --git a/ctdb/server/ctdb_monitor.c b/ctdb/server/ctdb_monitor.c index 738acb1167b..1864887fc61 100644 --- a/ctdb/server/ctdb_monitor.c +++ b/ctdb/server/ctdb_monitor.c @@ -243,6 +243,12 @@ static void ctdb_startup_callback(struct ctdb_context *ctdb, int status, void *p ctdb->monitor->monitoring_mode = CTDB_MONITORING_ENABLED; + /* tell all other nodes we've just started up */ + ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, + 0, CTDB_CONTROL_STARTUP, 0, + CTDB_CTRL_FLAG_NOREPLY, + tdb_null, NULL, NULL); + tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context, timeval_current_ofs(ctdb->monitor->next_interval, 0), ctdb_check_health, ctdb); -- 2.15.0