From fc5c1fe2e08405674273adaea5965830ccea5e74 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Fri, 21 Jan 2022 18:09:47 +1100 Subject: [PATCH 1/6] ctdb-recoverd: Always cancel election in progress Election-in-progress is set by unknown leader broadcast, so needs to be cleared in all cases when election completes. This was seen in a case where the leader node stalled, so didn't send leader broadcasts for some time. The node continued to hold the cluster lock, so another node could not become leader. However, after the node returned to normal it still did not send leader broadcasts because election-in-progress was never cleared. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14958 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 188a9021565bc2c1bec1d7a4830d6f47cdbc44a9) --- ctdb/server/ctdb_recoverd.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index cc239959c56..7a13339bf37 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -1836,7 +1836,7 @@ static void cluster_lock_election(struct ctdb_recoverd *rec) if (cluster_lock_held(rec)) { cluster_lock_release(rec); } - return; + goto done; } /* @@ -1844,7 +1844,7 @@ static void cluster_lock_election(struct ctdb_recoverd *rec) * attempt to retake it. This provides stability. */ if (cluster_lock_held(rec)) { - return; + goto done; } rec->leader = CTDB_UNKNOWN_PNN; @@ -1856,6 +1856,7 @@ static void cluster_lock_election(struct ctdb_recoverd *rec) D_WARNING("Took cluster lock, leader=%"PRIu32"\n", rec->leader); } +done: rec->election_in_progress = false; } -- 2.34.1 From d23b62795467f5baafe3527c3d8c6116b61d5e94 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Sun, 23 Jan 2022 05:49:18 +1100 Subject: [PATCH 2/6] ctdb-recoverd: Consistently have caller set election-in-progress The problem here is that election-in-progress must be set to potentially avoid restarting the election broadcast timeout in main_loop(), so this is already done by leader_handler(). Have force_election() set election-in-progress for all election types and do not bother setting it in cluster_lock_election(). BUG: https://bugzilla.samba.org/show_bug.cgi?id=14958 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 9b3fab052bd2dccf2fc3fe9bd2b4354dff0b9ebb) --- ctdb/server/ctdb_recoverd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index 7a13339bf37..a28a287b5aa 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -1848,7 +1848,6 @@ static void cluster_lock_election(struct ctdb_recoverd *rec) } rec->leader = CTDB_UNKNOWN_PNN; - rec->election_in_progress = true; ok = cluster_lock_take(rec); if (ok) { @@ -1877,13 +1876,14 @@ static void force_election(struct ctdb_recoverd *rec) return; } + rec->election_in_progress = true; + if (cluster_lock_enabled(rec)) { cluster_lock_election(rec); return; } talloc_free(rec->election_timeout); - rec->election_in_progress = true; rec->election_timeout = tevent_add_timer( ctdb->ev, ctdb, fast_start ? -- 2.34.1 From 91b324c67cb0a717dd1429151300247eab51357a Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Sun, 23 Jan 2022 06:18:51 +1100 Subject: [PATCH 3/6] ctdb-recoverd: Always send unknown leader broadcast when starting election This is currently missed when the cluster lock is lost. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14958 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit bf55a0117d045e8ca888f7e01591cc2a2bce9223) --- ctdb/server/ctdb_recoverd.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index a28a287b5aa..820bb627694 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -1877,6 +1877,8 @@ static void force_election(struct ctdb_recoverd *rec) } rec->election_in_progress = true; + /* Let other nodes know that an election is underway */ + leader_broadcast_send(rec, CTDB_UNKNOWN_PNN); if (cluster_lock_enabled(rec)) { cluster_lock_election(rec); @@ -1976,9 +1978,6 @@ static void leader_broadcast_timeout_handler(struct tevent_context *ev, rec->leader_broadcast_timeout_te = NULL; - /* Let other nodes know that an election is underway */ - leader_broadcast_send(rec, CTDB_UNKNOWN_PNN); - D_NOTICE("Leader broadcast timeout. Force election\n"); force_election(rec); } -- 2.34.1 From 1f49e1f2146d7ff7d77e6dde7bb917dcc7f65970 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Sun, 23 Jan 2022 06:21:51 +1100 Subject: [PATCH 4/6] ctdb-recoverd: Consistently log start of election Elections should now be quite rare, so always log when one begins. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14958 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 0e74e03c9cf83d5dc2d97fa9f38ff8fbaa3d2685) --- ctdb/server/ctdb_recoverd.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index 820bb627694..03698ef2928 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -1867,7 +1867,7 @@ static void force_election(struct ctdb_recoverd *rec) int ret; struct ctdb_context *ctdb = rec->ctdb; - DEBUG(DEBUG_INFO,(__location__ " Force an election\n")); + D_ERR("Start election\n"); /* set all nodes to recovery mode to stop all internode traffic */ ret = set_recovery_mode(ctdb, rec, rec->nodemap, CTDB_RECOVERY_ACTIVE); @@ -1978,7 +1978,8 @@ static void leader_broadcast_timeout_handler(struct tevent_context *ev, rec->leader_broadcast_timeout_te = NULL; - D_NOTICE("Leader broadcast timeout. Force election\n"); + D_NOTICE("Leader broadcast timeout\n"); + force_election(rec); } -- 2.34.1 From 626f5885d2bbedaac59d4572cebbab592e9aa888 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Sun, 23 Jan 2022 06:42:52 +1100 Subject: [PATCH 5/6] ctdb-tests: Factor out functions to detect when generation changes BUG: https://bugzilla.samba.org/show_bug.cgi?id=14958 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 265e44abc42e1f5b7fef6550cd748459dbef80cb) --- .../simple/cluster.015.reclock_remove_lock.sh | 14 +----- ctdb/tests/scripts/integration.bash | 44 +++++++++++++++++++ 2 files changed, 46 insertions(+), 12 deletions(-) diff --git a/ctdb/tests/INTEGRATION/simple/cluster.015.reclock_remove_lock.sh b/ctdb/tests/INTEGRATION/simple/cluster.015.reclock_remove_lock.sh index 35363d11f1d..2283c30edbf 100755 --- a/ctdb/tests/INTEGRATION/simple/cluster.015.reclock_remove_lock.sh +++ b/ctdb/tests/INTEGRATION/simple/cluster.015.reclock_remove_lock.sh @@ -56,24 +56,14 @@ echo leader_get "$test_node" -echo "Get initial generation" -ctdb_onnode "$test_node" status -# shellcheck disable=SC2154 -# $outfile set by ctdb_onnode() above -generation_init=$(sed -n -e 's/^Generation:\([0-9]*\)/\1/p' "$outfile") -echo "Initial generation is ${generation_init}" -echo +generation_get echo "Remove recovery lock" rm "$reclock" echo # This will mean an election has taken place and a recovery has occured -echo "Wait until generation changes" -wait_until 30 generation_has_changed "$test_node" "$generation_init" -echo -echo "Generation changed to ${generation_new}" -echo +wait_until_generation_has_changed "$test_node" # shellcheck disable=SC2154 # $leader set by leader_get() above diff --git a/ctdb/tests/scripts/integration.bash b/ctdb/tests/scripts/integration.bash index 25ee4d945cc..eb3db1e1849 100644 --- a/ctdb/tests/scripts/integration.bash +++ b/ctdb/tests/scripts/integration.bash @@ -688,6 +688,50 @@ wait_until_leader_has_changed () ####################################### +# sets: generation +_generation_get () +{ + local node="$1" + + ctdb_onnode "$node" status + # shellcheck disable=SC2154 + # $outfile set by ctdb_onnode() above + generation=$(sed -n -e 's/^Generation:\([0-9]*\)/\1/p' "$outfile") +} + +generation_get () +{ + local node="$1" + + echo "Get generation" + _generation_get "$node" + echo "Generation is ${generation}" + echo +} + +_generation_has_changed () +{ + local node="$1" + local generation_old="$2" + + _generation_get "$node" + + [ "$generation" != "$generation_old" ] +} + +# uses: generation +wait_until_generation_has_changed () +{ + local node="$1" + + echo "Wait until generation changes..." + wait_until 30 _generation_has_changed "$node" "$generation" + echo "Generation changed to ${generation}" + echo +} + +####################################### + wait_for_monitor_event () { local pnn="$1" -- 2.34.1 From 8148c9918bd8f72ca61a581c2e8c40fb10206326 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Sun, 23 Jan 2022 07:08:02 +1100 Subject: [PATCH 6/6] ctdb-tests: Add a test for stalled node triggering election A stalled node probably continues to hold the cluster lock, so confirm elections work in this case. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14958 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs Autobuild-User(master): Amitay Isaacs Autobuild-Date(master): Mon Feb 14 02:46:01 UTC 2022 on sn-devel-184 (cherry picked from commit 331c435ce520bef1274e076e6ed491400db3b5ad) --- .../cluster.030.node_stall_leader_timeout.sh | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100755 ctdb/tests/INTEGRATION/simple/cluster.030.node_stall_leader_timeout.sh diff --git a/ctdb/tests/INTEGRATION/simple/cluster.030.node_stall_leader_timeout.sh b/ctdb/tests/INTEGRATION/simple/cluster.030.node_stall_leader_timeout.sh new file mode 100755 index 00000000000..7bca58c222b --- /dev/null +++ b/ctdb/tests/INTEGRATION/simple/cluster.030.node_stall_leader_timeout.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash + +# Verify that nothing bad occurs if a node stalls and the leader +# broadcast timeout triggers + +. "${TEST_SCRIPTS_DIR}/integration.bash" + +set -e + +ctdb_test_init + +select_test_node +echo + +echo 'Get "leader timeout":' +conf_tool="${CTDB_SCRIPTS_HELPER_BINDIR}/ctdb-config" +# shellcheck disable=SC2154 +# $test_node set by select_test_node() above +try_command_on_node "$test_node" "${conf_tool} get cluster 'leader timeout'" +# shellcheck disable=SC2154 +# $out set by ctdb_onnode() above +leader_timeout="$out" +echo "Leader timeout is ${leader_timeout} seconds" +echo + +# Assume leader timeout is reasonable and doesn't cause node to be +# disconnected +stall_time=$((leader_timeout * 2)) + +generation_get "$test_node" + +echo "Get ctdbd PID on node ${test_node}..." +ctdb_onnode -v "$test_node" "getpid" +ctdbd_pid="$out" +echo + +echo "Sending SIGSTOP to ctdbd on ${test_node}" +try_command_on_node "$test_node" "kill -STOP ${ctdbd_pid}" + +sleep_for "$stall_time" + +echo "Sending SIGCONT to ctdbd on ${test_node}" +try_command_on_node "$test_node" "kill -CONT ${ctdbd_pid}" +echo + +wait_until_generation_has_changed "$test_node" + +cluster_is_healthy -- 2.34.1