From 7b400dc72c2499b63d4f0f204119c08557ddd565 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Fri, 27 May 2016 15:22:27 +1000 Subject: [PATCH 1/2] ctdb-ipalloc: Use a cumulative timeout for takeover run stages RELEASE_IP sometimes times out because killing TCP connections can take a long time. The aim of the takeover timeout is actually to limit the total amount of time for an IP takeover run. So, calculate a combined timeout offset once and use it for each of the RELEASE_IP, TAKEOVER_IP, IPREALLOCATED stages. This gives RELEASE_IP more time to kill TCP connections but still limits the total time. BUG: https://bugzilla.samba.org/show_bug.cgi?id=12161 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit c40fc62642ff5ac49b75e9af49c299e33dbc9073) --- ctdb/server/ctdb_takeover.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index a613aa0..f52bc24 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -1775,6 +1775,17 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodem bool *retry_data; bool can_host_ips; + /* Each of the later stages (RELEASE_IP, TAKEOVER_IP, + * IPREALLOCATED) notionally has a timeout of TakeoverTimeout + * seconds. However, RELEASE_IP can take longer due to TCP + * connection killing, so sometimes needs more time. + * Therefore, use a cumulative timeout of TakeoverTimeout * 3 + * seconds across all 3 stages. No explicit expiry checks are + * needed before each stage because tevent is smart enough to + * fire the timeouts even if they are in the past. Initialise + * this here to cope with early jumps to IPREALLOCATED. */ + timeout = timeval_current_ofs(3 * ctdb->tunable.takeover_timeout,0); + /* * ip failover is completely disabled, just send out the * ipreallocated event. @@ -1874,7 +1885,6 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodem ip.pnn = tmp_ip->pnn; ip.addr = tmp_ip->addr; - timeout = TAKEOVER_TIMEOUT(); data.dsize = sizeof(ip); data.dptr = (uint8_t *)&ip; state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn, @@ -1917,7 +1927,6 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodem ip.pnn = tmp_ip->pnn; ip.addr = tmp_ip->addr; - timeout = TAKEOVER_TIMEOUT(); data.dsize = sizeof(ip); data.dptr = (uint8_t *)&ip; state = ctdb_control_send(ctdb, tmp_ip->pnn, @@ -1955,7 +1964,7 @@ ipreallocated: nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true); ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED, - nodes, 0, TAKEOVER_TIMEOUT(), + nodes, 0, timeout, false, tdb_null, NULL, iprealloc_fail_callback, &iprealloc_data); -- 2.8.1 From 6e81d4ba3c38c24cebdb3deeeb042b5e0d8b1dad Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Thu, 18 Aug 2016 12:57:33 +1000 Subject: [PATCH 2/2] ctdb-ipalloc: Fix cumulative takeover timeout Commit c40fc62642ff5ac49b75e9af49c299e33dbc9073 runs the IP allocation algorithm after calculating the timeout offset. If the algorithm takes a long time then there may be no attempt to release or take over IPs. Instead, reset the timeout just before the RELEASE_IP stage if an early jump to IPREALLOCATED was not taken. BUG: https://bugzilla.samba.org/show_bug.cgi?id=12161 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs Autobuild-User(master): Amitay Isaacs Autobuild-Date(master): Thu Aug 18 12:36:37 CEST 2016 on sn-devel-144 (cherry picked from commit 626dcc9e493e2ac4fd502f75c7cb4d29f686f017) --- ctdb/server/ctdb_takeover.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index f52bc24..73679b2 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -1775,16 +1775,9 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodem bool *retry_data; bool can_host_ips; - /* Each of the later stages (RELEASE_IP, TAKEOVER_IP, - * IPREALLOCATED) notionally has a timeout of TakeoverTimeout - * seconds. However, RELEASE_IP can take longer due to TCP - * connection killing, so sometimes needs more time. - * Therefore, use a cumulative timeout of TakeoverTimeout * 3 - * seconds across all 3 stages. No explicit expiry checks are - * needed before each stage because tevent is smart enough to - * fire the timeouts even if they are in the past. Initialise - * this here to cope with early jumps to IPREALLOCATED. */ - timeout = timeval_current_ofs(3 * ctdb->tunable.takeover_timeout,0); + /* Default timeout for early jump to IPREALLOCATED. See below + * for explanation of 3 times... */ + timeout = timeval_current_ofs(3 * ctdb->tunable.takeover_timeout, 0); /* * ip failover is completely disabled, just send out the @@ -1863,6 +1856,20 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodem ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */ + /* Each of the following stages (RELEASE_IP, TAKEOVER_IP, + * IPREALLOCATED) notionally has a timeout of TakeoverTimeout + * seconds. However, RELEASE_IP can take longer due to TCP + * connection killing, so sometimes needs more time. + * Therefore, use a cumulative timeout of TakeoverTimeout * 3 + * seconds across all 3 stages. No explicit expiry checks are + * needed before each stage because tevent is smart enough to + * fire the timeouts even if they are in the past. Initialise + * this here so it explicitly covers the stages we're + * interested in but, in particular, not the time taken by the + * ipalloc(). + */ + timeout = timeval_current_ofs(3 * ctdb->tunable.takeover_timeout, 0); + /* Send a RELEASE_IP to all nodes that should not be hosting * each IP. For each IP, all but one of these will be * redundant. However, the redundant ones are used to tell -- 2.8.1