From 01b32af0d6ccad7bccd515d82f39833d84aa1231 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Thu, 17 Mar 2016 17:16:09 +1100 Subject: [PATCH 1/3] ctdb-protocol: Add srvid for assigning banning credits Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit fc63eae80b7b521598560b970a4ce10a0838a3ce) --- ctdb/protocol/protocol.h | 5 ++++- ctdb/protocol/protocol_message.c | 12 ++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/ctdb/protocol/protocol.h b/ctdb/protocol/protocol.h index 798c928..aae6347 100644 --- a/ctdb/protocol/protocol.h +++ b/ctdb/protocol/protocol.h @@ -124,6 +124,9 @@ struct ctdb_call { /* SRVID prefix used during recovery for pulling and pushing databases */ #define CTDB_SRVID_RECOVERY 0xF001000000000000LL +/* SRVID to assign of banning credits */ +#define CTDB_SRVID_BANNING 0xF002000000000000LL + /* SRVID to inform of election data */ #define CTDB_SRVID_ELECTION 0xF100000000000000LL @@ -993,7 +996,7 @@ union ctdb_message_data { uint32_t db_id; /* SRVID_MEM_DUMP, SRVID_TAKEOVER_RUN */ struct ctdb_srvid_message *msg; - /* SRVID_REBALANCE_NODE */ + /* SRVID_BANNING, SRVID_REBALANCE_NODE */ uint32_t pnn; /* SRVID_DISABLE_TAKEOVER_RUNS, SRVID_DISABLE_RECOVERIES */ struct ctdb_disable_message *disable; diff --git a/ctdb/protocol/protocol_message.c b/ctdb/protocol/protocol_message.c index 696367e..615a49f 100644 --- a/ctdb/protocol/protocol_message.c +++ b/ctdb/protocol/protocol_message.c @@ -40,6 +40,10 @@ static size_t ctdb_message_data_len(union ctdb_message_data *mdata, size_t len = 0; switch (srvid) { + case CTDB_SRVID_BANNING: + len = ctdb_uint32_len(mdata->pnn); + break; + case CTDB_SRVID_ELECTION: len = ctdb_election_message_len(mdata->election); break; @@ -114,6 +118,10 @@ static void ctdb_message_data_push(union ctdb_message_data *mdata, uint64_t srvid, uint8_t *buf) { switch (srvid) { + case CTDB_SRVID_BANNING: + ctdb_uint32_push(mdata->pnn, buf); + break; + case CTDB_SRVID_ELECTION: 
ctdb_election_message_push(mdata->election, buf); break; @@ -189,6 +197,10 @@ static int ctdb_message_data_pull(uint8_t *buf, size_t buflen, int ret = 0; switch (srvid) { + case CTDB_SRVID_BANNING: + ret = ctdb_uint32_pull(buf, buflen, mem_ctx, &mdata->pnn); + break; + case CTDB_SRVID_ELECTION: ret = ctdb_election_message_pull(buf, buflen, mem_ctx, &mdata->election); -- 2.5.5 From 85d48ee047651fea799a91a02b4a6a3fead3142a Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Thu, 17 Mar 2016 17:26:30 +1100 Subject: [PATCH 2/3] ctdb-recoverd: Add message handler to assigning banning credits This will be called from recovery helper to assign banning credits to misbehaving node. Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit ae366fb932e9d42fbde5aa48f04d70e15dc36888) --- ctdb/server/ctdb_recoverd.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index e42433d..c0a06b6 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -2660,6 +2660,30 @@ static void process_ipreallocate_requests(struct ctdb_context *ctdb, srvid_requests_reply(ctdb, &current, result); } +/* + * handler for assigning banning credits + */ +static void banning_handler(uint64_t srvid, TDB_DATA data, void *private_data) +{ + struct ctdb_recoverd *rec = talloc_get_type( + private_data, struct ctdb_recoverd); + uint32_t ban_pnn; + + /* Ignore if we are not recmaster */ + if (rec->ctdb->pnn != rec->recmaster) { + return; + } + + if (data.dsize != sizeof(uint32_t)) { + DEBUG(DEBUG_ERR, (__location__ "invalid data size %zu\n", + data.dsize)); + return; + } + + ban_pnn = *(uint32_t *)data.dptr; + + ctdb_set_culprit_count(rec, ban_pnn, rec->nodemap->num); +} /* handler for recovery master elections @@ -3888,6 +3912,10 @@ static void monitor_cluster(struct ctdb_context *ctdb) /* register a message port for sending memory dumps */ ctdb_client_set_message_handler(ctdb, 
CTDB_SRVID_MEM_DUMP, mem_dump_handler, rec); + /* when a node is assigned banning credits */ + ctdb_client_set_message_handler(ctdb, CTDB_SRVID_BANNING, + banning_handler, rec); + /* register a message port for recovery elections */ ctdb_client_set_message_handler(ctdb, CTDB_SRVID_ELECTION, election_handler, rec); -- 2.5.5 From 324bdbc9904259b79299861e913087df70c175cc Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Tue, 15 Mar 2016 15:08:24 +1100 Subject: [PATCH 3/3] ctdb-recovery-helper: Add banning to parallel recovery If one or more nodes are misbehaving during recovery, keep track of failures as ban_credits. If the node with the highest ban_credits exceeds 5 ban credits, then tell recovery daemon to assign banning credits. This will ban only a single node at a time in case of recovery failure. Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke Autobuild-User(master): Martin Schwenke Autobuild-Date(master): Fri Mar 25 06:57:32 CET 2016 on sn-devel-144 (cherry picked from commit c51b8c22349bde6a3280c51ac147cab5ea27b5a6) --- ctdb/server/ctdb_recovery_helper.c | 95 ++++++++++++++++++++++++++++++++++---- 1 file changed, 87 insertions(+), 8 deletions(-) diff --git a/ctdb/server/ctdb_recovery_helper.c b/ctdb/server/ctdb_recovery_helper.c index d1ec1e7..24aa42f 100644 --- a/ctdb/server/ctdb_recovery_helper.c +++ b/ctdb/server/ctdb_recovery_helper.c @@ -1218,6 +1218,7 @@ struct collect_highseqnum_db_state { uint32_t *pnn_list; int count; uint32_t *caps; + uint32_t *ban_credits; uint32_t db_id; struct recdb_context *recdb; uint32_t max_pnn; @@ -1231,7 +1232,8 @@ static struct tevent_req *collect_highseqnum_db_send( struct tevent_context *ev, struct ctdb_client_context *client, uint32_t *pnn_list, int count, uint32_t *caps, - uint32_t db_id, struct recdb_context *recdb) + uint32_t *ban_credits, uint32_t db_id, + struct recdb_context *recdb) { struct tevent_req *req, *subreq; struct collect_highseqnum_db_state *state; @@ -1248,6 +1250,7 @@ static struct 
tevent_req *collect_highseqnum_db_send( state->pnn_list = pnn_list; state->count = count; state->caps = caps; + state->ban_credits = ban_credits; state->db_id = db_id; state->recdb = recdb; @@ -1332,12 +1335,15 @@ static void collect_highseqnum_db_pulldb_done(struct tevent_req *subreq) { struct tevent_req *req = tevent_req_callback_data( subreq, struct tevent_req); + struct collect_highseqnum_db_state *state = tevent_req_data( + req, struct collect_highseqnum_db_state); int ret; bool status; status = pull_database_recv(subreq, &ret); TALLOC_FREE(subreq); if (! status) { + state->ban_credits[state->max_pnn] += 1; tevent_req_error(req, ret); return; } @@ -1360,6 +1366,7 @@ struct collect_all_db_state { uint32_t *pnn_list; int count; uint32_t *caps; + uint32_t *ban_credits; uint32_t db_id; struct recdb_context *recdb; struct ctdb_pulldb pulldb; @@ -1373,7 +1380,8 @@ static struct tevent_req *collect_all_db_send( struct tevent_context *ev, struct ctdb_client_context *client, uint32_t *pnn_list, int count, uint32_t *caps, - uint32_t db_id, struct recdb_context *recdb) + uint32_t *ban_credits, uint32_t db_id, + struct recdb_context *recdb) { struct tevent_req *req, *subreq; struct collect_all_db_state *state; @@ -1418,6 +1426,8 @@ static void collect_all_db_pulldb_done(struct tevent_req *subreq) status = pull_database_recv(subreq, &ret); TALLOC_FREE(subreq); if (! 
status) { + pnn = state->pnn_list[state->index]; + state->ban_credits[pnn] += 1; tevent_req_error(req, ret); return; } @@ -1463,6 +1473,7 @@ struct recover_db_state { uint32_t *pnn_list; int count; uint32_t *caps; + uint32_t *ban_credits; uint32_t db_id; bool persistent; @@ -1489,6 +1500,7 @@ static struct tevent_req *recover_db_send(TALLOC_CTX *mem_ctx, struct ctdb_tunable_list *tun_list, uint32_t *pnn_list, int count, uint32_t *caps, + uint32_t *ban_credits, uint32_t generation, uint32_t db_id, bool persistent) { @@ -1507,6 +1519,7 @@ static struct tevent_req *recover_db_send(TALLOC_CTX *mem_ctx, state->pnn_list = pnn_list; state->count = count; state->caps = caps; + state->ban_credits = ban_credits; state->db_id = db_id; state->persistent = persistent; @@ -1692,12 +1705,14 @@ static void recover_db_transaction_started(struct tevent_req *subreq) subreq = collect_highseqnum_db_send( state, state->ev, state->client, state->pnn_list, state->count, state->caps, - state->db_id, state->recdb); + state->ban_credits, state->db_id, + state->recdb); } else { subreq = collect_all_db_send( state, state->ev, state->client, state->pnn_list, state->count, state->caps, - state->db_id, state->recdb); + state->ban_credits, state->db_id, + state->recdb); } if (tevent_req_nomem(subreq, req)) { return; @@ -1912,6 +1927,7 @@ struct db_recovery_one_state { uint32_t *pnn_list; int count; uint32_t *caps; + uint32_t *ban_credits; uint32_t generation; uint32_t db_id; bool persistent; @@ -1927,6 +1943,7 @@ static struct tevent_req *db_recovery_send(TALLOC_CTX *mem_ctx, struct ctdb_tunable_list *tun_list, uint32_t *pnn_list, int count, uint32_t *caps, + uint32_t *ban_credits, uint32_t generation) { struct tevent_req *req, *subreq; @@ -1963,13 +1980,14 @@ static struct tevent_req *db_recovery_send(TALLOC_CTX *mem_ctx, substate->pnn_list = pnn_list; substate->count = count; substate->caps = caps; + substate->ban_credits = ban_credits; substate->generation = generation; substate->db_id = 
dbmap->dbs[i].db_id; substate->persistent = dbmap->dbs[i].flags & CTDB_DB_FLAGS_PERSISTENT; subreq = recover_db_send(state, ev, client, tun_list, - pnn_list, count, caps, + pnn_list, count, caps, ban_credits, generation, substate->db_id, substate->persistent); if (tevent_req_nomem(subreq, req)) { @@ -2005,7 +2023,7 @@ static void db_recovery_one_done(struct tevent_req *subreq) subreq = recover_db_send(state, state->ev, substate->client, substate->tun_list, substate->pnn_list, substate->count, - substate->caps, + substate->caps, substate->ban_credits, substate->generation, substate->db_id, substate->persistent); if (tevent_req_nomem(subreq, req)) { @@ -2074,6 +2092,7 @@ struct recovery_state { uint32_t destnode; struct ctdb_node_map *nodemap; uint32_t *caps; + uint32_t *ban_credits; struct ctdb_tunable_list *tun_list; struct ctdb_vnn_map *vnnmap; struct ctdb_dbid_map *dbmap; @@ -2088,6 +2107,7 @@ static void recovery_active_done(struct tevent_req *subreq); static void recovery_start_recovery_done(struct tevent_req *subreq); static void recovery_vnnmap_update_done(struct tevent_req *subreq); static void recovery_db_recovery_done(struct tevent_req *subreq); +static void recovery_failed_done(struct tevent_req *subreq); static void recovery_normal_done(struct tevent_req *subreq); static void recovery_end_recovery_done(struct tevent_req *subreq); @@ -2197,6 +2217,12 @@ static void recovery_nodemap_done(struct tevent_req *subreq) return; } + state->ban_credits = talloc_zero_array(state, uint32_t, + state->nodemap->num); + if (tevent_req_nomem(state->ban_credits, req)) { + return; + } + ctdb_req_control_getvnnmap(&request); subreq = ctdb_client_control_send(state, state->ev, state->client, state->destnode, TIMEOUT(), @@ -2523,7 +2549,8 @@ static void recovery_vnnmap_update_done(struct tevent_req *subreq) subreq = db_recovery_send(state, state->ev, state->client, state->dbmap, state->tun_list, state->pnn_list, state->count, - state->caps, state->vnnmap->generation); + 
state->caps, state->ban_credits, + state->vnnmap->generation); if (tevent_req_nomem(subreq, req)) { return; } @@ -2546,7 +2573,43 @@ static void recovery_db_recovery_done(struct tevent_req *subreq) LOG("%d of %d databases recovered\n", count, state->dbmap->num); if (! status) { - tevent_req_error(req, EIO); + uint32_t max_pnn = CTDB_UNKNOWN_PNN, max_credits = 0; + int i; + + /* Bans are not enabled */ + if (state->tun_list->enable_bans == 0) { + tevent_req_error(req, EIO); + return; + } + + for (i=0; i<state->count; i++) { + uint32_t pnn; + pnn = state->pnn_list[i]; + if (state->ban_credits[pnn] > max_credits) { + max_pnn = pnn; + max_credits = state->ban_credits[pnn]; + } + } + + /* If pulling database fails multiple times */ + if (max_credits >= NUM_RETRIES) { + struct ctdb_req_message message; + + LOG("Assigning banning credits to node %u\n", max_pnn); + + message.srvid = CTDB_SRVID_BANNING; + message.data.pnn = max_pnn; + + subreq = ctdb_client_message_send( + state, state->ev, state->client, + ctdb_client_pnn(state->client), + &message); + if (tevent_req_nomem(subreq, req)) { + return; + } + tevent_req_set_callback(subreq, recovery_failed_done, + req); + } return; } @@ -2561,6 +2624,22 @@ static void recovery_db_recovery_done(struct tevent_req *subreq) tevent_req_set_callback(subreq, recovery_normal_done, req); } +static void recovery_failed_done(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + int ret; + bool status; + + status = ctdb_client_message_recv(subreq, &ret); + TALLOC_FREE(subreq); + if (! status) { + LOG("failed to assign banning credits, ret=%d\n", ret); + } + + tevent_req_error(req, EIO); +} + static void recovery_normal_done(struct tevent_req *subreq) { struct tevent_req *req = tevent_req_callback_data( -- 2.5.5