From cd5d0ab71a54210b78f7e71392042ee869bfb332 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Mon, 2 Mar 2020 13:59:42 +1100 Subject: [PATCH 01/17] ctdb-recovery: Fetched vnnmap is never used, so don't fetch it New vnnmap is constructed using the information from all the connected nodes. So there is no need to fetch the vnnmap from recovery master. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit 072ff4d12b8f34766120ddae888d772e97bca491) --- ctdb/server/ctdb_recovery_helper.c | 40 ------------------------------ 1 file changed, 40 deletions(-) diff --git a/ctdb/server/ctdb_recovery_helper.c b/ctdb/server/ctdb_recovery_helper.c index 0597c507ba6..7356ebdb062 100644 --- a/ctdb/server/ctdb_recovery_helper.c +++ b/ctdb/server/ctdb_recovery_helper.c @@ -2048,7 +2048,6 @@ static bool db_recovery_recv(struct tevent_req *req, unsigned int *count) * * - Get tunables * - Get nodemap - * - Get vnnmap * - Get capabilities from all nodes * - Get dbmap * - Set RECOVERY_ACTIVE @@ -2076,7 +2075,6 @@ struct recovery_state { static void recovery_tunables_done(struct tevent_req *subreq); static void recovery_nodemap_done(struct tevent_req *subreq); -static void recovery_vnnmap_done(struct tevent_req *subreq); static void recovery_capabilities_done(struct tevent_req *subreq); static void recovery_dbmap_done(struct tevent_req *subreq); static void recovery_active_done(struct tevent_req *subreq); @@ -2199,43 +2197,6 @@ static void recovery_nodemap_done(struct tevent_req *subreq) return; } - ctdb_req_control_getvnnmap(&request); - subreq = ctdb_client_control_send(state, state->ev, state->client, - state->destnode, TIMEOUT(), - &request); - if (tevent_req_nomem(subreq, req)) { - return; - } - tevent_req_set_callback(subreq, recovery_vnnmap_done, req); -} - -static void recovery_vnnmap_done(struct tevent_req *subreq) -{ - struct tevent_req *req = tevent_req_callback_data( - subreq, struct 
tevent_req); - struct recovery_state *state = tevent_req_data( - req, struct recovery_state); - struct ctdb_reply_control *reply; - struct ctdb_req_control request; - bool status; - int ret; - - status = ctdb_client_control_recv(subreq, &ret, state, &reply); - TALLOC_FREE(subreq); - if (! status) { - D_ERR("control GETVNNMAP failed to node %u, ret=%d\n", - state->destnode, ret); - tevent_req_error(req, ret); - return; - } - - ret = ctdb_reply_control_getvnnmap(reply, state, &state->vnnmap); - if (ret != 0) { - D_ERR("control GETVNNMAP failed, ret=%d\n", ret); - tevent_req_error(req, ret); - return; - } - ctdb_req_control_get_capabilities(&request); subreq = ctdb_client_control_multi_send(state, state->ev, state->client, @@ -2435,7 +2396,6 @@ static void recovery_active_done(struct tevent_req *subreq) vnnmap->generation = state->generation; - talloc_free(state->vnnmap); state->vnnmap = vnnmap; ctdb_req_control_start_recovery(&request); -- 2.25.1 From 93e8ab320e97f2a4a55d6e57e9e0d5c16d02fec8 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Mon, 2 Mar 2020 15:07:21 +1100 Subject: [PATCH 02/17] ctdb-recovery: Consolidate node state This avoids passing multiple arguments to async computation. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit 6e2f8756f1bce4dfc7fdc435e082f400116e29ec) --- ctdb/server/ctdb_recovery_helper.c | 585 +++++++++++++++++------------ 1 file changed, 346 insertions(+), 239 deletions(-) diff --git a/ctdb/server/ctdb_recovery_helper.c b/ctdb/server/ctdb_recovery_helper.c index 7356ebdb062..893cb15d9d6 100644 --- a/ctdb/server/ctdb_recovery_helper.c +++ b/ctdb/server/ctdb_recovery_helper.c @@ -70,6 +70,105 @@ static uint64_t srvid_next(void) return rec_srvid; } +/* + * Node related functions + */ + +struct node_list { + uint32_t *pnn_list; + uint32_t *caps; + uint32_t *ban_credits; + unsigned int size; + unsigned int count; +}; + +static struct node_list *node_list_init(TALLOC_CTX *mem_ctx, unsigned int size) +{ + struct node_list *nlist; + unsigned int i; + + nlist = talloc_zero(mem_ctx, struct node_list); + if (nlist == NULL) { + return NULL; + } + + nlist->pnn_list = talloc_array(nlist, uint32_t, size); + nlist->caps = talloc_zero_array(nlist, uint32_t, size); + nlist->ban_credits = talloc_zero_array(nlist, uint32_t, size); + + if (nlist->pnn_list == NULL || + nlist->caps == NULL || + nlist->ban_credits == NULL) { + talloc_free(nlist); + return NULL; + } + nlist->size = size; + + for (i=0; i<nlist->size; i++) { + nlist->pnn_list[i] = CTDB_UNKNOWN_PNN; + } + + return nlist; +} + +static bool node_list_add(struct node_list *nlist, uint32_t pnn) +{ + unsigned int i; + + if (nlist->count == nlist->size) { + return false; + } + + for (i=0; i<nlist->count; i++) { + if (nlist->pnn_list[i] == pnn) { + return false; + } + } + + nlist->pnn_list[nlist->count] = pnn; + nlist->count += 1; + + return true; +} + +static uint32_t *node_list_lmaster(struct node_list *nlist, + TALLOC_CTX *mem_ctx, + unsigned int *pnn_count) +{ + uint32_t *pnn_list; + unsigned int count, i; + + pnn_list = talloc_zero_array(mem_ctx, uint32_t, nlist->count); + if (pnn_list == NULL) { + return
NULL; + } + + count = 0; + for (i=0; i<nlist->count; i++) { + if (!(nlist->caps[i] & CTDB_CAP_LMASTER)) { + continue; + } + + pnn_list[count] = nlist->pnn_list[i]; + count += 1; + } + + *pnn_count = count; + return pnn_list; +} + +static void node_list_ban_credits(struct node_list *nlist, uint32_t pnn) +{ + unsigned int i; + + for (i=0; i<nlist->count; i++) { + if (nlist->pnn_list[i] == pnn) { + nlist->ban_credits[i] += 1; + break; + } + } +} + /* * Recovery database functions */ @@ -665,9 +764,9 @@ struct push_database_old_state { struct ctdb_client_context *client; struct recdb_context *recdb; uint32_t *pnn_list; - int count; + unsigned int count; struct ctdb_rec_buffer *recbuf; - int index; + unsigned int index; }; static void push_database_old_push_done(struct tevent_req *subreq); @@ -676,7 +775,8 @@ static struct tevent_req *push_database_old_send( TALLOC_CTX *mem_ctx, struct tevent_context *ev, struct ctdb_client_context *client, - uint32_t *pnn_list, int count, + uint32_t *pnn_list, + unsigned int count, struct recdb_context *recdb) { struct tevent_req *req, *subreq; @@ -769,7 +869,7 @@ struct push_database_new_state { struct ctdb_client_context *client; struct recdb_context *recdb; uint32_t *pnn_list; - int count; + unsigned int count; uint64_t srvid; uint32_t dmaster; int fd; @@ -787,7 +887,8 @@ static struct tevent_req *push_database_new_send( TALLOC_CTX *mem_ctx, struct tevent_context *ev, struct ctdb_client_context *client, - uint32_t *pnn_list, int count, + uint32_t *pnn_list, + unsigned int count, struct recdb_context *recdb, int max_size) { @@ -991,7 +1092,8 @@ static void push_database_new_confirmed(struct tevent_req *subreq) struct ctdb_reply_control **reply; int *err_list; bool status; - int ret, i; + unsigned int i; + int ret; uint32_t num_records; status = ctdb_client_control_multi_recv(subreq, &ret, state, @@ -1062,7 +1164,7 @@ static struct tevent_req *push_database_send( TALLOC_CTX *mem_ctx, struct tevent_context *ev, struct ctdb_client_context *client, - 
uint32_t *pnn_list, int count, uint32_t *caps, + struct node_list *nlist, struct ctdb_tunable_list *tun_list, struct recdb_context *recdb) { @@ -1070,7 +1172,7 @@ static struct tevent_req *push_database_send( struct push_database_state *state; uint32_t *old_list, *new_list; unsigned int old_count, new_count; - int i; + unsigned int i; req = tevent_req_create(mem_ctx, &state, struct push_database_state); if (req == NULL) { @@ -1082,21 +1184,19 @@ static struct tevent_req *push_database_send( old_count = 0; new_count = 0; - old_list = talloc_array(state, uint32_t, count); - new_list = talloc_array(state, uint32_t, count); + old_list = talloc_array(state, uint32_t, nlist->count); + new_list = talloc_array(state, uint32_t, nlist->count); if (tevent_req_nomem(old_list, req) || tevent_req_nomem(new_list,req)) { return tevent_req_post(req, ev); } - for (i=0; icount; i++) { + if (nlist->caps[i] & CTDB_CAP_FRAGMENTED_CONTROLS) { + new_list[new_count] = nlist->pnn_list[i]; new_count += 1; } else { - old_list[old_count] = pnn; + old_list[old_count] = nlist->pnn_list[i]; old_count += 1; } } @@ -1183,12 +1283,10 @@ static bool push_database_recv(struct tevent_req *req, int *perr) struct collect_highseqnum_db_state { struct tevent_context *ev; struct ctdb_client_context *client; - uint32_t *pnn_list; - int count; - uint32_t *caps; - uint32_t *ban_credits; + struct node_list *nlist; uint32_t db_id; struct recdb_context *recdb; + uint32_t max_pnn; }; @@ -1199,8 +1297,8 @@ static struct tevent_req *collect_highseqnum_db_send( TALLOC_CTX *mem_ctx, struct tevent_context *ev, struct ctdb_client_context *client, - uint32_t *pnn_list, int count, uint32_t *caps, - uint32_t *ban_credits, uint32_t db_id, + struct node_list *nlist, + uint32_t db_id, struct recdb_context *recdb) { struct tevent_req *req, *subreq; @@ -1215,17 +1313,18 @@ static struct tevent_req *collect_highseqnum_db_send( state->ev = ev; state->client = client; - state->pnn_list = pnn_list; - state->count = count; - 
state->caps = caps; - state->ban_credits = ban_credits; + state->nlist = nlist; state->db_id = db_id; state->recdb = recdb; ctdb_req_control_get_db_seqnum(&request, db_id); - subreq = ctdb_client_control_multi_send(mem_ctx, ev, client, - state->pnn_list, state->count, - TIMEOUT(), &request); + subreq = ctdb_client_control_multi_send(mem_ctx, + ev, + client, + nlist->pnn_list, + nlist->count, + TIMEOUT(), + &request); if (tevent_req_nomem(subreq, req)) { return tevent_req_post(req, ev); } @@ -1244,8 +1343,10 @@ static void collect_highseqnum_db_seqnum_done(struct tevent_req *subreq) struct ctdb_reply_control **reply; int *err_list; bool status; - int ret, i; + unsigned int i; + int ret; uint64_t seqnum, max_seqnum; + uint32_t max_caps; status = ctdb_client_control_multi_recv(subreq, &ret, state, &err_list, &reply); @@ -1254,8 +1355,9 @@ static void collect_highseqnum_db_seqnum_done(struct tevent_req *subreq) int ret2; uint32_t pnn; - ret2 = ctdb_client_control_multi_error(state->pnn_list, - state->count, err_list, + ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, + state->nlist->count, + err_list, &pnn); if (ret2 != 0) { D_ERR("control GET_DB_SEQNUM failed for db %s" @@ -1271,8 +1373,9 @@ static void collect_highseqnum_db_seqnum_done(struct tevent_req *subreq) } max_seqnum = 0; - state->max_pnn = state->pnn_list[0]; - for (i=0; icount; i++) { + state->max_pnn = state->nlist->pnn_list[0]; + max_caps = state->nlist->caps[0]; + for (i=0; inlist->count; i++) { ret = ctdb_reply_control_get_db_seqnum(reply[i], &seqnum); if (ret != 0) { tevent_req_error(req, EPROTO); @@ -1281,7 +1384,8 @@ static void collect_highseqnum_db_seqnum_done(struct tevent_req *subreq) if (max_seqnum < seqnum) { max_seqnum = seqnum; - state->max_pnn = state->pnn_list[i]; + state->max_pnn = state->nlist->pnn_list[i]; + max_caps = state->nlist->caps[i]; } } @@ -1290,9 +1394,11 @@ static void collect_highseqnum_db_seqnum_done(struct tevent_req *subreq) D_INFO("Pull persistent db %s from 
node %d with seqnum 0x%"PRIx64"\n", recdb_name(state->recdb), state->max_pnn, max_seqnum); - subreq = pull_database_send(state, state->ev, state->client, + subreq = pull_database_send(state, + state->ev, + state->client, state->max_pnn, - state->caps[state->max_pnn], + max_caps, state->recdb); if (tevent_req_nomem(subreq, req)) { return; @@ -1313,7 +1419,7 @@ static void collect_highseqnum_db_pulldb_done(struct tevent_req *subreq) status = pull_database_recv(subreq, &ret); TALLOC_FREE(subreq); if (! status) { - state->ban_credits[state->max_pnn] += 1; + node_list_ban_credits(state->nlist, state->max_pnn); tevent_req_error(req, ret); return; } @@ -1333,14 +1439,12 @@ static bool collect_highseqnum_db_recv(struct tevent_req *req, int *perr) struct collect_all_db_state { struct tevent_context *ev; struct ctdb_client_context *client; - uint32_t *pnn_list; - int count; - uint32_t *caps; - uint32_t *ban_credits; + struct node_list *nlist; uint32_t db_id; struct recdb_context *recdb; + struct ctdb_pulldb pulldb; - int index; + unsigned int index; }; static void collect_all_db_pulldb_done(struct tevent_req *subreq); @@ -1349,13 +1453,12 @@ static struct tevent_req *collect_all_db_send( TALLOC_CTX *mem_ctx, struct tevent_context *ev, struct ctdb_client_context *client, - uint32_t *pnn_list, int count, uint32_t *caps, - uint32_t *ban_credits, uint32_t db_id, + struct node_list *nlist, + uint32_t db_id, struct recdb_context *recdb) { struct tevent_req *req, *subreq; struct collect_all_db_state *state; - uint32_t pnn; req = tevent_req_create(mem_ctx, &state, struct collect_all_db_state); @@ -1365,17 +1468,17 @@ static struct tevent_req *collect_all_db_send( state->ev = ev; state->client = client; - state->pnn_list = pnn_list; - state->count = count; - state->caps = caps; - state->ban_credits = ban_credits; + state->nlist = nlist; state->db_id = db_id; state->recdb = recdb; state->index = 0; - pnn = state->pnn_list[state->index]; - - subreq = pull_database_send(state, ev, 
client, pnn, caps[pnn], recdb); + subreq = pull_database_send(state, + ev, + client, + nlist->pnn_list[state->index], + nlist->caps[state->index], + recdb); if (tevent_req_nomem(subreq, req)) { return tevent_req_post(req, ev); } @@ -1390,28 +1493,30 @@ static void collect_all_db_pulldb_done(struct tevent_req *subreq) subreq, struct tevent_req); struct collect_all_db_state *state = tevent_req_data( req, struct collect_all_db_state); - uint32_t pnn; int ret; bool status; status = pull_database_recv(subreq, &ret); TALLOC_FREE(subreq); if (! status) { - pnn = state->pnn_list[state->index]; - state->ban_credits[pnn] += 1; + node_list_ban_credits(state->nlist, + state->nlist->pnn_list[state->index]); tevent_req_error(req, ret); return; } state->index += 1; - if (state->index == state->count) { + if (state->index == state->nlist->count) { tevent_req_done(req); return; } - pnn = state->pnn_list[state->index]; - subreq = pull_database_send(state, state->ev, state->client, - pnn, state->caps[pnn], state->recdb); + subreq = pull_database_send(state, + state->ev, + state->client, + state->nlist->pnn_list[state->index], + state->nlist->caps[state->index], + state->recdb); if (tevent_req_nomem(subreq, req)) { return; } @@ -1441,10 +1546,7 @@ struct recover_db_state { struct tevent_context *ev; struct ctdb_client_context *client; struct ctdb_tunable_list *tun_list; - uint32_t *pnn_list; - int count; - uint32_t *caps; - uint32_t *ban_credits; + struct node_list *nlist; uint32_t db_id; uint8_t db_flags; @@ -1469,11 +1571,10 @@ static struct tevent_req *recover_db_send(TALLOC_CTX *mem_ctx, struct tevent_context *ev, struct ctdb_client_context *client, struct ctdb_tunable_list *tun_list, - uint32_t *pnn_list, int count, - uint32_t *caps, - uint32_t *ban_credits, + struct node_list *nlist, uint32_t generation, - uint32_t db_id, uint8_t db_flags) + uint32_t db_id, + uint8_t db_flags) { struct tevent_req *req, *subreq; struct recover_db_state *state; @@ -1487,10 +1588,7 @@ static struct 
tevent_req *recover_db_send(TALLOC_CTX *mem_ctx, state->ev = ev; state->client = client; state->tun_list = tun_list; - state->pnn_list = pnn_list; - state->count = count; - state->caps = caps; - state->ban_credits = ban_credits; + state->nlist = nlist; state->db_id = db_id; state->db_flags = db_flags; @@ -1580,10 +1678,13 @@ static void recover_db_path_done(struct tevent_req *subreq) talloc_free(reply); ctdb_req_control_db_freeze(&request, state->db_id); - subreq = ctdb_client_control_multi_send(state, state->ev, + subreq = ctdb_client_control_multi_send(state, + state->ev, state->client, - state->pnn_list, state->count, - TIMEOUT(), &request); + state->nlist->pnn_list, + state->nlist->count, + TIMEOUT(), + &request); if (tevent_req_nomem(subreq, req)) { return; } @@ -1608,14 +1709,16 @@ static void recover_db_freeze_done(struct tevent_req *subreq) int ret2; uint32_t pnn; - ret2 = ctdb_client_control_multi_error(state->pnn_list, - state->count, err_list, + ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, + state->nlist->count, + err_list, &pnn); if (ret2 != 0) { D_ERR("control FREEZE_DB failed for db %s" " on node %u, ret=%d\n", state->db_name, pnn, ret2); - state->ban_credits[pnn] += 1; + + node_list_ban_credits(state->nlist, pnn); } else { D_ERR("control FREEZE_DB failed for db %s, ret=%d\n", state->db_name, ret); @@ -1625,10 +1728,13 @@ static void recover_db_freeze_done(struct tevent_req *subreq) } ctdb_req_control_db_transaction_start(&request, &state->transdb); - subreq = ctdb_client_control_multi_send(state, state->ev, + subreq = ctdb_client_control_multi_send(state, + state->ev, state->client, - state->pnn_list, state->count, - TIMEOUT(), &request); + state->nlist->pnn_list, + state->nlist->count, + TIMEOUT(), + &request); if (tevent_req_nomem(subreq, req)) { return; } @@ -1652,9 +1758,10 @@ static void recover_db_transaction_started(struct tevent_req *subreq) int ret2; uint32_t pnn; - ret2 = ctdb_client_control_multi_error(state->pnn_list, - 
state->count, - err_list, &pnn); + ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, + state->nlist->count, + err_list, + &pnn); if (ret2 != 0) { D_ERR("control TRANSACTION_DB failed for db=%s" " on node %u, ret=%d\n", @@ -1677,17 +1784,19 @@ static void recover_db_transaction_started(struct tevent_req *subreq) if ((state->db_flags & CTDB_DB_FLAGS_PERSISTENT) || (state->db_flags & CTDB_DB_FLAGS_REPLICATED)) { - subreq = collect_highseqnum_db_send( - state, state->ev, state->client, - state->pnn_list, state->count, state->caps, - state->ban_credits, state->db_id, - state->recdb); + subreq = collect_highseqnum_db_send(state, + state->ev, + state->client, + state->nlist, + state->db_id, + state->recdb); } else { - subreq = collect_all_db_send( - state, state->ev, state->client, - state->pnn_list, state->count, state->caps, - state->ban_credits, state->db_id, - state->recdb); + subreq = collect_all_db_send(state, + state->ev, + state->client, + state->nlist, + state->db_id, + state->recdb); } if (tevent_req_nomem(subreq, req)) { return; @@ -1718,10 +1827,13 @@ static void recover_db_collect_done(struct tevent_req *subreq) } ctdb_req_control_wipe_database(&request, &state->transdb); - subreq = ctdb_client_control_multi_send(state, state->ev, + subreq = ctdb_client_control_multi_send(state, + state->ev, state->client, - state->pnn_list, state->count, - TIMEOUT(), &request); + state->nlist->pnn_list, + state->nlist->count, + TIMEOUT(), + &request); if (tevent_req_nomem(subreq, req)) { return; } @@ -1745,9 +1857,10 @@ static void recover_db_wipedb_done(struct tevent_req *subreq) int ret2; uint32_t pnn; - ret2 = ctdb_client_control_multi_error(state->pnn_list, - state->count, - err_list, &pnn); + ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, + state->nlist->count, + err_list, + &pnn); if (ret2 != 0) { D_ERR("control WIPEDB failed for db %s on node %u," " ret=%d\n", state->db_name, pnn, ret2); @@ -1759,9 +1872,11 @@ static void 
recover_db_wipedb_done(struct tevent_req *subreq) return; } - subreq = push_database_send(state, state->ev, state->client, - state->pnn_list, state->count, - state->caps, state->tun_list, + subreq = push_database_send(state, + state->ev, + state->client, + state->nlist, + state->tun_list, state->recdb); if (tevent_req_nomem(subreq, req)) { return; @@ -1789,10 +1904,13 @@ static void recover_db_pushdb_done(struct tevent_req *subreq) TALLOC_FREE(state->recdb); ctdb_req_control_db_transaction_commit(&request, &state->transdb); - subreq = ctdb_client_control_multi_send(state, state->ev, + subreq = ctdb_client_control_multi_send(state, + state->ev, state->client, - state->pnn_list, state->count, - TIMEOUT(), &request); + state->nlist->pnn_list, + state->nlist->count, + TIMEOUT(), + &request); if (tevent_req_nomem(subreq, req)) { return; } @@ -1817,9 +1935,10 @@ static void recover_db_transaction_committed(struct tevent_req *subreq) int ret2; uint32_t pnn; - ret2 = ctdb_client_control_multi_error(state->pnn_list, - state->count, - err_list, &pnn); + ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, + state->nlist->count, + err_list, + &pnn); if (ret2 != 0) { D_ERR("control DB_TRANSACTION_COMMIT failed for db %s" " on node %u, ret=%d\n", @@ -1833,10 +1952,13 @@ static void recover_db_transaction_committed(struct tevent_req *subreq) } ctdb_req_control_db_thaw(&request, state->db_id); - subreq = ctdb_client_control_multi_send(state, state->ev, + subreq = ctdb_client_control_multi_send(state, + state->ev, state->client, - state->pnn_list, state->count, - TIMEOUT(), &request); + state->nlist->pnn_list, + state->nlist->count, + TIMEOUT(), + &request); if (tevent_req_nomem(subreq, req)) { return; } @@ -1860,9 +1982,10 @@ static void recover_db_thaw_done(struct tevent_req *subreq) int ret2; uint32_t pnn; - ret2 = ctdb_client_control_multi_error(state->pnn_list, - state->count, - err_list, &pnn); + ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, + 
state->nlist->count, + err_list, + &pnn); if (ret2 != 0) { D_ERR("control DB_THAW failed for db %s on node %u," " ret=%d\n", state->db_name, pnn, ret2); @@ -1901,10 +2024,7 @@ struct db_recovery_one_state { struct ctdb_client_context *client; struct ctdb_dbid_map *dbmap; struct ctdb_tunable_list *tun_list; - uint32_t *pnn_list; - int count; - uint32_t *caps; - uint32_t *ban_credits; + struct node_list *nlist; uint32_t generation; uint32_t db_id; uint8_t db_flags; @@ -1918,9 +2038,7 @@ static struct tevent_req *db_recovery_send(TALLOC_CTX *mem_ctx, struct ctdb_client_context *client, struct ctdb_dbid_map *dbmap, struct ctdb_tunable_list *tun_list, - uint32_t *pnn_list, int count, - uint32_t *caps, - uint32_t *ban_credits, + struct node_list *nlist, uint32_t generation) { struct tevent_req *req, *subreq; @@ -1954,17 +2072,18 @@ static struct tevent_req *db_recovery_send(TALLOC_CTX *mem_ctx, substate->client = client; substate->dbmap = dbmap; substate->tun_list = tun_list; - substate->pnn_list = pnn_list; - substate->count = count; - substate->caps = caps; - substate->ban_credits = ban_credits; + substate->nlist = nlist; substate->generation = generation; substate->db_id = dbmap->dbs[i].db_id; substate->db_flags = dbmap->dbs[i].flags; - subreq = recover_db_send(state, ev, client, tun_list, - pnn_list, count, caps, ban_credits, - generation, substate->db_id, + subreq = recover_db_send(state, + ev, + client, + tun_list, + nlist, + generation, + substate->db_id, substate->db_flags); if (tevent_req_nomem(subreq, req)) { return tevent_req_post(req, ev); @@ -1996,11 +2115,13 @@ static void db_recovery_one_done(struct tevent_req *subreq) substate->num_fails += 1; if (substate->num_fails < NUM_RETRIES) { - subreq = recover_db_send(state, state->ev, substate->client, + subreq = recover_db_send(state, + state->ev, + substate->client, substate->tun_list, - substate->pnn_list, substate->count, - substate->caps, substate->ban_credits, - substate->generation, substate->db_id, + 
substate->nlist, + substate->generation, + substate->db_id, substate->db_flags); if (tevent_req_nomem(subreq, req)) { goto failed; @@ -2062,12 +2183,8 @@ struct recovery_state { struct tevent_context *ev; struct ctdb_client_context *client; uint32_t generation; - uint32_t *pnn_list; - unsigned int count; uint32_t destnode; - struct ctdb_node_map *nodemap; - uint32_t *caps; - uint32_t *ban_credits; + struct node_list *nlist; struct ctdb_tunable_list *tun_list; struct ctdb_vnn_map *vnnmap; struct ctdb_dbid_map *dbmap; @@ -2165,6 +2282,8 @@ static void recovery_nodemap_done(struct tevent_req *subreq) req, struct recovery_state); struct ctdb_reply_control *reply; struct ctdb_req_control request; + struct ctdb_node_map *nodemap; + unsigned int i; bool status; int ret; @@ -2177,31 +2296,34 @@ static void recovery_nodemap_done(struct tevent_req *subreq) return; } - ret = ctdb_reply_control_get_nodemap(reply, state, &state->nodemap); + ret = ctdb_reply_control_get_nodemap(reply, state, &nodemap); if (ret != 0) { D_ERR("control GET_NODEMAP failed, ret=%d\n", ret); tevent_req_error(req, ret); return; } - state->count = list_of_active_nodes(state->nodemap, CTDB_UNKNOWN_PNN, - state, &state->pnn_list); - if (state->count <= 0) { - tevent_req_error(req, ENOMEM); + state->nlist = node_list_init(state, nodemap->num); + if (tevent_req_nomem(state->nlist, req)) { return; } - state->ban_credits = talloc_zero_array(state, uint32_t, - state->nodemap->num); - if (tevent_req_nomem(state->ban_credits, req)) { - return; + for (i=0; inum; i++) { + if (nodemap->node[i].flags & NODE_FLAGS_INACTIVE) { + continue; + } + + node_list_add(state->nlist, nodemap->node[i].pnn); } ctdb_req_control_get_capabilities(&request); - subreq = ctdb_client_control_multi_send(state, state->ev, + subreq = ctdb_client_control_multi_send(state, + state->ev, state->client, - state->pnn_list, state->count, - TIMEOUT(), &request); + state->nlist->pnn_list, + state->nlist->count, + TIMEOUT(), + &request); if 
(tevent_req_nomem(subreq, req)) { return; } @@ -2228,9 +2350,10 @@ static void recovery_capabilities_done(struct tevent_req *subreq) int ret2; uint32_t pnn; - ret2 = ctdb_client_control_multi_error(state->pnn_list, - state->count, - err_list, &pnn); + ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, + state->nlist->count, + err_list, + &pnn); if (ret2 != 0) { D_ERR("control GET_CAPABILITIES failed on node %u," " ret=%d\n", pnn, ret2); @@ -2242,25 +2365,18 @@ static void recovery_capabilities_done(struct tevent_req *subreq) return; } - /* Make the array size same as nodemap */ - state->caps = talloc_zero_array(state, uint32_t, - state->nodemap->num); - if (tevent_req_nomem(state->caps, req)) { - return; - } - - for (i=0; icount; i++) { - uint32_t pnn; + for (i=0; inlist->count; i++) { + uint32_t caps; - pnn = state->pnn_list[i]; - ret = ctdb_reply_control_get_capabilities(reply[i], - &state->caps[pnn]); + ret = ctdb_reply_control_get_capabilities(reply[i], &caps); if (ret != 0) { D_ERR("control GET_CAPABILITIES failed on node %u\n", - pnn); + state->nlist->pnn_list[i]); tevent_req_error(req, EPROTO); return; } + + state->nlist->caps[i] = caps; } talloc_free(reply); @@ -2303,10 +2419,13 @@ static void recovery_dbmap_done(struct tevent_req *subreq) } ctdb_req_control_set_recmode(&request, CTDB_RECOVERY_ACTIVE); - subreq = ctdb_client_control_multi_send(state, state->ev, + subreq = ctdb_client_control_multi_send(state, + state->ev, state->client, - state->pnn_list, state->count, - TIMEOUT(), &request); + state->nlist->pnn_list, + state->nlist->count, + TIMEOUT(), + &request); if (tevent_req_nomem(subreq, req)) { return; } @@ -2323,7 +2442,6 @@ static void recovery_active_done(struct tevent_req *subreq) struct ctdb_vnn_map *vnnmap; int *err_list; int ret; - unsigned int count, i; bool status; status = ctdb_client_control_multi_recv(subreq, &ret, NULL, &err_list, @@ -2333,9 +2451,10 @@ static void recovery_active_done(struct tevent_req *subreq) int ret2; 
uint32_t pnn; - ret2 = ctdb_client_control_multi_error(state->pnn_list, - state->count, - err_list, &pnn); + ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, + state->nlist->count, + err_list, + &pnn); if (ret2 != 0) { D_ERR("failed to set recovery mode ACTIVE on node %u," " ret=%d\n", pnn, ret2); @@ -2350,48 +2469,20 @@ static void recovery_active_done(struct tevent_req *subreq) D_ERR("Set recovery mode to ACTIVE\n"); /* Calculate new VNNMAP */ - count = 0; - for (i=0; inodemap->num; i++) { - if (state->nodemap->node[i].flags & NODE_FLAGS_INACTIVE) { - continue; - } - if (!(state->caps[i] & CTDB_CAP_LMASTER)) { - continue; - } - count += 1; - } - - if (count == 0) { - D_WARNING("No active lmasters found. Adding recmaster anyway\n"); - } - vnnmap = talloc_zero(state, struct ctdb_vnn_map); if (tevent_req_nomem(vnnmap, req)) { return; } - vnnmap->size = (count == 0 ? 1 : count); - vnnmap->map = talloc_array(vnnmap, uint32_t, vnnmap->size); + vnnmap->map = node_list_lmaster(state->nlist, vnnmap, &vnnmap->size); if (tevent_req_nomem(vnnmap->map, req)) { return; } - if (count == 0) { + if (vnnmap->size == 0) { + D_WARNING("No active lmasters found. 
Adding recmaster anyway\n"); vnnmap->map[0] = state->destnode; - } else { - count = 0; - for (i=0; inodemap->num; i++) { - if (state->nodemap->node[i].flags & - NODE_FLAGS_INACTIVE) { - continue; - } - if (!(state->caps[i] & CTDB_CAP_LMASTER)) { - continue; - } - - vnnmap->map[count] = state->nodemap->node[i].pnn; - count += 1; - } + vnnmap->size = 1; } vnnmap->generation = state->generation; @@ -2399,10 +2490,13 @@ static void recovery_active_done(struct tevent_req *subreq) state->vnnmap = vnnmap; ctdb_req_control_start_recovery(&request); - subreq = ctdb_client_control_multi_send(state, state->ev, + subreq = ctdb_client_control_multi_send(state, + state->ev, state->client, - state->pnn_list, state->count, - TIMEOUT(), &request); + state->nlist->pnn_list, + state->nlist->count, + TIMEOUT(), + &request); if (tevent_req_nomem(subreq, req)) { return; } @@ -2427,9 +2521,10 @@ static void recovery_start_recovery_done(struct tevent_req *subreq) int ret2; uint32_t pnn; - ret2 = ctdb_client_control_multi_error(state->pnn_list, - state->count, - err_list, &pnn); + ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, + state->nlist->count, + err_list, + &pnn); if (ret2 != 0) { D_ERR("failed to run start_recovery event on node %u," " ret=%d\n", pnn, ret2); @@ -2444,10 +2539,13 @@ static void recovery_start_recovery_done(struct tevent_req *subreq) D_ERR("start_recovery event finished\n"); ctdb_req_control_setvnnmap(&request, state->vnnmap); - subreq = ctdb_client_control_multi_send(state, state->ev, + subreq = ctdb_client_control_multi_send(state, + state->ev, state->client, - state->pnn_list, state->count, - TIMEOUT(), &request); + state->nlist->pnn_list, + state->nlist->count, + TIMEOUT(), + &request); if (tevent_req_nomem(subreq, req)) { return; } @@ -2471,9 +2569,10 @@ static void recovery_vnnmap_update_done(struct tevent_req *subreq) int ret2; uint32_t pnn; - ret2 = ctdb_client_control_multi_error(state->pnn_list, - state->count, - err_list, &pnn); + ret2 = 
ctdb_client_control_multi_error(state->nlist->pnn_list, + state->nlist->count, + err_list, + &pnn); if (ret2 != 0) { D_ERR("failed to update VNNMAP on node %u, ret=%d\n", pnn, ret2); @@ -2486,10 +2585,12 @@ static void recovery_vnnmap_update_done(struct tevent_req *subreq) D_NOTICE("updated VNNMAP\n"); - subreq = db_recovery_send(state, state->ev, state->client, - state->dbmap, state->tun_list, - state->pnn_list, state->count, - state->caps, state->ban_credits, + subreq = db_recovery_send(state, + state->ev, + state->client, + state->dbmap, + state->tun_list, + state->nlist, state->vnnmap->generation); if (tevent_req_nomem(subreq, req)) { return; @@ -2522,12 +2623,10 @@ static void recovery_db_recovery_done(struct tevent_req *subreq) return; } - for (i=0; icount; i++) { - uint32_t pnn; - pnn = state->pnn_list[i]; - if (state->ban_credits[pnn] > max_credits) { - max_pnn = pnn; - max_credits = state->ban_credits[pnn]; + for (i=0; inlist->count; i++) { + if (state->nlist->ban_credits[i] > max_credits) { + max_pnn = state->nlist->pnn_list[i]; + max_credits = state->nlist->ban_credits[i]; } } @@ -2563,10 +2662,13 @@ static void recovery_db_recovery_done(struct tevent_req *subreq) } ctdb_req_control_set_recmode(&request, CTDB_RECOVERY_NORMAL); - subreq = ctdb_client_control_multi_send(state, state->ev, + subreq = ctdb_client_control_multi_send(state, + state->ev, state->client, - state->pnn_list, state->count, - TIMEOUT(), &request); + state->nlist->pnn_list, + state->nlist->count, + TIMEOUT(), + &request); if (tevent_req_nomem(subreq, req)) { return; } @@ -2617,9 +2719,10 @@ static void recovery_normal_done(struct tevent_req *subreq) int ret2; uint32_t pnn; - ret2 = ctdb_client_control_multi_error(state->pnn_list, - state->count, - err_list, &pnn); + ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, + state->nlist->count, + err_list, + &pnn); if (ret2 != 0) { D_ERR("failed to set recovery mode NORMAL on node %u," " ret=%d\n", pnn, ret2); @@ -2634,10 
+2737,13 @@ static void recovery_normal_done(struct tevent_req *subreq) D_ERR("Set recovery mode to NORMAL\n"); ctdb_req_control_end_recovery(&request); - subreq = ctdb_client_control_multi_send(state, state->ev, + subreq = ctdb_client_control_multi_send(state, + state->ev, state->client, - state->pnn_list, state->count, - TIMEOUT(), &request); + state->nlist->pnn_list, + state->nlist->count, + TIMEOUT(), + &request); if (tevent_req_nomem(subreq, req)) { return; } @@ -2661,9 +2767,10 @@ static void recovery_end_recovery_done(struct tevent_req *subreq) int ret2; uint32_t pnn; - ret2 = ctdb_client_control_multi_error(state->pnn_list, - state->count, - err_list, &pnn); + ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, + state->nlist->count, + err_list, + &pnn); if (ret2 != 0) { D_ERR("failed to run recovered event on node %u," " ret=%d\n", pnn, ret2); -- 2.25.1 From 3d213d0c84880e4cd41412be9f143f9cdb79c27e Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Tue, 18 Feb 2020 16:17:00 +1100 Subject: [PATCH 03/17] ctdb-recovery: Don't trust nodemap obtained from local node It's possible to have a node stopped, but recovery master not yet updated flags on the local ctdb daemon when recovery is started. So do not trust the list of active nodes obtained from the local node. Query the connected nodes to calculate the list of active nodes. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit c6a0ff1bed0265e44fd6135d16bfc41919fe5bf5) --- ctdb/server/ctdb_recovery_helper.c | 116 ++++++++++++++++++++++++++++- 1 file changed, 113 insertions(+), 3 deletions(-) diff --git a/ctdb/server/ctdb_recovery_helper.c b/ctdb/server/ctdb_recovery_helper.c index 893cb15d9d6..5f38d55e50e 100644 --- a/ctdb/server/ctdb_recovery_helper.c +++ b/ctdb/server/ctdb_recovery_helper.c @@ -2168,7 +2168,7 @@ static bool db_recovery_recv(struct tevent_req *req, unsigned int *count) * Run the parallel database recovery * * - Get tunables - * - Get nodemap + * - Get nodemap from all nodes * - Get capabilities from all nodes * - Get dbmap * - Set RECOVERY_ACTIVE @@ -2192,6 +2192,7 @@ struct recovery_state { static void recovery_tunables_done(struct tevent_req *subreq); static void recovery_nodemap_done(struct tevent_req *subreq); +static void recovery_nodemap_verify(struct tevent_req *subreq); static void recovery_capabilities_done(struct tevent_req *subreq); static void recovery_dbmap_done(struct tevent_req *subreq); static void recovery_active_done(struct tevent_req *subreq); @@ -2309,13 +2310,122 @@ static void recovery_nodemap_done(struct tevent_req *subreq) } for (i=0; i<nodemap->num; i++) { - if (nodemap->node[i].flags & NODE_FLAGS_INACTIVE) { + bool ok; + + if (nodemap->node[i].flags & NODE_FLAGS_DISCONNECTED) { continue; } - node_list_add(state->nlist, nodemap->node[i].pnn); + ok = node_list_add(state->nlist, nodemap->node[i].pnn); + if (!ok) { + tevent_req_error(req, EINVAL); + return; + } } + talloc_free(nodemap); + talloc_free(reply); + + /* Verify flags by getting local node information from each node */ + ctdb_req_control_get_nodemap(&request); + subreq = ctdb_client_control_multi_send(state, + state->ev, + state->client, + state->nlist->pnn_list, + state->nlist->count, + TIMEOUT(), + &request); + if (tevent_req_nomem(subreq, req)) { + return; + }
+ tevent_req_set_callback(subreq, recovery_nodemap_verify, req); +} + +static void recovery_nodemap_verify(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + struct recovery_state *state = tevent_req_data( + req, struct recovery_state); + struct ctdb_req_control request; + struct ctdb_reply_control **reply; + struct node_list *nlist; + unsigned int i; + int *err_list; + int ret; + bool status; + + status = ctdb_client_control_multi_recv(subreq, + &ret, + state, + &err_list, + &reply); + TALLOC_FREE(subreq); + if (! status) { + int ret2; + uint32_t pnn; + + ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, + state->nlist->count, + err_list, + &pnn); + if (ret2 != 0) { + D_ERR("control GET_NODEMAP failed on node %u," + " ret=%d\n", pnn, ret2); + } else { + D_ERR("control GET_NODEMAP failed, ret=%d\n", ret); + } + tevent_req_error(req, ret); + return; + } + + nlist = node_list_init(state, state->nlist->size); + if (tevent_req_nomem(nlist, req)) { + return; + } + + for (i=0; i<state->nlist->count; i++) { + struct ctdb_node_map *nodemap = NULL; + uint32_t pnn, flags; + unsigned int j; + bool ok; + + pnn = state->nlist->pnn_list[i]; + ret = ctdb_reply_control_get_nodemap(reply[i], + state, + &nodemap); + if (ret != 0) { + D_ERR("control GET_NODEMAP failed on node %u\n", pnn); + tevent_req_error(req, EPROTO); + return; + } + + flags = NODE_FLAGS_DISCONNECTED; + for (j=0; j<nodemap->num; j++) { + if (nodemap->node[j].pnn == pnn) { + flags = nodemap->node[j].flags; + break; + } + } + + TALLOC_FREE(nodemap); + + if (flags & NODE_FLAGS_INACTIVE) { + continue; + } + + ok = node_list_add(nlist, pnn); + if (!ok) { + tevent_req_error(req, EINVAL); + return; + } + } + + talloc_free(reply); + + talloc_free(state->nlist); + state->nlist = nlist; + ctdb_req_control_get_capabilities(&request); subreq = ctdb_client_control_multi_send(state, state->ev, -- 2.25.1 From d35f5c2a9d2492d612f22e72bc6804f390e42523 Mon Sep 17 00:00:00
2001 From: Amitay Isaacs Date: Mon, 2 Mar 2020 16:16:26 +1100 Subject: [PATCH 04/17] ctdb-recovery: Refactor banning a node into separate computation If a node is marked for banning, confirm that it's not become inactive during the recovery. If yes, then don't ban the node. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit 1c56d6413f86cc15ebac232f39ef1e2a53ae4297) --- ctdb/server/ctdb_recovery_helper.c | 261 +++++++++++++++++++++++------ 1 file changed, 208 insertions(+), 53 deletions(-) diff --git a/ctdb/server/ctdb_recovery_helper.c b/ctdb/server/ctdb_recovery_helper.c index 5f38d55e50e..1f3b58312c4 100644 --- a/ctdb/server/ctdb_recovery_helper.c +++ b/ctdb/server/ctdb_recovery_helper.c @@ -2163,6 +2163,206 @@ static bool db_recovery_recv(struct tevent_req *req, unsigned int *count) return true; } +struct ban_node_state { + struct tevent_context *ev; + struct ctdb_client_context *client; + struct ctdb_tunable_list *tun_list; + struct node_list *nlist; + uint32_t destnode; + + uint32_t max_pnn; +}; + +static bool ban_node_check(struct tevent_req *req); +static void ban_node_check_done(struct tevent_req *subreq); +static void ban_node_done(struct tevent_req *subreq); + +static struct tevent_req *ban_node_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct ctdb_client_context *client, + struct ctdb_tunable_list *tun_list, + struct node_list *nlist) +{ + struct tevent_req *req; + struct ban_node_state *state; + bool ok; + + req = tevent_req_create(mem_ctx, &state, struct ban_node_state); + if (req == NULL) { + return NULL; + } + + state->ev = ev; + state->client = client; + state->tun_list = tun_list; + state->nlist = nlist; + state->destnode = ctdb_client_pnn(client); + + /* Bans are not enabled */ + if (state->tun_list->enable_bans == 0) { + D_ERR("Bans are not enabled\n"); + tevent_req_done(req); + return tevent_req_post(req, ev); + } + + ok = ban_node_check(req); 
+ if (!ok) { + return tevent_req_post(req, ev); + } + + return req; +} + +static bool ban_node_check(struct tevent_req *req) +{ + struct tevent_req *subreq; + struct ban_node_state *state = tevent_req_data( + req, struct ban_node_state); + struct ctdb_req_control request; + unsigned max_credits = 0, i; + + for (i=0; i<state->nlist->count; i++) { + if (state->nlist->ban_credits[i] > max_credits) { + state->max_pnn = state->nlist->pnn_list[i]; + max_credits = state->nlist->ban_credits[i]; + } + } + + if (max_credits < NUM_RETRIES) { + tevent_req_done(req); + return false; + } + + ctdb_req_control_get_nodemap(&request); + subreq = ctdb_client_control_send(state, + state->ev, + state->client, + state->max_pnn, + TIMEOUT(), + &request); + if (tevent_req_nomem(subreq, req)) { + return false; + } + tevent_req_set_callback(subreq, ban_node_check_done, req); + + return true; +} + +static void ban_node_check_done(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + struct ban_node_state *state = tevent_req_data( + req, struct ban_node_state); + struct ctdb_reply_control *reply; + struct ctdb_node_map *nodemap; + struct ctdb_req_control request; + struct ctdb_ban_state ban; + unsigned int i; + int ret; + bool ok; + + ok = ctdb_client_control_recv(subreq, &ret, state, &reply); + TALLOC_FREE(subreq); + if (!ok) { + D_ERR("control GET_NODEMAP failed to node %u, ret=%d\n", + state->max_pnn, ret); + tevent_req_error(req, ret); + return; + } + + ret = ctdb_reply_control_get_nodemap(reply, state, &nodemap); + if (ret != 0) { + D_ERR("control GET_NODEMAP failed, ret=%d\n", ret); + tevent_req_error(req, ret); + return; + } + + for (i=0; i<nodemap->num; i++) { + if (nodemap->node[i].pnn != state->max_pnn) { + continue; + } + + /* If the node became inactive, reset ban_credits */ + if (nodemap->node[i].flags & NODE_FLAGS_INACTIVE) { + unsigned int j; + + for (j=0; j<state->nlist->count; j++) { + if (state->nlist->pnn_list[j] == + state->max_pnn) { +
state->nlist->ban_credits[j] = 0; + break; + } + } + state->max_pnn = CTDB_UNKNOWN_PNN; + } + } + + talloc_free(nodemap); + talloc_free(reply); + + /* If node becomes inactive during recovery, pick next */ + if (state->max_pnn == CTDB_UNKNOWN_PNN) { + (void) ban_node_check(req); + return; + } + + ban = (struct ctdb_ban_state) { + .pnn = state->max_pnn, + .time = state->tun_list->recovery_ban_period, + }; + + D_ERR("Banning node %u for %u seconds\n", ban.pnn, ban.time); + + ctdb_req_control_set_ban_state(&request, &ban); + subreq = ctdb_client_control_send(state, + state->ev, + state->client, + ban.pnn, + TIMEOUT(), + &request); + if (tevent_req_nomem(subreq, req)) { + return; + } + tevent_req_set_callback(subreq, ban_node_done, req); +} + +static void ban_node_done(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + struct node_ban_state *state = tevent_req_data( + req, struct node_ban_state); + struct ctdb_reply_control *reply; + int ret; + bool status; + + status = ctdb_client_control_recv(subreq, &ret, state, &reply); + TALLOC_FREE(subreq); + if (! status) { + tevent_req_error(req, ret); + return; + } + + ret = ctdb_reply_control_set_ban_state(reply); + if (ret != 0) { + D_ERR("control SET_BAN_STATE failed, ret=%d\n", ret); + tevent_req_error(req, ret); + return; + } + + talloc_free(reply); + tevent_req_done(req); +} + +static bool ban_node_recv(struct tevent_req *req, int *perr) +{ + if (tevent_req_is_unix_error(req, perr)) { + return false; + } + + return true; +} /* * Run the parallel database recovery @@ -2724,50 +2924,15 @@ static void recovery_db_recovery_done(struct tevent_req *subreq) D_ERR("%d of %d databases recovered\n", count, state->dbmap->num); if (!
status) { - uint32_t max_pnn = CTDB_UNKNOWN_PNN, max_credits = 0; - unsigned int i; - - /* Bans are not enabled */ - if (state->tun_list->enable_bans == 0) { - tevent_req_error(req, EIO); + subreq = ban_node_send(state, + state->ev, + state->client, + state->tun_list, + state->nlist); + if (tevent_req_nomem(subreq, req)) { return; } - - for (i=0; i<state->nlist->count; i++) { - if (state->nlist->ban_credits[i] > max_credits) { - max_pnn = state->nlist->pnn_list[i]; - max_credits = state->nlist->ban_credits[i]; - } - } - - /* If pulling database fails multiple times */ - if (max_credits >= NUM_RETRIES) { - struct ctdb_ban_state ban_state = { - .pnn = max_pnn, - .time = state->tun_list->recovery_ban_period, - }; - - D_ERR("Banning node %u for %u seconds\n", - ban_state.pnn, - ban_state.time); - - ctdb_req_control_set_ban_state(&request, - &ban_state); - subreq = ctdb_client_control_send(state, - state->ev, - state->client, - ban_state.pnn, - TIMEOUT(), - &request); - if (tevent_req_nomem(subreq, req)) { - return; - } - tevent_req_set_callback(subreq, - recovery_failed_done, - req); - } else { - tevent_req_error(req, EIO); - } + tevent_req_set_callback(subreq, recovery_failed_done, req); return; } @@ -2789,25 +2954,15 @@ static void recovery_failed_done(struct tevent_req *subreq) { struct tevent_req *req = tevent_req_callback_data( subreq, struct tevent_req); - struct recovery_state *state = tevent_req_data( - req, struct recovery_state); - struct ctdb_reply_control *reply; int ret; bool status; - status = ctdb_client_control_recv(subreq, &ret, state, &reply); + status = ban_node_recv(subreq, &ret); TALLOC_FREE(subreq); if (!
status) { D_ERR("failed to ban node, ret=%d\n", ret); - goto done; } - ret = ctdb_reply_control_set_ban_state(reply); - if (ret != 0) { - D_ERR("control SET_BAN_STATE failed, ret=%d\n", ret); - } - -done: tevent_req_error(req, EIO); } -- 2.25.1 From a68a51c56745c955912ecfd0b9d90b60854b76cc Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Thu, 20 Feb 2020 13:48:13 +1100 Subject: [PATCH 05/17] ctdb-daemon: Fix database attach deferral logic Commit 3cc230b5eeca749ab68d19cfda969f72c269f1f6 says: Dont allow clients to connect to databases untile we are well past and through the initial recovery phase It is unclear what this commit was attempting to do. The commit message implies that more attaches should be deferred but the code change adds a conjunction that causes less attaches to be deferred. In particular, no attaches will be deferred after startup is complete. This seems wrong. To implement what seems to be stated in the commit message an "or" needs to be used so that non-recovery daemon attaches are deferred either when in recovery or before startup is complete. Making this change highlights that attaches need to be allowed during the "startup" event because this is when smbd is started. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit c6c89495fbe9b6f238d10a538eccc92b937a69de) --- ctdb/server/ctdb_ltdb_server.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ctdb/server/ctdb_ltdb_server.c b/ctdb/server/ctdb_ltdb_server.c index 970eb54b00b..a6709ff72de 100644 --- a/ctdb/server/ctdb_ltdb_server.c +++ b/ctdb/server/ctdb_ltdb_server.c @@ -1135,9 +1135,9 @@ int32_t ctdb_control_db_attach(struct ctdb_context *ctdb, return -1; } - if (ctdb->recovery_mode == CTDB_RECOVERY_ACTIVE && - client->pid != ctdb->recoverd_pid && - ctdb->runstate < CTDB_RUNSTATE_RUNNING) { + if (client->pid != ctdb->recoverd_pid && + (ctdb->recovery_mode == CTDB_RECOVERY_ACTIVE || + ctdb->runstate < CTDB_RUNSTATE_STARTUP)) { struct ctdb_deferred_attach_context *da_ctx = talloc(client, struct ctdb_deferred_attach_context); if (da_ctx == NULL) { -- 2.25.1 From a101553665967e56186326cf80a28bfcf44af5d7 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Wed, 26 Feb 2020 17:03:49 +1100 Subject: [PATCH 06/17] ctdb-daemon: Remove unused old client database functions BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit fc23cd1b9cdd1d70067491614b16e616291e8ff2) --- ctdb/include/ctdb_client.h | 22 ------ ctdb/include/ctdb_private.h | 2 + ctdb/server/ctdb_client.c | 146 ------------------------------------ 3 files changed, 2 insertions(+), 168 deletions(-) diff --git a/ctdb/include/ctdb_client.h b/ctdb/include/ctdb_client.h index ef4950ab533..d1dce1e68d8 100644 --- a/ctdb/include/ctdb_client.h +++ b/ctdb/include/ctdb_client.h @@ -176,9 +176,6 @@ int ctdb_ctrl_getnodemap(struct ctdb_context *ctdb, struct timeval timeout, int ctdb_ctrl_get_runstate(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *runstate); -int ctdb_ctrl_getdbpath(struct ctdb_context *ctdb, struct 
timeval timeout, - uint32_t destnode, uint32_t dbid, - TALLOC_CTX *mem_ctx, const char **path); int ctdb_ctrl_getdbname(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t dbid, TALLOC_CTX *mem_ctx, const char **name); @@ -190,25 +187,6 @@ int ctdb_ctrl_createdb(struct ctdb_context *ctdb, struct timeval timeout, int ctdb_ctrl_get_debuglevel(struct ctdb_context *ctdb, uint32_t destnode, int32_t *level); -/* - attach to a ctdb database -*/ -int ctdb_ctrl_db_open_flags(struct ctdb_context *ctdb, uint32_t db_id, - int *tdb_flags); - -struct ctdb_db_context *ctdb_attach(struct ctdb_context *ctdb, - struct timeval timeout, - const char *name, - uint8_t db_flags); - -/* a ctdb call function */ -typedef int (*ctdb_fn_t)(struct ctdb_call_info *); - -/* - setup a ctdb call function -*/ -int ctdb_set_call(struct ctdb_db_context *ctdb_db, ctdb_fn_t fn, uint32_t id); - int ctdb_ctrl_freeze(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode); diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index 6aba3c1d48e..fcffc23e2d6 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -36,6 +36,8 @@ struct ctdb_tcp_array { /* an installed ctdb remote call */ +typedef int (*ctdb_fn_t)(struct ctdb_call_info *); + struct ctdb_registered_call { struct ctdb_registered_call *next, *prev; uint32_t id; diff --git a/ctdb/server/ctdb_client.c b/ctdb/server/ctdb_client.c index 0da847c401b..f5366e874ee 100644 --- a/ctdb/server/ctdb_client.c +++ b/ctdb/server/ctdb_client.c @@ -1107,36 +1107,6 @@ int ctdb_ctrl_get_runstate(struct ctdb_context *ctdb, return 0; } -/* - find the real path to a ltdb - */ -int ctdb_ctrl_getdbpath(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t dbid, TALLOC_CTX *mem_ctx, - const char **path) -{ - int ret; - int32_t res; - TDB_DATA data; - - data.dptr = (uint8_t *)&dbid; - data.dsize = sizeof(dbid); - - ret = ctdb_control(ctdb, destnode, 0, - 
CTDB_CONTROL_GETDBPATH, 0, data, - mem_ctx, &data, &res, &timeout, NULL); - if (ret != 0 || res != 0) { - return -1; - } - - (*path) = talloc_strndup(mem_ctx, (const char *)data.dptr, data.dsize); - if ((*path) == NULL) { - return -1; - } - - talloc_free(data.dptr); - - return 0; -} - /* find the name of a db */ @@ -1233,122 +1203,6 @@ int ctdb_ctrl_get_debuglevel(struct ctdb_context *ctdb, uint32_t destnode, int32 return 0; } -/* - * Get db open flags - */ -int ctdb_ctrl_db_open_flags(struct ctdb_context *ctdb, uint32_t db_id, - int *tdb_flags) -{ - TDB_DATA indata, outdata; - int ret; - int32_t res; - - indata.dptr = (uint8_t *)&db_id; - indata.dsize = sizeof(db_id); - - ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, - CTDB_CONTROL_DB_OPEN_FLAGS, 0, indata, - ctdb, &outdata, &res, NULL, NULL); - if (ret != 0 || res != 0) { - D_ERR("ctdb control for db open flags failed\n"); - return -1; - } - - if (outdata.dsize != sizeof(int32_t)) { - D_ERR(__location__ " expected %zi bytes, received %zi bytes\n", - sizeof(int32_t), outdata.dsize); - talloc_free(outdata.dptr); - return -1; - } - - *tdb_flags = *(int32_t *)outdata.dptr; - talloc_free(outdata.dptr); - return 0; -} - -/* - attach to a specific database - client call -*/ -struct ctdb_db_context *ctdb_attach(struct ctdb_context *ctdb, - struct timeval timeout, - const char *name, - uint8_t db_flags) -{ - struct ctdb_db_context *ctdb_db; - int ret; - int tdb_flags; - - ctdb_db = ctdb_db_handle(ctdb, name); - if (ctdb_db) { - return ctdb_db; - } - - ctdb_db = talloc_zero(ctdb, struct ctdb_db_context); - CTDB_NO_MEMORY_NULL(ctdb, ctdb_db); - - ctdb_db->ctdb = ctdb; - ctdb_db->db_name = talloc_strdup(ctdb_db, name); - CTDB_NO_MEMORY_NULL(ctdb, ctdb_db->db_name); - - /* tell ctdb daemon to attach */ - ret = ctdb_ctrl_createdb(ctdb, timeout, CTDB_CURRENT_NODE, - ctdb_db, name, db_flags, &ctdb_db->db_id); - if (ret != 0) { - DEBUG(DEBUG_ERR,("Failed to attach to database '%s'\n", name)); - talloc_free(ctdb_db); - return 
NULL; - } - - ret = ctdb_ctrl_getdbpath(ctdb, timeout, CTDB_CURRENT_NODE, ctdb_db->db_id, ctdb_db, &ctdb_db->db_path); - if (ret != 0) { - DEBUG(DEBUG_ERR,("Failed to get dbpath for database '%s'\n", name)); - talloc_free(ctdb_db); - return NULL; - } - - ret = ctdb_ctrl_db_open_flags(ctdb, ctdb_db->db_id, &tdb_flags); - if (ret != 0) { - D_ERR("Failed to get tdb_flags for database '%s'\n", name); - talloc_free(ctdb_db); - return NULL; - } - - ctdb_db->ltdb = tdb_wrap_open(ctdb_db, ctdb_db->db_path, 0, tdb_flags, - O_RDWR, 0); - if (ctdb_db->ltdb == NULL) { - ctdb_set_error(ctdb, "Failed to open tdb '%s'\n", ctdb_db->db_path); - talloc_free(ctdb_db); - return NULL; - } - - ctdb_db->db_flags = db_flags; - - DLIST_ADD(ctdb->db_list, ctdb_db); - - /* add well known functions */ - ctdb_set_call(ctdb_db, ctdb_null_func, CTDB_NULL_FUNC); - ctdb_set_call(ctdb_db, ctdb_fetch_func, CTDB_FETCH_FUNC); - ctdb_set_call(ctdb_db, ctdb_fetch_with_header_func, CTDB_FETCH_WITH_HEADER_FUNC); - - return ctdb_db; -} - -/* - setup a call for a database - */ -int ctdb_set_call(struct ctdb_db_context *ctdb_db, ctdb_fn_t fn, uint32_t id) -{ - struct ctdb_registered_call *call; - - /* register locally */ - call = talloc(ctdb_db, struct ctdb_registered_call); - call->fn = fn; - call->id = id; - - DLIST_ADD(ctdb_db->calls, call); - return 0; -} - /* Freeze all databases */ int ctdb_ctrl_freeze(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode) -- 2.25.1 From 1b987c007c0b10ad12887b28709b33bf1c3218fe Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Fri, 21 Feb 2020 11:04:14 +1100 Subject: [PATCH 07/17] ctdb-protocol: Add control flag CTDB_CTRL_FLAG_ATTACH_RECOVERY BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 17ed0425904a98624284d351ab7617b3e02c0f7b) --- ctdb/protocol/protocol.h | 1 + 1 file changed, 1 insertion(+) diff --git a/ctdb/protocol/protocol.h b/ctdb/protocol/protocol.h 
index 43175ae3a95..04a651018be 100644 --- a/ctdb/protocol/protocol.h +++ b/ctdb/protocol/protocol.h @@ -944,6 +944,7 @@ struct ctdb_req_control { #define CTDB_CTRL_FLAG_OPCODE_SPECIFIC 0xFFFF0000 /* Ugly overloading of this field... */ #define CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE 0x00010000 +#define CTDB_CTRL_FLAG_ATTACH_RECOVERY 0x00020000 uint32_t flags; struct ctdb_req_control_data rdata; }; -- 2.25.1 From 41a5e0cb49b6c8397322c8f36807d240c1caedb9 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Fri, 21 Feb 2020 11:13:05 +1100 Subject: [PATCH 08/17] ctdb-recovery: Use CTDB_CTRL_FLAG_ATTACH_RECOVERY to attach during recovery ctdb_ctrl_createdb() is only called by the recovery daemon, so this is a safe, temporary change. This is temporary because ctdb_ctrl_createdb(), create_missing_remote_databases() and create_missing_local_databases() will all go away soon. Note that this doesn't cause a change in behaviour. The main daemon will still only defer attaches from non-recoverd processes during recovery. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 98e3d0db2bc5f33217e26fab1dfb4bb91eae534f) --- ctdb/server/ctdb_client.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/ctdb/server/ctdb_client.c b/ctdb/server/ctdb_client.c index f5366e874ee..5d62e3c2c1b 100644 --- a/ctdb/server/ctdb_client.c +++ b/ctdb/server/ctdb_client.c @@ -1160,8 +1160,17 @@ int ctdb_ctrl_createdb(struct ctdb_context *ctdb, struct timeval timeout, opcode = CTDB_CONTROL_DB_ATTACH; } - ret = ctdb_control(ctdb, destnode, 0, opcode, 0, data, - mem_ctx, &data, &res, &timeout, NULL); + ret = ctdb_control(ctdb, + destnode, + 0, + opcode, + CTDB_CTRL_FLAG_ATTACH_RECOVERY, + data, + mem_ctx, + &data, + &res, + &timeout, + NULL); if (ret != 0 || res != 0) { return -1; -- 2.25.1 From 2cf233aa44243fb1ba2e5dfc13a666bbbd4cfc43 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Wed, 26 Feb 2020 11:50:09 +1100 Subject: [PATCH 09/17] ctdb-daemon: Respect CTDB_CTRL_FLAG_ATTACH_RECOVERY when attaching databases This is currently only set by the recovery daemon when it attaches missing databases, so there is no obvious behaviour change. However, attaching missing databases can now be moved to the recovery helper as long as it sets this flag. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 7e5a8a4884ea87bb985fe0e2b65ff130fc2ba8aa) --- ctdb/server/ctdb_ltdb_server.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ctdb/server/ctdb_ltdb_server.c b/ctdb/server/ctdb_ltdb_server.c index a6709ff72de..e050b7304fe 100644 --- a/ctdb/server/ctdb_ltdb_server.c +++ b/ctdb/server/ctdb_ltdb_server.c @@ -1135,7 +1135,7 @@ int32_t ctdb_control_db_attach(struct ctdb_context *ctdb, return -1; } - if (client->pid != ctdb->recoverd_pid && + if (!(c->flags & CTDB_CTRL_FLAG_ATTACH_RECOVERY) && (ctdb->recovery_mode == CTDB_RECOVERY_ACTIVE || ctdb->runstate < CTDB_RUNSTATE_STARTUP)) { struct ctdb_deferred_attach_context *da_ctx = talloc(client, struct ctdb_deferred_attach_context); -- 2.25.1 From 01ee438a35d9bdd14f063708a81750fcd327e7fb Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Fri, 21 Feb 2020 12:24:39 +1100 Subject: [PATCH 10/17] ctdb-recovery: Replace use of ctdb_dbid_map with local db_list This will be used to build a merged list of databases from all nodes, allowing the recovery helper to create missing databases. It would be possible to also include the db_name field in this structure but that would cause a lot of churn. This field is used locally in the recovery of each database so can continue to live in the relevant state structure(s). 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 4c0b9c36050a0ed8a180d4ac1853224089528e8e) --- ctdb/server/ctdb_recovery_helper.c | 176 ++++++++++++++++++++++++++--- 1 file changed, 161 insertions(+), 15 deletions(-) diff --git a/ctdb/server/ctdb_recovery_helper.c b/ctdb/server/ctdb_recovery_helper.c index 1f3b58312c4..df96240d8da 100644 --- a/ctdb/server/ctdb_recovery_helper.c +++ b/ctdb/server/ctdb_recovery_helper.c @@ -27,6 +27,7 @@ #include #include "lib/tdb_wrap/tdb_wrap.h" +#include "lib/util/dlinklist.h" #include "lib/util/sys_rw.h" #include "lib/util/time.h" #include "lib/util/tevent_unix.h" @@ -169,6 +170,130 @@ static void node_list_ban_credits(struct node_list *nlist, uint32_t pnn) } } +/* + * Database list functions + * + * Simple, naive implementation that could be updated to a db_hash or similar + */ + +struct db { + struct db *prev, *next; + + uint32_t db_id; + uint32_t db_flags; + uint32_t *pnn_list; + unsigned int num_nodes; +}; + +struct db_list { + unsigned int num_dbs; + struct db *db; + unsigned int num_nodes; +}; + +static struct db_list *db_list_init(TALLOC_CTX *mem_ctx, unsigned int num_nodes) +{ + struct db_list *l; + + l = talloc_zero(mem_ctx, struct db_list); + l->num_nodes = num_nodes; + + return l; +} + +static struct db *db_list_find(struct db_list *dblist, uint32_t db_id) +{ + struct db *db; + + if (dblist == NULL) { + return NULL; + } + + db = dblist->db; + while (db != NULL && db->db_id != db_id) { + db = db->next; + } + + return db; +} + +static int db_list_add(struct db_list *dblist, + uint32_t db_id, + uint32_t db_flags, + uint32_t node) +{ + struct db *db = NULL; + + if (dblist == NULL) { + return EINVAL; + } + + db = talloc_zero(dblist, struct db); + if (db == NULL) { + return ENOMEM; + } + + db->db_id = db_id; + db->db_flags = db_flags; + db->pnn_list = talloc_zero_array(db, uint32_t, dblist->num_nodes); + if (db->pnn_list == NULL) { + 
talloc_free(db); + return ENOMEM; + } + db->pnn_list[0] = node; + db->num_nodes = 1; + + DLIST_ADD_END(dblist->db, db); + dblist->num_dbs++; + + return 0; +} + +static int db_list_check_and_add(struct db_list *dblist, + uint32_t db_id, + uint32_t db_flags, + uint32_t node) +{ + struct db *db = NULL; + int ret; + + /* + * These flags are masked out because they are only set on a + * node when a client attaches to that node, so they might not + * be set yet. They can't be passed as part of the attach, so + * they're no use here. + */ + db_flags &= ~(CTDB_DB_FLAGS_READONLY | CTDB_DB_FLAGS_STICKY); + + if (dblist == NULL) { + return EINVAL; + } + + db = db_list_find(dblist, db_id); + if (db == NULL) { + ret = db_list_add(dblist, db_id, db_flags, node); + return ret; + } + + if (db->db_flags != db_flags) { + D_ERR("Incompatible database flags for 0x%"PRIx32" " + "(0x%"PRIx32" != 0x%"PRIx32")\n", + db_id, + db_flags, + db->db_flags); + return EINVAL; + } + + if (db->num_nodes >= dblist->num_nodes) { + return EINVAL; + } + + db->pnn_list[db->num_nodes] = node; + db->num_nodes++; + + return 0; +} + /* * Recovery database functions */ @@ -2014,7 +2139,7 @@ static bool recover_db_recv(struct tevent_req *req) struct db_recovery_state { struct tevent_context *ev; - struct ctdb_dbid_map *dbmap; + struct db_list *dblist; unsigned int num_replies; unsigned int num_failed; }; @@ -2022,7 +2147,7 @@ struct db_recovery_state { struct db_recovery_one_state { struct tevent_req *req; struct ctdb_client_context *client; - struct ctdb_dbid_map *dbmap; + struct db_list *dblist; struct ctdb_tunable_list *tun_list; struct node_list *nlist; uint32_t generation; @@ -2036,14 +2161,14 @@ static void db_recovery_one_done(struct tevent_req *subreq); static struct tevent_req *db_recovery_send(TALLOC_CTX *mem_ctx, struct tevent_context *ev, struct ctdb_client_context *client, - struct ctdb_dbid_map *dbmap, + struct db_list *dblist, struct ctdb_tunable_list *tun_list, struct node_list *nlist, uint32_t
generation) { struct tevent_req *req, *subreq; struct db_recovery_state *state; - unsigned int i; + struct db *db; req = tevent_req_create(mem_ctx, &state, struct db_recovery_state); if (req == NULL) { @@ -2051,16 +2176,16 @@ static struct tevent_req *db_recovery_send(TALLOC_CTX *mem_ctx, } state->ev = ev; - state->dbmap = dbmap; + state->dblist = dblist; state->num_replies = 0; state->num_failed = 0; - if (dbmap->num == 0) { + if (dblist->num_dbs == 0) { tevent_req_done(req); return tevent_req_post(req, ev); } - for (i=0; i<dbmap->num; i++) { + for (db = dblist->db; db != NULL; db = db->next) { struct db_recovery_one_state *substate; substate = talloc_zero(state, struct db_recovery_one_state); @@ -2070,12 +2195,12 @@ static struct tevent_req *db_recovery_send(TALLOC_CTX *mem_ctx, substate->req = req; substate->client = client; - substate->dbmap = dbmap; + substate->dblist = dblist; substate->tun_list = tun_list; substate->nlist = nlist; substate->generation = generation; - substate->db_id = dbmap->dbs[i].db_id; - substate->db_flags = dbmap->dbs[i].flags; + substate->db_id = db->db_id; + substate->db_flags = db->db_flags; subreq = recover_db_send(state, ev, @@ -2138,7 +2263,7 @@ failed: done: state->num_replies += 1; - if (state->num_replies == state->dbmap->num) { + if (state->num_replies == state->dblist->num_dbs) { tevent_req_done(req); } } @@ -2387,7 +2512,7 @@ struct recovery_state { struct node_list *nlist; struct ctdb_tunable_list *tun_list; struct ctdb_vnn_map *vnnmap; - struct ctdb_dbid_map *dbmap; + struct db_list *dblist; }; static void recovery_tunables_done(struct tevent_req *subreq); @@ -2709,6 +2834,8 @@ static void recovery_dbmap_done(struct tevent_req *subreq) req, struct recovery_state); struct ctdb_reply_control *reply; struct ctdb_req_control request; + struct ctdb_dbid_map *dbmap = NULL; + unsigned int j; int ret; bool status; @@ -2721,13 +2848,32 @@ static void recovery_dbmap_done(struct tevent_req *subreq) return; } - ret =
ctdb_reply_control_get_dbmap(reply, state, &state->dbmap); + state->dblist = db_list_init(state, state->nlist->count); + if (tevent_req_nomem(state->dblist, req)) { + D_ERR("memory allocation error\n"); + return; + } + + ret = ctdb_reply_control_get_dbmap(reply, state, &dbmap); if (ret != 0) { D_ERR("control GET_DBMAP failed, ret=%d\n", ret); tevent_req_error(req, ret); return; } + for (j = 0; j < dbmap->num; j++) { + ret = db_list_check_and_add(state->dblist, + dbmap->dbs[j].db_id, + dbmap->dbs[j].flags, + state->destnode); + if (ret != 0) { + D_ERR("failed to add database list entry, ret=%d\n", + ret); + tevent_req_error(req, ret); + return; + } + } + ctdb_req_control_set_recmode(&request, CTDB_RECOVERY_ACTIVE); subreq = ctdb_client_control_multi_send(state, state->ev, @@ -2898,7 +3044,7 @@ static void recovery_vnnmap_update_done(struct tevent_req *subreq) subreq = db_recovery_send(state, state->ev, state->client, - state->dbmap, + state->dblist, state->tun_list, state->nlist, state->vnnmap->generation); @@ -2921,7 +3067,7 @@ static void recovery_db_recovery_done(struct tevent_req *subreq) status = db_recovery_recv(subreq, &count); TALLOC_FREE(subreq); - D_ERR("%d of %d databases recovered\n", count, state->dbmap->num); + D_ERR("%d of %d databases recovered\n", count, state->dblist->num_dbs); if (! status) { subreq = ban_node_send(state, -- 2.25.1 From 0e12c9539fdcf8c17875ff4d6b5cc3cf4a898e90 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Fri, 21 Feb 2020 16:10:05 +1100 Subject: [PATCH 11/17] ctdb-recovery: GET_DBMAP from all nodes This builds a complete list of databases across the cluster so it can be used to create databases on the nodes where they are missing. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit c6f74e590f602e2ed38fe293468770a5e669aefa) --- ctdb/server/ctdb_recovery_helper.c | 77 +++++++++++++++++++++--------- 1 file changed, 54 insertions(+), 23 deletions(-) diff --git a/ctdb/server/ctdb_recovery_helper.c b/ctdb/server/ctdb_recovery_helper.c index df96240d8da..d5a264df5d2 100644 --- a/ctdb/server/ctdb_recovery_helper.c +++ b/ctdb/server/ctdb_recovery_helper.c @@ -2817,9 +2817,13 @@ static void recovery_capabilities_done(struct tevent_req *subreq) talloc_free(reply); ctdb_req_control_get_dbmap(&request); - subreq = ctdb_client_control_send(state, state->ev, state->client, - state->destnode, TIMEOUT(), - &request); + subreq = ctdb_client_control_multi_send(state, + state->ev, + state->client, + state->nlist->pnn_list, + state->nlist->count, + TIMEOUT(), + &request); if (tevent_req_nomem(subreq, req)) { return; } @@ -2832,18 +2836,34 @@ static void recovery_dbmap_done(struct tevent_req *subreq) subreq, struct tevent_req); struct recovery_state *state = tevent_req_data( req, struct recovery_state); - struct ctdb_reply_control *reply; + struct ctdb_reply_control **reply; struct ctdb_req_control request; - struct ctdb_dbid_map *dbmap = NULL; - unsigned int j; + int *err_list; + unsigned int i, j; int ret; bool status; - status = ctdb_client_control_recv(subreq, &ret, state, &reply); + status = ctdb_client_control_multi_recv(subreq, + &ret, + state, + &err_list, + &reply); TALLOC_FREE(subreq); if (! 
status) { - D_ERR("control GET_DBMAP failed to node %u, ret=%d\n", - state->destnode, ret); + int ret2; + uint32_t pnn; + + ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, + state->nlist->count, + err_list, + &pnn); + if (ret2 != 0) { + D_ERR("control GET_DBMAP failed on node %u," + " ret=%d\n", pnn, ret2); + } else { + D_ERR("control GET_DBMAP failed, ret=%d\n", + ret); + } tevent_req_error(req, ret); return; } @@ -2854,24 +2874,35 @@ static void recovery_dbmap_done(struct tevent_req *subreq) return; } - ret = ctdb_reply_control_get_dbmap(reply, state, &dbmap); - if (ret != 0) { - D_ERR("control GET_DBMAP failed, ret=%d\n", ret); - tevent_req_error(req, ret); - return; - } + for (i = 0; i < state->nlist->count; i++) { + struct ctdb_dbid_map *dbmap = NULL; + uint32_t pnn; - for (j = 0; j < dbmap->num; j++) { - ret = db_list_check_and_add(state->dblist, - dbmap->dbs[j].db_id, - dbmap->dbs[j].flags, - state->destnode); + pnn = state->nlist->pnn_list[i]; + + ret = ctdb_reply_control_get_dbmap(reply[i], state, &dbmap); if (ret != 0) { - D_ERR("failed to add database list entry, ret=%d\n", - ret); - tevent_req_error(req, ret); + D_ERR("control GET_DBMAP failed on node %u\n", + pnn); + tevent_req_error(req, EPROTO); return; } + + for (j = 0; j < dbmap->num; j++) { + ret = db_list_check_and_add(state->dblist, + dbmap->dbs[j].db_id, + dbmap->dbs[j].flags, + pnn); + if (ret != 0) { + D_ERR("failed to add database list entry, " + "ret=%d\n", + ret); + tevent_req_error(req, ret); + return; + } + } + + TALLOC_FREE(dbmap); } ctdb_req_control_set_recmode(&request, CTDB_RECOVERY_ACTIVE); -- 2.25.1 From d79acde1b6fc8f85de8c0a240c49e8a33821dac4 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Fri, 21 Feb 2020 16:51:10 +1100 Subject: [PATCH 12/17] ctdb-recovery: Pass db structure for each database recovery Instead of db_id and db_flags. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 1bdfeb3fdc06947a607957ab3d114f97bad5d7d7) --- ctdb/server/ctdb_recovery_helper.c | 59 ++++++++++++++---------------- 1 file changed, 28 insertions(+), 31 deletions(-) diff --git a/ctdb/server/ctdb_recovery_helper.c b/ctdb/server/ctdb_recovery_helper.c index d5a264df5d2..2b77542245a 100644 --- a/ctdb/server/ctdb_recovery_helper.c +++ b/ctdb/server/ctdb_recovery_helper.c @@ -1672,8 +1672,7 @@ struct recover_db_state { struct ctdb_client_context *client; struct ctdb_tunable_list *tun_list; struct node_list *nlist; - uint32_t db_id; - uint8_t db_flags; + struct db *db; uint32_t destnode; struct ctdb_transdb transdb; @@ -1698,8 +1697,7 @@ static struct tevent_req *recover_db_send(TALLOC_CTX *mem_ctx, struct ctdb_tunable_list *tun_list, struct node_list *nlist, uint32_t generation, - uint32_t db_id, - uint8_t db_flags) + struct db *db) { struct tevent_req *req, *subreq; struct recover_db_state *state; @@ -1714,14 +1712,13 @@ static struct tevent_req *recover_db_send(TALLOC_CTX *mem_ctx, state->client = client; state->tun_list = tun_list; state->nlist = nlist; - state->db_id = db_id; - state->db_flags = db_flags; + state->db = db; state->destnode = ctdb_client_pnn(client); - state->transdb.db_id = db_id; + state->transdb.db_id = db->db_id; state->transdb.tid = generation; - ctdb_req_control_get_dbname(&request, db_id); + ctdb_req_control_get_dbname(&request, db->db_id); subreq = ctdb_client_control_send(state, ev, client, state->destnode, TIMEOUT(), &request); if (tevent_req_nomem(subreq, req)) { @@ -1747,7 +1744,7 @@ static void recover_db_name_done(struct tevent_req *subreq) TALLOC_FREE(subreq); if (! 
status) { D_ERR("control GET_DBNAME failed for db=0x%x, ret=%d\n", - state->db_id, ret); + state->db->db_id, ret); tevent_req_error(req, ret); return; } @@ -1755,14 +1752,14 @@ static void recover_db_name_done(struct tevent_req *subreq) ret = ctdb_reply_control_get_dbname(reply, state, &state->db_name); if (ret != 0) { D_ERR("control GET_DBNAME failed for db=0x%x, ret=%d\n", - state->db_id, ret); + state->db->db_id, ret); tevent_req_error(req, EPROTO); return; } talloc_free(reply); - ctdb_req_control_getdbpath(&request, state->db_id); + ctdb_req_control_getdbpath(&request, state->db->db_id); subreq = ctdb_client_control_send(state, state->ev, state->client, state->destnode, TIMEOUT(), &request); @@ -1802,7 +1799,7 @@ static void recover_db_path_done(struct tevent_req *subreq) talloc_free(reply); - ctdb_req_control_db_freeze(&request, state->db_id); + ctdb_req_control_db_freeze(&request, state->db->db_id); subreq = ctdb_client_control_multi_send(state, state->ev, state->client, @@ -1873,6 +1870,7 @@ static void recover_db_transaction_started(struct tevent_req *subreq) struct recover_db_state *state = tevent_req_data( req, struct recover_db_state); int *err_list; + uint32_t flags; int ret; bool status; @@ -1899,28 +1897,31 @@ static void recover_db_transaction_started(struct tevent_req *subreq) return; } - state->recdb = recdb_create(state, state->db_id, state->db_name, + flags = state->db->db_flags; + state->recdb = recdb_create(state, + state->db->db_id, + state->db_name, state->db_path, state->tun_list->database_hash_size, - state->db_flags & CTDB_DB_FLAGS_PERSISTENT); + flags & CTDB_DB_FLAGS_PERSISTENT); if (tevent_req_nomem(state->recdb, req)) { return; } - if ((state->db_flags & CTDB_DB_FLAGS_PERSISTENT) || - (state->db_flags & CTDB_DB_FLAGS_REPLICATED)) { + if ((flags & CTDB_DB_FLAGS_PERSISTENT) || + (flags & CTDB_DB_FLAGS_REPLICATED)) { subreq = collect_highseqnum_db_send(state, state->ev, state->client, state->nlist, - state->db_id, + state->db->db_id, 
state->recdb); } else { subreq = collect_all_db_send(state, state->ev, state->client, state->nlist, - state->db_id, + state->db->db_id, state->recdb); } if (tevent_req_nomem(subreq, req)) { @@ -1939,8 +1940,8 @@ static void recover_db_collect_done(struct tevent_req *subreq) int ret; bool status; - if ((state->db_flags & CTDB_DB_FLAGS_PERSISTENT) || - (state->db_flags & CTDB_DB_FLAGS_REPLICATED)) { + if ((state->db->db_flags & CTDB_DB_FLAGS_PERSISTENT) || + (state->db->db_flags & CTDB_DB_FLAGS_REPLICATED)) { status = collect_highseqnum_db_recv(subreq, &ret); } else { status = collect_all_db_recv(subreq, &ret); @@ -2076,7 +2077,7 @@ static void recover_db_transaction_committed(struct tevent_req *subreq) return; } - ctdb_req_control_db_thaw(&request, state->db_id); + ctdb_req_control_db_thaw(&request, state->db->db_id); subreq = ctdb_client_control_multi_send(state, state->ev, state->client, @@ -2151,8 +2152,7 @@ struct db_recovery_one_state { struct ctdb_tunable_list *tun_list; struct node_list *nlist; uint32_t generation; - uint32_t db_id; - uint8_t db_flags; + struct db *db; int num_fails; }; @@ -2199,8 +2199,7 @@ static struct tevent_req *db_recovery_send(TALLOC_CTX *mem_ctx, substate->tun_list = tun_list; substate->nlist = nlist; substate->generation = generation; - substate->db_id = db->db_id; - substate->db_flags = db->db_flags; + substate->db = db; subreq = recover_db_send(state, ev, @@ -2208,14 +2207,13 @@ static struct tevent_req *db_recovery_send(TALLOC_CTX *mem_ctx, tun_list, nlist, generation, - substate->db_id, - substate->db_flags); + substate->db); if (tevent_req_nomem(subreq, req)) { return tevent_req_post(req, ev); } tevent_req_set_callback(subreq, db_recovery_one_done, substate); - D_NOTICE("recover database 0x%08x\n", substate->db_id); + D_NOTICE("recover database 0x%08x\n", substate->db->db_id); } return req; @@ -2246,14 +2244,13 @@ static void db_recovery_one_done(struct tevent_req *subreq) substate->tun_list, substate->nlist, 
substate->generation, - substate->db_id, - substate->db_flags); + substate->db); if (tevent_req_nomem(subreq, req)) { goto failed; } tevent_req_set_callback(subreq, db_recovery_one_done, substate); D_NOTICE("recover database 0x%08x, attempt %d\n", - substate->db_id, substate->num_fails+1); + substate->db->db_id, substate->num_fails+1); return; } -- 2.25.1 From a0dc2aa6e70d83764d9e85749f0e952377366c14 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Mon, 24 Feb 2020 10:26:34 +1100 Subject: [PATCH 13/17] ctdb-recovery: Fetch database name from all nodes where it is attached BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit e6e63f8fb8194634135bf34cda18f6cc8ff69a7c) --- ctdb/server/ctdb_recovery_helper.c | 80 +++++++++++++++++++++++++----- 1 file changed, 67 insertions(+), 13 deletions(-) diff --git a/ctdb/server/ctdb_recovery_helper.c b/ctdb/server/ctdb_recovery_helper.c index 2b77542245a..0fbafe45fb6 100644 --- a/ctdb/server/ctdb_recovery_helper.c +++ b/ctdb/server/ctdb_recovery_helper.c @@ -1656,7 +1656,7 @@ static bool collect_all_db_recv(struct tevent_req *req, int *perr) /** * For each database do the following: - * - Get DB name + * - Get DB name from all nodes * - Get DB path * - Freeze database on all nodes * - Start transaction on all nodes @@ -1719,8 +1719,13 @@ static struct tevent_req *recover_db_send(TALLOC_CTX *mem_ctx, state->transdb.tid = generation; ctdb_req_control_get_dbname(&request, db->db_id); - subreq = ctdb_client_control_send(state, ev, client, state->destnode, - TIMEOUT(), &request); + subreq = ctdb_client_control_multi_send(state, + ev, + client, + state->db->pnn_list, + state->db->num_nodes, + TIMEOUT(), + &request); if (tevent_req_nomem(subreq, req)) { return tevent_req_post(req, ev); } @@ -1735,26 +1740,75 @@ static void recover_db_name_done(struct tevent_req *subreq) subreq, struct tevent_req); struct recover_db_state *state = tevent_req_data( 
req, struct recover_db_state); - struct ctdb_reply_control *reply; + struct ctdb_reply_control **reply; struct ctdb_req_control request; + int *err_list; + unsigned int i; int ret; bool status; - status = ctdb_client_control_recv(subreq, &ret, state, &reply); + status = ctdb_client_control_multi_recv(subreq, + &ret, + state, + &err_list, + &reply); TALLOC_FREE(subreq); if (! status) { - D_ERR("control GET_DBNAME failed for db=0x%x, ret=%d\n", - state->db->db_id, ret); + int ret2; + uint32_t pnn; + + ret2 = ctdb_client_control_multi_error(state->db->pnn_list, + state->db->num_nodes, + err_list, + &pnn); + if (ret2 != 0) { + D_ERR("control GET_DBNAME failed on node %u," + " ret=%d\n", + pnn, + ret2); + } else { + D_ERR("control GET_DBNAME failed, ret=%d\n", + ret); + } tevent_req_error(req, ret); return; } - ret = ctdb_reply_control_get_dbname(reply, state, &state->db_name); - if (ret != 0) { - D_ERR("control GET_DBNAME failed for db=0x%x, ret=%d\n", - state->db->db_id, ret); - tevent_req_error(req, EPROTO); - return; + for (i = 0; i < state->db->num_nodes; i++) { + const char *db_name; + uint32_t pnn; + + pnn = state->nlist->pnn_list[i]; + + ret = ctdb_reply_control_get_dbname(reply[i], + state, + &db_name); + if (ret != 0) { + D_ERR("control GET_DBNAME failed on node %u " + "for db=0x%x, ret=%d\n", + pnn, + state->db->db_id, + ret); + tevent_req_error(req, EPROTO); + return; + } + + if (state->db_name == NULL) { + state->db_name = db_name; + continue; + } + + if (strcmp(state->db_name, db_name) != 0) { + D_ERR("Incompatible database name for 0x%"PRIx32" " + "(%s != %s) on node %"PRIu32"\n", + state->db->db_id, + db_name, + state->db_name, + pnn); + node_list_ban_credits(state->nlist, pnn); + tevent_req_error(req, ret); + return; + } } talloc_free(reply); -- 2.25.1 From 3a4e9cc36eeea0fa4df1f0c97edf8bea1ec7cc11 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Mon, 24 Feb 2020 11:31:33 +1100 Subject: [PATCH 14/17] ctdb-recovery: Create database on nodes where it 
is missing BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 76a8174279f42486b36cc41d5831d4e6613f172e) --- ctdb/server/ctdb_recovery_helper.c | 178 ++++++++++++++++++++++++++++- 1 file changed, 177 insertions(+), 1 deletion(-) diff --git a/ctdb/server/ctdb_recovery_helper.c b/ctdb/server/ctdb_recovery_helper.c index 0fbafe45fb6..f10e60104ae 100644 --- a/ctdb/server/ctdb_recovery_helper.c +++ b/ctdb/server/ctdb_recovery_helper.c @@ -294,6 +294,150 @@ static int db_list_check_and_add(struct db_list *dblist, return 0; } +/* + * Create database on nodes where it is missing + */ + +struct db_create_missing_state { + struct tevent_context *ev; + struct ctdb_client_context *client; + + struct node_list *nlist; + + const char *db_name; + uint32_t *missing_pnn_list; + int missing_num_nodes; +}; + +static void db_create_missing_done(struct tevent_req *subreq); + +static struct tevent_req *db_create_missing_send( + TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct ctdb_client_context *client, + struct node_list *nlist, + const char *db_name, + struct db *db) +{ + struct tevent_req *req, *subreq; + struct db_create_missing_state *state; + struct ctdb_req_control request; + unsigned int i, j; + + req = tevent_req_create(mem_ctx, + &state, + struct db_create_missing_state); + if (req == NULL) { + return NULL; + } + + state->ev = ev; + state->client = client; + state->nlist = nlist; + state->db_name = db_name; + + if (nlist->count == db->num_nodes) { + tevent_req_done(req); + return tevent_req_post(req, ev); + } + + state->missing_pnn_list = talloc_array(mem_ctx, uint32_t, nlist->count); + if (tevent_req_nomem(state->missing_pnn_list, req)) { + return tevent_req_post(req, ev); + } + + for (i = 0; i < nlist->count; i++) { + uint32_t pnn = nlist->pnn_list[i] ; + + for (j = 0; j < db->num_nodes; j++) { + if (pnn == db->pnn_list[j]) { + break; + } + } + + if (j < db->num_nodes) { + 
continue; + } + + DBG_INFO("Create database %s on node %u\n", + state->db_name, + pnn); + state->missing_pnn_list[state->missing_num_nodes] = pnn; + state->missing_num_nodes++; + } + + if (db->db_flags & CTDB_DB_FLAGS_PERSISTENT) { + ctdb_req_control_db_attach_persistent(&request, db_name); + } else if (db->db_flags & CTDB_DB_FLAGS_REPLICATED) { + ctdb_req_control_db_attach_replicated(&request, db_name); + } else { + ctdb_req_control_db_attach(&request, db_name); + } + request.flags = CTDB_CTRL_FLAG_ATTACH_RECOVERY; + subreq = ctdb_client_control_multi_send(state, + state->ev, + state->client, + state->missing_pnn_list, + state->missing_num_nodes, + TIMEOUT(), + &request); + if (tevent_req_nomem(subreq, req)) { + return tevent_req_post(req, ev); + } + tevent_req_set_callback(subreq, db_create_missing_done, req); + + return req; +} + +static void db_create_missing_done(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + struct db_create_missing_state *state = tevent_req_data( + req, struct db_create_missing_state); + int *err_list; + int ret; + bool status; + + status = ctdb_client_control_multi_recv(subreq, + &ret, + NULL, + &err_list, + NULL); + TALLOC_FREE(subreq); + if (! 
status) { + int ret2; + uint32_t pnn; + + ret2 = ctdb_client_control_multi_error( + state->missing_pnn_list, + state->missing_num_nodes, + err_list, + &pnn); + if (ret2 != 0) { + D_ERR("control DB_ATTACH failed for db %s" + " on node %u, ret=%d\n", + state->db_name, + pnn, + ret2); + node_list_ban_credits(state->nlist, pnn); + } else { + D_ERR("control DB_ATTACH failed for db %s, ret=%d\n", + state->db_name, + ret); + } + tevent_req_error(req, ret); + return; + } + + tevent_req_done(req); +} + +static bool db_create_missing_recv(struct tevent_req *req, int *perr) +{ + return generic_recv(req, perr); +} + /* * Recovery database functions */ @@ -1657,6 +1801,7 @@ static bool collect_all_db_recv(struct tevent_req *req, int *perr) /** * For each database do the following: * - Get DB name from all nodes + * - Attach database on missing nodes * - Get DB path * - Freeze database on all nodes * - Start transaction on all nodes @@ -1682,6 +1827,7 @@ struct recover_db_state { }; static void recover_db_name_done(struct tevent_req *subreq); +static void recover_db_create_missing_done(struct tevent_req *subreq); static void recover_db_path_done(struct tevent_req *subreq); static void recover_db_freeze_done(struct tevent_req *subreq); static void recover_db_transaction_started(struct tevent_req *subreq); @@ -1741,7 +1887,6 @@ static void recover_db_name_done(struct tevent_req *subreq) struct recover_db_state *state = tevent_req_data( req, struct recover_db_state); struct ctdb_reply_control **reply; - struct ctdb_req_control request; int *err_list; unsigned int i; int ret; @@ -1813,6 +1958,37 @@ static void recover_db_name_done(struct tevent_req *subreq) talloc_free(reply); + subreq = db_create_missing_send(state, + state->ev, + state->client, + state->nlist, + state->db_name, + state->db); + + if (tevent_req_nomem(subreq, req)) { + return; + } + tevent_req_set_callback(subreq, recover_db_create_missing_done, req); +} + +static void recover_db_create_missing_done(struct 
tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + struct recover_db_state *state = tevent_req_data( + req, struct recover_db_state); + struct ctdb_req_control request; + int ret; + bool status; + + /* Could sanity check the db_id here */ + status = db_create_missing_recv(subreq, &ret); + TALLOC_FREE(subreq); + if (! status) { + tevent_req_error(req, ret); + return; + } + ctdb_req_control_getdbpath(&request, state->db->db_id); subreq = ctdb_client_control_send(state, state->ev, state->client, state->destnode, TIMEOUT(), -- 2.25.1 From 8e55031be476cf9980cf81c33520c80d7eec45c5 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Mon, 24 Feb 2020 19:51:19 +1100 Subject: [PATCH 15/17] ctdb-recovery: Remove old code for creating missing databases BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 3a66d181b6f6199fca362fcb0aa06513645b589d) --- ctdb/server/ctdb_recoverd.c | 161 ------------------------------------ 1 file changed, 161 deletions(-) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index 6fddb44a014..14b6e0fca26 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -424,140 +424,6 @@ static int set_recovery_mode(struct ctdb_context *ctdb, return 0; } -/* - ensure all other nodes have attached to any databases that we have - */ -static int create_missing_remote_databases(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap, - uint32_t pnn, struct ctdb_dbid_map_old *dbmap, TALLOC_CTX *mem_ctx) -{ - unsigned int i, j, db; - int ret; - struct ctdb_dbid_map_old *remote_dbmap; - - /* verify that all other nodes have all our databases */ - for (j=0; j<nodemap->num; j++) { - /* we don't need to ourself ourselves */ - if (nodemap->nodes[j].pnn == pnn) { - continue; - } - /* don't check nodes that are unavailable */ - if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) { - continue; -
} - - ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn, - mem_ctx, &remote_dbmap); - if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " Unable to get dbids from node %u\n", pnn)); - return -1; - } - - /* step through all local databases */ - for (db=0; db<dbmap->num;db++) { - const char *name; - - - for (i=0;i<remote_dbmap->num;i++) { - if (dbmap->dbs[db].db_id == remote_dbmap->dbs[i].db_id) { - break; - } - } - /* the remote node already have this database */ - if (i!=remote_dbmap->num) { - continue; - } - /* ok so we need to create this database */ - ret = ctdb_ctrl_getdbname(ctdb, CONTROL_TIMEOUT(), pnn, - dbmap->dbs[db].db_id, mem_ctx, - &name); - if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " Unable to get dbname from node %u\n", pnn)); - return -1; - } - ret = ctdb_ctrl_createdb(ctdb, CONTROL_TIMEOUT(), - nodemap->nodes[j].pnn, - mem_ctx, name, - dbmap->dbs[db].flags, NULL); - if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " Unable to create remote db:%s\n", name)); - return -1; - } - } - } - - return 0; -} - - -/* - ensure we are attached to any databases that anyone else is attached to - */ -static int create_missing_local_databases(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap, - uint32_t pnn, struct ctdb_dbid_map_old **dbmap, TALLOC_CTX *mem_ctx) -{ - unsigned int i, j, db; - int ret; - struct ctdb_dbid_map_old *remote_dbmap; - - /* verify that we have all database any other node has */ - for (j=0; j<nodemap->num; j++) { - /* we don't need to ourself ourselves */ - if (nodemap->nodes[j].pnn == pnn) { - continue; - } - /* don't check nodes that are unavailable */ - if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) { - continue; - } - - ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn, - mem_ctx, &remote_dbmap); - if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " Unable to get dbids from node %u\n", pnn)); - return -1; - } - - /* step through all databases on the remote node */ - for (db=0; db<remote_dbmap->num;db++) { - const char
*name; - - for (i=0;i<(*dbmap)->num;i++) { - if (remote_dbmap->dbs[db].db_id == (*dbmap)->dbs[i].db_id) { - break; - } - } - /* we already have this db locally */ - if (i!=(*dbmap)->num) { - continue; - } - /* ok so we need to create this database and - rebuild dbmap - */ - ctdb_ctrl_getdbname(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn, - remote_dbmap->dbs[db].db_id, mem_ctx, &name); - if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " Unable to get dbname from node %u\n", - nodemap->nodes[j].pnn)); - return -1; - } - ctdb_ctrl_createdb(ctdb, CONTROL_TIMEOUT(), pnn, - mem_ctx, name, - remote_dbmap->dbs[db].flags, NULL); - if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " Unable to create local db:%s\n", name)); - return -1; - } - ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), pnn, mem_ctx, dbmap); - if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " Unable to reread dbmap on node %u\n", pnn)); - return -1; - } - } - } - - return 0; -} - /* update flags on all active nodes */ @@ -1165,7 +1031,6 @@ static int do_recovery(struct ctdb_recoverd *rec, struct ctdb_context *ctdb = rec->ctdb; unsigned int i; int ret; - struct ctdb_dbid_map_old *dbmap; bool self_ban; DEBUG(DEBUG_NOTICE, (__location__ " Starting do_recovery\n")); @@ -1245,32 +1110,6 @@ static int do_recovery(struct ctdb_recoverd *rec, DEBUG(DEBUG_NOTICE, (__location__ " Recovery initiated due to problem with node %u\n", rec->last_culprit_node)); - /* get a list of all databases */ - ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), pnn, mem_ctx, &dbmap); - if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " Unable to get dbids from node :%u\n", pnn)); - goto fail; - } - - /* we do the db creation before we set the recovery mode, so the freeze happens - on all databases we will be dealing with. 
*/ - - /* verify that we have all the databases any other node has */ - ret = create_missing_local_databases(ctdb, nodemap, pnn, &dbmap, mem_ctx); - if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " Unable to create missing local databases\n")); - goto fail; - } - - /* verify that all other nodes have all our databases */ - ret = create_missing_remote_databases(ctdb, nodemap, pnn, dbmap, mem_ctx); - if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " Unable to create missing remote databases\n")); - goto fail; - } - DEBUG(DEBUG_NOTICE, (__location__ " Recovery - created remote databases\n")); - - /* Retrieve capabilities from all connected nodes */ ret = update_capabilities(rec, nodemap); if (ret!=0) { -- 2.25.1 From 49b10032e7fbafaa1f6e41696d604ea36418a2b9 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Tue, 25 Feb 2020 06:20:32 +1100 Subject: [PATCH 16/17] ctdb-daemon: Remove more unused old client database functions BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 052f1bdb9cf78f53f584edd32f81ae8e01e8e86e) --- ctdb/include/ctdb_client.h | 12 ----- ctdb/server/ctdb_client.c | 105 ------------------------------------- 2 files changed, 117 deletions(-) diff --git a/ctdb/include/ctdb_client.h b/ctdb/include/ctdb_client.h index d1dce1e68d8..198a8a38dbb 100644 --- a/ctdb/include/ctdb_client.h +++ b/ctdb/include/ctdb_client.h @@ -165,10 +165,6 @@ int ctdb_ctrl_getrecmaster(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, int ctdb_ctrl_setrecmaster(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmaster); -int ctdb_ctrl_getdbmap(struct ctdb_context *ctdb, struct timeval timeout, - uint32_t destnode, TALLOC_CTX *mem_ctx, - struct ctdb_dbid_map_old **dbmap); - int ctdb_ctrl_getnodemap(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_node_map_old **nodemap); @@ -176,14 +172,6 @@ int 
ctdb_ctrl_getnodemap(struct ctdb_context *ctdb, struct timeval timeout, int ctdb_ctrl_get_runstate(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *runstate); -int ctdb_ctrl_getdbname(struct ctdb_context *ctdb, struct timeval timeout, - uint32_t destnode, uint32_t dbid, - TALLOC_CTX *mem_ctx, const char **name); - -int ctdb_ctrl_createdb(struct ctdb_context *ctdb, struct timeval timeout, - uint32_t destnode, TALLOC_CTX *mem_ctx, - const char *name, uint8_t db_flags, uint32_t *db_id); - int ctdb_ctrl_get_debuglevel(struct ctdb_context *ctdb, uint32_t destnode, int32_t *level); diff --git a/ctdb/server/ctdb_client.c b/ctdb/server/ctdb_client.c index 5d62e3c2c1b..453e7b28477 100644 --- a/ctdb/server/ctdb_client.c +++ b/ctdb/server/ctdb_client.c @@ -1029,30 +1029,6 @@ int ctdb_ctrl_setrecmaster(struct ctdb_context *ctdb, struct timeval timeout, ui } -/* - get a list of databases off a remote node - */ -int ctdb_ctrl_getdbmap(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, - TALLOC_CTX *mem_ctx, struct ctdb_dbid_map_old **dbmap) -{ - int ret; - TDB_DATA outdata; - int32_t res; - - ret = ctdb_control(ctdb, destnode, 0, - CTDB_CONTROL_GET_DBMAP, 0, tdb_null, - mem_ctx, &outdata, &res, &timeout, NULL); - if (ret != 0 || res != 0) { - DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getdbmap failed ret:%d res:%d\n", ret, res)); - return -1; - } - - *dbmap = (struct ctdb_dbid_map_old *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize); - talloc_free(outdata.dptr); - - return 0; -} - /* get a list of nodes (vnn and flags ) from a remote node */ @@ -1107,87 +1083,6 @@ int ctdb_ctrl_get_runstate(struct ctdb_context *ctdb, return 0; } -/* - find the name of a db - */ -int ctdb_ctrl_getdbname(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t dbid, TALLOC_CTX *mem_ctx, - const char **name) -{ - int ret; - int32_t res; - TDB_DATA data; - - data.dptr = (uint8_t *)&dbid; - data.dsize = sizeof(dbid); - 
- ret = ctdb_control(ctdb, destnode, 0, - CTDB_CONTROL_GET_DBNAME, 0, data, - mem_ctx, &data, &res, &timeout, NULL); - if (ret != 0 || res != 0) { - return -1; - } - - (*name) = talloc_strndup(mem_ctx, (const char *)data.dptr, data.dsize); - if ((*name) == NULL) { - return -1; - } - - talloc_free(data.dptr); - - return 0; -} - -/* - create a database - */ -int ctdb_ctrl_createdb(struct ctdb_context *ctdb, struct timeval timeout, - uint32_t destnode, TALLOC_CTX *mem_ctx, - const char *name, uint8_t db_flags, uint32_t *db_id) -{ - int ret; - int32_t res; - TDB_DATA data; - uint32_t opcode; - - data.dptr = discard_const(name); - data.dsize = strlen(name)+1; - - if (db_flags & CTDB_DB_FLAGS_PERSISTENT) { - opcode = CTDB_CONTROL_DB_ATTACH_PERSISTENT; - } else if (db_flags & CTDB_DB_FLAGS_REPLICATED) { - opcode = CTDB_CONTROL_DB_ATTACH_REPLICATED; - } else { - opcode = CTDB_CONTROL_DB_ATTACH; - } - - ret = ctdb_control(ctdb, - destnode, - 0, - opcode, - CTDB_CTRL_FLAG_ATTACH_RECOVERY, - data, - mem_ctx, - &data, - &res, - &timeout, - NULL); - - if (ret != 0 || res != 0) { - return -1; - } - - if (data.dsize != sizeof(uint32_t)) { - TALLOC_FREE(data.dptr); - return -1; - } - if (db_id != NULL) { - *db_id = *(uint32_t *)data.dptr; - } - talloc_free(data.dptr); - - return 0; -} - /* get debug level on a node */ -- 2.25.1 From b6a70655be240853bb3f480f2eaea941f90540ae Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Tue, 25 Feb 2020 17:32:56 +1100 Subject: [PATCH 17/17] ctdb-daemon: Don't allow attach from recovery if recovery is not active Neither the recovery daemon nor the recovery helper should attach databases outside of the recovery process. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 147afe77de372ddb9c180228d6fe1b04cca4610f) --- ctdb/server/ctdb_ltdb_server.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ctdb/server/ctdb_ltdb_server.c b/ctdb/server/ctdb_ltdb_server.c index e050b7304fe..ce3569fe7b1 100644 --- a/ctdb/server/ctdb_ltdb_server.c +++ b/ctdb/server/ctdb_ltdb_server.c @@ -1135,6 +1135,13 @@ int32_t ctdb_control_db_attach(struct ctdb_context *ctdb, return -1; } + if ((c->flags & CTDB_CTRL_FLAG_ATTACH_RECOVERY) && + ctdb->recovery_mode != CTDB_RECOVERY_ACTIVE) { + DBG_ERR("Attach from recovery refused because " + "recovery is not active\n"); + return -1; + } + if (!(c->flags & CTDB_CTRL_FLAG_ATTACH_RECOVERY) && (ctdb->recovery_mode == CTDB_RECOVERY_ACTIVE || ctdb->runstate < CTDB_RUNSTATE_STARTUP)) { -- 2.25.1