From c98d7c41c11b405f9e4b37d0993daa8bbe16ca4c Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 26 May 2015 16:45:34 +0200 Subject: [PATCH 1/8] ctdb-locking: Avoid memory corruption in ctdb_lock_context_destructor If the lock request is freed from within the callback, then setting lock_ctx->request to NULL in ctdb_lock_context_destructor will end up corrupting memory. In this case, lock_ctx->request could be reallocated and pointing to something else. This may cause unexpected abort trying to dereference a NULL pointer. So, set lock_ctx->request to NULL before processing callbacks. This avoids the following valgrind problem. ==3636== Invalid write of size 8 ==3636== at 0x151F3D: ctdb_lock_context_destructor (ctdb_lock.c:276) ==3636== by 0x58B3618: _talloc_free_internal (talloc.c:993) ==3636== by 0x58AD692: _talloc_free_children_internal (talloc.c:1472) ==3636== by 0x58AD692: _talloc_free_internal (talloc.c:1019) ==3636== by 0x58AD692: _talloc_free (talloc.c:1594) ==3636== by 0x15292E: ctdb_lock_handler (ctdb_lock.c:471) ==3636== by 0x56A535A: epoll_event_loop (tevent_epoll.c:728) ==3636== by 0x56A535A: epoll_event_loop_once (tevent_epoll.c:926) ==3636== by 0x56A3826: std_event_loop_once (tevent_standard.c:114) ==3636== by 0x569FFFC: _tevent_loop_once (tevent.c:533) ==3636== by 0x56A019A: tevent_common_loop_wait (tevent.c:637) ==3636== by 0x56A37C6: std_event_loop_wait (tevent_standard.c:140) ==3636== by 0x11E03A: ctdb_start_daemon (ctdb_daemon.c:1320) ==3636== by 0x118557: main (ctdbd.c:321) ==3636== Address 0x9c5b660 is 96 bytes inside a block of size 120 free'd ==3636== at 0x4C29D17: free (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so) ==3636== by 0x58B32D3: _talloc_free_internal (talloc.c:1063) ==3636== by 0x58B3232: _talloc_free_children_internal (talloc.c:1472) ==3636== by 0x58B3232: _talloc_free_internal (talloc.c:1019) ==3636== by 0x58B3232: _talloc_free_children_internal (talloc.c:1472) ==3636== by 0x58B3232: _talloc_free_internal (talloc.c:1019) ==3636== by 0x58AD692: _talloc_free_children_internal (talloc.c:1472) ==3636== by 0x58AD692: _talloc_free_internal (talloc.c:1019) ==3636== by 0x58AD692: _talloc_free (talloc.c:1594) ==3636== by 0x11EC30: daemon_incoming_packet (ctdb_daemon.c:844) ==3636== by 0x136F4A: lock_fetch_callback (ctdb_ltdb_server.c:268) ==3636== by 0x152489: process_callbacks (ctdb_lock.c:353) ==3636== by 0x152489: ctdb_lock_handler (ctdb_lock.c:468) ==3636== by 0x56A535A: epoll_event_loop (tevent_epoll.c:728) ==3636== by 0x56A535A: epoll_event_loop_once (tevent_epoll.c:926) ==3636== by 0x56A3826: std_event_loop_once (tevent_standard.c:114) ==3636== by 0x569FFFC: _tevent_loop_once (tevent.c:533) ==3636== by 0x56A019A: tevent_common_loop_wait (tevent.c:637) ==3636== by 0x56A37C6: std_event_loop_wait (tevent_standard.c:140) ==3636== by 0x11E03A: ctdb_start_daemon (ctdb_daemon.c:1320) ==3636== by 0x118557: main (ctdbd.c:321) BUG: https://bugzilla.samba.org/show_bug.cgi?id=11293 Signed-off-by: Stefan Metzmacher --- ctdb/server/ctdb_lock.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ctdb/server/ctdb_lock.c b/ctdb/server/ctdb_lock.c index df22d0d..9b93bab 100644 --- a/ctdb/server/ctdb_lock.c +++ b/ctdb/server/ctdb_lock.c @@ -350,6 +350,10 @@ static void process_callbacks(struct lock_context *lock_ctx, bool locked) /* Reset the destructor, so request is not removed from the list */ talloc_set_destructor(request, NULL); } + + /* Since request may be freed in the callback, unset the request */ + lock_ctx->request = NULL; + request->callback(request->private_data, locked); if (lock_ctx->auto_mark && locked) { -- 1.9.1 From 97131b94e4787c40e2d545ed6351416c7f4780ca Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 2 Jun 2015 00:15:11 +1000 Subject: [PATCH 2/8] ctdb-locking: Set the lock_ctx->request to NULL when request is freed The code was added to ctdb_lock_context_destructor() to ensure that the if a lock_ctx gets freed first, the lock_request does not have a dangling pointer. However, the reverse is also true. When a lock_request is freed, then lock_ctx should not have a dangling pointer. In commit 374cbc7b0ff68e04ee4e395935509c7df817b3c0, the code for second condition was dropped causing a regression. BUG: https://bugzilla.samba.org/show_bug.cgi?id=11293 Signed-off-by: Stefan Metzmacher --- ctdb/server/ctdb_lock.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ctdb/server/ctdb_lock.c b/ctdb/server/ctdb_lock.c index 9b93bab..0a27592 100644 --- a/ctdb/server/ctdb_lock.c +++ b/ctdb/server/ctdb_lock.c @@ -312,7 +312,13 @@ static int ctdb_lock_context_destructor(struct lock_context *lock_ctx) */ static int ctdb_lock_request_destructor(struct lock_request *lock_request) { + if (lock_request->lctx == NULL) { + return 0; + } + + lock_request->lctx->request = NULL; TALLOC_FREE(lock_request->lctx); + return 0; } -- 1.9.1 From fe0fd9b322e73f40d6c04617b4a05beaad7741a3 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Tue, 2 Jun 2015 00:22:07 +1000 Subject: [PATCH 3/8] ctdb-locking: Set destructor when lock_context is created There is already code in the destructor to correctly remove it from the pending or the active queue. This also ensures that when lock context is in pending queue and if the lock request gets freed, the lock context is correctly removed from the pending queue. Thanks to Stefan Metzmacher for noticing this and suggesting the fix. BUG: https://bugzilla.samba.org/show_bug.cgi?id=11293 Signed-off-by: Amitay Isaacs Reviewed-by: Stefan Metzmacher --- ctdb/server/ctdb_lock.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/ctdb/server/ctdb_lock.c b/ctdb/server/ctdb_lock.c index 0a27592..8cbb7cd 100644 --- a/ctdb/server/ctdb_lock.c +++ b/ctdb/server/ctdb_lock.c @@ -897,8 +897,6 @@ static void ctdb_lock_schedule(struct ctdb_context *ctdb) /* Parent process */ close(lock_ctx->fd[1]); - talloc_set_destructor(lock_ctx, ctdb_lock_context_destructor); - talloc_free(tmp_ctx); /* Set up timeout handler */ @@ -910,7 +908,6 @@ static void ctdb_lock_schedule(struct ctdb_context *ctdb) if (lock_ctx->ttimer == NULL) { ctdb_kill(ctdb, lock_ctx->child, SIGKILL); lock_ctx->child = -1; - talloc_set_destructor(lock_ctx, NULL); close(lock_ctx->fd[0]); return; } @@ -926,7 +923,6 @@ static void ctdb_lock_schedule(struct ctdb_context *ctdb) TALLOC_FREE(lock_ctx->ttimer); ctdb_kill(ctdb, lock_ctx->child, SIGKILL); lock_ctx->child = -1; - talloc_set_destructor(lock_ctx, NULL); close(lock_ctx->fd[0]); return; } @@ -1024,6 +1020,7 @@ static struct lock_request *ctdb_lock_internal(TALLOC_CTX *mem_ctx, request->private_data = private_data; talloc_set_destructor(request, ctdb_lock_request_destructor); + talloc_set_destructor(lock_ctx, ctdb_lock_context_destructor); ctdb_lock_schedule(ctdb); -- 1.9.1 From 52f9740ad6411a8bbf7323cca243ce5c71a1d7a4 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Tue, 2 Jun 2015 11:15:11 +1000 Subject: [PATCH 4/8] ctdb-locking: Avoid memory leak in the failure case BUG: https://bugzilla.samba.org/show_bug.cgi?id=11293 Signed-off-by: Amitay Isaacs Reviewed-by: Stefan Metzmacher --- ctdb/server/ctdb_lock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ctdb/server/ctdb_lock.c b/ctdb/server/ctdb_lock.c index 8cbb7cd..ac01853 100644 --- a/ctdb/server/ctdb_lock.c +++ b/ctdb/server/ctdb_lock.c @@ -987,6 +987,7 @@ static struct lock_request *ctdb_lock_internal(TALLOC_CTX *mem_ctx, if (lock_ctx->key.dptr == NULL) { DEBUG(DEBUG_ERR, (__location__ "Memory allocation error\n")); talloc_free(lock_ctx); + talloc_free(request); return NULL; } lock_ctx->key_hash = ctdb_hash(&key); -- 1.9.1 From 2bdad91800d03538ab579a14f077ae5b6e9078eb Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Tue, 2 Jun 2015 11:25:44 +1000 Subject: [PATCH 5/8] ctdb-locking: Avoid resetting talloc destructor Let ctdb_lock_request_destructor() take care of the proper cleanup. If the request if freed from the callback function, then the lock context should not be freed. Setting request->lctx to NULL takes care of that in the destructor. BUG: https://bugzilla.samba.org/show_bug.cgi?id=11293 Signed-off-by: Amitay Isaacs Reviewed-by: Stefan Metzmacher --- ctdb/server/ctdb_lock.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ctdb/server/ctdb_lock.c b/ctdb/server/ctdb_lock.c index ac01853..82ce7a0 100644 --- a/ctdb/server/ctdb_lock.c +++ b/ctdb/server/ctdb_lock.c @@ -353,8 +353,10 @@ static void process_callbacks(struct lock_context *lock_ctx, bool locked) request = lock_ctx->request; if (lock_ctx->auto_mark) { - /* Reset the destructor, so request is not removed from the list */ - talloc_set_destructor(request, NULL); + /* Since request may be freed in the callback, unset the lock + * context, so request destructor will not free lock context. + */ + request->lctx = NULL; } /* Since request may be freed in the callback, unset the request */ -- 1.9.1 From 3eec4b56094af29c65eaa5ca2bcaac9c8620e414 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Tue, 2 Jun 2015 13:15:37 +1000 Subject: [PATCH 6/8] ctdb-locking: Add a comment to explain auto_mark usage BUG: https://bugzilla.samba.org/show_bug.cgi?id=11293 Signed-off-by: Amitay Isaacs Reviewed-by: Stefan Metzmacher --- ctdb/server/ctdb_lock.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ctdb/server/ctdb_lock.c b/ctdb/server/ctdb_lock.c index 82ce7a0..8454a69 100644 --- a/ctdb/server/ctdb_lock.c +++ b/ctdb/server/ctdb_lock.c @@ -41,6 +41,10 @@ * ctdb_lock_alldb() - get a lock on all DBs * * auto_mark - whether to mark/unmark DBs in before/after callback + * = false is used for freezing databases for + * recovery since the recovery cannot start till + * databases are locked on all the nodes. + * = true is used for record locks. */ enum lock_type { -- 1.9.1 From 0e9021c7dcadb48d617945015f0c64096dda5558 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 2 Jun 2015 12:39:17 +0200 Subject: [PATCH 7/8] ctdb-locking: make process_callbacks() more robust We should not dereference lock_ctx after invocing the callback in the auto_mark == false case. The callback could have destroyed it. BUG: https://bugzilla.samba.org/show_bug.cgi?id=11293 Signed-off-by: Stefan Metzmacher --- ctdb/server/ctdb_lock.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/ctdb/server/ctdb_lock.c b/ctdb/server/ctdb_lock.c index 8454a69..85addd6 100644 --- a/ctdb/server/ctdb_lock.c +++ b/ctdb/server/ctdb_lock.c @@ -334,8 +334,9 @@ static int ctdb_lock_request_destructor(struct lock_request *lock_request) static void process_callbacks(struct lock_context *lock_ctx, bool locked) { struct lock_request *request; + bool auto_mark = lock_ctx->auto_mark; - if (lock_ctx->auto_mark && locked) { + if (auto_mark && locked) { switch (lock_ctx->type) { case LOCK_RECORD: tdb_chainlock_mark(lock_ctx->ctdb_db->ltdb->tdb, lock_ctx->key); @@ -356,7 +357,7 @@ static void process_callbacks(struct lock_context *lock_ctx, bool locked) } request = lock_ctx->request; - if (lock_ctx->auto_mark) { + if (auto_mark) { /* Since request may be freed in the callback, unset the lock * context, so request destructor will not free lock context. */ @@ -368,7 +369,11 @@ static void process_callbacks(struct lock_context *lock_ctx, bool locked) request->callback(request->private_data, locked); - if (lock_ctx->auto_mark && locked) { + if (!auto_mark) { + return; + } + + if (locked) { switch (lock_ctx->type) { case LOCK_RECORD: tdb_chainlock_unmark(lock_ctx->ctdb_db->ltdb->tdb, lock_ctx->key); -- 1.9.1 From 1b08a55b7e1518c5773b93f865c220bb0b42252f Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 2 Jun 2015 12:43:17 +0200 Subject: [PATCH 8/8] ctdb-locking: move all auto_mark logic into process_callbacks() The caller should not derefence lock_ctx after invocing process_callbacks() it might be destroyed already. BUG: https://bugzilla.samba.org/show_bug.cgi?id=11293 Signed-off-by: Stefan Metzmacher --- ctdb/server/ctdb_lock.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/ctdb/server/ctdb_lock.c b/ctdb/server/ctdb_lock.c index 85addd6..509a9af 100644 --- a/ctdb/server/ctdb_lock.c +++ b/ctdb/server/ctdb_lock.c @@ -392,6 +392,8 @@ static void process_callbacks(struct lock_context *lock_ctx, bool locked) break; } } + + talloc_free(lock_ctx); } @@ -437,7 +439,6 @@ static void ctdb_lock_handler(struct tevent_context *ev, void *private_data) { struct lock_context *lock_ctx; - TALLOC_CTX *tmp_ctx = NULL; char c; bool locked; double t; @@ -451,11 +452,6 @@ static void ctdb_lock_handler(struct tevent_context *ev, t = timeval_elapsed(&lock_ctx->start_time); id = lock_bucket_id(t); - if (lock_ctx->auto_mark) { - tmp_ctx = talloc_new(ev); - talloc_steal(tmp_ctx, lock_ctx); - } - /* Read the status from the child process */ if (sys_read(lock_ctx->fd[0], &c, 1) != 1) { locked = false; @@ -487,10 +483,6 @@ static void ctdb_lock_handler(struct tevent_context *ev, } process_callbacks(lock_ctx, locked); - - if (lock_ctx->auto_mark) { - talloc_free(tmp_ctx); - } } -- 1.9.1