From 81af9d7cd9b16367cb56c8ef8079f353c0cbad38 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 26 May 2015 16:45:34 +0200 Subject: [PATCH 1/8] ctdb-locking: Avoid memory corruption in ctdb_lock_context_destructor If the lock request is freed from within the callback, then setting lock_ctx->request to NULL in ctdb_lock_context_destructor will end up corrupting memory. In this case, lock_ctx->request could be reallocated and pointing to something else. This may cause unexpected abort trying to dereference a NULL pointer. So, set lock_ctx->request to NULL before processing callbacks. This avoids the following valgrind problem. ==3636== Invalid write of size 8 ==3636== at 0x151F3D: ctdb_lock_context_destructor (ctdb_lock.c:276) ==3636== by 0x58B3618: _talloc_free_internal (talloc.c:993) ==3636== by 0x58AD692: _talloc_free_children_internal (talloc.c:1472) ==3636== by 0x58AD692: _talloc_free_internal (talloc.c:1019) ==3636== by 0x58AD692: _talloc_free (talloc.c:1594) ==3636== by 0x15292E: ctdb_lock_handler (ctdb_lock.c:471) ==3636== by 0x56A535A: epoll_event_loop (tevent_epoll.c:728) ==3636== by 0x56A535A: epoll_event_loop_once (tevent_epoll.c:926) ==3636== by 0x56A3826: std_event_loop_once (tevent_standard.c:114) ==3636== by 0x569FFFC: _tevent_loop_once (tevent.c:533) ==3636== by 0x56A019A: tevent_common_loop_wait (tevent.c:637) ==3636== by 0x56A37C6: std_event_loop_wait (tevent_standard.c:140) ==3636== by 0x11E03A: ctdb_start_daemon (ctdb_daemon.c:1320) ==3636== by 0x118557: main (ctdbd.c:321) ==3636== Address 0x9c5b660 is 96 bytes inside a block of size 120 free'd ==3636== at 0x4C29D17: free (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so) ==3636== by 0x58B32D3: _talloc_free_internal (talloc.c:1063) ==3636== by 0x58B3232: _talloc_free_children_internal (talloc.c:1472) ==3636== by 0x58B3232: _talloc_free_internal (talloc.c:1019) ==3636== by 0x58B3232: _talloc_free_children_internal (talloc.c:1472) ==3636== by 0x58B3232: _talloc_free_internal (talloc.c:1019) ==3636== by 0x58AD692: _talloc_free_children_internal (talloc.c:1472) ==3636== by 0x58AD692: _talloc_free_internal (talloc.c:1019) ==3636== by 0x58AD692: _talloc_free (talloc.c:1594) ==3636== by 0x11EC30: daemon_incoming_packet (ctdb_daemon.c:844) ==3636== by 0x136F4A: lock_fetch_callback (ctdb_ltdb_server.c:268) ==3636== by 0x152489: process_callbacks (ctdb_lock.c:353) ==3636== by 0x152489: ctdb_lock_handler (ctdb_lock.c:468) ==3636== by 0x56A535A: epoll_event_loop (tevent_epoll.c:728) ==3636== by 0x56A535A: epoll_event_loop_once (tevent_epoll.c:926) ==3636== by 0x56A3826: std_event_loop_once (tevent_standard.c:114) ==3636== by 0x569FFFC: _tevent_loop_once (tevent.c:533) ==3636== by 0x56A019A: tevent_common_loop_wait (tevent.c:637) ==3636== by 0x56A37C6: std_event_loop_wait (tevent_standard.c:140) ==3636== by 0x11E03A: ctdb_start_daemon (ctdb_daemon.c:1320) ==3636== by 0x118557: main (ctdbd.c:321) BUG: https://bugzilla.samba.org/show_bug.cgi?id=11293 Signed-off-by: Stefan Metzmacher Reviewed-by: Amitay Isaacs (cherry picked from commit ee02e40e869fd46f113d016122dd5384b7887228) --- ctdb/server/ctdb_lock.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ctdb/server/ctdb_lock.c b/ctdb/server/ctdb_lock.c index 7959d40..5765e1b 100644 --- a/ctdb/server/ctdb_lock.c +++ b/ctdb/server/ctdb_lock.c @@ -350,6 +350,10 @@ static void process_callbacks(struct lock_context *lock_ctx, bool locked) /* Reset the destructor, so request is not removed from the list */ talloc_set_destructor(request, NULL); } + + /* Since request may be freed in the callback, unset the request */ + lock_ctx->request = NULL; + request->callback(request->private_data, locked); if (lock_ctx->auto_mark && locked) { -- 2.1.0 From 3d43cdd84d17a5f49cd3f118fca18d1ff43cfe77 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 2 Jun 2015 00:15:11 +1000 Subject: [PATCH 2/8] ctdb-locking: Set the lock_ctx->request to NULL when request is freed The code was added to ctdb_lock_context_destructor() to ensure that the if a lock_ctx gets freed first, the lock_request does not have a dangling pointer. However, the reverse is also true. When a lock_request is freed, then lock_ctx should not have a dangling pointer. In commit 374cbc7b0ff68e04ee4e395935509c7df817b3c0, the code for second condition was dropped causing a regression. BUG: https://bugzilla.samba.org/show_bug.cgi?id=11293 Signed-off-by: Stefan Metzmacher Reviewed-by: Amitay Isaacs (cherry picked from commit 752ec31bcbbfe9f5b3b1c5dde4179d69f41cb53c) --- ctdb/server/ctdb_lock.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ctdb/server/ctdb_lock.c b/ctdb/server/ctdb_lock.c index 5765e1b..61eedaa 100644 --- a/ctdb/server/ctdb_lock.c +++ b/ctdb/server/ctdb_lock.c @@ -312,7 +312,13 @@ static int ctdb_lock_context_destructor(struct lock_context *lock_ctx) */ static int ctdb_lock_request_destructor(struct lock_request *lock_request) { + if (lock_request->lctx == NULL) { + return 0; + } + + lock_request->lctx->request = NULL; TALLOC_FREE(lock_request->lctx); + return 0; } -- 2.1.0 From 8000453f316f1e3d21bba8c6750f65ed3fd117f2 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Tue, 2 Jun 2015 00:22:07 +1000 Subject: [PATCH 3/8] ctdb-locking: Set destructor when lock_context is created There is already code in the destructor to correctly remove it from the pending or the active queue. This also ensures that when lock context is in pending queue and if the lock request gets freed, the lock context is correctly removed from the pending queue. Thanks to Stefan Metzmacher for noticing this and suggesting the fix. BUG: https://bugzilla.samba.org/show_bug.cgi?id=11293 Signed-off-by: Amitay Isaacs Reviewed-by: Stefan Metzmacher (cherry picked from commit 5ae6a8f2fff5b5f4d46f496fd83f555be4b3d448) --- ctdb/server/ctdb_lock.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/ctdb/server/ctdb_lock.c b/ctdb/server/ctdb_lock.c index 61eedaa..442b653 100644 --- a/ctdb/server/ctdb_lock.c +++ b/ctdb/server/ctdb_lock.c @@ -815,8 +815,6 @@ static void ctdb_lock_schedule(struct ctdb_context *ctdb) /* Parent process */ close(lock_ctx->fd[1]); - talloc_set_destructor(lock_ctx, ctdb_lock_context_destructor); - talloc_free(tmp_ctx); /* Set up timeout handler */ @@ -828,7 +826,6 @@ static void ctdb_lock_schedule(struct ctdb_context *ctdb) if (lock_ctx->ttimer == NULL) { ctdb_kill(ctdb, lock_ctx->child, SIGKILL); lock_ctx->child = -1; - talloc_set_destructor(lock_ctx, NULL); close(lock_ctx->fd[0]); return; } @@ -844,7 +841,6 @@ static void ctdb_lock_schedule(struct ctdb_context *ctdb) TALLOC_FREE(lock_ctx->ttimer); ctdb_kill(ctdb, lock_ctx->child, SIGKILL); lock_ctx->child = -1; - talloc_set_destructor(lock_ctx, NULL); close(lock_ctx->fd[0]); return; } @@ -942,6 +938,7 @@ static struct lock_request *ctdb_lock_internal(TALLOC_CTX *mem_ctx, request->private_data = private_data; talloc_set_destructor(request, ctdb_lock_request_destructor); + talloc_set_destructor(lock_ctx, ctdb_lock_context_destructor); ctdb_lock_schedule(ctdb); -- 2.1.0 From bf14d12e620d93b7771cb7e00fbb56aa55eb542b Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Tue, 2 Jun 2015 11:15:11 +1000 Subject: [PATCH 4/8] ctdb-locking: Avoid memory leak in the failure case BUG: https://bugzilla.samba.org/show_bug.cgi?id=11293 Signed-off-by: Amitay Isaacs Reviewed-by: Stefan Metzmacher (cherry picked from commit 2b352ff20597b9e34b3777d35deca1bf56209f8a) --- ctdb/server/ctdb_lock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ctdb/server/ctdb_lock.c b/ctdb/server/ctdb_lock.c index 442b653..77245b3 100644 --- a/ctdb/server/ctdb_lock.c +++ b/ctdb/server/ctdb_lock.c @@ -905,6 +905,7 @@ static struct lock_request *ctdb_lock_internal(TALLOC_CTX *mem_ctx, if (lock_ctx->key.dptr == NULL) { DEBUG(DEBUG_ERR, (__location__ "Memory allocation error\n")); talloc_free(lock_ctx); + talloc_free(request); return NULL; } lock_ctx->key_hash = ctdb_hash(&key); -- 2.1.0 From 29728c66c8a9840c1245003bc8b5d9653b250868 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Tue, 2 Jun 2015 11:25:44 +1000 Subject: [PATCH 5/8] ctdb-locking: Avoid resetting talloc destructor Let ctdb_lock_request_destructor() take care of the proper cleanup. If the request if freed from the callback function, then the lock context should not be freed. Setting request->lctx to NULL takes care of that in the destructor. BUG: https://bugzilla.samba.org/show_bug.cgi?id=11293 Signed-off-by: Amitay Isaacs Reviewed-by: Stefan Metzmacher (cherry picked from commit bc747030d435447e62262541cf2e74be4c4229d8) --- ctdb/server/ctdb_lock.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ctdb/server/ctdb_lock.c b/ctdb/server/ctdb_lock.c index 77245b3..d5bf6ab 100644 --- a/ctdb/server/ctdb_lock.c +++ b/ctdb/server/ctdb_lock.c @@ -353,8 +353,10 @@ static void process_callbacks(struct lock_context *lock_ctx, bool locked) request = lock_ctx->request; if (lock_ctx->auto_mark) { - /* Reset the destructor, so request is not removed from the list */ - talloc_set_destructor(request, NULL); + /* Since request may be freed in the callback, unset the lock + * context, so request destructor will not free lock context. + */ + request->lctx = NULL; } /* Since request may be freed in the callback, unset the request */ -- 2.1.0 From e077cd6b342e10495c2eb4cfcfd16cae1343b0b5 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Tue, 2 Jun 2015 13:15:37 +1000 Subject: [PATCH 6/8] ctdb-locking: Add a comment to explain auto_mark usage BUG: https://bugzilla.samba.org/show_bug.cgi?id=11293 Signed-off-by: Amitay Isaacs Reviewed-by: Stefan Metzmacher (cherry picked from commit 89849c4d31c0bb0c47864e11abc89efe7d812d87) --- ctdb/server/ctdb_lock.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ctdb/server/ctdb_lock.c b/ctdb/server/ctdb_lock.c index d5bf6ab..c6bd804 100644 --- a/ctdb/server/ctdb_lock.c +++ b/ctdb/server/ctdb_lock.c @@ -41,6 +41,10 @@ * ctdb_lock_alldb() - get a lock on all DBs * * auto_mark - whether to mark/unmark DBs in before/after callback + * = false is used for freezing databases for + * recovery since the recovery cannot start till + * databases are locked on all the nodes. + * = true is used for record locks. */ enum lock_type { -- 2.1.0 From 5fc66ac45ea53ae5f0567ed00eefbcda850f2408 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 2 Jun 2015 12:39:17 +0200 Subject: [PATCH 7/8] ctdb-locking: make process_callbacks() more robust We should not dereference lock_ctx after invoking the callback in the auto_mark == false case. The callback could have destroyed it. BUG: https://bugzilla.samba.org/show_bug.cgi?id=11293 Signed-off-by: Stefan Metzmacher Reviewed-by: Amitay Isaacs (cherry picked from commit a2690bc3f4e28a2ed50ccb47cb404fc8570fde6d) --- ctdb/server/ctdb_lock.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/ctdb/server/ctdb_lock.c b/ctdb/server/ctdb_lock.c index c6bd804..bfb5e0a 100644 --- a/ctdb/server/ctdb_lock.c +++ b/ctdb/server/ctdb_lock.c @@ -334,8 +334,9 @@ static int ctdb_lock_request_destructor(struct lock_request *lock_request) static void process_callbacks(struct lock_context *lock_ctx, bool locked) { struct lock_request *request; + bool auto_mark = lock_ctx->auto_mark; - if (lock_ctx->auto_mark && locked) { + if (auto_mark && locked) { switch (lock_ctx->type) { case LOCK_RECORD: tdb_chainlock_mark(lock_ctx->ctdb_db->ltdb->tdb, lock_ctx->key); @@ -356,7 +357,7 @@ static void process_callbacks(struct lock_context *lock_ctx, bool locked) } request = lock_ctx->request; - if (lock_ctx->auto_mark) { + if (auto_mark) { /* Since request may be freed in the callback, unset the lock * context, so request destructor will not free lock context. */ @@ -368,7 +369,11 @@ static void process_callbacks(struct lock_context *lock_ctx, bool locked) request->callback(request->private_data, locked); - if (lock_ctx->auto_mark && locked) { + if (!auto_mark) { + return; + } + + if (locked) { switch (lock_ctx->type) { case LOCK_RECORD: tdb_chainlock_unmark(lock_ctx->ctdb_db->ltdb->tdb, lock_ctx->key); -- 2.1.0 From b3f36e93b5b095ef3651a28519ad94cd1a58a22d Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 2 Jun 2015 12:43:17 +0200 Subject: [PATCH 8/8] ctdb-locking: move all auto_mark logic into process_callbacks() The caller should not dereference lock_ctx after invoking process_callbacks(), it might be destroyed already. BUG: https://bugzilla.samba.org/show_bug.cgi?id=11293 Signed-off-by: Stefan Metzmacher Reviewed-by: Amitay Isaacs Autobuild-User(master): Amitay Isaacs Autobuild-Date(master): Fri Jun 12 15:28:57 CEST 2015 on sn-devel-104 (cherry picked from commit b3a18d66c00dba73a3f56a6f95781b4d34db1fe2) --- ctdb/server/ctdb_lock.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/ctdb/server/ctdb_lock.c b/ctdb/server/ctdb_lock.c index bfb5e0a..5b63d1e 100644 --- a/ctdb/server/ctdb_lock.c +++ b/ctdb/server/ctdb_lock.c @@ -392,6 +392,8 @@ static void process_callbacks(struct lock_context *lock_ctx, bool locked) break; } } + + talloc_free(lock_ctx); } @@ -437,7 +439,6 @@ static void ctdb_lock_handler(struct tevent_context *ev, void *private_data) { struct lock_context *lock_ctx; - TALLOC_CTX *tmp_ctx = NULL; char c; bool locked; double t; @@ -451,11 +452,6 @@ static void ctdb_lock_handler(struct tevent_context *ev, t = timeval_elapsed(&lock_ctx->start_time); id = lock_bucket_id(t); - if (lock_ctx->auto_mark) { - tmp_ctx = talloc_new(ev); - talloc_steal(tmp_ctx, lock_ctx); - } - /* Read the status from the child process */ if (sys_read(lock_ctx->fd[0], &c, 1) != 1) { locked = false; @@ -487,10 +483,6 @@ static void ctdb_lock_handler(struct tevent_context *ev, } process_callbacks(lock_ctx, locked); - - if (lock_ctx->auto_mark) { - talloc_free(tmp_ctx); - } } -- 2.1.0