From e1ce3410a20e2b2ce0c8d97d92a40db7d3d031b3 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 16 May 2013 11:48:02 +0200 Subject: [PATCH 01/49] wscript: set conf.env.replace_add_global_pthread = True In Samba we currently add PTHREAD CFLAGS/LDFLAGS globally. The following changes will move the configure checks to lib/replace and the default of adding the flags globally will change there. Signed-off-by: Stefan Metzmacher --- wscript | 1 + 1 file changed, 1 insertion(+) diff --git a/wscript b/wscript index 5007834..c7c1072 100644 --- a/wscript +++ b/wscript @@ -92,6 +92,7 @@ def configure(conf): conf.ADD_EXTRA_INCLUDES('#include/public #source4 #lib #source4/lib #source4/include #include #lib/replace') + conf.env.replace_add_global_pthread = True conf.RECURSE('lib/replace') conf.find_program('perl', var='PERL', mandatory=True) -- 1.7.9.5 From 1dcc2ad8fb71732c974ef88cc7950dd78b321fa0 Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Mon, 14 Jan 2013 14:56:25 +0100 Subject: [PATCH 02/49] libreplace: Move thread checks from source3/wscript Signed-off-by: Volker Lendecke Reviewed-by: Stefan Metzmacher --- lib/replace/wscript | 32 ++++++++++++++++++++++++++++++++ source3/wscript | 32 -------------------------------- 2 files changed, 32 insertions(+), 32 deletions(-) diff --git a/lib/replace/wscript b/lib/replace/wscript index cabf91f..22615ab 100644 --- a/lib/replace/wscript +++ b/lib/replace/wscript @@ -422,6 +422,38 @@ removeea setea conf.CHECK_FUNCS_IN('pthread_create', 'pthread', checklibc=True, headers='pthread.h') + PTHREAD_CFLAGS='error' + PTHREAD_LDFLAGS='error' + + if PTHREAD_LDFLAGS == 'error': + if conf.CHECK_FUNCS_IN('pthread_attr_init', 'pthread'): + PTHREAD_CFLAGS='-D_REENTRANT -D_POSIX_PTHREAD_SEMANTICS' + PTHREAD_LDFLAGS='-lpthread' + if PTHREAD_LDFLAGS == 'error': + if conf.CHECK_FUNCS_IN('pthread_attr_init', 'pthreads'): + PTHREAD_CFLAGS='-D_THREAD_SAFE' + PTHREAD_LDFLAGS='-lpthreads' + if PTHREAD_LDFLAGS == 'error': + if 
conf.CHECK_FUNCS_IN('pthread_attr_init', 'c_r'): + PTHREAD_CFLAGS='-D_THREAD_SAFE -pthread' + PTHREAD_LDFLAGS='-pthread' + if PTHREAD_LDFLAGS == 'error': + if conf.CHECK_FUNCS('pthread_attr_init'): + PTHREAD_CFLAGS='-D_REENTRANT' + PTHREAD_LDFLAGS='-lpthread' + # especially for HP-UX, where the CHECK_FUNC macro fails to test for + # pthread_attr_init. On pthread_mutex_lock it works there... + if PTHREAD_LDFLAGS == 'error': + if conf.CHECK_FUNCS_IN('pthread_mutex_lock', 'pthread'): + PTHREAD_CFLAGS='-D_REENTRANT' + PTHREAD_LDFLAGS='-lpthread' + + if PTHREAD_CFLAGS != 'error' and PTHREAD_LDFLAGS != 'error': + conf.ADD_CFLAGS(PTHREAD_CFLAGS) + conf.ADD_LDFLAGS(PTHREAD_LDFLAGS) + conf.CHECK_HEADERS('pthread.h') + conf.DEFINE('HAVE_PTHREAD', '1') + conf.CHECK_FUNCS_IN('crypt', 'crypt', checklibc=True) conf.CHECK_VARIABLE('rl_event_hook', define='HAVE_DECL_RL_EVENT_HOOK', always=True, diff --git a/source3/wscript b/source3/wscript index 4fe49fa..a9b8499 100644 --- a/source3/wscript +++ b/source3/wscript @@ -1739,38 +1739,6 @@ main() { if Options.options.with_profiling_data: conf.DEFINE('WITH_PROFILE', 1); - PTHREAD_CFLAGS='error' - PTHREAD_LDFLAGS='error' - - if PTHREAD_LDFLAGS == 'error': - if conf.CHECK_FUNCS_IN('pthread_attr_init', 'pthread'): - PTHREAD_CFLAGS='-D_REENTRANT -D_POSIX_PTHREAD_SEMANTICS' - PTHREAD_LDFLAGS='-lpthread' - if PTHREAD_LDFLAGS == 'error': - if conf.CHECK_FUNCS_IN('pthread_attr_init', 'pthreads'): - PTHREAD_CFLAGS='-D_THREAD_SAFE' - PTHREAD_LDFLAGS='-lpthreads' - if PTHREAD_LDFLAGS == 'error': - if conf.CHECK_FUNCS_IN('pthread_attr_init', 'c_r'): - PTHREAD_CFLAGS='-D_THREAD_SAFE -pthread' - PTHREAD_LDFLAGS='-pthread' - if PTHREAD_LDFLAGS == 'error': - if conf.CHECK_FUNCS('pthread_attr_init'): - PTHREAD_CFLAGS='-D_REENTRANT' - PTHREAD_LDFLAGS='-lpthread' - # especially for HP-UX, where the CHECK_FUNC macro fails to test for - # pthread_attr_init. On pthread_mutex_lock it works there... 
- if PTHREAD_LDFLAGS == 'error': - if conf.CHECK_FUNCS_IN('pthread_mutex_lock', 'pthread'): - PTHREAD_CFLAGS='-D_REENTRANT' - PTHREAD_LDFLAGS='-lpthread' - - if PTHREAD_CFLAGS != 'error' and PTHREAD_LDFLAGS != 'error': - conf.ADD_CFLAGS(PTHREAD_CFLAGS) - conf.ADD_LDFLAGS(PTHREAD_LDFLAGS) - conf.CHECK_HEADERS('pthread.h') - conf.DEFINE('HAVE_PTHREAD', '1') - if Options.options.with_pthreadpool: if conf.CONFIG_SET('HAVE_PTHREAD'): conf.DEFINE('WITH_PTHREADPOOL', '1') -- 1.7.9.5 From d02f88768c7f1b7292540b994cab27127e38f1ef Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 16 May 2013 11:50:38 +0200 Subject: [PATCH 03/49] libreplace: only add PTHREAD CFLAGS and LDFLAGS globally if asked for Signed-off-by: Stefan Metzmacher --- lib/replace/wscript | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/replace/wscript b/lib/replace/wscript index 22615ab..aeddee3 100644 --- a/lib/replace/wscript +++ b/lib/replace/wscript @@ -449,8 +449,9 @@ removeea setea PTHREAD_LDFLAGS='-lpthread' if PTHREAD_CFLAGS != 'error' and PTHREAD_LDFLAGS != 'error': - conf.ADD_CFLAGS(PTHREAD_CFLAGS) - conf.ADD_LDFLAGS(PTHREAD_LDFLAGS) + if conf.CONFIG_SET('replace_add_global_pthread'): + conf.ADD_CFLAGS(PTHREAD_CFLAGS) + conf.ADD_LDFLAGS(PTHREAD_LDFLAGS) conf.CHECK_HEADERS('pthread.h') conf.DEFINE('HAVE_PTHREAD', '1') -- 1.7.9.5 From c57d4a6181ac7306a97ef129dc4938022ad29e7e Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Sat, 22 Dec 2012 08:42:48 +0100 Subject: [PATCH 04/49] libreplace: Add support for pthread_mutexattr_setrobust Signed-off-by: Volker Lendecke Reviewed-by: Stefan Metzmacher --- lib/replace/system/threads.h | 35 +++++++++++++++++++++++++++++++++++ lib/replace/wscript | 14 ++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 lib/replace/system/threads.h diff --git a/lib/replace/system/threads.h b/lib/replace/system/threads.h new file mode 100644 index 0000000..3d89bb2 --- /dev/null +++ b/lib/replace/system/threads.h @@ -0,0 +1,35 
@@ +#ifndef _system_threads_h +#define _system_threads_h +/* + Unix SMB/CIFS implementation. + + macros to go along with the lib/replace/ portability layer code + + Copyright (C) Volker Lendecke 2012 + + ** NOTE! The following LGPL license applies to the replace + ** library. This does NOT imply that all of Samba is released + ** under the LGPL + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see . +*/ + +#include + +#if defined(HAVE_PTHREAD_MUTEXATTR_SETROBUST_NP) && \ + !defined(HAVE_PTHREAD_MUTEXATTR_SETROBUST) +#define pthread_mutexattr_setrobust pthread_mutexattr_setrobust_np +#endif + +#endif diff --git a/lib/replace/wscript b/lib/replace/wscript index aeddee3..a6d0ee8 100644 --- a/lib/replace/wscript +++ b/lib/replace/wscript @@ -455,6 +455,20 @@ removeea setea conf.CHECK_HEADERS('pthread.h') conf.DEFINE('HAVE_PTHREAD', '1') + if conf.CONFIG_SET('HAVE_PTHREAD'): + + conf.CHECK_DECLS('pthread_mutexattr_setrobust', headers='pthread.h') + conf.CHECK_FUNCS_IN('pthread_mutexattr_setrobust', 'pthread', + checklibc=True, headers='pthread.h') + + conf.CHECK_DECLS('pthread_mutexattr_setrobust_np', headers='pthread.h') + conf.CHECK_FUNCS_IN('pthread_mutexattr_setrobust_np', 'pthread', + checklibc=True, headers='pthread.h') + + if (conf.CONFIG_SET('HAVE_PTHREAD_MUTEXATTR_SETROBUST') or + conf.CONFIG_SET('HAVE_PTHREAD_MUTEXATTR_SETROBUST_NP')): + conf.DEFINE('HAVE_ROBUST_MUTEXES', 1) + 
conf.CHECK_FUNCS_IN('crypt', 'crypt', checklibc=True) conf.CHECK_VARIABLE('rl_event_hook', define='HAVE_DECL_RL_EVENT_HOOK', always=True, -- 1.7.9.5 From d571d8141a95f8e44faf8710d2a3ef5c5ccb340d Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Mon, 4 Feb 2013 12:26:47 +0100 Subject: [PATCH 05/49] libreplace: Add support for pthread_mutex_consistent Signed-off-by: Volker Lendecke Reviewed-by: Stefan Metzmacher --- lib/replace/system/threads.h | 5 +++++ lib/replace/wscript | 14 ++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/lib/replace/system/threads.h b/lib/replace/system/threads.h index 3d89bb2..3aca088 100644 --- a/lib/replace/system/threads.h +++ b/lib/replace/system/threads.h @@ -32,4 +32,9 @@ #define pthread_mutexattr_setrobust pthread_mutexattr_setrobust_np #endif +#if defined(HAVE_PTHREAD_MUTEX_CONSISTENT_NP) && \ + !defined(HAVE_PTHREAD_MUTEX_CONSISTENT) +#define pthread_mutex_consistent pthread_mutex_consistent_np +#endif + #endif diff --git a/lib/replace/wscript b/lib/replace/wscript index a6d0ee8..6bd96f9 100644 --- a/lib/replace/wscript +++ b/lib/replace/wscript @@ -465,8 +465,18 @@ removeea setea conf.CHECK_FUNCS_IN('pthread_mutexattr_setrobust_np', 'pthread', checklibc=True, headers='pthread.h') - if (conf.CONFIG_SET('HAVE_PTHREAD_MUTEXATTR_SETROBUST') or - conf.CONFIG_SET('HAVE_PTHREAD_MUTEXATTR_SETROBUST_NP')): + conf.CHECK_DECLS('pthread_mutex_consistent', headers='pthread.h') + conf.CHECK_FUNCS_IN('pthread_mutex_consistent', 'pthread', + checklibc=True, headers='pthread.h') + + conf.CHECK_DECLS('pthread_mutex_consistent_np', headers='pthread.h') + conf.CHECK_FUNCS_IN('pthread_mutex_consistent_np', 'pthread', + checklibc=True, headers='pthread.h') + + if ((conf.CONFIG_SET('HAVE_PTHREAD_MUTEXATTR_SETROBUST') or + conf.CONFIG_SET('HAVE_PTHREAD_MUTEXATTR_SETROBUST_NP')) and + (conf.CONFIG_SET('HAVE_PTHREAD_MUTEX_CONSISTENT') or + conf.CONFIG_SET('HAVE_PTHREAD_MUTEX_CONSISTENT_NP'))): conf.DEFINE('HAVE_ROBUST_MUTEXES', 
1) conf.CHECK_FUNCS_IN('crypt', 'crypt', checklibc=True) -- 1.7.9.5 From e6678106f50eeb7c90006132a3a41d983f3ea5a2 Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Wed, 6 Feb 2013 12:15:41 +0100 Subject: [PATCH 06/49] libreplace-waf: Only check for _np functions if standard functions are not available Signed-off-by: Volker Lendecke Reviewed-by: Stefan Metzmacher --- lib/replace/wscript | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/lib/replace/wscript b/lib/replace/wscript index 6bd96f9..5839ed0 100644 --- a/lib/replace/wscript +++ b/lib/replace/wscript @@ -458,20 +458,26 @@ removeea setea if conf.CONFIG_SET('HAVE_PTHREAD'): conf.CHECK_DECLS('pthread_mutexattr_setrobust', headers='pthread.h') - conf.CHECK_FUNCS_IN('pthread_mutexattr_setrobust', 'pthread', - checklibc=True, headers='pthread.h') + if not conf.CONFIG_SET('HAVE_DECL_PTHREAD_MUTEXATTR_SETROBUST'): + conf.CHECK_DECLS('pthread_mutexattr_setrobust_np', + headers='pthread.h') - conf.CHECK_DECLS('pthread_mutexattr_setrobust_np', headers='pthread.h') - conf.CHECK_FUNCS_IN('pthread_mutexattr_setrobust_np', 'pthread', + conf.CHECK_FUNCS_IN('pthread_mutexattr_setrobust', 'pthread', checklibc=True, headers='pthread.h') + if not conf.CONFIG_SET('HAVE_PTHREAD_MUTEXATTR_SETROBUST'): + conf.CHECK_FUNCS_IN('pthread_mutexattr_setrobust_np', 'pthread', + checklibc=True, headers='pthread.h') conf.CHECK_DECLS('pthread_mutex_consistent', headers='pthread.h') - conf.CHECK_FUNCS_IN('pthread_mutex_consistent', 'pthread', - checklibc=True, headers='pthread.h') + if not conf.CONFIG_SET('HAVE_DECL_PTHREAD_MUTEX_CONSISTENT'): + conf.CHECK_DECLS('pthread_mutex_consistent_np', + headers='pthread.h') - conf.CHECK_DECLS('pthread_mutex_consistent_np', headers='pthread.h') - conf.CHECK_FUNCS_IN('pthread_mutex_consistent_np', 'pthread', + conf.CHECK_FUNCS_IN('pthread_mutex_consistent', 'pthread', checklibc=True, headers='pthread.h') + if not conf.CONFIG_SET('HAVE_PTHREAD_MUTEX_CONSISTENT'): 
+ conf.CHECK_FUNCS_IN('pthread_mutex_consistent_np', 'pthread', + checklibc=True, headers='pthread.h') if ((conf.CONFIG_SET('HAVE_PTHREAD_MUTEXATTR_SETROBUST') or conf.CONFIG_SET('HAVE_PTHREAD_MUTEXATTR_SETROBUST_NP')) and -- 1.7.9.5 From 1e309aa00b62513a2ea121025a1f88c45dbf8e5f Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Wed, 6 Feb 2013 12:16:02 +0100 Subject: [PATCH 07/49] libreplace: Define PTHREAD_MUTEX_ROBUST along with pthread_mutexattr_setrobust Signed-off-by: Volker Lendecke Reviewed-by: Stefan Metzmacher --- lib/replace/system/threads.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lib/replace/system/threads.h b/lib/replace/system/threads.h index 3aca088..25d3502 100644 --- a/lib/replace/system/threads.h +++ b/lib/replace/system/threads.h @@ -29,7 +29,15 @@ #if defined(HAVE_PTHREAD_MUTEXATTR_SETROBUST_NP) && \ !defined(HAVE_PTHREAD_MUTEXATTR_SETROBUST) + #define pthread_mutexattr_setrobust pthread_mutexattr_setrobust_np + +/* + * We assume that PTHREAD_MUTEX_ROBUST_NP goes along with + * pthread_mutexattr_setrobust_np() + */ +#define PTHREAD_MUTEX_ROBUST PTHREAD_MUTEX_ROBUST_NP + #endif #if defined(HAVE_PTHREAD_MUTEX_CONSISTENT_NP) && \ -- 1.7.9.5 From cf35213036e909f41faad1c6eb1e74ecfc39b64d Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Fri, 17 May 2013 14:29:35 +0200 Subject: [PATCH 08/49] libreplace: Add robust mutex test program Signed-off-by: Volker Lendecke Signed-off-by: Stefan Metzmacher --- lib/replace/test/robust_mutex.c | 202 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 202 insertions(+) create mode 100644 lib/replace/test/robust_mutex.c diff --git a/lib/replace/test/robust_mutex.c b/lib/replace/test/robust_mutex.c new file mode 100644 index 0000000..58e414f --- /dev/null +++ b/lib/replace/test/robust_mutex.c @@ -0,0 +1,202 @@ +/* + * Unix SMB/CIFS implementation. + * + * libreplace tests + * + * Copyright (C) Volker Lendecke 2013 + * + * ** NOTE! 
The following LGPL license applies to the talloc + * ** library. This does NOT imply that all of Samba is released + * ** under the LGPL + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . +*/ + +/* + * this tests whether robust mutexes are actually robust. For example in + * RHEL6.1 we have the headers and libc implementations, but the robust + * mutexes are not robust, i.e. they don't return EOWNERDEAD when they should. 
+ */ + +#if defined(HAVE_UNISTD_H) +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int main(void) +{ + void *ptr; + pthread_mutex_t *m; + pthread_mutexattr_t ma; + int ret = 1; + int fd = -1; + int pipe_down[2] = { -1, -1 }; + int pipe_up[2] = { -1, -1 }; + ssize_t nread; + char c = 0; + const char *fname = "conftest.robust_mutex"; + + fd = open(fname, O_RDWR|O_CREAT|O_TRUNC, 0666); + if (fd == -1) { + perror("open failed"); + goto fail; + } + + ret = ftruncate(fd, sizeof(pthread_mutex_t)); + if (ret == -1) { + perror("ftruncate failed"); + goto fail; + } + + ptr = mmap(NULL, sizeof(pthread_mutex_t), PROT_READ|PROT_WRITE, + MAP_SHARED|MAP_FILE, fd, 0); + if (ptr == MAP_FAILED) { + perror("mmap failed"); + goto fail; + } + m = (pthread_mutex_t *)ptr; + + ret = pipe(pipe_down); + if (ret != 0) { + perror("pipe failed"); + goto fail; + } + ret = pipe(pipe_up); + if (ret != 0) { + perror("pipe failed"); + goto fail; + } + + ret = pthread_mutexattr_init(&ma); + if (ret != 0) { + goto fail; + } + ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK); + if (ret != 0) { + goto fail; + } + ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED); + if (ret != 0) { + goto fail; + } + ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST); + if (ret != 0) { + goto fail; + } + ret = pthread_mutex_init(m, &ma); + if (ret != 0) { + goto fail; + } + pthread_mutexattr_destroy(&ma); + + if (fork() == 0) { + size_t nwritten; + close(pipe_down[1]); + close(pipe_up[0]); + ret = pthread_mutex_lock(m); + nwritten = write(pipe_up[1], &ret, sizeof(ret)); + if (nwritten != sizeof(ret)) { + exit(1); + } + if (ret != 0) { + exit(1); + } + nread = read(pipe_down[0], &c, 1); + if (nread != 1) { + exit(1); + } + /* leave locked */ + exit(0); + } + close(pipe_down[0]); + close(pipe_up[1]); + + nread = read(pipe_up[0], &ret, sizeof(ret)); + if (nread != sizeof(ret)) { + fprintf(stderr, "read returned 
%d, expected %d\n", + (int)nread, (int)sizeof(ret)); + ret = 1; + goto fail; + } + + ret = pthread_mutex_trylock(m); + if (ret != EBUSY) { + fprintf(stderr, "pthread_mutex_trylock returned %s, expected " + "EBUSY\n", strerror(ret)); + if (ret == 0) { + ret = 1; + } + goto fail; + } + + if (write(pipe_down[1], &c, 1) != 1) { + perror("write failed"); + goto fail; + } + + nread = read(pipe_up[0], &c, 1); + if (nread != 0) { + fprintf(stderr, "read returned %d, expected 0\n", + (int)nread); + goto fail; + } + + ret = pthread_mutex_trylock(m); + if (ret != EOWNERDEAD) { + fprintf(stderr, "pthread_mutex_trylock returned %s, expected " + "EOWNERDEAD\n", strerror(ret)); + if (ret == 0) { + ret = 1; + } + goto fail; + } + + ret = pthread_mutex_consistent(m); + if (ret != 0) { + fprintf(stderr, "pthread_mutex_consistent returned %s, " + "expected 0\n", strerror(ret)); + goto fail; + } + + ret = pthread_mutex_trylock(m); + if (ret != EDEADLK) { + fprintf(stderr, "pthread_mutex_trylock returned %s, expected " + "EDEADLK\n", strerror(ret)); + if (ret == 0) { + ret = 1; + } + goto fail; + } + + ret = pthread_mutex_unlock(m); + if (ret != 0) { + fprintf(stderr, "pthread_mutex_unlock returned %s, " + "expected 0\n", strerror(ret)); + goto fail; + } + + ret = 0; +fail: + unlink(fname); + return ret; +} -- 1.7.9.5 From c7f3aeb311034c3d31ed1d0f9cae5c845a914650 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Sat, 18 May 2013 10:23:43 +0200 Subject: [PATCH 09/49] libreplace: define HAVE_ROBUST_MUTEXES if test/robust_mutex.c works Signed-off-by: Stefan Metzmacher --- lib/replace/wscript | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/replace/wscript b/lib/replace/wscript index 5839ed0..2c0a7d8 100644 --- a/lib/replace/wscript +++ b/lib/replace/wscript @@ -483,7 +483,13 @@ removeea setea conf.CONFIG_SET('HAVE_PTHREAD_MUTEXATTR_SETROBUST_NP')) and (conf.CONFIG_SET('HAVE_PTHREAD_MUTEX_CONSISTENT') or 
conf.CONFIG_SET('HAVE_PTHREAD_MUTEX_CONSISTENT_NP'))): - conf.DEFINE('HAVE_ROBUST_MUTEXES', 1) + conf.CHECK_CODE(''' + #include "system/threads.h" + #include "test/robust_mutex.c" + ''', + addmain=False, add_headers=True, execute=True, + define='HAVE_ROBUST_MUTEXES', + msg="Checking for HAVE_ROBUST_MUTEXES") conf.CHECK_FUNCS_IN('crypt', 'crypt', checklibc=True) -- 1.7.9.5 From 4702e6c0bf7812bdb97bfc1c8058841d9be968aa Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 3 Feb 2014 11:29:38 +0100 Subject: [PATCH 10/49] pytdb: avoid const warnings by using discard_const_p() Signed-off-by: Stefan Metzmacher --- lib/tdb/pytdb.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/lib/tdb/pytdb.c b/lib/tdb/pytdb.c index bf0fed6..9320799 100644 --- a/lib/tdb/pytdb.c +++ b/lib/tdb/pytdb.c @@ -91,9 +91,10 @@ static PyObject *py_tdb_open(PyTypeObject *type, PyObject *args, PyObject *kwarg int hash_size = 0, tdb_flags = TDB_DEFAULT, flags = O_RDWR, mode = 0600; TDB_CONTEXT *ctx; PyTdbObject *ret; - const char *kwnames[] = { "name", "hash_size", "tdb_flags", "flags", "mode", NULL }; + const char *_kwnames[] = { "name", "hash_size", "tdb_flags", "flags", "mode", NULL }; + char **kwnames = discard_const_p(char *, _kwnames); - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|siiii", (char **)kwnames, &name, &hash_size, &tdb_flags, &flags, &mode)) + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|siiii", kwnames, &name, &hash_size, &tdb_flags, &flags, &mode)) return NULL; if (name == NULL) { @@ -548,13 +549,21 @@ static PyObject *obj_get_seqnum(PyTdbObject *self, void *closure) static PyGetSetDef tdb_object_getsetters[] = { - { (char *)"hash_size", (getter)obj_get_hash_size, NULL, NULL }, - { (char *)"map_size", (getter)obj_get_map_size, NULL, NULL }, - { (char *)"freelist_size", (getter)obj_get_freelist_size, NULL, NULL }, - { (char *)"flags", (getter)obj_get_flags, NULL, NULL }, - { (char *)"max_dead", NULL, 
(setter)obj_set_max_dead, NULL }, - { (char *)"filename", (getter)obj_get_filename, NULL, (char *)"The filename of this TDB file."}, - { (char *)"seqnum", (getter)obj_get_seqnum, NULL, NULL }, + { discard_const_p(char, "hash_size"), + (getter)obj_get_hash_size, NULL, NULL }, + { discard_const_p(char, "map_size"), + (getter)obj_get_map_size, NULL, NULL }, + { discard_const_p(char, "freelist_size"), + (getter)obj_get_freelist_size, NULL, NULL }, + { discard_const_p(char, "flags"), + (getter)obj_get_flags, NULL, NULL }, + { discard_const_p(char, "max_dead"), + NULL, (setter)obj_set_max_dead, NULL }, + { discard_const_p(char, "filename"), + (getter)obj_get_filename, NULL, + discard_const_p(char, "The filename of this TDB file.") }, + { discard_const_p(char, "seqnum"), + (getter)obj_get_seqnum, NULL, NULL }, { NULL } }; -- 1.7.9.5 From d2d18dbb77244c192af788ba3d65226ee04c41d5 Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Thu, 21 Feb 2013 16:34:32 +0100 Subject: [PATCH 11/49] tdb/tools: add -l option to tdbbackup This opens the tdb with TDB_NOLOCK. Reviewed-by: Stefan Metzmacher --- lib/tdb/man/tdbbackup.8.xml | 12 ++++++++++++ lib/tdb/tools/tdbbackup.c | 18 +++++++++++++----- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/lib/tdb/man/tdbbackup.8.xml b/lib/tdb/man/tdbbackup.8.xml index f24202e..30a658d 100644 --- a/lib/tdb/man/tdbbackup.8.xml +++ b/lib/tdb/man/tdbbackup.8.xml @@ -22,6 +22,7 @@ -s suffix -v -h + -l @@ -68,6 +69,17 @@ + + -l + + This option disables any locking, by passing TDB_NOLOCK + to tdb_open_ex(). Only use this for database files which + are not used by any other process! And also only if it is otherwise not + possible to open the database, e.g. databases which were created with + mutex locking. 
+ + + diff --git a/lib/tdb/tools/tdbbackup.c b/lib/tdb/tools/tdbbackup.c index 276a281..eb33e25 100644 --- a/lib/tdb/tools/tdbbackup.c +++ b/lib/tdb/tools/tdbbackup.c @@ -104,7 +104,8 @@ static int test_fn(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf, void *state) only doing the backup if its OK this function is also used for restore */ -static int backup_tdb(const char *old_name, const char *new_name, int hash_size) +static int backup_tdb(const char *old_name, const char *new_name, + int hash_size, int nolock) { TDB_CONTEXT *tdb; TDB_CONTEXT *tdb_new; @@ -122,7 +123,8 @@ static int backup_tdb(const char *old_name, const char *new_name, int hash_size) } /* open the old tdb */ - tdb = tdb_open_ex(old_name, 0, 0, + tdb = tdb_open_ex(old_name, 0, + TDB_DEFAULT | (nolock ? TDB_NOLOCK : 0), O_RDWR, 0, &log_ctx, NULL); if (!tdb) { printf("Failed to open %s\n", old_name); @@ -249,7 +251,7 @@ static int verify_tdb(const char *fname, const char *bak_name) /* count is < 0 means an error */ if (count < 0) { printf("restoring %s\n", fname); - return backup_tdb(bak_name, fname, 0); + return backup_tdb(bak_name, fname, 0, 0); } printf("%s : %d records\n", fname, count); @@ -279,6 +281,7 @@ static void usage(void) printf(" -s suffix set the backup suffix\n"); printf(" -v verify mode (restore if corrupt)\n"); printf(" -n hashsize set the new hash size for the backup\n"); + printf(" -l open without locking to back up mutex dbs\n"); } int main(int argc, char *argv[]) @@ -288,11 +291,12 @@ static void usage(void) int c; int verify = 0; int hashsize = 0; + int nolock = 0; const char *suffix = ".bak"; log_ctx.log_fn = tdb_log; - while ((c = getopt(argc, argv, "vhs:n:")) != -1) { + while ((c = getopt(argc, argv, "vhs:n:l")) != -1) { switch (c) { case 'h': usage(); @@ -306,6 +310,9 @@ static void usage(void) case 'n': hashsize = atoi(optarg); break; + case 'l': + nolock = 1; + break; } } @@ -329,7 +336,8 @@ static void usage(void) } } else { if (file_newer(fname, bak_name) && - 
backup_tdb(fname, bak_name, hashsize) != 0) { + backup_tdb(fname, bak_name, hashsize, + nolock) != 0) { ret = 1; } } -- 1.7.9.5 From f07310f6b91f79f637d04c2db78279961e57da08 Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Thu, 21 Feb 2013 16:34:32 +0100 Subject: [PATCH 12/49] tdb/tools: add -l option to tdbtool This opens the tdb with TDB_NOLOCK. Reviewed-by: Stefan Metzmacher --- lib/tdb/man/tdbtool.8.xml | 21 +++++++++++++++++++++ lib/tdb/tools/tdbtool.c | 18 ++++++++++++++++-- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/lib/tdb/man/tdbtool.8.xml b/lib/tdb/man/tdbtool.8.xml index ddca04c..cedc7eb 100644 --- a/lib/tdb/man/tdbtool.8.xml +++ b/lib/tdb/man/tdbtool.8.xml @@ -24,6 +24,7 @@ tdbtool + -l TDBFILE @@ -48,6 +49,26 @@ + + OPTIONS + + + + + -l + + This option disables any locking, by passing TDB_NOLOCK + to tdb_open_ex(). Only use this for database files which + are not used by any other process! And also only if it is otherwise not + possible to open the database, e.g. databases which were created with + mutex locking. 
+ + + + + + + COMMANDS diff --git a/lib/tdb/tools/tdbtool.c b/lib/tdb/tools/tdbtool.c index 01b9a14..c486117 100644 --- a/lib/tdb/tools/tdbtool.c +++ b/lib/tdb/tools/tdbtool.c @@ -36,6 +36,7 @@ char *line; TDB_DATA iterate_kbuf; char cmdline[1024]; static int disable_mmap; +static int disable_lock; enum commands { CMD_CREATE_TDB, @@ -226,7 +227,10 @@ static void create_tdb(const char *tdbname) log_ctx.log_fn = tdb_log; if (tdb) tdb_close(tdb); - tdb = tdb_open_ex(tdbname, 0, TDB_CLEAR_IF_FIRST | (disable_mmap?TDB_NOMMAP:0), + tdb = tdb_open_ex(tdbname, 0, + TDB_CLEAR_IF_FIRST | + (disable_mmap?TDB_NOMMAP:0) | + (disable_lock?TDB_NOLOCK:0), O_RDWR | O_CREAT | O_TRUNC, 0600, &log_ctx, NULL); if (!tdb) { printf("Could not create %s: %s\n", tdbname, strerror(errno)); @@ -239,7 +243,10 @@ static void open_tdb(const char *tdbname) log_ctx.log_fn = tdb_log; if (tdb) tdb_close(tdb); - tdb = tdb_open_ex(tdbname, 0, disable_mmap?TDB_NOMMAP:0, O_RDWR, 0600, + tdb = tdb_open_ex(tdbname, 0, + (disable_mmap?TDB_NOMMAP:0) | + (disable_lock?TDB_NOLOCK:0), + O_RDWR, 0600, &log_ctx, NULL); if (!tdb) { printf("Could not open %s: %s\n", tdbname, strerror(errno)); @@ -736,6 +743,13 @@ int main(int argc, char *argv[]) arg2 = NULL; arg2len = 0; + if (argv[1] && (strcmp(argv[1], "-l") == 0)) { + disable_lock = 1; + argv[1] = argv[0]; + argv += 1; + argc -= 1; + } + if (argv[1]) { cmdname = "open"; arg1 = argv[1]; -- 1.7.9.5 From ba540e80299fd86f88b93e4c70a7cf9d784a1cd4 Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Thu, 21 Feb 2013 16:34:32 +0100 Subject: [PATCH 13/49] tdb/tools: explicitly use TDB_NOLOCK in tdbdump Reviewed-by: Stefan Metzmacher --- lib/tdb/tools/tdbdump.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/lib/tdb/tools/tdbdump.c b/lib/tdb/tools/tdbdump.c index a739f99..9a0a7fe 100644 --- a/lib/tdb/tools/tdbdump.c +++ b/lib/tdb/tools/tdbdump.c @@ -99,8 +99,16 @@ static int dump_tdb(const char *fname, const char *keyname, bool emergency) 
TDB_CONTEXT *tdb; TDB_DATA key, value; struct tdb_logging_context logfn = { log_stderr }; + int tdb_flags = TDB_DEFAULT; - tdb = tdb_open_ex(fname, 0, 0, O_RDONLY, 0, &logfn, NULL); + /* + * Note: that O_RDONLY implies TDB_NOLOCK, but we want to make it + * explicit as it's important when working on databases which were + * created with mutex locking. + */ + tdb_flags |= TDB_NOLOCK; + + tdb = tdb_open_ex(fname, 0, tdb_flags, O_RDONLY, 0, &logfn, NULL); if (!tdb) { printf("Failed to open %s\n", fname); return 1; -- 1.7.9.5 From 961cb47999a242bc88e88190fc769d5dce65c93a Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Thu, 21 Feb 2013 16:34:32 +0100 Subject: [PATCH 14/49] tdb/test: add shutdown_agent() helper function Signed-off-by: Volker Lendecke Signed-off-by: Stefan Metzmacher --- lib/tdb/test/external-agent.c | 26 ++++++++++++++++++++------ lib/tdb/test/external-agent.h | 1 + 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/lib/tdb/test/external-agent.c b/lib/tdb/test/external-agent.c index 8710b47..0aca081 100644 --- a/lib/tdb/test/external-agent.c +++ b/lib/tdb/test/external-agent.c @@ -99,29 +99,29 @@ static enum agent_return do_operation(enum operation op, const char *name) struct agent { int cmdfd, responsefd; + pid_t pid; }; /* Do this before doing any tdb stuff. Return handle, or NULL. 
*/ struct agent *prepare_external_agent(void) { - int pid, ret; + int ret; int command[2], response[2]; char name[1+PATH_MAX]; + struct agent *agent = malloc(sizeof(*agent)); if (pipe(command) != 0 || pipe(response) != 0) { fprintf(stderr, "pipe failed: %s\n", strerror(errno)); exit(1); } - pid = fork(); - if (pid < 0) { + agent->pid = fork(); + if (agent->pid < 0) { fprintf(stderr, "fork failed: %s\n", strerror(errno)); exit(1); } - if (pid != 0) { - struct agent *agent = malloc(sizeof(*agent)); - + if (agent->pid != 0) { close(command[0]); close(response[1]); agent->cmdfd = command[1]; @@ -146,6 +146,20 @@ struct agent *prepare_external_agent(void) exit(0); } +void shutdown_agent(struct agent *agent) +{ + pid_t p; + + close(agent->cmdfd); + close(agent->responsefd); + p = waitpid(agent->pid, NULL, WNOHANG); + if (p == 0) { + kill(agent->pid, SIGKILL); + } + waitpid(agent->pid, NULL, 0); + free(agent); +} + /* Ask the external agent to try to do an operation. */ enum agent_return external_agent_operation(struct agent *agent, enum operation op, diff --git a/lib/tdb/test/external-agent.h b/lib/tdb/test/external-agent.h index dffdca9..354f5b9 100644 --- a/lib/tdb/test/external-agent.h +++ b/lib/tdb/test/external-agent.h @@ -17,6 +17,7 @@ enum operation { /* Do this before doing any tdb stuff. Return handle, or -1. 
*/ struct agent *prepare_external_agent(void); +void shutdown_agent(struct agent *agent); enum agent_return { SUCCESS, -- 1.7.9.5 From 58c37639dec9a6eca48e9382febe9c25717b6afc Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Thu, 21 Feb 2013 16:34:32 +0100 Subject: [PATCH 15/49] tdb/test: add PING command to external-agent.c Reviewed-by: Stefan Metzmacher --- lib/tdb/test/external-agent.c | 4 ++++ lib/tdb/test/external-agent.h | 1 + 2 files changed, 5 insertions(+) diff --git a/lib/tdb/test/external-agent.c b/lib/tdb/test/external-agent.c index 0aca081..57ed2f5 100644 --- a/lib/tdb/test/external-agent.c +++ b/lib/tdb/test/external-agent.c @@ -87,6 +87,9 @@ static enum agent_return do_operation(enum operation op, const char *name) ret = tdb_close(tdb) == 0 ? SUCCESS : OTHER_FAILURE; tdb = NULL; break; + case PING: + ret = SUCCESS; + break; default: ret = OTHER_FAILURE; } @@ -207,6 +210,7 @@ const char *operation_name(enum operation op) case CHECK: return "CHECK"; case NEEDS_RECOVERY: return "NEEDS_RECOVERY"; case CLOSE: return "CLOSE"; + case PING: return "PING"; } return "**INVALID**"; } diff --git a/lib/tdb/test/external-agent.h b/lib/tdb/test/external-agent.h index 354f5b9..bf8a221 100644 --- a/lib/tdb/test/external-agent.h +++ b/lib/tdb/test/external-agent.h @@ -13,6 +13,7 @@ enum operation { CHECK, NEEDS_RECOVERY, CLOSE, + PING, }; /* Do this before doing any tdb stuff. Return handle, or -1. 
*/ -- 1.7.9.5 From 076e266dd7bc5f0a9e3922807ec8cf2b7dc473cd Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Thu, 21 Feb 2013 16:34:32 +0100 Subject: [PATCH 16/49] tdb/test: add UNMAP command to external-agent.c Reviewed-by: Stefan Metzmacher --- lib/tdb/test/external-agent.c | 7 +++++++ lib/tdb/test/external-agent.h | 1 + 2 files changed, 8 insertions(+) diff --git a/lib/tdb/test/external-agent.c b/lib/tdb/test/external-agent.c index 57ed2f5..443d382 100644 --- a/lib/tdb/test/external-agent.c +++ b/lib/tdb/test/external-agent.c @@ -90,6 +90,12 @@ static enum agent_return do_operation(enum operation op, const char *name) case PING: ret = SUCCESS; break; + case UNMAP: + ret = tdb_munmap(tdb) == 0 ? SUCCESS : OTHER_FAILURE; + if (ret == SUCCESS) { + tdb->flags |= TDB_NOMMAP; + } + break; default: ret = OTHER_FAILURE; } @@ -211,6 +217,7 @@ const char *operation_name(enum operation op) case NEEDS_RECOVERY: return "NEEDS_RECOVERY"; case CLOSE: return "CLOSE"; case PING: return "PING"; + case UNMAP: return "UNMAP"; } return "**INVALID**"; } diff --git a/lib/tdb/test/external-agent.h b/lib/tdb/test/external-agent.h index bf8a221..de9d0ac 100644 --- a/lib/tdb/test/external-agent.h +++ b/lib/tdb/test/external-agent.h @@ -14,6 +14,7 @@ enum operation { NEEDS_RECOVERY, CLOSE, PING, + UNMAP, }; /* Do this before doing any tdb stuff. Return handle, or -1. 
*/ -- 1.7.9.5 From f75754fbd2f5746a4997cfb0f826fc1ea1aecf23 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 5 Feb 2014 09:13:17 +0100 Subject: [PATCH 17/49] tdb/test: add external_agent_operation_send/recv Signed-off-by: Stefan Metzmacher --- lib/tdb/test/external-agent.c | 66 +++++++++++++++++++++++++++++++++++------ lib/tdb/test/external-agent.h | 4 +++ 2 files changed, 61 insertions(+), 9 deletions(-) diff --git a/lib/tdb/test/external-agent.c b/lib/tdb/test/external-agent.c index 443d382..4230e21 100644 --- a/lib/tdb/test/external-agent.c +++ b/lib/tdb/test/external-agent.c @@ -109,6 +109,7 @@ static enum agent_return do_operation(enum operation op, const char *name) struct agent { int cmdfd, responsefd; pid_t pid; + enum agent_return state; }; /* Do this before doing any tdb stuff. Return handle, or NULL. */ @@ -135,6 +136,7 @@ struct agent *prepare_external_agent(void) close(response[1]); agent->cmdfd = command[1]; agent->responsefd = response[0]; + agent->state = SUCCESS; return agent; } @@ -169,15 +171,19 @@ void shutdown_agent(struct agent *agent) free(agent); } -/* Ask the external agent to try to do an operation. 
*/ -enum agent_return external_agent_operation(struct agent *agent, - enum operation op, - const char *name) +enum agent_return external_agent_operation_send(struct agent *agent, + enum operation op, + const char *name) { - enum agent_return res; unsigned int len; + ssize_t ret; char *string; + if (agent->state != SUCCESS) { + return agent->state; + } + agent->state = WOULD_HAVE_BLOCKED; + if (!name) name = ""; len = 1 + strlen(name) + 1; @@ -186,11 +192,53 @@ enum agent_return external_agent_operation(struct agent *agent, string[0] = op; strcpy(string+1, name); - if (write(agent->cmdfd, string, len) != len - || read(agent->responsefd, &res, sizeof(res)) != sizeof(res)) - res = AGENT_DIED; - + ret = write(agent->cmdfd, string, len); free(string); + if (ret != len) { + agent->state = AGENT_DIED; + return agent->state; + } + + return SUCCESS; +} + +/* Ask the external agent to try to do an operation. */ +enum agent_return external_agent_operation_recv(struct agent *agent) +{ + enum agent_return res; + ssize_t ret; + + if (agent->state != WOULD_HAVE_BLOCKED) { + if (agent->state != SUCCESS) { + return OTHER_FAILURE; + } + return agent->state; + } + + ret = read(agent->responsefd, &res, sizeof(res)); + if (ret != sizeof(res)) { + agent->state = AGENT_DIED; + return agent->state; + } + + agent->state = SUCCESS; + return res; +} + +/* Ask the external agent to try to do an operation. 
*/ +enum agent_return external_agent_operation(struct agent *agent, + enum operation op, + const char *name) +{ + enum agent_return res; + + res = external_agent_operation_send(agent, op, name); + if (res != SUCCESS) { + return res; + } + + res = external_agent_operation_recv(agent); + return res; } diff --git a/lib/tdb/test/external-agent.h b/lib/tdb/test/external-agent.h index de9d0ac..58c55c1 100644 --- a/lib/tdb/test/external-agent.h +++ b/lib/tdb/test/external-agent.h @@ -33,6 +33,10 @@ enum agent_return { * name == tdb name for OPEN/OPEN_WITH_CLEAR_IF_FIRST, * record name for FETCH/STORE (store stores name as data too) */ +enum agent_return external_agent_operation_send(struct agent *agent, + enum operation op, + const char *name); +enum agent_return external_agent_operation_recv(struct agent *agent); enum agent_return external_agent_operation(struct agent *handle, enum operation op, const char *name); -- 1.7.9.5 From d04df0b5151035caeaa045c6acab25d47e6bc462 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 5 Feb 2014 10:08:52 +0100 Subject: [PATCH 18/49] tdb/test: add BLOCKING, READONLY and MARKONLY commands to external-agent.c Signed-off-by: Stefan Metzmacher --- lib/tdb/test/external-agent.c | 44 +++++++++++++++++++++++++++++++++++++++++ lib/tdb/test/external-agent.h | 3 +++ 2 files changed, 47 insertions(+) diff --git a/lib/tdb/test/external-agent.c b/lib/tdb/test/external-agent.c index 4230e21..b9a4dfd 100644 --- a/lib/tdb/test/external-agent.c +++ b/lib/tdb/test/external-agent.c @@ -21,6 +21,8 @@ static enum agent_return do_operation(enum operation op, const char *name) TDB_DATA k; enum agent_return ret; TDB_DATA data; + static bool readonly_locks; + static bool markonly_locks; if (op != OPEN && op != OPEN_WITH_CLEAR_IF_FIRST && !tdb) { diag("external: No tdb open!"); @@ -96,6 +98,45 @@ static enum agent_return do_operation(enum operation op, const char *name) tdb->flags |= TDB_NOMMAP; } break; + case BLOCKING: + if (strcmp(name, "true") == 0) 
{ + nonblocking_locks = false; + ret = SUCCESS; + break; + } + if (strcmp(name, "false") == 0) { + nonblocking_locks = true; + ret = SUCCESS; + break; + } + ret = OTHER_FAILURE; + break; + case READONLY: + if (strcmp(name, "true") == 0) { + readonly_locks = true; + ret = SUCCESS; + break; + } + if (strcmp(name, "false") == 0) { + readonly_locks = false; + ret = SUCCESS; + break; + } + ret = OTHER_FAILURE; + break; + case MARKONLY: + if (strcmp(name, "true") == 0) { + markonly_locks = true; + ret = SUCCESS; + break; + } + if (strcmp(name, "false") == 0) { + markonly_locks = false; + ret = SUCCESS; + break; + } + ret = OTHER_FAILURE; + break; default: ret = OTHER_FAILURE; } @@ -266,6 +307,9 @@ const char *operation_name(enum operation op) case CLOSE: return "CLOSE"; case PING: return "PING"; case UNMAP: return "UNMAP"; + case BLOCKING: return "BLOCKING"; + case READONLY: return "READONLY"; + case MARKONLY: return "MARKONLY"; } return "**INVALID**"; } diff --git a/lib/tdb/test/external-agent.h b/lib/tdb/test/external-agent.h index 58c55c1..a9e668c 100644 --- a/lib/tdb/test/external-agent.h +++ b/lib/tdb/test/external-agent.h @@ -15,6 +15,9 @@ enum operation { CLOSE, PING, UNMAP, + BLOCKING, + READONLY, + MARKONLY, }; /* Do this before doing any tdb stuff. Return handle, or -1. 
*/ -- 1.7.9.5 From 076ca2e2eca66b4a99b91a88c31deffeaca50881 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 5 Feb 2014 10:08:52 +0100 Subject: [PATCH 19/49] tdb/test: add LOCKALL and UNLOCKALL commands to external-agent.c Signed-off-by: Stefan Metzmacher --- lib/tdb/test/external-agent.c | 28 ++++++++++++++++++++++++++++ lib/tdb/test/external-agent.h | 2 ++ 2 files changed, 30 insertions(+) diff --git a/lib/tdb/test/external-agent.c b/lib/tdb/test/external-agent.c index b9a4dfd..7a78c6e 100644 --- a/lib/tdb/test/external-agent.c +++ b/lib/tdb/test/external-agent.c @@ -137,6 +137,32 @@ static enum agent_return do_operation(enum operation op, const char *name) } ret = OTHER_FAILURE; break; + case LOCKALL: + if (markonly_locks) { + ret = tdb_lockall_mark(tdb) == 0 ? SUCCESS : FAILED; + break; + } + + if (readonly_locks) { + ret = tdb_lockall_read(tdb) == 0 ? SUCCESS : FAILED; + break; + } + + ret = tdb_lockall(tdb) == 0 ? SUCCESS : FAILED; + break; + case UNLOCKALL: + if (markonly_locks) { + ret = tdb_lockall_unmark(tdb) == 0 ? SUCCESS : FAILED; + break; + } + + if (readonly_locks) { + ret = tdb_unlockall_read(tdb) == 0 ? SUCCESS : FAILED; + break; + } + + ret = tdb_unlockall(tdb) == 0 ? SUCCESS : FAILED; + break; default: ret = OTHER_FAILURE; } @@ -310,6 +336,8 @@ const char *operation_name(enum operation op) case BLOCKING: return "BLOCKING"; case READONLY: return "READONLY"; case MARKONLY: return "MARKONLY"; + case LOCKALL: return "LOCKALL"; + case UNLOCKALL: return "UNLOCKALL"; } return "**INVALID**"; } diff --git a/lib/tdb/test/external-agent.h b/lib/tdb/test/external-agent.h index a9e668c..ebd8f08 100644 --- a/lib/tdb/test/external-agent.h +++ b/lib/tdb/test/external-agent.h @@ -18,6 +18,8 @@ enum operation { BLOCKING, READONLY, MARKONLY, + LOCKALL, + UNLOCKALL, }; /* Do this before doing any tdb stuff. Return handle, or -1. 
*/ -- 1.7.9.5 From 5344e6c39c21acfb9ad2245aa67da6cc5441efb3 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 5 Feb 2014 10:08:52 +0100 Subject: [PATCH 20/49] tdb/test: add CHAINLOCK and CHAINUNLOCK commands to external-agent.c Signed-off-by: Stefan Metzmacher --- lib/tdb/test/external-agent.c | 28 ++++++++++++++++++++++++++++ lib/tdb/test/external-agent.h | 2 ++ 2 files changed, 30 insertions(+) diff --git a/lib/tdb/test/external-agent.c b/lib/tdb/test/external-agent.c index 7a78c6e..3fc26d7 100644 --- a/lib/tdb/test/external-agent.c +++ b/lib/tdb/test/external-agent.c @@ -163,6 +163,32 @@ static enum agent_return do_operation(enum operation op, const char *name) ret = tdb_unlockall(tdb) == 0 ? SUCCESS : FAILED; break; + case CHAINLOCK: + if (markonly_locks) { + ret = tdb_chainlock_mark(tdb, k) == 0 ? SUCCESS : FAILED; + break; + } + + if (readonly_locks) { + ret = tdb_chainlock_read(tdb, k) == 0 ? SUCCESS : FAILED; + break; + } + + ret = tdb_chainlock(tdb, k) == 0 ? SUCCESS : FAILED; + break; + case CHAINUNLOCK: + if (markonly_locks) { + ret = tdb_chainlock_unmark(tdb, k) == 0 ? SUCCESS : FAILED; + break; + } + + if (readonly_locks) { + ret = tdb_chainunlock_read(tdb, k) == 0 ? SUCCESS : FAILED; + break; + } + + ret = tdb_chainunlock(tdb, k) == 0 ? SUCCESS : FAILED; + break; default: ret = OTHER_FAILURE; } @@ -338,6 +364,8 @@ const char *operation_name(enum operation op) case MARKONLY: return "MARKONLY"; case LOCKALL: return "LOCKALL"; case UNLOCKALL: return "UNLOCKALL"; + case CHAINLOCK: return "CHAINLOCK"; + case CHAINUNLOCK: return "CHAINUNLOCK"; } return "**INVALID**"; } diff --git a/lib/tdb/test/external-agent.h b/lib/tdb/test/external-agent.h index ebd8f08..cdebbaf 100644 --- a/lib/tdb/test/external-agent.h +++ b/lib/tdb/test/external-agent.h @@ -20,6 +20,8 @@ enum operation { MARKONLY, LOCKALL, UNLOCKALL, + CHAINLOCK, + CHAINUNLOCK, }; /* Do this before doing any tdb stuff. Return handle, or -1. 
*/ -- 1.7.9.5 From 94ed392e8b64d6d7d3d57ac66c311276d1edccad Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 5 Feb 2014 10:08:52 +0100 Subject: [PATCH 21/49] tdb/test: add prepare_external_agent_with_tdb() external-agent.c Signed-off-by: Stefan Metzmacher --- lib/tdb/test/external-agent.c | 12 +++++++++++- lib/tdb/test/external-agent.h | 2 ++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/lib/tdb/test/external-agent.c b/lib/tdb/test/external-agent.c index 3fc26d7..d5eb159 100644 --- a/lib/tdb/test/external-agent.c +++ b/lib/tdb/test/external-agent.c @@ -206,7 +206,7 @@ struct agent { }; /* Do this before doing any tdb stuff. Return handle, or NULL. */ -struct agent *prepare_external_agent(void) +struct agent *prepare_external_agent_with_tdb(struct tdb_context *_tdb) { int ret; int command[2], response[2]; @@ -239,6 +239,10 @@ struct agent *prepare_external_agent(void) /* We want to fail, not block. */ nonblocking_locks = true; log_prefix = "external: "; + if (_tdb != NULL) { + tdb_reopen(_tdb); + tdb = _tdb; + } while ((ret = read(command[0], name, sizeof(name))) > 0) { enum agent_return result; @@ -250,6 +254,12 @@ struct agent *prepare_external_agent(void) exit(0); } +/* Do this before doing any tdb stuff. Return handle, or NULL. */ +struct agent *prepare_external_agent(void) +{ + return prepare_external_agent_with_tdb(NULL); +} + void shutdown_agent(struct agent *agent) { pid_t p; diff --git a/lib/tdb/test/external-agent.h b/lib/tdb/test/external-agent.h index cdebbaf..8beb2ec 100644 --- a/lib/tdb/test/external-agent.h +++ b/lib/tdb/test/external-agent.h @@ -24,6 +24,8 @@ enum operation { CHAINUNLOCK, }; +struct tdb_context; +struct agent *prepare_external_agent_with_tdb(struct tdb_context *_tdb); /* Do this before doing any tdb stuff. Return handle, or -1. 
*/ struct agent *prepare_external_agent(void); void shutdown_agent(struct agent *agent); -- 1.7.9.5 From bec6f73de99e4c3d629b11786fc2ee1f904c25ce Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Thu, 21 Feb 2013 16:34:32 +0100 Subject: [PATCH 22/49] ORIG-COMMIT....=> SPLITTED: tdb: Add mutex support --- lib/tdb/ABI/tdb-1.3.0.sigs | 68 +++ lib/tdb/common/check.c | 4 +- lib/tdb/common/io.c | 92 ++- lib/tdb/common/lock.c | 79 ++- lib/tdb/common/mutex.c | 902 ++++++++++++++++++++++++++++ lib/tdb/common/open.c | 197 +++++- lib/tdb/common/tdb.c | 9 + lib/tdb/common/tdb_private.h | 30 +- lib/tdb/common/transaction.c | 3 +- lib/tdb/docs/mutex.txt | 106 ++++ lib/tdb/include/tdb.h | 32 + lib/tdb/test/lock-tracking.c | 34 +- lib/tdb/test/logging.c | 3 - lib/tdb/test/run-3G-file.c | 7 +- lib/tdb/test/run-bad-tdb-header.c | 1 + lib/tdb/test/run-check.c | 1 + lib/tdb/test/run-corrupt.c | 1 + lib/tdb/test/run-die-during-transaction.c | 4 +- lib/tdb/test/run-endian.c | 1 + lib/tdb/test/run-incompatible.c | 1 + lib/tdb/test/run-mutex-allrecord-bench.c | 75 +++ lib/tdb/test/run-mutex-allrecord-block.c | 101 ++++ lib/tdb/test/run-mutex-allrecord-trylock.c | 97 +++ lib/tdb/test/run-mutex-die.c | 266 ++++++++ lib/tdb/test/run-mutex-openflags2.c | 94 +++ lib/tdb/test/run-mutex-trylock.c | 106 ++++ lib/tdb/test/run-mutex1.c | 120 ++++ lib/tdb/test/run-nested-transactions.c | 1 + lib/tdb/test/run-nested-traverse.c | 1 + lib/tdb/test/run-no-lock-during-traverse.c | 1 + lib/tdb/test/run-oldhash.c | 1 + lib/tdb/test/run-open-during-transaction.c | 1 + lib/tdb/test/run-readonly-check.c | 1 + lib/tdb/test/run-rescue-find_entry.c | 1 + lib/tdb/test/run-rescue.c | 1 + lib/tdb/test/run-rwlock-check.c | 1 + lib/tdb/test/run-summary.c | 1 + lib/tdb/test/run-transaction-expand.c | 1 + lib/tdb/test/run-traverse-in-transaction.c | 1 + lib/tdb/test/run-wronghash-fail.c | 1 + lib/tdb/test/run-zero-append.c | 1 + lib/tdb/test/run.c | 1 + lib/tdb/tools/tdbtorture.c | 29 +- lib/tdb/wscript | 43 +- 44 
files changed, 2445 insertions(+), 76 deletions(-) create mode 100644 lib/tdb/ABI/tdb-1.3.0.sigs create mode 100644 lib/tdb/common/mutex.c create mode 100644 lib/tdb/docs/mutex.txt create mode 100644 lib/tdb/test/run-mutex-allrecord-bench.c create mode 100644 lib/tdb/test/run-mutex-allrecord-block.c create mode 100644 lib/tdb/test/run-mutex-allrecord-trylock.c create mode 100644 lib/tdb/test/run-mutex-die.c create mode 100644 lib/tdb/test/run-mutex-openflags2.c create mode 100644 lib/tdb/test/run-mutex-trylock.c create mode 100644 lib/tdb/test/run-mutex1.c diff --git a/lib/tdb/ABI/tdb-1.3.0.sigs b/lib/tdb/ABI/tdb-1.3.0.sigs new file mode 100644 index 0000000..7d3e469 --- /dev/null +++ b/lib/tdb/ABI/tdb-1.3.0.sigs @@ -0,0 +1,68 @@ +tdb_add_flags: void (struct tdb_context *, unsigned int) +tdb_append: int (struct tdb_context *, TDB_DATA, TDB_DATA) +tdb_chainlock: int (struct tdb_context *, TDB_DATA) +tdb_chainlock_mark: int (struct tdb_context *, TDB_DATA) +tdb_chainlock_nonblock: int (struct tdb_context *, TDB_DATA) +tdb_chainlock_read: int (struct tdb_context *, TDB_DATA) +tdb_chainlock_unmark: int (struct tdb_context *, TDB_DATA) +tdb_chainunlock: int (struct tdb_context *, TDB_DATA) +tdb_chainunlock_read: int (struct tdb_context *, TDB_DATA) +tdb_check: int (struct tdb_context *, int (*)(TDB_DATA, TDB_DATA, void *), void *) +tdb_close: int (struct tdb_context *) +tdb_delete: int (struct tdb_context *, TDB_DATA) +tdb_dump_all: void (struct tdb_context *) +tdb_enable_seqnum: void (struct tdb_context *) +tdb_error: enum TDB_ERROR (struct tdb_context *) +tdb_errorstr: const char *(struct tdb_context *) +tdb_exists: int (struct tdb_context *, TDB_DATA) +tdb_fd: int (struct tdb_context *) +tdb_fetch: TDB_DATA (struct tdb_context *, TDB_DATA) +tdb_firstkey: TDB_DATA (struct tdb_context *) +tdb_freelist_size: int (struct tdb_context *) +tdb_get_flags: int (struct tdb_context *) +tdb_get_logging_private: void *(struct tdb_context *) +tdb_get_seqnum: int (struct 
tdb_context *) +tdb_hash_size: int (struct tdb_context *) +tdb_increment_seqnum_nonblock: void (struct tdb_context *) +tdb_jenkins_hash: unsigned int (TDB_DATA *) +tdb_lock_nonblock: int (struct tdb_context *, int, int) +tdb_lockall: int (struct tdb_context *) +tdb_lockall_mark: int (struct tdb_context *) +tdb_lockall_nonblock: int (struct tdb_context *) +tdb_lockall_read: int (struct tdb_context *) +tdb_lockall_read_nonblock: int (struct tdb_context *) +tdb_lockall_unmark: int (struct tdb_context *) +tdb_log_fn: tdb_log_func (struct tdb_context *) +tdb_map_size: size_t (struct tdb_context *) +tdb_name: const char *(struct tdb_context *) +tdb_nextkey: TDB_DATA (struct tdb_context *, TDB_DATA) +tdb_null: dptr = 0xXXXX, dsize = 0 +tdb_open: struct tdb_context *(const char *, int, int, int, mode_t) +tdb_open_ex: struct tdb_context *(const char *, int, int, int, mode_t, const struct tdb_logging_context *, tdb_hash_func) +tdb_parse_record: int (struct tdb_context *, TDB_DATA, int (*)(TDB_DATA, TDB_DATA, void *), void *) +tdb_printfreelist: int (struct tdb_context *) +tdb_remove_flags: void (struct tdb_context *, unsigned int) +tdb_reopen: int (struct tdb_context *) +tdb_reopen_all: int (int) +tdb_repack: int (struct tdb_context *) +tdb_rescue: int (struct tdb_context *, void (*)(TDB_DATA, TDB_DATA, void *), void *) +tdb_runtime_check_for_robust_mutexes: bool (void) +tdb_set_logging_function: void (struct tdb_context *, const struct tdb_logging_context *) +tdb_set_max_dead: void (struct tdb_context *, int) +tdb_setalarm_sigptr: void (struct tdb_context *, volatile sig_atomic_t *) +tdb_store: int (struct tdb_context *, TDB_DATA, TDB_DATA, int) +tdb_summary: char *(struct tdb_context *) +tdb_transaction_cancel: int (struct tdb_context *) +tdb_transaction_commit: int (struct tdb_context *) +tdb_transaction_prepare_commit: int (struct tdb_context *) +tdb_transaction_start: int (struct tdb_context *) +tdb_transaction_start_nonblock: int (struct tdb_context *) 
+tdb_transaction_write_lock_mark: int (struct tdb_context *) +tdb_transaction_write_lock_unmark: int (struct tdb_context *) +tdb_traverse: int (struct tdb_context *, tdb_traverse_func, void *) +tdb_traverse_read: int (struct tdb_context *, tdb_traverse_func, void *) +tdb_unlock: int (struct tdb_context *, int, int) +tdb_unlockall: int (struct tdb_context *) +tdb_unlockall_read: int (struct tdb_context *) +tdb_validate_freelist: int (struct tdb_context *, int *) +tdb_wipe_all: int (struct tdb_context *) diff --git a/lib/tdb/common/check.c b/lib/tdb/common/check.c index 9f9d870..e632af5 100644 --- a/lib/tdb/common/check.c +++ b/lib/tdb/common/check.c @@ -39,7 +39,9 @@ static bool tdb_check_header(struct tdb_context *tdb, tdb_off_t *recovery) if (hdr.version != TDB_VERSION) goto corrupt; - if (hdr.rwlocks != 0 && hdr.rwlocks != TDB_HASH_RWLOCK_MAGIC) + if (hdr.rwlocks != 0 && + hdr.rwlocks != TDB_FEATURE_FLAG_MAGIC && + hdr.rwlocks != TDB_HASH_RWLOCK_MAGIC) goto corrupt; tdb_header_hash(tdb, &h1, &h2); diff --git a/lib/tdb/common/io.c b/lib/tdb/common/io.c index 11dfefd..537a9e9 100644 --- a/lib/tdb/common/io.c +++ b/lib/tdb/common/io.c @@ -28,6 +28,69 @@ #include "tdb_private.h" +/* + * We prepend the mutex area, so fixup offsets. See mutex.c for details. 
+ */ + +static bool tdb_adjust_offset(struct tdb_context *tdb, off_t *off) +{ + tdb_off_t tdb_off = *off; + size_t mutex_size = tdb_mutex_size(tdb); + + if (!tdb_add_off_t(tdb_off, mutex_size, &tdb_off)) { + errno = EIO; + return false; + } + *off = tdb_off; + return true; +} + +static ssize_t tdb_pwrite(struct tdb_context *tdb, const void *buf, + size_t count, off_t offset) +{ + if (!tdb_adjust_offset(tdb, &offset)) { + return -1; + } + return pwrite(tdb->fd, buf, count, offset); +} + +static ssize_t tdb_pread(struct tdb_context *tdb, void *buf, + size_t count, off_t offset) +{ + if (!tdb_adjust_offset(tdb, &offset)) { + return -1; + } + return pread(tdb->fd, buf, count, offset); +} + +static int tdb_ftruncate(struct tdb_context *tdb, off_t length) +{ + if (!tdb_adjust_offset(tdb, &length)) { + return -1; + } + return ftruncate(tdb->fd, length); +} + +static int tdb_fstat(struct tdb_context *tdb, struct stat *buf) +{ + size_t mutex_len; + int ret; + + ret = fstat(tdb->fd, buf); + if (ret == -1) { + return -1; + } + + mutex_len = tdb_mutex_size(tdb); + if (buf->st_size < mutex_len) { + errno = EIO; + return -1; + } + buf->st_size -= mutex_len; + + return ret; +} + /* check for an out of bounds access - if it is out of bounds then see if the database has been expanded by someone else and expand if necessary @@ -58,7 +121,7 @@ static int tdb_oob(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len, return -1; } - if (fstat(tdb->fd, &st) == -1) { + if (tdb_fstat(tdb, &st) == -1) { tdb->ecode = TDB_ERR_IO; return -1; } @@ -122,16 +185,18 @@ static int tdb_write(struct tdb_context *tdb, tdb_off_t off, tdb->ecode = TDB_ERR_IO; return -1; #else - ssize_t written = pwrite(tdb->fd, buf, len, off); + ssize_t written; + + written = tdb_pwrite(tdb, buf, len, off); + if ((written != (ssize_t)len) && (written != -1)) { /* try once more */ tdb->ecode = TDB_ERR_IO; TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: wrote only " "%zi of %u bytes at %u, trying once more\n", written, len, 
off)); - written = pwrite(tdb->fd, (const char *)buf+written, - len-written, - off+written); + written = tdb_pwrite(tdb, (const char *)buf+written, + len-written, off+written); } if (written == -1) { /* Ensure ecode is set for log fn. */ @@ -176,7 +241,9 @@ static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf, tdb->ecode = TDB_ERR_IO; return -1; #else - ssize_t ret = pread(tdb->fd, buf, len, off); + ssize_t ret; + + ret = tdb_pread(tdb, buf, len, off); if (ret != (ssize_t)len) { /* Ensure ecode is set for log fn. */ tdb->ecode = TDB_ERR_IO; @@ -258,7 +325,8 @@ int tdb_mmap(struct tdb_context *tdb) if (should_mmap(tdb)) { tdb->map_ptr = mmap(NULL, tdb->map_size, PROT_READ|(tdb->read_only? 0:PROT_WRITE), - MAP_SHARED|MAP_FILE, tdb->fd, 0); + MAP_SHARED|MAP_FILE, tdb->fd, + tdb_mutex_size(tdb)); /* * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!! @@ -303,12 +371,12 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t ad return -1; } - if (ftruncate(tdb->fd, new_size) == -1) { + if (tdb_ftruncate(tdb, new_size) == -1) { char b = 0; - ssize_t written = pwrite(tdb->fd, &b, 1, new_size - 1); + ssize_t written = tdb_pwrite(tdb, &b, 1, new_size - 1); if (written == 0) { /* try once more, potentially revealing errno */ - written = pwrite(tdb->fd, &b, 1, new_size - 1); + written = tdb_pwrite(tdb, &b, 1, new_size - 1); } if (written == 0) { /* again - give up, guessing errno */ @@ -328,10 +396,10 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t ad memset(buf, TDB_PAD_BYTE, sizeof(buf)); while (addition) { size_t n = addition>sizeof(buf)?sizeof(buf):addition; - ssize_t written = pwrite(tdb->fd, buf, n, size); + ssize_t written = tdb_pwrite(tdb, buf, n, size); if (written == 0) { /* prevent infinite loops: try _once_ more */ - written = pwrite(tdb->fd, buf, n, size); + written = tdb_pwrite(tdb, buf, n, size); } if (written == 0) { /* give up, trying to provide a useful errno */ diff --git 
a/lib/tdb/common/lock.c b/lib/tdb/common/lock.c index 486de79..6644c40 100644 --- a/lib/tdb/common/lock.c +++ b/lib/tdb/common/lock.c @@ -38,6 +38,15 @@ static int fcntl_lock(struct tdb_context *tdb, struct flock fl; int cmd; +#ifdef USE_TDB_MUTEX_LOCKING + { + int ret; + if (tdb_mutex_lock(tdb, rw, off, len, waitflag, &ret)) { + return ret; + } + } +#endif + fl.l_type = rw; fl.l_whence = SEEK_SET; fl.l_start = off; @@ -110,6 +119,15 @@ static int fcntl_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len) fclose(locks); #endif +#ifdef USE_TDB_MUTEX_LOCKING + { + int ret; + if (tdb_mutex_unlock(tdb, rw, off, len, &ret)) { + return ret; + } + } +#endif + fl.l_type = F_UNLCK; fl.l_whence = SEEK_SET; fl.l_start = off; @@ -248,13 +266,27 @@ int tdb_allrecord_upgrade(struct tdb_context *tdb) return -1; } - ret = tdb_brlock_retry(tdb, F_WRLCK, FREELIST_TOP, 0, - TDB_LOCK_WAIT|TDB_LOCK_PROBE); + if (tdb_have_mutexes(tdb)) { + ret = tdb_mutex_allrecord_upgrade(tdb); + if (ret == -1) { + goto fail; + } + ret = tdb_brlock_retry(tdb, F_WRLCK, lock_offset(tdb->hash_size), + 0, TDB_LOCK_WAIT|TDB_LOCK_PROBE); + if (ret == -1) { + tdb_mutex_allrecord_downgrade(tdb); + } + } else { + ret = tdb_brlock_retry(tdb, F_WRLCK, FREELIST_TOP, 0, + TDB_LOCK_WAIT|TDB_LOCK_PROBE); + } + if (ret == 0) { tdb->allrecord_lock.ltype = F_WRLCK; tdb->allrecord_lock.off = 0; return 0; } +fail: TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_allrecord_upgrade failed\n")); return -1; } @@ -593,6 +625,8 @@ static int tdb_chainlock_gradual(struct tdb_context *tdb, int tdb_allrecord_lock(struct tdb_context *tdb, int ltype, enum tdb_lock_flags flags, bool upgradable) { + int ret; + switch (tdb_allrecord_check(tdb, ltype, flags, upgradable)) { case -1: return -1; @@ -607,16 +641,27 @@ int tdb_allrecord_lock(struct tdb_context *tdb, int ltype, * * It is (1) which cause the starvation problem, so we're only * gradual for that. 
*/ - if (tdb_chainlock_gradual(tdb, ltype, flags, FREELIST_TOP, - tdb->hash_size * 4) == -1) { + + if (tdb_have_mutexes(tdb)) { + ret = tdb_mutex_allrecord_lock(tdb, ltype, flags); + } else { + ret = tdb_chainlock_gradual(tdb, ltype, flags, FREELIST_TOP, + tdb->hash_size * 4); + } + + if (ret == -1) { return -1; } /* Grab individual record locks. */ if (tdb_brlock(tdb, ltype, lock_offset(tdb->hash_size), 0, flags) == -1) { - tdb_brunlock(tdb, ltype, FREELIST_TOP, - tdb->hash_size * 4); + if (tdb_have_mutexes(tdb)) { + tdb_mutex_allrecord_unlock(tdb); + } else { + tdb_brunlock(tdb, ltype, FREELIST_TOP, + tdb->hash_size * 4); + } return -1; } @@ -672,9 +717,25 @@ int tdb_allrecord_unlock(struct tdb_context *tdb, int ltype, bool mark_lock) return 0; } - if (!mark_lock && tdb_brunlock(tdb, ltype, FREELIST_TOP, 0)) { - TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlockall failed (%s)\n", strerror(errno))); - return -1; + if (!mark_lock) { + int ret; + + if (tdb_have_mutexes(tdb)) { + ret = tdb_mutex_allrecord_unlock(tdb); + if (ret == 0) { + ret = tdb_brunlock(tdb, ltype, + lock_offset(tdb->hash_size), + 0); + } + } else { + ret = tdb_brunlock(tdb, ltype, FREELIST_TOP, 0); + } + + if (ret != 0) { + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlockall failed " + "(%s)\n", strerror(errno))); + return -1; + } } tdb->allrecord_lock.count = 0; diff --git a/lib/tdb/common/mutex.c b/lib/tdb/common/mutex.c new file mode 100644 index 0000000..c158b6c --- /dev/null +++ b/lib/tdb/common/mutex.c @@ -0,0 +1,902 @@ +/* + Unix SMB/CIFS implementation. + + trivial database library + + Copyright (C) Volker Lendecke 2012,2013 + + ** NOTE! The following LGPL license applies to the tdb + ** library. 
This does NOT imply that all of Samba is released + ** under the LGPL + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see <http://www.gnu.org/licenses/>. +*/ +#include "tdb_private.h" +#include "system/threads.h" + +#ifdef USE_TDB_MUTEX_LOCKING + +/* + * If we run with mutexes, we store the "struct tdb_mutexes" at the + * beginning of the file. We store an additional tdb_header right + * beyond the mutex area, page aligned. All the offsets within the tdb + * are relative to the area behind the mutex area. tdb->map_ptr points + * behind the mmap area as well, so the read and write path in the + * mutex case can remain unchanged. + * + * Early in the mutex development the mutexes were placed between the hash + * chain pointers and the real tdb data. This had two drawbacks: First, it + * made pointer calculations more complex. Second, we had to mmap the mutex + * area twice. One was the normal map_ptr in the tdb. This frequently changed + * from within tdb_oob. At least the Linux glibc robust mutex code assumes + * constant pointers in memory, so a constantly changing mmap area destroys + * the mutex list. So we had to mmap the first bytes of the file with a second + * mmap call. With that scheme, very weird errors happened that could be + * easily fixed by doing the mutex mmap in a second file.
It seemed that + * mapping the same memory area twice does not end up in accessing the same + * physical page, looking at the mutexes in gdb it seemed that old data showed + * up after some re-mapping. To avoid a separate mutex file, the code now puts + * the real content of the tdb file after the mutex area. This way we do not + * have overlapping mmap areas, the mutex area is mmapped once and not + * changed, the tdb data area's mmap is constantly changed but does not + * overlap. + */ + +struct tdb_mutexes { + struct tdb_header hdr; + pthread_mutex_t allrecord_mutex; /* protect allrecord_lock */ + + short int allrecord_lock; /* F_UNLCK: free, + F_RDLCK: shared, + F_WRLCK: exclusive */ + + pthread_mutex_t hashchains[1]; /* We allocate more */ +}; + +bool tdb_have_mutexes(struct tdb_context *tdb) +{ + return ((tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) != 0); +} + +size_t tdb_mutex_size(struct tdb_context *tdb) +{ + size_t mutex_size; + + if (!tdb_have_mutexes(tdb)) { + return 0; + } + + mutex_size = sizeof(struct tdb_mutexes); + mutex_size += tdb->hash_size * sizeof(pthread_mutex_t); + + return TDB_ALIGN(mutex_size, tdb->page_size); +} + +/* + * Get the index for a chain mutex + */ +static bool tdb_mutex_index(struct tdb_context *tdb, off_t off, off_t len, + unsigned *idx) +{ + /* + * Weird but true: We fcntl lock 1 byte at an offset 4 bytes before + * the 4 bytes of the freelist start and the hash chain that is about + * to be locked. See lock_offset() where the freelist is -1 vs the + * "+1" in TDB_HASH_TOP(). Because the mutex array is represented in + * the tdb file itself as data, we need to adjust the offset here. 
+ */ + const off_t freelist_lock_ofs = FREELIST_TOP - sizeof(tdb_off_t); + + if (!tdb_have_mutexes(tdb)) { + return false; + } + if (len != 1) { + /* Possibly the allrecord lock */ + return false; + } + if (off < freelist_lock_ofs) { + /* One of the special locks */ + return false; + } + if (tdb->hash_size == 0) { + /* tdb not initialized yet, called from tdb_open_ex() */ + return false; + } + if (off >= TDB_DATA_START(tdb->hash_size)) { + /* Single record lock from traverses */ + return false; + } + + /* + * Now we know it's a freelist or hash chain lock. Those are always 4 + * byte aligned. Paranoia check. + */ + if ((off % sizeof(tdb_off_t)) != 0) { + abort(); + } + + /* + * Re-index the fcntl offset into an offset into the mutex array + */ + off -= freelist_lock_ofs; /* rebase to index 0 */ + off /= sizeof(tdb_off_t); /* 0 for freelist 1-n for hashchain */ + + *idx = off; + return true; +} + +static int chain_mutex_lock(pthread_mutex_t *m, bool waitflag) +{ + int ret; + + if (waitflag) { + ret = pthread_mutex_lock(m); + } else { + ret = pthread_mutex_trylock(m); + } + if (ret != EOWNERDEAD) { + return ret; + } + + /* + * For chainlocks, we don't do any cleanup (yet?) + */ + return pthread_mutex_consistent(m); +} + +static int allrecord_mutex_lock(struct tdb_mutexes *m, bool waitflag) +{ + int ret; + + if (waitflag) { + ret = pthread_mutex_lock(&m->allrecord_mutex); + } else { + ret = pthread_mutex_trylock(&m->allrecord_mutex); + } + if (ret != EOWNERDEAD) { + return ret; + } + + /* + * The allrecord lock holder died. We need to reset the allrecord_lock + * to F_UNLCK. This should also be the indication for + * tdb_needs_recovery. 
+ */ + m->allrecord_lock = F_UNLCK; + + return pthread_mutex_consistent(&m->allrecord_mutex); +} + +bool tdb_mutex_lock(struct tdb_context *tdb, int rw, off_t off, off_t len, + bool waitflag, int *pret) +{ + struct tdb_mutexes *m = tdb->mutexes; + pthread_mutex_t *chain; + int ret; + unsigned idx; + bool allrecord_ok; + + if (!tdb_mutex_index(tdb, off, len, &idx)) { + return false; + } + chain = &m->hashchains[idx]; + +again: + ret = chain_mutex_lock(chain, waitflag); + if (ret == EBUSY) { + ret = EAGAIN; + } + if (ret != 0) { + errno = ret; + goto fail; + } + + if (tdb->num_lockrecs > 0) { + /* + * We can only check the allrecord lock once. If we do it with + * one chain mutex locked, we will deadlock with the allrecord + * locker process in the following way: We lock the first hash + * chain, we check for the allrecord lock. We keep the hash + * chain locked. Then the allrecord locker comes and takes the + * allrecord lock. It walks the list of chain mutexes, locking + * them all in sequence. Meanwhile, we have the chain mutex + * locked, so the allrecord locker blocks trying to lock our + * chain mutex. Then we come in and try to lock the second + * chain lock, which in most cases will be the freelist. We + * see that the allrecord lock is locked and put ourselves on + * the allrecord_waiters condition variable. This will never + * be signalled though because the allrecord locker waits for + * us to give up the chain lock. + */ + + *pret = 0; + return true; + } + + /* + * Check if someone has the allrecord lock: queue if so.
+ */ + + allrecord_ok = false; + + if (m->allrecord_lock == F_UNLCK) { + /* + * allrecord lock not taken + */ + allrecord_ok = true; + } + + if ((m->allrecord_lock == F_RDLCK) && (rw == F_RDLCK)) { + /* + * allrecord shared lock taken, but we only want to read + */ + allrecord_ok = true; + } + + if (allrecord_ok) { + *pret = 0; + return true; + } + + ret = pthread_mutex_unlock(chain); + if (ret != 0) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock" + "(chain_mutex) failed: %s\n", strerror(ret))); + errno = ret; + goto fail; + } + ret = allrecord_mutex_lock(m, waitflag); + if (ret != 0) { + if (waitflag || (ret != EBUSY)) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_%slock" + "(allrecord_mutex) failed: %s\n", + waitflag ? "" : "try_", strerror(ret))); + } + errno = EAGAIN; + goto fail; + } + ret = pthread_mutex_unlock(&m->allrecord_mutex); + if (ret != 0) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock" + "(allrecord_mutex) failed: %s\n", strerror(ret))); + errno = ret; + goto fail; + } + goto again; + +fail: + *pret = -1; + return true; +} + +bool tdb_mutex_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len, + int *pret) +{ + struct tdb_mutexes *m = tdb->mutexes; + pthread_mutex_t *chain; + int ret; + unsigned idx; + + if (!tdb_mutex_index(tdb, off, len, &idx)) { + return false; + } + chain = &m->hashchains[idx]; + + ret = pthread_mutex_unlock(chain); + if (ret == 0) { + *pret = 0; + return true; + } + *pret = -1; + return true; +} + +int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype, + enum tdb_lock_flags flags) +{ + struct tdb_mutexes *m = tdb->mutexes; + int ret; + uint32_t i; + + if (tdb->flags & TDB_NOLOCK) { + return 0; + } + + if (flags & TDB_LOCK_MARK_ONLY) { + return 0; + } + + ret = allrecord_mutex_lock(m, flags & TDB_LOCK_WAIT); + if (ret != 0) { + if (!(flags & TDB_LOCK_PROBE)) { + TDB_LOG((tdb, TDB_DEBUG_TRACE, "pthread_mutex_trylock " + "failed: %s\n", strerror(ret))); + } + return ret; + } + + if 
(m->allrecord_lock != F_UNLCK) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n", + (int)m->allrecord_lock)); + goto fail_unlock_allrecord_mutex; + } + m->allrecord_lock = (ltype == F_RDLCK) ? F_RDLCK : F_WRLCK; + + for (i=0; ihash_size; i++) { + + /* ignore hashchains[0], the freelist */ + pthread_mutex_t *chain = &m->hashchains[i+1]; + + ret = chain_mutex_lock(chain, flags & TDB_LOCK_WAIT); + if (ret != 0) { + if (!(flags & TDB_LOCK_PROBE)) { + TDB_LOG((tdb, TDB_DEBUG_TRACE, + "pthread_mutex_trylock " + "failed: %s\n", strerror(ret))); + } + goto fail_unroll_allrecord_lock; + } + + ret = pthread_mutex_unlock(chain); + if (ret != 0) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock" + "(chainlock) failed: %s\n", strerror(ret))); + goto fail_unroll_allrecord_lock; + } + } + /* + * We leave this routine with m->allrecord_mutex locked + */ + return 0; + +fail_unroll_allrecord_lock: + m->allrecord_lock = F_UNLCK; + +fail_unlock_allrecord_mutex: + ret = pthread_mutex_unlock(&m->allrecord_mutex); + if (ret != 0) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock" + "(allrecord_mutex) failed: %s\n", strerror(ret))); + } + return -1; +} + +int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb) +{ + struct tdb_mutexes *m = tdb->mutexes; + int ret; + uint32_t i; + + if (tdb->flags & TDB_NOLOCK) { + return 0; + } + + if (m->allrecord_lock != F_RDLCK) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n", + (int)m->allrecord_lock)); + return -1; + } + + m->allrecord_lock = F_WRLCK; + + for (i=0; ihash_size; i++) { + + /* ignore hashchains[0], the freelist */ + pthread_mutex_t *chain = &m->hashchains[i+1]; + + ret = chain_mutex_lock(chain, true); + if (ret != 0) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_lock" + "(chainlock) failed: %s\n", strerror(ret))); + goto fail_unroll_allrecord_lock; + } + + ret = pthread_mutex_unlock(chain); + if (ret != 0) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock" + "(chainlock) failed: %s\n", 
strerror(ret))); + return -1; + } + } + return 0; + +fail_unroll_allrecord_lock: + m->allrecord_lock = F_RDLCK; + return -1; +} + +int tdb_mutex_allrecord_downgrade(struct tdb_context *tdb) +{ + struct tdb_mutexes *m = tdb->mutexes; + + if (m->allrecord_lock != F_WRLCK) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n", + (int)m->allrecord_lock)); + return -1; + } + + m->allrecord_lock = F_RDLCK; + return 0; +} + + +int tdb_mutex_allrecord_unlock(struct tdb_context *tdb) +{ + struct tdb_mutexes *m = tdb->mutexes; + short old; + int ret; + + if (tdb->flags & TDB_NOLOCK) { + return 0; + } + + if ((m->allrecord_lock != F_RDLCK) && (m->allrecord_lock != F_WRLCK)) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n", + (int)m->allrecord_lock)); + return -1; + } + + old = m->allrecord_lock; + m->allrecord_lock = F_UNLCK; + + ret = pthread_mutex_unlock(&m->allrecord_mutex); + if (ret != 0) { + m->allrecord_lock = old; + TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock" + "(allrecord_mutex) failed: %s\n", strerror(ret))); + return -1; + } + return 0; +} + +int tdb_mutex_init(struct tdb_context *tdb) +{ + struct tdb_mutexes *m; + pthread_mutexattr_t ma; + int i, ret; + + ret = tdb_mutex_mmap(tdb); + if (ret == -1) { + return errno; + } + m = tdb->mutexes; + + ret = pthread_mutexattr_init(&ma); + if (ret != 0) { + goto fail_munmap; + } + ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK); + if (ret != 0) { + goto fail; + } + ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED); + if (ret != 0) { + goto fail; + } + ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST); + if (ret != 0) { + goto fail; + } + + for (i=0; ihash_size+1; i++) { + pthread_mutex_t *chain = &m->hashchains[i]; + + ret = pthread_mutex_init(chain, &ma); + if (ret != 0) { + goto fail; + } + } + + m->allrecord_lock = F_UNLCK; + + ret = pthread_mutex_init(&m->allrecord_mutex, &ma); + if (ret != 0) { + goto fail; + } + ret = 0; +fail: + 
pthread_mutexattr_destroy(&ma); +fail_munmap: + tdb_mutex_munmap(tdb); + return ret; +} + +int tdb_mutex_mmap(struct tdb_context *tdb) +{ + size_t len; + void *ptr; + + len = tdb_mutex_size(tdb); + if (len == 0) { + return 0; + } + + ptr = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FILE, + tdb->fd, 0); + + if (ptr == MAP_FAILED) { + return -1; + } + tdb->mutexes = (struct tdb_mutexes *)ptr; + return 0; +} + +int tdb_mutex_munmap(struct tdb_context *tdb) +{ + size_t len; + + len = tdb_mutex_size(tdb); + if (len == 0) { + return 0; + } + + return munmap(tdb->mutexes, len); +} + +static bool tdb_mutex_locking_cached; + +bool tdb_mutex_locking_supported(void) +{ + pthread_mutexattr_t ma; + pthread_mutex_t m; + int ret; + static bool initialized; + + if (initialized) { + return tdb_mutex_locking_cached; + } + + initialized = true; + + ret = pthread_mutexattr_init(&ma); + if (ret != 0) { + return false; + } + ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK); + if (ret != 0) { + goto cleanup_ma; + } + ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED); + if (ret != 0) { + goto cleanup_ma; + } + ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST); + if (ret != 0) { + goto cleanup_ma; + } + ret = pthread_mutex_init(&m, &ma); + if (ret != 0) { + goto cleanup_ma; + } + ret = pthread_mutex_lock(&m); + if (ret != 0) { + goto cleanup_m; + } + /* + * This makes sure we have real mutexes + * from a threading library instead of just + * stubs from libc. 
+ */ + ret = pthread_mutex_lock(&m); + if (ret != EDEADLK) { + goto cleanup_lock; + } + ret = pthread_mutex_unlock(&m); + if (ret != 0) { + goto cleanup_m; + } + + tdb_mutex_locking_cached = true; + goto cleanup_m; + +cleanup_lock: + pthread_mutex_unlock(&m); +cleanup_m: + pthread_mutex_destroy(&m); +cleanup_ma: + pthread_mutexattr_destroy(&ma); + return tdb_mutex_locking_cached; +} + +static void (*tdb_robust_mutext_old_handler)(int) = SIG_ERR; +static pid_t tdb_robust_mutex_pid = -1; + +static void tdb_robust_mutex_handler(int sig) +{ + if (tdb_robust_mutex_pid != -1) { + pid_t pid; + int status; + + pid = waitpid(tdb_robust_mutex_pid, &status, WNOHANG); + if (pid == tdb_robust_mutex_pid) { + tdb_robust_mutex_pid = -1; + return; + } + } + + if (tdb_robust_mutext_old_handler == SIG_DFL) { + return; + } + + if (tdb_robust_mutext_old_handler == SIG_IGN) { + return; + } + if (tdb_robust_mutext_old_handler == SIG_ERR) { + return; + } + + tdb_robust_mutext_old_handler(sig); +} + +_PUBLIC_ bool tdb_runtime_check_for_robust_mutexes(void) +{ + void *ptr; + pthread_mutex_t *m; + pthread_mutexattr_t ma; + int ret = 1; + int pipe_down[2] = { -1, -1 }; + int pipe_up[2] = { -1, -1 }; + ssize_t nread; + char c = 0; + bool ok; + int status; + static bool initialized; + + if (initialized) { + return tdb_mutex_locking_cached; + } + + initialized = true; + + ok = tdb_mutex_locking_supported(); + if (!ok) { + return false; + } + + tdb_mutex_locking_cached = false; + + ptr = mmap(NULL, sizeof(pthread_mutex_t), PROT_READ|PROT_WRITE, + MAP_SHARED|MAP_ANON, -1 /* fd */, 0); + if (ptr == MAP_FAILED) { + return false; + } + m = (pthread_mutex_t *)ptr; + + ret = pipe(pipe_down); + if (ret != 0) { + goto cleanup_mmap; + } + ret = pipe(pipe_up); + if (ret != 0) { + goto cleanup_pipe; + } + + ret = pthread_mutexattr_init(&ma); + if (ret != 0) { + goto cleanup_pipe; + } + ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK); + if (ret != 0) { + goto cleanup_ma; + } + ret = 
pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED); + if (ret != 0) { + goto cleanup_ma; + } + ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST); + if (ret != 0) { + goto cleanup_ma; + } + ret = pthread_mutex_init(m, &ma); + if (ret != 0) { + goto cleanup_ma; + } + + tdb_robust_mutext_old_handler = signal(SIGCHLD, + tdb_robust_mutex_handler); + + tdb_robust_mutex_pid = fork(); + if (tdb_robust_mutex_pid == 0) { + size_t nwritten; + close(pipe_down[1]); + close(pipe_up[0]); + ret = pthread_mutex_lock(m); + nwritten = write(pipe_up[1], &ret, sizeof(ret)); + if (nwritten != sizeof(ret)) { + exit(1); + } + if (ret != 0) { + exit(1); + } + nread = read(pipe_down[0], &c, 1); + if (nread != 1) { + exit(1); + } + /* leave locked */ + exit(0); + } + if (tdb_robust_mutex_pid == -1) { + goto cleanup_sig_child; + } + close(pipe_down[0]); + pipe_down[0] = -1; + close(pipe_up[1]); + pipe_up[1] = -1; + + nread = read(pipe_up[0], &ret, sizeof(ret)); + if (nread != sizeof(ret)) { + goto cleanup_child; + } + + ret = pthread_mutex_trylock(m); + if (ret != EBUSY) { + if (ret == 0) { + pthread_mutex_unlock(m); + } + goto cleanup_child; + } + + if (write(pipe_down[1], &c, 1) != 1) { + goto cleanup_child; + } + + nread = read(pipe_up[0], &c, 1); + if (nread != 0) { + goto cleanup_child; + } + + while (tdb_robust_mutex_pid > 0) { + pid_t pid; + + errno = 0; + pid = waitpid(tdb_robust_mutex_pid, &status, 0); + if (pid == tdb_robust_mutex_pid) { + tdb_robust_mutex_pid = -1; + break; + } + if (pid == -1 && errno != EINTR) { + goto cleanup_child; + } + } + signal(SIGCHLD, tdb_robust_mutext_old_handler); + + ret = pthread_mutex_trylock(m); + if (ret != EOWNERDEAD) { + if (ret == 0) { + pthread_mutex_unlock(m); + } + goto cleanup_m; + } + + ret = pthread_mutex_consistent(m); + if (ret != 0) { + goto cleanup_m; + } + + ret = pthread_mutex_trylock(m); + if (ret != EDEADLK) { + pthread_mutex_unlock(m); + goto cleanup_m; + } + + ret = pthread_mutex_unlock(m); + if (ret != 0) { + 
goto cleanup_m; + } + + tdb_mutex_locking_cached = true; + goto cleanup_m; + +cleanup_child: + while (tdb_robust_mutex_pid > 0) { + pid_t pid; + + kill(tdb_robust_mutex_pid, SIGKILL); + + errno = 0; + pid = waitpid(tdb_robust_mutex_pid, &status, 0); + if (pid == tdb_robust_mutex_pid) { + tdb_robust_mutex_pid = -1; + break; + } + if (pid == -1 && errno != EINTR) { + break; + } + } +cleanup_sig_child: + signal(SIGCHLD, tdb_robust_mutext_old_handler); +cleanup_m: + pthread_mutex_destroy(m); +cleanup_ma: + pthread_mutexattr_destroy(&ma); +cleanup_pipe: + if (pipe_down[0] != -1) { + close(pipe_down[0]); + } + if (pipe_down[1] != -1) { + close(pipe_down[1]); + } + if (pipe_up[0] != -1) { + close(pipe_up[0]); + } + if (pipe_up[1] != -1) { + close(pipe_up[1]); + } +cleanup_mmap: + munmap(ptr, sizeof(pthread_mutex_t)); + + return tdb_mutex_locking_cached; +} + +#else + +size_t tdb_mutex_size(struct tdb_context *tdb) +{ + return 0; +} + +bool tdb_have_mutexes(struct tdb_context *tdb) +{ + return false; +} + +int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype, + enum tdb_lock_flags flags) +{ + return -1; +} + +int tdb_mutex_allrecord_unlock(struct tdb_context *tdb) +{ + return -1; +} + +int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb) +{ + return -1; +} + +int tdb_mutex_allrecord_downgrade(struct tdb_context *tdb) +{ + return -1; +} + +int tdb_mutex_mmap(struct tdb_context *tdb) +{ + errno = ENOSYS; + return -1; +} + +int tdb_mutex_munmap(struct tdb_context *tdb) +{ + errno = ENOSYS; + return -1; +} + +int tdb_mutex_init(struct tdb_context *tdb) +{ + return ENOSYS; +} + +bool tdb_mutex_locking_supported(void) +{ + return false; +} + +_PUBLIC_ bool tdb_runtime_check_for_robust_mutexes(void) +{ + return false; +} + +#endif diff --git a/lib/tdb/common/open.c b/lib/tdb/common/open.c index 789bc73..13d22e1 100644 --- a/lib/tdb/common/open.c +++ b/lib/tdb/common/open.c @@ -76,6 +76,16 @@ static int tdb_new_database(struct tdb_context *tdb, struct tdb_header 
*header, if (tdb->flags & TDB_INCOMPATIBLE_HASH) newdb->rwlocks = TDB_HASH_RWLOCK_MAGIC; + /* + * For the mutex code we add the FEATURE_FLAG_MAGIC, overwriting the + * TDB_HASH_RWLOCK_MAGIC above. + */ + if ((tdb->flags & TDB_MUTEX_LOCKING) && + tdb_mutex_locking_supported()) { + newdb->rwlocks = TDB_FEATURE_FLAG_MAGIC; + newdb->feature_flags |= TDB_FEATURE_FLAG_MUTEX; + } + if (tdb->flags & TDB_INTERNAL) { tdb->map_size = size; tdb->map_ptr = (char *)newdb; @@ -92,13 +102,55 @@ static int tdb_new_database(struct tdb_context *tdb, struct tdb_header *header, /* This creates an endian-converted header, as if read from disk */ CONVERT(*newdb); - memcpy(header, newdb, sizeof(*header)); /* Don't endian-convert the magic food! */ memcpy(newdb->magic_food, TDB_MAGIC_FOOD, strlen(TDB_MAGIC_FOOD)+1); if (!tdb_write_all(tdb->fd, newdb, size)) goto fail; + if (newdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) { + + tdb->feature_flags = newdb->feature_flags; + tdb->hash_size = newdb->hash_size; + + newdb->mutex_size = tdb_mutex_size(tdb); + + /* + * Overwrite newdb.mutex_size + */ + ret = lseek(tdb->fd, 0, SEEK_SET); + if (ret == -1) { + goto fail; + } + if (!tdb_write_all(tdb->fd, newdb, size)) { + goto fail; + } + + ret = ftruncate( + tdb->fd, + newdb->mutex_size + sizeof(struct tdb_header)); + if (ret == -1) { + goto fail; + } + ret = tdb_mutex_init(tdb); + if (ret == -1) { + goto fail; + } + + /* + * Write a second header behind the mutexes. That's the area + * that will be mmapp'ed. 
+ */ + ret = lseek(tdb->fd, newdb->mutex_size, SEEK_SET); + if (ret == -1) { + goto fail; + } + if (!tdb_write_all(tdb->fd, newdb, size)) { + goto fail; + } + } + + memcpy(header, newdb, sizeof(*header)); ret = 0; fail: SAFE_FREE(newdb); @@ -165,6 +217,68 @@ static bool check_header_hash(struct tdb_context *tdb, return check_header_hash(tdb, header, false, m1, m2); } +static bool tdb_mutex_open_ok(struct tdb_context *tdb) +{ + int locked; + + if (tdb->flags & TDB_NOMMAP) { + /* + * We need to mmap the mutex area + */ + TDB_LOG((tdb, TDB_DEBUG_ERROR, "Can not open a tdb with " + "mutexes without mmap\n")); + return false; + } + + locked = tdb_nest_lock(tdb, ACTIVE_LOCK, F_WRLCK, + TDB_LOCK_NOWAIT|TDB_LOCK_PROBE); + + if ((locked == -1) && (tdb->ecode == TDB_ERR_LOCK)) { + /* + * CLEAR_IF_FIRST still active. The tdb was created on this + * host, so we can assume the mutex implementation is + * compatible. Important for tools like tdbdump on a still + * open locking.tdb. + */ + return true; + } + + /* + * We got the CLEAR_IF_FIRST lock. That means the database was + * potentially copied from somewhere else. The mutex implementation + * might be incompatible. + */ + + if (tdb_nest_unlock(tdb, ACTIVE_LOCK, F_WRLCK, false) == -1) { + /* + * Should not happen + */ + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_mutex_open_ok: " + "failed to release ACTIVE_LOCK on %s: %s\n", + tdb->name, strerror(errno))); + return false; + } + + if (tdb->flags & TDB_NOLOCK) { + /* + * We don't look at locks, so it does not matter to have a + * compatible mutex implementation. Allow the open. + */ + return true; + } + + if (tdb->flags & TDB_CLEAR_IF_FIRST) { + /* + * About to create the db here. 
+ */ + return true; + } + + TDB_LOG((tdb, TDB_DEBUG_ERROR, "Can use mutexes only with " + "CLEAR_IF_FIRST or NOLOCK\n")); + return false; +} + _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags, int open_flags, mode_t mode, const struct tdb_logging_context *log_ctx, @@ -180,6 +294,7 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td unsigned v; const char *hash_alg; uint32_t magic1, magic2; + int ret; ZERO_STRUCT(header); @@ -326,7 +441,6 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td if ((tdb_flags & TDB_CLEAR_IF_FIRST) && (!tdb->read_only) && (locked = (tdb_nest_lock(tdb, ACTIVE_LOCK, F_WRLCK, TDB_LOCK_NOWAIT|TDB_LOCK_PROBE) == 0))) { - int ret; ret = tdb_brlock(tdb, F_WRLCK, FREELIST_TOP, 0, TDB_LOCK_WAIT); if (ret == -1) { @@ -390,12 +504,29 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td goto fail; if (header.rwlocks != 0 && + header.rwlocks != TDB_FEATURE_FLAG_MAGIC && header.rwlocks != TDB_HASH_RWLOCK_MAGIC) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: spinlocks no longer supported\n")); goto fail; } tdb->hash_size = header.hash_size; + if (header.rwlocks == TDB_FEATURE_FLAG_MAGIC) { + tdb->feature_flags = header.feature_flags; + } + + if (tdb_mutex_size(tdb) != header.mutex_size) { + TDB_LOG((tdb, TDB_DEBUG_ERROR, "Mutex size changed\n")); + errno = EINVAL; + goto fail; + } + + if ((tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) && + !tdb_mutex_open_ok(tdb)) { + errno = EINVAL; + goto fail; + } + if ((header.magic1_hash == 0) && (header.magic2_hash == 0)) { /* older TDB without magic hash references */ tdb->hash_fn = tdb_old_hash; @@ -426,19 +557,52 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td } /* Beware truncation! */ - tdb->map_size = st.st_size; - if (tdb->map_size != st.st_size) { - /* Ensure ecode is set for log fn. 
*/ - tdb->ecode = TDB_ERR_IO; - TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: " - "len %llu too large!\n", (long long)st.st_size)); - errno = EIO; - goto fail; + + { + uint32_t map_size = st.st_size; + if (map_size != st.st_size) { + /* Ensure ecode is set for log fn. */ + tdb->ecode = TDB_ERR_IO; + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: " + "len %llu too large!\n", + (long long)st.st_size)); + errno = EIO; + goto fail; + } } tdb->device = st.st_dev; tdb->inode = st.st_ino; - tdb_mmap(tdb); + + /* + * We had tdb_mmap(tdb) here before, + * but for the mutex case we have a modified tdb_fstat() + * which is triggered from tdb_oob() before calling tdb_mmap(). + */ + tdb->map_size = 0; + ret = tdb->methods->tdb_oob(tdb, 0, 1, 0); + if (ret == -1) { + goto fail; + } + + if (tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) { + if (!tdb_mutex_locking_supported()) { + /* + * Database was created with mutex locking, + * but we don't support it. + */ + errno = EINVAL; + goto fail; + } + + if (!(tdb->flags & TDB_NOLOCK)) { + ret = tdb_mutex_mmap(tdb); + if (ret != 0) { + goto fail; + } + } + } + if (locked) { if (tdb_nest_unlock(tdb, ACTIVE_LOCK, F_WRLCK, false) == -1) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " @@ -549,6 +713,9 @@ _PUBLIC_ int tdb_close(struct tdb_context *tdb) else tdb_munmap(tdb); } + + tdb_mutex_munmap(tdb); + SAFE_FREE(tdb->name); if (tdb->fd != -1) { ret = close(tdb->fd); @@ -628,7 +795,13 @@ static int tdb_reopen_internal(struct tdb_context *tdb, bool active_lock) TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: file dev/inode has changed!\n")); goto fail; } - if (tdb_mmap(tdb) != 0) { + /* + * We had tdb_mmap(tdb) here before, + * but for the mutex case we have a modified tdb_fstat() + * which is triggered from tdb_oob() before calling tdb_mmap(). 
+ */ + tdb->map_size = 0; + if (tdb->methods->tdb_oob(tdb, 0, 1, 0) != 0) { goto fail; } #endif /* fake pread or pwrite */ diff --git a/lib/tdb/common/tdb.c b/lib/tdb/common/tdb.c index ebd4ffe..ae98c96 100644 --- a/lib/tdb/common/tdb.c +++ b/lib/tdb/common/tdb.c @@ -723,6 +723,15 @@ _PUBLIC_ void tdb_remove_flags(struct tdb_context *tdb, unsigned flags) return; } + if ((flags & TDB_NOLOCK) && + (tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) && + (tdb->mutexes == NULL)) { + tdb->ecode = TDB_ERR_LOCK; + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_remove_flags: " + "Can not remove NOLOCK flag on mutexed databases")); + return; + } + if (flags & TDB_ALLOW_NESTING) { tdb->flags |= TDB_DISALLOW_NESTING; } diff --git a/lib/tdb/common/tdb_private.h b/lib/tdb/common/tdb_private.h index a672159..9790552 100644 --- a/lib/tdb/common/tdb_private.h +++ b/lib/tdb/common/tdb_private.h @@ -53,6 +53,7 @@ typedef uint32_t tdb_off_t; #define TDB_RECOVERY_MAGIC (0xf53bc0e7U) #define TDB_RECOVERY_INVALID_MAGIC (0x0) #define TDB_HASH_RWLOCK_MAGIC (0xbad1a51U) +#define TDB_FEATURE_FLAG_MAGIC (0xbad1a52U) #define TDB_ALIGNMENT 4 #define DEFAULT_HASH_SIZE 131 #define FREELIST_TOP (sizeof(struct tdb_header)) @@ -68,6 +69,8 @@ typedef uint32_t tdb_off_t; #define TDB_PAD_BYTE 0x42 #define TDB_PAD_U32 0x42424242 +#define TDB_FEATURE_FLAG_MUTEX 1 + /* NB assumes there is a local variable called "tdb" that is the * current context, also takes doubly-parenthesized print-style * argument. */ @@ -152,7 +155,9 @@ struct tdb_header { tdb_off_t sequence_number; /* used when TDB_SEQNUM is set */ uint32_t magic1_hash; /* hash of TDB_MAGIC_FOOD. */ uint32_t magic2_hash; /* hash of TDB_MAGIC. 
*/ - tdb_off_t reserved[27]; + uint32_t feature_flags; + uint32_t mutex_size; /* set if TDB_FEATURE_FLAG_MUTEX is set */ + tdb_off_t reserved[25]; }; struct tdb_lock_type { @@ -186,6 +191,8 @@ struct tdb_methods { int (*tdb_expand_file)(struct tdb_context *, tdb_off_t , tdb_off_t ); }; +struct tdb_mutexes; + struct tdb_context { char *name; /* the name of the database */ void *map_ptr; /* where it is currently mapped */ @@ -198,8 +205,12 @@ struct tdb_context { int num_lockrecs; struct tdb_lock_type *lockrecs; /* only real locks, all with count>0 */ int lockrecs_array_length; + + struct tdb_mutexes *mutexes; /* mmap of the mutex area */ + enum TDB_ERROR ecode; /* error code for last tdb error */ uint32_t hash_size; + uint32_t feature_flags; uint32_t flags; /* the flags passed to tdb_open */ struct tdb_traverse_lock travlocks; /* current traversal locks */ struct tdb_context *next; /* all tdbs to avoid multiple opens */ @@ -292,4 +303,21 @@ bool tdb_add_off_t(tdb_off_t a, tdb_off_t b, tdb_off_t *pret); /* tdb_off_t and tdb_len_t right now are both uint32_t */ #define tdb_add_len_t tdb_add_off_t + +bool tdb_mutex_locking_supported(void); +size_t tdb_mutex_size(struct tdb_context *tdb); +bool tdb_have_mutexes(struct tdb_context *tdb); +int tdb_mutex_init(struct tdb_context *tdb); +int tdb_mutex_mmap(struct tdb_context *tdb); +int tdb_mutex_munmap(struct tdb_context *tdb); +bool tdb_mutex_lock(struct tdb_context *tdb, int rw, off_t off, off_t len, + bool waitflag, int *pret); +bool tdb_mutex_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len, + int *pret); +int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype, + enum tdb_lock_flags flags); +int tdb_mutex_allrecord_unlock(struct tdb_context *tdb); +int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb); +int tdb_mutex_allrecord_downgrade(struct tdb_context *tdb); + #endif /* TDB_PRIVATE_H */ diff --git a/lib/tdb/common/transaction.c b/lib/tdb/common/transaction.c index a2c3bbd..caef0be 100644 --- 
a/lib/tdb/common/transaction.c +++ b/lib/tdb/common/transaction.c @@ -421,7 +421,8 @@ static int _tdb_transaction_start(struct tdb_context *tdb, enum tdb_lock_flags lockflags) { /* some sanity checks */ - if (tdb->read_only || (tdb->flags & TDB_INTERNAL) || tdb->traverse_read) { + if (tdb->read_only || (tdb->flags & (TDB_INTERNAL|TDB_MUTEX_LOCKING)) + || tdb->traverse_read) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_start: cannot start a transaction on a read-only or internal db\n")); tdb->ecode = TDB_ERR_EINVAL; return -1; diff --git a/lib/tdb/docs/mutex.txt b/lib/tdb/docs/mutex.txt new file mode 100644 index 0000000..2597987 --- /dev/null +++ b/lib/tdb/docs/mutex.txt @@ -0,0 +1,106 @@ +Tdb is a hashtable database with multiple concurrent writers and external +record lock support. For speed reasons, wherever possible tdb uses a shared +memory mapped area for data access. In its currently released form, it uses +fcntl byte-range locks to coordinate access to the data itself. + +The tdb data is organized as a hashtable. Hash collisions are dealt with by +forming a linked list of records that share a hash value. The individual +linked lists are protected across processes with 1-byte fcntl locks on the +starting pointer of the linked list representing a hash value. + +The external locking API of tdb allows to lock individual records. Instead of +really locking individual records, the tdb API locks a complete linked list +with a fcntl lock. + +The external locking API of tdb also allows to lock the complete database, and +ctdb uses this facility to freeze databases during a recovery. While the +so-called allrecord lock is held, all linked lists and all individual records +are frozen altogether. Tdb achieves this by locking the complete file range +with a single fcntl lock. Individual 1-byte locks for the linked lists +conflict with this. Access to records is prevented by the one large fcntl byte +range lock.
+ +Fcntl locks have been chosen for tdb for two reasons: First they are portable +across all current unixes. Secondly they provide auto-cleanup. If a process +dies while holding a fcntl lock, the lock is given up as if it was explicitly +unlocked. Thus fcntl locks provide a very robust locking scheme: if a process +dies for any reason the database will not stay blocked until reboot. This +robustness is very important for long-running services; a reboot is not an +option for most users of tdb. + +Unfortunately during stress testing fcntl locks have turned out to be a major +problem for performance. The particular problem that was seen happens when +ctdb on a busy server does a recovery. A recovery means that ctdb has to +freeze all tdb databases for some time, usually a few seconds. This is done +with the allrecord lock. During the recovery phase on a busy server many smbd +processes try to access the tdb file with blocking fcntl calls. The specific +test in question easily reproduces 7,000 processes piling up waiting for +1-byte fcntl locks. When ctdb is done with the recovery, it gives up the +allrecord lock, covering the whole file range. All 7,000 processes waiting for +1-byte fcntl locks are woken up, trying to acquire their lock. The special +implementation of fcntl locks in Linux (up to 2013-02-12 at least) protects +all fcntl lock operations with a single system-wide spinlock. If 7,000 processes +are waiting for the allrecord lock to be released, this leads to a thundering +herd condition, all CPUs are spinning on that single spinlock. + +Functionally the kernel is fine, eventually the thundering herd slows down and +every process correctly gets its share and locking range, but the performance +of the system while the herd is active is worse than expected. + +The thundering herd is only the worst case scenario for fcntl lock use. The +single spinlock for fcntl operations is also a performance penalty for normal +operations.
In the cluster case, every read and write SMB request has to do +two fcntl calls to provide correct SMB mandatory locks. The single spinlock +is one source of serialization for the SMB read/write requests, limiting the +parallelism that can be achieved in a multi-core system. + +While trying to tune his servers, Ira Cooper, Samba Team member, found fcntl +locks to be a problem on Solaris as well. Ira pointed out that there is a +potential alternative locking mechanism that might be more scalable: Process +shared robust mutexes, as defined by POSIX 2008 for example via + +http://pubs.opengroup.org/onlinepubs/9699919799/functions/pthread_mutexattr_setpshared.html +http://pubs.opengroup.org/onlinepubs/9699919799/functions/pthread_mutexattr_setrobust.html + +Pthread mutexes provide one of the core mechanisms in POSIX threads to protect +in-process data structures from concurrent access by multiple threads. In the +Linux implementation, a pthread_mutex_t is represented by a data structure in +user space that requires no kernel calls in the uncontended case for locking +and unlocking. Locking and unlocking in the uncontended case are implemented +purely in user space with atomic CPU instructions and thus are very fast. + +The setpshared functions indicate to the kernel that the mutex is about to be +shared between processes in a common shared memory area. + +The process shared POSIX mutexes have the potential to replace fcntl locking +to coordinate mmap access for tdbs. However, they are missing the critical +auto-cleanup property that fcntl provides when a process dies. A process that +dies hard while holding a shared mutex has no chance to clean up the protected +data structures and unlock the shared mutex. Thus with a pure process shared +mutex the mutex will remain locked forever until the data structures are +re-initialized from scratch. + +With the robust mutexes defined by POSIX the process shared mutexes have been +extended with a limited auto-cleanup property.
If a mutex has been declared +robust, when a process exits while holding that mutex, the next process trying +to lock the mutex will get the special error message EOWNERDEAD. This informs +the caller that the data structures the mutex protects are potentially corrupt +and need to be cleaned up. + +The error message EOWNERDEAD when trying to lock a mutex is an extension over +the fcntl functionality. A process that does a blocking fcntl lock call is not +informed about whether the lock was explicitly freed by a process still alive +or due to an unplanned process exit. At the time of this writing (February +2013), at least Linux and OpenSolaris also implement the robustness feature of +process-shared mutexes. + +Converting the tdb locking mechanism from fcntl to mutexes has to take care of +both types of locks that are used on tdb files. + +The easy part is to use mutexes to replace the 1-byte linked list locks +covering the individual hashes. Those can be represented by a mutex each. + +Covering the allrecord lock is more difficult. The allrecord lock uses a fcntl +lock spanning all hash list locks simultaneously. This basic functionality is +not easily possible with mutexes. A mutex carries 1 bit of information, a +fcntl lock can carry an arbitrary amount of information. diff --git a/lib/tdb/include/tdb.h b/lib/tdb/include/tdb.h index a34f089..15c800e 100644 --- a/lib/tdb/include/tdb.h +++ b/lib/tdb/include/tdb.h @@ -80,6 +80,9 @@ extern "C" { #define TDB_ALLOW_NESTING 512 /** Allow transactions to nest */ #define TDB_DISALLOW_NESTING 1024 /** Disallow transactions to nest */ #define TDB_INCOMPATIBLE_HASH 2048 /** Better hashing: can't be opened by tdb < 1.2.6. 
*/ +#define TDB_MUTEX_LOCKING 4096 /** optimized locking using robust mutexes if supported, + only with tdb >= 1.3.0 and + TDB_CLEAR_IF_FIRST or TDB_NOLOCK */ /** The tdb error codes */ enum TDB_ERROR {TDB_SUCCESS=0, TDB_ERR_CORRUPT, TDB_ERR_IO, TDB_ERR_LOCK, @@ -143,6 +146,11 @@ struct tdb_logging_context { * default 5.\n * TDB_ALLOW_NESTING - Allow transactions to nest.\n * TDB_DISALLOW_NESTING - Disallow transactions to nest.\n + * TDB_INCOMPATIBLE_HASH - Better hashing: can't be opened by tdb < 1.2.6.\n + * TDB_MUTEX_LOCKING - Optimized locking using robust mutexes if supported, + * can't be opened by tdb < 1.3.0. + * Only valid in combination with TDB_CLEAR_IF_FIRST + * or TDB_NOLOCK\n * * @param[in] open_flags Flags for the open(2) function. * @@ -179,6 +187,11 @@ struct tdb_context *tdb_open(const char *name, int hash_size, int tdb_flags, * default 5.\n * TDB_ALLOW_NESTING - Allow transactions to nest.\n * TDB_DISALLOW_NESTING - Disallow transactions to nest.\n + * TDB_INCOMPATIBLE_HASH - Better hashing: can't be opened by tdb < 1.2.6.\n + * TDB_MUTEX_LOCKING - Optimized locking using robust mutexes if supported, + * can't be opened by tdb < 1.3.0. + * Only valid in combination with TDB_CLEAR_IF_FIRST + * or TDB_NOLOCK\n * * @param[in] open_flags Flags for the open(2) function. * @@ -842,6 +855,25 @@ int tdb_rescue(struct tdb_context *tdb, void (*walk) (TDB_DATA key, TDB_DATA data, void *private_data), void *private_data); +/** + * @brief Check if support for TDB_MUTEX_LOCKING is available at runtime. + * + * The feature behind TDB_MUTEX_LOCKING is not available on all systems. + * On some systems the API for pthread_mutexattr_setrobust() is not available. + * On other systems there are some bugs in the interaction between glibc and + * the linux kernel. + * + * This function provides a runtime check if robust mutexes are really + * available. + * + * @note This calls fork(), but the SIGCHLD handling should be transparent.
+ * + * @return true if supported, false otherwise. + * + * @see TDB_MUTEX_LOCKING + */ +bool tdb_runtime_check_for_robust_mutexes(void); + /* @} ******************************************************************/ /* Low level locking functions: use with care */ diff --git a/lib/tdb/test/lock-tracking.c b/lib/tdb/test/lock-tracking.c index b2f092c..3fe20a5 100644 --- a/lib/tdb/test/lock-tracking.c +++ b/lib/tdb/test/lock-tracking.c @@ -84,47 +84,37 @@ int fcntl_with_lockcheck(int fd, int cmd, ... /* arg */ ) } } else { struct testlock *new, *i; - unsigned int fl_end = fl->l_start + fl->l_len; + unsigned int fl_end = fl->l_start + fl->l_len - 1; if (fl->l_len == 0) fl_end = (unsigned int)-1; /* Check for overlaps: we shouldn't do this. */ for (i = testlocks; i; i = i->next) { - unsigned int i_end = i->off + i->len; + unsigned int i_end = i->off + i->len - 1; if (i->len == 0) i_end = (unsigned int)-1; + /* Upgrade a lock */ + if (i->type == F_RDLCK && fl->l_type == F_WRLCK + && i->off == fl->l_start + && i->len == fl->l_len) { + if (ret == 0) + i->type = F_WRLCK; + goto done; + } + if (fl->l_start >= i->off && fl->l_start < i_end) break; if (fl_end >= i->off && fl_end < i_end) break; - - /* tdb_allrecord_lock does this, handle adjacent: */ - if (fl->l_start == i_end && fl->l_type == i->type) { - if (ret == 0) { - i->len = fl->l_len - ? i->len + fl->l_len - : 0; - } - goto done; - } } if (i) { - /* Special case: upgrade of allrecord lock. */ - if (i->type == F_RDLCK && fl->l_type == F_WRLCK - && i->off == FREELIST_TOP - && fl->l_start == FREELIST_TOP - && i->len == 0 - && fl->l_len == 0) { - if (ret == 0) - i->type = F_WRLCK; - goto done; - } if (!suppress_lockcheck) { diag("%s testlock %u@%u overlaps %u@%u", fl->l_type == F_WRLCK ? 
"write" : "read", (int)fl->l_len, (int)fl->l_start, i->len, (int)i->off); + fflush(stdout); locking_errors++; } } diff --git a/lib/tdb/test/logging.c b/lib/tdb/test/logging.c index dfab486..c79379c 100644 --- a/lib/tdb/test/logging.c +++ b/lib/tdb/test/logging.c @@ -24,9 +24,6 @@ static void taplog(struct tdb_context *tdb, va_end(ap); /* Strip trailing \n: diag adds it. */ - if (line[0] && line[strlen(line)-1] == '\n') - diag("%s%.*s", log_prefix, (unsigned)strlen(line)-1, line); - else diag("%s%s", log_prefix, line); } diff --git a/lib/tdb/test/run-3G-file.c b/lib/tdb/test/run-3G-file.c index 67fd54f..748c972 100644 --- a/lib/tdb/test/run-3G-file.c +++ b/lib/tdb/test/run-3G-file.c @@ -9,6 +9,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" @@ -22,12 +23,12 @@ static int tdb_expand_file_sparse(struct tdb_context *tdb, return -1; } - if (ftruncate(tdb->fd, size+addition) == -1) { + if (tdb_ftruncate(tdb, size+addition) == -1) { char b = 0; - ssize_t written = pwrite(tdb->fd, &b, 1, (size+addition) - 1); + ssize_t written = tdb_pwrite(tdb, &b, 1, (size+addition) - 1); if (written == 0) { /* try once more, potentially revealing errno */ - written = pwrite(tdb->fd, &b, 1, (size+addition) - 1); + written = tdb_pwrite(tdb, &b, 1, (size+addition) - 1); } if (written == 0) { /* again - give up, guessing errno */ diff --git a/lib/tdb/test/run-bad-tdb-header.c b/lib/tdb/test/run-bad-tdb-header.c index b00fb89..9d29fdf 100644 --- a/lib/tdb/test/run-bad-tdb-header.c +++ b/lib/tdb/test/run-bad-tdb-header.c @@ -9,6 +9,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" diff --git a/lib/tdb/test/run-check.c b/lib/tdb/test/run-check.c index b275691..ce389a2 100644 --- a/lib/tdb/test/run-check.c +++ b/lib/tdb/test/run-check.c @@ -9,6 
+9,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" diff --git a/lib/tdb/test/run-corrupt.c b/lib/tdb/test/run-corrupt.c index 93eae42..e6fc751 100644 --- a/lib/tdb/test/run-corrupt.c +++ b/lib/tdb/test/run-corrupt.c @@ -9,6 +9,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" diff --git a/lib/tdb/test/run-die-during-transaction.c b/lib/tdb/test/run-die-during-transaction.c index 9b90415..2392f64 100644 --- a/lib/tdb/test/run-die-during-transaction.c +++ b/lib/tdb/test/run-die-during-transaction.c @@ -19,6 +19,7 @@ static int ftruncate_check(int fd, off_t length); #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include #include @@ -223,7 +224,8 @@ int main(int argc, char *argv[]) agent = prepare_external_agent(); for (i = 0; i < sizeof(ops)/sizeof(ops[0]); i++) { - diag("Testing %s after death", operation_name(ops[i])); + diag("Testing %s after death\n", operation_name(ops[i])); + fflush(stdout); ok1(test_death(ops[i], agent)); } diff --git a/lib/tdb/test/run-endian.c b/lib/tdb/test/run-endian.c index 3116f7d..9d4d5f5 100644 --- a/lib/tdb/test/run-endian.c +++ b/lib/tdb/test/run-endian.c @@ -9,6 +9,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" diff --git a/lib/tdb/test/run-incompatible.c b/lib/tdb/test/run-incompatible.c index af01ca6..b8e95b5 100644 --- a/lib/tdb/test/run-incompatible.c +++ b/lib/tdb/test/run-incompatible.c @@ -9,6 +9,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include diff 
--git a/lib/tdb/test/run-mutex-allrecord-bench.c b/lib/tdb/test/run-mutex-allrecord-bench.c new file mode 100644 index 0000000..5b816b3 --- /dev/null +++ b/lib/tdb/test/run-mutex-allrecord-bench.c @@ -0,0 +1,75 @@ +#include "../common/tdb_private.h" +#include "../common/io.c" +#include "../common/tdb.c" +#include "../common/lock.c" +#include "../common/freelist.c" +#include "../common/traverse.c" +#include "../common/transaction.c" +#include "../common/error.c" +#include "../common/open.c" +#include "../common/check.c" +#include "../common/hash.c" +#include "../common/mutex.c" +#include "tap-interface.h" +#include +#include +#include +#include +#include + +static TDB_DATA key, data; + +static void log_fn(struct tdb_context *tdb, enum tdb_debug_level level, + const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +static double timeval_elapsed2(const struct timeval *tv1, const struct timeval *tv2) +{ + return (tv2->tv_sec - tv1->tv_sec) + + (tv2->tv_usec - tv1->tv_usec)*1.0e-6; +} + +static double timeval_elapsed(const struct timeval *tv) +{ + struct timeval tv2; + gettimeofday(&tv2, NULL); + return timeval_elapsed2(tv, &tv2); +} + +/* The code should barf on TDBs created with rwlocks. 
*/ +int main(int argc, char *argv[]) +{ + struct tdb_context *tdb; + unsigned int log_count; + struct tdb_logging_context log_ctx = { log_fn, &log_count }; + int ret; + struct timeval start; + double elapsed; + + key.dsize = strlen("hi"); + key.dptr = (void *)"hi"; + data.dsize = strlen("world"); + data.dptr = (void *)"world"; + + tdb = tdb_open_ex("mutex-allrecord-bench.tdb", 1000000, + TDB_INCOMPATIBLE_HASH| + TDB_MUTEX_LOCKING| + TDB_CLEAR_IF_FIRST, + O_RDWR|O_CREAT, 0755, &log_ctx, NULL); + ok(tdb, "tdb_open_ex should succeed\n"); + + gettimeofday(&start, NULL); + ret = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false); + elapsed = timeval_elapsed(&start); + + ok(ret == 0, "tdb_allrecord_lock should succeed\n"); + + printf("allrecord_lock took %f seconds\n", elapsed); + + return exit_status(); +} diff --git a/lib/tdb/test/run-mutex-allrecord-block.c b/lib/tdb/test/run-mutex-allrecord-block.c new file mode 100644 index 0000000..52ead07b --- /dev/null +++ b/lib/tdb/test/run-mutex-allrecord-block.c @@ -0,0 +1,101 @@ +#include "../common/tdb_private.h" +#include "../common/io.c" +#include "../common/tdb.c" +#include "../common/lock.c" +#include "../common/freelist.c" +#include "../common/traverse.c" +#include "../common/transaction.c" +#include "../common/error.c" +#include "../common/open.c" +#include "../common/check.c" +#include "../common/hash.c" +#include "../common/mutex.c" +#include "tap-interface.h" +#include +#include +#include +#include +#include + +static TDB_DATA key, data; + +static void log_fn(struct tdb_context *tdb, enum tdb_debug_level level, + const char *fmt, ...) 
+{ + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +static int do_child(int tdb_flags, int fd) +{ + struct tdb_context *tdb; + unsigned int log_count; + struct tdb_logging_context log_ctx = { log_fn, &log_count }; + int ret; + char c = 0; + + tdb = tdb_open_ex("mutex-allrecord-block.tdb", 3, tdb_flags, + O_RDWR|O_CREAT, 0755, &log_ctx, NULL); + ok(tdb, "tdb_open_ex should succeed\n"); + + ret = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false); + ok(ret == 0, "tdb_allrecord_lock should succeed\n"); + + write(fd, &c, sizeof(c)); + + system("/bin/sleep 99999"); + + ret = tdb_allrecord_unlock(tdb, F_WRLCK, false); + ok(ret == 0, "tdb_allrecord_unlock should succeed\n"); + + return 0; +} + +/* The code should barf on TDBs created with rwlocks. */ +int main(int argc, char *argv[]) +{ + struct tdb_context *tdb; + unsigned int log_count; + struct tdb_logging_context log_ctx = { log_fn, &log_count }; + int ret, status; + pid_t child, wait_ret; + int pipefds[2]; + char c; + int tdb_flags; + + key.dsize = strlen("hi"); + key.dptr = (void *)"hi"; + data.dsize = strlen("world"); + data.dptr = (void *)"world"; + + pipe(pipefds); + + tdb_flags = TDB_INCOMPATIBLE_HASH| + TDB_MUTEX_LOCKING| + TDB_CLEAR_IF_FIRST; + + child = fork(); + if (child == 0) { + return do_child(tdb_flags, pipefds[1]); + } + + read(pipefds[0], &c, sizeof(c)); + + tdb = tdb_open_ex("mutex-allrecord-block.tdb", 0, + tdb_flags, O_RDWR|O_CREAT, 0755, + &log_ctx, NULL); + ok(tdb, "tdb_open_ex should succeed\n"); + + ret = tdb_chainlock(tdb, key); + ok(ret == 0, "tdb_chainlock (nowait) should succeed\n"); + + ret = tdb_chainunlock(tdb, key); + ok(ret == 0, "tdb_chainunlock should succeed\n"); + + wait_ret = wait(&status); + ok(wait_ret == child, "child should have exited correctly\n"); + + return exit_status(); +} diff --git a/lib/tdb/test/run-mutex-allrecord-trylock.c b/lib/tdb/test/run-mutex-allrecord-trylock.c new file mode 100644 index 0000000..1ae1512 --- /dev/null +++ 
b/lib/tdb/test/run-mutex-allrecord-trylock.c @@ -0,0 +1,97 @@ +#include "../common/tdb_private.h" +#include "../common/io.c" +#include "../common/tdb.c" +#include "../common/lock.c" +#include "../common/freelist.c" +#include "../common/traverse.c" +#include "../common/transaction.c" +#include "../common/error.c" +#include "../common/open.c" +#include "../common/check.c" +#include "../common/hash.c" +#include "../common/mutex.c" +#include "tap-interface.h" +#include +#include +#include +#include +#include + +static TDB_DATA key, data; + +static void log_fn(struct tdb_context *tdb, enum tdb_debug_level level, + const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +static int do_child(int tdb_flags, int fd) +{ + struct tdb_context *tdb; + unsigned int log_count; + struct tdb_logging_context log_ctx = { log_fn, &log_count }; + int ret; + char c = 0; + + tdb = tdb_open_ex("mutex-allrecord-trylock.tdb", 3, tdb_flags, + O_RDWR|O_CREAT, 0755, &log_ctx, NULL); + ok(tdb, "tdb_open_ex should succeed\n"); + + ret = tdb_chainlock(tdb, key); + ok(ret == 0, "tdb_chainlock should succeed\n"); + + write(fd, &c, sizeof(c)); + + poll(NULL, 0, 1000); + + ret = tdb_chainunlock(tdb, key); + ok(ret == 0, "tdb_chainunlock should succeed\n"); + + return 0; +} + +/* The code should barf on TDBs created with rwlocks. 
*/ +int main(int argc, char *argv[]) +{ + struct tdb_context *tdb; + unsigned int log_count; + struct tdb_logging_context log_ctx = { log_fn, &log_count }; + int ret, status; + pid_t child, wait_ret; + int pipefds[2]; + char c; + int tdb_flags; + + key.dsize = strlen("hi"); + key.dptr = (void *)"hi"; + data.dsize = strlen("world"); + data.dptr = (void *)"world"; + + pipe(pipefds); + + tdb_flags = TDB_INCOMPATIBLE_HASH| + TDB_MUTEX_LOCKING| + TDB_CLEAR_IF_FIRST; + + child = fork(); + if (child == 0) { + return do_child(tdb_flags, pipefds[1]); + } + + read(pipefds[0], &c, sizeof(c)); + + tdb = tdb_open_ex("mutex-allrecord-trylock.tdb", 0, tdb_flags, + O_RDWR|O_CREAT, 0755, &log_ctx, NULL); + ok(tdb, "tdb_open_ex should succeed\n"); + + ret = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_NOWAIT, false); + ok(ret == -1, "tdb_allrecord_lock (nowait) not should succeed\n"); + + wait_ret = wait(&status); + ok(wait_ret == child, "child should have exited correctly\n"); + + return exit_status(); +} diff --git a/lib/tdb/test/run-mutex-die.c b/lib/tdb/test/run-mutex-die.c new file mode 100644 index 0000000..6e1c40d --- /dev/null +++ b/lib/tdb/test/run-mutex-die.c @@ -0,0 +1,266 @@ +#include "../common/tdb_private.h" +#include "lock-tracking.h" +static ssize_t pwrite_check(int fd, const void *buf, size_t count, off_t offset); +static ssize_t write_check(int fd, const void *buf, size_t count); +static int ftruncate_check(int fd, off_t length); + +#define pwrite pwrite_check +#define write write_check +#define fcntl fcntl_with_lockcheck +#define ftruncate ftruncate_check + +#include "../common/io.c" +#include "../common/tdb.c" +#include "../common/lock.c" +#include "../common/freelist.c" +#include "../common/traverse.c" +#include "../common/transaction.c" +#include "../common/error.c" +#include "../common/open.c" +#include "../common/check.c" +#include "../common/hash.c" +#include "../common/mutex.c" +#include "tap-interface.h" +#include +#include +#include +#include 
"external-agent.h" +#include "logging.h" + +#undef write +#undef pwrite +#undef fcntl +#undef ftruncate + +static int target, current; +#define TEST_DBNAME "run-mutex-die.tdb" +#define KEY_STRING "helloworld" + +static void maybe_die(int fd) +{ + if (target == 0) { + return; + } + current += 1; + if (current == target) { + _exit(1); + } +} + +static ssize_t pwrite_check(int fd, + const void *buf, size_t count, off_t offset) +{ + ssize_t ret; + + maybe_die(fd); + + ret = pwrite(fd, buf, count, offset); + if (ret != count) + return ret; + + maybe_die(fd); + return ret; +} + +static ssize_t write_check(int fd, const void *buf, size_t count) +{ + ssize_t ret; + + maybe_die(fd); + + ret = write(fd, buf, count); + if (ret != count) + return ret; + + maybe_die(fd); + return ret; +} + +static int ftruncate_check(int fd, off_t length) +{ + int ret; + + maybe_die(fd); + + ret = ftruncate(fd, length); + + maybe_die(fd); + return ret; +} + +static enum agent_return flakey_ops(struct agent *a, enum operation op, + TDB_DATA key) +{ + enum agent_return ret; + + /* + * Run in the external agent child + */ + + ret = external_agent_operation(a, OPEN, TEST_DBNAME); + if (ret != SUCCESS) { + fprintf(stderr, "Agent failed to open: %s\n", + agent_return_name(ret)); + return ret; + } + ret = external_agent_operation(a, UNMAP, ""); + if (ret != SUCCESS) { + fprintf(stderr, "Agent failed to unmap: %s\n", + agent_return_name(ret)); + return ret; + } + ret = external_agent_operation(a, STORE, "xyz"); + if (ret != SUCCESS) { + fprintf(stderr, "Agent failed to store: %s\n", + agent_return_name(ret)); + return ret; + } + ret = external_agent_operation(a, TRANSACTION_START, ""); + if (ret != SUCCESS) { + fprintf(stderr, "Agent failed transaction_start: %s\n", + agent_return_name(ret)); + return ret; + } + ret = external_agent_operation(a, STORE, (char *)key.dptr); + if (ret != SUCCESS) { + fprintf(stderr, "Agent failed store: %s\n", + agent_return_name(ret)); + return ret; + } + ret = 
external_agent_operation(a, TRANSACTION_COMMIT, ""); + if (ret != SUCCESS) { + fprintf(stderr, "Agent failed commit: %s\n", + agent_return_name(ret)); + return ret; + } + ret = external_agent_operation(a, FETCH, KEY_STRING); + if (ret != SUCCESS) { + fprintf(stderr, "Agent failed find key: %s\n", + agent_return_name(ret)); + return ret; + } + ret = external_agent_operation(a, PING, ""); + if (ret != SUCCESS) { + fprintf(stderr, "Agent failed ping: %s\n", + agent_return_name(ret)); + return ret; + } + return ret; +} + +static void prep_db(TDB_DATA key) { + struct tdb_context *tdb; + TDB_DATA data; + + data.dptr = (uint8_t *)"foo"; + data.dsize = strlen((char *)data.dptr); + + unlink(TEST_DBNAME); + + tdb = tdb_open_ex( + TEST_DBNAME, 2, + TDB_INCOMPATIBLE_HASH|TDB_MUTEX_LOCKING|TDB_CLEAR_IF_FIRST, + O_CREAT|O_TRUNC|O_RDWR, 0600, &taplogctx, NULL); + + if (tdb_store(tdb, key, data, TDB_INSERT) != 0) { + return; + } + + tdb_close(tdb); + tdb = NULL; + + forget_locking(); +} + +static bool test_db(void) { + struct tdb_context *tdb; + int ret; + + tdb = tdb_open_ex( + TEST_DBNAME, 1024, TDB_INCOMPATIBLE_HASH, + O_RDWR, 0600, &taplogctx, NULL); + + if (tdb == NULL) { + perror("tdb_open_ex failed"); + return false; + } + + ret = tdb_traverse(tdb, NULL, NULL); + if (ret == -1) { + perror("traverse failed"); + goto fail; + } + + tdb_close(tdb); + + forget_locking(); + + return true; + +fail: + tdb_close(tdb); + return false; +} + +static bool test_one(enum operation op, TDB_DATA key) +{ + enum agent_return ret; + + ret = AGENT_DIED; + target = 19; + + while (ret != SUCCESS) { + struct agent *agent; + + { + int child_target = target; + target = 0; + prep_db(key); + target = child_target; + } + + agent = prepare_external_agent(); + + ret = flakey_ops(agent, STORE, key); + + printf("Agent (target=%d) returns %s\n", target, + agent_return_name(ret)); + + shutdown_agent(agent); + + { + int child_target = target; + target = 0; + if (!test_db()) { + return false; + } + target = 
child_target; + } + + target += 1; + } + + return true; +} + +int main(int argc, char *argv[]) +{ + enum operation ops[] = { FETCH }; + TDB_DATA key; + int i; + + plan_tests(12); + unlock_callback = maybe_die; + + key.dsize = strlen(KEY_STRING); + key.dptr = (void *)KEY_STRING; + + for (i = 0; i < sizeof(ops)/sizeof(ops[0]); i++) { + enum agent_return ret; + diag("Testing %s after death\n", operation_name(ops[i])); + fflush(stdout); + ret = test_one(ops[i], key); + } + + return exit_status(); +} diff --git a/lib/tdb/test/run-mutex-openflags2.c b/lib/tdb/test/run-mutex-openflags2.c new file mode 100644 index 0000000..a049dbc --- /dev/null +++ b/lib/tdb/test/run-mutex-openflags2.c @@ -0,0 +1,94 @@ +#include "../common/tdb_private.h" +#include "../common/io.c" +#include "../common/tdb.c" +#include "../common/lock.c" +#include "../common/freelist.c" +#include "../common/traverse.c" +#include "../common/transaction.c" +#include "../common/error.c" +#include "../common/open.c" +#include "../common/check.c" +#include "../common/hash.c" +#include "../common/mutex.c" +#include "tap-interface.h" +#include +#include +#include +#include +#include + +static TDB_DATA key, data; + +static void log_fn(struct tdb_context *tdb, enum tdb_debug_level level, + const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +static int do_child(int fd) +{ + struct tdb_context *tdb; + unsigned int log_count; + struct tdb_logging_context log_ctx = { log_fn, &log_count }; + char c; + + read(fd, &c, 1); + + tdb = tdb_open_ex("mutex-openflags2.tdb", 0, + TDB_INCOMPATIBLE_HASH| + TDB_CLEAR_IF_FIRST, + O_RDWR|O_CREAT, 0755, &log_ctx, NULL); + ok((tdb == NULL) && (errno == EINVAL), + "tdb_open_ex without mutexes should fail with EINVAL\n"); + + return 0; +} + +/* The code should barf on TDBs created with rwlocks. 
*/ +int main(int argc, char *argv[]) +{ + struct tdb_context *tdb; + unsigned int log_count; + struct tdb_logging_context log_ctx = { log_fn, &log_count }; + int ret, status; + pid_t child, wait_ret; + int pipefd[2]; + char c = 0; + + ret = pipe(pipefd); + + key.dsize = strlen("hi"); + key.dptr = (void *)"hi"; + data.dsize = strlen("world"); + data.dptr = (void *)"world"; + + tdb = tdb_open_ex("mutex-openflags2.tdb", 0, + TDB_INCOMPATIBLE_HASH| + TDB_MUTEX_LOCKING, + O_RDWR|O_CREAT, 0755, &log_ctx, NULL); + ok((tdb == NULL) && (errno == EINVAL), "TDB_MUTEX_LOCKING without " + "TDB_CLEAR_IF_FIRST should fail with EINVAL\n"); + + child = fork(); + if (child == 0) { + return do_child(pipefd[0]); + } + + tdb = tdb_open_ex("mutex-openflags2.tdb", 0, + TDB_INCOMPATIBLE_HASH| + TDB_CLEAR_IF_FIRST| + TDB_MUTEX_LOCKING, + O_RDWR|O_CREAT, 0755, &log_ctx, NULL); + ok(tdb, "tdb_open_ex with mutexes should succeed\n"); + + write(pipefd[1], &c, 1); + + wait_ret = wait(&status); + ok((wait_ret == child) && (status == 0), + "child should have exited correctly\n"); + + return exit_status(); +} diff --git a/lib/tdb/test/run-mutex-trylock.c b/lib/tdb/test/run-mutex-trylock.c new file mode 100644 index 0000000..a3ec440 --- /dev/null +++ b/lib/tdb/test/run-mutex-trylock.c @@ -0,0 +1,106 @@ +#include "../common/tdb_private.h" +#include "../common/io.c" +#include "../common/tdb.c" +#include "../common/lock.c" +#include "../common/freelist.c" +#include "../common/traverse.c" +#include "../common/transaction.c" +#include "../common/error.c" +#include "../common/open.c" +#include "../common/check.c" +#include "../common/hash.c" +#include "../common/mutex.c" +#include "tap-interface.h" +#include +#include +#include +#include +#include + +static TDB_DATA key, data; + +static void log_fn(struct tdb_context *tdb, enum tdb_debug_level level, + const char *fmt, ...) 
+{ + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +static int do_child(int tdb_flags, int fd) +{ + struct tdb_context *tdb; + unsigned int log_count; + struct tdb_logging_context log_ctx = { log_fn, &log_count }; + int ret; + char c = 0; + + tdb = tdb_open_ex("mutex-trylock.tdb", 0, tdb_flags, + O_RDWR|O_CREAT, 0755, &log_ctx, NULL); + ok(tdb, "tdb_open_ex should succeed\n"); + + ret = tdb_chainlock(tdb, key); + ok(ret == 0, "tdb_chainlock should succeed\n"); + + write(fd, &c, sizeof(c)); + + poll(NULL, 0, 1000); + + ret = tdb_chainunlock(tdb, key); + ok(ret == 0, "tdb_chainunlock should succeed\n"); + + write(fd, &c, sizeof(c)); + + return 0; +} + +/* The code should barf on TDBs created with rwlocks. */ +int main(int argc, char *argv[]) +{ + struct tdb_context *tdb; + unsigned int log_count; + struct tdb_logging_context log_ctx = { log_fn, &log_count }; + int ret, status; + pid_t child, wait_ret; + int pipefds[2]; + char c; + int tdb_flags; + + key.dsize = strlen("hi"); + key.dptr = (void *)"hi"; + data.dsize = strlen("world"); + data.dptr = (void *)"world"; + + pipe(pipefds); + + tdb_flags = TDB_INCOMPATIBLE_HASH| + TDB_MUTEX_LOCKING| + TDB_CLEAR_IF_FIRST; + + child = fork(); + if (child == 0) { + return do_child(tdb_flags, pipefds[1]); + } + + read(pipefds[0], &c, sizeof(c)); + + tdb = tdb_open_ex("mutex-trylock.tdb", 0, tdb_flags, + O_RDWR|O_CREAT, 0755, &log_ctx, NULL); + ok(tdb, "tdb_open_ex should succeed\n"); + + ret = tdb_chainlock_nonblock(tdb, key); + ok(ret == -1, "tdb_chainlock_nonblock should not succeed\n"); + + read(pipefds[0], &c, sizeof(c)); + + ret = tdb_chainlock_nonblock(tdb, key); + ok(ret == 0, "tdb_chainlock_nonblock should succeed\n"); + ret = tdb_chainunlock(tdb, key); + ok(ret == 0, "tdb_chainunlock should succeed\n"); + + wait_ret = wait(&status); + ok(wait_ret == child, "child should have exited correctly\n"); + + return exit_status(); +} diff --git a/lib/tdb/test/run-mutex1.c 
b/lib/tdb/test/run-mutex1.c new file mode 100644 index 0000000..3753fef --- /dev/null +++ b/lib/tdb/test/run-mutex1.c @@ -0,0 +1,120 @@ +#include "../common/tdb_private.h" +#include "../common/io.c" +#include "../common/tdb.c" +#include "../common/lock.c" +#include "../common/freelist.c" +#include "../common/traverse.c" +#include "../common/transaction.c" +#include "../common/error.c" +#include "../common/open.c" +#include "../common/check.c" +#include "../common/hash.c" +#include "../common/mutex.c" +#include "tap-interface.h" +#include +#include +#include +#include +#include + +static TDB_DATA key, data; + +static void log_fn(struct tdb_context *tdb, enum tdb_debug_level level, + const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +static int do_child(int tdb_flags, int fd) +{ + struct tdb_context *tdb; + unsigned int log_count; + struct tdb_logging_context log_ctx = { log_fn, &log_count }; + int ret; + char c = 0; + + tdb = tdb_open_ex("mutex1.tdb", 0, tdb_flags, + O_RDWR|O_CREAT, 0755, &log_ctx, NULL); + ok(tdb, "tdb_open_ex should succeed\n"); + + ret = tdb_chainlock(tdb, key); + ok(ret == 0, "tdb_chainlock should succeed\n"); + + write(fd, &c, sizeof(c)); + + poll(NULL, 0, 1000); + + ret = tdb_chainunlock(tdb, key); + ok(ret == 0, "tdb_chainunlock should succeed\n"); + + poll(NULL, 0, 1000); + + ret = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false); + ok(ret == 0, "tdb_allrecord_lock should succeed\n"); + + write(fd, &c, sizeof(c)); + + poll(NULL, 0, 1000); + + ret = tdb_allrecord_unlock(tdb, F_WRLCK, false); + ok(ret == 0, "tdb_allrecord_lock should succeed\n"); + + return 0; +} + +/* The code should barf on TDBs created with rwlocks. 
*/ +int main(int argc, char *argv[]) +{ + struct tdb_context *tdb; + unsigned int log_count; + struct tdb_logging_context log_ctx = { log_fn, &log_count }; + int ret, status; + pid_t child, wait_ret; + int pipefds[2]; + char c; + int tdb_flags; + + key.dsize = strlen("hi"); + key.dptr = (void *)"hi"; + data.dsize = strlen("world"); + data.dptr = (void *)"world"; + + pipe(pipefds); + + tdb_flags = TDB_INCOMPATIBLE_HASH| + TDB_MUTEX_LOCKING| + TDB_CLEAR_IF_FIRST; + + child = fork(); + if (child == 0) { + return do_child(tdb_flags, pipefds[1]); + } + + read(pipefds[0], &c, sizeof(c)); + + tdb = tdb_open_ex("mutex1.tdb", 0, tdb_flags, + O_RDWR|O_CREAT, 0755, &log_ctx, NULL); + ok(tdb, "tdb_open_ex should succeed\n"); + + ret = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false); + ok(ret == 0, "tdb_allrecord_lock should succeed\n"); + + ret = tdb_store(tdb, key, data, 0); + ok(ret == 0, "tdb_store should succeed\n"); + + ret = tdb_allrecord_unlock(tdb, F_WRLCK, false); + ok(ret == 0, "tdb_allrecord_unlock should succeed\n"); + + read(pipefds[0], &c, sizeof(c)); + + ret = tdb_delete(tdb, key); + ok(ret == 0, "tdb_delete should succeed\n"); + + wait_ret = wait(&status); + ok(wait_ret == child, "child should have exited correctly\n"); + + return exit_status(); +} diff --git a/lib/tdb/test/run-nested-transactions.c b/lib/tdb/test/run-nested-transactions.c index bf08e55..864adf2 100644 --- a/lib/tdb/test/run-nested-transactions.c +++ b/lib/tdb/test/run-nested-transactions.c @@ -9,6 +9,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include #include diff --git a/lib/tdb/test/run-nested-traverse.c b/lib/tdb/test/run-nested-traverse.c index 361dc2e..22ee3e2 100644 --- a/lib/tdb/test/run-nested-traverse.c +++ b/lib/tdb/test/run-nested-traverse.c @@ -11,6 +11,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" 
#include "tap-interface.h" #undef fcntl #include diff --git a/lib/tdb/test/run-no-lock-during-traverse.c b/lib/tdb/test/run-no-lock-during-traverse.c index b5e31dc..737a32f 100644 --- a/lib/tdb/test/run-no-lock-during-traverse.c +++ b/lib/tdb/test/run-no-lock-during-traverse.c @@ -13,6 +13,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" diff --git a/lib/tdb/test/run-oldhash.c b/lib/tdb/test/run-oldhash.c index 535336c..aaee6f6 100644 --- a/lib/tdb/test/run-oldhash.c +++ b/lib/tdb/test/run-oldhash.c @@ -9,6 +9,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" diff --git a/lib/tdb/test/run-open-during-transaction.c b/lib/tdb/test/run-open-during-transaction.c index 04ba956..1605376 100644 --- a/lib/tdb/test/run-open-during-transaction.c +++ b/lib/tdb/test/run-open-during-transaction.c @@ -20,6 +20,7 @@ static int ftruncate_check(int fd, off_t length); #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include #include diff --git a/lib/tdb/test/run-readonly-check.c b/lib/tdb/test/run-readonly-check.c index e518532..c5e0f7d 100644 --- a/lib/tdb/test/run-readonly-check.c +++ b/lib/tdb/test/run-readonly-check.c @@ -11,6 +11,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" diff --git a/lib/tdb/test/run-rescue-find_entry.c b/lib/tdb/test/run-rescue-find_entry.c index 25f4f1c..5d6f8f7 100644 --- a/lib/tdb/test/run-rescue-find_entry.c +++ b/lib/tdb/test/run-rescue-find_entry.c @@ -10,6 +10,7 @@ #include "../common/check.c" #include "../common/hash.c" #include "../common/rescue.c" +#include 
"../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" diff --git a/lib/tdb/test/run-rescue.c b/lib/tdb/test/run-rescue.c index 7c806a4..e43f53b 100644 --- a/lib/tdb/test/run-rescue.c +++ b/lib/tdb/test/run-rescue.c @@ -10,6 +10,7 @@ #include "../common/check.c" #include "../common/hash.c" #include "../common/rescue.c" +#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" diff --git a/lib/tdb/test/run-rwlock-check.c b/lib/tdb/test/run-rwlock-check.c index 8b8072d..2ac9dc3 100644 --- a/lib/tdb/test/run-rwlock-check.c +++ b/lib/tdb/test/run-rwlock-check.c @@ -9,6 +9,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include diff --git a/lib/tdb/test/run-summary.c b/lib/tdb/test/run-summary.c index 2231284..8b9a1a0 100644 --- a/lib/tdb/test/run-summary.c +++ b/lib/tdb/test/run-summary.c @@ -10,6 +10,7 @@ #include "../common/check.c" #include "../common/hash.c" #include "../common/summary.c" +#include "../common/mutex.c" #include "tap-interface.h" #include diff --git a/lib/tdb/test/run-transaction-expand.c b/lib/tdb/test/run-transaction-expand.c index ddf1f24..d36b894 100644 --- a/lib/tdb/test/run-transaction-expand.c +++ b/lib/tdb/test/run-transaction-expand.c @@ -37,6 +37,7 @@ static inline int fake_fdatasync(int fd) #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" diff --git a/lib/tdb/test/run-traverse-in-transaction.c b/lib/tdb/test/run-traverse-in-transaction.c index 48194b8..17d6412 100644 --- a/lib/tdb/test/run-traverse-in-transaction.c +++ b/lib/tdb/test/run-traverse-in-transaction.c @@ -11,6 +11,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #undef fcntl_with_lockcheck #include diff --git 
a/lib/tdb/test/run-wronghash-fail.c b/lib/tdb/test/run-wronghash-fail.c index 9c78fc5..c44b0f5 100644 --- a/lib/tdb/test/run-wronghash-fail.c +++ b/lib/tdb/test/run-wronghash-fail.c @@ -9,6 +9,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include diff --git a/lib/tdb/test/run-zero-append.c b/lib/tdb/test/run-zero-append.c index a2324c4..f9eba1b 100644 --- a/lib/tdb/test/run-zero-append.c +++ b/lib/tdb/test/run-zero-append.c @@ -9,6 +9,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" diff --git a/lib/tdb/test/run.c b/lib/tdb/test/run.c index f61fcf6..c744c4d 100644 --- a/lib/tdb/test/run.c +++ b/lib/tdb/test/run.c @@ -9,6 +9,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" diff --git a/lib/tdb/tools/tdbtorture.c b/lib/tdb/tools/tdbtorture.c index 5ae08f6..3e26f65 100644 --- a/lib/tdb/tools/tdbtorture.c +++ b/lib/tdb/tools/tdbtorture.c @@ -33,6 +33,7 @@ static int always_transaction = 0; static int hash_size = 2; static int loopnum; static int count_pipe; +static bool mutex = false; static struct tdb_logging_context log_ctx; #ifdef PRINTF_ATTRIBUTE @@ -119,6 +120,7 @@ static void addrec_db(void) #if TRANSACTION_PROB if (in_transaction == 0 && + ((tdb_get_flags(db) & TDB_MUTEX_LOCKING) == 0) && (always_transaction || random() % TRANSACTION_PROB == 0)) { if (tdb_transaction_start(db) != 0) { fatal("tdb_transaction_start failed"); @@ -216,7 +218,7 @@ static int traverse_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, static void usage(void) { - printf("Usage: tdbtorture [-t] [-k] [-n NUM_PROCS] [-l NUM_LOOPS] [-s SEED] [-H HASH_SIZE]\n"); + printf("Usage: tdbtorture [-t] [-k] [-m] [-n NUM_PROCS] [-l NUM_LOOPS] 
[-s SEED] [-H HASH_SIZE]\n"); exit(0); } @@ -230,7 +232,13 @@ static void send_count_and_suicide(int sig) static int run_child(const char *filename, int i, int seed, unsigned num_loops, unsigned start) { - db = tdb_open_ex(filename, hash_size, TDB_DEFAULT, + int tdb_flags = TDB_DEFAULT|TDB_CLEAR_IF_FIRST|TDB_INCOMPATIBLE_HASH; + + if (mutex) { + tdb_flags |= TDB_MUTEX_LOCKING; + } + + db = tdb_open_ex(filename, hash_size, tdb_flags, O_RDWR | O_CREAT, 0600, &log_ctx, NULL); if (!db) { fatal("db open failed"); @@ -302,7 +310,7 @@ int main(int argc, char * const *argv) log_ctx.log_fn = tdb_log; - while ((c = getopt(argc, argv, "n:l:s:H:thk")) != -1) { + while ((c = getopt(argc, argv, "n:l:s:H:thkm")) != -1) { switch (c) { case 'n': num_procs = strtol(optarg, NULL, 0); @@ -322,6 +330,13 @@ int main(int argc, char * const *argv) case 'k': kill_random = 1; break; + case 'm': + mutex = tdb_runtime_check_for_robust_mutexes(); + if (!mutex) { + printf("tdb_runtime_check_for_robust_mutexes() returned false\n"); + exit(1); + } + break; default: usage(); } @@ -443,7 +458,13 @@ int main(int argc, char * const *argv) done: if (error_count == 0) { - db = tdb_open_ex(test_tdb, hash_size, TDB_DEFAULT, + int tdb_flags = TDB_DEFAULT; + + if (mutex) { + tdb_flags |= TDB_NOLOCK; + } + + db = tdb_open_ex(test_tdb, hash_size, tdb_flags, O_RDWR, 0, &log_ctx, NULL); if (!db) { fatal("db open failed\n"); diff --git a/lib/tdb/wscript b/lib/tdb/wscript index 7019693..e4309f4 100644 --- a/lib/tdb/wscript +++ b/lib/tdb/wscript @@ -1,7 +1,7 @@ #!/usr/bin/env python APPNAME = 'tdb' -VERSION = '1.2.13' +VERSION = '1.3.0' blddir = 'bin' @@ -40,12 +40,23 @@ tdb1_unit_tests = [ 'run-traverse-in-transaction', 'run-wronghash-fail', 'run-zero-append' + 'run-mutex-openflags2', + 'run-mutex-trylock', + 'run-mutex-allrecord-bench', + 'run-mutex-allrecord-trylock', + 'run-mutex-allrecord-block', + 'run-mutex-die', + 'run-mutex1', ] def set_options(opt): opt.BUILTIN_DEFAULT('replace') 
opt.PRIVATE_EXTENSION_DEFAULT('tdb', noextension='tdb') opt.RECURSE('lib/replace') + opt.add_option('--disable-tdb-mutex-locking', + help=("Disable the use of pthread robust mutexes"), + action="store_true", dest='disable_tdb_mutex_locking', + default=False) if opt.IN_LAUNCH_DIR(): opt.add_option('--disable-python', help=("disable the pytdb module"), @@ -53,6 +64,11 @@ def set_options(opt): def configure(conf): + conf.env.disable_tdb_mutex_locking = getattr(Options.options, + 'disable_tdb_mutex_locking', + False) + if not conf.env.disable_tdb_mutex_locking: + conf.env.replace_add_global_pthread = True conf.RECURSE('lib/replace') conf.env.standalone_tdb = conf.IN_LAUNCH_DIR() @@ -68,6 +84,11 @@ def configure(conf): conf.env.disable_python = getattr(Options.options, 'disable_python', False) + if (conf.CONFIG_SET('HAVE_ROBUST_MUTEXES') and + conf.env.building_tdb and + not conf.env.disable_tdb_mutex_locking): + conf.define('USE_TDB_MUTEX_LOCKING', 1) + conf.CHECK_XSLTPROC_MANPAGES() if not conf.env.disable_python: @@ -87,10 +108,12 @@ def configure(conf): def build(bld): bld.RECURSE('lib/replace') - COMMON_SRC = bld.SUBDIR('common', - '''check.c error.c tdb.c traverse.c - freelistcheck.c lock.c dump.c freelist.c - io.c open.c transaction.c hash.c summary.c rescue.c''') + COMMON_FILES='''check.c error.c tdb.c traverse.c + freelistcheck.c lock.c dump.c freelist.c + io.c open.c transaction.c hash.c summary.c rescue.c + mutex.c''' + + COMMON_SRC = bld.SUBDIR('common', COMMON_FILES) if bld.env.standalone_tdb: bld.env.PKGCONFIGDIR = '${LIBDIR}/pkgconfig' @@ -99,9 +122,15 @@ def build(bld): private_library = True if not bld.CONFIG_SET('USING_SYSTEM_TDB'): + + tdb_deps = 'replace' + + if bld.CONFIG_SET('USE_TDB_MUTEX_LOCKING'): + tdb_deps += ' pthread' + bld.SAMBA_LIBRARY('tdb', COMMON_SRC, - deps='replace', + deps=tdb_deps, includes='include', abi_directory='ABI', abi_match='tdb_*', @@ -137,7 +166,7 @@ def build(bld): # FIXME: This hardcoded list is stupid, stupid, stupid. 
bld.SAMBA_SUBSYSTEM('tdb-test-helpers', 'test/external-agent.c test/lock-tracking.c test/logging.c', - 'replace', + tdb_deps, includes='include') for t in tdb1_unit_tests: -- 1.7.9.5 From d56be5cc6b4200aafb11eef8ef84296b5796ffc9 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 11 Apr 2014 10:26:39 +0200 Subject: [PATCH 23/49] Revert "ORIG-COMMIT....=> SPLITTED: tdb: Add mutex support" This reverts commit a7a4e500e742546af11388915eef6d436e01fe18. --- lib/tdb/ABI/tdb-1.3.0.sigs | 68 --- lib/tdb/common/check.c | 4 +- lib/tdb/common/io.c | 92 +-- lib/tdb/common/lock.c | 79 +-- lib/tdb/common/mutex.c | 902 ---------------------------- lib/tdb/common/open.c | 197 +----- lib/tdb/common/tdb.c | 9 - lib/tdb/common/tdb_private.h | 30 +- lib/tdb/common/transaction.c | 3 +- lib/tdb/docs/mutex.txt | 106 ---- lib/tdb/include/tdb.h | 32 - lib/tdb/test/lock-tracking.c | 34 +- lib/tdb/test/logging.c | 3 + lib/tdb/test/run-3G-file.c | 7 +- lib/tdb/test/run-bad-tdb-header.c | 1 - lib/tdb/test/run-check.c | 1 - lib/tdb/test/run-corrupt.c | 1 - lib/tdb/test/run-die-during-transaction.c | 4 +- lib/tdb/test/run-endian.c | 1 - lib/tdb/test/run-incompatible.c | 1 - lib/tdb/test/run-mutex-allrecord-bench.c | 75 --- lib/tdb/test/run-mutex-allrecord-block.c | 101 ---- lib/tdb/test/run-mutex-allrecord-trylock.c | 97 --- lib/tdb/test/run-mutex-die.c | 266 -------- lib/tdb/test/run-mutex-openflags2.c | 94 --- lib/tdb/test/run-mutex-trylock.c | 106 ---- lib/tdb/test/run-mutex1.c | 120 ---- lib/tdb/test/run-nested-transactions.c | 1 - lib/tdb/test/run-nested-traverse.c | 1 - lib/tdb/test/run-no-lock-during-traverse.c | 1 - lib/tdb/test/run-oldhash.c | 1 - lib/tdb/test/run-open-during-transaction.c | 1 - lib/tdb/test/run-readonly-check.c | 1 - lib/tdb/test/run-rescue-find_entry.c | 1 - lib/tdb/test/run-rescue.c | 1 - lib/tdb/test/run-rwlock-check.c | 1 - lib/tdb/test/run-summary.c | 1 - lib/tdb/test/run-transaction-expand.c | 1 - lib/tdb/test/run-traverse-in-transaction.c | 1 - 
lib/tdb/test/run-wronghash-fail.c | 1 - lib/tdb/test/run-zero-append.c | 1 - lib/tdb/test/run.c | 1 - lib/tdb/tools/tdbtorture.c | 29 +- lib/tdb/wscript | 43 +- 44 files changed, 76 insertions(+), 2445 deletions(-) delete mode 100644 lib/tdb/ABI/tdb-1.3.0.sigs delete mode 100644 lib/tdb/common/mutex.c delete mode 100644 lib/tdb/docs/mutex.txt delete mode 100644 lib/tdb/test/run-mutex-allrecord-bench.c delete mode 100644 lib/tdb/test/run-mutex-allrecord-block.c delete mode 100644 lib/tdb/test/run-mutex-allrecord-trylock.c delete mode 100644 lib/tdb/test/run-mutex-die.c delete mode 100644 lib/tdb/test/run-mutex-openflags2.c delete mode 100644 lib/tdb/test/run-mutex-trylock.c delete mode 100644 lib/tdb/test/run-mutex1.c diff --git a/lib/tdb/ABI/tdb-1.3.0.sigs b/lib/tdb/ABI/tdb-1.3.0.sigs deleted file mode 100644 index 7d3e469..0000000 --- a/lib/tdb/ABI/tdb-1.3.0.sigs +++ /dev/null @@ -1,68 +0,0 @@ -tdb_add_flags: void (struct tdb_context *, unsigned int) -tdb_append: int (struct tdb_context *, TDB_DATA, TDB_DATA) -tdb_chainlock: int (struct tdb_context *, TDB_DATA) -tdb_chainlock_mark: int (struct tdb_context *, TDB_DATA) -tdb_chainlock_nonblock: int (struct tdb_context *, TDB_DATA) -tdb_chainlock_read: int (struct tdb_context *, TDB_DATA) -tdb_chainlock_unmark: int (struct tdb_context *, TDB_DATA) -tdb_chainunlock: int (struct tdb_context *, TDB_DATA) -tdb_chainunlock_read: int (struct tdb_context *, TDB_DATA) -tdb_check: int (struct tdb_context *, int (*)(TDB_DATA, TDB_DATA, void *), void *) -tdb_close: int (struct tdb_context *) -tdb_delete: int (struct tdb_context *, TDB_DATA) -tdb_dump_all: void (struct tdb_context *) -tdb_enable_seqnum: void (struct tdb_context *) -tdb_error: enum TDB_ERROR (struct tdb_context *) -tdb_errorstr: const char *(struct tdb_context *) -tdb_exists: int (struct tdb_context *, TDB_DATA) -tdb_fd: int (struct tdb_context *) -tdb_fetch: TDB_DATA (struct tdb_context *, TDB_DATA) -tdb_firstkey: TDB_DATA (struct tdb_context *) 
-tdb_freelist_size: int (struct tdb_context *) -tdb_get_flags: int (struct tdb_context *) -tdb_get_logging_private: void *(struct tdb_context *) -tdb_get_seqnum: int (struct tdb_context *) -tdb_hash_size: int (struct tdb_context *) -tdb_increment_seqnum_nonblock: void (struct tdb_context *) -tdb_jenkins_hash: unsigned int (TDB_DATA *) -tdb_lock_nonblock: int (struct tdb_context *, int, int) -tdb_lockall: int (struct tdb_context *) -tdb_lockall_mark: int (struct tdb_context *) -tdb_lockall_nonblock: int (struct tdb_context *) -tdb_lockall_read: int (struct tdb_context *) -tdb_lockall_read_nonblock: int (struct tdb_context *) -tdb_lockall_unmark: int (struct tdb_context *) -tdb_log_fn: tdb_log_func (struct tdb_context *) -tdb_map_size: size_t (struct tdb_context *) -tdb_name: const char *(struct tdb_context *) -tdb_nextkey: TDB_DATA (struct tdb_context *, TDB_DATA) -tdb_null: dptr = 0xXXXX, dsize = 0 -tdb_open: struct tdb_context *(const char *, int, int, int, mode_t) -tdb_open_ex: struct tdb_context *(const char *, int, int, int, mode_t, const struct tdb_logging_context *, tdb_hash_func) -tdb_parse_record: int (struct tdb_context *, TDB_DATA, int (*)(TDB_DATA, TDB_DATA, void *), void *) -tdb_printfreelist: int (struct tdb_context *) -tdb_remove_flags: void (struct tdb_context *, unsigned int) -tdb_reopen: int (struct tdb_context *) -tdb_reopen_all: int (int) -tdb_repack: int (struct tdb_context *) -tdb_rescue: int (struct tdb_context *, void (*)(TDB_DATA, TDB_DATA, void *), void *) -tdb_runtime_check_for_robust_mutexes: bool (void) -tdb_set_logging_function: void (struct tdb_context *, const struct tdb_logging_context *) -tdb_set_max_dead: void (struct tdb_context *, int) -tdb_setalarm_sigptr: void (struct tdb_context *, volatile sig_atomic_t *) -tdb_store: int (struct tdb_context *, TDB_DATA, TDB_DATA, int) -tdb_summary: char *(struct tdb_context *) -tdb_transaction_cancel: int (struct tdb_context *) -tdb_transaction_commit: int (struct tdb_context *) 
-tdb_transaction_prepare_commit: int (struct tdb_context *) -tdb_transaction_start: int (struct tdb_context *) -tdb_transaction_start_nonblock: int (struct tdb_context *) -tdb_transaction_write_lock_mark: int (struct tdb_context *) -tdb_transaction_write_lock_unmark: int (struct tdb_context *) -tdb_traverse: int (struct tdb_context *, tdb_traverse_func, void *) -tdb_traverse_read: int (struct tdb_context *, tdb_traverse_func, void *) -tdb_unlock: int (struct tdb_context *, int, int) -tdb_unlockall: int (struct tdb_context *) -tdb_unlockall_read: int (struct tdb_context *) -tdb_validate_freelist: int (struct tdb_context *, int *) -tdb_wipe_all: int (struct tdb_context *) diff --git a/lib/tdb/common/check.c b/lib/tdb/common/check.c index e632af5..9f9d870 100644 --- a/lib/tdb/common/check.c +++ b/lib/tdb/common/check.c @@ -39,9 +39,7 @@ static bool tdb_check_header(struct tdb_context *tdb, tdb_off_t *recovery) if (hdr.version != TDB_VERSION) goto corrupt; - if (hdr.rwlocks != 0 && - hdr.rwlocks != TDB_FEATURE_FLAG_MAGIC && - hdr.rwlocks != TDB_HASH_RWLOCK_MAGIC) + if (hdr.rwlocks != 0 && hdr.rwlocks != TDB_HASH_RWLOCK_MAGIC) goto corrupt; tdb_header_hash(tdb, &h1, &h2); diff --git a/lib/tdb/common/io.c b/lib/tdb/common/io.c index 537a9e9..11dfefd 100644 --- a/lib/tdb/common/io.c +++ b/lib/tdb/common/io.c @@ -28,69 +28,6 @@ #include "tdb_private.h" -/* - * We prepend the mutex area, so fixup offsets. See mutex.c for details. 
- */ - -static bool tdb_adjust_offset(struct tdb_context *tdb, off_t *off) -{ - tdb_off_t tdb_off = *off; - size_t mutex_size = tdb_mutex_size(tdb); - - if (!tdb_add_off_t(tdb_off, mutex_size, &tdb_off)) { - errno = EIO; - return false; - } - *off = tdb_off; - return true; -} - -static ssize_t tdb_pwrite(struct tdb_context *tdb, const void *buf, - size_t count, off_t offset) -{ - if (!tdb_adjust_offset(tdb, &offset)) { - return -1; - } - return pwrite(tdb->fd, buf, count, offset); -} - -static ssize_t tdb_pread(struct tdb_context *tdb, void *buf, - size_t count, off_t offset) -{ - if (!tdb_adjust_offset(tdb, &offset)) { - return -1; - } - return pread(tdb->fd, buf, count, offset); -} - -static int tdb_ftruncate(struct tdb_context *tdb, off_t length) -{ - if (!tdb_adjust_offset(tdb, &length)) { - return -1; - } - return ftruncate(tdb->fd, length); -} - -static int tdb_fstat(struct tdb_context *tdb, struct stat *buf) -{ - size_t mutex_len; - int ret; - - ret = fstat(tdb->fd, buf); - if (ret == -1) { - return -1; - } - - mutex_len = tdb_mutex_size(tdb); - if (buf->st_size < mutex_len) { - errno = EIO; - return -1; - } - buf->st_size -= mutex_len; - - return ret; -} - /* check for an out of bounds access - if it is out of bounds then see if the database has been expanded by someone else and expand if necessary @@ -121,7 +58,7 @@ static int tdb_oob(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len, return -1; } - if (tdb_fstat(tdb, &st) == -1) { + if (fstat(tdb->fd, &st) == -1) { tdb->ecode = TDB_ERR_IO; return -1; } @@ -185,18 +122,16 @@ static int tdb_write(struct tdb_context *tdb, tdb_off_t off, tdb->ecode = TDB_ERR_IO; return -1; #else - ssize_t written; - - written = tdb_pwrite(tdb, buf, len, off); - + ssize_t written = pwrite(tdb->fd, buf, len, off); if ((written != (ssize_t)len) && (written != -1)) { /* try once more */ tdb->ecode = TDB_ERR_IO; TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: wrote only " "%zi of %u bytes at %u, trying once more\n", written, len, 
off)); - written = tdb_pwrite(tdb, (const char *)buf+written, - len-written, off+written); + written = pwrite(tdb->fd, (const char *)buf+written, + len-written, + off+written); } if (written == -1) { /* Ensure ecode is set for log fn. */ @@ -241,9 +176,7 @@ static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf, tdb->ecode = TDB_ERR_IO; return -1; #else - ssize_t ret; - - ret = tdb_pread(tdb, buf, len, off); + ssize_t ret = pread(tdb->fd, buf, len, off); if (ret != (ssize_t)len) { /* Ensure ecode is set for log fn. */ tdb->ecode = TDB_ERR_IO; @@ -325,8 +258,7 @@ int tdb_mmap(struct tdb_context *tdb) if (should_mmap(tdb)) { tdb->map_ptr = mmap(NULL, tdb->map_size, PROT_READ|(tdb->read_only? 0:PROT_WRITE), - MAP_SHARED|MAP_FILE, tdb->fd, - tdb_mutex_size(tdb)); + MAP_SHARED|MAP_FILE, tdb->fd, 0); /* * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!! @@ -371,12 +303,12 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t ad return -1; } - if (tdb_ftruncate(tdb, new_size) == -1) { + if (ftruncate(tdb->fd, new_size) == -1) { char b = 0; - ssize_t written = tdb_pwrite(tdb, &b, 1, new_size - 1); + ssize_t written = pwrite(tdb->fd, &b, 1, new_size - 1); if (written == 0) { /* try once more, potentially revealing errno */ - written = tdb_pwrite(tdb, &b, 1, new_size - 1); + written = pwrite(tdb->fd, &b, 1, new_size - 1); } if (written == 0) { /* again - give up, guessing errno */ @@ -396,10 +328,10 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t ad memset(buf, TDB_PAD_BYTE, sizeof(buf)); while (addition) { size_t n = addition>sizeof(buf)?sizeof(buf):addition; - ssize_t written = tdb_pwrite(tdb, buf, n, size); + ssize_t written = pwrite(tdb->fd, buf, n, size); if (written == 0) { /* prevent infinite loops: try _once_ more */ - written = tdb_pwrite(tdb, buf, n, size); + written = pwrite(tdb->fd, buf, n, size); } if (written == 0) { /* give up, trying to provide a useful errno */ diff --git 
a/lib/tdb/common/lock.c b/lib/tdb/common/lock.c index 6644c40..486de79 100644 --- a/lib/tdb/common/lock.c +++ b/lib/tdb/common/lock.c @@ -38,15 +38,6 @@ static int fcntl_lock(struct tdb_context *tdb, struct flock fl; int cmd; -#ifdef USE_TDB_MUTEX_LOCKING - { - int ret; - if (tdb_mutex_lock(tdb, rw, off, len, waitflag, &ret)) { - return ret; - } - } -#endif - fl.l_type = rw; fl.l_whence = SEEK_SET; fl.l_start = off; @@ -119,15 +110,6 @@ static int fcntl_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len) fclose(locks); #endif -#ifdef USE_TDB_MUTEX_LOCKING - { - int ret; - if (tdb_mutex_unlock(tdb, rw, off, len, &ret)) { - return ret; - } - } -#endif - fl.l_type = F_UNLCK; fl.l_whence = SEEK_SET; fl.l_start = off; @@ -266,27 +248,13 @@ int tdb_allrecord_upgrade(struct tdb_context *tdb) return -1; } - if (tdb_have_mutexes(tdb)) { - ret = tdb_mutex_allrecord_upgrade(tdb); - if (ret == -1) { - goto fail; - } - ret = tdb_brlock_retry(tdb, F_WRLCK, lock_offset(tdb->hash_size), - 0, TDB_LOCK_WAIT|TDB_LOCK_PROBE); - if (ret == -1) { - tdb_mutex_allrecord_downgrade(tdb); - } - } else { - ret = tdb_brlock_retry(tdb, F_WRLCK, FREELIST_TOP, 0, - TDB_LOCK_WAIT|TDB_LOCK_PROBE); - } - + ret = tdb_brlock_retry(tdb, F_WRLCK, FREELIST_TOP, 0, + TDB_LOCK_WAIT|TDB_LOCK_PROBE); if (ret == 0) { tdb->allrecord_lock.ltype = F_WRLCK; tdb->allrecord_lock.off = 0; return 0; } -fail: TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_allrecord_upgrade failed\n")); return -1; } @@ -625,8 +593,6 @@ static int tdb_chainlock_gradual(struct tdb_context *tdb, int tdb_allrecord_lock(struct tdb_context *tdb, int ltype, enum tdb_lock_flags flags, bool upgradable) { - int ret; - switch (tdb_allrecord_check(tdb, ltype, flags, upgradable)) { case -1: return -1; @@ -641,27 +607,16 @@ int tdb_allrecord_lock(struct tdb_context *tdb, int ltype, * * It is (1) which cause the starvation problem, so we're only * gradual for that. 
*/ - - if (tdb_have_mutexes(tdb)) { - ret = tdb_mutex_allrecord_lock(tdb, ltype, flags); - } else { - ret = tdb_chainlock_gradual(tdb, ltype, flags, FREELIST_TOP, - tdb->hash_size * 4); - } - - if (ret == -1) { + if (tdb_chainlock_gradual(tdb, ltype, flags, FREELIST_TOP, + tdb->hash_size * 4) == -1) { return -1; } /* Grab individual record locks. */ if (tdb_brlock(tdb, ltype, lock_offset(tdb->hash_size), 0, flags) == -1) { - if (tdb_have_mutexes(tdb)) { - tdb_mutex_allrecord_unlock(tdb); - } else { - tdb_brunlock(tdb, ltype, FREELIST_TOP, - tdb->hash_size * 4); - } + tdb_brunlock(tdb, ltype, FREELIST_TOP, + tdb->hash_size * 4); return -1; } @@ -717,25 +672,9 @@ int tdb_allrecord_unlock(struct tdb_context *tdb, int ltype, bool mark_lock) return 0; } - if (!mark_lock) { - int ret; - - if (tdb_have_mutexes(tdb)) { - ret = tdb_mutex_allrecord_unlock(tdb); - if (ret == 0) { - ret = tdb_brunlock(tdb, ltype, - lock_offset(tdb->hash_size), - 0); - } - } else { - ret = tdb_brunlock(tdb, ltype, FREELIST_TOP, 0); - } - - if (ret != 0) { - TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlockall failed " - "(%s)\n", strerror(errno))); - return -1; - } + if (!mark_lock && tdb_brunlock(tdb, ltype, FREELIST_TOP, 0)) { + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlockall failed (%s)\n", strerror(errno))); + return -1; } tdb->allrecord_lock.count = 0; diff --git a/lib/tdb/common/mutex.c b/lib/tdb/common/mutex.c deleted file mode 100644 index c158b6c..0000000 --- a/lib/tdb/common/mutex.c +++ /dev/null @@ -1,902 +0,0 @@ -/* - Unix SMB/CIFS implementation. - - trivial database library - - Copyright (C) Volker Lendecke 2012,2013 - - ** NOTE! The following LGPL license applies to the tdb - ** library. 
This does NOT imply that all of Samba is released - ** under the LGPL - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 3 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, see . -*/ -#include "tdb_private.h" -#include "system/threads.h" - -#ifdef USE_TDB_MUTEX_LOCKING - -/* - * If we run with mutexes, we store the "struct tdb_mutexes" at the - * beginning of the file. We store an additional tdb_header right - * beyond the mutex area, page aligned. All the offsets within the tdb - * are relative to the area behind the mutex area. tdb->map_ptr points - * behind the mmap area as well, so the read and write path in the - * mutex case can remain unchanged. - * - * Early in the mutex development the mutexes were placed between the hash - * chain pointers and the real tdb data. This had two drawbacks: First, it - * made pointer calculations more complex. Second, we had to mmap the mutex - * area twice. One was the normal map_ptr in the tdb. This frequently changed - * from within tdb_oob. At least the Linux glibc robust mutex code assumes - * constant pointers in memory, so a constantly changing mmap area destroys - * the mutex list. So we had to mmap the first bytes of the file with a second - * mmap call. With that scheme, very weird errors happened that could be - * easily fixed by doing the mutex mmap in a second file. 
It seemed that - * mapping the same memory area twice does not end up in accessing the same - * physical page, looking at the mutexes in gdb it seemed that old data showed - * up after some re-mapping. To avoid a separate mutex file, the code now puts - * the real content of the tdb file after the mutex area. This way we do not - * have overlapping mmap areas, the mutex area is mmapped once and not - * changed, the tdb data area's mmap is constantly changed but does not - * overlap. - */ - -struct tdb_mutexes { - struct tdb_header hdr; - pthread_mutex_t allrecord_mutex; /* protect allrecord_lock */ - - short int allrecord_lock; /* F_UNLCK: free, - F_RDLCK: shared, - F_WRLCK: exclusive */ - - pthread_mutex_t hashchains[1]; /* We allocate more */ -}; - -bool tdb_have_mutexes(struct tdb_context *tdb) -{ - return ((tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) != 0); -} - -size_t tdb_mutex_size(struct tdb_context *tdb) -{ - size_t mutex_size; - - if (!tdb_have_mutexes(tdb)) { - return 0; - } - - mutex_size = sizeof(struct tdb_mutexes); - mutex_size += tdb->hash_size * sizeof(pthread_mutex_t); - - return TDB_ALIGN(mutex_size, tdb->page_size); -} - -/* - * Get the index for a chain mutex - */ -static bool tdb_mutex_index(struct tdb_context *tdb, off_t off, off_t len, - unsigned *idx) -{ - /* - * Weird but true: We fcntl lock 1 byte at an offset 4 bytes before - * the 4 bytes of the freelist start and the hash chain that is about - * to be locked. See lock_offset() where the freelist is -1 vs the - * "+1" in TDB_HASH_TOP(). Because the mutex array is represented in - * the tdb file itself as data, we need to adjust the offset here. 
- */ - const off_t freelist_lock_ofs = FREELIST_TOP - sizeof(tdb_off_t); - - if (!tdb_have_mutexes(tdb)) { - return false; - } - if (len != 1) { - /* Possibly the allrecord lock */ - return false; - } - if (off < freelist_lock_ofs) { - /* One of the special locks */ - return false; - } - if (tdb->hash_size == 0) { - /* tdb not initialized yet, called from tdb_open_ex() */ - return false; - } - if (off >= TDB_DATA_START(tdb->hash_size)) { - /* Single record lock from traverses */ - return false; - } - - /* - * Now we know it's a freelist or hash chain lock. Those are always 4 - * byte aligned. Paranoia check. - */ - if ((off % sizeof(tdb_off_t)) != 0) { - abort(); - } - - /* - * Re-index the fcntl offset into an offset into the mutex array - */ - off -= freelist_lock_ofs; /* rebase to index 0 */ - off /= sizeof(tdb_off_t); /* 0 for freelist 1-n for hashchain */ - - *idx = off; - return true; -} - -static int chain_mutex_lock(pthread_mutex_t *m, bool waitflag) -{ - int ret; - - if (waitflag) { - ret = pthread_mutex_lock(m); - } else { - ret = pthread_mutex_trylock(m); - } - if (ret != EOWNERDEAD) { - return ret; - } - - /* - * For chainlocks, we don't do any cleanup (yet?) - */ - return pthread_mutex_consistent(m); -} - -static int allrecord_mutex_lock(struct tdb_mutexes *m, bool waitflag) -{ - int ret; - - if (waitflag) { - ret = pthread_mutex_lock(&m->allrecord_mutex); - } else { - ret = pthread_mutex_trylock(&m->allrecord_mutex); - } - if (ret != EOWNERDEAD) { - return ret; - } - - /* - * The allrecord lock holder died. We need to reset the allrecord_lock - * to F_UNLCK. This should also be the indication for - * tdb_needs_recovery. 
- */ - m->allrecord_lock = F_UNLCK; - - return pthread_mutex_consistent(&m->allrecord_mutex); -} - -bool tdb_mutex_lock(struct tdb_context *tdb, int rw, off_t off, off_t len, - bool waitflag, int *pret) -{ - struct tdb_mutexes *m = tdb->mutexes; - pthread_mutex_t *chain; - int ret; - unsigned idx; - bool allrecord_ok; - - if (!tdb_mutex_index(tdb, off, len, &idx)) { - return false; - } - chain = &m->hashchains[idx]; - -again: - ret = chain_mutex_lock(chain, waitflag); - if (ret == EBUSY) { - ret = EAGAIN; - } - if (ret != 0) { - errno = ret; - goto fail; - } - - if (tdb->num_lockrecs > 0) { - /* - * We can only check the allrecord lock once. If we do it with - * one chain mutex locked, we will deadlock with the allrecord - * locker process in the following way: We lock the first hash - * chain, we check for the allrecord lock. We keep the hash - * chain locked. Then the allrecord locker comes and takes the - * allrecord lock. It walks the list of chain mutexes, locking - * them all in sequence. Meanwhile, we have the chain mutex - * locked, so the allrecord locker blocks trying to lock our - * chain mutex. Then we come in and try to lock the second - * chain lock, which in most cases will be the freelist. We - * see that the allrecord lock is locked and put ourselves on - * the allrecord_waiters condition variable. This will never - * be signalled though because the allrecord locker waits for - * us to give up the chain lock. - */ - - *pret = 0; - return true; - } - - /* - * Check if someone is has the allrecord lock: queue if so. 
- */ - - allrecord_ok = false; - - if (m->allrecord_lock == F_UNLCK) { - /* - * allrecord lock not taken - */ - allrecord_ok = true; - } - - if ((m->allrecord_lock == F_RDLCK) && (rw == F_RDLCK)) { - /* - * allrecord shared lock taken, but we only want to read - */ - allrecord_ok = true; - } - - if (allrecord_ok) { - *pret = 0; - return true; - } - - ret = pthread_mutex_unlock(chain); - if (ret != 0) { - TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock" - "(chain_mutex) failed: %s\n", strerror(ret))); - errno = ret; - goto fail; - } - ret = allrecord_mutex_lock(m, waitflag); - if (ret != 0) { - if (waitflag || (ret != EBUSY)) { - TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_%slock" - "(allrecord_mutex) failed: %s\n", - waitflag ? "" : "try_", strerror(ret))); - } - errno = EAGAIN; - goto fail; - } - ret = pthread_mutex_unlock(&m->allrecord_mutex); - if (ret != 0) { - TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock" - "(allrecord_mutex) failed: %s\n", strerror(ret))); - errno = ret; - goto fail; - } - goto again; - -fail: - *pret = -1; - return true; -} - -bool tdb_mutex_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len, - int *pret) -{ - struct tdb_mutexes *m = tdb->mutexes; - pthread_mutex_t *chain; - int ret; - unsigned idx; - - if (!tdb_mutex_index(tdb, off, len, &idx)) { - return false; - } - chain = &m->hashchains[idx]; - - ret = pthread_mutex_unlock(chain); - if (ret == 0) { - *pret = 0; - return true; - } - *pret = -1; - return true; -} - -int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype, - enum tdb_lock_flags flags) -{ - struct tdb_mutexes *m = tdb->mutexes; - int ret; - uint32_t i; - - if (tdb->flags & TDB_NOLOCK) { - return 0; - } - - if (flags & TDB_LOCK_MARK_ONLY) { - return 0; - } - - ret = allrecord_mutex_lock(m, flags & TDB_LOCK_WAIT); - if (ret != 0) { - if (!(flags & TDB_LOCK_PROBE)) { - TDB_LOG((tdb, TDB_DEBUG_TRACE, "pthread_mutex_trylock " - "failed: %s\n", strerror(ret))); - } - return ret; - } - - if 
(m->allrecord_lock != F_UNLCK) { - TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n", - (int)m->allrecord_lock)); - goto fail_unlock_allrecord_mutex; - } - m->allrecord_lock = (ltype == F_RDLCK) ? F_RDLCK : F_WRLCK; - - for (i=0; ihash_size; i++) { - - /* ignore hashchains[0], the freelist */ - pthread_mutex_t *chain = &m->hashchains[i+1]; - - ret = chain_mutex_lock(chain, flags & TDB_LOCK_WAIT); - if (ret != 0) { - if (!(flags & TDB_LOCK_PROBE)) { - TDB_LOG((tdb, TDB_DEBUG_TRACE, - "pthread_mutex_trylock " - "failed: %s\n", strerror(ret))); - } - goto fail_unroll_allrecord_lock; - } - - ret = pthread_mutex_unlock(chain); - if (ret != 0) { - TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock" - "(chainlock) failed: %s\n", strerror(ret))); - goto fail_unroll_allrecord_lock; - } - } - /* - * We leave this routine with m->allrecord_mutex locked - */ - return 0; - -fail_unroll_allrecord_lock: - m->allrecord_lock = F_UNLCK; - -fail_unlock_allrecord_mutex: - ret = pthread_mutex_unlock(&m->allrecord_mutex); - if (ret != 0) { - TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock" - "(allrecord_mutex) failed: %s\n", strerror(ret))); - } - return -1; -} - -int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb) -{ - struct tdb_mutexes *m = tdb->mutexes; - int ret; - uint32_t i; - - if (tdb->flags & TDB_NOLOCK) { - return 0; - } - - if (m->allrecord_lock != F_RDLCK) { - TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n", - (int)m->allrecord_lock)); - return -1; - } - - m->allrecord_lock = F_WRLCK; - - for (i=0; ihash_size; i++) { - - /* ignore hashchains[0], the freelist */ - pthread_mutex_t *chain = &m->hashchains[i+1]; - - ret = chain_mutex_lock(chain, true); - if (ret != 0) { - TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_lock" - "(chainlock) failed: %s\n", strerror(ret))); - goto fail_unroll_allrecord_lock; - } - - ret = pthread_mutex_unlock(chain); - if (ret != 0) { - TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock" - "(chainlock) failed: %s\n", 
strerror(ret))); - return -1; - } - } - return 0; - -fail_unroll_allrecord_lock: - m->allrecord_lock = F_RDLCK; - return -1; -} - -int tdb_mutex_allrecord_downgrade(struct tdb_context *tdb) -{ - struct tdb_mutexes *m = tdb->mutexes; - - if (m->allrecord_lock != F_WRLCK) { - TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n", - (int)m->allrecord_lock)); - return -1; - } - - m->allrecord_lock = F_RDLCK; - return 0; -} - - -int tdb_mutex_allrecord_unlock(struct tdb_context *tdb) -{ - struct tdb_mutexes *m = tdb->mutexes; - short old; - int ret; - - if (tdb->flags & TDB_NOLOCK) { - return 0; - } - - if ((m->allrecord_lock != F_RDLCK) && (m->allrecord_lock != F_WRLCK)) { - TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n", - (int)m->allrecord_lock)); - return -1; - } - - old = m->allrecord_lock; - m->allrecord_lock = F_UNLCK; - - ret = pthread_mutex_unlock(&m->allrecord_mutex); - if (ret != 0) { - m->allrecord_lock = old; - TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock" - "(allrecord_mutex) failed: %s\n", strerror(ret))); - return -1; - } - return 0; -} - -int tdb_mutex_init(struct tdb_context *tdb) -{ - struct tdb_mutexes *m; - pthread_mutexattr_t ma; - int i, ret; - - ret = tdb_mutex_mmap(tdb); - if (ret == -1) { - return errno; - } - m = tdb->mutexes; - - ret = pthread_mutexattr_init(&ma); - if (ret != 0) { - goto fail_munmap; - } - ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK); - if (ret != 0) { - goto fail; - } - ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED); - if (ret != 0) { - goto fail; - } - ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST); - if (ret != 0) { - goto fail; - } - - for (i=0; ihash_size+1; i++) { - pthread_mutex_t *chain = &m->hashchains[i]; - - ret = pthread_mutex_init(chain, &ma); - if (ret != 0) { - goto fail; - } - } - - m->allrecord_lock = F_UNLCK; - - ret = pthread_mutex_init(&m->allrecord_mutex, &ma); - if (ret != 0) { - goto fail; - } - ret = 0; -fail: - 
pthread_mutexattr_destroy(&ma); -fail_munmap: - tdb_mutex_munmap(tdb); - return ret; -} - -int tdb_mutex_mmap(struct tdb_context *tdb) -{ - size_t len; - void *ptr; - - len = tdb_mutex_size(tdb); - if (len == 0) { - return 0; - } - - ptr = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FILE, - tdb->fd, 0); - - if (ptr == MAP_FAILED) { - return -1; - } - tdb->mutexes = (struct tdb_mutexes *)ptr; - return 0; -} - -int tdb_mutex_munmap(struct tdb_context *tdb) -{ - size_t len; - - len = tdb_mutex_size(tdb); - if (len == 0) { - return 0; - } - - return munmap(tdb->mutexes, len); -} - -static bool tdb_mutex_locking_cached; - -bool tdb_mutex_locking_supported(void) -{ - pthread_mutexattr_t ma; - pthread_mutex_t m; - int ret; - static bool initialized; - - if (initialized) { - return tdb_mutex_locking_cached; - } - - initialized = true; - - ret = pthread_mutexattr_init(&ma); - if (ret != 0) { - return false; - } - ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK); - if (ret != 0) { - goto cleanup_ma; - } - ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED); - if (ret != 0) { - goto cleanup_ma; - } - ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST); - if (ret != 0) { - goto cleanup_ma; - } - ret = pthread_mutex_init(&m, &ma); - if (ret != 0) { - goto cleanup_ma; - } - ret = pthread_mutex_lock(&m); - if (ret != 0) { - goto cleanup_m; - } - /* - * This makes sure we have real mutexes - * from a threading library instead of just - * stubs from libc. 
- */ - ret = pthread_mutex_lock(&m); - if (ret != EDEADLK) { - goto cleanup_lock; - } - ret = pthread_mutex_unlock(&m); - if (ret != 0) { - goto cleanup_m; - } - - tdb_mutex_locking_cached = true; - goto cleanup_m; - -cleanup_lock: - pthread_mutex_unlock(&m); -cleanup_m: - pthread_mutex_destroy(&m); -cleanup_ma: - pthread_mutexattr_destroy(&ma); - return tdb_mutex_locking_cached; -} - -static void (*tdb_robust_mutext_old_handler)(int) = SIG_ERR; -static pid_t tdb_robust_mutex_pid = -1; - -static void tdb_robust_mutex_handler(int sig) -{ - if (tdb_robust_mutex_pid != -1) { - pid_t pid; - int status; - - pid = waitpid(tdb_robust_mutex_pid, &status, WNOHANG); - if (pid == tdb_robust_mutex_pid) { - tdb_robust_mutex_pid = -1; - return; - } - } - - if (tdb_robust_mutext_old_handler == SIG_DFL) { - return; - } - - if (tdb_robust_mutext_old_handler == SIG_IGN) { - return; - } - if (tdb_robust_mutext_old_handler == SIG_ERR) { - return; - } - - tdb_robust_mutext_old_handler(sig); -} - -_PUBLIC_ bool tdb_runtime_check_for_robust_mutexes(void) -{ - void *ptr; - pthread_mutex_t *m; - pthread_mutexattr_t ma; - int ret = 1; - int pipe_down[2] = { -1, -1 }; - int pipe_up[2] = { -1, -1 }; - ssize_t nread; - char c = 0; - bool ok; - int status; - static bool initialized; - - if (initialized) { - return tdb_mutex_locking_cached; - } - - initialized = true; - - ok = tdb_mutex_locking_supported(); - if (!ok) { - return false; - } - - tdb_mutex_locking_cached = false; - - ptr = mmap(NULL, sizeof(pthread_mutex_t), PROT_READ|PROT_WRITE, - MAP_SHARED|MAP_ANON, -1 /* fd */, 0); - if (ptr == MAP_FAILED) { - return false; - } - m = (pthread_mutex_t *)ptr; - - ret = pipe(pipe_down); - if (ret != 0) { - goto cleanup_mmap; - } - ret = pipe(pipe_up); - if (ret != 0) { - goto cleanup_pipe; - } - - ret = pthread_mutexattr_init(&ma); - if (ret != 0) { - goto cleanup_pipe; - } - ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK); - if (ret != 0) { - goto cleanup_ma; - } - ret = 
pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED); - if (ret != 0) { - goto cleanup_ma; - } - ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST); - if (ret != 0) { - goto cleanup_ma; - } - ret = pthread_mutex_init(m, &ma); - if (ret != 0) { - goto cleanup_ma; - } - - tdb_robust_mutext_old_handler = signal(SIGCHLD, - tdb_robust_mutex_handler); - - tdb_robust_mutex_pid = fork(); - if (tdb_robust_mutex_pid == 0) { - size_t nwritten; - close(pipe_down[1]); - close(pipe_up[0]); - ret = pthread_mutex_lock(m); - nwritten = write(pipe_up[1], &ret, sizeof(ret)); - if (nwritten != sizeof(ret)) { - exit(1); - } - if (ret != 0) { - exit(1); - } - nread = read(pipe_down[0], &c, 1); - if (nread != 1) { - exit(1); - } - /* leave locked */ - exit(0); - } - if (tdb_robust_mutex_pid == -1) { - goto cleanup_sig_child; - } - close(pipe_down[0]); - pipe_down[0] = -1; - close(pipe_up[1]); - pipe_up[1] = -1; - - nread = read(pipe_up[0], &ret, sizeof(ret)); - if (nread != sizeof(ret)) { - goto cleanup_child; - } - - ret = pthread_mutex_trylock(m); - if (ret != EBUSY) { - if (ret == 0) { - pthread_mutex_unlock(m); - } - goto cleanup_child; - } - - if (write(pipe_down[1], &c, 1) != 1) { - goto cleanup_child; - } - - nread = read(pipe_up[0], &c, 1); - if (nread != 0) { - goto cleanup_child; - } - - while (tdb_robust_mutex_pid > 0) { - pid_t pid; - - errno = 0; - pid = waitpid(tdb_robust_mutex_pid, &status, 0); - if (pid == tdb_robust_mutex_pid) { - tdb_robust_mutex_pid = -1; - break; - } - if (pid == -1 && errno != EINTR) { - goto cleanup_child; - } - } - signal(SIGCHLD, tdb_robust_mutext_old_handler); - - ret = pthread_mutex_trylock(m); - if (ret != EOWNERDEAD) { - if (ret == 0) { - pthread_mutex_unlock(m); - } - goto cleanup_m; - } - - ret = pthread_mutex_consistent(m); - if (ret != 0) { - goto cleanup_m; - } - - ret = pthread_mutex_trylock(m); - if (ret != EDEADLK) { - pthread_mutex_unlock(m); - goto cleanup_m; - } - - ret = pthread_mutex_unlock(m); - if (ret != 0) { - 
goto cleanup_m; - } - - tdb_mutex_locking_cached = true; - goto cleanup_m; - -cleanup_child: - while (tdb_robust_mutex_pid > 0) { - pid_t pid; - - kill(tdb_robust_mutex_pid, SIGKILL); - - errno = 0; - pid = waitpid(tdb_robust_mutex_pid, &status, 0); - if (pid == tdb_robust_mutex_pid) { - tdb_robust_mutex_pid = -1; - break; - } - if (pid == -1 && errno != EINTR) { - break; - } - } -cleanup_sig_child: - signal(SIGCHLD, tdb_robust_mutext_old_handler); -cleanup_m: - pthread_mutex_destroy(m); -cleanup_ma: - pthread_mutexattr_destroy(&ma); -cleanup_pipe: - if (pipe_down[0] != -1) { - close(pipe_down[0]); - } - if (pipe_down[1] != -1) { - close(pipe_down[1]); - } - if (pipe_up[0] != -1) { - close(pipe_up[0]); - } - if (pipe_up[1] != -1) { - close(pipe_up[1]); - } -cleanup_mmap: - munmap(ptr, sizeof(pthread_mutex_t)); - - return tdb_mutex_locking_cached; -} - -#else - -size_t tdb_mutex_size(struct tdb_context *tdb) -{ - return 0; -} - -bool tdb_have_mutexes(struct tdb_context *tdb) -{ - return false; -} - -int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype, - enum tdb_lock_flags flags) -{ - return -1; -} - -int tdb_mutex_allrecord_unlock(struct tdb_context *tdb) -{ - return -1; -} - -int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb) -{ - return -1; -} - -int tdb_mutex_allrecord_downgrade(struct tdb_context *tdb) -{ - return -1; -} - -int tdb_mutex_mmap(struct tdb_context *tdb) -{ - errno = ENOSYS; - return -1; -} - -int tdb_mutex_munmap(struct tdb_context *tdb) -{ - errno = ENOSYS; - return -1; -} - -int tdb_mutex_init(struct tdb_context *tdb) -{ - return ENOSYS; -} - -bool tdb_mutex_locking_supported(void) -{ - return false; -} - -_PUBLIC_ bool tdb_runtime_check_for_robust_mutexes(void) -{ - return false; -} - -#endif diff --git a/lib/tdb/common/open.c b/lib/tdb/common/open.c index 13d22e1..789bc73 100644 --- a/lib/tdb/common/open.c +++ b/lib/tdb/common/open.c @@ -76,16 +76,6 @@ static int tdb_new_database(struct tdb_context *tdb, struct tdb_header 
*header, if (tdb->flags & TDB_INCOMPATIBLE_HASH) newdb->rwlocks = TDB_HASH_RWLOCK_MAGIC; - /* - * For the mutex code we add the FEATURE_FLAG_MAGIC, overwriting the - * TDB_HASH_RWLOCK_MAGIC above. - */ - if ((tdb->flags & TDB_MUTEX_LOCKING) && - tdb_mutex_locking_supported()) { - newdb->rwlocks = TDB_FEATURE_FLAG_MAGIC; - newdb->feature_flags |= TDB_FEATURE_FLAG_MUTEX; - } - if (tdb->flags & TDB_INTERNAL) { tdb->map_size = size; tdb->map_ptr = (char *)newdb; @@ -102,55 +92,13 @@ static int tdb_new_database(struct tdb_context *tdb, struct tdb_header *header, /* This creates an endian-converted header, as if read from disk */ CONVERT(*newdb); + memcpy(header, newdb, sizeof(*header)); /* Don't endian-convert the magic food! */ memcpy(newdb->magic_food, TDB_MAGIC_FOOD, strlen(TDB_MAGIC_FOOD)+1); if (!tdb_write_all(tdb->fd, newdb, size)) goto fail; - if (newdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) { - - tdb->feature_flags = newdb->feature_flags; - tdb->hash_size = newdb->hash_size; - - newdb->mutex_size = tdb_mutex_size(tdb); - - /* - * Overwrite newdb.mutex_size - */ - ret = lseek(tdb->fd, 0, SEEK_SET); - if (ret == -1) { - goto fail; - } - if (!tdb_write_all(tdb->fd, newdb, size)) { - goto fail; - } - - ret = ftruncate( - tdb->fd, - newdb->mutex_size + sizeof(struct tdb_header)); - if (ret == -1) { - goto fail; - } - ret = tdb_mutex_init(tdb); - if (ret == -1) { - goto fail; - } - - /* - * Write a second header behind the mutexes. That's the area - * that will be mmapp'ed. 
- */ - ret = lseek(tdb->fd, newdb->mutex_size, SEEK_SET); - if (ret == -1) { - goto fail; - } - if (!tdb_write_all(tdb->fd, newdb, size)) { - goto fail; - } - } - - memcpy(header, newdb, sizeof(*header)); ret = 0; fail: SAFE_FREE(newdb); @@ -217,68 +165,6 @@ static bool check_header_hash(struct tdb_context *tdb, return check_header_hash(tdb, header, false, m1, m2); } -static bool tdb_mutex_open_ok(struct tdb_context *tdb) -{ - int locked; - - if (tdb->flags & TDB_NOMMAP) { - /* - * We need to mmap the mutex area - */ - TDB_LOG((tdb, TDB_DEBUG_ERROR, "Can not open a tdb with " - "mutexes without mmap\n")); - return false; - } - - locked = tdb_nest_lock(tdb, ACTIVE_LOCK, F_WRLCK, - TDB_LOCK_NOWAIT|TDB_LOCK_PROBE); - - if ((locked == -1) && (tdb->ecode == TDB_ERR_LOCK)) { - /* - * CLEAR_IF_FIRST still active. The tdb was created on this - * host, so we can assume the mutex implementation is - * compatible. Important for tools like tdbdump on a still - * open locking.tdb. - */ - return true; - } - - /* - * We got the CLEAR_IF_FIRST lock. That means the database was - * potentially copied from somewhere else. The mutex implementation - * might be incompatible. - */ - - if (tdb_nest_unlock(tdb, ACTIVE_LOCK, F_WRLCK, false) == -1) { - /* - * Should not happen - */ - TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_mutex_open_ok: " - "failed to release ACTIVE_LOCK on %s: %s\n", - tdb->name, strerror(errno))); - return false; - } - - if (tdb->flags & TDB_NOLOCK) { - /* - * We don't look at locks, so it does not matter to have a - * compatible mutex implementation. Allow the open. - */ - return true; - } - - if (tdb->flags & TDB_CLEAR_IF_FIRST) { - /* - * About to create the db here. 
- */ - return true; - } - - TDB_LOG((tdb, TDB_DEBUG_ERROR, "Can use mutexes only with " - "CLEAR_IF_FIRST or NOLOCK\n")); - return false; -} - _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags, int open_flags, mode_t mode, const struct tdb_logging_context *log_ctx, @@ -294,7 +180,6 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td unsigned v; const char *hash_alg; uint32_t magic1, magic2; - int ret; ZERO_STRUCT(header); @@ -441,6 +326,7 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td if ((tdb_flags & TDB_CLEAR_IF_FIRST) && (!tdb->read_only) && (locked = (tdb_nest_lock(tdb, ACTIVE_LOCK, F_WRLCK, TDB_LOCK_NOWAIT|TDB_LOCK_PROBE) == 0))) { + int ret; ret = tdb_brlock(tdb, F_WRLCK, FREELIST_TOP, 0, TDB_LOCK_WAIT); if (ret == -1) { @@ -504,29 +390,12 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td goto fail; if (header.rwlocks != 0 && - header.rwlocks != TDB_FEATURE_FLAG_MAGIC && header.rwlocks != TDB_HASH_RWLOCK_MAGIC) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: spinlocks no longer supported\n")); goto fail; } tdb->hash_size = header.hash_size; - if (header.rwlocks == TDB_FEATURE_FLAG_MAGIC) { - tdb->feature_flags = header.feature_flags; - } - - if (tdb_mutex_size(tdb) != header.mutex_size) { - TDB_LOG((tdb, TDB_DEBUG_ERROR, "Mutex size changed\n")); - errno = EINVAL; - goto fail; - } - - if ((tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) && - !tdb_mutex_open_ok(tdb)) { - errno = EINVAL; - goto fail; - } - if ((header.magic1_hash == 0) && (header.magic2_hash == 0)) { /* older TDB without magic hash references */ tdb->hash_fn = tdb_old_hash; @@ -557,52 +426,19 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td } /* Beware truncation! */ - - { - uint32_t map_size = st.st_size; - if (map_size != st.st_size) { - /* Ensure ecode is set for log fn. 
*/ - tdb->ecode = TDB_ERR_IO; - TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: " - "len %llu too large!\n", - (long long)st.st_size)); - errno = EIO; - goto fail; - } + tdb->map_size = st.st_size; + if (tdb->map_size != st.st_size) { + /* Ensure ecode is set for log fn. */ + tdb->ecode = TDB_ERR_IO; + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: " + "len %llu too large!\n", (long long)st.st_size)); + errno = EIO; + goto fail; } tdb->device = st.st_dev; tdb->inode = st.st_ino; - - /* - * We had tdb_mmap(tdb) here before, - * but for the mutex case we have a modified tdb_fstat() - * which is triggered from tdb_oob() before calling tdb_mmap(). - */ - tdb->map_size = 0; - ret = tdb->methods->tdb_oob(tdb, 0, 1, 0); - if (ret == -1) { - goto fail; - } - - if (tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) { - if (!tdb_mutex_locking_supported()) { - /* - * Database was created with mutex locking, - * but we don't support it. - */ - errno = EINVAL; - goto fail; - } - - if (!(tdb->flags & TDB_NOLOCK)) { - ret = tdb_mutex_mmap(tdb); - if (ret != 0) { - goto fail; - } - } - } - + tdb_mmap(tdb); if (locked) { if (tdb_nest_unlock(tdb, ACTIVE_LOCK, F_WRLCK, false) == -1) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " @@ -713,9 +549,6 @@ _PUBLIC_ int tdb_close(struct tdb_context *tdb) else tdb_munmap(tdb); } - - tdb_mutex_munmap(tdb); - SAFE_FREE(tdb->name); if (tdb->fd != -1) { ret = close(tdb->fd); @@ -795,13 +628,7 @@ static int tdb_reopen_internal(struct tdb_context *tdb, bool active_lock) TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: file dev/inode has changed!\n")); goto fail; } - /* - * We had tdb_mmap(tdb) here before, - * but for the mutex case we have a modified tdb_fstat() - * which is triggered from tdb_oob() before calling tdb_mmap(). 
- */ - tdb->map_size = 0; - if (tdb->methods->tdb_oob(tdb, 0, 1, 0) != 0) { + if (tdb_mmap(tdb) != 0) { goto fail; } #endif /* fake pread or pwrite */ diff --git a/lib/tdb/common/tdb.c b/lib/tdb/common/tdb.c index ae98c96..ebd4ffe 100644 --- a/lib/tdb/common/tdb.c +++ b/lib/tdb/common/tdb.c @@ -723,15 +723,6 @@ _PUBLIC_ void tdb_remove_flags(struct tdb_context *tdb, unsigned flags) return; } - if ((flags & TDB_NOLOCK) && - (tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) && - (tdb->mutexes == NULL)) { - tdb->ecode = TDB_ERR_LOCK; - TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_remove_flags: " - "Can not remove NOLOCK flag on mutexed databases")); - return; - } - if (flags & TDB_ALLOW_NESTING) { tdb->flags |= TDB_DISALLOW_NESTING; } diff --git a/lib/tdb/common/tdb_private.h b/lib/tdb/common/tdb_private.h index 9790552..a672159 100644 --- a/lib/tdb/common/tdb_private.h +++ b/lib/tdb/common/tdb_private.h @@ -53,7 +53,6 @@ typedef uint32_t tdb_off_t; #define TDB_RECOVERY_MAGIC (0xf53bc0e7U) #define TDB_RECOVERY_INVALID_MAGIC (0x0) #define TDB_HASH_RWLOCK_MAGIC (0xbad1a51U) -#define TDB_FEATURE_FLAG_MAGIC (0xbad1a52U) #define TDB_ALIGNMENT 4 #define DEFAULT_HASH_SIZE 131 #define FREELIST_TOP (sizeof(struct tdb_header)) @@ -69,8 +68,6 @@ typedef uint32_t tdb_off_t; #define TDB_PAD_BYTE 0x42 #define TDB_PAD_U32 0x42424242 -#define TDB_FEATURE_FLAG_MUTEX 1 - /* NB assumes there is a local variable called "tdb" that is the * current context, also takes doubly-parenthesized print-style * argument. */ @@ -155,9 +152,7 @@ struct tdb_header { tdb_off_t sequence_number; /* used when TDB_SEQNUM is set */ uint32_t magic1_hash; /* hash of TDB_MAGIC_FOOD. */ uint32_t magic2_hash; /* hash of TDB_MAGIC. 
*/ - uint32_t feature_flags; - uint32_t mutex_size; /* set if TDB_FEATURE_FLAG_MUTEX is set */ - tdb_off_t reserved[25]; + tdb_off_t reserved[27]; }; struct tdb_lock_type { @@ -191,8 +186,6 @@ struct tdb_methods { int (*tdb_expand_file)(struct tdb_context *, tdb_off_t , tdb_off_t ); }; -struct tdb_mutexes; - struct tdb_context { char *name; /* the name of the database */ void *map_ptr; /* where it is currently mapped */ @@ -205,12 +198,8 @@ struct tdb_context { int num_lockrecs; struct tdb_lock_type *lockrecs; /* only real locks, all with count>0 */ int lockrecs_array_length; - - struct tdb_mutexes *mutexes; /* mmap of the mutex area */ - enum TDB_ERROR ecode; /* error code for last tdb error */ uint32_t hash_size; - uint32_t feature_flags; uint32_t flags; /* the flags passed to tdb_open */ struct tdb_traverse_lock travlocks; /* current traversal locks */ struct tdb_context *next; /* all tdbs to avoid multiple opens */ @@ -303,21 +292,4 @@ bool tdb_add_off_t(tdb_off_t a, tdb_off_t b, tdb_off_t *pret); /* tdb_off_t and tdb_len_t right now are both uint32_t */ #define tdb_add_len_t tdb_add_off_t - -bool tdb_mutex_locking_supported(void); -size_t tdb_mutex_size(struct tdb_context *tdb); -bool tdb_have_mutexes(struct tdb_context *tdb); -int tdb_mutex_init(struct tdb_context *tdb); -int tdb_mutex_mmap(struct tdb_context *tdb); -int tdb_mutex_munmap(struct tdb_context *tdb); -bool tdb_mutex_lock(struct tdb_context *tdb, int rw, off_t off, off_t len, - bool waitflag, int *pret); -bool tdb_mutex_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len, - int *pret); -int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype, - enum tdb_lock_flags flags); -int tdb_mutex_allrecord_unlock(struct tdb_context *tdb); -int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb); -int tdb_mutex_allrecord_downgrade(struct tdb_context *tdb); - #endif /* TDB_PRIVATE_H */ diff --git a/lib/tdb/common/transaction.c b/lib/tdb/common/transaction.c index caef0be..a2c3bbd 100644 --- 
a/lib/tdb/common/transaction.c +++ b/lib/tdb/common/transaction.c @@ -421,8 +421,7 @@ static int _tdb_transaction_start(struct tdb_context *tdb, enum tdb_lock_flags lockflags) { /* some sanity checks */ - if (tdb->read_only || (tdb->flags & (TDB_INTERNAL|TDB_MUTEX_LOCKING)) - || tdb->traverse_read) { + if (tdb->read_only || (tdb->flags & TDB_INTERNAL) || tdb->traverse_read) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_start: cannot start a transaction on a read-only or internal db\n")); tdb->ecode = TDB_ERR_EINVAL; return -1; diff --git a/lib/tdb/docs/mutex.txt b/lib/tdb/docs/mutex.txt deleted file mode 100644 index 2597987..0000000 --- a/lib/tdb/docs/mutex.txt +++ /dev/null @@ -1,106 +0,0 @@ -Tdb is a hashtable database with multiple concurrent writer and external -record lock support. For speed reasons, wherever possible tdb uses a shared -memory mapped area for data access. In its currently released form, it uses -fcntl byte-range locks to coordinate access to the data itself. - -The tdb data is organized as a hashtable. Hash collision are dealt with by -forming a linked list of records that share a hash value. The individual -linked lists are protected across processes with 1-byte fcntl locks on the -starting pointer of the linked list representing a hash value. - -The external locking API of tdb allows to lock individual records. Instead of -really locking individual records, the tdb API locks a complete linked list -with a fcntl lock. - -The external locking API of tdb also allows to lock the complete database, and -ctdb uses this facility to freeze databases during a recovery. While the -so-called allrecord lock is held, all linked lists and all individual records -are frozen alltogether. Tdb achieves this by locking the complete file range -with a single fcntl lock. Individual 1-byte locks for the linked lists -conflict with this. Access to records is prevented by the one large fnctl byte -range lock. 
- -Fcntl locks have been chosen for tdb for two reasons: First they are portable -across all current unixes. Secondly they provide auto-cleanup. If a process -dies while holding a fcntl lock, the lock is given up as if it was explicitly -unlocked. Thus fcntl locks provide a very robust locking scheme, if a process -dies for any reason the database will not stay blocked until reboot. This -robustness is very important for long-running services, a reboot is not an -option for most users of tdb. - -Unfortunately during stress testing fcntl locks have turned out to be a major -problem for performance. The particular problem that was seen happens when -ctdb on a busy server does a recovery. A recovery means that ctdb has to -freeze all tdb databases for some time, usually a few seconds. This is done -with the allrecord lock. During the recovery phase on a busy server many smbd -processes try to access the tdb file with blocking fcntl calls. The specific -test in question easily reproduces 7,000 processes piling up waiting for -1-byte fcntl locks. When ctdb is done with the recovery, it gives up the -allrecord lock, covering the whole file range. All 7,000 processes waiting for -1-byte fcntl locks are woken up, trying to acquire their lock. The special -implementation of fcntl locks in Linux (up to 2013-02-12 at least) protects -all fcntl lock operations with a single system-wide spinlock. If 7,000 process -waiting for the allrecord lock to become released this leads to a thundering -herd condition, all CPUs are spinning on that single spinlock. - -Functionally the kernel is fine, eventually the thundering herd slows down and -every process correctly gets his share and locking range, but the performance -of the system while the herd is active is worse than expected. - -The thundering herd is only the worst case scenario for fcntl lock use. The -single spinlock for fcntl operations is also a performance penalty for normal -operations. 
In the cluster case, every read and write SMB request has to do -two fcntl calls to provide correct SMB mandatory locks. The single spinlock -is one source of serialization for the SMB read/write requests, limiting the -parallelism that can be achieved in a multi-core system. - -While trying to tune his servers, Ira Cooper, Samba Team member, found fcntl -locks to be a problem on Solaris as well. Ira pointed out that there is a -potential alternative locking mechanism that might be more scalable: Process -shared robust mutexes, as defined by Posix 2008 for example via - -http://pubs.opengroup.org/onlinepubs/9699919799/functions/pthread_mutexattr_setpshared.html -http://pubs.opengroup.org/onlinepubs/9699919799/functions/pthread_mutexattr_setrobust.html - -Pthread mutexes provide one of the core mechanisms in posix threads to protect -in-process data structures from concurrent access by multiple threads. In the -Linux implementation, a pthread_mutex_t is represented by a data structure in -user space that requires no kernel calls in the uncontended case for locking -and unlocking. Locking and unlocking in the uncontended case is implemented -purely in user space with atomic CPU instructions and thus are very fast. - -The setpshared functions indicate to the kernel that the mutex is about to be -shared between processes in a common shared memory area. - -The process shared posix mutexes have the potential to replace fcntl locking -to coordinate mmap access for tdbs. However, they are missing the criticial -auto-cleanup property that fcntl provides when a process dies. A process that -dies hard while holding a shared mutex has no chance to clean up the protected -data structures and unlock the shared mutex. Thus with a pure process shared -mutex the mutex will remain locked forever until the data structures are -re-initialized from scratch. - -With the robust mutexes defined by Posix the process shared mutexes have been -extended with a limited auto-cleanup property. 
If a mutex has been declared -robust, when a process exits while holding that mutex, the next process trying -to lock the mutex will get the special error message EOWNERDEAD. This informs -the caller that the data structures the mutex protects are potentially corrupt -and need to be cleaned up. - -The error message EOWNERDEAD when trying to lock a mutex is an extension over -the fcntl functionality. A process that does a blocking fcntl lock call is not -informed about whether the lock was explicitly freed by a process still alive -or due to an unplanned process exit. At the time of this writing (February -2013), at least Linux and OpenSolaris also implement the robustness feature of -process-shared mutexes. - -Converting the tdb locking mechanism from fcntl to mutexes has to take care of -both types of locks that are used on tdb files. - -The easy part is to use mutexes to replace the 1-byte linked list locks -covering the individual hashes. Those can be represented by a mutex each. - -Covering the allrecord lock is more difficult. The allrecord lock uses a fcntl -lock spanning all hash list locks simultaneously. This basic functionality is -not easily possible with mutexes. A mutex carries 1 bit of information, a -fcntl lock can carry an arbitrary amount of information. diff --git a/lib/tdb/include/tdb.h b/lib/tdb/include/tdb.h index 15c800e..a34f089 100644 --- a/lib/tdb/include/tdb.h +++ b/lib/tdb/include/tdb.h @@ -80,9 +80,6 @@ extern "C" { #define TDB_ALLOW_NESTING 512 /** Allow transactions to nest */ #define TDB_DISALLOW_NESTING 1024 /** Disallow transactions to nest */ #define TDB_INCOMPATIBLE_HASH 2048 /** Better hashing: can't be opened by tdb < 1.2.6. 
*/ -#define TDB_MUTEX_LOCKING 4096 /** optimized locking using robust mutexes if supported, - only with tdb >= 1.3.0 and - TDB_CLEAR_IF_FIRST or TDB_NOLOCK */ /** The tdb error codes */ enum TDB_ERROR {TDB_SUCCESS=0, TDB_ERR_CORRUPT, TDB_ERR_IO, TDB_ERR_LOCK, @@ -146,11 +143,6 @@ struct tdb_logging_context { * default 5.\n * TDB_ALLOW_NESTING - Allow transactions to nest.\n * TDB_DISALLOW_NESTING - Disallow transactions to nest.\n - * TDB_INCOMPATIBLE_HASH - Better hashing: can't be opened by tdb < 1.2.6.\n - * TDB_MUTEX_LOCKING - Optimized locking using robust mutexes if supported, - * can't be opened by tdb < 1.3.0. - * Only valid in combination with TDB_CLEAR_IF_FIRST - * or TDB_NOLOCK\n * * @param[in] open_flags Flags for the open(2) function. * @@ -187,11 +179,6 @@ struct tdb_context *tdb_open(const char *name, int hash_size, int tdb_flags, * default 5.\n * TDB_ALLOW_NESTING - Allow transactions to nest.\n * TDB_DISALLOW_NESTING - Disallow transactions to nest.\n - * TDB_INCOMPATIBLE_HASH - Better hashing: can't be opened by tdb < 1.2.6.\n - * TDB_MUTEX_LOCKING - Optimized locking using robust mutexes if supported, - * can't be opened by tdb < 1.3.0. - * Only valid in combination with TDB_CLEAR_IF_FIRST - * or TDB_NOLOCK\n * * @param[in] open_flags Flags for the open(2) function. * @@ -855,25 +842,6 @@ int tdb_rescue(struct tdb_context *tdb, void (*walk) (TDB_DATA key, TDB_DATA data, void *private_data), void *private_data); -/** - * @brief Check if if support for TDB_MUTEX_LOCKING is available at runtime. - * - * The feature behind TDB_MUTEX_LOCKING is available on all systems. - * On some systems the API for pthread_mutexattr_setrobust() is not available. - * On other systems there are some bugs in the interaction between glibc and - * the linux kernel. - * - * This function provides a runtime check if robust mutexes are really - * available. - * - * @note This calls fork(), but the SIGCHILD handling should be transparent. 
- * - * @return true if supported, false otherwise. - * - * @see TDB_MUTEX_LOCKING - */ -bool tdb_runtime_check_for_robust_mutexes(void); - /* @} ******************************************************************/ /* Low level locking functions: use with care */ diff --git a/lib/tdb/test/lock-tracking.c b/lib/tdb/test/lock-tracking.c index 3fe20a5..b2f092c 100644 --- a/lib/tdb/test/lock-tracking.c +++ b/lib/tdb/test/lock-tracking.c @@ -84,37 +84,47 @@ int fcntl_with_lockcheck(int fd, int cmd, ... /* arg */ ) } } else { struct testlock *new, *i; - unsigned int fl_end = fl->l_start + fl->l_len - 1; + unsigned int fl_end = fl->l_start + fl->l_len; if (fl->l_len == 0) fl_end = (unsigned int)-1; /* Check for overlaps: we shouldn't do this. */ for (i = testlocks; i; i = i->next) { - unsigned int i_end = i->off + i->len - 1; + unsigned int i_end = i->off + i->len; if (i->len == 0) i_end = (unsigned int)-1; - /* Upgrade a lock */ - if (i->type == F_RDLCK && fl->l_type == F_WRLCK - && i->off == fl->l_start - && i->len == fl->l_len) { - if (ret == 0) - i->type = F_WRLCK; - goto done; - } - if (fl->l_start >= i->off && fl->l_start < i_end) break; if (fl_end >= i->off && fl_end < i_end) break; + + /* tdb_allrecord_lock does this, handle adjacent: */ + if (fl->l_start == i_end && fl->l_type == i->type) { + if (ret == 0) { + i->len = fl->l_len + ? i->len + fl->l_len + : 0; + } + goto done; + } } if (i) { + /* Special case: upgrade of allrecord lock. */ + if (i->type == F_RDLCK && fl->l_type == F_WRLCK + && i->off == FREELIST_TOP + && fl->l_start == FREELIST_TOP + && i->len == 0 + && fl->l_len == 0) { + if (ret == 0) + i->type = F_WRLCK; + goto done; + } if (!suppress_lockcheck) { diag("%s testlock %u@%u overlaps %u@%u", fl->l_type == F_WRLCK ? 
"write" : "read", (int)fl->l_len, (int)fl->l_start, i->len, (int)i->off); - fflush(stdout); locking_errors++; } } diff --git a/lib/tdb/test/logging.c b/lib/tdb/test/logging.c index c79379c..dfab486 100644 --- a/lib/tdb/test/logging.c +++ b/lib/tdb/test/logging.c @@ -24,6 +24,9 @@ static void taplog(struct tdb_context *tdb, va_end(ap); /* Strip trailing \n: diag adds it. */ + if (line[0] && line[strlen(line)-1] == '\n') + diag("%s%.*s", log_prefix, (unsigned)strlen(line)-1, line); + else diag("%s%s", log_prefix, line); } diff --git a/lib/tdb/test/run-3G-file.c b/lib/tdb/test/run-3G-file.c index 748c972..67fd54f 100644 --- a/lib/tdb/test/run-3G-file.c +++ b/lib/tdb/test/run-3G-file.c @@ -9,7 +9,6 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" -#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" @@ -23,12 +22,12 @@ static int tdb_expand_file_sparse(struct tdb_context *tdb, return -1; } - if (tdb_ftruncate(tdb, size+addition) == -1) { + if (ftruncate(tdb->fd, size+addition) == -1) { char b = 0; - ssize_t written = tdb_pwrite(tdb, &b, 1, (size+addition) - 1); + ssize_t written = pwrite(tdb->fd, &b, 1, (size+addition) - 1); if (written == 0) { /* try once more, potentially revealing errno */ - written = tdb_pwrite(tdb, &b, 1, (size+addition) - 1); + written = pwrite(tdb->fd, &b, 1, (size+addition) - 1); } if (written == 0) { /* again - give up, guessing errno */ diff --git a/lib/tdb/test/run-bad-tdb-header.c b/lib/tdb/test/run-bad-tdb-header.c index 9d29fdf..b00fb89 100644 --- a/lib/tdb/test/run-bad-tdb-header.c +++ b/lib/tdb/test/run-bad-tdb-header.c @@ -9,7 +9,6 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" -#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" diff --git a/lib/tdb/test/run-check.c b/lib/tdb/test/run-check.c index ce389a2..b275691 100644 --- a/lib/tdb/test/run-check.c +++ b/lib/tdb/test/run-check.c @@ -9,7 
+9,6 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" -#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" diff --git a/lib/tdb/test/run-corrupt.c b/lib/tdb/test/run-corrupt.c index e6fc751..93eae42 100644 --- a/lib/tdb/test/run-corrupt.c +++ b/lib/tdb/test/run-corrupt.c @@ -9,7 +9,6 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" -#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" diff --git a/lib/tdb/test/run-die-during-transaction.c b/lib/tdb/test/run-die-during-transaction.c index 2392f64..9b90415 100644 --- a/lib/tdb/test/run-die-during-transaction.c +++ b/lib/tdb/test/run-die-during-transaction.c @@ -19,7 +19,6 @@ static int ftruncate_check(int fd, off_t length); #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" -#include "../common/mutex.c" #include "tap-interface.h" #include #include @@ -224,8 +223,7 @@ int main(int argc, char *argv[]) agent = prepare_external_agent(); for (i = 0; i < sizeof(ops)/sizeof(ops[0]); i++) { - diag("Testing %s after death\n", operation_name(ops[i])); - fflush(stdout); + diag("Testing %s after death", operation_name(ops[i])); ok1(test_death(ops[i], agent)); } diff --git a/lib/tdb/test/run-endian.c b/lib/tdb/test/run-endian.c index 9d4d5f5..3116f7d 100644 --- a/lib/tdb/test/run-endian.c +++ b/lib/tdb/test/run-endian.c @@ -9,7 +9,6 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" -#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" diff --git a/lib/tdb/test/run-incompatible.c b/lib/tdb/test/run-incompatible.c index b8e95b5..af01ca6 100644 --- a/lib/tdb/test/run-incompatible.c +++ b/lib/tdb/test/run-incompatible.c @@ -9,7 +9,6 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" -#include "../common/mutex.c" #include "tap-interface.h" #include diff 
--git a/lib/tdb/test/run-mutex-allrecord-bench.c b/lib/tdb/test/run-mutex-allrecord-bench.c deleted file mode 100644 index 5b816b3..0000000 --- a/lib/tdb/test/run-mutex-allrecord-bench.c +++ /dev/null @@ -1,75 +0,0 @@ -#include "../common/tdb_private.h" -#include "../common/io.c" -#include "../common/tdb.c" -#include "../common/lock.c" -#include "../common/freelist.c" -#include "../common/traverse.c" -#include "../common/transaction.c" -#include "../common/error.c" -#include "../common/open.c" -#include "../common/check.c" -#include "../common/hash.c" -#include "../common/mutex.c" -#include "tap-interface.h" -#include -#include -#include -#include -#include - -static TDB_DATA key, data; - -static void log_fn(struct tdb_context *tdb, enum tdb_debug_level level, - const char *fmt, ...) -{ - va_list ap; - va_start(ap, fmt); - vfprintf(stderr, fmt, ap); - va_end(ap); -} - -static double timeval_elapsed2(const struct timeval *tv1, const struct timeval *tv2) -{ - return (tv2->tv_sec - tv1->tv_sec) + - (tv2->tv_usec - tv1->tv_usec)*1.0e-6; -} - -static double timeval_elapsed(const struct timeval *tv) -{ - struct timeval tv2; - gettimeofday(&tv2, NULL); - return timeval_elapsed2(tv, &tv2); -} - -/* The code should barf on TDBs created with rwlocks. 
*/ -int main(int argc, char *argv[]) -{ - struct tdb_context *tdb; - unsigned int log_count; - struct tdb_logging_context log_ctx = { log_fn, &log_count }; - int ret; - struct timeval start; - double elapsed; - - key.dsize = strlen("hi"); - key.dptr = (void *)"hi"; - data.dsize = strlen("world"); - data.dptr = (void *)"world"; - - tdb = tdb_open_ex("mutex-allrecord-bench.tdb", 1000000, - TDB_INCOMPATIBLE_HASH| - TDB_MUTEX_LOCKING| - TDB_CLEAR_IF_FIRST, - O_RDWR|O_CREAT, 0755, &log_ctx, NULL); - ok(tdb, "tdb_open_ex should succeed\n"); - - gettimeofday(&start, NULL); - ret = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false); - elapsed = timeval_elapsed(&start); - - ok(ret == 0, "tdb_allrecord_lock should succeed\n"); - - printf("allrecord_lock took %f seconds\n", elapsed); - - return exit_status(); -} diff --git a/lib/tdb/test/run-mutex-allrecord-block.c b/lib/tdb/test/run-mutex-allrecord-block.c deleted file mode 100644 index 52ead07b..0000000 --- a/lib/tdb/test/run-mutex-allrecord-block.c +++ /dev/null @@ -1,101 +0,0 @@ -#include "../common/tdb_private.h" -#include "../common/io.c" -#include "../common/tdb.c" -#include "../common/lock.c" -#include "../common/freelist.c" -#include "../common/traverse.c" -#include "../common/transaction.c" -#include "../common/error.c" -#include "../common/open.c" -#include "../common/check.c" -#include "../common/hash.c" -#include "../common/mutex.c" -#include "tap-interface.h" -#include -#include -#include -#include -#include - -static TDB_DATA key, data; - -static void log_fn(struct tdb_context *tdb, enum tdb_debug_level level, - const char *fmt, ...) 
-{ - va_list ap; - va_start(ap, fmt); - vfprintf(stderr, fmt, ap); - va_end(ap); -} - -static int do_child(int tdb_flags, int fd) -{ - struct tdb_context *tdb; - unsigned int log_count; - struct tdb_logging_context log_ctx = { log_fn, &log_count }; - int ret; - char c = 0; - - tdb = tdb_open_ex("mutex-allrecord-block.tdb", 3, tdb_flags, - O_RDWR|O_CREAT, 0755, &log_ctx, NULL); - ok(tdb, "tdb_open_ex should succeed\n"); - - ret = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false); - ok(ret == 0, "tdb_allrecord_lock should succeed\n"); - - write(fd, &c, sizeof(c)); - - system("/bin/sleep 99999"); - - ret = tdb_allrecord_unlock(tdb, F_WRLCK, false); - ok(ret == 0, "tdb_allrecord_unlock should succeed\n"); - - return 0; -} - -/* The code should barf on TDBs created with rwlocks. */ -int main(int argc, char *argv[]) -{ - struct tdb_context *tdb; - unsigned int log_count; - struct tdb_logging_context log_ctx = { log_fn, &log_count }; - int ret, status; - pid_t child, wait_ret; - int pipefds[2]; - char c; - int tdb_flags; - - key.dsize = strlen("hi"); - key.dptr = (void *)"hi"; - data.dsize = strlen("world"); - data.dptr = (void *)"world"; - - pipe(pipefds); - - tdb_flags = TDB_INCOMPATIBLE_HASH| - TDB_MUTEX_LOCKING| - TDB_CLEAR_IF_FIRST; - - child = fork(); - if (child == 0) { - return do_child(tdb_flags, pipefds[1]); - } - - read(pipefds[0], &c, sizeof(c)); - - tdb = tdb_open_ex("mutex-allrecord-block.tdb", 0, - tdb_flags, O_RDWR|O_CREAT, 0755, - &log_ctx, NULL); - ok(tdb, "tdb_open_ex should succeed\n"); - - ret = tdb_chainlock(tdb, key); - ok(ret == 0, "tdb_chainlock (nowait) should succeed\n"); - - ret = tdb_chainunlock(tdb, key); - ok(ret == 0, "tdb_chainunlock should succeed\n"); - - wait_ret = wait(&status); - ok(wait_ret == child, "child should have exited correctly\n"); - - return exit_status(); -} diff --git a/lib/tdb/test/run-mutex-allrecord-trylock.c b/lib/tdb/test/run-mutex-allrecord-trylock.c deleted file mode 100644 index 1ae1512..0000000 --- 
a/lib/tdb/test/run-mutex-allrecord-trylock.c +++ /dev/null @@ -1,97 +0,0 @@ -#include "../common/tdb_private.h" -#include "../common/io.c" -#include "../common/tdb.c" -#include "../common/lock.c" -#include "../common/freelist.c" -#include "../common/traverse.c" -#include "../common/transaction.c" -#include "../common/error.c" -#include "../common/open.c" -#include "../common/check.c" -#include "../common/hash.c" -#include "../common/mutex.c" -#include "tap-interface.h" -#include -#include -#include -#include -#include - -static TDB_DATA key, data; - -static void log_fn(struct tdb_context *tdb, enum tdb_debug_level level, - const char *fmt, ...) -{ - va_list ap; - va_start(ap, fmt); - vfprintf(stderr, fmt, ap); - va_end(ap); -} - -static int do_child(int tdb_flags, int fd) -{ - struct tdb_context *tdb; - unsigned int log_count; - struct tdb_logging_context log_ctx = { log_fn, &log_count }; - int ret; - char c = 0; - - tdb = tdb_open_ex("mutex-allrecord-trylock.tdb", 3, tdb_flags, - O_RDWR|O_CREAT, 0755, &log_ctx, NULL); - ok(tdb, "tdb_open_ex should succeed\n"); - - ret = tdb_chainlock(tdb, key); - ok(ret == 0, "tdb_chainlock should succeed\n"); - - write(fd, &c, sizeof(c)); - - poll(NULL, 0, 1000); - - ret = tdb_chainunlock(tdb, key); - ok(ret == 0, "tdb_chainunlock should succeed\n"); - - return 0; -} - -/* The code should barf on TDBs created with rwlocks. 
*/ -int main(int argc, char *argv[]) -{ - struct tdb_context *tdb; - unsigned int log_count; - struct tdb_logging_context log_ctx = { log_fn, &log_count }; - int ret, status; - pid_t child, wait_ret; - int pipefds[2]; - char c; - int tdb_flags; - - key.dsize = strlen("hi"); - key.dptr = (void *)"hi"; - data.dsize = strlen("world"); - data.dptr = (void *)"world"; - - pipe(pipefds); - - tdb_flags = TDB_INCOMPATIBLE_HASH| - TDB_MUTEX_LOCKING| - TDB_CLEAR_IF_FIRST; - - child = fork(); - if (child == 0) { - return do_child(tdb_flags, pipefds[1]); - } - - read(pipefds[0], &c, sizeof(c)); - - tdb = tdb_open_ex("mutex-allrecord-trylock.tdb", 0, tdb_flags, - O_RDWR|O_CREAT, 0755, &log_ctx, NULL); - ok(tdb, "tdb_open_ex should succeed\n"); - - ret = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_NOWAIT, false); - ok(ret == -1, "tdb_allrecord_lock (nowait) not should succeed\n"); - - wait_ret = wait(&status); - ok(wait_ret == child, "child should have exited correctly\n"); - - return exit_status(); -} diff --git a/lib/tdb/test/run-mutex-die.c b/lib/tdb/test/run-mutex-die.c deleted file mode 100644 index 6e1c40d..0000000 --- a/lib/tdb/test/run-mutex-die.c +++ /dev/null @@ -1,266 +0,0 @@ -#include "../common/tdb_private.h" -#include "lock-tracking.h" -static ssize_t pwrite_check(int fd, const void *buf, size_t count, off_t offset); -static ssize_t write_check(int fd, const void *buf, size_t count); -static int ftruncate_check(int fd, off_t length); - -#define pwrite pwrite_check -#define write write_check -#define fcntl fcntl_with_lockcheck -#define ftruncate ftruncate_check - -#include "../common/io.c" -#include "../common/tdb.c" -#include "../common/lock.c" -#include "../common/freelist.c" -#include "../common/traverse.c" -#include "../common/transaction.c" -#include "../common/error.c" -#include "../common/open.c" -#include "../common/check.c" -#include "../common/hash.c" -#include "../common/mutex.c" -#include "tap-interface.h" -#include -#include -#include -#include 
"external-agent.h" -#include "logging.h" - -#undef write -#undef pwrite -#undef fcntl -#undef ftruncate - -static int target, current; -#define TEST_DBNAME "run-mutex-die.tdb" -#define KEY_STRING "helloworld" - -static void maybe_die(int fd) -{ - if (target == 0) { - return; - } - current += 1; - if (current == target) { - _exit(1); - } -} - -static ssize_t pwrite_check(int fd, - const void *buf, size_t count, off_t offset) -{ - ssize_t ret; - - maybe_die(fd); - - ret = pwrite(fd, buf, count, offset); - if (ret != count) - return ret; - - maybe_die(fd); - return ret; -} - -static ssize_t write_check(int fd, const void *buf, size_t count) -{ - ssize_t ret; - - maybe_die(fd); - - ret = write(fd, buf, count); - if (ret != count) - return ret; - - maybe_die(fd); - return ret; -} - -static int ftruncate_check(int fd, off_t length) -{ - int ret; - - maybe_die(fd); - - ret = ftruncate(fd, length); - - maybe_die(fd); - return ret; -} - -static enum agent_return flakey_ops(struct agent *a, enum operation op, - TDB_DATA key) -{ - enum agent_return ret; - - /* - * Run in the external agent child - */ - - ret = external_agent_operation(a, OPEN, TEST_DBNAME); - if (ret != SUCCESS) { - fprintf(stderr, "Agent failed to open: %s\n", - agent_return_name(ret)); - return ret; - } - ret = external_agent_operation(a, UNMAP, ""); - if (ret != SUCCESS) { - fprintf(stderr, "Agent failed to unmap: %s\n", - agent_return_name(ret)); - return ret; - } - ret = external_agent_operation(a, STORE, "xyz"); - if (ret != SUCCESS) { - fprintf(stderr, "Agent failed to store: %s\n", - agent_return_name(ret)); - return ret; - } - ret = external_agent_operation(a, TRANSACTION_START, ""); - if (ret != SUCCESS) { - fprintf(stderr, "Agent failed transaction_start: %s\n", - agent_return_name(ret)); - return ret; - } - ret = external_agent_operation(a, STORE, (char *)key.dptr); - if (ret != SUCCESS) { - fprintf(stderr, "Agent failed store: %s\n", - agent_return_name(ret)); - return ret; - } - ret = 
external_agent_operation(a, TRANSACTION_COMMIT, ""); - if (ret != SUCCESS) { - fprintf(stderr, "Agent failed commit: %s\n", - agent_return_name(ret)); - return ret; - } - ret = external_agent_operation(a, FETCH, KEY_STRING); - if (ret != SUCCESS) { - fprintf(stderr, "Agent failed find key: %s\n", - agent_return_name(ret)); - return ret; - } - ret = external_agent_operation(a, PING, ""); - if (ret != SUCCESS) { - fprintf(stderr, "Agent failed ping: %s\n", - agent_return_name(ret)); - return ret; - } - return ret; -} - -static void prep_db(TDB_DATA key) { - struct tdb_context *tdb; - TDB_DATA data; - - data.dptr = (uint8_t *)"foo"; - data.dsize = strlen((char *)data.dptr); - - unlink(TEST_DBNAME); - - tdb = tdb_open_ex( - TEST_DBNAME, 2, - TDB_INCOMPATIBLE_HASH|TDB_MUTEX_LOCKING|TDB_CLEAR_IF_FIRST, - O_CREAT|O_TRUNC|O_RDWR, 0600, &taplogctx, NULL); - - if (tdb_store(tdb, key, data, TDB_INSERT) != 0) { - return; - } - - tdb_close(tdb); - tdb = NULL; - - forget_locking(); -} - -static bool test_db(void) { - struct tdb_context *tdb; - int ret; - - tdb = tdb_open_ex( - TEST_DBNAME, 1024, TDB_INCOMPATIBLE_HASH, - O_RDWR, 0600, &taplogctx, NULL); - - if (tdb == NULL) { - perror("tdb_open_ex failed"); - return false; - } - - ret = tdb_traverse(tdb, NULL, NULL); - if (ret == -1) { - perror("traverse failed"); - goto fail; - } - - tdb_close(tdb); - - forget_locking(); - - return true; - -fail: - tdb_close(tdb); - return false; -} - -static bool test_one(enum operation op, TDB_DATA key) -{ - enum agent_return ret; - - ret = AGENT_DIED; - target = 19; - - while (ret != SUCCESS) { - struct agent *agent; - - { - int child_target = target; - target = 0; - prep_db(key); - target = child_target; - } - - agent = prepare_external_agent(); - - ret = flakey_ops(agent, STORE, key); - - printf("Agent (target=%d) returns %s\n", target, - agent_return_name(ret)); - - shutdown_agent(agent); - - { - int child_target = target; - target = 0; - if (!test_db()) { - return false; - } - target = 
child_target; - } - - target += 1; - } - - return true; -} - -int main(int argc, char *argv[]) -{ - enum operation ops[] = { FETCH }; - TDB_DATA key; - int i; - - plan_tests(12); - unlock_callback = maybe_die; - - key.dsize = strlen(KEY_STRING); - key.dptr = (void *)KEY_STRING; - - for (i = 0; i < sizeof(ops)/sizeof(ops[0]); i++) { - enum agent_return ret; - diag("Testing %s after death\n", operation_name(ops[i])); - fflush(stdout); - ret = test_one(ops[i], key); - } - - return exit_status(); -} diff --git a/lib/tdb/test/run-mutex-openflags2.c b/lib/tdb/test/run-mutex-openflags2.c deleted file mode 100644 index a049dbc..0000000 --- a/lib/tdb/test/run-mutex-openflags2.c +++ /dev/null @@ -1,94 +0,0 @@ -#include "../common/tdb_private.h" -#include "../common/io.c" -#include "../common/tdb.c" -#include "../common/lock.c" -#include "../common/freelist.c" -#include "../common/traverse.c" -#include "../common/transaction.c" -#include "../common/error.c" -#include "../common/open.c" -#include "../common/check.c" -#include "../common/hash.c" -#include "../common/mutex.c" -#include "tap-interface.h" -#include -#include -#include -#include -#include - -static TDB_DATA key, data; - -static void log_fn(struct tdb_context *tdb, enum tdb_debug_level level, - const char *fmt, ...) -{ - va_list ap; - va_start(ap, fmt); - vfprintf(stderr, fmt, ap); - va_end(ap); -} - -static int do_child(int fd) -{ - struct tdb_context *tdb; - unsigned int log_count; - struct tdb_logging_context log_ctx = { log_fn, &log_count }; - char c; - - read(fd, &c, 1); - - tdb = tdb_open_ex("mutex-openflags2.tdb", 0, - TDB_INCOMPATIBLE_HASH| - TDB_CLEAR_IF_FIRST, - O_RDWR|O_CREAT, 0755, &log_ctx, NULL); - ok((tdb == NULL) && (errno == EINVAL), - "tdb_open_ex without mutexes should fail with EINVAL\n"); - - return 0; -} - -/* The code should barf on TDBs created with rwlocks. 
*/ -int main(int argc, char *argv[]) -{ - struct tdb_context *tdb; - unsigned int log_count; - struct tdb_logging_context log_ctx = { log_fn, &log_count }; - int ret, status; - pid_t child, wait_ret; - int pipefd[2]; - char c = 0; - - ret = pipe(pipefd); - - key.dsize = strlen("hi"); - key.dptr = (void *)"hi"; - data.dsize = strlen("world"); - data.dptr = (void *)"world"; - - tdb = tdb_open_ex("mutex-openflags2.tdb", 0, - TDB_INCOMPATIBLE_HASH| - TDB_MUTEX_LOCKING, - O_RDWR|O_CREAT, 0755, &log_ctx, NULL); - ok((tdb == NULL) && (errno == EINVAL), "TDB_MUTEX_LOCKING without " - "TDB_CLEAR_IF_FIRST should fail with EINVAL\n"); - - child = fork(); - if (child == 0) { - return do_child(pipefd[0]); - } - - tdb = tdb_open_ex("mutex-openflags2.tdb", 0, - TDB_INCOMPATIBLE_HASH| - TDB_CLEAR_IF_FIRST| - TDB_MUTEX_LOCKING, - O_RDWR|O_CREAT, 0755, &log_ctx, NULL); - ok(tdb, "tdb_open_ex with mutexes should succeed\n"); - - write(pipefd[1], &c, 1); - - wait_ret = wait(&status); - ok((wait_ret == child) && (status == 0), - "child should have exited correctly\n"); - - return exit_status(); -} diff --git a/lib/tdb/test/run-mutex-trylock.c b/lib/tdb/test/run-mutex-trylock.c deleted file mode 100644 index a3ec440..0000000 --- a/lib/tdb/test/run-mutex-trylock.c +++ /dev/null @@ -1,106 +0,0 @@ -#include "../common/tdb_private.h" -#include "../common/io.c" -#include "../common/tdb.c" -#include "../common/lock.c" -#include "../common/freelist.c" -#include "../common/traverse.c" -#include "../common/transaction.c" -#include "../common/error.c" -#include "../common/open.c" -#include "../common/check.c" -#include "../common/hash.c" -#include "../common/mutex.c" -#include "tap-interface.h" -#include -#include -#include -#include -#include - -static TDB_DATA key, data; - -static void log_fn(struct tdb_context *tdb, enum tdb_debug_level level, - const char *fmt, ...) 
-{ - va_list ap; - va_start(ap, fmt); - vfprintf(stderr, fmt, ap); - va_end(ap); -} - -static int do_child(int tdb_flags, int fd) -{ - struct tdb_context *tdb; - unsigned int log_count; - struct tdb_logging_context log_ctx = { log_fn, &log_count }; - int ret; - char c = 0; - - tdb = tdb_open_ex("mutex-trylock.tdb", 0, tdb_flags, - O_RDWR|O_CREAT, 0755, &log_ctx, NULL); - ok(tdb, "tdb_open_ex should succeed\n"); - - ret = tdb_chainlock(tdb, key); - ok(ret == 0, "tdb_chainlock should succeed\n"); - - write(fd, &c, sizeof(c)); - - poll(NULL, 0, 1000); - - ret = tdb_chainunlock(tdb, key); - ok(ret == 0, "tdb_chainunlock should succeed\n"); - - write(fd, &c, sizeof(c)); - - return 0; -} - -/* The code should barf on TDBs created with rwlocks. */ -int main(int argc, char *argv[]) -{ - struct tdb_context *tdb; - unsigned int log_count; - struct tdb_logging_context log_ctx = { log_fn, &log_count }; - int ret, status; - pid_t child, wait_ret; - int pipefds[2]; - char c; - int tdb_flags; - - key.dsize = strlen("hi"); - key.dptr = (void *)"hi"; - data.dsize = strlen("world"); - data.dptr = (void *)"world"; - - pipe(pipefds); - - tdb_flags = TDB_INCOMPATIBLE_HASH| - TDB_MUTEX_LOCKING| - TDB_CLEAR_IF_FIRST; - - child = fork(); - if (child == 0) { - return do_child(tdb_flags, pipefds[1]); - } - - read(pipefds[0], &c, sizeof(c)); - - tdb = tdb_open_ex("mutex-trylock.tdb", 0, tdb_flags, - O_RDWR|O_CREAT, 0755, &log_ctx, NULL); - ok(tdb, "tdb_open_ex should succeed\n"); - - ret = tdb_chainlock_nonblock(tdb, key); - ok(ret == -1, "tdb_chainlock_nonblock should not succeed\n"); - - read(pipefds[0], &c, sizeof(c)); - - ret = tdb_chainlock_nonblock(tdb, key); - ok(ret == 0, "tdb_chainlock_nonblock should succeed\n"); - ret = tdb_chainunlock(tdb, key); - ok(ret == 0, "tdb_chainunlock should succeed\n"); - - wait_ret = wait(&status); - ok(wait_ret == child, "child should have exited correctly\n"); - - return exit_status(); -} diff --git a/lib/tdb/test/run-mutex1.c 
b/lib/tdb/test/run-mutex1.c deleted file mode 100644 index 3753fef..0000000 --- a/lib/tdb/test/run-mutex1.c +++ /dev/null @@ -1,120 +0,0 @@ -#include "../common/tdb_private.h" -#include "../common/io.c" -#include "../common/tdb.c" -#include "../common/lock.c" -#include "../common/freelist.c" -#include "../common/traverse.c" -#include "../common/transaction.c" -#include "../common/error.c" -#include "../common/open.c" -#include "../common/check.c" -#include "../common/hash.c" -#include "../common/mutex.c" -#include "tap-interface.h" -#include -#include -#include -#include -#include - -static TDB_DATA key, data; - -static void log_fn(struct tdb_context *tdb, enum tdb_debug_level level, - const char *fmt, ...) -{ - va_list ap; - va_start(ap, fmt); - vfprintf(stderr, fmt, ap); - va_end(ap); -} - -static int do_child(int tdb_flags, int fd) -{ - struct tdb_context *tdb; - unsigned int log_count; - struct tdb_logging_context log_ctx = { log_fn, &log_count }; - int ret; - char c = 0; - - tdb = tdb_open_ex("mutex1.tdb", 0, tdb_flags, - O_RDWR|O_CREAT, 0755, &log_ctx, NULL); - ok(tdb, "tdb_open_ex should succeed\n"); - - ret = tdb_chainlock(tdb, key); - ok(ret == 0, "tdb_chainlock should succeed\n"); - - write(fd, &c, sizeof(c)); - - poll(NULL, 0, 1000); - - ret = tdb_chainunlock(tdb, key); - ok(ret == 0, "tdb_chainunlock should succeed\n"); - - poll(NULL, 0, 1000); - - ret = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false); - ok(ret == 0, "tdb_allrecord_lock should succeed\n"); - - write(fd, &c, sizeof(c)); - - poll(NULL, 0, 1000); - - ret = tdb_allrecord_unlock(tdb, F_WRLCK, false); - ok(ret == 0, "tdb_allrecord_lock should succeed\n"); - - return 0; -} - -/* The code should barf on TDBs created with rwlocks. 
*/ -int main(int argc, char *argv[]) -{ - struct tdb_context *tdb; - unsigned int log_count; - struct tdb_logging_context log_ctx = { log_fn, &log_count }; - int ret, status; - pid_t child, wait_ret; - int pipefds[2]; - char c; - int tdb_flags; - - key.dsize = strlen("hi"); - key.dptr = (void *)"hi"; - data.dsize = strlen("world"); - data.dptr = (void *)"world"; - - pipe(pipefds); - - tdb_flags = TDB_INCOMPATIBLE_HASH| - TDB_MUTEX_LOCKING| - TDB_CLEAR_IF_FIRST; - - child = fork(); - if (child == 0) { - return do_child(tdb_flags, pipefds[1]); - } - - read(pipefds[0], &c, sizeof(c)); - - tdb = tdb_open_ex("mutex1.tdb", 0, tdb_flags, - O_RDWR|O_CREAT, 0755, &log_ctx, NULL); - ok(tdb, "tdb_open_ex should succeed\n"); - - ret = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false); - ok(ret == 0, "tdb_allrecord_lock should succeed\n"); - - ret = tdb_store(tdb, key, data, 0); - ok(ret == 0, "tdb_store should succeed\n"); - - ret = tdb_allrecord_unlock(tdb, F_WRLCK, false); - ok(ret == 0, "tdb_allrecord_unlock should succeed\n"); - - read(pipefds[0], &c, sizeof(c)); - - ret = tdb_delete(tdb, key); - ok(ret == 0, "tdb_delete should succeed\n"); - - wait_ret = wait(&status); - ok(wait_ret == child, "child should have exited correctly\n"); - - return exit_status(); -} diff --git a/lib/tdb/test/run-nested-transactions.c b/lib/tdb/test/run-nested-transactions.c index 864adf2..bf08e55 100644 --- a/lib/tdb/test/run-nested-transactions.c +++ b/lib/tdb/test/run-nested-transactions.c @@ -9,7 +9,6 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" -#include "../common/mutex.c" #include "tap-interface.h" #include #include diff --git a/lib/tdb/test/run-nested-traverse.c b/lib/tdb/test/run-nested-traverse.c index 22ee3e2..361dc2e 100644 --- a/lib/tdb/test/run-nested-traverse.c +++ b/lib/tdb/test/run-nested-traverse.c @@ -11,7 +11,6 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" -#include "../common/mutex.c" 
#include "tap-interface.h" #undef fcntl #include diff --git a/lib/tdb/test/run-no-lock-during-traverse.c b/lib/tdb/test/run-no-lock-during-traverse.c index 737a32f..b5e31dc 100644 --- a/lib/tdb/test/run-no-lock-during-traverse.c +++ b/lib/tdb/test/run-no-lock-during-traverse.c @@ -13,7 +13,6 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" -#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" diff --git a/lib/tdb/test/run-oldhash.c b/lib/tdb/test/run-oldhash.c index aaee6f6..535336c 100644 --- a/lib/tdb/test/run-oldhash.c +++ b/lib/tdb/test/run-oldhash.c @@ -9,7 +9,6 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" -#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" diff --git a/lib/tdb/test/run-open-during-transaction.c b/lib/tdb/test/run-open-during-transaction.c index 1605376..04ba956 100644 --- a/lib/tdb/test/run-open-during-transaction.c +++ b/lib/tdb/test/run-open-during-transaction.c @@ -20,7 +20,6 @@ static int ftruncate_check(int fd, off_t length); #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" -#include "../common/mutex.c" #include "tap-interface.h" #include #include diff --git a/lib/tdb/test/run-readonly-check.c b/lib/tdb/test/run-readonly-check.c index c5e0f7d..e518532 100644 --- a/lib/tdb/test/run-readonly-check.c +++ b/lib/tdb/test/run-readonly-check.c @@ -11,7 +11,6 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" -#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" diff --git a/lib/tdb/test/run-rescue-find_entry.c b/lib/tdb/test/run-rescue-find_entry.c index 5d6f8f7..25f4f1c 100644 --- a/lib/tdb/test/run-rescue-find_entry.c +++ b/lib/tdb/test/run-rescue-find_entry.c @@ -10,7 +10,6 @@ #include "../common/check.c" #include "../common/hash.c" #include "../common/rescue.c" -#include 
"../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" diff --git a/lib/tdb/test/run-rescue.c b/lib/tdb/test/run-rescue.c index e43f53b..7c806a4 100644 --- a/lib/tdb/test/run-rescue.c +++ b/lib/tdb/test/run-rescue.c @@ -10,7 +10,6 @@ #include "../common/check.c" #include "../common/hash.c" #include "../common/rescue.c" -#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" diff --git a/lib/tdb/test/run-rwlock-check.c b/lib/tdb/test/run-rwlock-check.c index 2ac9dc3..8b8072d 100644 --- a/lib/tdb/test/run-rwlock-check.c +++ b/lib/tdb/test/run-rwlock-check.c @@ -9,7 +9,6 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" -#include "../common/mutex.c" #include "tap-interface.h" #include diff --git a/lib/tdb/test/run-summary.c b/lib/tdb/test/run-summary.c index 8b9a1a0..2231284 100644 --- a/lib/tdb/test/run-summary.c +++ b/lib/tdb/test/run-summary.c @@ -10,7 +10,6 @@ #include "../common/check.c" #include "../common/hash.c" #include "../common/summary.c" -#include "../common/mutex.c" #include "tap-interface.h" #include diff --git a/lib/tdb/test/run-transaction-expand.c b/lib/tdb/test/run-transaction-expand.c index d36b894..ddf1f24 100644 --- a/lib/tdb/test/run-transaction-expand.c +++ b/lib/tdb/test/run-transaction-expand.c @@ -37,7 +37,6 @@ static inline int fake_fdatasync(int fd) #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" -#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" diff --git a/lib/tdb/test/run-traverse-in-transaction.c b/lib/tdb/test/run-traverse-in-transaction.c index 17d6412..48194b8 100644 --- a/lib/tdb/test/run-traverse-in-transaction.c +++ b/lib/tdb/test/run-traverse-in-transaction.c @@ -11,7 +11,6 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" -#include "../common/mutex.c" #include "tap-interface.h" #undef fcntl_with_lockcheck #include diff --git 
a/lib/tdb/test/run-wronghash-fail.c b/lib/tdb/test/run-wronghash-fail.c index c44b0f5..9c78fc5 100644 --- a/lib/tdb/test/run-wronghash-fail.c +++ b/lib/tdb/test/run-wronghash-fail.c @@ -9,7 +9,6 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" -#include "../common/mutex.c" #include "tap-interface.h" #include diff --git a/lib/tdb/test/run-zero-append.c b/lib/tdb/test/run-zero-append.c index f9eba1b..a2324c4 100644 --- a/lib/tdb/test/run-zero-append.c +++ b/lib/tdb/test/run-zero-append.c @@ -9,7 +9,6 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" -#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" diff --git a/lib/tdb/test/run.c b/lib/tdb/test/run.c index c744c4d..f61fcf6 100644 --- a/lib/tdb/test/run.c +++ b/lib/tdb/test/run.c @@ -9,7 +9,6 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" -#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" diff --git a/lib/tdb/tools/tdbtorture.c b/lib/tdb/tools/tdbtorture.c index 3e26f65..5ae08f6 100644 --- a/lib/tdb/tools/tdbtorture.c +++ b/lib/tdb/tools/tdbtorture.c @@ -33,7 +33,6 @@ static int always_transaction = 0; static int hash_size = 2; static int loopnum; static int count_pipe; -static bool mutex = false; static struct tdb_logging_context log_ctx; #ifdef PRINTF_ATTRIBUTE @@ -120,7 +119,6 @@ static void addrec_db(void) #if TRANSACTION_PROB if (in_transaction == 0 && - ((tdb_get_flags(db) & TDB_MUTEX_LOCKING) == 0) && (always_transaction || random() % TRANSACTION_PROB == 0)) { if (tdb_transaction_start(db) != 0) { fatal("tdb_transaction_start failed"); @@ -218,7 +216,7 @@ static int traverse_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, static void usage(void) { - printf("Usage: tdbtorture [-t] [-k] [-m] [-n NUM_PROCS] [-l NUM_LOOPS] [-s SEED] [-H HASH_SIZE]\n"); + printf("Usage: tdbtorture [-t] [-k] [-n NUM_PROCS] [-l NUM_LOOPS] 
[-s SEED] [-H HASH_SIZE]\n"); exit(0); } @@ -232,13 +230,7 @@ static void send_count_and_suicide(int sig) static int run_child(const char *filename, int i, int seed, unsigned num_loops, unsigned start) { - int tdb_flags = TDB_DEFAULT|TDB_CLEAR_IF_FIRST|TDB_INCOMPATIBLE_HASH; - - if (mutex) { - tdb_flags |= TDB_MUTEX_LOCKING; - } - - db = tdb_open_ex(filename, hash_size, tdb_flags, + db = tdb_open_ex(filename, hash_size, TDB_DEFAULT, O_RDWR | O_CREAT, 0600, &log_ctx, NULL); if (!db) { fatal("db open failed"); @@ -310,7 +302,7 @@ int main(int argc, char * const *argv) log_ctx.log_fn = tdb_log; - while ((c = getopt(argc, argv, "n:l:s:H:thkm")) != -1) { + while ((c = getopt(argc, argv, "n:l:s:H:thk")) != -1) { switch (c) { case 'n': num_procs = strtol(optarg, NULL, 0); @@ -330,13 +322,6 @@ int main(int argc, char * const *argv) case 'k': kill_random = 1; break; - case 'm': - mutex = tdb_runtime_check_for_robust_mutexes(); - if (!mutex) { - printf("tdb_runtime_check_for_robust_mutexes() returned false\n"); - exit(1); - } - break; default: usage(); } @@ -458,13 +443,7 @@ int main(int argc, char * const *argv) done: if (error_count == 0) { - int tdb_flags = TDB_DEFAULT; - - if (mutex) { - tdb_flags |= TDB_NOLOCK; - } - - db = tdb_open_ex(test_tdb, hash_size, tdb_flags, + db = tdb_open_ex(test_tdb, hash_size, TDB_DEFAULT, O_RDWR, 0, &log_ctx, NULL); if (!db) { fatal("db open failed\n"); diff --git a/lib/tdb/wscript b/lib/tdb/wscript index e4309f4..7019693 100644 --- a/lib/tdb/wscript +++ b/lib/tdb/wscript @@ -1,7 +1,7 @@ #!/usr/bin/env python APPNAME = 'tdb' -VERSION = '1.3.0' +VERSION = '1.2.13' blddir = 'bin' @@ -40,23 +40,12 @@ tdb1_unit_tests = [ 'run-traverse-in-transaction', 'run-wronghash-fail', 'run-zero-append' - 'run-mutex-openflags2', - 'run-mutex-trylock', - 'run-mutex-allrecord-bench', - 'run-mutex-allrecord-trylock', - 'run-mutex-allrecord-block', - 'run-mutex-die', - 'run-mutex1', ] def set_options(opt): opt.BUILTIN_DEFAULT('replace') 
opt.PRIVATE_EXTENSION_DEFAULT('tdb', noextension='tdb') opt.RECURSE('lib/replace') - opt.add_option('--disable-tdb-mutex-locking', - help=("Disable the use of pthread robust mutexes"), - action="store_true", dest='disable_tdb_mutex_locking', - default=False) if opt.IN_LAUNCH_DIR(): opt.add_option('--disable-python', help=("disable the pytdb module"), @@ -64,11 +53,6 @@ def set_options(opt): def configure(conf): - conf.env.disable_tdb_mutex_locking = getattr(Options.options, - 'disable_tdb_mutex_locking', - False) - if not conf.env.disable_tdb_mutex_locking: - conf.env.replace_add_global_pthread = True conf.RECURSE('lib/replace') conf.env.standalone_tdb = conf.IN_LAUNCH_DIR() @@ -84,11 +68,6 @@ def configure(conf): conf.env.disable_python = getattr(Options.options, 'disable_python', False) - if (conf.CONFIG_SET('HAVE_ROBUST_MUTEXES') and - conf.env.building_tdb and - not conf.env.disable_tdb_mutex_locking): - conf.define('USE_TDB_MUTEX_LOCKING', 1) - conf.CHECK_XSLTPROC_MANPAGES() if not conf.env.disable_python: @@ -108,12 +87,10 @@ def configure(conf): def build(bld): bld.RECURSE('lib/replace') - COMMON_FILES='''check.c error.c tdb.c traverse.c - freelistcheck.c lock.c dump.c freelist.c - io.c open.c transaction.c hash.c summary.c rescue.c - mutex.c''' - - COMMON_SRC = bld.SUBDIR('common', COMMON_FILES) + COMMON_SRC = bld.SUBDIR('common', + '''check.c error.c tdb.c traverse.c + freelistcheck.c lock.c dump.c freelist.c + io.c open.c transaction.c hash.c summary.c rescue.c''') if bld.env.standalone_tdb: bld.env.PKGCONFIGDIR = '${LIBDIR}/pkgconfig' @@ -122,15 +99,9 @@ def build(bld): private_library = True if not bld.CONFIG_SET('USING_SYSTEM_TDB'): - - tdb_deps = 'replace' - - if bld.CONFIG_SET('USE_TDB_MUTEX_LOCKING'): - tdb_deps += ' pthread' - bld.SAMBA_LIBRARY('tdb', COMMON_SRC, - deps=tdb_deps, + deps='replace', includes='include', abi_directory='ABI', abi_match='tdb_*', @@ -166,7 +137,7 @@ def build(bld): # FIXME: This hardcoded list is stupid, stupid, stupid. 
bld.SAMBA_SUBSYSTEM('tdb-test-helpers', 'test/external-agent.c test/lock-tracking.c test/logging.c', - tdb_deps, + 'replace', includes='include') for t in tdb1_unit_tests: -- 1.7.9.5 From 35c929be6875bf0ede8be0c0d44409fcad91226a Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Thu, 21 Feb 2013 16:34:32 +0100 Subject: [PATCH 24/49] TODO: tdb: Add mutex support --- lib/tdb/ABI/tdb-1.3.0.sigs | 68 +++ lib/tdb/common/check.c | 4 +- lib/tdb/common/io.c | 92 ++- lib/tdb/common/lock.c | 79 ++- lib/tdb/common/mutex.c | 902 ++++++++++++++++++++++++++++ lib/tdb/common/open.c | 197 +++++- lib/tdb/common/tdb.c | 9 + lib/tdb/common/tdb_private.h | 30 +- lib/tdb/common/transaction.c | 3 +- lib/tdb/docs/mutex.txt | 106 ++++ lib/tdb/include/tdb.h | 32 + lib/tdb/test/run-3G-file.c | 7 +- lib/tdb/test/run-bad-tdb-header.c | 1 + lib/tdb/test/run-check.c | 1 + lib/tdb/test/run-corrupt.c | 1 + lib/tdb/test/run-die-during-transaction.c | 1 + lib/tdb/test/run-endian.c | 1 + lib/tdb/test/run-incompatible.c | 1 + lib/tdb/test/run-nested-transactions.c | 1 + lib/tdb/test/run-nested-traverse.c | 1 + lib/tdb/test/run-no-lock-during-traverse.c | 1 + lib/tdb/test/run-oldhash.c | 1 + lib/tdb/test/run-open-during-transaction.c | 1 + lib/tdb/test/run-readonly-check.c | 1 + lib/tdb/test/run-rescue-find_entry.c | 1 + lib/tdb/test/run-rescue.c | 1 + lib/tdb/test/run-rwlock-check.c | 1 + lib/tdb/test/run-summary.c | 1 + lib/tdb/test/run-transaction-expand.c | 1 + lib/tdb/test/run-traverse-in-transaction.c | 1 + lib/tdb/test/run-wronghash-fail.c | 1 + lib/tdb/test/run-zero-append.c | 1 + lib/tdb/test/run.c | 1 + lib/tdb/wscript | 36 +- 34 files changed, 1540 insertions(+), 46 deletions(-) create mode 100644 lib/tdb/ABI/tdb-1.3.0.sigs create mode 100644 lib/tdb/common/mutex.c create mode 100644 lib/tdb/docs/mutex.txt diff --git a/lib/tdb/ABI/tdb-1.3.0.sigs b/lib/tdb/ABI/tdb-1.3.0.sigs new file mode 100644 index 0000000..7d3e469 --- /dev/null +++ b/lib/tdb/ABI/tdb-1.3.0.sigs @@ -0,0 +1,68 @@ 
+tdb_add_flags: void (struct tdb_context *, unsigned int) +tdb_append: int (struct tdb_context *, TDB_DATA, TDB_DATA) +tdb_chainlock: int (struct tdb_context *, TDB_DATA) +tdb_chainlock_mark: int (struct tdb_context *, TDB_DATA) +tdb_chainlock_nonblock: int (struct tdb_context *, TDB_DATA) +tdb_chainlock_read: int (struct tdb_context *, TDB_DATA) +tdb_chainlock_unmark: int (struct tdb_context *, TDB_DATA) +tdb_chainunlock: int (struct tdb_context *, TDB_DATA) +tdb_chainunlock_read: int (struct tdb_context *, TDB_DATA) +tdb_check: int (struct tdb_context *, int (*)(TDB_DATA, TDB_DATA, void *), void *) +tdb_close: int (struct tdb_context *) +tdb_delete: int (struct tdb_context *, TDB_DATA) +tdb_dump_all: void (struct tdb_context *) +tdb_enable_seqnum: void (struct tdb_context *) +tdb_error: enum TDB_ERROR (struct tdb_context *) +tdb_errorstr: const char *(struct tdb_context *) +tdb_exists: int (struct tdb_context *, TDB_DATA) +tdb_fd: int (struct tdb_context *) +tdb_fetch: TDB_DATA (struct tdb_context *, TDB_DATA) +tdb_firstkey: TDB_DATA (struct tdb_context *) +tdb_freelist_size: int (struct tdb_context *) +tdb_get_flags: int (struct tdb_context *) +tdb_get_logging_private: void *(struct tdb_context *) +tdb_get_seqnum: int (struct tdb_context *) +tdb_hash_size: int (struct tdb_context *) +tdb_increment_seqnum_nonblock: void (struct tdb_context *) +tdb_jenkins_hash: unsigned int (TDB_DATA *) +tdb_lock_nonblock: int (struct tdb_context *, int, int) +tdb_lockall: int (struct tdb_context *) +tdb_lockall_mark: int (struct tdb_context *) +tdb_lockall_nonblock: int (struct tdb_context *) +tdb_lockall_read: int (struct tdb_context *) +tdb_lockall_read_nonblock: int (struct tdb_context *) +tdb_lockall_unmark: int (struct tdb_context *) +tdb_log_fn: tdb_log_func (struct tdb_context *) +tdb_map_size: size_t (struct tdb_context *) +tdb_name: const char *(struct tdb_context *) +tdb_nextkey: TDB_DATA (struct tdb_context *, TDB_DATA) +tdb_null: dptr = 0xXXXX, dsize = 0 +tdb_open: 
struct tdb_context *(const char *, int, int, int, mode_t) +tdb_open_ex: struct tdb_context *(const char *, int, int, int, mode_t, const struct tdb_logging_context *, tdb_hash_func) +tdb_parse_record: int (struct tdb_context *, TDB_DATA, int (*)(TDB_DATA, TDB_DATA, void *), void *) +tdb_printfreelist: int (struct tdb_context *) +tdb_remove_flags: void (struct tdb_context *, unsigned int) +tdb_reopen: int (struct tdb_context *) +tdb_reopen_all: int (int) +tdb_repack: int (struct tdb_context *) +tdb_rescue: int (struct tdb_context *, void (*)(TDB_DATA, TDB_DATA, void *), void *) +tdb_runtime_check_for_robust_mutexes: bool (void) +tdb_set_logging_function: void (struct tdb_context *, const struct tdb_logging_context *) +tdb_set_max_dead: void (struct tdb_context *, int) +tdb_setalarm_sigptr: void (struct tdb_context *, volatile sig_atomic_t *) +tdb_store: int (struct tdb_context *, TDB_DATA, TDB_DATA, int) +tdb_summary: char *(struct tdb_context *) +tdb_transaction_cancel: int (struct tdb_context *) +tdb_transaction_commit: int (struct tdb_context *) +tdb_transaction_prepare_commit: int (struct tdb_context *) +tdb_transaction_start: int (struct tdb_context *) +tdb_transaction_start_nonblock: int (struct tdb_context *) +tdb_transaction_write_lock_mark: int (struct tdb_context *) +tdb_transaction_write_lock_unmark: int (struct tdb_context *) +tdb_traverse: int (struct tdb_context *, tdb_traverse_func, void *) +tdb_traverse_read: int (struct tdb_context *, tdb_traverse_func, void *) +tdb_unlock: int (struct tdb_context *, int, int) +tdb_unlockall: int (struct tdb_context *) +tdb_unlockall_read: int (struct tdb_context *) +tdb_validate_freelist: int (struct tdb_context *, int *) +tdb_wipe_all: int (struct tdb_context *) diff --git a/lib/tdb/common/check.c b/lib/tdb/common/check.c index 9f9d870..e632af5 100644 --- a/lib/tdb/common/check.c +++ b/lib/tdb/common/check.c @@ -39,7 +39,9 @@ static bool tdb_check_header(struct tdb_context *tdb, tdb_off_t *recovery) if (hdr.version 
!= TDB_VERSION) goto corrupt; - if (hdr.rwlocks != 0 && hdr.rwlocks != TDB_HASH_RWLOCK_MAGIC) + if (hdr.rwlocks != 0 && + hdr.rwlocks != TDB_FEATURE_FLAG_MAGIC && + hdr.rwlocks != TDB_HASH_RWLOCK_MAGIC) goto corrupt; tdb_header_hash(tdb, &h1, &h2); diff --git a/lib/tdb/common/io.c b/lib/tdb/common/io.c index 11dfefd..537a9e9 100644 --- a/lib/tdb/common/io.c +++ b/lib/tdb/common/io.c @@ -28,6 +28,69 @@ #include "tdb_private.h" +/* + * We prepend the mutex area, so fixup offsets. See mutex.c for details. + */ + +static bool tdb_adjust_offset(struct tdb_context *tdb, off_t *off) +{ + tdb_off_t tdb_off = *off; + size_t mutex_size = tdb_mutex_size(tdb); + + if (!tdb_add_off_t(tdb_off, mutex_size, &tdb_off)) { + errno = EIO; + return false; + } + *off = tdb_off; + return true; +} + +static ssize_t tdb_pwrite(struct tdb_context *tdb, const void *buf, + size_t count, off_t offset) +{ + if (!tdb_adjust_offset(tdb, &offset)) { + return -1; + } + return pwrite(tdb->fd, buf, count, offset); +} + +static ssize_t tdb_pread(struct tdb_context *tdb, void *buf, + size_t count, off_t offset) +{ + if (!tdb_adjust_offset(tdb, &offset)) { + return -1; + } + return pread(tdb->fd, buf, count, offset); +} + +static int tdb_ftruncate(struct tdb_context *tdb, off_t length) +{ + if (!tdb_adjust_offset(tdb, &length)) { + return -1; + } + return ftruncate(tdb->fd, length); +} + +static int tdb_fstat(struct tdb_context *tdb, struct stat *buf) +{ + size_t mutex_len; + int ret; + + ret = fstat(tdb->fd, buf); + if (ret == -1) { + return -1; + } + + mutex_len = tdb_mutex_size(tdb); + if (buf->st_size < mutex_len) { + errno = EIO; + return -1; + } + buf->st_size -= mutex_len; + + return ret; +} + /* check for an out of bounds access - if it is out of bounds then see if the database has been expanded by someone else and expand if necessary @@ -58,7 +121,7 @@ static int tdb_oob(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len, return -1; } - if (fstat(tdb->fd, &st) == -1) { + if (tdb_fstat(tdb, 
&st) == -1) { tdb->ecode = TDB_ERR_IO; return -1; } @@ -122,16 +185,18 @@ static int tdb_write(struct tdb_context *tdb, tdb_off_t off, tdb->ecode = TDB_ERR_IO; return -1; #else - ssize_t written = pwrite(tdb->fd, buf, len, off); + ssize_t written; + + written = tdb_pwrite(tdb, buf, len, off); + if ((written != (ssize_t)len) && (written != -1)) { /* try once more */ tdb->ecode = TDB_ERR_IO; TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: wrote only " "%zi of %u bytes at %u, trying once more\n", written, len, off)); - written = pwrite(tdb->fd, (const char *)buf+written, - len-written, - off+written); + written = tdb_pwrite(tdb, (const char *)buf+written, + len-written, off+written); } if (written == -1) { /* Ensure ecode is set for log fn. */ @@ -176,7 +241,9 @@ static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf, tdb->ecode = TDB_ERR_IO; return -1; #else - ssize_t ret = pread(tdb->fd, buf, len, off); + ssize_t ret; + + ret = tdb_pread(tdb, buf, len, off); if (ret != (ssize_t)len) { /* Ensure ecode is set for log fn. */ tdb->ecode = TDB_ERR_IO; @@ -258,7 +325,8 @@ int tdb_mmap(struct tdb_context *tdb) if (should_mmap(tdb)) { tdb->map_ptr = mmap(NULL, tdb->map_size, PROT_READ|(tdb->read_only? 0:PROT_WRITE), - MAP_SHARED|MAP_FILE, tdb->fd, 0); + MAP_SHARED|MAP_FILE, tdb->fd, + tdb_mutex_size(tdb)); /* * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!! 
@@ -303,12 +371,12 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t ad return -1; } - if (ftruncate(tdb->fd, new_size) == -1) { + if (tdb_ftruncate(tdb, new_size) == -1) { char b = 0; - ssize_t written = pwrite(tdb->fd, &b, 1, new_size - 1); + ssize_t written = tdb_pwrite(tdb, &b, 1, new_size - 1); if (written == 0) { /* try once more, potentially revealing errno */ - written = pwrite(tdb->fd, &b, 1, new_size - 1); + written = tdb_pwrite(tdb, &b, 1, new_size - 1); } if (written == 0) { /* again - give up, guessing errno */ @@ -328,10 +396,10 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t ad memset(buf, TDB_PAD_BYTE, sizeof(buf)); while (addition) { size_t n = addition>sizeof(buf)?sizeof(buf):addition; - ssize_t written = pwrite(tdb->fd, buf, n, size); + ssize_t written = tdb_pwrite(tdb, buf, n, size); if (written == 0) { /* prevent infinite loops: try _once_ more */ - written = pwrite(tdb->fd, buf, n, size); + written = tdb_pwrite(tdb, buf, n, size); } if (written == 0) { /* give up, trying to provide a useful errno */ diff --git a/lib/tdb/common/lock.c b/lib/tdb/common/lock.c index 486de79..6644c40 100644 --- a/lib/tdb/common/lock.c +++ b/lib/tdb/common/lock.c @@ -38,6 +38,15 @@ static int fcntl_lock(struct tdb_context *tdb, struct flock fl; int cmd; +#ifdef USE_TDB_MUTEX_LOCKING + { + int ret; + if (tdb_mutex_lock(tdb, rw, off, len, waitflag, &ret)) { + return ret; + } + } +#endif + fl.l_type = rw; fl.l_whence = SEEK_SET; fl.l_start = off; @@ -110,6 +119,15 @@ static int fcntl_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len) fclose(locks); #endif +#ifdef USE_TDB_MUTEX_LOCKING + { + int ret; + if (tdb_mutex_unlock(tdb, rw, off, len, &ret)) { + return ret; + } + } +#endif + fl.l_type = F_UNLCK; fl.l_whence = SEEK_SET; fl.l_start = off; @@ -248,13 +266,27 @@ int tdb_allrecord_upgrade(struct tdb_context *tdb) return -1; } - ret = tdb_brlock_retry(tdb, F_WRLCK, FREELIST_TOP, 0, - 
TDB_LOCK_WAIT|TDB_LOCK_PROBE); + if (tdb_have_mutexes(tdb)) { + ret = tdb_mutex_allrecord_upgrade(tdb); + if (ret == -1) { + goto fail; + } + ret = tdb_brlock_retry(tdb, F_WRLCK, lock_offset(tdb->hash_size), + 0, TDB_LOCK_WAIT|TDB_LOCK_PROBE); + if (ret == -1) { + tdb_mutex_allrecord_downgrade(tdb); + } + } else { + ret = tdb_brlock_retry(tdb, F_WRLCK, FREELIST_TOP, 0, + TDB_LOCK_WAIT|TDB_LOCK_PROBE); + } + if (ret == 0) { tdb->allrecord_lock.ltype = F_WRLCK; tdb->allrecord_lock.off = 0; return 0; } +fail: TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_allrecord_upgrade failed\n")); return -1; } @@ -593,6 +625,8 @@ static int tdb_chainlock_gradual(struct tdb_context *tdb, int tdb_allrecord_lock(struct tdb_context *tdb, int ltype, enum tdb_lock_flags flags, bool upgradable) { + int ret; + switch (tdb_allrecord_check(tdb, ltype, flags, upgradable)) { case -1: return -1; @@ -607,16 +641,27 @@ int tdb_allrecord_lock(struct tdb_context *tdb, int ltype, * * It is (1) which cause the starvation problem, so we're only * gradual for that. */ - if (tdb_chainlock_gradual(tdb, ltype, flags, FREELIST_TOP, - tdb->hash_size * 4) == -1) { + + if (tdb_have_mutexes(tdb)) { + ret = tdb_mutex_allrecord_lock(tdb, ltype, flags); + } else { + ret = tdb_chainlock_gradual(tdb, ltype, flags, FREELIST_TOP, + tdb->hash_size * 4); + } + + if (ret == -1) { return -1; } /* Grab individual record locks. 
*/ if (tdb_brlock(tdb, ltype, lock_offset(tdb->hash_size), 0, flags) == -1) { - tdb_brunlock(tdb, ltype, FREELIST_TOP, - tdb->hash_size * 4); + if (tdb_have_mutexes(tdb)) { + tdb_mutex_allrecord_unlock(tdb); + } else { + tdb_brunlock(tdb, ltype, FREELIST_TOP, + tdb->hash_size * 4); + } return -1; } @@ -672,9 +717,25 @@ int tdb_allrecord_unlock(struct tdb_context *tdb, int ltype, bool mark_lock) return 0; } - if (!mark_lock && tdb_brunlock(tdb, ltype, FREELIST_TOP, 0)) { - TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlockall failed (%s)\n", strerror(errno))); - return -1; + if (!mark_lock) { + int ret; + + if (tdb_have_mutexes(tdb)) { + ret = tdb_mutex_allrecord_unlock(tdb); + if (ret == 0) { + ret = tdb_brunlock(tdb, ltype, + lock_offset(tdb->hash_size), + 0); + } + } else { + ret = tdb_brunlock(tdb, ltype, FREELIST_TOP, 0); + } + + if (ret != 0) { + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlockall failed " + "(%s)\n", strerror(errno))); + return -1; + } } tdb->allrecord_lock.count = 0; diff --git a/lib/tdb/common/mutex.c b/lib/tdb/common/mutex.c new file mode 100644 index 0000000..c158b6c --- /dev/null +++ b/lib/tdb/common/mutex.c @@ -0,0 +1,902 @@ +/* + Unix SMB/CIFS implementation. + + trivial database library + + Copyright (C) Volker Lendecke 2012,2013 + + ** NOTE! The following LGPL license applies to the tdb + ** library. This does NOT imply that all of Samba is released + ** under the LGPL + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see <http://www.gnu.org/licenses/>. +*/ +#include "tdb_private.h" +#include "system/threads.h" + +#ifdef USE_TDB_MUTEX_LOCKING + +/* + * If we run with mutexes, we store the "struct tdb_mutexes" at the + * beginning of the file. We store an additional tdb_header right + * beyond the mutex area, page aligned. All the offsets within the tdb + * are relative to the area behind the mutex area. tdb->map_ptr points + * behind the mmap area as well, so the read and write path in the + * mutex case can remain unchanged. + * + * Early in the mutex development the mutexes were placed between the hash + * chain pointers and the real tdb data. This had two drawbacks: First, it + * made pointer calculations more complex. Second, we had to mmap the mutex + * area twice. One was the normal map_ptr in the tdb. This frequently changed + * from within tdb_oob. At least the Linux glibc robust mutex code assumes + * constant pointers in memory, so a constantly changing mmap area destroys + * the mutex list. So we had to mmap the first bytes of the file with a second + * mmap call. With that scheme, very weird errors happened that could be + * easily fixed by doing the mutex mmap in a second file. It seemed that + * mapping the same memory area twice does not end up in accessing the same + * physical page, looking at the mutexes in gdb it seemed that old data showed + * up after some re-mapping. To avoid a separate mutex file, the code now puts + * the real content of the tdb file after the mutex area. This way we do not + * have overlapping mmap areas, the mutex area is mmapped once and not + * changed, the tdb data area's mmap is constantly changed but does not + * overlap.
+ */ + +struct tdb_mutexes { + struct tdb_header hdr; + pthread_mutex_t allrecord_mutex; /* protect allrecord_lock */ + + short int allrecord_lock; /* F_UNLCK: free, + F_RDLCK: shared, + F_WRLCK: exclusive */ + + pthread_mutex_t hashchains[1]; /* We allocate more */ +}; + +bool tdb_have_mutexes(struct tdb_context *tdb) +{ + return ((tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) != 0); +} + +size_t tdb_mutex_size(struct tdb_context *tdb) +{ + size_t mutex_size; + + if (!tdb_have_mutexes(tdb)) { + return 0; + } + + mutex_size = sizeof(struct tdb_mutexes); + mutex_size += tdb->hash_size * sizeof(pthread_mutex_t); + + return TDB_ALIGN(mutex_size, tdb->page_size); +} + +/* + * Get the index for a chain mutex + */ +static bool tdb_mutex_index(struct tdb_context *tdb, off_t off, off_t len, + unsigned *idx) +{ + /* + * Weird but true: We fcntl lock 1 byte at an offset 4 bytes before + * the 4 bytes of the freelist start and the hash chain that is about + * to be locked. See lock_offset() where the freelist is -1 vs the + * "+1" in TDB_HASH_TOP(). Because the mutex array is represented in + * the tdb file itself as data, we need to adjust the offset here. + */ + const off_t freelist_lock_ofs = FREELIST_TOP - sizeof(tdb_off_t); + + if (!tdb_have_mutexes(tdb)) { + return false; + } + if (len != 1) { + /* Possibly the allrecord lock */ + return false; + } + if (off < freelist_lock_ofs) { + /* One of the special locks */ + return false; + } + if (tdb->hash_size == 0) { + /* tdb not initialized yet, called from tdb_open_ex() */ + return false; + } + if (off >= TDB_DATA_START(tdb->hash_size)) { + /* Single record lock from traverses */ + return false; + } + + /* + * Now we know it's a freelist or hash chain lock. Those are always 4 + * byte aligned. Paranoia check. 
+ */ + if ((off % sizeof(tdb_off_t)) != 0) { + abort(); + } + + /* + * Re-index the fcntl offset into an offset into the mutex array + */ + off -= freelist_lock_ofs; /* rebase to index 0 */ + off /= sizeof(tdb_off_t); /* 0 for freelist 1-n for hashchain */ + + *idx = off; + return true; +} + +static int chain_mutex_lock(pthread_mutex_t *m, bool waitflag) +{ + int ret; + + if (waitflag) { + ret = pthread_mutex_lock(m); + } else { + ret = pthread_mutex_trylock(m); + } + if (ret != EOWNERDEAD) { + return ret; + } + + /* + * For chainlocks, we don't do any cleanup (yet?) + */ + return pthread_mutex_consistent(m); +} + +static int allrecord_mutex_lock(struct tdb_mutexes *m, bool waitflag) +{ + int ret; + + if (waitflag) { + ret = pthread_mutex_lock(&m->allrecord_mutex); + } else { + ret = pthread_mutex_trylock(&m->allrecord_mutex); + } + if (ret != EOWNERDEAD) { + return ret; + } + + /* + * The allrecord lock holder died. We need to reset the allrecord_lock + * to F_UNLCK. This should also be the indication for + * tdb_needs_recovery. + */ + m->allrecord_lock = F_UNLCK; + + return pthread_mutex_consistent(&m->allrecord_mutex); +} + +bool tdb_mutex_lock(struct tdb_context *tdb, int rw, off_t off, off_t len, + bool waitflag, int *pret) +{ + struct tdb_mutexes *m = tdb->mutexes; + pthread_mutex_t *chain; + int ret; + unsigned idx; + bool allrecord_ok; + + if (!tdb_mutex_index(tdb, off, len, &idx)) { + return false; + } + chain = &m->hashchains[idx]; + +again: + ret = chain_mutex_lock(chain, waitflag); + if (ret == EBUSY) { + ret = EAGAIN; + } + if (ret != 0) { + errno = ret; + goto fail; + } + + if (tdb->num_lockrecs > 0) { + /* + * We can only check the allrecord lock once. If we do it with + * one chain mutex locked, we will deadlock with the allrecord + * locker process in the following way: We lock the first hash + * chain, we check for the allrecord lock. We keep the hash + * chain locked. Then the allrecord locker comes and takes the + * allrecord lock. 
It walks the list of chain mutexes, locking + * them all in sequence. Meanwhile, we have the chain mutex + * locked, so the allrecord locker blocks trying to lock our + * chain mutex. Then we come in and try to lock the second + * chain lock, which in most cases will be the freelist. We + * see that the allrecord lock is locked and put ourselves on + * the allrecord_waiters condition variable. This will never + * be signalled though because the allrecord locker waits for + * us to give up the chain lock. + */ + + *pret = 0; + return true; + } + + /* + * Check if someone has the allrecord lock: queue if so. + */ + + allrecord_ok = false; + + if (m->allrecord_lock == F_UNLCK) { + /* + * allrecord lock not taken + */ + allrecord_ok = true; + } + + if ((m->allrecord_lock == F_RDLCK) && (rw == F_RDLCK)) { + /* + * allrecord shared lock taken, but we only want to read + */ + allrecord_ok = true; + } + + if (allrecord_ok) { + *pret = 0; + return true; + } + + ret = pthread_mutex_unlock(chain); + if (ret != 0) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock" + "(chain_mutex) failed: %s\n", strerror(ret))); + errno = ret; + goto fail; + } + ret = allrecord_mutex_lock(m, waitflag); + if (ret != 0) { + if (waitflag || (ret != EBUSY)) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_%slock" + "(allrecord_mutex) failed: %s\n", + waitflag ?
"" : "try_", strerror(ret))); + } + errno = EAGAIN; + goto fail; + } + ret = pthread_mutex_unlock(&m->allrecord_mutex); + if (ret != 0) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock" + "(allrecord_mutex) failed: %s\n", strerror(ret))); + errno = ret; + goto fail; + } + goto again; + +fail: + *pret = -1; + return true; +} + +bool tdb_mutex_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len, + int *pret) +{ + struct tdb_mutexes *m = tdb->mutexes; + pthread_mutex_t *chain; + int ret; + unsigned idx; + + if (!tdb_mutex_index(tdb, off, len, &idx)) { + return false; + } + chain = &m->hashchains[idx]; + + ret = pthread_mutex_unlock(chain); + if (ret == 0) { + *pret = 0; + return true; + } + *pret = -1; + return true; +} + +int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype, + enum tdb_lock_flags flags) +{ + struct tdb_mutexes *m = tdb->mutexes; + int ret; + uint32_t i; + + if (tdb->flags & TDB_NOLOCK) { + return 0; + } + + if (flags & TDB_LOCK_MARK_ONLY) { + return 0; + } + + ret = allrecord_mutex_lock(m, flags & TDB_LOCK_WAIT); + if (ret != 0) { + if (!(flags & TDB_LOCK_PROBE)) { + TDB_LOG((tdb, TDB_DEBUG_TRACE, "pthread_mutex_trylock " + "failed: %s\n", strerror(ret))); + } + return ret; + } + + if (m->allrecord_lock != F_UNLCK) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n", + (int)m->allrecord_lock)); + goto fail_unlock_allrecord_mutex; + } + m->allrecord_lock = (ltype == F_RDLCK) ? 
F_RDLCK : F_WRLCK; + + for (i=0; i<tdb->hash_size; i++) { + + /* ignore hashchains[0], the freelist */ + pthread_mutex_t *chain = &m->hashchains[i+1]; + + ret = chain_mutex_lock(chain, flags & TDB_LOCK_WAIT); + if (ret != 0) { + if (!(flags & TDB_LOCK_PROBE)) { + TDB_LOG((tdb, TDB_DEBUG_TRACE, + "pthread_mutex_trylock " + "failed: %s\n", strerror(ret))); + } + goto fail_unroll_allrecord_lock; + } + + ret = pthread_mutex_unlock(chain); + if (ret != 0) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock" + "(chainlock) failed: %s\n", strerror(ret))); + goto fail_unroll_allrecord_lock; + } + } + /* + * We leave this routine with m->allrecord_mutex locked + */ + return 0; + +fail_unroll_allrecord_lock: + m->allrecord_lock = F_UNLCK; + +fail_unlock_allrecord_mutex: + ret = pthread_mutex_unlock(&m->allrecord_mutex); + if (ret != 0) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock" + "(allrecord_mutex) failed: %s\n", strerror(ret))); + } + return -1; +} + +int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb) +{ + struct tdb_mutexes *m = tdb->mutexes; + int ret; + uint32_t i; + + if (tdb->flags & TDB_NOLOCK) { + return 0; + } + + if (m->allrecord_lock != F_RDLCK) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n", + (int)m->allrecord_lock)); + return -1; + } + + m->allrecord_lock = F_WRLCK; + + for (i=0; i<tdb->hash_size; i++) { + + /* ignore hashchains[0], the freelist */ + pthread_mutex_t *chain = &m->hashchains[i+1]; + + ret = chain_mutex_lock(chain, true); + if (ret != 0) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_lock" + "(chainlock) failed: %s\n", strerror(ret))); + goto fail_unroll_allrecord_lock; + } + + ret = pthread_mutex_unlock(chain); + if (ret != 0) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock" + "(chainlock) failed: %s\n", strerror(ret))); + return -1; + } + } + return 0; + +fail_unroll_allrecord_lock: + m->allrecord_lock = F_RDLCK; + return -1; +} + +int tdb_mutex_allrecord_downgrade(struct tdb_context *tdb) +{ + struct
tdb_mutexes *m = tdb->mutexes; + + if (m->allrecord_lock != F_WRLCK) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n", + (int)m->allrecord_lock)); + return -1; + } + + m->allrecord_lock = F_RDLCK; + return 0; +} + + +int tdb_mutex_allrecord_unlock(struct tdb_context *tdb) +{ + struct tdb_mutexes *m = tdb->mutexes; + short old; + int ret; + + if (tdb->flags & TDB_NOLOCK) { + return 0; + } + + if ((m->allrecord_lock != F_RDLCK) && (m->allrecord_lock != F_WRLCK)) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n", + (int)m->allrecord_lock)); + return -1; + } + + old = m->allrecord_lock; + m->allrecord_lock = F_UNLCK; + + ret = pthread_mutex_unlock(&m->allrecord_mutex); + if (ret != 0) { + m->allrecord_lock = old; + TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock" + "(allrecord_mutex) failed: %s\n", strerror(ret))); + return -1; + } + return 0; +} + +int tdb_mutex_init(struct tdb_context *tdb) +{ + struct tdb_mutexes *m; + pthread_mutexattr_t ma; + int i, ret; + + ret = tdb_mutex_mmap(tdb); + if (ret == -1) { + return errno; + } + m = tdb->mutexes; + + ret = pthread_mutexattr_init(&ma); + if (ret != 0) { + goto fail_munmap; + } + ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK); + if (ret != 0) { + goto fail; + } + ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED); + if (ret != 0) { + goto fail; + } + ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST); + if (ret != 0) { + goto fail; + } + + for (i=0; i<tdb->hash_size+1; i++) { + pthread_mutex_t *chain = &m->hashchains[i]; + + ret = pthread_mutex_init(chain, &ma); + if (ret != 0) { + goto fail; + } + } + + m->allrecord_lock = F_UNLCK; + + ret = pthread_mutex_init(&m->allrecord_mutex, &ma); + if (ret != 0) { + goto fail; + } + ret = 0; +fail: + pthread_mutexattr_destroy(&ma); +fail_munmap: + tdb_mutex_munmap(tdb); + return ret; +} + +int tdb_mutex_mmap(struct tdb_context *tdb) +{ + size_t len; + void *ptr; + + len = tdb_mutex_size(tdb); + if (len == 0) { + return
0; + } + + ptr = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FILE, + tdb->fd, 0); + + if (ptr == MAP_FAILED) { + return -1; + } + tdb->mutexes = (struct tdb_mutexes *)ptr; + return 0; +} + +int tdb_mutex_munmap(struct tdb_context *tdb) +{ + size_t len; + + len = tdb_mutex_size(tdb); + if (len == 0) { + return 0; + } + + return munmap(tdb->mutexes, len); +} + +static bool tdb_mutex_locking_cached; + +bool tdb_mutex_locking_supported(void) +{ + pthread_mutexattr_t ma; + pthread_mutex_t m; + int ret; + static bool initialized; + + if (initialized) { + return tdb_mutex_locking_cached; + } + + initialized = true; + + ret = pthread_mutexattr_init(&ma); + if (ret != 0) { + return false; + } + ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK); + if (ret != 0) { + goto cleanup_ma; + } + ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED); + if (ret != 0) { + goto cleanup_ma; + } + ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST); + if (ret != 0) { + goto cleanup_ma; + } + ret = pthread_mutex_init(&m, &ma); + if (ret != 0) { + goto cleanup_ma; + } + ret = pthread_mutex_lock(&m); + if (ret != 0) { + goto cleanup_m; + } + /* + * This makes sure we have real mutexes + * from a threading library instead of just + * stubs from libc. 
+ */ + ret = pthread_mutex_lock(&m); + if (ret != EDEADLK) { + goto cleanup_lock; + } + ret = pthread_mutex_unlock(&m); + if (ret != 0) { + goto cleanup_m; + } + + tdb_mutex_locking_cached = true; + goto cleanup_m; + +cleanup_lock: + pthread_mutex_unlock(&m); +cleanup_m: + pthread_mutex_destroy(&m); +cleanup_ma: + pthread_mutexattr_destroy(&ma); + return tdb_mutex_locking_cached; +} + +static void (*tdb_robust_mutext_old_handler)(int) = SIG_ERR; +static pid_t tdb_robust_mutex_pid = -1; + +static void tdb_robust_mutex_handler(int sig) +{ + if (tdb_robust_mutex_pid != -1) { + pid_t pid; + int status; + + pid = waitpid(tdb_robust_mutex_pid, &status, WNOHANG); + if (pid == tdb_robust_mutex_pid) { + tdb_robust_mutex_pid = -1; + return; + } + } + + if (tdb_robust_mutext_old_handler == SIG_DFL) { + return; + } + + if (tdb_robust_mutext_old_handler == SIG_IGN) { + return; + } + if (tdb_robust_mutext_old_handler == SIG_ERR) { + return; + } + + tdb_robust_mutext_old_handler(sig); +} + +_PUBLIC_ bool tdb_runtime_check_for_robust_mutexes(void) +{ + void *ptr; + pthread_mutex_t *m; + pthread_mutexattr_t ma; + int ret = 1; + int pipe_down[2] = { -1, -1 }; + int pipe_up[2] = { -1, -1 }; + ssize_t nread; + char c = 0; + bool ok; + int status; + static bool initialized; + + if (initialized) { + return tdb_mutex_locking_cached; + } + + initialized = true; + + ok = tdb_mutex_locking_supported(); + if (!ok) { + return false; + } + + tdb_mutex_locking_cached = false; + + ptr = mmap(NULL, sizeof(pthread_mutex_t), PROT_READ|PROT_WRITE, + MAP_SHARED|MAP_ANON, -1 /* fd */, 0); + if (ptr == MAP_FAILED) { + return false; + } + m = (pthread_mutex_t *)ptr; + + ret = pipe(pipe_down); + if (ret != 0) { + goto cleanup_mmap; + } + ret = pipe(pipe_up); + if (ret != 0) { + goto cleanup_pipe; + } + + ret = pthread_mutexattr_init(&ma); + if (ret != 0) { + goto cleanup_pipe; + } + ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK); + if (ret != 0) { + goto cleanup_ma; + } + ret = 
pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED); + if (ret != 0) { + goto cleanup_ma; + } + ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST); + if (ret != 0) { + goto cleanup_ma; + } + ret = pthread_mutex_init(m, &ma); + if (ret != 0) { + goto cleanup_ma; + } + + tdb_robust_mutext_old_handler = signal(SIGCHLD, + tdb_robust_mutex_handler); + + tdb_robust_mutex_pid = fork(); + if (tdb_robust_mutex_pid == 0) { + size_t nwritten; + close(pipe_down[1]); + close(pipe_up[0]); + ret = pthread_mutex_lock(m); + nwritten = write(pipe_up[1], &ret, sizeof(ret)); + if (nwritten != sizeof(ret)) { + exit(1); + } + if (ret != 0) { + exit(1); + } + nread = read(pipe_down[0], &c, 1); + if (nread != 1) { + exit(1); + } + /* leave locked */ + exit(0); + } + if (tdb_robust_mutex_pid == -1) { + goto cleanup_sig_child; + } + close(pipe_down[0]); + pipe_down[0] = -1; + close(pipe_up[1]); + pipe_up[1] = -1; + + nread = read(pipe_up[0], &ret, sizeof(ret)); + if (nread != sizeof(ret)) { + goto cleanup_child; + } + + ret = pthread_mutex_trylock(m); + if (ret != EBUSY) { + if (ret == 0) { + pthread_mutex_unlock(m); + } + goto cleanup_child; + } + + if (write(pipe_down[1], &c, 1) != 1) { + goto cleanup_child; + } + + nread = read(pipe_up[0], &c, 1); + if (nread != 0) { + goto cleanup_child; + } + + while (tdb_robust_mutex_pid > 0) { + pid_t pid; + + errno = 0; + pid = waitpid(tdb_robust_mutex_pid, &status, 0); + if (pid == tdb_robust_mutex_pid) { + tdb_robust_mutex_pid = -1; + break; + } + if (pid == -1 && errno != EINTR) { + goto cleanup_child; + } + } + signal(SIGCHLD, tdb_robust_mutext_old_handler); + + ret = pthread_mutex_trylock(m); + if (ret != EOWNERDEAD) { + if (ret == 0) { + pthread_mutex_unlock(m); + } + goto cleanup_m; + } + + ret = pthread_mutex_consistent(m); + if (ret != 0) { + goto cleanup_m; + } + + ret = pthread_mutex_trylock(m); + if (ret != EDEADLK) { + pthread_mutex_unlock(m); + goto cleanup_m; + } + + ret = pthread_mutex_unlock(m); + if (ret != 0) { + 
goto cleanup_m; + } + + tdb_mutex_locking_cached = true; + goto cleanup_m; + +cleanup_child: + while (tdb_robust_mutex_pid > 0) { + pid_t pid; + + kill(tdb_robust_mutex_pid, SIGKILL); + + errno = 0; + pid = waitpid(tdb_robust_mutex_pid, &status, 0); + if (pid == tdb_robust_mutex_pid) { + tdb_robust_mutex_pid = -1; + break; + } + if (pid == -1 && errno != EINTR) { + break; + } + } +cleanup_sig_child: + signal(SIGCHLD, tdb_robust_mutext_old_handler); +cleanup_m: + pthread_mutex_destroy(m); +cleanup_ma: + pthread_mutexattr_destroy(&ma); +cleanup_pipe: + if (pipe_down[0] != -1) { + close(pipe_down[0]); + } + if (pipe_down[1] != -1) { + close(pipe_down[1]); + } + if (pipe_up[0] != -1) { + close(pipe_up[0]); + } + if (pipe_up[1] != -1) { + close(pipe_up[1]); + } +cleanup_mmap: + munmap(ptr, sizeof(pthread_mutex_t)); + + return tdb_mutex_locking_cached; +} + +#else + +size_t tdb_mutex_size(struct tdb_context *tdb) +{ + return 0; +} + +bool tdb_have_mutexes(struct tdb_context *tdb) +{ + return false; +} + +int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype, + enum tdb_lock_flags flags) +{ + return -1; +} + +int tdb_mutex_allrecord_unlock(struct tdb_context *tdb) +{ + return -1; +} + +int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb) +{ + return -1; +} + +int tdb_mutex_allrecord_downgrade(struct tdb_context *tdb) +{ + return -1; +} + +int tdb_mutex_mmap(struct tdb_context *tdb) +{ + errno = ENOSYS; + return -1; +} + +int tdb_mutex_munmap(struct tdb_context *tdb) +{ + errno = ENOSYS; + return -1; +} + +int tdb_mutex_init(struct tdb_context *tdb) +{ + return ENOSYS; +} + +bool tdb_mutex_locking_supported(void) +{ + return false; +} + +_PUBLIC_ bool tdb_runtime_check_for_robust_mutexes(void) +{ + return false; +} + +#endif diff --git a/lib/tdb/common/open.c b/lib/tdb/common/open.c index 789bc73..13d22e1 100644 --- a/lib/tdb/common/open.c +++ b/lib/tdb/common/open.c @@ -76,6 +76,16 @@ static int tdb_new_database(struct tdb_context *tdb, struct tdb_header 
*header, if (tdb->flags & TDB_INCOMPATIBLE_HASH) newdb->rwlocks = TDB_HASH_RWLOCK_MAGIC; + /* + * For the mutex code we add the FEATURE_FLAG_MAGIC, overwriting the + * TDB_HASH_RWLOCK_MAGIC above. + */ + if ((tdb->flags & TDB_MUTEX_LOCKING) && + tdb_mutex_locking_supported()) { + newdb->rwlocks = TDB_FEATURE_FLAG_MAGIC; + newdb->feature_flags |= TDB_FEATURE_FLAG_MUTEX; + } + if (tdb->flags & TDB_INTERNAL) { tdb->map_size = size; tdb->map_ptr = (char *)newdb; @@ -92,13 +102,55 @@ static int tdb_new_database(struct tdb_context *tdb, struct tdb_header *header, /* This creates an endian-converted header, as if read from disk */ CONVERT(*newdb); - memcpy(header, newdb, sizeof(*header)); /* Don't endian-convert the magic food! */ memcpy(newdb->magic_food, TDB_MAGIC_FOOD, strlen(TDB_MAGIC_FOOD)+1); if (!tdb_write_all(tdb->fd, newdb, size)) goto fail; + if (newdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) { + + tdb->feature_flags = newdb->feature_flags; + tdb->hash_size = newdb->hash_size; + + newdb->mutex_size = tdb_mutex_size(tdb); + + /* + * Overwrite newdb.mutex_size + */ + ret = lseek(tdb->fd, 0, SEEK_SET); + if (ret == -1) { + goto fail; + } + if (!tdb_write_all(tdb->fd, newdb, size)) { + goto fail; + } + + ret = ftruncate( + tdb->fd, + newdb->mutex_size + sizeof(struct tdb_header)); + if (ret == -1) { + goto fail; + } + ret = tdb_mutex_init(tdb); + if (ret == -1) { + goto fail; + } + + /* + * Write a second header behind the mutexes. That's the area + * that will be mmapp'ed. 
+ */ + ret = lseek(tdb->fd, newdb->mutex_size, SEEK_SET); + if (ret == -1) { + goto fail; + } + if (!tdb_write_all(tdb->fd, newdb, size)) { + goto fail; + } + } + + memcpy(header, newdb, sizeof(*header)); ret = 0; fail: SAFE_FREE(newdb); @@ -165,6 +217,68 @@ static bool check_header_hash(struct tdb_context *tdb, return check_header_hash(tdb, header, false, m1, m2); } +static bool tdb_mutex_open_ok(struct tdb_context *tdb) +{ + int locked; + + if (tdb->flags & TDB_NOMMAP) { + /* + * We need to mmap the mutex area + */ + TDB_LOG((tdb, TDB_DEBUG_ERROR, "Can not open a tdb with " + "mutexes without mmap\n")); + return false; + } + + locked = tdb_nest_lock(tdb, ACTIVE_LOCK, F_WRLCK, + TDB_LOCK_NOWAIT|TDB_LOCK_PROBE); + + if ((locked == -1) && (tdb->ecode == TDB_ERR_LOCK)) { + /* + * CLEAR_IF_FIRST still active. The tdb was created on this + * host, so we can assume the mutex implementation is + * compatible. Important for tools like tdbdump on a still + * open locking.tdb. + */ + return true; + } + + /* + * We got the CLEAR_IF_FIRST lock. That means the database was + * potentially copied from somewhere else. The mutex implementation + * might be incompatible. + */ + + if (tdb_nest_unlock(tdb, ACTIVE_LOCK, F_WRLCK, false) == -1) { + /* + * Should not happen + */ + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_mutex_open_ok: " + "failed to release ACTIVE_LOCK on %s: %s\n", + tdb->name, strerror(errno))); + return false; + } + + if (tdb->flags & TDB_NOLOCK) { + /* + * We don't look at locks, so it does not matter to have a + * compatible mutex implementation. Allow the open. + */ + return true; + } + + if (tdb->flags & TDB_CLEAR_IF_FIRST) { + /* + * About to create the db here. 
+ */ + return true; + } + + TDB_LOG((tdb, TDB_DEBUG_ERROR, "Can use mutexes only with " + "CLEAR_IF_FIRST or NOLOCK\n")); + return false; +} + _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags, int open_flags, mode_t mode, const struct tdb_logging_context *log_ctx, @@ -180,6 +294,7 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td unsigned v; const char *hash_alg; uint32_t magic1, magic2; + int ret; ZERO_STRUCT(header); @@ -326,7 +441,6 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td if ((tdb_flags & TDB_CLEAR_IF_FIRST) && (!tdb->read_only) && (locked = (tdb_nest_lock(tdb, ACTIVE_LOCK, F_WRLCK, TDB_LOCK_NOWAIT|TDB_LOCK_PROBE) == 0))) { - int ret; ret = tdb_brlock(tdb, F_WRLCK, FREELIST_TOP, 0, TDB_LOCK_WAIT); if (ret == -1) { @@ -390,12 +504,29 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td goto fail; if (header.rwlocks != 0 && + header.rwlocks != TDB_FEATURE_FLAG_MAGIC && header.rwlocks != TDB_HASH_RWLOCK_MAGIC) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: spinlocks no longer supported\n")); goto fail; } tdb->hash_size = header.hash_size; + if (header.rwlocks == TDB_FEATURE_FLAG_MAGIC) { + tdb->feature_flags = header.feature_flags; + } + + if (tdb_mutex_size(tdb) != header.mutex_size) { + TDB_LOG((tdb, TDB_DEBUG_ERROR, "Mutex size changed\n")); + errno = EINVAL; + goto fail; + } + + if ((tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) && + !tdb_mutex_open_ok(tdb)) { + errno = EINVAL; + goto fail; + } + if ((header.magic1_hash == 0) && (header.magic2_hash == 0)) { /* older TDB without magic hash references */ tdb->hash_fn = tdb_old_hash; @@ -426,19 +557,52 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td } /* Beware truncation! */ - tdb->map_size = st.st_size; - if (tdb->map_size != st.st_size) { - /* Ensure ecode is set for log fn. 
*/ - tdb->ecode = TDB_ERR_IO; - TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: " - "len %llu too large!\n", (long long)st.st_size)); - errno = EIO; - goto fail; + + { + uint32_t map_size = st.st_size; + if (map_size != st.st_size) { + /* Ensure ecode is set for log fn. */ + tdb->ecode = TDB_ERR_IO; + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: " + "len %llu too large!\n", + (long long)st.st_size)); + errno = EIO; + goto fail; + } } tdb->device = st.st_dev; tdb->inode = st.st_ino; - tdb_mmap(tdb); + + /* + * We had tdb_mmap(tdb) here before, + * but for the mutex case we have a modified tdb_fstat() + * which is triggered from tdb_oob() before calling tdb_mmap(). + */ + tdb->map_size = 0; + ret = tdb->methods->tdb_oob(tdb, 0, 1, 0); + if (ret == -1) { + goto fail; + } + + if (tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) { + if (!tdb_mutex_locking_supported()) { + /* + * Database was created with mutex locking, + * but we don't support it. + */ + errno = EINVAL; + goto fail; + } + + if (!(tdb->flags & TDB_NOLOCK)) { + ret = tdb_mutex_mmap(tdb); + if (ret != 0) { + goto fail; + } + } + } + if (locked) { if (tdb_nest_unlock(tdb, ACTIVE_LOCK, F_WRLCK, false) == -1) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " @@ -549,6 +713,9 @@ _PUBLIC_ int tdb_close(struct tdb_context *tdb) else tdb_munmap(tdb); } + + tdb_mutex_munmap(tdb); + SAFE_FREE(tdb->name); if (tdb->fd != -1) { ret = close(tdb->fd); @@ -628,7 +795,13 @@ static int tdb_reopen_internal(struct tdb_context *tdb, bool active_lock) TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: file dev/inode has changed!\n")); goto fail; } - if (tdb_mmap(tdb) != 0) { + /* + * We had tdb_mmap(tdb) here before, + * but for the mutex case we have a modified tdb_fstat() + * which is triggered from tdb_oob() before calling tdb_mmap(). 
+ */ + tdb->map_size = 0; + if (tdb->methods->tdb_oob(tdb, 0, 1, 0) != 0) { goto fail; } #endif /* fake pread or pwrite */ diff --git a/lib/tdb/common/tdb.c b/lib/tdb/common/tdb.c index ebd4ffe..ae98c96 100644 --- a/lib/tdb/common/tdb.c +++ b/lib/tdb/common/tdb.c @@ -723,6 +723,15 @@ _PUBLIC_ void tdb_remove_flags(struct tdb_context *tdb, unsigned flags) return; } + if ((flags & TDB_NOLOCK) && + (tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) && + (tdb->mutexes == NULL)) { + tdb->ecode = TDB_ERR_LOCK; + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_remove_flags: " + "Can not remove NOLOCK flag on mutexed databases")); + return; + } + if (flags & TDB_ALLOW_NESTING) { tdb->flags |= TDB_DISALLOW_NESTING; } diff --git a/lib/tdb/common/tdb_private.h b/lib/tdb/common/tdb_private.h index a672159..9790552 100644 --- a/lib/tdb/common/tdb_private.h +++ b/lib/tdb/common/tdb_private.h @@ -53,6 +53,7 @@ typedef uint32_t tdb_off_t; #define TDB_RECOVERY_MAGIC (0xf53bc0e7U) #define TDB_RECOVERY_INVALID_MAGIC (0x0) #define TDB_HASH_RWLOCK_MAGIC (0xbad1a51U) +#define TDB_FEATURE_FLAG_MAGIC (0xbad1a52U) #define TDB_ALIGNMENT 4 #define DEFAULT_HASH_SIZE 131 #define FREELIST_TOP (sizeof(struct tdb_header)) @@ -68,6 +69,8 @@ typedef uint32_t tdb_off_t; #define TDB_PAD_BYTE 0x42 #define TDB_PAD_U32 0x42424242 +#define TDB_FEATURE_FLAG_MUTEX 1 + /* NB assumes there is a local variable called "tdb" that is the * current context, also takes doubly-parenthesized print-style * argument. */ @@ -152,7 +155,9 @@ struct tdb_header { tdb_off_t sequence_number; /* used when TDB_SEQNUM is set */ uint32_t magic1_hash; /* hash of TDB_MAGIC_FOOD. */ uint32_t magic2_hash; /* hash of TDB_MAGIC. 
*/ - tdb_off_t reserved[27]; + uint32_t feature_flags; + uint32_t mutex_size; /* set if TDB_FEATURE_FLAG_MUTEX is set */ + tdb_off_t reserved[25]; }; struct tdb_lock_type { @@ -186,6 +191,8 @@ struct tdb_methods { int (*tdb_expand_file)(struct tdb_context *, tdb_off_t , tdb_off_t ); }; +struct tdb_mutexes; + struct tdb_context { char *name; /* the name of the database */ void *map_ptr; /* where it is currently mapped */ @@ -198,8 +205,12 @@ struct tdb_context { int num_lockrecs; struct tdb_lock_type *lockrecs; /* only real locks, all with count>0 */ int lockrecs_array_length; + + struct tdb_mutexes *mutexes; /* mmap of the mutex area */ + enum TDB_ERROR ecode; /* error code for last tdb error */ uint32_t hash_size; + uint32_t feature_flags; uint32_t flags; /* the flags passed to tdb_open */ struct tdb_traverse_lock travlocks; /* current traversal locks */ struct tdb_context *next; /* all tdbs to avoid multiple opens */ @@ -292,4 +303,21 @@ bool tdb_add_off_t(tdb_off_t a, tdb_off_t b, tdb_off_t *pret); /* tdb_off_t and tdb_len_t right now are both uint32_t */ #define tdb_add_len_t tdb_add_off_t + +bool tdb_mutex_locking_supported(void); +size_t tdb_mutex_size(struct tdb_context *tdb); +bool tdb_have_mutexes(struct tdb_context *tdb); +int tdb_mutex_init(struct tdb_context *tdb); +int tdb_mutex_mmap(struct tdb_context *tdb); +int tdb_mutex_munmap(struct tdb_context *tdb); +bool tdb_mutex_lock(struct tdb_context *tdb, int rw, off_t off, off_t len, + bool waitflag, int *pret); +bool tdb_mutex_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len, + int *pret); +int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype, + enum tdb_lock_flags flags); +int tdb_mutex_allrecord_unlock(struct tdb_context *tdb); +int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb); +int tdb_mutex_allrecord_downgrade(struct tdb_context *tdb); + #endif /* TDB_PRIVATE_H */ diff --git a/lib/tdb/common/transaction.c b/lib/tdb/common/transaction.c index a2c3bbd..caef0be 100644 --- 
a/lib/tdb/common/transaction.c +++ b/lib/tdb/common/transaction.c @@ -421,7 +421,8 @@ static int _tdb_transaction_start(struct tdb_context *tdb, enum tdb_lock_flags lockflags) { /* some sanity checks */ - if (tdb->read_only || (tdb->flags & TDB_INTERNAL) || tdb->traverse_read) { + if (tdb->read_only || (tdb->flags & (TDB_INTERNAL|TDB_MUTEX_LOCKING)) + || tdb->traverse_read) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_start: cannot start a transaction on a read-only or internal db\n")); tdb->ecode = TDB_ERR_EINVAL; return -1; diff --git a/lib/tdb/docs/mutex.txt b/lib/tdb/docs/mutex.txt new file mode 100644 index 0000000..2597987 --- /dev/null +++ b/lib/tdb/docs/mutex.txt @@ -0,0 +1,106 @@ +Tdb is a hashtable database with multiple concurrent writer and external +record lock support. For speed reasons, wherever possible tdb uses a shared +memory mapped area for data access. In its currently released form, it uses +fcntl byte-range locks to coordinate access to the data itself. + +The tdb data is organized as a hashtable. Hash collisions are dealt with by +forming a linked list of records that share a hash value. The individual +linked lists are protected across processes with 1-byte fcntl locks on the +starting pointer of the linked list representing a hash value. + +The external locking API of tdb allows to lock individual records. Instead of +really locking individual records, the tdb API locks a complete linked list +with a fcntl lock. + +The external locking API of tdb also allows to lock the complete database, and +ctdb uses this facility to freeze databases during a recovery. While the +so-called allrecord lock is held, all linked lists and all individual records +are frozen altogether. Tdb achieves this by locking the complete file range +with a single fcntl lock. Individual 1-byte locks for the linked lists +conflict with this. Access to records is prevented by the one large fcntl byte +range lock.
+ +Fcntl locks have been chosen for tdb for two reasons: First they are portable +across all current unixes. Secondly they provide auto-cleanup. If a process +dies while holding a fcntl lock, the lock is given up as if it was explicitly +unlocked. Thus fcntl locks provide a very robust locking scheme, if a process +dies for any reason the database will not stay blocked until reboot. This +robustness is very important for long-running services, a reboot is not an +option for most users of tdb. + +Unfortunately during stress testing fcntl locks have turned out to be a major +problem for performance. The particular problem that was seen happens when +ctdb on a busy server does a recovery. A recovery means that ctdb has to +freeze all tdb databases for some time, usually a few seconds. This is done +with the allrecord lock. During the recovery phase on a busy server many smbd +processes try to access the tdb file with blocking fcntl calls. The specific +test in question easily reproduces 7,000 processes piling up waiting for +1-byte fcntl locks. When ctdb is done with the recovery, it gives up the +allrecord lock, covering the whole file range. All 7,000 processes waiting for +1-byte fcntl locks are woken up, trying to acquire their lock. The special +implementation of fcntl locks in Linux (up to 2013-02-12 at least) protects +all fcntl lock operations with a single system-wide spinlock. If 7,000 processes +are waiting for the allrecord lock to be released, this leads to a thundering +herd condition, all CPUs are spinning on that single spinlock. + +Functionally the kernel is fine, eventually the thundering herd slows down and +every process correctly gets its share and locking range, but the performance +of the system while the herd is active is worse than expected. + +The thundering herd is only the worst case scenario for fcntl lock use. The +single spinlock for fcntl operations is also a performance penalty for normal +operations.
In the cluster case, every read and write SMB request has to do +two fcntl calls to provide correct SMB mandatory locks. The single spinlock +is one source of serialization for the SMB read/write requests, limiting the +parallelism that can be achieved in a multi-core system. + +While trying to tune his servers, Ira Cooper, Samba Team member, found fcntl +locks to be a problem on Solaris as well. Ira pointed out that there is a +potential alternative locking mechanism that might be more scalable: Process +shared robust mutexes, as defined by Posix 2008 for example via + +http://pubs.opengroup.org/onlinepubs/9699919799/functions/pthread_mutexattr_setpshared.html +http://pubs.opengroup.org/onlinepubs/9699919799/functions/pthread_mutexattr_setrobust.html + +Pthread mutexes provide one of the core mechanisms in posix threads to protect +in-process data structures from concurrent access by multiple threads. In the +Linux implementation, a pthread_mutex_t is represented by a data structure in +user space that requires no kernel calls in the uncontended case for locking +and unlocking. Locking and unlocking in the uncontended case is implemented +purely in user space with atomic CPU instructions and thus is very fast. + +The setpshared functions indicate to the kernel that the mutex is about to be +shared between processes in a common shared memory area. + +The process shared posix mutexes have the potential to replace fcntl locking +to coordinate mmap access for tdbs. However, they are missing the critical +auto-cleanup property that fcntl provides when a process dies. A process that +dies hard while holding a shared mutex has no chance to clean up the protected +data structures and unlock the shared mutex. Thus with a pure process shared +mutex the mutex will remain locked forever until the data structures are +re-initialized from scratch. + +With the robust mutexes defined by Posix the process shared mutexes have been +extended with a limited auto-cleanup property.
If a mutex has been declared +robust, when a process exits while holding that mutex, the next process trying +to lock the mutex will get the special error message EOWNERDEAD. This informs +the caller that the data structures the mutex protects are potentially corrupt +and need to be cleaned up. + +The error message EOWNERDEAD when trying to lock a mutex is an extension over +the fcntl functionality. A process that does a blocking fcntl lock call is not +informed about whether the lock was explicitly freed by a process still alive +or due to an unplanned process exit. At the time of this writing (February +2013), at least Linux and OpenSolaris also implement the robustness feature of +process-shared mutexes. + +Converting the tdb locking mechanism from fcntl to mutexes has to take care of +both types of locks that are used on tdb files. + +The easy part is to use mutexes to replace the 1-byte linked list locks +covering the individual hashes. Those can be represented by a mutex each. + +Covering the allrecord lock is more difficult. The allrecord lock uses a fcntl +lock spanning all hash list locks simultaneously. This basic functionality is +not easily possible with mutexes. A mutex carries 1 bit of information, a +fcntl lock can carry an arbitrary amount of information. diff --git a/lib/tdb/include/tdb.h b/lib/tdb/include/tdb.h index a34f089..15c800e 100644 --- a/lib/tdb/include/tdb.h +++ b/lib/tdb/include/tdb.h @@ -80,6 +80,9 @@ extern "C" { #define TDB_ALLOW_NESTING 512 /** Allow transactions to nest */ #define TDB_DISALLOW_NESTING 1024 /** Disallow transactions to nest */ #define TDB_INCOMPATIBLE_HASH 2048 /** Better hashing: can't be opened by tdb < 1.2.6. 
*/ +#define TDB_MUTEX_LOCKING 4096 /** optimized locking using robust mutexes if supported, + only with tdb >= 1.3.0 and + TDB_CLEAR_IF_FIRST or TDB_NOLOCK */ /** The tdb error codes */ enum TDB_ERROR {TDB_SUCCESS=0, TDB_ERR_CORRUPT, TDB_ERR_IO, TDB_ERR_LOCK, @@ -143,6 +146,11 @@ struct tdb_logging_context { * default 5.\n * TDB_ALLOW_NESTING - Allow transactions to nest.\n * TDB_DISALLOW_NESTING - Disallow transactions to nest.\n + * TDB_INCOMPATIBLE_HASH - Better hashing: can't be opened by tdb < 1.2.6.\n + * TDB_MUTEX_LOCKING - Optimized locking using robust mutexes if supported, + * can't be opened by tdb < 1.3.0. + * Only valid in combination with TDB_CLEAR_IF_FIRST + * or TDB_NOLOCK\n * * @param[in] open_flags Flags for the open(2) function. * @@ -179,6 +187,11 @@ struct tdb_context *tdb_open(const char *name, int hash_size, int tdb_flags, * default 5.\n * TDB_ALLOW_NESTING - Allow transactions to nest.\n * TDB_DISALLOW_NESTING - Disallow transactions to nest.\n + * TDB_INCOMPATIBLE_HASH - Better hashing: can't be opened by tdb < 1.2.6.\n + * TDB_MUTEX_LOCKING - Optimized locking using robust mutexes if supported, + * can't be opened by tdb < 1.3.0. + * Only valid in combination with TDB_CLEAR_IF_FIRST + * or TDB_NOLOCK\n * * @param[in] open_flags Flags for the open(2) function. * @@ -842,6 +855,25 @@ int tdb_rescue(struct tdb_context *tdb, void (*walk) (TDB_DATA key, TDB_DATA data, void *private_data), void *private_data); +/** + * @brief Check if support for TDB_MUTEX_LOCKING is available at runtime. + * + * The feature behind TDB_MUTEX_LOCKING is not available on all systems. + * On some systems the API for pthread_mutexattr_setrobust() is not available. + * On other systems there are some bugs in the interaction between glibc and + * the linux kernel. + * + * This function provides a runtime check if robust mutexes are really + * available. + * + * @note This calls fork(), but the SIGCHLD handling should be transparent.
+ * + * @return true if supported, false otherwise. + * + * @see TDB_MUTEX_LOCKING + */ +bool tdb_runtime_check_for_robust_mutexes(void); + /* @} ******************************************************************/ /* Low level locking functions: use with care */ diff --git a/lib/tdb/test/run-3G-file.c b/lib/tdb/test/run-3G-file.c index 67fd54f..748c972 100644 --- a/lib/tdb/test/run-3G-file.c +++ b/lib/tdb/test/run-3G-file.c @@ -9,6 +9,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" @@ -22,12 +23,12 @@ static int tdb_expand_file_sparse(struct tdb_context *tdb, return -1; } - if (ftruncate(tdb->fd, size+addition) == -1) { + if (tdb_ftruncate(tdb, size+addition) == -1) { char b = 0; - ssize_t written = pwrite(tdb->fd, &b, 1, (size+addition) - 1); + ssize_t written = tdb_pwrite(tdb, &b, 1, (size+addition) - 1); if (written == 0) { /* try once more, potentially revealing errno */ - written = pwrite(tdb->fd, &b, 1, (size+addition) - 1); + written = tdb_pwrite(tdb, &b, 1, (size+addition) - 1); } if (written == 0) { /* again - give up, guessing errno */ diff --git a/lib/tdb/test/run-bad-tdb-header.c b/lib/tdb/test/run-bad-tdb-header.c index b00fb89..9d29fdf 100644 --- a/lib/tdb/test/run-bad-tdb-header.c +++ b/lib/tdb/test/run-bad-tdb-header.c @@ -9,6 +9,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" diff --git a/lib/tdb/test/run-check.c b/lib/tdb/test/run-check.c index b275691..ce389a2 100644 --- a/lib/tdb/test/run-check.c +++ b/lib/tdb/test/run-check.c @@ -9,6 +9,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" diff --git a/lib/tdb/test/run-corrupt.c b/lib/tdb/test/run-corrupt.c index 
93eae42..e6fc751 100644 --- a/lib/tdb/test/run-corrupt.c +++ b/lib/tdb/test/run-corrupt.c @@ -9,6 +9,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" diff --git a/lib/tdb/test/run-die-during-transaction.c b/lib/tdb/test/run-die-during-transaction.c index 9b90415..c636d87 100644 --- a/lib/tdb/test/run-die-during-transaction.c +++ b/lib/tdb/test/run-die-during-transaction.c @@ -19,6 +19,7 @@ static int ftruncate_check(int fd, off_t length); #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include #include diff --git a/lib/tdb/test/run-endian.c b/lib/tdb/test/run-endian.c index 3116f7d..9d4d5f5 100644 --- a/lib/tdb/test/run-endian.c +++ b/lib/tdb/test/run-endian.c @@ -9,6 +9,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" diff --git a/lib/tdb/test/run-incompatible.c b/lib/tdb/test/run-incompatible.c index af01ca6..b8e95b5 100644 --- a/lib/tdb/test/run-incompatible.c +++ b/lib/tdb/test/run-incompatible.c @@ -9,6 +9,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include diff --git a/lib/tdb/test/run-nested-transactions.c b/lib/tdb/test/run-nested-transactions.c index bf08e55..864adf2 100644 --- a/lib/tdb/test/run-nested-transactions.c +++ b/lib/tdb/test/run-nested-transactions.c @@ -9,6 +9,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include #include diff --git a/lib/tdb/test/run-nested-traverse.c b/lib/tdb/test/run-nested-traverse.c index 361dc2e..22ee3e2 100644 --- a/lib/tdb/test/run-nested-traverse.c +++ 
b/lib/tdb/test/run-nested-traverse.c @@ -11,6 +11,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #undef fcntl #include diff --git a/lib/tdb/test/run-no-lock-during-traverse.c b/lib/tdb/test/run-no-lock-during-traverse.c index b5e31dc..737a32f 100644 --- a/lib/tdb/test/run-no-lock-during-traverse.c +++ b/lib/tdb/test/run-no-lock-during-traverse.c @@ -13,6 +13,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" diff --git a/lib/tdb/test/run-oldhash.c b/lib/tdb/test/run-oldhash.c index 535336c..aaee6f6 100644 --- a/lib/tdb/test/run-oldhash.c +++ b/lib/tdb/test/run-oldhash.c @@ -9,6 +9,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" diff --git a/lib/tdb/test/run-open-during-transaction.c b/lib/tdb/test/run-open-during-transaction.c index 04ba956..1605376 100644 --- a/lib/tdb/test/run-open-during-transaction.c +++ b/lib/tdb/test/run-open-during-transaction.c @@ -20,6 +20,7 @@ static int ftruncate_check(int fd, off_t length); #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include #include diff --git a/lib/tdb/test/run-readonly-check.c b/lib/tdb/test/run-readonly-check.c index e518532..c5e0f7d 100644 --- a/lib/tdb/test/run-readonly-check.c +++ b/lib/tdb/test/run-readonly-check.c @@ -11,6 +11,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" diff --git a/lib/tdb/test/run-rescue-find_entry.c b/lib/tdb/test/run-rescue-find_entry.c index 25f4f1c..5d6f8f7 100644 --- a/lib/tdb/test/run-rescue-find_entry.c +++ 
b/lib/tdb/test/run-rescue-find_entry.c @@ -10,6 +10,7 @@ #include "../common/check.c" #include "../common/hash.c" #include "../common/rescue.c" +#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" diff --git a/lib/tdb/test/run-rescue.c b/lib/tdb/test/run-rescue.c index 7c806a4..e43f53b 100644 --- a/lib/tdb/test/run-rescue.c +++ b/lib/tdb/test/run-rescue.c @@ -10,6 +10,7 @@ #include "../common/check.c" #include "../common/hash.c" #include "../common/rescue.c" +#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" diff --git a/lib/tdb/test/run-rwlock-check.c b/lib/tdb/test/run-rwlock-check.c index 8b8072d..2ac9dc3 100644 --- a/lib/tdb/test/run-rwlock-check.c +++ b/lib/tdb/test/run-rwlock-check.c @@ -9,6 +9,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include diff --git a/lib/tdb/test/run-summary.c b/lib/tdb/test/run-summary.c index 2231284..8b9a1a0 100644 --- a/lib/tdb/test/run-summary.c +++ b/lib/tdb/test/run-summary.c @@ -10,6 +10,7 @@ #include "../common/check.c" #include "../common/hash.c" #include "../common/summary.c" +#include "../common/mutex.c" #include "tap-interface.h" #include diff --git a/lib/tdb/test/run-transaction-expand.c b/lib/tdb/test/run-transaction-expand.c index ddf1f24..d36b894 100644 --- a/lib/tdb/test/run-transaction-expand.c +++ b/lib/tdb/test/run-transaction-expand.c @@ -37,6 +37,7 @@ static inline int fake_fdatasync(int fd) #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" diff --git a/lib/tdb/test/run-traverse-in-transaction.c b/lib/tdb/test/run-traverse-in-transaction.c index 48194b8..17d6412 100644 --- a/lib/tdb/test/run-traverse-in-transaction.c +++ b/lib/tdb/test/run-traverse-in-transaction.c @@ -11,6 +11,7 @@ #include "../common/open.c" #include 
"../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #undef fcntl_with_lockcheck #include diff --git a/lib/tdb/test/run-wronghash-fail.c b/lib/tdb/test/run-wronghash-fail.c index 9c78fc5..c44b0f5 100644 --- a/lib/tdb/test/run-wronghash-fail.c +++ b/lib/tdb/test/run-wronghash-fail.c @@ -9,6 +9,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include diff --git a/lib/tdb/test/run-zero-append.c b/lib/tdb/test/run-zero-append.c index a2324c4..f9eba1b 100644 --- a/lib/tdb/test/run-zero-append.c +++ b/lib/tdb/test/run-zero-append.c @@ -9,6 +9,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" diff --git a/lib/tdb/test/run.c b/lib/tdb/test/run.c index f61fcf6..c744c4d 100644 --- a/lib/tdb/test/run.c +++ b/lib/tdb/test/run.c @@ -9,6 +9,7 @@ #include "../common/open.c" #include "../common/check.c" #include "../common/hash.c" +#include "../common/mutex.c" #include "tap-interface.h" #include #include "logging.h" diff --git a/lib/tdb/wscript b/lib/tdb/wscript index 7019693..6243ccf 100644 --- a/lib/tdb/wscript +++ b/lib/tdb/wscript @@ -1,7 +1,7 @@ #!/usr/bin/env python APPNAME = 'tdb' -VERSION = '1.2.13' +VERSION = '1.3.0' blddir = 'bin' @@ -46,6 +46,10 @@ def set_options(opt): opt.BUILTIN_DEFAULT('replace') opt.PRIVATE_EXTENSION_DEFAULT('tdb', noextension='tdb') opt.RECURSE('lib/replace') + opt.add_option('--disable-tdb-mutex-locking', + help=("Disable the use of pthread robust mutexes"), + action="store_true", dest='disable_tdb_mutex_locking', + default=False) if opt.IN_LAUNCH_DIR(): opt.add_option('--disable-python', help=("disable the pytdb module"), @@ -53,6 +57,11 @@ def set_options(opt): def configure(conf): + conf.env.disable_tdb_mutex_locking = getattr(Options.options, + 
'disable_tdb_mutex_locking', + False) + if not conf.env.disable_tdb_mutex_locking: + conf.env.replace_add_global_pthread = True conf.RECURSE('lib/replace') conf.env.standalone_tdb = conf.IN_LAUNCH_DIR() @@ -68,6 +77,11 @@ def configure(conf): conf.env.disable_python = getattr(Options.options, 'disable_python', False) + if (conf.CONFIG_SET('HAVE_ROBUST_MUTEXES') and + conf.env.building_tdb and + not conf.env.disable_tdb_mutex_locking): + conf.define('USE_TDB_MUTEX_LOCKING', 1) + conf.CHECK_XSLTPROC_MANPAGES() if not conf.env.disable_python: @@ -87,10 +101,12 @@ def configure(conf): def build(bld): bld.RECURSE('lib/replace') - COMMON_SRC = bld.SUBDIR('common', - '''check.c error.c tdb.c traverse.c - freelistcheck.c lock.c dump.c freelist.c - io.c open.c transaction.c hash.c summary.c rescue.c''') + COMMON_FILES='''check.c error.c tdb.c traverse.c + freelistcheck.c lock.c dump.c freelist.c + io.c open.c transaction.c hash.c summary.c rescue.c + mutex.c''' + + COMMON_SRC = bld.SUBDIR('common', COMMON_FILES) if bld.env.standalone_tdb: bld.env.PKGCONFIGDIR = '${LIBDIR}/pkgconfig' @@ -99,9 +115,15 @@ def build(bld): private_library = True if not bld.CONFIG_SET('USING_SYSTEM_TDB'): + + tdb_deps = 'replace' + + if bld.CONFIG_SET('USE_TDB_MUTEX_LOCKING'): + tdb_deps += ' pthread' + bld.SAMBA_LIBRARY('tdb', COMMON_SRC, - deps='replace', + deps=tdb_deps, includes='include', abi_directory='ABI', abi_match='tdb_*', @@ -137,7 +159,7 @@ def build(bld): # FIXME: This hardcoded list is stupid, stupid, stupid. bld.SAMBA_SUBSYSTEM('tdb-test-helpers', 'test/external-agent.c test/lock-tracking.c test/logging.c', - 'replace', + tdb_deps, includes='include') for t in tdb1_unit_tests: -- 1.7.9.5 From 2fb4381a9640e0193f97e77516ecc9f59cc52c66 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 3 Feb 2014 14:37:34 +0100 Subject: [PATCH 25/49] SQ tdb_open_ex require CLEAR_IF_FIRST with MUTEX_LOCKING we only need this if we don't support mutexes. 
--- lib/tdb/common/open.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/lib/tdb/common/open.c b/lib/tdb/common/open.c index 13d22e1..60034fc 100644 --- a/lib/tdb/common/open.c +++ b/lib/tdb/common/open.c @@ -350,6 +350,19 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td goto fail; } + if ((tdb->flags & TDB_MUTEX_LOCKING) && + !(tdb->flags & TDB_CLEAR_IF_FIRST)) + { + /* + * Here we catch bugs in the callers, + * the runtime check for existing tdb's comes later. + */ + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: invalid flags for %s - " + "TDB_MUTEX_LOCKING requires TDB_CLEAR_IF_FIRST\n", name)); + errno = EINVAL; + goto fail; + } + if (hash_fn) { tdb->hash_fn = hash_fn; hash_alg = "the user defined"; -- 1.7.9.5 From 7f3af3a9144bb3f055467bd4e4489f5038812f78 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 3 Feb 2014 15:35:25 +0100 Subject: [PATCH 26/49] SQ FIX deadlock... TODO: do we need a tdb_has_chain_mutexes() and check each lock against the mutex range? This fixes https://git.samba.org/?p=metze/samba/wip.git;a=commitdiff;h=5f68025f4b417b6027cb480bd5dfa5f5d8480d50 --- lib/tdb/common/mutex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/tdb/common/mutex.c b/lib/tdb/common/mutex.c index c158b6c..9644aca 100644 --- a/lib/tdb/common/mutex.c +++ b/lib/tdb/common/mutex.c @@ -202,7 +202,7 @@ again: goto fail; } - if (tdb->num_lockrecs > 0) { + if (tdb_have_extra_locks(tdb)) { /* * We can only check the allrecord lock once. 
If we do it with * one chain mutex locked, we will deadlock with the allrecord -- 1.7.9.5 From e2ff02ebee2c04440c90dca2b157dfc5e038ae7f Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 29 Jan 2014 15:42:19 +0100 Subject: [PATCH 27/49] SQ revert/fix comment --- lib/tdb/common/mutex.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/tdb/common/mutex.c b/lib/tdb/common/mutex.c index 9644aca..5a54281 100644 --- a/lib/tdb/common/mutex.c +++ b/lib/tdb/common/mutex.c @@ -208,16 +208,16 @@ again: * one chain mutex locked, we will deadlock with the allrecord * locker process in the following way: We lock the first hash * chain, we check for the allrecord lock. We keep the hash - * chain locked. Then the allrecord locker comes and takes the - * allrecord lock. It walks the list of chain mutexes, locking - * them all in sequence. Meanwhile, we have the chain mutex - * locked, so the allrecord locker blocks trying to lock our - * chain mutex. Then we come in and try to lock the second + * chain locked. Then the allrecord locker locks the + * allrecord_mutex. It walks the list of chain mutexes, + * locking them all in sequence. Meanwhile, we have the chain + * mutex locked, so the allrecord locker blocks trying to lock + * our chain mutex. Then we come in and try to lock the second * chain lock, which in most cases will be the freelist. We * see that the allrecord lock is locked and put ourselves on - * the allrecord_waiters condition variable. This will never - * be signalled though because the allrecord locker waits for - * us to give up the chain lock. + * the allrecord_mutex. This will never be signalled though + * because the allrecord locker waits for us to give up the + * chain lock. 
*/ *pret = 0; -- 1.7.9.5 From 43a3b284c7f14a92363a3b5c38992bf59555a623 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 4 Feb 2014 18:48:02 +0100 Subject: [PATCH 28/49] SQ comments about m->allrecord_mutex --- lib/tdb/common/mutex.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/lib/tdb/common/mutex.c b/lib/tdb/common/mutex.c index 5a54281..e353804 100644 --- a/lib/tdb/common/mutex.c +++ b/lib/tdb/common/mutex.c @@ -382,6 +382,13 @@ int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb) return 0; } + /* + * Our only caller tdb_allrecord_upgrade() + * guarantees that we already own the allrecord lock. + * + * Which means m->allrecord_mutex is still locked by us. + */ + if (m->allrecord_lock != F_RDLCK) { TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n", (int)m->allrecord_lock)); @@ -420,6 +427,13 @@ int tdb_mutex_allrecord_downgrade(struct tdb_context *tdb) { struct tdb_mutexes *m = tdb->mutexes; + /* + * Our only caller tdb_allrecord_upgrade() (in the error case) + * guarantees that we already own the allrecord lock. + * + * Which means m->allrecord_mutex is still locked by us. + */ + if (m->allrecord_lock != F_WRLCK) { TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n", (int)m->allrecord_lock)); @@ -441,6 +455,14 @@ int tdb_mutex_allrecord_unlock(struct tdb_context *tdb) return 0; } + /* + * Our only callers tdb_allrecord_unlock() and + * tdb_allrecord_lock() (in the error path) + * guarantee that we already own the allrecord lock. + * + * Which means m->allrecord_mutex is still locked by us.
+ */ + if ((m->allrecord_lock != F_RDLCK) && (m->allrecord_lock != F_WRLCK)) { TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n", (int)m->allrecord_lock)); -- 1.7.9.5 From d6352ed573ec40c19cad970a6b55a60d3c80ccd1 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 11 Apr 2014 21:06:08 +0200 Subject: [PATCH 29/49] SQ common/mutex.c: make sure the free list mutex/lock is independent of the allrecord lock --- lib/tdb/common/mutex.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/lib/tdb/common/mutex.c b/lib/tdb/common/mutex.c index e353804..100ffcd 100644 --- a/lib/tdb/common/mutex.c +++ b/lib/tdb/common/mutex.c @@ -202,6 +202,16 @@ again: goto fail; } + if (idx == 0) { + /* + * This is a freelist lock, which is independent to + * the allrecord lock. So we're done once we got the + * freelist mutex. + */ + *pret = 0; + return true; + } + if (tdb_have_extra_locks(tdb)) { /* * We can only check the allrecord lock once. If we do it with -- 1.7.9.5 From aafc5a8b175689b5a501592bb369851e46df321a Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 3 Feb 2014 16:44:45 +0100 Subject: [PATCH 30/49] SQ tdb_have_mutex_chainlocks() --- lib/tdb/common/mutex.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/lib/tdb/common/mutex.c b/lib/tdb/common/mutex.c index 100ffcd..8b73adf 100644 --- a/lib/tdb/common/mutex.c +++ b/lib/tdb/common/mutex.c @@ -136,6 +136,33 @@ static bool tdb_mutex_index(struct tdb_context *tdb, off_t off, off_t len, return true; } +static bool tdb_have_mutex_chainlocks(struct tdb_context *tdb) +{ + size_t i; + + for (i=0; i < tdb->num_lockrecs; i++) { + bool ret; + unsigned idx; + + ret = tdb_mutex_index(tdb, + tdb->lockrecs[i].off, + tdb->lockrecs[i].count, + &idx); + if (!ret) { + continue; + } + + if (idx == 0) { + /* this is the freelist mutex */ + continue; + } + + return true; + } + + return false; +} + static int chain_mutex_lock(pthread_mutex_t *m, bool waitflag) { int ret; 
@@ -212,7 +239,7 @@ again: return true; } - if (tdb_have_extra_locks(tdb)) { + if (tdb_have_mutex_chainlocks(tdb)) { /* * We can only check the allrecord lock once. If we do it with * one chain mutex locked, we will deadlock with the allrecord -- 1.7.9.5 From 28aeaba818136517a1fa9bdfafef82dfc7e473ff Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 11 Apr 2014 22:19:08 +0200 Subject: [PATCH 31/49] SQ fix tdb_mutex_allrecord_lock() without TDB_LOCK_WAIT --- lib/tdb/common/mutex.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/tdb/common/mutex.c b/lib/tdb/common/mutex.c index 8b73adf..1dc6a62 100644 --- a/lib/tdb/common/mutex.c +++ b/lib/tdb/common/mutex.c @@ -345,6 +345,7 @@ int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype, struct tdb_mutexes *m = tdb->mutexes; int ret; uint32_t i; + bool waitflag = (flags & TDB_LOCK_WAIT); if (tdb->flags & TDB_NOLOCK) { return 0; @@ -354,7 +355,11 @@ int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype, return 0; } - ret = allrecord_mutex_lock(m, flags & TDB_LOCK_WAIT); + ret = allrecord_mutex_lock(m, waitflag); + if (!waitflag && (ret == EBUSY)) { + errno = EAGAIN; + return -1; + } if (ret != 0) { if (!(flags & TDB_LOCK_PROBE)) { TDB_LOG((tdb, TDB_DEBUG_TRACE, "pthread_mutex_trylock " -- 1.7.9.5 From 7c30ce2d5eaf5ec92b96ed65d3a29406b8b3adc6 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 4 Feb 2014 23:35:53 +0100 Subject: [PATCH 32/49] SQ TODO TDB_SUPPORTED_FEATURE_FLAGS --- lib/tdb/common/open.c | 7 +++++++ lib/tdb/common/tdb_private.h | 6 +++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/lib/tdb/common/open.c b/lib/tdb/common/open.c index 60034fc..161c6e3 100644 --- a/lib/tdb/common/open.c +++ b/lib/tdb/common/open.c @@ -528,6 +528,13 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td tdb->feature_flags = header.feature_flags; } + if (tdb->feature_flags & ~TDB_SUPPORTED_FEATURE_FLAGS) { +//TODO + 
TDB_LOG((tdb, TDB_DEBUG_ERROR, "Unsupported features...\n")); + errno = EINVAL; + goto fail; + } + if (tdb_mutex_size(tdb) != header.mutex_size) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "Mutex size changed\n")); errno = EINVAL; diff --git a/lib/tdb/common/tdb_private.h b/lib/tdb/common/tdb_private.h index 9790552..97f98d4 100644 --- a/lib/tdb/common/tdb_private.h +++ b/lib/tdb/common/tdb_private.h @@ -69,7 +69,11 @@ typedef uint32_t tdb_off_t; #define TDB_PAD_BYTE 0x42 #define TDB_PAD_U32 0x42424242 -#define TDB_FEATURE_FLAG_MUTEX 1 +#define TDB_FEATURE_FLAG_MUTEX 0x00000001 + +#define TDB_SUPPORTED_FEATURE_FLAGS (\ + TDB_FEATURE_FLAG_MUTEX \ + ) /* NB assumes there is a local variable called "tdb" that is the * current context, also takes doubly-parenthesized print-style -- 1.7.9.5 From 1a97a7e94be0d5438d373b3d37ec22fd93964d2f Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 5 Feb 2014 11:30:27 +0100 Subject: [PATCH 33/49] SQ TODO TDB_MUTEX_LOCKING => tdb_jenkins_hash TODO let TDB_MUTEX_LOCKING imply TDB_INCOMPATIBLE_HASH --- lib/tdb/common/open.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/tdb/common/open.c b/lib/tdb/common/open.c index 161c6e3..253830e 100644 --- a/lib/tdb/common/open.c +++ b/lib/tdb/common/open.c @@ -370,6 +370,8 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td /* This controls what we use when creating a tdb. */ if (tdb->flags & TDB_INCOMPATIBLE_HASH) { tdb->hash_fn = tdb_jenkins_hash; + } else if (tdb->flags & TDB_MUTEX_LOCKING) { + tdb->hash_fn = tdb_jenkins_hash; } else { tdb->hash_fn = tdb_old_hash; } -- 1.7.9.5 From f15a56197e339049fb0a4d133f0c18bb663ed5af Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Thu, 21 Feb 2013 16:34:32 +0100 Subject: [PATCH 34/49] tdb/tools: add -m option to tdbtorture This allows tdbtorture to run with mutexes. 
Reviewed-by: Stefan Metzmacher --- lib/tdb/tools/tdbtorture.c | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/lib/tdb/tools/tdbtorture.c b/lib/tdb/tools/tdbtorture.c index 5ae08f6..3e26f65 100644 --- a/lib/tdb/tools/tdbtorture.c +++ b/lib/tdb/tools/tdbtorture.c @@ -33,6 +33,7 @@ static int always_transaction = 0; static int hash_size = 2; static int loopnum; static int count_pipe; +static bool mutex = false; static struct tdb_logging_context log_ctx; #ifdef PRINTF_ATTRIBUTE @@ -119,6 +120,7 @@ static void addrec_db(void) #if TRANSACTION_PROB if (in_transaction == 0 && + ((tdb_get_flags(db) & TDB_MUTEX_LOCKING) == 0) && (always_transaction || random() % TRANSACTION_PROB == 0)) { if (tdb_transaction_start(db) != 0) { fatal("tdb_transaction_start failed"); @@ -216,7 +218,7 @@ static int traverse_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, static void usage(void) { - printf("Usage: tdbtorture [-t] [-k] [-n NUM_PROCS] [-l NUM_LOOPS] [-s SEED] [-H HASH_SIZE]\n"); + printf("Usage: tdbtorture [-t] [-k] [-m] [-n NUM_PROCS] [-l NUM_LOOPS] [-s SEED] [-H HASH_SIZE]\n"); exit(0); } @@ -230,7 +232,13 @@ static void send_count_and_suicide(int sig) static int run_child(const char *filename, int i, int seed, unsigned num_loops, unsigned start) { - db = tdb_open_ex(filename, hash_size, TDB_DEFAULT, + int tdb_flags = TDB_DEFAULT|TDB_CLEAR_IF_FIRST|TDB_INCOMPATIBLE_HASH; + + if (mutex) { + tdb_flags |= TDB_MUTEX_LOCKING; + } + + db = tdb_open_ex(filename, hash_size, tdb_flags, O_RDWR | O_CREAT, 0600, &log_ctx, NULL); if (!db) { fatal("db open failed"); @@ -302,7 +310,7 @@ int main(int argc, char * const *argv) log_ctx.log_fn = tdb_log; - while ((c = getopt(argc, argv, "n:l:s:H:thk")) != -1) { + while ((c = getopt(argc, argv, "n:l:s:H:thkm")) != -1) { switch (c) { case 'n': num_procs = strtol(optarg, NULL, 0); @@ -322,6 +330,13 @@ int main(int argc, char * const *argv) case 'k': kill_random = 1; break; + case 'm': + mutex = 
tdb_runtime_check_for_robust_mutexes(); + if (!mutex) { + printf("tdb_runtime_check_for_robust_mutexes() returned false\n"); + exit(1); + } + break; default: usage(); } @@ -443,7 +458,13 @@ int main(int argc, char * const *argv) done: if (error_count == 0) { - db = tdb_open_ex(test_tdb, hash_size, TDB_DEFAULT, + int tdb_flags = TDB_DEFAULT; + + if (mutex) { + tdb_flags |= TDB_NOLOCK; + } + + db = tdb_open_ex(test_tdb, hash_size, tdb_flags, O_RDWR, 0, &log_ctx, NULL); if (!db) { fatal("db open failed\n"); -- 1.7.9.5 From 1e1e904e7b70263beaaecc58ccff14dc2acfb3eb Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Thu, 21 Feb 2013 16:34:32 +0100 Subject: [PATCH 35/49] tdb/test: add mutex related tests --- lib/tdb/test/run-mutex-allrecord-bench.c | 75 ++++++++ lib/tdb/test/run-mutex-allrecord-block.c | 101 +++++++++++ lib/tdb/test/run-mutex-allrecord-trylock.c | 97 ++++++++++ lib/tdb/test/run-mutex-die.c | 266 ++++++++++++++++++++++++++++ lib/tdb/test/run-mutex-openflags2.c | 94 ++++++++++ lib/tdb/test/run-mutex-trylock.c | 106 +++++++++++ lib/tdb/test/run-mutex1.c | 120 +++++++++++++ lib/tdb/wscript | 9 +- 8 files changed, 867 insertions(+), 1 deletion(-) create mode 100644 lib/tdb/test/run-mutex-allrecord-bench.c create mode 100644 lib/tdb/test/run-mutex-allrecord-block.c create mode 100644 lib/tdb/test/run-mutex-allrecord-trylock.c create mode 100644 lib/tdb/test/run-mutex-die.c create mode 100644 lib/tdb/test/run-mutex-openflags2.c create mode 100644 lib/tdb/test/run-mutex-trylock.c create mode 100644 lib/tdb/test/run-mutex1.c diff --git a/lib/tdb/test/run-mutex-allrecord-bench.c b/lib/tdb/test/run-mutex-allrecord-bench.c new file mode 100644 index 0000000..5b816b3 --- /dev/null +++ b/lib/tdb/test/run-mutex-allrecord-bench.c @@ -0,0 +1,75 @@ +#include "../common/tdb_private.h" +#include "../common/io.c" +#include "../common/tdb.c" +#include "../common/lock.c" +#include "../common/freelist.c" +#include "../common/traverse.c" +#include "../common/transaction.c" 
+#include "../common/error.c" +#include "../common/open.c" +#include "../common/check.c" +#include "../common/hash.c" +#include "../common/mutex.c" +#include "tap-interface.h" +#include +#include +#include +#include +#include + +static TDB_DATA key, data; + +static void log_fn(struct tdb_context *tdb, enum tdb_debug_level level, + const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +static double timeval_elapsed2(const struct timeval *tv1, const struct timeval *tv2) +{ + return (tv2->tv_sec - tv1->tv_sec) + + (tv2->tv_usec - tv1->tv_usec)*1.0e-6; +} + +static double timeval_elapsed(const struct timeval *tv) +{ + struct timeval tv2; + gettimeofday(&tv2, NULL); + return timeval_elapsed2(tv, &tv2); +} + +/* The code should barf on TDBs created with rwlocks. */ +int main(int argc, char *argv[]) +{ + struct tdb_context *tdb; + unsigned int log_count; + struct tdb_logging_context log_ctx = { log_fn, &log_count }; + int ret; + struct timeval start; + double elapsed; + + key.dsize = strlen("hi"); + key.dptr = (void *)"hi"; + data.dsize = strlen("world"); + data.dptr = (void *)"world"; + + tdb = tdb_open_ex("mutex-allrecord-bench.tdb", 1000000, + TDB_INCOMPATIBLE_HASH| + TDB_MUTEX_LOCKING| + TDB_CLEAR_IF_FIRST, + O_RDWR|O_CREAT, 0755, &log_ctx, NULL); + ok(tdb, "tdb_open_ex should succeed\n"); + + gettimeofday(&start, NULL); + ret = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false); + elapsed = timeval_elapsed(&start); + + ok(ret == 0, "tdb_allrecord_lock should succeed\n"); + + printf("allrecord_lock took %f seconds\n", elapsed); + + return exit_status(); +} diff --git a/lib/tdb/test/run-mutex-allrecord-block.c b/lib/tdb/test/run-mutex-allrecord-block.c new file mode 100644 index 0000000..52ead07b --- /dev/null +++ b/lib/tdb/test/run-mutex-allrecord-block.c @@ -0,0 +1,101 @@ +#include "../common/tdb_private.h" +#include "../common/io.c" +#include "../common/tdb.c" +#include "../common/lock.c" +#include 
"../common/freelist.c" +#include "../common/traverse.c" +#include "../common/transaction.c" +#include "../common/error.c" +#include "../common/open.c" +#include "../common/check.c" +#include "../common/hash.c" +#include "../common/mutex.c" +#include "tap-interface.h" +#include +#include +#include +#include +#include + +static TDB_DATA key, data; + +static void log_fn(struct tdb_context *tdb, enum tdb_debug_level level, + const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +static int do_child(int tdb_flags, int fd) +{ + struct tdb_context *tdb; + unsigned int log_count; + struct tdb_logging_context log_ctx = { log_fn, &log_count }; + int ret; + char c = 0; + + tdb = tdb_open_ex("mutex-allrecord-block.tdb", 3, tdb_flags, + O_RDWR|O_CREAT, 0755, &log_ctx, NULL); + ok(tdb, "tdb_open_ex should succeed\n"); + + ret = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false); + ok(ret == 0, "tdb_allrecord_lock should succeed\n"); + + write(fd, &c, sizeof(c)); + + system("/bin/sleep 99999"); + + ret = tdb_allrecord_unlock(tdb, F_WRLCK, false); + ok(ret == 0, "tdb_allrecord_unlock should succeed\n"); + + return 0; +} + +/* The code should barf on TDBs created with rwlocks. 
*/ +int main(int argc, char *argv[]) +{ + struct tdb_context *tdb; + unsigned int log_count; + struct tdb_logging_context log_ctx = { log_fn, &log_count }; + int ret, status; + pid_t child, wait_ret; + int pipefds[2]; + char c; + int tdb_flags; + + key.dsize = strlen("hi"); + key.dptr = (void *)"hi"; + data.dsize = strlen("world"); + data.dptr = (void *)"world"; + + pipe(pipefds); + + tdb_flags = TDB_INCOMPATIBLE_HASH| + TDB_MUTEX_LOCKING| + TDB_CLEAR_IF_FIRST; + + child = fork(); + if (child == 0) { + return do_child(tdb_flags, pipefds[1]); + } + + read(pipefds[0], &c, sizeof(c)); + + tdb = tdb_open_ex("mutex-allrecord-block.tdb", 0, + tdb_flags, O_RDWR|O_CREAT, 0755, + &log_ctx, NULL); + ok(tdb, "tdb_open_ex should succeed\n"); + + ret = tdb_chainlock(tdb, key); + ok(ret == 0, "tdb_chainlock (nowait) should succeed\n"); + + ret = tdb_chainunlock(tdb, key); + ok(ret == 0, "tdb_chainunlock should succeed\n"); + + wait_ret = wait(&status); + ok(wait_ret == child, "child should have exited correctly\n"); + + return exit_status(); +} diff --git a/lib/tdb/test/run-mutex-allrecord-trylock.c b/lib/tdb/test/run-mutex-allrecord-trylock.c new file mode 100644 index 0000000..1ae1512 --- /dev/null +++ b/lib/tdb/test/run-mutex-allrecord-trylock.c @@ -0,0 +1,97 @@ +#include "../common/tdb_private.h" +#include "../common/io.c" +#include "../common/tdb.c" +#include "../common/lock.c" +#include "../common/freelist.c" +#include "../common/traverse.c" +#include "../common/transaction.c" +#include "../common/error.c" +#include "../common/open.c" +#include "../common/check.c" +#include "../common/hash.c" +#include "../common/mutex.c" +#include "tap-interface.h" +#include +#include +#include +#include +#include + +static TDB_DATA key, data; + +static void log_fn(struct tdb_context *tdb, enum tdb_debug_level level, + const char *fmt, ...) 
+{ + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +static int do_child(int tdb_flags, int fd) +{ + struct tdb_context *tdb; + unsigned int log_count; + struct tdb_logging_context log_ctx = { log_fn, &log_count }; + int ret; + char c = 0; + + tdb = tdb_open_ex("mutex-allrecord-trylock.tdb", 3, tdb_flags, + O_RDWR|O_CREAT, 0755, &log_ctx, NULL); + ok(tdb, "tdb_open_ex should succeed\n"); + + ret = tdb_chainlock(tdb, key); + ok(ret == 0, "tdb_chainlock should succeed\n"); + + write(fd, &c, sizeof(c)); + + poll(NULL, 0, 1000); + + ret = tdb_chainunlock(tdb, key); + ok(ret == 0, "tdb_chainunlock should succeed\n"); + + return 0; +} + +/* The code should barf on TDBs created with rwlocks. */ +int main(int argc, char *argv[]) +{ + struct tdb_context *tdb; + unsigned int log_count; + struct tdb_logging_context log_ctx = { log_fn, &log_count }; + int ret, status; + pid_t child, wait_ret; + int pipefds[2]; + char c; + int tdb_flags; + + key.dsize = strlen("hi"); + key.dptr = (void *)"hi"; + data.dsize = strlen("world"); + data.dptr = (void *)"world"; + + pipe(pipefds); + + tdb_flags = TDB_INCOMPATIBLE_HASH| + TDB_MUTEX_LOCKING| + TDB_CLEAR_IF_FIRST; + + child = fork(); + if (child == 0) { + return do_child(tdb_flags, pipefds[1]); + } + + read(pipefds[0], &c, sizeof(c)); + + tdb = tdb_open_ex("mutex-allrecord-trylock.tdb", 0, tdb_flags, + O_RDWR|O_CREAT, 0755, &log_ctx, NULL); + ok(tdb, "tdb_open_ex should succeed\n"); + + ret = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_NOWAIT, false); + ok(ret == -1, "tdb_allrecord_lock (nowait) not should succeed\n"); + + wait_ret = wait(&status); + ok(wait_ret == child, "child should have exited correctly\n"); + + return exit_status(); +} diff --git a/lib/tdb/test/run-mutex-die.c b/lib/tdb/test/run-mutex-die.c new file mode 100644 index 0000000..6e1c40d --- /dev/null +++ b/lib/tdb/test/run-mutex-die.c @@ -0,0 +1,266 @@ +#include "../common/tdb_private.h" +#include "lock-tracking.h" +static ssize_t 
pwrite_check(int fd, const void *buf, size_t count, off_t offset); +static ssize_t write_check(int fd, const void *buf, size_t count); +static int ftruncate_check(int fd, off_t length); + +#define pwrite pwrite_check +#define write write_check +#define fcntl fcntl_with_lockcheck +#define ftruncate ftruncate_check + +#include "../common/io.c" +#include "../common/tdb.c" +#include "../common/lock.c" +#include "../common/freelist.c" +#include "../common/traverse.c" +#include "../common/transaction.c" +#include "../common/error.c" +#include "../common/open.c" +#include "../common/check.c" +#include "../common/hash.c" +#include "../common/mutex.c" +#include "tap-interface.h" +#include +#include +#include +#include "external-agent.h" +#include "logging.h" + +#undef write +#undef pwrite +#undef fcntl +#undef ftruncate + +static int target, current; +#define TEST_DBNAME "run-mutex-die.tdb" +#define KEY_STRING "helloworld" + +static void maybe_die(int fd) +{ + if (target == 0) { + return; + } + current += 1; + if (current == target) { + _exit(1); + } +} + +static ssize_t pwrite_check(int fd, + const void *buf, size_t count, off_t offset) +{ + ssize_t ret; + + maybe_die(fd); + + ret = pwrite(fd, buf, count, offset); + if (ret != count) + return ret; + + maybe_die(fd); + return ret; +} + +static ssize_t write_check(int fd, const void *buf, size_t count) +{ + ssize_t ret; + + maybe_die(fd); + + ret = write(fd, buf, count); + if (ret != count) + return ret; + + maybe_die(fd); + return ret; +} + +static int ftruncate_check(int fd, off_t length) +{ + int ret; + + maybe_die(fd); + + ret = ftruncate(fd, length); + + maybe_die(fd); + return ret; +} + +static enum agent_return flakey_ops(struct agent *a, enum operation op, + TDB_DATA key) +{ + enum agent_return ret; + + /* + * Run in the external agent child + */ + + ret = external_agent_operation(a, OPEN, TEST_DBNAME); + if (ret != SUCCESS) { + fprintf(stderr, "Agent failed to open: %s\n", + agent_return_name(ret)); + return ret; + 
} + ret = external_agent_operation(a, UNMAP, ""); + if (ret != SUCCESS) { + fprintf(stderr, "Agent failed to unmap: %s\n", + agent_return_name(ret)); + return ret; + } + ret = external_agent_operation(a, STORE, "xyz"); + if (ret != SUCCESS) { + fprintf(stderr, "Agent failed to store: %s\n", + agent_return_name(ret)); + return ret; + } + ret = external_agent_operation(a, TRANSACTION_START, ""); + if (ret != SUCCESS) { + fprintf(stderr, "Agent failed transaction_start: %s\n", + agent_return_name(ret)); + return ret; + } + ret = external_agent_operation(a, STORE, (char *)key.dptr); + if (ret != SUCCESS) { + fprintf(stderr, "Agent failed store: %s\n", + agent_return_name(ret)); + return ret; + } + ret = external_agent_operation(a, TRANSACTION_COMMIT, ""); + if (ret != SUCCESS) { + fprintf(stderr, "Agent failed commit: %s\n", + agent_return_name(ret)); + return ret; + } + ret = external_agent_operation(a, FETCH, KEY_STRING); + if (ret != SUCCESS) { + fprintf(stderr, "Agent failed find key: %s\n", + agent_return_name(ret)); + return ret; + } + ret = external_agent_operation(a, PING, ""); + if (ret != SUCCESS) { + fprintf(stderr, "Agent failed ping: %s\n", + agent_return_name(ret)); + return ret; + } + return ret; +} + +static void prep_db(TDB_DATA key) { + struct tdb_context *tdb; + TDB_DATA data; + + data.dptr = (uint8_t *)"foo"; + data.dsize = strlen((char *)data.dptr); + + unlink(TEST_DBNAME); + + tdb = tdb_open_ex( + TEST_DBNAME, 2, + TDB_INCOMPATIBLE_HASH|TDB_MUTEX_LOCKING|TDB_CLEAR_IF_FIRST, + O_CREAT|O_TRUNC|O_RDWR, 0600, &taplogctx, NULL); + + if (tdb_store(tdb, key, data, TDB_INSERT) != 0) { + return; + } + + tdb_close(tdb); + tdb = NULL; + + forget_locking(); +} + +static bool test_db(void) { + struct tdb_context *tdb; + int ret; + + tdb = tdb_open_ex( + TEST_DBNAME, 1024, TDB_INCOMPATIBLE_HASH, + O_RDWR, 0600, &taplogctx, NULL); + + if (tdb == NULL) { + perror("tdb_open_ex failed"); + return false; + } + + ret = tdb_traverse(tdb, NULL, NULL); + if (ret == -1) 
{ + perror("traverse failed"); + goto fail; + } + + tdb_close(tdb); + + forget_locking(); + + return true; + +fail: + tdb_close(tdb); + return false; +} + +static bool test_one(enum operation op, TDB_DATA key) +{ + enum agent_return ret; + + ret = AGENT_DIED; + target = 19; + + while (ret != SUCCESS) { + struct agent *agent; + + { + int child_target = target; + target = 0; + prep_db(key); + target = child_target; + } + + agent = prepare_external_agent(); + + ret = flakey_ops(agent, STORE, key); + + printf("Agent (target=%d) returns %s\n", target, + agent_return_name(ret)); + + shutdown_agent(agent); + + { + int child_target = target; + target = 0; + if (!test_db()) { + return false; + } + target = child_target; + } + + target += 1; + } + + return true; +} + +int main(int argc, char *argv[]) +{ + enum operation ops[] = { FETCH }; + TDB_DATA key; + int i; + + plan_tests(12); + unlock_callback = maybe_die; + + key.dsize = strlen(KEY_STRING); + key.dptr = (void *)KEY_STRING; + + for (i = 0; i < sizeof(ops)/sizeof(ops[0]); i++) { + enum agent_return ret; + diag("Testing %s after death\n", operation_name(ops[i])); + fflush(stdout); + ret = test_one(ops[i], key); + } + + return exit_status(); +} diff --git a/lib/tdb/test/run-mutex-openflags2.c b/lib/tdb/test/run-mutex-openflags2.c new file mode 100644 index 0000000..a049dbc --- /dev/null +++ b/lib/tdb/test/run-mutex-openflags2.c @@ -0,0 +1,94 @@ +#include "../common/tdb_private.h" +#include "../common/io.c" +#include "../common/tdb.c" +#include "../common/lock.c" +#include "../common/freelist.c" +#include "../common/traverse.c" +#include "../common/transaction.c" +#include "../common/error.c" +#include "../common/open.c" +#include "../common/check.c" +#include "../common/hash.c" +#include "../common/mutex.c" +#include "tap-interface.h" +#include +#include +#include +#include +#include + +static TDB_DATA key, data; + +static void log_fn(struct tdb_context *tdb, enum tdb_debug_level level, + const char *fmt, ...) 
+{ + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +static int do_child(int fd) +{ + struct tdb_context *tdb; + unsigned int log_count; + struct tdb_logging_context log_ctx = { log_fn, &log_count }; + char c; + + read(fd, &c, 1); + + tdb = tdb_open_ex("mutex-openflags2.tdb", 0, + TDB_INCOMPATIBLE_HASH| + TDB_CLEAR_IF_FIRST, + O_RDWR|O_CREAT, 0755, &log_ctx, NULL); + ok((tdb == NULL) && (errno == EINVAL), + "tdb_open_ex without mutexes should fail with EINVAL\n"); + + return 0; +} + +/* The code should barf on TDBs created with rwlocks. */ +int main(int argc, char *argv[]) +{ + struct tdb_context *tdb; + unsigned int log_count; + struct tdb_logging_context log_ctx = { log_fn, &log_count }; + int ret, status; + pid_t child, wait_ret; + int pipefd[2]; + char c = 0; + + ret = pipe(pipefd); + + key.dsize = strlen("hi"); + key.dptr = (void *)"hi"; + data.dsize = strlen("world"); + data.dptr = (void *)"world"; + + tdb = tdb_open_ex("mutex-openflags2.tdb", 0, + TDB_INCOMPATIBLE_HASH| + TDB_MUTEX_LOCKING, + O_RDWR|O_CREAT, 0755, &log_ctx, NULL); + ok((tdb == NULL) && (errno == EINVAL), "TDB_MUTEX_LOCKING without " + "TDB_CLEAR_IF_FIRST should fail with EINVAL\n"); + + child = fork(); + if (child == 0) { + return do_child(pipefd[0]); + } + + tdb = tdb_open_ex("mutex-openflags2.tdb", 0, + TDB_INCOMPATIBLE_HASH| + TDB_CLEAR_IF_FIRST| + TDB_MUTEX_LOCKING, + O_RDWR|O_CREAT, 0755, &log_ctx, NULL); + ok(tdb, "tdb_open_ex with mutexes should succeed\n"); + + write(pipefd[1], &c, 1); + + wait_ret = wait(&status); + ok((wait_ret == child) && (status == 0), + "child should have exited correctly\n"); + + return exit_status(); +} diff --git a/lib/tdb/test/run-mutex-trylock.c b/lib/tdb/test/run-mutex-trylock.c new file mode 100644 index 0000000..a3ec440 --- /dev/null +++ b/lib/tdb/test/run-mutex-trylock.c @@ -0,0 +1,106 @@ +#include "../common/tdb_private.h" +#include "../common/io.c" +#include "../common/tdb.c" +#include "../common/lock.c" +#include 
"../common/freelist.c" +#include "../common/traverse.c" +#include "../common/transaction.c" +#include "../common/error.c" +#include "../common/open.c" +#include "../common/check.c" +#include "../common/hash.c" +#include "../common/mutex.c" +#include "tap-interface.h" +#include +#include +#include +#include +#include + +static TDB_DATA key, data; + +static void log_fn(struct tdb_context *tdb, enum tdb_debug_level level, + const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +static int do_child(int tdb_flags, int fd) +{ + struct tdb_context *tdb; + unsigned int log_count; + struct tdb_logging_context log_ctx = { log_fn, &log_count }; + int ret; + char c = 0; + + tdb = tdb_open_ex("mutex-trylock.tdb", 0, tdb_flags, + O_RDWR|O_CREAT, 0755, &log_ctx, NULL); + ok(tdb, "tdb_open_ex should succeed\n"); + + ret = tdb_chainlock(tdb, key); + ok(ret == 0, "tdb_chainlock should succeed\n"); + + write(fd, &c, sizeof(c)); + + poll(NULL, 0, 1000); + + ret = tdb_chainunlock(tdb, key); + ok(ret == 0, "tdb_chainunlock should succeed\n"); + + write(fd, &c, sizeof(c)); + + return 0; +} + +/* The code should barf on TDBs created with rwlocks. 
*/ +int main(int argc, char *argv[]) +{ + struct tdb_context *tdb; + unsigned int log_count; + struct tdb_logging_context log_ctx = { log_fn, &log_count }; + int ret, status; + pid_t child, wait_ret; + int pipefds[2]; + char c; + int tdb_flags; + + key.dsize = strlen("hi"); + key.dptr = (void *)"hi"; + data.dsize = strlen("world"); + data.dptr = (void *)"world"; + + pipe(pipefds); + + tdb_flags = TDB_INCOMPATIBLE_HASH| + TDB_MUTEX_LOCKING| + TDB_CLEAR_IF_FIRST; + + child = fork(); + if (child == 0) { + return do_child(tdb_flags, pipefds[1]); + } + + read(pipefds[0], &c, sizeof(c)); + + tdb = tdb_open_ex("mutex-trylock.tdb", 0, tdb_flags, + O_RDWR|O_CREAT, 0755, &log_ctx, NULL); + ok(tdb, "tdb_open_ex should succeed\n"); + + ret = tdb_chainlock_nonblock(tdb, key); + ok(ret == -1, "tdb_chainlock_nonblock should not succeed\n"); + + read(pipefds[0], &c, sizeof(c)); + + ret = tdb_chainlock_nonblock(tdb, key); + ok(ret == 0, "tdb_chainlock_nonblock should succeed\n"); + ret = tdb_chainunlock(tdb, key); + ok(ret == 0, "tdb_chainunlock should succeed\n"); + + wait_ret = wait(&status); + ok(wait_ret == child, "child should have exited correctly\n"); + + return exit_status(); +} diff --git a/lib/tdb/test/run-mutex1.c b/lib/tdb/test/run-mutex1.c new file mode 100644 index 0000000..3753fef --- /dev/null +++ b/lib/tdb/test/run-mutex1.c @@ -0,0 +1,120 @@ +#include "../common/tdb_private.h" +#include "../common/io.c" +#include "../common/tdb.c" +#include "../common/lock.c" +#include "../common/freelist.c" +#include "../common/traverse.c" +#include "../common/transaction.c" +#include "../common/error.c" +#include "../common/open.c" +#include "../common/check.c" +#include "../common/hash.c" +#include "../common/mutex.c" +#include "tap-interface.h" +#include +#include +#include +#include +#include + +static TDB_DATA key, data; + +static void log_fn(struct tdb_context *tdb, enum tdb_debug_level level, + const char *fmt, ...) 
+{ + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +static int do_child(int tdb_flags, int fd) +{ + struct tdb_context *tdb; + unsigned int log_count; + struct tdb_logging_context log_ctx = { log_fn, &log_count }; + int ret; + char c = 0; + + tdb = tdb_open_ex("mutex1.tdb", 0, tdb_flags, + O_RDWR|O_CREAT, 0755, &log_ctx, NULL); + ok(tdb, "tdb_open_ex should succeed\n"); + + ret = tdb_chainlock(tdb, key); + ok(ret == 0, "tdb_chainlock should succeed\n"); + + write(fd, &c, sizeof(c)); + + poll(NULL, 0, 1000); + + ret = tdb_chainunlock(tdb, key); + ok(ret == 0, "tdb_chainunlock should succeed\n"); + + poll(NULL, 0, 1000); + + ret = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false); + ok(ret == 0, "tdb_allrecord_lock should succeed\n"); + + write(fd, &c, sizeof(c)); + + poll(NULL, 0, 1000); + + ret = tdb_allrecord_unlock(tdb, F_WRLCK, false); + ok(ret == 0, "tdb_allrecord_lock should succeed\n"); + + return 0; +} + +/* The code should barf on TDBs created with rwlocks. 
*/ +int main(int argc, char *argv[]) +{ + struct tdb_context *tdb; + unsigned int log_count; + struct tdb_logging_context log_ctx = { log_fn, &log_count }; + int ret, status; + pid_t child, wait_ret; + int pipefds[2]; + char c; + int tdb_flags; + + key.dsize = strlen("hi"); + key.dptr = (void *)"hi"; + data.dsize = strlen("world"); + data.dptr = (void *)"world"; + + pipe(pipefds); + + tdb_flags = TDB_INCOMPATIBLE_HASH| + TDB_MUTEX_LOCKING| + TDB_CLEAR_IF_FIRST; + + child = fork(); + if (child == 0) { + return do_child(tdb_flags, pipefds[1]); + } + + read(pipefds[0], &c, sizeof(c)); + + tdb = tdb_open_ex("mutex1.tdb", 0, tdb_flags, + O_RDWR|O_CREAT, 0755, &log_ctx, NULL); + ok(tdb, "tdb_open_ex should succeed\n"); + + ret = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false); + ok(ret == 0, "tdb_allrecord_lock should succeed\n"); + + ret = tdb_store(tdb, key, data, 0); + ok(ret == 0, "tdb_store should succeed\n"); + + ret = tdb_allrecord_unlock(tdb, F_WRLCK, false); + ok(ret == 0, "tdb_allrecord_unlock should succeed\n"); + + read(pipefds[0], &c, sizeof(c)); + + ret = tdb_delete(tdb, key); + ok(ret == 0, "tdb_delete should succeed\n"); + + wait_ret = wait(&status); + ok(wait_ret == child, "child should have exited correctly\n"); + + return exit_status(); +} diff --git a/lib/tdb/wscript b/lib/tdb/wscript index 6243ccf..c4f8b6a 100644 --- a/lib/tdb/wscript +++ b/lib/tdb/wscript @@ -39,7 +39,14 @@ tdb1_unit_tests = [ 'run-transaction-expand', 'run-traverse-in-transaction', 'run-wronghash-fail', - 'run-zero-append' + 'run-zero-append', + 'run-mutex-openflags2', + 'run-mutex-trylock', + 'run-mutex-allrecord-bench', + 'run-mutex-allrecord-trylock', + 'run-mutex-allrecord-block', + 'run-mutex-die', + 'run-mutex1', ] def set_options(opt): -- 1.7.9.5 From 772de8bf02cafd952a2d07bec7d0d65e9890b85c Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 30 Jan 2014 14:47:50 +0100 Subject: [PATCH 36/49] SQ tdb/test: add mutex related tests sq lib/tdb/test/run-mutex-die.c 
--- lib/tdb/test/run-mutex-die.c | 73 ++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 39 deletions(-) diff --git a/lib/tdb/test/run-mutex-die.c b/lib/tdb/test/run-mutex-die.c index 6e1c40d..3417bb4 100644 --- a/lib/tdb/test/run-mutex-die.c +++ b/lib/tdb/test/run-mutex-die.c @@ -88,8 +88,7 @@ static int ftruncate_check(int fd, off_t length) return ret; } -static enum agent_return flakey_ops(struct agent *a, enum operation op, - TDB_DATA key) +static enum agent_return flakey_ops(struct agent *a) { enum agent_return ret; @@ -97,7 +96,7 @@ static enum agent_return flakey_ops(struct agent *a, enum operation op, * Run in the external agent child */ - ret = external_agent_operation(a, OPEN, TEST_DBNAME); + ret = external_agent_operation(a, OPEN_WITH_CLEAR_IF_FIRST, TEST_DBNAME); if (ret != SUCCESS) { fprintf(stderr, "Agent failed to open: %s\n", agent_return_name(ret)); @@ -115,24 +114,12 @@ static enum agent_return flakey_ops(struct agent *a, enum operation op, agent_return_name(ret)); return ret; } - ret = external_agent_operation(a, TRANSACTION_START, ""); - if (ret != SUCCESS) { - fprintf(stderr, "Agent failed transaction_start: %s\n", - agent_return_name(ret)); - return ret; - } - ret = external_agent_operation(a, STORE, (char *)key.dptr); + ret = external_agent_operation(a, STORE, KEY_STRING); if (ret != SUCCESS) { fprintf(stderr, "Agent failed store: %s\n", agent_return_name(ret)); return ret; } - ret = external_agent_operation(a, TRANSACTION_COMMIT, ""); - if (ret != SUCCESS) { - fprintf(stderr, "Agent failed commit: %s\n", - agent_return_name(ret)); - return ret; - } ret = external_agent_operation(a, FETCH, KEY_STRING); if (ret != SUCCESS) { fprintf(stderr, "Agent failed find key: %s\n", @@ -148,11 +135,14 @@ static enum agent_return flakey_ops(struct agent *a, enum operation op, return ret; } -static void prep_db(TDB_DATA key) { +static bool prep_db(void) { struct tdb_context *tdb; + TDB_DATA key; TDB_DATA data; - data.dptr = 
(uint8_t *)"foo"; + key.dptr = discard_const_p(uint8_t, KEY_STRING); + key.dsize = strlen((char *)key.dptr); + data.dptr = discard_const_p(uint8_t, "foo"); data.dsize = strlen((char *)data.dptr); unlink(TEST_DBNAME); @@ -161,15 +151,20 @@ static void prep_db(TDB_DATA key) { TEST_DBNAME, 2, TDB_INCOMPATIBLE_HASH|TDB_MUTEX_LOCKING|TDB_CLEAR_IF_FIRST, O_CREAT|O_TRUNC|O_RDWR, 0600, &taplogctx, NULL); + if (tdb == NULL) { + return false; + } if (tdb_store(tdb, key, data, TDB_INSERT) != 0) { - return; + return false; } tdb_close(tdb); tdb = NULL; forget_locking(); + + return true; } static bool test_db(void) { @@ -202,7 +197,7 @@ fail: return false; } -static bool test_one(enum operation op, TDB_DATA key) +static bool test_one(void) { enum agent_return ret; @@ -214,26 +209,34 @@ static bool test_one(enum operation op, TDB_DATA key) { int child_target = target; + bool pret; target = 0; - prep_db(key); + pret = prep_db(); + ok1(pret); target = child_target; } agent = prepare_external_agent(); - ret = flakey_ops(agent, STORE, key); + ret = flakey_ops(agent); - printf("Agent (target=%d) returns %s\n", target, - agent_return_name(ret)); + diag("Agent (target=%d) returns %s", + target, agent_return_name(ret)); + + if (ret == SUCCESS) { + ok((target > 19), "At least one AGENT_DIED expected"); + } else { + ok(ret == AGENT_DIED, "AGENT_DIED expected"); + } shutdown_agent(agent); { int child_target = target; + bool tret; target = 0; - if (!test_db()) { - return false; - } + tret = test_db(); + ok1(tret); target = child_target; } @@ -245,22 +248,14 @@ static bool test_one(enum operation op, TDB_DATA key) int main(int argc, char *argv[]) { - enum operation ops[] = { FETCH }; - TDB_DATA key; - int i; + bool ret; plan_tests(12); unlock_callback = maybe_die; - key.dsize = strlen(KEY_STRING); - key.dptr = (void *)KEY_STRING; - - for (i = 0; i < sizeof(ops)/sizeof(ops[0]); i++) { - enum agent_return ret; - diag("Testing %s after death\n", operation_name(ops[i])); - fflush(stdout); - ret 
= test_one(ops[i], key); - } + ret = test_one(); + ok1(ret); + diag("done"); return exit_status(); } -- 1.7.9.5 From f6222acc0582f189a0329b0213c9cea6442db941 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 30 Jan 2014 15:03:52 +0100 Subject: [PATCH 37/49] SQ tdb/test: add mutex related tests sq lib/tdb/test/run-mutex-allrecord-block.c --- lib/tdb/test/run-mutex-allrecord-block.c | 33 ++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/lib/tdb/test/run-mutex-allrecord-block.c b/lib/tdb/test/run-mutex-allrecord-block.c index 52ead07b..80983d1 100644 --- a/lib/tdb/test/run-mutex-allrecord-block.c +++ b/lib/tdb/test/run-mutex-allrecord-block.c @@ -14,7 +14,6 @@ #include #include #include -#include #include static TDB_DATA key, data; @@ -28,7 +27,7 @@ static void log_fn(struct tdb_context *tdb, enum tdb_debug_level level, va_end(ap); } -static int do_child(int tdb_flags, int fd) +static int do_child(int tdb_flags, int to, int from) { struct tdb_context *tdb; unsigned int log_count; @@ -43,9 +42,9 @@ static int do_child(int tdb_flags, int fd) ret = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false); ok(ret == 0, "tdb_allrecord_lock should succeed\n"); - write(fd, &c, sizeof(c)); + write(to, &c, sizeof(c)); - system("/bin/sleep 99999"); + read(from, &c, sizeof(c)); ret = tdb_allrecord_unlock(tdb, F_WRLCK, false); ok(ret == 0, "tdb_allrecord_unlock should succeed\n"); @@ -61,16 +60,18 @@ int main(int argc, char *argv[]) struct tdb_logging_context log_ctx = { log_fn, &log_count }; int ret, status; pid_t child, wait_ret; - int pipefds[2]; + int fromchild[2]; + int tochild[2]; char c; int tdb_flags; key.dsize = strlen("hi"); - key.dptr = (void *)"hi"; + key.dptr = discard_const_p(uint8_t, "hi"); data.dsize = strlen("world"); - data.dptr = (void *)"world"; + data.dptr = discard_const_p(uint8_t, "world"); - pipe(pipefds); + pipe(fromchild); + pipe(tochild); tdb_flags = TDB_INCOMPATIBLE_HASH| TDB_MUTEX_LOCKING| @@ -78,18 
+79,27 @@ int main(int argc, char *argv[]) child = fork(); if (child == 0) { - return do_child(tdb_flags, pipefds[1]); + close(fromchild[0]); + close(tochild[1]); + return do_child(tdb_flags, fromchild[1], tochild[0]); } + close(fromchild[1]); + close(tochild[0]); - read(pipefds[0], &c, sizeof(c)); + read(fromchild[0], &c, sizeof(c)); tdb = tdb_open_ex("mutex-allrecord-block.tdb", 0, tdb_flags, O_RDWR|O_CREAT, 0755, &log_ctx, NULL); ok(tdb, "tdb_open_ex should succeed\n"); + ret = tdb_chainlock_nonblock(tdb, key); + ok(ret == -1, "tdb_chainlock_nonblock should not succeed\n"); + + write(tochild[1], &c, sizeof(c)); + ret = tdb_chainlock(tdb, key); - ok(ret == 0, "tdb_chainlock (nowait) should succeed\n"); + ok(ret == 0, "tdb_chainlock should not succeed\n"); ret = tdb_chainunlock(tdb, key); ok(ret == 0, "tdb_chainunlock should succeed\n"); @@ -97,5 +107,6 @@ int main(int argc, char *argv[]) wait_ret = wait(&status); ok(wait_ret == child, "child should have exited correctly\n"); + diag("done"); return exit_status(); } -- 1.7.9.5 From 3fc0f52946b35d8da3355dff8b1e1eaa778612f5 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 30 Jan 2014 15:09:44 +0100 Subject: [PATCH 38/49] SQ tdb/test: add mutex related tests sq lib/tdb/test/run-mutex-allrecord-trylock.c --- lib/tdb/test/run-mutex-allrecord-trylock.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/lib/tdb/test/run-mutex-allrecord-trylock.c b/lib/tdb/test/run-mutex-allrecord-trylock.c index 1ae1512..91307ae 100644 --- a/lib/tdb/test/run-mutex-allrecord-trylock.c +++ b/lib/tdb/test/run-mutex-allrecord-trylock.c @@ -14,7 +14,6 @@ #include #include #include -#include #include static TDB_DATA key, data; @@ -28,7 +27,7 @@ static void log_fn(struct tdb_context *tdb, enum tdb_debug_level level, va_end(ap); } -static int do_child(int tdb_flags, int fd) +static int do_child(int tdb_flags, int to, int from) { struct tdb_context *tdb; unsigned int log_count; @@ -43,9 
+42,9 @@ static int do_child(int tdb_flags, int fd) ret = tdb_chainlock(tdb, key); ok(ret == 0, "tdb_chainlock should succeed\n"); - write(fd, &c, sizeof(c)); + write(to, &c, sizeof(c)); - poll(NULL, 0, 1000); + read(from, &c, sizeof(c)); ret = tdb_chainunlock(tdb, key); ok(ret == 0, "tdb_chainunlock should succeed\n"); @@ -61,16 +60,18 @@ int main(int argc, char *argv[]) struct tdb_logging_context log_ctx = { log_fn, &log_count }; int ret, status; pid_t child, wait_ret; - int pipefds[2]; + int fromchild[2]; + int tochild[2]; char c; int tdb_flags; key.dsize = strlen("hi"); - key.dptr = (void *)"hi"; + key.dptr = discard_const_p(uint8_t, "hi"); data.dsize = strlen("world"); - data.dptr = (void *)"world"; + data.dptr = discard_const_p(uint8_t, "world"); - pipe(pipefds); + pipe(fromchild); + pipe(tochild); tdb_flags = TDB_INCOMPATIBLE_HASH| TDB_MUTEX_LOCKING| @@ -78,10 +79,14 @@ int main(int argc, char *argv[]) child = fork(); if (child == 0) { - return do_child(tdb_flags, pipefds[1]); + close(fromchild[0]); + close(tochild[1]); + return do_child(tdb_flags, fromchild[1], tochild[0]); } + close(fromchild[1]); + close(tochild[0]); - read(pipefds[0], &c, sizeof(c)); + read(fromchild[0], &c, sizeof(c)); tdb = tdb_open_ex("mutex-allrecord-trylock.tdb", 0, tdb_flags, O_RDWR|O_CREAT, 0755, &log_ctx, NULL); @@ -90,8 +95,11 @@ int main(int argc, char *argv[]) ret = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_NOWAIT, false); ok(ret == -1, "tdb_allrecord_lock (nowait) not should succeed\n"); + write(tochild[1], &c, sizeof(c)); + wait_ret = wait(&status); ok(wait_ret == child, "child should have exited correctly\n"); + diag("done"); return exit_status(); } -- 1.7.9.5 From 19aeaa7ff1d34eb192bb2b4950db8bec8e74942e Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 30 Jan 2014 16:06:10 +0100 Subject: [PATCH 39/49] SQ tdb/test: add mutex related tests sq lib/tdb/test/run-mutex-openflags2.c --- lib/tdb/test/run-mutex-openflags2.c | 15 +++++++-------- 1 file changed, 7 
insertions(+), 8 deletions(-) diff --git a/lib/tdb/test/run-mutex-openflags2.c b/lib/tdb/test/run-mutex-openflags2.c index a049dbc..4ef4c51 100644 --- a/lib/tdb/test/run-mutex-openflags2.c +++ b/lib/tdb/test/run-mutex-openflags2.c @@ -38,11 +38,9 @@ static int do_child(int fd) read(fd, &c, 1); tdb = tdb_open_ex("mutex-openflags2.tdb", 0, - TDB_INCOMPATIBLE_HASH| - TDB_CLEAR_IF_FIRST, + TDB_DEFAULT, O_RDWR|O_CREAT, 0755, &log_ctx, NULL); - ok((tdb == NULL) && (errno == EINVAL), - "tdb_open_ex without mutexes should fail with EINVAL\n"); + ok((tdb != NULL), "Open with TDB_DEFAULT should work"); return 0; } @@ -59,18 +57,19 @@ int main(int argc, char *argv[]) char c = 0; ret = pipe(pipefd); + ok1(ret == 0); key.dsize = strlen("hi"); - key.dptr = (void *)"hi"; + key.dptr = discard_const_p(uint8_t, "hi"); data.dsize = strlen("world"); - data.dptr = (void *)"world"; + data.dptr = discard_const_p(uint8_t, "world"); tdb = tdb_open_ex("mutex-openflags2.tdb", 0, TDB_INCOMPATIBLE_HASH| TDB_MUTEX_LOCKING, O_RDWR|O_CREAT, 0755, &log_ctx, NULL); ok((tdb == NULL) && (errno == EINVAL), "TDB_MUTEX_LOCKING without " - "TDB_CLEAR_IF_FIRST should fail with EINVAL\n"); + "TDB_CLEAR_IF_FIRST should fail with EINVAL - %d\n", errno); child = fork(); if (child == 0) { @@ -78,7 +77,6 @@ int main(int argc, char *argv[]) } tdb = tdb_open_ex("mutex-openflags2.tdb", 0, - TDB_INCOMPATIBLE_HASH| TDB_CLEAR_IF_FIRST| TDB_MUTEX_LOCKING, O_RDWR|O_CREAT, 0755, &log_ctx, NULL); @@ -90,5 +88,6 @@ int main(int argc, char *argv[]) ok((wait_ret == child) && (status == 0), "child should have exited correctly\n"); + diag("done"); return exit_status(); } -- 1.7.9.5 From 32d0ee47c38f783685032d70efaf54760f983ca3 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 30 Jan 2014 17:03:59 +0100 Subject: [PATCH 40/49] SQ tdb/test: add mutex related tests sq lib/tdb/test/run-mutex-trylock.c --- lib/tdb/test/run-mutex-trylock.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 
deletions(-) diff --git a/lib/tdb/test/run-mutex-trylock.c b/lib/tdb/test/run-mutex-trylock.c index a3ec440..5423687 100644 --- a/lib/tdb/test/run-mutex-trylock.c +++ b/lib/tdb/test/run-mutex-trylock.c @@ -14,7 +14,6 @@ #include #include #include -#include #include static TDB_DATA key, data; @@ -28,7 +27,7 @@ static void log_fn(struct tdb_context *tdb, enum tdb_debug_level level, va_end(ap); } -static int do_child(int tdb_flags, int fd) +static int do_child(int tdb_flags, int to, int from) { struct tdb_context *tdb; unsigned int log_count; @@ -43,14 +42,14 @@ static int do_child(int tdb_flags, int fd) ret = tdb_chainlock(tdb, key); ok(ret == 0, "tdb_chainlock should succeed\n"); - write(fd, &c, sizeof(c)); + write(to, &c, sizeof(c)); - poll(NULL, 0, 1000); + read(from, &c, sizeof(c)); ret = tdb_chainunlock(tdb, key); ok(ret == 0, "tdb_chainunlock should succeed\n"); - write(fd, &c, sizeof(c)); + write(to, &c, sizeof(c)); return 0; } @@ -63,16 +62,18 @@ int main(int argc, char *argv[]) struct tdb_logging_context log_ctx = { log_fn, &log_count }; int ret, status; pid_t child, wait_ret; - int pipefds[2]; + int fromchild[2]; + int tochild[2]; char c; int tdb_flags; key.dsize = strlen("hi"); - key.dptr = (void *)"hi"; + key.dptr = discard_const_p(uint8_t, "hi"); data.dsize = strlen("world"); - data.dptr = (void *)"world"; + data.dptr = discard_const_p(uint8_t, "world"); - pipe(pipefds); + pipe(fromchild); + pipe(tochild); tdb_flags = TDB_INCOMPATIBLE_HASH| TDB_MUTEX_LOCKING| @@ -80,10 +81,14 @@ int main(int argc, char *argv[]) child = fork(); if (child == 0) { - return do_child(tdb_flags, pipefds[1]); + close(fromchild[0]); + close(tochild[1]); + return do_child(tdb_flags, fromchild[1], tochild[0]); } + close(fromchild[1]); + close(tochild[0]); - read(pipefds[0], &c, sizeof(c)); + read(fromchild[0], &c, sizeof(c)); tdb = tdb_open_ex("mutex-trylock.tdb", 0, tdb_flags, O_RDWR|O_CREAT, 0755, &log_ctx, NULL); @@ -92,7 +97,9 @@ int main(int argc, char *argv[]) ret = 
tdb_chainlock_nonblock(tdb, key); ok(ret == -1, "tdb_chainlock_nonblock should not succeed\n"); - read(pipefds[0], &c, sizeof(c)); + write(tochild[1], &c, sizeof(c)); + + read(fromchild[0], &c, sizeof(c)); ret = tdb_chainlock_nonblock(tdb, key); ok(ret == 0, "tdb_chainlock_nonblock should succeed\n"); @@ -102,5 +109,6 @@ int main(int argc, char *argv[]) wait_ret = wait(&status); ok(wait_ret == child, "child should have exited correctly\n"); + diag("done"); return exit_status(); } -- 1.7.9.5 From ce0a098ede54229216eda2d356dd4dd615e4eecc Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 30 Jan 2014 17:11:36 +0100 Subject: [PATCH 41/49] SQ tdb/test: add mutex related tests sq lib/tdb/test/run-mutex1.c --- lib/tdb/test/run-mutex1.c | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/lib/tdb/test/run-mutex1.c b/lib/tdb/test/run-mutex1.c index 3753fef..8516e0e 100644 --- a/lib/tdb/test/run-mutex1.c +++ b/lib/tdb/test/run-mutex1.c @@ -14,7 +14,6 @@ #include #include #include -#include #include static TDB_DATA key, data; @@ -28,7 +27,7 @@ static void log_fn(struct tdb_context *tdb, enum tdb_debug_level level, va_end(ap); } -static int do_child(int tdb_flags, int fd) +static int do_child(int tdb_flags, int to, int from) { struct tdb_context *tdb; unsigned int log_count; @@ -43,21 +42,20 @@ static int do_child(int tdb_flags, int fd) ret = tdb_chainlock(tdb, key); ok(ret == 0, "tdb_chainlock should succeed\n"); - write(fd, &c, sizeof(c)); - - poll(NULL, 0, 1000); + write(to, &c, sizeof(c)); + read(from, &c, sizeof(c)); ret = tdb_chainunlock(tdb, key); ok(ret == 0, "tdb_chainunlock should succeed\n"); - poll(NULL, 0, 1000); + write(to, &c, sizeof(c)); + read(from, &c, sizeof(c)); ret = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false); ok(ret == 0, "tdb_allrecord_lock should succeed\n"); - write(fd, &c, sizeof(c)); - - poll(NULL, 0, 1000); + write(to, &c, sizeof(c)); + read(from, &c, sizeof(c)); ret = 
tdb_allrecord_unlock(tdb, F_WRLCK, false); ok(ret == 0, "tdb_allrecord_lock should succeed\n"); @@ -73,16 +71,18 @@ int main(int argc, char *argv[]) struct tdb_logging_context log_ctx = { log_fn, &log_count }; int ret, status; pid_t child, wait_ret; - int pipefds[2]; + int fromchild[2]; + int tochild[2]; char c; int tdb_flags; key.dsize = strlen("hi"); - key.dptr = (void *)"hi"; + key.dptr = discard_const_p(uint8_t, "hi"); data.dsize = strlen("world"); - data.dptr = (void *)"world"; + data.dptr = discard_const_p(uint8_t, "world"); - pipe(pipefds); + pipe(fromchild); + pipe(tochild); tdb_flags = TDB_INCOMPATIBLE_HASH| TDB_MUTEX_LOCKING| @@ -90,15 +90,22 @@ int main(int argc, char *argv[]) child = fork(); if (child == 0) { - return do_child(tdb_flags, pipefds[1]); + close(fromchild[0]); + close(tochild[1]); + return do_child(tdb_flags, fromchild[1], tochild[0]); } + close(fromchild[1]); + close(tochild[0]); - read(pipefds[0], &c, sizeof(c)); + read(fromchild[0], &c, sizeof(c)); tdb = tdb_open_ex("mutex1.tdb", 0, tdb_flags, O_RDWR|O_CREAT, 0755, &log_ctx, NULL); ok(tdb, "tdb_open_ex should succeed\n"); + write(tochild[1], &c, sizeof(c)); + read(fromchild[0], &c, sizeof(c)); + ret = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false); ok(ret == 0, "tdb_allrecord_lock should succeed\n"); @@ -108,7 +115,9 @@ int main(int argc, char *argv[]) ret = tdb_allrecord_unlock(tdb, F_WRLCK, false); ok(ret == 0, "tdb_allrecord_unlock should succeed\n"); - read(pipefds[0], &c, sizeof(c)); + write(tochild[1], &c, sizeof(c)); + read(fromchild[0], &c, sizeof(c)); + write(tochild[1], &c, sizeof(c)); ret = tdb_delete(tdb, key); ok(ret == 0, "tdb_delete should succeed\n"); @@ -116,5 +125,6 @@ int main(int argc, char *argv[]) wait_ret = wait(&status); ok(wait_ret == child, "child should have exited correctly\n"); + diag("done"); return exit_status(); } -- 1.7.9.5 From 824b2ca20870edebd908eca06657169513838c7f Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 30 Jan 2014 
17:13:32 +0100 Subject: [PATCH 42/49] SQ tdb/test: add mutex related tests sq lib/tdb/test/run-mutex-allrecord-bench.c --- lib/tdb/test/run-mutex-allrecord-bench.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/tdb/test/run-mutex-allrecord-bench.c b/lib/tdb/test/run-mutex-allrecord-bench.c index 5b816b3..36606d6 100644 --- a/lib/tdb/test/run-mutex-allrecord-bench.c +++ b/lib/tdb/test/run-mutex-allrecord-bench.c @@ -14,7 +14,6 @@ #include #include #include -#include #include static TDB_DATA key, data; @@ -52,9 +51,9 @@ int main(int argc, char *argv[]) double elapsed; key.dsize = strlen("hi"); - key.dptr = (void *)"hi"; + key.dptr = discard_const_p(uint8_t, "hi"); data.dsize = strlen("world"); - data.dptr = (void *)"world"; + data.dptr = discard_const_p(uint8_t, "world"); tdb = tdb_open_ex("mutex-allrecord-bench.tdb", 1000000, TDB_INCOMPATIBLE_HASH| -- 1.7.9.5 From 4d25b130375be265eca9d4e3c495299f0fc883d2 Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Wed, 22 Jan 2014 11:15:55 +0100 Subject: [PATCH 43/49] SQ tdb: add marklock deadlock test Signed-off-by: Volker Lendecke Reviewed-by: Stefan Metzmacher --- lib/tdb/test/run-mutex-marklock-deadlock.c | 177 ++++++++++++++++++++++++++++ lib/tdb/wscript | 1 + 2 files changed, 178 insertions(+) create mode 100644 lib/tdb/test/run-mutex-marklock-deadlock.c diff --git a/lib/tdb/test/run-mutex-marklock-deadlock.c b/lib/tdb/test/run-mutex-marklock-deadlock.c new file mode 100644 index 0000000..1a88d49 --- /dev/null +++ b/lib/tdb/test/run-mutex-marklock-deadlock.c @@ -0,0 +1,177 @@ +#include "../common/tdb_private.h" +#include "../common/io.c" +#include "../common/tdb.c" +#include "../common/lock.c" +#include "../common/freelist.c" +#include "../common/traverse.c" +#include "../common/transaction.c" +#include "../common/error.c" +#include "../common/open.c" +#include "../common/check.c" +#include "../common/hash.c" +#include "../common/mutex.c" +#include "tap-interface.h" +#include +#include 
+#include +#include +#include "logging.h" + +static TDB_DATA key, data; + +static void do_chainlock(int tdb_flags, int up, int down) +{ + struct tdb_context *tdb; + int ret; + ssize_t nread, nwritten; + char c = 0; + + tdb = tdb_open_ex("mutex-marklock-deadlock.tdb", 3, tdb_flags, + O_RDWR|O_CREAT, 0755, &taplogctx, NULL); + ok(tdb, "tdb_open_ex should succeed\n"); + + ret = tdb_chainlock(tdb, key); + ok(ret == 0, "tdb_chainlock should succeed\n"); + + nwritten = write(up, &c, sizeof(c)); + ok(nwritten == sizeof(c), "write should succeed\n"); + + nread = read(down, &c, sizeof(c)); + ok(nread == 0, "read should succeed\n"); + + exit(0); +} + +static void do_allrecord_lock(int tdb_flags, int up, int down) +{ + struct tdb_context *tdb; + int ret; + ssize_t nread, nwritten; + char c = 0; + + tdb = tdb_open_ex("mutex-marklock-deadlock.tdb", 3, tdb_flags, + O_RDWR|O_CREAT, 0755, &taplogctx, NULL); + ok(tdb, "tdb_open_ex should succeed\n"); + + ret = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false); + ok(ret == 0, "tdb_allrecord_lock should succeed\n"); + + nwritten = write(up, &c, sizeof(c)); + ok(nwritten == sizeof(c), "write should succeed\n"); + + nread = read(down, &c, sizeof(c)); + ok(nread == 0, "read should succeed\n"); + + exit(0); +} + +/* The code should barf on TDBs created with rwlocks. 
*/ +int main(int argc, char *argv[]) +{ + struct tdb_context *tdb; + int ret; + pid_t chainlock_child, allrecord_child; + int chainlock_down[2]; + int chainlock_up[2]; + int allrecord_down[2]; + int allrecord_up[2]; + char c; + int tdb_flags; + ssize_t nread; + + key.dsize = strlen("hi"); + key.dptr = discard_const_p(uint8_t, "hi"); + data.dsize = strlen("world"); + data.dptr = discard_const_p(uint8_t, "world"); + + ret = pipe(chainlock_down); + ok(ret == 0, "pipe should succeed\n"); + + ret = pipe(chainlock_up); + ok(ret == 0, "pipe should succeed\n"); + + ret = pipe(allrecord_down); + ok(ret == 0, "pipe should succeed\n"); + + ret = pipe(allrecord_up); + ok(ret == 0, "pipe should succeed\n"); + + tdb_flags = TDB_INCOMPATIBLE_HASH| + TDB_MUTEX_LOCKING| + TDB_CLEAR_IF_FIRST; + + chainlock_child = fork(); + ok(chainlock_child != -1, "fork should succeed\n"); + + if (chainlock_child == 0) { + close(chainlock_up[0]); + close(chainlock_down[1]); + close(allrecord_up[0]); + close(allrecord_up[1]); + close(allrecord_down[0]); + close(allrecord_down[1]); + do_chainlock(tdb_flags, chainlock_up[1], chainlock_down[0]); + exit(0); + } + + nread = read(chainlock_up[0], &c, sizeof(c)); + ok(nread == sizeof(c), "read should succeed\n"); + + /* + * Now we have a process holding a chainlock. Start another process + * trying the allrecord lock. This will block. + */ + + allrecord_child = fork(); + ok(allrecord_child != -1, "fork should succeed\n"); + + if (allrecord_child == 0) { + close(chainlock_up[0]); + close(chainlock_up[1]); + close(chainlock_down[0]); + close(chainlock_down[1]); + close(allrecord_up[0]); + close(allrecord_down[1]); + do_allrecord_lock(tdb_flags, + allrecord_up[1], allrecord_down[0]); + exit(0); + } + + poll(NULL, 0, 500); + + tdb = tdb_open_ex("mutex-marklock-deadlock.tdb", 3, tdb_flags, + O_RDWR|O_CREAT, 0755, &taplogctx, NULL); + ok(tdb, "tdb_open_ex should succeed\n"); + + /* + * We have someone else having done the lock for us. Just mark it. 
+ */ + + ret = tdb_chainlock_mark(tdb, key); + ok(ret == 0, "tdb_chainlock_mark should succeed\n"); + + /* + * The tdb_store below will block the freelist. In one version of the + * mutex patches, the freelist was already blocked here by the + * allrecord child, which was waiting for the chainlock child to give + * up its chainlock. Make sure that we don't run into this + * deadlock. To exercise the deadlock, just comment out the "ok" + * line. + */ + + ret = tdb_lock_nonblock(tdb, -1, F_WRLCK); + ok(ret == 0, "tdb_lock_nonblock should succeed\n"); + + if (ret == 0) { + ret = tdb_unlock(tdb, -1, F_WRLCK); + ok(ret == 0, "tdb_unlock should succeed\n"); + + ret = tdb_store(tdb, key, data, TDB_INSERT); + ok(ret == 0, "tdb_store should succeed\n"); + } + + ret = tdb_chainlock_unmark(tdb, key); + ok(ret == 0, "tdb_chainlock_unmark should succeed\n"); + + return exit_status(); +} diff --git a/lib/tdb/wscript b/lib/tdb/wscript index c4f8b6a..c82626c 100644 --- a/lib/tdb/wscript +++ b/lib/tdb/wscript @@ -47,6 +47,7 @@ tdb1_unit_tests = [ 'run-mutex-allrecord-block', 'run-mutex-die', 'run-mutex1', + 'run-mutex-marklock-deadlock', ] def set_options(opt): -- 1.7.9.5 From 9afc5baf9621bf46770a580d3182e913c30bb146 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 11 Apr 2014 21:07:40 +0200 Subject: [PATCH 44/49] SQ lib/tdb/test/run-mutex-marklock-deadlock.c --- lib/tdb/test/run-mutex-marklock-deadlock.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/tdb/test/run-mutex-marklock-deadlock.c b/lib/tdb/test/run-mutex-marklock-deadlock.c index 1a88d49..b863121 100644 --- a/lib/tdb/test/run-mutex-marklock-deadlock.c +++ b/lib/tdb/test/run-mutex-marklock-deadlock.c @@ -157,18 +157,18 @@ int main(int argc, char *argv[]) * up its chainlock. Make sure that we don't run into this * deadlock. To exercise the deadlock, just comment out the "ok" * line. + * + * The freelist lock/mutex is independent from the allrecord lock/mutex.
*/ ret = tdb_lock_nonblock(tdb, -1, F_WRLCK); ok(ret == 0, "tdb_lock_nonblock should succeed\n"); - if (ret == 0) { - ret = tdb_unlock(tdb, -1, F_WRLCK); - ok(ret == 0, "tdb_unlock should succeed\n"); + ret = tdb_unlock(tdb, -1, F_WRLCK); + ok(ret == 0, "tdb_unlock should succeed\n"); - ret = tdb_store(tdb, key, data, TDB_INSERT); - ok(ret == 0, "tdb_store should succeed\n"); - } + ret = tdb_store(tdb, key, data, TDB_INSERT); + ok(ret == 0, "tdb_store should succeed\n"); ret = tdb_chainlock_unmark(tdb, key); ok(ret == 0, "tdb_chainlock_unmark should succeed\n"); -- 1.7.9.5 From 4b0e61ba6f64a5dfd0040316e11134a6a7128394 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 11 Apr 2014 22:19:56 +0200 Subject: [PATCH 45/49] SQ lib/tdb/test/run-mutex-marklock-deadlock.c TODO also without mutex --- lib/tdb/test/run-mutex-marklock-deadlock.c | 69 ++++++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 3 deletions(-) diff --git a/lib/tdb/test/run-mutex-marklock-deadlock.c b/lib/tdb/test/run-mutex-marklock-deadlock.c index b863121..8d2a998 100644 --- a/lib/tdb/test/run-mutex-marklock-deadlock.c +++ b/lib/tdb/test/run-mutex-marklock-deadlock.c @@ -37,7 +37,7 @@ static void do_chainlock(int tdb_flags, int up, int down) ok(nwritten == sizeof(c), "write should succeed\n"); nread = read(down, &c, sizeof(c)); - ok(nread == 0, "read should succeed\n"); + ok(nread == sizeof(c), "read should succeed\n"); exit(0); } @@ -60,7 +60,7 @@ static void do_allrecord_lock(int tdb_flags, int up, int down) ok(nwritten == sizeof(c), "write should succeed\n"); nread = read(down, &c, sizeof(c)); - ok(nread == 0, "read should succeed\n"); + ok(nread == sizeof(c), "read should succeed\n"); exit(0); } @@ -77,7 +77,7 @@ int main(int argc, char *argv[]) int allrecord_up[2]; char c; int tdb_flags; - ssize_t nread; + ssize_t nread, nwritten; key.dsize = strlen("hi"); key.dptr = discard_const_p(uint8_t, "hi"); @@ -144,6 +144,22 @@ int main(int argc, char *argv[]) ok(tdb, "tdb_open_ex should 
succeed\n"); /* + * Someone already has a chainlock, but we're able to get the + * freelist lock. + * + * The freelist lock/mutex is independent from the allrecord lock/mutex. + */ + + ret = tdb_chainlock_nonblock(tdb, key); + ok(ret == -1, "tdb_chainlock_nonblock should not succeed\n"); + + ret = tdb_lock_nonblock(tdb, -1, F_WRLCK); + ok(ret == 0, "tdb_lock_nonblock should succeed\n"); + + ret = tdb_unlock(tdb, -1, F_WRLCK); + ok(ret == 0, "tdb_unlock should succeed\n"); + + /* * We have someone else having done the lock for us. Just mark it. */ @@ -173,5 +189,52 @@ int main(int argc, char *argv[]) ret = tdb_chainlock_unmark(tdb, key); ok(ret == 0, "tdb_chainlock_unmark should succeed\n"); + nwritten = write(chainlock_down[1], &c, sizeof(c)); + ok(nwritten == sizeof(c), "write should succeed\n"); + + nread = read(allrecord_up[0], &c, sizeof(c)); + ok(nread == sizeof(c), "read should succeed\n"); + + /* + * Someone already has the allrecord lock, but we're able to get the + * freelist lock. + * + * The freelist lock/mutex is independent from the allrecord lock/mutex. + */ + + ret = tdb_chainlock_nonblock(tdb, key); + ok(ret == -1, "tdb_chainlock_nonblock should not succeed\n"); + + ret = tdb_lockall_nonblock(tdb); + ok(ret == -1, "tdb_lockall_nonblock should not succeed\n"); + + ret = tdb_lock_nonblock(tdb, -1, F_WRLCK); + ok(ret == 0, "tdb_lock_nonblock should succeed\n"); + + ret = tdb_unlock(tdb, -1, F_WRLCK); + ok(ret == 0, "tdb_unlock should succeed\n"); + + /* + * We have someone else having done the lock for us. Just mark it.
+ */ + + ret = tdb_lockall_mark(tdb); + ok(ret == 0, "tdb_lockall_mark should succeed\n"); + + ret = tdb_lock_nonblock(tdb, -1, F_WRLCK); + ok(ret == 0, "tdb_lock_nonblock should succeed\n"); + + ret = tdb_unlock(tdb, -1, F_WRLCK); + ok(ret == 0, "tdb_unlock should succeed\n"); + + ret = tdb_store(tdb, key, data, TDB_REPLACE); + ok(ret == 0, "tdb_store should succeed\n"); + + ret = tdb_lockall_unmark(tdb); + ok(ret == 0, "tdb_lockall_unmark should succeed\n"); + + nwritten = write(allrecord_down[1], &c, sizeof(c)); + ok(nwritten == sizeof(c), "write should succeed\n"); + return exit_status(); } -- 1.7.9.5 From c2ba4dc1850195fef89c265035b548b536c4ea5b Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Thu, 14 Nov 2013 13:04:00 +0100 Subject: [PATCH 46/49] tdb: Add mutex info to 'tdbtool info' Signed-off-by: Volker Lendecke Reviewed-by: Stefan Metzmacher --- lib/tdb/common/summary.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/tdb/common/summary.c b/lib/tdb/common/summary.c index a22c17d..84c159f 100644 --- a/lib/tdb/common/summary.c +++ b/lib/tdb/common/summary.c @@ -21,6 +21,7 @@ "Size of file/data: %u/%zu\n" \ "Number of records: %zu\n" \ "Incompatible hash: %s\n" \ + "Mutexes: %s\n" \ "Smallest/average/largest keys: %zu/%zu/%zu\n" \ "Smallest/average/largest data: %zu/%zu/%zu\n" \ "Smallest/average/largest padding: %zu/%zu/%zu\n" \ @@ -164,7 +165,7 @@ _PUBLIC_ char *tdb_summary(struct tdb_context *tdb) tally_add(&hashval, get_hash_length(tdb, off)); /* 20 is max length of a %zu. 
*/ - len = strlen(SUMMARY_FORMAT) + 35*20 + 1; + len = strlen(SUMMARY_FORMAT) + 35*20 + 3; ret = (char *)malloc(len); if (!ret) goto unlock; @@ -173,6 +174,7 @@ _PUBLIC_ char *tdb_summary(struct tdb_context *tdb) tdb->map_size, keys.total+data.total, keys.num, (tdb->hash_fn == tdb_jenkins_hash)?"yes":"no", + (tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX)?"yes":"no", keys.min, tally_mean(&keys), keys.max, data.min, tally_mean(&data), data.max, extra.min, tally_mean(&extra), extra.max, -- 1.7.9.5 From 8478435b9cc93d14765abb8983349b71c398700e Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Fri, 15 Nov 2013 12:57:06 +0100 Subject: [PATCH 47/49] tdb: Allow tdbtool to r/o open mutexed tdbs Signed-off-by: Volker Lendecke Reviewed-by: Stefan Metzmacher --- lib/tdb/tools/tdbtool.c | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/lib/tdb/tools/tdbtool.c b/lib/tdb/tools/tdbtool.c index c486117..9fccb03 100644 --- a/lib/tdb/tools/tdbtool.c +++ b/lib/tdb/tools/tdbtool.c @@ -119,6 +119,31 @@ static double _end_timer(void) } #ifdef PRINTF_ATTRIBUTE +static void tdb_log_open(struct tdb_context *tdb, enum tdb_debug_level level, + const char *format, ...) PRINTF_ATTRIBUTE(3,4); +#endif +static void tdb_log_open(struct tdb_context *tdb, enum tdb_debug_level level, + const char *format, ...) +{ + const char *mutex_msg = "Can use mutexes only with CLEAR_IF_FIRST " + "or NOLOCK\n"; + va_list ap; + + if (strcmp(format, mutex_msg) == 0) { + /* + * Yes, this is a hack, but we don't want to see this + * message on first open, but we want to see + * everything else. + */ + return; + } + + va_start(ap, format); + vfprintf(stderr, format, ap); + va_end(ap); +} + +#ifdef PRINTF_ATTRIBUTE static void tdb_log(struct tdb_context *tdb, enum tdb_debug_level level, const char *format, ...) PRINTF_ATTRIBUTE(3,4); #endif static void tdb_log(struct tdb_context *tdb, enum tdb_debug_level level, const char *format, ...) 
@@ -240,7 +265,7 @@ static void create_tdb(const char *tdbname) static void open_tdb(const char *tdbname) { struct tdb_logging_context log_ctx = { NULL, NULL }; - log_ctx.log_fn = tdb_log; + log_ctx.log_fn = tdb_log_open; if (tdb) tdb_close(tdb); tdb = tdb_open_ex(tdbname, 0, @@ -248,6 +273,19 @@ static void open_tdb(const char *tdbname) (disable_lock?TDB_NOLOCK:0), O_RDWR, 0600, &log_ctx, NULL); + + if ((tdb == NULL) && (errno == EINVAL)) { + /* + * Retry NOLOCK and readonly. There we want to see all + * error messages. + */ + log_ctx.log_fn = tdb_log; + tdb = tdb_open_ex(tdbname, 0, + (disable_mmap?TDB_NOMMAP:0) |TDB_NOLOCK, + O_RDONLY, 0600, + &log_ctx, NULL); + } + if (!tdb) { printf("Could not open %s: %s\n", tdbname, strerror(errno)); } -- 1.7.9.5 From 0b113227a1b04f997f88c9a0913678721ea36421 Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Tue, 19 Mar 2013 12:02:22 +0100 Subject: [PATCH 48/49] dbwrap_tdb: Use mutexes on demand Signed-off-by: Stefan Metzmacher --- source3/lib/dbwrap/dbwrap_open.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/source3/lib/dbwrap/dbwrap_open.c b/source3/lib/dbwrap/dbwrap_open.c index 81f20b0..64f484e 100644 --- a/source3/lib/dbwrap/dbwrap_open.c +++ b/source3/lib/dbwrap/dbwrap_open.c @@ -93,6 +93,25 @@ struct db_context *db_open(TALLOC_CTX *mem_ctx, } } + if (tdb_flags & TDB_CLEAR_IF_FIRST) { + const char *base; + bool try_mutex = false; + + base = strrchr_m(name, '/'); + if (base != NULL) { + base += 1; + } else { + base = name; + } + + try_mutex = lp_parm_bool(-1, "dbwrap_tdb_mutexes", "*", try_mutex); + try_mutex = lp_parm_bool(-1, "dbwrap_tdb_mutexes", base, try_mutex); + + if (try_mutex && tdb_runtime_check_for_robust_mutexes()) { + tdb_flags |= TDB_MUTEX_LOCKING; + } + } + sockname = lp_ctdbd_socket(); if (lp_clustering()) { -- 1.7.9.5 From 798ff9770442ac987894c4acb5d68e5a791833a0 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 13 May 2013 11:14:26 +0200 Subject: [PATCH 49/49] 
selftest: use dbwrap_tdb_mutexes:* = yes for "plugin_s4_dc" and "member" Signed-off-by: Stefan Metzmacher --- selftest/target/Samba3.pm | 1 + selftest/target/Samba4.pm | 2 ++ 2 files changed, 3 insertions(+) diff --git a/selftest/target/Samba3.pm b/selftest/target/Samba3.pm index d7b5177..134fd49 100755 --- a/selftest/target/Samba3.pm +++ b/selftest/target/Samba3.pm @@ -247,6 +247,7 @@ sub setup_member($$$) my $member_options = " security = domain server signing = on + dbwrap_tdb_mutexes:* = yes "; my $ret = $self->provision($prefix, "LOCALMEMBER3", diff --git a/selftest/target/Samba4.pm b/selftest/target/Samba4.pm index 7003713..55b7b9b 100644 --- a/selftest/target/Samba4.pm +++ b/selftest/target/Samba4.pm @@ -1424,6 +1424,8 @@ sub provision_plugin_s4_dc($$) server services = -smb +s3fs xattr_tdb:file = $prefix_abs/statedir/xattr.tdb + dbwrap_tdb_mutexes:* = yes + kernel oplocks = no kernel change notify = no -- 1.7.9.5