From fa94e9d91da242c57bc025c9bd3697b3feb96c59 Mon Sep 17 00:00:00 2001 From: Ralph Boehme Date: Sun, 2 Jul 2017 07:46:17 +0200 Subject: [PATCH 1/4] tdb: rename struct tdb_traverse_lock hash member to list The variable stores the hashtable bucket, not the hash. No change in behaviour. BUG: https://bugzilla.samba.org/show_bug.cgi?id=12888 Signed-off-by: Ralph Boehme Reviewed-by: Jeremy Allison (cherry picked from commit a5a19fa5ef7b324d4945375122ef664059937862) --- lib/tdb/common/tdb_private.h | 2 +- lib/tdb/common/traverse.c | 40 ++++++++++++++++++++-------------------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/lib/tdb/common/tdb_private.h b/lib/tdb/common/tdb_private.h index 7ff29aa019b..e549af207ab 100644 --- a/lib/tdb/common/tdb_private.h +++ b/lib/tdb/common/tdb_private.h @@ -178,7 +178,7 @@ struct tdb_lock_type { struct tdb_traverse_lock { struct tdb_traverse_lock *next; uint32_t off; - uint32_t hash; + uint32_t list; int lock_rw; }; diff --git a/lib/tdb/common/traverse.c b/lib/tdb/common/traverse.c index f62306e5560..9b833795959 100644 --- a/lib/tdb/common/traverse.c +++ b/lib/tdb/common/traverse.c @@ -37,8 +37,8 @@ static tdb_off_t tdb_next_lock(struct tdb_context *tdb, struct tdb_traverse_lock int want_next = (tlock->off != 0); /* Lock each chain from the start one. */ - for (; tlock->hash < tdb->hash_size; tlock->hash++) { - if (!tlock->off && tlock->hash != 0) { + for (; tlock->list < tdb->hash_size; tlock->list++) { + if (!tlock->off && tlock->list != 0) { /* this is an optimisation for the common case where the hash chain is empty, which is particularly common for the use of tdb with ldb, where large @@ -67,18 +67,18 @@ static tdb_off_t tdb_next_lock(struct tdb_context *tdb, struct tdb_traverse_lock factor of around 80 in speed on a linux 2.6.x system (testing using ldbtest). */ - tdb->methods->next_hash_chain(tdb, &tlock->hash); - if (tlock->hash == tdb->hash_size) { + tdb->methods->next_hash_chain(tdb, &tlock->list); + if (tlock->list == tdb->hash_size) { continue; } } - if (tdb_lock(tdb, tlock->hash, tlock->lock_rw) == -1) + if (tdb_lock(tdb, tlock->list, tlock->lock_rw) == -1) return TDB_NEXT_LOCK_ERR; /* No previous record? Start at top of chain. */ if (!tlock->off) { - if (tdb_ofs_read(tdb, TDB_HASH_TOP(tlock->hash), + if (tdb_ofs_read(tdb, TDB_HASH_TOP(tlock->list), &tlock->off) == -1) goto fail; } else { @@ -121,7 +121,7 @@ static tdb_off_t tdb_next_lock(struct tdb_context *tdb, struct tdb_traverse_lock tdb_do_delete(tdb, current, rec) != 0) goto fail; } - tdb_unlock(tdb, tlock->hash, tlock->lock_rw); + tdb_unlock(tdb, tlock->list, tlock->lock_rw); want_next = 0; } /* We finished iteration without finding anything */ @@ -130,7 +130,7 @@ static tdb_off_t tdb_next_lock(struct tdb_context *tdb, struct tdb_traverse_lock fail: tlock->off = 0; - if (tdb_unlock(tdb, tlock->hash, tlock->lock_rw) != 0) + if (tdb_unlock(tdb, tlock->list, tlock->lock_rw) != 0) TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_next_lock: On error unlock failed!\n")); return TDB_NEXT_LOCK_ERR; } @@ -181,7 +181,7 @@ static int tdb_traverse_internal(struct tdb_context *tdb, if (key.dptr == NULL) { ret = -1; - if (tdb_unlock(tdb, tl->hash, tl->lock_rw) + if (tdb_unlock(tdb, tl->list, tl->lock_rw) != 0) { goto out; } @@ -205,7 +205,7 @@ static int tdb_traverse_internal(struct tdb_context *tdb, key.dptr, full_len, 0); if (nread == -1) { ret = -1; - if (tdb_unlock(tdb, tl->hash, tl->lock_rw) != 0) + if (tdb_unlock(tdb, tl->list, tl->lock_rw) != 0) goto out; if (tdb_unlock_record(tdb, tl->off) != 0) TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_traverse: key.dptr == NULL and unlock_record failed!\n")); @@ -218,7 +218,7 @@ static int tdb_traverse_internal(struct tdb_context *tdb, tdb_trace_1rec_retrec(tdb, "traverse", key, dbuf); /* Drop chain lock, call out */ - if (tdb_unlock(tdb, tl->hash, tl->lock_rw) != 0) { + if (tdb_unlock(tdb, tl->list, tl->lock_rw) != 0) { ret = -1; goto out; } @@ -314,7 +314,7 @@ _PUBLIC_ TDB_DATA tdb_firstkey(struct tdb_context *tdb) /* release any old lock */ if (tdb_unlock_record(tdb, tdb->travlocks.off) != 0) return tdb_null; - tdb->travlocks.off = tdb->travlocks.hash = 0; + tdb->travlocks.off = tdb->travlocks.list = 0; tdb->travlocks.lock_rw = F_RDLCK; /* Grab first record: locks chain and returned record. */ @@ -330,7 +330,7 @@ _PUBLIC_ TDB_DATA tdb_firstkey(struct tdb_context *tdb) tdb_trace_retrec(tdb, "tdb_firstkey", key); /* Unlock the hash chain of the record we just read. */ - if (tdb_unlock(tdb, tdb->travlocks.hash, tdb->travlocks.lock_rw) != 0) + if (tdb_unlock(tdb, tdb->travlocks.list, tdb->travlocks.lock_rw) != 0) TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_firstkey: error occurred while tdb_unlocking!\n")); return key; } @@ -338,7 +338,7 @@ _PUBLIC_ TDB_DATA tdb_firstkey(struct tdb_context *tdb) /* find the next entry in the database, returning its key */ _PUBLIC_ TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA oldkey) { - uint32_t oldhash; + uint32_t oldlist; TDB_DATA key = tdb_null; struct tdb_record rec; unsigned char *k = NULL; @@ -346,7 +346,7 @@ _PUBLIC_ TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA oldkey) /* Is locked key the old key? If so, traverse will be reliable. */ if (tdb->travlocks.off) { - if (tdb_lock(tdb,tdb->travlocks.hash,tdb->travlocks.lock_rw)) + if (tdb_lock(tdb,tdb->travlocks.list,tdb->travlocks.lock_rw)) return tdb_null; if (tdb_rec_read(tdb, tdb->travlocks.off, &rec) == -1 || !(k = tdb_alloc_read(tdb,tdb->travlocks.off+sizeof(rec), @@ -359,7 +359,7 @@ _PUBLIC_ TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA oldkey) SAFE_FREE(k); return tdb_null; } - if (tdb_unlock(tdb, tdb->travlocks.hash, tdb->travlocks.lock_rw) != 0) { + if (tdb_unlock(tdb, tdb->travlocks.list, tdb->travlocks.lock_rw) != 0) { SAFE_FREE(k); return tdb_null; } @@ -376,13 +376,13 @@ _PUBLIC_ TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA oldkey) tdb_trace_1rec_retrec(tdb, "tdb_nextkey", oldkey, tdb_null); return tdb_null; } - tdb->travlocks.hash = BUCKET(rec.full_hash); + tdb->travlocks.list = BUCKET(rec.full_hash); if (tdb_lock_record(tdb, tdb->travlocks.off) != 0) { TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_nextkey: lock_record failed (%s)!\n", strerror(errno))); return tdb_null; } } - oldhash = tdb->travlocks.hash; + oldlist = tdb->travlocks.list; /* Grab next record: locks chain and returned record, unlocks old record */ @@ -392,11 +392,11 @@ _PUBLIC_ TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA oldkey) key.dptr = tdb_alloc_read(tdb, tdb->travlocks.off+sizeof(rec), key.dsize); /* Unlock the chain of this new record */ - if (tdb_unlock(tdb, tdb->travlocks.hash, tdb->travlocks.lock_rw) != 0) + if (tdb_unlock(tdb, tdb->travlocks.list, tdb->travlocks.lock_rw) != 0) TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_nextkey: WARNING tdb_unlock failed!\n")); } /* Unlock the chain of old record */ - if (tdb_unlock(tdb, BUCKET(oldhash), tdb->travlocks.lock_rw) != 0) + if (tdb_unlock(tdb, oldlist, tdb->travlocks.lock_rw) != 0) TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_nextkey: WARNING tdb_unlock failed!\n")); tdb_trace_1rec_retrec(tdb, "tdb_nextkey", oldkey, key); return key; -- 2.14.1.342.g6490525c54-goog From ca3032c1d8ffe99eb763f1bcdd134dbf218f9a7b Mon Sep 17 00:00:00 2001 From: Ralph Boehme Date: Sat, 1 Jul 2017 22:21:26 +0200 Subject: [PATCH 2/4] tbd: BUCKET(-1) returns wrong offset because tdb->hash_size is an unsigned int The following C program demonstrates the issue: #include #include #include #include #include #include #include #include #include int main(int argc, char **argv) { int hash = -1; int tsize_signed = 10; unsigned int tsize_unsigned = 10; int bucket; #define BUCKET(hash, tsize) ((hash) % (tsize)) bucket = BUCKET(hash, tsize_unsigned); printf("hash [%d] tsize [%d] bucket [%d]\n", hash, tsize_unsigned, bucket); bucket = BUCKET(hash, tsize_signed); printf("hash [%d] tsize [%d] bucket [%d]\n", hash, tsize_signed, bucket); return 0; } Output: $ ./tmp hash [-1] tsize [10] bucket [5] hash [-1] tsize [10] bucket [-1] The first version is what the current BUCKET() macro does. As a result we lock the hashtable chain in bucket 5, NOT the freelist. -1 is sign converted to an unsigned int 4294967295 and 4294967295 % 10 = 5. As all callers will lock the same wrong list consistently locking is still consistent. Stumpled across this when looking at the output of `tdbtool DB list` which always printed some other hashchain and not the freelist. The freelist bucket offset computation doesn't use the BUCKET macro in freelist.c (directly or indirectly) when working on the freelist, it just directly uses the FREELIST_TOP define, so this problem only affects tdbtool list. BUG: https://bugzilla.samba.org/show_bug.cgi?id=12888 Signed-off-by: Ralph Boehme Reviewed-by: Jeremy Allison (cherry picked from commit f2b7bc23df842c3f6cd6a477a500590076a3839e) --- lib/tdb/common/tdb_private.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/lib/tdb/common/tdb_private.h b/lib/tdb/common/tdb_private.h index e549af207ab..eeac10a49ad 100644 --- a/lib/tdb/common/tdb_private.h +++ b/lib/tdb/common/tdb_private.h @@ -126,6 +126,22 @@ void tdb_trace_2rec_retrec(struct tdb_context *tdb, const char *op, #define SAFE_FREE(x) do { if ((x) != NULL) {free(x); (x)=NULL;} } while(0) #endif +/* + * Note: the BUCKET macro is broken as it returns an unexpected result when + * called as BUCKET(-1) for the freelist: + * + * -1 is sign converted to an unsigned int 4294967295 and then the modulo + * tdb->hashtable_size is computed. So with a hashtable_size of 10 the result + * is + * + * 4294967295 % hashtable_size = 5. + * + * where it should be -1 (C uses symmetric modulo). + * + * As all callers will lock the same wrong list consistently locking is still + * consistent. We can not change this without an incompatible on-disk format + * change, otherwise different tdb versions would use incompatible locking. + */ #define BUCKET(hash) ((hash) % tdb->hash_size) #define DOCONV() (tdb->flags & TDB_CONVERT) -- 2.14.1.342.g6490525c54-goog From 90a54b9b24941d212e25490123a1429a00692d9b Mon Sep 17 00:00:00 2001 From: Ralph Boehme Date: Sun, 2 Jul 2017 11:41:43 +0200 Subject: [PATCH 3/4] tdb: document hashtable bucket offset calculation madness BUG: https://bugzilla.samba.org/show_bug.cgi?id=12888 Signed-off-by: Ralph Boehme Reviewed-by: Jeremy Allison (cherry picked from commit d2c78695c60a1902fd10197f762b84ee83372b6e) --- lib/tdb/common/lock.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/lib/tdb/common/lock.c b/lib/tdb/common/lock.c index e330201961a..9f30c7a4b57 100644 --- a/lib/tdb/common/lock.c +++ b/lib/tdb/common/lock.c @@ -137,7 +137,33 @@ static int fcntl_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len) return fcntl(tdb->fd, F_SETLKW, &fl); } -/* list -1 is the alloc list, otherwise a hash chain. */ +/* + * Calculate the lock offset for a list + * + * list -1 is the freelist, otherwise a hash chain. + * + * Note that we consistently (but without real reason) lock hash chains at an + * offset that is 4 bytes below the real offset of the corresponding list head + * in the db. + * + * This is the memory layout of the hashchain array: + * + * FREELIST_TOP + 0 = freelist + * FREELIST_TOP + 4 = hashtbale list 0 + * FREELIST_TOP + 8 = hashtbale list 1 + * ... + * + * Otoh lock_offset computes: + * + * freelist = FREELIST_TOP - 4 + * list 0 = FREELIST_TOP + 0 + * list 1 = FREELIST_TOP + 4 + * ... + * + * Unfortunately we can't change this calculation in order to align the locking + * offset with the memory layout, as that would make the locking incompatible + * between different tdb versions. + */ static tdb_off_t lock_offset(int list) { return FREELIST_TOP + 4*list; -- 2.14.1.342.g6490525c54-goog From f5d672c9a7a6683b20cde54770292056bba2bc36 Mon Sep 17 00:00:00 2001 From: Ralph Boehme Date: Sun, 9 Jul 2017 18:39:27 +0200 Subject: [PATCH 4/4] tdb: fix tbdtool list freelist output Due to the non-fixable bug in the BUCKET macro tdbtool list printed some other hash chainlist, not the freelist. BUG: https://bugzilla.samba.org/show_bug.cgi?id=12888 Signed-off-by: Ralph Boehme Reviewed-by: Jeremy Allison (cherry picked from commit a43affd4c509246f968d7003e1b674f92d75d930) --- lib/tdb/common/dump.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/tdb/common/dump.c b/lib/tdb/common/dump.c index 5f6a78b2c8e..73286b87350 100644 --- a/lib/tdb/common/dump.c +++ b/lib/tdb/common/dump.c @@ -62,7 +62,11 @@ static int tdb_dump_chain(struct tdb_context *tdb, int i) { tdb_off_t rec_ptr, top; - top = TDB_HASH_TOP(i); + if (i == -1) { + top = FREELIST_TOP; + } else { + top = TDB_HASH_TOP(i); + } if (tdb_lock(tdb, i, F_WRLCK) != 0) return -1; -- 2.14.1.342.g6490525c54-goog