From 9e701057174963d8d5f512965362aa44e6831f53 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 28 Aug 2020 15:33:43 +0200 Subject: [PATCH 1/7] s3:selftest: also run durable_v2_reconnect_delay_msec in samba3.blackbox.durable_v2_delay BUG: https://bugzilla.samba.org/show_bug.cgi?id=14428 Signed-off-by: Stefan Metzmacher Reviewed-by: Volker Lendecke (cherry picked from commit 560fe7b38f0c8d53079fabf3f984b11748270035) --- source3/script/tests/test_durable_handle_reconnect.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/source3/script/tests/test_durable_handle_reconnect.sh b/source3/script/tests/test_durable_handle_reconnect.sh index 77d82c1b4034..e9af9ca8a845 100755 --- a/source3/script/tests/test_durable_handle_reconnect.sh +++ b/source3/script/tests/test_durable_handle_reconnect.sh @@ -17,6 +17,12 @@ testit "durable_v2_delay.durable_v2_reconnect_delay" $VALGRIND \ smb2.durable-v2-delay.durable_v2_reconnect_delay || failed=$(expr $failed + 1) +testit "durable_v2_delay.durable_v2_reconnect_delay_msec" $VALGRIND \ + $BINDIR/smbtorture //$SERVER_IP/durable \ + -U$USERNAME%$PASSWORD \ + smb2.durable-v2-delay.durable_v2_reconnect_delay_msec || + failed=$(expr $failed + 1) + rm $delay_inject_conf testok $0 $failed -- 2.17.1 From fb97c283d854ce120c0e8a50635d45479cb07e0b Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 28 Aug 2020 14:37:59 +0200 Subject: [PATCH 2/7] s3:share_mode_lock: reproduce problem with stale disconnected share mode entries This reproduces the origin of "PANIC: assert failed in get_lease_type()" (https://bugzilla.samba.org/show_bug.cgi?id=14428). share_mode_cleanup_disconnected() removes disconnected entries from leases.tdb and brlock.tdb but not from locking.tdb. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14428 Signed-off-by: Stefan Metzmacher Reviewed-by: Volker Lendecke (cherry picked from commit 444f2bedf723b89bb9f493c47812bff2154c4113) --- selftest/knownfail.d/bug-14428 | 1 + source3/locking/share_mode_lock.c | 14 ++++++++++++++ .../script/tests/test_durable_handle_reconnect.sh | 14 ++++++++++++++ 3 files changed, 29 insertions(+) create mode 100644 selftest/knownfail.d/bug-14428 diff --git a/selftest/knownfail.d/bug-14428 b/selftest/knownfail.d/bug-14428 new file mode 100644 index 000000000000..e198400ea163 --- /dev/null +++ b/selftest/knownfail.d/bug-14428 @@ -0,0 +1 @@ +^samba3.blackbox.durable_v2_delay.bug.14428 diff --git a/source3/locking/share_mode_lock.c b/source3/locking/share_mode_lock.c index ba0bc2b1e7b6..ca8b2f475290 100644 --- a/source3/locking/share_mode_lock.c +++ b/source3/locking/share_mode_lock.c @@ -1730,6 +1730,20 @@ bool share_mode_cleanup_disconnected(struct file_id fid, data->have_share_modes = false; data->modified = true; + /* + * This is a temporary reproducer for the origin of + * https://bugzilla.samba.org/show_bug.cgi?id=14428 + * "PANIC: assert failed in get_lease_type()" + * + * This will be removed again once the bug is demonstrated + * and fixed. + */ + TALLOC_FREE(state.lck); + state.lck = get_existing_share_mode_lock(frame, fid); + if (state.lck != NULL) { + DBG_ERR("Reproduced BUG#14428\n"); + } + ret = true; done: talloc_free(frame); diff --git a/source3/script/tests/test_durable_handle_reconnect.sh b/source3/script/tests/test_durable_handle_reconnect.sh index e9af9ca8a845..66a35f08f17e 100755 --- a/source3/script/tests/test_durable_handle_reconnect.sh +++ b/source3/script/tests/test_durable_handle_reconnect.sh @@ -17,12 +17,26 @@ testit "durable_v2_delay.durable_v2_reconnect_delay" $VALGRIND \ smb2.durable-v2-delay.durable_v2_reconnect_delay || failed=$(expr $failed + 1) +SMBD_LOG_FILES="$SMBD_TEST_LOG" +if [ $SMBD_DONT_LOG_STDOUT -eq 1 ]; then + _SMBD_LOG_FILE=$(dirname $SMBD_TEST_LOG)/logs/log.smbd + SMBD_LOG_FILES="$SMBD_LOG_FILES $_SMBD_LOG_FILE" +fi + +bug_count_0=$(grep 'Reproduced BUG#14428' $SMBD_LOG_FILES | wc -l) + testit "durable_v2_delay.durable_v2_reconnect_delay_msec" $VALGRIND \ $BINDIR/smbtorture //$SERVER_IP/durable \ -U$USERNAME%$PASSWORD \ smb2.durable-v2-delay.durable_v2_reconnect_delay_msec || failed=$(expr $failed + 1) +bug_count_1=$(grep 'Reproduced BUG#14428' $SMBD_LOG_FILES | wc -l) + +testit "bug.14428 bug_count_0[$bug_count_0] bug_count_1[$bug_count_1]" \ + test $bug_count_0 -eq $bug_count_1 || + failed=$(expr $failed + 1) + rm $delay_inject_conf testok $0 $failed -- 2.17.1 From 065d8f1dcd6e3aad268e8b78e82a5f88d7afbc9d Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 28 Aug 2020 15:56:35 +0200 Subject: [PATCH 3/7] s3:share_mode_lock: let share_mode_forall_entries/share_entry_forall evaluate e.stale first It's not really clear why e.stale would be ignored if *modified is set to true. This matches the behavior of share_mode_entry_do() This also makes sure we see the removed entry in level 10 logs again. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14428 Signed-off-by: Stefan Metzmacher Reviewed-by: Volker Lendecke (cherry picked from commit deb2f782c95a5e59a0a5da4272239c2d31bc2b6d) --- source3/locking/share_mode_lock.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/source3/locking/share_mode_lock.c b/source3/locking/share_mode_lock.c index ca8b2f475290..0c5626f4bd1b 100644 --- a/source3/locking/share_mode_lock.c +++ b/source3/locking/share_mode_lock.c @@ -2022,6 +2022,18 @@ static bool share_mode_for_one_entry( (int)modified, (int)e.stale); + if (e.stale) { + if (*i < *num_share_modes) { + memmove(blob.data, + blob.data + SHARE_MODE_ENTRY_SIZE, + (*num_share_modes - *i - 1) * + SHARE_MODE_ENTRY_SIZE); + } + *num_share_modes -= 1; + *writeback = true; + return stop; + } + if (modified) { if (DEBUGLEVEL>=10) { DBG_DEBUG("share_mode_entry:\n"); @@ -2052,18 +2064,6 @@ static bool share_mode_for_one_entry( return stop; } - if (e.stale) { - if (*i < *num_share_modes) { - memmove(blob.data, - blob.data + SHARE_MODE_ENTRY_SIZE, - (*num_share_modes - *i - 1) * - SHARE_MODE_ENTRY_SIZE); - } - *num_share_modes -= 1; - *writeback = true; - return stop; - } - if (stop) { return true; } -- 2.17.1 From 661f27074e96eed07d083b1ee92cdce664cf743f Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 28 Aug 2020 15:56:35 +0200 Subject: [PATCH 4/7] s3:share_mode_lock: consistently debug share_mode_entry records share_mode_entry_do(), share_mode_forall_entries() and share_entry_forall() print the record before the callback is called and when it was modified or deleted. This makes it much easier to debug problems. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14428 Signed-off-by: Stefan Metzmacher Reviewed-by: Volker Lendecke (cherry picked from commit 4d740ac2084a68c6d4836cd83ea5d5f1ee9d37a2) --- source3/locking/share_mode_lock.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/source3/locking/share_mode_lock.c b/source3/locking/share_mode_lock.c index 0c5626f4bd1b..a1a7b9793cfa 100644 --- a/source3/locking/share_mode_lock.c +++ b/source3/locking/share_mode_lock.c @@ -2023,6 +2023,11 @@ static bool share_mode_for_one_entry( (int)e.stale); if (e.stale) { + if (DEBUGLEVEL>=10) { + DBG_DEBUG("share_mode_entry:\n"); + NDR_PRINT_DEBUG(share_mode_entry, &e); + } + if (*i < *num_share_modes) { memmove(blob.data, blob.data + SHARE_MODE_ENTRY_SIZE, @@ -2240,8 +2245,18 @@ static bool share_mode_entry_do( goto done; } + if (DEBUGLEVEL>=10) { + DBG_DEBUG("entry[%zu]:\n", idx); + NDR_PRINT_DEBUG(share_mode_entry, &e); + } + fn(&e, ltdb->num_share_entries, &modified, private_data); + DBG_DEBUG("entry[%zu]: modified=%d, e.stale=%d\n", + idx, + (int)modified, + (int)e.stale); + if (!e.stale && !modified) { ret = true; goto done; @@ -2252,9 +2267,8 @@ static bool share_mode_entry_do( if (e.stale) { /* - * Move the reset down one entry + * Move the rest down one entry */ - size_t behind = ltdb->num_share_entries - idx - 1; if (behind != 0) { memmove(e_ptr, @@ -2262,6 +2276,11 @@ static bool share_mode_entry_do( behind * SHARE_MODE_ENTRY_SIZE); } ltdb->num_share_entries -= 1; + + if (DEBUGLEVEL>=10) { + DBG_DEBUG("share_mode_entry:\n"); + NDR_PRINT_DEBUG(share_mode_entry, &e); + } } else { struct share_mode_entry_buf buf; bool ok; -- 2.17.1 From e3b222b32c3ba1a08e59d6a7680ecd82daba39dc Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 28 Aug 2020 15:56:35 +0200 Subject: [PATCH 5/7] s3:share_mode_lock: add missing 'goto done' in share_mode_cleanup_disconnected() When cleanup_disconnected_lease() fails we should stop, at least we do that if brl_cleanup_disconnected() fails. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14428 Signed-off-by: Stefan Metzmacher Reviewed-by: Volker Lendecke (cherry picked from commit 1aa1ac97082f81f6dc62f345823d2dd345e0afd7) --- source3/locking/share_mode_lock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/source3/locking/share_mode_lock.c b/source3/locking/share_mode_lock.c index a1a7b9793cfa..965c5ab18837 100644 --- a/source3/locking/share_mode_lock.c +++ b/source3/locking/share_mode_lock.c @@ -1694,6 +1694,7 @@ bool share_mode_cleanup_disconnected(struct file_id fid, (data->stream_name == NULL) ? "" : data->stream_name, open_persistent_id); + goto done; } ok = brl_cleanup_disconnected(fid, open_persistent_id); -- 2.17.1 From bcc99047e3977ba9c91089fb5fba27d08b8c05e9 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 28 Aug 2020 16:28:41 +0200 Subject: [PATCH 6/7] s3:share_mode_lock: make sure share_mode_cleanup_disconnected() removes the record This fixes one possible trigger for "PANIC: assert failed in get_lease_type()" https://bugzilla.samba.org/show_bug.cgi?id=14428 This is no longer enough to remove the record: d->have_share_modes = false; d->modified = true; Note that we can remove it completely from share_mode_cleanup_disconnected() as share_mode_forall_entries() already sets it when there are no entries left. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14428 Signed-off-by: Stefan Metzmacher Reviewed-by: Volker Lendecke (cherry picked from commit b5c0874fd5d31e252cf9ac8b84bde5c536b1e8ef) --- selftest/knownfail.d/bug-14428 | 1 - source3/locking/share_mode_lock.c | 56 +++++++++++++++++++++++++++++-- 2 files changed, 54 insertions(+), 3 deletions(-) delete mode 100644 selftest/knownfail.d/bug-14428 diff --git a/selftest/knownfail.d/bug-14428 b/selftest/knownfail.d/bug-14428 deleted file mode 100644 index e198400ea163..000000000000 --- a/selftest/knownfail.d/bug-14428 +++ /dev/null @@ -1 +0,0 @@ -^samba3.blackbox.durable_v2_delay.bug.14428 diff --git a/source3/locking/share_mode_lock.c b/source3/locking/share_mode_lock.c index 965c5ab18837..6588f09b285b 100644 --- a/source3/locking/share_mode_lock.c +++ b/source3/locking/share_mode_lock.c @@ -1648,6 +1648,42 @@ static bool share_mode_find_connected_fn( return false; } +static bool cleanup_disconnected_share_mode_entry_fn( + struct share_mode_entry *e, + bool *modified, + void *private_data) +{ + struct cleanup_disconnected_state *state = private_data; + struct share_mode_data *d = state->lck->data; + + bool disconnected; + + disconnected = server_id_is_disconnected(&e->pid); + if (!disconnected) { + struct file_id_buf tmp1; + struct server_id_buf tmp2; + DBG_ERR("file (file-id='%s', servicepath='%s', " + "base_name='%s%s%s') " + "is used by server %s ==> internal error\n", + file_id_str_buf(d->id, &tmp1), + d->servicepath, + d->base_name, + (d->stream_name == NULL) + ? "" : "', stream_name='", + (d->stream_name == NULL) + ? "" : d->stream_name, + server_id_str_buf(e->pid, &tmp2)); + smb_panic(__location__); + } + + /* + * Setting e->stale = true is + * the indication to delete the entry. + */ + e->stale = true; + return false; +} + bool share_mode_cleanup_disconnected(struct file_id fid, uint64_t open_persistent_id) { @@ -1728,8 +1764,24 @@ bool share_mode_cleanup_disconnected(struct file_id fid, ? "" : data->stream_name, open_persistent_id); - data->have_share_modes = false; - data->modified = true; + ok = share_mode_forall_entries( + state.lck, cleanup_disconnected_share_mode_entry_fn, &state); + if (!ok) { + DBG_DEBUG("failed to clean up %zu entries associated " + "with file (file-id='%s', servicepath='%s', " + "base_name='%s%s%s') and open_persistent_id %"PRIu64" " + "==> do not cleanup\n", + state.num_disconnected, + file_id_str_buf(fid, &idbuf), + data->servicepath, + data->base_name, + (data->stream_name == NULL) + ? "" : "', stream_name='", + (data->stream_name == NULL) + ? "" : data->stream_name, + open_persistent_id); + goto done; + } /* * This is a temporary reproducer for the origin of -- 2.17.1 From fa04b25fc843210d9146187f74255ccda430cd31 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 28 Aug 2020 16:28:41 +0200 Subject: [PATCH 7/7] s3:share_mode_lock: remove unused reproducer for bug #14428 BUG: https://bugzilla.samba.org/show_bug.cgi?id=14428 Signed-off-by: Stefan Metzmacher Reviewed-by: Volker Lendecke Autobuild-User(master): Volker Lendecke Autobuild-Date(master): Mon Aug 31 13:34:17 UTC 2020 on sn-devel-184 (cherry picked from commit b02f1d676f6e62a0a4b33b9b08f8f51a68b561ca) --- source3/locking/share_mode_lock.c | 14 -------------- .../script/tests/test_durable_handle_reconnect.sh | 8 -------- 2 files changed, 22 deletions(-) diff --git a/source3/locking/share_mode_lock.c b/source3/locking/share_mode_lock.c index 6588f09b285b..1c4d3a422214 100644 --- a/source3/locking/share_mode_lock.c +++ b/source3/locking/share_mode_lock.c @@ -1783,20 +1783,6 @@ bool share_mode_cleanup_disconnected(struct file_id fid, goto done; } - /* - * This is a temporary reproducer for the origin of - * https://bugzilla.samba.org/show_bug.cgi?id=14428 - * "PANIC: assert failed in get_lease_type()" - * - * This will be removed again once the bug is demonstrated - * and fixed. - */ - TALLOC_FREE(state.lck); - state.lck = get_existing_share_mode_lock(frame, fid); - if (state.lck != NULL) { - DBG_ERR("Reproduced BUG#14428\n"); - } - ret = true; done: talloc_free(frame); diff --git a/source3/script/tests/test_durable_handle_reconnect.sh b/source3/script/tests/test_durable_handle_reconnect.sh index 66a35f08f17e..2f6b308eebe9 100755 --- a/source3/script/tests/test_durable_handle_reconnect.sh +++ b/source3/script/tests/test_durable_handle_reconnect.sh @@ -23,20 +23,12 @@ if [ $SMBD_DONT_LOG_STDOUT -eq 1 ]; then SMBD_LOG_FILES="$SMBD_LOG_FILES $_SMBD_LOG_FILE" fi -bug_count_0=$(grep 'Reproduced BUG#14428' $SMBD_LOG_FILES | wc -l) - testit "durable_v2_delay.durable_v2_reconnect_delay_msec" $VALGRIND \ $BINDIR/smbtorture //$SERVER_IP/durable \ -U$USERNAME%$PASSWORD \ smb2.durable-v2-delay.durable_v2_reconnect_delay_msec || failed=$(expr $failed + 1) -bug_count_1=$(grep 'Reproduced BUG#14428' $SMBD_LOG_FILES | wc -l) - -testit "bug.14428 bug_count_0[$bug_count_0] bug_count_1[$bug_count_1]" \ - test $bug_count_0 -eq $bug_count_1 || - failed=$(expr $failed + 1) - rm $delay_inject_conf testok $0 $failed -- 2.17.1