From 101810b6bebe03c7f7c358ccd05b1fbca93ca1e7 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 8 May 2020 13:06:54 +0200 Subject: [PATCH 01/28] lib: util: Add sys_valid_io_range() This implements the contraints of [MS-FSA] 2.1.5.2 Server Requests a Read. The special handling of [MS-FSA] 2.1.5.3 Server Requests a Write with offset < 0, should be handled by higher layers! Which means the check can also be used for writes. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher --- lib/util/sys_rw.c | 24 ++++++++++++++++++++++++ lib/util/sys_rw.h | 1 + 2 files changed, 25 insertions(+) diff --git a/lib/util/sys_rw.c b/lib/util/sys_rw.c index 9a6cdcaa6063..6fa7ca573654 100644 --- a/lib/util/sys_rw.c +++ b/lib/util/sys_rw.c @@ -24,6 +24,30 @@ #include "system/filesys.h" #include "lib/util/sys_rw.h" +bool sys_valid_io_range(off_t offset, size_t length) +{ + uint64_t last_byte_ofs; + + if (offset < 0) { + return false; + } + + if (offset > INT64_MAX) { + return false; + } + + if (length > UINT32_MAX) { + return false; + } + + last_byte_ofs = (uint64_t)offset + (uint64_t)length; + if (last_byte_ofs > INT64_MAX) { + return false; + } + + return true; +} + /******************************************************************* A read wrapper that will deal with EINTR/EWOULDBLOCK ********************************************************************/ diff --git a/lib/util/sys_rw.h b/lib/util/sys_rw.h index ab456d87b22e..70864cb2b74a 100644 --- a/lib/util/sys_rw.h +++ b/lib/util/sys_rw.h @@ -27,6 +27,7 @@ struct iovec; +bool sys_valid_io_range(off_t offset, size_t length); ssize_t sys_read(int fd, void *buf, size_t count); void sys_read_v(int fd, void *buf, size_t count); ssize_t sys_write(int fd, const void *buf, size_t count); -- 2.17.1 From 7e27f2c3c8ecf0b027410fe6f56bba383a46b3b9 Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Thu, 7 May 2020 12:32:48 -0700 Subject: [PATCH 02/28] lib: util: Add sys_pread_full(). A pread wrapper that will deal with EINTR and never return a short read unless pread returns zero meaning EOF. Thread-safe so may be used as a replacement for pread inside pread_do() thread functions. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Jeremy Allison --- lib/util/sys_rw.c | 35 +++++++++++++++++++++++++++++++++++ lib/util/sys_rw.h | 1 + 2 files changed, 36 insertions(+) diff --git a/lib/util/sys_rw.c b/lib/util/sys_rw.c index 6fa7ca573654..87efbfd367cb 100644 --- a/lib/util/sys_rw.c +++ b/lib/util/sys_rw.c @@ -143,6 +143,41 @@ ssize_t sys_pread(int fd, void *buf, size_t count, off_t off) return ret; } +/******************************************************************* + A pread wrapper that will deal with EINTR and never return a short + read unless pread returns zero meaning EOF. +********************************************************************/ + +ssize_t sys_pread_full(int fd, void *buf, size_t count, off_t off) +{ + ssize_t total_read = 0; + uint8_t *curr_buf = (uint8_t *)buf; + size_t curr_count = count; + off_t curr_off = off; + + while (curr_count != 0) { + ssize_t ret = sys_pread(fd, + curr_buf, + curr_count, + curr_off); + + if (ret == -1) { + return -1; + } + if (ret == 0) { + /* EOF */ + break; + } + + curr_buf += ret; + curr_count -= ret; + curr_off += ret; + + total_read += ret; + } + return total_read; +} + /******************************************************************* A write wrapper that will deal with EINTR ********************************************************************/ diff --git a/lib/util/sys_rw.h b/lib/util/sys_rw.h index 70864cb2b74a..1e0dd3730a60 100644 --- a/lib/util/sys_rw.h +++ b/lib/util/sys_rw.h @@ -34,6 +34,7 @@ ssize_t sys_write(int fd, const void *buf, size_t count); void sys_write_v(int fd, const void *buf, size_t count); ssize_t sys_writev(int fd, const struct iovec *iov, int iovcnt); ssize_t sys_pread(int fd, void *buf, size_t count, off_t off); +ssize_t sys_pread_full(int fd, void *buf, size_t count, off_t off); ssize_t sys_pwrite(int fd, const void *buf, size_t count, off_t off); #endif -- 2.17.1 From 52287ae30323e44f7da4418eec598cfd92fa2ab7 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 8 May 2020 13:09:50 +0200 Subject: [PATCH 03/28] sq pread_full Signed-off-by: Stefan Metzmacher --- lib/util/sys_rw.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/lib/util/sys_rw.c b/lib/util/sys_rw.c index 87efbfd367cb..bfeb2e6b4661 100644 --- a/lib/util/sys_rw.c +++ b/lib/util/sys_rw.c @@ -154,6 +154,13 @@ ssize_t sys_pread_full(int fd, void *buf, size_t count, off_t off) uint8_t *curr_buf = (uint8_t *)buf; size_t curr_count = count; off_t curr_off = off; + bool ok; + + ok = sys_valid_io_range(off, count); + if (!ok) { + errno = EINVAL; + return -1; + } while (curr_count != 0) { ssize_t ret = sys_pread(fd, @@ -169,12 +176,18 @@ ssize_t sys_pread_full(int fd, void *buf, size_t count, off_t off) break; } + if (ret > curr_count) { + errno = EIO; + return -1; + } + curr_buf += ret; curr_count -= ret; curr_off += ret; total_read += ret; } + return total_read; } -- 2.17.1 From c66cc1396eafa4747524b64b31af7f9f36e360f0 Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Thu, 7 May 2020 12:34:32 -0700 Subject: [PATCH 04/28] lib: util: Add sys_pwrite_full(). A pwrite wrapper that will deal with EINTR and never return a short write unless the file system returns an error. Copes with the unspecified edge condition of pwrite returning zero by changing the return to -1, errno = ENOSPC. Thread-safe so may be used as a replacement for pwrite inside pwrite_do() thread functions. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Jeremy Allison --- lib/util/sys_rw.c | 35 +++++++++++++++++++++++++++++++++++ lib/util/sys_rw.h | 1 + 2 files changed, 36 insertions(+) diff --git a/lib/util/sys_rw.c b/lib/util/sys_rw.c index bfeb2e6b4661..6d39bd3afb06 100644 --- a/lib/util/sys_rw.c +++ b/lib/util/sys_rw.c @@ -204,3 +204,38 @@ ssize_t sys_pwrite(int fd, const void *buf, size_t count, off_t off) } while (ret == -1 && errno == EINTR); return ret; } + +/******************************************************************* + A pwrite wrapper that will deal with EINTR and never allow a short + write unless the file system returns an error. +********************************************************************/ + +ssize_t sys_pwrite_full(int fd, const void *buf, size_t count, off_t off) +{ + ssize_t total_written = 0; + const uint8_t *curr_buf = (const uint8_t *)buf; + size_t curr_count = count; + off_t curr_off = off; + + while (curr_count != 0) { + ssize_t ret = sys_pwrite(fd, + curr_buf, + curr_count, + curr_off); + + if (ret == -1) { + return -1; + } + if (ret == 0) { + /* Ensure we can never spin. */ + errno = ENOSPC; + return -1; + } + curr_buf += ret; + curr_count -= ret; + curr_off += ret; + + total_written += ret; + } + return total_written; +} diff --git a/lib/util/sys_rw.h b/lib/util/sys_rw.h index 1e0dd3730a60..b224ecb30ac7 100644 --- a/lib/util/sys_rw.h +++ b/lib/util/sys_rw.h @@ -36,5 +36,6 @@ ssize_t sys_writev(int fd, const struct iovec *iov, int iovcnt); ssize_t sys_pread(int fd, void *buf, size_t count, off_t off); ssize_t sys_pread_full(int fd, void *buf, size_t count, off_t off); ssize_t sys_pwrite(int fd, const void *buf, size_t count, off_t off); +ssize_t sys_pwrite_full(int fd, const void *buf, size_t count, off_t off); #endif -- 2.17.1 From a9f41b41ffe4943dab5052123aa476c1d54f0ea8 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 8 May 2020 13:10:14 +0200 Subject: [PATCH 05/28] sq pwrite_full Signed-off-by: Stefan Metzmacher --- lib/util/sys_rw.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/lib/util/sys_rw.c b/lib/util/sys_rw.c index 6d39bd3afb06..d74395fc4091 100644 --- a/lib/util/sys_rw.c +++ b/lib/util/sys_rw.c @@ -216,6 +216,13 @@ ssize_t sys_pwrite_full(int fd, const void *buf, size_t count, off_t off) const uint8_t *curr_buf = (const uint8_t *)buf; size_t curr_count = count; off_t curr_off = off; + bool ok; + + ok = sys_valid_io_range(off, count); + if (!ok) { + errno = EINVAL; + return -1; + } while (curr_count != 0) { ssize_t ret = sys_pwrite(fd, @@ -231,11 +238,18 @@ ssize_t sys_pwrite_full(int fd, const void *buf, size_t count, off_t off) errno = ENOSPC; return -1; } + + if (ret > curr_count) { + errno = EIO; + return -1; + } + curr_buf += ret; curr_count -= ret; curr_off += ret; total_written += ret; } + return total_written; } -- 2.17.1 From 1edc5112e61f6b079e43f39cd54f9a17ee91d1c5 Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Thu, 7 May 2020 12:42:10 -0700 Subject: [PATCH 06/28] s3: VFS: aio_fork: Change sys_pread() -> sys_pread_full() to protect against short reads. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Jeremy Allison --- source3/modules/vfs_aio_fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source3/modules/vfs_aio_fork.c b/source3/modules/vfs_aio_fork.c index a1fed5c06557..285b331ff9ca 100644 --- a/source3/modules/vfs_aio_fork.c +++ b/source3/modules/vfs_aio_fork.c @@ -342,7 +342,7 @@ static void aio_child_loop(int sockfd, struct mmap_area *map) switch (cmd_struct.cmd) { case READ_CMD: - ret_struct.size = sys_pread( + ret_struct.size = sys_pread_full( fd, discard_const(map->ptr), cmd_struct.n, cmd_struct.offset); #if 0 -- 2.17.1 From 4eec2c666c304ebe1236ee37f5ca4cfee39fba9d Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Thu, 7 May 2020 12:42:53 -0700 Subject: [PATCH 07/28] TODO s3: VFS: aio_fork: Change sys_pwrite() -> sys_pwrite_full() to protect against short writes. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Jeremy Allison TODO: check how clients behave... TODO: where do we handle offset < 0 ???, see MS-FSA 2.1.5.3 Server Requests a Write --- source3/modules/vfs_aio_fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source3/modules/vfs_aio_fork.c b/source3/modules/vfs_aio_fork.c index 285b331ff9ca..7c6f4b00fd04 100644 --- a/source3/modules/vfs_aio_fork.c +++ b/source3/modules/vfs_aio_fork.c @@ -353,7 +353,7 @@ static void aio_child_loop(int sockfd, struct mmap_area *map) #endif break; case WRITE_CMD: - ret_struct.size = sys_pwrite( + ret_struct.size = sys_pwrite_full( fd, discard_const(map->ptr), cmd_struct.n, cmd_struct.offset); break; -- 2.17.1 From 485ed9258844bf4cb116096db26607d4ec9601a9 Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Thu, 7 May 2020 12:43:34 -0700 Subject: [PATCH 08/28] s3: VFS: default. Change sys_pread() -> sys_pread_full() in SMB_VFS_PREAD() to protect against short reads. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Jeremy Allison --- source3/modules/vfs_default.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source3/modules/vfs_default.c b/source3/modules/vfs_default.c index c7f2020a9ead..26db45dccd05 100644 --- a/source3/modules/vfs_default.c +++ b/source3/modules/vfs_default.c @@ -734,7 +734,7 @@ static ssize_t vfswrap_pread(vfs_handle_struct *handle, files_struct *fsp, void #if defined(HAVE_PREAD) || defined(HAVE_PREAD64) START_PROFILE_BYTES(syscall_pread, n); - result = sys_pread(fsp->fh->fd, data, n, offset); + result = sys_pread_full(fsp->fh->fd, data, n, offset); END_PROFILE_BYTES(syscall_pread); if (result == -1 && errno == ESPIPE) { -- 2.17.1 From 86a1d32d49448b3c6321d55365f3615d818092ab Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Thu, 7 May 2020 12:44:26 -0700 Subject: [PATCH 09/28] TODO: s3: VFS: default. Change sys_pwrite() -> sys_pwrite_full() in SMB_VFS_PWRITE() to protect against short writes. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Jeremy Allison TODO: check how clients behave... TODO: where do we handle offset < 0 ???, see MS-FSA 2.1.5.3 Server Requests a Write --- source3/modules/vfs_default.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source3/modules/vfs_default.c b/source3/modules/vfs_default.c index 26db45dccd05..7f49e4f26c3b 100644 --- a/source3/modules/vfs_default.c +++ b/source3/modules/vfs_default.c @@ -758,7 +758,7 @@ static ssize_t vfswrap_pwrite(vfs_handle_struct *handle, files_struct *fsp, cons #if defined(HAVE_PWRITE) || defined(HAVE_PRWITE64) START_PROFILE_BYTES(syscall_pwrite, n); - result = sys_pwrite(fsp->fh->fd, data, n, offset); + result = sys_pwrite_full(fsp->fh->fd, data, n, offset); END_PROFILE_BYTES(syscall_pwrite); if (result == -1 && errno == ESPIPE) { -- 2.17.1 From 6872fb5986bb6ae99f81e1fb830e3c6cb67a5696 Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Thu, 7 May 2020 12:45:10 -0700 Subject: [PATCH 10/28] s3: VFS: default. Change pread() -> sys_pread_full() in SMB_VFS_PREAD_SEND() to protect against short reads. Note that as sys_pread_full() deals with the EINTR case we can remove the do {} while loop here. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Jeremy Allison --- source3/modules/vfs_default.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source3/modules/vfs_default.c b/source3/modules/vfs_default.c index 7f49e4f26c3b..a17eb0ce75c2 100644 --- a/source3/modules/vfs_default.c +++ b/source3/modules/vfs_default.c @@ -838,10 +838,10 @@ static void vfs_pread_do(void *private_data) PROFILE_TIMESTAMP(&start_time); - do { - state->ret = pread(state->fd, state->buf, state->count, - state->offset); - } while ((state->ret == -1) && (errno == EINTR)); + state->ret = sys_pread_full(state->fd, + state->buf, + state->count, + state->offset); if (state->ret == -1) { state->vfs_aio_state.error = errno; -- 2.17.1 From c35600726358f2c7057839561a69a0f0214f542b Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Thu, 7 May 2020 12:48:49 -0700 Subject: [PATCH 11/28] TODO: s3: VFS: default. Change pwrite() -> sys_pwrite_full() in SMB_VFS_PWRITE_SEND() to protect against short writes. Note that as sys_pwrite_full() deals with the EINTR case we can remove the do {} while loop here. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Jeremy Allison TODO: check how clients behave... TODO: where do we handle offset < 0 ???, see MS-FSA 2.1.5.3 Server Requests a Write --- source3/modules/vfs_default.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source3/modules/vfs_default.c b/source3/modules/vfs_default.c index a17eb0ce75c2..522ea03260c1 100644 --- a/source3/modules/vfs_default.c +++ b/source3/modules/vfs_default.c @@ -966,10 +966,10 @@ static void vfs_pwrite_do(void *private_data) PROFILE_TIMESTAMP(&start_time); - do { - state->ret = pwrite(state->fd, state->buf, state->count, - state->offset); - } while ((state->ret == -1) && (errno == EINTR)); + state->ret = sys_pwrite_full(state->fd, + state->buf, + state->count, + state->offset); if (state->ret == -1) { state->vfs_aio_state.error = errno; -- 2.17.1 From 2a9932a48a5c7ff1a1a223c9f18a40fc8b8f0dd7 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 6 May 2020 03:05:47 -0700 Subject: [PATCH 12/28] vfs_io_uring: fix the prefix for parametric options from 'vfs_io_uring' to 'io_uring' This is what the manpage describes. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher --- source3/modules/vfs_io_uring.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c index 378e48d112f6..b409d0753379 100644 --- a/source3/modules/vfs_io_uring.c +++ b/source3/modules/vfs_io_uring.c @@ -172,13 +172,13 @@ static int vfs_io_uring_connect(vfs_handle_struct *handle, const char *service, } num_entries = lp_parm_ulong(SNUM(handle->conn), - "vfs_io_uring", + "io_uring", "num_entries", 128); num_entries = MAX(num_entries, 1); sqpoll = lp_parm_bool(SNUM(handle->conn), - "vfs_io_uring", + "io_uring", "sqpoll", false); if (sqpoll) { -- 2.17.1 From 6bcfdb8aff42a465d09374625ca134d76c03060b Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 8 May 2020 10:39:52 +0200 Subject: [PATCH 13/28] vfs_io_uring: replace vfs_io_uring_request->state with _tevent_req_data() We don't need a direct pointer to the state... BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher --- source3/modules/vfs_io_uring.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c index b409d0753379..988b309da525 100644 --- a/source3/modules/vfs_io_uring.c +++ b/source3/modules/vfs_io_uring.c @@ -42,7 +42,6 @@ struct vfs_io_uring_request { struct vfs_io_uring_request **list_head; struct vfs_io_uring_config *config; struct tevent_req *req; - void *state; struct io_uring_sqe sqe; struct io_uring_cqe cqe; struct timespec start_time; @@ -58,8 +57,9 @@ static void vfs_io_uring_finish_req(struct vfs_io_uring_request *cur, struct tevent_req *req = talloc_get_type_abort(cur->req, struct tevent_req); + void *state = _tevent_req_data(req); - talloc_set_destructor(cur->state, NULL); + talloc_set_destructor(state, NULL); if (cur->list_head != NULL) { DLIST_REMOVE((*cur->list_head), cur); cur->list_head = NULL; @@ -238,6 +238,7 @@ static void vfs_io_uring_queue_run(struct vfs_io_uring_config *config) for (cur = config->queue; cur != NULL; cur = next) { struct io_uring_sqe *sqe = NULL; + void *state = _tevent_req_data(cur->req); next = cur->next; @@ -246,7 +247,7 @@ static void vfs_io_uring_queue_run(struct vfs_io_uring_config *config) break; } - talloc_set_destructor(cur->state, + talloc_set_destructor(state, vfs_io_uring_request_state_deny_destructor); DLIST_REMOVE(config->queue, cur); *sqe = cur->sqe; @@ -318,7 +319,6 @@ static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *hand } state->ur.config = config; state->ur.req = req; - state->ur.state = state; SMBPROFILE_BYTES_ASYNC_START(syscall_asys_pread, profile_p, state->ur.profile_bytes, n); @@ -398,7 +398,6 @@ static struct tevent_req *vfs_io_uring_pwrite_send(struct vfs_handle_struct *han } state->ur.config = config; state->ur.req = req; - state->ur.state = state; SMBPROFILE_BYTES_ASYNC_START(syscall_asys_pwrite, profile_p, state->ur.profile_bytes, n); @@ -475,7 +474,6 @@ static struct tevent_req *vfs_io_uring_fsync_send(struct vfs_handle_struct *hand } state->ur.config = config; state->ur.req = req; - state->ur.state = state; SMBPROFILE_BYTES_ASYNC_START(syscall_asys_fsync, profile_p, state->ur.profile_bytes, 0); -- 2.17.1 From f0496aa32470c52869d7f29e5cd411694d3339b4 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 8 May 2020 10:42:59 +0200 Subject: [PATCH 14/28] vfs_io_uring: introduce vfs_io_uring_request->completion_fn() We'll need to add more logic than a simple _tevent_req_done() BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher --- source3/modules/vfs_io_uring.c | 49 +++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c index 988b309da525..abdd4d16e9f8 100644 --- a/source3/modules/vfs_io_uring.c +++ b/source3/modules/vfs_io_uring.c @@ -44,6 +44,8 @@ struct vfs_io_uring_request { struct tevent_req *req; struct io_uring_sqe sqe; struct io_uring_cqe cqe; + void (*completion_fn)(struct vfs_io_uring_request *cur, + const char *location); struct timespec start_time; struct timespec end_time; SMBPROFILE_BYTES_ASYNC_STATE(profile_bytes); @@ -74,7 +76,7 @@ static void vfs_io_uring_finish_req(struct vfs_io_uring_request *cur, * or tevent_req_defer_callback() being called * already. */ - _tevent_req_done(req, location); + cur->completion_fn(cur, location); } static void vfs_io_uring_config_destroy(struct vfs_io_uring_config *config, @@ -297,6 +299,9 @@ struct vfs_io_uring_pread_state { struct iovec iov; }; +static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur, + const char *location); + static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *handle, TALLOC_CTX *mem_ctx, struct tevent_context *ev, @@ -319,6 +324,7 @@ static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *hand } state->ur.config = config; state->ur.req = req; + state->ur.completion_fn = vfs_io_uring_pread_completion; SMBPROFILE_BYTES_ASYNC_START(syscall_asys_pread, profile_p, state->ur.profile_bytes, n); @@ -344,6 +350,17 @@ static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *hand return req; } +static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur, + const char *location) +{ + /* + * We rely on being inside the _send() function + * or tevent_req_defer_callback() being called + * already. + */ + _tevent_req_done(cur->req, location); +} + static ssize_t vfs_io_uring_pread_recv(struct tevent_req *req, struct vfs_aio_state *vfs_aio_state) { @@ -376,6 +393,9 @@ struct vfs_io_uring_pwrite_state { struct iovec iov; }; +static void vfs_io_uring_pwrite_completion(struct vfs_io_uring_request *cur, + const char *location); + static struct tevent_req *vfs_io_uring_pwrite_send(struct vfs_handle_struct *handle, TALLOC_CTX *mem_ctx, struct tevent_context *ev, @@ -398,6 +418,7 @@ static struct tevent_req *vfs_io_uring_pwrite_send(struct vfs_handle_struct *han } state->ur.config = config; state->ur.req = req; + state->ur.completion_fn = vfs_io_uring_pwrite_completion; SMBPROFILE_BYTES_ASYNC_START(syscall_asys_pwrite, profile_p, state->ur.profile_bytes, n); @@ -423,6 +444,17 @@ static struct tevent_req *vfs_io_uring_pwrite_send(struct vfs_handle_struct *han return req; } +static void vfs_io_uring_pwrite_completion(struct vfs_io_uring_request *cur, + const char *location) +{ + /* + * We rely on being inside the _send() function + * or tevent_req_defer_callback() being called + * already. + */ + _tevent_req_done(cur->req, location); +} + static ssize_t vfs_io_uring_pwrite_recv(struct tevent_req *req, struct vfs_aio_state *vfs_aio_state) { @@ -454,6 +486,9 @@ struct vfs_io_uring_fsync_state { struct vfs_io_uring_request ur; }; +static void vfs_io_uring_fsync_completion(struct vfs_io_uring_request *cur, + const char *location); + static struct tevent_req *vfs_io_uring_fsync_send(struct vfs_handle_struct *handle, TALLOC_CTX *mem_ctx, struct tevent_context *ev, @@ -474,6 +509,7 @@ static struct tevent_req *vfs_io_uring_fsync_send(struct vfs_handle_struct *hand } state->ur.config = config; state->ur.req = req; + state->ur.completion_fn = vfs_io_uring_fsync_completion; SMBPROFILE_BYTES_ASYNC_START(syscall_asys_fsync, profile_p, state->ur.profile_bytes, 0); @@ -496,6 +532,17 @@ static struct tevent_req *vfs_io_uring_fsync_send(struct vfs_handle_struct *hand return req; } +static void vfs_io_uring_fsync_completion(struct vfs_io_uring_request *cur, + const char *location) +{ + /* + * We rely on being inside the _send() function + * or tevent_req_defer_callback() being called + * already. + */ + _tevent_req_done(cur->req, location); +} + static int vfs_io_uring_fsync_recv(struct tevent_req *req, struct vfs_aio_state *vfs_aio_state) { -- 2.17.1 From 001c033ea39987a3c754f9960e89d155661e6c89 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 8 May 2020 10:52:52 +0200 Subject: [PATCH 15/28] vfs_io_uring: move error handling out of vfs_io_uring_pread_recv() We should do that as early as possible and that's in vfs_io_uring_pread_completion(). BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher --- source3/modules/vfs_io_uring.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c index abdd4d16e9f8..0d8e18330092 100644 --- a/source3/modules/vfs_io_uring.c +++ b/source3/modules/vfs_io_uring.c @@ -297,6 +297,7 @@ static void vfs_io_uring_fd_handler(struct tevent_context *ev, struct vfs_io_uring_pread_state { struct vfs_io_uring_request ur; struct iovec iov; + size_t nread; }; static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur, @@ -353,12 +354,23 @@ static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *hand static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur, const char *location) { + struct vfs_io_uring_pread_state *state = tevent_req_data( + cur->req, struct vfs_io_uring_pread_state); + /* * We rely on being inside the _send() function * or tevent_req_defer_callback() being called * already. */ - _tevent_req_done(cur->req, location); + + if (cur->cqe.res < 0) { + int err = -cur->cqe.res; + _tevent_req_error(cur->req, err, location); + return; + } + + state->nread = state->ur.cqe.res; + tevent_req_done(cur->req); } static ssize_t vfs_io_uring_pread_recv(struct tevent_req *req, @@ -366,23 +378,19 @@ static ssize_t vfs_io_uring_pread_recv(struct tevent_req *req, { struct vfs_io_uring_pread_state *state = tevent_req_data( req, struct vfs_io_uring_pread_state); - int ret; + ssize_t ret; SMBPROFILE_BYTES_ASYNC_END(state->ur.profile_bytes); vfs_aio_state->duration = nsec_time_diff(&state->ur.end_time, &state->ur.start_time); if (tevent_req_is_unix_error(req, &vfs_aio_state->error)) { + tevent_req_received(req); return -1; } - if (state->ur.cqe.res < 0) { - vfs_aio_state->error = -state->ur.cqe.res; - ret = -1; - } else { - vfs_aio_state->error = 0; - ret = state->ur.cqe.res; - } + vfs_aio_state->error = 0; + ret = state->nread; tevent_req_received(req); return ret; -- 2.17.1 From eab8f17efb67e4b672c5f510344fc54c3c175e43 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 8 May 2020 10:52:52 +0200 Subject: [PATCH 16/28] vfs_io_uring: move error handling out of vfs_io_uring_pwrite_recv() We should do that as early as possible and that's in vfs_io_uring_pwrite_completion(). BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher --- source3/modules/vfs_io_uring.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c index 0d8e18330092..a8da341e7b7c 100644 --- a/source3/modules/vfs_io_uring.c +++ b/source3/modules/vfs_io_uring.c @@ -399,6 +399,7 @@ static ssize_t vfs_io_uring_pread_recv(struct tevent_req *req, struct vfs_io_uring_pwrite_state { struct vfs_io_uring_request ur; struct iovec iov; + size_t nwritten; }; static void vfs_io_uring_pwrite_completion(struct vfs_io_uring_request *cur, @@ -455,12 +456,23 @@ static struct tevent_req *vfs_io_uring_pwrite_send(struct vfs_handle_struct *han static void vfs_io_uring_pwrite_completion(struct vfs_io_uring_request *cur, const char *location) { + struct vfs_io_uring_pwrite_state *state = tevent_req_data( + cur->req, struct vfs_io_uring_pwrite_state); + /* * We rely on being inside the _send() function * or tevent_req_defer_callback() being called * already. */ - _tevent_req_done(cur->req, location); + + if (cur->cqe.res < 0) { + int err = -cur->cqe.res; + _tevent_req_error(cur->req, err, location); + return; + } + + state->nwritten = state->ur.cqe.res; + tevent_req_done(cur->req); } static ssize_t vfs_io_uring_pwrite_recv(struct tevent_req *req, @@ -468,23 +480,19 @@ static ssize_t vfs_io_uring_pwrite_recv(struct tevent_req *req, { struct vfs_io_uring_pwrite_state *state = tevent_req_data( req, struct vfs_io_uring_pwrite_state); - int ret; + ssize_t ret; SMBPROFILE_BYTES_ASYNC_END(state->ur.profile_bytes); vfs_aio_state->duration = nsec_time_diff(&state->ur.end_time, &state->ur.start_time); if (tevent_req_is_unix_error(req, &vfs_aio_state->error)) { + tevent_req_received(req); return -1; } - if (state->ur.cqe.res < 0) { - vfs_aio_state->error = -state->ur.cqe.res; - ret = -1; - } else { - vfs_aio_state->error = 0; - ret = state->ur.cqe.res; - } + vfs_aio_state->error = 0; + ret = state->nwritten; tevent_req_received(req); return ret; -- 2.17.1 From 5268026076eba48df8c8bde7f532684205c65874 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 8 May 2020 10:52:52 +0200 Subject: [PATCH 17/28] vfs_io_uring: move error handling out of vfs_io_uring_fsync_recv() We should do that as early as possible and that's in vfs_io_uring_fsync_completion(). BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher --- source3/modules/vfs_io_uring.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c index a8da341e7b7c..0f560c95b67d 100644 --- a/source3/modules/vfs_io_uring.c +++ b/source3/modules/vfs_io_uring.c @@ -556,7 +556,14 @@ static void vfs_io_uring_fsync_completion(struct vfs_io_uring_request *cur, * or tevent_req_defer_callback() being called * already. */ - _tevent_req_done(cur->req, location); + + if (cur->cqe.res < 0) { + int err = -cur->cqe.res; + _tevent_req_error(cur->req, err, location); + return; + } + + tevent_req_done(cur->req); } static int vfs_io_uring_fsync_recv(struct tevent_req *req, @@ -564,26 +571,20 @@ static int vfs_io_uring_fsync_recv(struct tevent_req *req, { struct vfs_io_uring_fsync_state *state = tevent_req_data( req, struct vfs_io_uring_fsync_state); - int ret; SMBPROFILE_BYTES_ASYNC_END(state->ur.profile_bytes); vfs_aio_state->duration = nsec_time_diff(&state->ur.end_time, &state->ur.start_time); if (tevent_req_is_unix_error(req, &vfs_aio_state->error)) { + tevent_req_received(req); return -1; } - if (state->ur.cqe.res < 0) { - vfs_aio_state->error = -state->ur.cqe.res; - ret = -1; - } else { - vfs_aio_state->error = 0; - ret = state->ur.cqe.res; - } + vfs_aio_state->error = 0; tevent_req_received(req); - return ret; + return 0; } static struct vfs_fn_pointers vfs_io_uring_fns = { -- 2.17.1 From 3106a52406108408492b063ba5714af5fe8a49fe Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 8 May 2020 13:17:05 +0200 Subject: [PATCH 18/28] vfs_io_uring: make use of sys_valid_io_range() in vfs_io_uring_pread_send() This makes the follow up commits easier as we don't have to care about overflows. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher --- source3/modules/vfs_io_uring.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c index 0f560c95b67d..c7565b8c39de 100644 --- a/source3/modules/vfs_io_uring.c +++ b/source3/modules/vfs_io_uring.c @@ -25,6 +25,7 @@ #include "smbd/smbd.h" #include "smbd/globals.h" #include "lib/util/tevent_unix.h" +#include "lib/util/sys_rw.h" #include "smbprofile.h" #include @@ -313,6 +314,7 @@ static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *hand struct tevent_req *req = NULL; struct vfs_io_uring_pread_state *state = NULL; struct vfs_io_uring_config *config = NULL; + bool ok; SMB_VFS_HANDLE_GET_DATA(handle, config, struct vfs_io_uring_config, @@ -331,6 +333,12 @@ static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *hand state->ur.profile_bytes, n); SMBPROFILE_BYTES_ASYNC_SET_IDLE(state->ur.profile_bytes); + ok = sys_valid_io_range(offset, n); + if (!ok) { + tevent_req_error(req, EINVAL); + return tevent_req_post(req, ev); + } + state->iov.iov_base = (void *)data; state->iov.iov_len = n; io_uring_prep_readv(&state->ur.sqe, -- 2.17.1 From a7da07423717e95a88a23ad4fb03c995f9ce1006 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 8 May 2020 11:17:51 +0200 Subject: [PATCH 19/28] vfs_io_uring: split out a vfs_io_uring_request_submit() function BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher --- source3/modules/vfs_io_uring.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c index c7565b8c39de..13c2b9d73969 100644 --- a/source3/modules/vfs_io_uring.c +++ b/source3/modules/vfs_io_uring.c @@ -280,6 +280,17 @@ static void vfs_io_uring_queue_run(struct vfs_io_uring_config *config) io_uring_cq_advance(&config->uring, nr); } +static void vfs_io_uring_request_submit(struct vfs_io_uring_request *cur) +{ + struct vfs_io_uring_config *config = cur->config; + + io_uring_sqe_set_data(&cur->sqe, cur); + DLIST_ADD_END(config->queue, cur); + cur->list_head = &config->queue; + + vfs_io_uring_queue_run(config); +} + static void vfs_io_uring_fd_handler(struct tevent_context *ev, struct tevent_fd *fde, uint16_t flags, @@ -345,11 +356,7 @@ static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *hand fsp->fh->fd, &state->iov, 1, offset); - io_uring_sqe_set_data(&state->ur.sqe, &state->ur); - DLIST_ADD_END(config->queue, &state->ur); - state->ur.list_head = &config->queue; - - vfs_io_uring_queue_run(config); + vfs_io_uring_request_submit(&state->ur); if (!tevent_req_is_in_progress(req)) { return tevent_req_post(req, ev); @@ -447,11 +454,7 @@ static struct tevent_req *vfs_io_uring_pwrite_send(struct vfs_handle_struct *han fsp->fh->fd, &state->iov, 1, offset); - io_uring_sqe_set_data(&state->ur.sqe, &state->ur); - DLIST_ADD_END(config->queue, &state->ur); - state->ur.list_head = &config->queue; - - vfs_io_uring_queue_run(config); + vfs_io_uring_request_submit(&state->ur); if (!tevent_req_is_in_progress(req)) { return tevent_req_post(req, ev); @@ -542,11 +545,7 @@ static struct tevent_req *vfs_io_uring_fsync_send(struct vfs_handle_struct *hand io_uring_prep_fsync(&state->ur.sqe, fsp->fh->fd, 0); /* fsync_flags */ - io_uring_sqe_set_data(&state->ur.sqe, &state->ur); - DLIST_ADD_END(config->queue, &state->ur); - state->ur.list_head = &config->queue; - - vfs_io_uring_queue_run(config); + vfs_io_uring_request_submit(&state->ur); if (!tevent_req_is_in_progress(req)) { return tevent_req_post(req, ev); -- 2.17.1 From 3b759db04c771c654f3565097ee18cdafc2e29ee Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 8 May 2020 11:17:51 +0200 Subject: [PATCH 20/28] vfs_io_uring: split out a vfs_io_uring_pread_submit() function This can be reused when we add handling for short reads. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher --- source3/modules/vfs_io_uring.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c index 13c2b9d73969..609264777254 100644 --- a/source3/modules/vfs_io_uring.c +++ b/source3/modules/vfs_io_uring.c @@ -308,10 +308,13 @@ static void vfs_io_uring_fd_handler(struct tevent_context *ev, struct vfs_io_uring_pread_state { struct vfs_io_uring_request ur; + struct files_struct *fsp; + off_t offset; struct iovec iov; size_t nread; }; +static void vfs_io_uring_pread_submit(struct vfs_io_uring_pread_state *state); static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur, const char *location); @@ -350,13 +353,11 @@ static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *hand return tevent_req_post(req, ev); } + state->fsp = fsp; + state->offset = offset; state->iov.iov_base = (void *)data; state->iov.iov_len = n; - io_uring_prep_readv(&state->ur.sqe, - fsp->fh->fd, - &state->iov, 1, - offset); - vfs_io_uring_request_submit(&state->ur); + vfs_io_uring_pread_submit(state); if (!tevent_req_is_in_progress(req)) { return tevent_req_post(req, ev); @@ -366,6 +367,15 @@ static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *hand return req; } +static void vfs_io_uring_pread_submit(struct vfs_io_uring_pread_state *state) +{ + io_uring_prep_readv(&state->ur.sqe, + state->fsp->fh->fd, + &state->iov, 1, + state->offset); + vfs_io_uring_request_submit(&state->ur); +} + static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur, const char *location) { -- 2.17.1 From c4c8eecbdac3f8082c42b4499339be45d98d538b Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 8 May 2020 11:17:51 +0200 Subject: [PATCH 21/28] vfs_io_uring: split out a vfs_io_uring_pwrite_submit() function This can be reused when we add handling for short writes. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher --- source3/modules/vfs_io_uring.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c index 609264777254..3aaff5c0f77b 100644 --- a/source3/modules/vfs_io_uring.c +++ b/source3/modules/vfs_io_uring.c @@ -423,10 +423,13 @@ static ssize_t vfs_io_uring_pread_recv(struct tevent_req *req, struct vfs_io_uring_pwrite_state { struct vfs_io_uring_request ur; + struct files_struct *fsp; + off_t offset; struct iovec iov; size_t nwritten; }; +static void vfs_io_uring_pwrite_submit(struct vfs_io_uring_pwrite_state *state); static void vfs_io_uring_pwrite_completion(struct vfs_io_uring_request *cur, const char *location); @@ -458,13 +461,11 @@ static struct tevent_req *vfs_io_uring_pwrite_send(struct vfs_handle_struct *han state->ur.profile_bytes, n); SMBPROFILE_BYTES_ASYNC_SET_IDLE(state->ur.profile_bytes); + state->fsp = fsp; + state->offset = offset; state->iov.iov_base = discard_const(data); state->iov.iov_len = n; - io_uring_prep_writev(&state->ur.sqe, - fsp->fh->fd, - &state->iov, 1, - offset); - vfs_io_uring_request_submit(&state->ur); + vfs_io_uring_pwrite_submit(state); if (!tevent_req_is_in_progress(req)) { return tevent_req_post(req, ev); @@ -474,6 +475,15 @@ static struct tevent_req *vfs_io_uring_pwrite_send(struct vfs_handle_struct *han return req; } +static void vfs_io_uring_pwrite_submit(struct vfs_io_uring_pwrite_state *state) +{ + io_uring_prep_writev(&state->ur.sqe, + state->fsp->fh->fd, + &state->iov, 1, + state->offset); + vfs_io_uring_request_submit(&state->ur); +} + static void vfs_io_uring_pwrite_completion(struct vfs_io_uring_request *cur, const char *location) { -- 2.17.1 From c77d5fb704e4b8ac42fecc5ca4f00638f55cc938 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 8 May 2020 11:38:56 +0200 Subject: [PATCH 22/28] vfs_io_uring: protect vfs_io_uring_pread_completion() against invalid results We should never get back more than we asked for. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher --- source3/modules/vfs_io_uring.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c index 3aaff5c0f77b..cfae59ea340b 100644 --- a/source3/modules/vfs_io_uring.c +++ b/source3/modules/vfs_io_uring.c @@ -26,6 +26,7 @@ #include "smbd/globals.h" #include "lib/util/tevent_unix.h" #include "lib/util/sys_rw.h" +#include "lib/util/iov_buf.h" #include "smbprofile.h" #include @@ -381,6 +382,9 @@ static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur, { struct vfs_io_uring_pread_state *state = tevent_req_data( cur->req, struct vfs_io_uring_pread_state); + struct iovec *iov = &state->iov; + int num_iov = 1; + bool ok; /* * We rely on being inside the _send() function @@ -394,6 +398,16 @@ static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur, return; } + ok = iov_advance(&iov, &num_iov, cur->cqe.res); + if (!ok) { + /* This is not expected! */ + DBG_ERR("iov_advance() failed cur->cqe.res=%d > iov_len=%d\n", + (int)cur->cqe.res, + (int)state->iov.iov_len); + tevent_req_error(cur->req, EIO); + return; + } + state->nread = state->ur.cqe.res; tevent_req_done(cur->req); } -- 2.17.1 From d29d74a517d37faf4aea48bd4aa969d3062493a5 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 8 May 2020 11:38:56 +0200 Subject: [PATCH 23/28] vfs_io_uring: protect vfs_io_uring_pwrite_completion() against invalid results We should never get more acked than we asked for. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher --- source3/modules/vfs_io_uring.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c index cfae59ea340b..9563995de4f9 100644 --- a/source3/modules/vfs_io_uring.c +++ b/source3/modules/vfs_io_uring.c @@ -503,6 +503,9 @@ static void vfs_io_uring_pwrite_completion(struct vfs_io_uring_request *cur, { struct vfs_io_uring_pwrite_state *state = tevent_req_data( cur->req, struct vfs_io_uring_pwrite_state); + struct iovec *iov = &state->iov; + int num_iov = 1; + bool ok; /* * We rely on being inside the _send() function @@ -516,6 +519,16 @@ static void vfs_io_uring_pwrite_completion(struct vfs_io_uring_request *cur, return; } + ok = iov_advance(&iov, &num_iov, cur->cqe.res); + if (!ok) { + /* This is not expected! */ + DBG_ERR("iov_advance() failed cur->cqe.res=%d > iov_len=%d\n", + (int)cur->cqe.res, + (int)state->iov.iov_len); + tevent_req_error(cur->req, EIO); + return; + } + state->nwritten = state->ur.cqe.res; tevent_req_done(cur->req); } -- 2.17.1 From d3eee19caa3a8dc3e9cd79d580bf31ccb1ae35e9 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 8 May 2020 11:38:56 +0200 Subject: [PATCH 24/28] vfs_io_uring: protect vfs_io_uring_fsync_completion() against invalid results We should never get back a value > 0. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher --- source3/modules/vfs_io_uring.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c index 9563995de4f9..284655b2e685 100644 --- a/source3/modules/vfs_io_uring.c +++ b/source3/modules/vfs_io_uring.c @@ -617,6 +617,13 @@ static void vfs_io_uring_fsync_completion(struct vfs_io_uring_request *cur, return; } + if (cur->cqe.res > 0) { + /* This is not expected! */ + DBG_ERR("got cur->cqe.res=%d\n", (int)cur->cqe.res); + tevent_req_error(cur->req, EIO); + return; + } + tevent_req_done(cur->req); } -- 2.17.1 From 1c665ac3772b2b11d52a3675a6e8be59c8c31e88 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 8 May 2020 13:17:05 +0200 Subject: [PATCH 25/28] vfs_io_uring: make use of sys_valid_io_range() in vfs_io_uring_pwrite_send() This makes the follow up commits easier as we don't have to care about overflows. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher --- source3/modules/vfs_io_uring.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c index 284655b2e685..6c5e00e845e7 100644 --- a/source3/modules/vfs_io_uring.c +++ b/source3/modules/vfs_io_uring.c @@ -457,6 +457,7 @@ static struct tevent_req *vfs_io_uring_pwrite_send(struct vfs_handle_struct *han struct tevent_req *req = NULL; struct vfs_io_uring_pwrite_state *state = NULL; struct vfs_io_uring_config *config = NULL; + bool ok; SMB_VFS_HANDLE_GET_DATA(handle, config, struct vfs_io_uring_config, @@ -475,6 +476,12 @@ static struct tevent_req *vfs_io_uring_pwrite_send(struct vfs_handle_struct *han state->ur.profile_bytes, n); SMBPROFILE_BYTES_ASYNC_SET_IDLE(state->ur.profile_bytes); + ok = sys_valid_io_range(offset, n); + if (!ok) { + tevent_req_error(req, EINVAL); + return tevent_req_post(req, ev); + } + state->fsp = fsp; state->offset = offset; state->iov.iov_base = discard_const(data); -- 2.17.1 From 0e6c20ed8318c670b2b49a06534953fbe9b640c0 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 8 May 2020 13:30:17 +0200 Subject: [PATCH 26/28] vfs_io_uring: retry after a short read in vfs_io_uring_pread_completion() We need to be prepared for short reads from the kernel depending on the state of the page cache. Windows and Mac clients don't expect short reads for files, so we need to retry ourself. For the future we may be able to play with some io_uring flags in order to avoid the retries in userspace, but for now we just fix the data corruption bug... BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 Signed-off-by: Stefan Metzmacher --- source3/modules/vfs_io_uring.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c index 6c5e00e845e7..a91309b89229 100644 --- a/source3/modules/vfs_io_uring.c +++ b/source3/modules/vfs_io_uring.c @@ -398,6 +398,14 @@ static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur, return; } + if (cur->cqe.res == 0) { + /* + * We reached EOF, we're done + */ + tevent_req_done(cur->req); + return; + } + ok = iov_advance(&iov, &num_iov, cur->cqe.res); if (!ok) { /* This is not expected! */ @@ -408,8 +416,20 @@ static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur, return; } - state->nread = state->ur.cqe.res; - tevent_req_done(cur->req); + /* sys_valid_io_range() already checked the boundaries */ + state->nread += state->ur.cqe.res; + if (num_iov == 0) { + /* We're done */ + tevent_req_done(cur->req); + return; + } + + /* + * sys_valid_io_range() already checked the boundaries + * now try to get the rest. + */ + state->offset += state->ur.cqe.res; + vfs_io_uring_pread_submit(state); } static ssize_t vfs_io_uring_pread_recv(struct tevent_req *req, -- 2.17.1 From 4dcbde013073979042805551289328f4757972e4 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 8 May 2020 13:30:17 +0200 Subject: [PATCH 27/28] TODO: vfs_io_uring: retry after a short writes in vfs_io_uring_pwrite_completion() We need to be prepared for short writes from the kernel depending on the state of the page cache. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361 TODO: check how clients handle short writes... TODO: where do we handle offset < 0 ???, see MS-FSA 2.1.5.3 Server Requests a Write --- source3/modules/vfs_io_uring.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c index a91309b89229..5d4771e8a75d 100644 --- a/source3/modules/vfs_io_uring.c +++ b/source3/modules/vfs_io_uring.c @@ -546,6 +546,14 @@ static void vfs_io_uring_pwrite_completion(struct vfs_io_uring_request *cur, return; } + if (cur->cqe.res == 0) { + /* + * Ensure we can never spin. + */ + tevent_req_error(cur->req, ENOSPC); + return; + } + ok = iov_advance(&iov, &num_iov, cur->cqe.res); if (!ok) { /* This is not expected! */ @@ -556,8 +564,20 @@ static void vfs_io_uring_pwrite_completion(struct vfs_io_uring_request *cur, return; } - state->nwritten = state->ur.cqe.res; - tevent_req_done(cur->req); + /* sys_valid_io_range() already checked the boundaries */ + state->nwritten += state->ur.cqe.res; + if (num_iov == 0) { + /* We're done */ + tevent_req_done(cur->req); + return; + } + + /* + * sys_valid_io_range() already checked the boundaries + * now try to write the rest. + */ + state->offset += state->ur.cqe.res; + vfs_io_uring_pwrite_submit(state); } static ssize_t vfs_io_uring_pwrite_recv(struct tevent_req *req, -- 2.17.1 From 70350597a39801de28686386259b88630ca33215 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 6 May 2020 03:12:24 -0700 Subject: [PATCH 28/28] HACK vfs_io_uring: add debugging for bug 14361 --- source3/modules/vfs_io_uring.c | 76 +++++++++++++++++++++++++++++++++- 1 file changed, 75 insertions(+), 1 deletion(-) diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c index 5d4771e8a75d..a7308ff9edd3 100644 --- a/source3/modules/vfs_io_uring.c +++ b/source3/modules/vfs_io_uring.c @@ -37,6 +37,10 @@ struct vfs_io_uring_config { struct tevent_fd *fde; struct vfs_io_uring_request *queue; struct vfs_io_uring_request *pending; + bool nowait_pread; + bool force_async_pread; + bool force_async_pread_retry; + size_t truncate_pread; }; struct vfs_io_uring_request { @@ -220,6 +224,22 @@ static int vfs_io_uring_connect(vfs_handle_struct *handle, const char *service, return -1; } + config->nowait_pread = lp_parm_bool(SNUM(handle->conn), + "io_uring", + "nowait_pread", + false); + config->force_async_pread = lp_parm_bool(SNUM(handle->conn), + "io_uring", + "force_async_pread", + false); + config->force_async_pread_retry = lp_parm_bool(SNUM(handle->conn), + "io_uring", + "force_async_pread_retry", + false); + config->truncate_pread = lp_parm_ulonglong(SNUM(handle->conn), + "io_uring", + "truncate_pread", + UINT32_MAX); return 0; } @@ -312,6 +332,7 @@ struct vfs_io_uring_pread_state { struct files_struct *fsp; off_t offset; struct iovec iov; + struct iovec tmp_iov; size_t nread; }; @@ -370,10 +391,27 @@ static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *hand static void vfs_io_uring_pread_submit(struct vfs_io_uring_pread_state *state) { + struct vfs_io_uring_config *config = state->ur.config; + unsigned flags = 0; + + state->tmp_iov = state->iov; + state->tmp_iov.iov_len = MIN(state->tmp_iov.iov_len, + config->truncate_pread); + io_uring_prep_readv(&state->ur.sqe, state->fsp->fh->fd, - &state->iov, 1, + &state->tmp_iov, 1, state->offset); + if (config->nowait_pread) { + state->ur.sqe.rw_flags |= RWF_NOWAIT; + } + if (config->force_async_pread) { + flags |= IOSQE_ASYNC; + } + if (state->nread > 0 && config->force_async_pread_retry) { + flags |= IOSQE_ASYNC; + } + io_uring_sqe_set_flags(&state->ur.sqe, flags); vfs_io_uring_request_submit(&state->ur); } @@ -398,6 +436,42 @@ static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur, return; } + if ((size_t)cur->cqe.res != (size_t)state->iov.iov_len) { + DBG_ERR("%p: Invalid last_read=%zu (0x%zx) ofs=%zu (0x%zx) len=%zu (0x%zx) nread=%zu (0x%zx) eof=%zu (0x%zx) blks=%zu blocks=%zu %s %s\n", + state, + (size_t)state->nread, + (size_t)state->nread, + (size_t)state->offset, + (size_t)state->offset, + (size_t)state->iov.iov_len, + (size_t)state->iov.iov_len, + (size_t)cur->cqe.res, + (size_t)cur->cqe.res, + (size_t)state->fsp->fsp_name->st.st_ex_size, + (size_t)state->fsp->fsp_name->st.st_ex_size, + (size_t)state->fsp->fsp_name->st.st_ex_blksize, + (size_t)state->fsp->fsp_name->st.st_ex_blocks, + fsp_str_dbg(state->fsp), + fsp_fnum_dbg(state->fsp)); + } else { + DBG_WARNING("%p: last_read=%zu (0x%zx) ofs=%zu (0x%zx) len=%zu (0x%zx) nread=%zu (0x%zx) eof=%zu (0x%zx) blks=%zu blocks=%zu %s %s\n", + state, + (size_t)state->nread, + (size_t)state->nread, + (size_t)state->offset, + (size_t)state->offset, + (size_t)state->iov.iov_len, + (size_t)state->iov.iov_len, + (size_t)cur->cqe.res, + (size_t)cur->cqe.res, + (size_t)state->fsp->fsp_name->st.st_ex_size, + (size_t)state->fsp->fsp_name->st.st_ex_size, + (size_t)state->fsp->fsp_name->st.st_ex_blksize, + (size_t)state->fsp->fsp_name->st.st_ex_blocks, + fsp_str_dbg(state->fsp), + fsp_fnum_dbg(state->fsp)); + } + if (cur->cqe.res == 0) { /* * We reached EOF, we're done -- 2.17.1