From 2a166530615d041c4c4377adc1caff24a4b4f19c Mon Sep 17 00:00:00 2001 From: Shachar Sharon Date: Thu, 13 Jun 2024 15:54:48 +0300 Subject: [PATCH 01/34] vfs_ceph_new: next iteration of samba-to-cephfs bridge Defined new module 'vfs_ceph_new.c' which serves as a place holder for the next development phase of the bridge between samba's VFS layer and libcephfs. Begin with a module which is almost identical to existing 'vfs_ceph.c', except for hooks-names prefix which is 'vfs_ceph_' in order to make clear distinction from existing code base. Following commits will also switch to low-level APIs. BUG: https://bugzilla.samba.org/show_bug.cgi?id=15686 Signed-off-by: Shachar Sharon Reviewed-by: Guenther Deschner Reviewed-by: Anoop C S (cherry picked from commit 3720452720b4760509875f0d2a8ed0d104bb1844) --- source3/modules/vfs_ceph_new.c | 2003 ++++++++++++++++++++++++++++++++ source3/modules/wscript_build | 10 + source3/wscript | 1 + 3 files changed, 2014 insertions(+) create mode 100644 source3/modules/vfs_ceph_new.c diff --git a/source3/modules/vfs_ceph_new.c b/source3/modules/vfs_ceph_new.c new file mode 100644 index 00000000000..313dcb5ac64 --- /dev/null +++ b/source3/modules/vfs_ceph_new.c @@ -0,0 +1,2003 @@ +/* + Unix SMB/CIFS implementation. + Wrap disk only vfs functions to sidestep dodgy compilers. + Copyright (C) Tim Potter 1998 + Copyright (C) Jeremy Allison 2007 + Copyright (C) Brian Chrisman 2011 + Copyright (C) Richard Sharpe 2011 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+ * This VFS only works with the libcephfs.so user-space client. It is not needed
+ * if you are using the kernel client or the FUSE client.
+ *
+ * Add the following smb.conf parameter to each share that will be hosted on
+ * Ceph:
+ *
+ *   vfs objects = [any others you need go here] ceph_new
+ */
+
+#include "includes.h"
+#include "smbd/smbd.h"
+#include "system/filesys.h"
+#include <dirent.h>
+#include <sys/statvfs.h>
+#include "cephfs/libcephfs.h"
+#include "smbprofile.h"
+#include "modules/posixacl_xattr.h"
+#include "lib/util/tevent_unix.h"
+
+#undef DBGC_CLASS
+#define DBGC_CLASS DBGC_VFS
+
+/*
+ * Fallback definitions, used only when LIBCEPHFS_VERSION is not already
+ * defined. Every macro argument is parenthesized so that expression
+ * arguments expand with the intended precedence.
+ */
+#ifndef LIBCEPHFS_VERSION
+#define LIBCEPHFS_VERSION(maj, min, extra) \
+	(((maj) << 16) + ((min) << 8) + (extra))
+#define LIBCEPHFS_VERSION_CODE LIBCEPHFS_VERSION(0, 0, 0)
+#endif
+
+/*
+ * Use %llu whenever we have a 64bit unsigned int, and cast to (long long
+ * unsigned). The argument is parenthesized so the cast applies to the whole
+ * expression, e.g. llu(a + b).
+ */
+#define llu(_var) ((long long unsigned)(_var))
+
+/*
+ * Note, libcephfs's return code model is to return -errno. Thus we have to
+ * convert to what Samba expects: set errno to non-negative value and return -1.
+ *
+ * Using convenience helper functions to avoid non-hygienic macro.
+ */
+
+/*
+ * Convert an int libcephfs result.
+ *
+ * ret < 0:  errno is set to -ret and -1 is returned.
+ * ret >= 0: errno is cleared and ret is returned unchanged.
+ */
+static int status_code(int ret)
+{
+	if (ret < 0) {
+		errno = -ret;
+		return -1;
+	}
+	errno = 0;
+	return ret;
+}
+
+/*
+ * Same conversion as status_code() for wide (byte count / offset) results.
+ *
+ * ret < 0:  errno is set to -ret (narrowed to int) and -1 is returned.
+ * ret >= 0: errno is cleared and ret is returned as ssize_t.
+ */
+static ssize_t lstatus_code(intmax_t ret)
+{
+	if (ret < 0) {
+		errno = -((int)ret);
+		return -1;
+	}
+	errno = 0;
+	return (ssize_t)ret;
+}
+
+/*
+ * Track unique connections, as virtual mounts, to cephfs file systems.
+ * Individual mounts will be set on the handle->data attribute, but
+ * the mounts themselves will be shared so as not to spawn extra mounts
+ * to the same cephfs.
+ *
+ * Individual mounts are IDed by a 'cookie' value that is a string built
+ * from identifying parameters found in smb.conf.
+ */
+
+static struct cephmount_cached {
+	char *cookie;		/* talloc'ed copy of the identifying cookie */
+	uint32_t count;		/* users of this mount: 1 on add, ++ on cache hit */
+	struct ceph_mount_info *mount;	/* the shared libcephfs mount */
+	struct cephmount_cached *next, *prev;	/* links for DLIST_ADD/DLIST_REMOVE */
+} *cephmount_cached;	/* head of the list of cached mounts */
+/*
+ * Create a cache entry for (cookie, mount) with a count of 1 and link it
+ * into the cephmount_cached list. The cookie is duplicated as a talloc
+ * child of the entry.
+ *
+ * Returns 0 on success, or -1 with errno = ENOMEM if an allocation fails.
+ */
+static int cephmount_cache_add(const char *cookie,
+			       struct ceph_mount_info *mount)
+{
+	struct cephmount_cached *entry = NULL;
+
+	entry = talloc_zero(NULL, struct cephmount_cached);
+	if (entry == NULL) {
+		errno = ENOMEM;
+		return -1;
+	}
+
+	entry->cookie = talloc_strdup(entry, cookie);
+	if (entry->cookie == NULL) {
+		talloc_free(entry);
+		errno = ENOMEM;
+		return -1;
+	}
+
+	entry->mount = mount;
+	entry->count = 1;
+
+	DBG_DEBUG("adding mount cache entry for %s\n", entry->cookie);
+	DLIST_ADD(cephmount_cached, entry);
+	return 0;
+}
+/*
+ * Look up a cached mount by cookie (exact strcmp match). On a hit the
+ * entry's count is incremented and its mount is returned. On a miss NULL is
+ * returned with errno = ENOENT.
+ */
+static struct ceph_mount_info *cephmount_cache_update(const char *cookie)
+{
+	struct cephmount_cached *entry = NULL;
+
+	for (entry = cephmount_cached; entry; entry = entry->next) {
+		if (strcmp(entry->cookie, cookie) == 0) {
+			entry->count++;
+			DBG_DEBUG("updated mount cache: count is [%"
+				  PRIu32 "]\n", entry->count);
+			return entry->mount;
+		}
+	}
+
+	errno = ENOENT;
+	return NULL;
+}
+/*
+ * Drop one reference on the cache entry that holds the given mount.
+ *
+ * Returns the remaining count (> 0) while the entry is still referenced,
+ * 0 once the last reference is gone (the entry is unlinked and freed; the
+ * mount itself is not touched here), or -1 with errno = ENOENT when no
+ * entry holds this mount.
+ */
+static int cephmount_cache_remove(struct ceph_mount_info *mount)
+{
+	struct cephmount_cached *entry = NULL;
+
+	for (entry = cephmount_cached; entry; entry = entry->next) {
+		if (entry->mount == mount) {
+			if (--entry->count) {
+				DBG_DEBUG("updated mount cache: count is [%"
+					  PRIu32 "]\n", entry->count);
+				return entry->count;
+			}
+
+			DBG_DEBUG("removing mount cache entry for %s\n",
+				  entry->cookie);
+			DLIST_REMOVE(cephmount_cached, entry);
+			talloc_free(entry);
+			return 0;
+		}
+	}
+	errno = ENOENT;
+	return -1;
+}
+/*
+ * Build the cookie string "(<config_file>/<user_id>/<filesystem>)" from the
+ * share's "ceph" parametric options, allocated on mem_ctx. Unset options
+ * fall back to ".", "" and "" respectively. Returns the result of
+ * talloc_asprintf().
+ */
+static char *cephmount_get_cookie(TALLOC_CTX * mem_ctx, const int snum)
+{
+	const char *conf_file =
+	    lp_parm_const_string(snum, "ceph", "config_file", ".");
+	const char *user_id = lp_parm_const_string(snum, "ceph", "user_id", "");
+	const char *fsname =
+	    lp_parm_const_string(snum, "ceph", "filesystem", "");
+	return talloc_asprintf(mem_ctx, "(%s/%s/%s)", conf_file, user_id,
+			       fsname);
+}
+/*
+ * Select the named cephfs file system on a not-yet-mounted handle.
+ * Returns the libcephfs result, or -ENOTSUP when the build lacks
+ * ceph_select_filesystem().
+ */
+static int cephmount_select_fs(struct ceph_mount_info *mnt, const char *fsname)
+{
+	/*
+	 * ceph_select_filesystem was added in ceph 'nautilus' (v14).
+	 * Earlier versions of libcephfs will lack that API function.
+	 * At the time of this writing (Feb 2023) all versions of ceph
+	 * supported by ceph upstream have this function.
+	 */
+#if defined(HAVE_CEPH_SELECT_FILESYSTEM)
+	DBG_DEBUG("[CEPH] calling: ceph_select_filesystem with %s\n", fsname);
+	return ceph_select_filesystem(mnt, fsname);
+#else
+	DBG_ERR("[CEPH] ceph_select_filesystem not available\n");
+	return -ENOTSUP;
+#endif
+}
+/*
+ * Create, configure and mount a libcephfs handle for share snum, using the
+ * share's "ceph" config_file, user_id and filesystem options.
+ */
+static struct ceph_mount_info *cephmount_mount_fs(const int snum)
+{
+	int ret;
+	char buf[256];
+	struct ceph_mount_info *mnt = NULL;
+	/* if config_file and/or user_id are NULL, ceph will use defaults */
+	const char *conf_file =
+	    lp_parm_const_string(snum, "ceph", "config_file", NULL);
+	const char *user_id =
+	    lp_parm_const_string(snum, "ceph", "user_id", NULL);
+	const char *fsname =
+	    lp_parm_const_string(snum, "ceph", "filesystem", NULL);
+
+	DBG_DEBUG("[CEPH] calling: ceph_create\n");
+	ret = ceph_create(&mnt, user_id);
+	if (ret) {
+		errno = -ret;
+		return NULL;
+	}
+
+	DBG_DEBUG("[CEPH] calling: ceph_conf_read_file with %s\n",
+		  (conf_file == NULL ? "default path" : conf_file));
+	ret = ceph_conf_read_file(mnt, conf_file);
+	if (ret) {
+		goto err_cm_release;
+	}
+
+	DBG_DEBUG("[CEPH] calling: ceph_conf_get\n");
+	/* only the return code is checked; the value read into buf is unused */
+	ret = ceph_conf_get(mnt, "log file", buf, sizeof(buf));
+	if (ret < 0) {
+		goto err_cm_release;
+	}
+
+	/* libcephfs disables POSIX ACL support by default, enable it...
*/
+	ret = ceph_conf_set(mnt, "client_acl_type", "posix_acl");
+	if (ret < 0) {
+		goto err_cm_release;
+	}
+	/* tell libcephfs to perform local permission checks */
+	ret = ceph_conf_set(mnt, "fuse_default_permissions", "false");
+	if (ret < 0) {
+		goto err_cm_release;
+	}
+	/*
+	 * select a cephfs file system to use:
+	 * In ceph, multiple file system support has been stable since
+	 * 'pacific'. Permit different shares to access different file systems.
+	 */
+	if (fsname != NULL) {
+		ret = cephmount_select_fs(mnt, fsname);
+		if (ret < 0) {
+			goto err_cm_release;
+		}
+	}
+
+	DBG_DEBUG("[CEPH] calling: ceph_mount\n");
+	ret = ceph_mount(mnt, NULL);
+	if (ret >= 0) {
+		goto cm_done;
+	}
+
+	err_cm_release:
+	ceph_release(mnt);
+	mnt = NULL;
+	DBG_DEBUG("[CEPH] Error mounting fs: %s\n", strerror(-ret));
+	cm_done:
+	/*
+	 * Handle the error correctly. Ceph returns -errno.
+	 */
+	if (ret) {
+		errno = -ret;
+	}
+	return mnt;
+}
+
+/* Check for NULL pointer parameters in vfs_ceph_* functions */
+
+/* We don't want to have NULL function pointers lying around.  Someone
+   is sure to try and execute them.  These stubs are used to prevent
+   this possibility. */
+
+/*
+ * Attach a cephfs mount to this connection.
+ *
+ * The share's cookie is looked up in the mount cache first; on a hit the
+ * cached mount is reused (its count is bumped by cephmount_cache_update()).
+ * On a miss a new mount is created and added to the cache. If caching the
+ * new mount fails, the mount is unmounted and released again so that it is
+ * not leaked: nothing else holds a reference to it at that point.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int vfs_ceph_connect(struct vfs_handle_struct *handle,
+			    const char *service, const char *user)
+{
+	int ret = 0;
+	struct ceph_mount_info *cmount = NULL;
+	int snum = SNUM(handle->conn);
+	char *cookie = cephmount_get_cookie(handle, snum);
+	if (cookie == NULL) {
+		errno = ENOMEM;
+		return -1;
+	}
+
+	cmount = cephmount_cache_update(cookie);
+	if (cmount != NULL) {
+		goto connect_ok;
+	}
+
+	cmount = cephmount_mount_fs(snum);
+	if (cmount == NULL) {
+		ret = -1;
+		goto connect_fail;
+	}
+	ret = cephmount_cache_add(cookie, cmount);
+	if (ret) {
+		/* keep the errno from cephmount_cache_add() across cleanup */
+		int saved_errno = errno;
+
+		ceph_unmount(cmount);
+		ceph_release(cmount);
+		cmount = NULL;
+		errno = saved_errno;
+		goto connect_fail;
+	}
+
+	connect_ok:
+	handle->data = cmount;
+	DBG_WARNING("Connection established with the server: %s\n", cookie);
+	/*
+	 * Unless we have an async implementation of getxattrat turn this off.
+	 */
+	lp_do_parameter(SNUM(handle->conn), "smbd async dosmode", "false");
+	connect_fail:
+	talloc_free(cookie);	/* reached on both the success and failure paths */
+	return ret;
+}
+/*
+ * Drop this connection's reference on its cached mount. The mount is only
+ * unmounted and released when cephmount_cache_remove() reports that the
+ * last reference is gone (return value 0). Unmount/release failures are
+ * logged but not propagated (the function returns void).
+ */
+static void vfs_ceph_disconnect(struct vfs_handle_struct *handle)
+{
+	int ret = cephmount_cache_remove(handle->data);
+	if (ret < 0) {
+		DBG_ERR("failed to remove ceph mount from cache: %s\n",
+			strerror(errno));
+		return;
+	}
+	if (ret > 0) {
+		DBG_DEBUG("mount cache entry still in use\n");
+		return;
+	}
+
+	ret = ceph_unmount(handle->data);
+	if (ret < 0) {
+		DBG_ERR("[CEPH] failed to unmount: %s\n", strerror(-ret));
+	}
+
+	ret = ceph_release(handle->data);
+	if (ret < 0) {
+		DBG_ERR("[CEPH] failed to release: %s\n", strerror(-ret));
+	}
+	handle->data = NULL;
+}
+
+/* Disk operations */
+/*
+ * Report file system usage for the path in smb_fname via ceph_statfs().
+ * On failure errno is set by status_code() and (uint64_t)-1 is returned.
+ */
+static uint64_t vfs_ceph_disk_free(struct vfs_handle_struct *handle,
+				   const struct smb_filename *smb_fname,
+				   uint64_t *bsize,
+				   uint64_t *dfree,
+				   uint64_t *dsize)
+{
+	struct statvfs statvfs_buf = { 0 };
+	int ret;
+
+	ret = ceph_statfs(handle->data, smb_fname->base_name, &statvfs_buf);
+	if (ret < 0) {
+		DBG_DEBUG("[CEPH] ceph_statfs returned %d\n", ret);
+		return (uint64_t)status_code(ret);
+	}
+	/*
+	 * Provide all the correct values.
+	 */
+	*bsize = statvfs_buf.f_bsize;
+	*dfree = statvfs_buf.f_bavail;
+	*dsize = statvfs_buf.f_blocks;
+	DBG_DEBUG("[CEPH] bsize: %llu, dfree: %llu, dsize: %llu\n",
+		  llu(*bsize),
+		  llu(*dfree),
+		  llu(*dsize));
+	return *dfree;	/* same value as stored in *dfree (f_bavail) */
+}
+/*
+ * Fill a vfs_statvfs_struct from ceph_statfs() for the path in smb_fname.
+ * Returns the non-negative ceph_statfs() result on success, or -1 with
+ * errno set (via status_code()) on failure.
+ */
+static int vfs_ceph_statvfs(struct vfs_handle_struct *handle,
+			    const struct smb_filename *smb_fname,
+			    struct vfs_statvfs_struct *statbuf)
+{
+	struct statvfs statvfs_buf = { 0 };
+	int ret;
+
+	ret = ceph_statfs(handle->data, smb_fname->base_name, &statvfs_buf);
+	if (ret < 0) {
+		return status_code(ret);
+	}
+
+	statbuf->OptimalTransferSize = statvfs_buf.f_frsize;
+	statbuf->BlockSize = statvfs_buf.f_bsize;
+	statbuf->TotalBlocks = statvfs_buf.f_blocks;
+	statbuf->BlocksAvail = statvfs_buf.f_bfree;
+	statbuf->UserBlocksAvail = statvfs_buf.f_bavail;
+	statbuf->TotalFileNodes = statvfs_buf.f_files;
+	statbuf->FreeFileNodes = statvfs_buf.f_ffree;
+	statbuf->FsIdentifier = statvfs_buf.f_fsid;
+	DBG_DEBUG("[CEPH] f_bsize: %ld, f_blocks: %ld, f_bfree: %ld, "
+		  "f_bavail: %ld\n",
+		  (long int)statvfs_buf.f_bsize,
+		  (long int)statvfs_buf.f_blocks,
+		  (long int)statvfs_buf.f_bfree,
+		  (long int)statvfs_buf.f_bavail);
+
+	return ret;
+}
+/*
+ * Report fixed capabilities: case-sensitive search and case-preserved
+ * names; the timestamp resolution is set to TIMESTAMP_SET_NT_OR_BETTER.
+ */
+static uint32_t vfs_ceph_fs_capabilities(
+	struct vfs_handle_struct *handle,
+	enum timestamp_set_resolution *p_ts_res)
+{
+	uint32_t caps = FILE_CASE_SENSITIVE_SEARCH | FILE_CASE_PRESERVED_NAMES;
+
+	*p_ts_res = TIMESTAMP_SET_NT_OR_BETTER;
+
+	return caps;
+}
+
+/* Directory operations */
+/*
+ * Open a directory stream for fsp. With HAVE_CEPH_FDOPENDIR the fsp's io fd
+ * is used; otherwise the directory is opened by path. The libcephfs
+ * ceph_dir_result pointer is returned cast to DIR *. The mask and
+ * attributes arguments are not used.
+ */
+static DIR *vfs_ceph_fdopendir(struct vfs_handle_struct *handle,
+			       struct files_struct *fsp,
+			       const char *mask,
+			       uint32_t attributes)
+{
+	int ret = 0;
+	struct ceph_dir_result *result = NULL;
+
+#ifdef HAVE_CEPH_FDOPENDIR
+	int dirfd = fsp_get_io_fd(fsp);
+	DBG_DEBUG("[CEPH] fdopendir(%p, %d)\n", handle, dirfd);
+	ret = ceph_fdopendir(handle->data, dirfd, &result);
+#else
+	DBG_DEBUG("[CEPH] fdopendir(%p, %p)\n", handle, fsp);
+	ret = ceph_opendir(handle->data, fsp->fsp_name->base_name, &result);
+#endif
+	if (ret <
0) {
+		result = NULL;
+		errno = -ret; /* We return result which is NULL in this case */
+	}
+
+	DBG_DEBUG("[CEPH] fdopendir(...) = %d\n", ret);
+	return (DIR *)result;
+}
+
+/*
+ * Return the next entry of a directory stream opened by
+ * vfs_ceph_fdopendir(); the result of ceph_readdir() is passed through.
+ */
+static struct dirent *vfs_ceph_readdir(struct vfs_handle_struct *handle,
+				       struct files_struct *dirfsp,
+				       DIR *dirp)
+{
+	struct dirent *result = NULL;
+
+	DBG_DEBUG("[CEPH] readdir(%p, %p)\n", handle, dirp);
+	result = ceph_readdir(handle->data, (struct ceph_dir_result *) dirp);
+	DBG_DEBUG("[CEPH] readdir(...) = %p\n", result);
+
+	return result;
+}
+
+/* Reset a directory stream via ceph_rewinddir(). */
+static void vfs_ceph_rewinddir(struct vfs_handle_struct *handle, DIR *dirp)
+{
+	DBG_DEBUG("[CEPH] rewinddir(%p, %p)\n", handle, dirp);
+	ceph_rewinddir(handle->data, (struct ceph_dir_result *) dirp);
+}
+
+/*
+ * Create directory smb_fname relative to dirfsp.
+ *
+ * With HAVE_CEPH_MKDIRAT the call is made relative to dirfsp's pathref fd;
+ * otherwise a full path is built and ceph_mkdir() is used. Returns 0 on
+ * success or -1 with errno set. A failure to build the full path is
+ * reported as ENOMEM, matching vfs_ceph_renameat() and vfs_ceph_fstatat().
+ */
+static int vfs_ceph_mkdirat(struct vfs_handle_struct *handle,
+			    files_struct *dirfsp,
+			    const struct smb_filename *smb_fname,
+			    mode_t mode)
+{
+	int result = -1;
+#ifdef HAVE_CEPH_MKDIRAT
+	int dirfd = fsp_get_pathref_fd(dirfsp);
+
+	DBG_DEBUG("[CEPH] mkdirat(%p, %d, %s)\n",
+		  handle,
+		  dirfd,
+		  smb_fname->base_name);
+
+	result = ceph_mkdirat(handle->data, dirfd, smb_fname->base_name, mode);
+
+	DBG_DEBUG("[CEPH] mkdirat(...) = %d\n", result);
+
+	return status_code(result);
+#else
+	struct smb_filename *full_fname = NULL;
+
+	full_fname = full_path_from_dirfsp_atname(talloc_tos(),
+						  dirfsp,
+						  smb_fname);
+	if (full_fname == NULL) {
+		errno = ENOMEM;
+		return -1;
+	}
+
+	DBG_DEBUG("[CEPH] mkdir(%p, %s)\n",
+		  handle, smb_fname_str_dbg(full_fname));
+
+	result = ceph_mkdir(handle->data, full_fname->base_name, mode);
+
+	TALLOC_FREE(full_fname);
+
+	return status_code(result);
+#endif
+}
+
+/* Close a directory stream; returns 0 or -1 with errno set. */
+static int vfs_ceph_closedir(struct vfs_handle_struct *handle, DIR *dirp)
+{
+	int result;
+
+	DBG_DEBUG("[CEPH] closedir(%p, %p)\n", handle, dirp);
+	result = ceph_closedir(handle->data, (struct ceph_dir_result *) dirp);
+	DBG_DEBUG("[CEPH] closedir(...) = %d\n", result);
+	return status_code(result);
+}
+
+/* File operations */
+
+/*
+ * Open smb_fname relative to dirfsp.
+ *
+ * - A non-zero how->resolve is rejected with ENOSYS.
+ * - Stream names are rejected with ENOENT (result starts out as -ENOENT).
+ * - For pathref opens O_PATH is added when the platform defines it;
+ *   otherwise the open is performed as root.
+ * - Without HAVE_CEPH_OPENAT a full path is built (unless dirfsp refers to
+ *   AT_FDCWD) and ceph_open() is used; a failure to build that path is
+ *   reported as ENOMEM.
+ *
+ * Returns the libcephfs file descriptor, or -1 with errno set.
+ */
+static int vfs_ceph_openat(struct vfs_handle_struct *handle,
+			   const struct files_struct *dirfsp,
+			   const struct smb_filename *smb_fname,
+			   files_struct *fsp,
+			   const struct vfs_open_how *how)
+{
+	int flags = how->flags;
+	mode_t mode = how->mode;
+	struct smb_filename *name = NULL;
+	bool have_opath = false;
+	bool became_root = false;
+	int result = -ENOENT;
+#ifdef HAVE_CEPH_OPENAT
+	int dirfd = -1;
+#endif
+
+	if (how->resolve != 0) {
+		errno = ENOSYS;
+		return -1;
+	}
+
+	if (smb_fname->stream_name) {
+		goto out;
+	}
+
+#ifdef O_PATH
+	have_opath = true;
+	if (fsp->fsp_flags.is_pathref) {
+		flags |= O_PATH;
+	}
+#endif
+
+#ifdef HAVE_CEPH_OPENAT
+	dirfd = fsp_get_pathref_fd(dirfsp);
+
+	DBG_DEBUG("[CEPH] openat(%p, %d, %p, %d, %d)\n",
+		  handle, dirfd, fsp, flags, mode);
+
+	if (fsp->fsp_flags.is_pathref && !have_opath) {
+		become_root();
+		became_root = true;
+	}
+
+	result = ceph_openat(handle->data,
+			     dirfd,
+			     smb_fname->base_name,
+			     flags,
+			     mode);
+
+#else
+	if (fsp_get_pathref_fd(dirfsp) != AT_FDCWD) {
+		name = full_path_from_dirfsp_atname(talloc_tos(),
+						    dirfsp,
+						    smb_fname);
+		if (name == NULL) {
+			errno = ENOMEM;
+			return -1;
+		}
+		smb_fname = name;
+	}
+
+	DBG_DEBUG("[CEPH] openat(%p, %s, %p, %d, %d)\n", handle,
+		  smb_fname_str_dbg(smb_fname), fsp, flags, mode);
+
+	if (fsp->fsp_flags.is_pathref && !have_opath) {
+		become_root();
+		became_root = true;
+	}
+
+	result = ceph_open(handle->data, smb_fname->base_name, flags, mode);
+#endif
+	if (became_root) {
+		unbecome_root();
+	}
+out:
+	TALLOC_FREE(name);
+	fsp->fsp_flags.have_proc_fds = false;
+	DBG_DEBUG("[CEPH] open(...) = %d\n", result);
+	return status_code(result);
+}
+
+/* Close the fsp's libcephfs descriptor; returns 0 or -1 with errno set. */
+static int vfs_ceph_close(struct vfs_handle_struct *handle, files_struct *fsp)
+{
+	int result;
+
+	DBG_DEBUG("[CEPH] close(%p, %p)\n", handle, fsp);
+	result = ceph_close(handle->data, fsp_get_pathref_fd(fsp));
+	DBG_DEBUG("[CEPH] close(...) = %d\n", result);
+	return status_code(result);
+}
+
+/*
+ * Read up to n bytes at offset into data. Returns the number of bytes read,
+ * or -1 with errno set.
+ */
+static ssize_t vfs_ceph_pread(struct vfs_handle_struct *handle,
+			      files_struct *fsp,
+			      void *data,
+			      size_t n,
+			      off_t offset)
+{
+	ssize_t result;
+
+	DBG_DEBUG("[CEPH] pread(%p, %p, %p, %llu, %llu)\n",
+		  handle,
+		  fsp,
+		  data,
+		  llu(n),
+		  llu(offset));
+
+	result = ceph_read(handle->data, fsp_get_io_fd(fsp), data, n, offset);
+	DBG_DEBUG("[CEPH] pread(...) = %llu\n", llu(result));
+	return lstatus_code(result);
+}
+
+/* Per-request state of the (synchronously executed) async pread. */
+struct vfs_ceph_pread_state {
+	ssize_t bytes_read;
+	struct vfs_aio_state vfs_aio_state;
+};
+
+/*
+ * Fake up an async ceph read by calling the synchronous API.
+ *
+ * The read is performed before this function returns; the request is then
+ * marked done (or failed with the positive errno) and posted to ev.
+ */
+static struct tevent_req *vfs_ceph_pread_send(struct vfs_handle_struct *handle,
+					      TALLOC_CTX *mem_ctx,
+					      struct tevent_context *ev,
+					      struct files_struct *fsp,
+					      void *data,
+					      size_t n, off_t offset)
+{
+	struct tevent_req *req = NULL;
+	struct vfs_ceph_pread_state *state = NULL;
+	int ret = -1;
+
+	DBG_DEBUG("[CEPH] %s\n", __func__);
+	req = tevent_req_create(mem_ctx, &state, struct vfs_ceph_pread_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	ret = ceph_read(handle->data, fsp_get_io_fd(fsp), data, n, offset);
+	if (ret < 0) {
+		/* ceph returns -errno on error. */
+		tevent_req_error(req, -ret);
+		return tevent_req_post(req, ev);
+	}
+
+	state->bytes_read = ret;
+	tevent_req_done(req);
+	/* Return and schedule the completion of the call.
*/
+	return tevent_req_post(req, ev);
+}
+/*
+ * Collect the result of vfs_ceph_pread_send(). Returns the byte count
+ * stored by the send function, or -1 with the unix error stored in
+ * vfs_aio_state->error.
+ */
+static ssize_t vfs_ceph_pread_recv(struct tevent_req *req,
+				   struct vfs_aio_state *vfs_aio_state)
+{
+	struct vfs_ceph_pread_state *state =
+		tevent_req_data(req, struct vfs_ceph_pread_state);
+
+	DBG_DEBUG("[CEPH] %s\n", __func__);
+	if (tevent_req_is_unix_error(req, &vfs_aio_state->error)) {
+		return -1;
+	}
+	*vfs_aio_state = state->vfs_aio_state;
+	return state->bytes_read;
+}
+/*
+ * Write n bytes from data at offset. Returns the number of bytes written,
+ * or -1 with errno set (via lstatus_code()).
+ */
+static ssize_t vfs_ceph_pwrite(struct vfs_handle_struct *handle,
+			       files_struct *fsp,
+			       const void *data,
+			       size_t n,
+			       off_t offset)
+{
+	ssize_t result;
+
+	DBG_DEBUG("[CEPH] pwrite(%p, %p, %p, %llu, %llu)\n",
+		  handle,
+		  fsp,
+		  data,
+		  llu(n),
+		  llu(offset));
+	result = ceph_write(handle->data, fsp_get_io_fd(fsp), data, n, offset);
+	/* NOTE(review): a negative result is printed as unsigned here */
+	DBG_DEBUG("[CEPH] pwrite(...) = %llu\n", llu(result));
+	return lstatus_code(result);
+}
+/* Per-request state of the (synchronously executed) async pwrite. */
+struct vfs_ceph_pwrite_state {
+	ssize_t bytes_written;
+	struct vfs_aio_state vfs_aio_state;
+};
+
+/*
+ * Fake up an async ceph write by calling the synchronous API.
+ *
+ * The write is performed before this function returns; the request is then
+ * marked done (or failed with the positive errno) and posted to ev.
+ */
+static struct tevent_req *vfs_ceph_pwrite_send(struct vfs_handle_struct *handle,
+					       TALLOC_CTX *mem_ctx,
+					       struct tevent_context *ev,
+					       struct files_struct *fsp,
+					       const void *data,
+					       size_t n, off_t offset)
+{
+	struct tevent_req *req = NULL;
+	struct vfs_ceph_pwrite_state *state = NULL;
+	int ret = -1;
+
+	DBG_DEBUG("[CEPH] %s\n", __func__);
+	req = tevent_req_create(mem_ctx, &state, struct vfs_ceph_pwrite_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	ret = ceph_write(handle->data, fsp_get_io_fd(fsp), data, n, offset);
+	if (ret < 0) {
+		/* ceph returns -errno on error. */
+		tevent_req_error(req, -ret);
+		return tevent_req_post(req, ev);
+	}
+
+	state->bytes_written = ret;
+	tevent_req_done(req);
+	/* Return and schedule the completion of the call.
*/
+	return tevent_req_post(req, ev);
+}
+/*
+ * Collect the result of vfs_ceph_pwrite_send(). Returns the byte count
+ * stored by the send function, or -1 with the unix error stored in
+ * vfs_aio_state->error.
+ */
+static ssize_t vfs_ceph_pwrite_recv(struct tevent_req *req,
+				    struct vfs_aio_state *vfs_aio_state)
+{
+	struct vfs_ceph_pwrite_state *state =
+		tevent_req_data(req, struct vfs_ceph_pwrite_state);
+
+	DBG_DEBUG("[CEPH] %s\n", __func__);
+	if (tevent_req_is_unix_error(req, &vfs_aio_state->error)) {
+		return -1;
+	}
+	*vfs_aio_state = state->vfs_aio_state;
+	return state->bytes_written;
+}
+/*
+ * Reposition the file offset of fsp's io fd via ceph_lseek(). Returns the
+ * ceph_lseek() result converted by lstatus_code(): -1 with errno set when
+ * it is negative.
+ */
+static off_t vfs_ceph_lseek(struct vfs_handle_struct *handle,
+			    files_struct *fsp,
+			    off_t offset,
+			    int whence)
+{
+	off_t result = 0;
+
+	DBG_DEBUG("[CEPH] vfs_ceph_lseek\n");
+	result = ceph_lseek(handle->data, fsp_get_io_fd(fsp), offset, whence);
+	return lstatus_code(result);
+}
+/* Always fails: returns -1 with errno = ENOTSUP; no argument is used. */
+static ssize_t vfs_ceph_sendfile(struct vfs_handle_struct *handle,
+				 int tofd,
+				 files_struct *fromfsp,
+				 const DATA_BLOB *hdr,
+				 off_t offset,
+				 size_t n)
+{
+	/*
+	 * We cannot support sendfile because libcephfs is in user space.
+	 */
+	DBG_DEBUG("[CEPH] vfs_ceph_sendfile\n");
+	errno = ENOTSUP;
+	return -1;
+}
+/* Always fails: returns -1 with errno = ENOTSUP; no argument is used. */
+static ssize_t vfs_ceph_recvfile(struct vfs_handle_struct *handle,
+				 int fromfd,
+				 files_struct *tofsp,
+				 off_t offset,
+				 size_t n)
+{
+	/*
+	 * We cannot support recvfile because libcephfs is in user space.
+	 */
+	DBG_DEBUG("[CEPH] vfs_ceph_recvfile\n");
+	errno = ENOTSUP;
+	return -1;
+}
+/*
+ * Rename smb_fname_src (relative to srcfsp) to smb_fname_dst (relative to
+ * dstfsp). Both names are expanded to full paths and passed to
+ * ceph_rename(). Stream names are rejected with ENOENT; a failure to build
+ * either path is reported as ENOMEM. Returns 0 or -1 with errno set.
+ */
+static int vfs_ceph_renameat(struct vfs_handle_struct *handle,
+			     files_struct *srcfsp,
+			     const struct smb_filename *smb_fname_src,
+			     files_struct *dstfsp,
+			     const struct smb_filename *smb_fname_dst)
+{
+	struct smb_filename *full_fname_src = NULL;
+	struct smb_filename *full_fname_dst = NULL;
+	int result = -1;
+
+	DBG_DEBUG("[CEPH] vfs_ceph_renameat\n");
+	if (smb_fname_src->stream_name || smb_fname_dst->stream_name) {
+		errno = ENOENT;
+		return result;
+	}
+
+	full_fname_src = full_path_from_dirfsp_atname(talloc_tos(),
+						      srcfsp,
+						      smb_fname_src);
+	if (full_fname_src == NULL) {
+		errno = ENOMEM;
+		return -1;
+	}
+	full_fname_dst = full_path_from_dirfsp_atname(talloc_tos(),
+						      dstfsp,
+						      smb_fname_dst);
+	if (full_fname_dst == NULL) {
+		TALLOC_FREE(full_fname_src);
+		errno = ENOMEM;
+		return -1;
+	}
+
+	result = ceph_rename(handle->data,
+			     full_fname_src->base_name,
+			     full_fname_dst->base_name);
+
+	TALLOC_FREE(full_fname_src);
+	TALLOC_FREE(full_fname_dst);
+
+	return status_code(result);
+}
+
+/*
+ * Fake up an async ceph fsync by calling the synchronous API.
+ *
+ * ceph_fsync() is called before this function returns; the request is then
+ * marked done (or failed with the positive errno) and posted to ev.
+ */
+
+static struct tevent_req *vfs_ceph_fsync_send(struct vfs_handle_struct *handle,
+					      TALLOC_CTX *mem_ctx,
+					      struct tevent_context *ev,
+					      files_struct *fsp)
+{
+	struct tevent_req *req = NULL;
+	struct vfs_aio_state *state = NULL;
+	int ret = -1;
+
+	DBG_DEBUG("[CEPH] vfs_ceph_fsync_send\n");
+
+	req = tevent_req_create(mem_ctx, &state, struct vfs_aio_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	/* Make sync call. */
+	ret = ceph_fsync(handle->data, fsp_get_io_fd(fsp), false);
+
+	if (ret != 0) {
+		/* ceph_fsync returns -errno on error. */
+		tevent_req_error(req, -ret);
+		return tevent_req_post(req, ev);
+	}
+
+	/* Mark it as done. */
+	tevent_req_done(req);
+	/* Return and schedule the completion of the call.
*/
+	return tevent_req_post(req, ev);
+}
+/*
+ * Collect the result of vfs_ceph_fsync_send(). Returns 0 on success, or -1
+ * with the unix error stored in vfs_aio_state->error.
+ */
+static int vfs_ceph_fsync_recv(struct tevent_req *req,
+			       struct vfs_aio_state *vfs_aio_state)
+{
+	struct vfs_aio_state *state =
+		tevent_req_data(req, struct vfs_aio_state);
+
+	DBG_DEBUG("[CEPH] vfs_ceph_fsync_recv\n");
+
+	if (tevent_req_is_unix_error(req, &vfs_aio_state->error)) {
+		return -1;
+	}
+	*vfs_aio_state = *state;
+	return 0;
+}
+/* statx fields requested by every stat-like call below */
+#define SAMBA_STATX_ATTR_MASK (CEPH_STATX_BASIC_STATS|CEPH_STATX_BTIME)
+/*
+ * Copy the fields of a ceph_statx result into Samba's stat_ex. A warning
+ * is logged when stx_mask lacks any bit of SAMBA_STATX_ATTR_MASK; the
+ * fields are copied regardless.
+ */
+static void init_stat_ex_from_ceph_statx(struct stat_ex *dst,
+					 const struct ceph_statx *stx)
+{
+	DBG_DEBUG("[CEPH]\tstx = {dev = %llx, ino = %llu, mode = 0x%x, "
+		  "nlink = %llu, uid = %d, gid = %d, rdev = %llx, size = %llu, "
+		  "blksize = %llu, blocks = %llu, atime = %llu, mtime = %llu, "
+		  "ctime = %llu, btime = %llu}\n",
+		  llu(stx->stx_dev), llu(stx->stx_ino), stx->stx_mode,
+		  llu(stx->stx_nlink), stx->stx_uid, stx->stx_gid,
+		  llu(stx->stx_rdev), llu(stx->stx_size), llu(stx->stx_blksize),
+		  llu(stx->stx_blocks), llu(stx->stx_atime.tv_sec),
+		  llu(stx->stx_mtime.tv_sec), llu(stx->stx_ctime.tv_sec),
+		  llu(stx->stx_btime.tv_sec));
+
+	if ((stx->stx_mask & SAMBA_STATX_ATTR_MASK) != SAMBA_STATX_ATTR_MASK) {
+		DBG_WARNING("%s: stx->stx_mask is incorrect "
+			    "(wanted %x, got %x)\n",
+			    __func__,
+			    SAMBA_STATX_ATTR_MASK,
+			    stx->stx_mask);
+	}
+
+	dst->st_ex_dev = stx->stx_dev;
+	dst->st_ex_rdev = stx->stx_rdev;
+	dst->st_ex_ino = stx->stx_ino;
+	dst->st_ex_mode = stx->stx_mode;
+	dst->st_ex_uid = stx->stx_uid;
+	dst->st_ex_gid = stx->stx_gid;
+	dst->st_ex_size = stx->stx_size;
+	dst->st_ex_nlink = stx->stx_nlink;
+	dst->st_ex_atime = stx->stx_atime;
+	dst->st_ex_btime = stx->stx_btime;
+	dst->st_ex_ctime = stx->stx_ctime;
+	dst->st_ex_mtime = stx->stx_mtime;
+	dst->st_ex_blksize = stx->stx_blksize;
+	dst->st_ex_blocks = stx->stx_blocks;
+}
+/*
+ * stat by path (statx flags 0); the result is stored in smb_fname->st.
+ * Stream names are rejected with ENOENT. Returns the non-negative
+ * ceph_statx() result, or -1 with errno set.
+ */
+static int vfs_ceph_stat(struct vfs_handle_struct *handle,
+			 struct smb_filename *smb_fname)
+{
+	int result = -1;
+	struct ceph_statx stx = { 0 };
+
+	DBG_DEBUG("[CEPH] stat(%p, %s)\n",
+		  handle,
+		  smb_fname_str_dbg(smb_fname));
+
+	if (smb_fname->stream_name) {
+		errno = ENOENT;
+		return result;
+	}
+
+	result = ceph_statx(handle->data, smb_fname->base_name, &stx,
+			    SAMBA_STATX_ATTR_MASK, 0);
+	DBG_DEBUG("[CEPH] statx(...) = %d\n", result);
+	if (result < 0) {
+		return status_code(result);
+	}
+
+	init_stat_ex_from_ceph_statx(&smb_fname->st, &stx);
+	DBG_DEBUG("[CEPH] mode = 0x%x\n", smb_fname->st.st_ex_mode);
+	return result;
+}
+/*
+ * stat by descriptor (the fsp's pathref fd); the result is stored in sbuf.
+ * Returns the non-negative ceph_fstatx() result, or -1 with errno set.
+ */
+static int vfs_ceph_fstat(struct vfs_handle_struct *handle,
+			  files_struct *fsp,
+			  SMB_STRUCT_STAT *sbuf)
+{
+	int result = -1;
+	struct ceph_statx stx = { 0 };
+	int fd = fsp_get_pathref_fd(fsp);
+
+	DBG_DEBUG("[CEPH] fstat(%p, %d)\n", handle, fd);
+	result = ceph_fstatx(handle->data, fd, &stx,
+			     SAMBA_STATX_ATTR_MASK, 0);
+	DBG_DEBUG("[CEPH] fstat(...) = %d\n", result);
+	if (result < 0) {
+		return status_code(result);
+	}
+
+	init_stat_ex_from_ceph_statx(sbuf, &stx);
+	DBG_DEBUG("[CEPH] mode = 0x%x\n", sbuf->st_ex_mode);
+	return result;
+}
+/*
+ * stat smb_fname relative to dirfsp; the result is stored in sbuf.
+ * Returns 0 on success, or -1 with errno set.
+ *
+ * NOTE(review): the flags argument is not forwarded; 0 is always passed as
+ * the statx flags, so AT_SYMLINK_NOFOLLOW from the caller has no effect
+ * here. Confirm this is intended.
+ */
+static int vfs_ceph_fstatat(struct vfs_handle_struct *handle,
+			    const struct files_struct *dirfsp,
+			    const struct smb_filename *smb_fname,
+			    SMB_STRUCT_STAT *sbuf,
+			    int flags)
+{
+	int result = -1;
+	struct ceph_statx stx = { 0 };
+#ifdef HAVE_CEPH_STATXAT
+	int dirfd = fsp_get_pathref_fd(dirfsp);
+
+	DBG_DEBUG("[CEPH] fstatat(%p, %d, %s)\n",
+		  handle, dirfd, smb_fname->base_name);
+	result = ceph_statxat(handle->data, dirfd, smb_fname->base_name,
+			      &stx, SAMBA_STATX_ATTR_MASK, 0);
+#else
+	struct smb_filename *full_fname = NULL;
+
+	full_fname = full_path_from_dirfsp_atname(talloc_tos(),
+						  dirfsp,
+						  smb_fname);
+	if (full_fname == NULL) {
+		errno = ENOMEM;
+		return -1;
+	}
+
+	DBG_DEBUG("[CEPH] fstatat(%p, %s)\n",
+		  handle, smb_fname_str_dbg(full_fname));
+	result = ceph_statx(handle->data, full_fname->base_name,
+			    &stx, SAMBA_STATX_ATTR_MASK, 0);
+
+	TALLOC_FREE(full_fname);
+#endif
+
+	DBG_DEBUG("[CEPH] fstatat(...) = %d\n", result);
+	if (result < 0) {
+		return status_code(result);
+	}
+
+	init_stat_ex_from_ceph_statx(sbuf, &stx);
+	DBG_DEBUG("[CEPH] mode = 0x%x\n", sbuf->st_ex_mode);
+
+	return 0;
+}
+/*
+ * Like vfs_ceph_stat(), but passes AT_SYMLINK_NOFOLLOW to ceph_statx().
+ */
+static int vfs_ceph_lstat(struct vfs_handle_struct *handle,
+			  struct smb_filename *smb_fname)
+{
+	int result = -1;
+	struct ceph_statx stx = { 0 };
+
+	DBG_DEBUG("[CEPH] lstat(%p, %s)\n",
+		  handle,
+		  smb_fname_str_dbg(smb_fname));
+
+	if (smb_fname->stream_name) {
+		errno = ENOENT;
+		return result;
+	}
+
+	result = ceph_statx(handle->data, smb_fname->base_name, &stx,
+			    SAMBA_STATX_ATTR_MASK, AT_SYMLINK_NOFOLLOW);
+	DBG_DEBUG("[CEPH] lstat(...) = %d\n", result);
+	if (result < 0) {
+		return status_code(result);
+	}
+
+	init_stat_ex_from_ceph_statx(&smb_fname->st, &stx);
+	return result;
+}
+/*
+ * Set atime, mtime and/or create time of fsp. Only the timestamps that are
+ * not marked "omit" are put into the setattr mask; when none is requested
+ * the function returns 0 without calling libcephfs.
+ */
+static int vfs_ceph_fntimes(struct vfs_handle_struct *handle,
+			    files_struct *fsp,
+			    struct smb_file_time *ft)
+{
+	struct ceph_statx stx = { 0 };
+	int result;
+	int mask = 0;
+
+	if (!is_omit_timespec(&ft->atime)) {
+		stx.stx_atime = ft->atime;
+		mask |= CEPH_SETATTR_ATIME;
+	}
+	if (!is_omit_timespec(&ft->mtime)) {
+		stx.stx_mtime = ft->mtime;
+		mask |= CEPH_SETATTR_MTIME;
+	}
+	if (!is_omit_timespec(&ft->create_time)) {
+		stx.stx_btime = ft->create_time;
+		mask |= CEPH_SETATTR_BTIME;
+	}
+
+	if (!mask) {
+		return 0;
+	}
+
+	if (!fsp->fsp_flags.is_pathref) {
+		/*
+		 * We can use an io_fd to set xattrs.
+		 */
+		result = ceph_fsetattrx(handle->data,
+					fsp_get_io_fd(fsp),
+					&stx,
+					mask);
+	} else {
+		/*
+		 * This is no longer a handle based call.
+		 */
+		result = ceph_setattrx(handle->data,
+				       fsp->fsp_name->base_name,
+				       &stx,
+				       mask,
+				       0);
+	}
+
+	DBG_DEBUG("[CEPH] ntimes(%p, %s, {%ld, %ld, %ld, %ld}) = %d\n",
+		  handle, fsp_str_dbg(fsp), ft->mtime.tv_sec, ft->atime.tv_sec,
+		  ft->ctime.tv_sec, ft->create_time.tv_sec, result);
+
+	/*
+	 * libcephfs returns -errno; convert to the Samba convention
+	 * (-1 with errno set) like every other hook in this module.
+	 */
+	return status_code(result);
+}
+
+/*
+ * Remove smb_fname relative to dirfsp.
+ *
+ * With HAVE_CEPH_UNLINKAT the call is made relative to dirfsp's pathref fd
+ * and flags are passed through. Otherwise a full path is built and
+ * ceph_rmdir() or ceph_unlink() is chosen by AT_REMOVEDIR in flags; a
+ * failure to build the path is reported as ENOMEM. Stream names are
+ * rejected with ENOENT. Returns 0 or -1 with errno set.
+ */
+static int vfs_ceph_unlinkat(struct vfs_handle_struct *handle,
+			     struct files_struct *dirfsp,
+			     const struct smb_filename *smb_fname,
+			     int flags)
+{
+	int result = -1;
+#ifdef HAVE_CEPH_UNLINKAT
+	int dirfd = fsp_get_pathref_fd(dirfsp);
+
+	DBG_DEBUG("[CEPH] unlinkat(%p, %d, %s)\n",
+		  handle,
+		  dirfd,
+		  smb_fname_str_dbg(smb_fname));
+
+	if (smb_fname->stream_name) {
+		errno = ENOENT;
+		return result;
+	}
+
+	result = ceph_unlinkat(handle->data,
+			       dirfd,
+			       smb_fname->base_name,
+			       flags);
+	DBG_DEBUG("[CEPH] unlinkat(...) = %d\n", result);
+	return status_code(result);
+#else
+	struct smb_filename *full_fname = NULL;
+
+	DBG_DEBUG("[CEPH] unlink(%p, %s)\n",
+		  handle,
+		  smb_fname_str_dbg(smb_fname));
+
+	if (smb_fname->stream_name) {
+		errno = ENOENT;
+		return result;
+	}
+
+	full_fname = full_path_from_dirfsp_atname(talloc_tos(),
+						  dirfsp,
+						  smb_fname);
+	if (full_fname == NULL) {
+		errno = ENOMEM;
+		return -1;
+	}
+
+	if (flags & AT_REMOVEDIR) {
+		result = ceph_rmdir(handle->data, full_fname->base_name);
+	} else {
+		result = ceph_unlink(handle->data, full_fname->base_name);
+	}
+	TALLOC_FREE(full_fname);
+	DBG_DEBUG("[CEPH] unlink(...) = %d\n", result);
+	return status_code(result);
+#endif
+}
+
+/*
+ * Change the permission bits of fsp: by io fd for regular opens, by path
+ * for pathref opens.
+ */
+static int vfs_ceph_fchmod(struct vfs_handle_struct *handle,
+			   files_struct *fsp,
+			   mode_t mode)
+{
+	int result;
+
+	DBG_DEBUG("[CEPH] fchmod(%p, %p, %d)\n", handle, fsp, mode);
+	if (!fsp->fsp_flags.is_pathref) {
+		/*
+		 * We can use an io_fd to change permissions.
+		 */
+		result = ceph_fchmod(handle->data, fsp_get_io_fd(fsp), mode);
+	} else {
+		/*
+		 * This is no longer a handle based call.
+		 */
+		result = ceph_chmod(handle->data,
+				    fsp->fsp_name->base_name,
+				    mode);
+	}
+	DBG_DEBUG("[CEPH] fchmod(...) = %d\n", result);
+	return status_code(result);
+}
+
+/*
+ * Change owner/group of fsp: by io fd for regular opens, by path for
+ * pathref opens. Returns 0 or -1 with errno set.
+ */
+static int vfs_ceph_fchown(struct vfs_handle_struct *handle,
+			   files_struct *fsp,
+			   uid_t uid,
+			   gid_t gid)
+{
+	int result;
+
+	DBG_DEBUG("[CEPH] fchown(%p, %p, %d, %d)\n", handle, fsp, uid, gid);
+	if (!fsp->fsp_flags.is_pathref) {
+		/*
+		 * We can use an io_fd to change ownership.
+		 */
+		result = ceph_fchown(handle->data,
+				     fsp_get_io_fd(fsp),
+				     uid,
+				     gid);
+	} else {
+		/*
+		 * This is no longer a handle based call.
+		 */
+		result = ceph_chown(handle->data,
+				    fsp->fsp_name->base_name,
+				    uid,
+				    gid);
+	}
+
+	DBG_DEBUG("[CEPH] fchown(...) = %d\n", result);
+	return status_code(result);
+}
+
+/* Change owner/group by path via ceph_lchown(). */
+static int vfs_ceph_lchown(struct vfs_handle_struct *handle,
+			   const struct smb_filename *smb_fname,
+			   uid_t uid,
+			   gid_t gid)
+{
+	int result;
+	DBG_DEBUG("[CEPH] lchown(%p, %s, %d, %d)\n",
+		  handle,
+		  smb_fname->base_name,
+		  uid,
+		  gid);
+	result = ceph_lchown(handle->data, smb_fname->base_name, uid, gid);
+	DBG_DEBUG("[CEPH] lchown(...) = %d\n", result);
+	return status_code(result);
+}
+
+/* Change the mount's working directory via ceph_chdir(). */
+static int vfs_ceph_chdir(struct vfs_handle_struct *handle,
+			  const struct smb_filename *smb_fname)
+{
+	int result = -1;
+	DBG_DEBUG("[CEPH] chdir(%p, %s)\n", handle, smb_fname->base_name);
+	result = ceph_chdir(handle->data, smb_fname->base_name);
+	DBG_DEBUG("[CEPH] chdir(...) = %d\n", result);
+	return status_code(result);
+}
+
+/*
+ * Return the mount's working directory (ceph_getcwd()) as a synthetic
+ * smb_filename allocated on ctx.
+ */
+static struct smb_filename *vfs_ceph_getwd(struct vfs_handle_struct *handle,
+					   TALLOC_CTX *ctx)
+{
+	const char *cwd = ceph_getcwd(handle->data);
+	DBG_DEBUG("[CEPH] getwd(%p) = %s\n", handle, cwd);
+	return synthetic_smb_fname(ctx, cwd, NULL, NULL, 0, 0);
+}
+
+/*
+ * ftruncate variant used when "strict allocate" is enabled.
+ *
+ * FIFOs and unchanged sizes are a no-op; shrinking uses ceph_ftruncate();
+ * growing allocates the new range [old size, len) with ceph_fallocate().
+ * Returns 0 or -1 with errno set (-1 without an errno conversion when the
+ * initial vfs_stat_fsp() fails).
+ */
+static int strict_allocate_ftruncate(struct vfs_handle_struct *handle,
+				     files_struct *fsp,
+				     off_t len)
+{
+	off_t space_to_write;
+	int result;
+	NTSTATUS status;
+	SMB_STRUCT_STAT *pst;
+
+	status = vfs_stat_fsp(fsp);
+	if (!NT_STATUS_IS_OK(status)) {
+		return -1;
+	}
+	pst = &fsp->fsp_name->st;
+
+#ifdef S_ISFIFO
+	if (S_ISFIFO(pst->st_ex_mode)) {
+		return 0;
+	}
+#endif
+
+	if (pst->st_ex_size == len) {
+		return 0;
+	}
+
+	/* Shrink - just ftruncate. */
+	if (pst->st_ex_size > len) {
+		result = ceph_ftruncate(handle->data, fsp_get_io_fd(fsp), len);
+		return status_code(result);
+	}
+
+	space_to_write = len - pst->st_ex_size;
+	result = ceph_fallocate(handle->data,
+				fsp_get_io_fd(fsp),
+				0,
+				pst->st_ex_size,
+				space_to_write);
+	return status_code(result);
+}
+
+/*
+ * Set the size of fsp to len. With "strict allocate" enabled for the share
+ * the work is delegated to strict_allocate_ftruncate().
+ */
+static int vfs_ceph_ftruncate(struct vfs_handle_struct *handle,
+			      files_struct *fsp,
+			      off_t len)
+{
+	int result = -1;
+
+	DBG_DEBUG("[CEPH] ftruncate(%p, %p, %llu)\n", handle, fsp, llu(len));
+
+	if (lp_strict_allocate(SNUM(fsp->conn))) {
+		return strict_allocate_ftruncate(handle, fsp, len);
+	}
+
+	result = ceph_ftruncate(handle->data, fsp_get_io_fd(fsp), len);
+	return status_code(result);
+}
+
+/* Pass fallocate through to libcephfs; returns 0 or -1 with errno set. */
+static int vfs_ceph_fallocate(struct vfs_handle_struct *handle,
+			      struct files_struct *fsp,
+			      uint32_t mode,
+			      off_t offset,
+			      off_t len)
+{
+	int result;
+
+	DBG_DEBUG("[CEPH] fallocate(%p, %p, %u, %llu, %llu)\n",
+		  handle, fsp, mode, llu(offset), llu(len));
+	/* unsupported mode flags are rejected by libcephfs */
+	result = ceph_fallocate(
+		handle->data, fsp_get_io_fd(fsp), mode, offset, len);
+	DBG_DEBUG("[CEPH] fallocate(...) = %d\n", result);
+	return status_code(result);
+}
+
+/* Always reports success; no lock is taken and no argument is used. */
+static bool vfs_ceph_lock(struct vfs_handle_struct *handle,
+			  files_struct *fsp,
+			  int op,
+			  off_t offset,
+			  off_t count,
+			  int type)
+{
+	DBG_DEBUG("[CEPH] lock\n");
+	return true;
+}
+
+/*
+ * Log an error and defer to vfs_not_implemented_filesystem_sharemode().
+ */
+static int vfs_ceph_filesystem_sharemode(struct vfs_handle_struct *handle,
+					 files_struct *fsp,
+					 uint32_t share_access,
+					 uint32_t access_mask)
+{
+	DBG_ERR("[CEPH] filesystem sharemodes unsupported! Consider setting "
+		"\"kernel share modes = no\"\n");
+
+	return vfs_not_implemented_filesystem_sharemode(handle,
+							fsp,
+							share_access,
+							access_mask);
+}
+
+/*
+ * Minimal fcntl emulation: F_GETFL returns 0, F_SETFL with an argument of 0
+ * returns 0; anything else fails with -1 and errno = EINVAL.
+ */
+static int vfs_ceph_fcntl(vfs_handle_struct *handle,
+			  files_struct *fsp, int cmd, va_list cmd_arg)
+{
+	/*
+	 * SMB_VFS_FCNTL() is currently only called by vfs_set_blocking() to
+	 * clear O_NONBLOCK, etc for LOCK_MAND and FIFOs. Ignore it.
+	 */
+	if (cmd == F_GETFL) {
+		return 0;
+	} else if (cmd == F_SETFL) {
+		va_list dup_cmd_arg;
+		int opt;
+
+		/* read the argument from a copy; cmd_arg itself is untouched */
+		va_copy(dup_cmd_arg, cmd_arg);
+		opt = va_arg(dup_cmd_arg, int);
+		va_end(dup_cmd_arg);
+		if (opt == 0) {
+			return 0;
+		}
+		DBG_ERR("unexpected fcntl SETFL(%d)\n", opt);
+		goto err_out;
+	}
+	DBG_ERR("unexpected fcntl: %d\n", cmd);
+err_out:
+	errno = EINVAL;
+	return -1;
+}
+
+/* Always returns false with errno cleared; no argument is used. */
+static bool vfs_ceph_getlock(struct vfs_handle_struct *handle,
+			     files_struct *fsp,
+			     off_t *poffset,
+			     off_t *pcount,
+			     int *ptype,
+			     pid_t *ppid)
+{
+	DBG_DEBUG("[CEPH] getlock returning false and errno=0\n");
+
+	errno = 0;
+	return false;
+}
+
+/* Create symlink new_smb_fname (relative to dirfsp) pointing at link_target. */
+static int vfs_ceph_symlinkat(struct vfs_handle_struct *handle,
+			      const struct smb_filename *link_target,
+			      struct files_struct *dirfsp,
+			      const struct smb_filename *new_smb_fname)
+{
+	int result = -1;
+#ifdef HAVE_CEPH_SYMLINKAT
+	int dirfd = fsp_get_pathref_fd(dirfsp);
+
+	DBG_DEBUG("[CEPH] symlinkat(%p, %s, %d, %s)\n",
+		  handle,
+		  link_target->base_name,
+		  dirfd,
+		  new_smb_fname->base_name);
+
+	result = ceph_symlinkat(handle->data,
+				link_target->base_name,
+				dirfd,
+				new_smb_fname->base_name);
+	
DBG_DEBUG("[CEPH] symlinkat(...) = %d\n", result); + return status_code(result); +#else + struct smb_filename *full_fname = NULL; + + full_fname = full_path_from_dirfsp_atname(talloc_tos(), + dirfsp, + new_smb_fname); + if (full_fname == NULL) { + return -1; + } + + DBG_DEBUG("[CEPH] symlink(%p, %s, %s)\n", handle, + link_target->base_name, + full_fname->base_name); + + result = ceph_symlink(handle->data, + link_target->base_name, + full_fname->base_name); + TALLOC_FREE(full_fname); + DBG_DEBUG("[CEPH] symlink(...) = %d\n", result); + return status_code(result); +#endif +} + +static int vfs_ceph_readlinkat(struct vfs_handle_struct *handle, + const struct files_struct *dirfsp, + const struct smb_filename *smb_fname, + char *buf, + size_t bufsiz) +{ + int result = -1; +#ifdef HAVE_CEPH_READLINKAT + int dirfd = fsp_get_pathref_fd(dirfsp); + + DBG_DEBUG("[CEPH] readlinkat(%p, %d, %s, %p, %llu)\n", + handle, + dirfd, + smb_fname->base_name, + buf, + llu(bufsiz)); + + result = ceph_readlinkat(handle->data, + dirfd, + smb_fname->base_name, + buf, + bufsiz); + + DBG_DEBUG("[CEPH] readlinkat(...) = %d\n", result); + return status_code(result); +#else + struct smb_filename *full_fname = NULL; + + full_fname = full_path_from_dirfsp_atname(talloc_tos(), + dirfsp, + smb_fname); + if (full_fname == NULL) { + return -1; + } + + DBG_DEBUG("[CEPH] readlink(%p, %s, %p, %llu)\n", handle, + full_fname->base_name, buf, llu(bufsiz)); + + result = ceph_readlink(handle->data, + full_fname->base_name, + buf, + bufsiz); + TALLOC_FREE(full_fname); + DBG_DEBUG("[CEPH] readlink(...) 
= %d\n", result); + return status_code(result); +#endif +} + +static int vfs_ceph_linkat(struct vfs_handle_struct *handle, + files_struct *srcfsp, + const struct smb_filename *old_smb_fname, + files_struct *dstfsp, + const struct smb_filename *new_smb_fname, + int flags) +{ + struct smb_filename *full_fname_old = NULL; + struct smb_filename *full_fname_new = NULL; + int result = -1; + + full_fname_old = full_path_from_dirfsp_atname(talloc_tos(), + srcfsp, + old_smb_fname); + if (full_fname_old == NULL) { + return -1; + } + full_fname_new = full_path_from_dirfsp_atname(talloc_tos(), + dstfsp, + new_smb_fname); + if (full_fname_new == NULL) { + TALLOC_FREE(full_fname_old); + return -1; + } + + DBG_DEBUG("[CEPH] link(%p, %s, %s)\n", handle, + full_fname_old->base_name, + full_fname_new->base_name); + + result = ceph_link(handle->data, + full_fname_old->base_name, + full_fname_new->base_name); + DBG_DEBUG("[CEPH] link(...) = %d\n", result); + TALLOC_FREE(full_fname_old); + TALLOC_FREE(full_fname_new); + return status_code(result); +} + +static int vfs_ceph_mknodat(struct vfs_handle_struct *handle, + files_struct *dirfsp, + const struct smb_filename *smb_fname, + mode_t mode, + SMB_DEV_T dev) +{ + struct smb_filename *full_fname = NULL; + int result = -1; + + full_fname = full_path_from_dirfsp_atname(talloc_tos(), + dirfsp, + smb_fname); + if (full_fname == NULL) { + return -1; + } + + DBG_DEBUG("[CEPH] mknodat(%p, %s)\n", handle, full_fname->base_name); + result = ceph_mknod(handle->data, full_fname->base_name, mode, dev); + DBG_DEBUG("[CEPH] mknodat(...) = %d\n", result); + + TALLOC_FREE(full_fname); + + return status_code(result); +} + +/* + * This is a simple version of real-path ... a better version is needed to + * ask libcephfs about symbolic links. 
+ */ +static struct smb_filename *vfs_ceph_realpath(struct vfs_handle_struct *handle, + TALLOC_CTX *ctx, + const struct smb_filename *smb_fname) +{ + char *result = NULL; + const char *cwd = handle->conn->cwd_fsp->fsp_name->base_name; + const char *path = smb_fname->base_name; + size_t len = strlen(path); + struct smb_filename *result_fname = NULL; + + if (path[0] == '/') { + result = talloc_strdup(ctx, path); + } else if ((len >= 2) && (path[0] == '.') && (path[1] == '/')) { + if (len == 2) { + result = talloc_strdup(ctx, cwd); + } else { + result = talloc_asprintf(ctx, "%s/%s", cwd, &path[2]); + } + } else { + result = talloc_asprintf(ctx, "%s/%s", cwd, path); + } + + if (result == NULL) { + return NULL; + } + + DBG_DEBUG("[CEPH] realpath(%p, %s) = %s\n", handle, path, result); + result_fname = synthetic_smb_fname(ctx, result, NULL, NULL, 0, 0); + TALLOC_FREE(result); + return result_fname; +} + +static NTSTATUS vfs_ceph_get_real_filename_at( + struct vfs_handle_struct *handle, + struct files_struct *dirfsp, + const char *name, + TALLOC_CTX *mem_ctx, + char **found_name) +{ + /* + * Don't fall back to get_real_filename so callers can differentiate + * between a full directory scan and an actual case-insensitive stat. + */ + return NT_STATUS_NOT_SUPPORTED; +} + +static const char *vfs_ceph_connectpath( + struct vfs_handle_struct *handle, + const struct files_struct *dirfsp, + const struct smb_filename *smb_fname) +{ + return handle->conn->connectpath; +} + +static NTSTATUS vfs_ceph_fget_dos_attributes(struct vfs_handle_struct *handle, + struct files_struct *fsp, + uint32_t *dosmode) +{ + struct timespec saved_btime = fsp->fsp_name->st.st_ex_btime; + NTSTATUS status; + + status = fget_ea_dos_attribute(fsp, dosmode); + + /* + * Restore previously stored btime from statx timestamps as it should be + * the only source of truth. 
create_time from dos attribute, if any, may + * have older values which isn't trustworthy to be looked at for other + * open file handle operations. + */ + fsp->fsp_name->st.st_ex_btime = saved_btime; + + return status; +} + +static NTSTATUS vfs_ceph_fset_dos_attributes(struct vfs_handle_struct *handle, + struct files_struct *fsp, + uint32_t dosmode) +{ + struct timespec saved_btime = fsp->fsp_name->st.st_ex_btime; + NTSTATUS status; + + status = set_ea_dos_attribute(handle->conn, fsp->fsp_name, dosmode); + + /* + * Restore previously stored btime from statx timestamps. This is done + * to ensure that we have the exact btime in fsp stat information while + * the file handle is still open since the create_time stored as part of + * dos attributes can loose its precision when converted back to btime. + */ + fsp->fsp_name->st.st_ex_btime = saved_btime; + + return status; +} + +/**************************************************************** + Extended attribute operations. +*****************************************************************/ + +static ssize_t vfs_ceph_fgetxattr(struct vfs_handle_struct *handle, + struct files_struct *fsp, + const char *name, + void *value, + size_t size) +{ + int ret; + DBG_DEBUG("[CEPH] fgetxattr(%p, %p, %s, %p, %llu)\n", + handle, + fsp, + name, + value, + llu(size)); + if (!fsp->fsp_flags.is_pathref) { + ret = ceph_fgetxattr(handle->data, + fsp_get_io_fd(fsp), + name, + value, + size); + } else { + ret = ceph_getxattr(handle->data, + fsp->fsp_name->base_name, + name, + value, + size); + } + DBG_DEBUG("[CEPH] fgetxattr(...) = %d\n", ret); + return lstatus_code(ret); +} + +static ssize_t vfs_ceph_flistxattr(struct vfs_handle_struct *handle, + struct files_struct *fsp, + char *list, + size_t size) +{ + int ret; + DBG_DEBUG("[CEPH] flistxattr(%p, %p, %p, %llu)\n", + handle, fsp, list, llu(size)); + if (!fsp->fsp_flags.is_pathref) { + /* + * We can use an io_fd to list xattrs. 
+ */ + ret = ceph_flistxattr(handle->data, + fsp_get_io_fd(fsp), + list, + size); + } else { + /* + * This is no longer a handle based call. + */ + ret = ceph_listxattr(handle->data, + fsp->fsp_name->base_name, + list, + size); + } + DBG_DEBUG("[CEPH] flistxattr(...) = %d\n", ret); + return lstatus_code(ret); +} + +static int vfs_ceph_fremovexattr(struct vfs_handle_struct *handle, + struct files_struct *fsp, + const char *name) +{ + int ret; + DBG_DEBUG("[CEPH] fremovexattr(%p, %p, %s)\n", handle, fsp, name); + if (!fsp->fsp_flags.is_pathref) { + /* + * We can use an io_fd to remove xattrs. + */ + ret = ceph_fremovexattr(handle->data, fsp_get_io_fd(fsp), name); + } else { + /* + * This is no longer a handle based call. + */ + ret = ceph_removexattr(handle->data, + fsp->fsp_name->base_name, + name); + } + DBG_DEBUG("[CEPH] fremovexattr(...) = %d\n", ret); + return status_code(ret); +} + +static int vfs_ceph_fsetxattr(struct vfs_handle_struct *handle, + struct files_struct *fsp, + const char *name, + const void *value, + size_t size, + int flags) +{ + int ret; + DBG_DEBUG("[CEPH] fsetxattr(%p, %p, %s, %p, %llu, %d)\n", + handle, + fsp, + name, + value, + llu(size), + flags); + if (!fsp->fsp_flags.is_pathref) { + /* + * We can use an io_fd to set xattrs. + */ + ret = ceph_fsetxattr(handle->data, + fsp_get_io_fd(fsp), + name, + value, + size, + flags); + } else { + /* + * This is no longer a handle based call. + */ + ret = ceph_setxattr(handle->data, + fsp->fsp_name->base_name, + name, + value, + size, + flags); + } + DBG_DEBUG("[CEPH] fsetxattr(...) 
= %d\n", ret); + return status_code(ret); +} + +static NTSTATUS vfs_ceph_create_dfs_pathat(struct vfs_handle_struct *handle, + struct files_struct *dirfsp, + const struct smb_filename *smb_fname, + const struct referral *reflist, + size_t referral_count) +{ + TALLOC_CTX *frame = talloc_stackframe(); + NTSTATUS status = NT_STATUS_NO_MEMORY; + int ret; + char *msdfs_link = NULL; + struct smb_filename *full_fname = NULL; + + full_fname = full_path_from_dirfsp_atname(talloc_tos(), + dirfsp, + smb_fname); + if (full_fname == NULL) { + goto out; + } + + /* Form the msdfs_link contents */ + msdfs_link = msdfs_link_string(frame, + reflist, + referral_count); + if (msdfs_link == NULL) { + goto out; + } + + ret = ceph_symlink(handle->data, + msdfs_link, + full_fname->base_name); + if (ret == 0) { + status = NT_STATUS_OK; + } else { + status = map_nt_error_from_unix(-ret); + } + + out: + + DBG_DEBUG("[CEPH] create_dfs_pathat(%s) = %s\n", + full_fname != NULL ? full_fname->base_name : "", + nt_errstr(status)); + + TALLOC_FREE(frame); + return status; +} + +/* + * Read and return the contents of a DFS redirect given a + * pathname. A caller can pass in NULL for ppreflist and + * preferral_count but still determine if this was a + * DFS redirect point by getting NT_STATUS_OK back + * without incurring the overhead of reading and parsing + * the referral contents. 
+ */ + +static NTSTATUS vfs_ceph_read_dfs_pathat(struct vfs_handle_struct *handle, + TALLOC_CTX *mem_ctx, + struct files_struct *dirfsp, + struct smb_filename *smb_fname, + struct referral **ppreflist, + size_t *preferral_count) +{ + NTSTATUS status = NT_STATUS_NO_MEMORY; + size_t bufsize; + char *link_target = NULL; + int referral_len; + bool ok; +#if defined(HAVE_BROKEN_READLINK) + char link_target_buf[PATH_MAX]; +#else + char link_target_buf[7]; +#endif + struct ceph_statx stx = { 0 }; + struct smb_filename *full_fname = NULL; + int ret; + + if (is_named_stream(smb_fname)) { + status = NT_STATUS_OBJECT_NAME_NOT_FOUND; + goto err; + } + + if (ppreflist == NULL && preferral_count == NULL) { + /* + * We're only checking if this is a DFS + * redirect. We don't need to return data. + */ + bufsize = sizeof(link_target_buf); + link_target = link_target_buf; + } else { + bufsize = PATH_MAX; + link_target = talloc_array(mem_ctx, char, bufsize); + if (!link_target) { + goto err; + } + } + + full_fname = full_path_from_dirfsp_atname(talloc_tos(), + dirfsp, + smb_fname); + if (full_fname == NULL) { + status = NT_STATUS_NO_MEMORY; + goto err; + } + + ret = ceph_statx(handle->data, + full_fname->base_name, + &stx, + SAMBA_STATX_ATTR_MASK, + AT_SYMLINK_NOFOLLOW); + if (ret < 0) { + status = map_nt_error_from_unix(-ret); + goto err; + } + + referral_len = ceph_readlink(handle->data, + full_fname->base_name, + link_target, + bufsize - 1); + if (referral_len < 0) { + /* ceph errors are -errno. 
*/ + if (-referral_len == EINVAL) { + DBG_INFO("%s is not a link.\n", + full_fname->base_name); + status = NT_STATUS_OBJECT_TYPE_MISMATCH; + } else { + status = map_nt_error_from_unix(-referral_len); + DBG_ERR("Error reading " + "msdfs link %s: %s\n", + full_fname->base_name, + strerror(errno)); + } + goto err; + } + link_target[referral_len] = '\0'; + + DBG_INFO("%s -> %s\n", + full_fname->base_name, + link_target); + + if (!strnequal(link_target, "msdfs:", 6)) { + status = NT_STATUS_OBJECT_TYPE_MISMATCH; + goto err; + } + + if (ppreflist == NULL && preferral_count == NULL) { + /* Early return for checking if this is a DFS link. */ + TALLOC_FREE(full_fname); + init_stat_ex_from_ceph_statx(&smb_fname->st, &stx); + return NT_STATUS_OK; + } + + ok = parse_msdfs_symlink(mem_ctx, + lp_msdfs_shuffle_referrals(SNUM(handle->conn)), + link_target, + ppreflist, + preferral_count); + + if (ok) { + init_stat_ex_from_ceph_statx(&smb_fname->st, &stx); + status = NT_STATUS_OK; + } else { + status = NT_STATUS_NO_MEMORY; + } + + err: + + if (link_target != link_target_buf) { + TALLOC_FREE(link_target); + } + TALLOC_FREE(full_fname); + return status; +} + +static struct vfs_fn_pointers ceph_new_fns = { + /* Disk operations */ + + .connect_fn = vfs_ceph_connect, + .disconnect_fn = vfs_ceph_disconnect, + .disk_free_fn = vfs_ceph_disk_free, + .get_quota_fn = vfs_not_implemented_get_quota, + .set_quota_fn = vfs_not_implemented_set_quota, + .statvfs_fn = vfs_ceph_statvfs, + .fs_capabilities_fn = vfs_ceph_fs_capabilities, + + /* Directory operations */ + + .fdopendir_fn = vfs_ceph_fdopendir, + .readdir_fn = vfs_ceph_readdir, + .rewind_dir_fn = vfs_ceph_rewinddir, + .mkdirat_fn = vfs_ceph_mkdirat, + .closedir_fn = vfs_ceph_closedir, + + /* File operations */ + + .create_dfs_pathat_fn = vfs_ceph_create_dfs_pathat, + .read_dfs_pathat_fn = vfs_ceph_read_dfs_pathat, + .openat_fn = vfs_ceph_openat, + .close_fn = vfs_ceph_close, + .pread_fn = vfs_ceph_pread, + .pread_send_fn = 
vfs_ceph_pread_send, + .pread_recv_fn = vfs_ceph_pread_recv, + .pwrite_fn = vfs_ceph_pwrite, + .pwrite_send_fn = vfs_ceph_pwrite_send, + .pwrite_recv_fn = vfs_ceph_pwrite_recv, + .lseek_fn = vfs_ceph_lseek, + .sendfile_fn = vfs_ceph_sendfile, + .recvfile_fn = vfs_ceph_recvfile, + .renameat_fn = vfs_ceph_renameat, + .fsync_send_fn = vfs_ceph_fsync_send, + .fsync_recv_fn = vfs_ceph_fsync_recv, + .stat_fn = vfs_ceph_stat, + .fstat_fn = vfs_ceph_fstat, + .lstat_fn = vfs_ceph_lstat, + .fstatat_fn = vfs_ceph_fstatat, + .unlinkat_fn = vfs_ceph_unlinkat, + .fchmod_fn = vfs_ceph_fchmod, + .fchown_fn = vfs_ceph_fchown, + .lchown_fn = vfs_ceph_lchown, + .chdir_fn = vfs_ceph_chdir, + .getwd_fn = vfs_ceph_getwd, + .fntimes_fn = vfs_ceph_fntimes, + .ftruncate_fn = vfs_ceph_ftruncate, + .fallocate_fn = vfs_ceph_fallocate, + .lock_fn = vfs_ceph_lock, + .filesystem_sharemode_fn = vfs_ceph_filesystem_sharemode, + .fcntl_fn = vfs_ceph_fcntl, + .linux_setlease_fn = vfs_not_implemented_linux_setlease, + .getlock_fn = vfs_ceph_getlock, + .symlinkat_fn = vfs_ceph_symlinkat, + .readlinkat_fn = vfs_ceph_readlinkat, + .linkat_fn = vfs_ceph_linkat, + .mknodat_fn = vfs_ceph_mknodat, + .realpath_fn = vfs_ceph_realpath, + .fchflags_fn = vfs_not_implemented_fchflags, + .get_real_filename_at_fn = vfs_ceph_get_real_filename_at, + .connectpath_fn = vfs_ceph_connectpath, + .fget_dos_attributes_fn = vfs_ceph_fget_dos_attributes, + .fset_dos_attributes_fn = vfs_ceph_fset_dos_attributes, + + /* EA operations. 
*/ + .getxattrat_send_fn = vfs_not_implemented_getxattrat_send, + .getxattrat_recv_fn = vfs_not_implemented_getxattrat_recv, + .fgetxattr_fn = vfs_ceph_fgetxattr, + .flistxattr_fn = vfs_ceph_flistxattr, + .fremovexattr_fn = vfs_ceph_fremovexattr, + .fsetxattr_fn = vfs_ceph_fsetxattr, + + /* Posix ACL Operations */ + .sys_acl_get_fd_fn = posixacl_xattr_acl_get_fd, + .sys_acl_blob_get_fd_fn = posix_sys_acl_blob_get_fd, + .sys_acl_set_fd_fn = posixacl_xattr_acl_set_fd, + .sys_acl_delete_def_fd_fn = posixacl_xattr_acl_delete_def_fd, + + /* aio operations */ + .aio_force_fn = vfs_not_implemented_aio_force, +}; + +static_decl_vfs; +NTSTATUS vfs_ceph_new_init(TALLOC_CTX *ctx) +{ + return smb_register_vfs(SMB_VFS_INTERFACE_VERSION, + "ceph_new", &ceph_new_fns); +} diff --git a/source3/modules/wscript_build b/source3/modules/wscript_build index 1f0aa44ff1b..85708100189 100644 --- a/source3/modules/wscript_build +++ b/source3/modules/wscript_build @@ -539,6 +539,16 @@ bld.SAMBA3_MODULE('vfs_ceph', cflags=bld.CONFIG_GET('CFLAGS_CEPHFS'), includes=bld.CONFIG_GET('CPPPATH_CEPHFS')) +bld.SAMBA3_MODULE('vfs_ceph_new', + subsystem='vfs', + source='vfs_ceph_new.c', + deps='POSIXACL_XATTR samba-util cephfs', + init_function='', + internal_module=bld.SAMBA3_IS_STATIC_MODULE('vfs_ceph_new'), + enabled=bld.SAMBA3_IS_ENABLED_MODULE('vfs_ceph_new'), + cflags=bld.CONFIG_GET('CFLAGS_CEPHFS'), + includes=bld.CONFIG_GET('CPPPATH_CEPHFS')) + bld.SAMBA3_MODULE('vfs_ceph_snapshots', subsystem='vfs', source='vfs_ceph_snapshots.c', diff --git a/source3/wscript b/source3/wscript index 83aeb763ec4..d7e0f916c9e 100644 --- a/source3/wscript +++ b/source3/wscript @@ -1966,6 +1966,7 @@ int main(void) { # addition to vfs_ceph. Still, only enable vfs_ceph_snapshots builds # if we're building with libcephfs for now. 
default_shared_modules.extend(['vfs_ceph_snapshots']) + default_shared_modules.extend(['vfs_ceph_new']) if conf.CONFIG_SET('HAVE_GLUSTERFS'): default_shared_modules.extend(['vfs_glusterfs']) -- 2.46.0 From 96c1a887591e339bb1d33d472da93debe03a760c Mon Sep 17 00:00:00 2001 From: Shachar Sharon Date: Sun, 16 Jun 2024 14:50:08 +0300 Subject: [PATCH 02/34] vfs_ceph_new: use low-level APIs for disk_free Start using libcephfs low-level APIs: get reference to root inode and use it to query statfs. Requires an explicit put-inode to avoid resource leakage by libcephfs. BUG: https://bugzilla.samba.org/show_bug.cgi?id=15686 Signed-off-by: Shachar Sharon Reviewed-by: Guenther Deschner Reviewed-by: Anoop C S (cherry picked from commit 192b0cf8717d79197b985539c9db8ca07a89c570) --- source3/modules/vfs_ceph_new.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/source3/modules/vfs_ceph_new.c b/source3/modules/vfs_ceph_new.c index 313dcb5ac64..722a2aed4f4 100644 --- a/source3/modules/vfs_ceph_new.c +++ b/source3/modules/vfs_ceph_new.c @@ -344,19 +344,26 @@ static uint64_t vfs_ceph_disk_free(struct vfs_handle_struct *handle, uint64_t *dsize) { struct statvfs statvfs_buf = { 0 }; + struct Inode *inode = NULL; int ret; - ret = ceph_statfs(handle->data, smb_fname->base_name, &statvfs_buf); - if (ret < 0) { - DBG_DEBUG("[CEPH] ceph_statfs returned %d\n", ret); - return (uint64_t)status_code(ret); + ret = ceph_ll_lookup_root(handle->data, &inode); + if (ret != 0) { + DBG_DEBUG("[CEPH] ceph_ll_lookup_root returned %d\n", ret); + errno = -ret; + return (uint64_t)(-1); } - /* - * Provide all the correct values. 
- */ - *bsize = statvfs_buf.f_bsize; - *dfree = statvfs_buf.f_bavail; - *dsize = statvfs_buf.f_blocks; + ret = ceph_ll_statfs(handle->data, inode, &statvfs_buf); + ceph_ll_put(handle->data, inode); + if (ret != 0) { + DBG_DEBUG("[CEPH] ceph_ll_statfs returned %d\n", ret); + errno = -ret; + return (uint64_t)(-1); + } + *bsize = (uint64_t)statvfs_buf.f_bsize; + *dfree = (uint64_t)statvfs_buf.f_bavail; + *dsize = (uint64_t)statvfs_buf.f_blocks; + DBG_DEBUG("[CEPH] bsize: %llu, dfree: %llu, dsize: %llu\n", llu(*bsize), llu(*dfree), -- 2.46.0 From 69ea7f28bc0a31b82553f05d354d6a2e8c29bea6 Mon Sep 17 00:00:00 2001 From: Shachar Sharon Date: Mon, 17 Jun 2024 12:11:18 +0300 Subject: [PATCH 03/34] vfs_ceph_new: use low-level APIs for stat Start migrating to libcephfs' low-level APIs, using explicit Inode* reference. Implement the VFS 'stat' hook using a ceph_ll_getattr function, encapsulated with a pair of iget/iput to hold a pinned-to-cache Inode* instance. Upon calling to libcephfs this new code creates and destroys on-the-fly a Ceph UserPerm instance based on the uid, gid and groups from 'handle->conn->session_info->unix_token'. This logic ensures that the correct caller-credentials are passed-on to cephfs (instead of those set upon connection-creation in legacy 'vfs_ceph.c'). 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=15686 Signed-off-by: Shachar Sharon Reviewed-by: Guenther Deschner Reviewed-by: Anoop C S (cherry picked from commit 1b78d79663c48aa4b6810a875427de85ae49a2e8) --- source3/modules/vfs_ceph_new.c | 191 +++++++++++++++++++++++++++++++-- 1 file changed, 181 insertions(+), 10 deletions(-) diff --git a/source3/modules/vfs_ceph_new.c b/source3/modules/vfs_ceph_new.c index 722a2aed4f4..1b22a0026c3 100644 --- a/source3/modules/vfs_ceph_new.c +++ b/source3/modules/vfs_ceph_new.c @@ -335,6 +335,175 @@ static void vfs_ceph_disconnect(struct vfs_handle_struct *handle) handle->data = NULL; } +/* Ceph user-credentials */ +static struct UserPerm *vfs_ceph_userperm_new( + const struct vfs_handle_struct *handle) +{ + const struct security_unix_token *unix_token = NULL; + + unix_token = get_current_utok(handle->conn); + return ceph_userperm_new(unix_token->uid, + unix_token->gid, + unix_token->ngroups, + unix_token->groups); +} + +static void vfs_ceph_userperm_del(struct UserPerm *uperm) +{ + if (uperm != NULL) { + ceph_userperm_destroy(uperm); + } +} + +/* Ceph's statx to Samba's stat_ex */ +#define SAMBA_STATX_ATTR_MASK (CEPH_STATX_BASIC_STATS | CEPH_STATX_BTIME) + +static void smb_stat_from_ceph_statx(SMB_STRUCT_STAT *st, + const struct ceph_statx *stx) +{ + ZERO_STRUCTP(st); + + st->st_ex_dev = stx->stx_dev; + st->st_ex_rdev = stx->stx_rdev; + st->st_ex_ino = stx->stx_ino; + st->st_ex_mode = stx->stx_mode; + st->st_ex_uid = stx->stx_uid; + st->st_ex_gid = stx->stx_gid; + st->st_ex_size = stx->stx_size; + st->st_ex_nlink = stx->stx_nlink; + st->st_ex_atime = stx->stx_atime; + st->st_ex_btime = stx->stx_btime; + st->st_ex_ctime = stx->stx_ctime; + st->st_ex_mtime = stx->stx_mtime; + st->st_ex_blksize = stx->stx_blksize; + st->st_ex_blocks = stx->stx_blocks; +} + +/* Ceph's inode + ino-number */ +struct vfs_ceph_iref { + struct Inode *inode; + uint64_t ino; /* for debug printing */ +}; + +/* Ceph low-level wrappers */ + +static int 
vfs_ceph_ll_lookup_inode(const struct vfs_handle_struct *handle, + uint64_t inoval, + Inode **pout) +{ + struct inodeno_t ino = {.val = inoval}; + + return ceph_ll_lookup_inode(handle->data, ino, pout); +} + +static int vfs_ceph_ll_walk(const struct vfs_handle_struct *handle, + const char *name, + struct Inode **pin, + struct ceph_statx *stx, + unsigned int want, + unsigned int flags) +{ + struct UserPerm *uperm = NULL; + int ret = -1; + + uperm = vfs_ceph_userperm_new(handle); + if (uperm == NULL) { + return -ENOMEM; + } + ret = ceph_ll_walk(handle->data, name, pin, stx, want, flags, uperm); + vfs_ceph_userperm_del(uperm); + return ret; +} + +static int vfs_ceph_ll_getattr(const struct vfs_handle_struct *handle, + const struct vfs_ceph_iref *iref, + SMB_STRUCT_STAT *st) +{ + struct ceph_statx stx = {0}; + struct UserPerm *uperm = NULL; + int ret = -1; + + uperm = vfs_ceph_userperm_new(handle); + if (uperm == NULL) { + return -ENOMEM; + } + ret = ceph_ll_getattr(handle->data, + iref->inode, + &stx, + SAMBA_STATX_ATTR_MASK, + 0, + uperm); + if (ret == 0) { + smb_stat_from_ceph_statx(st, &stx); + } + vfs_ceph_userperm_del(uperm); + return ret; +} + +/* Ceph Inode-refernce get/put wrappers */ +static int vfs_ceph_iget(const struct vfs_handle_struct *handle, + uint64_t ino, + const char *name, + unsigned int flags, + struct vfs_ceph_iref *iref) +{ + struct Inode *inode = NULL; + int ret = -1; + + if (ino > CEPH_INO_ROOT) { + /* get-by-ino */ + ret = vfs_ceph_ll_lookup_inode(handle, ino, &inode); + if (ret != 0) { + return ret; + } + } else { + /* get-by-path */ + struct ceph_statx stx = {.stx_ino = 0}; + + ret = vfs_ceph_ll_walk(handle, + name, + &inode, + &stx, + CEPH_STATX_INO, + flags); + if (ret != 0) { + return ret; + } + ino = stx.stx_ino; + } + iref->inode = inode; + iref->ino = ino; + DBG_DEBUG("[CEPH] get-inode: %s ino=%" PRIu64 "\n", name, iref->ino); + return 0; +} + +static int vfs_ceph_iget_by_fname(const struct vfs_handle_struct *handle, + const struct 
smb_filename *smb_fname, + struct vfs_ceph_iref *iref) +{ + const char *name = smb_fname->base_name; + const char *cwd = ceph_getcwd(handle->data); + int ret = -1; + + if (!strcmp(name, cwd)) { + ret = vfs_ceph_iget(handle, 0, "./", 0, iref); + } else { + ret = vfs_ceph_iget(handle, 0, name, 0, iref); + } + return ret; +} + +static void vfs_ceph_iput(const struct vfs_handle_struct *handle, + struct vfs_ceph_iref *iref) +{ + if ((iref != NULL) && (iref->inode != NULL)) { + DBG_DEBUG("[CEPH] put-inode: ino=%" PRIu64 "\n", iref->ino); + + ceph_ll_put(handle->data, iref->inode); + iref->inode = NULL; + } +} + /* Disk operations */ static uint64_t vfs_ceph_disk_free(struct vfs_handle_struct *handle, @@ -876,8 +1045,6 @@ static int vfs_ceph_fsync_recv(struct tevent_req *req, return 0; } -#define SAMBA_STATX_ATTR_MASK (CEPH_STATX_BASIC_STATS|CEPH_STATX_BTIME) - static void init_stat_ex_from_ceph_statx(struct stat_ex *dst, const struct ceph_statx *stx) { @@ -920,7 +1087,7 @@ static int vfs_ceph_stat(struct vfs_handle_struct *handle, struct smb_filename *smb_fname) { int result = -1; - struct ceph_statx stx = { 0 }; + struct vfs_ceph_iref iref = {0}; DBG_DEBUG("[CEPH] stat(%p, %s)\n", handle, @@ -931,16 +1098,20 @@ static int vfs_ceph_stat(struct vfs_handle_struct *handle, return result; } - result = ceph_statx(handle->data, smb_fname->base_name, &stx, - SAMBA_STATX_ATTR_MASK, 0); - DBG_DEBUG("[CEPH] statx(...) 
= %d\n", result); - if (result < 0) { - return status_code(result); + result = vfs_ceph_iget_by_fname(handle, smb_fname, &iref); + if (result != 0) { + goto out; } - init_stat_ex_from_ceph_statx(&smb_fname->st, &stx); + DBG_DEBUG("[CEPH] stat: ino=%" PRIu64 "\n", iref.ino); + result = vfs_ceph_ll_getattr(handle, &iref, &smb_fname->st); + if (result != 0) { + goto out; + } DBG_DEBUG("[CEPH] mode = 0x%x\n", smb_fname->st.st_ex_mode); - return result; +out: + vfs_ceph_iput(handle, &iref); + return status_code(result); } static int vfs_ceph_fstat(struct vfs_handle_struct *handle, -- 2.46.0 From 77bed696903aa03e8345391d883e53ea8b771d17 Mon Sep 17 00:00:00 2001 From: Shachar Sharon Date: Mon, 17 Jun 2024 15:57:42 +0300 Subject: [PATCH 04/34] vfs_ceph_new: use low-level APIs for lstat Use libcephfs' low-level APIs and apply the same logic as stat, but using AT_SYMLINK_NOFOLLOW flags. BUG: https://bugzilla.samba.org/show_bug.cgi?id=15686 Signed-off-by: Shachar Sharon Reviewed-by: Guenther Deschner Reviewed-by: Anoop C S (cherry picked from commit 93d786b14358db5664e13b1aa43f3f03e7cf0be3) --- source3/modules/vfs_ceph_new.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/source3/modules/vfs_ceph_new.c b/source3/modules/vfs_ceph_new.c index 1b22a0026c3..97f4dcfb079 100644 --- a/source3/modules/vfs_ceph_new.c +++ b/source3/modules/vfs_ceph_new.c @@ -493,6 +493,17 @@ static int vfs_ceph_iget_by_fname(const struct vfs_handle_struct *handle, return ret; } +static int vfs_ceph_igetl(const struct vfs_handle_struct *handle, + const struct smb_filename *smb_fname, + struct vfs_ceph_iref *iref) +{ + return vfs_ceph_iget(handle, + 0, + smb_fname->base_name, + AT_SYMLINK_NOFOLLOW, + iref); +} + static void vfs_ceph_iput(const struct vfs_handle_struct *handle, struct vfs_ceph_iref *iref) { @@ -1181,10 +1192,10 @@ static int vfs_ceph_fstatat(struct vfs_handle_struct *handle, } static int vfs_ceph_lstat(struct vfs_handle_struct *handle, 
- struct smb_filename *smb_fname) + struct smb_filename *smb_fname) { int result = -1; - struct ceph_statx stx = { 0 }; + struct vfs_ceph_iref iref = {0}; DBG_DEBUG("[CEPH] lstat(%p, %s)\n", handle, @@ -1195,15 +1206,19 @@ static int vfs_ceph_lstat(struct vfs_handle_struct *handle, return result; } - result = ceph_statx(handle->data, smb_fname->base_name, &stx, - SAMBA_STATX_ATTR_MASK, AT_SYMLINK_NOFOLLOW); - DBG_DEBUG("[CEPH] lstat(...) = %d\n", result); - if (result < 0) { - return status_code(result); + result = vfs_ceph_igetl(handle, smb_fname, &iref); + if (result != 0) { + goto out; } - init_stat_ex_from_ceph_statx(&smb_fname->st, &stx); - return result; + result = vfs_ceph_ll_getattr(handle, &iref, &smb_fname->st); + if (result != 0) { + goto out; + } +out: + vfs_ceph_iput(handle, &iref); + DBG_DEBUG("[CEPH] lstat(...) = %d\n", result); + return status_code(result); } static int vfs_ceph_fntimes(struct vfs_handle_struct *handle, -- 2.46.0 From 0d5c3b8060fd0e992f421afc09981e427b773813 Mon Sep 17 00:00:00 2001 From: Shachar Sharon Date: Mon, 17 Jun 2024 18:02:07 +0300 Subject: [PATCH 05/34] vfs_ceph_new: use low-level APIs for statfs BUG: https://bugzilla.samba.org/show_bug.cgi?id=15686 Signed-off-by: Shachar Sharon Reviewed-by: Guenther Deschner Reviewed-by: Anoop C S (cherry picked from commit 47224fbdeb55100cf8a7ee75e13b954ab71fc158) --- source3/modules/vfs_ceph_new.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/source3/modules/vfs_ceph_new.c b/source3/modules/vfs_ceph_new.c index 97f4dcfb079..22228d32b78 100644 --- a/source3/modules/vfs_ceph_new.c +++ b/source3/modules/vfs_ceph_new.c @@ -415,6 +415,13 @@ static int vfs_ceph_ll_walk(const struct vfs_handle_struct *handle, return ret; } +static int vfs_ceph_ll_statfs(const struct vfs_handle_struct *handle, + const struct vfs_ceph_iref *iref, + struct statvfs *stbuf) +{ + return ceph_ll_statfs(handle->data, iref->inode, stbuf); +} + static int 
vfs_ceph_ll_getattr(const struct vfs_handle_struct *handle, const struct vfs_ceph_iref *iref, SMB_STRUCT_STAT *st) @@ -556,11 +563,17 @@ static int vfs_ceph_statvfs(struct vfs_handle_struct *handle, struct vfs_statvfs_struct *statbuf) { struct statvfs statvfs_buf = { 0 }; + struct vfs_ceph_iref iref = {0}; int ret; - ret = ceph_statfs(handle->data, smb_fname->base_name, &statvfs_buf); - if (ret < 0) { - return status_code(ret); + ret = vfs_ceph_iget_by_fname(handle, smb_fname, &iref); + if (ret != 0) { + goto out; + } + + ret = vfs_ceph_ll_statfs(handle, &iref, &statvfs_buf); + if (ret != 0) { + goto out; } statbuf->OptimalTransferSize = statvfs_buf.f_frsize; @@ -577,8 +590,9 @@ static int vfs_ceph_statvfs(struct vfs_handle_struct *handle, (long int)statvfs_buf.f_blocks, (long int)statvfs_buf.f_bfree, (long int)statvfs_buf.f_bavail); - - return ret; +out: + vfs_ceph_iput(handle, &iref); + return status_code(ret); } static uint32_t vfs_ceph_fs_capabilities( -- 2.46.0 From a881758787f2b2012a100f0eaedd9ae5f931ae5e Mon Sep 17 00:00:00 2001 From: Shachar Sharon Date: Mon, 17 Jun 2024 16:59:05 +0300 Subject: [PATCH 06/34] vfs_ceph_new: use low-level APIs for lchown Use libcephfs' low-level API ceph_ll_setattr to implement VFS lchown_fn hook. Use the standard pattern of iget/iput to allow operation by Inode reference. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=15686 Signed-off-by: Shachar Sharon Reviewed-by: Guenther Deschner Reviewed-by: Anoop C S (cherry picked from commit beb21324c9a554f50d8d99af2a1b7fe8a17c8ebb) --- source3/modules/vfs_ceph_new.c | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/source3/modules/vfs_ceph_new.c b/source3/modules/vfs_ceph_new.c index 22228d32b78..d005eb7d92f 100644 --- a/source3/modules/vfs_ceph_new.c +++ b/source3/modules/vfs_ceph_new.c @@ -447,6 +447,28 @@ static int vfs_ceph_ll_getattr(const struct vfs_handle_struct *handle, return ret; } +static int vfs_ceph_ll_chown(struct vfs_handle_struct *handle, + const struct vfs_ceph_iref *iref, + uid_t uid, + gid_t gid) +{ + struct ceph_statx stx = {.stx_uid = uid, .stx_gid = gid}; + struct UserPerm *uperm = NULL; + int ret = -1; + + uperm = vfs_ceph_userperm_new(handle); + if (uperm == NULL) { + return -ENOMEM; + } + ret = ceph_ll_setattr(handle->data, + iref->inode, + &stx, + CEPH_STATX_UID | CEPH_STATX_GID, + uperm); + vfs_ceph_userperm_del(uperm); + return ret; +} + /* Ceph Inode-refernce get/put wrappers */ static int vfs_ceph_iget(const struct vfs_handle_struct *handle, uint64_t ino, @@ -1401,12 +1423,22 @@ static int vfs_ceph_lchown(struct vfs_handle_struct *handle, gid_t gid) { int result; + struct vfs_ceph_iref iref = {0}; + DBG_DEBUG("[CEPH] lchown(%p, %s, %d, %d)\n", handle, smb_fname->base_name, uid, gid); - result = ceph_lchown(handle->data, smb_fname->base_name, uid, gid); + + result = vfs_ceph_igetl(handle, smb_fname, &iref); + if (result != 0) { + goto out; + } + + result = vfs_ceph_ll_chown(handle, &iref, uid, gid); + vfs_ceph_iput(handle, &iref); +out: DBG_DEBUG("[CEPH] lchown(...) 
= %d\n", result); return status_code(result); } -- 2.46.0 From 0f940955518c59abb6360eef509e854365193f58 Mon Sep 17 00:00:00 2001 From: Shachar Sharon Date: Tue, 18 Jun 2024 12:58:52 +0300 Subject: [PATCH 07/34] vfs_ceph_new: ref cephmount_cached entry in handle->data Allow direct access to ceph-mount cached-entry via 'handle->data' private pointer. Required in order to allow more complex cached-state with each cephfs mount. Users should now use the local-helper function 'cmount_of' to access the underlying ceph_mount_info. BUG: https://bugzilla.samba.org/show_bug.cgi?id=15686 Signed-off-by: Shachar Sharon Reviewed-by: Guenther Deschner Reviewed-by: Anoop C S (cherry picked from commit 31085c7efc3572bd6200d3d8e49c1e554cdbfbcc) --- source3/modules/vfs_ceph_new.c | 317 +++++++++++++++++++-------------- 1 file changed, 185 insertions(+), 132 deletions(-) diff --git a/source3/modules/vfs_ceph_new.c b/source3/modules/vfs_ceph_new.c index d005eb7d92f..424f52e9e32 100644 --- a/source3/modules/vfs_ceph_new.c +++ b/source3/modules/vfs_ceph_new.c @@ -82,7 +82,7 @@ static ssize_t lstatus_code(intmax_t ret) /* * Track unique connections, as virtual mounts, to cephfs file systems. - * Individual mounts will be set on the handle->data attribute, but + * Individual mount-entries will be set on the handle->data attribute, but * the mounts themselves will be shared so as not to spawn extra mounts * to the same cephfs. 
* @@ -98,7 +98,8 @@ static struct cephmount_cached { } *cephmount_cached; static int cephmount_cache_add(const char *cookie, - struct ceph_mount_info *mount) + struct ceph_mount_info *mount, + struct cephmount_cached **out_entry) { struct cephmount_cached *entry = NULL; @@ -120,10 +121,12 @@ static int cephmount_cache_add(const char *cookie, DBG_DEBUG("adding mount cache entry for %s\n", entry->cookie); DLIST_ADD(cephmount_cached, entry); + + *out_entry = entry; return 0; } -static struct ceph_mount_info *cephmount_cache_update(const char *cookie) +static struct cephmount_cached *cephmount_cache_update(const char *cookie) { struct cephmount_cached *entry = NULL; @@ -132,7 +135,7 @@ static struct ceph_mount_info *cephmount_cache_update(const char *cookie) entry->count++; DBG_DEBUG("updated mount cache: count is [%" PRIu32 "]\n", entry->count); - return entry->mount; + return entry; } } @@ -140,27 +143,18 @@ static struct ceph_mount_info *cephmount_cache_update(const char *cookie) return NULL; } -static int cephmount_cache_remove(struct ceph_mount_info *mount) +static int cephmount_cache_remove(struct cephmount_cached *entry) { - struct cephmount_cached *entry = NULL; - - for (entry = cephmount_cached; entry; entry = entry->next) { - if (entry->mount == mount) { - if (--entry->count) { - DBG_DEBUG("updated mount cache: count is [%" - PRIu32 "]\n", entry->count); - return entry->count; - } - - DBG_DEBUG("removing mount cache entry for %s\n", - entry->cookie); - DLIST_REMOVE(cephmount_cached, entry); - talloc_free(entry); - return 0; - } + if (--entry->count) { + DBG_DEBUG("updated mount cache: count is [%" PRIu32 "]\n", + entry->count); + return entry->count; } - errno = ENOENT; - return -1; + + DBG_DEBUG("removing mount cache entry for %s\n", entry->cookie); + DLIST_REMOVE(cephmount_cached, entry); + talloc_free(entry); + return 0; } static char *cephmount_get_cookie(TALLOC_CTX * mem_ctx, const int snum) @@ -276,6 +270,7 @@ static int vfs_ceph_connect(struct 
vfs_handle_struct *handle, const char *service, const char *user) { int ret = 0; + struct cephmount_cached *entry = NULL; struct ceph_mount_info *cmount = NULL; int snum = SNUM(handle->conn); char *cookie = cephmount_get_cookie(handle, snum); @@ -283,8 +278,8 @@ static int vfs_ceph_connect(struct vfs_handle_struct *handle, return -1; } - cmount = cephmount_cache_update(cookie); - if (cmount != NULL) { + entry = cephmount_cache_update(cookie); + if (entry != NULL) { goto connect_ok; } @@ -293,42 +288,48 @@ static int vfs_ceph_connect(struct vfs_handle_struct *handle, ret = -1; goto connect_fail; } - ret = cephmount_cache_add(cookie, cmount); - if (ret) { + ret = cephmount_cache_add(cookie, cmount, &entry); + if (ret != 0) { goto connect_fail; } - connect_ok: - handle->data = cmount; +connect_ok: + handle->data = entry; DBG_WARNING("Connection established with the server: %s\n", cookie); + /* * Unless we have an async implementation of getxattrat turn this off. */ lp_do_parameter(SNUM(handle->conn), "smbd async dosmode", "false"); - connect_fail: +connect_fail: talloc_free(cookie); return ret; } +static struct ceph_mount_info *cmount_of(const struct vfs_handle_struct *handle) +{ + const struct cephmount_cached *entry = handle->data; + + return entry->mount; +} + static void vfs_ceph_disconnect(struct vfs_handle_struct *handle) { - int ret = cephmount_cache_remove(handle->data); - if (ret < 0) { - DBG_ERR("failed to remove ceph mount from cache: %s\n", - strerror(errno)); - return; - } + struct ceph_mount_info *cmount = cmount_of(handle); + int ret = 0; + + ret = cephmount_cache_remove(handle->data); if (ret > 0) { DBG_DEBUG("mount cache entry still in use\n"); return; } - ret = ceph_unmount(handle->data); + ret = ceph_unmount(cmount); if (ret < 0) { DBG_ERR("[CEPH] failed to unmount: %s\n", strerror(-ret)); } - ret = ceph_release(handle->data); + ret = ceph_release(cmount); if (ret < 0) { DBG_ERR("[CEPH] failed to release: %s\n", strerror(-ret)); } @@ -393,7 +394,7 
@@ static int vfs_ceph_ll_lookup_inode(const struct vfs_handle_struct *handle, { struct inodeno_t ino = {.val = inoval}; - return ceph_ll_lookup_inode(handle->data, ino, pout); + return ceph_ll_lookup_inode(cmount_of(handle), ino, pout); } static int vfs_ceph_ll_walk(const struct vfs_handle_struct *handle, @@ -410,7 +411,15 @@ static int vfs_ceph_ll_walk(const struct vfs_handle_struct *handle, if (uperm == NULL) { return -ENOMEM; } - ret = ceph_ll_walk(handle->data, name, pin, stx, want, flags, uperm); + + ret = ceph_ll_walk(cmount_of(handle), + name, + pin, + stx, + want, + flags, + uperm); + vfs_ceph_userperm_del(uperm); return ret; } @@ -419,7 +428,7 @@ static int vfs_ceph_ll_statfs(const struct vfs_handle_struct *handle, const struct vfs_ceph_iref *iref, struct statvfs *stbuf) { - return ceph_ll_statfs(handle->data, iref->inode, stbuf); + return ceph_ll_statfs(cmount_of(handle), iref->inode, stbuf); } static int vfs_ceph_ll_getattr(const struct vfs_handle_struct *handle, @@ -434,7 +443,7 @@ static int vfs_ceph_ll_getattr(const struct vfs_handle_struct *handle, if (uperm == NULL) { return -ENOMEM; } - ret = ceph_ll_getattr(handle->data, + ret = ceph_ll_getattr(cmount_of(handle), iref->inode, &stx, SAMBA_STATX_ATTR_MASK, @@ -460,7 +469,7 @@ static int vfs_ceph_ll_chown(struct vfs_handle_struct *handle, if (uperm == NULL) { return -ENOMEM; } - ret = ceph_ll_setattr(handle->data, + ret = ceph_ll_setattr(cmount_of(handle), iref->inode, &stx, CEPH_STATX_UID | CEPH_STATX_GID, @@ -511,7 +520,7 @@ static int vfs_ceph_iget_by_fname(const struct vfs_handle_struct *handle, struct vfs_ceph_iref *iref) { const char *name = smb_fname->base_name; - const char *cwd = ceph_getcwd(handle->data); + const char *cwd = ceph_getcwd(cmount_of(handle)); int ret = -1; if (!strcmp(name, cwd)) { @@ -539,7 +548,7 @@ static void vfs_ceph_iput(const struct vfs_handle_struct *handle, if ((iref != NULL) && (iref->inode != NULL)) { DBG_DEBUG("[CEPH] put-inode: ino=%" PRIu64 "\n", iref->ino); - 
ceph_ll_put(handle->data, iref->inode); + ceph_ll_put(cmount_of(handle), iref->inode); iref->inode = NULL; } } @@ -556,14 +565,14 @@ static uint64_t vfs_ceph_disk_free(struct vfs_handle_struct *handle, struct Inode *inode = NULL; int ret; - ret = ceph_ll_lookup_root(handle->data, &inode); + ret = ceph_ll_lookup_root(cmount_of(handle), &inode); if (ret != 0) { DBG_DEBUG("[CEPH] ceph_ll_lookup_root returned %d\n", ret); errno = -ret; return (uint64_t)(-1); } - ret = ceph_ll_statfs(handle->data, inode, &statvfs_buf); - ceph_ll_put(handle->data, inode); + ret = ceph_ll_statfs(cmount_of(handle), inode, &statvfs_buf); + ceph_ll_put(cmount_of(handle), inode); if (ret != 0) { DBG_DEBUG("[CEPH] ceph_ll_statfs returned %d\n", ret); errno = -ret; @@ -641,10 +650,12 @@ static DIR *vfs_ceph_fdopendir(struct vfs_handle_struct *handle, #ifdef HAVE_CEPH_FDOPENDIR int dirfd = fsp_get_io_fd(fsp); DBG_DEBUG("[CEPH] fdopendir(%p, %d)\n", handle, dirfd); - ret = ceph_fdopendir(handle->data, dirfd, &result); + ret = ceph_fdopendir(cmount_of(handle), dirfd, &result); #else DBG_DEBUG("[CEPH] fdopendir(%p, %p)\n", handle, fsp); - ret = ceph_opendir(handle->data, fsp->fsp_name->base_name, &result); + ret = ceph_opendir(cmount_of(handle), + fsp->fsp_name->base_name, + &result); #endif if (ret < 0) { result = NULL; @@ -662,7 +673,8 @@ static struct dirent *vfs_ceph_readdir(struct vfs_handle_struct *handle, struct dirent *result = NULL; DBG_DEBUG("[CEPH] readdir(%p, %p)\n", handle, dirp); - result = ceph_readdir(handle->data, (struct ceph_dir_result *) dirp); + result = ceph_readdir(cmount_of(handle), + (struct ceph_dir_result *)dirp); DBG_DEBUG("[CEPH] readdir(...) 
= %p\n", result); return result; @@ -671,7 +683,7 @@ static struct dirent *vfs_ceph_readdir(struct vfs_handle_struct *handle, static void vfs_ceph_rewinddir(struct vfs_handle_struct *handle, DIR *dirp) { DBG_DEBUG("[CEPH] rewinddir(%p, %p)\n", handle, dirp); - ceph_rewinddir(handle->data, (struct ceph_dir_result *) dirp); + ceph_rewinddir(cmount_of(handle), (struct ceph_dir_result *)dirp); } static int vfs_ceph_mkdirat(struct vfs_handle_struct *handle, @@ -688,7 +700,10 @@ static int vfs_ceph_mkdirat(struct vfs_handle_struct *handle, dirfd, smb_fname->base_name); - result = ceph_mkdirat(handle->data, dirfd, smb_fname->base_name, mode); + result = ceph_mkdirat(cmount_of(handle), + dirfd, + smb_fname->base_name, + mode); DBG_DEBUG("[CEPH] mkdirat(...) = %d\n", result); @@ -706,7 +721,7 @@ static int vfs_ceph_mkdirat(struct vfs_handle_struct *handle, DBG_DEBUG("[CEPH] mkdir(%p, %s)\n", handle, smb_fname_str_dbg(full_fname)); - result = ceph_mkdir(handle->data, full_fname->base_name, mode); + result = ceph_mkdir(cmount_of(handle), full_fname->base_name, mode); TALLOC_FREE(full_fname); @@ -719,7 +734,8 @@ static int vfs_ceph_closedir(struct vfs_handle_struct *handle, DIR *dirp) int result; DBG_DEBUG("[CEPH] closedir(%p, %p)\n", handle, dirp); - result = ceph_closedir(handle->data, (struct ceph_dir_result *) dirp); + result = ceph_closedir(cmount_of(handle), + (struct ceph_dir_result *)dirp); DBG_DEBUG("[CEPH] closedir(...) 
= %d\n", result); return status_code(result); } @@ -769,7 +785,7 @@ static int vfs_ceph_openat(struct vfs_handle_struct *handle, became_root = true; } - result = ceph_openat(handle->data, + result = ceph_openat(cmount_of(handle), dirfd, smb_fname->base_name, flags, @@ -794,7 +810,10 @@ static int vfs_ceph_openat(struct vfs_handle_struct *handle, became_root = true; } - result = ceph_open(handle->data, smb_fname->base_name, flags, mode); + result = ceph_open(cmount_of(handle), + smb_fname->base_name, + flags, + mode); #endif if (became_root) { unbecome_root(); @@ -811,7 +830,7 @@ static int vfs_ceph_close(struct vfs_handle_struct *handle, files_struct *fsp) int result; DBG_DEBUG("[CEPH] close(%p, %p)\n", handle, fsp); - result = ceph_close(handle->data, fsp_get_pathref_fd(fsp)); + result = ceph_close(cmount_of(handle), fsp_get_pathref_fd(fsp)); DBG_DEBUG("[CEPH] close(...) = %d\n", result); return status_code(result); } @@ -831,7 +850,12 @@ static ssize_t vfs_ceph_pread(struct vfs_handle_struct *handle, llu(n), llu(offset)); - result = ceph_read(handle->data, fsp_get_io_fd(fsp), data, n, offset); + result = ceph_read(cmount_of(handle), + fsp_get_io_fd(fsp), + data, + n, + offset); + DBG_DEBUG("[CEPH] pread(...) = %llu\n", llu(result)); return lstatus_code(result); } @@ -861,7 +885,7 @@ static struct tevent_req *vfs_ceph_pread_send(struct vfs_handle_struct *handle, return NULL; } - ret = ceph_read(handle->data, fsp_get_io_fd(fsp), data, n, offset); + ret = ceph_read(cmount_of(handle), fsp_get_io_fd(fsp), data, n, offset); if (ret < 0) { /* ceph returns -errno on error. */ tevent_req_error(req, -ret); @@ -902,7 +926,13 @@ static ssize_t vfs_ceph_pwrite(struct vfs_handle_struct *handle, data, llu(n), llu(offset)); - result = ceph_write(handle->data, fsp_get_io_fd(fsp), data, n, offset); + + result = ceph_write(cmount_of(handle), + fsp_get_io_fd(fsp), + data, + n, + offset); + DBG_DEBUG("[CEPH] pwrite(...) 
= %llu\n", llu(result)); return lstatus_code(result); } @@ -932,7 +962,8 @@ static struct tevent_req *vfs_ceph_pwrite_send(struct vfs_handle_struct *handle, return NULL; } - ret = ceph_write(handle->data, fsp_get_io_fd(fsp), data, n, offset); + ret = ceph_write( + cmount_of(handle), fsp_get_io_fd(fsp), data, n, offset); if (ret < 0) { /* ceph returns -errno on error. */ tevent_req_error(req, -ret); @@ -967,7 +998,10 @@ static off_t vfs_ceph_lseek(struct vfs_handle_struct *handle, off_t result = 0; DBG_DEBUG("[CEPH] vfs_ceph_lseek\n"); - result = ceph_lseek(handle->data, fsp_get_io_fd(fsp), offset, whence); + result = ceph_lseek(cmount_of(handle), + fsp_get_io_fd(fsp), + offset, + whence); return lstatus_code(result); } @@ -1032,7 +1066,7 @@ static int vfs_ceph_renameat(struct vfs_handle_struct *handle, return -1; } - result = ceph_rename(handle->data, + result = ceph_rename(cmount_of(handle), full_fname_src->base_name, full_fname_dst->base_name); @@ -1063,7 +1097,7 @@ static struct tevent_req *vfs_ceph_fsync_send(struct vfs_handle_struct *handle, } /* Make sync call. */ - ret = ceph_fsync(handle->data, fsp_get_io_fd(fsp), false); + ret = ceph_fsync(cmount_of(handle), fsp_get_io_fd(fsp), false); if (ret != 0) { /* ceph_fsync returns -errno on error. 
*/ @@ -1195,8 +1229,12 @@ static int vfs_ceph_fstatat(struct vfs_handle_struct *handle, DBG_DEBUG("[CEPH] fstatat(%p, %d, %s)\n", handle, dirfd, smb_fname->base_name); - result = ceph_statxat(handle->data, dirfd, smb_fname->base_name, - &stx, SAMBA_STATX_ATTR_MASK, 0); + result = ceph_statxat(cmount_of(handle), + dirfd, + smb_fname->base_name, + &stx, + SAMBA_STATX_ATTR_MASK, + 0); #else struct smb_filename *full_fname = NULL; @@ -1210,8 +1248,11 @@ static int vfs_ceph_fstatat(struct vfs_handle_struct *handle, DBG_DEBUG("[CEPH] fstatat(%p, %s)\n", handle, smb_fname_str_dbg(full_fname)); - result = ceph_statx(handle->data, full_fname->base_name, - &stx, SAMBA_STATX_ATTR_MASK, 0); + result = ceph_statx(cmount_of(handle), + full_fname->base_name, + &stx, + SAMBA_STATX_ATTR_MASK, + 0); TALLOC_FREE(full_fname); #endif @@ -1286,7 +1327,7 @@ static int vfs_ceph_fntimes(struct vfs_handle_struct *handle, /* * We can use an io_fd to set xattrs. */ - result = ceph_fsetattrx(handle->data, + result = ceph_fsetattrx(cmount_of(handle), fsp_get_io_fd(fsp), &stx, mask); @@ -1294,7 +1335,7 @@ static int vfs_ceph_fntimes(struct vfs_handle_struct *handle, /* * This is no longer a handle based call. */ - result = ceph_setattrx(handle->data, + result = ceph_setattrx(cmount_of(handle), fsp->fsp_name->base_name, &stx, mask, @@ -1327,7 +1368,7 @@ static int vfs_ceph_unlinkat(struct vfs_handle_struct *handle, return result; } - result = ceph_unlinkat(handle->data, + result = ceph_unlinkat(cmount_of(handle), dirfd, smb_fname->base_name, flags); @@ -1353,9 +1394,9 @@ static int vfs_ceph_unlinkat(struct vfs_handle_struct *handle, } if (flags & AT_REMOVEDIR) { - result = ceph_rmdir(handle->data, full_fname->base_name); + result = ceph_rmdir(cmount_of(handle), full_fname->base_name); } else { - result = ceph_unlink(handle->data, full_fname->base_name); + result = ceph_unlink(cmount_of(handle), full_fname->base_name); } TALLOC_FREE(full_fname); DBG_DEBUG("[CEPH] unlink(...) 
= %d\n", result); @@ -1374,12 +1415,14 @@ static int vfs_ceph_fchmod(struct vfs_handle_struct *handle, /* * We can use an io_fd to change permissions. */ - result = ceph_fchmod(handle->data, fsp_get_io_fd(fsp), mode); + result = ceph_fchmod(cmount_of(handle), + fsp_get_io_fd(fsp), + mode); } else { /* * This is no longer a handle based call. */ - result = ceph_chmod(handle->data, + result = ceph_chmod(cmount_of(handle), fsp->fsp_name->base_name, mode); } @@ -1399,7 +1442,7 @@ static int vfs_ceph_fchown(struct vfs_handle_struct *handle, /* * We can use an io_fd to change ownership. */ - result = ceph_fchown(handle->data, + result = ceph_fchown(cmount_of(handle), fsp_get_io_fd(fsp), uid, gid); @@ -1407,7 +1450,7 @@ static int vfs_ceph_fchown(struct vfs_handle_struct *handle, /* * This is no longer a handle based call. */ - result = ceph_chown(handle->data, + result = ceph_chown(cmount_of(handle), fsp->fsp_name->base_name, uid, gid); @@ -1448,7 +1491,7 @@ static int vfs_ceph_chdir(struct vfs_handle_struct *handle, { int result = -1; DBG_DEBUG("[CEPH] chdir(%p, %s)\n", handle, smb_fname->base_name); - result = ceph_chdir(handle->data, smb_fname->base_name); + result = ceph_chdir(cmount_of(handle), smb_fname->base_name); DBG_DEBUG("[CEPH] chdir(...) = %d\n", result); return status_code(result); } @@ -1456,7 +1499,7 @@ static int vfs_ceph_chdir(struct vfs_handle_struct *handle, static struct smb_filename *vfs_ceph_getwd(struct vfs_handle_struct *handle, TALLOC_CTX *ctx) { - const char *cwd = ceph_getcwd(handle->data); + const char *cwd = ceph_getcwd(cmount_of(handle)); DBG_DEBUG("[CEPH] getwd(%p) = %s\n", handle, cwd); return synthetic_smb_fname(ctx, cwd, NULL, NULL, 0, 0); } @@ -1486,12 +1529,14 @@ static int strict_allocate_ftruncate(struct vfs_handle_struct *handle, /* Shrink - just ftruncate. 
*/ if (pst->st_ex_size > len) { - result = ceph_ftruncate(handle->data, fsp_get_io_fd(fsp), len); + result = ceph_ftruncate(cmount_of(handle), + fsp_get_io_fd(fsp), + len); return status_code(result); } space_to_write = len - pst->st_ex_size; - result = ceph_fallocate(handle->data, + result = ceph_fallocate(cmount_of(handle), fsp_get_io_fd(fsp), 0, pst->st_ex_size, @@ -1511,7 +1556,7 @@ static int vfs_ceph_ftruncate(struct vfs_handle_struct *handle, return strict_allocate_ftruncate(handle, fsp, len); } - result = ceph_ftruncate(handle->data, fsp_get_io_fd(fsp), len); + result = ceph_ftruncate(cmount_of(handle), fsp_get_io_fd(fsp), len); return status_code(result); } @@ -1526,8 +1571,11 @@ static int vfs_ceph_fallocate(struct vfs_handle_struct *handle, DBG_DEBUG("[CEPH] fallocate(%p, %p, %u, %llu, %llu\n", handle, fsp, mode, llu(offset), llu(len)); /* unsupported mode flags are rejected by libcephfs */ - result = ceph_fallocate( - handle->data, fsp_get_io_fd(fsp), mode, offset, len); + result = ceph_fallocate(cmount_of(handle), + fsp_get_io_fd(fsp), + mode, + offset, + len); DBG_DEBUG("[CEPH] fallocate(...) = %d\n", result); return status_code(result); } @@ -1613,7 +1661,7 @@ static int vfs_ceph_symlinkat(struct vfs_handle_struct *handle, dirfd, new_smb_fname->base_name); - result = ceph_symlinkat(handle->data, + result = ceph_symlinkat(cmount_of(handle), link_target->base_name, dirfd, new_smb_fname->base_name); @@ -1633,9 +1681,9 @@ static int vfs_ceph_symlinkat(struct vfs_handle_struct *handle, link_target->base_name, full_fname->base_name); - result = ceph_symlink(handle->data, - link_target->base_name, - full_fname->base_name); + result = ceph_symlink(cmount_of(handle), + link_target->base_name, + full_fname->base_name); TALLOC_FREE(full_fname); DBG_DEBUG("[CEPH] symlink(...) 
= %d\n", result); return status_code(result); @@ -1659,7 +1707,7 @@ static int vfs_ceph_readlinkat(struct vfs_handle_struct *handle, buf, llu(bufsiz)); - result = ceph_readlinkat(handle->data, + result = ceph_readlinkat(cmount_of(handle), dirfd, smb_fname->base_name, buf, @@ -1680,7 +1728,7 @@ static int vfs_ceph_readlinkat(struct vfs_handle_struct *handle, DBG_DEBUG("[CEPH] readlink(%p, %s, %p, %llu)\n", handle, full_fname->base_name, buf, llu(bufsiz)); - result = ceph_readlink(handle->data, + result = ceph_readlink(cmount_of(handle), full_fname->base_name, buf, bufsiz); @@ -1719,9 +1767,9 @@ static int vfs_ceph_linkat(struct vfs_handle_struct *handle, full_fname_old->base_name, full_fname_new->base_name); - result = ceph_link(handle->data, - full_fname_old->base_name, - full_fname_new->base_name); + result = ceph_link(cmount_of(handle), + full_fname_old->base_name, + full_fname_new->base_name); DBG_DEBUG("[CEPH] link(...) = %d\n", result); TALLOC_FREE(full_fname_old); TALLOC_FREE(full_fname_new); @@ -1745,7 +1793,10 @@ static int vfs_ceph_mknodat(struct vfs_handle_struct *handle, } DBG_DEBUG("[CEPH] mknodat(%p, %s)\n", handle, full_fname->base_name); - result = ceph_mknod(handle->data, full_fname->base_name, mode, dev); + result = ceph_mknod(cmount_of(handle), + full_fname->base_name, + mode, + dev); DBG_DEBUG("[CEPH] mknodat(...) = %d\n", result); TALLOC_FREE(full_fname); @@ -1869,13 +1920,13 @@ static ssize_t vfs_ceph_fgetxattr(struct vfs_handle_struct *handle, value, llu(size)); if (!fsp->fsp_flags.is_pathref) { - ret = ceph_fgetxattr(handle->data, + ret = ceph_fgetxattr(cmount_of(handle), fsp_get_io_fd(fsp), name, value, size); } else { - ret = ceph_getxattr(handle->data, + ret = ceph_getxattr(cmount_of(handle), fsp->fsp_name->base_name, name, value, @@ -1897,18 +1948,18 @@ static ssize_t vfs_ceph_flistxattr(struct vfs_handle_struct *handle, /* * We can use an io_fd to list xattrs. 
*/ - ret = ceph_flistxattr(handle->data, - fsp_get_io_fd(fsp), - list, - size); + ret = ceph_flistxattr(cmount_of(handle), + fsp_get_io_fd(fsp), + list, + size); } else { /* * This is no longer a handle based call. */ - ret = ceph_listxattr(handle->data, - fsp->fsp_name->base_name, - list, - size); + ret = ceph_listxattr(cmount_of(handle), + fsp->fsp_name->base_name, + list, + size); } DBG_DEBUG("[CEPH] flistxattr(...) = %d\n", ret); return lstatus_code(ret); @@ -1924,14 +1975,16 @@ static int vfs_ceph_fremovexattr(struct vfs_handle_struct *handle, /* * We can use an io_fd to remove xattrs. */ - ret = ceph_fremovexattr(handle->data, fsp_get_io_fd(fsp), name); + ret = ceph_fremovexattr(cmount_of(handle), + fsp_get_io_fd(fsp), + name); } else { /* * This is no longer a handle based call. */ - ret = ceph_removexattr(handle->data, - fsp->fsp_name->base_name, - name); + ret = ceph_removexattr(cmount_of(handle), + fsp->fsp_name->base_name, + name); } DBG_DEBUG("[CEPH] fremovexattr(...) = %d\n", ret); return status_code(ret); @@ -1956,22 +2009,22 @@ static int vfs_ceph_fsetxattr(struct vfs_handle_struct *handle, /* * We can use an io_fd to set xattrs. */ - ret = ceph_fsetxattr(handle->data, - fsp_get_io_fd(fsp), - name, - value, - size, - flags); + ret = ceph_fsetxattr(cmount_of(handle), + fsp_get_io_fd(fsp), + name, + value, + size, + flags); } else { /* * This is no longer a handle based call. */ - ret = ceph_setxattr(handle->data, - fsp->fsp_name->base_name, - name, - value, - size, - flags); + ret = ceph_setxattr(cmount_of(handle), + fsp->fsp_name->base_name, + name, + value, + size, + flags); } DBG_DEBUG("[CEPH] fsetxattr(...) 
 = %d\n", ret); return status_code(ret); @@ -2004,9 +2057,9 @@ static NTSTATUS vfs_ceph_create_dfs_pathat(struct vfs_handle_struct *handle, goto out; } - ret = ceph_symlink(handle->data, - msdfs_link, - full_fname->base_name); + ret = ceph_symlink(cmount_of(handle), + msdfs_link, + full_fname->base_name); if (ret == 0) { status = NT_STATUS_OK; } else { @@ -2081,7 +2134,7 @@ static NTSTATUS vfs_ceph_read_dfs_pathat(struct vfs_handle_struct *handle, goto err; } - ret = ceph_statx(handle->data, + ret = ceph_statx(cmount_of(handle), full_fname->base_name, &stx, SAMBA_STATX_ATTR_MASK, @@ -2091,11 +2144,11 @@ static NTSTATUS vfs_ceph_read_dfs_pathat(struct vfs_handle_struct *handle, goto err; } - referral_len = ceph_readlink(handle->data, - full_fname->base_name, - link_target, - bufsize - 1); - if (referral_len < 0) { + referral_len = ceph_readlink(cmount_of(handle), + full_fname->base_name, + link_target, + bufsize - 1); + if (referral_len < 0) { /* ceph errors are -errno. */ if (-referral_len == EINVAL) { DBG_INFO("%s is not a link.\n", @@ -2109,8 +2162,8 @@ static NTSTATUS vfs_ceph_read_dfs_pathat(struct vfs_handle_struct *handle, strerror(errno)); } goto err; - } - link_target[referral_len] = '\0'; + } + link_target[referral_len] = '\0'; DBG_INFO("%s -> %s\n", full_fname->base_name, -- 2.46.0 From db5e64457eb532eff6157b4ac610a5195d938b90 Mon Sep 17 00:00:00 2001 From: Shachar Sharon Date: Tue, 18 Jun 2024 15:02:52 +0300 Subject: [PATCH 08/34] vfs_ceph_new: use low-level APIs for open/close Implement openat, close and closedir hooks using libcephfs' low-level APIs. Cache the open Fh* from libcephfs and its related meta-data using VFS fsp-extension mechanism. Upon open-create of new vfs_ceph_fh store the caller credentials (ceph's UserPerm*) within the same context object for subsequent calls. In addition, provide a "pseudo" fd numbering which is reported back to VFS layer and used as debugging hints. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=15686 Signed-off-by: Shachar Sharon Reviewed-by: Guenther Deschner Reviewed-by: Anoop C S (cherry picked from commit 491676846458980944b76d1693726627a9a32503) --- source3/modules/vfs_ceph_new.c | 340 ++++++++++++++++++++++++++++----- 1 file changed, 290 insertions(+), 50 deletions(-) diff --git a/source3/modules/vfs_ceph_new.c b/source3/modules/vfs_ceph_new.c index 424f52e9e32..346bf6fbbdf 100644 --- a/source3/modules/vfs_ceph_new.c +++ b/source3/modules/vfs_ceph_new.c @@ -95,6 +95,7 @@ static struct cephmount_cached { uint32_t count; struct ceph_mount_info *mount; struct cephmount_cached *next, *prev; + uint64_t fd_index; } *cephmount_cached; static int cephmount_cache_add(const char *cookie, @@ -384,8 +385,111 @@ static void smb_stat_from_ceph_statx(SMB_STRUCT_STAT *st, struct vfs_ceph_iref { struct Inode *inode; uint64_t ino; /* for debug printing */ + bool owner; /* indicate when actual owner of Inode ref */ }; +/* Ceph DIR pointer wrapper */ +struct vfs_ceph_dirp { + struct ceph_dir_result *cdr; +}; + +/* Ceph file-handles via fsp-extension */ +struct vfs_ceph_fh { + struct vfs_ceph_dirp dirp; /* keep first for up-casting */ + struct cephmount_cached *cme; + struct UserPerm *uperm; + struct files_struct *fsp; + struct vfs_ceph_iref iref; + struct Fh *fh; + int fd; +}; + +static int cephmount_next_fd(struct cephmount_cached *cme) +{ + /* + * Those file-descriptor numbers are reported back to VFS layer + * (debug-hints only). Using numbers within a large range of + * [1000, 1001000], thus the chances of (annoying but harmless) + * collision are low. 
+ */ + uint64_t next; + + next = (cme->fd_index++ % 1000000) + 1000; + return (int)next; +} + +static int vfs_ceph_release_fh(struct vfs_ceph_fh *cfh) +{ + int ret = 0; + + if (cfh->fh != NULL) { + ret = ceph_ll_close(cfh->cme->mount, cfh->fh); + cfh->fh = NULL; + } + if (cfh->iref.inode != NULL) { + ceph_ll_put(cfh->cme->mount, cfh->iref.inode); + cfh->iref.inode = NULL; + } + if (cfh->uperm != NULL) { + vfs_ceph_userperm_del(cfh->uperm); + cfh->uperm = NULL; + } + cfh->fd = -1; + + return ret; +} + +static void vfs_ceph_fsp_ext_destroy_cb(void *p_data) +{ + vfs_ceph_release_fh((struct vfs_ceph_fh *)p_data); +} + +static int vfs_ceph_add_fh(struct vfs_handle_struct *handle, + files_struct *fsp, + struct vfs_ceph_fh **out_cfh) +{ + struct cephmount_cached *cme = handle->data; + struct UserPerm *uperm = NULL; + + uperm = vfs_ceph_userperm_new(handle); + if (uperm == NULL) { + return -ENOMEM; + } + + *out_cfh = VFS_ADD_FSP_EXTENSION(handle, + fsp, + struct vfs_ceph_fh, + vfs_ceph_fsp_ext_destroy_cb); + if (*out_cfh == NULL) { + vfs_ceph_userperm_del(uperm); + return -ENOMEM; + } + (*out_cfh)->cme = cme; + (*out_cfh)->uperm = uperm; + (*out_cfh)->fsp = fsp; + (*out_cfh)->fd = -1; + return 0; +} + +static void vfs_ceph_remove_fh(struct vfs_handle_struct *handle, + struct files_struct *fsp) +{ + VFS_REMOVE_FSP_EXTENSION(handle, fsp); +} + +static int vfs_ceph_fetch_fh(struct vfs_handle_struct *handle, + const struct files_struct *fsp, + struct vfs_ceph_fh **out_cfh) +{ + *out_cfh = VFS_FETCH_FSP_EXTENSION(handle, fsp); + return (*out_cfh == NULL) ? 
-EBADF : 0; +} + +static void vfs_ceph_assign_fh_fd(struct vfs_ceph_fh *cfh) +{ + cfh->fd = cephmount_next_fd(cfh->cme); /* debug only */ +} + /* Ceph low-level wrappers */ static int vfs_ceph_ll_lookup_inode(const struct vfs_handle_struct *handle, @@ -478,6 +582,98 @@ static int vfs_ceph_ll_chown(struct vfs_handle_struct *handle, return ret; } +static int vfs_ceph_ll_releasedir(const struct vfs_handle_struct *handle, + const struct vfs_ceph_fh *dircfh) +{ + return ceph_ll_releasedir(cmount_of(handle), dircfh->dirp.cdr); +} + +static int vfs_ceph_ll_create(const struct vfs_handle_struct *handle, + const struct vfs_ceph_iref *parent, + const char *name, + mode_t mode, + int oflags, + struct vfs_ceph_fh *cfh) +{ + struct ceph_statx stx = {.stx_ino = 0}; + struct Inode *inode = NULL; + struct Fh *fh = NULL; + int ret = -1; + + ret = ceph_ll_create(cmount_of(handle), + parent->inode, + name, + mode, + oflags, + &inode, + &fh, + &stx, + CEPH_STATX_INO, + 0, + cfh->uperm); + if (ret != 0) { + return ret; + } + + cfh->iref.inode = inode; + cfh->iref.ino = (long)stx.stx_ino; + cfh->iref.owner = true; + cfh->fh = fh; + vfs_ceph_assign_fh_fd(cfh); + + return 0; +} + +static int vfs_ceph_ll_lookup(const struct vfs_handle_struct *handle, + const struct vfs_ceph_iref *parent, + const char *name, + struct vfs_ceph_iref *iref) +{ + struct ceph_statx stx = {.stx_ino = 0}; + struct Inode *inode = NULL; + struct UserPerm *uperm = NULL; + int ret = -1; + + uperm = vfs_ceph_userperm_new(handle); + if (uperm == NULL) { + return -ENOMEM; + } + ret = ceph_ll_lookup(cmount_of(handle), + parent->inode, + name, + &inode, + &stx, + CEPH_STATX_INO, + 0, + uperm); + + vfs_ceph_userperm_del(uperm); + if (ret != 0) { + return ret; + } + + iref->inode = inode; + iref->ino = stx.stx_ino; + iref->owner = true; + return 0; +} + +static int vfs_ceph_ll_open(const struct vfs_handle_struct *handle, + struct vfs_ceph_fh *cfh, + int flags) +{ + struct Inode *in = cfh->iref.inode; + struct Fh *fh = NULL; 
+ int ret = -1; + + ret = ceph_ll_open(cmount_of(handle), in, flags, &fh, cfh->uperm); + if (ret == 0) { + cfh->fh = fh; + vfs_ceph_assign_fh_fd(cfh); + } + return ret; +} + /* Ceph Inode-refernce get/put wrappers */ static int vfs_ceph_iget(const struct vfs_handle_struct *handle, uint64_t ino, @@ -511,6 +707,7 @@ static int vfs_ceph_iget(const struct vfs_handle_struct *handle, } iref->inode = inode; iref->ino = ino; + iref->owner = true; DBG_DEBUG("[CEPH] get-inode: %s ino=%" PRIu64 "\n", name, iref->ino); return 0; } @@ -542,10 +739,39 @@ static int vfs_ceph_igetl(const struct vfs_handle_struct *handle, iref); } +static int vfs_ceph_igetd(struct vfs_handle_struct *handle, + const struct files_struct *dirfsp, + struct vfs_ceph_iref *iref) +{ + struct vfs_ceph_fh *dircfh = NULL; + int ret = -1; + + /* case-1: already have reference to open directory; re-ref */ + ret = vfs_ceph_fetch_fh(handle, dirfsp, &dircfh); + if (ret == 0) { + iref->inode = dircfh->iref.inode; + iref->ino = dircfh->iref.ino; + iref->owner = false; + return 0; + } + + /* case-2: resolve by current work-dir */ + if (fsp_get_pathref_fd(dirfsp) == AT_FDCWD) { + return vfs_ceph_iget(handle, 0, ".", 0, iref); + } + + /* case-3: resolve by parent dir and name */ + return vfs_ceph_iget(handle, + dirfsp->file_id.inode, + dirfsp->fsp_name->base_name, + AT_SYMLINK_NOFOLLOW, + iref); +} + static void vfs_ceph_iput(const struct vfs_handle_struct *handle, struct vfs_ceph_iref *iref) { - if ((iref != NULL) && (iref->inode != NULL)) { + if ((iref != NULL) && (iref->inode != NULL) && iref->owner) { DBG_DEBUG("[CEPH] put-inode: ino=%" PRIu64 "\n", iref->ino); ceph_ll_put(cmount_of(handle), iref->inode); @@ -732,10 +958,12 @@ static int vfs_ceph_mkdirat(struct vfs_handle_struct *handle, static int vfs_ceph_closedir(struct vfs_handle_struct *handle, DIR *dirp) { int result; + struct vfs_ceph_fh *cfh = (struct vfs_ceph_fh *)dirp; DBG_DEBUG("[CEPH] closedir(%p, %p)\n", handle, dirp); - result = 
ceph_closedir(cmount_of(handle), - (struct ceph_dir_result *)dirp); + result = vfs_ceph_ll_releasedir(handle, cfh); + vfs_ceph_release_fh(cfh); + vfs_ceph_remove_fh(handle, cfh->fsp); DBG_DEBUG("[CEPH] closedir(...) = %d\n", result); return status_code(result); } @@ -748,15 +976,11 @@ static int vfs_ceph_openat(struct vfs_handle_struct *handle, files_struct *fsp, const struct vfs_open_how *how) { + struct vfs_ceph_iref diref = {0}; + struct vfs_ceph_fh *cfh = NULL; int flags = how->flags; mode_t mode = how->mode; - struct smb_filename *name = NULL; - bool have_opath = false; - bool became_root = false; int result = -ENOENT; -#ifdef HAVE_CEPH_OPENAT - int dirfd = -1; -#endif if (how->resolve != 0) { errno = ENOSYS; @@ -764,62 +988,70 @@ static int vfs_ceph_openat(struct vfs_handle_struct *handle, } if (smb_fname->stream_name) { - goto out; + errno = ENOENT; + return -1; } #ifdef O_PATH - have_opath = true; if (fsp->fsp_flags.is_pathref) { flags |= O_PATH; } #endif -#ifdef HAVE_CEPH_OPENAT - dirfd = fsp_get_pathref_fd(dirfsp); - - DBG_DEBUG("[CEPH] openat(%p, %d, %p, %d, %d)\n", - handle, dirfd, fsp, flags, mode); + DBG_DEBUG("[CEPH] openat(%p, %p, %d, %d)\n", handle, fsp, flags, mode); - if (fsp->fsp_flags.is_pathref && !have_opath) { - become_root(); - became_root = true; - } - - result = ceph_openat(cmount_of(handle), - dirfd, - smb_fname->base_name, - flags, - mode); - -#else - if (fsp_get_pathref_fd(dirfsp) != AT_FDCWD) { - name = full_path_from_dirfsp_atname(talloc_tos(), - dirfsp, - smb_fname); - if (name == NULL) { - return -1; - } - smb_fname = name; + result = vfs_ceph_igetd(handle, dirfsp, &diref); + if (result != 0) { + goto out; } - DBG_DEBUG("[CEPH] openat(%p, %s, %p, %d, %d)\n", handle, - smb_fname_str_dbg(smb_fname), fsp, flags, mode); - - if (fsp->fsp_flags.is_pathref && !have_opath) { - become_root(); - became_root = true; + result = vfs_ceph_add_fh(handle, fsp, &cfh); + if (result != 0) { + goto out; } - result = ceph_open(cmount_of(handle), - 
smb_fname->base_name, - flags, - mode); + if (flags & O_CREAT) { + result = vfs_ceph_ll_create(handle, + &diref, + smb_fname->base_name, + mode, + flags, + cfh); + if (result != 0) { + vfs_ceph_remove_fh(handle, fsp); + goto out; + } + } else { + result = vfs_ceph_ll_lookup(handle, + &diref, + smb_fname->base_name, + &cfh->iref); + if (result != 0) { + vfs_ceph_remove_fh(handle, fsp); + goto out; + } +#ifdef O_PATH + if (flags & O_PATH) { + /* + * Special case: open with O_PATH: we already have + * Cephfs' Inode* from the above lookup so there is no + * need to go via expensive ceph_ll_open for Fh*. + */ + vfs_ceph_assign_fh_fd(cfh); + result = cfh->fd; + goto out; + } #endif - if (became_root) { - unbecome_root(); + result = vfs_ceph_ll_open(handle, cfh, flags); + if (result != 0) { + vfs_ceph_remove_fh(handle, fsp); + goto out; + } } + + result = cfh->fd; out: - TALLOC_FREE(name); + vfs_ceph_iput(handle, &diref); fsp->fsp_flags.have_proc_fds = false; DBG_DEBUG("[CEPH] open(...) = %d\n", result); return status_code(result); @@ -828,9 +1060,17 @@ out: static int vfs_ceph_close(struct vfs_handle_struct *handle, files_struct *fsp) { int result; + struct vfs_ceph_fh *cfh = NULL; DBG_DEBUG("[CEPH] close(%p, %p)\n", handle, fsp); - result = ceph_close(cmount_of(handle), fsp_get_pathref_fd(fsp)); + result = vfs_ceph_fetch_fh(handle, fsp, &cfh); + if (result != 0) { + goto out; + } + + result = vfs_ceph_release_fh(cfh); + vfs_ceph_remove_fh(handle, fsp); +out: DBG_DEBUG("[CEPH] close(...) = %d\n", result); return status_code(result); } -- 2.46.0 From 5e05cd1f288c02f9a1cbb7c21b7ba9c587fd65fe Mon Sep 17 00:00:00 2001 From: Shachar Sharon Date: Mon, 24 Jun 2024 11:39:43 +0300 Subject: [PATCH 09/34] vfs_ceph_new: use low-level APIs for fstat Use libcephfs' low-level APIs and apply the same logic as stat, but via explicit inode-reference. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=15686 Signed-off-by: Shachar Sharon Reviewed-by: Guenther Deschner Reviewed-by: Anoop C S (cherry picked from commit f16183f90abba3c2d3d26262926f1454275a9d3f) --- source3/modules/vfs_ceph_new.c | 50 ++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/source3/modules/vfs_ceph_new.c b/source3/modules/vfs_ceph_new.c index 346bf6fbbdf..f7482a9f0e2 100644 --- a/source3/modules/vfs_ceph_new.c +++ b/source3/modules/vfs_ceph_new.c @@ -535,18 +535,14 @@ static int vfs_ceph_ll_statfs(const struct vfs_handle_struct *handle, return ceph_ll_statfs(cmount_of(handle), iref->inode, stbuf); } -static int vfs_ceph_ll_getattr(const struct vfs_handle_struct *handle, - const struct vfs_ceph_iref *iref, - SMB_STRUCT_STAT *st) +static int vfs_ceph_ll_getattr2(const struct vfs_handle_struct *handle, + const struct vfs_ceph_iref *iref, + struct UserPerm *uperm, + SMB_STRUCT_STAT *st) { struct ceph_statx stx = {0}; - struct UserPerm *uperm = NULL; int ret = -1; - uperm = vfs_ceph_userperm_new(handle); - if (uperm == NULL) { - return -ENOMEM; - } ret = ceph_ll_getattr(cmount_of(handle), iref->inode, &stx, @@ -556,6 +552,21 @@ static int vfs_ceph_ll_getattr(const struct vfs_handle_struct *handle, if (ret == 0) { smb_stat_from_ceph_statx(st, &stx); } + return ret; +} + +static int vfs_ceph_ll_getattr(const struct vfs_handle_struct *handle, + const struct vfs_ceph_iref *iref, + SMB_STRUCT_STAT *st) +{ + struct UserPerm *uperm = NULL; + int ret = -1; + + uperm = vfs_ceph_userperm_new(handle); + if (uperm == NULL) { + return -ENOMEM; + } + ret = vfs_ceph_ll_getattr2(handle, iref, uperm, st); vfs_ceph_userperm_del(uperm); return ret; } @@ -1440,20 +1451,23 @@ static int vfs_ceph_fstat(struct vfs_handle_struct *handle, SMB_STRUCT_STAT *sbuf) { int result = -1; - struct ceph_statx stx = { 0 }; - int fd = fsp_get_pathref_fd(fsp); + struct vfs_ceph_fh *cfh = NULL; - DBG_DEBUG("[CEPH] fstat(%p, %d)\n", handle, 
fd); - result = ceph_fstatx(handle->data, fd, &stx, - SAMBA_STATX_ATTR_MASK, 0); - DBG_DEBUG("[CEPH] fstat(...) = %d\n", result); - if (result < 0) { - return status_code(result); + DBG_DEBUG("[CEPH] fstat(%p)\n", handle); + + result = vfs_ceph_fetch_fh(handle, fsp, &cfh); + if (result != 0) { + goto out; } - init_stat_ex_from_ceph_statx(sbuf, &stx); + result = vfs_ceph_ll_getattr2(handle, &cfh->iref, cfh->uperm, sbuf); + if (result != 0) { + goto out; + } DBG_DEBUG("[CEPH] mode = 0x%x\n", sbuf->st_ex_mode); - return result; +out: + DBG_DEBUG("[CEPH] fstat(...) = %d\n", result); + return status_code(result); } static int vfs_ceph_fstatat(struct vfs_handle_struct *handle, -- 2.46.0 From c13e01ed0aa39364b08ec5b0448dd5277721f797 Mon Sep 17 00:00:00 2001 From: Shachar Sharon Date: Wed, 19 Jun 2024 12:35:11 +0300 Subject: [PATCH 10/34] vfs_ceph_new: use low-level APIs for fstatat Use libcephfs' low-level APIs to do lookup-by-name via parent's open reference followed by getattr on the inode itself. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=15686 Signed-off-by: Shachar Sharon Reviewed-by: Guenther Deschner Reviewed-by: Anoop C S (cherry picked from commit 13671cefffb268d84c973583669681318a2ce3bb) --- source3/modules/vfs_ceph_new.c | 83 +++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 37 deletions(-) diff --git a/source3/modules/vfs_ceph_new.c b/source3/modules/vfs_ceph_new.c index f7482a9f0e2..8d3fad5ff29 100644 --- a/source3/modules/vfs_ceph_new.c +++ b/source3/modules/vfs_ceph_new.c @@ -669,6 +669,32 @@ static int vfs_ceph_ll_lookup(const struct vfs_handle_struct *handle, return 0; } +static int vfs_ceph_ll_lookupat(const struct vfs_handle_struct *handle, + const struct vfs_ceph_fh *parent_fh, + const char *name, + struct vfs_ceph_iref *iref) +{ + struct ceph_statx stx = {.stx_ino = 0}; + struct Inode *inode = NULL; + int ret = -1; + + ret = ceph_ll_lookup(cmount_of(handle), + parent_fh->iref.inode, + name, + &inode, + &stx, + CEPH_STATX_INO, + 0, + parent_fh->uperm); + if (ret != 0) { + return ret; + } + iref->inode = inode; + iref->ino = stx.stx_ino; + iref->owner = true; + return 0; +} + static int vfs_ceph_ll_open(const struct vfs_handle_struct *handle, struct vfs_ceph_fh *cfh, int flags) @@ -1477,49 +1503,32 @@ static int vfs_ceph_fstatat(struct vfs_handle_struct *handle, int flags) { int result = -1; - struct ceph_statx stx = { 0 }; -#ifdef HAVE_CEPH_STATXAT - int dirfd = fsp_get_pathref_fd(dirfsp); + struct vfs_ceph_iref iref = {0}; + struct vfs_ceph_fh *dircfh = NULL; - DBG_DEBUG("[CEPH] fstatat(%p, %d, %s)\n", - handle, dirfd, smb_fname->base_name); - result = ceph_statxat(cmount_of(handle), - dirfd, - smb_fname->base_name, - &stx, - SAMBA_STATX_ATTR_MASK, - 0); -#else - struct smb_filename *full_fname = NULL; + DBG_DEBUG("[CEPH] fstatat(%p, %s)\n", handle, smb_fname->base_name); - full_fname = full_path_from_dirfsp_atname(talloc_tos(), - dirfsp, - smb_fname); - if (full_fname == NULL) { - errno = ENOMEM; - return -1; + 
result = vfs_ceph_fetch_fh(handle, dirfsp, &dircfh); + if (result != 0) { + goto out; } - DBG_DEBUG("[CEPH] fstatat(%p, %s)\n", - handle, smb_fname_str_dbg(full_fname)); - result = ceph_statx(cmount_of(handle), - full_fname->base_name, - &stx, - SAMBA_STATX_ATTR_MASK, - 0); - - TALLOC_FREE(full_fname); -#endif - - DBG_DEBUG("[CEPH] fstatat(...) = %d\n", result); - if (result < 0) { - return status_code(result); + result = vfs_ceph_ll_lookupat(handle, + dircfh, + smb_fname->base_name, + &iref); + if (result != 0) { + goto out; } - init_stat_ex_from_ceph_statx(sbuf, &stx); - DBG_DEBUG("[CEPH] mode = 0x%x\n", sbuf->st_ex_mode); - - return 0; + result = vfs_ceph_ll_getattr2(handle, &iref, dircfh->uperm, sbuf); + if (result != 0) { + goto out; + } +out: + vfs_ceph_iput(handle, &iref); + DBG_DEBUG("[CEPH] fstatat(...) = %d\n", result); + return status_code(result); } static int vfs_ceph_lstat(struct vfs_handle_struct *handle, -- 2.46.0 From 9b525ca83ab5668b253cd596f1b2914039f71f3f Mon Sep 17 00:00:00 2001 From: Shachar Sharon Date: Tue, 18 Jun 2024 17:20:59 +0300 Subject: [PATCH 11/34] vfs_ceph_new: use low-level APIs for fdopendir Implement fdopendir using libcephfs low-level API and cached (via fsp) open file-handle. Embed the result within cached vfs_ceph_fh so it may be used properly by closedir. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=15686 Signed-off-by: Shachar Sharon Reviewed-by: Guenther Deschner Reviewed-by: Anoop C S (cherry picked from commit a8a7339c6b7a6866399fd6c409228267a585740f) --- source3/modules/vfs_ceph_new.c | 36 ++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/source3/modules/vfs_ceph_new.c b/source3/modules/vfs_ceph_new.c index 8d3fad5ff29..30b9ef2248d 100644 --- a/source3/modules/vfs_ceph_new.c +++ b/source3/modules/vfs_ceph_new.c @@ -711,6 +711,15 @@ static int vfs_ceph_ll_open(const struct vfs_handle_struct *handle, return ret; } +static int vfs_ceph_ll_opendir(const struct vfs_handle_struct *handle, + struct vfs_ceph_fh *cfh) +{ + return ceph_ll_opendir(cmount_of(handle), + cfh->iref.inode, + &cfh->dirp.cdr, + cfh->uperm); +} + /* Ceph Inode-refernce get/put wrappers */ static int vfs_ceph_iget(const struct vfs_handle_struct *handle, uint64_t ino, @@ -908,24 +917,25 @@ static DIR *vfs_ceph_fdopendir(struct vfs_handle_struct *handle, uint32_t attributes) { int ret = 0; - struct ceph_dir_result *result = NULL; + void *result = NULL; + struct vfs_ceph_fh *cfh = NULL; -#ifdef HAVE_CEPH_FDOPENDIR - int dirfd = fsp_get_io_fd(fsp); - DBG_DEBUG("[CEPH] fdopendir(%p, %d)\n", handle, dirfd); - ret = ceph_fdopendir(cmount_of(handle), dirfd, &result); -#else DBG_DEBUG("[CEPH] fdopendir(%p, %p)\n", handle, fsp); - ret = ceph_opendir(cmount_of(handle), - fsp->fsp_name->base_name, - &result); -#endif - if (ret < 0) { - result = NULL; - errno = -ret; /* We return result which is NULL in this case */ + ret = vfs_ceph_fetch_fh(handle, fsp, &cfh); + if (ret != 0) { + goto out; } + ret = vfs_ceph_ll_opendir(handle, cfh); + if (ret != 0) { + goto out; + } + result = &cfh->dirp; +out: DBG_DEBUG("[CEPH] fdopendir(...) 
= %d\n", ret); + if (ret != 0) { + errno = -ret; + } return (DIR *)result; } -- 2.46.0 From 567b1a59c5d13b1a23d02085d6c99080e5b32eee Mon Sep 17 00:00:00 2001 From: Shachar Sharon Date: Wed, 19 Jun 2024 11:55:27 +0300 Subject: [PATCH 12/34] vfs_ceph_new: use low-level APIs for mkdirat Implement 'mkdirat' hook using libcephfs' low-level APIs, via the open file-handle reference to parent directory. BUG: https://bugzilla.samba.org/show_bug.cgi?id=15686 Signed-off-by: Shachar Sharon Reviewed-by: Guenther Deschner Reviewed-by: Anoop C S (cherry picked from commit bd955af86e71fa6c87648e578890ea6f4d490d4b) --- source3/modules/vfs_ceph_new.c | 70 +++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 31 deletions(-) diff --git a/source3/modules/vfs_ceph_new.c b/source3/modules/vfs_ceph_new.c index 30b9ef2248d..6ca80893afb 100644 --- a/source3/modules/vfs_ceph_new.c +++ b/source3/modules/vfs_ceph_new.c @@ -720,6 +720,34 @@ static int vfs_ceph_ll_opendir(const struct vfs_handle_struct *handle, cfh->uperm); } +static int vfs_ceph_ll_mkdirat(const struct vfs_handle_struct *handle, + const struct vfs_ceph_fh *dircfh, + const char *name, + mode_t mode, + struct vfs_ceph_iref *iref) +{ + struct ceph_statx stx = {.stx_ino = 0}; + struct Inode *inode = NULL; + int ret = -1; + + ret = ceph_ll_mkdir(cmount_of(handle), + dircfh->iref.inode, + name, + mode, + &inode, + &stx, + CEPH_STATX_INO, + 0, + dircfh->uperm); + if (ret != 0) { + return ret; + } + iref->inode = inode; + iref->ino = stx.stx_ino; + iref->owner = true; + return false; +} + /* Ceph Inode-refernce get/put wrappers */ static int vfs_ceph_iget(const struct vfs_handle_struct *handle, uint64_t ino, @@ -965,41 +993,21 @@ static int vfs_ceph_mkdirat(struct vfs_handle_struct *handle, mode_t mode) { int result = -1; -#ifdef HAVE_CEPH_MKDIRAT - int dirfd = fsp_get_pathref_fd(dirfsp); - - DBG_DEBUG("[CEPH] mkdirat(%p, %d, %s)\n", - handle, - dirfd, - smb_fname->base_name); - - result = 
ceph_mkdirat(cmount_of(handle), - dirfd, - smb_fname->base_name, - mode); - - DBG_DEBUG("[CEPH] mkdirat(...) = %d\n", result); - - return status_code(result); -#else - struct smb_filename *full_fname = NULL; + const char *name = smb_fname->base_name; + struct vfs_ceph_fh *dircfh = NULL; + struct vfs_ceph_iref iref = {0}; - full_fname = full_path_from_dirfsp_atname(talloc_tos(), - dirfsp, - smb_fname); - if (full_fname == NULL) { - return -1; + DBG_DEBUG("[CEPH] mkdirat(%p, %s)\n", handle, name); + result = vfs_ceph_fetch_fh(handle, dirfsp, &dircfh); + if (result != 0) { + goto out; } - DBG_DEBUG("[CEPH] mkdir(%p, %s)\n", - handle, smb_fname_str_dbg(full_fname)); - - result = ceph_mkdir(cmount_of(handle), full_fname->base_name, mode); - - TALLOC_FREE(full_fname); - + result = vfs_ceph_ll_mkdirat(handle, dircfh, name, mode, &iref); + vfs_ceph_iput(handle, &iref); +out: + DBG_DEBUG("[CEPH] mkdirat(...) = %d\n", result); return status_code(result); -#endif } static int vfs_ceph_closedir(struct vfs_handle_struct *handle, DIR *dirp) -- 2.46.0 From 74686419317a48ed17526a24b206f2f0777dfc6f Mon Sep 17 00:00:00 2001 From: Shachar Sharon Date: Mon, 24 Jun 2024 13:33:05 +0300 Subject: [PATCH 13/34] vfs_ceph_new: use low-level APIs for readdir ops Implement readdir and rewinddir operations using libcephfs' low-level APIs. Casts the opaque DIR pointer into struct vfs_ceph_dirp (the first member of struct vfs_ceph_fh) to resolve the ceph_dir_result pointer which libcephfs expects for readdir operations. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=15686 Signed-off-by: Shachar Sharon Reviewed-by: Guenther Deschner Reviewed-by: Anoop C S (cherry picked from commit 99c7179e5da6d201f03b1a04dbe2a6722090783d) --- source3/modules/vfs_ceph_new.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/source3/modules/vfs_ceph_new.c b/source3/modules/vfs_ceph_new.c index 6ca80893afb..9259d597ffb 100644 --- a/source3/modules/vfs_ceph_new.c +++ b/source3/modules/vfs_ceph_new.c @@ -720,6 +720,18 @@ static int vfs_ceph_ll_opendir(const struct vfs_handle_struct *handle, cfh->uperm); } +static struct dirent *vfs_ceph_ll_readdir(const struct vfs_handle_struct *hndl, + const struct vfs_ceph_fh *dircfh) +{ + return ceph_readdir(cmount_of(hndl), dircfh->dirp.cdr); +} + +static void vfs_ceph_ll_rewinddir(const struct vfs_handle_struct *handle, + const struct vfs_ceph_fh *dircfh) +{ + ceph_rewinddir(cmount_of(handle), dircfh->dirp.cdr); +} + static int vfs_ceph_ll_mkdirat(const struct vfs_handle_struct *handle, const struct vfs_ceph_fh *dircfh, const char *name, @@ -971,11 +983,11 @@ static struct dirent *vfs_ceph_readdir(struct vfs_handle_struct *handle, struct files_struct *dirfsp, DIR *dirp) { + const struct vfs_ceph_fh *dircfh = (const struct vfs_ceph_fh *)dirp; struct dirent *result = NULL; DBG_DEBUG("[CEPH] readdir(%p, %p)\n", handle, dirp); - result = ceph_readdir(cmount_of(handle), - (struct ceph_dir_result *)dirp); + result = vfs_ceph_ll_readdir(handle, dircfh); DBG_DEBUG("[CEPH] readdir(...) 
= %p\n", result); return result; @@ -983,8 +995,10 @@ static struct dirent *vfs_ceph_readdir(struct vfs_handle_struct *handle, static void vfs_ceph_rewinddir(struct vfs_handle_struct *handle, DIR *dirp) { + const struct vfs_ceph_fh *dircfh = (const struct vfs_ceph_fh *)dirp; + DBG_DEBUG("[CEPH] rewinddir(%p, %p)\n", handle, dirp); - ceph_rewinddir(cmount_of(handle), (struct ceph_dir_result *)dirp); + vfs_ceph_ll_rewinddir(handle, dircfh); } static int vfs_ceph_mkdirat(struct vfs_handle_struct *handle, -- 2.46.0 From 19b41bbdadae001ae24fcc102f11708b2be23252 Mon Sep 17 00:00:00 2001 From: Shachar Sharon Date: Wed, 17 Jul 2024 11:41:13 +0300 Subject: [PATCH 14/34] vfs_ceph_new: proper error handling to readdir Error handling in the case of 'ceph_readdir' is done by setting 'errno' deep within libcephfs code. In case of error, emit proper debug message and re-update errno to avoid possible over-write by logging mechanism. BUG: https://bugzilla.samba.org/show_bug.cgi?id=15686 Signed-off-by: Shachar Sharon Reviewed-by: Guenther Deschner Reviewed-by: Anoop C S (cherry picked from commit 24a3423949e127177c019a0d126c6f7523e61984) --- source3/modules/vfs_ceph_new.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/source3/modules/vfs_ceph_new.c b/source3/modules/vfs_ceph_new.c index 9259d597ffb..e106a9bb4cb 100644 --- a/source3/modules/vfs_ceph_new.c +++ b/source3/modules/vfs_ceph_new.c @@ -985,11 +985,19 @@ static struct dirent *vfs_ceph_readdir(struct vfs_handle_struct *handle, { const struct vfs_ceph_fh *dircfh = (const struct vfs_ceph_fh *)dirp; struct dirent *result = NULL; + int errval = 0; DBG_DEBUG("[CEPH] readdir(%p, %p)\n", handle, dirp); + errno = 0; result = vfs_ceph_ll_readdir(handle, dircfh); - DBG_DEBUG("[CEPH] readdir(...) = %p\n", result); - + errval = errno; + if ((result == NULL) && (errval != 0)) { + DBG_DEBUG("[CEPH] readdir(...) = %d\n", errval); + } else { + DBG_DEBUG("[CEPH] readdir(...) 
= %p\n", result); + } + /* re-assign errno to avoid possible over-write by DBG_DEBUG */ + errno = errval; return result; } -- 2.46.0 From 028bbcaa8e2add72d4bd2be1570d262845ba8e33 Mon Sep 17 00:00:00 2001 From: Shachar Sharon Date: Wed, 19 Jun 2024 12:48:14 +0300 Subject: [PATCH 15/34] vfs_ceph_new: use low-level APIs for fchown/fchmod Use libcephfs' low-level APIs to implement 'fchown' and 'fchmod' using open file-handle. If fsp does not have an open cephfs Fh reference, set errno to EBADF and return -1 to VFS. BUG: https://bugzilla.samba.org/show_bug.cgi?id=15686 Signed-off-by: Shachar Sharon Reviewed-by: Guenther Deschner Reviewed-by: Anoop C S (cherry picked from commit cb14d3630d8c110405c2a43bef15aa31ec4a0fba) --- source3/modules/vfs_ceph_new.c | 79 +++++++++++++++++++++------------- 1 file changed, 48 insertions(+), 31 deletions(-) diff --git a/source3/modules/vfs_ceph_new.c b/source3/modules/vfs_ceph_new.c index e106a9bb4cb..20cd95ef16c 100644 --- a/source3/modules/vfs_ceph_new.c +++ b/source3/modules/vfs_ceph_new.c @@ -485,6 +485,14 @@ static int vfs_ceph_fetch_fh(struct vfs_handle_struct *handle, return (*out_cfh == NULL) ? -EBADF : 0; } +static int vfs_ceph_fetch_io_fh(struct vfs_handle_struct *handle, + const struct files_struct *fsp, + struct vfs_ceph_fh **out_cfh) +{ + *out_cfh = VFS_FETCH_FSP_EXTENSION(handle, fsp); + return (*out_cfh == NULL) || ((*out_cfh)->fh == NULL) ? 
-EBADF : 0; +} + static void vfs_ceph_assign_fh_fd(struct vfs_ceph_fh *cfh) { cfh->fd = cephmount_next_fd(cfh->cme); /* debug only */ @@ -593,6 +601,33 @@ static int vfs_ceph_ll_chown(struct vfs_handle_struct *handle, return ret; } +static int vfs_ceph_ll_fchown(struct vfs_handle_struct *handle, + const struct vfs_ceph_fh *cfh, + uid_t uid, + gid_t gid) +{ + struct ceph_statx stx = {.stx_uid = uid, .stx_gid = gid}; + + return ceph_ll_setattr(cmount_of(handle), + cfh->iref.inode, + &stx, + CEPH_STATX_UID | CEPH_STATX_GID, + cfh->uperm); +} + +static int vfs_ceph_ll_fchmod(struct vfs_handle_struct *handle, + const struct vfs_ceph_fh *cfh, + mode_t mode) +{ + struct ceph_statx stx = {.stx_mode = mode}; + + return ceph_ll_setattr(cmount_of(handle), + cfh->iref.inode, + &stx, + CEPH_STATX_MODE, + cfh->uperm); +} + static int vfs_ceph_ll_releasedir(const struct vfs_handle_struct *handle, const struct vfs_ceph_fh *dircfh) { @@ -1712,23 +1747,16 @@ static int vfs_ceph_fchmod(struct vfs_handle_struct *handle, mode_t mode) { int result; + struct vfs_ceph_fh *cfh = NULL; DBG_DEBUG("[CEPH] fchmod(%p, %p, %d)\n", handle, fsp, mode); - if (!fsp->fsp_flags.is_pathref) { - /* - * We can use an io_fd to change permissions. - */ - result = ceph_fchmod(cmount_of(handle), - fsp_get_io_fd(fsp), - mode); - } else { - /* - * This is no longer a handle based call. - */ - result = ceph_chmod(cmount_of(handle), - fsp->fsp_name->base_name, - mode); + result = vfs_ceph_fetch_io_fh(handle, fsp, &cfh); + if (result != 0) { + goto out; } + + result = vfs_ceph_ll_fchmod(handle, cfh, mode); +out: DBG_DEBUG("[CEPH] fchmod(...) = %d\n", result); return status_code(result); } @@ -1739,26 +1767,15 @@ static int vfs_ceph_fchown(struct vfs_handle_struct *handle, gid_t gid) { int result; + struct vfs_ceph_fh *cfh = NULL; DBG_DEBUG("[CEPH] fchown(%p, %p, %d, %d)\n", handle, fsp, uid, gid); - if (!fsp->fsp_flags.is_pathref) { - /* - * We can use an io_fd to change ownership. 
- */ - result = ceph_fchown(cmount_of(handle), - fsp_get_io_fd(fsp), - uid, - gid); - } else { - /* - * This is no longer a handle based call. - */ - result = ceph_chown(cmount_of(handle), - fsp->fsp_name->base_name, - uid, - gid); + result = vfs_ceph_fetch_io_fh(handle, fsp, &cfh); + if (result != 0) { + goto out; } - + result = vfs_ceph_ll_fchown(handle, cfh, uid, gid); +out: DBG_DEBUG("[CEPH] fchown(...) = %d\n", result); return status_code(result); } -- 2.46.0 From e6f2cbf73a5f658b30d64464b67dde86467fbfe5 Mon Sep 17 00:00:00 2001 From: Shachar Sharon Date: Wed, 19 Jun 2024 13:59:53 +0300 Subject: [PATCH 16/34] vfs_ceph_new: use low-level APIs for fntimes Implement fntimes hook using libcephfs' low-level APIs. Convert smb_file_time to ceph_statx plus proper field mask on-the-fly upon issuing low-level call to libcephfs. BUG: https://bugzilla.samba.org/show_bug.cgi?id=15686 Signed-off-by: Shachar Sharon Reviewed-by: Guenther Deschner Reviewed-by: Anoop C S (cherry picked from commit 20b7d2bfe06beefb5e7f091eb317ad18cb53f8a9) --- source3/modules/vfs_ceph_new.c | 78 +++++++++++++++++++--------------- 1 file changed, 44 insertions(+), 34 deletions(-) diff --git a/source3/modules/vfs_ceph_new.c b/source3/modules/vfs_ceph_new.c index 20cd95ef16c..41f930e131c 100644 --- a/source3/modules/vfs_ceph_new.c +++ b/source3/modules/vfs_ceph_new.c @@ -628,6 +628,39 @@ static int vfs_ceph_ll_fchmod(struct vfs_handle_struct *handle, cfh->uperm); } +static int vfs_ceph_ll_futimes(struct vfs_handle_struct *handle, + const struct vfs_ceph_fh *cfh, + const struct smb_file_time *ft) +{ + struct ceph_statx stx = {0}; + int mask = 0; + + if (!is_omit_timespec(&ft->atime)) { + stx.stx_atime = ft->atime; + mask |= CEPH_SETATTR_ATIME; + } + if (!is_omit_timespec(&ft->mtime)) { + stx.stx_mtime = ft->mtime; + mask |= CEPH_SETATTR_MTIME; + } + if (!is_omit_timespec(&ft->ctime)) { + stx.stx_ctime = ft->ctime; + mask |= CEPH_SETATTR_CTIME; + } + if (!is_omit_timespec(&ft->create_time)) { + 
stx.stx_btime = ft->create_time; + mask |= CEPH_SETATTR_BTIME; + } + if (!mask) { + return 0; + } + return ceph_ll_setattr(cmount_of(handle), + cfh->iref.inode, + &stx, + mask, + cfh->uperm); +} + static int vfs_ceph_ll_releasedir(const struct vfs_handle_struct *handle, const struct vfs_ceph_fh *dircfh) { @@ -1640,51 +1673,28 @@ static int vfs_ceph_fntimes(struct vfs_handle_struct *handle, files_struct *fsp, struct smb_file_time *ft) { - struct ceph_statx stx = { 0 }; + struct vfs_ceph_fh *cfh = NULL; int result; - int mask = 0; - if (!is_omit_timespec(&ft->atime)) { - stx.stx_atime = ft->atime; - mask |= CEPH_SETATTR_ATIME; - } - if (!is_omit_timespec(&ft->mtime)) { - stx.stx_mtime = ft->mtime; - mask |= CEPH_SETATTR_MTIME; - } - if (!is_omit_timespec(&ft->create_time)) { - stx.stx_btime = ft->create_time; - mask |= CEPH_SETATTR_BTIME; + result = vfs_ceph_fetch_fh(handle, fsp, &cfh); + if (result != 0) { + goto out; } - if (!mask) { - return 0; + result = vfs_ceph_ll_futimes(handle, cfh, ft); + if (result != 0) { + goto out; } - if (!fsp->fsp_flags.is_pathref) { - /* - * We can use an io_fd to set xattrs. - */ - result = ceph_fsetattrx(cmount_of(handle), - fsp_get_io_fd(fsp), - &stx, - mask); - } else { - /* - * This is no longer a handle based call. 
- */ - result = ceph_setattrx(cmount_of(handle), - fsp->fsp_name->base_name, - &stx, - mask, - 0); + if (!is_omit_timespec(&ft->create_time)) { + set_create_timespec_ea(fsp, ft->create_time); } DBG_DEBUG("[CEPH] ntimes(%p, %s, {%ld, %ld, %ld, %ld}) = %d\n", handle, fsp_str_dbg(fsp), ft->mtime.tv_sec, ft->atime.tv_sec, ft->ctime.tv_sec, ft->create_time.tv_sec, result); - - return result; +out: + return status_code(result); } static int vfs_ceph_unlinkat(struct vfs_handle_struct *handle, -- 2.46.0 From ecbca07e41dd58fef19a904a80d89615d17c5fb2 Mon Sep 17 00:00:00 2001 From: Shachar Sharon Date: Wed, 19 Jun 2024 14:27:24 +0300 Subject: [PATCH 17/34] vfs_ceph_new: use low-level APIs for unlinkat Implement unlinkat using libcephfs low-level APIs. Operate using parent directory's open file-handle. When flags has AT_REMOVEDIR bit set call low-level rmdir; otherwise, do normal unlink. BUG: https://bugzilla.samba.org/show_bug.cgi?id=15686 Signed-off-by: Shachar Sharon Reviewed-by: Guenther Deschner Reviewed-by: Anoop C S (cherry picked from commit 362a7cf8664270145bff815347e447797cc1a643) --- source3/modules/vfs_ceph_new.c | 68 +++++++++++++++++----------------- 1 file changed, 33 insertions(+), 35 deletions(-) diff --git a/source3/modules/vfs_ceph_new.c b/source3/modules/vfs_ceph_new.c index 41f930e131c..e5bd1dbd3a7 100644 --- a/source3/modules/vfs_ceph_new.c +++ b/source3/modules/vfs_ceph_new.c @@ -828,6 +828,26 @@ static int vfs_ceph_ll_mkdirat(const struct vfs_handle_struct *handle, return false; } +static int vfs_ceph_ll_rmdir(const struct vfs_handle_struct *handle, + const struct vfs_ceph_fh *dircfh, + const char *name) +{ + return ceph_ll_rmdir(cmount_of(handle), + dircfh->iref.inode, + name, + dircfh->uperm); +} + +static int vfs_ceph_ll_unlinkat(const struct vfs_handle_struct *handle, + const struct vfs_ceph_fh *dircfh, + const char *name) +{ + return ceph_ll_unlink(cmount_of(handle), + dircfh->iref.inode, + name, + dircfh->uperm); +} + /* Ceph Inode-refernce 
get/put wrappers */ static int vfs_ceph_iget(const struct vfs_handle_struct *handle, uint64_t ino, @@ -1698,17 +1718,16 @@ out: } static int vfs_ceph_unlinkat(struct vfs_handle_struct *handle, - struct files_struct *dirfsp, - const struct smb_filename *smb_fname, - int flags) + struct files_struct *dirfsp, + const struct smb_filename *smb_fname, + int flags) { + struct vfs_ceph_fh *dircfh = NULL; + const char *name = smb_fname->base_name; int result = -1; -#ifdef HAVE_CEPH_UNLINKAT - int dirfd = fsp_get_pathref_fd(dirfsp); - DBG_DEBUG("[CEPH] unlinkat(%p, %d, %s)\n", + DBG_DEBUG("[CEPH] unlinkat(%p, %s)\n", handle, - dirfd, smb_fname_str_dbg(smb_fname)); if (smb_fname->stream_name) { @@ -1716,40 +1735,19 @@ static int vfs_ceph_unlinkat(struct vfs_handle_struct *handle, return result; } - result = ceph_unlinkat(cmount_of(handle), - dirfd, - smb_fname->base_name, - flags); - DBG_DEBUG("[CEPH] unlinkat(...) = %d\n", result); - return status_code(result); -#else - struct smb_filename *full_fname = NULL; - - DBG_DEBUG("[CEPH] unlink(%p, %s)\n", - handle, - smb_fname_str_dbg(smb_fname)); - - if (smb_fname->stream_name) { - errno = ENOENT; - return result; - } - - full_fname = full_path_from_dirfsp_atname(talloc_tos(), - dirfsp, - smb_fname); - if (full_fname == NULL) { - return -1; + result = vfs_ceph_fetch_fh(handle, dirfsp, &dircfh); + if (result != 0) { + goto out; } if (flags & AT_REMOVEDIR) { - result = ceph_rmdir(cmount_of(handle), full_fname->base_name); + result = vfs_ceph_ll_rmdir(handle, dircfh, name); } else { - result = ceph_unlink(cmount_of(handle), full_fname->base_name); + result = vfs_ceph_ll_unlinkat(handle, dircfh, name); } - TALLOC_FREE(full_fname); - DBG_DEBUG("[CEPH] unlink(...) = %d\n", result); +out: + DBG_DEBUG("[CEPH] unlinkat(...) 
= %d\n", result); return status_code(result); -#endif } static int vfs_ceph_fchmod(struct vfs_handle_struct *handle, -- 2.46.0 From db7cb2614fff7efebf5e0d2c8d832d78e7e8ffb0 Mon Sep 17 00:00:00 2001 From: Shachar Sharon Date: Wed, 26 Jun 2024 13:46:54 +0300 Subject: [PATCH 18/34] vfs_ceph_new: use low-level APIs for symlink/readlink Implement symlinkat and readlinkat using libcephfs low-level APIs. The readlink operation needs to resolve the child inode by lookup and then use the inode reference for the actual low-level readlink. BUG: https://bugzilla.samba.org/show_bug.cgi?id=15686 Signed-off-by: Shachar Sharon Reviewed-by: Guenther Deschner Reviewed-by: Anoop C S (cherry picked from commit 53c9269b219a54236500d22d8a4c7f2ed582faaf) --- source3/modules/vfs_ceph_new.c | 132 +++++++++++++++++++-------------- 1 file changed, 75 insertions(+), 57 deletions(-) diff --git a/source3/modules/vfs_ceph_new.c b/source3/modules/vfs_ceph_new.c index e5bd1dbd3a7..ed0eea20744 100644 --- a/source3/modules/vfs_ceph_new.c +++ b/source3/modules/vfs_ceph_new.c @@ -848,6 +848,47 @@ static int vfs_ceph_ll_unlinkat(const struct vfs_handle_struct *handle, dircfh->uperm); } +static int vfs_ceph_ll_symlinkat(const struct vfs_handle_struct *handle, + const struct vfs_ceph_fh *dircfh, + const char *name, + const char *value, + struct vfs_ceph_iref *out_iref) +{ + struct ceph_statx stx = {.stx_ino = 0}; + struct Inode *inode = NULL; + int ret = -1; + + ret = ceph_ll_symlink(cmount_of(handle), + dircfh->iref.inode, + name, + value, + &inode, + &stx, + CEPH_STATX_INO, + 0, + dircfh->uperm); + if (ret != 0) { + return ret; + } + out_iref->inode = inode; + out_iref->ino = stx.stx_ino; + out_iref->owner = true; + return 0; +} + +static int vfs_ceph_ll_readlinkat(const struct vfs_handle_struct *handle, + const struct vfs_ceph_fh *dircfh, + const struct vfs_ceph_iref *iref, + char *buf, + size_t bsz) +{ + return ceph_ll_readlink(cmount_of(handle), + iref->inode, + buf, + bsz, + dircfh->uperm); +} + /* Ceph Inode-refernce
get/put wrappers */ static int vfs_ceph_iget(const struct vfs_handle_struct *handle, uint64_t ino, @@ -1979,43 +2020,32 @@ static int vfs_ceph_symlinkat(struct vfs_handle_struct *handle, struct files_struct *dirfsp, const struct smb_filename *new_smb_fname) { + struct vfs_ceph_iref iref = {0}; + struct vfs_ceph_fh *dircfh = NULL; int result = -1; -#ifdef HAVE_CEPH_SYMLINKAT - int dirfd = fsp_get_pathref_fd(dirfsp); - DBG_DEBUG("[CEPH] symlinkat(%p, %s, %d, %s)\n", + DBG_DEBUG("[CEPH] symlinkat(%p, %s, %s)\n", handle, link_target->base_name, - dirfd, new_smb_fname->base_name); - result = ceph_symlinkat(cmount_of(handle), - link_target->base_name, - dirfd, - new_smb_fname->base_name); - DBG_DEBUG("[CEPH] symlinkat(...) = %d\n", result); - return status_code(result); -#else - struct smb_filename *full_fname = NULL; - - full_fname = full_path_from_dirfsp_atname(talloc_tos(), - dirfsp, - new_smb_fname); - if (full_fname == NULL) { - return -1; + result = vfs_ceph_fetch_fh(handle, dirfsp, &dircfh); + if (result != 0) { + goto out; } - DBG_DEBUG("[CEPH] symlink(%p, %s, %s)\n", handle, - link_target->base_name, - full_fname->base_name); - - result = ceph_symlink(cmount_of(handle), - link_target->base_name, - full_fname->base_name); - TALLOC_FREE(full_fname); - DBG_DEBUG("[CEPH] symlink(...) = %d\n", result); + result = vfs_ceph_ll_symlinkat(handle, + dircfh, + new_smb_fname->base_name, + link_target->base_name, + &iref); + if (result != 0) { + goto out; + } + vfs_ceph_iput(handle, &iref); +out: + DBG_DEBUG("[CEPH] symlinkat(...) 
= %d\n", result); return status_code(result); -#endif } static int vfs_ceph_readlinkat(struct vfs_handle_struct *handle, @@ -2025,45 +2055,33 @@ static int vfs_ceph_readlinkat(struct vfs_handle_struct *handle, size_t bufsiz) { int result = -1; -#ifdef HAVE_CEPH_READLINKAT - int dirfd = fsp_get_pathref_fd(dirfsp); + struct vfs_ceph_iref iref = {0}; + struct vfs_ceph_fh *dircfh = NULL; - DBG_DEBUG("[CEPH] readlinkat(%p, %d, %s, %p, %llu)\n", + DBG_DEBUG("[CEPH] readlinkat(%p, %s, %p, %llu)\n", handle, - dirfd, smb_fname->base_name, buf, llu(bufsiz)); - result = ceph_readlinkat(cmount_of(handle), - dirfd, - smb_fname->base_name, - buf, - bufsiz); - - DBG_DEBUG("[CEPH] readlinkat(...) = %d\n", result); - return status_code(result); -#else - struct smb_filename *full_fname = NULL; - - full_fname = full_path_from_dirfsp_atname(talloc_tos(), - dirfsp, - smb_fname); - if (full_fname == NULL) { - return -1; + result = vfs_ceph_fetch_fh(handle, dirfsp, &dircfh); + if (result != 0) { + goto out; } - DBG_DEBUG("[CEPH] readlink(%p, %s, %p, %llu)\n", handle, - full_fname->base_name, buf, llu(bufsiz)); + result = vfs_ceph_ll_lookupat(handle, + dircfh, + smb_fname->base_name, + &iref); + if (result != 0) { + goto out; + } - result = ceph_readlink(cmount_of(handle), - full_fname->base_name, - buf, - bufsiz); - TALLOC_FREE(full_fname); - DBG_DEBUG("[CEPH] readlink(...) = %d\n", result); + result = vfs_ceph_ll_readlinkat(handle, dircfh, &iref, buf, bufsiz); + vfs_ceph_iput(handle, &iref); +out: + DBG_DEBUG("[CEPH] readlinkat(...) = %d\n", result); return status_code(result); -#endif } static int vfs_ceph_linkat(struct vfs_handle_struct *handle, -- 2.46.0 From 35496f01820d65f7fd99847f2bd5d78315ccd985 Mon Sep 17 00:00:00 2001 From: Shachar Sharon Date: Wed, 19 Jun 2024 17:52:45 +0300 Subject: [PATCH 19/34] vfs_ceph_new: use low-level APIs for read/write Implement read/write IO operations using libcephfs' low-level APIs. 
Requires open ceph Fh* associated with fsp (extension) to complete both pread/pwrite as well as async I/O operations. BUG: https://bugzilla.samba.org/show_bug.cgi?id=15686 Signed-off-by: Shachar Sharon Reviewed-by: Guenther Deschner Reviewed-by: Anoop C S (cherry picked from commit 29bbe0f52d4ffae9dbb070ffc525acf99203444b) --- source3/modules/vfs_ceph_new.c | 64 ++++++++++++++++++++++++++-------- 1 file changed, 49 insertions(+), 15 deletions(-) diff --git a/source3/modules/vfs_ceph_new.c b/source3/modules/vfs_ceph_new.c index ed0eea20744..2c96cc92442 100644 --- a/source3/modules/vfs_ceph_new.c +++ b/source3/modules/vfs_ceph_new.c @@ -889,6 +889,24 @@ static int vfs_ceph_ll_readlinkat(const struct vfs_handle_struct *handle, dircfh->uperm); } +static int vfs_ceph_ll_read(const struct vfs_handle_struct *handle, + const struct vfs_ceph_fh *cfh, + int64_t off, + uint64_t len, + char *buf) +{ + return ceph_ll_read(cmount_of(handle), cfh->fh, off, len, buf); +} + +static int vfs_ceph_ll_write(const struct vfs_handle_struct *handle, + const struct vfs_ceph_fh *cfh, + int64_t off, + uint64_t len, + const char *data) +{ + return ceph_ll_write(cmount_of(handle), cfh->fh, off, len, data); +} + /* Ceph Inode-refernce get/put wrappers */ static int vfs_ceph_iget(const struct vfs_handle_struct *handle, uint64_t ino, @@ -1287,6 +1305,7 @@ static ssize_t vfs_ceph_pread(struct vfs_handle_struct *handle, size_t n, off_t offset) { + struct vfs_ceph_fh *cfh = NULL; ssize_t result; DBG_DEBUG("[CEPH] pread(%p, %p, %p, %llu, %llu)\n", @@ -1296,12 +1315,13 @@ static ssize_t vfs_ceph_pread(struct vfs_handle_struct *handle, llu(n), llu(offset)); - result = ceph_read(cmount_of(handle), - fsp_get_io_fd(fsp), - data, - n, - offset); + result = vfs_ceph_fetch_io_fh(handle, fsp, &cfh); + if (result != 0) { + goto out; + } + result = vfs_ceph_ll_read(handle, cfh, offset, n, data); +out: DBG_DEBUG("[CEPH] pread(...) 
= %llu\n", llu(result)); return lstatus_code(result); } @@ -1321,6 +1341,7 @@ static struct tevent_req *vfs_ceph_pread_send(struct vfs_handle_struct *handle, void *data, size_t n, off_t offset) { + struct vfs_ceph_fh *cfh = NULL; struct tevent_req *req = NULL; struct vfs_ceph_pread_state *state = NULL; int ret = -1; @@ -1331,7 +1352,13 @@ static struct tevent_req *vfs_ceph_pread_send(struct vfs_handle_struct *handle, return NULL; } - ret = ceph_read(cmount_of(handle), fsp_get_io_fd(fsp), data, n, offset); + ret = vfs_ceph_fetch_io_fh(handle, fsp, &cfh); + if (ret != 0) { + tevent_req_error(req, -ret); + return tevent_req_post(req, ev); + } + + ret = vfs_ceph_ll_read(handle, cfh, offset, n, data); if (ret < 0) { /* ceph returns -errno on error. */ tevent_req_error(req, -ret); @@ -1364,6 +1391,7 @@ static ssize_t vfs_ceph_pwrite(struct vfs_handle_struct *handle, size_t n, off_t offset) { + struct vfs_ceph_fh *cfh = NULL; ssize_t result; DBG_DEBUG("[CEPH] pwrite(%p, %p, %p, %llu, %llu)\n", @@ -1373,12 +1401,12 @@ static ssize_t vfs_ceph_pwrite(struct vfs_handle_struct *handle, llu(n), llu(offset)); - result = ceph_write(cmount_of(handle), - fsp_get_io_fd(fsp), - data, - n, - offset); - + result = vfs_ceph_fetch_io_fh(handle, fsp, &cfh); + if (result != 0) { + goto out; + } + result = vfs_ceph_ll_write(handle, cfh, offset, n, data); +out: DBG_DEBUG("[CEPH] pwrite(...) 
= %llu\n", llu(result)); return lstatus_code(result); } @@ -1398,6 +1426,7 @@ static struct tevent_req *vfs_ceph_pwrite_send(struct vfs_handle_struct *handle, const void *data, size_t n, off_t offset) { + struct vfs_ceph_fh *cfh = NULL; struct tevent_req *req = NULL; struct vfs_ceph_pwrite_state *state = NULL; int ret = -1; @@ -1408,8 +1437,13 @@ static struct tevent_req *vfs_ceph_pwrite_send(struct vfs_handle_struct *handle, return NULL; } - ret = ceph_write( - cmount_of(handle), fsp_get_io_fd(fsp), data, n, offset); + ret = vfs_ceph_fetch_io_fh(handle, fsp, &cfh); + if (ret != 0) { + tevent_req_error(req, -ret); + return tevent_req_post(req, ev); + } + + ret = vfs_ceph_ll_write(handle, cfh, offset, n, data); if (ret < 0) { /* ceph returns -errno on error. */ tevent_req_error(req, -ret); @@ -2068,7 +2102,6 @@ static int vfs_ceph_readlinkat(struct vfs_handle_struct *handle, if (result != 0) { goto out; } - result = vfs_ceph_ll_lookupat(handle, dircfh, smb_fname->base_name, @@ -2078,6 +2111,7 @@ static int vfs_ceph_readlinkat(struct vfs_handle_struct *handle, } result = vfs_ceph_ll_readlinkat(handle, dircfh, &iref, buf, bufsiz); + vfs_ceph_iput(handle, &iref); out: DBG_DEBUG("[CEPH] readlinkat(...) = %d\n", result); -- 2.46.0 From 40450af836cb4bd2204541a7081767708f204ebe Mon Sep 17 00:00:00 2001 From: Shachar Sharon Date: Thu, 20 Jun 2024 12:23:03 +0300 Subject: [PATCH 20/34] vfs_ceph_new: use low-level APIs for lseek Implement lseek operation using libcephfs' low-level APIs. Requires open ceph Fh* associated with fsp (extension). 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=15686 Signed-off-by: Shachar Sharon Reviewed-by: Guenther Deschner Reviewed-by: Anoop C S (cherry picked from commit 30c1a613fee3f625c0559e49e037af9fad04c3b8) --- source3/modules/vfs_ceph_new.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/source3/modules/vfs_ceph_new.c b/source3/modules/vfs_ceph_new.c index 2c96cc92442..fbdfe114939 100644 --- a/source3/modules/vfs_ceph_new.c +++ b/source3/modules/vfs_ceph_new.c @@ -907,6 +907,14 @@ static int vfs_ceph_ll_write(const struct vfs_handle_struct *handle, return ceph_ll_write(cmount_of(handle), cfh->fh, off, len, data); } +static off_t vfs_ceph_ll_lseek(const struct vfs_handle_struct *handle, + const struct vfs_ceph_fh *cfh, + off_t offset, + int whence) +{ + return ceph_ll_lseek(cmount_of(handle), cfh->fh, offset, whence); +} + /* Ceph Inode-refernce get/put wrappers */ static int vfs_ceph_iget(const struct vfs_handle_struct *handle, uint64_t ino, @@ -1475,13 +1483,17 @@ static off_t vfs_ceph_lseek(struct vfs_handle_struct *handle, off_t offset, int whence) { - off_t result = 0; + struct vfs_ceph_fh *cfh = NULL; + intmax_t result = 0; DBG_DEBUG("[CEPH] vfs_ceph_lseek\n"); - result = ceph_lseek(cmount_of(handle), - fsp_get_io_fd(fsp), - offset, - whence); + result = vfs_ceph_fetch_io_fh(handle, fsp, &cfh); + if (result != 0) { + goto out; + } + + result = vfs_ceph_ll_lseek(handle, cfh, offset, whence); +out: return lstatus_code(result); } -- 2.46.0 From 0bd395f4fc9b8a21bca188d94de549c45acf4b0c Mon Sep 17 00:00:00 2001 From: Shachar Sharon Date: Thu, 20 Jun 2024 12:43:39 +0300 Subject: [PATCH 21/34] vfs_ceph_new: use low-level APIs for fsync Implement fsync operation using libcephfs' low-level APIs. Requires open ceph Fh* associated with fsp (extension). 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=15686 Signed-off-by: Shachar Sharon Reviewed-by: Guenther Deschner Reviewed-by: Anoop C S (cherry picked from commit e15586fc6097565208011c556282d83eeec2230b) --- source3/modules/vfs_ceph_new.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/source3/modules/vfs_ceph_new.c b/source3/modules/vfs_ceph_new.c index fbdfe114939..68e514599ab 100644 --- a/source3/modules/vfs_ceph_new.c +++ b/source3/modules/vfs_ceph_new.c @@ -915,6 +915,13 @@ static off_t vfs_ceph_ll_lseek(const struct vfs_handle_struct *handle, return ceph_ll_lseek(cmount_of(handle), cfh->fh, offset, whence); } +static int vfs_ceph_ll_fsync(const struct vfs_handle_struct *handle, + const struct vfs_ceph_fh *cfh, + int syncdataonly) +{ + return ceph_ll_fsync(cmount_of(handle), cfh->fh, syncdataonly); +} + /* Ceph Inode-refernce get/put wrappers */ static int vfs_ceph_iget(const struct vfs_handle_struct *handle, uint64_t ino, @@ -1577,6 +1584,7 @@ static struct tevent_req *vfs_ceph_fsync_send(struct vfs_handle_struct *handle, struct tevent_context *ev, files_struct *fsp) { + struct vfs_ceph_fh *cfh = NULL; struct tevent_req *req = NULL; struct vfs_aio_state *state = NULL; int ret = -1; @@ -1588,9 +1596,14 @@ static struct tevent_req *vfs_ceph_fsync_send(struct vfs_handle_struct *handle, return NULL; } - /* Make sync call. */ - ret = ceph_fsync(cmount_of(handle), fsp_get_io_fd(fsp), false); + ret = vfs_ceph_fetch_io_fh(handle, fsp, &cfh); + if (ret != 0) { + tevent_req_error(req, -ret); + return tevent_req_post(req, ev); + } + /* Make sync call. */ + ret = vfs_ceph_ll_fsync(handle, cfh, false); if (ret != 0) { /* ceph_fsync returns -errno on error. 
*/ tevent_req_error(req, -ret); -- 2.46.0 From 3bf322e2a06d9ce0491a82c75f194b5937ea4a81 Mon Sep 17 00:00:00 2001 From: Shachar Sharon Date: Thu, 20 Jun 2024 14:58:34 +0300 Subject: [PATCH 22/34] vfs_ceph_new: use low-level APIs for ftruncate/fallocate Implement ftruncate/fallocate operations using libcephfs' low-level APIs. Requires open ceph Fh* associated with fsp (extension). BUG: https://bugzilla.samba.org/show_bug.cgi?id=15686 Signed-off-by: Shachar Sharon Reviewed-by: Guenther Deschner Reviewed-by: Anoop C S (cherry picked from commit b536bf1fa87fb794e2992ab5368f41fdba80e3ad) --- source3/modules/vfs_ceph_new.c | 61 ++++++++++++++++++++++++++-------- 1 file changed, 47 insertions(+), 14 deletions(-) diff --git a/source3/modules/vfs_ceph_new.c b/source3/modules/vfs_ceph_new.c index 68e514599ab..1d4f6ff1ad4 100644 --- a/source3/modules/vfs_ceph_new.c +++ b/source3/modules/vfs_ceph_new.c @@ -922,6 +922,28 @@ static int vfs_ceph_ll_fsync(const struct vfs_handle_struct *handle, return ceph_ll_fsync(cmount_of(handle), cfh->fh, syncdataonly); } +static int vfs_ceph_ll_ftruncate(struct vfs_handle_struct *handle, + const struct vfs_ceph_fh *cfh, + int64_t size) +{ + struct ceph_statx stx = {.stx_size = (uint64_t)size}; + + return ceph_ll_setattr(cmount_of(handle), + cfh->iref.inode, + &stx, + CEPH_SETATTR_SIZE, + cfh->uperm); +} + +static int vfs_ceph_ll_fallocate(const struct vfs_handle_struct *handle, + const struct vfs_ceph_fh *cfh, + int mode, + int64_t off, + int64_t len) +{ + return ceph_ll_fallocate(cmount_of(handle), cfh->fh, mode, off, len); +} + /* Ceph Inode-refernce get/put wrappers */ static int vfs_ceph_iget(const struct vfs_handle_struct *handle, uint64_t ino, @@ -1940,6 +1962,7 @@ static int strict_allocate_ftruncate(struct vfs_handle_struct *handle, int result; NTSTATUS status; SMB_STRUCT_STAT *pst; + struct vfs_ceph_fh *cfh = NULL; status = vfs_stat_fsp(fsp); if (!NT_STATUS_IS_OK(status)) { @@ -1955,20 +1978,21 @@ static int 
strict_allocate_ftruncate(struct vfs_handle_struct *handle, if (pst->st_ex_size == len) return 0; + result = vfs_ceph_fetch_io_fh(handle, fsp, &cfh); + if (result != 0) { + return status_code(result); + } + /* Shrink - just ftruncate. */ if (pst->st_ex_size > len) { - result = ceph_ftruncate(cmount_of(handle), - fsp_get_io_fd(fsp), - len); + result = vfs_ceph_ll_ftruncate(handle, cfh, len); return status_code(result); } space_to_write = len - pst->st_ex_size; - result = ceph_fallocate(cmount_of(handle), - fsp_get_io_fd(fsp), - 0, - pst->st_ex_size, - space_to_write); + result = vfs_ceph_ll_fallocate( + handle, cfh, 0, pst->st_ex_size, space_to_write); + return status_code(result); } @@ -1976,6 +2000,7 @@ static int vfs_ceph_ftruncate(struct vfs_handle_struct *handle, files_struct *fsp, off_t len) { + struct vfs_ceph_fh *cfh = NULL; int result = -1; DBG_DEBUG("[CEPH] ftruncate(%p, %p, %llu\n", handle, fsp, llu(len)); @@ -1984,7 +2009,12 @@ static int vfs_ceph_ftruncate(struct vfs_handle_struct *handle, return strict_allocate_ftruncate(handle, fsp, len); } - result = ceph_ftruncate(cmount_of(handle), fsp_get_io_fd(fsp), len); + result = vfs_ceph_fetch_io_fh(handle, fsp, &cfh); + if (result != 0) { + goto out; + } + result = vfs_ceph_ll_ftruncate(handle, cfh, len); +out: return status_code(result); } @@ -1994,16 +2024,19 @@ static int vfs_ceph_fallocate(struct vfs_handle_struct *handle, off_t offset, off_t len) { + struct vfs_ceph_fh *cfh = NULL; int result; DBG_DEBUG("[CEPH] fallocate(%p, %p, %u, %llu, %llu\n", handle, fsp, mode, llu(offset), llu(len)); + result = vfs_ceph_fetch_io_fh(handle, fsp, &cfh); + if (result != 0) { + goto out; + } + /* unsupported mode flags are rejected by libcephfs */ - result = ceph_fallocate(cmount_of(handle), - fsp_get_io_fd(fsp), - mode, - offset, - len); + result = vfs_ceph_ll_fallocate(handle, cfh, mode, offset, len); +out: DBG_DEBUG("[CEPH] fallocate(...) 
= %d\n", result); return status_code(result); } -- 2.46.0 From 42d74a407f7d3f1ca8f2b474a6459d8036b90425 Mon Sep 17 00:00:00 2001 From: Shachar Sharon Date: Thu, 20 Jun 2024 22:46:52 +0300 Subject: [PATCH 23/34] vfs_ceph_new: use low-level APIs for linkat Implement link operations using libcephfs' low-level APIs. Requires two phase operation: resolve (by-lookup) reference to inode and then do the actual (hard) link operation using parent dir-inode reference to the locally-cached inode. BUG: https://bugzilla.samba.org/show_bug.cgi?id=15686 Signed-off-by: Shachar Sharon Reviewed-by: Guenther Deschner Reviewed-by: Anoop C S (cherry picked from commit 9a70bd606728110ad91cab547a4e31350010bb68) --- source3/modules/vfs_ceph_new.c | 74 ++++++++++++++++++++++------------ 1 file changed, 48 insertions(+), 26 deletions(-) diff --git a/source3/modules/vfs_ceph_new.c b/source3/modules/vfs_ceph_new.c index 1d4f6ff1ad4..fcce851c5f9 100644 --- a/source3/modules/vfs_ceph_new.c +++ b/source3/modules/vfs_ceph_new.c @@ -944,6 +944,18 @@ static int vfs_ceph_ll_fallocate(const struct vfs_handle_struct *handle, return ceph_ll_fallocate(cmount_of(handle), cfh->fh, mode, off, len); } +static int vfs_ceph_ll_link(const struct vfs_handle_struct *handle, + const struct vfs_ceph_fh *dircfh, + const char *name, + const struct vfs_ceph_iref *iref) +{ + return ceph_ll_link(cmount_of(handle), + iref->inode, + dircfh->iref.inode, + name, + dircfh->uperm); +} + /* Ceph Inode-refernce get/put wrappers */ static int vfs_ceph_iget(const struct vfs_handle_struct *handle, uint64_t ino, @@ -2177,40 +2189,50 @@ out: } static int vfs_ceph_linkat(struct vfs_handle_struct *handle, - files_struct *srcfsp, - const struct smb_filename *old_smb_fname, - files_struct *dstfsp, - const struct smb_filename *new_smb_fname, - int flags) -{ - struct smb_filename *full_fname_old = NULL; - struct smb_filename *full_fname_new = NULL; + files_struct *srcfsp, + const struct smb_filename *old_smb_fname, + files_struct *dstfsp, 
+ const struct smb_filename *new_smb_fname, + int flags) +{ + struct vfs_ceph_fh *src_dircfh = NULL; + struct vfs_ceph_fh *dst_dircfh = NULL; + struct vfs_ceph_iref iref = {0}; + const char *name = old_smb_fname->base_name; + const char *newname = new_smb_fname->base_name; int result = -1; - full_fname_old = full_path_from_dirfsp_atname(talloc_tos(), - srcfsp, - old_smb_fname); - if (full_fname_old == NULL) { + /* Prevent special linkat modes until it is required by VFS layer */ + if (flags & (AT_EMPTY_PATH | AT_SYMLINK_FOLLOW)) { + errno = ENOTSUP; return -1; } - full_fname_new = full_path_from_dirfsp_atname(talloc_tos(), - dstfsp, - new_smb_fname); - if (full_fname_new == NULL) { - TALLOC_FREE(full_fname_old); - return -1; + + DBG_DEBUG("[CEPH] link(%p, %s, %s)\n", handle, name, newname); + + result = vfs_ceph_fetch_fh(handle, srcfsp, &src_dircfh); + if (result != 0) { + goto out; } - DBG_DEBUG("[CEPH] link(%p, %s, %s)\n", handle, - full_fname_old->base_name, - full_fname_new->base_name); + result = vfs_ceph_fetch_fh(handle, dstfsp, &dst_dircfh); + if (result != 0) { + goto out; + } + + result = vfs_ceph_ll_lookupat(handle, src_dircfh, name, &iref); + if (result != 0) { + goto out; + } - result = ceph_link(cmount_of(handle), - full_fname_old->base_name, - full_fname_new->base_name); + result = vfs_ceph_ll_link(handle, dst_dircfh, newname, &iref); + if (result != 0) { + goto out; + } + + vfs_ceph_iput(handle, &iref); +out: DBG_DEBUG("[CEPH] link(...) = %d\n", result); - TALLOC_FREE(full_fname_old); - TALLOC_FREE(full_fname_new); return status_code(result); } -- 2.46.0 From 1696b1cfdab14b35384ca4ca827486d799ddf2b8 Mon Sep 17 00:00:00 2001 From: Shachar Sharon Date: Sun, 23 Jun 2024 12:47:19 +0300 Subject: [PATCH 24/34] vfs_ceph_new: use low-level APIs for renameat Implement renameat operations using libcephfs' low-level APIs. Requires both directories to have valid inode-ref associated with their fsp extension. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=15686 Signed-off-by: Shachar Sharon Reviewed-by: Guenther Deschner Reviewed-by: Anoop C S (cherry picked from commit 83011357fb834e92505f17d6f65d5f32e3d37ec0) --- source3/modules/vfs_ceph_new.c | 51 +++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/source3/modules/vfs_ceph_new.c b/source3/modules/vfs_ceph_new.c index fcce851c5f9..986c66372a4 100644 --- a/source3/modules/vfs_ceph_new.c +++ b/source3/modules/vfs_ceph_new.c @@ -956,6 +956,20 @@ static int vfs_ceph_ll_link(const struct vfs_handle_struct *handle, dircfh->uperm); } +static int vfs_ceph_ll_rename(const struct vfs_handle_struct *handle, + const struct vfs_ceph_fh *parent, + const char *name, + const struct vfs_ceph_fh *newparent, + const char *newname) +{ + return ceph_ll_rename(cmount_of(handle), + parent->iref.inode, + name, + newparent->iref.inode, + newname, + newparent->uperm); +} + /* Ceph Inode-refernce get/put wrappers */ static int vfs_ceph_iget(const struct vfs_handle_struct *handle, uint64_t ino, @@ -1573,8 +1587,8 @@ static int vfs_ceph_renameat(struct vfs_handle_struct *handle, files_struct *dstfsp, const struct smb_filename *smb_fname_dst) { - struct smb_filename *full_fname_src = NULL; - struct smb_filename *full_fname_dst = NULL; + struct vfs_ceph_fh *src_dircfh = NULL; + struct vfs_ceph_fh *dst_dircfh = NULL; int result = -1; DBG_DEBUG("[CEPH] vfs_ceph_renameat\n"); @@ -1583,29 +1597,22 @@ static int vfs_ceph_renameat(struct vfs_handle_struct *handle, return result; } - full_fname_src = full_path_from_dirfsp_atname(talloc_tos(), - srcfsp, - smb_fname_src); - if (full_fname_src == NULL) { - errno = ENOMEM; - return -1; - } - full_fname_dst = full_path_from_dirfsp_atname(talloc_tos(), - dstfsp, - smb_fname_dst); - if (full_fname_dst == NULL) { - TALLOC_FREE(full_fname_src); - errno = ENOMEM; - return -1; + result = vfs_ceph_fetch_fh(handle, srcfsp, &src_dircfh); + if (result != 0) { + goto out; } 
- result = ceph_rename(cmount_of(handle), - full_fname_src->base_name, - full_fname_dst->base_name); - - TALLOC_FREE(full_fname_src); - TALLOC_FREE(full_fname_dst); + result = vfs_ceph_fetch_fh(handle, dstfsp, &dst_dircfh); + if (result != 0) { + goto out; + } + result = vfs_ceph_ll_rename(handle, + src_dircfh, + smb_fname_src->base_name, + dst_dircfh, + smb_fname_dst->base_name); +out: return status_code(result); } -- 2.46.0 From 8411e983ca7145af76542e386a096eb3a12b31c1 Mon Sep 17 00:00:00 2001 From: Shachar Sharon Date: Sun, 23 Jun 2024 13:08:25 +0300 Subject: [PATCH 25/34] vfs_ceph_new: use low-level APIs for mknodat Implement mknodat operations using libcephfs' low-level APIs. Requires parent directory to have valid inode-ref associated with its fsp extension. BUG: https://bugzilla.samba.org/show_bug.cgi?id=15686 Signed-off-by: Shachar Sharon Reviewed-by: Guenther Deschner Reviewed-by: Anoop C S (cherry picked from commit e714e5ddc50a771e743f5e63f686c106abe33b75) --- source3/modules/vfs_ceph_new.c | 56 ++++++++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 13 deletions(-) diff --git a/source3/modules/vfs_ceph_new.c b/source3/modules/vfs_ceph_new.c index 986c66372a4..4bb6cb59d1f 100644 --- a/source3/modules/vfs_ceph_new.c +++ b/source3/modules/vfs_ceph_new.c @@ -970,6 +970,35 @@ static int vfs_ceph_ll_rename(const struct vfs_handle_struct *handle, newparent->uperm); } +static int vfs_ceph_ll_mknod(const struct vfs_handle_struct *handle, + const struct vfs_ceph_fh *parent, + const char *name, + mode_t mode, + dev_t rdev, + struct vfs_ceph_iref *iref) +{ + struct ceph_statx stx = {.stx_ino = 0}; + struct Inode *inode = NULL; + int ret = -1; + + ret = ceph_ll_mknod(cmount_of(handle), + parent->iref.inode, + name, + mode, + rdev, + &inode, + &stx, + CEPH_STATX_INO, + 0, + parent->uperm); + if (ret == 0) { + iref->inode = inode; + iref->ino = stx.stx_ino; + iref->owner = true; + } + return ret; +} + /* Ceph Inode-refernce get/put wrappers */ static int 
vfs_ceph_iget(const struct vfs_handle_struct *handle, uint64_t ino, @@ -2249,25 +2278,26 @@ static int vfs_ceph_mknodat(struct vfs_handle_struct *handle, mode_t mode, SMB_DEV_T dev) { - struct smb_filename *full_fname = NULL; + struct vfs_ceph_iref iref = {0}; + struct vfs_ceph_fh *dircfh = NULL; + const char *name = smb_fname->base_name; int result = -1; - full_fname = full_path_from_dirfsp_atname(talloc_tos(), - dirfsp, - smb_fname); - if (full_fname == NULL) { - return -1; + result = vfs_ceph_fetch_fh(handle, dirfsp, &dircfh); + if (result != 0) { + goto out; } - DBG_DEBUG("[CEPH] mknodat(%p, %s)\n", handle, full_fname->base_name); - result = ceph_mknod(cmount_of(handle), - full_fname->base_name, - mode, - dev); - DBG_DEBUG("[CEPH] mknodat(...) = %d\n", result); + DBG_DEBUG("[CEPH] mknodat(%p, %s)\n", handle, name); - TALLOC_FREE(full_fname); + result = vfs_ceph_ll_mknod(handle, dircfh, name, mode, dev, &iref); + if (result != 0) { + goto out; + } + vfs_ceph_iput(handle, &iref); +out: + DBG_DEBUG("[CEPH] mknodat(...) = %d\n", result); return status_code(result); } -- 2.46.0 From 5eccb9185c96b2e42775c96b20095f33393cb374 Mon Sep 17 00:00:00 2001 From: Shachar Sharon Date: Sun, 23 Jun 2024 14:57:10 +0300 Subject: [PATCH 26/34] vfs_ceph_new: use low-level APIs for xattr ops Implement extended-attributes operations using libcephfs' low-level APIs. Whenever possible, use the open file-handle from fsp-extension to resolve inode-reference and user-permissions. Otherwise, resolve both on-the-fly. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=15686 Signed-off-by: Shachar Sharon Reviewed-by: Guenther Deschner Reviewed-by: Anoop C S (cherry picked from commit 0a8445e891c64d703d44258b2eef85296265c55f) --- source3/modules/vfs_ceph_new.c | 317 +++++++++++++++++++++++++++------ 1 file changed, 263 insertions(+), 54 deletions(-) diff --git a/source3/modules/vfs_ceph_new.c b/source3/modules/vfs_ceph_new.c index 4bb6cb59d1f..839b726fd13 100644 --- a/source3/modules/vfs_ceph_new.c +++ b/source3/modules/vfs_ceph_new.c @@ -999,6 +999,159 @@ static int vfs_ceph_ll_mknod(const struct vfs_handle_struct *handle, return ret; } +static int vfs_ceph_ll_getxattr(const struct vfs_handle_struct *handle, + const struct vfs_ceph_iref *iref, + const char *name, + void *value, + size_t size) +{ + struct UserPerm *uperm = NULL; + int ret = -1; + + uperm = vfs_ceph_userperm_new(handle); + if (uperm == NULL) { + return -ENOMEM; + } + + ret = ceph_ll_getxattr(cmount_of(handle), + iref->inode, + name, + value, + size, + uperm); + + vfs_ceph_userperm_del(uperm); + + return ret; +} + +static int vfs_ceph_ll_fgetxattr(const struct vfs_handle_struct *handle, + const struct vfs_ceph_fh *cfh, + const char *name, + void *value, + size_t size) +{ + return ceph_ll_getxattr(cmount_of(handle), + cfh->iref.inode, + name, + value, + size, + cfh->uperm); +} + +static int vfs_ceph_ll_setxattr(const struct vfs_handle_struct *handle, + const struct vfs_ceph_iref *iref, + const char *name, + const void *value, + size_t size, + int flags) +{ + struct UserPerm *uperm = NULL; + int ret = -1; + + uperm = vfs_ceph_userperm_new(handle); + if (uperm == NULL) { + return -ENOMEM; + } + + ret = ceph_ll_setxattr(cmount_of(handle), + iref->inode, + name, + value, + size, + flags, + uperm); + + vfs_ceph_userperm_del(uperm); + + return ret; +} + +static int vfs_ceph_ll_fsetxattr(const struct vfs_handle_struct *handle, + const struct vfs_ceph_fh *cfh, + const char *name, + const void *value, + size_t size, + int flags) 
+{ + return ceph_ll_setxattr(cmount_of(handle), + cfh->iref.inode, + name, + value, + size, + flags, + cfh->uperm); +} + +static int vfs_ceph_ll_listxattr(const struct vfs_handle_struct *handle, + const struct vfs_ceph_iref *iref, + char *list, + size_t buf_size, + size_t *list_size) +{ + struct UserPerm *uperm = NULL; + int ret = -1; + + uperm = vfs_ceph_userperm_new(handle); + if (uperm == NULL) { + return -ENOMEM; + } + + ret = ceph_ll_listxattr(cmount_of(handle), + iref->inode, + list, + buf_size, + list_size, + uperm); + + vfs_ceph_userperm_del(uperm); + + return ret; +} + +static int vfs_ceph_ll_flistxattr(const struct vfs_handle_struct *handle, + const struct vfs_ceph_fh *cfh, + char *list, + size_t buf_size, + size_t *list_size) +{ + return ceph_ll_listxattr(cmount_of(handle), + cfh->iref.inode, + list, + buf_size, + list_size, + cfh->uperm); +} + +static int vfs_ceph_ll_removexattr(const struct vfs_handle_struct *handle, + const struct vfs_ceph_iref *iref, + const char *name) +{ + struct UserPerm *uperm = NULL; + int ret = -1; + + uperm = vfs_ceph_userperm_new(handle); + if (uperm == NULL) { + return -ENOMEM; + } + + ret = ceph_ll_removexattr(cmount_of(handle), iref->inode, name, uperm); + + vfs_ceph_userperm_del(uperm); + + return ret; +} + +static int vfs_ceph_ll_fremovexattr(const struct vfs_handle_struct *handle, + const struct vfs_ceph_fh *cfh, + const char *name) +{ + return ceph_ll_removexattr(cmount_of(handle), + cfh->iref.inode, + name, + cfh->uperm); +} + /* Ceph Inode-refernce get/put wrappers */ static int vfs_ceph_iget(const struct vfs_handle_struct *handle, uint64_t ino, @@ -1093,6 +1246,14 @@ static int vfs_ceph_igetd(struct vfs_handle_struct *handle, iref); } +static int vfs_ceph_igetf(struct vfs_handle_struct *handle, + const struct files_struct *fsp, + struct vfs_ceph_iref *iref) +{ + return vfs_ceph_iget( + handle, fsp->file_id.inode, fsp->fsp_name->base_name, 0, iref); +} + static void vfs_ceph_iput(const struct vfs_handle_struct 
*handle, struct vfs_ceph_iref *iref) { @@ -2410,25 +2571,35 @@ static ssize_t vfs_ceph_fgetxattr(struct vfs_handle_struct *handle, size_t size) { int ret; + DBG_DEBUG("[CEPH] fgetxattr(%p, %p, %s, %p, %llu)\n", handle, fsp, name, value, llu(size)); + if (!fsp->fsp_flags.is_pathref) { - ret = ceph_fgetxattr(cmount_of(handle), - fsp_get_io_fd(fsp), - name, - value, - size); + struct vfs_ceph_fh *cfh = NULL; + + ret = vfs_ceph_fetch_io_fh(handle, fsp, &cfh); + if (ret != 0) { + goto out; + } + + ret = vfs_ceph_ll_fgetxattr(handle, cfh, name, value, size); } else { - ret = ceph_getxattr(cmount_of(handle), - fsp->fsp_name->base_name, - name, - value, - size); + struct vfs_ceph_iref iref = {0}; + + ret = vfs_ceph_igetf(handle, fsp, &iref); + if (ret != 0) { + goto out; + } + + ret = vfs_ceph_ll_getxattr(handle, &iref, name, value, size); + vfs_ceph_iput(handle, &iref); } +out: DBG_DEBUG("[CEPH] fgetxattr(...) = %d\n", ret); return lstatus_code(ret); } @@ -2438,26 +2609,47 @@ static ssize_t vfs_ceph_flistxattr(struct vfs_handle_struct *handle, char *list, size_t size) { + size_t list_size = 0; int ret; + DBG_DEBUG("[CEPH] flistxattr(%p, %p, %p, %llu)\n", handle, fsp, list, llu(size)); + if (!fsp->fsp_flags.is_pathref) { - /* - * We can use an io_fd to list xattrs. - */ - ret = ceph_flistxattr(cmount_of(handle), - fsp_get_io_fd(fsp), - list, - size); + struct vfs_ceph_fh *cfh = NULL; + + ret = vfs_ceph_fetch_io_fh(handle, fsp, &cfh); + if (ret != 0) { + goto out; + } + + ret = vfs_ceph_ll_flistxattr(handle, + cfh, + list, + size, + &list_size); + if (ret != 0) { + goto out; + } } else { - /* - * This is no longer a handle based call. 
- */ - ret = ceph_listxattr(cmount_of(handle), - fsp->fsp_name->base_name, - list, - size); + struct vfs_ceph_iref iref = {0}; + + ret = vfs_ceph_igetf(handle, fsp, &iref); + if (ret != 0) { + goto out; + } + ret = vfs_ceph_ll_listxattr(handle, + &iref, + list, + size, + &list_size); + if (ret != 0) { + goto out; + } + vfs_ceph_iput(handle, &iref); } + ret = (int)list_size; +out: DBG_DEBUG("[CEPH] flistxattr(...) = %d\n", ret); return lstatus_code(ret); } @@ -2467,22 +2659,29 @@ static int vfs_ceph_fremovexattr(struct vfs_handle_struct *handle, const char *name) { int ret; + DBG_DEBUG("[CEPH] fremovexattr(%p, %p, %s)\n", handle, fsp, name); if (!fsp->fsp_flags.is_pathref) { - /* - * We can use an io_fd to remove xattrs. - */ - ret = ceph_fremovexattr(cmount_of(handle), - fsp_get_io_fd(fsp), - name); + struct vfs_ceph_fh *cfh = NULL; + + ret = vfs_ceph_fetch_io_fh(handle, fsp, &cfh); + if (ret != 0) { + goto out; + } + + ret = vfs_ceph_ll_fremovexattr(handle, cfh, name); } else { - /* - * This is no longer a handle based call. - */ - ret = ceph_removexattr(cmount_of(handle), - fsp->fsp_name->base_name, - name); + struct vfs_ceph_iref iref = {0}; + + ret = vfs_ceph_igetf(handle, fsp, &iref); + if (ret != 0) { + goto out; + } + + ret = vfs_ceph_ll_removexattr(handle, &iref, name); + vfs_ceph_iput(handle, &iref); } +out: DBG_DEBUG("[CEPH] fremovexattr(...) = %d\n", ret); return status_code(ret); } @@ -2495,6 +2694,7 @@ static int vfs_ceph_fsetxattr(struct vfs_handle_struct *handle, int flags) { int ret; + DBG_DEBUG("[CEPH] fsetxattr(%p, %p, %s, %p, %llu, %d)\n", handle, fsp, @@ -2502,27 +2702,36 @@ static int vfs_ceph_fsetxattr(struct vfs_handle_struct *handle, value, llu(size), flags); + if (!fsp->fsp_flags.is_pathref) { - /* - * We can use an io_fd to set xattrs. 
- */ - ret = ceph_fsetxattr(cmount_of(handle), - fsp_get_io_fd(fsp), - name, - value, - size, - flags); + struct vfs_ceph_fh *cfh = NULL; + + ret = vfs_ceph_fetch_io_fh(handle, fsp, &cfh); + if (ret != 0) { + goto out; + } + ret = vfs_ceph_ll_fsetxattr(handle, + cfh, + name, + value, + size, + flags); } else { - /* - * This is no longer a handle based call. - */ - ret = ceph_setxattr(cmount_of(handle), - fsp->fsp_name->base_name, - name, - value, - size, - flags); + struct vfs_ceph_iref iref = {0}; + + ret = vfs_ceph_igetf(handle, fsp, &iref); + if (ret != 0) { + goto out; + } + ret = vfs_ceph_ll_setxattr(handle, + &iref, + name, + value, + size, + flags); + vfs_ceph_iput(handle, &iref); } +out: DBG_DEBUG("[CEPH] fsetxattr(...) = %d\n", ret); return status_code(ret); } -- 2.46.0 From 2e0d1bbbc100f0ccc92dd4559c64c3c8d63846a0 Mon Sep 17 00:00:00 2001 From: Shachar Sharon Date: Wed, 26 Jun 2024 17:24:37 +0300 Subject: [PATCH 27/34] vfs_ceph_new: debug-log upon libcephfs low-level calls Add developer's debug-logging upon each call to libcephfs' low-level APIs. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=15686 Signed-off-by: Shachar Sharon Reviewed-by: Guenther Deschner Reviewed-by: Anoop C S (cherry picked from commit d00f20f30f4e77463e82d202099682b7ef68260f) --- source3/modules/vfs_ceph_new.c | 118 ++++++++++++++++++++++++++++++++- 1 file changed, 117 insertions(+), 1 deletion(-) diff --git a/source3/modules/vfs_ceph_new.c b/source3/modules/vfs_ceph_new.c index 839b726fd13..d1c9862a1dc 100644 --- a/source3/modules/vfs_ceph_new.c +++ b/source3/modules/vfs_ceph_new.c @@ -423,10 +423,13 @@ static int vfs_ceph_release_fh(struct vfs_ceph_fh *cfh) int ret = 0; if (cfh->fh != NULL) { + DBG_DEBUG("[ceph] ceph_ll_close: fd=%d\n", cfh->fd); ret = ceph_ll_close(cfh->cme->mount, cfh->fh); cfh->fh = NULL; } if (cfh->iref.inode != NULL) { + DBG_DEBUG("[ceph] ceph_ll_put: ino=%" PRIu64 "\n", + cfh->iref.ino); ceph_ll_put(cfh->cme->mount, cfh->iref.inode); cfh->iref.inode = NULL; } @@ -506,6 +509,8 @@ static int vfs_ceph_ll_lookup_inode(const struct vfs_handle_struct *handle, { struct inodeno_t ino = {.val = inoval}; + DBG_DEBUG("[ceph] ceph_ll_lookup_inode: ino=%" PRIu64 "\n", inoval); + return ceph_ll_lookup_inode(cmount_of(handle), ino, pout); } @@ -519,6 +524,8 @@ static int vfs_ceph_ll_walk(const struct vfs_handle_struct *handle, struct UserPerm *uperm = NULL; int ret = -1; + DBG_DEBUG("[ceph] ceph_ll_walk: name=%s\n", name); + uperm = vfs_ceph_userperm_new(handle); if (uperm == NULL) { return -ENOMEM; @@ -540,6 +547,8 @@ static int vfs_ceph_ll_statfs(const struct vfs_handle_struct *handle, const struct vfs_ceph_iref *iref, struct statvfs *stbuf) { + DBG_DEBUG("[ceph] ceph_ll_statfs: ino=%" PRIu64 "\n", iref->ino); + return ceph_ll_statfs(cmount_of(handle), iref->inode, stbuf); } @@ -551,6 +560,8 @@ static int vfs_ceph_ll_getattr2(const struct vfs_handle_struct *handle, struct ceph_statx stx = {0}; int ret = -1; + DBG_DEBUG("[ceph] ceph_ll_getattr: ino=%" PRIu64 "\n", iref->ino); + ret = ceph_ll_getattr(cmount_of(handle), 
iref->inode, &stx, @@ -588,6 +599,9 @@ static int vfs_ceph_ll_chown(struct vfs_handle_struct *handle, struct UserPerm *uperm = NULL; int ret = -1; + DBG_DEBUG("[ceph] ceph_ll_setattr: ino=%" PRIu64 " uid=%u gid=%u\n", + iref->ino, uid, gid); + uperm = vfs_ceph_userperm_new(handle); if (uperm == NULL) { return -ENOMEM; @@ -608,6 +622,9 @@ static int vfs_ceph_ll_fchown(struct vfs_handle_struct *handle, { struct ceph_statx stx = {.stx_uid = uid, .stx_gid = gid}; + DBG_DEBUG("[ceph] ceph_ll_setattr: ino=%" PRIu64 " uid=%u gid=%u\n", + cfh->iref.ino, uid, gid); + return ceph_ll_setattr(cmount_of(handle), cfh->iref.inode, &stx, @@ -621,6 +638,9 @@ static int vfs_ceph_ll_fchmod(struct vfs_handle_struct *handle, { struct ceph_statx stx = {.stx_mode = mode}; + DBG_DEBUG("[ceph] ceph_ll_setattr: ino=%" PRIu64 " mode=%o\n", + cfh->iref.ino, mode); + return ceph_ll_setattr(cmount_of(handle), cfh->iref.inode, &stx, @@ -654,6 +674,15 @@ static int vfs_ceph_ll_futimes(struct vfs_handle_struct *handle, if (!mask) { return 0; } + + DBG_DEBUG("[ceph] ceph_ll_setattr: ino=%" PRIu64 " mtime=%" PRIu64 + " atime=%" PRIu64 " ctime=%" PRIu64 " btime=%" PRIu64 "\n", + cfh->iref.ino, + full_timespec_to_nt_time(&stx.stx_mtime), + full_timespec_to_nt_time(&stx.stx_atime), + full_timespec_to_nt_time(&stx.stx_ctime), + full_timespec_to_nt_time(&stx.stx_btime)); + return ceph_ll_setattr(cmount_of(handle), cfh->iref.inode, &stx, @@ -664,6 +693,9 @@ static int vfs_ceph_ll_futimes(struct vfs_handle_struct *handle, static int vfs_ceph_ll_releasedir(const struct vfs_handle_struct *handle, const struct vfs_ceph_fh *dircfh) { + DBG_DEBUG("[ceph] ceph_ll_releasedir: ino=%" PRIu64 " fd=%d\n", + dircfh->iref.ino, dircfh->fd); + return ceph_ll_releasedir(cmount_of(handle), dircfh->dirp.cdr); } @@ -679,6 +711,9 @@ static int vfs_ceph_ll_create(const struct vfs_handle_struct *handle, struct Fh *fh = NULL; int ret = -1; + DBG_DEBUG("[ceph] ceph_ll_create: parent-ino=%" PRIu64 " name=%s " + "mode=%o\n", 
parent->ino, name, mode); + ret = ceph_ll_create(cmount_of(handle), parent->inode, name, @@ -713,6 +748,9 @@ static int vfs_ceph_ll_lookup(const struct vfs_handle_struct *handle, struct UserPerm *uperm = NULL; int ret = -1; + DBG_DEBUG("[ceph] ceph_ll_lookup: parent-ino=%" PRIu64 " name=%s", + parent->ino, name); + uperm = vfs_ceph_userperm_new(handle); if (uperm == NULL) { return -ENOMEM; @@ -746,6 +784,9 @@ static int vfs_ceph_ll_lookupat(const struct vfs_handle_struct *handle, struct Inode *inode = NULL; int ret = -1; + DBG_DEBUG("[ceph] ceph_ll_lookup: parent-ino=%" PRIu64 " name=%s\n", + parent_fh->iref.ino, name); + ret = ceph_ll_lookup(cmount_of(handle), parent_fh->iref.inode, name, @@ -771,6 +812,9 @@ static int vfs_ceph_ll_open(const struct vfs_handle_struct *handle, struct Fh *fh = NULL; int ret = -1; + DBG_DEBUG("[ceph] ceph_ll_open: ino=%" PRIu64 " flags=0x%x\n", + cfh->iref.ino, flags); + ret = ceph_ll_open(cmount_of(handle), in, flags, &fh, cfh->uperm); if (ret == 0) { cfh->fh = fh; @@ -782,6 +826,8 @@ static int vfs_ceph_ll_open(const struct vfs_handle_struct *handle, static int vfs_ceph_ll_opendir(const struct vfs_handle_struct *handle, struct vfs_ceph_fh *cfh) { + DBG_DEBUG("[ceph] ceph_ll_opendir: ino=%" PRIu64 "\n", cfh->iref.ino); + return ceph_ll_opendir(cmount_of(handle), cfh->iref.inode, &cfh->dirp.cdr, @@ -791,12 +837,18 @@ static int vfs_ceph_ll_opendir(const struct vfs_handle_struct *handle, static struct dirent *vfs_ceph_ll_readdir(const struct vfs_handle_struct *hndl, const struct vfs_ceph_fh *dircfh) { + DBG_DEBUG("[ceph] ceph_readdir: ino=%" PRIu64 " fd=%d\n", + dircfh->iref.ino, dircfh->fd); + return ceph_readdir(cmount_of(hndl), dircfh->dirp.cdr); } static void vfs_ceph_ll_rewinddir(const struct vfs_handle_struct *handle, const struct vfs_ceph_fh *dircfh) { + DBG_DEBUG("[ceph] ceph_rewinddir: ino=%" PRIu64 " fd=%d\n", + dircfh->iref.ino, dircfh->fd); + ceph_rewinddir(cmount_of(handle), dircfh->dirp.cdr); } @@ -810,6 +862,9 @@ static 
int vfs_ceph_ll_mkdirat(const struct vfs_handle_struct *handle, struct Inode *inode = NULL; int ret = -1; + DBG_DEBUG("[ceph] ceph_ll_mkdir: parent-ino=%" PRIu64 " name=%s " + "mode=%o\n", dircfh->iref.ino, name, mode); + ret = ceph_ll_mkdir(cmount_of(handle), dircfh->iref.inode, name, @@ -832,6 +887,9 @@ static int vfs_ceph_ll_rmdir(const struct vfs_handle_struct *handle, const struct vfs_ceph_fh *dircfh, const char *name) { + DBG_DEBUG("[ceph] ceph_ll_rmdir: parent-ino=%" PRIu64 " name=%s\n", + dircfh->iref.ino, name); + return ceph_ll_rmdir(cmount_of(handle), dircfh->iref.inode, name, @@ -842,6 +900,9 @@ static int vfs_ceph_ll_unlinkat(const struct vfs_handle_struct *handle, const struct vfs_ceph_fh *dircfh, const char *name) { + DBG_DEBUG("[ceph] ceph_ll_unlink: parent-ino=%" PRIu64 " name=%s\n", + dircfh->iref.ino, name); + return ceph_ll_unlink(cmount_of(handle), dircfh->iref.inode, name, @@ -858,6 +919,9 @@ static int vfs_ceph_ll_symlinkat(const struct vfs_handle_struct *handle, struct Inode *inode = NULL; int ret = -1; + DBG_DEBUG("[ceph] ceph_ll_symlink: parent-ino=%" PRIu64 " name=%s\n", + dircfh->iref.ino, name); + ret = ceph_ll_symlink(cmount_of(handle), dircfh->iref.inode, name, @@ -882,6 +946,8 @@ static int vfs_ceph_ll_readlinkat(const struct vfs_handle_struct *handle, char *buf, size_t bsz) { + DBG_DEBUG("[ceph] ceph_ll_readlink: ino=%" PRIu64 "\n", iref->ino); + return ceph_ll_readlink(cmount_of(handle), iref->inode, buf, @@ -895,6 +961,9 @@ static int vfs_ceph_ll_read(const struct vfs_handle_struct *handle, uint64_t len, char *buf) { + DBG_DEBUG("[ceph] ceph_ll_read: ino=%" PRIu64 " fd=%d off=%jd " + "len=%ju\n", cfh->iref.ino, cfh->fd, off, len); + return ceph_ll_read(cmount_of(handle), cfh->fh, off, len, buf); } @@ -904,6 +973,9 @@ static int vfs_ceph_ll_write(const struct vfs_handle_struct *handle, uint64_t len, const char *data) { + DBG_DEBUG("[ceph] ceph_ll_write: ino=%" PRIu64 " fd=%d off=%jd " + "len=%ju\n", cfh->iref.ino, cfh->fd, off, 
len); + return ceph_ll_write(cmount_of(handle), cfh->fh, off, len, data); } @@ -912,6 +984,9 @@ static off_t vfs_ceph_ll_lseek(const struct vfs_handle_struct *handle, off_t offset, int whence) { + DBG_DEBUG("[ceph] ceph_ll_lseek: ino=%" PRIu64 " fd=%d offset=%jd " + "whence=%d\n", cfh->iref.ino, cfh->fd, offset, whence); + return ceph_ll_lseek(cmount_of(handle), cfh->fh, offset, whence); } @@ -919,6 +994,9 @@ static int vfs_ceph_ll_fsync(const struct vfs_handle_struct *handle, const struct vfs_ceph_fh *cfh, int syncdataonly) { + DBG_DEBUG("[ceph] ceph_ll_fsync: ino=%" PRIu64 " fd=%d " + "syncdataonly=%d\n", cfh->iref.ino, cfh->fd, syncdataonly); + return ceph_ll_fsync(cmount_of(handle), cfh->fh, syncdataonly); } @@ -928,6 +1006,9 @@ static int vfs_ceph_ll_ftruncate(struct vfs_handle_struct *handle, { struct ceph_statx stx = {.stx_size = (uint64_t)size}; + DBG_DEBUG("[ceph] ceph_ll_setattr: ino=%" PRIu64 " fd=%d size=%jd\n", + cfh->iref.ino, cfh->fd, size); + return ceph_ll_setattr(cmount_of(handle), cfh->iref.inode, &stx, @@ -941,6 +1022,9 @@ static int vfs_ceph_ll_fallocate(const struct vfs_handle_struct *handle, int64_t off, int64_t len) { + DBG_DEBUG("[ceph] ceph_ll_fallocate: ino=%" PRIu64 " fd=%d off=%jd " + "len=%jd\n", cfh->iref.ino, cfh->fd, off, len); + return ceph_ll_fallocate(cmount_of(handle), cfh->fh, mode, off, len); } @@ -949,6 +1033,9 @@ static int vfs_ceph_ll_link(const struct vfs_handle_struct *handle, const char *name, const struct vfs_ceph_iref *iref) { + DBG_DEBUG("[ceph] ceph_ll_link: parent-ino=%" PRIu64 " name=%s\n", + dircfh->iref.ino, name); + return ceph_ll_link(cmount_of(handle), iref->inode, dircfh->iref.inode, @@ -962,6 +1049,10 @@ static int vfs_ceph_ll_rename(const struct vfs_handle_struct *handle, const struct vfs_ceph_fh *newparent, const char *newname) { + DBG_DEBUG("[ceph] ceph_ll_rename: parent-ino=%" PRIu64 + " name=%s newparent-ino=%" PRIu64 " newname=%s\n", + parent->iref.ino, name, newparent->iref.ino, newname); + return 
ceph_ll_rename(cmount_of(handle), parent->iref.inode, name, @@ -981,6 +1072,9 @@ static int vfs_ceph_ll_mknod(const struct vfs_handle_struct *handle, struct Inode *inode = NULL; int ret = -1; + DBG_DEBUG("[ceph] ceph_ll_mknod: parent-ino=%" PRIu64 " name=%s " + "mode=%o\n", parent->iref.ino, name, mode); + ret = ceph_ll_mknod(cmount_of(handle), parent->iref.inode, name, @@ -1008,6 +1102,9 @@ static int vfs_ceph_ll_getxattr(const struct vfs_handle_struct *handle, struct UserPerm *uperm = NULL; int ret = -1; + DBG_DEBUG("[ceph] ceph_ll_getxattr: ino=%" PRIu64 " name=%s\n", + iref->ino, name); + uperm = vfs_ceph_userperm_new(handle); if (uperm == NULL) { return -ENOMEM; @@ -1031,6 +1128,9 @@ static int vfs_ceph_ll_fgetxattr(const struct vfs_handle_struct *handle, void *value, size_t size) { + DBG_DEBUG("[ceph] ceph_ll_getxattr: ino=%" PRIu64 " name=%s\n", + cfh->iref.ino, name); + return ceph_ll_getxattr(cmount_of(handle), cfh->iref.inode, name, @@ -1049,6 +1149,9 @@ static int vfs_ceph_ll_setxattr(const struct vfs_handle_struct *handle, struct UserPerm *uperm = NULL; int ret = -1; + DBG_DEBUG("[ceph] ceph_ll_setxattr: ino=%" PRIu64 " name=%s " + "size=%zu\n", iref->ino, name, size); + uperm = vfs_ceph_userperm_new(handle); if (uperm == NULL) { return -ENOMEM; @@ -1074,6 +1177,9 @@ static int vfs_ceph_ll_fsetxattr(const struct vfs_handle_struct *handle, size_t size, int flags) { + DBG_DEBUG("[ceph] ceph_ll_setxattr: ino=%" PRIu64 " name=%s " + "size=%zu\n", cfh->iref.ino, name, size); + return ceph_ll_setxattr(cmount_of(handle), cfh->iref.inode, name, @@ -1092,6 +1198,8 @@ static int vfs_ceph_ll_listxattr(const struct vfs_handle_struct *handle, struct UserPerm *uperm = NULL; int ret = -1; + DBG_DEBUG("[ceph] ceph_ll_listxattr: ino=%" PRIu64 "\n", iref->ino); + uperm = vfs_ceph_userperm_new(handle); if (uperm == NULL) { return -ENOMEM; @@ -1115,6 +1223,8 @@ static int vfs_ceph_ll_flistxattr(const struct vfs_handle_struct *handle, size_t buf_size, size_t *list_size) { + 
DBG_DEBUG("[ceph] ceph_ll_listxattr: ino=%" PRIu64 "\n", cfh->iref.ino); + return ceph_ll_listxattr(cmount_of(handle), cfh->iref.inode, list, @@ -1130,6 +1240,9 @@ static int vfs_ceph_ll_removexattr(const struct vfs_handle_struct *handle, struct UserPerm *uperm = NULL; int ret = -1; + DBG_DEBUG("[ceph] ceph_ll_removexattr: ino=%" PRIu64 " name=%s\n", + iref->ino, name); + uperm = vfs_ceph_userperm_new(handle); if (uperm == NULL) { return -ENOMEM; @@ -1146,6 +1259,9 @@ static int vfs_ceph_ll_fremovexattr(const struct vfs_handle_struct *handle, const struct vfs_ceph_fh *cfh, const char *name) { + DBG_DEBUG("[ceph] ceph_ll_removexattr: ino=%" PRIu64 " name=%s\n", + cfh->iref.ino, name); + return ceph_ll_removexattr(cmount_of(handle), cfh->iref.inode, name, @@ -1258,7 +1374,7 @@ static void vfs_ceph_iput(const struct vfs_handle_struct *handle, struct vfs_ceph_iref *iref) { if ((iref != NULL) && (iref->inode != NULL) && iref->owner) { - DBG_DEBUG("[CEPH] put-inode: ino=%" PRIu64 "\n", iref->ino); + DBG_DEBUG("[ceph] ceph_ll_put: ino=%" PRIu64 "\n", iref->ino); ceph_ll_put(cmount_of(handle), iref->inode); iref->inode = NULL; -- 2.46.0 From 018292bfa33938fc2a6cc0c59a4607dd29812714 Mon Sep 17 00:00:00 2001 From: Shachar Sharon Date: Tue, 16 Jul 2024 14:33:16 +0300 Subject: [PATCH 28/34] vfs_ceph_new: common prefix to debug-log messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Keep logging consistent: add "[CEPH] " prefix to DBG_DEBUG log messages where missing. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=15686 Signed-off-by: Shachar Sharon Reviewed-by: Guenther Deschner Reviewed-by: Anoop C S Autobuild-User(master): Günther Deschner Autobuild-Date(master): Mon Jul 29 15:58:15 UTC 2024 on atb-devel-224 (cherry picked from commit 3bb6d441bf047bef6d95675057cecd3865a25540) --- source3/modules/vfs_ceph_new.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/source3/modules/vfs_ceph_new.c b/source3/modules/vfs_ceph_new.c index d1c9862a1dc..3c82730f87c 100644 --- a/source3/modules/vfs_ceph_new.c +++ b/source3/modules/vfs_ceph_new.c @@ -120,7 +120,7 @@ static int cephmount_cache_add(const char *cookie, entry->mount = mount; entry->count = 1; - DBG_DEBUG("adding mount cache entry for %s\n", entry->cookie); + DBG_DEBUG("[CEPH] adding mount cache entry for %s\n", entry->cookie); DLIST_ADD(cephmount_cached, entry); *out_entry = entry; @@ -134,7 +134,7 @@ static struct cephmount_cached *cephmount_cache_update(const char *cookie) for (entry = cephmount_cached; entry; entry = entry->next) { if (strcmp(entry->cookie, cookie) == 0) { entry->count++; - DBG_DEBUG("updated mount cache: count is [%" + DBG_DEBUG("[CEPH] updated mount cache: count is [%" PRIu32 "]\n", entry->count); return entry; } @@ -147,12 +147,12 @@ static struct cephmount_cached *cephmount_cache_update(const char *cookie) static int cephmount_cache_remove(struct cephmount_cached *entry) { if (--entry->count) { - DBG_DEBUG("updated mount cache: count is [%" PRIu32 "]\n", - entry->count); + DBG_DEBUG("[CEPH] updated mount cache: count is [%" + PRIu32 "]\n", entry->count); return entry->count; } - DBG_DEBUG("removing mount cache entry for %s\n", entry->cookie); + DBG_DEBUG("[CEPH] removing mount cache entry for %s\n", entry->cookie); DLIST_REMOVE(cephmount_cached, entry); talloc_free(entry); return 0; @@ -321,7 +321,7 @@ static void vfs_ceph_disconnect(struct vfs_handle_struct *handle) ret = cephmount_cache_remove(handle->data); if (ret > 0) { 
- DBG_DEBUG("mount cache entry still in use\n"); + DBG_DEBUG("[CEPH] mount cache entry still in use\n"); return; } -- 2.46.0 From 1847edde9b714f4c49d89d4f49106327ce550b4a Mon Sep 17 00:00:00 2001 From: Shachar Sharon Date: Tue, 20 Feb 2024 19:37:45 +0200 Subject: [PATCH 29/34] docs-xml/manpages: add entry for vfs_ceph_new Create man entry for the newly added vfs_ceph_new module: almost identical to existing vfs_ceph, except for the configuration entry: [sharename] vfs objects = ceph_new ... Adds a bit of info for the motivation behind this new module. BUG: https://bugzilla.samba.org/show_bug.cgi?id=15686 Signed-off-by: Shachar Sharon Reviewed-by: Guenther Deschner Reviewed-by: Anoop C S (cherry picked from commit d8c84a2993b84ebb69011c33c1b5d44801c15363) --- docs-xml/manpages/vfs_ceph_new.8.xml | 177 +++++++++++++++++++++++++++ docs-xml/wscript_build | 1 + 2 files changed, 178 insertions(+) create mode 100644 docs-xml/manpages/vfs_ceph_new.8.xml diff --git a/docs-xml/manpages/vfs_ceph_new.8.xml b/docs-xml/manpages/vfs_ceph_new.8.xml new file mode 100644 index 00000000000..7a65b965ce0 --- /dev/null +++ b/docs-xml/manpages/vfs_ceph_new.8.xml @@ -0,0 +1,177 @@ + + + + + + vfs_ceph_new + 8 + Samba + System Administration tools + &doc.version; + + + + + vfs_ceph_new + + Utilize features provided by libcephfs low-level APIs + + + + + + vfs objects = ceph_new + + + + + DESCRIPTION + + This VFS module is part of the + samba + 8 suite. + + + The vfs_ceph_new VFS module exposes + CephFS specific features for use by Samba. + + + + Ceph is a distributed network file system designed to provide + excellent performance, reliability, and scalability. This is a + shared library allowing applications to access a Ceph + distributed file system via a POSIX-like interface. + + + + This module can be combined with other modules, but it + should be the last module in the vfs objects + list. Modules added to this list to the right of the ceph_new + entry may not have any effect at all.
+ + + + vfs_ceph_new performs mapping between Windows + and POSIX Access Control Lists (ACLs). To ensure correct + processing and enforcement of POSIX ACLs, the following Ceph + configuration parameters are automatically applied: + + + client acl type = posix_acl + fuse default permissions = false + + + + NOTE: + This is a second implementation of a ceph module which uses libcephfs + low-level APIs (compared to the original + vfs_ceph + 8 module which uses path-based + APIs). Using the low-level API allows more optimized and fine-grained + access to the Ceph storage layer. + + + + + CONFIGURATION + + + vfs_ceph_new requires that the underlying + share path is a Ceph filesystem. + + + + + ceph_new + /non-mounted/cephfs/path + no + + + + Since vfs_ceph_new does not require a + filesystem mount, the share path is treated + differently: it is interpreted as an absolute path within the + Ceph filesystem on the attached Ceph cluster. + In a ctdb cluster environment where ctdb manages Samba, + CTDB_SAMBA_SKIP_SHARE_CHECK=yes must be + configured to disable local share path checks, otherwise ctdb + will not reach a healthy state. + + + + Note that currently kernel share modes have + to be disabled in a share running with the CephFS vfs module for + file serving to work properly. + + + + + OPTIONS + + + + + ceph:config_file = path + + + Allows one to define a ceph configfile to use. Empty by default. + + + Example: ceph:config_file = + /etc/ceph/ceph.conf + + + + + + ceph:user_id = name + + + Allows one to explicitly set the client ID used for the + CephFS mount handle. Empty by default (use the libcephfs + client default). + + + Example: ceph:user_id = samba + + + + + + ceph:filesystem = fs_name + + + Allows one to explicitly select the CephFS file system + to use when the Ceph cluster supports more than one + file system. Empty by default (use the default file + system of the Ceph cluster). 
+ + + Example: ceph:filesystem = myfs2 + + + + + + + + + + VERSION + + + This man page is part of version &doc.version; of the Samba suite. + + + + + AUTHOR + + The original Samba software and related utilities + were created by Andrew Tridgell. Samba is now developed + by the Samba Team as an Open Source project similar + to the way the Linux kernel is developed. + + + + diff --git a/docs-xml/wscript_build b/docs-xml/wscript_build index 434afacaf1e..967e18a6596 100644 --- a/docs-xml/wscript_build +++ b/docs-xml/wscript_build @@ -78,6 +78,7 @@ vfs_module_manpages = ['vfs_acl_tdb', 'vfs_cap', 'vfs_catia', 'vfs_ceph', + 'vfs_ceph_new', 'vfs_ceph_snapshots', 'vfs_commit', 'vfs_crossrename', -- 2.46.0 From 5367984c57749d4fde06b54d6ffe294d25e9b3d9 Mon Sep 17 00:00:00 2001 From: Anoop C S Date: Fri, 2 Aug 2024 11:10:28 +0530 Subject: [PATCH 30/34] vfs_ceph_new: Unconditionally use ceph_select_filesystem Currently we don't have an explicit check for the presence of ceph_select_filesystem() libcephfs API as it is always found to be present with the minimum ceph version that is supported with Samba right now. Therefore under this assumption directly call ceph_select_filesystem() without any #ifdefs. Please note that this change is already part of vfs_ceph via ef0068cd. 
ref: https://gitlab.com/samba-team/samba/-/merge_requests/3715 BUG: https://bugzilla.samba.org/show_bug.cgi?id=15686 Signed-off-by: Anoop C S Reviewed-by: Guenther Deschner Autobuild-User(master): Anoop C S Autobuild-Date(master): Mon Aug 5 16:06:47 UTC 2024 on atb-devel-224 (cherry picked from commit de2f76fa47e6e672ce353ea9d3dc4019965c6491) --- source3/modules/vfs_ceph_new.c | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/source3/modules/vfs_ceph_new.c b/source3/modules/vfs_ceph_new.c index 3c82730f87c..99d4a1fe407 100644 --- a/source3/modules/vfs_ceph_new.c +++ b/source3/modules/vfs_ceph_new.c @@ -169,23 +169,6 @@ static char *cephmount_get_cookie(TALLOC_CTX * mem_ctx, const int snum) fsname); } -static int cephmount_select_fs(struct ceph_mount_info *mnt, const char *fsname) -{ - /* - * ceph_select_filesystem was added in ceph 'nautilus' (v14). - * Earlier versions of libcephfs will lack that API function. - * At the time of this writing (Feb 2023) all versions of ceph - * supported by ceph upstream have this function. - */ -#if defined(HAVE_CEPH_SELECT_FILESYSTEM) - DBG_DEBUG("[CEPH] calling: ceph_select_filesystem with %s\n", fsname); - return ceph_select_filesystem(mnt, fsname); -#else - DBG_ERR("[CEPH] ceph_select_filesystem not available\n"); - return -ENOTSUP; -#endif -} - static struct ceph_mount_info *cephmount_mount_fs(const int snum) { int ret; @@ -235,7 +218,7 @@ static struct ceph_mount_info *cephmount_mount_fs(const int snum) * 'pacific'. Permit different shares to access different file systems. 
*/ if (fsname != NULL) { - ret = cephmount_select_fs(mnt, fsname); + ret = ceph_select_filesystem(mnt, fsname); if (ret < 0) { goto err_cm_release; } -- 2.46.0 From 535e1216e502a0dfb2288fdbe7cfd78be0fa7c5b Mon Sep 17 00:00:00 2001 From: Shachar Sharon Date: Tue, 30 Jul 2024 09:55:44 +0300 Subject: [PATCH 31/34] vfs_ceph{_new}: do not set errno upon successful call to libcephfs There is code in Samba that expects errno from a previous system call to be preserved through a subsequent system call. Thus, avoid setting "errno = 0" in status_code() and lstatus_code() upon successful return from libcephfs API call. BUG: https://bugzilla.samba.org/show_bug.cgi?id=15686 Signed-off-by: Shachar Sharon Reviewed-by: Guenther Deschner Reviewed-by: Anoop C S (cherry picked from commit a7f4e2bd47c7f4728f3ac8d90af693156a69c557) --- source3/modules/vfs_ceph_new.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/source3/modules/vfs_ceph_new.c b/source3/modules/vfs_ceph_new.c index 99d4a1fe407..c11f5f24616 100644 --- a/source3/modules/vfs_ceph_new.c +++ b/source3/modules/vfs_ceph_new.c @@ -66,7 +66,6 @@ static int status_code(int ret) errno = -ret; return -1; } - errno = 0; return ret; } @@ -76,7 +75,6 @@ static ssize_t lstatus_code(intmax_t ret) errno = -((int)ret); return -1; } - errno = 0; return (ssize_t)ret; } -- 2.46.0 From 505cdc10fb9f174d67e399aca96d13d8ffef7c7c Mon Sep 17 00:00:00 2001 From: Shachar Sharon Date: Tue, 30 Jul 2024 17:36:09 +0300 Subject: [PATCH 32/34] vfs_ceph_new: handle errno properly for 'readdir' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take special care for readdir errno setting: in case of error, update errno by libcephfs (and protect from possible over-write by debug logging); in the case of successful result or end-of-stream restore errno to its previous value before calling the readdir_fn VFS hook. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=15686 Signed-off-by: Shachar Sharon Reviewed-by: Guenther Deschner Reviewed-by: Anoop C S Autobuild-User(master): Günther Deschner Autobuild-Date(master): Wed Aug 7 14:20:02 UTC 2024 on atb-devel-224 (cherry picked from commit aa043a5808b73fc272de585c1446372fa3f21d08) --- source3/modules/vfs_ceph_new.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/source3/modules/vfs_ceph_new.c b/source3/modules/vfs_ceph_new.c index c11f5f24616..cf7e6b121db 100644 --- a/source3/modules/vfs_ceph_new.c +++ b/source3/modules/vfs_ceph_new.c @@ -1482,19 +1482,20 @@ static struct dirent *vfs_ceph_readdir(struct vfs_handle_struct *handle, { const struct vfs_ceph_fh *dircfh = (const struct vfs_ceph_fh *)dirp; struct dirent *result = NULL; - int errval = 0; + int saved_errno = errno; DBG_DEBUG("[CEPH] readdir(%p, %p)\n", handle, dirp); + errno = 0; result = vfs_ceph_ll_readdir(handle, dircfh); - errval = errno; - if ((result == NULL) && (errval != 0)) { - DBG_DEBUG("[CEPH] readdir(...) = %d\n", errval); + if ((result == NULL) && (errno != 0)) { + saved_errno = errno; + DBG_DEBUG("[CEPH] readdir(...) = %d\n", errno); } else { DBG_DEBUG("[CEPH] readdir(...) = %p\n", result); } - /* re-assign errno to avoid possible over-write by DBG_DEBUG */ - errno = errval; + + errno = saved_errno; return result; } -- 2.46.0 From 9316909a34da71727f5bb70a0891d8b7ee023f95 Mon Sep 17 00:00:00 2001 From: Shachar Sharon Date: Mon, 5 Aug 2024 16:21:10 +0300 Subject: [PATCH 33/34] vfs_ceph_new: use 'ceph_new' for config-param prefix Use explicit 'ceph_new' prefix to each of the ceph specific config parameters to avoid confusion with legacy 'vfs_ceph' module. Hence, users will have in their smb.conf a format similar to: ... [smbshare] vfs objects = ceph_new ceph_new: config_file = /etc/ceph/ceph.conf ceph_new: user_id = user1 ceph_new: filesystem = fs1 ... 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=15686 Signed-off-by: Shachar Sharon Reviewed-by: Anoop C S Reviewed-by: Guenther Deschner (cherry picked from commit aca4cf8327dcaef782dedd98a63a020469c45cdb) --- source3/modules/vfs_ceph_new.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/source3/modules/vfs_ceph_new.c b/source3/modules/vfs_ceph_new.c index cf7e6b121db..25e78444fb5 100644 --- a/source3/modules/vfs_ceph_new.c +++ b/source3/modules/vfs_ceph_new.c @@ -159,10 +159,11 @@ static int cephmount_cache_remove(struct cephmount_cached *entry) static char *cephmount_get_cookie(TALLOC_CTX * mem_ctx, const int snum) { const char *conf_file = - lp_parm_const_string(snum, "ceph", "config_file", "."); - const char *user_id = lp_parm_const_string(snum, "ceph", "user_id", ""); + lp_parm_const_string(snum, "ceph_new", "config_file", "."); + const char *user_id = + lp_parm_const_string(snum, "ceph_new", "user_id", ""); const char *fsname = - lp_parm_const_string(snum, "ceph", "filesystem", ""); + lp_parm_const_string(snum, "ceph_new", "filesystem", ""); return talloc_asprintf(mem_ctx, "(%s/%s/%s)", conf_file, user_id, fsname); } @@ -174,11 +175,11 @@ static struct ceph_mount_info *cephmount_mount_fs(const int snum) struct ceph_mount_info *mnt = NULL; /* if config_file and/or user_id are NULL, ceph will use defaults */ const char *conf_file = - lp_parm_const_string(snum, "ceph", "config_file", NULL); + lp_parm_const_string(snum, "ceph_new", "config_file", NULL); const char *user_id = - lp_parm_const_string(snum, "ceph", "user_id", NULL); + lp_parm_const_string(snum, "ceph_new", "user_id", NULL); const char *fsname = - lp_parm_const_string(snum, "ceph", "filesystem", NULL); + lp_parm_const_string(snum, "ceph_new", "filesystem", NULL); DBG_DEBUG("[CEPH] calling: ceph_create\n"); ret = ceph_create(&mnt, user_id); -- 2.46.0 From 296d0833a595e6b17a653869ecea823c1e84a19c Mon Sep 17 00:00:00 2001 From: Shachar Sharon Date: Mon, 5 Aug 2024 
19:12:29 +0300 Subject: [PATCH 34/34] docs-xml/manpages: 'ceph_new' prefix for config-param of vfs_ceph_new MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With 'ceph_new' prefix used by vfs_ceph_new for config parameters, update the relevant man-page accordingly. BUG: https://bugzilla.samba.org/show_bug.cgi?id=15686 Signed-off-by: Shachar Sharon Reviewed-by: Anoop C S Reviewed-by: Guenther Deschner Autobuild-User(master): Günther Deschner Autobuild-Date(master): Thu Aug 8 13:54:34 UTC 2024 on atb-devel-224 (cherry picked from commit 68f0835c8e1c5029cd831c267b75c02185b206c7) --- docs-xml/manpages/vfs_ceph_new.8.xml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs-xml/manpages/vfs_ceph_new.8.xml b/docs-xml/manpages/vfs_ceph_new.8.xml index 7a65b965ce0..b0640a591a5 100644 --- a/docs-xml/manpages/vfs_ceph_new.8.xml +++ b/docs-xml/manpages/vfs_ceph_new.8.xml @@ -111,20 +111,20 @@ - ceph:config_file = path + ceph_new:config_file = path Allows one to define a ceph configfile to use. Empty by default. - Example: ceph:config_file = + Example: ceph_new:config_file = /etc/ceph/ceph.conf - ceph:user_id = name + ceph_new:user_id = name Allows one to explicitly set the client ID used for the @@ -132,13 +132,13 @@ client default). - Example: ceph:user_id = samba + Example: ceph_new:user_id = samba - ceph:filesystem = fs_name + ceph_new:filesystem = fs_name Allows one to explicitly select the CephFS file system @@ -147,7 +147,7 @@ system of the Ceph cluster). - Example: ceph:filesystem = myfs2 + Example: ceph_new:filesystem = myfs2 -- 2.46.0