From cb8b6a8b8ea983844584d8ada4d9aa4c88c997fb Mon Sep 17 00:00:00 2001
From: Alastair Houghton
Date: Tue, 29 Dec 2020 14:02:39 +0000
Subject: [PATCH] cifs.upcall: try to use container ipc/uts/net/pid/mnt/user namespaces

In certain scenarios (e.g. kerberos multimount), when a process does
syscalls, the kernel sometimes has to query information or trigger some
actions in userspace. To do so it calls the cifs.upcall binary with
information on the process that triggered the syscall in the first
place.

    ls(pid=10) ====> open("foo") ====> kernel

                                  that user doesn't have an SMB
                                  session, let's create one using his
                                  kerberos credential cache

                                  call cifs.upcall and ask for krb info
                                  for whoever owns pid=10
                                             |
                  cifs.upcall --pid 10   <=================+

                  ...gather info...
                  return binary blob used
                  when establishing SMB session
                        ===================> kernel
                                             open SMB session, handle
                                             open() syscall
    ls <=================================== return open() result to ls

On a system using containers, the kernel is still calling the host
cifs.upcall and using the host configuration (for network, pid, etc).

This patch changes the behaviour of cifs.upcall so that it uses the
calling process's namespaces (ls in the example) when doing its job.

Note that the kernel still calls the binary on the host, but the binary
will place itself in the context of the calling process's namespaces.

This code makes use of (but shouldn't require) the following kernel
config options and syscall flags:

approx.
year | introduced | config/flags
---------------+----------------
          2008 | CONFIG_NAMESPACES=y
          2007 | CONFIG_UTS_NS=y
          2020 | CONFIG_TIME_NS=y
          2006 | CONFIG_IPC_NS=y
          2007 | CONFIG_USER_NS
          2008 | CONFIG_PID_NS=y
          2007 | CONFIG_NET_NS=y
          2007 | CONFIG_CGROUPS
          2016 | CLONE_NEWCGROUP setns() flag

Signed-off-by: Aurelien Aptel
Signed-off-by: Alastair Houghton
---
Reviewer notes (this section is ignored by git-am):
 - Line breaks and indentation were reconstructed from a flattened copy
   of this patch; whitespace on context lines is approximate, so apply
   with --ignore-whitespace if needed.
 - The <...> header names had been stripped. <sched.h> is what setns()
   and the CLONE_NEW* flags require; the three context #include lines of
   the first hunk are presumed -- verify them against the target tree.
 - In the setns() failure path, errno is now saved before syslog() runs.
 - The blob ids on the index line predate these adjustments.

 cifs.upcall.c | 177 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 177 insertions(+)

diff --git a/cifs.upcall.c b/cifs.upcall.c
index 1559434..141dc66 100644
--- a/cifs.upcall.c
+++ b/cifs.upcall.c
@@ -51,6 +51,7 @@
 #include <grp.h>
 #include <stdbool.h>
 #include <errno.h>
+#include <sched.h>
 
 #include "data_blob.h"
 #include "spnego.h"
@@ -230,6 +231,169 @@ err_cache:
 	return credtime;
 }
 
+static struct namespace_file {
+	int nstype;
+	const char *name;
+	int fd;
+} namespace_files[] = {
+
+#ifdef CLONE_NEWCGROUP
+	{ CLONE_NEWCGROUP, "cgroup", -1 },
+#endif
+
+#ifdef CLONE_NEWIPC
+	{ CLONE_NEWIPC, "ipc", -1 },
+#endif
+
+#ifdef CLONE_NEWUTS
+	{ CLONE_NEWUTS, "uts", -1 },
+#endif
+
+#ifdef CLONE_NEWNET
+	{ CLONE_NEWNET, "net", -1 },
+#endif
+
+#ifdef CLONE_NEWPID
+	{ CLONE_NEWPID, "pid", -1 },
+#endif
+
+#ifdef CLONE_NEWTIME
+	{ CLONE_NEWTIME, "time", -1 },
+#endif
+
+#ifdef CLONE_NEWNS
+	{ CLONE_NEWNS, "mnt", -1 },
+#endif
+
+#ifdef CLONE_NEWUSER
+	{ CLONE_NEWUSER, "user", -1 },
+#endif
+};
+
+#define NS_PATH_FMT    "/proc/%d/ns/%s"
+#define NS_PATH_MAXLEN (6 + 10 + 4 + 6 + 1)
+
+/**
+ * in_same_user_ns - return true if two processes are in the same user
+ *                   namespace.
+ * @pid_a: the pid of the first process
+ * @pid_b: the pid of the second process
+ *
+ * Works by comparing the inode numbers for /proc/<pid>/ns/user.
+ *
+ * Returns 1 when both inode numbers match, 0 when they differ or when
+ * either stat() call fails.
+ */
+static int
+in_same_user_ns(pid_t pid_a, pid_t pid_b)
+{
+	char path[NS_PATH_MAXLEN];
+	ino_t a_ino, b_ino;
+	struct stat st;
+
+	snprintf(path, sizeof(path), NS_PATH_FMT, pid_a, "user");
+	if (stat(path, &st) != 0)
+		return 0;
+	a_ino = st.st_ino;
+
+	snprintf(path, sizeof(path), NS_PATH_FMT, pid_b, "user");
+	if (stat(path, &st) != 0)
+		return 0;
+	b_ino = st.st_ino;
+
+	return a_ino == b_ino;
+}
+
+/**
+ * switch_to_process_ns - change the namespace to the one for the specified
+ *                        process.
+ * @pid: initiating pid value from the upcall string
+ *
+ * Uses setns() to switch process namespace.
+ * This ensures that we have the same access and configuration as the
+ * process that triggered the lookup.
+ *
+ * Returns 0 on success, or -1 with errno set to the cause of the failure.
+ */
+static int
+switch_to_process_ns(pid_t pid)
+{
+	int count = sizeof(namespace_files) / sizeof(struct namespace_file);
+	int n, err = 0;
+	int rc = 0;
+
+	/* First, open all the namespace fds. We do this first because
+	   the namespace changes might prohibit us from opening them. */
+	for (n = 0; n < count; ++n) {
+		char nspath[NS_PATH_MAXLEN];
+		int ret, fd;
+
+#ifdef CLONE_NEWUSER
+		if (namespace_files[n].nstype == CLONE_NEWUSER
+		    && in_same_user_ns(getpid(), pid)) {
+			/* Switching to the same user namespace is forbidden,
+			   because switching to a user namespace grants all
+			   capabilities in that namespace regardless of uid. */
+			namespace_files[n].fd = -1;
+			continue;
+		}
+#endif
+
+		ret = snprintf(nspath, NS_PATH_MAXLEN, NS_PATH_FMT,
+			       pid, namespace_files[n].name);
+		if (ret >= NS_PATH_MAXLEN) {
+			syslog(LOG_DEBUG, "%s: unterminated path!\n", __func__);
+			err = ENAMETOOLONG;
+			rc = -1;
+			goto out;
+		}
+
+		fd = open(nspath, O_RDONLY);
+		if (fd < 0 && errno != ENOENT) {
+			/*
+			 * don't stop on non-existing ns
+			 * but stop for other errors
+			 */
+			err = errno;
+			rc = -1;
+			goto out;
+		}
+
+		namespace_files[n].fd = fd;
+	}
+
+	/* Next, call setns for each of them */
+	for (n = 0; n < count; ++n) {
+		/* skip non-existing ns */
+		if (namespace_files[n].fd < 0)
+			continue;
+
+		rc = setns(namespace_files[n].fd, namespace_files[n].nstype);
+
+		if (rc < 0) {
+			err = errno;	/* save first: syslog() may clobber errno */
+			syslog(LOG_DEBUG, "%s: setns() failed for %s\n",
+			       __func__, namespace_files[n].name);
+			goto out;
+		}
+	}
+
+out:
+	/* Finally, close all the fds */
+	for (n = 0; n < count; ++n) {
+		if (namespace_files[n].fd != -1) {
+			close(namespace_files[n].fd);
+			namespace_files[n].fd = -1;
+		}
+	}
+
+	if (rc != 0) {
+		errno = err;
+	}
+
+	return rc;
+}
+
 #define ENV_PATH_FMT "/proc/%d/environ"
 #define ENV_PATH_MAXLEN (6 + 10 + 8 + 1)
 
@@ -1099,6 +1263,19 @@ int main(const int argc, char *const argv[])
 	env_cachename =
 		get_cachename_from_process_env(env_probe ? arg.pid : 0);
 
+	/*
+	 * Change to the process's namespace. This means that things will work
+	 * acceptably in containers, because we'll be looking at the correct
+	 * filesystem and have the correct network configuration.
+	 */
+	rc = switch_to_process_ns(arg.pid);
+	if (rc == -1) {
+		syslog(LOG_ERR, "unable to switch to process namespace: %s",
+		       strerror(errno));
+		rc = 1;
+		goto out;
+	}
+
 	rc = setuid(uid);
 	if (rc == -1) {
 		syslog(LOG_ERR, "setuid: %s", strerror(errno));