Skip to content

Commit

Permalink
Add Linux namespace delegation support
Browse files Browse the repository at this point in the history
This allows ZFS datasets to be delegated to a user/mount namespace.
Within that namespace, only the delegated datasets are visible.
This works very similarly to Zones/Jails on other ZFS OSes.

As a user:
```
$ unshare -Um
$ zfs list
no datasets available
$ readlink /proc/$$/ns/user
user:[4026532291]
```

As root:
```
NAME                            ZONED  MOUNTPOINT
containers                      off    /containers
containers/host                 off    /containers/host
containers/host/child           off    /containers/host/child
containers/host/child/gchild    off    /containers/host/child/gchild
containers/unpriv               on     /unpriv
containers/unpriv/child         on     /unpriv/child
containers/unpriv/child/gchild  on     /unpriv/child/gchild

```

Back to the user:
```
$ zfs list
NAME                             USED  AVAIL     REFER  MOUNTPOINT
containers                       129M  47.8G       24K  /containers
containers/unpriv                128M  47.8G       24K  /unpriv
containers/unpriv/child          128M  47.8G      128M  /unpriv/child
```

Signed-off-by: Will Andrews <will.andrews@klarasystems.com>
Signed-off-by: Allan Jude <allan@klarasystems.com>
Sponsored-by: Buddy <https://buddy.works>
  • Loading branch information
Will Andrews authored and allanjude committed Jun 21, 2021
1 parent ba91311 commit 03dc659
Show file tree
Hide file tree
Showing 21 changed files with 705 additions and 11 deletions.
60 changes: 60 additions & 0 deletions cmd/zfs/zfs_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,10 @@ static int zfs_do_jail(int argc, char **argv);
static int zfs_do_unjail(int argc, char **argv);
#endif

#ifdef __linux__
static int zfs_do_userns(int argc, char **argv);
#endif

/*
* Enable a reasonable set of defaults for libumem debugging on DEBUG builds.
*/
Expand Down Expand Up @@ -184,6 +188,7 @@ typedef enum {
HELP_JAIL,
HELP_UNJAIL,
HELP_WAIT,
HELP_USERNS,
} zfs_help_t;

typedef struct zfs_command {
Expand Down Expand Up @@ -254,6 +259,10 @@ static zfs_command_t command_table[] = {
{ "jail", zfs_do_jail, HELP_JAIL },
{ "unjail", zfs_do_unjail, HELP_UNJAIL },
#endif

#ifdef __linux__
{ "userns", zfs_do_userns, HELP_USERNS },
#endif
};

#define NCOMMAND (sizeof (command_table) / sizeof (command_table[0]))
Expand Down Expand Up @@ -414,6 +423,8 @@ get_usage(zfs_help_t idx)
return (gettext("\tunjail <jailid|jailname> <filesystem>\n"));
case HELP_WAIT:
return (gettext("\twait [-t <activity>] <filesystem>\n"));
case HELP_USERNS:
return (gettext("\tuserns <add|del> <nsnum> <filesystem>\n"));
}

abort();
Expand Down Expand Up @@ -8729,6 +8740,55 @@ main(int argc, char **argv)
return (ret);
}

/*
* zfs userns add|del nsnum filesystem
*
* Add or delete the given dataset to/from the namespace.
*/
#ifdef __linux__
/*
 * Implements 'zfs userns <add|del> <nsnum> <filesystem>'.
 *
 * argv[1] selects the operation ("add" attaches, "del" detaches), argv[2]
 * is the decimal number of the user namespace and argv[3] names the
 * filesystem.  Returns 0 on success and 1 if the filesystem could not be
 * opened or zfs_userns() failed.  Malformed command lines are reported
 * through usage(B_FALSE).
 */
static int
zfs_do_userns(int argc, char **argv)
{
	zfs_handle_t *zhp;
	unsigned long nsnum;
	char *end;
	int ret;
	/*
	 * Initialised so that the value is well defined on every path;
	 * NOTE(review): usage() is expected not to return -- confirm.
	 */
	int attach = 0;

	if (argc < 4) {
		(void) fprintf(stderr, gettext("missing argument(s)\n"));
		usage(B_FALSE);
	}
	if (argc > 4) {
		(void) fprintf(stderr, gettext("too many arguments\n"));
		usage(B_FALSE);
	}

	if (strcmp(argv[1], "add") == 0) {
		attach = 1;
	} else if (strcmp(argv[1], "del") == 0) {
		attach = 0;
	} else {
		(void) fprintf(stderr, gettext("invalid subcommand\n"));
		usage(B_FALSE);
	}

	/*
	 * strtoul() on its own accepts leading whitespace, a sign, trailing
	 * garbage and even a string without any digits (yielding 0).  Insist
	 * that the argument starts with a digit and is consumed completely,
	 * and that the value fits the unsigned int taken by zfs_userns().
	 */
	nsnum = strtoul(argv[2], &end, 10);
	if (argv[2][0] < '0' || argv[2][0] > '9' || *end != '\0' ||
	    nsnum > UINT_MAX) {
		(void) fprintf(stderr, gettext("invalid namespace number\n"));
		usage(B_FALSE);
	}

	zhp = zfs_open(g_zfs, argv[3], ZFS_TYPE_FILESYSTEM);
	if (zhp == NULL)
		return (1);

	/* Collapse any non-zero library error into exit status 1. */
	ret = (zfs_userns(zhp, (unsigned int)nsnum, attach) != 0);

	zfs_close(zhp);
	return (ret);
}
#endif

#ifdef __FreeBSD__
#include <sys/jail.h>
#include <jail.h>
Expand Down
23 changes: 23 additions & 0 deletions config/kernel-user-ns-inum.m4
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
dnl #
dnl # 3.18 API change
dnl # struct user_namespace inum moved from .proc_inum to .ns.inum.
dnl #
dnl #
dnl # Source half of the check: supplies the "user_ns_common_inum" test
dnl # snippet, which includes <linux/user_namespace.h> and assigns to the
dnl # ns.inum member of a struct user_namespace.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_USER_NS_COMMON_INUM], [
ZFS_LINUX_TEST_SRC([user_ns_common_inum], [
#include <linux/user_namespace.h>
], [
struct user_namespace uns;
uns.ns.inum = 0;
])
])

dnl #
dnl # Result half of the check: reports the outcome of the
dnl # "user_ns_common_inum" test and, on the "yes" branch, defines
dnl # HAVE_USER_NS_COMMON_INUM to 1.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_USER_NS_COMMON_INUM], [
AC_MSG_CHECKING([whether user_namespace->ns.inum exists])
ZFS_LINUX_TEST_RESULT([user_ns_common_inum], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_USER_NS_COMMON_INUM, 1,
[user_namespace->ns.inum exists])
],[
AC_MSG_RESULT(no)
])
])
2 changes: 2 additions & 0 deletions config/kernel.m4
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
ZFS_AC_KERNEL_SRC_SIGNAL_STOP
ZFS_AC_KERNEL_SRC_SIGINFO
ZFS_AC_KERNEL_SRC_SET_SPECIAL_STATE
ZFS_AC_KERNEL_SRC_USER_NS_COMMON_INUM
AC_MSG_CHECKING([for available kernel interfaces])
ZFS_LINUX_TEST_COMPILE_ALL([kabi])
Expand Down Expand Up @@ -237,6 +238,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
ZFS_AC_KERNEL_SIGNAL_STOP
ZFS_AC_KERNEL_SIGINFO
ZFS_AC_KERNEL_SET_SPECIAL_STATE
ZFS_AC_KERNEL_USER_NS_COMMON_INUM
])

dnl #
Expand Down
9 changes: 9 additions & 0 deletions include/libzfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -960,6 +960,15 @@ _LIBZFS_H int zpool_nextboot(libzfs_handle_t *, uint64_t, uint64_t,

#endif /* __FreeBSD__ */

#ifdef __linux__

/*
* Add or delete the given filesystem to/from the given user namespace.
*/
extern int zfs_userns(zfs_handle_t *zhp, unsigned int nsnum, int attach);

#endif

#ifdef __cplusplus
}
#endif
Expand Down
31 changes: 27 additions & 4 deletions include/os/linux/spl/sys/zone.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,34 @@
#define _SPL_ZONE_H

#include <sys/byteorder.h>
#include <sys/cred.h>

#define GLOBAL_ZONEID 0
#include <linux/cred.h>
#include <linux/user_namespace.h>

#define zone_dataset_visible(x, y) (1)
#define crgetzoneid(x) (GLOBAL_ZONEID)
#define INGLOBALZONE(z) (1)
/*
* Attach the given dataset to the given user namespace.
*/
extern int zone_dataset_attach(cred_t *, const char *, unsigned int);

/*
* Detach the given dataset from the given user namespace.
*/
extern int zone_dataset_detach(cred_t *, const char *, unsigned int);

/*
* Returns true if the named pool/dataset is visible in the current zone.
*/
extern int zone_dataset_visible(const char *dataset, int *write);

int spl_zone_init(void);
void spl_zone_fini(void);

extern unsigned int crgetzoneid(const cred_t *);
extern unsigned int global_zoneid(void);
extern boolean_t inglobalzone(proc_t *);

#define INGLOBALZONE(x) inglobalzone(x)
#define GLOBAL_ZONEID global_zoneid()

#endif /* SPL_ZONE_H */
2 changes: 1 addition & 1 deletion lib/libspl/include/sys/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
#include <inttypes.h>
#endif /* HAVE_INTTYPES */

typedef int zoneid_t;
typedef uint zoneid_t;
typedef int projid_t;

/*
Expand Down
8 changes: 7 additions & 1 deletion lib/libspl/include/zone.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,13 @@
extern "C" {
#endif

#define GLOBAL_ZONEID 0
/*
* The inode number of the kernel's root user namespace, which is hardcoded
* in the kernel. A "better" way to obtain it would be ioctl_ns(2), but that
* would have to be applied recursively with NS_GET_PARENT and then
* NS_GET_USERNS. Also, ioctl_ns(2) is only supported since Linux 4.9.
*/
#define GLOBAL_ZONEID 4026531837U

extern zoneid_t getzoneid(void);

Expand Down
41 changes: 40 additions & 1 deletion lib/libspl/os/linux/zone.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,49 @@
* Use is subject to license terms.
*/

#include <unistd.h>
#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
#include <limits.h>
#include <string.h>

#include <zone.h>

zoneid_t
getzoneid()
{
return (GLOBAL_ZONEID);
zoneid_t z = 0;
char path[PATH_MAX];
char buf[128] = { '\0' };
char *cp, *cp_end;
unsigned long n;
int c;
ssize_t r;

c = snprintf(path, sizeof (path), "/proc/%d/ns/user", getpid());
/* This API doesn't have any error checking... */
if ((size_t)c >= sizeof (path))
goto out;

r = readlink(path, buf, sizeof (buf) - 1);
if ((size_t)r >= sizeof (buf))
goto out;

cp = strchr(buf, '[');
if (cp == NULL)
goto out;
cp++;
cp_end = strchr(cp, ']');
if (cp_end == NULL)
goto out;

*cp_end = '\0';
n = strtoul(cp, NULL, 10);
if (errno == ERANGE)
goto out;
z = (zoneid_t)n;

out:
return (z);
}
53 changes: 53 additions & 0 deletions lib/libzfs/os/linux/libzfs_util_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -213,3 +213,56 @@ zfs_version_kernel(char *version, int len)

return (0);
}

/*
* Add or delete the given filesystem to/from the given user namespace.
*/
/*
 * Attach the filesystem behind 'zhp' to the user namespace numbered 'nsnum'
 * when 'attach' is non-zero, or detach it from that namespace otherwise.
 *
 * Volume, snapshot and bookmark handles are rejected with EZFS_BADTYPE and
 * the result of zfs_error() is returned.  Otherwise the return value of the
 * ioctl is returned, after reporting a failure via zfs_standard_error().
 */
int
zfs_userns(zfs_handle_t *zhp, unsigned int nsnum, int attach)
{
	libzfs_handle_t *hdl = zhp->zfs_hdl;
	zfs_cmd_t zc = {"\0"};
	char errbuf[1024];
	unsigned long request;
	int err;

	/* Prefix used for every error reported from here on. */
	(void) snprintf(errbuf, sizeof (errbuf), attach ?
	    dgettext(TEXT_DOMAIN, "cannot add '%s' to namespace") :
	    dgettext(TEXT_DOMAIN, "cannot remove '%s' from namespace"),
	    zhp->zfs_name);

	/* Refuse the handle types that cannot be namespaced. */
	if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "volumes can not be namespaced"));
		return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
	} else if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "snapshots can not be namespaced"));
		return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
	} else if (zhp->zfs_type == ZFS_TYPE_BOOKMARK) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "bookmarks can not be namespaced"));
		return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
	}
	assert(zhp->zfs_type == ZFS_TYPE_FILESYSTEM);

	/* Fill in the command: namespace number, object type, dataset. */
	zc.zc_zoneid = nsnum;
	zc.zc_objset_type = DMU_OST_ZFS;
	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));

	/* Attaching and detaching use the jail/unjail ioctls. */
	if (attach)
		request = ZFS_IOC_JAIL;
	else
		request = ZFS_IOC_UNJAIL;

	err = zfs_ioctl(hdl, request, &zc);
	if (err != 0)
		zfs_standard_error(hdl, errno, errbuf);

	return (err);
}
1 change: 1 addition & 0 deletions module/os/linux/spl/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ $(MODULE)-objs += ../os/linux/spl/spl-tsd.o
$(MODULE)-objs += ../os/linux/spl/spl-vmem.o
$(MODULE)-objs += ../os/linux/spl/spl-xdr.o
$(MODULE)-objs += ../os/linux/spl/spl-zlib.o
$(MODULE)-objs += ../os/linux/spl/spl-zone.o
6 changes: 6 additions & 0 deletions module/os/linux/spl/spl-generic.c
Original file line number Diff line number Diff line change
Expand Up @@ -803,8 +803,13 @@ spl_init(void)
if ((rc = spl_zlib_init()))
goto out7;

if ((rc = spl_zone_init()))
goto out8;

return (rc);

out8:
spl_zlib_fini();
out7:
spl_kstat_fini();
out6:
Expand All @@ -824,6 +829,7 @@ spl_init(void)
static void __exit
spl_fini(void)
{
spl_zone_fini();
spl_zlib_fini();
spl_kstat_fini();
spl_proc_fini();
Expand Down
Loading

0 comments on commit 03dc659

Please sign in to comment.