Skip to content

Commit

Permalink
Add Linux namespace delegation support
Browse files Browse the repository at this point in the history
This allows ZFS datasets to be delegated to a user/mount namespace
Within that namespace, only the delegated datasets are visible
Works very similarly to Zones/Jailes on other ZFS OSes

As a user:
```
 $ unshare -Um
 $ zfs list
no datasets available
 $ echo $$
1234
```

As root:
```
 # zfs list
NAME                            ZONED  MOUNTPOINT
containers                      off    /containers
containers/host                 off    /containers/host
containers/host/child           off    /containers/host/child
containers/host/child/gchild    off    /containers/host/child/gchild
containers/unpriv               on     /unpriv
containers/unpriv/child         on     /unpriv/child
containers/unpriv/child/gchild  on     /unpriv/child/gchild

 # zfs zone /proc/1234/ns/user containers/unpriv
```

Back to the user namespace:
```
 $ zfs list
NAME                             USED  AVAIL     REFER  MOUNTPOINT
containers                       129M  47.8G       24K  /containers
containers/unpriv                128M  47.8G       24K  /unpriv
containers/unpriv/child          128M  47.8G      128M  /unpriv/child
```

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Will Andrews <will.andrews@klarasystems.com>
Signed-off-by: Allan Jude <allan@klarasystems.com>
Signed-off-by: Mateusz Piotrowski <mateusz.piotrowski@klarasystems.com>
Co-authored-by: Allan Jude <allan@klarasystems.com>
Co-authored-by: Mateusz Piotrowski <mateusz.piotrowski@klarasystems.com>
Sponsored-by: Buddy <https://buddy.works>
Closes openzfs#12263
  • Loading branch information
Will Andrews authored and andrewc12 committed Sep 23, 2022
1 parent 67c5c02 commit 2d286df
Show file tree
Hide file tree
Showing 33 changed files with 1,166 additions and 15 deletions.
60 changes: 60 additions & 0 deletions cmd/zfs/zfs_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,11 @@ static int zfs_do_jail(int argc, char **argv);
static int zfs_do_unjail(int argc, char **argv);
#endif

#ifdef __linux__
static int zfs_do_zone(int argc, char **argv);
static int zfs_do_unzone(int argc, char **argv);
#endif

/*
* Enable a reasonable set of defaults for libumem debugging on DEBUG builds.
*/
Expand Down Expand Up @@ -184,6 +189,8 @@ typedef enum {
HELP_JAIL,
HELP_UNJAIL,
HELP_WAIT,
HELP_ZONE,
HELP_UNZONE,
} zfs_help_t;

typedef struct zfs_command {
Expand Down Expand Up @@ -254,6 +261,11 @@ static zfs_command_t command_table[] = {
{ "jail", zfs_do_jail, HELP_JAIL },
{ "unjail", zfs_do_unjail, HELP_UNJAIL },
#endif

#ifdef __linux__
{ "zone", zfs_do_zone, HELP_ZONE },
{ "unzone", zfs_do_unzone, HELP_UNZONE },
#endif
};

#define NCOMMAND (sizeof (command_table) / sizeof (command_table[0]))
Expand Down Expand Up @@ -415,6 +427,10 @@ get_usage(zfs_help_t idx)
return (gettext("\tunjail <jailid|jailname> <filesystem>\n"));
case HELP_WAIT:
return (gettext("\twait [-t <activity>] <filesystem>\n"));
case HELP_ZONE:
return (gettext("\tzone <nsfile> <filesystem>\n"));
case HELP_UNZONE:
return (gettext("\tunzone <nsfile> <filesystem>\n"));
default:
__builtin_unreachable();
}
Expand Down Expand Up @@ -8692,6 +8708,50 @@ main(int argc, char **argv)
return (ret);
}

/*
* zfs zone nsfile filesystem
*
* Add or delete the given dataset to/from the namespace.
*/
#ifdef __linux__
static int
zfs_do_zone_impl(int argc, char **argv, boolean_t attach)
{
zfs_handle_t *zhp;
int ret;

if (argc < 3) {
(void) fprintf(stderr, gettext("missing argument(s)\n"));
usage(B_FALSE);
}
if (argc > 3) {
(void) fprintf(stderr, gettext("too many arguments\n"));
usage(B_FALSE);
}

zhp = zfs_open(g_zfs, argv[2], ZFS_TYPE_FILESYSTEM);
if (zhp == NULL)
return (1);

ret = (zfs_userns(zhp, argv[1], attach) != 0);

zfs_close(zhp);
return (ret);
}

static int
zfs_do_zone(int argc, char **argv)
{
return (zfs_do_zone_impl(argc, argv, B_TRUE));
}

static int
zfs_do_unzone(int argc, char **argv)
{
return (zfs_do_zone_impl(argc, argv, B_FALSE));
}
#endif

#ifdef __FreeBSD__
#include <sys/jail.h>
#include <jail.h>
Expand Down
23 changes: 23 additions & 0 deletions config/kernel-user-ns-inum.m4
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
dnl #
dnl # 3.18 API change
dnl # struct user_namespace inum moved from .proc_inum to .ns.inum.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_USER_NS_COMMON_INUM], [
ZFS_LINUX_TEST_SRC([user_ns_common_inum], [
#include <linux/user_namespace.h>
], [
struct user_namespace uns;
uns.ns.inum = 0;
])
])

AC_DEFUN([ZFS_AC_KERNEL_USER_NS_COMMON_INUM], [
AC_MSG_CHECKING([whether user_namespace->ns.inum exists])
ZFS_LINUX_TEST_RESULT([user_ns_common_inum], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_USER_NS_COMMON_INUM, 1,
[user_namespace->ns.inum exists])
],[
AC_MSG_RESULT(no)
])
])
2 changes: 2 additions & 0 deletions config/kernel.m4
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
ZFS_AC_KERNEL_SRC_KTHREAD
ZFS_AC_KERNEL_SRC_ZERO_PAGE
ZFS_AC_KERNEL_SRC___COPY_FROM_USER_INATOMIC
ZFS_AC_KERNEL_SRC_USER_NS_COMMON_INUM
AC_MSG_CHECKING([for available kernel interfaces])
ZFS_LINUX_TEST_COMPILE_ALL([kabi])
Expand Down Expand Up @@ -263,6 +264,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
ZFS_AC_KERNEL_KTHREAD
ZFS_AC_KERNEL_ZERO_PAGE
ZFS_AC_KERNEL___COPY_FROM_USER_INATOMIC
ZFS_AC_KERNEL_USER_NS_COMMON_INUM
])

dnl #
Expand Down
1 change: 1 addition & 0 deletions contrib/pyzfs/libzfs_core/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ def enum(*sequential, **named):
'ZFS_ERR_REBUILD_IN_PROGRESS',
'ZFS_ERR_BADPROP',
'ZFS_ERR_VDEV_NOTSUP',
'ZFS_ERR_NOT_USER_NAMESPACE',
],
{}
)
Expand Down
10 changes: 10 additions & 0 deletions include/libzfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ typedef enum zfs_error {
EZFS_EXPORT_IN_PROGRESS, /* currently exporting the pool */
EZFS_REBUILDING, /* resilvering (sequential reconstrution) */
EZFS_VDEV_NOTSUP, /* ops not supported for this type of vdev */
EZFS_NOT_USER_NAMESPACE, /* a file is not a user namespace */
EZFS_UNKNOWN
} zfs_error_t;

Expand Down Expand Up @@ -979,6 +980,15 @@ _LIBZFS_H int zpool_nextboot(libzfs_handle_t *, uint64_t, uint64_t,

#endif /* __FreeBSD__ */

#ifdef __linux__

/*
* Add or delete the given filesystem to/from the given user namespace.
*/
_LIBZFS_H int zfs_userns(zfs_handle_t *zhp, const char *nspath, int attach);

#endif

#ifdef __cplusplus
}
#endif
Expand Down
31 changes: 27 additions & 4 deletions include/os/linux/spl/sys/zone.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,34 @@
#define _SPL_ZONE_H

#include <sys/byteorder.h>
#include <sys/cred.h>

#define GLOBAL_ZONEID 0
#include <linux/cred.h>
#include <linux/user_namespace.h>

#define zone_dataset_visible(x, y) (1)
#define crgetzoneid(x) (GLOBAL_ZONEID)
#define INGLOBALZONE(z) (1)
/*
* Attach the given dataset to the given user namespace.
*/
extern int zone_dataset_attach(cred_t *, const char *, int);

/*
* Detach the given dataset from the given user namespace.
*/
extern int zone_dataset_detach(cred_t *, const char *, int);

/*
* Returns true if the named pool/dataset is visible in the current zone.
*/
extern int zone_dataset_visible(const char *dataset, int *write);

int spl_zone_init(void);
void spl_zone_fini(void);

extern unsigned int crgetzoneid(const cred_t *);
extern unsigned int global_zoneid(void);
extern boolean_t inglobalzone(proc_t *);

#define INGLOBALZONE(x) inglobalzone(x)
#define GLOBAL_ZONEID global_zoneid()

#endif /* SPL_ZONE_H */
3 changes: 3 additions & 0 deletions include/sys/fs/zfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -1471,7 +1471,9 @@ typedef enum zfs_ioc {
ZFS_IOC_EVENTS_SEEK, /* 0x83 (Linux) */
ZFS_IOC_NEXTBOOT, /* 0x84 (FreeBSD) */
ZFS_IOC_JAIL, /* 0x85 (FreeBSD) */
ZFS_IOC_USERNS_ATTACH = ZFS_IOC_JAIL, /* 0x85 (Linux) */
ZFS_IOC_UNJAIL, /* 0x86 (FreeBSD) */
ZFS_IOC_USERNS_DETACH = ZFS_IOC_UNJAIL, /* 0x86 (Linux) */
ZFS_IOC_SET_BOOTENV, /* 0x87 */
ZFS_IOC_GET_BOOTENV, /* 0x88 */
ZFS_IOC_UNREGISTER_FS, /* 0x89 (Windows) */
Expand Down Expand Up @@ -1555,6 +1557,7 @@ typedef enum {
ZFS_ERR_REBUILD_IN_PROGRESS,
ZFS_ERR_BADPROP,
ZFS_ERR_VDEV_NOTSUP,
ZFS_ERR_NOT_USER_NAMESPACE,
} zfs_errno_t;

/*
Expand Down
2 changes: 1 addition & 1 deletion lib/libspl/include/sys/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
#include <inttypes.h>
#endif /* HAVE_INTTYPES */

typedef int zoneid_t;
typedef uint_t zoneid_t;
typedef int projid_t;

/*
Expand Down
12 changes: 11 additions & 1 deletion lib/libspl/include/zone.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,17 @@
extern "C" {
#endif

#define GLOBAL_ZONEID 0
#ifdef __FreeBSD__
#define GLOBAL_ZONEID 0
#else
/*
* Hardcoded in the kernel's root user namespace. A "better" way to get
* this would be by using ioctl_ns(2), but this would need to be performed
* recursively on NS_GET_PARENT and then NS_GET_USERNS. Also, that's only
* supported since Linux 4.9.
*/
#define GLOBAL_ZONEID 4026531837U
#endif

extern zoneid_t getzoneid(void);

Expand Down
32 changes: 31 additions & 1 deletion lib/libspl/os/linux/zone.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,40 @@
* Use is subject to license terms.
*/

#include <unistd.h>
#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
#include <limits.h>
#include <string.h>

#include <zone.h>

zoneid_t
getzoneid(void)
{
return (GLOBAL_ZONEID);
char path[PATH_MAX];
char buf[128] = { '\0' };
char *cp;

int c = snprintf(path, sizeof (path), "/proc/self/ns/user");
/* This API doesn't have any error checking... */
if (c < 0)
return (0);

ssize_t r = readlink(path, buf, sizeof (buf) - 1);
if (r < 0)
return (0);

cp = strchr(buf, '[');
if (cp == NULL)
return (0);
cp++;

unsigned long n = strtoul(cp, NULL, 10);
if (n == ULONG_MAX && errno == ERANGE)
return (0);
zoneid_t z = (zoneid_t)n;

return (z);
}
2 changes: 1 addition & 1 deletion lib/libuutil/libuutil.abi
Original file line number Diff line number Diff line change
Expand Up @@ -1081,7 +1081,7 @@
</function-decl>
</abi-instr>
<abi-instr address-size='64' path='os/linux/zone.c' language='LANG_C99'>
<typedef-decl name='zoneid_t' type-id='95e97e5e' id='4da03624'/>
<typedef-decl name='zoneid_t' type-id='3502e3ff' id='4da03624'/>
<function-decl name='getzoneid' mangled-name='getzoneid' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='getzoneid'>
<return type-id='4da03624'/>
</function-decl>
Expand Down
9 changes: 8 additions & 1 deletion lib/libzfs/libzfs.abi
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,7 @@
<elf-symbol name='zfs_unmountall' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_unshare' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_unshareall' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_userns' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_userspace' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_valid_proplist' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_version_kernel' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
Expand Down Expand Up @@ -1537,7 +1538,7 @@
</function-decl>
</abi-instr>
<abi-instr address-size='64' path='lib/libspl/os/linux/zone.c' language='LANG_C99'>
<typedef-decl name='zoneid_t' type-id='95e97e5e' id='4da03624'/>
<typedef-decl name='zoneid_t' type-id='3502e3ff' id='4da03624'/>
<function-decl name='getzoneid' mangled-name='getzoneid' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='getzoneid'>
<return type-id='4da03624'/>
</function-decl>
Expand Down Expand Up @@ -4414,6 +4415,12 @@
<function-decl name='zfs_version_kernel' mangled-name='zfs_version_kernel' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_version_kernel'>
<return type-id='26a90f95'/>
</function-decl>
<function-decl name='zfs_userns' mangled-name='zfs_userns' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_userns'>
<parameter type-id='9200a744' name='zhp'/>
<parameter type-id='80f4b756' name='nspath'/>
<parameter type-id='95e97e5e' name='attach'/>
<return type-id='95e97e5e'/>
</function-decl>
</abi-instr>
<abi-instr address-size='64' path='lib/libzutil/os/linux/zutil_device_path_os.c' language='LANG_C99'>
<function-decl name='zfs_append_partition' mangled-name='zfs_append_partition' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_append_partition'>
Expand Down
6 changes: 6 additions & 0 deletions lib/libzfs/libzfs_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,9 @@ libzfs_error_description(libzfs_handle_t *hdl)
case EZFS_VDEV_NOTSUP:
return (dgettext(TEXT_DOMAIN, "operation not supported "
"on this type of vdev"));
case EZFS_NOT_USER_NAMESPACE:
return (dgettext(TEXT_DOMAIN, "the provided file "
"was not a user namespace file"));
case EZFS_UNKNOWN:
return (dgettext(TEXT_DOMAIN, "unknown error"));
default:
Expand Down Expand Up @@ -484,6 +487,9 @@ zfs_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
case ZFS_ERR_BADPROP:
zfs_verror(hdl, EZFS_BADPROP, fmt, ap);
break;
case ZFS_ERR_NOT_USER_NAMESPACE:
zfs_verror(hdl, EZFS_NOT_USER_NAMESPACE, fmt, ap);
break;
default:
zfs_error_aux(hdl, "%s", strerror(error));
zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);
Expand Down
Loading

0 comments on commit 2d286df

Please sign in to comment.