Skip to content

Commit

Permalink
Fix BLAKE3 tunable and module loading on Linux and FreeBSD
Browse files Browse the repository at this point in the history
Apply options to BLAKE3 similar to those already used for zfs_fletcher_4_impl.

The zfs module parameter on Linux changes from icp_blake3_impl to
zfs_blake3_impl.

You can check and set it on Linux via sysfs like this:
```
[bash]# cat /sys/module/zfs/parameters/zfs_blake3_impl
cycle [fastest] generic sse2 sse41 avx2

[bash]# echo sse2 > /sys/module/zfs/parameters/zfs_blake3_impl
[bash]# cat /sys/module/zfs/parameters/zfs_blake3_impl
cycle fastest generic [sse2] sse41 avx2
```

The module parameter may now also be set via modprobe:
```
[bash]# modprobe zfs zfs_blake3_impl=sse41
[bash]# cat /sys/module/zfs/parameters/zfs_blake3_impl
cycle fastest generic sse2 [sse41] avx2
```

On FreeBSD the BLAKE3 implementation can be set via sysctl like this:
```
[bsd]# sysctl vfs.zfs.blake3_impl
vfs.zfs.blake3_impl: cycle [fastest] generic sse2 sse41 avx2
[bsd]# sysctl vfs.zfs.blake3_impl=sse2
vfs.zfs.blake3_impl: cycle [fastest] generic sse2 sse41 avx2 \
  -> cycle fastest generic [sse2] sse41 avx2
```

This commit also changes some BLAKE3 internals:
- blake3_impl_ops_t was renamed to blake3_ops_t
- all functions are named blake3_impl_NAME() now

Signed-off-by: Tino Reichardt <milky-zfs@mcmilk.de>
Co-authored-by: Ryan Moeller <ryan@iXsystems.com>
  • Loading branch information
mcmilk and Ryan Moeller committed Aug 5, 2022
1 parent 9681de4 commit 0fec6cc
Show file tree
Hide file tree
Showing 10 changed files with 267 additions and 191 deletions.
4 changes: 2 additions & 2 deletions cmd/ztest.c
Original file line number Diff line number Diff line change
Expand Up @@ -6413,7 +6413,7 @@ ztest_blake3(ztest_ds_t *zd, uint64_t id)
void *res2 = &zc_res2;

/* BLAKE3_KEY_LEN = 32 */
VERIFY0(blake3_set_impl_name("generic"));
VERIFY0(blake3_impl_setname("generic"));
templ = abd_checksum_blake3_tmpl_init(&salt);
Blake3_InitKeyed(&ctx, salt_ptr);
Blake3_Update(&ctx, buf, size);
Expand All @@ -6422,7 +6422,7 @@ ztest_blake3(ztest_ds_t *zd, uint64_t id)
ZIO_CHECKSUM_BSWAP(&zc_ref2);
abd_checksum_blake3_tmpl_free(templ);

VERIFY0(blake3_set_impl_name("cycle"));
VERIFY0(blake3_impl_setname("cycle"));
while (run_count-- > 0) {

/* Test current implementation */
Expand Down
7 changes: 3 additions & 4 deletions include/os/freebsd/spl/sys/mod_os.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,6 @@

#include <sys/sysctl.h>

#define EXPORT_SYMBOL(x)
#define module_param(a, b, c)
#define MODULE_PARM_DESC(a, b)

#define ZMOD_RW CTLFLAG_RWTUN
#define ZMOD_RD CTLFLAG_RDTUN

Expand Down Expand Up @@ -92,6 +88,9 @@
#define fletcher_4_param_set_args(var) \
CTLTYPE_STRING, NULL, 0, fletcher_4_param, "A"

#define blake3_param_set_args(var) \
CTLTYPE_STRING, NULL, 0, blake3_param, "A"

#include <sys/kernel.h>
#define module_init(fn) \
static void \
Expand Down
23 changes: 10 additions & 13 deletions include/sys/blake3.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ typedef struct {
*/
uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN];

/* const blake3_impl_ops_t *ops */
/* const blake3_ops_t *ops */
const void *ops;
} BLAKE3_CTX;

Expand All @@ -97,26 +97,23 @@ extern void **blake3_per_cpu_ctx;
extern void blake3_per_cpu_ctx_init(void);
extern void blake3_per_cpu_ctx_fini(void);

/* return number of supported implementations */
extern int blake3_get_impl_count(void);
/* get count of supported implementations */
extern uint32_t blake3_impl_getcnt(void);

/* return id of selected implementation */
extern int blake3_get_impl_id(void);
/* get id of selected implementation */
extern uint32_t blake3_impl_getid(void);

/* return name of selected implementation */
extern const char *blake3_get_impl_name(void);
/* get name of selected implementation */
extern const char *blake3_impl_getname(void);

/* setup id as fastest implementation */
extern void blake3_set_impl_fastest(uint32_t id);
extern void blake3_impl_set_fastest(uint32_t id);

/* set implementation by id */
extern void blake3_set_impl_id(uint32_t id);
extern void blake3_impl_setid(uint32_t id);

/* set implementation by name */
extern int blake3_set_impl_name(const char *name);

/* set startup implementation */
extern void blake3_setup_impl(void);
extern int blake3_impl_setname(const char *name);

#ifdef __cplusplus
}
Expand Down
14 changes: 7 additions & 7 deletions module/icp/algs/blake3/blake3.c
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ static output_t make_output(const uint32_t input_cv[8],
* bytes. For that reason, chaining values in the CV stack are represented as
* bytes.
*/
static void output_chaining_value(const blake3_impl_ops_t *ops,
static void output_chaining_value(const blake3_ops_t *ops,
const output_t *ctx, uint8_t cv[32])
{
uint32_t cv_words[8];
Expand All @@ -139,7 +139,7 @@ static void output_chaining_value(const blake3_impl_ops_t *ops,
store_cv_words(cv, cv_words);
}

static void output_root_bytes(const blake3_impl_ops_t *ops, const output_t *ctx,
static void output_root_bytes(const blake3_ops_t *ops, const output_t *ctx,
uint64_t seek, uint8_t *out, size_t out_len)
{
uint64_t output_block_counter = seek / 64;
Expand All @@ -163,7 +163,7 @@ static void output_root_bytes(const blake3_impl_ops_t *ops, const output_t *ctx,
}
}

static void chunk_state_update(const blake3_impl_ops_t *ops,
static void chunk_state_update(const blake3_ops_t *ops,
blake3_chunk_state_t *ctx, const uint8_t *input, size_t input_len)
{
if (ctx->buf_len > 0) {
Expand Down Expand Up @@ -230,7 +230,7 @@ static size_t left_len(size_t content_len)
* number of chunks hashed. These chunks are never the root and never empty;
* those cases use a different codepath.
*/
static size_t compress_chunks_parallel(const blake3_impl_ops_t *ops,
static size_t compress_chunks_parallel(const blake3_ops_t *ops,
const uint8_t *input, size_t input_len, const uint32_t key[8],
uint64_t chunk_counter, uint8_t flags, uint8_t *out)
{
Expand Down Expand Up @@ -274,7 +274,7 @@ static size_t compress_chunks_parallel(const blake3_impl_ops_t *ops,
* return it as an additional output.) These parents are never the root and
* never empty; those cases use a different codepath.
*/
static size_t compress_parents_parallel(const blake3_impl_ops_t *ops,
static size_t compress_parents_parallel(const blake3_ops_t *ops,
const uint8_t *child_chaining_values, size_t num_chaining_values,
const uint32_t key[8], uint8_t flags, uint8_t *out)
{
Expand Down Expand Up @@ -320,7 +320,7 @@ static size_t compress_parents_parallel(const blake3_impl_ops_t *ops,
* of implementing this special rule? Because we don't want to limit SIMD or
* multi-threading parallelism for that update().
*/
static size_t blake3_compress_subtree_wide(const blake3_impl_ops_t *ops,
static size_t blake3_compress_subtree_wide(const blake3_ops_t *ops,
const uint8_t *input, size_t input_len, const uint32_t key[8],
uint64_t chunk_counter, uint8_t flags, uint8_t *out)
{
Expand Down Expand Up @@ -406,7 +406,7 @@ static size_t blake3_compress_subtree_wide(const blake3_impl_ops_t *ops,
* As with compress_subtree_wide(), this function is not used on inputs of 1
* chunk or less. That's a different codepath.
*/
static void compress_subtree_to_parent_node(const blake3_impl_ops_t *ops,
static void compress_subtree_to_parent_node(const blake3_ops_t *ops,
const uint8_t *input, size_t input_len, const uint32_t key[8],
uint64_t chunk_counter, uint8_t flags, uint8_t out[2 * BLAKE3_OUT_LEN])
{
Expand Down
2 changes: 1 addition & 1 deletion module/icp/algs/blake3/blake3_generic.c
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ static inline boolean_t blake3_is_generic_supported(void)
return (B_TRUE);
}

const blake3_impl_ops_t blake3_generic_impl = {
const blake3_ops_t blake3_generic_impl = {
.compress_in_place = blake3_compress_in_place_generic,
.compress_xof = blake3_compress_xof_generic,
.hash_many = blake3_hash_many_generic,
Expand Down
Loading

0 comments on commit 0fec6cc

Please sign in to comment.