From c197ee04c44516a1fe577477911c7c6edc3cc4c8 Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Tue, 25 Jun 2024 20:48:32 -0400 Subject: [PATCH 1/6] Extra tests for some issues found --- croaring/tests/roaring64.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/croaring/tests/roaring64.rs b/croaring/tests/roaring64.rs index abcef9e..5eb4f2c 100644 --- a/croaring/tests/roaring64.rs +++ b/croaring/tests/roaring64.rs @@ -55,3 +55,19 @@ fn test_r64_cursor_reset() { assert_eq!(cursor.current(), Some(u64::MAX)); assert_eq!(cursor.next(), None); } + +#[test] +fn empty_intersect_with_range() { + let bitmap = Bitmap64::new(); + assert_eq!(0, bitmap.range_cardinality(0..1)); + bitmap.intersect_with_range(0..1); +} + +#[test] +fn empty_reset_iterator() { + let bitmap = Bitmap64::new(); + let mut iterator = bitmap.iter(); + assert_eq!(iterator.peek(), None); + iterator.reset_at_or_after(0); + assert_eq!(iterator.peek(), None); +} From 5c3aed9a66fe6b01cc524ce5fb3ab4bf9069ee0d Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Wed, 26 Jun 2024 22:39:13 -0400 Subject: [PATCH 2/6] Include the CRoaring version in readme --- .gitignore | 3 ++- Makefile | 9 ++++++++- README.md | 7 +++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 083546c..1b58012 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ target .DS_Store -.idea \ No newline at end of file +.idea +/README.md.tmp \ No newline at end of file diff --git a/Makefile b/Makefile index 25d8c27..40814cc 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,7 @@ test: croaring_release_url_base = /~https://github.com/RoaringBitmap/CRoaring/releases/download # Fetch the c source amalgamation from a tagged CRoaring release (like `make version=0.9.3 update_croaring`) -update_croaring: download_croaring bindgen +update_croaring: download_croaring bindgen update_readme_croaring_version download_croaring: rm -f '$(croaring_source)/roaring.c' '$(croaring_source)/roaring.h' '$(croaring_source)/roaring.hh' @@ -35,6 +35,13 @@ bindgen: -o bindgen_bundled_version.rs \ roaring.h + +# sed -i is a GNU extension, so we use a temporary file explicitly +update_readme_croaring_version: + @echo "Updating README.md with CRoaring version $(version)" + @sed -r -e 's_\[CRoaring version `[0-9]+\.[0-9]+\.[0-9]+`\]\([^\)]+\)_[CRoaring version `$(version)`](/~https://github.com/RoaringBitmap/CRoaring/releases/tag/v$(version))_' README.md > README.md.tmp + @mv README.md.tmp README.md + # Build a c program to (re)generate the example serialized files for testing test_serialization_files: cd croaring/tests/data/ && \ diff --git a/README.md b/README.md index 52dc988..2195aab 100644 --- a/README.md +++ b/README.md @@ -124,3 +124,10 @@ cargo bench ### Documentation Current documentation is available at https://docs.rs/croaring/latest/croaring/ + +## CRoaring Version + +This crate uses [CRoaring version `4.0.0`](/~https://github.com/RoaringBitmap/CRoaring/releases/tag/v4.0.0). +The version of this crate does not necessarily match the version of CRoaring: the major version of the crate is only +incremented when there are breaking changes in the Rust API: It is possible (and has happened) that breaking changes +in the CRoaring C API do not necessitate a major version bump in this crate. \ No newline at end of file From ed88a5fd475b698c9fe752b5fa31e8bbdaab1da3 Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Tue, 2 Jul 2024 19:31:49 -0400 Subject: [PATCH 3/6] Match the croaring-sys version with the version of CRoaring --- .gitignore | 3 ++- Cargo.lock | 2 +- Makefile | 8 +++++++- croaring-sys/Cargo.toml | 2 +- croaring/Cargo.toml | 2 +- 5 files changed, 12 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 1b58012..cde9a2b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ target .DS_Store .idea -/README.md.tmp \ No newline at end of file +README.md.tmp +Cargo.toml.tmp \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 9298f74..eb4c6c6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -186,7 +186,7 @@ dependencies = [ [[package]] name = "croaring-sys" -version = "3.0.0" +version = "4.0.0" dependencies = [ "cc", ] diff --git a/Makefile b/Makefile index 40814cc..0f0e0c8 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,7 @@ test: croaring_release_url_base = /~https://github.com/RoaringBitmap/CRoaring/releases/download # Fetch the c source amalgamation from a tagged CRoaring release (like `make version=0.9.3 update_croaring`) -update_croaring: download_croaring bindgen update_readme_croaring_version +update_croaring: download_croaring bindgen update_readme_croaring_version update_croaring_sys_version download_croaring: rm -f '$(croaring_source)/roaring.c' '$(croaring_source)/roaring.h' '$(croaring_source)/roaring.hh' @@ -42,6 +42,12 @@ update_readme_croaring_version: @sed -r -e 's_\[CRoaring version `[0-9]+\.[0-9]+\.[0-9]+`\]\([^\)]+\)_[CRoaring version `$(version)`](/~https://github.com/RoaringBitmap/CRoaring/releases/tag/v$(version))_' README.md > README.md.tmp @mv README.md.tmp README.md +# We don't always want to update the version of croaring-sys dependency in croaring, but we always want to update croaring-sys +update_croaring_sys_version: + @echo "Updating croaring-sys version in Cargo.toml to $(version)" + @sed -r -e 's/^version = ".*"/version = "$(version)"/' croaring-sys/Cargo.toml > croaring-sys/Cargo.toml.tmp + @mv croaring-sys/Cargo.toml.tmp croaring-sys/Cargo.toml + # Build a c program to (re)generate the example serialized files for testing test_serialization_files: cd croaring/tests/data/ && \ diff --git a/croaring-sys/Cargo.toml b/croaring-sys/Cargo.toml index 769f2a0..a4e3456 100644 --- a/croaring-sys/Cargo.toml +++ b/croaring-sys/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "croaring-sys" -version = "3.0.0" +version = "4.0.0" edition = "2021" authors = ["croaring-rs developers"] license = "Apache-2.0" diff --git a/croaring/Cargo.toml b/croaring/Cargo.toml index d81916c..dec269e 100644 --- a/croaring/Cargo.toml +++ b/croaring/Cargo.toml @@ -25,7 +25,7 @@ roaring = "0.10" criterion = { version = "0.5", features = ["html_reports"] } [dependencies] -ffi = { package = "croaring-sys", path = "../croaring-sys", version = "3.0.0" } +ffi = { package = "croaring-sys", path = "../croaring-sys", version = "4.0.0" } [[bench]] name = "benches" From 514d8767043727d3f8a04e90cc5d080d4a574276 Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Tue, 2 Jul 2024 19:33:13 -0400 Subject: [PATCH 4/6] Update version --- Cargo.lock | 2 +- croaring/Cargo.toml | 2 +- fuzz/Cargo.lock | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index eb4c6c6..3ebbbc8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -176,7 +176,7 @@ dependencies = [ [[package]] name = "croaring" -version = "2.0.1" +version = "2.0.2" dependencies = [ "criterion", "croaring-sys", diff --git a/croaring/Cargo.toml b/croaring/Cargo.toml index dec269e..385a354 100644 --- a/croaring/Cargo.toml +++ b/croaring/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "croaring" -version = "2.0.1" +version = "2.0.2" edition = "2021" authors = ["croaring-rs developers"] license = "Apache-2.0" diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index 98b919e..98e2490 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -36,7 +36,7 @@ dependencies = [ [[package]] name = "croaring" -version = "2.0.1" +version = "2.0.2" dependencies = [ "croaring-sys", ] @@ -52,7 +52,7 @@ dependencies = [ [[package]] name = "croaring-sys" -version = "3.0.0" +version = "4.0.0" dependencies = [ "cc", ] From 6bf6d813be0e6814bb70d5f21520fa4dd3a44d6b Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Wed, 3 Jul 2024 17:48:58 -0400 Subject: [PATCH 5/6] Update to croaring 4.1.0 --- Cargo.lock | 2 +- README.md | 2 +- .../CRoaring/bindgen_bundled_version.rs | 28 +- croaring-sys/CRoaring/roaring.c | 46 ++- croaring-sys/CRoaring/roaring.h | 328 ++++++++++-------- croaring-sys/CRoaring/roaring.hh | 2 +- croaring-sys/Cargo.toml | 2 +- croaring/Cargo.toml | 2 +- fuzz/Cargo.lock | 2 +- 9 files changed, 235 insertions(+), 179 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3ebbbc8..efb9e40 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -186,7 +186,7 @@ dependencies = [ [[package]] name = "croaring-sys" -version = "4.0.0" +version = "4.1.0" dependencies = [ "cc", ] diff --git a/README.md b/README.md index 2195aab..dfe98e9 100644 --- a/README.md +++ b/README.md @@ -127,7 +127,7 @@ Current documentation is available at https://docs.rs/croaring/latest/croaring/ ## CRoaring Version -This crate uses [CRoaring version `4.0.0`](/~https://github.com/RoaringBitmap/CRoaring/releases/tag/v4.0.0). +This crate uses [CRoaring version `4.1.0`](/~https://github.com/RoaringBitmap/CRoaring/releases/tag/v4.1.0). The version of this crate does not necessarily match the version of CRoaring: the major version of the crate is only incremented when there are breaking changes in the Rust API: It is possible (and has happened) that breaking changes in the CRoaring C API do not necessitate a major version bump in this crate. \ No newline at end of file diff --git a/croaring-sys/CRoaring/bindgen_bundled_version.rs b/croaring-sys/CRoaring/bindgen_bundled_version.rs index d8da1ef..f3aa8c3 100644 --- a/croaring-sys/CRoaring/bindgen_bundled_version.rs +++ b/croaring-sys/CRoaring/bindgen_bundled_version.rs @@ -5,6 +5,14 @@ pub const ROARING_VERSION_MAJOR: _bindgen_ty_1 = 4; pub const ROARING_VERSION_MINOR: _bindgen_ty_1 = 0; pub const ROARING_VERSION_REVISION: _bindgen_ty_1 = 0; pub type _bindgen_ty_1 = ::core::ffi::c_uint; +extern "C" { + #[doc = " result might be undefined when input_num is zero"] + pub fn roaring_trailing_zeroes(input_num: ::core::ffi::c_ulonglong) -> ::core::ffi::c_int; +} +extern "C" { + #[doc = " result might be undefined when input_num is zero"] + pub fn roaring_leading_zeroes(input_num: ::core::ffi::c_ulonglong) -> ::core::ffi::c_int; +} #[doc = " Roaring arrays are array-based key-value pairs having containers as values\n and 16-bit integer keys. A roaring bitmap might be implemented as such."] #[repr(C)] #[derive(Debug, Copy, Clone)] @@ -73,14 +81,6 @@ pub struct roaring_container_iterator_s { } #[doc = " Roaring-internal type used to iterate within a roaring container."] pub type roaring_container_iterator_t = roaring_container_iterator_s; -extern "C" { - #[doc = " result might be undefined when input_num is zero"] - pub fn roaring_trailing_zeroes(input_num: ::core::ffi::c_ulonglong) -> ::core::ffi::c_int; -} -extern "C" { - #[doc = " result might be undefined when input_num is zero"] - pub fn roaring_leading_zeroes(input_num: ::core::ffi::c_ulonglong) -> ::core::ffi::c_int; -} #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct bitset_s { @@ -925,6 +925,10 @@ extern "C" { #[doc = " Remove all values in range [min, max]."] pub fn roaring64_bitmap_remove_range_closed(r: *mut roaring64_bitmap_t, min: u64, max: u64); } +extern "C" { + #[doc = " Empties the bitmap."] + pub fn roaring64_bitmap_clear(r: *mut roaring64_bitmap_t); +} extern "C" { #[doc = " Returns true if the provided value is present."] pub fn roaring64_bitmap_contains(r: *const roaring64_bitmap_t, val: u64) -> bool; @@ -977,6 +981,14 @@ extern "C" { max: u64, ) -> u64; } +extern "C" { + #[doc = " Returns the number of elements in the range [min, max]"] + pub fn roaring64_bitmap_range_closed_cardinality( + r: *const roaring64_bitmap_t, + min: u64, + max: u64, + ) -> u64; +} extern "C" { #[doc = " Returns true if the bitmap is empty (cardinality is zero)."] pub fn roaring64_bitmap_is_empty(r: *const roaring64_bitmap_t) -> bool; diff --git a/croaring-sys/CRoaring/roaring.c b/croaring-sys/CRoaring/roaring.c index 6c6f58d..10cc715 100644 --- a/croaring-sys/CRoaring/roaring.c +++ b/croaring-sys/CRoaring/roaring.c @@ -1,5 +1,5 @@ // !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!! -// Created by amalgamation.sh on 2024-05-13T21:29:25Z +// Created by amalgamation.sh on 2024-07-03T21:30:32Z /* * The CRoaring project is under a dual license (Apache/MIT). @@ -10770,7 +10770,7 @@ static bool art_node_iterator_lower_bound(const art_node_t *node, } art_iterator_t art_init_iterator(const art_t *art, bool first) { - art_iterator_t iterator = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + art_iterator_t iterator = CROARING_ZERO_INITIALIZER; if (art->root == NULL) { return iterator; } @@ -10793,8 +10793,11 @@ bool art_iterator_lower_bound(art_iterator_t *iterator, // a valid key. Start from the root. iterator->frame = 0; iterator->depth = 0; - return art_node_iterator_lower_bound(art_iterator_node(iterator), - iterator, key); + art_node_t *root = art_iterator_node(iterator); + if (root == NULL) { + return false; + } + return art_node_iterator_lower_bound(root, iterator, key); } int compare_result = art_compare_prefix(iterator->key, 0, key, 0, ART_KEY_BYTES); @@ -10827,7 +10830,7 @@ bool art_iterator_lower_bound(art_iterator_t *iterator, } art_iterator_t art_lower_bound(const art_t *art, const art_key_chunk_t *key) { - art_iterator_t iterator = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + art_iterator_t iterator = CROARING_ZERO_INITIALIZER; if (art->root != NULL) { art_node_iterator_lower_bound(art->root, &iterator, key); } @@ -10835,7 +10838,7 @@ art_iterator_t art_lower_bound(const art_t *art, const art_key_chunk_t *key) { } art_iterator_t art_upper_bound(const art_t *art, const art_key_chunk_t *key) { - art_iterator_t iterator = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + art_iterator_t iterator = CROARING_ZERO_INITIALIZER; if (art->root != NULL) { if (art_node_iterator_lower_bound(art->root, &iterator, key) && art_compare_keys(iterator.key, key) == 0) { @@ -19469,7 +19472,7 @@ roaring_bitmap_t *roaring_bitmap_of(size_t n_args, ...) { // todo: could be greatly optimized but we do not expect this call to ever // include long lists roaring_bitmap_t *answer = roaring_bitmap_create(); - roaring_bulk_context_t context = {0, 0, 0, 0}; + roaring_bulk_context_t context = CROARING_ZERO_INITIALIZER; va_list ap; va_start(ap, n_args); for (size_t i = 0; i < n_args; i++) { @@ -20811,7 +20814,7 @@ roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf) { if (bitmap == NULL) { return NULL; } - roaring_bulk_context_t context = {0, 0, 0, 0}; + roaring_bulk_context_t context = CROARING_ZERO_INITIALIZER; for (uint32_t i = 0; i < card; i++) { // elems may not be aligned, read with memcpy uint32_t elem; @@ -20854,7 +20857,7 @@ roaring_bitmap_t *roaring_bitmap_deserialize_safe(const void *buf, if (bitmap == NULL) { return NULL; } - roaring_bulk_context_t context = {0, 0, 0, 0}; + roaring_bulk_context_t context = CROARING_ZERO_INITIALIZER; for (uint32_t i = 0; i < card; i++) { // elems may not be aligned, read with memcpy uint32_t elem; @@ -22780,6 +22783,9 @@ roaring64_bitmap_t *roaring64_bitmap_create(void) { } void roaring64_bitmap_free(roaring64_bitmap_t *r) { + if (!r) { + return; + } art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); while (it.value != NULL) { leaf_t *leaf = (leaf_t *)it.value; @@ -22856,7 +22862,7 @@ roaring64_bitmap_t *roaring64_bitmap_of_ptr(size_t n_args, roaring64_bitmap_t *roaring64_bitmap_of(size_t n_args, ...) { roaring64_bitmap_t *r = roaring64_bitmap_create(); - roaring64_bulk_context_t context = {0, 0, 0, 0, 0, 0, 0}; + roaring64_bulk_context_t context = CROARING_ZERO_INITIALIZER; va_list ap; va_start(ap, n_args); for (size_t i = 0; i < n_args; i++) { @@ -22949,7 +22955,7 @@ void roaring64_bitmap_add_many(roaring64_bitmap_t *r, size_t n_args, return; } const uint64_t *end = vals + n_args; - roaring64_bulk_context_t context = {0, 0, 0, 0, 0, 0, 0}; + roaring64_bulk_context_t context = CROARING_ZERO_INITIALIZER; for (const uint64_t *current_val = vals; current_val != end; current_val++) { roaring64_bitmap_add_bulk(r, &context, *current_val); @@ -23273,7 +23279,7 @@ void roaring64_bitmap_remove_many(roaring64_bitmap_t *r, size_t n_args, return; } const uint64_t *end = vals + n_args; - roaring64_bulk_context_t context = {0, 0, 0, 0, 0, 0, 0}; + roaring64_bulk_context_t context = CROARING_ZERO_INITIALIZER; for (const uint64_t *current_val = vals; current_val != end; current_val++) { roaring64_bitmap_remove_bulk(r, &context, *current_val); @@ -23339,6 +23345,10 @@ void roaring64_bitmap_remove_range_closed(roaring64_bitmap_t *r, uint64_t min, remove_range_closed_at(art, max_high48, 0, max_low16); } +void roaring64_bitmap_clear(roaring64_bitmap_t *r) { + roaring64_bitmap_remove_range_closed(r, 0, UINT64_MAX); +} + uint64_t roaring64_bitmap_get_cardinality(const roaring64_bitmap_t *r) { art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); uint64_t cardinality = 0; @@ -23356,7 +23366,17 @@ uint64_t roaring64_bitmap_range_cardinality(const roaring64_bitmap_t *r, if (min >= max) { return 0; } - max--; // A closed range is easier to work with. + // Convert to a closed range + // No underflow here: passing the above condition implies min < max, so + // there is a number less than max + return roaring64_bitmap_range_closed_cardinality(r, min, max - 1); +} + +uint64_t roaring64_bitmap_range_closed_cardinality(const roaring64_bitmap_t *r, + uint64_t min, uint64_t max) { + if (min > max) { + return 0; + } uint64_t cardinality = 0; uint8_t min_high48[ART_KEY_BYTES]; diff --git a/croaring-sys/CRoaring/roaring.h b/croaring-sys/CRoaring/roaring.h index c0d9a52..d33c13e 100644 --- a/croaring-sys/CRoaring/roaring.h +++ b/croaring-sys/CRoaring/roaring.h @@ -1,5 +1,5 @@ // !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!! -// Created by amalgamation.sh on 2024-05-13T21:29:25Z +// Created by amalgamation.sh on 2024-07-03T21:30:32Z /* * The CRoaring project is under a dual license (Apache/MIT). @@ -67,157 +67,6 @@ enum { }; #endif // ROARING_INCLUDE_ROARING_VERSION // clang-format on/* end file include/roaring/roaring_version.h */ -/* begin file include/roaring/roaring_types.h */ -/* - Typedefs used by various components -*/ - -#ifndef ROARING_TYPES_H -#define ROARING_TYPES_H - -#include -#include - -#ifdef __cplusplus -extern "C" { -namespace roaring { -namespace api { -#endif - -/** - * When building .c files as C++, there's added compile-time checking if the - * container types are derived from a `container_t` base class. So long as - * such a base class is empty, the struct will behave compatibly with C structs - * despite the derivation. This is due to the Empty Base Class Optimization: - * - * https://en.cppreference.com/w/cpp/language/ebo - * - * But since C isn't namespaced, taking `container_t` globally might collide - * with other projects. So roaring.h uses ROARING_CONTAINER_T, while internal - * code #undefs that after declaring `typedef ROARING_CONTAINER_T container_t;` - */ -#if defined(__cplusplus) -extern "C++" { -struct container_s {}; -} -#define ROARING_CONTAINER_T ::roaring::api::container_s -#else -#define ROARING_CONTAINER_T void // no compile-time checking -#endif - -#define ROARING_FLAG_COW UINT8_C(0x1) -#define ROARING_FLAG_FROZEN UINT8_C(0x2) - -/** - * Roaring arrays are array-based key-value pairs having containers as values - * and 16-bit integer keys. A roaring bitmap might be implemented as such. - */ - -// parallel arrays. Element sizes quite different. -// Alternative is array -// of structs. Which would have better -// cache performance through binary searches? - -typedef struct roaring_array_s { - int32_t size; - int32_t allocation_size; - ROARING_CONTAINER_T **containers; // Use container_t in non-API files! - uint16_t *keys; - uint8_t *typecodes; - uint8_t flags; -} roaring_array_t; - -typedef bool (*roaring_iterator)(uint32_t value, void *param); -typedef bool (*roaring_iterator64)(uint64_t value, void *param); - -/** - * (For advanced users.) - * The roaring_statistics_t can be used to collect detailed statistics about - * the composition of a roaring bitmap. - */ -typedef struct roaring_statistics_s { - uint32_t n_containers; /* number of containers */ - - uint32_t n_array_containers; /* number of array containers */ - uint32_t n_run_containers; /* number of run containers */ - uint32_t n_bitset_containers; /* number of bitmap containers */ - - uint32_t - n_values_array_containers; /* number of values in array containers */ - uint32_t n_values_run_containers; /* number of values in run containers */ - uint32_t - n_values_bitset_containers; /* number of values in bitmap containers */ - - uint32_t n_bytes_array_containers; /* number of allocated bytes in array - containers */ - uint32_t n_bytes_run_containers; /* number of allocated bytes in run - containers */ - uint32_t n_bytes_bitset_containers; /* number of allocated bytes in bitmap - containers */ - - uint32_t - max_value; /* the maximal value, undefined if cardinality is zero */ - uint32_t - min_value; /* the minimal value, undefined if cardinality is zero */ - uint64_t sum_value; /* deprecated always zero */ - - uint64_t cardinality; /* total number of values stored in the bitmap */ - - // and n_values_arrays, n_values_rle, n_values_bitmap -} roaring_statistics_t; - -/** - * (For advanced users.) - * The roaring64_statistics_t can be used to collect detailed statistics about - * the composition of a roaring64 bitmap. - */ -typedef struct roaring64_statistics_s { - uint64_t n_containers; /* number of containers */ - - uint64_t n_array_containers; /* number of array containers */ - uint64_t n_run_containers; /* number of run containers */ - uint64_t n_bitset_containers; /* number of bitmap containers */ - - uint64_t - n_values_array_containers; /* number of values in array containers */ - uint64_t n_values_run_containers; /* number of values in run containers */ - uint64_t - n_values_bitset_containers; /* number of values in bitmap containers */ - - uint64_t n_bytes_array_containers; /* number of allocated bytes in array - containers */ - uint64_t n_bytes_run_containers; /* number of allocated bytes in run - containers */ - uint64_t n_bytes_bitset_containers; /* number of allocated bytes in bitmap - containers */ - - uint64_t - max_value; /* the maximal value, undefined if cardinality is zero */ - uint64_t - min_value; /* the minimal value, undefined if cardinality is zero */ - - uint64_t cardinality; /* total number of values stored in the bitmap */ - - // and n_values_arrays, n_values_rle, n_values_bitmap -} roaring64_statistics_t; - -/** - * Roaring-internal type used to iterate within a roaring container. - */ -typedef struct roaring_container_iterator_s { - // For bitset and array containers this is the index of the bit / entry. - // For run containers this points at the run. - int32_t index; -} roaring_container_iterator_t; - -#ifdef __cplusplus -} -} -} // extern "C" { namespace roaring { namespace api { -#endif - -#endif /* ROARING_TYPES_H */ -/* end file include/roaring/roaring_types.h */ /* begin file include/roaring/portability.h */ /* * portability.h @@ -810,6 +659,16 @@ static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) { #define CROARING_DEPRECATED #endif // defined(__GNUC__) || defined(__clang__) +// We want to initialize structs to zero portably (C and C++), without +// warnings. We can do mystruct s = CROARING_ZERO_INITIALIZER; +#if __cplusplus +#define CROARING_ZERO_INITIALIZER \ + {} +#else +#define CROARING_ZERO_INITIALIZER \ + { 0 } +#endif + // We need portability.h to be included first, // but we also always want isadetection.h to be // included (right after). @@ -819,6 +678,160 @@ static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) { // strict requirement. #endif /* INCLUDE_PORTABILITY_H_ */ /* end file include/roaring/portability.h */ +/* begin file include/roaring/roaring_types.h */ +/* + Typedefs used by various components +*/ + +#ifndef ROARING_TYPES_H +#define ROARING_TYPES_H + +#include +#include + + +#ifdef __cplusplus +extern "C" { +namespace roaring { +namespace api { +#endif + +/** + * When building .c files as C++, there's added compile-time checking if the + * container types are derived from a `container_t` base class. So long as + * such a base class is empty, the struct will behave compatibly with C structs + * despite the derivation. This is due to the Empty Base Class Optimization: + * + * https://en.cppreference.com/w/cpp/language/ebo + * + * But since C isn't namespaced, taking `container_t` globally might collide + * with other projects. So roaring.h uses ROARING_CONTAINER_T, while internal + * code #undefs that after declaring `typedef ROARING_CONTAINER_T container_t;` + */ +#if defined(__cplusplus) +extern "C++" { +struct container_s {}; +} +#define ROARING_CONTAINER_T ::roaring::api::container_s +#else +#define ROARING_CONTAINER_T void // no compile-time checking +#endif + +#define ROARING_FLAG_COW UINT8_C(0x1) +#define ROARING_FLAG_FROZEN UINT8_C(0x2) + +/** + * Roaring arrays are array-based key-value pairs having containers as values + * and 16-bit integer keys. A roaring bitmap might be implemented as such. + */ + +// parallel arrays. Element sizes quite different. +// Alternative is array +// of structs. Which would have better +// cache performance through binary searches? + +typedef struct roaring_array_s { + int32_t size; + int32_t allocation_size; + ROARING_CONTAINER_T **containers; // Use container_t in non-API files! + uint16_t *keys; + uint8_t *typecodes; + uint8_t flags; +} roaring_array_t; + +typedef bool (*roaring_iterator)(uint32_t value, void *param); +typedef bool (*roaring_iterator64)(uint64_t value, void *param); + +/** + * (For advanced users.) + * The roaring_statistics_t can be used to collect detailed statistics about + * the composition of a roaring bitmap. + */ +typedef struct roaring_statistics_s { + uint32_t n_containers; /* number of containers */ + + uint32_t n_array_containers; /* number of array containers */ + uint32_t n_run_containers; /* number of run containers */ + uint32_t n_bitset_containers; /* number of bitmap containers */ + + uint32_t + n_values_array_containers; /* number of values in array containers */ + uint32_t n_values_run_containers; /* number of values in run containers */ + uint32_t + n_values_bitset_containers; /* number of values in bitmap containers */ + + uint32_t n_bytes_array_containers; /* number of allocated bytes in array + containers */ + uint32_t n_bytes_run_containers; /* number of allocated bytes in run + containers */ + uint32_t n_bytes_bitset_containers; /* number of allocated bytes in bitmap + containers */ + + uint32_t + max_value; /* the maximal value, undefined if cardinality is zero */ + uint32_t + min_value; /* the minimal value, undefined if cardinality is zero */ + + CROARING_DEPRECATED + uint64_t sum_value; /* deprecated always zero */ + + uint64_t cardinality; /* total number of values stored in the bitmap */ + + // and n_values_arrays, n_values_rle, n_values_bitmap +} roaring_statistics_t; + +/** + * (For advanced users.) + * The roaring64_statistics_t can be used to collect detailed statistics about + * the composition of a roaring64 bitmap. + */ +typedef struct roaring64_statistics_s { + uint64_t n_containers; /* number of containers */ + + uint64_t n_array_containers; /* number of array containers */ + uint64_t n_run_containers; /* number of run containers */ + uint64_t n_bitset_containers; /* number of bitmap containers */ + + uint64_t + n_values_array_containers; /* number of values in array containers */ + uint64_t n_values_run_containers; /* number of values in run containers */ + uint64_t + n_values_bitset_containers; /* number of values in bitmap containers */ + + uint64_t n_bytes_array_containers; /* number of allocated bytes in array + containers */ + uint64_t n_bytes_run_containers; /* number of allocated bytes in run + containers */ + uint64_t n_bytes_bitset_containers; /* number of allocated bytes in bitmap + containers */ + + uint64_t + max_value; /* the maximal value, undefined if cardinality is zero */ + uint64_t + min_value; /* the minimal value, undefined if cardinality is zero */ + + uint64_t cardinality; /* total number of values stored in the bitmap */ + + // and n_values_arrays, n_values_rle, n_values_bitmap +} roaring64_statistics_t; + +/** + * Roaring-internal type used to iterate within a roaring container. + */ +typedef struct roaring_container_iterator_s { + // For bitset and array containers this is the index of the bit / entry. + // For run containers this points at the run. + int32_t index; +} roaring_container_iterator_t; + +#ifdef __cplusplus +} +} +} // extern "C" { namespace roaring { namespace api { +#endif + +#endif /* ROARING_TYPES_H */ +/* end file include/roaring/roaring_types.h */ /* begin file include/roaring/bitset/bitset.h */ #ifndef CROARING_CBITSET_BITSET_H #define CROARING_CBITSET_BITSET_H @@ -2486,6 +2499,11 @@ void roaring64_bitmap_remove_range(roaring64_bitmap_t *r, uint64_t min, void roaring64_bitmap_remove_range_closed(roaring64_bitmap_t *r, uint64_t min, uint64_t max); +/** + * Empties the bitmap. + */ +void roaring64_bitmap_clear(roaring64_bitmap_t *r); + /** * Returns true if the provided value is present. */ @@ -2556,6 +2574,12 @@ uint64_t roaring64_bitmap_get_cardinality(const roaring64_bitmap_t *r); uint64_t roaring64_bitmap_range_cardinality(const roaring64_bitmap_t *r, uint64_t min, uint64_t max); +/** + * Returns the number of elements in the range [min, max] + */ +uint64_t roaring64_bitmap_range_closed_cardinality(const roaring64_bitmap_t *r, + uint64_t min, uint64_t max); + /** * Returns true if the bitmap is empty (cardinality is zero). */ diff --git a/croaring-sys/CRoaring/roaring.hh b/croaring-sys/CRoaring/roaring.hh index 1881de5..c195ede 100644 --- a/croaring-sys/CRoaring/roaring.hh +++ b/croaring-sys/CRoaring/roaring.hh @@ -1,5 +1,5 @@ // !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!! -// Created by amalgamation.sh on 2024-05-13T21:29:25Z +// Created by amalgamation.sh on 2024-07-03T21:30:32Z /* * The CRoaring project is under a dual license (Apache/MIT). diff --git a/croaring-sys/Cargo.toml b/croaring-sys/Cargo.toml index a4e3456..37606b2 100644 --- a/croaring-sys/Cargo.toml +++ b/croaring-sys/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "croaring-sys" -version = "4.0.0" +version = "4.1.0" edition = "2021" authors = ["croaring-rs developers"] license = "Apache-2.0" diff --git a/croaring/Cargo.toml b/croaring/Cargo.toml index 385a354..6e57acd 100644 --- a/croaring/Cargo.toml +++ b/croaring/Cargo.toml @@ -25,7 +25,7 @@ roaring = "0.10" criterion = { version = "0.5", features = ["html_reports"] } [dependencies] -ffi = { package = "croaring-sys", path = "../croaring-sys", version = "4.0.0" } +ffi = { package = "croaring-sys", path = "../croaring-sys", version = "4.1.0" } [[bench]] name = "benches" diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index 98e2490..43b434b 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -52,7 +52,7 @@ dependencies = [ [[package]] name = "croaring-sys" -version = "4.0.0" +version = "4.1.0" dependencies = [ "cc", ] From 78c98384819d56819f43899fa773ff9551a35e88 Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Tue, 25 Jun 2024 19:01:44 -0400 Subject: [PATCH 6/6] More must_use Adding must_use is technically a breaking change, would like to get this in now --- croaring/src/bitmap/imp.rs | 17 ++++++++++++++--- croaring/src/bitmap/iter.rs | 5 +++++ croaring/src/bitmap64/imp.rs | 19 ++++++++++++++++--- croaring/src/bitmap64/iter.rs | 2 ++ 4 files changed, 37 insertions(+), 6 deletions(-) diff --git a/croaring/src/bitmap/imp.rs b/croaring/src/bitmap/imp.rs index e07ac7f..b0b328e 100644 --- a/croaring/src/bitmap/imp.rs +++ b/croaring/src/bitmap/imp.rs @@ -14,6 +14,7 @@ use core::prelude::v1::*; impl Bitmap { #[inline] #[allow(clippy::assertions_on_constants)] + #[must_use] pub(crate) unsafe fn take_heap(p: *mut roaring_bitmap_t) -> Self { // Based heavily on the `roaring.hh` cpp header from croaring @@ -219,6 +220,7 @@ impl Bitmap { /// ``` #[inline] #[doc(alias = "roaring_bitmap_contains_range")] + #[must_use] pub fn contains_range>(&self, range: R) -> bool { let (start, end) = range_to_exclusive(range); unsafe { ffi::roaring_bitmap_contains_range(&self.bitmap, start, end) } @@ -364,6 +366,7 @@ impl Bitmap { /// ``` #[inline] #[doc(alias = "roaring_bitmap_range_cardinality")] + #[must_use] pub fn range_cardinality>(&self, range: R) -> u64 { let (start, end) = range_to_exclusive(range); unsafe { ffi::roaring_bitmap_range_cardinality(&self.bitmap, start, end) } @@ -873,7 +876,8 @@ impl Bitmap { /// Serializes a bitmap to a slice of bytes in format `S`, re-using existing capacity /// /// `dst` is not cleared, data is added after any existing data. Returns the added slice of `dst`. - /// If `dst` is empty, it is guaranteed to hold only the serialized data after this call + /// Because of alignment requirements, the serialized data may not start at the beginning of + /// `dst`: the returned slice may not start at `dst.as_ptr()`. /// /// # Examples /// @@ -886,12 +890,14 @@ impl Bitmap { /// let mut data = Vec::new(); /// for bitmap in [original_bitmap_1, original_bitmap_2] { /// data.clear(); - /// bitmap.try_serialize_into::(&mut data); - /// // do something with data + /// let serialized: &[u8] = bitmap.serialize_into_vec::(&mut data); + /// // do something with serialized data + /// # let _ = serialized; /// } /// ``` #[inline] #[cfg(feature = "alloc")] + #[must_use] pub fn serialize_into_vec<'a, S: Serializer>(&self, dst: &'a mut Vec) -> &'a mut [u8] { S::serialize_into_vec(self, dst) } @@ -907,6 +913,7 @@ impl Bitmap { /// See also [`Self::serialize_into_vec`] for a version that uses a Vec instead, or, for /// advanced use-cases, see [`Serializer::try_serialize_into`]. #[inline] + #[must_use] pub fn try_serialize_into<'a, S: Serializer>(&self, dst: &'a mut [u8]) -> Option<&'a mut [u8]> { S::try_serialize_into_aligned(self, dst) } @@ -945,6 +952,7 @@ impl Bitmap { /// /// On invalid input returns empty bitmap. #[inline] + #[must_use] pub fn deserialize(buffer: &[u8]) -> Self { Self::try_deserialize::(buffer).unwrap_or_else(Bitmap::new) } @@ -999,6 +1007,7 @@ impl Bitmap { /// assert_eq!(bitmap3.iter().collect::>(), [3, 4, 5]); #[inline] #[doc(alias = "roaring_bitmap_from_range")] + #[must_use] pub fn from_range>(range: R) -> Self { let mut result = Self::new(); result.add_range(range); @@ -1045,6 +1054,7 @@ impl Bitmap { /// ``` #[inline] #[doc(alias = "roaring_bitmap_from_range")] + #[must_use] pub fn from_range_with_step>(range: R, step: u32) -> Self { // This can't use `range_to_exclusive` because when the start is excluded, we want // to start at the next step, not one more @@ -1243,6 +1253,7 @@ impl Bitmap { /// ``` #[inline] #[doc(alias = "roaring_bitmap_intersect_with_range")] + #[must_use] pub fn intersect_with_range>(&self, range: R) -> bool { let (start, end) = range_to_exclusive(range); unsafe { ffi::roaring_bitmap_intersect_with_range(&self.bitmap, start, end) } diff --git a/croaring/src/bitmap/iter.rs b/croaring/src/bitmap/iter.rs index 331e1bb..74bd620 100644 --- a/croaring/src/bitmap/iter.rs +++ b/croaring/src/bitmap/iter.rs @@ -59,6 +59,7 @@ impl<'a> BitmapCursor<'a> { /// assert!(!cursor.has_value()); /// ``` #[inline] + #[must_use] pub fn has_value(&self) -> bool { self.raw.has_value } @@ -79,6 +80,7 @@ impl<'a> BitmapCursor<'a> { /// assert_eq!(cursor.current(), None); /// ``` #[inline] + #[must_use] pub fn current(&self) -> Option { if self.has_value() { Some(self.raw.current_value) @@ -291,6 +293,7 @@ impl<'a> BitmapCursor<'a> { /// ``` #[inline] #[doc(alias = "roaring_uint32_iterator_read")] + #[must_use] pub fn read_many(&mut self, dst: &mut [u32]) -> usize { let count = u32::try_from(dst.len()).unwrap_or(u32::MAX); let result = @@ -404,6 +407,7 @@ impl<'a> BitmapIterator<'a> { /// ``` #[inline] #[doc(alias = "roaring_uint32_iterator_read")] + #[must_use] pub fn next_many(&mut self, dst: &mut [u32]) -> usize { self.cursor.read_many(dst) } @@ -449,6 +453,7 @@ impl<'a> BitmapIterator<'a> { /// assert_eq!(iter.next(), Some(1)); /// ``` #[inline] + #[must_use] pub fn peek(&self) -> Option { self.cursor.current() } diff --git a/croaring/src/bitmap64/imp.rs b/croaring/src/bitmap64/imp.rs index 21c0cec..de4a2b0 100644 --- a/croaring/src/bitmap64/imp.rs +++ b/croaring/src/bitmap64/imp.rs @@ -41,6 +41,7 @@ impl Bitmap64 { /// ``` #[inline] #[doc(alias = "roaring64_bitmap_of_ptr")] + #[must_use] pub fn of(slice: &[u64]) -> Self { unsafe { Self::take_heap(ffi::roaring64_bitmap_of_ptr(slice.len(), slice.as_ptr())) } } @@ -48,6 +49,7 @@ impl Bitmap64 { /// Create a new bitmap containing all the values in a range #[inline] #[doc(alias = "roaring64_bitmap_from_range")] + #[must_use] pub fn from_range>(range: R) -> Self { Self::from_range_with_step(range, 1) } @@ -91,6 +93,7 @@ impl Bitmap64 { /// ``` #[inline] #[doc(alias = "roaring64_bitmap_from_range")] + #[must_use] pub fn from_range_with_step>(range: R, step: u64) -> Self { // This can't use `range_to_exclusive` because when the start is excluded, we want // to start at the next step, not one more @@ -461,6 +464,9 @@ impl Bitmap64 { /// assert!(!bitmap4.is_strict_subset(&bitmap1)); /// assert!(!bitmap1.is_strict_subset(&bitmap1)); /// + #[inline] + #[must_use] + #[doc(alias = "roaring64_bitmap_is_strict_subset")] pub fn is_strict_subset(&self, other: &Self) -> bool { unsafe { ffi::roaring64_bitmap_is_strict_subset(self.raw.as_ptr(), other.raw.as_ptr()) } } @@ -575,6 +581,7 @@ impl Bitmap64 { /// assert!(bitmap.contains_range(10..0)); /// ``` #[inline] + #[must_use] #[doc(alias = "roaring64_bitmap_contains_range")] pub fn contains_range>(&self, range: R) -> bool { let Some(exclusive_range) = range_to_exclusive(range) else { @@ -821,7 +828,8 @@ impl Bitmap64 { /// Serializes a bitmap to a slice of bytes in format `S`, re-using existing capacity /// /// `dst` is not cleared, data is added after any existing data. Returns the added slice of `dst`. - /// If `dst` is empty, it is guaranteed to hold only the serialized data after this call + /// Because of alignment requirements, the serialized data may not start at the beginning of + /// `dst`: the returned slice may not start at `dst.as_ptr()`. /// /// # Examples /// @@ -834,11 +842,13 @@ impl Bitmap64 { /// let mut data = Vec::new(); /// for bitmap in [original_bitmap_1, original_bitmap_2] { /// data.clear(); - /// bitmap.serialize_into_vec::(&mut data); - /// // do something with data + /// let serialized = bitmap.serialize_into_vec::(&mut data); + /// // do something with serialized + /// # let _ = serialized; /// } /// ``` #[inline] + #[must_use] #[doc(alias = "roaring64_bitmap_portable_serialize")] #[cfg(feature = "alloc")] pub fn serialize_into_vec<'a, S: Serializer>(&self, dst: &'a mut Vec) -> &'a [u8] { @@ -856,6 +866,7 @@ impl Bitmap64 { /// See also [`Self::serialize_into_vec`] for a version that uses a Vec instead, or, for /// advanced use-cases, see [`Serializer::try_serialize_into`]. #[inline] + #[must_use] pub fn try_serialize_into<'a, S: Serializer>(&self, dst: &'a mut [u8]) -> Option<&'a mut [u8]> { S::try_serialize_into_aligned(self, dst) } @@ -892,6 +903,7 @@ impl Bitmap64 { /// /// On invalid input returns empty bitmap. #[inline] + #[must_use] pub fn deserialize(buffer: &[u8]) -> Self { Self::try_deserialize::(buffer).unwrap_or_default() } @@ -1071,6 +1083,7 @@ impl Bitmap64 { /// assert!(!bitmap.intersect_with_range(100..0)); /// ``` #[inline] + #[must_use] #[doc(alias = "roaring64_bitmap_intersect_with_range")] pub fn intersect_with_range>(&self, range: R) -> bool { let Some(exclusive_range) = range_to_exclusive(range) else { diff --git a/croaring/src/bitmap64/iter.rs b/croaring/src/bitmap64/iter.rs index 037431b..aba3d50 100644 --- a/croaring/src/bitmap64/iter.rs +++ b/croaring/src/bitmap64/iter.rs @@ -504,6 +504,7 @@ impl<'a> Bitmap64Iterator<'a> { /// # print_by_chunks(&Bitmap64::of(&[1, 2, 8, 20, 1000])); /// ``` #[inline] + #[must_use] #[doc(alias = "roaring64_iterator_read")] pub fn next_many(&mut self, dst: &mut [u64]) -> usize { self.cursor.read_many(dst) @@ -550,6 +551,7 @@ impl<'a> Bitmap64Iterator<'a> { /// assert_eq!(iter.next(), Some(1)); /// ``` #[inline] + #[must_use] pub fn peek(&self) -> Option { self.cursor.current() }