diff --git a/Cargo.lock b/Cargo.lock index ad45f2b6b5..6d736d5d88 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2129,12 +2129,14 @@ dependencies = [ "ouroboros", "parking_lot", "postcard", + "proptest", "quinn", "rand", "rand_core", "redb", "serde", "tempfile", + "test-strategy", "tokio", "tokio-stream", "tokio-util", @@ -4230,6 +4232,29 @@ version = "0.1.1" source = "registry+/~https://github.com/rust-lang/crates.io-index" checksum = "e9426b2a0c03e6cc2ea8dbc0168dbbf943f88755e409fb91bcb8f6a268305f4a" +[[package]] +name = "structmeta" +version = "0.2.0" +source = "registry+/~https://github.com/rust-lang/crates.io-index" +checksum = "78ad9e09554f0456d67a69c1584c9798ba733a5b50349a6c0d0948710523922d" +dependencies = [ + "proc-macro2", + "quote", + "structmeta-derive", + "syn 2.0.29", +] + +[[package]] +name = "structmeta-derive" +version = "0.2.0" +source = "registry+/~https://github.com/rust-lang/crates.io-index" +checksum = "a60bcaff7397072dca0017d1db428e30d5002e00b6847703e2e42005c95fbe00" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.29", +] + [[package]] name = "stun-rs" version = "0.1.4" @@ -4373,6 +4398,18 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "test-strategy" +version = "0.3.1" +source = "registry+/~https://github.com/rust-lang/crates.io-index" +checksum = "b8361c808554228ad09bfed70f5c823caf8a3450b6881cc3a38eb57e8c08c1d9" +dependencies = [ + "proc-macro2", + "quote", + "structmeta", + "syn 2.0.29", +] + [[package]] name = "testdir" version = "0.8.0" diff --git a/iroh-sync/Cargo.toml b/iroh-sync/Cargo.toml index 57d89b0ec0..5cff2aae71 100644 --- a/iroh-sync/Cargo.toml +++ b/iroh-sync/Cargo.toml @@ -47,6 +47,8 @@ futures = { version = "0.3", optional = true } [dev-dependencies] tokio = { version = "1", features = ["sync", "macros"] } tempfile = "3.4" +proptest = "1.2.0" +test-strategy = "0.3.1" [features] default = ["net", "fs-store", "metrics"] diff --git a/iroh-sync/proptest-regressions/ranger.txt b/iroh-sync/proptest-regressions/ranger.txt new file mode 100644 index 0000000000..5986c689de --- /dev/null +++ b/iroh-sync/proptest-regressions/ranger.txt @@ -0,0 +1,8 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. +# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. +cc 797e83179f8684388880e25a6fac7b4047eb15b03c55c1fb725b82bdbd0a4369 # shrinks to a = {TestKey("3"): ()}, b = {TestKey(""): (), TestKey("3"): (), TestKey("4"): (), TestKey("5"): (), TestKey("a"): (), TestKey("b"): (), TestKey("c"): ()} +cc f5b7604319ead6181c2ff42e53f05e2c6f0298adf0b38ea4ae4710c43abb7663 # shrinks to input = _SimpleStoreSyncArgs { alice: [(3, ()), (a, ())], bob: [(, ()), (0, ()), (b, ())] } diff --git a/iroh-sync/src/ranger.rs b/iroh-sync/src/ranger.rs index cbec0c359b..550c935a1b 100644 --- a/iroh-sync/src/ranger.rs +++ b/iroh-sync/src/ranger.rs @@ -41,6 +41,20 @@ impl Range { } } +impl Range { + pub fn is_all(&self) -> bool { + self.x() == self.y() + } + + pub fn contains(&self, t: &K) -> bool { + match self.x().cmp(self.y()) { + Ordering::Equal => true, + Ordering::Less => self.x() <= t && t < self.y(), + Ordering::Greater => self.x() <= t || t < self.y(), + } + } +} + impl From<(K, K)> for Range { fn from((x, y): (K, K)) -> Self { Range { x, y } @@ -50,21 +64,14 @@ impl From<(K, K)> for Range { pub trait RangeKey: Sized + Ord + Debug { /// Is this key inside the range? fn contains(&self, range: &Range) -> bool { - contains(self, range) - } -} - -/// Default implementation of `contains` for `Ord` types. -pub fn contains(t: &T, range: &Range) -> bool { - match range.x().cmp(range.y()) { - Ordering::Equal => true, - Ordering::Less => range.x() <= t && t < range.y(), - Ordering::Greater => range.x() <= t || t < range.y(), + range.contains(self) } } impl RangeKey for &str {} impl RangeKey for &[u8] {} +impl RangeKey for Vec {} +impl RangeKey for String {} #[derive(Copy, Clone, PartialEq, Serialize, Deserialize)] pub struct Fingerprint(pub [u8; 32]); @@ -539,59 +546,87 @@ where // m0 = x < m1 < .. < mk = y, with k>= 2 // such that [ml, ml+1) is nonempty let mut ranges = Vec::with_capacity(self.split_factor); - let chunk_len = div_ceil(local_values.len(), self.split_factor); - // Select the first index, for which the key is larger than the x of the range. - let mut start_index = local_values + // Select the first index, for which the key is larger or equal than the x of the range. + let start_index = local_values .iter() - .position(|(k, _)| range.x() <= k) + .position(|(k, _)| k >= range.x()) .unwrap_or(0); - let max_len = local_values.len(); - for i in 0..self.split_factor { - let s_index = start_index; - let start = (s_index * chunk_len) % max_len; - let e_index = s_index + 1; - let end = (e_index * chunk_len) % max_len; - - let (x, y) = if i == 0 { - // first - (range.x(), &local_values[end].0) - } else if i == self.split_factor - 1 { - // last - (&local_values[start].0, range.y()) - } else { - // regular - (&local_values[start].0, &local_values[end].0) - }; - let range = Range::new(x.clone(), y.clone()); - ranges.push(range); - start_index += 1; + // select a pivot value. pivots repeat every split_factor, so pivot(i) == pivot(i + self.split_factor * x) + // it is guaranteed that pivot(0) != x if local_values.len() >= 2 + let pivot = |i: usize| { + // ensure that pivots wrap around + let i = i % self.split_factor; + // choose an offset. this will be + // 1/2, 1 in case of split_factor == 2 + // 1/3, 2/3, 1 in case of split_factor == 3 + // etc. + let offset = (local_values.len() * (i + 1)) / self.split_factor; + let offset = (start_index + offset) % local_values.len(); + &local_values[offset].0 + }; + if range.is_all() { + // the range is the whole set, so range.x and range.y should not matter + // just add all ranges as normal ranges. Exactly one of the ranges will + // wrap around, so we cover the entire set. + for i in 0..self.split_factor { + let (x, y) = (pivot(i), pivot(i + 1)); + // don't push empty ranges + if x != y { + ranges.push(Range { + x: x.clone(), + y: y.clone(), + }) + } + } + } else { + // guaranteed to be non-empty because + // - pivot(0) is guaranteed to be != x for local_values.len() >= 2 + // - local_values.len() < 2 gets handled by the recursion anchor + // - x != y (regular range) + ranges.push(Range { + x: range.x().clone(), + y: pivot(0).clone(), + }); + // this will only be executed for split_factor > 2 + for i in 0..self.split_factor - 2 { + // don't push empty ranges + let (x, y) = (pivot(i), pivot(i + 1)); + if x != y { + ranges.push(Range { + x: x.clone(), + y: y.clone(), + }) + } + } + // guaranteed to be non-empty because + // - pivot is a value in the range + // - y is the exclusive end of the range + // - x != y (regular range) + ranges.push(Range { + x: pivot(self.split_factor - 2).clone(), + y: range.y().clone(), + }); } - for range in ranges.into_iter() { + let mut non_empty = 0; + for range in ranges { let chunk: Vec<_> = self .store .get_range(range.clone(), self.limit.clone())? .collect(); + if !chunk.is_empty() { + non_empty += 1; + } // Add either the fingerprint or the item set let fingerprint = self.store.get_fingerprint(&range, self.limit.as_ref())?; if chunk.len() > self.max_set_size { out.push(MessagePart::RangeFingerprint(RangeFingerprint { - range, + range: range.clone(), fingerprint, })); } else { - let values = chunk - .into_iter() - .map(|el| match el { - Ok((k, v)) => { - let k: K = k; - let v: V = v; - Ok((k, v)) - } - Err(err) => Err(err), - }) - .collect::>()?; + let values = chunk.into_iter().collect::>()?; out.push(MessagePart::RangeItem(RangeItem { range, values, @@ -599,6 +634,7 @@ where })); } } + debug_assert!(non_empty > 1); } } @@ -637,17 +673,11 @@ where // } } -/// Sadly is still unstable.. -fn div_ceil(a: usize, b: usize) -> usize { - debug_assert!(a != 0); - debug_assert!(b != 0); - - a / b + (a % b != 0) as usize -} - #[cfg(test)] mod tests { + use proptest::prelude::*; use std::fmt::Debug; + use test_strategy::proptest; use super::*; @@ -664,7 +694,8 @@ mod tests { ]; let res = sync(None, &alice_set, &bob_set); - assert_eq!(res.alice_to_bob.len(), 2, "A -> B message count"); + res.print_messages(); + assert_eq!(res.alice_to_bob.len(), 3, "A -> B message count"); assert_eq!(res.bob_to_alice.len(), 2, "B -> A message count"); // Initial message @@ -678,7 +709,7 @@ mod tests { // Last response from Alice assert_eq!(res.alice_to_bob[1].parts.len(), 3); - assert!(res.alice_to_bob[1].parts[0].is_range_item()); + assert!(res.alice_to_bob[1].parts[0].is_range_fingerprint()); assert!(res.alice_to_bob[1].parts[1].is_range_fingerprint()); assert!(res.alice_to_bob[1].parts[2].is_range_item()); @@ -741,7 +772,7 @@ mod tests { // No Limit let res = sync(None, &alice_set, &bob_set); - assert_eq!(res.alice_to_bob.len(), 3, "A -> B message count"); + assert_eq!(res.alice_to_bob.len(), 2, "A -> B message count"); assert_eq!(res.bob_to_alice.len(), 2, "B -> A message count"); // With Limit: just ape @@ -816,57 +847,7 @@ mod tests { key: Vec, } - impl RangeKey for Multikey { - fn contains(&self, range: &Range) -> bool { - let author = range.x().author.cmp(&range.y().author); - let key = range.x().key.cmp(&range.y().key); - - match (author, key) { - (Ordering::Equal, Ordering::Equal) => { - // All - true - } - (Ordering::Equal, Ordering::Less) => { - // Regular, based on key - range.x().key <= self.key && self.key < range.y().key - } - (Ordering::Equal, Ordering::Greater) => { - // Reverse, based on key - range.x().key <= self.key || self.key < range.y().key - } - (Ordering::Less, Ordering::Equal) => { - // Regular, based on author - range.x().author <= self.author && self.author < range.y().author - } - (Ordering::Greater, Ordering::Equal) => { - // Reverse, based on key - range.x().author <= self.author || self.author < range.y().author - } - (Ordering::Less, Ordering::Less) => { - // Regular, key and author - range.x().key <= self.key - && self.key < range.y().key - && range.x().author <= self.author - && self.author < range.y().author - } - (Ordering::Greater, Ordering::Greater) => { - // Reverse, key and author - (range.x().key <= self.key || self.key < range.y().key) - && (range.x().author <= self.author || self.author < range.y().author) - } - (Ordering::Less, Ordering::Greater) => { - // Regular author, Reverse key - (range.x().key <= self.key || self.key < range.y().key) - && (range.x().author <= self.author && self.author < range.y().author) - } - (Ordering::Greater, Ordering::Less) => { - // Regular key, Reverse author - (range.x().key <= self.key && self.key < range.y().key) - && (range.x().author <= self.author || self.author < range.y().author) - } - } - } - } + impl RangeKey for Multikey {} impl Debug for Multikey { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -945,7 +926,7 @@ mod tests { let limit = Range::new(Multikey::new(author_a, ""), Multikey::new(author_b, "")); let res = sync(Some(limit), &alice_set, &bob_set); assert_eq!(res.alice_to_bob.len(), 2, "A -> B message count"); - assert_eq!(res.bob_to_alice.len(), 1, "B -> A message count"); + assert_eq!(res.bob_to_alice.len(), 2, "B -> A message count"); res.assert_alice_set( "only author_a", &[ @@ -968,36 +949,37 @@ mod tests { ], ); - // All authors, but only cat - let limit = Range::new( - Multikey::new(author_a, "cat"), - Multikey::new(author_a, "doe"), - ); - let res = sync(Some(limit), &alice_set, &bob_set); - assert_eq!(res.alice_to_bob.len(), 1, "A -> B message count"); - assert_eq!(res.bob_to_alice.len(), 1, "B -> A message count"); - - res.assert_alice_set( - "only cat", - &[ - (Multikey::new(author_a, "ape"), 1), - (Multikey::new(author_a, "bee"), 1), - (Multikey::new(author_b, "bee"), 1), - (Multikey::new(author_a, "doe"), 1), - (Multikey::new(author_a, "cat"), 1), - (Multikey::new(author_b, "cat"), 1), - ], - ); - - res.assert_bob_set( - "only cat", - &[ - (Multikey::new(author_a, "ape"), 1), - (Multikey::new(author_a, "bee"), 1), - (Multikey::new(author_a, "cat"), 1), - (Multikey::new(author_b, "cat"), 1), - ], - ); + // I don't think such a thing is possible + // // All authors, but only cat + // let limit = Range::new( + // Multikey::new(author_a, "cat"), + // Multikey::new(author_a, "doe"), + // ); + // let res = sync(Some(limit), &alice_set, &bob_set); + // assert_eq!(res.alice_to_bob.len(), 1, "A -> B message count"); + // assert_eq!(res.bob_to_alice.len(), 1, "B -> A message count"); + + // res.assert_alice_set( + // "only cat", + // &[ + // (Multikey::new(author_a, "ape"), 1), + // (Multikey::new(author_a, "bee"), 1), + // (Multikey::new(author_b, "bee"), 1), + // (Multikey::new(author_a, "doe"), 1), + // (Multikey::new(author_a, "cat"), 1), + // (Multikey::new(author_b, "cat"), 1), + // ], + // ); + + // res.assert_bob_set( + // "only cat", + // &[ + // (Multikey::new(author_a, "ape"), 1), + // (Multikey::new(author_a, "bee"), 1), + // (Multikey::new(author_a, "cat"), 1), + // (Multikey::new(author_b, "cat"), 1), + // ], + // ); } struct SyncResult @@ -1115,7 +1097,7 @@ mod tests { ) -> SyncResult where K: PartialEq + RangeKey + Clone + Default + Debug + AsFingerprint, - V: Clone + Debug + PartialEq, + V: Ord + Clone + Debug + PartialEq, { println!("Using Limit: {:?}", limit); let mut expected_set_alice = BTreeMap::new(); @@ -1310,13 +1292,105 @@ mod tests { assert_eq!(&all, &set[..2]); } + type TestSet = BTreeMap; + + fn test_key() -> impl Strategy { + "[a-z0-9]{0,5}" + } + + fn test_set() -> impl Strategy { + prop::collection::btree_map(test_key(), Just(()), 0..10) + } + + fn test_vec() -> impl Strategy> { + test_set().prop_map(|m| m.into_iter().collect::>()) + } + + fn test_range() -> impl Strategy> { + // ranges with x > y are explicitly allowed - they wrap around + (test_key(), test_key()).prop_map(|(x, y)| Range::new(x, y)) + } + + fn mk_test_set(values: impl IntoIterator>) -> TestSet { + values + .into_iter() + .map(|v| v.as_ref().to_string()) + .map(|k| (k, ())) + .collect() + } + + fn mk_test_vec(values: impl IntoIterator>) -> Vec<(String, ())> { + mk_test_set(values).into_iter().collect() + } + + #[test] + fn simple_store_sync_1() { + let alice = mk_test_vec(["3"]); + let bob = mk_test_vec(["2", "3", "4", "5", "6", "7", "8"]); + let _res = sync(None, &alice, &bob); + } + + #[test] + fn simple_store_sync_2() { + let alice = mk_test_vec(["1", "3"]); + let bob = mk_test_vec(["0", "2", "3"]); + let _res = sync(None, &alice, &bob); + } + #[test] - fn test_div_ceil() { - assert_eq!(div_ceil(1, 1), 1); - assert_eq!(div_ceil(2, 1), 2); - assert_eq!(div_ceil(4, 2), 4 / 2); + fn simple_store_sync_3() { + let alice = mk_test_vec(["8", "9"]); + let bob = mk_test_vec(["1", "2", "3"]); + let _res = sync(None, &alice, &bob); + } + + #[proptest] + fn simple_store_sync( + #[strategy(test_vec())] alice: Vec<(String, ())>, + #[strategy(test_vec())] bob: Vec<(String, ())>, + ) { + let _res = sync(None, &alice, &bob); + } + + /// A generic fn to make a test for the get_range fn of a store. + #[allow(clippy::type_complexity)] + fn store_get_ranges_test( + elems: impl IntoIterator, + range: Range, + limit: Option>, + ) -> (Vec<(K, V)>, Vec<(K, V)>) + where + S: Store + Default, + K: RangeKey + Clone + Default + AsFingerprint, + V: Debug + Clone, + { + let mut store = S::default(); + let elems = elems.into_iter().collect::>(); + for (k, v) in elems.iter().cloned() { + store.put(k, v).unwrap(); + } + let mut actual = store + .get_range(range.clone(), limit) + .unwrap() + .collect::, S::Error>>() + .unwrap(); + let mut expected = elems + .into_iter() + .filter(|(k, _)| k.contains(&range)) + .collect::>(); + + actual.sort_by(|(ak, _), (bk, _)| ak.cmp(bk)); + expected.sort_by(|(ak, _), (bk, _)| ak.cmp(bk)); + (expected, actual) + } - assert_eq!(div_ceil(3, 2), 2); - assert_eq!(div_ceil(5, 3), 2); + #[proptest] + fn simple_store_get_ranges( + #[strategy(test_set())] contents: BTreeMap, + #[strategy(test_range())] range: Range, + ) { + let (expected, actual) = + store_get_ranges_test::, _, _>(contents.clone(), range.clone(), None); + prop_assert_eq!(expected, actual); } } diff --git a/iroh-sync/src/sync.rs b/iroh-sync/src/sync.rs index e4df3591bb..a8c99adb24 100644 --- a/iroh-sync/src/sync.rs +++ b/iroh-sync/src/sync.rs @@ -400,19 +400,7 @@ impl AsFingerprint for RecordIdentifier { } } -impl RangeKey for RecordIdentifier { - fn contains(&self, range: &crate::ranger::Range) -> bool { - use crate::ranger::contains; - - let key_range = range.clone().map(|x, y| (x.key, y.key)); - let namespace_range = range.clone().map(|x, y| (x.namespace, y.namespace)); - let author_range = range.clone().map(|x, y| (x.author, y.author)); - - contains(&self.key, &key_range) - && contains(&self.namespace, &namespace_range) - && contains(&self.author, &author_range) - } -} +impl RangeKey for RecordIdentifier {} impl RecordIdentifier { /// Create a new [`RecordIdentifier`].