Skip to content

Commit

Permalink
chore(ci): use nextests groups to isolate some tests (#2617)
Browse files Browse the repository at this point in the history
## Description

After a lot of debugging we realized the integrations tests (`gc` tests,
for
example) run with swarm discovery on since they are not technically
using test
iroh, so they bypass the cfg that prevents it from being turned on. This
causes
the `test_local_swarm_discovery` test to be flooded with nodes from
other
tests, making it flaky. Isolating the test should make it no longer
flaky
anymore

## Breaking Changes

n/a

## Notes & open questions

We should still create a test that creates a bunch of nodes and makes
sure they
all can find each other within reasonable time. There is still reason to
believe swarm discovery is slow, particularly in linux, but we should
address
it in a test under our countrol, not via the non-deterministic nature of
concurrent tests in ci.

## Change checklist

- [x] Self-review.
- [ ] ~~Documentation updates following the [style
guide](https://rust-lang.github.io/rfcs/1574-more-api-documentation-conventions.html#appendix-a-full-conventions-text),
if relevant.~~
- [ ] ~~Tests if relevant.~~
- [ ] ~~All breaking changes documented.~~
  • Loading branch information
divagant-martian authored Aug 13, 2024
1 parent bcc87a2 commit a5072c3
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 36 deletions.
10 changes: 10 additions & 0 deletions .config/nextest.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[test-groups]
run-in-isolation = { max-threads = 16 }
# these are tests that must not run with other tests concurrently. All tests in
# this group can take up at most 16 threads among them, but each one requiring
# 16 threads also. The effect should be that tests run isolated.

[[profile.ci.overrides]]
filter = 'test(::run_in_isolation::)'
test-group = 'run-in-isolation'
threads-required = 16
4 changes: 2 additions & 2 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ jobs:
- name: run tests
run: |
mkdir -p output
cargo nextest run --workspace ${{ env.FEATURES }} --lib --bins --tests --run-ignored ${{ inputs.flaky && 'all' || 'default' }} --no-fail-fast --message-format ${{ inputs.flaky && 'libtest-json' || 'human' }} > output/${{ matrix.name }}_${{ matrix.features }}_${{ matrix.rust }}.json
cargo nextest run --workspace ${{ env.FEATURES }} --lib --bins --tests --profile ci --run-ignored ${{ inputs.flaky && 'all' || 'default' }} --no-fail-fast --message-format ${{ inputs.flaky && 'libtest-json' || 'human' }} > output/${{ matrix.name }}_${{ matrix.features }}_${{ matrix.rust }}.json
env:
RUST_LOG: ${{ runner.debug && 'TRACE' || 'DEBUG'}}
NEXTEST_EXPERIMENTAL_LIBTEST_JSON: 1
Expand Down Expand Up @@ -213,7 +213,7 @@ jobs:
- name: tests
run: |
mkdir -p output
cargo nextest run --workspace ${{ env.FEATURES }} --lib --bins --tests --target ${{ matrix.target }} --run-ignored ${{ inputs.flaky && 'all' || 'default' }} --no-fail-fast --message-format ${{ inputs.flaky && 'libtest-json' || 'human' }} > output/${{ matrix.name }}_${{ matrix.features }}_${{ matrix.rust }}.json
cargo nextest run --workspace ${{ env.FEATURES }} --lib --bins --tests --profile ci --target ${{ matrix.target }} --run-ignored ${{ inputs.flaky && 'all' || 'default' }} --no-fail-fast --message-format ${{ inputs.flaky && 'libtest-json' || 'human' }} > output/${{ matrix.name }}_${{ matrix.features }}_${{ matrix.rust }}.json
env:
RUST_LOG: ${{ runner.debug && 'TRACE' || 'DEBUG'}}
NEXTEST_EXPERIMENTAL_LIBTEST_JSON: 1
Expand Down
73 changes: 39 additions & 34 deletions iroh-net/src/discovery/local_swarm_discovery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ impl LocalSwarmDiscovery {
sender.send(Ok(item)).await.ok();
}
}
trace!(
debug!(
?discovered_node_id,
?peer_info,
"adding node to LocalSwarmDiscovery address book"
Expand Down Expand Up @@ -272,42 +272,47 @@ impl Discovery for LocalSwarmDiscovery {

#[cfg(test)]
mod tests {
use super::*;
use testresult::TestResult;

#[tokio::test]
async fn test_local_swarm_discovery() -> TestResult {
let _guard = iroh_test::logging::setup();
let (_, discovery_a) = make_discoverer()?;
let (node_id_b, discovery_b) = make_discoverer()?;
/// This module's name signals nextest to run test in a single thread (no other concurrent
/// tests)
mod run_in_isolation {
use super::super::*;
use testresult::TestResult;

// make addr info for discoverer b
let addr_info = AddrInfo {
relay_url: None,
direct_addresses: BTreeSet::from(["0.0.0.0:11111".parse()?]),
};
#[tokio::test]
async fn test_local_swarm_discovery() -> TestResult {
let _guard = iroh_test::logging::setup();
let (_, discovery_a) = make_discoverer()?;
let (node_id_b, discovery_b) = make_discoverer()?;

// pass in endpoint, this is never used
let ep = crate::endpoint::Builder::default().bind(0).await?;
// resolve twice to ensure we can create separate streams for the same node_id
let mut s1 = discovery_a.resolve(ep.clone(), node_id_b).unwrap();
let mut s2 = discovery_a.resolve(ep, node_id_b).unwrap();
tracing::debug!(?node_id_b, "Discovering node id b");
// publish discovery_b's address
discovery_b.publish(&addr_info);
let s1_res = tokio::time::timeout(Duration::from_secs(5), s1.next())
.await?
.unwrap()?;
let s2_res = tokio::time::timeout(Duration::from_secs(5), s2.next())
.await?
.unwrap()?;
assert_eq!(s1_res.addr_info, addr_info);
assert_eq!(s2_res.addr_info, addr_info);
Ok(())
}
// make addr info for discoverer b
let addr_info = AddrInfo {
relay_url: None,
direct_addresses: BTreeSet::from(["0.0.0.0:11111".parse()?]),
};

fn make_discoverer() -> Result<(PublicKey, LocalSwarmDiscovery)> {
let node_id = crate::key::SecretKey::generate().public();
Ok((node_id, LocalSwarmDiscovery::new(node_id)?))
// pass in endpoint, this is never used
let ep = crate::endpoint::Builder::default().bind(0).await?;
// resolve twice to ensure we can create separate streams for the same node_id
let mut s1 = discovery_a.resolve(ep.clone(), node_id_b).unwrap();
let mut s2 = discovery_a.resolve(ep, node_id_b).unwrap();
tracing::debug!(?node_id_b, "Discovering node id b");
// publish discovery_b's address
discovery_b.publish(&addr_info);
let s1_res = tokio::time::timeout(Duration::from_secs(5), s1.next())
.await?
.unwrap()?;
let s2_res = tokio::time::timeout(Duration::from_secs(5), s2.next())
.await?
.unwrap()?;
assert_eq!(s1_res.addr_info, addr_info);
assert_eq!(s2_res.addr_info, addr_info);
Ok(())
}

fn make_discoverer() -> Result<(PublicKey, LocalSwarmDiscovery)> {
let node_id = crate::key::SecretKey::generate().public();
Ok((node_id, LocalSwarmDiscovery::new(node_id)?))
}
}
}

0 comments on commit a5072c3

Please sign in to comment.