Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Version 0.3 #26

Merged
merged 20 commits into from
Sep 12, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 114 additions & 10 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
@@ -1,35 +1,139 @@
version: 2
jobs:
test:
docker:
- image: circleci/rust:1.31.1-stretch
version: 2.1

executors:
needletail:
machine:
image: ubuntu-1604:201903-01

commands:
checkout_and_setup:
description: "Checkout code and set up rust"
steps:
- checkout
- restore_cache:
name: restore rust install
keys:
- rust-stable
- run:
name: set up rust stable
command: |
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --no-modify-path --default-toolchain stable
echo 'export PATH=$HOME/.cargo/bin:$PATH' >> $BASH_ENV
source $HOME/.cargo/env
rustup show
- save_cache:
name: cache rust install
paths:
- ~/.rustup/
- ~/.cargo/
key: rust-stable
- run:
name: Version information
command: rustc --version; cargo --version; rustup --version
- run:
name: Calculate dependencies
command: cargo generate-lockfile
- restore_cache:
name: restore project build artifacts
keys:
- v4-cargo-cache-{{ arch }}-{{ checksum "Cargo.lock" }}
- run:
name: Build all targets
command: cargo build --all --all-targets
command: cargo build --all --all-targets --all-features
- save_cache:
name: save project build artifacts
paths:
- /usr/local/cargo/registry
- ~/.cargo/registry
- target/debug/.fingerprint
- target/debug/build
- target/debug/deps
key: v4-cargo-cache-{{ arch }}-{{ checksum "Cargo.lock" }}

jobs:
build:
executor: needletail
steps:
- checkout_and_setup
test:
executor: needletail
steps:
- checkout_and_setup
- run:
name: Run all tests
command: cargo test --all
command: cargo test --all --all-features
- run:
name: Run slow tests
command: cargo test -- --ignored
lint:
executor: needletail
steps:
- checkout_and_setup
- run:
name: Format
command: |
rustup component add rustfmt
cargo fmt --all -- --check
- run:
name: Clippy
command: |
rustup component add clippy
cargo clippy --all-features -- -D warnings
coverage:
executor: needletail
steps:
- checkout_and_setup
- restore_cache:
keys:
- cargo-tarpaulin-0.8.6
- run:
name: install cargo tarpaulin
command: cargo install cargo-tarpaulin --version 0.8.6 || echo "cargo-tarpaulin already installed"
environment:
RUSTFLAGS: --cfg procmacro2_semver_exempt
- save_cache:
paths:
- ~/.cargo/bin/cargo-tarpaulin
key: cargo-tarpaulin-0.8.6
- run:
name: Generate coverage report
command: cargo tarpaulin --out Xml --all --all-features -t 600
environment:
LZMA_API_STATIC: 1
- run:
name: Export coverage to codecov
command: bash <(curl -s https://codecov.io/bash) || echo "Failed to upload coverage"
bench:
# TODO: probably need to do something useful here (use critcmp?) before turning this on
executor: needletail
steps:
- checkout_and_setup
- run:
name: Run benchmarks
command: |
cargo bench
fuzz:
# TODO: need to figure out how to install nightly here and probably cache the cargo-fuzz binary
executor: needletail
steps:
- checkout_and_setup
- run:
name: Run fuzz for 3 minutes each
command: |
cargo +nightly install cargo-fuzz
cargo +nightly fuzz run parse_fasta -- -max_total_time=180
cargo +nightly fuzz run parse_fastq -- -max_total_time=180

workflows:
version: 2
tests:
ci-checks:
jobs:
- test
- build
- coverage:
requires:
- build
- test:
requires:
- build
- lint:
requires:
- build
19 changes: 14 additions & 5 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "needletail"
version = "0.2.3"
version = "0.3.0"
authors = ["Roderick Bovee <roderick@onecodex.com>"]
description = "FASTX parsing and k-mer methods"
keywords = ["FASTA", "FASTQ", "kmer", "bioinformatics"]
Expand All @@ -12,17 +12,26 @@ edition = "2018"

[features]
default = ["compression"]
compression = ["bzip2", "flate2", "xz2", "zip"]
compression = ["bzip2", "flate2", "xz2"]

[dependencies]
flate2 = { version="1.0.6", optional=true }
bzip2 = { version="0.3.3", optional=true }
xz2 = { version="0.1.6", optional=true }
zip = { version="0.5.0", optional=true }
memchr = "2.2.0"
memchr = "2.2.1"
safemem = "0.3.2"

[dev-dependencies]
bencher = "0.1.5"
criterion = "0.3"

# for benchmark comparisons
bio = "0.28"
seq_io = "0.3"

# for testing with the FormatSpecimens.jl repo samples
toml = "0.5"
serde = "1.0"
serde_derive = "1.0"

[[bench]]
name = "benchmark"
Expand Down
61 changes: 37 additions & 24 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,32 +12,45 @@ Needletail's goal is to be as fast as the [readfq](https://github.com/lh3/readfq

```rust
extern crate needletail;
use needletail::{parse_sequence_path, Sequence};
use std::env;
use needletail::{fastx};

fn main() {
let filename: String = env::args().nth(1).unwrap();

let mut n_bases = 0;
let mut n_valid_kmers = 0;
fastx::fastx_cli(&filename[..], |_| {}, |seq| {
// seq.id is the name of the record
// seq.seq is the base sequence
// seq.qual is an optional quality score

// keep track of the total number of bases
n_bases += seq.seq.len();

// keep track of the number of AAAA (or TTTT via canonicalization) in the
// file (normalize makes sure every base is capitalized for comparison)
for (_, kmer, _) in seq.normalize(false).kmers(4, true) {
if kmer == b"AAAA" {
n_valid_kmers += 1;
}
}
});
println!("There are {} bases in your file.", n_bases);
println!("There are {} AAAAs in your file.", n_valid_kmers);
let filename: String = env::args().nth(1).unwrap();

let mut n_bases = 0;
let mut n_valid_kmers = 0;
parse_sequence_path(
filename,
|_| {},
|seq| {
// seq.id is the name of the record
// seq.seq is the base sequence
// seq.qual is an optional quality score

// keep track of the total number of bases
n_bases += seq.seq.len();

// normalize to make sure all the bases are consistently capitalized
let norm_seq = seq.normalize(false);
// we make a reverse complemented copy of the sequence first for
// `canonical_kmers` to draw the complemented sequences from.
let rc = norm_seq.reverse_complement();
// now we keep track of the number of AAAAs (or TTTTs via
// canonicalization) in the file; note we also get the position (i.0;
// in the event there were `N`-containing kmers that were skipped)
// and whether the sequence was complemented (i.2) in addition to
// the canonical kmer (i.1)
for (_, kmer, _) in norm_seq.canonical_kmers(4, &rc) {
if kmer == b"AAAA" {
n_valid_kmers += 1;
}
}
},
)
.expect("parsing failed");
println!("There are {} bases in your file.", n_bases);
println!("There are {} AAAAs in your file.", n_valid_kmers);
}
```

Expand All @@ -49,7 +62,7 @@ Please use either your local package manager (`homebrew`, `apt-get`, `pacman`, e
Once you have Rust set up, you can include needletail in your `Cargo.toml` file like:
```shell
[dependencies]
needletail = "^0.2.0"
needletail = "^0.3.0"
```

To install needletail itself for development:
Expand Down
27 changes: 0 additions & 27 deletions bench/benchmark.c

This file was deleted.

52 changes: 0 additions & 52 deletions bench/benchmark.py

This file was deleted.

20 changes: 0 additions & 20 deletions bench/benchmark.rs

This file was deleted.

9 changes: 0 additions & 9 deletions bench/benchmark_biopython.py

This file was deleted.

Loading