Skip to content

Commit

Permalink
feat: Add UPGMA, Neighbor-Joining, and RF-distance to cli tool
Browse files Browse the repository at this point in the history
  • Loading branch information
RagnarGrootKoerkamp committed Nov 30, 2021
1 parent a63eeb7 commit df58688
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 0 deletions.
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ thiserror = "1"
rayon = "1.5"
structopt = "0.3"
lz-str = "0.1.0"
strum = "0.23.0"
strum_macros = "0.23.0"

[[bin]]
name = "rbt"
Expand Down
38 changes: 38 additions & 0 deletions src/cli.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::common::Region;
use std::path::PathBuf;
use structopt::StructOpt;
use strum_macros::EnumString;

#[derive(StructOpt)]
#[structopt(
Expand Down Expand Up @@ -389,6 +390,43 @@ pub(crate) enum Command {
)]
fastq: bool,
},

/// Reconstruct a phylogenetic tree given a Phylip distance matrix input file.
#[structopt(author = "Ragnar Groot Koerkamp <ragnar.grootkoerkamp@gmail.com>")]
Phylogeny {
/// The reconstruction method to use: either `UPGMA` or `NeighborJoining`.
#[structopt(long = "method")]
method: PhylogenyMethod,

/// Path of Phylip distance matrix file.
#[structopt(parse(from_os_str))]
input: PathBuf,

/// Path to store the phylogeny in Newick format, or stdout otherwise.
#[structopt(parse(from_os_str))]
output: Option<PathBuf>,
},

/// Compute the Robinson-Foulds distance between two phylogenetic trees.
#[structopt(author = "Ragnar Groot Koerkamp <ragnar.grootkoerkamp@gmail.com>")]
RobinsonFoulds {
/// The path of the first phylogeny in Newick format.
#[structopt(parse(from_os_str))]
newick_1: PathBuf,
/// The path of the second phylogeny in Newick format.
#[structopt(parse(from_os_str))]
newick_2: PathBuf,

/// The path to write the distance to, or stdout otherwise.
#[structopt(parse(from_os_str))]
output: Option<PathBuf>,
},
}

/// Tree-reconstruction algorithm selectable through the `--method` option of
/// the `Phylogeny` subcommand.
///
/// `EnumString` derives `FromStr`, so the command-line value has to match a
/// variant name exactly (`UPGMA` or `NeighborJoining`).
// The all-caps variant name is kept on purpose: it is the string users type on
// the command line, so renaming it would change the CLI.
#[allow(clippy::upper_case_acronyms)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, EnumString)]
pub(crate) enum PhylogenyMethod {
    /// Build the tree with `bio::phylogeny::upgma`.
    UPGMA,
    /// Build the tree with `bio::phylogeny::neighbor_joining`.
    NeighborJoining,
}

#[derive(StructOpt)]
Expand Down
41 changes: 41 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,47 @@ fn main() -> Result<()> {
keep_only_pairs,
)?,
SequenceStats { fastq } => sequences_stats::stats(fastq)?,
Phylogeny {
    method,
    input,
    output,
} => {
    // Read the distance matrix from `input` and reconstruct the tree with
    // the algorithm chosen on the command line.
    let dm = bio_types::distancematrix::DistanceMatrix::from_file(&input)?;
    let phylogeny = match method {
        cli::PhylogenyMethod::UPGMA => bio::phylogeny::upgma(&dm),
        cli::PhylogenyMethod::NeighborJoining => bio::phylogeny::neighbor_joining(&dm),
    };
    // Serialise once; the same text goes to either the file or stdout.
    let rendered = phylogeny.to_string();
    match output {
        Some(path) => {
            let mut f = fs::File::create(path)?;
            f.write_all(rendered.as_bytes())?;
            // `write_all` instead of `write`: a bare `write` may report a
            // short write, and its returned byte count was being discarded.
            f.write_all(b"\n")?;
        }
        None => println!("{}", rendered),
    }
}
RobinsonFoulds {
    ref newick_1,
    ref newick_2,
    output,
} => {
    // Load both trees from their Newick files and compute the distance
    // between them.
    let dist = bio::phylogeny::robinson_foulds_distance(
        &bio::io::newick::from_file(newick_1)?,
        &bio::io::newick::from_file(newick_2)?,
    );
    // Serialise once; the same text goes to either the file or stdout.
    let rendered = dist.to_string();
    match output {
        Some(path) => {
            let mut f = fs::File::create(path)?;
            f.write_all(rendered.as_bytes())?;
            // `write_all` instead of `write`: a bare `write` may report a
            // short write, and its returned byte count was being discarded.
            f.write_all(b"\n")?;
        }
        None => println!("{}", rendered),
    }
}
}
Ok(())
}

0 comments on commit df58688

Please sign in to comment.