Skip to content

Commit

Permalink
add gff output by default and add -g flag to force it
Browse files Browse the repository at this point in the history
  • Loading branch information
Felix Van der Jeugt committed Sep 16, 2021
1 parent 9dc8b40 commit 05f90cd
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 2 deletions.
35 changes: 33 additions & 2 deletions src/bin/FragGeneScanRs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ fn main() -> Result<()> {
.long("output-prefix")
.value_name("output_prefix")
.takes_value(true)
.help("Output metadata (.out), proteins (.faa) and genes (.ffn) to files with this prefix. Use 'stdout' to write the predicted proteins to standard output."))
.help("Output metadata (.out and .gff), proteins (.faa) and genes (.ffn) to files with this prefix. Use 'stdout' to write the predicted proteins to standard output."))
.arg(Arg::with_name("complete")
.short("w")
.long("complete")
Expand Down Expand Up @@ -90,6 +90,12 @@ fn main() -> Result<()> {
.value_name("meta_file")
.takes_value(true)
.help("Output metadata to this file (supersedes -o)."))
.arg(Arg::with_name("gff-file")
.short("g")
.long("gff-file")
.value_name("gff_file")
.takes_value(true)
.help("Output metadata to this gff formatted file (supersedes -o)."))
.arg(Arg::with_name("aa-file")
.short("a")
.long("aa-file")
Expand Down Expand Up @@ -138,6 +144,20 @@ fn main() -> Result<()> {
(None, None) => None,
};

let mut gffstream: Option<Box<dyn Write + Send>> = match (
matches.value_of("gff-file"),
matches.value_of("output-prefix"),
) {
(Some(filename), _) => Some(Box::new(File::create(filename)?)),
(None, Some("stdout")) => None,
(None, Some(filename)) => Some(Box::new(File::create(filename.to_owned() + ".gff")?)),
(None, None) => None,
};

if let Some(ref mut gff) = &mut gffstream {
writeln!(gff, "##gff-version 3")?;
}

let dnastream: Option<Box<dyn Write + Send>> = match (
matches.value_of("nucleotide-file"),
matches.value_of("output-prefix"),
Expand All @@ -148,7 +168,7 @@ fn main() -> Result<()> {
(None, None) => None,
};

if aastream.is_none() && metastream.is_none() && dnastream.is_none() {
if aastream.is_none() && metastream.is_none() && gffstream.is_none() && dnastream.is_none() {
aastream = Some(Box::new(io::stdout()));
}

Expand All @@ -159,6 +179,7 @@ fn main() -> Result<()> {
inputseqs,
aastream.map(UnbufferingBuffer::new),
metastream.map(UnbufferingBuffer::new),
gffstream.map(UnbufferingBuffer::new),
dnastream.map(UnbufferingBuffer::new),
matches.value_of("complete").unwrap() == "1",
matches.is_present("formatted"),
Expand All @@ -171,6 +192,7 @@ fn main() -> Result<()> {
inputseqs,
aastream.map(SortingBuffer::new),
metastream.map(SortingBuffer::new),
gffstream.map(SortingBuffer::new),
dnastream.map(SortingBuffer::new),
matches.value_of("complete").unwrap() == "1",
matches.is_present("formatted"),
Expand All @@ -187,6 +209,7 @@ fn run<R: Read + Send, W: WritingBuffer + Send>(
inputseqs: R,
aa_buffer: Option<W>,
meta_buffer: Option<W>,
gff_buffer: Option<W>,
dna_buffer: Option<W>,
whole_genome: bool,
formatted: bool,
Expand All @@ -197,6 +220,7 @@ fn run<R: Read + Send, W: WritingBuffer + Send>(
.build_global()?;

let meta_buffer = meta_buffer.map(Mutex::new);
let gff_buffer = gff_buffer.map(Mutex::new);
let dna_buffer = dna_buffer.map(Mutex::new);
let aa_buffer = aa_buffer.map(Mutex::new);

Expand All @@ -205,6 +229,7 @@ fn run<R: Read + Send, W: WritingBuffer + Send>(
.par_bridge()
.map(|(index, recordvec)| {
let mut metabuf = Vec::new();
let mut gffbuf = Vec::new();
let mut dnabuf = Vec::new();
let mut aabuf = Vec::new();
for record in recordvec {
Expand All @@ -221,6 +246,9 @@ fn run<R: Read + Send, W: WritingBuffer + Send>(
if meta_buffer.is_some() {
read_prediction.meta(&mut metabuf)?;
}
if gff_buffer.is_some() {
read_prediction.gff(&mut gffbuf)?;
}
if dna_buffer.is_some() {
read_prediction.dna(&mut dnabuf, formatted)?;
}
Expand All @@ -231,6 +259,9 @@ fn run<R: Read + Send, W: WritingBuffer + Send>(
if let Some(buffer) = &meta_buffer {
buffer.lock().unwrap().add(index, metabuf)?;
}
if let Some(buffer) = &gff_buffer {
buffer.lock().unwrap().add(index, gffbuf)?;
}
if let Some(buffer) = &dna_buffer {
buffer.lock().unwrap().add(index, dnabuf)?;
}
Expand Down
28 changes: 28 additions & 0 deletions src/gene.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,16 @@ impl ReadPrediction {
Ok(())
}

/// Appends one GFF record per predicted gene of this read to `buf`.
///
/// The read header (`self.head`) is decoded as UTF-8 and handed to
/// [`Gene::gff`] for every gene in `self.genes`.
///
/// # Errors
///
/// Returns an error if the header is not valid UTF-8. The header is only
/// decoded when there is at least one gene, so a read without genes always
/// yields `Ok(())` and leaves `buf` untouched.
pub fn gff(&self, buf: &mut Vec<u8>) -> Result<(), GeneError> {
    // Nothing to write (and nothing to validate) for reads without genes.
    if self.genes.is_empty() {
        return Ok(());
    }

    let seqid = std::str::from_utf8(&self.head)?;
    self.genes.iter().for_each(|gene| gene.gff(buf, seqid));
    Ok(())
}

pub fn dna(&self, buf: &mut Vec<u8>, formatted: bool) -> Result<(), GeneError> {
for gene in &self.genes {
gene.dna(buf, &self.head, formatted)?;
Expand Down Expand Up @@ -97,6 +107,24 @@ impl Gene {
);
}

/// Appends this gene to `buf` as a single newline-terminated,
/// tab-separated GFF record.
///
/// The nine columns written are, in order: `head`, the literal `FGS`, the
/// literal `CDS`, `self.metastart`, `self.end`, a `.` placeholder, the
/// strand (`+` when `self.forward_strand` is set, `-` otherwise),
/// `self.frame - 1`, and an attribute column of the form
/// `ID=<head>_<metastart>_<end>_<strand>;product=predicted protein`.
///
/// `head` is written verbatim; no escaping is applied.
pub fn gff(&self, buf: &mut Vec<u8>, head: &str) {
    // The strand symbol appears twice: as its own column and inside the ID.
    let strand = if self.forward_strand { '+' } else { '-' };
    let phase = self.frame - 1;
    let start = &self.metastart;
    let stop = &self.end;

    let record = format!(
        "{}\tFGS\tCDS\t{}\t{}\t.\t{}\t{}\tID={}_{}_{}_{};product=predicted protein\n",
        head, start, stop, strand, phase, head, start, stop, strand
    );
    buf.extend_from_slice(record.as_bytes());
}

pub fn dna(&self, buf: &mut Vec<u8>, head: &Vec<u8>, formatted: bool) -> Result<(), GeneError> {
let dna: Vec<u8> = match (self.forward_strand, formatted) {
(true, true) => self.dna.iter().map(|&n| u8::from(n)).collect(),
Expand Down

0 comments on commit 05f90cd

Please sign in to comment.