Skip to content

Commit

Permalink
small fixed
Browse files Browse the repository at this point in the history
  • Loading branch information
phoenixAja committed Dec 7, 2023
1 parent 43825a4 commit a1c1f68
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 4 deletions.
9 changes: 6 additions & 3 deletions workflows/index-generation/index-generation.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ workflow index_generation {
call SplitFastaBySeqLengthAndSort as SplitFastaBySeqLengthAndSortNR {
input:
fasta = DownloadNR.nr,
combined_sorted_path = "combined_sorted_nr.fa",
cpu = 64,
docker_image_id = docker_image_id
}
Expand All @@ -78,7 +79,8 @@ workflow index_generation {
if (!skip_nuc_compression) {
call SplitFastaBySeqLengthAndSort as SplitFastaBySeqLengthAndSortNT {
input:
fasta = DownloadNR.nr,
fasta = DownloadNT.nt,
combined_sorted_path = "combined_sorted_nt.fa",
cpu = 64,
docker_image_id = docker_image_id
}
Expand Down Expand Up @@ -662,6 +664,7 @@ task GenerateIndexMinimap2 {
task SplitFastaBySeqLengthAndSort {
input {
File fasta
String combined_sorted_path
Int cpu
Int threads = if cpu * 0.6 < 1 then 1 else floor(cpu * 0.6)
String docker_image_id
Expand Down Expand Up @@ -695,10 +698,10 @@ task SplitFastaBySeqLengthAndSort {
# cd ..

# Combine the sorted files with longest sequences at the top
ls -r outputs/*.fa | xargs cat > combined_sorted.fa
ls -r outputs/*.fa | xargs cat > ~{combined_sorted_path}
>>>
output {
File sorted = "combined_sorted.fa"
File sorted = combined_sorted_path
}
runtime {
docker: docker_image_id
Expand Down
2 changes: 1 addition & 1 deletion workflows/index-generation/ncbi-compress/src/commands.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ pub mod commands {
let input_fasta_path = path.to_str().unwrap();
let taxid = path.file_name().unwrap().to_str().unwrap().split(".").collect::<Vec<&str>>()[0];
let reads_count = count_fasta_reads(input_fasta_path);
if reads_count >= 9500000 { // back of the envelope calculation for how many 50,000 character reads we can store in 488GB of RAM
if reads_count >= 3 { //9500000 { // back of the envelope calculation for how many 50,000 character reads we can store in 488GB of RAM
log::info!("Breaking apart taxid {} into smaller chunks", taxid);
let input_taxid_dir = format!("{}/{}_split", input_taxid_dir, taxid);
split_fasta_into_chunks(&input_fasta_path, &input_taxid_dir, &reads_count, &3, taxid).expect("error splitting fasta into chunks");
Expand Down

0 comments on commit a1c1f68

Please sign in to comment.