Skip to content

Commit

Permalink
Add unit test and test data for RunRedup task
Browse files Browse the repository at this point in the history
  • Loading branch information
lvreynoso committed Nov 9, 2023
1 parent c86faf7 commit 0ba1242
Show file tree
Hide file tree
Showing 7 changed files with 235 additions and 0 deletions.
11 changes: 11 additions & 0 deletions workflows/amr/test/RunRedup/clusters.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
M05295:617:000000000-KL64F:1:1101:2016:13202,M05295:617:000000000-KL64F:1:1101:2016:13202
M05295:617:000000000-KL64F:1:1101:3078:7376,M05295:617:000000000-KL64F:1:1101:3078:7376
M05295:617:000000000-KL64F:1:1101:3125:11405,M05295:617:000000000-KL64F:1:1101:3125:11405
M05295:617:000000000-KL64F:1:1101:2666:12975,M05295:617:000000000-KL64F:1:1101:2666:12975
M05295:617:000000000-KL64F:1:1101:2666:12975,M05295:617:000000000-KL64F:1:1119:25439:5751
M05295:617:000000000-KL64F:1:1101:2666:12975,M05295:617:000000000-KL64F:1:2101:21157:16235
M05295:617:000000000-KL64F:1:1101:2666:12975,M05295:617:000000000-KL64F:1:2106:11795:12187
M05295:617:000000000-KL64F:1:1101:2666:12975,M05295:617:000000000-KL64F:1:2108:9015:16045
M05295:617:000000000-KL64F:1:1101:2666:12975,M05295:617:000000000-KL64F:1:2112:3938:13885
M05295:617:000000000-KL64F:1:1101:2666:12975,M05295:617:000000000-KL64F:1:2117:7228:7910
M05295:617:000000000-KL64F:1:1101:1908:15400,M05295:617:000000000-KL64F:1:1101:1908:15400
5 changes: 5 additions & 0 deletions workflows/amr/test/RunRedup/duplicate_cluster_sizes.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
1 M05295:617:000000000-KL64F:1:1101:2016:13202
1 M05295:617:000000000-KL64F:1:1101:3078:7376
1 M05295:617:000000000-KL64F:1:1101:3125:11405
7 M05295:617:000000000-KL64F:1:1101:2666:12975
1 M05295:617:000000000-KL64F:1:1101:1908:15400
44 changes: 44 additions & 0 deletions workflows/amr/test/RunRedup/host_filter_1.fastq
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
@M05295:617:000000000-KL64F:1:1101:3078:7376
TTTTGCCGTAACGGCTTTTTACCACAGCCAGCTTGCGGCGCAACACCTCCGCCAGAAAGTTGCCGTTGCCGCAGGCGGGTTCCAGAAAACGGCTCTCGATGCGCTCCGTCTCGCTCTTTACAAGGTCGCACATCGCCTTTACCTCC
+
CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGDGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG8DF@FDEGGGGGGGGGGGGGGGCDGGFFFFEFFFFFF>/
@M05295:617:000000000-KL64F:1:1101:3125:11405
TCTTTGGTATACTGCAGTGCTTATATGCGGTTTGCTGATTTTTTCGGCGGCAGCTTGTGCAGGAACGATTCTTTCCTGCAATAACCGGCTGAAAAGAAAAAGGAAAAAGATACGCAAGGCGGCACTCTTGTCAACTATGTGCATTA
+
CCCCCGGGGGGGGFFFDFGGGAFGCFGGEGGGG>AECFGEFFGGCEFGGEGGGGGGGGGGGGGGFGGGGFFGGGGGGFGGGGGGFG:FGGGFGGGG7DFCGGGGGDFGEGGGGGGCGGGFGEGGGGGFGCGGGGFF5D9CD7<DF+
@M05295:617:000000000-KL64F:1:1101:2016:13202
CCACCAAATAACACTCAAGGACTTCAAATGTCGGAGAGTGTGAGATGTTCTTTGAAAATTGAATAACGAAACAACAAAGAGGAAATTAAAGATATCCAATTAAAGAAATTTAATGGGTAAAATACAATTTCAAACAATTCTTCTGT
+
CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGGGGGFGGGGGGGGGGGGGGFFFFFFFFFFFF
@M05295:617:000000000-KL64F:1:1101:2666:12975
CCTCTTTTTTTTGCAGAAGAGTACACAACTGCTTTATTTTATGCTAAAAGACCCCTGCCTACGCAAAGGCAGAGGTCCGATTTTTTCATAGTCTGGGGAGATAAAACAACTTTCCGATTTCACAGAATGCGCACGGCCTTCCAGAT
+
CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFFFFFFFFBBF
@M05295:617:000000000-KL64F:1:2112:3938:13885
CCTCTTTTTTTTGCAGAAGAGTACACAACTGCTTTATTTTATGCTAAAAGACCCCTGCCTACGCAAAGGCAGAGGTCCGATTTTTTCATAGTCTGGGGAGATAAAACAACTTTCCGATTTCACAGAATGCGCACGGCCTTCCAGAT
+
CCCCCGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGDEGGGGEGGCFGGGGGGEGGCGGGGGGFGG8FGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
@M05295:617:000000000-KL64F:1:2108:9015:16045
CCTCTTTTTTTTGCAGAAGAGTACACAACTGCTTTATTTTATGCTAAAAGACCCCTGCCTACGCAAAGGCAGAGGTCCGATTTTTTCATAGTCTGGGGAGATAAAACAACTTTCCGATTTCACAGAATGCGCACGGCCTTCCAGAT
+
CCCCCGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGDGGGGCGFGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFG
@M05295:617:000000000-KL64F:1:2106:11795:12187
CCTCTTTTTTTTGCAGAAGAGTACACAACTGCTTTATTTTATGCTAAAAGACCCCTGCCTACGCAAAGGCAGAGGTCCGATTTTTTCATAGTCTGGGGAGATAAAACAACTTTCCGATTTCACAGAATGCGCACGGCCTTCCAGAT
+
CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGFGGGGGGGGGCGGGGGGGDGEFCFFDFFCGGGGGGGFGGGGGGGGGGFEF8FDCEGGGGGGGGGD>FFGGGGGGGGGGGFGGGGDGGGGGGGGGGGGGGG
@M05295:617:000000000-KL64F:1:2117:7228:7910
CCTCTTTTTTTTGCAGAAGAGTACACAACTGCTTTATTTTATGCTAAAAGACCCCTGCCTACGCAAAGGCAGAGGTCCGATTTTTTCATAGTCTGGGGAGATAAAACAACTTTCCGATTTCACAGAATGCGCACGGCCTTCCAGAT
+
CCCCCGGGGGGGGGGGGGGGGFFGGGGGGFFGGGGGGGGGCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCDGGGGGGGGGGGDGGGGFGGAFGG7FGDGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGF
@M05295:617:000000000-KL64F:1:1119:25439:5751
CCTCTTTTTTTTGCAGAAGAGTACACAACTGCTTTATTTTATGCTAAAAGACCCCTGCCTACGCAAAGGCAGAGGTCCGATTTTTTCATAGTCTGGGGAGATAAAACAACTTTCCGATTTCACAGAATGCGCACGGCCTTCCAGAT
+
CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGFGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGF
@M05295:617:000000000-KL64F:1:2101:21157:16235
CCTCTTTTTTTTGCAGAAGAGTACACAACTGCTTTATTTTATGCTAAAAGACCCCTGCCTACGCAAAGGCAGAGGTCCGATTTTTTCATAGTCTGGGGAGATAAAACAACTTTCCGATTTCACAGAATGCGCACGGCCTTCCAGAT
+
CCCCCGGGGGGDGGGGFGGGGGGGGF@FGGGGG<FGGGGGGGGGFGGGGGGGFGEGDGGGGGGGGGCF@F<FFGGFGGGGG=@FGEGGGGFFA9E88CCEGGFAGFGGGGGFGG8CCCEEGGGGG??D??CGGD69DFGFF6DFGF
@M05295:617:000000000-KL64F:1:1101:1908:15400
CGCTCACATGAACGGAATAATACTCTCCCAAATATTCACTTCCCGCCCCATCTTTGTATACTTCCTCTGTTTCAAGATCATATGTTTGATATAAATACGCCTGTCCATCATTCTTCAGATCTACATAAGCAGCCCAGTCATCAATC
+
<8-AC@FCGFGGGGGCFFEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGFGFGFGGCGFGGGFBFFFFBFBDDF9
44 changes: 44 additions & 0 deletions workflows/amr/test/RunRedup/host_filter_2.fastq
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
@M05295:617:000000000-KL64F:1:1101:3078:7376
CTTTGCCCTCAGATTTGCTTTTGTACCAATTATAGCATATTTCCCGGTTAAATCCACAGATTTTTAGCTATTCGTTTCATCTCTTGAGCCGCTTGTCAAAAGGTACACTTTTTGGCAAGCCCTTCAAAGAGGTGGAACGAATGGCA
+
CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGFGFFFFDFFFFF5
@M05295:617:000000000-KL64F:1:1101:3125:11405
TCTTTGGTAAGTCCGAACAGATTTTATTCTACTCCTCGGGTGTTCTGAGCGATTGTTTGTGTTGAAAAGTCATTCAGGTCATAGTACCGCATTGCTTCTGTTCCGTCTTTGGCGATATACTCGACTAAAATGTAATGCTCGGTTAT
+
CCCCCFGFFFGF<FF@CFG8<<F<FGGFGGGGGGGGGGCFEGG7FGF9FC7@FGFAFGGGGGDCFEFGGGGGGGGFGGEGGGFGGGGGGGGGGGFGEGGC@EFGGFFGCGGFFGDEGGGGDFDGGGDCG?FFGBFFFFFBD>@ABD
@M05295:617:000000000-KL64F:1:1101:2016:13202
TTCAGTTCGGGCGGTTCCCCTCATATACCTATTTATTCAGTATATGATACATGGACTTGACTCCATGTGGATTGCTCCATTCGGACATCTACGGATCATATCGTGCTTGCCAATCCCCGTAGCTTTTCGCAGCTTACCACGTCCTT
+
CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGFFGGGGDGDFFGGGGGGGFGFD?FFFFFFFF
@M05295:617:000000000-KL64F:1:1101:2666:12975
CCTTTTTTTTTTTTAACTGGAATCGACATTGATTTTTATATTCCGTCGGCAAGGCAGGTCGTTCAGGTGGCGTATTCCATTCAGGGGGATGCCTATGAGCGCGAAGTCGGAAATCTGAAAAAATTTGCAGCCACCACGACAGAAAC
+
CCCCCGGGGGGGGGGG9FAFGFGFGFGGGGGGGGGGGGGGGGGGGGGGDEGGGFGGDCGF:FGGGGGGGGFC=CFGGGGGFGGGGGGGGGGGGGFGGGGEEGDCC>EFGDEEGGGGGGGGFFDGGGGCDGG7DFGFFF5**@FFFA
@M05295:617:000000000-KL64F:1:2112:3938:13885
CCTTTTTTTTTTTTAACTGGAATCGACATTGATTTTTATATTCCGTCGGCAAGGCAGGTCGTTCAGGTGGCGTATTCCATTCAGGGGGATGCCTCTGAGCGCGAAGTCGGAAATCTGAAAAAATTTGCAGCCACCACGACAGAAAC
+
CCCCCGGGGGGGGGCCFF<FGFGGGFCFGGGGGGGGGGGEFGEFGGGGEG7FFDGFEGGG:CFEFFFGGGEGG+CFFGCGGGGGGGGD+?FFCF,AAF,,@B@F7C@D@CCEGGGFFGFGGDGGGGFFGGCFCFG6C>+;*7*AF5
@M05295:617:000000000-KL64F:1:2108:9015:16045
CCTTTTTTTTTTTTAACTGGAATCGACATTGATTTTTATATTCCGTCGGCAAGGCAGGTCGTTCAGGTGGCGTATTCCATTCAGGGGGATGCCTATGAGCGCGAAGTCGGAAATCTGAAAAAATTTGCAGCCACCACGACAGAAAC
+
CCCCCGGGGGGGGGGFEFG8F8AFGG:FFGFGGGGGGGGGGGGG,9BF:E:FD,@@::F8CCEFEG9FGC,7@CFDC9,CF<AFGFG:+84AAE,@9,E9CBCEE6+7BCEE6=6=F,EFGF>CDB:,CFF9CFFF677C57;>7*
@M05295:617:000000000-KL64F:1:2106:11795:12187
CCTTTTTTTTTTTTAACTGGAATCGACATTGATTTTTATATTCCGTCGGCAAGGCAGGTCGTTCAGGTGGCGTATTCCATTCAGGGGGATGCCTATGAGCGCGAAGTCGGAAATCTGAAAAAATTTGCAGCCACCACGACAGAAAC
+
CCCCCGGGGGGGGGGFGGG8EEFEFGGGGGG,FFGGGGGGGGGGFGG@CEFCDFGG77F@CFFGFGCFD@EFFDFEAEFGFGFGGCCC+@FFCFEFFFFFEGG>C6+@EEEC>DCFFGDGGDEGGGFGGGGFFD6CD66?7BFF5;
@M05295:617:000000000-KL64F:1:2117:7228:7910
CCTTTTTTTTTTTTAACTGGAATCGACATTGATTTTTATATTCCGTCGGCAAGGCAGGTCGTTCAGGTGGCGTATTCCATTCAGGGGGATGCCTATGAGCGCGAAGTCGGAAATCTGAAAAAATTTGCAGCCACCACGACAGAAAC
+
CCCCCGGGGGGGGGFE,C9,FFFGFCFGGGGGGFGGGFFCF<FFG8ECFGCFGGGGG7FFGGEF8FG<ECFGGGGGFGGF@9FEEGG+=F@8FF9F9AEEGGEGGGG<BFEEEFGFFGGGGGGGGGAFFFCFGGCCFCE5CGFF?8
@M05295:617:000000000-KL64F:1:1119:25439:5751
CCTTTTTTTTTTTTAACTGGAATCGACATTGATTTTTATATTCCGTCGGCAAGGCAGGTCGTTCAGGTGGCGTATTCCATTCAGGGGGATGCCTATGAGCGCGAAGTCGGAAATCTGAAAAAATTTGCAGCCACCACGACAGAAAC
+
CCCCCGGGGGGGGGGF9@FGGGFGGFEGGGGCFEGGGGFGGGGGGGGGGGDDGGGGEDGF=FE@FGGGGFEFGGGGGGGGGGCFGGECC@FGEGFDCDFDGGGGCE@FFCFGGGFCFGGGGGFGGFGGGFGCFGGGF>>CGGGGGD
@M05295:617:000000000-KL64F:1:2101:21157:16235
CCTTTTTTTTTTTTAACTGGAATCGACATTGATTTTTATATTCCGTCGGCAAGGCAGGTCGTTCAGGTGGCGTATTCCATTCAGGGGGATGCCTTTGAGCGCGAAGTCGGAAATCTGAAAAAATTTGCAGCCACCACGACAAAAAC
+
CCCCCGGGGGGGGG<<6@<C66CC8EDC@FC<FFF@FFF9FGGGGGFEGGDEEFC=CCF8FGGGGA?EE?8@F=FF9DE,FFBFDFEEECE8BF,E=,DFEDCEC>F8@CEGCDF:,,@+6=@F8FCGG6;+0=00*3**0**33:
@M05295:617:000000000-KL64F:1:1101:1908:15400
TGGTCCTCGCCGTTATGAATCTTACAAAGAAAGTATAGTTGACCATTCAGATATTTTATTGGATGAGAGGTATGCGGAAGCATGGGAATATAAGGACAATCCATTTATTTATGTATCTATCATAGGACCTATTTATGCAACGGGAA
+
CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDFGGGGGGGCFGGGDGFGGGGGGGGFFGGGGGGGGGGGGGGGGGGGGFGFGGGDGGGGGGGGGGGGDFGGGGGGGGGGGGGGGGGGGGG8DFGFFFFFFF5@A:
16 changes: 16 additions & 0 deletions workflows/amr/test/RunRedup/subsampled_1.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
>M05295:617:000000000-KL64F:1:1101:2016:13202
CCACCAAATAACACTCAAGGACTTCAAATGTCGGAGAGTGTGAGATGTTCTTTGAAAATT
GAATAACGAAACAACAAAGAGGAAATTAAAGATATCCAATTAAAGAAATTTAATGGGTAA
AATACAATTTCAAACAATTCTTCTGT
>M05295:617:000000000-KL64F:1:1101:2666:12975
CCTCTTTTTTTTGCAGAAGAGTACACAACTGCTTTATTTTATGCTAAAAGACCCCTGCCT
ACGCAAAGGCAGAGGTCCGATTTTTTCATAGTCTGGGGAGATAAAACAACTTTCCGATTT
CACAGAATGCGCACGGCCTTCCAGAT
>M05295:617:000000000-KL64F:1:1101:3078:7376
TTTTGCCGTAACGGCTTTTTACCACAGCCAGCTTGCGGCGCAACACCTCCGCCAGAAAGT
TGCCGTTGCCGCAGGCGGGTTCCAGAAAACGGCTCTCGATGCGCTCCGTCTCGCTCTTTA
CAAGGTCGCACATCGCCTTTACCTCC
>M05295:617:000000000-KL64F:1:1101:3125:11405
TCTTTGGTATACTGCAGTGCTTATATGCGGTTTGCTGATTTTTTCGGCGGCAGCTTGTGC
AGGAACGATTCTTTCCTGCAATAACCGGCTGAAAAGAAAAAGGAAAAAGATACGCAAGGC
GGCACTCTTGTCAACTATGTGCATTA
16 changes: 16 additions & 0 deletions workflows/amr/test/RunRedup/subsampled_2.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
>M05295:617:000000000-KL64F:1:1101:2016:13202
TTCAGTTCGGGCGGTTCCCCTCATATACCTATTTATTCAGTATATGATACATGGACTTGA
CTCCATGTGGATTGCTCCATTCGGACATCTACGGATCATATCGTGCTTGCCAATCCCCGT
AGCTTTTCGCAGCTTACCACGTCCTT
>M05295:617:000000000-KL64F:1:1101:2666:12975
CCTTTTTTTTTTTTAACTGGAATCGACATTGATTTTTATATTCCGTCGGCAAGGCAGGTC
GTTCAGGTGGCGTATTCCATTCAGGGGGATGCCTATGAGCGCGAAGTCGGAAATCTGAAA
AAATTTGCAGCCACCACGACAGAAAC
>M05295:617:000000000-KL64F:1:1101:3078:7376
CTTTGCCCTCAGATTTGCTTTTGTACCAATTATAGCATATTTCCCGGTTAAATCCACAGA
TTTTTAGCTATTCGTTTCATCTCTTGAGCCGCTTGTCAAAAGGTACACTTTTTGGCAAGC
CCTTCAAAGAGGTGGAACGAATGGCA
>M05295:617:000000000-KL64F:1:1101:3125:11405
TCTTTGGTAAGTCCGAACAGATTTTATTCTACTCCTCGGGTGTTCTGAGCGATTGTTTGT
GTTGAAAAGTCATTCAGGTCATAGTACCGCATTGCTTCTGTTCCGTCTTTGGCGATATAC
TCGACTAAAATGTAATGCTCGGTTAT
99 changes: 99 additions & 0 deletions workflows/amr/test/test_wdl.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import csv
import os
from test_util import WDLTestCase

Expand Down Expand Up @@ -75,3 +76,101 @@ def testRunResultsPerSample(self):
"smeB",
]:
self.assertTrue(expected_gene_name in gene_names)

def testRunRedup(self):
    """Check that the RunRedup task re-adds the duplicates of subsampled reads.

    The expected result is derived from the fixture files: every read in
    clusters.csv whose cluster representative appears in subsampled_1.fa
    must be present in both output FASTA files, and every other read must
    be absent. Each emitted sequence must also match the sequence recorded
    for that read id in the corresponding host-filtered FASTQ.
    """

    def read_fasta_records(path):
        """Return the (read_id, sequence) pairs of a FASTA file, in file order.

        Records are returned as a list rather than a dict so that repeated
        read ids stay visible to the caller.
        """
        records = []
        read_id = None
        chunks = []
        with open(path, "r") as handle:
            for raw_line in handle:
                line = raw_line.rstrip("\n")
                if line.startswith(">"):
                    if read_id is not None:
                        records.append((read_id, "".join(chunks)))
                    # Drop only the single leading record marker.
                    read_id = line[1:]
                    chunks = []
                elif read_id is not None:
                    # Sequences may be wrapped over several lines.
                    chunks.append(line)
        if read_id is not None:
            records.append((read_id, "".join(chunks)))
        return records

    def read_fastq_sequences(path):
        """Return a {read_id: sequence} dict for a four-line-per-record FASTQ."""
        with open(path, "r") as handle:
            lines = [raw_line.rstrip("\n") for raw_line in handle]
        sequences = {}
        # Stopping at len(lines) - 1 guarantees lines[index + 1] exists, so a
        # trailing blank line cannot raise IndexError.
        for index in range(0, len(lines) - 1, 4):
            header = lines[index]
            if not header:
                continue
            if header.startswith("@"):
                header = header[1:]
            sequences[header] = lines[index + 1]
        return sequences

    host_filter_paths = [
        relpath("RunRedup", "host_filter_1.fastq"),
        relpath("RunRedup", "host_filter_2.fastq"),
    ]
    subsampled_paths = [
        relpath("RunRedup", "subsampled_1.fa"),
        relpath("RunRedup", "subsampled_2.fa"),
    ]
    clusters_path = relpath("RunRedup", "clusters.csv")

    inputs = {
        "host_filtered_reads": host_filter_paths,
        "subsampled_reads": subsampled_paths,
        "clusters": clusters_path,
        "cluster_sizes": relpath("RunRedup", "duplicate_cluster_sizes.tsv"),
    }

    # Each clusters.csv row is (representative read id, member read id).
    with open(clusters_path, "r") as clusters_csv:
        cluster_rows = [row for row in csv.reader(clusters_csv) if row]

    # Read ids that survived subsampling (taken from the first mate file).
    subsampled_read_ids = {
        read_id for read_id, _ in read_fasta_records(subsampled_paths[0])
    }

    target_read_ids = set()
    excluded_read_ids = set()
    for rep_read_id, read_id in cluster_rows:
        if rep_read_id in subsampled_read_ids:
            target_read_ids.add(read_id)
        else:
            excluded_read_ids.add(read_id)

    # Run task and collect output records
    res = self.run_miniwdl(task="RunRedup", task_input=inputs)
    redups_fa = res["outputs"]["RunRedup.redups_fa"]
    records_1 = read_fasta_records(redups_fa[0])
    records_2 = read_fasta_records(redups_fa[1])

    # Check ids. Uniqueness has to be checked on the raw record lists:
    # once the records are in a dict or set, duplicates are already gone.
    ids_1 = [read_id for read_id, _ in records_1]
    ids_2 = [read_id for read_id, _ in records_2]
    self.assertEqual(
        len(ids_1), len(set(ids_1)), "duplicate read ids in first output"
    )
    self.assertEqual(
        len(ids_2), len(set(ids_2)), "duplicate read ids in second output"
    )

    output_ids = set(ids_1)
    self.assertEqual(output_ids, set(ids_2))

    for read_id in target_read_ids:
        self.assertIn(read_id, output_ids)
    for read_id in excluded_read_ids:
        self.assertNotIn(read_id, output_ids)

    # Check sequences against the host-filtered inputs
    for records, fastq_path in zip((records_1, records_2), host_filter_paths):
        expected_sequences = read_fastq_sequences(fastq_path)
        for read_id, read_sequence in records:
            self.assertIn(read_id, expected_sequences)
            self.assertEqual(read_sequence, expected_sequences[read_id])

0 comments on commit 0ba1242

Please sign in to comment.