Skip to content

Commit

Permalink
additional tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Nina Bernick committed Oct 19, 2023
1 parent 1413978 commit f18d50c
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 27 deletions.
16 changes: 8 additions & 8 deletions workflows/index-generation/test/test_files/lineages.csv
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
taxid,superkingdom_taxid,phylum_taxid,class_taxid,order_taxid,family_taxid,genus_taxid,species_taxid,superkingdom_name,phylum_name,class_name,order_name,family_name,genus_name,species_name,superkingdom_common_name,phylum_common_name,class_common_name,order_common_name,family_common_name,genus_common_name,species_common_name,kingdom_taxid,kingdom_name,kingdom_common_name,tax_name,is_phage
6,2,1224,28211,356,335928,6,-100,Bacteria,Proteobacteria,Alphaproteobacteria,Rhizobiales,Xanthobacteraceae,Azorhizobium,,eubacteria,,,rhizobacteria,,,,-650,,,Azorhizobium,0
7,2,1224,28211,356,335928,100,7,Bacteria,Proteobacteria,Alphaproteobacteria,Rhizobiales,Xanthobacteraceae,Even Newer Genus,Even Newer Genus caulinodans,eubacteria,,,rhizobacteria,,,,-650,,,New Genus caulinodans,0
9,2,1224,1236,91347,12345,32199,9,Bacteria,Proteobacteria,Gammaproteobacteria,Enterobacterales,New Family,Buchnera,Buchnera aphidicola,eubacteria,,,,,aphid P-endosymbionts,,-650,,,Buchnera aphidicola,0
10,2,1224,1236,1706369,1706371,10,-100,Bacteria,Proteobacteria,Gammaproteobacteria,Cellvibrionales,Cellvibrionaceae,Cellvibrio,,eubacteria,,,,,,,-650,,,Cellvibrio,0
11,2,201174,1760,85006,85016,123456,11,Bacteria,Actinobacteria,Actinobacteria,Micrococcales,Cellulomonadaceae,New Genus 2,New Genus 2 gilvus,eubacteria,,high G+C Gram-positive bacteria,,,,,-650,,,Cellulomonas gilvus,0
1985173,2,976,1853228,1853229,123456,-200,1985173,Bacteria,Bacteroidetes,Chitinophagia,Chitinophagales,New family,,Chitinophagaceae bacterium IBVUCB1,eubacteria,,,,,,,-650,,,Chitinophagaceae bacterium IBVUCB1,0
13,2,68297,203486,203487,203488,13,-100,Bacteria,Dictyoglomi,Dictyoglomia,Dictyoglomales,Dictyoglomaceae,Dictyoglomus,,eubacteria,,,,,,,-650,,,Dictyoglomus,0
14,2,68297,203486,203487,203488,13,14,Bacteria,Dictyoglomi,Dictyoglomia,Dictyoglomales,Dictyoglomaceae,Dictyoglomus,Dictyoglomus thermophilum,eubacteria,,,,,,,-650,,,Dictyoglomus thermophilum,0
1,-700,-600,-500,-400,-300,-200,-100,,,,,,,,,,,,,,,-650,,,NULL,0
2,2,-600,-500,-400,-300,-200,-100,Bacteria,,,,,,,eubacteria,,,,,,,-650,,,Bacteria,0
3,2,1224,28211,356,335928,6,-100,Bacteria,Proteobacteria,Alphaproteobacteria,Rhizobiales,Xanthobacteraceae,Azorhizobium new name,,eubacteria,,,rhizobacteria,,,,-650,,,Genus A new name,0
4,2,1224,28211,356,335928,50,7,Bacteria,Proteobacteria,Alphaproteobacteria,Rhizobiales,Xanthobacteraceae,Genus E,Genus E caulinodans,eubacteria,,,rhizobacteria,,,,-650,,,Genus E caulinodans,0
5,2,1224,1236,91347,1903409,32199,9,Bacteria,Proteobacteria,Gammaproteobacteria,Enterobacterales,Erwiniaceae,Genus D,Buchnera aphidicola,eubacteria,,,,,aphid P-endosymbionts,,-650,,,Genus D aphidicola,0
6,2,976,1853228,1853229,12345,-200,1985173,Bacteria,Bacteroidetes,Chitinophagia,Chitinophagales,Family B,,Chitinophagaceae bacterium IBVUCB1,eubacteria,,,,,,,-650,,,Species missing a genus,0
9,10239,-600,-500,-400,10656,10657,10658,Viruses,,,,Tectiviridae,Tectivirus,Salmonella virus PRD1,,,,,,,,-650,,,Phage virus (salmonella),0
10,10239,-600,-500,-400,10656,10657,10,Viruses,,,,Tectiviridae,Tectivirus,Salmonella virus PRD1,,,,,,,,-650,,,New Virus,0
20 changes: 11 additions & 9 deletions workflows/index-generation/test/test_files/previous.csv
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
taxid,superkingdom_taxid,phylum_taxid,class_taxid,order_taxid,family_taxid,genus_taxid,species_taxid,created_at,updated_at,superkingdom_name,phylum_name,class_name,order_name,family_name,genus_name,species_name,superkingdom_common_name,phylum_common_name,class_common_name,order_common_name,family_common_name,genus_common_name,species_common_name,kingdom_taxid,kingdom_name,kingdom_common_name,tax_name,version_start,version_end,is_phage
1,-700,-600,-500,-400,-300,-200,-100,0000-00-00 00:00:00,0000-00-00 00:00:00,,,,,,,,,,,,,,,-650,,,NULL,2020-01-15,2021-01-22,0
2,2,-600,-500,-400,-300,-200,-100,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,,,,,,,eubacteria,,,,,,,-650,,,Bacteria,2020-01-15,2020-02-15,0
2,2,-600,-500,-400,-300,-200,-100,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,,,,,,,new name for bacteria,,,,,,,-650,,,Bacteria,2021-01-22,2021-01-22,0
6,2,1224,28211,356,335928,6,-100,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,Proteobacteria,Alphaproteobacteria,Rhizobiales,Xanthobacteraceae,Azorhizobium,,eubacteria,,,rhizobacteria,,,,-650,,,Azorhizobium,2020-01-15,2021-01-22,0
7,2,1224,28211,356,335928,6,7,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,Proteobacteria,Alphaproteobacteria,Rhizobiales,Xanthobacteraceae,Azorhizobium,Azorhizobium caulinodans,eubacteria,,,rhizobacteria,,,,-650,,,Azorhizobium caulinodans,2020-01-15,2020-02-15,0
7,2,1224,28211,356,335928,50,7,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,Proteobacteria,Alphaproteobacteria,Rhizobiales,Xanthobacteraceae,New Genus,New Genus caulinodans,eubacteria,,,rhizobacteria,,,,-650,,,New Genus caulinodans,2021-01-22,2021-01-22,0
9,2,1224,1236,91347,1903409,32199,9,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,Proteobacteria,Gammaproteobacteria,Enterobacterales,Erwiniaceae,Buchnera,Buchnera aphidicola,eubacteria,,,,,aphid P-endosymbionts,,-650,,,Buchnera aphidicola,2020-01-15,2021-01-22,0
10,2,1224,1236,1706369,1706371,10,-100,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,Proteobacteria,Gammaproteobacteria,Cellvibrionales,Cellvibrionaceae,Cellvibrio,,eubacteria,,,,,,,-650,,,Cellvibrio,2020-01-15,2021-01-22,0
11,2,201174,1760,85006,85016,1707,11,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,Actinobacteria,Actinobacteria,Micrococcales,Cellulomonadaceae,Cellulomonas,Cellulomonas gilvus,eubacteria,,high G+C Gram-positive bacteria,,,,,-650,,,Cellulomonas gilvus,2020-01-15,2021-01-22,0
1985173,2,976,1853228,1853229,563835,-200,1985173,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,Bacteroidetes,Chitinophagia,Chitinophagales,Chitinophagaceae,,Chitinophagaceae bacterium IBVUCB1,eubacteria,,,,,,,-650,,,Chitinophagaceae bacterium IBVUCB1,2020-01-15,2021-01-22,0
2,2,-600,-500,-400,-300,-200,-100,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,,,,,,,eubacteria,,,,,,,-650,,,Bacteria,2020-01-15,2021-01-22,0
3,2,1224,28211,356,335928,6,-100,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,Proteobacteria,Alphaproteobacteria,Rhizobiales,Xanthobacteraceae,Azorhizobium,,eubacteria,,,rhizobacteria,,,,-650,,,Genus A,2020-01-15,2020-02-15,0
3,2,1224,28211,356,335928,6,-100,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,Proteobacteria,Alphaproteobacteria,Rhizobiales,Xanthobacteraceae,Azorhizobium new name,,eubacteria,,,rhizobacteria,,,,-650,,,Genus A new name,2021-01-22,2021-01-22,0
4,2,1224,28211,356,335928,6,7,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,Proteobacteria,Alphaproteobacteria,Rhizobiales,Xanthobacteraceae,Azorhizobium,Azorhizobium caulinodans,eubacteria,,,rhizobacteria,,,,-650,,,Genus A caulinodans,2020-01-15,2020-02-15,0
4,2,1224,28211,356,335928,50,7,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,Proteobacteria,Alphaproteobacteria,Rhizobiales,Xanthobacteraceae,New Genus,New Genus caulinodans,eubacteria,,,rhizobacteria,,,,-650,,,Genus B caulinodans,2021-01-22,2021-01-22,0
5,2,1224,1236,91347,1903409,32199,9,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,Proteobacteria,Gammaproteobacteria,Enterobacterales,Erwiniaceae,Buchnera,Buchnera aphidicola,eubacteria,,,,,aphid P-endosymbionts,,-650,,,Genus C aphidicola,2020-01-15,2021-01-22,0
6,2,976,1853228,1853229,563835,-200,1985173,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,Bacteroidetes,Chitinophagia,Chitinophagales,Chitinophagaceae,,Chitinophagaceae bacterium IBVUCB1,eubacteria,,,,,,,-650,,,Species missing a genus,2020-01-15,2021-01-22,0
7,2,201174,1760,85006,85016,1707,11,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,Actinobacteria,Actinobacteria,Micrococcales,Cellulomonadaceae,Cellulomonas,Cellulomonas gilvus,eubacteria,,high G+C Gram-positive bacteria,,,,,-650,,,Deprecated taxon X,2020-01-15,2021-01-22,0
8,2,1224,1236,1706369,1706371,10,-100,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,Proteobacteria,Gammaproteobacteria,Cellvibrionales,Cellvibrionaceae,Cellvibrio,,eubacteria,,,,,,,-650,,,Deprecated taxon Y version 1,2020-01-15,2020-02-15,0
8,2,1224,1236,1706369,1706371,10,-100,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,Proteobacteria,Gammaproteobacteria,Cellvibrionales,Cellvibrionaceae,Cellvibrio,,eubacteria,,,,,,,-650,,,Deprecated taxon Y version 2,2021-01-22,2021-01-22,0
9,10239,-600,-500,-400,10656,10657,10658,0000-00-00 00:00:00,0000-00-00 00:00:00,Viruses,,,,Tectiviridae,Tectivirus,Salmonella virus PRD1,,,,,,,,-650,,,Phage virus (salmonella),2020-01-15,2021-01-22,1
39 changes: 29 additions & 10 deletions workflows/index-generation/test/test_generate_lineage_csvs.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,16 @@
test_files_dir = join(dirname(__file__), "test_files")

class TestIndexGeneration(unittest.TestCase):
# Taxon ids used by the assertion helpers of this test class; the trailing
# comment on each line describes the scenario that taxon is meant to cover.
# NOTE(review): the first six names below are assigned again further down
# with different ids (this looks like the removed and added sides of a diff
# shown together -- confirm). Executed in order, the later assignment is the
# value that remains bound to the name.
multiple_row_taxid = 7 # has 2 rows in old csv and a new row in current csv -> 3 rows in versioned csv
taxid_missing_genus = 1985173 # has -200 for genus, is assigned to new family in new CSV -> 2 rows in versioned csv
taxid_with_updated_genus = 11 # genus changed between old csv and current csv
deprecated_taxid = 1 # taxon present in old csv, not present in current csv
deprecated_taxid_multiple_rows = 2 # taxon present with multiple rows in old csv, not present in current csv
new_taxid = 13 # taxon not present in old csv, present in new csv
unchanged_taxon_id = 2 # in both old and new csv -> 1 row
unchanged_taxon_id_with_multiple_rows = 3 # unchanged in this update, but had past updates -> 2 total rows
multiple_row_taxid = 4 # has 2 rows in old csv and a new row in current csv -> 3 rows in versioned csv
taxid_with_updated_genus = 5 # genus changed between old csv and current csv
taxid_missing_genus = 6 # has -200 for genus, is assigned to new family in new CSV -> 2 rows in versioned csv
deprecated_taxid = 7 # taxon present in old csv, not present in current csv
deprecated_taxid_multiple_rows = 8 # taxon present with multiple rows in old csv, not present in current csv
taxid_phage_to_nonphage = 9 # taxon that changes from phage to non-phage
new_taxid = 10 # taxon not present in old csv, present in new csv


version = datetime.now().strftime("%Y-%m-%d")

Expand All @@ -27,8 +31,15 @@ def _find_entries_for_taxid(self, results, taxid):
entries.sort(key=lambda x: x['version_end'])
return entries

# Placeholder helper: it only prints the literal 'foo' and does not compare
# row1 with row2 in any way.
# NOTE(review): declared without `self`; if invoked as an instance method the
# instance would be bound to `row1` -- confirm whether this stub is still
# needed at all.
def _assert_taxon_row_identical(row1, row2):
print('foo')
def _unchanged_taxa_assertions(self, results):
    """Check the versioned rows of taxa whose lineage did not change.

    Two taxa are looked up in ``results`` via ``_find_entries_for_taxid``
    (which returns the matching rows sorted by ``version_end``):

    * ``self.unchanged_taxon_id`` must have exactly one row, and that row's
      ``version_end`` must equal ``self.version``.
    * ``self.unchanged_taxon_id_with_multiple_rows`` must have exactly two
      rows; only the later one may carry ``self.version`` as its
      ``version_end``.
    """
    single_history = self._find_entries_for_taxid(results, self.unchanged_taxon_id)
    self.assertEqual(len(single_history), 1)
    only_row = single_history[0]
    self.assertEqual(only_row['version_end'], self.version)

    longer_history = self._find_entries_for_taxid(
        results, self.unchanged_taxon_id_with_multiple_rows
    )
    self.assertEqual(len(longer_history), 2)
    # Rows are ordered by version_end, so index 0 is the older entry and
    # index 1 is the most recent one -- only the latter gets extended to the
    # current version.
    older_row = longer_history[0]
    newest_row = longer_history[1]
    self.assertNotEqual(older_row['version_end'], self.version)
    self.assertEqual(newest_row['version_end'], self.version)

def _deprecated_taxa_assertions(self, results):
deprecated_taxon_rows = self._find_entries_for_taxid(results, self.deprecated_taxid)
Expand Down Expand Up @@ -83,6 +94,12 @@ def _new_taxid_assertions(self, results):
self.assertEqual(new_taxon_rows[0]['version_start'], self.version)
self.assertEqual(new_taxon_rows[0]['version_end'], self.version)

def _phage_to_nonphage_assertions(self, results):
    """Check the versioned rows of the taxon ``self.taxid_phage_to_nonphage``.

    Exactly two rows are expected in ``results`` for that taxon. The first
    row (earlier ``version_end``) must not end at ``self.version``; the
    second row must both start and end at ``self.version``.

    NOTE(review): only the version columns are asserted here; the
    ``is_phage`` value of either row is not checked.
    """
    rows = self._find_entries_for_taxid(results, self.taxid_phage_to_nonphage)
    self.assertEqual(len(rows), 2)

    superseded_row = rows[0]
    current_row = rows[1]
    self.assertNotEqual(superseded_row['version_end'], self.version)
    # The replacement row is created by this run, so both ends of its
    # validity window are the current version.
    for column in ('version_start', 'version_end'):
        self.assertEqual(current_row[column], self.version)

def test_version_taxon_lineages(self):
with tempfile.NamedTemporaryFile('wb', suffix="csv.gz") as previous_lineages, \
Expand All @@ -106,11 +123,13 @@ def test_version_taxon_lineages(self):

output_unzipped = gzip.open(output.name, "rt")
result = list(csv.DictReader(output_unzipped))

self._unchanged_taxa_assertions(result)
self._deprecated_taxa_assertions(result)
self._deprecated_taxa_multiple_rows_assertions(result)
self._updated_taxa_assertions(result)
self._multiple_taxa_rows_assertions(result)
self._updated_taxa_missing_genus_assertions(result)
self._new_taxid_assertions(result)
self.assertEqual(len(result), 16, result)

self._phage_to_nonphage_assertions(result)
self.assertEqual(len(result), 17, result)

0 comments on commit f18d50c

Please sign in to comment.