From f18d50c84e5e8fc1e1aa642ec3eda97d2083226f Mon Sep 17 00:00:00 2001 From: Nina Bernick Date: Thu, 19 Oct 2023 14:16:41 -0700 Subject: [PATCH] additional tests --- .../test/test_files/lineages.csv | 16 ++++---- .../test/test_files/previous.csv | 20 +++++----- .../test/test_generate_lineage_csvs.py | 39 ++++++++++++++----- 3 files changed, 48 insertions(+), 27 deletions(-) diff --git a/workflows/index-generation/test/test_files/lineages.csv b/workflows/index-generation/test/test_files/lineages.csv index b01421b41..e107d0206 100644 --- a/workflows/index-generation/test/test_files/lineages.csv +++ b/workflows/index-generation/test/test_files/lineages.csv @@ -1,9 +1,9 @@ taxid,superkingdom_taxid,phylum_taxid,class_taxid,order_taxid,family_taxid,genus_taxid,species_taxid,superkingdom_name,phylum_name,class_name,order_name,family_name,genus_name,species_name,superkingdom_common_name,phylum_common_name,class_common_name,order_common_name,family_common_name,genus_common_name,species_common_name,kingdom_taxid,kingdom_name,kingdom_common_name,tax_name,is_phage -6,2,1224,28211,356,335928,6,-100,Bacteria,Proteobacteria,Alphaproteobacteria,Rhizobiales,Xanthobacteraceae,Azorhizobium,,eubacteria,,,rhizobacteria,,,,-650,,,Azorhizobium,0 -7,2,1224,28211,356,335928,100,7,Bacteria,Proteobacteria,Alphaproteobacteria,Rhizobiales,Xanthobacteraceae,Even Newer Genus,Even Newer Genus caulinodans,eubacteria,,,rhizobacteria,,,,-650,,,New Genus caulinodans,0 -9,2,1224,1236,91347,12345,32199,9,Bacteria,Proteobacteria,Gammaproteobacteria,Enterobacterales,New Family,Buchnera,Buchnera aphidicola,eubacteria,,,,,aphid P-endosymbionts,,-650,,,Buchnera aphidicola,0 -10,2,1224,1236,1706369,1706371,10,-100,Bacteria,Proteobacteria,Gammaproteobacteria,Cellvibrionales,Cellvibrionaceae,Cellvibrio,,eubacteria,,,,,,,-650,,,Cellvibrio,0 -11,2,201174,1760,85006,85016,123456,11,Bacteria,Actinobacteria,Actinobacteria,Micrococcales,Cellulomonadaceae,New Genus 2,New Genus 2 gilvus,eubacteria,,high G+C Gram-positive bacteria,,,,,-650,,,Cellulomonas gilvus,0 -1985173,2,976,1853228,1853229,123456,-200,1985173,Bacteria,Bacteroidetes,Chitinophagia,Chitinophagales,New family,,Chitinophagaceae bacterium IBVUCB1,eubacteria,,,,,,,-650,,,Chitinophagaceae bacterium IBVUCB1,0 -13,2,68297,203486,203487,203488,13,-100,Bacteria,Dictyoglomi,Dictyoglomia,Dictyoglomales,Dictyoglomaceae,Dictyoglomus,,eubacteria,,,,,,,-650,,,Dictyoglomus,0 -14,2,68297,203486,203487,203488,13,14,Bacteria,Dictyoglomi,Dictyoglomia,Dictyoglomales,Dictyoglomaceae,Dictyoglomus,Dictyoglomus thermophilum,eubacteria,,,,,,,-650,,,Dictyoglomus thermophilum,0 \ No newline at end of file +1,-700,-600,-500,-400,-300,-200,-100,,,,,,,,,,,,,,,-650,,,NULL,0 +2,2,-600,-500,-400,-300,-200,-100,Bacteria,,,,,,,eubacteria,,,,,,,-650,,,Bacteria,0 +3,2,1224,28211,356,335928,6,-100,Bacteria,Proteobacteria,Alphaproteobacteria,Rhizobiales,Xanthobacteraceae,Azorhizobium new name,,eubacteria,,,rhizobacteria,,,,-650,,,Genus A new name,0 +4,2,1224,28211,356,335928,50,7,Bacteria,Proteobacteria,Alphaproteobacteria,Rhizobiales,Xanthobacteraceae,Genus E,Genus E caulinodans,eubacteria,,,rhizobacteria,,,,-650,,,Genus E caulinodans,0 +5,2,1224,1236,91347,1903409,32199,9,Bacteria,Proteobacteria,Gammaproteobacteria,Enterobacterales,Erwiniaceae,Genus D,Buchnera aphidicola,eubacteria,,,,,aphid P-endosymbionts,,-650,,,Genus D aphidicola,0 +6,2,976,1853228,1853229,12345,-200,1985173,Bacteria,Bacteroidetes,Chitinophagia,Chitinophagales,Family B,,Chitinophagaceae bacterium IBVUCB1,eubacteria,,,,,,,-650,,,Species missing a genus,0 +9,10239,-600,-500,-400,10656,10657,10658,Viruses,,,,Tectiviridae,Tectivirus,Salmonella virus PRD1,,,,,,,,-650,,,Phage virus (salmonella),0 +10,10239,-600,-500,-400,10656,10657,10,Viruses,,,,Tectiviridae,Tectivirus,Salmonella virus PRD1,,,,,,,,-650,,,New Virus,0 \ No newline at end of file diff --git a/workflows/index-generation/test/test_files/previous.csv b/workflows/index-generation/test/test_files/previous.csv index 8ed356cb6..a7aed8118 100644 --- a/workflows/index-generation/test/test_files/previous.csv +++ b/workflows/index-generation/test/test_files/previous.csv @@ -1,11 +1,13 @@ taxid,superkingdom_taxid,phylum_taxid,class_taxid,order_taxid,family_taxid,genus_taxid,species_taxid,created_at,updated_at,superkingdom_name,phylum_name,class_name,order_name,family_name,genus_name,species_name,superkingdom_common_name,phylum_common_name,class_common_name,order_common_name,family_common_name,genus_common_name,species_common_name,kingdom_taxid,kingdom_name,kingdom_common_name,tax_name,version_start,version_end,is_phage 1,-700,-600,-500,-400,-300,-200,-100,0000-00-00 00:00:00,0000-00-00 00:00:00,,,,,,,,,,,,,,,-650,,,NULL,2020-01-15,2021-01-22,0 -2,2,-600,-500,-400,-300,-200,-100,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,,,,,,,eubacteria,,,,,,,-650,,,Bacteria,2020-01-15,2020-02-15,0 -2,2,-600,-500,-400,-300,-200,-100,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,,,,,,,new name for bacteria,,,,,,,-650,,,Bacteria,2021-01-22,2021-01-22,0 -6,2,1224,28211,356,335928,6,-100,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,Proteobacteria,Alphaproteobacteria,Rhizobiales,Xanthobacteraceae,Azorhizobium,,eubacteria,,,rhizobacteria,,,,-650,,,Azorhizobium,2020-01-15,2021-01-22,0 -7,2,1224,28211,356,335928,6,7,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,Proteobacteria,Alphaproteobacteria,Rhizobiales,Xanthobacteraceae,Azorhizobium,Azorhizobium caulinodans,eubacteria,,,rhizobacteria,,,,-650,,,Azorhizobium caulinodans,2020-01-15,2020-02-15,0 -7,2,1224,28211,356,335928,50,7,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,Proteobacteria,Alphaproteobacteria,Rhizobiales,Xanthobacteraceae,New Genus,New Genus caulinodans,eubacteria,,,rhizobacteria,,,,-650,,,New Genus caulinodans,2021-01-22,2021-01-22,0 -9,2,1224,1236,91347,1903409,32199,9,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,Proteobacteria,Gammaproteobacteria,Enterobacterales,Erwiniaceae,Buchnera,Buchnera aphidicola,eubacteria,,,,,aphid P-endosymbionts,,-650,,,Buchnera aphidicola,2020-01-15,2021-01-22,0 -10,2,1224,1236,1706369,1706371,10,-100,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,Proteobacteria,Gammaproteobacteria,Cellvibrionales,Cellvibrionaceae,Cellvibrio,,eubacteria,,,,,,,-650,,,Cellvibrio,2020-01-15,2021-01-22,0 -11,2,201174,1760,85006,85016,1707,11,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,Actinobacteria,Actinobacteria,Micrococcales,Cellulomonadaceae,Cellulomonas,Cellulomonas gilvus,eubacteria,,high G+C Gram-positive bacteria,,,,,-650,,,Cellulomonas gilvus,2020-01-15,2021-01-22,0 -1985173,2,976,1853228,1853229,563835,-200,1985173,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,Bacteroidetes,Chitinophagia,Chitinophagales,Chitinophagaceae,,Chitinophagaceae bacterium IBVUCB1,eubacteria,,,,,,,-650,,,Chitinophagaceae bacterium IBVUCB1,2020-01-15,2021-01-22,0 \ No newline at end of file +2,2,-600,-500,-400,-300,-200,-100,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,,,,,,,eubacteria,,,,,,,-650,,,Bacteria,2020-01-15,2021-01-22,0 +3,2,1224,28211,356,335928,6,-100,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,Proteobacteria,Alphaproteobacteria,Rhizobiales,Xanthobacteraceae,Azorhizobium,,eubacteria,,,rhizobacteria,,,,-650,,,Genus A,2020-01-15,2020-02-15,0 +3,2,1224,28211,356,335928,6,-100,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,Proteobacteria,Alphaproteobacteria,Rhizobiales,Xanthobacteraceae,Azorhizobium new name,,eubacteria,,,rhizobacteria,,,,-650,,,Genus A new name,2021-01-22,2021-01-22,0 +4,2,1224,28211,356,335928,6,7,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,Proteobacteria,Alphaproteobacteria,Rhizobiales,Xanthobacteraceae,Azorhizobium,Azorhizobium caulinodans,eubacteria,,,rhizobacteria,,,,-650,,,Genus A caulinodans,2020-01-15,2020-02-15,0 +4,2,1224,28211,356,335928,50,7,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,Proteobacteria,Alphaproteobacteria,Rhizobiales,Xanthobacteraceae,New Genus,New Genus caulinodans,eubacteria,,,rhizobacteria,,,,-650,,,Genus B caulinodans,2021-01-22,2021-01-22,0 +5,2,1224,1236,91347,1903409,32199,9,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,Proteobacteria,Gammaproteobacteria,Enterobacterales,Erwiniaceae,Buchnera,Buchnera aphidicola,eubacteria,,,,,aphid P-endosymbionts,,-650,,,Genus C aphidicola,2020-01-15,2021-01-22,0 +6,2,976,1853228,1853229,563835,-200,1985173,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,Bacteroidetes,Chitinophagia,Chitinophagales,Chitinophagaceae,,Chitinophagaceae bacterium IBVUCB1,eubacteria,,,,,,,-650,,,Species missing a genus,2020-01-15,2021-01-22,0 +7,2,201174,1760,85006,85016,1707,11,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,Actinobacteria,Actinobacteria,Micrococcales,Cellulomonadaceae,Cellulomonas,Cellulomonas gilvus,eubacteria,,high G+C Gram-positive bacteria,,,,,-650,,,Deprecated taxon X,2020-01-15,2021-01-22,0 +8,2,1224,1236,1706369,1706371,10,-100,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,Proteobacteria,Gammaproteobacteria,Cellvibrionales,Cellvibrionaceae,Cellvibrio,,eubacteria,,,,,,,-650,,,Deprecated taxon Y version 1,2020-01-15,2020-02-15,0 +8,2,1224,1236,1706369,1706371,10,-100,0000-00-00 00:00:00,0000-00-00 00:00:00,Bacteria,Proteobacteria,Gammaproteobacteria,Cellvibrionales,Cellvibrionaceae,Cellvibrio,,eubacteria,,,,,,,-650,,,Deprecated taxon Y version 2,2021-01-22,2021-01-22,0 +9,10239,-600,-500,-400,10656,10657,10658,0000-00-00 00:00:00,0000-00-00 00:00:00,Viruses,,,,Tectiviridae,Tectivirus,Salmonella virus PRD1,,,,,,,,-650,,,Phage virus (salmonella),2020-01-15,2021-01-22,1 \ No newline at end of file diff --git a/workflows/index-generation/test/test_generate_lineage_csvs.py b/workflows/index-generation/test/test_generate_lineage_csvs.py index be657eb79..9945875ad 100644 --- a/workflows/index-generation/test/test_generate_lineage_csvs.py +++ b/workflows/index-generation/test/test_generate_lineage_csvs.py @@ -13,12 +13,16 @@ test_files_dir = join(dirname(__file__), "test_files") class TestIndexGeneration(unittest.TestCase): - multiple_row_taxid = 7 # has 2 rows in old csv and a new row in current csv -> 3 rows in versioned csv - taxid_missing_genus = 1985173 # has -200 for genus, is assigned to new family in new CSV -> 2 rows in versioned csv - taxid_with_updated_genus = 11 # genus changed between old csv and current csv - deprecated_taxid = 1 # taxon present in old csv, not present in current csv - deprecated_taxid_multiple_rows = 2 # taxon present with multiple rows in old csv, not present in current csv - new_taxid = 13 # taxon not present in old csv, present in new csv + unchanged_taxon_id = 2 # in both old and new csv -> 1 row + unchanged_taxon_id_with_multiple_rows = 3 # unchanged in this update, but had past updates -> 2 total rows + multiple_row_taxid = 4 # has 2 rows in old csv and a new row in current csv -> 3 rows in versioned csv + taxid_with_updated_genus = 5 # genus changed between old csv and current csv + taxid_missing_genus = 6 # has -200 for genus, is assigned to new family in new CSV -> 2 rows in versioned csv + deprecated_taxid = 7 # taxon present in old csv, not present in current csv + deprecated_taxid_multiple_rows = 8 # taxon present with multiple rows in old csv, not present in current csv + taxid_phage_to_nonphage = 9 # taxon that changes from phage to non-phage + new_taxid = 10 # taxon not present in old csv, present in new csv + version = datetime.now().strftime("%Y-%m-%d") @@ -27,8 +31,15 @@ def _find_entries_for_taxid(self, results, taxid): entries.sort(key=lambda x: x['version_end']) return entries - def _assert_taxon_row_identical(row1, row2): - print('foo') + def _unchanged_taxa_assertions(self, results): + unchanged_taxon_single_row = self._find_entries_for_taxid(results, self.unchanged_taxon_id) + self.assertEqual(len(unchanged_taxon_single_row), 1) + self.assertEqual(unchanged_taxon_single_row[0]['version_end'], self.version) + unchanged_taxon_multiple_rows = self._find_entries_for_taxid(results, self.unchanged_taxon_id_with_multiple_rows) + self.assertEqual(len(unchanged_taxon_multiple_rows), 2) + # second entry (more recent one) should have its version_end updated + self.assertNotEqual(unchanged_taxon_multiple_rows[0]['version_end'], self.version) + self.assertEqual(unchanged_taxon_multiple_rows[1]['version_end'], self.version) def _deprecated_taxa_assertions(self, results): deprecated_taxon_rows = self._find_entries_for_taxid(results, self.deprecated_taxid) @@ -83,6 +94,12 @@ def _new_taxid_assertions(self, results): self.assertEqual(new_taxon_rows[0]['version_start'], self.version) self.assertEqual(new_taxon_rows[0]['version_end'], self.version) + def _phage_to_nonphage_assertions(self, results): + phage_rows = self._find_entries_for_taxid(results, self.taxid_phage_to_nonphage) + self.assertEqual(len(phage_rows), 2) + self.assertNotEqual(phage_rows[0]['version_end'], self.version) + self.assertEqual(phage_rows[1]['version_start'], self.version) + self.assertEqual(phage_rows[1]['version_end'], self.version) def test_version_taxon_lineages(self): with tempfile.NamedTemporaryFile('wb', suffix="csv.gz") as previous_lineages, \ @@ -106,11 +123,13 @@ def test_version_taxon_lineages(self): output_unzipped = gzip.open(output.name, "rt") result = list(csv.DictReader(output_unzipped)) + + self._unchanged_taxa_assertions(result) self._deprecated_taxa_assertions(result) self._deprecated_taxa_multiple_rows_assertions(result) self._updated_taxa_assertions(result) self._multiple_taxa_rows_assertions(result) self._updated_taxa_missing_genus_assertions(result) self._new_taxid_assertions(result) - self.assertEqual(len(result), 16, result) - + self._phage_to_nonphage_assertions(result) + self.assertEqual(len(result), 17, result)