Skip to content

Commit

Permalink
Display each genotype on a separate page for #412.
Browse files Browse the repository at this point in the history
  • Loading branch information
donkirkby committed Jan 9, 2018
1 parent f152e26 commit 70c0c61
Show file tree
Hide file tree
Showing 5 changed files with 167 additions and 98 deletions.
56 changes: 30 additions & 26 deletions micall/hivdb/genreport.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from argparse import ArgumentParser, FileType
import csv
from collections import defaultdict, Counter
from collections import defaultdict, Counter, namedtuple

import yaml

Expand Down Expand Up @@ -36,19 +36,21 @@ def read_config(git_version):
"""Read in a configuration file for generating reports."""
with open(REPORT_CONFIG_PATH) as fi:
virus_configs = yaml.safe_load(fi)
report_pages = []
report_templates = []
for virus_config in virus_configs:
virus_config['generated_by_text'] = (
virus_config['generated_by_text'].format(git_version or ''))
report_pages.append(ReportPage(virus_config))
return report_pages
report_templates.append(ReportTemplate(virus_config))
return report_templates


class ReportPage:
ReportPage = namedtuple('ReportPage', 'resistance_calls mutations')


class ReportTemplate:
def __init__(self, virus_config, raise_missing=False):
self.virus_config = virus_config
self.resistance_calls = {}
self.mutations = {}
self.genotype_pages = defaultdict(lambda: ReportPage({}, {}))
err_string = "Error in configuration file"
if not isinstance(virus_config, dict):
raise RuntimeError("""Configuration in {} must be a
Expand Down Expand Up @@ -163,8 +165,11 @@ def __repr__(self):
report_title = self.virus_config.get('report_title')
return "ReportPage({{'report_title': {!r}}})".format(report_title)

def get_reported_drug_classes(self):
reported_drug_codes = set(self.resistance_calls.keys())
def get_reported_genotypes(self):
return sorted(self.genotype_pages.keys())

def get_reported_drug_classes(self, genotype):
reported_drug_codes = set(self.genotype_pages[genotype].resistance_calls.keys())
return {class_code
for class_code, drugs in self.virus_config['known_drugs'].items()
if any(drug_code in reported_drug_codes
Expand All @@ -185,23 +190,21 @@ def read_mutations(drug_classes, csv_file):
tmp_dct = defaultdict(list)
for od_num, od in enumerate(data_lst):
d_class, mut_str = od['drug_class'], od["mutation"]
tmp_dct[d_class].append('{}({:.0f}%)'.format(
genotype = od['genotype']
tmp_dct[(genotype, d_class)].append('{}({:.0f}%)'.format(
mut_str,
100*float(od['prevalence'])))
report_pages = set(drug_classes.values())
for report_page in report_pages:
for d_class in report_page.virus_config['known_drug_classes']:
mutations = tmp_dct[d_class]
mut_str = ", ".join(mutations) if mutations else "None"
report_page.mutations[d_class] = "Relevant {} Mutations: {}".format(
d_class,
mut_str)
for (genotype, drug_class), mutations in tmp_dct.items():
report_template = drug_classes[drug_class]
report_page = report_template.genotype_pages[genotype]
mutation_display = ', '.join(mutations)
report_page.mutations[drug_class] = mutation_display


def read_resistance(regions, csv_file):
"""Read in a resistance call file from CSV.
:param regions: {region: ReportPage} each ReportPage will receive the
:param regions: {region: ReportTemplate} each ReportPage will receive the
resistance calls from its regions
:param csv_file: the resistance calls to load
"""
Expand All @@ -213,10 +216,11 @@ def read_resistance(regions, csv_file):
if sum([set(od.keys()) == exp_set for od in data_lst]) != len(data_lst):
raise RuntimeError("{}: unexpected data found.".format(err_string))
for od in data_lst:
report_page = regions[od['region']]
template = regions[od['region']]
level = int(od['level'])
drug_id = od['drug']
report_page.genotype = od['genotype']
genotype = od['genotype']
report_page = template.genotype_pages[genotype]
report_page.resistance_calls[drug_id] = (level, od["level_name"])


Expand All @@ -233,16 +237,16 @@ def gen_report(resistance_csv,
:param sample_name: name to describe the sample on the report
:param git_version: source code version to display
"""
report_pages = read_config(git_version)
report_templates = read_config(git_version)
regions = {}
drug_classes = {}
for report_page in report_pages:
report_page.register_regions(regions)
report_page.register_drug_classes(drug_classes)
for report_template in report_templates:
report_template.register_regions(regions)
report_template.register_drug_classes(drug_classes)
read_resistance(regions, resistance_csv)
read_mutations(drug_classes, mutations_csv)

pdfreport.write_report_one_column(report_pages, res_report_pdf, sample_name)
pdfreport.write_report_one_column(report_templates, res_report_pdf, sample_name)


def main():
Expand Down
14 changes: 9 additions & 5 deletions micall/hivdb/hivdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
from itertools import groupby
from operator import itemgetter

from pyvdrm.vcf import Mutation

from micall.hivdb.asi_algorithm import AsiAlgorithm
from micall.core.aln2counts import AMINO_ALPHABET

Expand Down Expand Up @@ -84,7 +86,7 @@ def read_aminos(amino_csv, min_fraction, reported_regions=None):
if reported_regions:
missing_regions.update(reported_regions.keys())
for (region, seed), rows in groupby(DictReader(amino_csv),
itemgetter('region', 'seed')):
itemgetter('region', 'seed')):
if reported_regions is not None:
missing_regions.discard(region)
translated_region, is_reported = reported_regions.get(region,
Expand All @@ -103,7 +105,7 @@ def read_aminos(amino_csv, min_fraction, reported_regions=None):
for i, count in enumerate(counts)
if count >= min_count and report_names[i] != '*'}
ins_count = int(row['ins'])
if ins_count >= min_count:
if ins_count >= min_count and coverage > 0:
pos_aminos['i'] = ins_count / coverage
aminos.append(pos_aminos)
yield AminoList(region, aminos, seed)
Expand Down Expand Up @@ -177,9 +179,11 @@ def write_resistance(aminos, resistance_csv, mutations_csv):
score=drug_result.score,
genotype=genotype))
for drug_class, class_mutations in result.mutations.items():
for mutation in class_mutations:
amino = mutation[-1]
pos = int(mutation[1:-1])
mutations = [Mutation(m) for m in class_mutations]
mutations.sort()
for mutation in mutations:
amino = mutation.variant
pos = mutation.pos
pos_aminos = amino_seq[pos-1]
prevalence = pos_aminos[amino]
mutations_writer.writerow(dict(drug_class=drug_class,
Expand Down
99 changes: 56 additions & 43 deletions micall/hivdb/pdfreport.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
import reportlab.platypus as plat

# we currently only support North American letter paper -- no A4
from reportlab.platypus import PageBreak

page_w, page_h = letter

# Times are reported in this time zone
Expand Down Expand Up @@ -74,12 +76,15 @@ def headertab_style(row_offset, colnum, dospan):
return lst


def drug_class_tablst(row_offset, report_page, dc_name, level_coltab):
cfg_dct = report_page.virus_config
drug_lst = cfg_dct["known_drugs"][dc_name]
table_header_str = cfg_dct['drug_class_tableheaders'][dc_name]
def drug_class_tablst(row_offset, report_template, genotype, drug_class_code, level_coltab):
cfg_dct = report_template.virus_config
drug_lst = cfg_dct["known_drugs"][drug_class_code]
table_header_str = cfg_dct['drug_class_tableheaders'][drug_class_code]
report_page = report_template.genotype_pages[genotype]
resistance_dct = report_page.resistance_calls
mutation_str = report_page.mutations[dc_name]
mutation_str = report_page.mutations.get(drug_class_code, 'None')
mutation_str = "Relevant {} Mutations: {}".format(drug_class_code,
mutation_str)
# 1) row 0: header column: name of drug_class
t_data = [["{} Drugs".format(table_header_str), ""]]
t_style = headertab_style(row_offset, 2, dospan=True)
Expand All @@ -95,7 +100,7 @@ def drug_class_tablst(row_offset, report_page, dc_name, level_coltab):
if drug_id in resistance_dct:
level, level_name = resistance_dct[drug_id]
else:
level, level_name = 1, "NOT REPORTED"
level, level_name = 0, "Not indicated: genotype " + genotype
t_data.append([drug_name, level_name])
# determine colours for the level
bg_col, fg_col = level_coltab[level]
Expand Down Expand Up @@ -143,7 +148,7 @@ def top_table(sample_name, table_width, genotype):
hAlign="CENTRE")


def write_report_one_column(report_pages, fname, sample_name=None):
def write_report_one_column(report_templates, fname, sample_name=None):
"""Generate a PDF report to a given output file name
"""
doc = plat.SimpleDocTemplate(
Expand All @@ -160,45 +165,53 @@ def write_report_one_column(report_pages, fname, sample_name=None):
ti_style = ParagraphStyle("scotitle", alignment=TA_CENTER, fontSize=20)
re_style = ParagraphStyle("scored", fontSize=15, textColor=colors.red,
spaceBefore=5 * mm, spaceAfter=5 * mm)
for report_page in report_pages:
for report_template in report_templates:
# from the resistance, we determine which drug_classes to write a table for:
# we only write a table if we are given resistance data for it.
got_dc_set = report_page.get_reported_drug_classes()
if not got_dc_set:
reported_genotypes = report_template.get_reported_genotypes()
if not reported_genotypes:
continue
cfg_dct = report_page.virus_config
col_tab = cfg_dct["resistance_level_colours"]
level_coltab = dict([(k, (colors.HexColor(v[1]), colors.HexColor(v[2])))
for k, v in col_tab.items()])
doc_els.append(plat.Paragraph(cfg_dct["report_title"], ti_style))
doc_els.append(plat.Paragraph("For research use only", re_style))
# -- top table
doc_els.append(top_table(sample_name, table_width, report_page.genotype))
# now drug classes tables, two per line
known_dc_lst = cfg_dct["known_dclass_list"]
tot_tab, tot_style = [], []
for dc in [dc for dc in known_dc_lst if dc in got_dc_set]:
tl, t_style = drug_class_tablst(len(tot_tab), report_page, dc, level_coltab)
tot_tab.extend(tl)
tot_style.extend(t_style)
# adjust the overall table style
num_rows = len(tot_tab)
tot_style.extend([("VALIGN", (0, 0), (1, num_rows-1), "TOP"),
("FONTSIZE", (0, 0), (1, num_rows-1), TAB_FONT_SIZE),
("LEADING", (0, 0), (1, num_rows-1), TAB_FONT_SIZE)])
left_col_w = table_width * 0.5
right_col_w = table_width - left_col_w
doc_els.append(plat.Table(tot_tab,
vAlign="TOP",
hAlign="CENTRE", style=tot_style,
colWidths=[left_col_w, right_col_w]))
# this is for layout debugging
# big_table = [["l0", "r0"], ["l1", "r1"], ["l2", "r2"]]
# debug_lst = [("GRID", (lc, 0), (rc, d_rowmax), 1, colors.red)]
# btstyle.extend(debug_lst)
# bottom paragraphs
doc_els.append(bottom_para(cfg_dct["disclaimer_text"]))
doc_els.append(bottom_para(cfg_dct["generated_by_text"]))
for genotype in reported_genotypes:
if doc_els:
doc_els.append(PageBreak())
got_dc_set = report_template.get_reported_drug_classes(genotype)
cfg_dct = report_template.virus_config
col_tab = cfg_dct["resistance_level_colours"]
level_coltab = dict([(k, (colors.HexColor(v[1]), colors.HexColor(v[2])))
for k, v in col_tab.items()])
doc_els.append(plat.Paragraph(cfg_dct["report_title"], ti_style))
doc_els.append(plat.Paragraph("For research use only", re_style))
# -- top table
doc_els.append(top_table(sample_name, table_width, genotype))
# now drug classes tables, two per line
known_dc_lst = cfg_dct["known_dclass_list"]
tot_tab, tot_style = [], []
for dc in [dc for dc in known_dc_lst if dc in got_dc_set]:
tl, t_style = drug_class_tablst(len(tot_tab),
report_template,
genotype,
dc,
level_coltab)
tot_tab.extend(tl)
tot_style.extend(t_style)
# adjust the overall table style
num_rows = len(tot_tab)
tot_style.extend([("VALIGN", (0, 0), (1, num_rows-1), "TOP"),
("FONTSIZE", (0, 0), (1, num_rows-1), TAB_FONT_SIZE),
("LEADING", (0, 0), (1, num_rows-1), TAB_FONT_SIZE)])
left_col_w = table_width * 0.5
right_col_w = table_width - left_col_w
doc_els.append(plat.Table(tot_tab,
vAlign="TOP",
hAlign="CENTRE", style=tot_style,
colWidths=[left_col_w, right_col_w]))
# this is for layout debugging
# big_table = [["l0", "r0"], ["l1", "r1"], ["l2", "r2"]]
# debug_lst = [("GRID", (lc, 0), (rc, d_rowmax), 1, colors.red)]
# btstyle.extend(debug_lst)
# bottom paragraphs
doc_els.append(bottom_para(cfg_dct["disclaimer_text"]))
doc_els.append(bottom_para(cfg_dct["generated_by_text"]))
if doc_els:
doc.build(doc_els)

Expand Down
54 changes: 30 additions & 24 deletions micall/tests/test_genreport.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,29 @@
from unittest import TestCase

from micall.hivdb.genreport import ReportPage
from micall.hivdb.genreport import ReportTemplate, ReportPage


class GenReportTest(TestCase):
def test_unknown_keys(self):
with self.assertRaisesRegex(
ValueError,
r"Unknown configuration: bogus_key, weird_key."):
ReportPage(dict(weird_key="Hello",
known_regions=['R1', 'R2'],
bogus_key=42))
ReportTemplate(dict(weird_key="Hello",
known_regions=['R1', 'R2'],
bogus_key=42))

def test_missing_keys(self):
with self.assertRaisesRegex(
ValueError,
r"Missing configuration: generated_by_text, known_drug_classes, "
r"known_drugs, report_title, resistance_level_colours."):
ReportPage(dict(known_regions=['R1', 'R2'],
disclaimer_text="Hello."),
raise_missing=True)
ReportTemplate(dict(known_regions=['R1', 'R2'],
disclaimer_text="Hello."),
raise_missing=True)

def test_register_regions(self):
page1 = ReportPage(dict(known_regions=['R1', 'R2']))
page2 = ReportPage(dict(known_regions=['R3']))
page1 = ReportTemplate(dict(known_regions=['R1', 'R2']))
page2 = ReportTemplate(dict(known_regions=['R3']))
expected_regions = {'R1': page1,
'R2': page1,
'R3': page2}
Expand All @@ -35,12 +35,12 @@ def test_register_regions(self):
self.assertEqual(expected_regions, regions)

def test_register_drug_classes(self):
page1 = ReportPage(dict(known_drug_classes=[('C1', 'Class 1'),
('C2', 'Class 2')],
known_drugs={'C1': [('D1', 'Drug 1')],
'C2': [('D2', 'Drug 2')]}))
page2 = ReportPage(dict(known_drug_classes=[('C3', 'Class 3')],
known_drugs={'C3': [('D3', 'Drug 3')]}))
page1 = ReportTemplate(dict(known_drug_classes=[('C1', 'Class 1'),
('C2', 'Class 2')],
known_drugs={'C1': [('D1', 'Drug 1')],
'C2': [('D2', 'Drug 2')]}))
page2 = ReportTemplate(dict(known_drug_classes=[('C3', 'Class 3')],
known_drugs={'C3': [('D3', 'Drug 3')]}))
expected_drug_classes = {'C1': page1,
'C2': page1,
'C3': page2}
Expand All @@ -52,22 +52,28 @@ def test_register_drug_classes(self):
self.assertEqual(expected_drug_classes, drug_classes)

def test_repr(self):
page = ReportPage({'report_title': 'Example Report',
'known_regions': ['R1', 'R2']})
page = ReportTemplate({'report_title': 'Example Report',
'known_regions': ['R1', 'R2']})
expected_repr = "ReportPage({'report_title': 'Example Report'})"

r = repr(page)

self.assertEqual(expected_repr, r)

def test_get_reported_drug_classes(self):
page = ReportPage(dict(known_drug_classes=[('C1', 'Class 1'),
('C2', 'Class 2')],
known_drugs={'C1': [('D1', 'Drug 1')],
'C2': [('D2', 'Drug 2')]}))
page.resistance_calls['D1'] = 'Some resistance data'
template = ReportTemplate(dict(known_drug_classes=[('C1', 'Class 1'),
('C2', 'Class 2')],
known_drugs={'C1': [('D1', 'Drug 1')],
'C2': [('D2', 'Drug 2')]}))
genotype = 'g1'
template.genotype_pages[genotype] = ReportPage(
resistance_calls={'D1': 'Some resistance data'},
mutations={})
expected_genotypes = [genotype]
expected_drug_classes = {'C1'}

reported_drug_classes = page.get_reported_drug_classes()
reported_genotypes = template.get_reported_genotypes()
reported_drug_classes = template.get_reported_drug_classes(genotype)

self.assertEqual(expected_drug_classes, reported_drug_classes)
self.assertEqual(expected_genotypes, reported_genotypes)
self.assertEqual(expected_drug_classes, reported_drug_classes)
Loading

0 comments on commit 70c0c61

Please sign in to comment.