Skip to content

Commit

Permalink
Merge pull request #437 from broadinstitute/qc_mt
Browse files Browse the repository at this point in the history
add checkpoint option to get_qc_mt
  • Loading branch information
klaricch authored Jan 25, 2022
2 parents e625c03 + 3e021b2 commit b955780
Showing 1 changed file with 8 additions and 1 deletion.
9 changes: 8 additions & 1 deletion gnomad/sample_qc/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ def get_qc_mt(
filter_segdup: bool = True,
filter_exome_low_coverage_regions: bool = False,
high_conf_regions: Optional[List[str]] = None,
checkpoint_path: Optional[str] = None,
) -> hl.MatrixTable:
"""
Create a QC-ready MT.
Expand All @@ -147,6 +148,7 @@ def get_qc_mt(
:param filter_segdup: Filter segmental duplication regions
:param filter_exome_low_coverage_regions: If set, only high coverage exome regions (computed from gnomAD) are kept
:param high_conf_regions: If given, the data will be filtered to only include variants in those regions
:param checkpoint_path: Only used when `ld_r2` is set. If given, the QC MT will be checkpointed to the specified path (overwriting any existing data there) before running LD pruning. If not specified, persist will be used instead.
:return: Filtered MT
"""
logger.info("Creating QC MatrixTable")
Expand Down Expand Up @@ -179,7 +181,12 @@ def get_qc_mt(
)

if ld_r2 is not None:
qc_mt = qc_mt.persist()
if checkpoint_path:
logger.info("Checkpointing the MT and LD pruning")
qc_mt = qc_mt.checkpoint(checkpoint_path, overwrite=True)
else:
logger.info("Persisting the MT and LD pruning")
qc_mt = qc_mt.persist()
unfiltered_qc_mt = qc_mt.unfilter_entries()
pruned_ht = hl.ld_prune(unfiltered_qc_mt.GT, r2=ld_r2)
qc_mt = qc_mt.filter_rows(hl.is_defined(pruned_ht[qc_mt.row_key]))
Expand Down

0 comments on commit b955780

Please sign in to comment.