Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Modified annotate_sex() #427

Merged
merged 11 commits into from
Jan 3, 2022
53 changes: 42 additions & 11 deletions gnomad/sample_qc/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from gnomad.utils.reference_genome import get_reference_genome
from gnomad.utils.sparse_mt import impute_sex_ploidy

from typing import List, Optional
from typing import List, Optional, Union

logging.basicConfig(format="%(levelname)s (%(name)s %(lineno)s): %(message)s")
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -209,7 +209,7 @@ def get_qc_mt(


def annotate_sex(
mt: hl.MatrixTable,
mtds: Union[hl.MatrixTable, hl.vds.VariantDataset],
is_sparse: bool = True,
excluded_intervals: Optional[hl.Table] = None,
included_intervals: Optional[hl.Table] = None,
Expand All @@ -225,7 +225,7 @@ def annotate_sex(

Return Table with the following fields:
- s (str): Sample
- chr20_mean_dp (float32): Sample's mean coverage over chromosome 20.
- `normalization_contig`_mean_dp (float32): Sample's mean coverage over the specified `normalization_contig`.
- chrX_mean_dp (float32): Sample's mean coverage over chromosome X.
- chrY_mean_dp (float32): Sample's mean coverage over chromosome Y.
- chrX_ploidy (float32): Sample's imputed ploidy over chromosome X.
Expand All @@ -238,7 +238,7 @@ def annotate_sex(
- Y_karyotype (str): Sample's chromosome Y karyotype.
- sex_karyotype (str): Sample's sex karyotype.

:param mt: Input MatrixTable
:param mtds: Input MatrixTable or VariantDataset
:param bool is_sparse: Whether input MatrixTable is in sparse data format
:param excluded_intervals: Optional table of intervals to exclude from the computation.
:param included_intervals: Optional table of intervals to use in the computation. REQUIRED for exomes.
Expand All @@ -252,14 +252,38 @@ def annotate_sex(
:return: Table of samples and their imputed sex karyotypes.
"""
logger.info("Imputing sex chromosome ploidies...")
if is_sparse:
ploidy_ht = impute_sex_ploidy(
mt, excluded_intervals, included_intervals, normalization_contig

is_vds = isinstance(mtds, hl.vds.VariantDataset)
if is_vds:
if excluded_intervals is not None:
raise NotImplementedError(
"The use of the parameter 'excluded_intervals' is currently not implemented for imputing sex chromosome ploidy on a VDS!"
)
ploidy_ht = hl.vds.impute_sex_chromosome_ploidy(
jkgoodrich marked this conversation as resolved.
Show resolved Hide resolved
mtds,
calling_intervals=included_intervals,
normalization_contig=normalization_contig,
)
else:
raise NotImplementedError(
"Imputing sex ploidy does not exist yet for dense data."
ploidy_ht = ploidy_ht.rename(
{
"x_ploidy": "chrX_ploidy",
"y_ploidy": "chrY_ploidy",
"x_mean_dp": "chrX_mean_dp",
"y_mean_dp": "chrY_mean_dp",
"autosomal_mean_dp": f"{normalization_contig}_mean_dp",
}
wlu04 marked this conversation as resolved.
Show resolved Hide resolved
)
mt = mtds.variant_data
else:
mt = mtds
if is_sparse:
ploidy_ht = impute_sex_ploidy(
mt, excluded_intervals, included_intervals, normalization_contig
)
else:
raise NotImplementedError(
"Imputing sex ploidy does not exist yet for dense data."
)

x_contigs = get_reference_genome(mt.locus).x_contigs
logger.info("Filtering mt to biallelic SNPs in X contigs: %s", x_contigs)
Expand All @@ -269,8 +293,15 @@ def annotate_sex(
mt = mt.filter_rows(
(hl.len(mt.alleles) == 2) & hl.is_snp(mt.alleles[0], mt.alleles[1])
)

build = get_reference_genome(mt.locus).name
mt = hl.filter_intervals(
mt, [hl.parse_locus_interval(contig) for contig in x_contigs]
mt,
[
hl.parse_locus_interval(contig, reference_genome=build)
for contig in x_contigs
],
keep=True,
)

if sites_ht is not None:
Expand Down
3 changes: 2 additions & 1 deletion gnomad/utils/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
from typing import Callable, List, Dict, Optional, Tuple, Union

import hail as hl
from hailtop.aiotools import LocalAsyncFS, RouterAsyncFS, AsyncFS
from hailtop.aiotools import LocalAsyncFS, AsyncFS
from hailtop.aiotools.router_fs import RouterAsyncFS
from hailtop.aiogoogle import GoogleStorageAsyncFS
from hailtop.utils import bounded_gather, tqdm

Expand Down