From e559c8189a3c1c0d68a4a7d642543717f48b0877 Mon Sep 17 00:00:00 2001 From: Chiara Rasi Date: Mon, 19 Feb 2024 12:06:08 +0100 Subject: [PATCH] Speed up function --- src/chanjo2/endpoints/coverage.py | 4 ++-- src/chanjo2/meta/handle_d4.py | 30 ++++++++++++++++-------------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/chanjo2/endpoints/coverage.py b/src/chanjo2/endpoints/coverage.py index adf15620..a309450f 100644 --- a/src/chanjo2/endpoints/coverage.py +++ b/src/chanjo2/endpoints/coverage.py @@ -53,8 +53,8 @@ def d4_interval_coverage(query: FileCoverageQuery): if None in [query.start, query.end]: # Coverage over an entire chromosome return IntervalCoverage( mean_coverage=get_d4tools_chromosome_mean_coverage( - d4_file_path=query.coverage_file_path, chromosome=query.chromosome - ), + d4_file_path=query.coverage_file_path, chromosomes=[query.chromosome] + )[0][1], completeness={}, interval_id=interval, ) diff --git a/src/chanjo2/meta/handle_d4.py b/src/chanjo2/meta/handle_d4.py index 68f9829e..91e9b986 100644 --- a/src/chanjo2/meta/handle_d4.py +++ b/src/chanjo2/meta/handle_d4.py @@ -46,18 +46,23 @@ def get_intervals_coords_list( return interval_coords -def get_d4tools_chromosome_mean_coverage(d4_file_path: str, chromosome=str) -> float: - """Return mean coverage over one entire chromosome.""" +def get_d4tools_chromosome_mean_coverage( + d4_file_path: str, chromosomes=List[str] +) -> List[Tuple[str, float]]: + """Return mean coverage over entire chromosomes.""" chromosomes_stats_mean_cmd: List[str] = subprocess.check_output( ["d4tools", "stat", "-s" "mean", d4_file_path], text=True, ).splitlines() - + chromosomes_coverage: List[Tuple[str, float]] = [] for line in chromosomes_stats_mean_cmd: stats_data: List[str] = line.split("\t") - if chromosome == stats_data[CHROM_INDEX]: - return float(stats_data[STATS_MEAN_COVERAGE_INDEX]) + if stats_data[CHROM_INDEX] in chromosomes: + chromosomes_coverage.append( + (stats_data[CHROM_INDEX], float(stats_data[STATS_MEAN_COVERAGE_INDEX])) + ) + return chromosomes_coverage def get_d4tools_intervals_mean_coverage( @@ -345,17 +350,14 @@ def predict_sex(x_cov: float, y_cov: float) -> str: def get_samples_sex_metrics(d4_file_path: str) -> Dict: """Compute coverage over sex chromosomes and predicted sex.""" - sex_chroms_coverage: List[float] = [ - get_d4tools_chromosome_mean_coverage( - d4_file_path=d4_file_path, chromosome=chrom - ) - for chrom in ["X", "Y"] - ] + sex_chroms_coverage: List[Tuple[str, float]] = get_d4tools_chromosome_mean_coverage( + d4_file_path=d4_file_path, chromosomes=["X", "Y"] + ) return { - "x_coverage": round(sex_chroms_coverage[0], 1), - "y_coverage": round(sex_chroms_coverage[1], 1), + "x_coverage": round(sex_chroms_coverage[0][1], 1), + "y_coverage": round(sex_chroms_coverage[1][1], 1), "predicted_sex": predict_sex( - x_cov=sex_chroms_coverage[0], y_cov=sex_chroms_coverage[1] + x_cov=sex_chroms_coverage[0][1], y_cov=sex_chroms_coverage[1][1] ), }