Skip to content

Commit

Permalink
fix: #82 double p value adjustment (#84)
Browse files Browse the repository at this point in the history
* fix: removed statsmodel dependency

* fix: removed code, which double adjusted the p-values

* fix: removed output reporting p-value adjustment
  • Loading branch information
cmeesters authored Sep 9, 2024
1 parent 6091e15 commit 1d6bfc7
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 19 deletions.
1 change: 0 additions & 1 deletion workflow/envs/pydeseq2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,3 @@ dependencies:
- anndata=0.10.8
- pydeseq2=0.4.10
- seaborn>=0.13.2
- statsmodels>=0.14.2
1 change: 0 additions & 1 deletion workflow/rules/diffexp.smk
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ rule de_analysis:
correlation_matrix="de_analysis/correlation_matrix.svg",
normalized_counts="de_analysis/normalized_counts.csv",
de_top_heatmap="de_analysis/heatmap_top.svg",
statistics="de_analysis/statistics.log",
lfc_analysis="de_analysis/lfc_analysis.csv",
params:
samples=samples,
Expand Down
21 changes: 4 additions & 17 deletions workflow/scripts/de_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
import pandas as pd
import seaborn as sns
import scipy.spatial as sp, scipy.cluster.hierarchy as hc
import statsmodels.api as sm


from snakemake.exceptions import WorkflowError
Expand Down Expand Up @@ -105,31 +104,19 @@
log2foldchange = stat_res.results_df["log2FoldChange"]
# 'pvalue' is a pandas series, linear, of length(number of aligned loci)
pvalue = stat_res.results_df["pvalue"]

# p-value adjustment for independent p-values
# the return value is a tuple:
# (array of bools, np.floats), both of length(number of aligned loci)
cutoff = sm.stats.multipletests(
pvalue, alpha=0.05, method="hs", maxiter=1, is_sorted=False, returnsorted=False
)

with open(snakemake.output.statistics, "w") as statslog:
statslog.write(
f"Found {cutoff[0].sum()} values about the alpha threshold." + os.linesep
)
statslog.write(f"The total number of tests was {cutoff[1].shape[0]}." + os.linesep)
padj = stat_res.results_df["padj"]

normalized = normalized.join(log2foldchange)
# normalized = normalized.join(np.array(pvalue[1]))
normalized = normalized.join(pvalue)
normalized = normalized.join(padj)

normalized.sort_values(by="log2FoldChange")
# delete rows, which do not meet our p-value criterion
# the comparison operator is >= because we drop all values >= our desired alpha
normalized.drop(normalized[cutoff[1] >= snakemake.config["alpha"]].index, inplace=True)
normalized.drop(normalized[padj >= snakemake.config["alpha"]].index, inplace=True)
# through away these columns
normalized.drop("log2FoldChange", axis=1, inplace=True)
normalized.drop("pvalue", axis=1, inplace=True)
normalized.drop("padj", axis=1, inplace=True)

normalized.to_csv(snakemake.output.normalized_counts)
normalized.dropna(inplace=True)
Expand Down

0 comments on commit 1d6bfc7

Please sign in to comment.