Skip to content

Commit

Permalink
feat(customization): mostly scriptizing for snakemake
Browse files Browse the repository at this point in the history
  • Loading branch information
cmeesters committed Mar 6, 2024
1 parent f6b27a1 commit 354a91c
Showing 1 changed file with 44 additions and 31 deletions.
75 changes: 44 additions & 31 deletions workflow/scripts/de_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,42 +42,57 @@
n_cpus=ncpus,
)
# fit dispersion(s) and LFCs
# this - fits the size factors
# - the genewise dispersion
# - the prior dispersion variance
# - MAP dispersion
# - log2fold change
# - cooks distances
dds.deseq2()

# compute normalization factors
dds.fit_size_factors()
#dds.fit_size_factors()
# fitting genewise dispersions
dds.fit_genewise_dispersions()
#dds.fit_genewise_dispersions()

dds.plot_dispersions(save_path=f"{snakemake.output.dispersion_graph}")

# compute p-values for our dispersion
stat_res = DeseqStats(dds, n_cpus=ncpus)

# performing LFC shrinkage
stat_res.lfc_shrink(coeff=f"condition_{b_condition}_vs_{a_condition}")
# a_condition:
a_condition = snakemake.config["condition_a_identifier"]
# b_condition
b_condition = snakemake.config["condition_b_identifier"]

# run Wald test and plot, perform optional threshold tests, if wanted
if snakemake.config["lfc_null"] or snakemake.config["alt_hypothesis"]:
summary = stat_res(lfc_null = snakemake.config.get("lfc_null", default=0),
alt_hypothesis = snakemake.config.get("alt_hypothesis", default=None))
if "lfc_null" in snakemake.config or "alt_hypothesis" in snakemake.config:
summary = stat_res.summary(lfc_null = snakemake.config.get("lfc_null", 0),
alt_hypothesis = snakemake.config.get("alt_hypothesis", None))
else:
summary = stat_res.summary()
# performing LFC shrinkage
stat_res.lfc_shrink(coeff=f"condition_{b_condition}_vs_{a_condition}")

stat_res.results_df.to_csv(snakemake.output.lfc_analysis)

stat_res.plot_MA(s=snakemake.config["point_width"], save_path=f"{snakemake.output.ma_graph}")
stat_res.plot_MA(s=snakemake.config["point_width"],
save_path=f"{snakemake.output.ma_graph}")

# create a clustermap, based on normalized counts
#dds_df = dds.to_df()
#ds_df.to_csv('dds_df.csv'
# getting and applying the scaling factors
sf = dds.obsm["size_factors"]

normalized = counts_df.values.T * sf

# transpose back
normalized = normalized.T

# append log2fold and pvalue columns
normalized.join(stats_res.results_df["log2FoldChange"])
normalized.join(stats_res.results_df["pvalue"])

# get indices where the matrix is 0 - may happen
#zero_indices = np.argwhere(normalized == 0)
# TODO: try implementing imputation
Expand All @@ -90,10 +105,6 @@

normalized = pd.DataFrame(normalized, index = row_names,
columns = column_names)
# a_condition:
a_condition = snakemake.config["condition_a_identifier"]
# b_condition
b_condition = snakemake.config["condition_b_identifier"]

# Order columns according to traits - generally column
# order can be arbitrary, but for the headmap, we want.
Expand All @@ -111,35 +122,37 @@
# final orientation and order
normalized = normalized.T[samples]


# get the means of our conditions
a_mean = normalized[a_samples].mean(axis=1)
b_mean = normalized[b_samples].mean(axis=1)
#a_mean = normalized[a_samples].mean(axis=1)
#b_mean = normalized[b_samples].mean(axis=1)

a_over_b = a_mean/b_mean
b_over_a = b_mean/a_mean
#a_over_b = a_mean/b_mean
#b_over_a = b_mean/a_mean
# which is bigger?
ratio = list(None for _ in a_over_b)
for index, state in enumerate(a_over_b.ge(b_over_a)):
#ratio = list(None for _ in a_over_b)
#for index, state in enumerate(a_over_b.ge(b_over_a)):
# enter ratio, but check for non-inf-ness, first:
print(a_over_b[index], b_over_a[index])
if state:
# print(a_over_b[index], b_over_a[index])
# if state:
# if np.isinf(a_over_b[index]):
# normalized.drop(index, inplace=True)
ratio[index] = a_over_b[index]
else:
# ratio[index] = a_over_b[index]
# else:
# if np.isinf(b_over_a[index]):
# normalized.drop(index, inplace=True)
ratio[index] = b_over_a[index]
# ratio[index] = b_over_a[index]

normalized["ratio"] = ratio
normalized = normalized[~np.all(normalized == np.inf, axis=1)]
print(normalized)
# now sort according to the ratio
normalized.sort_values("ratio", )
#normalized["ratio"] = ratio
#normalized = normalized[~np.all(normalized == np.inf, axis=1)]
#print(normalized)
# now sort according to the log2foldchange
normalized.sort_values(by="log2FoldChange")
# delete rows, which do not meet our p-value criterion
normalized.drop(normalized["pvalue"] > 0.05, inplace=True)
# through away this column
normalized.drop(["ratio"], axis=1, inplace=True)
#normalized.drop(["ratio"], axis=1, inplace=True)

print(normalized)
#normalized.loc[normalized.index.difference(normalized.dropna(how='all').index)]
#print(normalized)

Expand Down

0 comments on commit 354a91c

Please sign in to comment.