-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFigure_4
154 lines (127 loc) · 5.75 KB
/
Figure_4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#The source code to reproduce Figure 4.
library(tidyverse)
library(furrr)
library(patchwork)
library(Seurat)
library(SingleCellExperiment)
library(scMerge)
library(spectral)
library(tradeSeq)
set.seed(8)
theme_set(theme_bw(base_size = 20))
WT <- readRDS("../231210_trypanosoma/data/WT_sce_merged.rds")
KO <- readRDS("../231210_trypanosoma/data/KO_sce.rds")
WT$batch <- "WT"
KO$batch <- "KO"
combined <- sce_cbind(list(WT, KO), batch_names = c("WT", "KO"))
combined$Pseudotime <- c(WT$slingPseudotime_1, KO$slingPseudotime_1)
pseudotime <- data.frame(WT = combined$Pseudotime, KO = combined$Pseudotime, row.names = colnames(combined))
cellWeights <- data.frame(WT = rep(c(1, 0), c(ncol(WT), ncol(KO))),
KO = rep(c(0, 1), c(ncol(WT), ncol(KO))))
load("../231205_trypanosoma/sce.RData")
counts <- assay(sce)
libsizes <- colSums(counts)
size.factors <- libsizes/mean(libsizes)
logcounts(sce) <- log2(t(t(counts)/size.factors) + 1)
plotSmoothers(sce, counts = counts, gene = "Tbrucei---Tb927.7.2660")
which(rownames(counts) == "Tbrucei---Tb927.7.2660")
plot(counts(KO[2634,]))
plot(logcounts(KO[2634,]))
plot(logcounts(WT[2634,]))
plot(counts[2634,] %>% as.numeric())
which(rownames(sce) == "Tbrucei---Tb927.7.2660")
which(rownames(sce) == "Tbrucei---Tb927.7.2660")
ZC3H20 <- sce[2634,]
ZC3H20@assays@data@listData[[c("counts", "logcounts")]]
data <- tibble(cell = colnames(ZC3H20@assays@data@listData[["counts"]]),
batch = sce$crv$cellWeights.WT %>% str_replace_all(c("1" = "WT", "0" = "KO")),
Pseudotime = c(WT$slingPseudotime_1, KO$slingPseudotime_1),
counts = as.numeric(ZC3H20@assays@data@listData[["counts"]]),
logcounts = as.numeric(ZC3H20@assays@data@listData[["logcounts"]]))
ggplot(data, aes(x = Pseudotime, y = logcounts, colour = batch)) +
geom_point()
assoRes <- associationTest(sce)
startRes <- startVsEndTest(sce)
endRes <- diffEndTest(sce)
patternRes <- patternTest(sce)
earlyDERes <- earlyDETest(sce, knots = c(1, 2))
#LS
min.freq <- 0.2
max.freq <- 1
length.freq <- 101
LS <- function(g, time1, exp1, time2 = NULL, exp2 = NULL, min.freq = 0, max.freq = 1, length.freq = 101, m = "generalized", sim.num = 1000){
.quiet <- function(x){
sink(tempfile())
on.exit(sink())
invisible(force(x))
}
time1 <- time1[!is.infinite(time1)]
exp1 <- exp1[,!is.infinite(time1)]
if(is.null(time2)){
seq1 <- exp1[which(rownames(exp1) == g),]
ls1 <- spec.lomb(x = time1, y = seq1,
f = seq(min.freq, max.freq, length = length.freq), mode = m) %>% .quiet
return(tibble(gene = g, p.dynamic = min(ls1$p), p.de = NA))
}else{
time2 <- time2[!is.infinite(time2)]
exp2 <- exp2[,!is.infinite(time2)]
seq1 <- exp1[which(rownames(exp1) == g),]
seq2 <- exp2[which(rownames(exp2) == g),]
ls1 <- spec.lomb(x = time1, y = seq1,
f = seq(min.freq, max.freq, length = length.freq), mode = m) %>% .quiet
ls2 <- spec.lomb(x = time2, y = seq2,
f = seq(min.freq, max.freq, length = length.freq), mode = m) %>% .quiet
null.hypothesis <- NULL
for(i in seq(sim.num)){
sim.ls1 <- spec.lomb(x = sample(time1, length(time1)),
y = seq1,
f = seq(min.freq, max.freq, length = length.freq), mode = m) %>% .quiet
sim.ls2 <- spec.lomb(x = sample(time2, length(time2)),
y = seq2,
f = seq(min.freq, max.freq, length = length.freq), mode = m) %>% .quiet
null.hypothesis <- c(null.hypothesis, dist(rbind(sim.ls1$A, sim.ls2$A), method = "euclidean"))
}
p <- sum(dist(rbind(ls1$A, ls2$A), method = "euclidean") < null.hypothesis) / sim.num
return(tibble(gene = g, p.dynamic = min(c(ls1$p, ls2$p)), p.de = p))
}
}
plan(multisession, workers = 32)
Sys.time()
res.LS_euclidean <- future_map_dfr(rownames(WT), LS, WT$slingPseudotime_1, as.matrix(WT@assays@data@listData[["logcounts"]]),
KO$slingPseudotime_1, as.matrix(KO@assays@data@listData[["logcounts"]]),
.options = furrr_options(seed = 8), .progress = TRUE)
Sys.time()
save(res.LS_euclidean, file = "res.Ls_euclidean.RData")
load("res.Ls_euclidean.RData")
res <- tibble(method = rep(c("LS", "tradeSeq (diffEndTest)", "tradeSeq (patternTest)", "tradeSeq (earlyDETest)"), c(nrow(res.LS_euclidean), nrow(endRes), nrow(patternRes), nrow(earlyDERes))),
gene = c(res.LS_euclidean$gene, rownames(endRes), rownames(patternRes), rownames(earlyDERes)),
p_value = c(res.LS_euclidean$p.de, endRes$pvalue, patternRes$pvalue, earlyDERes$pvalue))
res.wide <- res %>% pivot_wider(names_from = method, values_from = p_value) %>%
mutate(gene.name = case_when(gene == "Tbrucei---Tb927.7.2660" ~ "ZC3H20", TRUE ~ ""))
g1 <- ggplot(res.wide, aes(x = LS, y = `tradeSeq (diffEndTest)`, label = gene.name)) +
geom_point() +
ggrepel::geom_text_repel(aes(label = gene.name), max.overlaps = 100000, color = "magenta") +
scale_x_log10() +
scale_y_log10() +
xlab("Lomb-Scargle's p") +
ylab("tradeSeq's p (diffEndTest)")
g1
g2 <- ggplot(res.wide, aes(x = LS, y = `tradeSeq (patternTest)`, label = gene.name)) +
geom_point() +
ggrepel::geom_text_repel(aes(label = gene.name), max.overlaps = 100000, color = "magenta") +
scale_x_log10() +
scale_y_log10() +
xlab("Lomb-Scargle's p") +
ylab("tradeSeq's p (patternTest)")
g2
g3 <- ggplot(res.wide, aes(x = LS, y = `tradeSeq (earlyDETest)`, label = gene.name)) +
geom_point() +
ggrepel::geom_text_repel(aes(label = gene.name), max.overlaps = 100000, color = "magenta") +
scale_x_log10() +
scale_y_log10() +
xlab("Lomb-Scargle's p") +
ylab("tradeSeq's p (earlyDETest)")
g3
g <- g1 + g2 + g3
g
ggsave(g, file = "./plots/de_euclidean.pdf", width = 30, height = 10)