-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFigure_3
552 lines (513 loc) · 29.4 KB
/
Figure_3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
#The source code to reproduce Figure 3.
library(tidyverse)
library(furrr)
library(patchwork)
library(dyno)
library(dyngen)
library(spectral)
library(monocle3)
library(mclust)
library(slingshot)
library(tradeSeq)
library(Lamian)
library(plotROC)
theme_set(theme_bw(base_size = 20))
num.targets <- 250
num.hks <- 250
ko.rate <- 0
num.cores <- 8
res <- tibble(backbone = NA,
gene = NA,
pt.method = NA,
method = NA,
answer = NA,
num_cells = NA,
p = NA)
pt <- tibble(backbone = NA,
pt.method = NA,
sample = NA,
cell.num = NA,
cell = NA,
pseudotime = NA)
cell.range <- c(250, 500, 1000)
#parameters and functions
min.freq <- 0.2
max.freq <- 1
length.freq <- 101
sim.num <- 100
ko_gene <- "C1_TF1"
FQnorm <- function(counts){
rk <- apply(counts,2,rank,ties.method='min')
counts.sort <- apply(counts,2,sort)
refdist <- apply(counts.sort,1,median)
norm <- apply(rk,2,function(r){ refdist[r] })
rownames(norm) <- rownames(counts)
return(norm)
}
LS <- function(g, time1, exp1, time2 = NULL, exp2 = NULL, min.freq = 0, max.freq = 1, length.freq = 101, m = "generalized", sim.num = 100){
.quiet <- function(x){
sink(tempfile())
on.exit(sink())
invisible(force(x))
}
time1 <- time1[!is.infinite(time1)]
exp1 <- exp1[,!is.infinite(time1)]
if(is.null(time2)){
seq1 <- exp1[which(rownames(exp1) == g),]
ls1 <- spec.lomb(x = time1, y = seq1,
f = seq(min.freq, max.freq, length = length.freq), mode = m) %>% .quiet
return(tibble(gene = g, p.dynamic = min(ls1$p), p.de = NA))
}else{
time2 <- time2[!is.infinite(time2)]
exp2 <- exp2[,!is.infinite(time2)]
seq1 <- exp1[which(rownames(exp1) == g),]
seq2 <- exp2[which(rownames(exp2) == g),]
ls1 <- spec.lomb(x = time1, y = seq1,
f = seq(min.freq, max.freq, length = length.freq), mode = m) %>% .quiet
ls2 <- spec.lomb(x = time2, y = seq2,
f = seq(min.freq, max.freq, length = length.freq), mode = m) %>% .quiet
null.hypothesis <- NULL
for(i in seq(sim.num)){
sim.ls1 <- spec.lomb(x = sample(time1, length(time1)),
y = seq1,
f = seq(min.freq, max.freq, length = length.freq), mode = m) %>% .quiet
sim.ls2 <- spec.lomb(x = sample(time2, length(time2)),
y = seq2,
f = seq(min.freq, max.freq, length = length.freq), mode = m) %>% .quiet
null.hypothesis <- c(null.hypothesis, dist(rbind(sim.ls1$A, sim.ls2$A), method = "canberra"))
}
p <- sum(dist(rbind(ls1$A, ls2$A), method = "canberra") < null.hypothesis) / sim.num
return(tibble(gene = g, p.dynamic = min(c(ls1$p, ls2$p)), p.de = p))
}
}
plot.each <- function(target){
print(target)
d <- pt %>% filter(backbone == "linear", pt.method == "Ground_truth", cell.num == num.cells) %>%
add_column(Expression = c(wt.expression[which(rownames(wt.expression) == target),],
ko.expression[which(rownames(ko.expression) == target),]))
d$sample <- factor(d$sample, levels = c("WT", "KO"))
text <- str_c("Ground_truth: LS's p=",
res %>% filter(gene == target, pt.method == "Ground_truth", method == "LS", num_cells == num.cells)
%>% .$p %>% as.numeric,
", Lamian's p=",
res %>% filter(gene == target, pt.method == "Ground_truth", method == "Lamian", num_cells == num.cells)
%>% .$p %>% as.numeric %>% round(digits = 2))
g1 <- ggplot(d, aes(x = pseudotime, y = Expression, colour = sample))
g1 <- g1 + geom_point() + ggtitle(target, subtitle = text)
g1 <- g1 + xlab("Pseudotime") + theme(plot.subtitle = element_text(size = 10))
g1
ggsave(g1, file = str_c("./genes/", target, ".pdf"), dpi = 300)
}
#data generation
for(model in c("linear", "bifurcating")){
set.seed(1)
if(model == "linear"){
backbone <- backbone_linear()
}else{
backbone <- backbone_bifurcating()
}
config <- initialise_model(backbone = backbone,
num_cells = last(cell.range),
num_tfs = nrow(backbone$module_info),
num_targets = num.targets,
num_hks = num.hks,
num_cores = num.cores,
simulation_params = simulation_default(census_interval = 10,
ssa_algorithm = ssa_etl(tau = 300 / 3600),
experiment_params = simulation_type_wild_type(num_simulations = 100)))
ggsave(plot_backbone_modulenet(config), file = str_c("./files/", model, ".pdf"), dpi = 300)
model_common <- config %>%
generate_tf_network() %>%
generate_feature_network() %>%
generate_kinetics() %>%
generate_gold_standard()
model_wt <- model_common %>% generate_cells()
model_ko <- model_common
model_ko$simulation_params$experiment_params <- simulation_type_knockdown(num_simulations = 100,
timepoint = 0,
genes = ko_gene,
num_genes = 1,
multiplier = ko.rate)
model_ko <- model_ko %>% generate_cells()
wt <- model_wt %>% generate_experiment() %>% as_dyno() %>% add_root(root_milestone_id = "sA") %>% add_pseudotime(pseudotime = NULL)
ko <- model_ko %>% generate_experiment() %>% as_dyno() %>% add_root(root_milestone_id = "sA") %>% add_pseudotime(pseudotime = NULL)
wt.counts <- wt$counts %>% as.matrix() %>% t()
ko.counts <- ko$counts %>% as.matrix() %>% t()
counts <- cbind(wt.counts, ko.counts)
answer <- rep(0, nrow(counts))
de_genes <- c(ko_gene, model_wt$feature_network %>% filter(str_detect(from, "C")) %>% pull(to))
for(i in seq(nrow(counts))) if(wt$feature_ids[i] %in% de_genes) answer[i] <- 1
if(model == "linear"){
save(wt, file = "./files/linear_wt.RData")
save(ko, file = "./files/linear_ko.RData")
save(answer, file = "./files/answer_linear.RData")
}else{
save(wt, file = "./files/bifurcating_wt.RData")
save(ko, file = "./files/bifurcating_ko.RData")
save(answer, file = "./files/answer_bifurcating.RData")
}
}
set.seed(8)
for(model in c("linear", "bifurcating")){
load(str_c("./files/", model, "_wt.RData"))
load(str_c("./files/", model, "_ko.RData"))
load(str_c("./files/answer_", model, ".RData"))
for(num.cells in cell.range){
print(sum(is.na(res$method)))
print(num.cells)
.sampling <- sample(1:1000, num.cells) %>% sort
wt.expression <- wt$expression[.sampling,] %>% as.matrix() %>% t()
wt.counts <- wt$counts[.sampling,] %>% as.matrix() %>% t()
wt.pseudotime <- wt$pseudotime[.sampling]
ko.expression <- ko$expression[.sampling,] %>% as.matrix() %>% t()
ko.counts <- ko$counts[.sampling,] %>% as.matrix() %>% t()
ko.pseudotime <- ko$pseudotime[.sampling]
counts <- cbind(wt.counts, ko.counts)
pt <- pt %>% add_row(backbone = model,
pt.method = "Ground_truth",
sample = rep(c("WT", "KO"), each = num.cells),
cell.num = num.cells,
cell = colnames(counts),
pseudotime = c(wt.pseudotime, ko.pseudotime))
d <- tibble(Component1 = wt$dimred[.sampling, 1], Component2 = wt$dimred[.sampling, 2], pseudotime = wt.pseudotime / max(wt.pseudotime))
g <- ggplot(d, aes(x = Component1, y = Component2, colour = pseudotime))
g <- g + geom_point() + ggtitle(str_c("Grounrtruth, ", num.cells, "cells")) + labs(subtitle = "WT") + theme(legend.position = 'none')
d <- tibble(Component1 = ko$dimred[.sampling, 1], Component2 = ko$dimred[.sampling, 2], pseudotime = ko.pseudotime / max(ko.pseudotime))
g2 <- ggplot(d, aes(x = Component1, y = Component2, colour = pseudotime))
g2 <- g2 + geom_point() + labs(subtitle = "KO") + theme(legend.position = "bottom")
print(g + g2)
ggsave(g + g2, file = str_c("./plots/Grounrtruth_", model, "_", num.cells, "cells.pdf"), dpi = 300)
#LS
plan(multisession)
res.LS <- future_map_dfr(rownames(wt.expression), LS, wt.pseudotime, wt.expression,
ko.pseudotime, ko.expression, .options = furrr_options(seed = 8))
#Lamian
.expression <- cbind(wt.expression, ko.expression)
colnames(.expression) <- c(str_c("WT_", colnames(wt.expression)), str_c("KO_", colnames(ko.expression)))
.pseudotime <- c(wt.pseudotime, ko.pseudotime)
names(.pseudotime) <- c(str_c("WT_", colnames(wt.expression)), str_c("KO_", colnames(ko.expression)))
.cellanno <- data.frame(Cell = colnames(.expression),
Sample = rep(c("WT", "KO"), each = num.cells / 2))
.design <- matrix(c(1, 1, 0, 1), ncol = 2, dimnames = list(c("WT", "KO"), c("intercept", "group")))
res.lamian <- lamian_test(expr = .expression,
cellanno = .cellanno,
pseudotime = .pseudotime,
design = .design,
test.type = 'variable',
overall.only = TRUE,
test.method = "chisq",
permuiter = 100,
ncores = num.cores)
#tradeSeq
colnames(counts) <- colnames(.expression)
.condition <- factor(rep(c("WT", "KO"), each = num.cells), levels = c("WT", "KO"))
.pseudotime <- matrix(c(wt.pseudotime, rep(0, num.cells), rep(0, num.cells), ko.pseudotime), ncol = 2)
.cellweights <- matrix(c(rep(1, num.cells), rep(0, num.cells), rep(0, num.cells), rep(1, num.cells)), ncol = 2)
gam <- fitGAM(counts = counts, pseudotime = .pseudotime,
cellWeights = .cellweights,
nknots = 6, parallel = TRUE)
res.tradeseq.diffEnd <- diffEndTest(gam)
res.tradeseq.pattern <- patternTest(gam)
res.tradeseq.earlyDE <- earlyDETest(gam, knots = c(1, 3))
res <- res %>% add_row(backbone = model,
gene = rep(rownames(counts), 5),
pt.method = "Ground_truth",
method = c(rep("LS", nrow(res.LS)),
rep("Lamian", length(res.lamian$statistics$pval.chisq.overall)),
rep("tradeSeq (diffEnd)", nrow(res.tradeseq.diffEnd)),
rep("tradeSeq (pattern)", nrow(res.tradeseq.pattern)),
rep("tradeSeq (earlyDE)", nrow(res.tradeseq.earlyDE))),
answer = rep(answer, 5),
num_cells = num.cells,
p = c(res.LS$p.de, res.lamian$statistics$pval.chisq.overall,
res.tradeseq.diffEnd$pvalue,
res.tradeseq.pattern$pvalue,
res.tradeseq.earlyDE$pvalue))
#Monocle3
gene.metadata <- data.frame(id = wt$feature_info$feature_id, gene_short_name = wt$feature_info$feature_id)
rownames(gene.metadata) <- gene.metadata$id
monocle3.wt <- new_cell_data_set(wt.counts, gene_metadata = gene.metadata) %>%
preprocess_cds(num_dim = 50) %>%
reduce_dimension(preprocess_method = "PCA", reduction_method = "UMAP") %>%
cluster_cells() %>%
learn_graph() %>%
order_cells(root_cells = names(which.min(wt.pseudotime)))
wt.pseudotime.monocle3 <- monocle3.wt@principal_graph_aux@listData$UMAP$pseudotime / max(monocle3.wt@principal_graph_aux@listData$UMAP$pseudotime)
monocle3.ko <- new_cell_data_set(ko.counts, gene_metadata = gene.metadata) %>%
preprocess_cds(num_dim = 50) %>%
reduce_dimension(preprocess_method = "PCA", reduction_method = "UMAP") %>%
cluster_cells() %>%
learn_graph() %>%
order_cells(root_cells = names(which.min(ko.pseudotime)))
ko.pseudotime.monocle3 <- monocle3.ko@principal_graph_aux@listData$UMAP$pseudotime / max(monocle3.ko@principal_graph_aux@listData$UMAP$pseudotime)
pt <- pt %>% add_row(backbone = model,
pt.method = "monocle3",
sample = rep(c("WT", "KO"), each = num.cells),
cell.num = num.cells,
cell = colnames(counts),
pseudotime = c(wt.pseudotime.monocle3, ko.pseudotime.monocle3))
d <- tibble(Component1 = wt$dimred[.sampling, 1], Component2 = wt$dimred[.sampling, 2], pseudotime = wt.pseudotime.monocle3 / max(wt.pseudotime.monocle3))
g <- ggplot(d, aes(x = Component1, y = Component2, colour = pseudotime))
g <- g + geom_point() + ggtitle(str_c("monocle3, ", num.cells, "cells")) + labs(subtitle = "WT") + theme(legend.position = 'none')
d <- tibble(Component1 = ko$dimred[.sampling, 1], Component2 = ko$dimred[.sampling, 2], pseudotime = ko.pseudotime.monocle3 / max(ko.pseudotime.monocle3))
g2 <- ggplot(d, aes(x = Component1, y = Component2, colour = pseudotime))
g2 <- g2 + geom_point() + labs(subtitle = "KO") + theme(legend.position = "bottom")
ggsave(g + g2, file = str_c("./plots/monocle3_", model, "_", num.cells, "cells.pdf"), dpi = 300)
#LS
plan(multisession)
res.LS <- future_map_dfr(rownames(wt.expression), LS, wt.pseudotime.monocle3, wt.expression,
ko.pseudotime.monocle3, ko.expression, .options = furrr_options(seed = 8))
#Lamian
.expression <- cbind(wt.expression, ko.expression)
colnames(.expression) <- c(str_c("WT_", colnames(wt.expression)), str_c("KO_", colnames(ko.expression)))
.pseudotime <- c(wt.pseudotime.monocle3, ko.pseudotime.monocle3)
names(.pseudotime) <- c(str_c("WT_", colnames(wt.expression)), str_c("KO_", colnames(ko.expression)))
.cellanno <- data.frame(Cell = colnames(.expression),
Sample = rep(c("WT", "KO"), each = num.cells / 2))
.design <- matrix(c(1, 1, 0, 1), ncol = 2, dimnames = list(c("WT", "KO"), c("intercept", "group")))
res.lamian <- lamian_test(expr = .expression,
cellanno = .cellanno,
pseudotime = .pseudotime,
design = .design,
test.type = 'variable',
overall.only = TRUE,
test.method = "chisq",
permuiter = 100,
ncores = num.cores)
#tradeSeq
colnames(counts) <- colnames(.expression)
.condition <- factor(rep(c("WT", "KO"), each = num.cells), levels = c("WT", "KO"))
.pseudotime <- matrix(c(wt.pseudotime.monocle3, rep(0, num.cells), rep(0, num.cells), ko.pseudotime.monocle3), ncol = 2)
.cellweights <- matrix(c(rep(1, num.cells), rep(0, num.cells), rep(0, num.cells), rep(1, num.cells)), ncol = 2)
gam <- fitGAM(counts = counts, pseudotime = .pseudotime,
cellWeights = .cellweights,
nknots = 6, parallel = TRUE)
res.tradeseq.diffEnd <- diffEndTest(gam)
res.tradeseq.pattern <- patternTest(gam)
res.tradeseq.earlyDE <- earlyDETest(gam, knots = c(1, 3))
res <- res %>% add_row(backbone = model,
gene = rep(rownames(counts), 5),
pt.method = "monocle3",
method = c(rep("LS", nrow(res.LS)),
rep("Lamian", length(res.lamian$statistics$pval.chisq.overall)),
rep("tradeSeq (diffEnd)", nrow(res.tradeseq.diffEnd)),
rep("tradeSeq (pattern)", nrow(res.tradeseq.pattern)),
rep("tradeSeq (earlyDE)", nrow(res.tradeseq.earlyDE))),
answer = rep(answer, 5),
num_cells = num.cells,
p = c(res.LS$p.de, res.lamian$statistics$pval.chisq.overall,
res.tradeseq.diffEnd$pvalue,
res.tradeseq.pattern$pvalue,
res.tradeseq.earlyDE$pvalue))
#slingshot
sce <- SingleCellExperiment(assays = List(counts = wt.counts, log2 = log2(wt.counts)))
assays(sce)$norm <- FQnorm(wt.counts)
.pca <- prcomp(t(log1p(assays(sce)$norm)), scale. = FALSE)
reducedDims(sce) <- SimpleList(PCA = .pca$x[,1:2])
colData(sce)$GMM <- Mclust(.pca$x[,1:2])$classification
wt.slingshot <- slingshot(sce, clusterLabels = 'GMM', reducedDim = 'PCA',
start.clus = colData(sce)$GMM[which(names(colData(sce)$GMM) == names(which.min(wt.pseudotime)))])
wt.cellWeights <- slingCurveWeights(wt.slingshot)
wt.pseudotime.slingshot <- slingPseudotime(wt.slingshot, na = FALSE)
.normWeights <- sweep(wt.cellWeights, 1, FUN = "/",
STATS = apply(wt.cellWeights, 1, sum))
wt.cellWeights.slingshot <- apply(.normWeights, 1, function(prob) {stats::rmultinom(n = 1, prob = prob, size = 1)}) %>% t()
.get.pseudotime <- function(value) return(sample(wt.pseudotime.slingshot[value,], 1, prob = as.numeric(wt.cellWeights.slingshot[value,])))
if(ncol(wt.pseudotime.slingshot) >= 2){
wt.pseudotime.slingshot <- sapply(seq(nrow(wt.pseudotime.slingshot)), .get.pseudotime)
names(wt.pseudotime.slingshot) <- colnames(sce)
}
sce <- SingleCellExperiment(assays = List(counts = ko.counts, log2 = log2(ko.counts)))
assays(sce)$norm <- FQnorm(ko.counts)
.pca <- prcomp(t(log1p(assays(sce)$norm)), scale. = FALSE)
reducedDims(sce) <- SimpleList(PCA = .pca$x[,1:2])
colData(sce)$GMM <- Mclust(.pca$x[,1:2])$classification
ko.slingshot <- slingshot(sce, clusterLabels = 'GMM', reducedDim = 'PCA',
start.clus = colData(sce)$GMM[which(names(colData(sce)$GMM) == names(which.min(ko.pseudotime)))])
ko.cellWeights <- slingCurveWeights(ko.slingshot)
ko.pseudotime.slingshot <- slingPseudotime(ko.slingshot, na = FALSE)
.normWeights <- sweep(ko.cellWeights, 1, FUN = "/",
STATS = apply(ko.cellWeights, 1, sum))
ko.cellWeights.slingshot <- apply(.normWeights, 1, function(prob) {stats::rmultinom(n = 1, prob = prob, size = 1)}) %>% t()
.get.pseudotime <- function(value) return(sample(ko.pseudotime.slingshot[value,], 1, prob = as.numeric(ko.cellWeights.slingshot[value,])))
if(ncol(ko.pseudotime.slingshot) >= 2){
ko.pseudotime.slingshot <- sapply(seq(nrow(ko.pseudotime.slingshot)), .get.pseudotime)
names(ko.pseudotime.slingshot) <- colnames(sce)
}
pt <- pt %>% add_row(backbone = model,
pt.method = "slingshot",
sample = rep(c("WT", "KO"), each = num.cells),
cell.num = num.cells,
cell = colnames(counts),
pseudotime = c(wt.pseudotime.slingshot, ko.pseudotime.slingshot))
d <- tibble(Component1 = wt$dimred[.sampling, 1], Component2 = wt$dimred[.sampling, 2], pseudotime = wt.pseudotime.slingshot / max(wt.pseudotime.slingshot))
g <- ggplot(d, aes(x = Component1, y = Component2, colour = pseudotime))
g <- g + geom_point() + ggtitle(str_c("slingshot, ", num.cells, "cells")) + labs(subtitle = "WT") + theme(legend.position = 'none')
d <- tibble(Component1 = ko$dimred[.sampling, 1], Component2 = ko$dimred[.sampling, 2], pseudotime = ko.pseudotime.slingshot / max(ko.pseudotime.slingshot))
g2 <- ggplot(d, aes(x = Component1, y = Component2, colour = pseudotime))
g2 <- g2 + geom_point() + labs(subtitle = "KO") + theme(legend.position = "bottom")
ggsave(g + g2, file = str_c("./plots/slingshot_", model, "_", num.cells, "cells.pdf"), dpi = 300)
#LS
plan(multisession)
res.LS <- future_map_dfr(rownames(wt.expression), LS, wt.pseudotime.slingshot, wt.expression, ko.pseudotime.slingshot, ko.expression, .options = furrr_options(seed = 8))
#Lamian
.expression <- cbind(wt.expression, ko.expression)
colnames(.expression) <- c(str_c("WT_", colnames(wt.expression)), str_c("KO_", colnames(ko.expression)))
.pseudotime <- c(wt.pseudotime.slingshot, ko.pseudotime.slingshot)
names(.pseudotime) <- c(str_c("WT_", colnames(wt.expression)), str_c("KO_", colnames(ko.expression)))
.cellanno <- data.frame(Cell = colnames(.expression),
Sample = rep(c("WT", "KO"), each = num.cells / 2))
.design <- matrix(c(1, 1, 0, 1), ncol = 2, dimnames = list(c("WT", "KO"), c("intercept", "group")))
res.lamian <- lamian_test(expr = .expression,
cellanno = .cellanno,
pseudotime = .pseudotime,
design = .design,
test.type = 'variable',
overall.only = TRUE,
test.method = "chisq",
permuiter = 100,
ncores = num.cores)
#tradeSeq
colnames(counts) <- colnames(.expression)
.condition <- factor(rep(c("WT", "KO"), each = num.cells), levels = c("WT", "KO"))
.pseudotime <- matrix(c(wt.pseudotime.slingshot, rep(0, num.cells), rep(0, num.cells), ko.pseudotime.slingshot), ncol = 2)
if(model == "linear"){
.cellweights <- matrix(c(rep(1, num.cells), rep(0, num.cells), rep(0, num.cells), rep(1, num.cells)), ncol = 2)
}else{
.cellweights <- rbind(wt.cellWeights, ko.cellWeights)
}
gam <- fitGAM(counts = counts, pseudotime = .pseudotime,
cellWeights = .cellweights,
nknots = 6, parallel = TRUE)
res.tradeseq.diffEnd <- diffEndTest(gam)
res.tradeseq.pattern <- patternTest(gam)
res.tradeseq.earlyDE <- earlyDETest(gam, knots = c(1, 3))
res <- res %>% add_row(backbone = model,
gene = rep(rownames(counts), 5),
pt.method = "slingshot",
method = c(rep("LS", nrow(res.LS)),
rep("Lamian", length(res.lamian$statistics$pval.chisq.overall)),
rep("tradeSeq (diffEnd)", nrow(res.tradeseq.diffEnd)),
rep("tradeSeq (pattern)", nrow(res.tradeseq.pattern)),
rep("tradeSeq (earlyDE)", nrow(res.tradeseq.earlyDE))),
answer = rep(answer, 5),
num_cells = num.cells,
p = c(res.LS$p.de, res.lamian$statistics$pval.chisq.overall,
res.tradeseq.diffEnd$pvalue,
res.tradeseq.pattern$pvalue,
res.tradeseq.earlyDE$pvalue))
}
}
res$method <- factor(res$method, levels = c("LS", "Lamian", "tradeSeq (diffEnd)", "tradeSeq (pattern)", "tradeSeq (earlyDE)"))
res <- res[-1,]
save(res, file = "./res/res.RData")
pt <- pt[-1,]
pt$cell <- pt$cell %>%
str_remove_all("WT_") %>%
str_remove_all("KO_")
save(pt, file = "./res/pt.RData")
#correlation of pseudotime
for(model in c("linear", "bifurcating")){
for(samp in c("WT", "KO")){
d <- rbind(inner_join(pt %>% filter(pt.method == "Ground_truth", backbone == model, sample == samp),
pt %>% filter(pt.method == "monocle3", backbone == model, sample == samp),
by = c("sample", "cell.num", "cell")),
inner_join(pt %>% filter(pt.method == "Ground_truth", backbone == model, sample == samp),
pt %>% filter(pt.method == "slingshot", backbone == model, sample == samp),
by = c("sample", "cell.num", "cell")))
g <- ggplot(d, aes(x = pseudotime.x, y = pseudotime.y)) +
geom_point() + facet_grid(pt.method.y ~ cell.num, scales = "free") +
ggtitle(str_c(model, ", ", samp)) + xlab("Pseudotime of groud truth") + ylab("Inferred pseudotime")
g
ggsave(g, file = str_c("./cor/cor_pseudotime_",samp, "_", model, ".pdf"), dpi = 300)
}
}
res$method <- factor(res$method, levels = c("LS", "tradeSeq (earlyDE)", "tradeSeq (diffEnd)", "tradeSeq (pattern)", "Lamian"))
for(model in c("linear", "bifurcating")){
g <- ggplot(res %>% filter(backbone == model), aes(d = answer, m = 1 - p, colour = pt.method)) +
geom_roc(n.cuts = FALSE, linealpha = 1) +
xlab("False positive rate") + ylab("True positive rate") + ggtitle(model) +
facet_grid(num_cells ~ method) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
theme(legend.position = "bottom", legend.title=element_blank())
roc <- calc_auc(g) %>% tibble
g <- ggplot(roc, aes(x = method, y = AUC, fill = method)) +
geom_bar(stat = "identity", position = "dodge") +
facet_grid(pt.method ~ num_cells, scales="free_y") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
xlab("") +
ylim(0, 1) +
ggtitle(model) +
theme(legend.position = "bottom", legend.title=element_blank())
g
ggsave(g, file = str_c("./res/", model, "_ROC_bar.pdf"), dpi = 300)
g <- ggplot(res %>% filter(backbone == model), aes(d = answer, m = 1 - p, colour = method)) +
geom_roc(n.cuts = FALSE, linealpha = 1) +
xlab("False positive rate") + ylab("True positive rate") + ggtitle(model) +
facet_grid(pt.method ~ num_cells) +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
theme(legend.position = "right", legend.title=element_blank())
print(g)
ggsave(g, file = str_c("./res/", model, "_ROC.pdf"), dpi = 300)
}
#plot each gene
load(str_c("./files/", model, "_wt.RData"))
load(str_c("./files/", model, "_ko.RData"))
wt.expression <- wt$expression[.sampling,] %>% as.matrix() %>% t()
wt.pseudotime <- wt$pseudotime[.sampling]
ko.expression <- ko$expression[.sampling,] %>% as.matrix() %>% t()
ko.pseudotime <- ko$pseudotime[.sampling]
purrr::map(rownames(wt.expression), plot.each)
load("./res/res.RData")
load("./res/pt.RData")
#load("./files/linear_wt.RData")
#load("./files/linear_ko.RData")
load("./files/bifurcating_wt.RData")
load("./files/bifurcating_ko.RData")
num.cells <- 1000
Expression <- c(wt$expression[which(rownames(wt$expression) == "C1_TF1"),],
ko$expression[which(rownames(ko$expression) == "C1_TF1"),])
tmp <- tibble(expression = c(wt$expression[,which(colnames(wt$expression) == "C1_TF1")],
ko$expression[,which(colnames(ko$expression) == "C1_TF1")],
wt$expression[,which(colnames(wt$expression) == "HK8")],
ko$expression[,which(colnames(ko$expression) == "HK8")]),
gene = rep(c("C1_TF1", "HK8"), each = 2000),
cell = c(rownames(wt$expression), rownames(ko$expression), rownames(wt$expression), rownames(ko$expression)))
d <- pt %>% filter(pt.method == "Ground_truth", cell.num == num.cells) %>%
inner_join(tibble(expression = c(wt$expression[,which(colnames(wt$expression) == "C1_TF1")],
ko$expression[,which(colnames(ko$expression) == "C1_TF1")],
wt$expression[,which(colnames(wt$expression) == "HK8")],
ko$expression[,which(colnames(ko$expression) == "HK8")]),
gene = rep(c("C1_TF1", "HK8"), each = 2000),
sample = rep(c("WT", "KO", "WT", "KO"), each = 1000),
cell = c(rownames(wt$expression), rownames(ko$expression), rownames(wt$expression), rownames(ko$expression))),
by = c("cell", "sample"), relationship = "many-to-many")
d$sample <- factor(d$sample, levels = c("WT", "KO"))
str <- tibble(backbone = NA, gene = NA, label = NA)
stats <- res %>% filter(gene %in% c("C1_TF1", "HK8"), num_cells == num.cells, pt.method == "Ground_truth")
for(i in c("C1_TF1", "HK8")){
for(j in c("linear", "bifurcating")){
str <- str %>%
add_row(backbone = j,
gene = i,
label = str_c("Lomb-Scargle's p = ", stats %>% filter(gene == i, backbone == j, method == "LS") %>% .$p %>% formatC(digits = 2, format = "e"), "\n",
"tradeSeq (diffEnd)'s p = ", stats %>% filter(gene == i, backbone == j, method == "tradeSeq (diffEnd)") %>% .$p %>% formatC(digits = 2, format = "e"), "\n",
"tradeSeq (pattern)'s p = ", stats %>% filter(gene == i, backbone == j, method == "tradeSeq (pattern)") %>% .$p %>% formatC(digits = 2, format = "e"), "\n",
"tradeSeq (earlyDE)'s p = ", stats %>% filter(gene == i, backbone == j, method == "tradeSeq (earlyDE)") %>% .$p %>% formatC(digits = 2, format = "e"), "\n",
"Lamian's p = ", stats %>% filter(gene == i, backbone == j, method == "Lamian") %>% .$p %>% formatC(digits = 2, format = "e")))
}
}
str <- na.omit(str)
#str$method <- str_replace_all(str$pt.method, pattern = "Monocle3", replacement = "monocle3")
str$x <- 0.5
str$y <- 0.5
d$backbone <- factor(d$backbone, levels = c("linear", "bifurcating"))
d$sample <- factor(d$sample, levels = c("KO", "WT"))
str$backbone <- factor(str$backbone, levels = c("linear", "bifurcating"))
str$sample <- "WT"
g <- ggplot(d, aes(x = pseudotime, y = expression, colour = sample)) +
geom_point() +
facet_grid(gene ~ backbone, scales = "free") +
geom_label(aes(x, y, label = label, vjust = 0.1, hjust = 0.3), data = str) +
theme(legend.position = "none")
g
ggsave(g, file = "./fig.pdf", dpi = 300)