Figure_3

#The source code to reproduce Figure 3.
library(tidyverse)
library(furrr)
library(patchwork)
library(dyno)
library(dyngen)
library(spectral)
library(monocle3)
library(mclust)
library(slingshot)
library(tradeSeq)
library(Lamian)
library(plotROC)
theme_set(theme_bw(base_size = 20))

num.targets <- 250
num.hks <- 250
ko.rate <- 0
num.cores <- 8
res <- tibble(backbone = NA,
              gene = NA,
              pt.method = NA,
              method = NA,
              answer = NA,
              num_cells = NA,
              p = NA)
pt <- tibble(backbone = NA,
             pt.method = NA,
             sample = NA,
             cell.num = NA,
             cell = NA,
             pseudotime = NA)
cell.range <- c(250, 500, 1000)

#parameters and functions
min.freq <- 0.2
max.freq <- 1
length.freq <- 101
sim.num <- 100
ko_gene <- "C1_TF1"
FQnorm <- function(counts){
  rk <- apply(counts,2,rank,ties.method='min')
  counts.sort <- apply(counts,2,sort)
  refdist <- apply(counts.sort,1,median)
  norm <- apply(rk,2,function(r){ refdist[r] })
  rownames(norm) <- rownames(counts)
  return(norm)
}
LS <- function(g, time1, exp1, time2 = NULL, exp2 = NULL, min.freq = 0, max.freq = 1, length.freq = 101, m = "generalized", sim.num = 100){
  .quiet <- function(x){ 
    sink(tempfile()) 
    on.exit(sink()) 
    invisible(force(x)) 
  }
  time1 <- time1[!is.infinite(time1)]
  exp1 <- exp1[,!is.infinite(time1)]
  if(is.null(time2)){
    seq1 <- exp1[which(rownames(exp1) == g),]
    ls1 <- spec.lomb(x = time1, y = seq1,
                     f = seq(min.freq, max.freq, length = length.freq), mode = m) %>% .quiet
    return(tibble(gene = g, p.dynamic = min(ls1$p), p.de = NA))
  }else{
    time2 <- time2[!is.infinite(time2)]
    exp2 <- exp2[,!is.infinite(time2)]
    seq1 <- exp1[which(rownames(exp1) == g),]
    seq2 <- exp2[which(rownames(exp2) == g),]
    ls1 <- spec.lomb(x = time1, y = seq1,
                     f = seq(min.freq, max.freq, length = length.freq), mode = m) %>% .quiet
    ls2 <- spec.lomb(x = time2, y = seq2,
                     f = seq(min.freq, max.freq, length = length.freq), mode = m) %>% .quiet
    
    null.hypothesis <- NULL
    for(i in seq(sim.num)){
      sim.ls1 <- spec.lomb(x = sample(time1, length(time1)),
                           y = seq1,
                           f = seq(min.freq, max.freq, length = length.freq), mode = m) %>% .quiet
      sim.ls2 <- spec.lomb(x = sample(time2, length(time2)),
                           y = seq2,
                           f = seq(min.freq, max.freq, length = length.freq), mode = m) %>% .quiet
      null.hypothesis <- c(null.hypothesis, dist(rbind(sim.ls1$A, sim.ls2$A), method = "canberra"))
    }
    p <- sum(dist(rbind(ls1$A, ls2$A), method = "canberra") <  null.hypothesis) / sim.num
    return(tibble(gene = g, p.dynamic = min(c(ls1$p, ls2$p)), p.de = p))
  }
}
plot.each <- function(target){
  print(target)
  d <- pt %>% filter(backbone == "linear", pt.method == "Ground_truth", cell.num == num.cells) %>%
    add_column(Expression = c(wt.expression[which(rownames(wt.expression) == target),],
                              ko.expression[which(rownames(ko.expression) == target),]))
  d$sample <- factor(d$sample, levels = c("WT", "KO"))
  
  text <- str_c("Ground_truth: LS's p=",
                res %>% filter(gene == target, pt.method == "Ground_truth", method == "LS", num_cells == num.cells)
                %>% .$p %>% as.numeric,
                ", Lamian's p=",
                res %>% filter(gene == target, pt.method == "Ground_truth", method == "Lamian", num_cells == num.cells)
                %>% .$p %>% as.numeric %>% round(digits = 2))
  
  g1 <- ggplot(d, aes(x = pseudotime, y = Expression, colour = sample))
  g1 <- g1 + geom_point() + ggtitle(target, subtitle = text)
  g1 <- g1 + xlab("Pseudotime") + theme(plot.subtitle = element_text(size = 10))
  g1
  ggsave(g1, file = str_c("./genes/", target, ".pdf"), dpi = 300)
}

#data generation
for(model in c("linear", "bifurcating")){
  set.seed(1)
  if(model == "linear"){
    backbone <- backbone_linear()
  }else{
    backbone <- backbone_bifurcating()
  }
  config <- initialise_model(backbone = backbone,
                             num_cells = last(cell.range),
                             num_tfs = nrow(backbone$module_info),
                             num_targets = num.targets,
                             num_hks = num.hks,
                             num_cores = num.cores,
                             simulation_params = simulation_default(census_interval = 10,
                                                                    ssa_algorithm = ssa_etl(tau = 300 / 3600),
                                                                    experiment_params = simulation_type_wild_type(num_simulations = 100)))
  ggsave(plot_backbone_modulenet(config), file = str_c("./files/", model, ".pdf"), dpi = 300)
  model_common <- config %>%
    generate_tf_network() %>%
    generate_feature_network() %>% 
    generate_kinetics() %>%
    generate_gold_standard()
  model_wt <- model_common %>% generate_cells()
  model_ko <- model_common
  model_ko$simulation_params$experiment_params <- simulation_type_knockdown(num_simulations = 100,
                                                                            timepoint = 0, 
                                                                            genes = ko_gene,
                                                                            num_genes = 1,
                                                                            multiplier = ko.rate)
  model_ko <- model_ko %>% generate_cells()
  wt <- model_wt %>% generate_experiment() %>% as_dyno() %>% add_root(root_milestone_id = "sA") %>% add_pseudotime(pseudotime = NULL)
  ko <- model_ko %>% generate_experiment() %>% as_dyno() %>% add_root(root_milestone_id = "sA") %>% add_pseudotime(pseudotime = NULL)
  wt.counts <- wt$counts %>% as.matrix() %>% t()
  ko.counts <- ko$counts %>% as.matrix() %>% t()
  counts <- cbind(wt.counts, ko.counts)
  answer <- rep(0, nrow(counts))
  de_genes <- c(ko_gene, model_wt$feature_network %>% filter(str_detect(from, "C")) %>% pull(to))
  for(i in seq(nrow(counts))) if(wt$feature_ids[i] %in% de_genes) answer[i] <- 1
  if(model == "linear"){
    save(wt, file = "./files/linear_wt.RData")
    save(ko, file = "./files/linear_ko.RData")
    save(answer, file = "./files/answer_linear.RData")
  }else{
    save(wt, file = "./files/bifurcating_wt.RData")
    save(ko, file = "./files/bifurcating_ko.RData")
    save(answer, file = "./files/answer_bifurcating.RData")
  }
}

set.seed(8)
for(model in c("linear", "bifurcating")){
  load(str_c("./files/", model, "_wt.RData"))
  load(str_c("./files/", model, "_ko.RData"))
  load(str_c("./files/answer_", model, ".RData"))
  for(num.cells in cell.range){
    print(sum(is.na(res$method)))
    print(num.cells)
    .sampling <- sample(1:1000, num.cells) %>% sort
    wt.expression <- wt$expression[.sampling,] %>% as.matrix() %>% t()
    wt.counts <- wt$counts[.sampling,] %>% as.matrix() %>% t()
    wt.pseudotime <- wt$pseudotime[.sampling]
    ko.expression <- ko$expression[.sampling,] %>% as.matrix() %>% t()
    ko.counts <- ko$counts[.sampling,] %>% as.matrix() %>% t()
    ko.pseudotime <- ko$pseudotime[.sampling]
    counts <- cbind(wt.counts, ko.counts)
    pt <- pt %>% add_row(backbone = model,
                         pt.method = "Ground_truth",
                         sample = rep(c("WT", "KO"), each = num.cells),
                         cell.num = num.cells,
                         cell = colnames(counts),
                         pseudotime = c(wt.pseudotime, ko.pseudotime))
    
    d <- tibble(Component1 = wt$dimred[.sampling, 1], Component2 = wt$dimred[.sampling, 2], pseudotime = wt.pseudotime / max(wt.pseudotime))
    g <- ggplot(d, aes(x = Component1, y = Component2, colour = pseudotime))
    g <- g + geom_point() + ggtitle(str_c("Grounrtruth, ", num.cells, "cells")) + labs(subtitle = "WT") + theme(legend.position = 'none')
    d <- tibble(Component1 = ko$dimred[.sampling, 1], Component2 = ko$dimred[.sampling, 2], pseudotime = ko.pseudotime / max(ko.pseudotime))
    g2 <- ggplot(d, aes(x = Component1, y = Component2, colour = pseudotime))
    g2 <- g2 + geom_point() + labs(subtitle = "KO") + theme(legend.position = "bottom")
    print(g + g2)
    ggsave(g + g2, file = str_c("./plots/Grounrtruth_", model, "_", num.cells, "cells.pdf"), dpi = 300)

    #LS
    plan(multisession)
    res.LS <- future_map_dfr(rownames(wt.expression), LS, wt.pseudotime, wt.expression,
                             ko.pseudotime, ko.expression, .options = furrr_options(seed = 8))
    
    #Lamian
    .expression <- cbind(wt.expression, ko.expression)
    colnames(.expression) <- c(str_c("WT_", colnames(wt.expression)), str_c("KO_", colnames(ko.expression)))
    .pseudotime <- c(wt.pseudotime, ko.pseudotime)
    names(.pseudotime) <- c(str_c("WT_", colnames(wt.expression)), str_c("KO_", colnames(ko.expression)))
    .cellanno <- data.frame(Cell = colnames(.expression),
                            Sample = rep(c("WT", "KO"), each = num.cells / 2))
    .design <- matrix(c(1, 1, 0, 1), ncol = 2, dimnames = list(c("WT", "KO"), c("intercept", "group")))
    res.lamian <- lamian_test(expr = .expression,
                              cellanno = .cellanno,
                              pseudotime = .pseudotime,
                              design = .design,
                              test.type = 'variable',
                              overall.only = TRUE,
                              test.method = "chisq",
                              permuiter = 100,
                              ncores = num.cores)

    #tradeSeq
    colnames(counts) <- colnames(.expression)
    .condition <- factor(rep(c("WT", "KO"), each = num.cells), levels = c("WT", "KO"))
    .pseudotime <-  matrix(c(wt.pseudotime, rep(0, num.cells), rep(0, num.cells), ko.pseudotime), ncol = 2)
    .cellweights <- matrix(c(rep(1, num.cells), rep(0, num.cells), rep(0, num.cells), rep(1, num.cells)), ncol = 2)
    gam <- fitGAM(counts = counts, pseudotime = .pseudotime,
                  cellWeights = .cellweights,
                  nknots = 6, parallel = TRUE)
    res.tradeseq.diffEnd <- diffEndTest(gam)
    res.tradeseq.pattern <- patternTest(gam)
    res.tradeseq.earlyDE <- earlyDETest(gam, knots = c(1, 3))
    
    res <- res %>% add_row(backbone = model,
                           gene = rep(rownames(counts), 5),
                           pt.method = "Ground_truth",
                           method = c(rep("LS", nrow(res.LS)),
                                      rep("Lamian", length(res.lamian$statistics$pval.chisq.overall)),
                                      rep("tradeSeq (diffEnd)", nrow(res.tradeseq.diffEnd)),
                                      rep("tradeSeq (pattern)", nrow(res.tradeseq.pattern)),
                                      rep("tradeSeq (earlyDE)", nrow(res.tradeseq.earlyDE))),
                           answer = rep(answer, 5),
                           num_cells = num.cells,
                           p = c(res.LS$p.de, res.lamian$statistics$pval.chisq.overall,
                                 res.tradeseq.diffEnd$pvalue,
                                 res.tradeseq.pattern$pvalue,
                                 res.tradeseq.earlyDE$pvalue))
    
    #Monocle3
    gene.metadata <- data.frame(id = wt$feature_info$feature_id, gene_short_name = wt$feature_info$feature_id)
    rownames(gene.metadata) <- gene.metadata$id
    monocle3.wt <- new_cell_data_set(wt.counts, gene_metadata = gene.metadata) %>%
      preprocess_cds(num_dim = 50) %>%
      reduce_dimension(preprocess_method = "PCA", reduction_method = "UMAP") %>%
      cluster_cells() %>%
      learn_graph() %>%
      order_cells(root_cells = names(which.min(wt.pseudotime))) 
    wt.pseudotime.monocle3 <- monocle3.wt@principal_graph_aux@listData$UMAP$pseudotime / max(monocle3.wt@principal_graph_aux@listData$UMAP$pseudotime)

    monocle3.ko <- new_cell_data_set(ko.counts, gene_metadata = gene.metadata) %>%
      preprocess_cds(num_dim = 50) %>%
      reduce_dimension(preprocess_method = "PCA", reduction_method = "UMAP") %>%
      cluster_cells() %>%
      learn_graph() %>%
      order_cells(root_cells = names(which.min(ko.pseudotime))) 
    ko.pseudotime.monocle3 <- monocle3.ko@principal_graph_aux@listData$UMAP$pseudotime / max(monocle3.ko@principal_graph_aux@listData$UMAP$pseudotime)

    pt <- pt %>% add_row(backbone = model,
                         pt.method = "monocle3",
                         sample = rep(c("WT", "KO"), each = num.cells),
                         cell.num = num.cells,
                         cell = colnames(counts),
                         pseudotime = c(wt.pseudotime.monocle3, ko.pseudotime.monocle3))
    
    d <- tibble(Component1 = wt$dimred[.sampling, 1], Component2 = wt$dimred[.sampling, 2], pseudotime = wt.pseudotime.monocle3 / max(wt.pseudotime.monocle3))
    g <- ggplot(d, aes(x = Component1, y = Component2, colour = pseudotime))
    g <- g + geom_point() + ggtitle(str_c("monocle3, ", num.cells, "cells")) + labs(subtitle = "WT") + theme(legend.position = 'none')
    d <- tibble(Component1 = ko$dimred[.sampling, 1], Component2 = ko$dimred[.sampling, 2], pseudotime = ko.pseudotime.monocle3 / max(ko.pseudotime.monocle3))
    g2 <- ggplot(d, aes(x = Component1, y = Component2, colour = pseudotime))
    g2 <- g2 + geom_point() + labs(subtitle = "KO") + theme(legend.position = "bottom")
    ggsave(g + g2, file = str_c("./plots/monocle3_", model, "_", num.cells, "cells.pdf"), dpi = 300)

    #LS
    plan(multisession)
    res.LS <- future_map_dfr(rownames(wt.expression), LS, wt.pseudotime.monocle3, wt.expression,
                             ko.pseudotime.monocle3, ko.expression, .options = furrr_options(seed = 8))
    
    #Lamian
    .expression <- cbind(wt.expression, ko.expression)
    colnames(.expression) <- c(str_c("WT_", colnames(wt.expression)), str_c("KO_", colnames(ko.expression)))
    .pseudotime <- c(wt.pseudotime.monocle3, ko.pseudotime.monocle3)
    names(.pseudotime) <- c(str_c("WT_", colnames(wt.expression)), str_c("KO_", colnames(ko.expression)))
    .cellanno <- data.frame(Cell = colnames(.expression),
                            Sample = rep(c("WT", "KO"), each = num.cells / 2))
    .design <- matrix(c(1, 1, 0, 1), ncol = 2, dimnames = list(c("WT", "KO"), c("intercept", "group")))
    res.lamian <- lamian_test(expr = .expression,
                              cellanno = .cellanno,
                              pseudotime = .pseudotime,
                              design = .design,
                              test.type = 'variable',
                              overall.only = TRUE,
                              test.method = "chisq",
                              permuiter = 100,
                              ncores = num.cores)
    
    #tradeSeq
    colnames(counts) <- colnames(.expression)
    .condition <- factor(rep(c("WT", "KO"), each = num.cells), levels = c("WT", "KO"))
    .pseudotime <-  matrix(c(wt.pseudotime.monocle3, rep(0, num.cells), rep(0, num.cells), ko.pseudotime.monocle3), ncol = 2)
    .cellweights <- matrix(c(rep(1, num.cells), rep(0, num.cells), rep(0, num.cells), rep(1, num.cells)), ncol = 2)
    gam <- fitGAM(counts = counts, pseudotime = .pseudotime,
                  cellWeights = .cellweights,
                  nknots = 6, parallel = TRUE)
    res.tradeseq.diffEnd <- diffEndTest(gam)
    res.tradeseq.pattern <- patternTest(gam)
    res.tradeseq.earlyDE <- earlyDETest(gam, knots = c(1, 3))
    
    res <- res %>% add_row(backbone = model,
                           gene = rep(rownames(counts), 5),
                           pt.method = "monocle3",
                           method = c(rep("LS", nrow(res.LS)),
                                      rep("Lamian", length(res.lamian$statistics$pval.chisq.overall)),
                                      rep("tradeSeq (diffEnd)", nrow(res.tradeseq.diffEnd)),
                                      rep("tradeSeq (pattern)", nrow(res.tradeseq.pattern)),
                                      rep("tradeSeq (earlyDE)", nrow(res.tradeseq.earlyDE))),
                           answer = rep(answer, 5),
                           num_cells = num.cells,
                           p = c(res.LS$p.de, res.lamian$statistics$pval.chisq.overall,
                                 res.tradeseq.diffEnd$pvalue,
                                 res.tradeseq.pattern$pvalue,
                                 res.tradeseq.earlyDE$pvalue))
    
    #slingshot
    sce <- SingleCellExperiment(assays = List(counts = wt.counts, log2 = log2(wt.counts)))
    assays(sce)$norm <- FQnorm(wt.counts)
    .pca <- prcomp(t(log1p(assays(sce)$norm)), scale. = FALSE)
    reducedDims(sce) <- SimpleList(PCA = .pca$x[,1:2])
    colData(sce)$GMM <- Mclust(.pca$x[,1:2])$classification
    wt.slingshot <- slingshot(sce, clusterLabels = 'GMM', reducedDim = 'PCA',
                              start.clus = colData(sce)$GMM[which(names(colData(sce)$GMM) == names(which.min(wt.pseudotime)))])
    wt.cellWeights <- slingCurveWeights(wt.slingshot)
    wt.pseudotime.slingshot <- slingPseudotime(wt.slingshot, na = FALSE)
    .normWeights <- sweep(wt.cellWeights, 1, FUN = "/",
                          STATS = apply(wt.cellWeights, 1, sum))
    wt.cellWeights.slingshot <- apply(.normWeights, 1, function(prob) {stats::rmultinom(n = 1, prob = prob, size = 1)}) %>% t()
    .get.pseudotime <- function(value) return(sample(wt.pseudotime.slingshot[value,], 1, prob = as.numeric(wt.cellWeights.slingshot[value,])))
    if(ncol(wt.pseudotime.slingshot) >= 2){
      wt.pseudotime.slingshot <- sapply(seq(nrow(wt.pseudotime.slingshot)), .get.pseudotime)
      names(wt.pseudotime.slingshot) <- colnames(sce)
    }

    sce <- SingleCellExperiment(assays = List(counts = ko.counts, log2 = log2(ko.counts)))
    assays(sce)$norm <- FQnorm(ko.counts)
    .pca <- prcomp(t(log1p(assays(sce)$norm)), scale. = FALSE)
    reducedDims(sce) <- SimpleList(PCA = .pca$x[,1:2])
    colData(sce)$GMM <- Mclust(.pca$x[,1:2])$classification
    ko.slingshot <- slingshot(sce, clusterLabels = 'GMM', reducedDim = 'PCA',
                              start.clus = colData(sce)$GMM[which(names(colData(sce)$GMM) == names(which.min(ko.pseudotime)))])
    ko.cellWeights <- slingCurveWeights(ko.slingshot)
    ko.pseudotime.slingshot <- slingPseudotime(ko.slingshot, na = FALSE)
    .normWeights <- sweep(ko.cellWeights, 1, FUN = "/",
                          STATS = apply(ko.cellWeights, 1, sum))
    ko.cellWeights.slingshot <- apply(.normWeights, 1, function(prob) {stats::rmultinom(n = 1, prob = prob, size = 1)}) %>% t()
    .get.pseudotime <- function(value) return(sample(ko.pseudotime.slingshot[value,], 1, prob = as.numeric(ko.cellWeights.slingshot[value,])))
    if(ncol(ko.pseudotime.slingshot) >= 2){
      ko.pseudotime.slingshot <- sapply(seq(nrow(ko.pseudotime.slingshot)), .get.pseudotime)
      names(ko.pseudotime.slingshot) <- colnames(sce)
    }
    
    pt <- pt %>% add_row(backbone = model,
                         pt.method = "slingshot",
                         sample = rep(c("WT", "KO"), each = num.cells),
                         cell.num = num.cells,
                         cell = colnames(counts),
                         pseudotime = c(wt.pseudotime.slingshot, ko.pseudotime.slingshot))
    
    d <- tibble(Component1 = wt$dimred[.sampling, 1], Component2 = wt$dimred[.sampling, 2], pseudotime = wt.pseudotime.slingshot / max(wt.pseudotime.slingshot))
    g <- ggplot(d, aes(x = Component1, y = Component2, colour = pseudotime))
    g <- g + geom_point() + ggtitle(str_c("slingshot, ", num.cells, "cells")) + labs(subtitle = "WT") + theme(legend.position = 'none')
    d <- tibble(Component1 = ko$dimred[.sampling, 1], Component2 = ko$dimred[.sampling, 2], pseudotime = ko.pseudotime.slingshot / max(ko.pseudotime.slingshot))
    g2 <- ggplot(d, aes(x = Component1, y = Component2, colour = pseudotime))
    g2 <- g2 + geom_point() + labs(subtitle = "KO") + theme(legend.position = "bottom")
    ggsave(g + g2, file = str_c("./plots/slingshot_", model, "_", num.cells, "cells.pdf"), dpi = 300)

    #LS
    plan(multisession)
    res.LS <- future_map_dfr(rownames(wt.expression), LS, wt.pseudotime.slingshot, wt.expression, ko.pseudotime.slingshot, ko.expression, .options = furrr_options(seed = 8))

    #Lamian
    .expression <- cbind(wt.expression, ko.expression)
    colnames(.expression) <- c(str_c("WT_", colnames(wt.expression)), str_c("KO_", colnames(ko.expression)))
    .pseudotime <- c(wt.pseudotime.slingshot, ko.pseudotime.slingshot)
    names(.pseudotime) <- c(str_c("WT_", colnames(wt.expression)), str_c("KO_", colnames(ko.expression)))
    .cellanno <- data.frame(Cell = colnames(.expression),
                            Sample = rep(c("WT", "KO"), each = num.cells / 2))
    .design <- matrix(c(1, 1, 0, 1), ncol = 2, dimnames = list(c("WT", "KO"), c("intercept", "group")))
    res.lamian <- lamian_test(expr = .expression,
                              cellanno = .cellanno,
                              pseudotime = .pseudotime,
                              design = .design,
                              test.type = 'variable',
                              overall.only = TRUE,
                              test.method = "chisq",
                              permuiter = 100,
                              ncores = num.cores)
    
    #tradeSeq
    colnames(counts) <- colnames(.expression)
    .condition <- factor(rep(c("WT", "KO"), each = num.cells), levels = c("WT", "KO"))
    .pseudotime <-  matrix(c(wt.pseudotime.slingshot, rep(0, num.cells), rep(0, num.cells), ko.pseudotime.slingshot), ncol = 2)
    if(model == "linear"){
      .cellweights <- matrix(c(rep(1, num.cells), rep(0, num.cells), rep(0, num.cells), rep(1, num.cells)), ncol = 2)
    }else{
      .cellweights <- rbind(wt.cellWeights, ko.cellWeights)
    }
    
    gam <- fitGAM(counts = counts, pseudotime = .pseudotime,
                  cellWeights = .cellweights,
                  nknots = 6, parallel = TRUE)
    res.tradeseq.diffEnd <- diffEndTest(gam)
    res.tradeseq.pattern <- patternTest(gam)
    res.tradeseq.earlyDE <- earlyDETest(gam, knots = c(1, 3))
    res <- res %>% add_row(backbone = model,
                           gene = rep(rownames(counts), 5),
                           pt.method = "slingshot",
                           method = c(rep("LS", nrow(res.LS)),
                                      rep("Lamian", length(res.lamian$statistics$pval.chisq.overall)),
                                      rep("tradeSeq (diffEnd)", nrow(res.tradeseq.diffEnd)),
                                      rep("tradeSeq (pattern)", nrow(res.tradeseq.pattern)),
                                      rep("tradeSeq (earlyDE)", nrow(res.tradeseq.earlyDE))),
                           answer = rep(answer, 5),
                           num_cells = num.cells,
                           p = c(res.LS$p.de, res.lamian$statistics$pval.chisq.overall,
                                 res.tradeseq.diffEnd$pvalue,
                                 res.tradeseq.pattern$pvalue,
                                 res.tradeseq.earlyDE$pvalue))
  }
}
res$method <- factor(res$method, levels = c("LS", "Lamian", "tradeSeq (diffEnd)", "tradeSeq (pattern)", "tradeSeq (earlyDE)"))
res <- res[-1,]
save(res, file = "./res/res.RData")
pt <- pt[-1,]
pt$cell <- pt$cell %>%
  str_remove_all("WT_") %>%
  str_remove_all("KO_")
save(pt, file = "./res/pt.RData")

#correlation of pseudotime
for(model in c("linear", "bifurcating")){
  for(samp in c("WT", "KO")){
    d <- rbind(inner_join(pt %>% filter(pt.method == "Ground_truth", backbone == model, sample == samp),
                          pt %>% filter(pt.method == "monocle3", backbone == model, sample == samp),
                          by = c("sample", "cell.num", "cell")),
               inner_join(pt %>% filter(pt.method == "Ground_truth", backbone == model, sample == samp),
                          pt %>% filter(pt.method == "slingshot", backbone == model, sample == samp),
                          by = c("sample", "cell.num", "cell")))
    g <- ggplot(d, aes(x = pseudotime.x, y = pseudotime.y)) +
      geom_point() + facet_grid(pt.method.y ~ cell.num, scales = "free") +
      ggtitle(str_c(model, ", ", samp)) + xlab("Pseudotime of groud truth") + ylab("Inferred pseudotime")
    g
    ggsave(g, file = str_c("./cor/cor_pseudotime_",samp, "_", model, ".pdf"), dpi = 300)
  }
}

res$method <- factor(res$method, levels = c("LS", "tradeSeq (earlyDE)", "tradeSeq (diffEnd)", "tradeSeq (pattern)", "Lamian"))
for(model in c("linear", "bifurcating")){
  g <- ggplot(res %>% filter(backbone == model), aes(d = answer, m = 1 - p, colour = pt.method)) +
    geom_roc(n.cuts = FALSE, linealpha = 1) +
    xlab("False positive rate") + ylab("True positive rate") + ggtitle(model) +
    facet_grid(num_cells ~ method) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    theme(legend.position = "bottom", legend.title=element_blank())
  
  roc <- calc_auc(g) %>% tibble
  g <- ggplot(roc, aes(x = method, y = AUC, fill = method)) +
    geom_bar(stat = "identity", position = "dodge") +
    facet_grid(pt.method ~ num_cells, scales="free_y") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    xlab("") +
    ylim(0, 1) +
    ggtitle(model) +
    theme(legend.position = "bottom", legend.title=element_blank())
  g
  ggsave(g, file = str_c("./res/", model, "_ROC_bar.pdf"), dpi = 300)
  
  g <- ggplot(res %>% filter(backbone == model), aes(d = answer, m = 1 - p, colour = method)) +
    geom_roc(n.cuts = FALSE, linealpha = 1) +
    xlab("False positive rate") + ylab("True positive rate") + ggtitle(model) +
    facet_grid(pt.method ~ num_cells) +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    theme(legend.position = "right", legend.title=element_blank())
  print(g)
  ggsave(g, file = str_c("./res/", model, "_ROC.pdf"), dpi = 300)
}

#plot each gene
load(str_c("./files/", model, "_wt.RData"))
load(str_c("./files/", model, "_ko.RData"))
wt.expression <- wt$expression[.sampling,] %>% as.matrix() %>% t()
wt.pseudotime <- wt$pseudotime[.sampling]
ko.expression <- ko$expression[.sampling,] %>% as.matrix() %>% t()
ko.pseudotime <- ko$pseudotime[.sampling]
purrr::map(rownames(wt.expression), plot.each)


load("./res/res.RData")
load("./res/pt.RData")
#load("./files/linear_wt.RData")
#load("./files/linear_ko.RData")
load("./files/bifurcating_wt.RData")
load("./files/bifurcating_ko.RData")
num.cells <- 1000

Expression <- c(wt$expression[which(rownames(wt$expression) == "C1_TF1"),],
                ko$expression[which(rownames(ko$expression) == "C1_TF1"),])

tmp <- tibble(expression = c(wt$expression[,which(colnames(wt$expression) == "C1_TF1")],
                             ko$expression[,which(colnames(ko$expression) == "C1_TF1")],
                             wt$expression[,which(colnames(wt$expression) == "HK8")],
                             ko$expression[,which(colnames(ko$expression) == "HK8")]),
              gene = rep(c("C1_TF1", "HK8"), each = 2000),
              cell = c(rownames(wt$expression), rownames(ko$expression), rownames(wt$expression), rownames(ko$expression)))

d <- pt %>% filter(pt.method == "Ground_truth", cell.num == num.cells) %>%
  inner_join(tibble(expression = c(wt$expression[,which(colnames(wt$expression) == "C1_TF1")],
                                   ko$expression[,which(colnames(ko$expression) == "C1_TF1")],
                                   wt$expression[,which(colnames(wt$expression) == "HK8")],
                                   ko$expression[,which(colnames(ko$expression) == "HK8")]),
                    gene = rep(c("C1_TF1", "HK8"), each = 2000),
                    sample = rep(c("WT", "KO", "WT", "KO"), each = 1000),
                    cell = c(rownames(wt$expression), rownames(ko$expression), rownames(wt$expression), rownames(ko$expression))),
             by = c("cell", "sample"), relationship = "many-to-many")
d$sample <- factor(d$sample, levels = c("WT", "KO"))
str <- tibble(backbone = NA, gene = NA, label = NA)
stats <- res %>% filter(gene %in% c("C1_TF1", "HK8"), num_cells == num.cells, pt.method == "Ground_truth")
for(i in c("C1_TF1", "HK8")){
  for(j in c("linear", "bifurcating")){
    str <- str %>%
      add_row(backbone = j,
              gene = i,
              label = str_c("Lomb-Scargle's p = ", stats %>% filter(gene == i, backbone == j, method == "LS") %>% .$p %>% formatC(digits = 2, format = "e"), "\n",
                            "tradeSeq (diffEnd)'s p = ", stats %>% filter(gene == i, backbone == j, method == "tradeSeq (diffEnd)") %>% .$p %>% formatC(digits = 2, format = "e"), "\n",
                            "tradeSeq (pattern)'s p = ", stats %>% filter(gene == i, backbone == j, method == "tradeSeq (pattern)") %>% .$p %>% formatC(digits = 2, format = "e"), "\n",
                            "tradeSeq (earlyDE)'s p = ", stats %>% filter(gene == i, backbone == j, method == "tradeSeq (earlyDE)") %>% .$p %>% formatC(digits = 2, format = "e"), "\n",
                            "Lamian's p = ", stats %>% filter(gene == i, backbone == j, method == "Lamian") %>% .$p %>% formatC(digits = 2, format = "e")))
  }
}
str <- na.omit(str)
#str$method <- str_replace_all(str$pt.method, pattern = "Monocle3", replacement = "monocle3")
str$x <- 0.5
str$y <- 0.5

d$backbone <- factor(d$backbone, levels = c("linear", "bifurcating"))
d$sample <- factor(d$sample, levels = c("KO", "WT"))
str$backbone <- factor(str$backbone, levels = c("linear", "bifurcating"))
str$sample <- "WT"
g <- ggplot(d, aes(x = pseudotime, y = expression, colour = sample)) +
  geom_point() +
  facet_grid(gene ~ backbone, scales = "free") +
  geom_label(aes(x, y, label = label, vjust = 0.1, hjust = 0.3), data = str) +
  theme(legend.position = "none")
g
ggsave(g, file = "./fig.pdf", dpi = 300)