-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathsample_level_pseudobulk_clustering.R
54 lines (51 loc) · 2.58 KB
/
sample_level_pseudobulk_clustering.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
## ------------------------------------
##
## Script name: single-cell data sample level pseudobulk clustering
##
## Purpose of script: Generate sample-level pseudobulk clustering result and plot
##
## Author: Jiekun (Jackie) Yang
##
## Date Created: 2022-11-09
##
## Email: jkyang@mit.edu
##
##--------------------------------------
### input_file_directory is a vector of directory paths with all the sample-level r data objects saved, include "/" at the end of the path
### pheno_df is a data frame with the phenotypes you would like to add to the clustering plot with rownames matching the sample names
### pheno_colors is a list with each element corresponding to one phenotype provided in pheno_df
### target_folder is where you would like to store output files generated by this function
### file_name is the file name that you want to name the output plot
## library setup
library(Seurat)
options(future.globals.maxSize = 128000 * 1024^2)
library(future)
library(future.apply)
library(pheatmap)
library(RColorBrewer)
plan("multiprocess", workers = 16)
## function
sample_level_pseudobulk_cluster <- function(input_file_directory = NULL, pheno_df = NULL, pheno_colors = NULL, target_folder = NULL, file_name = NULL) {
i <- 1
for (temp.file in list.files(pattern = ".rds", path = input_file_directory, full.names = T)) {
temp.data <- readRDS(temp.file)
temp.name <- gsub(".rds", "", basename(temp.file))
if (i == 1) {
temp.all_sample_bulk <- as.data.frame(Matrix::rowMeans(temp.data@assays$SCT))
colnames(temp.all_sample_bulk) <- temp.name
temp.all_sample_bulk$Row.names <- rownames(temp.all_sample_bulk)
} else {
temp.all_sample_bulk <- merge(temp.all_sample_bulk, as.data.frame(Matrix::rowMeans(temp.data@assays$SCT)), by.x = "Row.names", by.y = "row.names")
colnames(temp.all_sample_bulk)[i+1] <- temp.name
}
i <- i+1
}
rownames(temp.all_sample_bulk) <- temp.all_sample_bulk$Row.names
temp.all_sample_bulk <- temp.all_sample_bulk[, -1]
temp.corrs <- cor(as.matrix(temp.all_sample_bulk), method = "spearman")
temp.corr.dists <- as.dist(1 - temp.corrs)
temp.sample.num <- ncol(temp.corrs)
pdf(paste0(target_folder, file_name, "_sample_level_pseudo_bulk_clustering.pdf"), width = 0.2*temp.sample.num, height = 0.15*temp.sample.num)
print(pheatmap(temp.corrs, border_color = NA, clustering_method = "ward.D", annotation_col = pheno_df, clustering_distance_rows=temp.corr.dists, clustering_distance_cols=temp.corr.dists, col=colorRampPalette(brewer.pal(9, "Blues"))(99), show_rownames = FALSE, show_colnames = TRUE, annotation_colors = pheno_colors))
dev.off()
}