Seurat 原理和使用实例
目录
要点: 单细胞实验最大的目的是:划分细胞亚群,寻找分化及发育轨迹。Seurat能完成第一个目标,其原理参考官网,实例看文章及解读。
Seurat官网
SATIJA LAB: seurat首页, seurat实例,
单细胞分析教程: Analysis of single cell RNA-seq data
Seurat4 源码解析: Seurat 4 源码解析
Seurat分析实例(IF>5)
发育过程中
六个样本的单细胞数据如何发一篇10分以上的文章[cellranger+Seurat]: 主要研究对象是人类肠道组织,我们知道肠道主要分为回肠、结肠及直肠,所以这篇文章就采集了每个部位2个样本,来自于6个不同个人的手术中切除肠道组织。采用10XGenomics技术检测到了14537个肠道细胞,其中2个人类回肠6167个细胞,2个结肠4472个细胞,以及两个直肠3898个细胞。
免疫过程中
衰老过程中
Seurat 可视化及自定义函数
常用的ggplot2修饰函数
# large dot in legend LargeLegend = function(size=5){ guides(colour = guide_legend(override.aes = list(alpha = 1,size=size))) } # add panel box for ggplot AddBox = function(...){ theme( panel.border = element_rect(color = "black", size = 1), validate = TRUE, ... ) }
FeaturePlot 及修饰
csdn 效果图# feature plot with no axis FeaturePlot(scObj, features =c("CD3D", "CD8A", "GZMB", "CCR7", "CCL5", "CD5","CD7", "HLA-DRA", "LYZ", "CD33", "ANPEP", "FCGR1A", "IL3RA", "CD79A","CD79B","VPREB1", "IGJ", "MME","CD38", "HBA1", "GATA1", "TFRC",#CD71 "CD34", "SOCS2", "GAPDH"), cols = c("lightgrey", 'red'), ncol = 5 ) & NoLegend() & NoAxes() & theme( panel.border = element_rect(color = "black", size = 1) )
DotPlot 及修饰
barplot 自定义函数
要点:1. table的2个参数是对细胞的2种分类指标,比如 样品处理时间 vs 亚群;2. 注意对原始样品归一化为1万个细胞,消除样品间测序细胞数差异,见底部的示例;3. colors=颜色是为table的第一个参数指定;
#' draw barplot, from a data.frame generated by table(para1, para2), colored by 1st parameter of table() #' v2.1 #' v2.2 第一参数的空值跳过 #' #' @param tbl1 data.frame, draw by each column #' @param colors colors of each row(default NULL, auto-color) #' @param scale whether scale to 1 or not(default T) #' @param title the main title of the figure,(is main in the function) #' @param legendY the position y of legend, adjust as needed, default -0.25 #' @param omit whether to omit some columns #' @param ... #' #' @return NULL #' @export #' #' @examples #' table2barplot = function(tbl1, colors=NULL, levels=NULL, scale=T, title="", omit=NULL, xlab="", ylab="", legendTitle=NULL){ tbl1= tbl1[, which(colSums(tbl1)>0)] #remove all 0 columns tbl1= tbl1[which(rowSums(tbl1)>0), ] #remove all 0 rows # remove some columns by column names if(!is.null(omit)){ tbl1=tbl1[, setdiff( colnames(tbl1), as.character( omit ) ) ] } # table to data.frame(wide to long) df2=as.data.frame(tbl1) if(!is.null(levels)){ df2$Var1=factor(df2$Var1, levels =levels ) #change order } if(""==ylab){ ylab=ifelse(scale, "Freq", "Count") } if(""==xlab){ xlab="Index" } # draw g1=ggplot(df2, aes(x=Var2, y=Freq, fill=Var1))+ geom_bar(stat="identity", position=ifelse(scale, "fill","stack") )+ labs(x=xlab, y=ylab, title=title)+ theme_classic(base_size = 14)+ theme(axis.text.x=element_text(angle=60, hjust=1,size=rel(1.2)) ) if(is.null(colors)){ return (g1 + scale_fill_discrete( ifelse(is.null(legendTitle), "", legendTitle) ) ) }else{ return( g1+scale_fill_manual(legendTitle, values = colors) ) } } if(0){ table2barplot( as.table(t( apply( table(scObj$time, scObj$seurat_clusters), 1, function(x){ x/sum(x) * 1e4}) )), colors = c("0h"="#FFA500", "18h"="#FF1493", "48h"="#4876FF"), title="time" ) }
VolcanoPlot 经典火山图
csdn 效果图#' Volcano plot #' #' @param dif a data.frame, must provide at leat 3 columns: log2FoldChange, padj, symbol #' @param log2FC the threashold of DEG, default log2(1.5) #' @param padj the threashold of DEG, default 0.05 #' @param label.symbols genes symbols to label, optional, if not provied, select by log2FC in DEG; #' @param label.max if not provide label.symbols, automaticly select DEG number. #' @param cols the dot colors of down and up genees. #' @param title title of the plot #' #' @return a ggplot obj #' @export #' #' @examples #' if(0){ #' VolcanoPlot(dif, padj=0.05, title="DSS vs WT", label.max = 50) #' VolcanoPlot(dif, padj=1e-10, title="DSS vs WT -2", #' label.symbols=dif[ ((abs(dif$log2FoldChange) > 2) & (dif$padj < 1e-50) ) | #' abs(dif$log2FoldChange) > 4,]$symbol ) #' } VolcanoPlot=function(dif, log2FC=log2(1.5), padj=0.05, label.symbols=NULL, label.max=30, cols=c("#497aa2", "#ae3137"), title=""){ if( all(!c("log2FoldChange", "padj", "symbol") %in% colnames(dif)) ){ stop("Colnames must include: log2FoldChange, padj, symbol") } rownames(dif)=dif$symbol # (1) define up and down dif$threshold="ns"; dif[which(dif$log2FoldChange > log2FC & dif$padj2) & (dif$padj < 1e-50) ) | abs(dif$log2FoldChange) > 4,]$symbol )
multiVolcanoPlot
csdn 效果图# > head(DEG) # p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene label #RPS12 1.273332e-143 0.7298951 1.000 0.991 1.746248e-139 Naive CD4 T RPS12 Up #RPS6 6.817653e-143 0.6870694 1.000 0.995 9.349729e-139 Naive CD4 T RPS6 Up # color.pals = c("#DC143C","#0000FF","#20B2AA","#FFA500","#9370DB","#98FB98","#F08080","#1E90FF","#7CFC00","#FFFF00", "#808000","#FF00FF","#FA8072","#7B68EE","#9400D3","#800080","#A0522D","#D2B48C","#D2691E","#87CEEB","#40E0D0","#5F9EA0", "#FF1493","#0000CD","#008B8B","#FFE4B5","#8A2BE2","#228B22","#E9967A","#4682B4","#32CD32","#F0E68C","#FFFFE0","#EE82EE", "#FF6347","#6A5ACD","#9932CC","#8B008B","#8B4513","#DEB887") # #' multi volcano plot for scRNA-seq #' @version 0.2 change legend order #' @version 0.3 add max_overlaps for annotation #' #' @param dat Seurat FindAllMarkers returns, must set only.pos = F; #' @param color.arr color list, default same as Seurat #' @param onlyAnnotateUp only annote gene symbols for up genes #' @param log2Foldchang threshold for annotation #' @param adjp threshold for annotation #' @param top_marker gene number for annotation #' @param max_overlaps annotation label overlapping #' #' @return ggplot2 obj #' @export #' #' @examples multiVolcanoPlot = function(dat, color.arr=NULL, onlyAnnotateUp=T, log2Foldchang=0.58, adjp=0.05, top_marker=5, max_overlaps=10, width=0.9){ library(dplyr) library(ggrepel) # set default color list if(is.null(color.arr)){ len = length(unique(dat$cluster)) color.arr=scales::hue_pal()(len) } dat.plot <- dat %>% mutate( "significance"=case_when(p_val_adj < adjp & avg_log2FC >= log2Foldchang ~ 'Up', p_val_adj < adjp & avg_log2FC <= -log2Foldchang ~ 'Down', TRUE ~ 'None')) tbl = table(dat.plot$significance) print( tbl ) background.dat <- data.frame( dat.plot %>% group_by(cluster) %>% filter(avg_log2FC>0) %>% summarise("y.localup"=max(avg_log2FC)), dat.plot %>% group_by(cluster) %>% filter(avg_log2FC<=0) %>% summarise("y.localdown"=min(avg_log2FC)), x.local=seq(1:length(unique(dat.plot$cluster))) ) %>% select(-cluster.1) #names(background.dat) #head(background.dat) #dim(background.dat) # x.number <- background.dat %>% select(cluster, x.local) dat.plot <- dat.plot%>% left_join(x.number,by = "cluster") #names(dat.plot) #head(dat.plot) #selecting top-up and top-down proteins dat.marked.up <- dat.plot %>% filter(significance=="Up") %>% group_by(cluster) %>% arrange(-avg_log2FC) %>% top_n(top_marker,abs(avg_log2FC)) dat.marked.down <- dat.plot %>% filter(significance=="Down") %>% group_by(cluster) %>% arrange(avg_log2FC) %>% top_n(top_marker,abs(avg_log2FC)) dat.marked <- dat.marked.up %>% bind_rows(dat.marked.down) #referring group information data dat.infor <- background.dat %>% mutate("y.infor"=rep(0,length(cluster))) #names(dat.infor) #dim(dat.infor) #head(dat.infor) ##plotting: #setting color by loading local color schemes vol.plot <- ggplot()+ # background geom_col(background.dat,mapping=aes(x.local, y.localup), fill="grey80", alpha=0.2, width=0.9, just = 0.5)+ geom_col(background.dat,mapping=aes(x.local,y.localdown), fill="grey80", alpha=0.2, width=0.9, just = 0.5)+ # point plot geom_jitter(dat.plot, mapping=aes(x.local, avg_log2FC, #x= should be number, Not string or factor color=significance), size=0.8, width = 0.4, alpha= 1)+ scale_color_manual(name="significance", breaks = c('Up', 'None', 'Down'), values = c("#d56e5e","#cccccc", "#5390b5")) + #set color for: Down None Up geom_tile(dat.infor, mapping=aes(x.local, y.infor), #x axis color box height = log2Foldchang*1.3, fill = color.arr[1:length(unique(dat.plot$cluster))], alpha = 0.5, width=width) + labs(x=NULL,y="log2 Fold change")+ geom_text(dat.infor, mapping=aes(x.local,y.infor,label=cluster))+ # Down is not recommend, not meaningful, hard to explain; so prefer dat.marked.up to dat.marked ggrepel::geom_label_repel(data=if(onlyAnnotateUp) dat.marked.up else dat.marked, #gene symbol, of up group default mapping=aes(x=x.local, y=avg_log2FC, label=gene), force = 2, #size=2, max.overlaps = max_overlaps, label.size = 0, #no border fill="#00000000", #box fill color seed = 233, min.segment.length = 0, force_pull = 2, box.padding = 0.1, segment.linetype = 3, #segment.color = 'black', #segment.alpha = 0.5, #direction = "x", #line direction hjust = 0.5)+ annotate("text", x=1.5, y=max(background.dat$y.localup)+1, label=paste0("|log2FC|>=", log2Foldchang, " & FDR<", adjp))+ theme_classic(base_size = 12)+ theme( axis.title = element_text(size = 13, color = "black"), axis.text = element_text(size = 15, color = "black"), axis.line.y = element_line(color = "black", size = 0.8), # axis.line.x = element_blank(), #no x axis line axis.ticks.x = element_blank(), #no x axis ticks axis.title.x = element_blank(), # axis.text.x = element_blank(), # legend.spacing.x = unit(0.1,'cm'), legend.key.width = unit(0.5,'cm'), legend.key.height = unit(0.5,'cm'), legend.background = element_blank(), legend.box = "horizontal", legend.position = c(0.13, 0.77),legend.justification = c(1,0) )+ guides( #color = guide_legend( override.aes = list(size=5) ), #legend circle size color=guide_legend( override.aes = list(size=5), title="Change") ) #guides(fill=guide_legend(title="Change"))+ #change legend title vol.plot } #multiVolcanoPlot(DEG, color.pals) multiVolcanoPlot(scObj.markers.time) multiVolcanoPlot(scObj.markers.time, onlyAnnotateUp = F)
单细胞分析展望
整合是单细胞的利器,空间是单细胞的未来。