rm(list=ls())
setwd('/Users/zhangjuxiang/Desktop/R time seq/')
#Bioconductor 安裝 edgeR
#install.packages('BiocManager') #需要首先安裝 BiocManager,如果尚未安裝請先執行該步
BiocManager::install('edgeR',force=T)
#讀取基因表達矩陣
targets <- read.csv('rawdata.csv')
as.matrix(targets)
rownames(targets) <- targets[,1]
targets <- targets[,-1]
#指定分組,注意要保證表達矩陣中的樣本順序和這里的分組順序是一一對應的
#對照組在前,處理組在后
group <- rep(c('BC', 'EC'),c(121,192))
library(edgeR)
#數據預處理
#(1)構建 DGEList 對象
dgelist <- DGEList(counts = targets, group = group)
#(2)過濾 low count 數據,例如 CPM 標準化(推薦)
keep <- rowSums(cpm(dgelist) > 1 ) >= 2
dgelist <- dgelist[keep,, keep.lib.sizes = FALSE]
#(3)標準化,以 TMM 標準化為例
dgelist_norm <- calcNormFactors(dgelist, method = 'TMM')
#差異表達基因分析
#首先根據分組信息構建試驗設計矩陣,分組信息中一定要是對照組在前,處理組在后
design <- model.matrix(~group)
#(1)估算基因表達值的離散度
dge <- estimateDisp(dgelist_norm, design, robust = TRUE)
#(2)模型擬合,edgeR 提供了多種擬合算法
#負二項廣義對數線性模型
fit <- glmFit(dge, design, robust = TRUE)
lrt <- topTags(glmLRT(fit), n = nrow(dgelist$counts))
write.table(lrt, 'control_treat.glmLRT.txt', sep = '\t', col.names = NA, quote = FALSE)
#擬似然負二項廣義對數線性模型
fit <- glmQLFit(dge, design, robust = TRUE)
lrt <- topTags(glmQLFTest(fit), n = nrow(dgelist$counts))
write.table(lrt, 'control_treat.glmQLFit.txt', sep = '\t', col.names = NA, quote = FALSE)
##篩選差異表達基因
#讀取上述輸出的差異倍數計算結果
gene_diff <- read.delim('control_treat.glmLRT.txt', row.names = 1, sep = '\t', check.names = FALSE)
#首先對表格排個序,按 FDR 值升序排序,相同 FDR 值下繼續按 log2FC 降序排序
gene_diff <- gene_diff[order(gene_diff$FDR, gene_diff$logFC, decreasing = c(FALSE, TRUE)), ]
#log2FC≥1 & FDR<0.01 標識 up,代表顯著上調的基因
#log2FC≤-1 & FDR<0.01 標識 down,代表顯著下調的基因
#其余標識 none,代表非差異的基因
gene_diff[which(gene_diff$logFC >= 1 & gene_diff$FDR < 0.05),'sig'] <- 'up'
gene_diff[which(gene_diff$logFC <= -1 & gene_diff$FDR < 0.05),'sig'] <- 'down'
gene_diff[which(abs(gene_diff$logFC) <= 1 | gene_diff$FDR >= 0.05),'sig'] <- 'none'
#輸出選擇的差異基因總表
gene_diff_select <- subset(gene_diff, sig %in% c('up', 'down'))
write.table(gene_diff_select, file = 'control_treat.glmQLFit.select.txt', sep = '\t', col.names = NA, quote = FALSE)
#根據 up 和 down 分開輸出
gene_diff_up <- subset(gene_diff, sig == 'up')
gene_diff_down <- subset(gene_diff, sig == 'down')
write.table(gene_diff_up, file = 'control_treat.glmQLFit.up.txt', sep = '\t', col.names = NA, quote = FALSE)
write.table(gene_diff_down, file = 'control_treat.glmQLFit.down.txt', sep = '\t', col.names = NA, quote = FALSE)
install.packages('pheatmap')
library(pheatmap)
{
tmp = gene_diff_select[gene_diff_select$PValue < 0.05,]
#差異結果需要先根據p值挑選
nrDEG_Z = tmp[ order( tmp$logFC ), ]
nrDEG_F = tmp[ order( -tmp$logFC ), ]
choose_gene = c( rownames( nrDEG_Z )[1:100], rownames( nrDEG_F )[1:100] )
choose_matrix = targets[ choose_gene, ]
choose_matrix = t( scale( t( choose_matrix ) ) )
choose_matrix[choose_matrix > 2] = 2
choose_matrix[choose_matrix < -2] = -2
annotation_col = data.frame( CellType = factor( group ) )
rownames( annotation_col ) = colnames( targets )
choose_matrix <- na.omit(choose_matrix)
pheatmap( fontsize = 2, choose_matrix, annotation_col = annotation_col, show_rownames = F, annotation_legend = F, filename = "heatmap_BRCA_medianexp2.png")
}
install.packages('ggplot2')
library( "ggplot2" )
nrDEG <- gene_diff
logFC_cutoff <- with( nrDEG, mean( abs( logFC ) ) + 2 * sd( abs( logFC ) ) )
logFC_cutoff
logFC_cutoff = 1
{
nrDEG$change = as.factor( ifelse( nrDEG$PValue < 0.01 & abs(nrDEG$logFC) > logFC_cutoff,
ifelse( nrDEG$logFC > logFC_cutoff , 'UP', 'DOWN' ), 'NOT' ) )
save( nrDEG, file = "nrDEG_array_medianexp.Rdata" )
this_tile <- paste0( 'Cutoff for logFC is ', round( logFC_cutoff, 3 ),
' The number of up M/Z is ', nrow(nrDEG[ nrDEG$change =='UP', ] ),
' The number of down M/Z is ', nrow(nrDEG[ nrDEG$change =='DOWN', ] ) )
volcano = ggplot(data = nrDEG, aes( x = logFC, y = -log10(PValue), color = change)) +
geom_point( alpha = 0.4, size = 1.75) +
theme_set( theme_set( theme_minimal( base_size = 15 ) ) ) +
xlab( "log2 fold change" ) + ylab( "-log10 p-value" ) +
theme(legend.title = element_text(colour="black", size=6, face="bold")) +
theme(legend.text = element_text(colour="black", size = 7, face = "bold")) +
theme(axis.title.x = element_text(size = 9, color = "black", face = "bold")) +
theme(axis.title.y = element_text(size = 9, color = "black", face = "bold")) +
ggtitle( this_tile ) + theme( plot.title = element_text( size = 8, hjust = 0.5, face = "bold" )) +
theme(legend.position=c(1.2, 0.8)) +
theme(aspect.ratio=1) +
scale_colour_manual( values = c('green','black','red') ) + theme(panel.grid.major = element_line(colour = "white",
linetype = "blank"), panel.grid.minor = element_line(colour = "white"),
panel.background = element_rect(fill = "aliceblue",
colour = "white"), plot.background = element_rect(colour = "azure1"))
print( volcano )
ggsave( volcano, filename = 'volcano_BRCA_medianexp.tiff' )
dev.off()
}
幫老婆寫代謝組的熱圖和火山圖
?著作權歸作者所有,轉載或內容合作請聯系作者
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。
- 文/潘曉璐 我一進店門,熙熙樓的掌柜王于貴愁眉苦臉地迎上來,“玉大人,你說我怎么就攤上這事。” “怎么了?”我有些...
- 文/花漫 我一把揭開白布。 她就那樣靜靜地躺著,像睡著了一般。 火紅的嫁衣襯著肌膚如雪。 梳的紋絲不亂的頭發上,一...
- 文/蒼蘭香墨 我猛地睜開眼,長吁一口氣:“原來是場噩夢啊……” “哼!你這毒婦竟也來了?” 一聲冷哼從身側響起,我...
推薦閱讀更多精彩內容
- 測序行業的蓬勃發展,帶來微生物組學日新月異的變化。目前,單一組學的文章不斷“貶值”,前沿研究的目光從單一組學逐步拓...
- 這個文章跟著之前的文章完整轉錄組RNAseq分析流程(tophat2+cufflink+cuffdiff)用了之前...
- 差異基因展示方式除了熱圖、火山圖,還有傳說中的“瀑布圖”(不確定是不是叫瀑布圖,我感覺更像S型圖),近兩天我在瀏覽...