# source("http://bioconductor.org/biocLite.R")
# biocLite("DESeq")
suppressMessages(library(DESeq))
counts = get(load("counts_Hs.RData"))
dim(counts)
## [1] 27704 6
range(counts)
## [1] 0 899348
is.matrix(counts)
## [1] TRUE
head(counts)
## A1 A2 A3 B1 B2 B3
## ENSG00000101557 2590 2647 3225 3173 3147 3515
## ENSG00000079134 487 488 570 565 663 677
## ENSG00000261724 10 10 14 4 10 9
## ENSG00000158270 13565 14006 16451 9602 9823 10311
## ENSG00000079101 29 23 44 16 20 16
## ENSG00000176912 4 9 5 14 10 20
conditions = get(load("conditions_Hs.RData"))
conditions
## [1] "10mM" "10mM" "10mM" "0mM" "0mM" "0mM"
length(conditions) == ncol(counts) # must be TRUE
## [1] TRUE
length(unique(conditions)) == 2 # simple analysis: just compare two conditions
## [1] TRUE
nr.replicates.group1 = sum(conditions == unique(conditions)[1])
nr.replicates.group1
## [1] 3
nr.replicates.group2 = sum(conditions == unique(conditions)[2])
nr.replicates.group2
## [1] 3
This might take quite a long time!
method = "per-condition" # method = c( "pooled", "per-condition", "blind" )
sharingMode = "maximum" # sharingMode = c( "maximum", "fit-only", "gene-est-only" )
fitType = "parametric" # fitType = c("parametric", "local")
cds <- DESeq::newCountDataSet(countData = counts, conditions = conditions)
cds <- DESeq::estimateSizeFactors(cds)
cds <- DESeq::estimateDispersions(cds, method = method, sharingMode = sharingMode, fitType = fitType)
cond1 = unique(conditions)[1]
cond1
## [1] "10mM"
cond2 = unique(conditions)[2]
cond2
## [1] "0mM"
res <- DESeq::nbinomTest(cds, cond1, cond2)
options(width = 110)
class(res)
## [1] "data.frame"
head(res)
## id baseMean baseMeanA baseMeanB foldChange log2FoldChange pval padj
## 1 ENSG00000101557 3020.629949 3118.736628 2922.523269 0.9370856 -0.09374721 3.915884e-02 1.094709e-01
## 2 ENSG00000079134 568.215137 570.746238 565.684036 0.9911306 -0.01285299 9.219691e-01 1.000000e+00
## 3 ENSG00000261724 9.625743 12.461472 6.790014 0.5448806 -0.87598802 1.377790e-01 3.135394e-01
## 4 ENSG00000158270 12543.776728 16246.055825 8841.497632 0.5442243 -0.87772684 3.512526e-137 2.870532e-135
## 5 ENSG00000079101 25.200071 34.912756 15.487385 0.4436025 -1.17266062 7.678901e-04 3.155856e-03
## 6 ENSG00000176912 9.869019 6.735315 13.002723 1.9305292 0.94899639 1.051728e-01 2.528601e-01
nr.deg = sum(res$padj <= 0.05)
nr.up = sum((res$padj <= 0.05) & (res$log2FoldChange > 0))
nr.down = sum((res$padj <= 0.05) & (res$log2FoldChange < 0))
x <- res[,c("baseMean", "log2FoldChange", "padj")] # required by plotMA
head(x)
## baseMean log2FoldChange padj
## 1 3020.629949 -0.09374721 1.094709e-01
## 2 568.215137 -0.01285299 1.000000e+00
## 3 9.625743 -0.87598802 3.135394e-01
## 4 12543.776728 -0.87772684 2.870532e-135
## 5 25.200071 -1.17266062 3.155856e-03
## 6 9.869019 0.94899639 2.528601e-01
y.limits = c(max(min(x$log2FoldChange), -10), min(max(x$log2FoldChange), 10)) # limit to (-10, 10) or smaller
y.limits
## [1] -10.000000 9.705805
maintxt = paste(cond1, "vs.", cond2)
DESeq::plotMA(x, ylim = y.limits, col = ifelse(x$padj >= 0.05, "gray32", "red3"), main = maintxt, font.main = 1)
mtext(paste("DESeq: ", nr.deg, "DEG's ", nr.up, "up ", nr.down, "down"), side = 3, col = "blue")
abline(h = c(-2, 2), col = "blue", lty = 2, cex = 0.8) # horizontal blue lines show 4-fold changes.