# source("http://bioconductor.org/biocLite.R")
# biocLite("edgeR")
library(edgeR)
## Loading required package: limma
counts = get(load("counts_Hs.RData"))
dim(counts)
## [1] 27704 6
range(counts)
## [1] 0 899348
is.matrix(counts)
## [1] TRUE
head(counts)
## A1 A2 A3 B1 B2 B3
## ENSG00000101557 2590 2647 3225 3173 3147 3515
## ENSG00000079134 487 488 570 565 663 677
## ENSG00000261724 10 10 14 4 10 9
## ENSG00000158270 13565 14006 16451 9602 9823 10311
## ENSG00000079101 29 23 44 16 20 16
## ENSG00000176912 4 9 5 14 10 20
conditions = get(load("conditions_Hs.RData"))
conditions
## [1] "10mM" "10mM" "10mM" "0mM" "0mM" "0mM"
length(conditions) == ncol(counts) # must be TRUE
## [1] TRUE
length(unique(conditions)) == 2 # simple analysis: just compare two conditions
## [1] TRUE
nr.replicates.group1 = sum(conditions == unique(conditions)[1])
nr.replicates.group1
## [1] 3
nr.replicates.group2 = sum(conditions == unique(conditions)[2])
nr.replicates.group2
## [1] 3
The parameter method=“TMM” means the weighted trimmed mean of M-values (to the reference) proposed by Robinson and Oshlack (2010).
dge <- DGEList(counts = counts, group = conditions, remove.zeros = FALSE)
dge <- calcNormFactors(dge, method = "TMM")
dge <- estimateCommonDisp(dge)
dge <- estimateTagwiseDisp(dge, trend = "movingave")
et <- exactTest(dge)
show.max = nrow(et$table) # displays all genes
show.max = 10
topHits = topTags(et, n = show.max, adjust.method = "BH", sort.by = "p.value")
test.result.frame = topHits$table
head(test.result.frame)
## logFC logCPM PValue FDR
## ENSG00000260512 12.190325 4.835635 0 0
## ENSG00000153820 10.541393 3.149688 0 0
## ENSG00000152402 10.034181 3.419070 0 0
## ENSG00000020633 9.887362 3.269853 0 0
## ENSG00000125740 -9.339320 7.468802 0 0
## ENSG00000173376 -9.004989 4.476489 0 0
nr.up = sum((test.result.frame$FDR <= 0.05) & (test.result.frame$logFC < 0))
nr.up
## [1] 3
nr.down = sum((test.result.frame$FDR <= 0.05) & (test.result.frame$logFC > 0))
nr.down
## [1] 7
nr.not.sig = sum(test.result.frame$FDR > 0.05)
nr.not.sig
## [1] 0
nr.up + nr.down + nr.not.sig == show.max # must be TRUE
## [1] TRUE
plotSmear(et)
condition1 = unique(conditions)[1]
condition2 = unique(conditions)[2]
maintxt = paste(condition1, "vs.", condition2)
maintxt
## [1] "10mM vs. 0mM"
de <- decideTestsDGE(et, p = 0.05, adjust = "BH")
detags <- rownames(dge)[as.logical(de)]
length(detags) # regulated
## [1] 11056
plotSmear(et, de.tags = detags, main = maintxt, font.main = 1)
mtext(paste("edgeR: ", length(detags), "DEG's ", nr.up, "up ", nr.down, "down"), side = 3, col = "blue")
abline(h = c(-2, 2), col = "blue") # The horizontal blue lines show 4-fold changes.
# filename = paste("edgeR.smearplot.", condition1, "--", condition2, ".png", sep="")
# dev.copy(png, file = filename); dev.off()