cmdscale
Load some data:
expr = get(load("RPKM_means_DOG_DMSO.RData"))
dim(expr)
## [1] 8 24607
rownames(expr)
## [1] "1d_mean_DMSO" "5d_mean_DMSO" "10d_mean_DMSO" "20d_mean_DMSO"
## [5] "1d_mean_DOG" "5d_mean_DOG" "10d_mean_DOG" "20d_mean_DOG"
We have to tell cmdscale
what distance measure we want to use.
The function cmdscale
takes an object of class dist
as input:
distance = dist(expr, method = "euclidean") # dist computes distances between the rows of a matrix.
class(distance)
## [1] "dist"
length(distance)
## [1] 28
scaling <- cmdscale(distance, k = 2, eig = TRUE) # create 2D-plot
str(scaling)
## List of 5
## $ points: num [1:8, 1:2] 80879 -14010 -30137 -58733 111974 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:8] "1d_mean_DMSO" "5d_mean_DMSO" "10d_mean_DMSO" "20d_mean_DMSO" ...
## .. ..$ : NULL
## $ eig : num [1:8] 2.89e+10 7.54e+09 2.50e+09 1.24e+09 4.22e+08 ...
## $ x : NULL
## $ ac : num 0
## $ GOF : num [1:2] 0.89 0.89
x <- scaling$points[,1]
y <- scaling$points[,2]
plot(x,y)
Colouring the points according to time (1d, 5d, 10d, 20d) might be a good idea:
rownames(expr)
## [1] "1d_mean_DMSO" "5d_mean_DMSO" "10d_mean_DMSO" "20d_mean_DMSO"
## [5] "1d_mean_DOG" "5d_mean_DOG" "10d_mean_DOG" "20d_mean_DOG"
colorvec = rep("black", nrow(expr))
grep("1d", rownames(expr))
## [1] 1 5
colorvec[grep("1d", rownames(expr))] = "blue"
colorvec[grep("5d", rownames(expr))] = "red"
colorvec[grep("10d", rownames(expr))] = "green"
colorvec[grep("20d", rownames(expr))] = "brown"
colorvec
## [1] "blue" "red" "green" "brown" "blue" "red" "green" "brown"
cbind(rownames(expr), colorvec)
## colorvec
## [1,] "1d_mean_DMSO" "blue"
## [2,] "5d_mean_DMSO" "red"
## [3,] "10d_mean_DMSO" "green"
## [4,] "20d_mean_DMSO" "brown"
## [5,] "1d_mean_DOG" "blue"
## [6,] "5d_mean_DOG" "red"
## [7,] "10d_mean_DOG" "green"
## [8,] "20d_mean_DOG" "brown"
Shaping the points according to medium (look here for possible shapes:
rownames(expr)
## [1] "1d_mean_DMSO" "5d_mean_DMSO" "10d_mean_DMSO" "20d_mean_DMSO"
## [5] "1d_mean_DOG" "5d_mean_DOG" "10d_mean_DOG" "20d_mean_DOG"
pchvec = rep(1, nrow(expr))
grep("DMSO", rownames(expr))
## [1] 1 2 3 4
pchvec[grep("DMSO", rownames(expr))] = 17
pchvec[grep("DOG", rownames(expr))] = 18
pchvec
## [1] 17 17 17 17 18 18 18 18
cbind(rownames(expr), pchvec)
## pchvec
## [1,] "1d_mean_DMSO" "17"
## [2,] "5d_mean_DMSO" "17"
## [3,] "10d_mean_DMSO" "17"
## [4,] "20d_mean_DMSO" "17"
## [5,] "1d_mean_DOG" "18"
## [6,] "5d_mean_DOG" "18"
## [7,] "10d_mean_DOG" "18"
## [8,] "20d_mean_DOG" "18"
Plot it:
plot(x, y, col = colorvec, pch = pchvec, xlab = "", ylab = "")
legend("bottomright", c("1d", "5d", "10d", "20d"), col = c("blue", "red", "green", "brown"), pch = 16)
legend("top", c("DMSO", "DOG"), pch = c(17, 18), col = "black")
title("MDS plot, time & medium")
Turn the matrix around (“transpose”) in the dist command:
n = 100 # include that many genes
sample(1:ncol(expr), n) # choose randomly
## [1] 6797 5328 7913 3422 19903 10974 19893 8962 816 17612 14198
## [12] 4242 10144 21478 3141 5955 6373 1901 13716 20860 5890 16108
## [23] 7098 10881 10889 22785 8445 21149 10412 8309 7262 12146 1588
## [34] 19917 17043 10926 14193 9824 18518 23976 7387 20689 11162 12175
## [45] 12940 6491 15908 19550 17763 23515 17777 1230 9981 3445 340
## [56] 24026 7864 4788 606 13970 9048 9345 7137 19995 20804 13158
## [67] 12300 11324 18966 322 12107 9458 14960 2871 5307 15033 14567
## [78] 8741 21710 14682 19659 19619 16219 15428 11972 12961 8173 1451
## [89] 23046 12211 2138 17220 4743 4227 4514 4739 10961 18347 5250
## [100] 9830
set.seed(76)
small = expr[, sample(1:ncol(expr), n)] # select n genes randomly
distance = dist(t(small), method = "euclidean") # dist computes distances between the rows of a matrix.
length(distance)
## [1] 4950
scaling <- cmdscale(distance, k = 2, eig = TRUE) # create 2D-plot
str(scaling)
## List of 5
## $ points: num [1:100, 1:2] -784 -739 -800 -437 -800 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:100] "R173.1" "T28H10.3" "F49E12.7" "C17E4.3" ...
## .. ..$ : NULL
## $ eig : num [1:100] 1.19e+09 3.01e+07 2.63e+06 6.86e+05 2.50e+05 ...
## $ x : NULL
## $ ac : num 0
## $ GOF : num [1:2] 0.997 0.997
x <- scaling$points[,1]
y <- scaling$points[,2]
maintxt = paste("MDS plot for", n, "genes")
plot(x, y, col = "red", pch = 18, main = maintxt, font.main = 1)