kmeans
Get some data:
# Read the example point cloud: load() restores the saved object and returns
# its name, and get() fetches the object that carries that name.
clouds <- get(load("clouds.RData"))
# Rows are points, columns are coordinates.
dim(clouds)
## [1] 75 2
# Peek at the first few points.
head(clouds)
## cloud1_x cloud1_y
## [1,] 1.454594 3.751767
## [2,] 1.072375 4.571505
## [3,] 1.028263 3.808842
## [4,] 1.508426 3.705582
## [5,] 0.889498 4.217310
## [6,] 1.291009 4.051441
# Scatter plot of the raw points, before any clustering.
plot(
  clouds[, 1], clouds[, 2],
  col = "black",
  xlab = "x", ylab = "y",
  main = "Clouds", font.main = 1
)
Cluster with kmeans:
# kmeans() has to be told how many clusters to look for.
nr <- 3
# Partition the points into nr clusters. When only a count is given, the
# starting centres are drawn at random, so the cluster numbering may differ
# from run to run.
clres <- kmeans(clouds, centers = nr)
# Overview of everything the fit returns.
str(clres)
## List of 9
## $ cluster : int [1:75] 3 3 3 3 3 3 3 3 3 3 ...
## $ centers : num [1:3, 1:2] 1.97 3.07 1.07 2.95 2.01 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:3] "1" "2" "3"
## .. ..$ : chr [1:2] "cloud1_x" "cloud1_y"
## $ totss : num 114
## $ withinss : num [1:3] 4.92 3.66 6.13
## $ tot.withinss: num 14.7
## $ betweenss : num 99.5
## $ size : int [1:3] 26 25 24
## $ iter : int 2
## $ ifault : int 0
## - attr(*, "class")= chr "kmeans"
# Cluster label of every point, in the row order of clouds.
clres$cluster
## [1] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 3 3 3 3 3 3 3 2 2 2 2 2 2 2 2 2 2
## [36] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [71] 1 1 1 1 1
Plot, color according to identified clusters:
# One colour per point; everything starts out black.
colvec <- rep("black", nrow(clouds))
# Logical mask: TRUE where a point belongs to cluster 1.
clres$cluster == 1
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [12] FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
## [23] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [34] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [45] FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE
## [56] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [67] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# The same membership expressed as row indices.
which(clres$cluster == 1)
## [1] 18 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
## [24] 73 74 75
# Recolour each cluster in turn; the logical mask selects the same elements
# as the index vector shown above.
colvec[clres$cluster == 1] <- "blue"
colvec[clres$cluster == 2] <- "red"
colvec[clres$cluster == 3] <- "green"
colvec
## [1] "green" "green" "green" "green" "green" "green" "green" "green"
## [9] "green" "green" "green" "green" "green" "green" "green" "green"
## [17] "green" "blue" "green" "green" "green" "green" "green" "green"
## [25] "green" "red" "red" "red" "red" "red" "red" "red"
## [33] "red" "red" "red" "red" "red" "red" "red" "red"
## [41] "red" "red" "red" "red" "red" "red" "red" "red"
## [49] "red" "red" "blue" "blue" "blue" "blue" "blue" "blue"
## [57] "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue"
## [65] "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue"
## [73] "blue" "blue" "blue"
# Same scatter plot as before, now coloured by cluster membership.
plot(
  clouds[, 1], clouds[, 2],
  col = colvec,
  xlab = "x", ylab = "y",
  main = "Clusters found", font.main = 1
)
Add the cluster centers:
# Fitted centre coordinates, one row per cluster.
clres$centers
## cloud1_x cloud1_y
## 1 1.967705 2.952207
## 2 3.071303 2.014523
## 3 1.068808 4.037214
# Redraw the coloured scatter plot so the centres can be layered on top.
plot(
  clouds[, 1], clouds[, 2],
  col = colvec,
  xlab = "x", ylab = "y",
  main = "Clusters found", font.main = 1
)
# Overlay the centres as large filled squares. A two-column matrix is read as
# (x, y) pairs, and row k is drawn in the colour used for cluster k.
points(
  clres$centers,
  col = c("blue", "red", "green"),
  pch = 15, cex = 1.8
)
Some data:
# Read the second example data set: load() returns the name of the restored
# object, and get() fetches the object itself.
timedata <- get(load("timedata.RData"))
# Transposing turns every row of timedata into one column, and matplot() draws
# one line per column, each with its own plotting symbol.
matplot(
  1:10, t(timedata),
  pch = 1:6, type = "b", col = "black"
)
Find clusters (assume that there are two):
# Cluster the rows of timedata into two groups.
cres <- kmeans(timedata, centers = 2)
# Cluster label of every row, named after the row.
cres$cluster
## gene 1 gene 2 gene 3 gene 4 gene 5 gene 6
## 1 1 1 2 2 2
# One colour per row of timedata. The length is taken from the data rather
# than hard-coded, so it stays correct if the number of rows changes.
colvec <- rep("black", nrow(timedata))
colvec[cres$cluster == 1] <- "blue"
colvec[cres$cluster == 2] <- "red"
# Check the label-to-colour mapping side by side.
cbind(cres$cluster, colvec)
## colvec
## gene 1 "1" "blue"
## gene 2 "1" "blue"
## gene 3 "1" "blue"
## gene 4 "2" "red"
## gene 5 "2" "red"
## gene 6 "2" "red"
# Redraw the lines, coloured by the cluster each row was assigned to.
matplot(
  1:10, t(timedata),
  pch = 1:6, type = "b", col = colvec,
  main = "Clustered timelines"
)
cmeans
# cmeans() lives in the e1071 package.
library(e1071)
# As with kmeans(), the number of clusters has to be supplied.
nr <- 3
# Fit the clusters; besides a hard label per point, the result carries a
# membership matrix with one column per cluster.
res <- cmeans(clouds, centers = nr)
# Overview of everything the fit returns.
str(res)
## List of 7
## $ centers : num [1:3, 1:2] 1.98 3.08 1.07 2.96 2.02 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:3] "1" "2" "3"
## .. ..$ : chr [1:2] "cloud1_x" "cloud1_y"
## $ size : int [1:3] 26 25 24
## $ cluster : int [1:75] 3 3 3 3 3 3 3 3 3 3 ...
## $ membership : num [1:75, 1:3] 0.1869 0.0819 0.0247 0.258 0.0258 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : chr [1:3] "1" "2" "3"
## $ iter : num 9
## $ withinerror: num 0.155
## $ call : language cmeans(x = clouds, centers = nr)
## - attr(*, "class")= chr "fclust"
# Hard cluster label of every point, in the row order of clouds.
res$cluster
## [1] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 3 3 3 3 3 3 3 2 2 2 2 2 2 2 2 2 2
## [36] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [71] 1 1 1 1 1
Plot, color according to identified clusters:
# One colour per point; everything starts out black.
colvec <- rep("black", nrow(clouds))
# Logical mask: TRUE where a point belongs to cluster 1.
res$cluster == 1
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [12] FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
## [23] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [34] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [45] FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE
## [56] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [67] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# The same membership expressed as row indices.
which(res$cluster == 1)
## [1] 18 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
## [24] 73 74 75
# Recolour each cluster in turn; the logical mask selects the same elements
# as the index vector shown above.
colvec[res$cluster == 1] <- "blue"
colvec[res$cluster == 2] <- "red"
colvec[res$cluster == 3] <- "green"
colvec
## [1] "green" "green" "green" "green" "green" "green" "green" "green"
## [9] "green" "green" "green" "green" "green" "green" "green" "green"
## [17] "green" "blue" "green" "green" "green" "green" "green" "green"
## [25] "green" "red" "red" "red" "red" "red" "red" "red"
## [33] "red" "red" "red" "red" "red" "red" "red" "red"
## [41] "red" "red" "red" "red" "red" "red" "red" "red"
## [49] "red" "red" "blue" "blue" "blue" "blue" "blue" "blue"
## [57] "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue"
## [65] "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue"
## [73] "blue" "blue" "blue"
# Scatter plot of the points, coloured by their cmeans cluster.
plot(
  clouds[, 1], clouds[, 2],
  col = colvec,
  xlab = "x", ylab = "y",
  main = "Clusters found", font.main = 1
)
fanny
# Attach the cluster package, which provides fanny().
library(cluster)
# Uncomment to open the help page for fanny():
# ?fanny
The cmeans function does not accept a distance matrix as input, but the fanny function does.
cmeans works on the raw data and supports only the Euclidean and Manhattan distances.