Vector
- vectors are the most basic data structure
- can be created in several ways:
# 1) combine individual values with c()
vector.c <- c(1, 2, 3, 4, 5)
vector.c
## [1] 1 2 3 4 5
animals <- c("cat", "sheep", "dog")
animals
## [1] "cat"   "sheep" "dog"
# 2) colon operator: the consecutive integers from 1 to 5
vector.s <- 1:5
vector.s
## [1] 1 2 3 4 5
# 3) rep(): repeat the value 1 five times
vector.r <- rep(1, times = 5)
vector.r
## [1] 1 1 1 1 1
List
- a list can contain objects of different type
- lists can be unnamed or named:
# three vectors of different types and lengths
n <- c(2, 3, 5)
s <- c("aa", "bb", "cc", "dd", "ee")
b <- c(TRUE, FALSE, TRUE, FALSE, FALSE)
# unnamed list: holds copies of n, s and b plus the single number 3
x <- list(n, s, b, 3)
x
## [[1]]
## [1] 2 3 5
##
## [[2]]
## [1] "aa" "bb" "cc" "dd" "ee"
##
## [[3]]
## [1]  TRUE FALSE  TRUE FALSE FALSE
##
## [[4]]
## [1] 3
# [[ ]] with a position extracts that single component
x[[1]]
## [1] 2 3 5
# named list: start empty, then add components by name
y <- list()
y[["one"]] <- c(2, 3, 5)
y[["two"]] <- c("aa", "bb", "cc", "dd", "ee")
y[["drei"]] <- c(TRUE, FALSE, TRUE, FALSE, FALSE)
y
## $one
## [1] 2 3 5
##
## $two
## [1] "aa" "bb" "cc" "dd" "ee"
##
## $drei
## [1]  TRUE FALSE  TRUE FALSE FALSE
# [[ ]] with a name extracts the component stored under that name
y[["two"]]
## [1] "aa" "bb" "cc" "dd" "ee"
Matrix
- a matrix is a two-dimensional arrangement of objects
- all objects must be of the same type:
# 1..12 arranged in 3 columns (hence 4 rows); filled column by column
mat <- matrix(1:12, ncol = 3)
mat
##      [,1] [,2] [,3]
## [1,]    1    5    9
## [2,]    2    6   10
## [3,]    3    7   11
## [4,]    4    8   12
# byrow = TRUE fills the matrix row by row instead
mat2 <- matrix(letters[1:12], ncol = 3, byrow = TRUE)
mat2
##      [,1] [,2] [,3]
## [1,] "a"  "b"  "c"
## [2,] "d"  "e"  "f"
## [3,] "g"  "h"  "i"
## [4,] "j"  "k"  "l"
# first row, all columns
mat[1, ]
## [1] 1 5 9
# all rows, columns 2 and 3
mat[, 2:3]
##      [,1] [,2]
## [1,]    5    9
## [2,]    6   10
## [3,]    7   11
## [4,]    8   12
Array
- similar to matrices, but can have more than two dimensions (see ?array for more)
# 1..24 arranged as 3 rows x 4 columns x 2 layers
arr <- array(1:24, dim = c(3, 4, 2))
arr
## , , 1
##
##      [,1] [,2] [,3] [,4]
## [1,]    1    4    7   10
## [2,]    2    5    8   11
## [3,]    3    6    9   12
##
## , , 2
##
##      [,1] [,2] [,3] [,4]
## [1,]   13   16   19   22
## [2,]   14   17   20   23
## [3,]   15   18   21   24
Data frame
- a data frame is a two-dimensional arrangement of objects
- all objects within the same column must be of the same type
- however, different columns can have different types
# one vector per column; each column has its own type
d <- c(1, 2, 3, 4)
e <- c("red", "white", "red", NA)
f <- c(TRUE, TRUE, TRUE, FALSE)
mydata <- data.frame(ID = d, color = e, success = f)
mydata
##   ID color success
## 1  1   red    TRUE
## 2  2 white    TRUE
## 3  3   red    TRUE
## 4  4  <NA>   FALSE
# select columns by position ...
mydata[1:2]
##   ID color
## 1  1   red
## 2  2 white
## 3  3   red
## 4  4  <NA>
# ... or by name
mydata[c("ID", "color")]
##   ID color
## 1  1   red
## 2  2 white
## 3  3   red
## 4  4  <NA>
# $ extracts a single column as a vector
mydata$ID
## [1] 1 2 3 4
Factor
- a nominal variable can be stored as a factor
- the factor stores the nominal values as a vector of integers
- an internal vector of character strings (the original values) is mapped to these integers
# 20 "male" entries followed by 30 "female" entries
gender <- c(rep("male", 20), rep("female", 30))
gender <- factor(gender)
# levels are sorted alphabetically, so internally 1 = female and 2 = male;
# gender is therefore stored as 20 2s followed by 30 1s
summary(gender)
## female   male
##     30     20
levels(gender)
## [1] "female" "male"
# as.character() maps the integer codes back to the original strings
as.character(gender)
##  [1] "male"   "male"   "male"   "male"   "male"   "male"   "male"
##  [8] "male"   "male"   "male"   "male"   "male"   "male"   "male"
## [15] "male"   "male"   "male"   "male"   "male"   "male"   "female"
## [22] "female" "female" "female" "female" "female" "female" "female"
## [29] "female" "female" "female" "female" "female" "female" "female"
## [36] "female" "female" "female" "female" "female" "female" "female"
## [43] "female" "female" "female" "female" "female" "female" "female"
## [50] "female"
Functions to explore objects
# x is redefined here as a 5 x 5 matrix with row and column names
x <- matrix(1:25, ncol = 5)
colnames(x) <- c("a", "b", "c", "d", "e")
rownames(x) <- c("gene1", "gene2", "gene3", "gene4", "gene5")
x
##       a  b  c  d  e
## gene1 1  6 11 16 21
## gene2 2  7 12 17 22
## gene3 3  8 13 18 23
## gene4 4  9 14 19 24
## gene5 5 10 15 20 25
# y is redefined here as a plain numeric vector without names
y <- c(2, 4, 5, 2, 5, 7)
length(y) # number of elements or components
## [1] 6
str(x) # compact display of an object's structure
##  int [1:5, 1:5] 1 2 3 4 5 6 7 8 9 10 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:5] "gene1" "gene2" "gene3" "gene4" ...
##   ..$ : chr [1:5] "a" "b" "c" "d" ...
class(x) # class of an object
## [1] "matrix"
# (R >= 4.0.0 prints "matrix" "array" here)
colnames(x) # column names of a matrix
## [1] "a" "b" "c" "d" "e"
names(y) # element names; NULL because y has none
## NULL
ls() # list the objects in the current workspace
# (this listing includes objects not created in this file, presumably from a
# longer session; your own workspace will differ)
##  [1] "aa_seqs"         "animals"         "arr"
##  [4] "b"               "bio.tree"        "clouds"
##  [7] "clres"           "colvec"          "condition1"
## [10] "condition2"      "conditions"      "counts"
## [13] "counts.selected" "cres"            "d"
## [16] "de"              "design"          "detags"
## [19] "df"              "dge"             "distance"
## [22] "dna"             "dna2"            "dna3"
## [25] "dna_seqs"        "e"               "et"
## [28] "f"               "fastafile"       "gender"
## [31] "InsectSprays"    "maintxt"         "mat"
## [34] "mat2"            "mean.values"     "midpoints"
## [37] "mydata"          "n"               "nr"
## [40] "op"              "pinched_data"    "res"
## [43] "s"               "sets"            "sets1"
## [46] "stacked"         "std.dev"         "timedata"
## [49] "tree.nj"         "tuk"             "vector.c"
## [52] "vector.r"        "vector.s"        "x"
## [55] "y"               "ylim"            "z"
rm(animals) # delete an object from the workspace
# Interactive editing (left commented out):
# newobject <- edit(x)  # edit a copy of x and save it as newobject
# fix(x)                # edit x in place