## Vector

• vectors are the most basic data structure
• can be created in several ways:
# with the concatenation function c()
vector.c <- c(1, 2, 3, 4, 5)
vector.c
## [1] 1 2 3 4 5
animals <- c("cat", "sheep", "dog")
animals
## [1] "cat"   "sheep" "dog"
# with the sequence notation
vector.s <- 1:5
vector.s
## [1] 1 2 3 4 5
# with the rep() function
vector.r <- rep(1,5)
vector.r
## [1] 1 1 1 1 1

## List

• a list can contain objects of different type
• lists can be unnamed or named:
n = c(2, 3, 5)
s = c("aa", "bb", "cc", "dd", "ee")
b = c(TRUE, FALSE, TRUE, FALSE, FALSE)
x = list(n, s, b, 3)   # x contains copies of n, s, b
x
## [[1]]
## [1] 2 3 5
##
## [[2]]
## [1] "aa" "bb" "cc" "dd" "ee"
##
## [[3]]
## [1]  TRUE FALSE  TRUE FALSE FALSE
##
## [[4]]
## [1] 3
x[[1]]
## [1] 2 3 5
y = list()
y[["one"]] =  c(2, 3, 5)
y[["two"]] = c("aa", "bb", "cc", "dd", "ee")
y[["drei"]] = c(TRUE, FALSE, TRUE, FALSE, FALSE)
y
## $one
## [1] 2 3 5
##
## $two
## [1] "aa" "bb" "cc" "dd" "ee"
##
## $drei
## [1]  TRUE FALSE  TRUE FALSE FALSE
y[["two"]]
## [1] "aa" "bb" "cc" "dd" "ee"

## Matrix

• a matrix is a two-dimensional arrangement of objects
• all objects must be of the same type:
mat = matrix(1:12, ncol=3)
mat
##      [,1] [,2] [,3]
## [1,]    1    5    9
## [2,]    2    6   10
## [3,]    3    7   11
## [4,]    4    8   12
mat2 <- matrix(letters[1:12], ncol=3, byrow=TRUE)
mat2
##      [,1] [,2] [,3]
## [1,] "a"  "b"  "c"
## [2,] "d"  "e"  "f"
## [3,] "g"  "h"  "i"
## [4,] "j"  "k"  "l"
mat[1,]
## [1] 1 5 9
mat[,2:3]
##      [,1] [,2]
## [1,]    5    9
## [2,]    6   10
## [3,]    7   11
## [4,]    8   12

## Array

• similar to matrices but can have more than two dimensions see more
arr <- array(1:24, dim=c(3,4,2))
arr
## , , 1
##
##      [,1] [,2] [,3] [,4]
## [1,]    1    4    7   10
## [2,]    2    5    8   11
## [3,]    3    6    9   12
##
## , , 2
##
##      [,1] [,2] [,3] [,4]
## [1,]   13   16   19   22
## [2,]   14   17   20   23
## [3,]   15   18   21   24

## Data frame

• a data frame is a two-dimensional arrangement of objects
• all objects within the same column must be of the same type
• however, different columns can have different types
d <- c(1,2,3,4)
e <- c("red", "white", "red", NA)
f <- c(TRUE,TRUE,TRUE,FALSE)
mydata <- data.frame(ID = d, color = e, success = f)
mydata
##   ID color success
## 1  1   red    TRUE
## 2  2 white    TRUE
## 3  3   red    TRUE
## 4  4  <NA>   FALSE
mydata[1:2]
##   ID color
## 1  1   red
## 2  2 white
## 3  3   red
## 4  4  <NA>
mydata[c("ID","color")]
##   ID color
## 1  1   red
## 2  2 white
## 3  3   red
## 4  4  <NA>
mydata$ID
## [1] 1 2 3 4

## Factor

• a nominal variable can be stored as a factor
• the factor stores the nominal values as a vector of integers
• an internal vector of character strings (the original values) is mapped to these integers
gender <- c(rep("male",20), rep("female", 30))
gender <- factor(gender)
# stores gender as 20 2s followed by 30 1s and associates
# 1=female, 2=male internally (levels are sorted alphabetically)
summary(gender) 
## female   male
##     30     20
levels(gender)
## [1] "female" "male"
as.character(gender)
##  [1] "male"   "male"   "male"   "male"   "male"   "male"   "male"
##  [8] "male"   "male"   "male"   "male"   "male"   "male"   "male"
## [15] "male"   "male"   "male"   "male"   "male"   "male"   "female"
## [22] "female" "female" "female" "female" "female" "female" "female"
## [29] "female" "female" "female" "female" "female" "female" "female"
## [36] "female" "female" "female" "female" "female" "female" "female"
## [43] "female" "female" "female" "female" "female" "female" "female"
## [50] "female"

# Functions to explore objects

x <- matrix(1:25, ncol = 5)
colnames(x) = c("a", "b", "c", "d", "e")
rownames(x) = c("gene1", "gene2", "gene3", "gene4", "gene5")
x
##       a  b  c  d  e
## gene1 1  6 11 16 21
## gene2 2  7 12 17 22
## gene3 3  8 13 18 23
## gene4 4  9 14 19 24
## gene5 5 10 15 20 25
y = c(2, 4, 5, 2, 5, 7)
length(y)      # number of elements or components
## [1] 6
str(x)         # structure of an object
##  int [1:5, 1:5] 1 2 3 4 5 6 7 8 9 10 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:5] "gene1" "gene2" "gene3" "gene4" ...
##   ..$ : chr [1:5] "a" "b" "c" "d" ...
class(x)       # class or type of an object
## [1] "matrix"
colnames(x)    # names
## [1] "a" "b" "c" "d" "e"
names(y)       # names
## NULL
ls()           # list current objects
##  [1] "aa_seqs"         "animals"         "arr"
##  [4] "b"               "bio.tree"        "clouds"
##  [7] "clres"           "colvec"          "condition1"
## [10] "condition2"      "conditions"      "counts"
## [13] "counts.selected" "cres"            "d"
## [16] "de"              "design"          "detags"
## [19] "df"              "dge"             "distance"
## [22] "dna"             "dna2"            "dna3"
## [25] "dna_seqs"        "e"               "et"
## [28] "f"               "fastafile"       "gender"
## [31] "InsectSprays"    "maintxt"         "mat"
## [34] "mat2"            "mean.values"     "midpoints"
## [37] "mydata"          "n"               "nr"
## [40] "op"              "pinched_data"    "res"
## [43] "s"               "sets"            "sets1"
## [46] "stacked"         "std.dev"         "timedata"
## [49] "tree.nj"         "tuk"             "vector.c"
## [52] "vector.r"        "vector.s"        "x"
## [55] "y"               "ylim"            "z"
rm(animals)    # delete an object
# newobject <- edit(x)  edit copy and save as newobject
# fix(x)                edit in place