1. What is the Baum-Welch algorithm?

2. Example: Dishonest casino

library(HMM)
## Warning: package 'HMM' was built under R version 3.5.2
states = c("F", "L")  
symbols = 1:6          

3. Getting some observation

part1 = sample(symbols, 300, replace = TRUE)                            # fair
part2 = sample(symbols, 300, replace = TRUE, prob = c(rep(0.1,5), 0.5)) # loaded
part3 = sample(symbols, 300, replace = TRUE)                            # fair
observation <- c(part1, part2, part1)
observation
##   [1] 1 3 2 1 2 1 1 3 6 2 5 2 3 3 4 5 2 4 5 3 3 1 2 5 1 1 3 3 1 4 5 6 4 2 6
##  [36] 5 4 3 3 5 1 2 1 6 1 5 3 5 6 1 3 3 5 1 6 1 5 3 4 5 6 5 5 3 3 3 4 1 1 6
##  [71] 5 6 3 5 6 6 4 1 2 1 6 5 2 5 1 3 5 3 2 3 5 1 4 5 3 4 3 3 3 4 5 5 3 1 1
## [106] 5 2 1 4 5 6 2 6 6 4 4 5 2 1 4 2 3 6 2 2 3 1 2 1 4 1 5 1 5 5 2 3 3 6 6
## [141] 6 4 1 4 4 1 6 4 1 6 2 3 6 2 4 2 6 3 3 1 1 3 1 2 1 6 1 6 3 5 2 5 3 1 1
## [176] 5 5 2 4 2 1 3 1 3 6 1 5 6 3 2 3 6 3 3 2 5 5 2 3 4 4 3 4 4 6 2 2 2 4 5
## [211] 5 4 1 3 1 4 3 3 6 1 2 3 1 6 5 4 1 6 2 6 6 2 6 5 4 3 5 4 6 3 1 6 2 5 1
## [246] 5 3 1 5 6 1 1 3 2 4 3 4 6 1 2 3 3 6 4 5 5 6 3 5 6 3 2 1 3 3 3 2 1 5 2
## [281] 5 5 6 3 6 5 6 2 5 6 1 5 4 2 6 4 3 4 1 2 6 2 2 2 1 6 6 6 5 4 6 6 5 6 6
## [316] 6 6 3 6 6 1 6 6 3 2 6 3 4 6 5 5 6 4 4 6 5 6 2 6 6 1 2 5 6 4 6 6 6 6 6
## [351] 3 6 2 4 6 5 6 4 3 6 4 6 2 2 3 4 6 6 2 6 3 5 5 6 6 6 5 5 6 3 6 6 6 4 4
## [386] 6 6 5 3 6 6 6 6 4 6 6 5 6 3 4 6 6 5 3 6 6 4 6 6 6 5 6 5 4 4 6 6 6 6 6
## [421] 5 6 2 3 5 4 6 4 6 6 1 6 6 6 3 6 6 6 6 6 6 6 6 5 6 3 4 4 4 5 6 6 6 6 6
## [456] 2 3 1 6 6 6 1 6 2 6 6 5 6 6 3 6 6 6 6 5 2 6 1 3 6 6 6 4 6 4 3 6 6 3 5
## [491] 5 6 1 6 3 6 5 3 6 6 6 2 6 6 4 2 6 6 6 5 6 6 6 5 1 2 6 6 6 6 6 2 6 6 6
## [526] 6 6 6 6 6 6 6 1 3 6 4 6 2 6 2 1 2 6 4 6 2 6 1 6 6 3 6 3 4 5 4 6 3 5 6
## [561] 5 6 5 6 3 6 2 6 6 5 2 3 6 6 6 6 3 3 5 6 6 3 6 5 4 2 4 4 6 6 6 6 6 6 5
## [596] 6 2 4 2 6 1 3 2 1 2 1 1 3 6 2 5 2 3 3 4 5 2 4 5 3 3 1 2 5 1 1 3 3 1 4
## [631] 5 6 4 2 6 5 4 3 3 5 1 2 1 6 1 5 3 5 6 1 3 3 5 1 6 1 5 3 4 5 6 5 5 3 3
## [666] 3 4 1 1 6 5 6 3 5 6 6 4 1 2 1 6 5 2 5 1 3 5 3 2 3 5 1 4 5 3 4 3 3 3 4
## [701] 5 5 3 1 1 5 2 1 4 5 6 2 6 6 4 4 5 2 1 4 2 3 6 2 2 3 1 2 1 4 1 5 1 5 5
## [736] 2 3 3 6 6 6 4 1 4 4 1 6 4 1 6 2 3 6 2 4 2 6 3 3 1 1 3 1 2 1 6 1 6 3 5
## [771] 2 5 3 1 1 5 5 2 4 2 1 3 1 3 6 1 5 6 3 2 3 6 3 3 2 5 5 2 3 4 4 3 4 4 6
## [806] 2 2 2 4 5 5 4 1 3 1 4 3 3 6 1 2 3 1 6 5 4 1 6 2 6 6 2 6 5 4 3 5 4 6 3
## [841] 1 6 2 5 1 5 3 1 5 6 1 1 3 2 4 3 4 6 1 2 3 3 6 4 5 5 6 3 5 6 3 2 1 3 3
## [876] 3 2 1 5 2 5 5 6 3 6 5 6 2 5 6 1 5 4 2 6 4 3 4 1 2

4. Initialization of the transition-, emission- , and start probabilities

trans_prob =  matrix(c(0.95, 0.05, 0.1, 0.9), byrow = TRUE, nrow = 2)            # transition probabilities
rownames(trans_prob) = states
colnames(trans_prob) = states
rowSums(trans_prob)   # must be all one (stochastic matrix)
## F L 
## 1 1
trans_prob
##      F    L
## F 0.95 0.05
## L 0.10 0.90
emission_prob = matrix(c(rep(1/6, 6), rep(0.1, 5), 0.5), byrow = TRUE, nrow = 2)  # emission probabilities
rownames(emission_prob) = states
colnames(emission_prob) = symbols
rowSums(emission_prob)                   # must be all one (stochastic matrix)
## F L 
## 1 1
emission_prob
##           1         2         3         4         5         6
## F 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667
## L 0.1000000 0.1000000 0.1000000 0.1000000 0.1000000 0.5000000
start_prob = c(0.5, 0.5)                 # if we have no clue how it starts 
names(start_prob) = states
sum(start_prob)
## [1] 1
start_prob
##   F   L 
## 0.5 0.5

5. Running Baum-Welch

hmm = initHMM(states, symbols, startProbs = start_prob, transProbs = trans_prob, emissionProbs = emission_prob)
estimated_hmm <- baumWelch(hmm, observation, maxIterations = 100, delta = 1E-9, pseudoCount = 0) 
round(estimated_hmm$hmm$transProbs, 4) 
##     to
## from      F      L
##    F 0.9983 0.0017
##    L 0.0036 0.9964
round(estimated_hmm$hmm$emissionProbs, 4)
##       symbols
## states      1      2      3      4      5      6
##      F 0.1856 0.1507 0.2044 0.1261 0.1749 0.1584
##      L 0.0392 0.0806 0.1025 0.1075 0.1190 0.5512
# estimated_hmm$difference  #  sum of the distances between consecutive transition and emission matrices (L2-norm)
plot(estimated_hmm$difference, ylab = "Distance L2", xlab = "iteration-nr.", col = "red")  


uwe.menzel@matstat.org