Decision Trees

Training set: Edgar Anderson’s Iris Data


Iris versicolor


# Inspect the structure of the built-in iris data frame: number of
# observations, column names, column types and a preview of the values.
str(iris)  
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Show the first 10 rows. Columns 1 to 4 hold the numeric measurements;
# column 5 (Species) is the class label.
head(iris, 10)
##    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1           5.1         3.5          1.4         0.2  setosa
## 2           4.9         3.0          1.4         0.2  setosa
## 3           4.7         3.2          1.3         0.2  setosa
## 4           4.6         3.1          1.5         0.2  setosa
## 5           5.0         3.6          1.4         0.2  setosa
## 6           5.4         3.9          1.7         0.4  setosa
## 7           4.6         3.4          1.4         0.3  setosa
## 8           5.0         3.4          1.5         0.2  setosa
## 9           4.4         2.9          1.4         0.2  setosa
## 10          4.9         3.1          1.5         0.1  setosa
# List the class labels; Species is the fifth column of iris.
levels(iris$Species)
## [1] "setosa"     "versicolor" "virginica"

Constructing the Decision Tree

library(RWeka)
## Warning: package 'RWeka' was built under R version 3.5.3
# Fit a J48 decision tree that predicts Species from all remaining columns of
# iris, then print the fitted (pruned) tree with its split rules.
tree <- J48(Species ~ ., data = iris)
print(tree)
## J48 pruned tree
## ------------------
## 
## Petal.Width <= 0.6: setosa (50.0)
## Petal.Width > 0.6
## |   Petal.Width <= 1.7
## |   |   Petal.Length <= 4.9: versicolor (48.0/1.0)
## |   |   Petal.Length > 4.9
## |   |   |   Petal.Width <= 1.5: virginica (3.0)
## |   |   |   Petal.Width > 1.5: versicolor (3.0/1.0)
## |   Petal.Width > 1.7: virginica (46.0/1.0)
## 
## Number of Leaves  :  5
## 
## Size of the tree :   9
# Draw the fitted tree.
# NOTE(review): `tree` comes from RWeka::J48, so plot() should dispatch to
# RWeka's plot method for Weka trees, which renders via the partykit package.
# rpart does not appear to be what draws this plot - confirm that partykit is
# installed and whether the rpart attachment is needed at all.
library(rpart)  # attached by the original author "for plotting the tree"
plot(tree)  

Interpretation of the results


Checking the content of node 2:
# Node 2: flowers with a petal width of at most 0.6 (column 4 of iris is
# Petal.Width). `criterion` is a logical vector with one entry per row.
criterion <- iris$Petal.Width <= 0.6
table(iris$Species[criterion])
## 
##     setosa versicolor  virginica 
##         50          0          0
Checking the content of node 5:
# Node 5: petal width in (0.6, 1.7] and petal length of at most 4.9.
criterion <- with(
  iris,
  Petal.Width > 0.6 & Petal.Width <= 1.7 & Petal.Length <= 4.9
)
table(iris$Species[criterion])
## 
##     setosa versicolor  virginica 
##          0         47          1
Checking the content of node 7:
# Node 7: petal width in (0.6, 1.7], petal length above 4.9 and petal width
# of at most 1.5. The mask stays local to the with() call.
with(iris, {
  in_node <- Petal.Width > 0.6 & Petal.Width <= 1.7 &
    Petal.Length > 4.9 & Petal.Width <= 1.5
  table(Species[in_node])
})
## 
##     setosa versicolor  virginica 
##          0          0          3
Checking the content of node 8:
# Node 8: petal width in (0.6, 1.7], petal length above 4.9 and petal width
# above 1.5. The mask stays local to the with() call.
with(iris, {
  in_node <- Petal.Width > 0.6 & Petal.Width <= 1.7 &
    Petal.Length > 4.9 & Petal.Width > 1.5
  table(Species[in_node])
})
## 
##     setosa versicolor  virginica 
##          0          2          1
Checking the content of node 9:
# Node 9: flowers with a petal width above 1.7.
criterion <- iris[["Petal.Width"]] > 1.7
table(iris$Species[criterion])
## 
##     setosa versicolor  virginica 
##          0          1         45

Validation of the model


# Evaluate the fitted tree with 10-fold cross-validation and print the
# resulting summary statistics and confusion matrix.
# NOTE(review): the fold assignment is presumably random, so these figures may
# differ between runs unless a seed is supplied - confirm in the RWeka docs.
# summary(tree)  # also possible
valid <- evaluate_Weka_classifier(tree, numFolds = 10)
valid
## === 10 Fold Cross Validation ===
## 
## === Summary ===
## 
## Correctly Classified Instances         140               93.3333 %
## Incorrectly Classified Instances        10                6.6667 %
## Kappa statistic                          0.9   
## Mean absolute error                      0.0533
## Root mean squared error                  0.2028
## Relative absolute error                 11.996  %
## Root relative squared error             43.0252 %
## Total Number of Instances              150     
## 
## === Confusion Matrix ===
## 
##   a  b  c   <-- classified as
##  49  1  0 |  a = setosa
##   0 45  5 |  b = versicolor
##   0  4 46 |  c = virginica

A new dataset for prediction

# Dimensions of the four measurement columns (rows x columns); the random
# factor matrix built next has to match this shape.
dim(iris[,1:4])
## [1] 150   4
# Build a synthetic data set for prediction: every measurement in columns 1 to
# 4 of iris is multiplied by its own random factor drawn uniformly from
# [0.5, 1.5]. The size of the factor matrix is derived from the data rather
# than hard-coded, so it always matches the shape of iris[, 1:4].
# NOTE(review): no RNG seed is set, so every run yields different values than
# the printed output - consider calling set.seed() before this step.
trafo <- matrix(
  runif(nrow(iris) * 4, min = 0.5, max = 1.5),
  nrow = nrow(iris),
  ncol = 4
)
new.data <- as.data.frame(iris[, 1:4] * trafo)
# Sanity check left by the author: no perturbed value is negative.
# sum(new.data < 0) == 0  # TRUE
head(new.data, 10)
##    Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1      4.832716    5.122523    2.0991858  0.17564576
## 2      2.854215    1.874564    1.6270647  0.24534582
## 3      5.478056    2.248085    1.6337735  0.25635555
## 4      4.412058    2.095338    1.5231784  0.15873155
## 5      3.936727    5.347617    1.8457398  0.20578437
## 6      2.951850    4.738964    2.2712688  0.45398926
## 7      3.752095    2.697692    1.3381425  0.19033576
## 8      4.587077    3.397883    1.9045512  0.25682642
## 9      6.030708    1.860872    0.7206633  0.11685948
## 10     6.396816    3.502716    2.0242492  0.09565466

Prediction using the Decision tree

# Classify every row of new.data with the fitted tree; str() shows that the
# result is a factor of predicted species.
prediction <- predict(tree, newdata = new.data)
str(prediction)
##  Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Put the predicted class next to the perturbed measurements it was derived
# from (as an extra column named "prediction") and display the table.
result1 <- cbind(new.data, prediction = prediction)
result1
##     Sepal.Length Sepal.Width Petal.Length Petal.Width prediction
## 1       4.832716    5.122523    2.0991858  0.17564576     setosa
## 2       2.854215    1.874564    1.6270647  0.24534582     setosa
## 3       5.478056    2.248085    1.6337735  0.25635555     setosa
## 4       4.412058    2.095338    1.5231784  0.15873155     setosa
## 5       3.936727    5.347617    1.8457398  0.20578437     setosa
## 6       2.951850    4.738964    2.2712688  0.45398926     setosa
## 7       3.752095    2.697692    1.3381425  0.19033576     setosa
## 8       4.587077    3.397883    1.9045512  0.25682642     setosa
## 9       6.030708    1.860872    0.7206633  0.11685948     setosa
## 10      6.396816    3.502716    2.0242492  0.09565466     setosa
## 11      5.943058    4.072902    2.2274814  0.29316926     setosa
## 12      3.016985    1.821694    2.3393281  0.16772238     setosa
## 13      4.804658    1.826393    1.2735984  0.06542385     setosa
## 14      4.860173    3.780216    1.1170545  0.05857827     setosa
## 15      6.570960    3.681125    1.0500925  0.24184318     setosa
## 16      3.530287    2.724209    1.6194209  0.28702872     setosa
## 17      4.986281    5.834168    0.9886306  0.56685548     setosa
## 18      3.286190    2.676965    0.8913875  0.21118476     setosa
## 19      7.687908    4.891066    1.7007642  0.36118353     setosa
## 20      2.738755    3.124861    0.8124252  0.17246026     setosa
## 21      5.062289    3.345152    1.2242594  0.24436398     setosa
## 22      4.058749    5.204812    0.8351260  0.49736829     setosa
## 23      2.850975    5.251906    1.3364728  0.10125522     setosa
## 24      2.779614    3.081571    2.1900976  0.38634188     setosa
## 25      2.937637    2.331701    1.4850430  0.17474287     setosa
## 26      5.315325    3.624403    1.4020883  0.20023908     setosa
## 27      6.166012    3.409298    2.1961946  0.21572545     setosa
## 28      6.244989    1.873612    0.9203357  0.24105894     setosa
## 29      3.639132    1.885118    1.8450813  0.15824337     setosa
## 30      5.853805    4.299255    0.9571473  0.19648223     setosa
## 31      5.919919    2.759024    2.0954649  0.29200994     setosa
## 32      2.833008    1.901636    1.8998995  0.45850322     setosa
## 33      4.992551    3.225702    1.0551993  0.06531902     setosa
## 34      6.593400    5.713773    2.0325360  0.20341695     setosa
## 35      3.111231    2.841053    2.1874752  0.20881520     setosa
## 36      7.004825    3.311882    0.6576265  0.23245626     setosa
## 37      4.031835    3.261548    0.7743620  0.21463489     setosa
## 38      6.141885    2.077962    0.7858660  0.09718561     setosa
## 39      3.777757    4.234628    1.4579466  0.13455255     setosa
## 40      7.051590    4.191850    1.0865406  0.29689719     setosa
## 41      5.765006    4.701749    0.9577343  0.30544897     setosa
## 42      5.779574    1.235240    0.9728323  0.30412761     setosa
## 43      4.279601    4.330575    1.2167328  0.23049953     setosa
## 44      2.591314    2.878865    2.3927535  0.58061333     setosa
## 45      6.763196    4.990708    2.2109211  0.22348202     setosa
## 46      7.186208    2.160478    1.7440791  0.26029225     setosa
## 47      5.282986    3.812588    1.4221341  0.27726634     setosa
## 48      3.265226    1.956757    1.8479365  0.12266365     setosa
## 49      4.762972    3.543531    0.9891561  0.19297598     setosa
## 50      3.345149    3.448123    1.9302926  0.24637028     setosa
## 51      8.742657    3.841667    5.1695992  1.25692878  virginica
## 52      3.844931    2.386025    5.6622839  0.84952889  virginica
## 53      8.718737    2.379241    5.2242200  2.13433535  virginica
## 54      4.234440    3.037695    4.4805097  1.10476524 versicolor
## 55      4.959848    2.769440    4.2522363  2.18465440  virginica
## 56      6.045163    2.012805    3.0917925  0.86792880 versicolor
## 57      3.737477    3.915816    3.5859034  1.73440388  virginica
## 58      5.670866    2.028810    2.6499562  0.61750846 versicolor
## 59      4.202744    1.856623    3.6645251  1.68217684 versicolor
## 60      4.379822    2.718736    4.9990942  1.99128899  virginica
## 61      3.033290    2.297334    4.7318999  1.11315215 versicolor
## 62      8.786806    3.850833    4.3541196  1.47663198 versicolor
## 63      8.240229    3.285175    2.8399583  0.76385861 versicolor
## 64      6.446854    3.641839    3.7240133  0.82757080 versicolor
## 65      7.001825    2.267892    5.0228502  1.32203644  virginica
## 66      5.451719    2.590259    6.1478981  2.03299208  virginica
## 67      3.405662    1.791855    6.0851748  1.95148766  virginica
## 68      5.723218    3.768938    3.3760824  1.13247269 versicolor
## 69      4.780760    2.790003    2.6960507  2.20740653  virginica
## 70      5.272699    1.851365    3.8916031  0.98068134 versicolor
## 71      8.816352    3.132248    4.9889977  2.63767410  virginica
## 72      5.350125    1.986364    4.7478489  0.67025457 versicolor
## 73      4.203775    1.654432    2.5738289  1.58543463 versicolor
## 74      6.409963    2.610098    6.2135779  0.93882719  virginica
## 75      8.240457    3.964222    2.1895989  0.95691883 versicolor
## 76      8.762775    3.996914    3.9247762  0.87483287 versicolor
## 77      7.255909    2.319953    6.8492307  0.82336750  virginica
## 78      8.099363    4.080294    4.0517736  0.96917620 versicolor
## 79      4.820584    2.860442    5.6377012  1.86838030  virginica
## 80      6.318633    3.722013    2.0366081  0.79574199 versicolor
## 81      8.203643    3.057393    2.6312645  0.65870745 versicolor
## 82      4.517657    1.725634    3.4318489  1.46491500 versicolor
## 83      6.165700    3.630446    3.6515013  0.85046646 versicolor
## 84      8.763337    1.801799    6.5221238  1.37847329  virginica
## 85      7.194513    2.411695    3.4572395  1.84675892  virginica
## 86      6.482665    4.755928    3.7126390  2.28194465  virginica
## 87      6.467284    2.148698    5.7219687  1.84450568  virginica
## 88      8.776032    2.581382    5.2524546  1.46669481  virginica
## 89      8.262017    4.040443    5.2959714  0.83156829  virginica
## 90      3.284570    3.080284    2.3295676  0.81942775 versicolor
## 91      6.778975    1.682780    4.6253215  0.90405001 versicolor
## 92      8.115952    4.251712    5.1677275  1.32486388  virginica
## 93      6.299611    2.183056    3.9932239  0.61342497 versicolor
## 94      6.430682    2.853464    3.1478331  0.50764713     setosa
## 95      4.034101    2.948159    2.6495131  1.77721366  virginica
## 96      4.741147    3.573355    3.5776308  0.84312842 versicolor
## 97      3.677493    1.619995    2.4604212  1.52472772 versicolor
## 98      5.631430    4.214066    5.8615448  1.34765434  virginica
## 99      4.670305    3.000035    3.8492299  1.08718694 versicolor
## 100     3.406271    2.603243    4.7887187  1.83460089  virginica
## 101     5.412494    2.768269    3.9176295  1.82598282  virginica
## 102     4.174296    3.220575    5.3573717  2.50845081  virginica
## 103     8.916831    3.496705    5.3443078  2.69107179  virginica
## 104     3.408714    2.119770    7.6363355  2.42231125  virginica
## 105     7.693003    1.578378    7.5433542  3.11401597  virginica
## 106     6.943272    1.502315    9.4540851  2.33190844  virginica
## 107     6.761995    3.610559    4.9122007  2.11011187  virginica
## 108     9.880241    1.521159    4.2071940  1.68435920 versicolor
## 109     5.090855    2.899962    5.2489287  0.97091766  virginica
## 110     7.846517    2.219277    4.7105649  3.27590621  virginica
## 111     5.982181    2.407672    4.2231867  2.93446542  virginica
## 112     8.293309    1.937653    6.9035730  2.71335159  virginica
## 113     5.760098    2.710019    7.9684022  2.76157754  virginica
## 114     5.466427    2.674704    2.9973433  2.48488719  virginica
## 115     5.937860    3.563594    3.5523909  1.69759229 versicolor
## 116     3.546129    4.297026    4.9848704  2.38481172  virginica
## 117     8.346896    4.192600    4.8138415  1.48472573 versicolor
## 118     6.070196    4.624252    9.6513670  1.29250449  virginica
## 119     5.432680    1.780398    8.4202676  1.33046022  virginica
## 120     6.727776    2.604435    6.4902592  0.76100906  virginica
## 121     5.040791    3.815913    7.7680044  2.81599959  virginica
## 122     5.568607    1.552100    3.7793098  2.44377175  virginica
## 123     5.008661    2.306757    6.9837526  1.60726194 versicolor
## 124     9.442820    3.073266    4.4182416  1.10672933 versicolor
## 125     8.048954    3.674011    5.8599089  2.43866647  virginica
## 126     4.753932    2.620837    5.8422255  2.38860098  virginica
## 127     3.167409    3.566129    5.8160231  1.78234452  virginica
## 128     8.856527    1.842868    5.7695312  1.58478304 versicolor
## 129     6.757927    1.775758    6.2410990  1.59086761 versicolor
## 130     6.097694    2.542240    7.6771751  1.83585701  virginica
## 131     6.231776    2.412246    6.9606010  1.67937293 versicolor
## 132     7.042629    2.485692    7.6061105  1.74705897  virginica
## 133     8.894703    3.150081    5.6677920  2.95348354  virginica
## 134     7.775730    2.251343    3.5754494  1.15741537 versicolor
## 135     6.967865    3.566389    3.7665705  0.97070322 versicolor
## 136    11.123091    4.148667    5.1356627  2.09005688  virginica
## 137     5.032044    2.578277    8.0075962  3.40275984  virginica
## 138     5.656349    3.831401    4.3073856  1.62953249 versicolor
## 139     7.479488    3.118329    4.4863036  2.17516611  virginica
## 140     3.989887    1.930024    2.7528994  2.47162337  virginica
## 141     6.985691    1.743139    5.7631909  2.69427229  virginica
## 142     8.166098    2.387197    4.4143853  2.80877725  virginica
## 143     6.274991    3.370998    7.1657572  1.86624777  virginica
## 144     3.728006    2.090939    6.5733810  2.26185388  virginica
## 145     9.950141    4.274248    6.3119399  1.89759800  virginica
## 146     4.015230    3.306560    5.6319216  2.86518737  virginica
## 147     5.332751    1.825646    7.3790063  1.07590510  virginica
## 148     4.642727    2.002926    7.5637778  2.69859480  virginica
## 149     4.399559    2.964567    4.4050450  3.23821013  virginica
## 150     6.397060    1.586019    2.6698864  2.16664270  virginica
# Predict again, this time requesting class probabilities instead of labels;
# the result has one row per observation and one column per species.
result2 <- predict(tree, newdata = new.data, type = "probability")
result2
##     setosa versicolor  virginica
## 1        1 0.00000000 0.00000000
## 2        1 0.00000000 0.00000000
## 3        1 0.00000000 0.00000000
## 4        1 0.00000000 0.00000000
## 5        1 0.00000000 0.00000000
## 6        1 0.00000000 0.00000000
## 7        1 0.00000000 0.00000000
## 8        1 0.00000000 0.00000000
## 9        1 0.00000000 0.00000000
## 10       1 0.00000000 0.00000000
## 11       1 0.00000000 0.00000000
## 12       1 0.00000000 0.00000000
## 13       1 0.00000000 0.00000000
## 14       1 0.00000000 0.00000000
## 15       1 0.00000000 0.00000000
## 16       1 0.00000000 0.00000000
## 17       1 0.00000000 0.00000000
## 18       1 0.00000000 0.00000000
## 19       1 0.00000000 0.00000000
## 20       1 0.00000000 0.00000000
## 21       1 0.00000000 0.00000000
## 22       1 0.00000000 0.00000000
## 23       1 0.00000000 0.00000000
## 24       1 0.00000000 0.00000000
## 25       1 0.00000000 0.00000000
## 26       1 0.00000000 0.00000000
## 27       1 0.00000000 0.00000000
## 28       1 0.00000000 0.00000000
## 29       1 0.00000000 0.00000000
## 30       1 0.00000000 0.00000000
## 31       1 0.00000000 0.00000000
## 32       1 0.00000000 0.00000000
## 33       1 0.00000000 0.00000000
## 34       1 0.00000000 0.00000000
## 35       1 0.00000000 0.00000000
## 36       1 0.00000000 0.00000000
## 37       1 0.00000000 0.00000000
## 38       1 0.00000000 0.00000000
## 39       1 0.00000000 0.00000000
## 40       1 0.00000000 0.00000000
## 41       1 0.00000000 0.00000000
## 42       1 0.00000000 0.00000000
## 43       1 0.00000000 0.00000000
## 44       1 0.00000000 0.00000000
## 45       1 0.00000000 0.00000000
## 46       1 0.00000000 0.00000000
## 47       1 0.00000000 0.00000000
## 48       1 0.00000000 0.00000000
## 49       1 0.00000000 0.00000000
## 50       1 0.00000000 0.00000000
## 51       0 0.00000000 1.00000000
## 52       0 0.00000000 1.00000000
## 53       0 0.02173913 0.97826087
## 54       0 0.97916667 0.02083333
## 55       0 0.02173913 0.97826087
## 56       0 0.97916667 0.02083333
## 57       0 0.02173913 0.97826087
## 58       0 0.97916667 0.02083333
## 59       0 0.97916667 0.02083333
## 60       0 0.02173913 0.97826087
## 61       0 0.97916667 0.02083333
## 62       0 0.97916667 0.02083333
## 63       0 0.97916667 0.02083333
## 64       0 0.97916667 0.02083333
## 65       0 0.00000000 1.00000000
## 66       0 0.02173913 0.97826087
## 67       0 0.02173913 0.97826087
## 68       0 0.97916667 0.02083333
## 69       0 0.02173913 0.97826087
## 70       0 0.97916667 0.02083333
## 71       0 0.02173913 0.97826087
## 72       0 0.97916667 0.02083333
## 73       0 0.97916667 0.02083333
## 74       0 0.00000000 1.00000000
## 75       0 0.97916667 0.02083333
## 76       0 0.97916667 0.02083333
## 77       0 0.00000000 1.00000000
## 78       0 0.97916667 0.02083333
## 79       0 0.02173913 0.97826087
## 80       0 0.97916667 0.02083333
## 81       0 0.97916667 0.02083333
## 82       0 0.97916667 0.02083333
## 83       0 0.97916667 0.02083333
## 84       0 0.00000000 1.00000000
## 85       0 0.02173913 0.97826087
## 86       0 0.02173913 0.97826087
## 87       0 0.02173913 0.97826087
## 88       0 0.00000000 1.00000000
## 89       0 0.00000000 1.00000000
## 90       0 0.97916667 0.02083333
## 91       0 0.97916667 0.02083333
## 92       0 0.00000000 1.00000000
## 93       0 0.97916667 0.02083333
## 94       1 0.00000000 0.00000000
## 95       0 0.02173913 0.97826087
## 96       0 0.97916667 0.02083333
## 97       0 0.97916667 0.02083333
## 98       0 0.00000000 1.00000000
## 99       0 0.97916667 0.02083333
## 100      0 0.02173913 0.97826087
## 101      0 0.02173913 0.97826087
## 102      0 0.02173913 0.97826087
## 103      0 0.02173913 0.97826087
## 104      0 0.02173913 0.97826087
## 105      0 0.02173913 0.97826087
## 106      0 0.02173913 0.97826087
## 107      0 0.02173913 0.97826087
## 108      0 0.97916667 0.02083333
## 109      0 0.00000000 1.00000000
## 110      0 0.02173913 0.97826087
## 111      0 0.02173913 0.97826087
## 112      0 0.02173913 0.97826087
## 113      0 0.02173913 0.97826087
## 114      0 0.02173913 0.97826087
## 115      0 0.97916667 0.02083333
## 116      0 0.02173913 0.97826087
## 117      0 0.97916667 0.02083333
## 118      0 0.00000000 1.00000000
## 119      0 0.00000000 1.00000000
## 120      0 0.00000000 1.00000000
## 121      0 0.02173913 0.97826087
## 122      0 0.02173913 0.97826087
## 123      0 0.66666667 0.33333333
## 124      0 0.97916667 0.02083333
## 125      0 0.02173913 0.97826087
## 126      0 0.02173913 0.97826087
## 127      0 0.02173913 0.97826087
## 128      0 0.66666667 0.33333333
## 129      0 0.66666667 0.33333333
## 130      0 0.02173913 0.97826087
## 131      0 0.66666667 0.33333333
## 132      0 0.02173913 0.97826087
## 133      0 0.02173913 0.97826087
## 134      0 0.97916667 0.02083333
## 135      0 0.97916667 0.02083333
## 136      0 0.02173913 0.97826087
## 137      0 0.02173913 0.97826087
## 138      0 0.97916667 0.02083333
## 139      0 0.02173913 0.97826087
## 140      0 0.02173913 0.97826087
## 141      0 0.02173913 0.97826087
## 142      0 0.02173913 0.97826087
## 143      0 0.02173913 0.97826087
## 144      0 0.02173913 0.97826087
## 145      0 0.02173913 0.97826087
## 146      0 0.02173913 0.97826087
## 147      0 0.00000000 1.00000000
## 148      0 0.02173913 0.97826087
## 149      0 0.02173913 0.97826087
## 150      0 0.02173913 0.97826087

Drawback of Decision Tree models


uwe.menzel@matstat.org