Basics of R Session 21.2 - Support Vector Machines - caret package
Dr Manohar Kapse
rm(list=ls())
# using the iris data set
data1<-iris
str(data1)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
dim(data1)
## [1] 150 5
summary(data1)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Species
## setosa :50
## versicolor:50
## virginica :50
##
##
##
# plot the data points; if a plotting error occurs, try dev.off() first
library(ggplot2)
ggplot(data1, aes(Sepal.Length, Sepal.Width, color = Species)) + geom_point()
ggplot(data1, aes(Petal.Length, Petal.Width, color = Species)) + geom_point()
ggplot(data1, aes(Sepal.Length, Petal.Length, color = Species)) + geom_point()
ggplot(data1, aes(Petal.Length, Sepal.Width, color = Species)) + geom_point()
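All four predictors can also be inspected at once with a scatterplot matrix; a minimal base-R sketch:
# scatterplot matrix of the four predictors, colored by species
pairs(data1[, 1:4], col = data1$Species, pch = 19)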
Using the caret package
caret provides several SVM methods: svmLinear, svmPoly, and svmRadial.
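A quick way to see caret's SVM variants and their tuning parameters, using caret's own lookup helpers (a minimal sketch):
library(caret)
grep("svm", names(getModelInfo()), value = TRUE)  # all model codes containing "svm"
modelLookup("svmLinear")                          # tuning parameter(s): C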
library(caret)
dtm1 <- train(Species~., data = data1,
              method = "svmLinear",
              preProcess = c("scale"))
dtm1
## Support Vector Machines with Linear Kernel
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## Pre-processing: scaled (4)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 150, 150, 150, 150, 150, 150, ...
## Resampling results:
##
## Accuracy Kappa
## 0.9675266 0.9506984
##
## Tuning parameter 'C' was held constant at a value of 1
print(dtm1)
## Support Vector Machines with Linear Kernel
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## Pre-processing: scaled (4)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 150, 150, 150, 150, 150, 150, ...
## Resampling results:
##
## Accuracy Kappa
## 0.9675266 0.9506984
##
## Tuning parameter 'C' was held constant at a value of 1
summary(dtm1)
## Length Class Mode
## 1 ksvm S4
dtm1$modelType
## [1] "Classification"
dtm1$finalModel
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 1
##
## Linear (vanilla) kernel function.
##
## Number of Support Vectors : 29
##
## Objective Function Value : -0.9818 -0.322 -17.0644
## Training error : 0.033333
# a decision-boundary plot is not possible here since there are more than two
# predictors; for such a plot we would use only two variables (X1, X2), as sketched below
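As noted above, a decision-boundary plot needs exactly two predictors, and kernlab's plot method additionally needs a two-class model. A minimal sketch under those assumptions (iris2 and svm2 are names introduced here for illustration):
# fit a binary, two-predictor SVM purely for visualization
library(kernlab)
iris2 <- droplevels(subset(iris, Species != "setosa",
                           select = c(Petal.Length, Petal.Width, Species)))
svm2 <- ksvm(Species ~ ., data = iris2, kernel = "vanilladot", C = 1)
plot(svm2, data = iris2)  # 2-D decision boundary with support vectors highlighted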
Next we specify the resampling scheme through the trainControl() parameters.
control1<-trainControl(method = "cv", number =10) # 10 fold cross validation
control1<-trainControl(method = "cv", number =5) # 5 fold cross validation
# using complete data set
dtm1 <- train(Species~., data = data1,
              method = "svmLinear",
              # NB: the argument must be spelled trControl; a misspelling such as
              # trcontrol is silently ignored and caret falls back to its default
              # bootstrap resampling (which is what the output below reports)
              trControl = control1,
              preProcess = c("scale"))
dtm1
## Support Vector Machines with Linear Kernel
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## Pre-processing: scaled (4)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 150, 150, 150, 150, 150, 150, ...
## Resampling results:
##
## Accuracy Kappa
## 0.9638456 0.9453001
##
## Tuning parameter 'C' was held constant at a value of 1
print(dtm1)
## Support Vector Machines with Linear Kernel
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## Pre-processing: scaled (4)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 150, 150, 150, 150, 150, 150, ...
## Resampling results:
##
## Accuracy Kappa
## 0.9638456 0.9453001
##
## Tuning parameter 'C' was held constant at a value of 1
summary(dtm1)
## Length Class Mode
## 1 ksvm S4
dtm1$modelType
## [1] "Classification"
dtm1$finalModel
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 1
##
## Linear (vanilla) kernel function.
##
## Number of Support Vectors : 29
##
## Objective Function Value : -0.9818 -0.322 -17.0644
## Training error : 0.033333
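Because a misspelled trControl is silently ignored, it is worth confirming which resampling scheme caret actually used; a one-line check on the fitted object:
dtm1$control$method  # "cv" when trControl = control1 was passed correctly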
control1<-trainControl(method = "repeatedcv", number =10, repeats = 3) #repeated cross validation
# using complete data set
dtm1 <- train(Species~., data = data1,
              method = "svmLinear",
              trControl = control1,
              preProcess = c("scale"))
dtm1
## Support Vector Machines with Linear Kernel
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## Pre-processing: scaled (4)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 150, 150, 150, 150, 150, 150, ...
## Resampling results:
##
## Accuracy Kappa
## 0.9602225 0.9397231
##
## Tuning parameter 'C' was held constant at a value of 1
print(dtm1)
## Support Vector Machines with Linear Kernel
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## Pre-processing: scaled (4)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 150, 150, 150, 150, 150, 150, ...
## Resampling results:
##
## Accuracy Kappa
## 0.9602225 0.9397231
##
## Tuning parameter 'C' was held constant at a value of 1
summary(dtm1)
## Length Class Mode
## 1 ksvm S4
dtm1$modelType
## [1] "Classification"
dtm1$finalModel
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 1
##
## Linear (vanilla) kernel function.
##
## Number of Support Vectors : 29
##
## Objective Function Value : -0.9818 -0.322 -17.0644
## Training error : 0.033333
If the resampled (hold-out) predicted values are required, add savePredictions = TRUE to trainControl.
control1 <- trainControl(method = "repeatedcv", repeats = 5, savePredictions = TRUE)
dtm1 <- train(Species~., data = data1,
              method = "svmLinear",
              trControl = control1,
              preProcess = c("scale"))
dtm1
## Support Vector Machines with Linear Kernel
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## Pre-processing: scaled (4)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 150, 150, 150, 150, 150, 150, ...
## Resampling results:
##
## Accuracy Kappa
## 0.9639723 0.9453882
##
## Tuning parameter 'C' was held constant at a value of 1
print(dtm1)
## Support Vector Machines with Linear Kernel
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## Pre-processing: scaled (4)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 150, 150, 150, 150, 150, 150, ...
## Resampling results:
##
## Accuracy Kappa
## 0.9639723 0.9453882
##
## Tuning parameter 'C' was held constant at a value of 1
summary(dtm1)
## Length Class Mode
## 1 ksvm S4
dtm1$modelType
## [1] "Classification"
dtm1$finalModel
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 1
##
## Linear (vanilla) kernel function.
##
## Number of Support Vectors : 29
##
## Objective Function Value : -0.9818 -0.322 -17.0644
## Training error : 0.033333
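Once savePredictions = TRUE is set, the hold-out predictions are stored on the fitted object; a short sketch of how to inspect them:
head(dtm1$pred)  # one row per held-out observation: pred, obs, tuning values, resample id
confusionMatrix(dtm1$pred$pred, dtm1$pred$obs)  # confusion matrix over the saved resampled predictions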
control1<-trainControl(method = "cv", number =10) # 10 fold cross validation
control1<-trainControl(method = "cv", number =5) # 5 fold cross validation
# using complete data set
dtm1<-train(Species~., data = data1,
method="svmLinear",
trcontrol=control1,
preProcess = c("scale"))
dtm1
## Support Vector Machines with Linear Kernel
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## Pre-processing: scaled (4)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 150, 150, 150, 150, 150, 150, ...
## Resampling results:
##
## Accuracy Kappa
## 0.9638456 0.9453001
##
## Tuning parameter 'C' was held constant at a value of 1
print(dtm1)
## Support Vector Machines with Linear Kernel
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## Pre-processing: scaled (4)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 150, 150, 150, 150, 150, 150, ...
## Resampling results:
##
## Accuracy Kappa
## 0.9638456 0.9453001
##
## Tuning parameter 'C' was held constant at a value of 1
summary(dtm1)
## Length Class Mode
## 1 ksvm S4
dtm1$modelType
## [1] "Classification"
dtm1$finalModel
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 1
##
## Linear (vanilla) kernel function.
##
## Number of Support Vectors : 29
##
## Objective Function Value : -0.9818 -0.322 -17.0644
## Training error : 0.033333
control1<-trainControl(method = "repeatedcv", number =10, repeats = 3) #repeated cross validation
# using complete data set
dtm1<-train(Species~., data = data1,
method="svmLinear",
trcontrol=control1,
preProcess = c("scale"))
dtm1
## Support Vector Machines with Linear Kernel
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## Pre-processing: scaled (4)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 150, 150, 150, 150, 150, 150, ...
## Resampling results:
##
## Accuracy Kappa
## 0.9602225 0.9397231
##
## Tuning parameter 'C' was held constant at a value of 1
print(dtm1)
## Support Vector Machines with Linear Kernel
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## Pre-processing: scaled (4)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 150, 150, 150, 150, 150, 150, ...
## Resampling results:
##
## Accuracy Kappa
## 0.9602225 0.9397231
##
## Tuning parameter 'C' was held constant at a value of 1
summary(dtm1)
## Length Class Mode
## 1 ksvm S4
dtm1$modelType
## [1] "Classification"
dtm1$finalModel
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 1
##
## Linear (vanilla) kernel function.
##
## Number of Support Vectors : 29
##
## Objective Function Value : -0.9818 -0.322 -17.0644
## Training error : 0.033333
If predicted values are required use savePredictions = TRUE
ctrl <- trainControl(method = "repeatedcv", repeats = 5, savePredictions = TRUE)
dtm1<-train(Species~., data = data1,
method="svmLinear",
trcontrol=control1,
preProcess = c("scale"))
dtm1
## Support Vector Machines with Linear Kernel
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## Pre-processing: scaled (4)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 150, 150, 150, 150, 150, 150, ...
## Resampling results:
##
## Accuracy Kappa
## 0.9639723 0.9453882
##
## Tuning parameter 'C' was held constant at a value of 1
print(dtm1)
## Support Vector Machines with Linear Kernel
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## Pre-processing: scaled (4)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 150, 150, 150, 150, 150, 150, ...
## Resampling results:
##
## Accuracy Kappa
## 0.9639723 0.9453882
##
## Tuning parameter 'C' was held constant at a value of 1
summary(dtm1)
## Length Class Mode
## 1 ksvm S4
dtm1$modelType
## [1] "Classification"
dtm1$finalModel
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 1
##
## Linear (vanilla) kernel function.
##
## Number of Support Vectors : 29
##
## Objective Function Value : -0.9818 -0.322 -17.0644
## Training error : 0.033333
Using a tuning grid
set.seed(512)
SVMgrid <- expand.grid(C = c(1,2,3,10))
# using complete data set
dtm1 <- train(Species~., data = data1,
              method = "svmLinear",
              trControl = control1,
              tuneGrid = SVMgrid,
              preProcess = c("scale"))
dtm1
## Support Vector Machines with Linear Kernel
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## Pre-processing: scaled (4)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 150, 150, 150, 150, 150, 150, ...
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 1 0.9554404 0.9326608
## 2 0.9533454 0.9295609
## 3 0.9547179 0.9316088
## 10 0.9548137 0.9316673
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was C = 1.
print(dtm1)
## Support Vector Machines with Linear Kernel
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## Pre-processing: scaled (4)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 150, 150, 150, 150, 150, 150, ...
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 1 0.9554404 0.9326608
## 2 0.9533454 0.9295609
## 3 0.9547179 0.9316088
## 10 0.9548137 0.9316673
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was C = 1.
summary(dtm1)
## Length Class Mode
## 1 ksvm S4
dtm1$modelType
## [1] "Classification"
dtm1$finalModel
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 1
##
## Linear (vanilla) kernel function.
##
## Number of Support Vectors : 29
##
## Objective Function Value : -0.9818 -0.322 -17.0644
## Training error : 0.033333
# alternatively, tuneLength = 10 asks caret itself for 10 candidate values of the cost parameter C (sketch below)
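A sketch of the tuneLength alternative. Note that each method defines its own default grid: svmLinear's holds C constant at 1 (as the earlier output showed), whereas svmRadial's expands C automatically, so the radial kernel is used here for illustration (dtm2 is a new name for this sketch):
dtm2 <- train(Species ~ ., data = data1,
              method = "svmRadial",
              trControl = control1,
              tuneLength = 10,   # ask caret for 10 candidate values of C
              preProcess = c("scale"))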
The svmPoly kernel is tuned over three parameters: degree, scale, and C (the cost). The scale parameter is the multiplier applied to the inner product inside the polynomial kernel, (scale * <x, x'> + offset)^degree; it is a kernel hyperparameter, distinct from the preProcess scaling used to normalize the data.
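caret's modelLookup confirms the tuning parameters for this method:
modelLookup("svmPoly")  # degree, scale, C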
control1 <- trainControl(method = "repeatedcv", repeats = 5, savePredictions = TRUE)
dtm1 <- train(Species~., data = data1,
              method = "svmPoly",
              trControl = control1,
              preProcess = c("scale"))
dtm1
## Support Vector Machines with Polynomial Kernel
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## Pre-processing: scaled (4)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 150, 150, 150, 150, 150, 150, ...
## Resampling results across tuning parameters:
##
## degree scale C Accuracy Kappa
## 1 0.001 0.25 0.3189350 0.03179580
## 1 0.001 0.50 0.3189350 0.03179580
## 1 0.001 1.00 0.3352224 0.05382717
## 1 0.010 0.25 0.6060400 0.42933891
## 1 0.010 0.50 0.6946539 0.55587515
## 1 0.010 1.00 0.8206702 0.73625618
## 1 0.100 0.25 0.9033585 0.85484912
## 1 0.100 0.50 0.9420969 0.91270983
## 1 0.100 1.00 0.9477396 0.92115842
## 2 0.001 0.25 0.3189350 0.03179580
## 2 0.001 0.50 0.3352224 0.05382717
## 2 0.001 1.00 0.5561180 0.35914942
## 2 0.010 0.25 0.7010685 0.56534163
## 2 0.010 0.50 0.8243618 0.74130498
## 2 0.010 1.00 0.8839544 0.82612438
## 2 0.100 0.25 0.9472588 0.92044753
## 2 0.100 0.50 0.9470714 0.92013650
## 2 0.100 1.00 0.9561435 0.93368563
## 3 0.001 0.25 0.3189350 0.03179580
## 3 0.001 0.50 0.4916925 0.27266504
## 3 0.001 1.00 0.6295076 0.46130918
## 3 0.010 0.25 0.7854196 0.68727525
## 3 0.010 0.50 0.8672595 0.80183892
## 3 0.010 1.00 0.9226673 0.88361825
## 3 0.100 0.25 0.9459481 0.91839940
## 3 0.100 0.50 0.9519643 0.92739431
## 3 0.100 1.00 0.9548726 0.93180227
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were degree = 2, scale = 0.1 and C = 1.
print(dtm1)
## Support Vector Machines with Polynomial Kernel
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## Pre-processing: scaled (4)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 150, 150, 150, 150, 150, 150, ...
## Resampling results across tuning parameters:
##
## degree scale C Accuracy Kappa
## 1 0.001 0.25 0.3189350 0.03179580
## 1 0.001 0.50 0.3189350 0.03179580
## 1 0.001 1.00 0.3352224 0.05382717
## 1 0.010 0.25 0.6060400 0.42933891
## 1 0.010 0.50 0.6946539 0.55587515
## 1 0.010 1.00 0.8206702 0.73625618
## 1 0.100 0.25 0.9033585 0.85484912
## 1 0.100 0.50 0.9420969 0.91270983
## 1 0.100 1.00 0.9477396 0.92115842
## 2 0.001 0.25 0.3189350 0.03179580
## 2 0.001 0.50 0.3352224 0.05382717
## 2 0.001 1.00 0.5561180 0.35914942
## 2 0.010 0.25 0.7010685 0.56534163
## 2 0.010 0.50 0.8243618 0.74130498
## 2 0.010 1.00 0.8839544 0.82612438
## 2 0.100 0.25 0.9472588 0.92044753
## 2 0.100 0.50 0.9470714 0.92013650
## 2 0.100 1.00 0.9561435 0.93368563
## 3 0.001 0.25 0.3189350 0.03179580
## 3 0.001 0.50 0.4916925 0.27266504
## 3 0.001 1.00 0.6295076 0.46130918
## 3 0.010 0.25 0.7854196 0.68727525
## 3 0.010 0.50 0.8672595 0.80183892
## 3 0.010 1.00 0.9226673 0.88361825
## 3 0.100 0.25 0.9459481 0.91839940
## 3 0.100 0.50 0.9519643 0.92739431
## 3 0.100 1.00 0.9548726 0.93180227
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were degree = 2, scale = 0.1 and C = 1.
summary(dtm1)
## Length Class Mode
## 1 ksvm S4
dtm1$modelType
## [1] "Classification"
dtm1$finalModel
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 1
##
## Polynomial kernel function.
## Hyperparameters : degree = 2 scale = 0.1 offset = 1
##
## Number of Support Vectors : 48
##
## Objective Function Value : -2.9978 -1.3517 -28.2822
## Training error : 0.02
plot(dtm1)  # resampled accuracy across the tuning grid
# using a tuning grid
set.seed(512)
SVMgrid <- expand.grid(C = c(0.5,0.6,0.7,0.8,0.9), degree = c(1,2,3), scale = c(0.1,0.2,0.3))
dtm3 <- train(Species~., data = data1,
              method = "svmPoly",
              trControl = control1,
              tuneGrid = SVMgrid,
              preProcess = c("scale"))
dtm3
## Support Vector Machines with Polynomial Kernel
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## Pre-processing: scaled (4)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 150, 150, 150, 150, 150, 150, ...
## Resampling results across tuning parameters:
##
## C degree scale Accuracy Kappa
## 0.5 1 0.1 0.9379075 0.9064204
## 0.5 1 0.2 0.9534679 0.9296606
## 0.5 1 0.3 0.9541353 0.9306748
## 0.5 2 0.1 0.9577418 0.9360208
## 0.5 2 0.2 0.9586165 0.9373771
## 0.5 2 0.3 0.9645233 0.9463505
## 0.5 3 0.1 0.9571117 0.9350699
## 0.5 3 0.2 0.9653535 0.9475874
## 0.5 3 0.3 0.9607010 0.9406222
## 0.6 1 0.1 0.9460512 0.9185776
## 0.6 1 0.2 0.9590451 0.9380687
## 0.6 1 0.3 0.9519713 0.9274845
## 0.6 2 0.1 0.9555269 0.9327030
## 0.6 2 0.2 0.9608402 0.9407640
## 0.6 2 0.3 0.9653035 0.9475480
## 0.6 3 0.1 0.9563995 0.9340219
## 0.6 3 0.2 0.9622559 0.9429065
## 0.6 3 0.3 0.9606169 0.9404695
## 0.7 1 0.1 0.9490226 0.9230217
## 0.7 1 0.2 0.9564146 0.9341296
## 0.7 1 0.3 0.9526973 0.9285361
## 0.7 2 0.1 0.9548734 0.9317350
## 0.7 2 0.2 0.9631051 0.9442228
## 0.7 2 0.3 0.9623245 0.9430513
## 0.7 3 0.1 0.9593307 0.9384856
## 0.7 3 0.2 0.9593189 0.9384934
## 0.7 3 0.3 0.9578854 0.9363387
## 0.8 1 0.1 0.9505728 0.9253374
## 0.8 1 0.2 0.9565080 0.9342746
## 0.8 1 0.3 0.9532393 0.9293213
## 0.8 2 0.1 0.9540195 0.9304183
## 0.8 2 0.2 0.9624038 0.9431791
## 0.8 2 0.3 0.9609420 0.9409593
## 0.8 3 0.1 0.9623751 0.9430964
## 0.8 3 0.2 0.9599641 0.9394671
## 0.8 3 0.3 0.9547693 0.9316805
## 0.9 1 0.1 0.9549665 0.9319497
## 0.9 1 0.2 0.9519713 0.9274845
## 0.9 1 0.3 0.9532393 0.9293295
## 0.9 2 0.1 0.9564509 0.9340994
## 0.9 2 0.2 0.9616895 0.9421074
## 0.9 2 0.3 0.9614959 0.9417833
## 0.9 3 0.1 0.9631051 0.9442228
## 0.9 3 0.2 0.9590896 0.9381498
## 0.9 3 0.3 0.9524235 0.9281326
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were degree = 3, scale = 0.2 and C
## = 0.5.
dtm3$bestTune
## degree scale C
## 8 3 0.2 0.5
dtm3$finalModel
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 0.5
##
## Polynomial kernel function.
## Hyperparameters : degree = 3 scale = 0.2 offset = 1
##
## Number of Support Vectors : 34
##
## Objective Function Value : -0.5869 -0.3544 -10.2014
## Training error : 0.02
predict1<-predict(dtm3,data1)
confusionMatrix(predict1,data1$Species)
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 49 2
## virginica 0 1 48
##
## Overall Statistics
##
## Accuracy : 0.98
## 95% CI : (0.9427, 0.9959)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.97
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 0.9800 0.9600
## Specificity 1.0000 0.9800 0.9900
## Pos Pred Value 1.0000 0.9608 0.9796
## Neg Pred Value 1.0000 0.9899 0.9802
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3267 0.3200
## Detection Prevalence 0.3333 0.3400 0.3267
## Balanced Accuracy 1.0000 0.9800 0.9750
svmRadial
The tuning parameters for the radial (RBF) kernel are sigma and C.
Again using the complete data set:
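When no grid is supplied, caret holds sigma at a single value derived from kernlab's sigest heuristic; a sketch of that estimate (assuming kernlab is installed and its formula interface for sigest):
library(kernlab)
sigest(Species ~ ., data = data1)  # low/mid/high sigma estimates; caret derives its fixed sigma from this range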
control1 <- trainControl(method = "repeatedcv", repeats = 5, savePredictions = TRUE)
dtm5 <- train(Species~., data = data1,
              method = "svmRadial",
              trControl = control1,
              preProcess = c("scale"))
dtm5
## Support Vector Machines with Radial Basis Function Kernel
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## Pre-processing: scaled (4)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 150, 150, 150, 150, 150, 150, ...
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 0.25 0.9405263 0.9102520
## 0.50 0.9505407 0.9251513
## 1.00 0.9543298 0.9308903
##
## Tuning parameter 'sigma' was held constant at a value of 0.4859277
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.4859277 and C = 1.
print(dtm5)
## Support Vector Machines with Radial Basis Function Kernel
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## Pre-processing: scaled (4)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 150, 150, 150, 150, 150, 150, ...
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 0.25 0.9405263 0.9102520
## 0.50 0.9505407 0.9251513
## 1.00 0.9543298 0.9308903
##
## Tuning parameter 'sigma' was held constant at a value of 0.4859277
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.4859277 and C = 1.
summary(dtm5)
## Length Class Mode
## 1 ksvm S4
dtm5$modelType
## [1] "Classification"
dtm5$finalModel
## (prints the fitted ksvm: Gaussian Radial Basis kernel, sigma = 0.4859277, cost C = 1)
plot(dtm5)  # resampled accuracy across the tuning grid
# using a tuning grid
set.seed(512)
SVMgrid <- expand.grid(C = c(0.5,0.6,0.7,0.8), sigma = c(0.1,0.2,0.3,0.4,0.5))
dtm5 <- train(Species~., data = data1,
              method = "svmRadial",
              trControl = control1,
              tuneGrid = SVMgrid,
              preProcess = c("scale"))
dtm5
## Support Vector Machines with Radial Basis Function Kernel
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## Pre-processing: scaled (4)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 150, 150, 150, 150, 150, 150, ...
## Resampling results across tuning parameters:
##
## C sigma Accuracy Kappa
## 0.5 0.1 0.9438578 0.9153355
## 0.5 0.2 0.9500508 0.9245833
## 0.5 0.3 0.9535023 0.9298088
## 0.5 0.4 0.9481746 0.9217782
## 0.5 0.5 0.9481311 0.9216479
## 0.6 0.1 0.9497571 0.9242068
## 0.6 0.2 0.9536985 0.9300512
## 0.6 0.3 0.9519995 0.9275425
## 0.6 0.4 0.9511274 0.9261463
## 0.6 0.5 0.9496003 0.9238181
## 0.7 0.1 0.9504495 0.9251802
## 0.7 0.2 0.9558533 0.9332882
## 0.7 0.3 0.9556814 0.9330129
## 0.7 0.4 0.9540759 0.9305549
## 0.7 0.5 0.9531584 0.9292216
## 0.8 0.1 0.9520468 0.9274961
## 0.8 0.2 0.9556846 0.9330448
## 0.8 0.3 0.9556814 0.9330129
## 0.8 0.4 0.9555594 0.9328267
## 0.8 0.5 0.9530785 0.9291254
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.2 and C = 0.7.
dtm5$bestTune
## sigma C
## 12 0.2 0.7
dtm5$finalModel
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 0.7
##
## Gaussian Radial Basis kernel function.
## Hyperparameter : sigma = 0.2
##
## Number of Support Vectors : 61
##
## Objective Function Value : -3.4561 -2.7751 -21.3074
## Training error : 0.033333
predict1<-predict(dtm5,data1)
confusionMatrix(predict1,data1$Species)
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 48 3
## virginica 0 2 47
##
## Overall Statistics
##
## Accuracy : 0.9667
## 95% CI : (0.9239, 0.9891)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.95
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 0.9600 0.9400
## Specificity 1.0000 0.9700 0.9800
## Pos Pred Value 1.0000 0.9412 0.9592
## Neg Pred Value 1.0000 0.9798 0.9703
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3200 0.3133
## Detection Prevalence 0.3333 0.3400 0.3267
## Balanced Accuracy 1.0000 0.9650 0.9600
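With several tuned models in hand, their resampling profiles can be compared side by side; a sketch assuming dtm3 (polynomial) and dtm5 (radial) are still in the workspace. The comparison is strictly fair only when the models were fit with identical resampling indices (same seed and trainControl):
results <- resamples(list(Poly = dtm3, Radial = dtm5))
summary(results)  # accuracy and kappa distributions for each model
bwplot(results)   # lattice box-and-whisker comparison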
control1 <- trainControl(method = "repeatedcv", repeats = 5, savePredictions = TRUE)
dtm5<-train(Species~., data = data1,
method="svmRadial",
trcontrol=control1,
preProcess = c("scale"))
dtm5
## Support Vector Machines with Radial Basis Function Kernel
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## Pre-processing: scaled (4)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 150, 150, 150, 150, 150, 150, ...
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 0.25 0.9405263 0.9102520
## 0.50 0.9505407 0.9251513
## 1.00 0.9543298 0.9308903
##
## Tuning parameter 'sigma' was held constant at a value of 0.4859277
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.4859277 and C = 1.
print(dtm5)
## Support Vector Machines with Radial Basis Function Kernel
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## Pre-processing: scaled (4)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 150, 150, 150, 150, 150, 150, ...
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 0.25 0.9405263 0.9102520
## 0.50 0.9505407 0.9251513
## 1.00 0.9543298 0.9308903
##
## Tuning parameter 'sigma' was held constant at a value of 0.4859277
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.4859277 and C = 1.
summary(dtm5)
## Length Class Mode
## 1 ksvm S4
dtm1$modelType
## [1] "Classification"
dtm1$finalModel
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 1
##
## Polynomial kernel function.
## Hyperparameters : degree = 2 scale = 0.1 offset = 1
##
## Number of Support Vectors : 48
##
## Objective Function Value : -2.9978 -1.3517 -28.2822
## Training error : 0.02
plot(dtm1)
# using tune grid
set.seed(512)
SVMgrid <- expand.grid(C = c(0.5,0.6,0.7,0.8),sigma=c(0.1,0.2,0.3,0.4,0.5))
dtm5<-train(Species~., data = data1,
method="svmRadial",
trcontrol=control1,
tuneGrid=SVMgrid,
preProcess = c("scale"))
dtm5
## Support Vector Machines with Radial Basis Function Kernel
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## Pre-processing: scaled (4)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 150, 150, 150, 150, 150, 150, ...
## Resampling results across tuning parameters:
##
## C sigma Accuracy Kappa
## 0.5 0.1 0.9438578 0.9153355
## 0.5 0.2 0.9500508 0.9245833
## 0.5 0.3 0.9535023 0.9298088
## 0.5 0.4 0.9481746 0.9217782
## 0.5 0.5 0.9481311 0.9216479
## 0.6 0.1 0.9497571 0.9242068
## 0.6 0.2 0.9536985 0.9300512
## 0.6 0.3 0.9519995 0.9275425
## 0.6 0.4 0.9511274 0.9261463
## 0.6 0.5 0.9496003 0.9238181
## 0.7 0.1 0.9504495 0.9251802
## 0.7 0.2 0.9558533 0.9332882
## 0.7 0.3 0.9556814 0.9330129
## 0.7 0.4 0.9540759 0.9305549
## 0.7 0.5 0.9531584 0.9292216
## 0.8 0.1 0.9520468 0.9274961
## 0.8 0.2 0.9556846 0.9330448
## 0.8 0.3 0.9556814 0.9330129
## 0.8 0.4 0.9555594 0.9328267
## 0.8 0.5 0.9530785 0.9291254
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.2 and C = 0.7.
dtm5$bestTune
## sigma C
## 12 0.2 0.7
dtm5$finalModel
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 0.7
##
## Gaussian Radial Basis kernel function.
## Hyperparameter : sigma = 0.2
##
## Number of Support Vectors : 61
##
## Objective Function Value : -3.4561 -2.7751 -21.3074
## Training error : 0.033333
predict1<-predict(dtm5,data1)
confusionMatrix(predict1,data1$Species)
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 48 3
## virginica 0 2 47
##
## Overall Statistics
##
## Accuracy : 0.9667
## 95% CI : (0.9239, 0.9891)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.95
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 0.9600 0.9400
## Specificity 1.0000 0.9700 0.9800
## Pos Pred Value 1.0000 0.9412 0.9592
## Neg Pred Value 1.0000 0.9798 0.9703
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3200 0.3133
## Detection Prevalence 0.3333 0.3400 0.3267
## Balanced Accuracy 1.0000 0.9650 0.9600
Finally, we can divide the data into training and test sets.
set.seed(100)
library(caret)
datasplit<-createDataPartition(data1$Species, times = 1, p=0.7, list = FALSE)
# create training and test data
datatrain<-data1[datasplit,] # training data set (70%)
datatest<-data1[-datasplit,] # test data set (30%)
dim(datatrain)
## [1] 105 5
dim(datatest)
## [1] 45 5
# now apply the same training procedure to datatrain and evaluate on datatest (sketch below)
#----------------------------------------------------------------#
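A sketch of "the same procedure" applied to the split data: tune on datatrain, then evaluate once on the held-out datatest (dtm6 is a new name for this sketch):
control1 <- trainControl(method = "repeatedcv", number = 10, repeats = 3)
dtm6 <- train(Species ~ ., data = datatrain,
              method = "svmRadial",
              trControl = control1,
              preProcess = c("scale"))
predtest <- predict(dtm6, newdata = datatest)
confusionMatrix(predtest, datatest$Species)  # performance on the 30% hold-out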