# Basics of R Session 21.2 - Support Vector Machine - caret package
# Dr Manohar Kapse
# NOTE(review): the original began with rm(list = ls()), which wipes the
# user's entire workspace as a side effect of sourcing this script. Removed;
# restart R for a clean session instead. Nothing below depends on a clean
# environment -- data1 is (re)assigned immediately.

# Using the iris data set (shipped with base R: 150 obs., 4 numeric
# measurements + a 3-level Species factor, 50 per species, no NAs).
data1 <- iris

str(data1)      # structure: 150 obs. of 5 variables
dim(data1)      # [1] 150 5
summary(data1)  # per-column five-number summaries; Species counts 50/50/50

# Scatter plots of measurement pairs, coloured by species.
# If a plot fails with a graphics-device error, run dev.off() and retry.
# FIX: use bare column names inside aes(), not data1$col -- aes() evaluates
# names in the data given to ggplot(), and $-references break faceting and
# produce ugly axis/legend labels.
library(ggplot2)
ggplot(data1, aes(Sepal.Length, Sepal.Width,  color = Species)) + geom_point()
ggplot(data1, aes(Petal.Length, Petal.Width,  color = Species)) + geom_point()
ggplot(data1, aes(Sepal.Length, Petal.Length, color = Species)) + geom_point()
ggplot(data1, aes(Petal.Length, Sepal.Width,  color = Species)) + geom_point()
# (Duplicated exploration chunk from the original transcript, kept for
# fidelity.)
# NOTE(review): rm(list = ls()) removed -- it wipes the user's workspace as a
# side effect of sourcing this script; data1 is reassigned immediately anyway.

# Using the iris data set (base R; 150 obs., 4 numeric columns + Species).
data1 <- iris

str(data1)      # structure: 150 obs. of 5 variables
dim(data1)      # [1] 150 5
summary(data1)  # Species counts: setosa 50, versicolor 50, virginica 50

# Scatter plots of measurement pairs, coloured by species.
# If a plot fails with a graphics-device error, run dev.off() and retry.
# FIX: bare column names inside aes() instead of data1$col -- aes() evaluates
# names in the plot data, and $-references break faceting and label quality.
library(ggplot2)
ggplot(data1, aes(Sepal.Length, Sepal.Width,  color = Species)) + geom_point()
ggplot(data1, aes(Petal.Length, Petal.Width,  color = Species)) + geom_point()
ggplot(data1, aes(Sepal.Length, Petal.Length, color = Species)) + geom_point()
ggplot(data1, aes(Petal.Length, Sepal.Width,  color = Species)) + geom_point()
# Using the caret package.
# SVM methods available in caret: svmLinear, svmPoly, svmRadial.
# Fit a linear-kernel SVM with caret's defaults: no trainControl supplied, so
# resampling is 25 bootstrap repetitions; predictors are scaled first.
library(caret)
dtm1 <- train(Species ~ ., data = data1,
              method = "svmLinear",
              preProcess = c("scale"))
# NOTE(review): preProcess = c("scale") only divides by the SD; the more usual
# choice for SVMs is c("center", "scale"). Left as-is to preserve behaviour.

dtm1         # auto-print of the train object
## Support Vector Machines with Linear Kernel
## 150 samples, 4 predictors, 3 classes
## Pre-processing: scaled (4); Resampling: Bootstrapped (25 reps)
## Accuracy 0.9675266, Kappa 0.9506984
## Tuning parameter 'C' was held constant at a value of 1

print(dtm1)  # identical output to the auto-print above

summary(dtm1)
## Length Class Mode
## 1      ksvm  S4     -- the final model is a kernlab S4 "ksvm" object

dtm1$modelType
## [1] "Classification"

dtm1$finalModel
## ksvm: C-svc, cost C = 1, linear (vanilla) kernel,
## 29 support vectors, training error 0.033333

# A decision-boundary plot is not possible here: such plots need exactly two
# predictor variables (X1, X2) and this model uses four.
# (Duplicated chunk from the original transcript, kept for fidelity.)
# Linear-kernel SVM with caret defaults: no trainControl, so 25 bootstrap
# repetitions; predictors scaled.
library(caret)
dtm1 <- train(Species ~ ., data = data1,
              method = "svmLinear",
              preProcess = c("scale"))

dtm1         # auto-print
## Linear-kernel SVM; 150 samples, 4 predictors, 3 classes
## Pre-processing: scaled (4); Resampling: Bootstrapped (25 reps)
## Accuracy 0.9675266, Kappa 0.9506984; C held constant at 1

print(dtm1)  # same output as the auto-print

summary(dtm1)
## Length Class Mode
## 1      ksvm  S4

dtm1$modelType
## [1] "Classification"

dtm1$finalModel
## ksvm: C-svc, cost C = 1, linear kernel, 29 SVs, training error 0.033333

# Decision-boundary plots need exactly two predictors; this model has four.
# Training control parameter: the resampling scheme passed to caret::train().
control1 <- trainControl(method = "cv", number = 10)  # 10-fold cross validation
control1 <- trainControl(method = "cv", number = 5)   # 5-fold CV (overwrites the 10-fold spec)

# Using the complete data set.
# BUG FIX: the argument was misspelled `trcontrol`. R's named-argument
# matching is case-sensitive, so the control object was silently swallowed by
# `...` and caret fell back to its default bootstrap resampling -- the
# original transcript output ("Resampling: Bootstrapped (25 reps)") confirms
# this. The correct argument name is `trControl`.
dtm1 <- train(Species ~ ., data = data1,
              method = "svmLinear",
              trControl = control1,
              preProcess = c("scale"))

dtm1
print(dtm1)
## With trControl honoured, the report shows "Resampling: Cross-Validated
## (5 fold)" instead of bootstrapping; accuracy/kappa will differ slightly
## from the stale transcript values (Accuracy 0.9638456, Kappa 0.9453001,
## which came from the buggy bootstrap run).

summary(dtm1)    # final model is a kernlab "ksvm" S4 object
dtm1$modelType   # "Classification"
dtm1$finalModel  # C-svc, cost C = 1, linear kernel, 29 SVs, training error 0.0333

# Repeated cross validation: 10-fold CV repeated 3 times.
control1 <- trainControl(method = "repeatedcv", number = 10, repeats = 3)

# Using the complete data set (same `trControl` spelling fix as above).
dtm1 <- train(Species ~ ., data = data1,
              method = "svmLinear",
              trControl = control1,
              preProcess = c("scale"))

dtm1
print(dtm1)
## Stale transcript values from the buggy bootstrap run: Accuracy 0.9602225,
## Kappa 0.9397231.
summary(dtm1)
dtm1$modelType
dtm1$finalModel
# If the resampled predictions are required, set savePredictions = TRUE in trainControl().
# Keep the held-out predictions from every resample (stored in dtm1$pred).
ctrl <- trainControl(method = "repeatedcv", repeats = 5, savePredictions = TRUE)

# BUG FIX (two issues in the original call):
#   1. `trcontrol` was misspelled -- the correct, case-sensitive argument is
#      `trControl`; with the typo the spec was silently ignored and caret
#      bootstrapped, as the stale transcript output shows.
#   2. The control object built just above is `ctrl`, but `control1` (from an
#      earlier chunk) was passed. Pass `ctrl`.
dtm1 <- train(Species ~ ., data = data1,
              method = "svmLinear",
              trControl = ctrl,
              preProcess = c("scale"))

dtm1
print(dtm1)
## Now reports "Cross-Validated (10 fold, repeated 5 times)". Stale transcript
## values from the buggy bootstrap run: Accuracy 0.9639723, Kappa 0.9453882.

summary(dtm1)    # final model is a kernlab "ksvm" S4 object
dtm1$modelType   # "Classification"
dtm1$finalModel  # C-svc, cost C = 1, linear kernel, 29 SVs, training error 0.0333
# (Duplicated chunk from the original transcript, kept for fidelity.)
control1 <- trainControl(method = "cv", number = 10)  # 10-fold cross validation
control1 <- trainControl(method = "cv", number = 5)   # 5-fold CV (overwrites the 10-fold spec)

# Using the complete data set.
# BUG FIX: `trcontrol` -> `trControl`. Named-argument matching is
# case-sensitive, so the misspelled argument was swallowed by `...` and caret
# silently used default bootstrap resampling ("Bootstrapped (25 reps)" in the
# original transcript output proves it).
dtm1 <- train(Species ~ ., data = data1,
              method = "svmLinear",
              trControl = control1,
              preProcess = c("scale"))

dtm1
print(dtm1)
## Stale transcript values from the buggy bootstrap run: Accuracy 0.9638456,
## Kappa 0.9453001. With the fix the report shows 5-fold CV.

summary(dtm1)    # kernlab "ksvm" S4 object
dtm1$modelType   # "Classification"
dtm1$finalModel  # C-svc, C = 1, linear kernel, 29 SVs, training error 0.0333

# Repeated cross validation: 10-fold CV repeated 3 times.
control1 <- trainControl(method = "repeatedcv", number = 10, repeats = 3)

# Using the complete data set (same `trControl` spelling fix).
dtm1 <- train(Species ~ ., data = data1,
              method = "svmLinear",
              trControl = control1,
              preProcess = c("scale"))

dtm1
print(dtm1)
## Stale transcript values (buggy run): Accuracy 0.9602225, Kappa 0.9397231.
summary(dtm1)
dtm1$modelType
dtm1$finalModel
# If the resampled predictions are required, set savePredictions = TRUE in trainControl().
# (Duplicated chunk from the original transcript, kept for fidelity.)
# Keep the held-out predictions from every resample (stored in dtm1$pred).
ctrl <- trainControl(method = "repeatedcv", repeats = 5, savePredictions = TRUE)

# BUG FIX (two issues):
#   1. `trcontrol` -> `trControl` (case-sensitive argument name; the typo made
#      caret ignore the spec and bootstrap instead).
#   2. The object built above is `ctrl`, but `control1` was passed. Use `ctrl`.
dtm1 <- train(Species ~ ., data = data1,
              method = "svmLinear",
              trControl = ctrl,
              preProcess = c("scale"))

dtm1
print(dtm1)
## Stale transcript values from the buggy bootstrap run: Accuracy 0.9639723,
## Kappa 0.9453882.

summary(dtm1)    # kernlab "ksvm" S4 object
dtm1$modelType   # "Classification"
dtm1$finalModel  # C-svc, C = 1, linear kernel, 29 SVs, training error 0.0333
# Using a tuning grid (tuneGrid) to supply explicit candidate values of C.
set.seed(512)                                # reproducible resampling
SVMgrid <- expand.grid(C = c(1, 2, 3, 10))   # explicit candidate cost values

# Using the complete data set.
# BUG FIX: `trcontrol` -> `trControl` (case-sensitive argument name). With the
# typo the control object was silently ignored and caret used default
# bootstrap resampling, as the stale transcript output ("Bootstrapped
# (25 reps)") shows.
dtm1 <- train(Species ~ ., data = data1,
              method = "svmLinear",
              trControl = control1,
              tuneGrid = SVMgrid,
              preProcess = c("scale"))

dtm1
print(dtm1)
## Stale transcript values (buggy bootstrap run):
##   C = 1  Accuracy 0.9554404 (best), C = 2  0.9533454,
##   C = 3  0.9547179,              C = 10 0.9548137.
## Accuracy selected the optimal model; final C = 1.

summary(dtm1)    # kernlab "ksvm" S4 object
dtm1$modelType   # "Classification"
dtm1$finalModel  # C-svc, C = 1, linear kernel, 29 SVs, training error 0.0333

# NOTE(review): a tuneGrid is supplied, so exactly the 4 listed values of C
# are tried. tuneLength = 10 would instead ask caret to pick 10 candidate
# values itself -- the original "tune length = 10" comment conflated the two.

# --- duplicated chunk from the original transcript, kept for fidelity ---
set.seed(512)
SVMgrid <- expand.grid(C = c(1, 2, 3, 10))

dtm1 <- train(Species ~ ., data = data1,
              method = "svmLinear",
              trControl = control1,   # same spelling fix as above
              tuneGrid = SVMgrid,
              preProcess = c("scale"))

dtm1
print(dtm1)
summary(dtm1)
dtm1$modelType
dtm1$finalModel
# The svmPoly kernel is tuned over three parameters: degree, scale and C (cost).
# `scale` here is the polynomial kernel's scaling coefficient; normalising the
# data itself is handled separately via the preProcess argument.
control1 <- trainControl(method = "repeatedcv", repeats = 5, savePredictions = TRUE)

# Polynomial-kernel SVM; caret tunes degree, scale and C over a default grid.
# BUG FIX: `trcontrol` -> `trControl` (case-sensitive argument name). With the
# typo the repeated-CV spec was silently ignored and caret bootstrapped
# instead -- the stale "Bootstrapped (25 reps)" transcript output proves it.
dtm1 <- train(Species ~ ., data = data1,
              method = "svmPoly",
              trControl = control1,
              preProcess = c("scale"))

dtm1
print(dtm1)
## Stale transcript (buggy bootstrap run), default 3x3x3 grid over
## degree {1,2,3}, scale {0.001,0.01,0.1}, C {0.25,0.5,1}:
## best Accuracy 0.9561435 at degree = 2, scale = 0.1, C = 1.

summary(dtm1)    # kernlab "ksvm" S4 object
dtm1$modelType   # "Classification"
dtm1$finalModel
## C-svc, cost C = 1, polynomial kernel (degree 2, scale 0.1, offset 1),
## 48 support vectors, training error 0.02.

plot(dtm1)  # accuracy profile across the tuning grid
# Using a custom tuning grid for svmPoly.
set.seed(512)  # reproducible resampling
SVMgrid <- expand.grid(C = c(0.5, 0.6, 0.7, 0.8, 0.9),
                       degree = c(1, 2, 3),
                       scale = c(0.1, 0.2, 0.3))

# BUG FIX: `trcontrol` -> `trControl` (case-sensitive argument name); the typo
# made caret ignore the control object and use default bootstrap resampling,
# as the stale transcript output shows.
dtm3 <- train(Species ~ ., data = data1,
              method = "svmPoly",
              trControl = control1,
              tuneGrid = SVMgrid,
              preProcess = c("scale"))

dtm3
## Stale transcript (buggy bootstrap run) over the 45-cell grid:
## best Accuracy 0.9653535 at degree = 3, scale = 0.2, C = 0.5.

dtm3$bestTune
## degree scale   C
##      3   0.2 0.5

dtm3$finalModel
## C-svc, cost C = 0.5, polynomial kernel (degree 3, scale 0.2, offset 1),
## 34 support vectors, training error 0.02.

# Resubstitution performance on the full training data. This is an optimistic
# estimate -- prefer the resampled accuracy above for generalisation.
predict1 <- predict(dtm3, data1)
confusionMatrix(predict1, data1$Species)
## Stale transcript: Accuracy 0.98 (95% CI 0.9427-0.9959), Kappa 0.97;
## 3 misclassifications, all between versicolor and virginica.
# (end of svmPoly tuning-grid example)
# (Duplicated chunk from the original transcript, kept for fidelity.)
control1 <- trainControl(method = "repeatedcv", repeats = 5, savePredictions = TRUE)

# Polynomial-kernel SVM over caret's default degree/scale/C grid.
# BUG FIX: `trcontrol` -> `trControl` (case-sensitive); the misspelled
# argument was silently dropped into `...` and bootstrap resampling was used
# instead of the repeated CV requested.
dtm1 <- train(Species ~ ., data = data1,
              method = "svmPoly",
              trControl = control1,
              preProcess = c("scale"))

dtm1
print(dtm1)
## Stale transcript (buggy bootstrap run): best Accuracy 0.9561435 at
## degree = 2, scale = 0.1, C = 1.

summary(dtm1)    # kernlab "ksvm" S4 object
dtm1$modelType   # "Classification"
dtm1$finalModel
## C-svc, cost C = 1, polynomial kernel (degree 2, scale 0.1, offset 1),
## 48 support vectors, training error 0.02.

plot(dtm1)  # accuracy profile across the tuning grid
# (Duplicated chunk from the original transcript, kept for fidelity.)
# Using a custom tuning grid for svmPoly.
set.seed(512)
SVMgrid <- expand.grid(C = c(0.5, 0.6, 0.7, 0.8, 0.9),
                       degree = c(1, 2, 3),
                       scale = c(0.1, 0.2, 0.3))

# BUG FIX: `trcontrol` -> `trControl` (case-sensitive argument name), as
# elsewhere in this file.
dtm3 <- train(Species ~ ., data = data1,
              method = "svmPoly",
              trControl = control1,
              tuneGrid = SVMgrid,
              preProcess = c("scale"))

dtm3
## Stale transcript (buggy bootstrap run): best Accuracy 0.9653535 at
## degree = 3, scale = 0.2, C = 0.5.

dtm3$bestTune
## degree scale   C
##      3   0.2 0.5

dtm3$finalModel
## C-svc, cost C = 0.5, polynomial kernel (degree 3, scale 0.2, offset 1),
## 34 support vectors, training error 0.02.

# Resubstitution performance on the full training data (optimistic estimate).
predict1 <- predict(dtm3, data1)
confusionMatrix(predict1, data1$Species)
## Stale transcript: Accuracy 0.98, Kappa 0.97; 3 misclassifications between
## versicolor and virginica.
# svmRadial: the tuning parameters for the radial (RBF) kernel are sigma and C.
# Using the complete data set.
# Repeated 10-fold cross-validation (5 repeats), keeping resample predictions
control1 <- trainControl(method = "repeatedcv", repeats = 5, savePredictions = TRUE)
# Radial-kernel SVM on the full iris data, default tuning grid for C.
# BUG FIX: the argument name is 'trControl' (capital C). The original
# 'trcontrol=control1' does not match (argument matching is case sensitive),
# so it was silently absorbed by '...' and caret fell back to the default
# bootstrap resampling — visible as "Resampling: Bootstrapped (25 reps)"
# in the output below, which therefore reflects the uncorrected run.
dtm5 <- train(Species ~ ., data = data1,
              method = "svmRadial",
              trControl = control1,
              preProcess = c("scale"))
dtm5
## Support Vector Machines with Radial Basis Function Kernel
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## Pre-processing: scaled (4)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 150, 150, 150, 150, 150, 150, ...
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 0.25 0.9405263 0.9102520
## 0.50 0.9505407 0.9251513
## 1.00 0.9543298 0.9308903
##
## Tuning parameter 'sigma' was held constant at a value of 0.4859277
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.4859277 and C = 1.
# Explicit print of the fitted caret object (same as auto-printing 'dtm5')
print(dtm5)
## Support Vector Machines with Radial Basis Function Kernel
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## Pre-processing: scaled (4)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 150, 150, 150, 150, 150, 150, ...
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 0.25 0.9405263 0.9102520
## 0.50 0.9505407 0.9251513
## 1.00 0.9543298 0.9308903
##
## Tuning parameter 'sigma' was held constant at a value of 0.4859277
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.4859277 and C = 1.
# summary() on a caret/ksvm model is not very informative (S4 object)
summary(dtm5)
## Length Class Mode
## 1 ksvm S4
# NOTE(review): 'dtm1' here looks like a leftover from the earlier
# polynomial-kernel section — the kernel printed below is polynomial,
# not radial. This was probably meant to be 'dtm5'; confirm against
# the full script before changing.
dtm1$modelType
## [1] "Classification"
dtm1$finalModel
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 1
##
## Polynomial kernel function.
## Hyperparameters : degree = 2 scale = 0.1 offset = 1
##
## Number of Support Vectors : 48
##
## Objective Function Value : -2.9978 -1.3517 -28.2822
## Training error : 0.02
# Accuracy profile across the tuning grid
# NOTE(review): plotting 'dtm1' (the earlier polynomial model) in the
# svmRadial section may be a leftover — confirm whether 'dtm5' was intended.
plot(dtm1)
# using tune grid: explicit (C, sigma) combinations to evaluate
set.seed(512)
SVMgrid <- expand.grid(C = c(0.5,0.6,0.7,0.8),sigma=c(0.1,0.2,0.3,0.4,0.5))
# BUG FIX: 'trControl' (capital C), not 'trcontrol' — the misspelled name
# fell into '...' and caret silently used default bootstrap resampling
# (see "Bootstrapped (25 reps)" in the output below from the original run).
dtm5 <- train(Species ~ ., data = data1,
              method = "svmRadial",
              trControl = control1,
              tuneGrid = SVMgrid,
              preProcess = c("scale"))
dtm5
## Support Vector Machines with Radial Basis Function Kernel
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## Pre-processing: scaled (4)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 150, 150, 150, 150, 150, 150, ...
## Resampling results across tuning parameters:
##
## C sigma Accuracy Kappa
## 0.5 0.1 0.9438578 0.9153355
## 0.5 0.2 0.9500508 0.9245833
## 0.5 0.3 0.9535023 0.9298088
## 0.5 0.4 0.9481746 0.9217782
## 0.5 0.5 0.9481311 0.9216479
## 0.6 0.1 0.9497571 0.9242068
## 0.6 0.2 0.9536985 0.9300512
## 0.6 0.3 0.9519995 0.9275425
## 0.6 0.4 0.9511274 0.9261463
## 0.6 0.5 0.9496003 0.9238181
## 0.7 0.1 0.9504495 0.9251802
## 0.7 0.2 0.9558533 0.9332882
## 0.7 0.3 0.9556814 0.9330129
## 0.7 0.4 0.9540759 0.9305549
## 0.7 0.5 0.9531584 0.9292216
## 0.8 0.1 0.9520468 0.9274961
## 0.8 0.2 0.9556846 0.9330448
## 0.8 0.3 0.9556814 0.9330129
## 0.8 0.4 0.9555594 0.9328267
## 0.8 0.5 0.9530785 0.9291254
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.2 and C = 0.7.
# Best (sigma, C) pair chosen from the explicit tuning grid
dtm5$bestTune
## sigma C
## 12 0.2 0.7
# Final kernlab 'ksvm' model refit on all data with the best tuning values
dtm5$finalModel
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 0.7
##
## Gaussian Radial Basis kernel function.
## Hyperparameter : sigma = 0.2
##
## Number of Support Vectors : 61
##
## Objective Function Value : -3.4561 -2.7751 -21.3074
## Training error : 0.033333
# In-sample predictions for the tuned radial SVM (optimistic estimate,
# since the same rows were used for tuning and fitting)
predict1<-predict(dtm5,data1)
confusionMatrix(predict1,data1$Species)
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 48 3
## virginica 0 2 47
##
## Overall Statistics
##
## Accuracy : 0.9667
## 95% CI : (0.9239, 0.9891)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.95
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 0.9600 0.9400
## Specificity 1.0000 0.9700 0.9800
## Pos Pred Value 1.0000 0.9412 0.9592
## Neg Pred Value 1.0000 0.9798 0.9703
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3200 0.3133
## Detection Prevalence 0.3333 0.3400 0.3267
## Balanced Accuracy 1.0000 0.9650 0.9600
# Repeated 10-fold cross-validation (5 repeats), keeping resample predictions
control1 <- trainControl(method = "repeatedcv", repeats = 5, savePredictions = TRUE)
# Radial-kernel SVM on the full iris data, default tuning grid for C.
# BUG FIX: the argument name is 'trControl' (capital C). The original
# 'trcontrol=control1' does not match (argument matching is case sensitive),
# so it was silently absorbed by '...' and caret fell back to the default
# bootstrap resampling — visible as "Resampling: Bootstrapped (25 reps)"
# in the output below, which therefore reflects the uncorrected run.
dtm5 <- train(Species ~ ., data = data1,
              method = "svmRadial",
              trControl = control1,
              preProcess = c("scale"))
dtm5
## Support Vector Machines with Radial Basis Function Kernel
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## Pre-processing: scaled (4)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 150, 150, 150, 150, 150, 150, ...
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 0.25 0.9405263 0.9102520
## 0.50 0.9505407 0.9251513
## 1.00 0.9543298 0.9308903
##
## Tuning parameter 'sigma' was held constant at a value of 0.4859277
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.4859277 and C = 1.
# Explicit print of the fitted caret object (same as auto-printing 'dtm5')
print(dtm5)
## Support Vector Machines with Radial Basis Function Kernel
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## Pre-processing: scaled (4)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 150, 150, 150, 150, 150, 150, ...
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 0.25 0.9405263 0.9102520
## 0.50 0.9505407 0.9251513
## 1.00 0.9543298 0.9308903
##
## Tuning parameter 'sigma' was held constant at a value of 0.4859277
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.4859277 and C = 1.
# summary() on a caret/ksvm model is not very informative (S4 object)
summary(dtm5)
## Length Class Mode
## 1 ksvm S4
# NOTE(review): 'dtm1' here looks like a leftover from the earlier
# polynomial-kernel section — the kernel printed below is polynomial,
# not radial. This was probably meant to be 'dtm5'; confirm against
# the full script before changing.
dtm1$modelType
## [1] "Classification"
dtm1$finalModel
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 1
##
## Polynomial kernel function.
## Hyperparameters : degree = 2 scale = 0.1 offset = 1
##
## Number of Support Vectors : 48
##
## Objective Function Value : -2.9978 -1.3517 -28.2822
## Training error : 0.02
# Accuracy profile across the tuning grid
# NOTE(review): plotting 'dtm1' (the earlier polynomial model) in the
# svmRadial section may be a leftover — confirm whether 'dtm5' was intended.
plot(dtm1)
# using tune grid: explicit (C, sigma) combinations to evaluate
set.seed(512)
SVMgrid <- expand.grid(C = c(0.5,0.6,0.7,0.8),sigma=c(0.1,0.2,0.3,0.4,0.5))
# BUG FIX: 'trControl' (capital C), not 'trcontrol' — the misspelled name
# fell into '...' and caret silently used default bootstrap resampling
# (see "Bootstrapped (25 reps)" in the output below from the original run).
dtm5 <- train(Species ~ ., data = data1,
              method = "svmRadial",
              trControl = control1,
              tuneGrid = SVMgrid,
              preProcess = c("scale"))
dtm5
## Support Vector Machines with Radial Basis Function Kernel
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## Pre-processing: scaled (4)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 150, 150, 150, 150, 150, 150, ...
## Resampling results across tuning parameters:
##
## C sigma Accuracy Kappa
## 0.5 0.1 0.9438578 0.9153355
## 0.5 0.2 0.9500508 0.9245833
## 0.5 0.3 0.9535023 0.9298088
## 0.5 0.4 0.9481746 0.9217782
## 0.5 0.5 0.9481311 0.9216479
## 0.6 0.1 0.9497571 0.9242068
## 0.6 0.2 0.9536985 0.9300512
## 0.6 0.3 0.9519995 0.9275425
## 0.6 0.4 0.9511274 0.9261463
## 0.6 0.5 0.9496003 0.9238181
## 0.7 0.1 0.9504495 0.9251802
## 0.7 0.2 0.9558533 0.9332882
## 0.7 0.3 0.9556814 0.9330129
## 0.7 0.4 0.9540759 0.9305549
## 0.7 0.5 0.9531584 0.9292216
## 0.8 0.1 0.9520468 0.9274961
## 0.8 0.2 0.9556846 0.9330448
## 0.8 0.3 0.9556814 0.9330129
## 0.8 0.4 0.9555594 0.9328267
## 0.8 0.5 0.9530785 0.9291254
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.2 and C = 0.7.
# Best (sigma, C) pair chosen from the explicit tuning grid
dtm5$bestTune
## sigma C
## 12 0.2 0.7
# Final kernlab 'ksvm' model refit on all data with the best tuning values
dtm5$finalModel
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 0.7
##
## Gaussian Radial Basis kernel function.
## Hyperparameter : sigma = 0.2
##
## Number of Support Vectors : 61
##
## Objective Function Value : -3.4561 -2.7751 -21.3074
## Training error : 0.033333
# In-sample predictions for the tuned radial SVM (optimistic estimate,
# since the same rows were used for tuning and fitting)
predict1<-predict(dtm5,data1)
confusionMatrix(predict1,data1$Species)
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 48 3
## virginica 0 2 47
##
## Overall Statistics
##
## Accuracy : 0.9667
## 95% CI : (0.9239, 0.9891)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.95
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 0.9600 0.9400
## Specificity 1.0000 0.9700 0.9800
## Pos Pred Value 1.0000 0.9412 0.9592
## Neg Pred Value 1.0000 0.9798 0.9703
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3200 0.3133
## Detection Prevalence 0.3333 0.3400 0.3267
## Balanced Accuracy 1.0000 0.9650 0.9600
We can also divide the data into training and test sets.
# Fix the RNG seed so the partition below is reproducible
set.seed(100)
library(caret)
# Stratified split on Species: 70% of each class goes to training
datasplit<-createDataPartition(data1$Species, times = 1, p=0.7, list = FALSE)
# create training and test data
datatrain<-data1[datasplit,] # training data set 70%
datatest<-data1[-datasplit,] # test data set 30%
dim(datatrain)
## [1] 105 5
dim(datatest)
## [1] 45 5
# use the same procedure
#----------------------------------------------------------------#
# Fix the RNG seed so the partition below is reproducible
set.seed(100)
library(caret)
# Stratified split on Species: 70% of each class goes to training
datasplit<-createDataPartition(data1$Species, times = 1, p=0.7, list = FALSE)
# create training and test data
datatrain<-data1[datasplit,] # training data set 70%
datatest<-data1[-datasplit,] # test data set 30%
dim(datatrain)
## [1] 105 5
dim(datatest)
## [1] 45 5
# use the same procedure
#----------------------------------------------------------------#