# Load the built-in iris data set and take a first look at it: dimensions,
# column types, and per-column summary statistics.
# The former rm(list = ls()) call was dropped: when the script is sourced it
# silently deletes every object in the caller's workspace, yet it does not
# give a clean session (attached packages and options are left as they were).
data1 <- iris
dim(data1)
## [1] 150 5
str(data1)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
summary(data1)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Species
## setosa :50
## versicolor:50
## virginica :50
##
##
##
For a linear SVM we remove one of the species categories, because we want to consider only two classes (binary classification).
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.5.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Look at the full three-species data once more before subsetting it.
str(object = data1)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Keep every row whose species is not setosa (versicolor and virginica remain).
data2 <- dplyr::filter(data1, Species != "setosa")
str(object = data2)
## 'data.frame': 100 obs. of 5 variables:
## $ Sepal.Length: num 7 6.4 6.9 5.5 6.5 5.7 6.3 4.9 6.6 5.2 ...
## $ Sepal.Width : num 3.2 3.2 3.1 2.3 2.8 2.8 3.3 2.4 2.9 2.7 ...
## $ Petal.Length: num 4.7 4.5 4.9 4 4.6 4.5 4.7 3.3 4.6 3.9 ...
## $ Petal.Width : num 1.4 1.5 1.5 1.3 1.5 1.3 1.6 1 1.3 1.4 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 2 2 2 2 2 2 2 2 2 2 ...
# Re-wrap the filtered rows as a plain data.frame.
data2 <- data.frame(data2)
str(object = data2[["Species"]])
## Factor w/ 3 levels "setosa","versicolor",..: 2 2 2 2 2 2 2 2 2 2 ...
# Filtering keeps the unused "setosa" level; drop it so Species becomes a
# genuine two-level factor.
data2[["Species"]] <- droplevels(data2[["Species"]])
str(object = data2[["Species"]])
## Factor w/ 2 levels "versicolor","virginica": 1 1 1 1 1 1 1 1 1 1 ...
Plot the data points. If plotting fails with a graphics-device error, call dev.off() to reset the device and try again.
library(ggplot2)
# Pairwise scatter plots of the two remaining species, coloured by Species.
# Columns are named bare inside aes() so ggplot2 looks them up in `data2`.
# Writing data2$col inside aes() bypasses the `data` argument, produces
# axis/legend titles such as "data2$Sepal.Length", and is discouraged by
# ggplot2.
ggplot(data2, aes(x = Sepal.Length, y = Sepal.Width, color = Species)) +
  geom_point()
ggplot(data2, aes(x = Petal.Length, y = Petal.Width, color = Species)) +
  geom_point()
ggplot(data2, aes(x = Sepal.Length, y = Petal.Length, color = Species)) +
  geom_point()
ggplot(data2, aes(x = Petal.Length, y = Sepal.Width, color = Species)) +
  geom_point()
Using the e1071 package
library(e1071)
# Fit an SVM classifier for Species from the two sepal measurements only,
# with a linear kernel and predictor scaling switched on.
dtm1 <- svm(
  Species ~ Sepal.Length + Sepal.Width,
  data = data2,
  kernel = "linear",
  scale = TRUE
)
summary(object = dtm1)
##
## Call:
## svm(formula = Species ~ Sepal.Length + Sepal.Width, data = data2,
## kernel = "linear", scale = TRUE)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 1
## gamma: 0.5
##
## Number of Support Vectors: 67
##
## ( 34 33 )
##
##
## Number of Classes: 2
##
## Levels:
## versicolor virginica
# Draw the fitted classification regions over the two sepal measurements.
plot(dtm1, data = data2, formula = Sepal.Length ~ Sepal.Width)
# Predict on the very rows the model was trained on, so the figures below
# describe training-set fit rather than held-out performance.
predict1 <- predict(dtm1, newdata = data2)
library(caret)
## Loading required package: lattice
# Cross-tabulate predicted against actual species.
confusionMatrix(data = predict1, reference = data2$Species)
## Confusion Matrix and Statistics
##
## Reference
## Prediction versicolor virginica
## versicolor 38 15
## virginica 12 35
##
## Accuracy : 0.73
## 95% CI : (0.632, 0.8139)
## No Information Rate : 0.5
## P-Value [Acc > NIR] : 2.346e-06
##
## Kappa : 0.46
## Mcnemar's Test P-Value : 0.7003
##
## Sensitivity : 0.7600
## Specificity : 0.7000
## Pos Pred Value : 0.7170
## Neg Pred Value : 0.7447
## Prevalence : 0.5000
## Detection Rate : 0.3800
## Detection Prevalence : 0.5300
## Balanced Accuracy : 0.7300
##
## 'Positive' Class : versicolor
##
Change the value of C (the `cost` argument of svm())
# Refit the linear SVM with a larger penalty for margin violations.
# The argument has to be spelled `cost` in lower case: R argument names are
# case-sensitive, so the earlier `Cost = 100` was absorbed by `...` and
# ignored, which is why the recorded summary still reports "cost: 1".
dtm2 <- svm(
  Species ~ Sepal.Length + Sepal.Width,
  data = data2,
  kernel = "linear",
  cost = 100,
  scale = TRUE
)
summary(dtm2)
# NOTE: the output recorded below predates this fix; re-run to refresh it.
##
## Call:
## svm(formula = Species ~ Sepal.Length + Sepal.Width, data = data2,
## kernel = "linear", Cost = 100, scale = TRUE)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 1
## gamma: 0.5
##
## Number of Support Vectors: 67
##
## ( 34 33 )
##
##
## Number of Classes: 2
##
## Levels:
## versicolor virginica
# Classification regions of the second linear model.
plot(dtm2, data = data2, formula = Sepal.Length ~ Sepal.Width)
# Training-set predictions and their confusion matrix.
predict2 <- predict(dtm2, newdata = data2)
confusionMatrix(data = predict2, reference = data2$Species)
## Confusion Matrix and Statistics
##
## Reference
## Prediction versicolor virginica
## versicolor 38 15
## virginica 12 35
##
## Accuracy : 0.73
## 95% CI : (0.632, 0.8139)
## No Information Rate : 0.5
## P-Value [Acc > NIR] : 2.346e-06
##
## Kappa : 0.46
## Mcnemar's Test P-Value : 0.7003
##
## Sensitivity : 0.7600
## Specificity : 0.7000
## Pos Pred Value : 0.7170
## Neg Pred Value : 0.7447
## Prevalence : 0.5000
## Detection Rate : 0.3800
## Detection Prevalence : 0.5300
## Balanced Accuracy : 0.7300
##
## 'Positive' Class : versicolor
##
———-polynomial Kernel———
# Same two sepal predictors, now with a polynomial kernel (all other
# settings left at svm()'s defaults) and predictor scaling on.
dtm3 <- svm(
  Species ~ Sepal.Length + Sepal.Width,
  data = data2,
  kernel = "polynomial",
  scale = TRUE
)
summary(object = dtm3)
##
## Call:
## svm(formula = Species ~ Sepal.Length + Sepal.Width, data = data2,
## kernel = "polynomial", scale = TRUE)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: polynomial
## cost: 1
## degree: 3
## gamma: 0.5
## coef.0: 0
##
## Number of Support Vectors: 77
##
## ( 38 39 )
##
##
## Number of Classes: 2
##
## Levels:
## versicolor virginica
# Classification regions of the polynomial-kernel model.
plot(dtm3, data = data2, formula = Sepal.Length ~ Sepal.Width)
# predict1 is overwritten with this model's training-set predictions.
predict1 <- predict(dtm3, newdata = data2)
confusionMatrix(data = predict1, reference = data2$Species)
## Confusion Matrix and Statistics
##
## Reference
## Prediction versicolor virginica
## versicolor 39 16
## virginica 11 34
##
## Accuracy : 0.73
## 95% CI : (0.632, 0.8139)
## No Information Rate : 0.5
## P-Value [Acc > NIR] : 2.346e-06
##
## Kappa : 0.46
## Mcnemar's Test P-Value : 0.4414
##
## Sensitivity : 0.7800
## Specificity : 0.6800
## Pos Pred Value : 0.7091
## Neg Pred Value : 0.7556
## Prevalence : 0.5000
## Detection Rate : 0.3900
## Detection Prevalence : 0.5500
## Balanced Accuracy : 0.7300
##
## 'Positive' Class : versicolor
##
———–Radial Kernel——————–
# Radial-kernel fit on the two sepal predictors. Unlike the fits above,
# scale = FALSE here, so the predictors are used in their raw units.
dtm4 <- svm(
  Species ~ Sepal.Length + Sepal.Width,
  data = data2,
  kernel = "radial",
  scale = FALSE
)
summary(object = dtm4)
##
## Call:
## svm(formula = Species ~ Sepal.Length + Sepal.Width, data = data2,
## kernel = "radial", scale = FALSE)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1
## gamma: 0.5
##
## Number of Support Vectors: 70
##
## ( 35 35 )
##
##
## Number of Classes: 2
##
## Levels:
## versicolor virginica
# Classification regions of the radial-kernel model.
plot(dtm4, data = data2, formula = Sepal.Length ~ Sepal.Width)
# predict1 is overwritten with this model's training-set predictions.
predict1 <- predict(dtm4, newdata = data2)
confusionMatrix(data = predict1, reference = data2$Species)
## Confusion Matrix and Statistics
##
## Reference
## Prediction versicolor virginica
## versicolor 38 14
## virginica 12 36
##
## Accuracy : 0.74
## 95% CI : (0.6427, 0.8226)
## No Information Rate : 0.5
## P-Value [Acc > NIR] : 8.337e-07
##
## Kappa : 0.48
## Mcnemar's Test P-Value : 0.8445
##
## Sensitivity : 0.7600
## Specificity : 0.7200
## Pos Pred Value : 0.7308
## Neg Pred Value : 0.7500
## Prevalence : 0.5000
## Detection Rate : 0.3800
## Detection Prevalence : 0.5200
## Balanced Accuracy : 0.7400
##
## 'Positive' Class : versicolor
##
——–Sigmoid Kernel—————
# Sigmoid-kernel fit on the two sepal predictors, again with scale = FALSE
# (raw, unscaled predictors).
# NOTE(review): the recorded confusion matrix for this model assigns every
# row to virginica; possibly a consequence of the unscaled inputs - confirm.
dtm5 <- svm(
  Species ~ Sepal.Length + Sepal.Width,
  data = data2,
  kernel = "sigmoid",
  scale = FALSE
)
summary(object = dtm5)
##
## Call:
## svm(formula = Species ~ Sepal.Length + Sepal.Width, data = data2,
## kernel = "sigmoid", scale = FALSE)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: sigmoid
## cost: 1
## gamma: 0.5
## coef.0: 0
##
## Number of Support Vectors: 100
##
## ( 50 50 )
##
##
## Number of Classes: 2
##
## Levels:
## versicolor virginica
# Classification regions of the sigmoid-kernel model.
plot(dtm5, data = data2, formula = Sepal.Length ~ Sepal.Width)
# predict1 is overwritten with this model's training-set predictions.
predict1 <- predict(dtm5, newdata = data2)
confusionMatrix(data = predict1, reference = data2$Species)
## Confusion Matrix and Statistics
##
## Reference
## Prediction versicolor virginica
## versicolor 0 0
## virginica 50 50
##
## Accuracy : 0.5
## 95% CI : (0.3983, 0.6017)
## No Information Rate : 0.5
## P-Value [Acc > NIR] : 0.5398
##
## Kappa : 0
## Mcnemar's Test P-Value : 4.219e-12
##
## Sensitivity : 0.0
## Specificity : 1.0
## Pos Pred Value : NaN
## Neg Pred Value : 0.5
## Prevalence : 0.5
## Detection Rate : 0.0
## Detection Prevalence : 0.0
## Balanced Accuracy : 0.5
##
## 'Positive' Class : versicolor
##
Use tuning to find the best parameter values
# Grid-search gamma and cost for an SVM on the two sepal predictors.
# The recorded output reports 10-fold cross validation; the fold assignment
# is random, so the seed is fixed to make the selected parameters
# reproducible from one run to the next.
set.seed(123)
tune1 <- tune.svm(
  Species ~ Sepal.Length + Sepal.Width,
  data = data2,
  gamma = c(0.1, 0.2, 0.3, 0.5, 0.7),
  cost = c(1, 10, 100)
)
print(tune1)
# NOTE: the output recorded below was produced without a fixed seed.
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## gamma cost
## 0.7 1
##
## - best performance: 0.29
# Visualise the cross-validated error over the gamma/cost grid.
plot(tune1)
# Refit with the parameters the tuning step actually selected instead of
# hard-coded values (the earlier gamma = 0.3 did not match the recorded best
# gamma of 0.7). tune.svm() was called without a kernel argument, so it tuned
# svm()'s default radial kernel; the refit uses the same kernel, because
# gamma has no effect on a linear kernel.
dtm1 <- svm(
  Species ~ Sepal.Length + Sepal.Width,
  data = data2,
  kernel = "radial",
  scale = TRUE,
  cost = tune1$best.parameters$cost,
  gamma = tune1$best.parameters$gamma
)
summary(dtm1)
# NOTE: the output recorded below predates this change; re-run to refresh it.
##
## Call:
## svm(formula = Species ~ Sepal.Length + Sepal.Width, data = data2,
## kernel = "linear", cost = 1, gamma = 0.3, scale = TRUE)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 1
## gamma: 0.3
##
## Number of Support Vectors: 67
##
## ( 34 33 )
##
##
## Number of Classes: 2
##
## Levels:
## versicolor virginica
# Classification regions of the refitted model.
plot(dtm1, data = data2, formula = Sepal.Length ~ Sepal.Width)
# Training-set predictions and their confusion matrix.
predict1 <- predict(dtm1, newdata = data2)
confusionMatrix(data = predict1, reference = data2$Species)
## Confusion Matrix and Statistics
##
## Reference
## Prediction versicolor virginica
## versicolor 38 15
## virginica 12 35
##
## Accuracy : 0.73
## 95% CI : (0.632, 0.8139)
## No Information Rate : 0.5
## P-Value [Acc > NIR] : 2.346e-06
##
## Kappa : 0.46
## Mcnemar's Test P-Value : 0.7003
##
## Sensitivity : 0.7600
## Specificity : 0.7000
## Pos Pred Value : 0.7170
## Neg Pred Value : 0.7447
## Prevalence : 0.5000
## Detection Rate : 0.3800
## Detection Prevalence : 0.5300
## Balanced Accuracy : 0.7300
##
## 'Positive' Class : versicolor
##
—————————————————
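Now using all four variables. Note: the code below still fits the model on `data2`, i.e. the two categories versicolor and virginica, not on all three categories.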
—————————————–
# Structure of the full iris data, for reference.
str(object = data1)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# data1 is reset to the original iris data, but the model below is fit on
# data2 (the two-species subset), using every other column as a predictor.
data1 <- iris
dtm1 <- svm(
  Species ~ .,
  data = data2,
  kernel = "linear",
  scale = TRUE
)
summary(object = dtm1)
##
## Call:
## svm(formula = Species ~ ., data = data2, kernel = "linear", scale = TRUE)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 1
## gamma: 0.25
##
## Number of Support Vectors: 16
##
## ( 8 8 )
##
##
## Number of Classes: 2
##
## Levels:
## versicolor virginica
# Plot the classification regions in the sepal plane. The model has four
# predictors, so the two that are not on the axes must be held constant via
# `slice`; left unspecified, plot.svm() holds them at 0, which lies far
# outside the observed petal measurements. They are fixed at their medians.
plot(
  dtm1,
  data = data2,
  Sepal.Length ~ Sepal.Width,
  slice = list(
    Petal.Length = median(data2$Petal.Length),
    Petal.Width = median(data2$Petal.Width)
  )
)
# Training-set predictions and their confusion matrix.
predict1 <- predict(dtm1, data2)
confusionMatrix(predict1, data2$Species)
## Confusion Matrix and Statistics
##
## Reference
## Prediction versicolor virginica
## versicolor 46 0
## virginica 4 50
##
## Accuracy : 0.96
## 95% CI : (0.9007, 0.989)
## No Information Rate : 0.5
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.92
## Mcnemar's Test P-Value : 0.1336
##
## Sensitivity : 0.9200
## Specificity : 1.0000
## Pos Pred Value : 1.0000
## Neg Pred Value : 0.9259
## Prevalence : 0.5000
## Detection Rate : 0.4600
## Detection Prevalence : 0.4600
## Balanced Accuracy : 0.9600
##
## 'Positive' Class : versicolor
##
Use tuning to find the best parameter values
# Grid-search gamma and cost for an SVM on all four predictors.
# The recorded output reports 10-fold cross validation; the fold assignment
# is random, so the seed is fixed to make the selected parameters
# reproducible from one run to the next.
set.seed(123)
tune1 <- tune.svm(
  Species ~ .,
  data = data2,
  gamma = c(0.1, 0.2, 0.3, 0.5, 0.7),
  cost = c(1, 10, 100)
)
print(tune1)
# NOTE: the output recorded below was produced without a fixed seed.
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## gamma cost
## 0.1 1
##
## - best performance: 0.04
# Show the cross-validated error over the grid, then the selected pair.
plot(tune1)
tune1[["best.parameters"]]
## gamma cost
## 1 0.1 1
# Refit the linear model on all predictors with cost and gamma written out
# as literals.
# NOTE(review): per the e1071 documentation gamma is not used by the linear
# kernel, and tune.svm() above was called without a kernel argument -
# confirm which kernel was intended here.
dtm1 <- svm(
  Species ~ .,
  data = data2,
  kernel = "linear",
  scale = TRUE,
  cost = 1,
  gamma = 0.1
)
summary(object = dtm1)
##
## Call:
## svm(formula = Species ~ ., data = data2, kernel = "linear", cost = 1,
## gamma = 0.1, scale = TRUE)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 1
## gamma: 0.1
##
## Number of Support Vectors: 16
##
## ( 8 8 )
##
##
## Number of Classes: 2
##
## Levels:
## versicolor virginica
# Training-set predictions of the refitted model and their confusion matrix.
predict1 <- predict(dtm1, newdata = data2)
confusionMatrix(data = predict1, reference = data2$Species)
## Confusion Matrix and Statistics
##
## Reference
## Prediction versicolor virginica
## versicolor 46 0
## virginica 4 50
##
## Accuracy : 0.96
## 95% CI : (0.9007, 0.989)
## No Information Rate : 0.5
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.92
## Mcnemar's Test P-Value : 0.1336
##
## Sensitivity : 0.9200
## Specificity : 1.0000
## Pos Pred Value : 1.0000
## Neg Pred Value : 0.9259
## Prevalence : 0.5000
## Detection Rate : 0.4600
## Detection Prevalence : 0.4600
## Balanced Accuracy : 0.9600
##
## 'Positive' Class : versicolor
##
No comments:
Post a Comment