# Load the survey data set and take a first look at it.
#
# The former `rm(list = ls())` was dropped: wiping the caller's global
# environment is a hidden side effect and does not give a clean session
# anyway (attached packages and options survive it).
#
# fileEncoding = "UTF-8-BOM" strips a leading byte-order mark; the recorded
# summary shows the first column as "ï..F1", which looks like a mangled BOM
# (NOTE(review): with this option the column should read "F1" - confirm).
#
# stringsAsFactors = TRUE keeps the categorical columns as factors on
# R >= 4.0 as well; the later droplevels() call and the classification model
# both need `Specialization` to be a factor.
data1 <- read.csv(
  "D:/1 Research/1 preparedness for future/3 decision science specialization/sheet final.csv/final data1.7.csv",
  fileEncoding = "UTF-8-BOM",
  stringsAsFactors = TRUE
)
dim(data1)
## [1] 473 25
summary(data1)
## ï..F1 pre.score Age_in_years Percentage_in_10_Class
## Min. : 4.0 Min. : 30.00 Min. :19.00 Min. :58.90
## 1st Qu.:194.0 1st Qu.: 60.00 1st Qu.:21.00 1st Qu.:79.00
## Median :389.0 Median : 70.00 Median :22.00 Median :85.00
## Mean :394.5 Mean : 68.18 Mean :22.56 Mean :84.02
## 3rd Qu.:592.0 3rd Qu.: 76.67 3rd Qu.:24.00 3rd Qu.:90.00
## Max. :783.0 Max. :100.00 Max. :26.00 Max. :97.20
##
## Percentage_in_12_Class Percentage_in_Under_Graduate Number_of_siblings
## Min. :60.29 Min. :58.00 Min. :0.000
## 1st Qu.:75.20 1st Qu.:67.88 1st Qu.:1.000
## Median :83.40 Median :74.00 Median :1.000
## Mean :81.56 Mean :73.41 Mean :1.034
## 3rd Qu.:89.33 3rd Qu.:78.00 3rd Qu.:1.000
## Max. :97.17 Max. :94.00 Max. :4.000
##
## Over.all.percentage.in.MBA post.score Gender
## Min. :57.00 Min. : 7.00 Female:205
## 1st Qu.:65.60 1st Qu.: 70.00 Male :268
## Median :68.00 Median : 76.67
## Mean :67.87 Mean : 76.22
## 3rd Qu.:70.48 3rd Qu.: 85.00
## Max. :77.41 Max. :100.00
##
## STATE Previous_Degree Fathers_qualification
## Central Zone: 56 Arts : 3 Under Graduate :257
## East Zone : 52 Commerce :148 Post Graduation :110
## North East : 5 Engineering:235 HSC : 35
## North Zone : 86 Management : 44 no formal Education : 31
## South Zone :258 Science : 43 DIPLOMA : 17
## West Zone : 16 PhD or higher qualificati: 8
## (Other) : 15
## Mothers_qualification Fathers_occupation
## DIPLOMA : 2 Farming : 23
## HSC : 48 Not Alive : 3
## no formal Education : 33 Professional Job:192
## PhD or higher qualificati: 9 RETIRED : 18
## Post Graduation :109 Self employed :181
## SSC : 35 Technical Job : 56
## Under Graduate :237
## Mothers_occupation Marital_status Place_you_belong_to
## House wife :307 Married: 11 Rural : 44
## Professional Job:130 Single :462 Semi Urban:116
## Self employed : 25 Urban :313
## Technical Job : 11
##
##
##
## Total_Family_Income_per_annum Funding_for_the_MBA_Program
## 0-5 Lakh :181 Family :200
## 10-15 lakh : 55 Loan :103
## 15 lakh and above: 51 Loan and family: 85
## 5-10 Lakh :186 Loan and self : 37
## Self : 41
## Self and Family: 7
##
## Work_Experience Career_options_after_MBA
## 1-2 year : 80 Higher Studies : 5
## 2-3 year : 60 Job :449
## 3-4 year : 9 Not yet decided : 4
## less than 1 year: 50 Self-employed/ entreprene: 15
## no experience :274
##
##
## Alternate_Career_Option_after_MBA_if_not_selected_through_Instit
## Family Business : 20
## Higher Studies : 47
## Job :244
## Not yet decided : 42
## Self-employed/ entreprene:120
##
##
## perceived.Job.Skill Specialization
## desired skills :167 Finance :147
## prefered skills:295 HR : 47
## required skills: 11 LOS :106
## Marketing:173
##
##
##
# Keep complete rows only, then discard factor levels of the outcome that no
# longer occur after the row filter.
data2 <- na.omit(data1)
data2[["Specialization"]] <- droplevels(data2[["Specialization"]])
dim(data2)
## [1] 473 25
# Split the data into two parts using the caret package
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
# Seeded 70/30 partition of data2, driven by the Specialization column.
set.seed(100)
datasplit <- createDataPartition(
  y = data2$Specialization,
  times = 1,
  p = 0.7,
  list = FALSE
)

# Rows selected by the partition form the training set; the rest are held out.
datatrain <- data2[datasplit, ]
datatest <- data2[-datasplit, ]

dim(datatrain)
## [1] 333 25
dim(datatest)
## [1] 140 25

# Class counts of the outcome in the training partition.
summary(datatrain[["Specialization"]])
## Finance HR LOS Marketing
## 103 33 75 122
summary(datatest)
## ï..F1 pre.score Age_in_years Percentage_in_10_Class
## Min. : 5.0 Min. : 33.33 Min. :19.00 Min. :61.00
## 1st Qu.:176.5 1st Qu.: 60.00 1st Qu.:22.00 1st Qu.:79.00
## Median :444.5 Median : 71.67 Median :22.00 Median :85.25
## Mean :407.0 Mean : 68.36 Mean :22.69 Mean :83.77
## 3rd Qu.:600.0 3rd Qu.: 76.67 3rd Qu.:24.00 3rd Qu.:89.40
## Max. :781.0 Max. :100.00 Max. :26.00 Max. :97.20
##
## Percentage_in_12_Class Percentage_in_Under_Graduate Number_of_siblings
## Min. :61.00 Min. :58.00 Min. :0.000
## 1st Qu.:74.80 1st Qu.:67.00 1st Qu.:1.000
## Median :80.70 Median :73.08 Median :1.000
## Mean :80.28 Mean :73.05 Mean :1.007
## 3rd Qu.:87.50 3rd Qu.:77.00 3rd Qu.:1.000
## Max. :97.17 Max. :94.00 Max. :4.000
##
## Over.all.percentage.in.MBA post.score Gender STATE
## Min. :59.16 Min. : 8.00 Female:58 Central Zone:26
## 1st Qu.:65.08 1st Qu.: 69.58 Male :82 East Zone :16
## Median :67.27 Median : 76.67 North East : 1
## Mean :67.52 Mean : 75.78 North Zone :19
## 3rd Qu.:69.45 3rd Qu.: 85.00 South Zone :74
## Max. :77.41 Max. :100.00 West Zone : 4
##
## Previous_Degree Fathers_qualification
## Arts : 1 Under Graduate :81
## Commerce :44 Post Graduation :31
## Engineering:74 HSC : 9
## Management : 6 no formal Education: 9
## Science :15 DIPLOMA : 5
## SSC : 2
## (Other) : 3
## Mothers_qualification Fathers_occupation
## DIPLOMA : 0 Farming :11
## HSC :20 Not Alive : 1
## no formal Education : 9 Professional Job:55
## PhD or higher qualificati: 1 RETIRED : 2
## Post Graduation :32 Self employed :50
## SSC :11 Technical Job :21
## Under Graduate :67
## Mothers_occupation Marital_status Place_you_belong_to
## House wife :96 Married: 4 Rural :13
## Professional Job:34 Single :136 Semi Urban:31
## Self employed : 6 Urban :96
## Technical Job : 4
##
##
##
## Total_Family_Income_per_annum Funding_for_the_MBA_Program
## 0-5 Lakh :52 Family :58
## 10-15 lakh :19 Loan :34
## 15 lakh and above:17 Loan and family:25
## 5-10 Lakh :52 Loan and self :12
## Self :10
## Self and Family: 1
##
## Work_Experience Career_options_after_MBA
## 1-2 year :23 Higher Studies : 2
## 2-3 year :15 Job :135
## 3-4 year : 3 Not yet decided : 1
## less than 1 year:16 Self-employed/ entreprene: 2
## no experience :83
##
##
## Alternate_Career_Option_after_MBA_if_not_selected_through_Instit
## Family Business : 5
## Higher Studies :18
## Job :63
## Not yet decided :15
## Self-employed/ entreprene:39
##
##
## perceived.Job.Skill Specialization
## desired skills :48 Finance :44
## prefered skills:89 HR :14
## required skills: 3 LOS :31
## Marketing:51
##
##
##
# Column-wise summary of the training partition, for comparison with the
# test-partition summary printed earlier.
summary(datatrain)
## ï..F1 pre.score Age_in_years Percentage_in_10_Class
## Min. : 4.0 Min. : 30.00 Min. :19.0 Min. :58.90
## 1st Qu.:201.0 1st Qu.: 60.00 1st Qu.:21.0 1st Qu.:79.00
## Median :380.0 Median : 70.00 Median :22.0 Median :85.00
## Mean :389.3 Mean : 68.11 Mean :22.5 Mean :84.12
## 3rd Qu.:586.0 3rd Qu.: 76.67 3rd Qu.:24.0 3rd Qu.:91.00
## Max. :783.0 Max. :100.00 Max. :26.0 Max. :97.20
##
## Percentage_in_12_Class Percentage_in_Under_Graduate Number_of_siblings
## Min. :60.29 Min. :58.00 Min. :0.000
## 1st Qu.:76.00 1st Qu.:68.00 1st Qu.:1.000
## Median :84.00 Median :74.00 Median :1.000
## Mean :82.09 Mean :73.55 Mean :1.045
## 3rd Qu.:90.20 3rd Qu.:78.13 3rd Qu.:1.000
## Max. :97.17 Max. :94.00 Max. :4.000
##
## Over.all.percentage.in.MBA post.score Gender
## Min. :57.00 Min. : 7.00 Female:147
## 1st Qu.:66.00 1st Qu.: 71.67 Male :186
## Median :68.10 Median : 76.67
## Mean :68.02 Mean : 76.40
## 3rd Qu.:70.64 3rd Qu.: 85.00
## Max. :77.41 Max. :100.00
##
## STATE Previous_Degree Fathers_qualification
## Central Zone: 30 Arts : 2 Under Graduate :176
## East Zone : 36 Commerce :104 Post Graduation : 79
## North East : 4 Engineering:161 HSC : 26
## North Zone : 67 Management : 38 no formal Education : 22
## South Zone :184 Science : 28 DIPLOMA : 12
## West Zone : 12 PhD or higher qualificati: 7
## (Other) : 11
## Mothers_qualification Fathers_occupation
## DIPLOMA : 2 Farming : 12
## HSC : 28 Not Alive : 2
## no formal Education : 24 Professional Job:137
## PhD or higher qualificati: 8 RETIRED : 16
## Post Graduation : 77 Self employed :131
## SSC : 24 Technical Job : 35
## Under Graduate :170
## Mothers_occupation Marital_status Place_you_belong_to
## House wife :211 Married: 7 Rural : 31
## Professional Job: 96 Single :326 Semi Urban: 85
## Self employed : 19 Urban :217
## Technical Job : 7
##
##
##
## Total_Family_Income_per_annum Funding_for_the_MBA_Program
## 0-5 Lakh :129 Family :142
## 10-15 lakh : 36 Loan : 69
## 15 lakh and above: 34 Loan and family: 60
## 5-10 Lakh :134 Loan and self : 25
## Self : 31
## Self and Family: 6
##
## Work_Experience Career_options_after_MBA
## 1-2 year : 57 Higher Studies : 3
## 2-3 year : 45 Job :314
## 3-4 year : 6 Not yet decided : 3
## less than 1 year: 34 Self-employed/ entreprene: 13
## no experience :191
##
##
## Alternate_Career_Option_after_MBA_if_not_selected_through_Instit
## Family Business : 15
## Higher Studies : 29
## Job :181
## Not yet decided : 27
## Self-employed/ entreprene: 81
##
##
## perceived.Job.Skill Specialization
## desired skills :119 Finance :103
## prefered skills:206 HR : 33
## required skills: 8 LOS : 75
## Marketing:122
##
##
##
# Control parameters for training on the training data set
# Resampling scheme used while tuning: 5-fold cross-validation, keeping the
# hold-out predictions. Two earlier 10-fold definitions of control1 were
# overwritten immediately, so only this 5-fold setting ever took effect.
control1 <- trainControl(method = "cv", number = 5, savePredictions = TRUE)

# Random forest classifier for Specialization.
#
# The control object must be supplied as `trControl` (capital C). The former
# spelling `trcontrol` did not match any train() argument, so caret fell back
# to its default bootstrap resampling - visible as "Bootstrapped (25 reps)"
# in the previously recorded output. `savePredictions` is an option of
# trainControl(), not of train(), so it is now set on control1 instead.
dtm4 <- train(
  Specialization ~ Gender + Previous_Degree + Marital_status +
    Place_you_belong_to + Percentage_in_10_Class + Percentage_in_12_Class +
    Percentage_in_Under_Graduate + Over.all.percentage.in.MBA +
    Total_Family_Income_per_annum + Work_Experience,
  data = datatrain,
  method = "rf",
  trControl = control1
)
dtm4
# - mtry: number of variables randomly sampled as candidates at each split.
# - ntree: number of trees to grow.
## Random Forest
##
## 333 samples
## 10 predictor
## 4 classes: 'Finance', 'HR', 'LOS', 'Marketing'
##
## No pre-processing
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 333, 333, 333, 333, 333, 333, ...
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 2 0.8420018 0.7733131
## 10 0.8762908 0.8244549
## 19 0.8716184 0.8182187
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 10.
# Explore the model
# Auto-print the fitted caret model (resampling results per mtry value).
dtm4
## Random Forest
##
## 333 samples
## 10 predictor
## 4 classes: 'Finance', 'HR', 'LOS', 'Marketing'
##
## No pre-processing
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 333, 333, 333, 333, 333, 333, ...
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 2 0.8420018 0.7733131
## 10 0.8762908 0.8244549
## 19 0.8716184 0.8182187
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 10.
# Explicit print(); shows the same resampling table as auto-printing dtm4.
print(dtm4)
## Random Forest
##
## 333 samples
## 10 predictor
## 4 classes: 'Finance', 'HR', 'LOS', 'Marketing'
##
## No pre-processing
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 333, 333, 333, 333, 333, 333, ...
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 2 0.8420018 0.7733131
## 10 0.8762908 0.8244549
## 19 0.8716184 0.8182187
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 10.
# summary() on the train object lists the components stored in the fitted
# model (call, votes, importance, ...), not performance metrics.
summary(dtm4)
## Length Class Mode
## call 6 -none- call
## type 1 -none- character
## predicted 333 factor numeric
## err.rate 2500 -none- numeric
## confusion 20 -none- numeric
## votes 1332 matrix numeric
## oob.times 333 -none- numeric
## classes 4 -none- character
## importance 19 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 333 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 19 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 4 -none- character
## param 2 -none- list
# Plot the final fitted model stored inside the caret object.
plot(dtm4[["finalModel"]])

# Names of the model-matrix columns (dummy-coded factors plus numerics).
dtm4[["coefnames"]]
## [1] "GenderMale"
## [2] "Previous_DegreeCommerce"
## [3] "Previous_DegreeEngineering"
## [4] "Previous_DegreeManagement"
## [5] "Previous_DegreeScience"
## [6] "Marital_statusSingle"
## [7] "Place_you_belong_toSemi Urban"
## [8] "Place_you_belong_toUrban"
## [9] "Percentage_in_10_Class"
## [10] "Percentage_in_12_Class"
## [11] "Percentage_in_Under_Graduate"
## [12] "Over.all.percentage.in.MBA"
## [13] "Total_Family_Income_per_annum10-15 lakh"
## [14] "Total_Family_Income_per_annum15 lakh and above"
## [15] "Total_Family_Income_per_annum5-10 Lakh"
## [16] "Work_Experience2-3 year"
## [17] "Work_Experience3-4 year"
## [18] "Work_Experienceless than 1 year"
## [19] "Work_Experienceno experience"
# Resampled accuracy/kappa (with standard deviations) for each mtry tried.
dtm4[["results"]]
## mtry Accuracy Kappa AccuracySD KappaSD
## 1 2 0.8420018 0.7733131 0.05062263 0.07319933
## 2 10 0.8762908 0.8244549 0.03340855 0.04748827
## 3 19 0.8716184 0.8182187 0.03675329 0.05249690

# Problem type caret inferred from the outcome.
dtm4[["modelType"]]
## [1] "Classification"

# Tuning value selected for the final model.
dtm4[["bestTune"]]
## mtry
## 2 10

# Scaled variable importance of the predictors.
varImp(dtm4)
## rf variable importance
##
## Overall
## Percentage_in_Under_Graduate 100.000
## Over.all.percentage.in.MBA 88.979
## Percentage_in_12_Class 75.238
## Percentage_in_10_Class 69.144
## Previous_DegreeEngineering 36.772
## GenderMale 16.762
## Work_Experienceno experience 16.688
## Previous_DegreeCommerce 16.285
## Total_Family_Income_per_annum5-10 Lakh 15.200
## Place_you_belong_toUrban 9.885
## Work_Experienceless than 1 year 8.519
## Previous_DegreeScience 7.975
## Place_you_belong_toSemi Urban 7.962
## Total_Family_Income_per_annum15 lakh and above 7.149
## Work_Experience2-3 year 6.817
## Previous_DegreeManagement 6.343
## Total_Family_Income_per_annum10-15 lakh 4.846
## Marital_statusSingle 2.623
## Work_Experience3-4 year 0.000
# Keep a handle on the underlying final model object.
dtm4_f <- dtm4[["finalModel"]]

# Predict the specialization for the held-out rows and show the result.
predict_dtm4 <- predict(dtm4, newdata = datatest)
predict_dtm4
## [1] Marketing LOS Finance Marketing Marketing Marketing Finance
## [8] Finance Finance LOS Marketing Finance Finance Finance
## [15] Finance Finance Marketing Finance LOS Marketing Finance
## [22] Marketing Marketing Marketing Finance Finance Marketing Finance
## [29] Finance LOS Finance Marketing Marketing Marketing HR
## [36] HR Finance Finance Marketing HR LOS Marketing
## [43] HR Marketing LOS LOS LOS Marketing Finance
## [50] HR Finance Marketing HR LOS Marketing Finance
## [57] Marketing Marketing HR HR LOS Finance Marketing
## [64] Finance Marketing LOS LOS Finance LOS Marketing
## [71] Marketing LOS Finance Finance Marketing LOS Finance
## [78] HR Marketing LOS LOS Finance Finance Marketing
## [85] LOS Marketing LOS Finance LOS Finance HR
## [92] Marketing LOS Marketing HR LOS Finance LOS
## [99] Marketing Marketing Marketing LOS HR Marketing Marketing
## [106] HR Marketing Finance Finance Marketing Marketing Marketing
## [113] Marketing LOS HR Marketing Finance Finance LOS
## [120] Marketing Finance Finance Finance Marketing Marketing LOS
## [127] LOS Marketing Finance LOS Marketing LOS Marketing
## [134] LOS LOS Finance Marketing LOS Finance Finance
## Levels: Finance HR LOS Marketing
# Compare the test-set predictions with the observed specializations.
confusionMatrix(
  data = predict_dtm4,
  reference = datatest$Specialization
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Finance HR LOS Marketing
## Finance 40 0 0 3
## HR 0 14 0 0
## LOS 0 0 31 2
## Marketing 4 0 0 46
##
## Overall Statistics
##
## Accuracy : 0.9357
## 95% CI : (0.8815, 0.9702)
## No Information Rate : 0.3643
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9096
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Finance Class: HR Class: LOS Class: Marketing
## Sensitivity 0.9091 1.0 1.0000 0.9020
## Specificity 0.9688 1.0 0.9817 0.9551
## Pos Pred Value 0.9302 1.0 0.9394 0.9200
## Neg Pred Value 0.9588 1.0 1.0000 0.9444
## Prevalence 0.3143 0.1 0.2214 0.3643
## Detection Rate 0.2857 0.1 0.2214 0.3286
## Detection Prevalence 0.3071 0.1 0.2357 0.3571
## Balanced Accuracy 0.9389 1.0 0.9908 0.9285
No comments:
Post a Comment