# Load the MBA student data set from a local CSV file into `data1`.
# stringsAsFactors = TRUE keeps text columns (e.g. Gender) as factors, which
# is what the recorded str() output further down shows. Since R 4.0.0 the
# default is FALSE, which would leave those columns as character vectors.
# NOTE(review): the path is an absolute, machine-specific location; adjust it
# before running this script on another machine.
data1 <- read.csv(
  "file:///C:/Users/LENOVO/Desktop/MBAdata.csv",
  stringsAsFactors = TRUE
)
# Summary and descriptive statistics ----
# Descriptive statistics
# t-test
# Show the compact structure (type and levels) of the Gender column.
str(data1[["Gender"]])
## Factor w/ 2 levels "Female","Male": 1 1 1 2 2 2 2 1 2 2 ...
# One-sample t-test: is the mean 10th-class percentage different from 80?
t.test(
  data1$Percentage_in_10_Class,
  mu = 80
)
##
## One Sample t-test
##
## data: data1$Percentage_in_10_Class
## t = 6.5793, df = 272, p-value = 2.417e-10
## alternative hypothesis: true mean is not equal to 80
## 95 percent confidence interval:
## 82.32584 84.31211
## sample estimates:
## mean of x
## 83.31897
# Independent 2-group t-test (numeric response ~ grouping factor) ----
# Two-sample t-test (Welch by default): completed age across Gender groups.
t.test(data1$Age_in_years_completed ~ data1$Gender)
##
## Welch Two Sample t-test
##
## data: data1$Age_in_years_completed by data1$Gender
## t = -2.0978, df = 246.26, p-value = 0.03694
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.83004766 -0.02616149
## sample estimates:
## mean in group Female mean in group Male
## 22.16667 22.59477
# Two-sample t-test (Welch by default): 10th-class percentage across Gender.
t.test(data1$Percentage_in_10_Class ~ data1$Gender)
##
## Welch Two Sample t-test
##
## data: data1$Percentage_in_10_Class by data1$Gender
## t = 3.5962, df = 266.85, p-value = 0.0003846
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 1.596708 5.460344
## sample estimates:
## mean in group Female mean in group Male
## 85.29650 81.76797
# Two-sample t-test (Welch by default): 12th-class percentage across Gender.
t.test(data1$Percentage_in_12_Class ~ data1$Gender)
##
## Welch Two Sample t-test
##
## data: data1$Percentage_in_12_Class by data1$Gender
## t = 4.7554, df = 264.43, p-value = 3.259e-06
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 3.114591 7.516383
## sample estimates:
## mean in group Female mean in group Male
## 83.54483 78.22935
# Two-sample t-test (Welch by default): under-graduate percentage across
# Gender.
t.test(data1$Percentage_in_Under_Graduate ~ data1$Gender)
##
## Welch Two Sample t-test
##
## data: data1$Percentage_in_Under_Graduate by data1$Gender
## t = 5.0318, df = 246.04, p-value = 9.381e-07
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 2.735661 6.254914
## sample estimates:
## mean in group Female mean in group Male
## 75.86800 71.37271
# Age-by-gender comparison with the two-sided alternative spelled out
# explicitly (this is also the default).
t.test(
  data1$Age_in_years_completed ~ data1$Gender,
  alternative = "two.sided"
)
##
## Welch Two Sample t-test
##
## data: data1$Age_in_years_completed by data1$Gender
## t = -2.0978, df = 246.26, p-value = 0.03694
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.83004766 -0.02616149
## sample estimates:
## mean in group Female mean in group Male
## 22.16667 22.59477
# One-sided age-by-gender test: alternative is that the true difference in
# group means is less than 0.
t.test(
  data1$Age_in_years_completed ~ data1$Gender,
  alternative = "less"
)
##
## Welch Two Sample t-test
##
## data: data1$Age_in_years_completed by data1$Gender
## t = -2.0978, df = 246.26, p-value = 0.01847
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf -0.09117358
## sample estimates:
## mean in group Female mean in group Male
## 22.16667 22.59477
# One-sided age-by-gender test: alternative is that the true difference in
# group means is greater than 0.
t.test(
  data1$Age_in_years_completed ~ data1$Gender,
  alternative = "greater"
)
##
## Welch Two Sample t-test
##
## data: data1$Age_in_years_completed by data1$Gender
## t = -2.0978, df = 246.26, p-value = 0.9815
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## -0.7650356 Inf
## sample estimates:
## mean in group Female mean in group Male
## 22.16667 22.59477
# Independent 2-group t-test (two numeric vectors) ----
# Usage template (not run; y1 and y2 are placeholders for numeric vectors):
#   t.test(y1, y2)
#
# Two-sample t-test (Welch by default) comparing the 10th-class and 12th-class
# percentage columns as if they were independent samples.
# NOTE(review): both columns come from the same rows of data1, so the paired
# test may be the more appropriate comparison; confirm the intent.
t.test(data1$Percentage_in_10_Class, data1$Percentage_in_12_Class)
##
## Welch Two Sample t-test
##
## data: data1$Percentage_in_10_Class and data1$Percentage_in_12_Class
## t = 3.5745, df = 533.24, p-value = 0.0003827
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 1.240114 4.266186
## sample estimates:
## mean of x mean of y
## 83.31897 80.56582
# Pooled-variance two-sample t-test: var.equal = TRUE assumes both columns
# share a common variance.
t.test(
  data1$Percentage_in_10_Class,
  data1$Percentage_in_12_Class,
  var.equal = TRUE
)
##
## Two Sample t-test
##
## data: data1$Percentage_in_10_Class and data1$Percentage_in_12_Class
## t = 3.5745, df = 544, p-value = 0.0003821
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 1.240182 4.266118
## sample estimates:
## mean of x mean of y
## 83.31897 80.56582
# Welch two-sample t-test: var.equal = FALSE (the default) does not assume
# equal variances.
t.test(
  data1$Percentage_in_10_Class,
  data1$Percentage_in_12_Class,
  var.equal = FALSE
)
##
## Welch Two Sample t-test
##
## data: data1$Percentage_in_10_Class and data1$Percentage_in_12_Class
## t = 3.5745, df = 533.24, p-value = 0.0003827
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 1.240114 4.266186
## sample estimates:
## mean of x mean of y
## 83.31897 80.56582
# Same two-sample comparison with the confidence level of the reported
# interval set explicitly to 95%.
t.test(
  data1$Percentage_in_10_Class,
  data1$Percentage_in_12_Class,
  conf.level = 0.95
)
##
## Welch Two Sample t-test
##
## data: data1$Percentage_in_10_Class and data1$Percentage_in_12_Class
## t = 3.5745, df = 533.24, p-value = 0.0003827
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 1.240114 4.266186
## sample estimates:
## mean of x mean of y
## 83.31897 80.56582
# Paired t-test ----
# Usage template (not run; y1 and y2 are placeholders for numeric vectors):
#   t.test(y1, y2, paired = TRUE)
#
# Paired t-test on the row-wise differences between the 10th-class and
# 12th-class percentage columns of data1.
t.test(
  data1$Percentage_in_10_Class,
  data1$Percentage_in_12_Class,
  paired = TRUE
)
##
## Paired t-test
##
## data: data1$Percentage_in_10_Class and data1$Percentage_in_12_Class
## t = 4.845, df = 272, p-value = 2.129e-06
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 1.634436 3.871864
## sample estimates:
## mean of the differences
## 2.75315
# Analysis of variance ----
library(psych)
# Descriptive statistics of the 10th-class percentage, one table per
# Previous_Degree group.
describeBy(
  data1$Percentage_in_10_Class,
  group = data1$Previous_Degree
)
##
## Descriptive statistics by group
## group: Arts
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 4 81.03 8.48 82.8 81.03 5.41 69.2 89.3 20.1 -0.44 -1.83
## se
## X1 4.24
## --------------------------------------------------------
## group: Commerce
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 101 81.28 8.79 81.7 81.52 9.93 61 96.4 35.4 -0.19 -0.85
## se
## X1 0.87
## --------------------------------------------------------
## group: Engineering
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 108 85.5 6.91 86.1 85.83 7.56 68 97.2 29.2 -0.45 -0.49
## se
## X1 0.67
## --------------------------------------------------------
## group: Journalism
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 1 65 NA 65 65 0 65 65 0 NA NA NA
## --------------------------------------------------------
## group: Management
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 36 81.84 8.47 83 82.29 8.45 58.9 94 35.1 -0.59 -0.34
## se
## X1 1.41
## --------------------------------------------------------
## group: Science
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 23 85.53 9.16 88.6 87.09 6.23 56 95 39 -1.69 2.71
## se
## X1 1.91
# Same grouped descriptives, returned as a single table (mat = TRUE) with one
# row per Previous_Degree group.
describeBy(
  data1$Percentage_in_10_Class,
  group = data1$Previous_Degree,
  mat = TRUE
)
## item group1 vars n mean sd median trimmed mad
## X11 1 Arts 1 4 81.02500 8.483071 82.8 81.02500 5.41149
## X12 2 Commerce 1 101 81.27931 8.787344 81.7 81.51617 9.93342
## X13 3 Engineering 1 108 85.50361 6.913155 86.1 85.82818 7.56126
## X14 4 Journalism 1 1 65.00000 NA 65.0 65.00000 0.00000
## X15 5 Management 1 36 81.83944 8.474893 83.0 82.29067 8.45082
## X16 6 Science 1 23 85.52870 9.164647 88.6 87.09263 6.22692
## min max range skew kurtosis se
## X11 69.2 89.3 20.1 -0.4377254 -1.8275607 4.2415357
## X12 61.0 96.4 35.4 -0.1926382 -0.8546568 0.8743734
## X13 68.0 97.2 29.2 -0.4537738 -0.4903248 0.6652186
## X14 65.0 65.0 0.0 NA NA NA
## X15 58.9 94.0 35.1 -0.5920874 -0.3430281 1.4124822
## X16 56.0 95.0 39.0 -1.6928706 2.7076566 1.9109610
# Grouped descriptives with mat = FALSE stated explicitly: one printed table
# per Previous_Degree group.
describeBy(
  data1$Percentage_in_10_Class,
  group = data1$Previous_Degree,
  mat = FALSE
)
##
## Descriptive statistics by group
## group: Arts
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 4 81.03 8.48 82.8 81.03 5.41 69.2 89.3 20.1 -0.44 -1.83
## se
## X1 4.24
## --------------------------------------------------------
## group: Commerce
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 101 81.28 8.79 81.7 81.52 9.93 61 96.4 35.4 -0.19 -0.85
## se
## X1 0.87
## --------------------------------------------------------
## group: Engineering
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 108 85.5 6.91 86.1 85.83 7.56 68 97.2 29.2 -0.45 -0.49
## se
## X1 0.67
## --------------------------------------------------------
## group: Journalism
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 1 65 NA 65 65 0 65 65 0 NA NA NA
## --------------------------------------------------------
## group: Management
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 36 81.84 8.47 83 82.29 8.45 58.9 94 35.1 -0.59 -0.34
## se
## X1 1.41
## --------------------------------------------------------
## group: Science
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 23 85.53 9.16 88.6 87.09 6.23 56 95 39 -1.69 2.71
## se
## X1 1.91
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
# Box plots of the 10th-class percentage for each Previous_Degree group.
# Columns are referenced by bare name inside aes() so they are looked up in
# the `data` argument; writing data1$col inside aes() bypasses that lookup and
# is discouraged by ggplot2. Axis labels become the plain column names.
ggplot(data1, aes(x = Previous_Degree, y = Percentage_in_10_Class)) +
  geom_boxplot()

# Same plot with the box outlines coloured by Previous_Degree.
ggplot(data1, aes(x = Previous_Degree, y = Percentage_in_10_Class)) +
  geom_boxplot(aes(color = Previous_Degree))
# One-way ANOVA: 10th-class percentage explained by Previous_Degree.
# Auto-printing the fit shows the sums of squares and degrees of freedom.
anova1 <- aov(
  Percentage_in_10_Class ~ Previous_Degree,
  data = data1
)
anova1
## Call:
## aov(formula = Percentage_in_10_Class ~ Previous_Degree, data = data1)
##
## Terms:
## Previous_Degree Residuals
## Sum of Squares 1483.374 17412.972
## Deg. of Freedom 5 267
##
## Residual standard error: 8.075712
## Estimated effects may be unbalanced
# Explicitly print the fitted one-way model (same display as auto-printing).
print(x = anova1)
## Call:
## aov(formula = Percentage_in_10_Class ~ Previous_Degree, data = data1)
##
## Terms:
## Previous_Degree Residuals
## Sum of Squares 1483.374 17412.972
## Deg. of Freedom 5 267
##
## Residual standard error: 8.075712
## Estimated effects may be unbalanced
# ANOVA table for the one-way model: Df, sums of squares, F value and p-value.
summary(object = anova1)
## Df Sum Sq Mean Sq F value Pr(>F)
## Previous_Degree 5 1483 296.67 4.549 0.000535 ***
## Residuals 267 17413 65.22
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Tukey honest-significant-difference comparisons between every pair of
# Previous_Degree groups, at the 95% family-wise confidence level.
TukeyHSD(anova1, conf.level = 0.95)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Percentage_in_10_Class ~ Previous_Degree, data = data1)
##
## $Previous_Degree
## diff lwr upr p adj
## Commerce-Arts 0.25430693 -11.563795 12.0724093 0.9999999
## Engineering-Arts 4.47861111 -7.324892 16.2821142 0.8854315
## Journalism-Arts -16.02500000 -41.942839 9.8928389 0.4837954
## Management-Arts 0.81444444 -11.403342 13.0322309 0.9999643
## Science-Arts 4.50369565 -8.054630 17.0620215 0.9077213
## Engineering-Commerce 4.22430418 1.015492 7.4331168 0.0026482
## Journalism-Commerce -16.27930693 -39.575405 7.0167907 0.3416660
## Management-Commerce 0.56013751 -3.939651 5.0599263 0.9992302
## Science-Commerce 4.24938872 -1.106481 9.6052586 0.2069561
## Journalism-Engineering -20.50361111 -43.792306 2.7850837 0.1199175
## Management-Engineering -3.66416667 -8.125471 0.7971381 0.1752232
## Science-Engineering 0.02508454 -5.298494 5.3486627 1.0000000
## Management-Journalism 16.83944444 -6.661937 40.3408258 0.3133739
## Science-Journalism 20.52869565 -3.151511 44.2089020 0.1311848
## Science-Management 3.68925121 -2.498810 9.8773126 0.5254607
# Two-way ANOVA ----
# Two-way ANOVA (main effects only): 10th-class percentage explained by
# Previous_Degree and Gender.
anova2 <- aov(
  Percentage_in_10_Class ~ Previous_Degree + Gender,
  data = data1
)

# Printing the fit shows only sums of squares and degrees of freedom.
anova2

# summary() adds the F statistics and p-values, matching what is done for the
# one-way model.
summary(anova2)
# Interaction effect ----
# Two-way ANOVA with interaction: both main effects plus the
# Previous_Degree:Gender interaction term.
anova3 <- aov(
  Percentage_in_10_Class ~ Previous_Degree + Gender + Previous_Degree:Gender,
  data = data1
)

# Printing the fit shows only sums of squares and degrees of freedom.
anova3

# summary() adds the F statistics and p-values, including the test of the
# interaction term.
summary(anova3)