Basics of R Session 5.2 - Missing values
Dr Manohar Kapse
# Missing values ----
# Note: there is deliberately no rm(list = ls()) here. Wiping the global
# environment from inside a script deletes the caller's objects without
# resetting loaded packages or options; restart the R session to get a
# clean slate instead.

# A numeric vector whose third element is missing.
X <- c(1, 2, NA, 4, 5)
str(X)
## num [1:5] 1 2 NA 4 5

# is.na() flags every element: TRUE where the value is NA.
is.na(X)
## [1] FALSE FALSE TRUE FALSE FALSE

# TRUE counts as 1, so summing the flags gives the number of missing values.
sum(is.na(X))
## [1] 1

# which() turns the flags into the position(s) of the missing values.
which(is.na(X))
## [1] 3
# Missing values ----
# The workspace is not cleared with rm(list = ls()): that call removes every
# object of whoever runs the script and still leaves packages and options
# untouched. Nothing below depends on an empty workspace.

# Example vector: five numbers, the third one is NA.
X <- c(1, 2, NA, 4, 5)
str(X)
## num [1:5] 1 2 NA 4 5

# Element-wise test for missingness.
is.na(X)
## [1] FALSE FALSE TRUE FALSE FALSE

# How many values are missing?
sum(is.na(X))
## [1] 1

# Where are they?
which(is.na(X))
## [1] 3
Step by Step
# Logical mask of the missing positions in X.
is.na(X)
## [1] FALSE FALSE TRUE FALSE FALSE
# Indexing with the mask pulls out only the missing entries.
X[is.na(X)]
## [1] NA
# replace() overwrites the masked entries: every NA becomes 999.
X <- replace(X, is.na(X), 999)
X
## [1] 1 2 999 4 5
Replace missing values with a specific label or string
# Assign a text label to the missing positions. Every NA was already replaced
# by 999 above, so no element actually receives the label ...
X <- replace(X, is.na(X), "missing values")
# ... yet assigning a character value still coerces the whole vector to
# character, as str() shows.
str(X)
## chr [1:5] "1" "2" "999" "4" "5"
Imputing with the mean
# Start again from the vector with one missing value.
X <- c(1, 2, NA, 4, 5)
# mean() returns NA as soon as one element is NA, so this first attempt only
# writes NA back into the missing slot.
X[which(is.na(X))] <- mean(X)
X  # unchanged: the missing value is still there
## [1] 1 2 NA 4 5
# With na.rm = TRUE the NA is dropped before averaging, and the mean of
# 1, 2, 4 and 5 (which is 3) fills the gap.
X[which(is.na(X))] <- mean(X, na.rm = TRUE)
X
## [1] 1 2 3 4 5
Omitting the missing values
# na.omit() drops the missing element; the result records what was removed in
# its "na.action" attribute (position 3, class "omit"), as printed below.
X <- na.omit(c(1, 2, NA, 4, 5))
X
## [1] 1 2 4 5
## attr(,"na.action")
## [1] 3
## attr(,"class")
## [1] "omit"
# Step by step: locate the missing value, then overwrite it.
# X is re-created first: the previous step (na.omit) left X without any NA,
# so the results recorded below could not be reproduced otherwise.
X <- c(1, 2, NA, 4, 5)
# TRUE marks the missing position.
is.na(X)
## [1] FALSE FALSE TRUE FALSE FALSE
# Only the missing entries are selected.
X[is.na(X)]
## [1] NA
# Overwrite them with the placeholder 999.
X[is.na(X)] <- 999
X
## [1] 1 2 999 4 5
Replace missing values with a specific label or string
# No NA is left in X at this point, so the label is not stored anywhere ...
X[is.na(X)] <- "missing values"
# ... but the character right-hand side still turns every element of X into
# a string.
str(X)
## chr [1:5] "1" "2" "999" "4" "5"
Imputing with the mean
# Vector with a single missing value.
X <- c(1, 2, NA, 4, 5)
# Without na.rm the mean itself is NA, so the replacement changes nothing.
X <- replace(X, is.na(X), mean(X))
X  # the NA is still present
## [1] 1 2 NA 4 5
# Ignoring the NA while averaging yields 3, which is then filled in.
X <- replace(X, is.na(X), mean(X, na.rm = TRUE))
X
## [1] 1 2 3 4 5
Omitting the missing values
# Rebuild the example vector, then let na.omit() remove the NA.
X <- c(1, 2, NA, 4, 5)
X <- stats::na.omit(X)
# Printing shows the four remaining values plus the bookkeeping attribute
# that stores the index of the dropped element.
X
## [1] 1 2 4 5
## attr(,"na.action")
## [1] 3
## attr(,"class")
## [1] "omit"
Use the health survey data set
# Load the health survey extract.
# stringsAsFactors = TRUE is spelled out because R 4.0.0 changed the
# read.csv() default to FALSE; the summaries recorded below (level counts for
# State, Religion, ...) rely on the text columns being read as factors.
data1 <- read.csv(
  "file:///C:/Users/LENOVO/Desktop/health Survey.csv",
  stringsAsFactors = TRUE
)
# Check the file for missing values: summary() adds an "NA's" entry to every
# numeric column that contains any.
summary(data1)
## Case.Identification Current.age...respondent Age.in.5.year.groups
## 1 1 2 2: 1 Min. :15.00 15-19:23955
## 1 1 3 2: 1 1st Qu.:21.00 20-24:22807
## 1 1 5 5: 1 Median :28.00 25-29:20653
## 1 1 6 2: 1 Mean :29.16 30-34:17867
## 1 1 7 2: 1 3rd Qu.:37.00 35-39:16158
## 1 1 7 3: 1 Max. :49.00 40-44:13138
## (Other) :124379 45-49: 9807
## State Type.of.place.of.residence
## [UP] Uttar Pradesh :12183 Rural:67424
## [MH] Maharashtra : 9034 Urban:56961
## [AP] Andhra Pradesh: 7128
## [WB] West Bengal : 6794
## [MP] Madhya Pradesh: 6427
## [KA] Karnataka : 6008
## (Other) :76811
## State.1 Type.of.place.of.residence.1
## [UP] Uttar Pradesh :12183 Rural:67424
## [MH] Maharashtra : 9034 Urban:56961
## [AP] Andhra Pradesh: 7128
## [WB] West Bengal : 6794
## [MP] Madhya Pradesh: 6427
## [KA] Karnataka : 6008
## (Other) :76811
## Highest.educational.level Highest.year.of.education
## 9 : 12 Min. : 0.00
## Higher :12966 1st Qu.: 3.00
## No education:39769 Median : 4.00
## Primary :17756 Mean : 3.95
## Secondary :53882 3rd Qu.: 5.00
## Max. :11.00
## NA's :39781
## Household.has..electricity Household.has..motorcycle.scooter
## 9 : 23 9 : 49
## No :23070 No :90934
## Not a dejure resident: 5528 Not a dejure resident: 5528
## Yes :95764 Yes :27874
##
##
##
## Household.has..car Religion
## 9 : 55 Hindu :89957
## No :112490 Muslim :16742
## Not a dejure resident: 5528 Christian :10977
## Yes : 6312 Sikh : 2772
## Buddhist/Neo-Buddhist: 1765
## other : 1032
## (Other) : 1140
## Number.of.household.members..listed. Educational.attainment
## Min. : 1.000 9 : 12
## 1st Qu.: 4.000 Complete primary : 8039
## Median : 5.000 Complete secondary : 7611
## Mean : 6.007 Higher :12966
## 3rd Qu.: 7.000 Incomplete primary : 9717
## Max. :35.000 Incomplete secondary:46271
## No education :39769
## Has.telephone Literacy
## 9 : 45 9 : 107
## No :93489 Able to read only parts of sentence: 6082
## Not de jure resident: 5528 Able to read whole sentence :73628
## Yes :25323 Blind/visually impaired : 130
## Cannot read at all :43901
## No card with required language : 537
##
## Wealth.index Wealth.index.factor.score..5.decimals.
## Middle :23682 Min. :-175304
## Poorer :17652 1st Qu.: -76482
## Poorest:14077 Median : 1737
## Richer :30136 Mean : 7403
## Richest:38838 3rd Qu.: 88900
## Max. : 236620
##
## Total.children.ever.born Sons.at.home Daughters.at.home
## Min. : 0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.: 0.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median : 2.000 Median :1.0000 Median :0.0000
## Mean : 2.064 Mean :0.8668 Mean :0.7289
## 3rd Qu.: 3.000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :16.000 Max. :9.0000 Max. :8.0000
##
## Sons.elsewhere Daughters.elsewhere
## Min. :0.00000 Min. :0.000
## 1st Qu.:0.00000 1st Qu.:0.000
## Median :0.00000 Median :0.000
## Mean :0.09649 Mean :0.165
## 3rd Qu.:0.00000 3rd Qu.:0.000
## Max. :7.00000 Max. :6.000
##
## Marital.status
## Currently married :87925
## Deserted : 443
## Divorced : 504
## Married, but Gauna not performed: 470
## Never married :30191
## Separated : 931
## Widowed : 3921
# Variable-by-variable description of the data frame using the descr package
# (called with :: so the package does not have to be attached).
descr::descr(data1)
##
## Case.Identification
## 1 1 2 2 1 1 3 2 1 1 5 5 1 1 6 2
## 1 1 1 1
## 1 1 7 2 1 1 7 3 1 1 7 5 1 1 7 9
## 1 1 1 1
## 1 1 8 4 1 1 9 2 1 1 10 2 1 1 10 5
## 1 1 1 1
## 1 1 11 2 1 1 12 4 1 1 14 2 1 1 14 3
## 1 1 1 1
## 1 1 15 2 1 1 18 1 1 1 18 3 1 1 19 2
## 1 1 1 1
## 1 1 21 2 1 1 22 3 1 1 23 2 1 1 25 2
## 1 1 1 1
## 1 1 25 3 1 1 26 3 1 1 27 3 1 1 27 5
## 1 1 1 1
## 1 1 27 6 1 1 29 4 1 1 29 11 1 1 30 1
## 1 1 1 1
## 1 1 30 2 1 1 32 2 1 1 32 5 1 1 33 2
## 1 1 1 1
## 1 1 33 3 1 1 34 4 1 1 34 5 1 1 34 6
## 1 1 1 1
## 1 1 34 7 1 1 34 9 1 1 35 2 1 1 36 2
## 1 1 1 1
## 1 1 39 4 1 1 40 2 1 1 41 2 1 1 41 3
## 1 1 1 1
## 1 1 43 2 1 1 44 2 1 1 44 6 1 1 45 2
## 1 1 1 1
## 1 1 45 4 1 1 46 3 1 1 46 6 1 1 48 2
## 1 1 1 1
## 1 1 49 2 1 1 49 5 1 1 50 2 1 1 51 2
## 1 1 1 1
## 1 1 52 2 1 1 55 3 1 1 55 4 1 1 56 7
## 1 1 1 1
## 1 2 1 2 1 2 3 2 1 2 3 4 1 2 4 2
## 1 1 1 1
## 1 2 4 7 1 2 5 2 1 2 5 4 1 2 5 5
## 1 1 1 1
## 1 2 6 2 1 2 6 4 1 2 6 6 1 2 7 4
## 1 1 1 1
## 1 2 8 2 1 2 8 4 1 2 8 7 1 2 9 4
## 1 1 1 1
## 1 2 9 8 1 2 10 4 1 2 10 5 1 2 11 2
## 1 1 1 1
## 1 2 12 2 1 2 12 3 1 2 12 4 1 2 13 2
## 1 1 1 1
## 1 2 13 6 1 2 14 2 1 2 15 7 1 3 1 4
## 1 1 1 1
## 1 3 1 5 1 3 2 2 1 3 3 3 1 3 3 4
## 1 1 1 1
## 1 3 4 3 1 3 6 2 1 3 9 4 (Other)
## 1 1 1 124286
##
## Current.age...respondent
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 15.00 21.00 28.00 29.16 37.00 49.00
##
## Age.in.5.year.groups
## 15-19 20-24 25-29 30-34 35-39 40-44 45-49
## 23955 22807 20653 17867 16158 13138 9807
##
## State
## [AP] Andhra Pradesh [AR] Arunachal Pradesh [AS] Assam
## 7128 1647 3840
## [BH] Bihar [CH] Chhattisgarh [DL] Delhi
## 3818 3810 3349
## [GJ] Gujarat [GO] Goa [HP] Himachal Pradesh
## 3729 3464 3193
## [HR] Haryana [JH] Jharkhand [JM] Jammu and Kashmir
## 2790 2983 3281
## [KA] Karnataka [KE] Kerala [MG] Meghalaya
## 6008 3566 2124
## [MH] Maharashtra [MN] Manipur [MP] Madhya Pradesh
## 9034 4512 6427
## [MZ] Mizoram [NA] Nagaland [OR] Orissa
## 1791 3896 4540
## [PJ] Punjab [RJ] Rajasthan [SK] Sikkim
## 3681 3892 2127
## [TN] Tamil Nadu [TR] Tripura [UC] Uttaranchal
## 5919 1906 2953
## [UP] Uttar Pradesh [WB] West Bengal
## 12183 6794
##
## Type.of.place.of.residence
## Rural Urban
## 67424 56961
##
## State.1
## [AP] Andhra Pradesh [AR] Arunachal Pradesh [AS] Assam
## 7128 1647 3840
## [BH] Bihar [CH] Chhatisgarh [DL] Delhi
## 3818 3810 3349
## [GJ] Gujarat [GO] Goa [HP] Himachal Pradesh
## 3729 3464 3193
## [HR] Haryana [JH] Jharkhand [JM] Jammu and Kashmir
## 2790 2983 3281
## [KA] Karnataka [KE] Kerala [MG] Meghalaya
## 6008 3566 2124
## [MH] Maharashtra [MN] Manipur [MP] Madhya Pradesh
## 9034 4512 6427
## [MZ] Mizoram [NA] Nagaland [OR] Orissa
## 1791 3896 4540
## [PJ] Punjab [RJ] Rajasthan [SK] Sikkim
## 3681 3892 2127
## [TN] Tamil Nadu [TR] Tripura [UC] Uttaranchal
## 5919 1906 2953
## [UP] Uttar Pradesh [WB] West Bengal
## 12183 6794
##
## Type.of.place.of.residence.1
## Rural Urban
## 67424 56961
##
## Highest.educational.level
## 9 Higher No education Primary Secondary
## 12 12966 39769 17756 53882
##
## Highest.year.of.education
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 3.00 4.00 3.95 5.00 11.00 39781
##
## Household.has..electricity
## 9 No Not a dejure resident
## 23 23070 5528
## Yes
## 95764
##
## Household.has..motorcycle.scooter
## 9 No Not a dejure resident
## 49 90934 5528
## Yes
## 27874
##
## Household.has..car
## 9 No Not a dejure resident
## 55 112490 5528
## Yes
## 6312
##
## Religion
## 99 Buddhist/Neo-Buddhist Christian
## 158 1765 10977
## Donyi polo Hindu Jain
## 385 89957 539
## Jewish Muslim No religion
## 11 16742 44
## other Parsi/Zoroastrian Sikh
## 1032 3 2772
##
## Number.of.household.members..listed.
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 4.000 5.000 6.007 7.000 35.000
##
## Educational.attainment
## 9 Complete primary Complete secondary
## 12 8039 7611
## Higher Incomplete primary Incomplete secondary
## 12966 9717 46271
## No education
## 39769
##
## Has.telephone
## 9 No Not de jure resident
## 45 93489 5528
## Yes
## 25323
##
## Literacy
## 9 Able to read only parts of sentence
## 107 6082
## Able to read whole sentence Blind/visually impaired
## 73628 130
## Cannot read at all No card with required language
## 43901 537
##
## Wealth.index
## Middle Poorer Poorest Richer Richest
## 23682 17652 14077 30136 38838
##
## Wealth.index.factor.score..5.decimals.
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -175304 -76482 1737 7403 88900 236620
##
## Total.children.ever.born
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 2.000 2.064 3.000 16.000
##
## Sons.at.home
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 1.0000 0.8668 1.0000 9.0000
##
## Daughters.at.home
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.7289 1.0000 8.0000
##
## Sons.elsewhere
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00000 0.00000 0.00000 0.09649 0.00000 7.00000
##
## Daughters.elsewhere
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 0.000 0.165 0.000 6.000
##
## Marital.status
## Currently married Deserted
## 87925 443
## Divorced Married, but Gauna not performed
## 504 470
## Never married Separated
## 30191 931
## Widowed
## 3921
#------------#
# is.na(data1)                           # element-wise flags: not very useful
# is.na(data1$Current.age...respondent)  # same for a single column
# The flags become useful once they are summed into a count of missing values.
# Whole data frame:
sum(is.na(data1))
## [1] 39781
# Single columns:
sum(is.na(data1[["Current.age...respondent"]]))
## [1] 0
sum(is.na(data1[["Type.of.place.of.residence"]]))
## [1] 0
sum(is.na(data1[["Highest.year.of.education"]]))
## [1] 39781
# Read the health survey CSV into a data frame.
# Text columns are requested as factors explicitly: since R 4.0.0 read.csv()
# no longer does this by default, and the per-level counts recorded below
# only appear for factor columns.
data1 <- read.csv(
  "file:///C:/Users/LENOVO/Desktop/health Survey.csv",
  stringsAsFactors = TRUE
)
# To check for missing values in the file, look for "NA's" rows in the summary.
summary(data1)
## Case.Identification Current.age...respondent Age.in.5.year.groups
## 1 1 2 2: 1 Min. :15.00 15-19:23955
## 1 1 3 2: 1 1st Qu.:21.00 20-24:22807
## 1 1 5 5: 1 Median :28.00 25-29:20653
## 1 1 6 2: 1 Mean :29.16 30-34:17867
## 1 1 7 2: 1 3rd Qu.:37.00 35-39:16158
## 1 1 7 3: 1 Max. :49.00 40-44:13138
## (Other) :124379 45-49: 9807
## State Type.of.place.of.residence
## [UP] Uttar Pradesh :12183 Rural:67424
## [MH] Maharashtra : 9034 Urban:56961
## [AP] Andhra Pradesh: 7128
## [WB] West Bengal : 6794
## [MP] Madhya Pradesh: 6427
## [KA] Karnataka : 6008
## (Other) :76811
## State.1 Type.of.place.of.residence.1
## [UP] Uttar Pradesh :12183 Rural:67424
## [MH] Maharashtra : 9034 Urban:56961
## [AP] Andhra Pradesh: 7128
## [WB] West Bengal : 6794
## [MP] Madhya Pradesh: 6427
## [KA] Karnataka : 6008
## (Other) :76811
## Highest.educational.level Highest.year.of.education
## 9 : 12 Min. : 0.00
## Higher :12966 1st Qu.: 3.00
## No education:39769 Median : 4.00
## Primary :17756 Mean : 3.95
## Secondary :53882 3rd Qu.: 5.00
## Max. :11.00
## NA's :39781
## Household.has..electricity Household.has..motorcycle.scooter
## 9 : 23 9 : 49
## No :23070 No :90934
## Not a dejure resident: 5528 Not a dejure resident: 5528
## Yes :95764 Yes :27874
##
##
##
## Household.has..car Religion
## 9 : 55 Hindu :89957
## No :112490 Muslim :16742
## Not a dejure resident: 5528 Christian :10977
## Yes : 6312 Sikh : 2772
## Buddhist/Neo-Buddhist: 1765
## other : 1032
## (Other) : 1140
## Number.of.household.members..listed. Educational.attainment
## Min. : 1.000 9 : 12
## 1st Qu.: 4.000 Complete primary : 8039
## Median : 5.000 Complete secondary : 7611
## Mean : 6.007 Higher :12966
## 3rd Qu.: 7.000 Incomplete primary : 9717
## Max. :35.000 Incomplete secondary:46271
## No education :39769
## Has.telephone Literacy
## 9 : 45 9 : 107
## No :93489 Able to read only parts of sentence: 6082
## Not de jure resident: 5528 Able to read whole sentence :73628
## Yes :25323 Blind/visually impaired : 130
## Cannot read at all :43901
## No card with required language : 537
##
## Wealth.index Wealth.index.factor.score..5.decimals.
## Middle :23682 Min. :-175304
## Poorer :17652 1st Qu.: -76482
## Poorest:14077 Median : 1737
## Richer :30136 Mean : 7403
## Richest:38838 3rd Qu.: 88900
## Max. : 236620
##
## Total.children.ever.born Sons.at.home Daughters.at.home
## Min. : 0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.: 0.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median : 2.000 Median :1.0000 Median :0.0000
## Mean : 2.064 Mean :0.8668 Mean :0.7289
## 3rd Qu.: 3.000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :16.000 Max. :9.0000 Max. :8.0000
##
## Sons.elsewhere Daughters.elsewhere
## Min. :0.00000 Min. :0.000
## 1st Qu.:0.00000 1st Qu.:0.000
## Median :0.00000 Median :0.000
## Mean :0.09649 Mean :0.165
## 3rd Qu.:0.00000 3rd Qu.:0.000
## Max. :7.00000 Max. :6.000
##
## Marital.status
## Currently married :87925
## Deserted : 443
## Divorced : 504
## Married, but Gauna not performed: 470
## Never married :30191
## Separated : 931
## Widowed : 3921
# Describe every variable of data1 with descr() from the descr package;
# the :: prefix calls it without attaching the package.
descr::descr(data1)
##
## Case.Identification
## 1 1 2 2 1 1 3 2 1 1 5 5 1 1 6 2
## 1 1 1 1
## 1 1 7 2 1 1 7 3 1 1 7 5 1 1 7 9
## 1 1 1 1
## 1 1 8 4 1 1 9 2 1 1 10 2 1 1 10 5
## 1 1 1 1
## 1 1 11 2 1 1 12 4 1 1 14 2 1 1 14 3
## 1 1 1 1
## 1 1 15 2 1 1 18 1 1 1 18 3 1 1 19 2
## 1 1 1 1
## 1 1 21 2 1 1 22 3 1 1 23 2 1 1 25 2
## 1 1 1 1
## 1 1 25 3 1 1 26 3 1 1 27 3 1 1 27 5
## 1 1 1 1
## 1 1 27 6 1 1 29 4 1 1 29 11 1 1 30 1
## 1 1 1 1
## 1 1 30 2 1 1 32 2 1 1 32 5 1 1 33 2
## 1 1 1 1
## 1 1 33 3 1 1 34 4 1 1 34 5 1 1 34 6
## 1 1 1 1
## 1 1 34 7 1 1 34 9 1 1 35 2 1 1 36 2
## 1 1 1 1
## 1 1 39 4 1 1 40 2 1 1 41 2 1 1 41 3
## 1 1 1 1
## 1 1 43 2 1 1 44 2 1 1 44 6 1 1 45 2
## 1 1 1 1
## 1 1 45 4 1 1 46 3 1 1 46 6 1 1 48 2
## 1 1 1 1
## 1 1 49 2 1 1 49 5 1 1 50 2 1 1 51 2
## 1 1 1 1
## 1 1 52 2 1 1 55 3 1 1 55 4 1 1 56 7
## 1 1 1 1
## 1 2 1 2 1 2 3 2 1 2 3 4 1 2 4 2
## 1 1 1 1
## 1 2 4 7 1 2 5 2 1 2 5 4 1 2 5 5
## 1 1 1 1
## 1 2 6 2 1 2 6 4 1 2 6 6 1 2 7 4
## 1 1 1 1
## 1 2 8 2 1 2 8 4 1 2 8 7 1 2 9 4
## 1 1 1 1
## 1 2 9 8 1 2 10 4 1 2 10 5 1 2 11 2
## 1 1 1 1
## 1 2 12 2 1 2 12 3 1 2 12 4 1 2 13 2
## 1 1 1 1
## 1 2 13 6 1 2 14 2 1 2 15 7 1 3 1 4
## 1 1 1 1
## 1 3 1 5 1 3 2 2 1 3 3 3 1 3 3 4
## 1 1 1 1
## 1 3 4 3 1 3 6 2 1 3 9 4 (Other)
## 1 1 1 124286
##
## Current.age...respondent
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 15.00 21.00 28.00 29.16 37.00 49.00
##
## Age.in.5.year.groups
## 15-19 20-24 25-29 30-34 35-39 40-44 45-49
## 23955 22807 20653 17867 16158 13138 9807
##
## State
## [AP] Andhra Pradesh [AR] Arunachal Pradesh [AS] Assam
## 7128 1647 3840
## [BH] Bihar [CH] Chhattisgarh [DL] Delhi
## 3818 3810 3349
## [GJ] Gujarat [GO] Goa [HP] Himachal Pradesh
## 3729 3464 3193
## [HR] Haryana [JH] Jharkhand [JM] Jammu and Kashmir
## 2790 2983 3281
## [KA] Karnataka [KE] Kerala [MG] Meghalaya
## 6008 3566 2124
## [MH] Maharashtra [MN] Manipur [MP] Madhya Pradesh
## 9034 4512 6427
## [MZ] Mizoram [NA] Nagaland [OR] Orissa
## 1791 3896 4540
## [PJ] Punjab [RJ] Rajasthan [SK] Sikkim
## 3681 3892 2127
## [TN] Tamil Nadu [TR] Tripura [UC] Uttaranchal
## 5919 1906 2953
## [UP] Uttar Pradesh [WB] West Bengal
## 12183 6794
##
## Type.of.place.of.residence
## Rural Urban
## 67424 56961
##
## State.1
## [AP] Andhra Pradesh [AR] Arunachal Pradesh [AS] Assam
## 7128 1647 3840
## [BH] Bihar [CH] Chhatisgarh [DL] Delhi
## 3818 3810 3349
## [GJ] Gujarat [GO] Goa [HP] Himachal Pradesh
## 3729 3464 3193
## [HR] Haryana [JH] Jharkhand [JM] Jammu and Kashmir
## 2790 2983 3281
## [KA] Karnataka [KE] Kerala [MG] Meghalaya
## 6008 3566 2124
## [MH] Maharashtra [MN] Manipur [MP] Madhya Pradesh
## 9034 4512 6427
## [MZ] Mizoram [NA] Nagaland [OR] Orissa
## 1791 3896 4540
## [PJ] Punjab [RJ] Rajasthan [SK] Sikkim
## 3681 3892 2127
## [TN] Tamil Nadu [TR] Tripura [UC] Uttaranchal
## 5919 1906 2953
## [UP] Uttar Pradesh [WB] West Bengal
## 12183 6794
##
## Type.of.place.of.residence.1
## Rural Urban
## 67424 56961
##
## Highest.educational.level
## 9 Higher No education Primary Secondary
## 12 12966 39769 17756 53882
##
## Highest.year.of.education
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 3.00 4.00 3.95 5.00 11.00 39781
##
## Household.has..electricity
## 9 No Not a dejure resident
## 23 23070 5528
## Yes
## 95764
##
## Household.has..motorcycle.scooter
## 9 No Not a dejure resident
## 49 90934 5528
## Yes
## 27874
##
## Household.has..car
## 9 No Not a dejure resident
## 55 112490 5528
## Yes
## 6312
##
## Religion
## 99 Buddhist/Neo-Buddhist Christian
## 158 1765 10977
## Donyi polo Hindu Jain
## 385 89957 539
## Jewish Muslim No religion
## 11 16742 44
## other Parsi/Zoroastrian Sikh
## 1032 3 2772
##
## Number.of.household.members..listed.
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 4.000 5.000 6.007 7.000 35.000
##
## Educational.attainment
## 9 Complete primary Complete secondary
## 12 8039 7611
## Higher Incomplete primary Incomplete secondary
## 12966 9717 46271
## No education
## 39769
##
## Has.telephone
## 9 No Not de jure resident
## 45 93489 5528
## Yes
## 25323
##
## Literacy
## 9 Able to read only parts of sentence
## 107 6082
## Able to read whole sentence Blind/visually impaired
## 73628 130
## Cannot read at all No card with required language
## 43901 537
##
## Wealth.index
## Middle Poorer Poorest Richer Richest
## 23682 17652 14077 30136 38838
##
## Wealth.index.factor.score..5.decimals.
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -175304 -76482 1737 7403 88900 236620
##
## Total.children.ever.born
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 2.000 2.064 3.000 16.000
##
## Sons.at.home
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 1.0000 0.8668 1.0000 9.0000
##
## Daughters.at.home
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.7289 1.0000 8.0000
##
## Sons.elsewhere
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00000 0.00000 0.00000 0.09649 0.00000 7.00000
##
## Daughters.elsewhere
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 0.000 0.165 0.000 6.000
##
## Marital.status
## Currently married Deserted
## 87925 443
## Divorced Married, but Gauna not performed
## 504 470
## Never married Separated
## 30191 931
## Widowed
## 3921
#------------#
# Printing the raw flags is not very useful:
#   is.na(data1)
#   is.na(data1$Current.age...respondent)
# Counting them is: the total number of missing values in the data frame ...
sum(is.na(data1))
## [1] 39781
# ... and in individual columns.
sum(is.na(data1[["Current.age...respondent"]]))
## [1] 0
sum(is.na(data1[["Type.of.place.of.residence"]]))
## [1] 0
sum(is.na(data1[["Highest.year.of.education"]]))
## [1] 39781
Location of the missing values
# Positions of the missing values (long output, so left commented out):
# which(is.na(data1$Highest.year.of.education))

# Recode missing values as a string or a number: a text label suits
# categorical data, a number suits scale data.
table(data1$Educational.attainment)
##
## 9 Complete primary Complete secondary
## 12 8039 7611
## Higher Incomplete primary Incomplete secondary
## 12966 9717 46271
## No education
## 39769

# Label the NAs in Highest.year.of.education. Storing a string in this
# numeric column converts the whole column to character.
data1$Highest.year.of.education[is.na(data1$Highest.year.of.education)] <-
  "missing values"
# Educational.attainment is a different column, so its table is unchanged.
table(data1$Educational.attainment)
##
## 9 Complete primary Complete secondary
## 12 8039 7611
## Higher Incomplete primary Incomplete secondary
## 12966 9717 46271
## No education
## 39769

# The code 9 stands for "missing" in Educational.attainment: turn it into NA.
# %in% never yields NA, so the index stays valid even when NAs are present.
data1$Educational.attainment[data1$Educational.attainment %in% "9"] <- NA
# Drop the now-empty "9" level so it no longer shows up with a count of 0.
if (is.factor(data1$Educational.attainment)) {
  data1$Educational.attainment <- droplevels(data1$Educational.attainment)
}
table(data1$Educational.attainment)
##
## Complete primary Complete secondary Higher
## 8039 7611 12966
## Incomplete primary Incomplete secondary No education
## 9717 46271 39769
# fix(data1)
# The 12 former "9" codes are now counted as missing.
sum(is.na(data1$Educational.attainment))
## [1] 12
# To list where the missing values are (output is long, hence commented out):
# which(is.na(data1$Highest.year.of.education))

# Missing values can be recoded to a string or to a number; a string is the
# better choice for categorical data, a number for scale data.
table(data1$Educational.attainment)
##
## 9 Complete primary Complete secondary
## 12 8039 7611
## Higher Incomplete primary Incomplete secondary
## 12966 9717 46271
## No education
## 39769

# Give the NAs of Highest.year.of.education a text label. Assigning a string
# to a numeric column turns the entire column into character.
data1$Highest.year.of.education[is.na(data1$Highest.year.of.education)] <-
  "missing values"
# The tabulated column is Educational.attainment, which was not modified.
table(data1$Educational.attainment)
##
## 9 Complete primary Complete secondary
## 12 8039 7611
## Higher Incomplete primary Incomplete secondary
## 12966 9717 46271
## No education
## 39769

# Replace 9, which is the code for "missing", by NA. Matching with %in%
# returns FALSE rather than NA for entries that are already missing.
data1$Educational.attainment[data1$Educational.attainment %in% "9"] <- NA
# Remove the unused "9" level; otherwise table() keeps listing it with 0.
if (is.factor(data1$Educational.attainment)) {
  data1$Educational.attainment <- droplevels(data1$Educational.attainment)
}
table(data1$Educational.attainment)
##
## Complete primary Complete secondary Higher
## 8039 7611 12966
## Incomplete primary Incomplete secondary No education
## 9717 46271 39769
# fix(data1)
# Number of values now flagged as missing in Educational.attainment.
sum(is.na(data1$Educational.attainment))
## [1] 12