Saturday, June 20, 2020

Basics of R Session 5.5 - Outliers

# Session setup ----
# A large `scipen` penalty makes R print numbers in fixed notation instead of
# scientific notation.
options(scipen = 999)

# Read the example data set from the local CSV file.
data1 <- read.csv(
  file = "file:///C:/Users/LENOVO/Desktop/Missing/outliers sumeer.csv"
)

# Packages ----
library(outliers)
library(car)

# Box plot of AGE as a first visual check of its distribution.
boxplot(data1$AGE)

# Standardise HB to z-scores (centred and divided by its standard deviation) ----
# scale() returns a one-column matrix carrying "scaled:center" and
# "scaled:scale" attributes. as.numeric() strips both so that HB_scale is
# stored as an ordinary numeric column rather than a matrix embedded in the
# data frame.
data1$HB_scale <- as.numeric(scale(data1$HB, center = TRUE, scale = TRUE))

summary(data1$HB_scale)
plot(data1$HB_scale)

# Print the standardised values for inspection.
data1$HB_scale

# Positions of the observations whose z-score is at or beyond +/-1.96,
# reported separately for the upper and the lower tail.
which(data1$HB_scale >= 1.96)
which(data1$HB_scale <= -1.96)

# Chi-squared test for an outlier in HB ----
# Calls into the outliers package are namespace-qualified: psych, which is
# attached later in this script, also exports outlier(), so re-running this
# section after psych is attached would otherwise call psych's function.
outliers::chisq.out.test(data1$HB)
# Value of HB that lies furthest from the sample mean.
outliers::outlier(data1$HB)

# Inter-quartile range ----
IQR(data1$HB)
quantile(data1$HB)

# Outlier scores ----
# With `prob` set to a value other than 1, scores() returns a logical vector
# marking the observations that exceed that probability. Each method gets its
# own variable so that one result does not overwrite another.
score_chisq <- outliers::scores(data1$HB, type = "chisq", prob = 0.95)
score_z <- outliers::scores(data1$HB, type = "z", prob = 0.95)
score_t <- outliers::scores(data1$HB, type = "t", prob = 0.95)

# `score1` previously ended up holding the t-based result; keep that binding
# for any code that still refers to it.
score1 <- score_t

# Mahalanobis distance ----
library(psych)

# Reduce the data to the variables required for the multivariate check:
# columns 2 and 3 of data1, selected by position.
# NOTE(review): confirm that positions 2 and 3 are the intended variables.
data2 <- data1[, 2:3]

# Both outliers and psych export outlier(); the call is qualified so that the
# psych version (Mahalanobis distances, with a plot) is used regardless of the
# order in which the packages were attached.
psych::outlier(data2, plot = TRUE)

# Regression residuals as an outlier check ----
# Fit a linear model of AGE on HB, TLC and PLATELET, then draw the default
# diagnostic plots for the fitted model.
reg1 <- lm(formula = AGE ~ HB + TLC + PLATELET, data = data1)
plot(reg1)

No comments:

Post a Comment