Monday, June 11, 2018

Basics of R- session 5- data Exploration part 2

# Read the sumeer data set from its CSV file into the data frame `sumeer`.
# header and sep are read.csv()'s defaults, written out for clarity.
sumeer <- read.csv(
  file = "D:/1 Teaching Material/R/importfile/sumeer.csv",
  header = TRUE,
  sep = ","
)

# fix(sumeer)

#--------------------------------------#

# Rename variables

# Column 1 is renamed to "SNO" (serial number) and column 3 (SEX) to "Gender"

# Rename column 1 to "SNO", then list every column's name and type.
colnames(sumeer)[1L] <- "SNO"
str(object = sumeer)
## 'data.frame':    50 obs. of  45 variables:
##  $ SNO              : int  375332 375326 372830 375704 368114 368069 365788 380816 380730 380536 ...
##  $ AGE              : int  23 49 27 54 49 75 20 20 58 75 ...
##  $ SEX              : int  0 0 0 1 0 1 0 1 1 0 ...
##  $ DM               : int  0 1 0 1 1 1 0 0 1 1 ...
##  $ HTN              : int  0 1 0 1 1 1 0 0 0 1 ...
##  $ CKD              : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ SEPSIS           : int  0 0 0 0 0 0 1 0 0 0 ...
##  $ SEVERE_SEPSIS    : int  0 0 1 1 1 0 0 0 0 1 ...
##  $ SEPTIC_SHOCK     : int  1 1 0 0 0 1 0 1 1 0 ...
##  $ HB               : num  12.8 11.2 11.6 11 11.5 7.6 11.7 4 6.2 12.5 ...
##  $ TLC              : int  8600 19100 12000 19800 17900 5400 30200 12000 31100 16900 ...
##  $ PLATELET         : int  30000 41000 150000 460000 320000 25000 270000 60000 31000 290000 ...
##  $ pH               : num  7.26 7.24 7.34 7.34 7.24 7.28 7.35 7.34 7.16 7.21 ...
##  $ HCO3             : num  18 16 16 18 16 18 22 12.9 18 22.8 ...
##  $ Na               : int  132 140 140 132 130 132 132 130 140 130 ...
##  $ K                : num  3.6 3.8 35 3.8 3.8 3.6 3.6 5 5.4 4 ...
##  $ CREATININE       : num  2.04 2.16 0.74 1.42 5.3 1.53 0.84 2.25 2.9 1.21 ...
##  $ TOTAL_BILIRUBIN  : num  6.5 0.5 0.5 0.65 6.8 0.5 0.5 2.5 0.5 0.5 ...
##  $ DIRECT_BILIRUBIN : num  4.5 0.25 0.25 0.35 4.6 0.25 0.25 1.25 0.25 0.25 ...
##  $ SGOT             : int  101 128 22 126 7 88 22 101 39 42 ...
##  $ SGPT             : int  68 134 18 171 14 39 15 120 32 41 ...
##  $ MP               : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ DENGUE           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PNEUMONIA_PROFILE: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ AMYLASE          : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PTINR            : num  1.8 2 1 2 1 1.3 1 1.8 1.5 1.2 ...
##  $ ALBUMIN          : num  3.2 2.8 3 2.8 2.4 2.8 3.4 3 2.8 3 ...
##  $ FIBRINOGEN       : int  180 240 160 180 240 190 200 210 220 160 ...
##  $ NIV_DAYS         : int  3 0 1 0 2 2 0 1 0 2 ...
##  $ MV_DAYS          : int  0 1 0 0 4 0 0 4 1 0 ...
##  $ INOTROPE         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ SOFA             : int  11 11 1 2 9 7 1 12 11 3 ...
##  $ Number           : int  3 3 1 1 2 1 1 3 2 1 ...
##  $ lung             : int  1 1 1 0 0 1 0 1 1 1 ...
##  $ kidney           : int  1 1 0 0 1 0 1 1 1 0 ...
##  $ liver            : int  1 1 0 1 1 0 0 1 0 0 ...
##  $ STAY_IN_ICU_HDU  : int  4 1 1 1 5 3 1 5 1 3 ...
##  $ STAY_IN_WARD     : int  5 3 5 3 7 5 4 3 3 3 ...
##  $ bacterial        : int  0 1 1 0 1 1 1 1 1 1 ...
##  $ viral            : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ parasitic        : int  1 0 0 1 0 0 0 0 0 0 ...
##  $ fungal           : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ PCT              : int  2 4 3 4 4 4 2 4 4 2 ...
##  $ Mortality        : int  2 3 2 2 2 2 2 3 3 2 ...
##  $ Age_group        : int  1 3 1 3 3 4 1 1 3 4 ...
# Rename the SEX column to "Gender". Matching on the current column name
# instead of on position 3 keeps the rename correct if the CSV's column order
# ever changes, and makes the statement a harmless no-op when it is re-run.
names(sumeer)[names(sumeer) == "SEX"] <- "Gender"
str(sumeer)
## 'data.frame':    50 obs. of  45 variables:
##  $ SNO              : int  375332 375326 372830 375704 368114 368069 365788 380816 380730 380536 ...
##  $ AGE              : int  23 49 27 54 49 75 20 20 58 75 ...
##  $ Gender           : int  0 0 0 1 0 1 0 1 1 0 ...
##  $ DM               : int  0 1 0 1 1 1 0 0 1 1 ...
##  $ HTN              : int  0 1 0 1 1 1 0 0 0 1 ...
##  $ CKD              : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ SEPSIS           : int  0 0 0 0 0 0 1 0 0 0 ...
##  $ SEVERE_SEPSIS    : int  0 0 1 1 1 0 0 0 0 1 ...
##  $ SEPTIC_SHOCK     : int  1 1 0 0 0 1 0 1 1 0 ...
##  $ HB               : num  12.8 11.2 11.6 11 11.5 7.6 11.7 4 6.2 12.5 ...
##  $ TLC              : int  8600 19100 12000 19800 17900 5400 30200 12000 31100 16900 ...
##  $ PLATELET         : int  30000 41000 150000 460000 320000 25000 270000 60000 31000 290000 ...
##  $ pH               : num  7.26 7.24 7.34 7.34 7.24 7.28 7.35 7.34 7.16 7.21 ...
##  $ HCO3             : num  18 16 16 18 16 18 22 12.9 18 22.8 ...
##  $ Na               : int  132 140 140 132 130 132 132 130 140 130 ...
##  $ K                : num  3.6 3.8 35 3.8 3.8 3.6 3.6 5 5.4 4 ...
##  $ CREATININE       : num  2.04 2.16 0.74 1.42 5.3 1.53 0.84 2.25 2.9 1.21 ...
##  $ TOTAL_BILIRUBIN  : num  6.5 0.5 0.5 0.65 6.8 0.5 0.5 2.5 0.5 0.5 ...
##  $ DIRECT_BILIRUBIN : num  4.5 0.25 0.25 0.35 4.6 0.25 0.25 1.25 0.25 0.25 ...
##  $ SGOT             : int  101 128 22 126 7 88 22 101 39 42 ...
##  $ SGPT             : int  68 134 18 171 14 39 15 120 32 41 ...
##  $ MP               : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ DENGUE           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PNEUMONIA_PROFILE: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ AMYLASE          : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PTINR            : num  1.8 2 1 2 1 1.3 1 1.8 1.5 1.2 ...
##  $ ALBUMIN          : num  3.2 2.8 3 2.8 2.4 2.8 3.4 3 2.8 3 ...
##  $ FIBRINOGEN       : int  180 240 160 180 240 190 200 210 220 160 ...
##  $ NIV_DAYS         : int  3 0 1 0 2 2 0 1 0 2 ...
##  $ MV_DAYS          : int  0 1 0 0 4 0 0 4 1 0 ...
##  $ INOTROPE         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ SOFA             : int  11 11 1 2 9 7 1 12 11 3 ...
##  $ Number           : int  3 3 1 1 2 1 1 3 2 1 ...
##  $ lung             : int  1 1 1 0 0 1 0 1 1 1 ...
##  $ kidney           : int  1 1 0 0 1 0 1 1 1 0 ...
##  $ liver            : int  1 1 0 1 1 0 0 1 0 0 ...
##  $ STAY_IN_ICU_HDU  : int  4 1 1 1 5 3 1 5 1 3 ...
##  $ STAY_IN_WARD     : int  5 3 5 3 7 5 4 3 3 3 ...
##  $ bacterial        : int  0 1 1 0 1 1 1 1 1 1 ...
##  $ viral            : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ parasitic        : int  1 0 0 1 0 0 0 0 0 0 ...
##  $ fungal           : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ PCT              : int  2 4 3 4 4 4 2 4 4 2 ...
##  $ Mortality        : int  2 3 2 2 2 2 2 3 3 2 ...
##  $ Age_group        : int  1 3 1 3 3 4 1 1 3 4 ...
# Label the Gender codes: 0 -> "Male", 1 -> "Female".
# The codes are passed as explicit `levels` so that each label is tied to its
# code rather than to whatever sorted set of values happens to occur in the
# column. The check fails loudly on any code other than 0, 1 or NA.
stopifnot(all(sumeer$Gender %in% c(0, 1, NA)))
sumeer$Gender <- factor(
  sumeer$Gender,
  levels = c(0, 1),
  labels = c("Male", "Female")
)
str(sumeer$Gender)
##  Factor w/ 2 levels "Male","Female": 1 1 1 2 1 2 1 2 2 1 ...
# DM - diabetes: label code 0 as "absence" and code 1 as "presence".
# The labels used to be supplied as c("presence", "absence") with no explicit
# levels, so the sorted codes 0/1 were tagged "presence"/"absence", i.e. the
# reverse of the usual indicator coding.
# NOTE(review): assumes 0 = absent / 1 = present, like the other 0/1
# indicator columns of this data set - confirm against the study codebook.
# The DM output printed further down this transcript was produced before this
# fix and still shows the old labelling.
stopifnot(all(sumeer$DM %in% c(0, 1, NA)))
sumeer$DM <- factor(
  sumeer$DM,
  levels = c(0, 1),
  labels = c("absence", "presence")
)
str(sumeer$DM)
##  Factor w/ 2 levels "absence","presence": 1 2 1 2 2 2 1 1 2 2 ...
# Mortality: label code 2 as "Death" and code 3 as "Survive".
# The second label was misspelled "Survice"; the codes are now given as
# explicit `levels` so the labels cannot shift if one code is absent.
# NOTE(review): the direction of the coding (2 = death, 3 = survival) is kept
# exactly as it was and cannot be verified from this script - confirm against
# the study codebook.
# Output printed further down this transcript still shows the old spelling.
stopifnot(all(sumeer$Mortality %in% c(2, 3, NA)))
sumeer$Mortality <- factor(
  sumeer$Mortality,
  levels = c(2, 3),
  labels = c("Death", "Survive")
)

str(sumeer$Mortality)
##  Factor w/ 2 levels "Death","Survive": 1 2 1 1 1 1 1 2 2 1 ...

exploration of the data set

# Per-column overview: quartiles and mean for numeric columns, level counts
# for factors, and an NA count wherever values are missing.
summary(object = sumeer)
##       SNO              AGE           Gender          DM          HTN     
##  Min.   :365788   Min.   :18.00   Male  :22   presence:21   Min.   :0.0  
##  1st Qu.:376592   1st Qu.:34.25   Female:28   absence :29   1st Qu.:0.0  
##  Median :380860   Median :51.00                             Median :0.5  
##  Mean   :381339   Mean   :50.34                             Mean   :0.5  
##  3rd Qu.:387528   3rd Qu.:65.00                             3rd Qu.:1.0  
##  Max.   :391088   Max.   :86.00                             Max.   :1.0  
##                                                                          
##       CKD          SEPSIS     SEVERE_SEPSIS   SEPTIC_SHOCK 
##  Min.   :0.0   Min.   :0.00   Min.   :0.00   Min.   :0.00  
##  1st Qu.:0.0   1st Qu.:0.00   1st Qu.:0.00   1st Qu.:0.00  
##  Median :0.0   Median :0.00   Median :0.00   Median :0.00  
##  Mean   :0.2   Mean   :0.14   Mean   :0.42   Mean   :0.42  
##  3rd Qu.:0.0   3rd Qu.:0.00   3rd Qu.:1.00   3rd Qu.:1.00  
##  Max.   :1.0   Max.   :1.00   Max.   :1.00   Max.   :1.00  
##                                                            
##        HB              TLC           PLATELET            pH       
##  Min.   : 4.000   Min.   : 4600   Min.   : 10000   Min.   :7.160  
##  1st Qu.: 8.925   1st Qu.:13000   1st Qu.: 50000   1st Qu.:7.240  
##  Median :10.700   Median :15750   Median : 75000   Median :7.300  
##  Mean   :10.388   Mean   :18752   Mean   :140680   Mean   :7.290  
##  3rd Qu.:12.175   3rd Qu.:23750   3rd Qu.:222500   3rd Qu.:7.348  
##  Max.   :14.600   Max.   :74500   Max.   :820000   Max.   :7.360  
##                                                                   
##       HCO3             Na              K           CREATININE    
##  Min.   :12.00   Min.   :124.0   Min.   : 2.40   Min.   : 0.740  
##  1st Qu.:18.00   1st Qu.:129.0   1st Qu.: 3.20   1st Qu.: 1.355  
##  Median :18.00   Median :132.0   Median : 3.40   Median : 2.000  
##  Mean   :19.35   Mean   :131.1   Mean   : 4.09   Mean   : 2.510  
##  3rd Qu.:21.50   3rd Qu.:132.0   3rd Qu.: 3.60   3rd Qu.: 2.505  
##  Max.   :28.00   Max.   :140.0   Max.   :35.00   Max.   :10.000  
##                                                                  
##  TOTAL_BILIRUBIN DIRECT_BILIRUBIN      SGOT             SGPT      
##  Min.   :0.000   Min.   :0.250    Min.   :   7.0   Min.   : 12.0  
##  1st Qu.:0.500   1st Qu.:0.250    1st Qu.:  44.0   1st Qu.: 34.5  
##  Median :1.750   Median :1.000    Median : 100.0   Median : 75.0  
##  Mean   :1.924   Mean   :1.162    Mean   : 128.1   Mean   :112.2  
##  3rd Qu.:2.500   3rd Qu.:1.400    3rd Qu.: 127.5   3rd Qu.:120.0  
##  Max.   :7.600   Max.   :4.600    Max.   :1000.0   Max.   :975.0  
##                  NA's   :1                                        
##        MP        DENGUE  PNEUMONIA_PROFILE    AMYLASE         PTINR      
##  Min.   :0   Min.   :0   Min.   :0         Min.   : 0.0   Min.   :1.000  
##  1st Qu.:0   1st Qu.:0   1st Qu.:0         1st Qu.: 0.0   1st Qu.:1.200  
##  Median :0   Median :0   Median :0         Median : 0.0   Median :1.300  
##  Mean   :0   Mean   :0   Mean   :0         Mean   : 1.2   Mean   :1.424  
##  3rd Qu.:0   3rd Qu.:0   3rd Qu.:0         3rd Qu.: 0.0   3rd Qu.:1.650  
##  Max.   :0   Max.   :0   Max.   :0         Max.   :30.0   Max.   :2.400  
##                                                                          
##     ALBUMIN        FIBRINOGEN       NIV_DAYS       MV_DAYS     
##  Min.   :2.400   Min.   :160.0   Min.   :0.00   Min.   : 0.00  
##  1st Qu.:2.800   1st Qu.:188.5   1st Qu.:0.00   1st Qu.: 0.00  
##  Median :2.900   Median :200.0   Median :0.50   Median : 0.00  
##  Mean   :2.906   Mean   :203.0   Mean   :1.12   Mean   : 0.84  
##  3rd Qu.:3.000   3rd Qu.:220.0   3rd Qu.:2.00   3rd Qu.: 0.00  
##  Max.   :3.400   Max.   :240.0   Max.   :5.00   Max.   :10.00  
##                                                                
##     INOTROPE      SOFA           Number          lung          kidney    
##  Min.   :0   Min.   : 1.00   Min.   :1.00   Min.   :0.00   Min.   :0.00  
##  1st Qu.:0   1st Qu.: 5.00   1st Qu.:1.00   1st Qu.:0.00   1st Qu.:0.00  
##  Median :0   Median : 7.00   Median :2.00   Median :1.00   Median :1.00  
##  Mean   :0   Mean   : 7.06   Mean   :1.86   Mean   :0.66   Mean   :0.66  
##  3rd Qu.:0   3rd Qu.:10.00   3rd Qu.:2.00   3rd Qu.:1.00   3rd Qu.:1.00  
##  Max.   :0   Max.   :13.00   Max.   :3.00   Max.   :1.00   Max.   :1.00  
##  NA's   :2                                                               
##      liver      STAY_IN_ICU_HDU  STAY_IN_WARD    bacterial  
##  Min.   :0.00   Min.   : 1.0    Min.   :0.00   Min.   :0.0  
##  1st Qu.:0.00   1st Qu.: 1.0    1st Qu.:3.00   1st Qu.:0.0  
##  Median :1.00   Median : 2.0    Median :5.00   Median :1.0  
##  Mean   :0.52   Mean   : 2.8    Mean   :4.26   Mean   :0.7  
##  3rd Qu.:1.00   3rd Qu.: 4.0    3rd Qu.:5.00   3rd Qu.:1.0  
##  Max.   :1.00   Max.   :10.0    Max.   :7.00   Max.   :1.0  
##                                                             
##      viral           parasitic       fungal          PCT        Mortality 
##  Min.   :0.00000   Min.   :0.0   Min.   :0.00   Min.   :1.0   Death  :42  
##  1st Qu.:0.00000   1st Qu.:0.0   1st Qu.:0.00   1st Qu.:2.0   Survice: 8  
##  Median :0.00000   Median :0.0   Median :0.00   Median :3.0               
##  Mean   :0.06122   Mean   :0.3   Mean   :0.06   Mean   :2.9               
##  3rd Qu.:0.00000   3rd Qu.:1.0   3rd Qu.:0.00   3rd Qu.:3.0               
##  Max.   :1.00000   Max.   :1.0   Max.   :1.00   Max.   :4.0               
##  NA's   :1                                                                
##    Age_group   
##  Min.   :1.00  
##  1st Qu.:2.00  
##  Median :3.00  
##  Mean   :2.62  
##  3rd Qu.:4.00  
##  Max.   :4.00  
## 
# Five-number summary: minimum, lower hinge, median, upper hinge, maximum.

fivenum(x = sumeer[["AGE"]])
## [1] 18 34 51 65 86
# Not applicable to a categorical variable: on a factor the call only warns
# and returns NA for all five values.
fivenum(x = sumeer[["DM"]])
## Warning in Ops.factor(x[floor(d)], x[ceiling(d)]): '+' not meaningful for
## factors
## [1] NA NA NA NA NA
fivenum(x = sumeer[["HB"]])
## [1]  4.0  8.9 10.7 12.2 14.6
# descr() from the descr package describes each column in turn: minimum,
# quartiles, median, mean and maximum for numeric columns, and a frequency
# count per level for factors.

library("descr")

descr(sumeer)
## 
## SNO
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  365788  376592  380860  381339  387528  391088 
## 
## AGE
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   18.00   34.25   51.00   50.34   65.00   86.00 
## 
## Gender
##   Male Female 
##     22     28 
## 
## DM
## presence  absence 
##       21       29 
## 
## HTN
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0     0.0     0.5     0.5     1.0     1.0 
## 
## CKD
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0     0.0     0.0     0.2     0.0     1.0 
## 
## SEPSIS
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00    0.00    0.14    0.00    1.00 
## 
## SEVERE_SEPSIS
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00    0.00    0.42    1.00    1.00 
## 
## SEPTIC_SHOCK
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00    0.00    0.42    1.00    1.00 
## 
## HB
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   4.000   8.925  10.700  10.388  12.175  14.600 
## 
## TLC
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    4600   13000   15750   18752   23750   74500 
## 
## PLATELET
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   10000   50000   75000  140680  222500  820000 
## 
## pH
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   7.160   7.240   7.300   7.290   7.348   7.360 
## 
## HCO3
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   12.00   18.00   18.00   19.35   21.50   28.00 
## 
## Na
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   124.0   129.0   132.0   131.1   132.0   140.0 
## 
## K
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.40    3.20    3.40    4.09    3.60   35.00 
## 
## CREATININE
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.740   1.355   2.000   2.510   2.505  10.000 
## 
## TOTAL_BILIRUBIN
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.500   1.750   1.924   2.500   7.600 
## 
## DIRECT_BILIRUBIN
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.250   0.250   1.000   1.162   1.400   4.600       1 
## 
## SGOT
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     7.0    44.0   100.0   128.1   127.5  1000.0 
## 
## SGPT
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    12.0    34.5    75.0   112.2   120.0   975.0 
## 
## MP
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0       0       0       0       0 
## 
## DENGUE
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0       0       0       0       0 
## 
## PNEUMONIA_PROFILE
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0       0       0       0       0 
## 
## AMYLASE
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0     0.0     0.0     1.2     0.0    30.0 
## 
## PTINR
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   1.200   1.300   1.424   1.650   2.400 
## 
## ALBUMIN
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.400   2.800   2.900   2.906   3.000   3.400 
## 
## FIBRINOGEN
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   160.0   188.5   200.0   203.0   220.0   240.0 
## 
## NIV_DAYS
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00    0.50    1.12    2.00    5.00 
## 
## MV_DAYS
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00    0.00    0.84    0.00   10.00 
## 
## INOTROPE
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##       0       0       0       0       0       0       2 
## 
## SOFA
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00    5.00    7.00    7.06   10.00   13.00 
## 
## Number
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00    1.00    2.00    1.86    2.00    3.00 
## 
## lung
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00    1.00    0.66    1.00    1.00 
## 
## kidney
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00    1.00    0.66    1.00    1.00 
## 
## liver
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00    1.00    0.52    1.00    1.00 
## 
## STAY_IN_ICU_HDU
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     1.0     1.0     2.0     2.8     4.0    10.0 
## 
## STAY_IN_WARD
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    3.00    5.00    4.26    5.00    7.00 
## 
## bacterial
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0     0.0     1.0     0.7     1.0     1.0 
## 
## viral
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## 0.00000 0.00000 0.00000 0.06122 0.00000 1.00000       1 
## 
## parasitic
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0     0.0     0.0     0.3     1.0     1.0 
## 
## fungal
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00    0.00    0.06    0.00    1.00 
## 
## PCT
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     1.0     2.0     3.0     2.9     3.0     4.0 
## 
## Mortality
##   Death Survice 
##      42       8 
## 
## Age_group
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00    2.00    3.00    2.62    4.00    4.00
# stat.desc() from pastecs tabulates, per column: value/null/NA counts, min,
# max, range, sum, median, mean, SE and confidence interval of the mean,
# variance, standard deviation and coefficient of variation. Factor columns
# come back as NA. The arguments below are the function's defaults.
library("pastecs")
## Warning: package 'pastecs' was built under R version 3.5.3
stat.desc(sumeer, basic = TRUE, desc = TRUE, norm = FALSE, p = 0.95)
##                       SNO          AGE Gender DM         HTN         CKD
## nbr.val      5.000000e+01   50.0000000     NA NA 50.00000000 50.00000000
## nbr.null     0.000000e+00    0.0000000     NA NA 25.00000000 40.00000000
## nbr.na       0.000000e+00    0.0000000     NA NA  0.00000000  0.00000000
## min          3.657880e+05   18.0000000     NA NA  0.00000000  0.00000000
## max          3.910880e+05   86.0000000     NA NA  1.00000000  1.00000000
## range        2.530000e+04   68.0000000     NA NA  1.00000000  1.00000000
## sum          1.906695e+07 2517.0000000     NA NA 25.00000000 10.00000000
## median       3.808595e+05   51.0000000     NA NA  0.50000000  0.00000000
## mean         3.813389e+05   50.3400000     NA NA  0.50000000  0.20000000
## SE.mean      8.953497e+02    2.8070771     NA NA  0.07142857  0.05714286
## CI.mean.0.95 1.799273e+03    5.6410326     NA NA  0.14354109  0.11483287
## var          4.008256e+07  393.9840816     NA NA  0.25510204  0.16326531
## std.dev      6.331079e+03   19.8490323     NA NA  0.50507627  0.40406102
## coef.var     1.660223e-02    0.3942994     NA NA  1.01015254  2.02030509
##                   SEPSIS SEVERE_SEPSIS SEPTIC_SHOCK          HB
## nbr.val      50.00000000   50.00000000  50.00000000  50.0000000
## nbr.null     43.00000000   29.00000000  29.00000000   0.0000000
## nbr.na        0.00000000    0.00000000   0.00000000   0.0000000
## min           0.00000000    0.00000000   0.00000000   4.0000000
## max           1.00000000    1.00000000   1.00000000  14.6000000
## range         1.00000000    1.00000000   1.00000000  10.6000000
## sum           7.00000000   21.00000000  21.00000000 519.4000000
## median        0.00000000    0.00000000   0.00000000  10.7000000
## mean          0.14000000    0.42000000   0.42000000  10.3880000
## SE.mean       0.04956958    0.07050836   0.07050836   0.3295097
## CI.mean.0.95  0.09961379    0.14169185   0.14169185   0.6621746
## var           0.12285714    0.24857143   0.24857143   5.4288327
## std.dev       0.35050983    0.49856938   0.49856938   2.3299855
## coef.var      2.50364166    1.18706996   1.18706996   0.2242959
##                       TLC     PLATELET           pH        HCO3
## nbr.val      5.000000e+01 5.000000e+01 5.000000e+01  50.0000000
## nbr.null     0.000000e+00 0.000000e+00 0.000000e+00   0.0000000
## nbr.na       0.000000e+00 0.000000e+00 0.000000e+00   0.0000000
## min          4.600000e+03 1.000000e+04 7.160000e+00  12.0000000
## max          7.450000e+04 8.200000e+05 7.360000e+00  28.0000000
## range        6.990000e+04 8.100000e+05 2.000000e-01  16.0000000
## sum          9.376000e+05 7.034000e+06 3.644800e+02 967.7000000
## median       1.575000e+04 7.500000e+04 7.300000e+00  18.0000000
## mean         1.875200e+04 1.406800e+05 7.289600e+00  19.3540000
## SE.mean      1.592360e+03 2.148310e+04 8.300283e-03   0.4532802
## CI.mean.0.95 3.199967e+03 4.317191e+04 1.668004e-02   0.9109007
## var          1.267805e+08 2.307618e+10 3.444735e-03  10.2731469
## std.dev      1.125968e+04 1.519085e+05 5.869186e-02   3.2051750
## coef.var     6.004525e-01 1.079816e+00 8.051452e-03   0.1656079
##                        Na           K  CREATININE TOTAL_BILIRUBIN
## nbr.val      5.000000e+01  50.0000000  50.0000000      50.0000000
## nbr.null     0.000000e+00   0.0000000   0.0000000       1.0000000
## nbr.na       0.000000e+00   0.0000000   0.0000000       0.0000000
## min          1.240000e+02   2.4000000   0.7400000       0.0000000
## max          1.400000e+02  35.0000000  10.0000000       7.6000000
## range        1.600000e+01  32.6000000   9.2600000       7.6000000
## sum          6.553000e+03 204.5000000 125.4800000      96.2000000
## median       1.320000e+02   3.4000000   2.0000000       1.7500000
## mean         1.310600e+02   4.0900000   2.5096000       1.9240000
## SE.mean      4.894061e-01   0.6339719   0.2767390       0.2406803
## CI.mean.0.95 9.834985e-01   1.2740143   0.5561279       0.4836652
## var          1.197592e+01  20.0960204   3.8292243       2.8963510
## std.dev      3.460624e+00   4.4828585   1.9568404       1.7018669
## coef.var     2.640488e-02   1.0960534   0.7797419       0.8845462
##              DIRECT_BILIRUBIN         SGOT        SGPT  MP DENGUE
## nbr.val            49.0000000    50.000000    50.00000  50     50
## nbr.null            0.0000000     0.000000     0.00000  50     50
## nbr.na              1.0000000     0.000000     0.00000   0      0
## min                 0.2500000     7.000000    12.00000   0      0
## max                 4.6000000  1000.000000   975.00000   0      0
## range               4.3500000   993.000000   963.00000   0      0
## sum                56.9400000  6406.000000  5610.00000   0      0
## median              1.0000000   100.000000    75.00000   0      0
## mean                1.1620408   128.120000   112.20000   0      0
## SE.mean             0.1527177    22.328432    22.47453   0      0
## CI.mean.0.95        0.3070596    44.870665    45.16426   0      0
## var                 1.1428124 24927.944490 25255.22449   0      0
## std.dev             1.0690240   157.885859   158.91892   0      0
## coef.var            0.9199540     1.232328     1.41639 NaN    NaN
##              PNEUMONIA_PROFILE    AMYLASE       PTINR      ALBUMIN
## nbr.val                     50 50.0000000 50.00000000  50.00000000
## nbr.null                    50 48.0000000  0.00000000   0.00000000
## nbr.na                       0  0.0000000  0.00000000   0.00000000
## min                          0  0.0000000  1.00000000   2.40000000
## max                          0 30.0000000  2.40000000   3.40000000
## range                        0 30.0000000  1.40000000   1.00000000
## sum                          0 60.0000000 71.20000000 145.30000000
## median                       0  0.0000000  1.30000000   2.90000000
## mean                         0  1.2000000  1.42400000   2.90600000
## SE.mean                      0  0.8398251  0.04969786   0.03042689
## CI.mean.0.95                 0  1.6876916  0.09987159   0.06114513
## var                          0 35.2653061  0.12349388   0.04628980
## std.dev                      0  5.9384599  0.35141696   0.21515064
## coef.var                   NaN  4.9487166  0.24678157   0.07403669
##                FIBRINOGEN   NIV_DAYS    MV_DAYS INOTROPE        SOFA
## nbr.val         50.000000 50.0000000 50.0000000       48  50.0000000
## nbr.null         0.000000 25.0000000 38.0000000       48   0.0000000
## nbr.na           0.000000  0.0000000  0.0000000        2   0.0000000
## min            160.000000  0.0000000  0.0000000        0   1.0000000
## max            240.000000  5.0000000 10.0000000        0  13.0000000
## range           80.000000  5.0000000 10.0000000        0  12.0000000
## sum          10151.000000 56.0000000 42.0000000        0 353.0000000
## median         200.000000  0.5000000  0.0000000        0   7.0000000
## mean           203.020000  1.1200000  0.8400000        0   7.0600000
## SE.mean          3.192273  0.2111968  0.2730926        0   0.4741566
## CI.mean.0.95     6.415112  0.4244158  0.5488002        0   0.9528534
## var            509.530204  2.2302041  3.7289796        0  11.2412245
## std.dev         22.572776  1.4933868  1.9310566        0   3.3527935
## coef.var         0.111185  1.3333811  2.2988769      NaN   0.4748999
##                  Number        lung      kidney       liver
## nbr.val      50.0000000 50.00000000 50.00000000 50.00000000
## nbr.null      0.0000000 17.00000000 17.00000000 24.00000000
## nbr.na        0.0000000  0.00000000  0.00000000  0.00000000
## min           1.0000000  0.00000000  0.00000000  0.00000000
## max           3.0000000  1.00000000  1.00000000  1.00000000
## range         2.0000000  1.00000000  1.00000000  1.00000000
## sum          93.0000000 33.00000000 33.00000000 26.00000000
## median        2.0000000  1.00000000  1.00000000  1.00000000
## mean          1.8600000  0.66000000  0.66000000  0.52000000
## SE.mean       0.1106935  0.06767268  0.06767268  0.07137141
## CI.mean.0.95  0.2224470  0.13599335  0.13599335  0.14342621
## var           0.6126531  0.22897959  0.22897959  0.25469388
## std.dev       0.7827216  0.47851812  0.47851812  0.50467205
## coef.var      0.4208181  0.72502746  0.72502746  0.97052317
##              STAY_IN_ICU_HDU STAY_IN_WARD   bacterial       viral
## nbr.val           50.0000000   50.0000000 50.00000000 49.00000000
## nbr.null           0.0000000    2.0000000 15.00000000 46.00000000
## nbr.na             0.0000000    0.0000000  0.00000000  1.00000000
## min                1.0000000    0.0000000  0.00000000  0.00000000
## max               10.0000000    7.0000000  1.00000000  1.00000000
## range              9.0000000    7.0000000  1.00000000  1.00000000
## sum              140.0000000  213.0000000 35.00000000  3.00000000
## median             2.0000000    5.0000000  1.00000000  0.00000000
## mean               2.8000000    4.2600000  0.70000000  0.06122449
## SE.mean            0.2913725    0.1954795  0.06546537  0.03460372
## CI.mean.0.95       0.5855350    0.3928308  0.13155758  0.06957545
## var                4.2448980    1.9106122  0.21428571  0.05867347
## std.dev            2.0603150    1.3822490  0.46291005  0.24222607
## coef.var           0.7358268    0.3244716  0.66130007  3.95635916
##                parasitic      fungal         PCT Mortality   Age_group
## nbr.val      50.00000000 50.00000000  50.0000000        NA  50.0000000
## nbr.null     35.00000000 47.00000000   0.0000000        NA   0.0000000
## nbr.na        0.00000000  0.00000000   0.0000000        NA   0.0000000
## min           0.00000000  0.00000000   1.0000000        NA   1.0000000
## max           1.00000000  1.00000000   4.0000000        NA   4.0000000
## range         1.00000000  1.00000000   3.0000000        NA   3.0000000
## sum          15.00000000  3.00000000 145.0000000        NA 131.0000000
## median        0.00000000  0.00000000   3.0000000        NA   3.0000000
## mean          0.30000000  0.06000000   2.9000000        NA   2.6200000
## SE.mean       0.06546537  0.03392669   0.1115750        NA   0.1638566
## CI.mean.0.95  0.13155758  0.06817824   0.2242183        NA   0.3292821
## var           0.21428571  0.05755102   0.6224490        NA   1.3424490
## std.dev       0.46291005  0.23989794   0.7889544        NA   1.1586410
## coef.var      1.54303350  3.99829896   0.2720532        NA   0.4422294

# Automated overviews of the whole data frame.
# Fixes: R is case-sensitive, so the loader is library(), not Library(); the
# packages are named skimr and DataExplorer; and the data frame loaded by this
# script is `sumeer` (no `data1` is created anywhere in the visible script).
library(skimr)
skim(sumeer)            # compact per-column summary statistics
library(DataExplorer)
create_report(sumeer)   # builds DataExplorer's data-profiling report


Frequency distribution tables

one way

# Frequency of each Gender level, stored for reuse and then printed.
onewaytable1 <- table(sumeer[["Gender"]])
onewaytable1
## 
##   Male Female 
##     22     28

two way

# Cross-tabulation: Gender in rows, DM in columns.
twowaytable1 <- table(sumeer[["Gender"]], sumeer[["DM"]])

twowaytable1
##         
##          presence absence
##   Male         10      12
##   Female       11      17

three way

# Gender x DM cross-tabulation, one layer per Mortality level.
threewaytable1 <- table(
  sumeer[["Gender"]],
  sumeer[["DM"]],
  sumeer[["Mortality"]]
)

threewaytable1
## , ,  = Death
## 
##         
##          presence absence
##   Male          8      11
##   Female       10      13
## 
## , ,  = Survice
## 
##         
##          presence absence
##   Male          2       1
##   Female        1       4

Use ftable() to display the same three-way table in a flat, two-dimensional layout, e.g. ftable(threewaytable1).

cell proportions

first create a table as r object then use it in prop.table

# No margin given: every count is divided by the grand total of its table.
prop.table(x = onewaytable1)
## 
##   Male Female 
##   0.44   0.56
prop.table(x = twowaytable1)
##         
##          presence absence
##   Male       0.20    0.24
##   Female     0.22    0.34
prop.table(x = threewaytable1)
## , ,  = Death
## 
##         
##          presence absence
##   Male       0.16    0.22
##   Female     0.20    0.26
## 
## , ,  = Survice
## 
##         
##          presence absence
##   Male       0.04    0.02
##   Female     0.02    0.08

row proportion

first create a table as r object then use it in prop.table

# margin = 1: counts are divided by the total of their first-dimension
# (Gender) level. In a one-way table each cell is its own total, so every
# entry is 1.
prop.table(x = onewaytable1, margin = 1)
## 
##   Male Female 
##      1      1
prop.table(x = twowaytable1, margin = 1)
##         
##           presence   absence
##   Male   0.4545455 0.5454545
##   Female 0.3928571 0.6071429
prop.table(x = threewaytable1, margin = 1)
## , ,  = Death
## 
##         
##            presence    absence
##   Male   0.36363636 0.50000000
##   Female 0.35714286 0.46428571
## 
## , ,  = Survice
## 
##         
##            presence    absence
##   Male   0.09090909 0.04545455
##   Female 0.03571429 0.14285714

column proportion

first create a table as r object then use it in prop.table

# margin = 2: counts are divided by the total of their second-dimension (DM)
# level. A one-way table has no second margin, so it is shown without one.
prop.table(x = onewaytable1)
## 
##   Male Female 
##   0.44   0.56
prop.table(x = twowaytable1, margin = 2)
##         
##           presence   absence
##   Male   0.4761905 0.4137931
##   Female 0.5238095 0.5862069
prop.table(x = threewaytable1, margin = 2)
## , ,  = Death
## 
##         
##            presence    absence
##   Male   0.38095238 0.37931034
##   Female 0.47619048 0.44827586
## 
## , ,  = Survice
## 
##         
##            presence    absence
##   Male   0.09523810 0.03448276
##   Female 0.04761905 0.13793103

No comments:

Post a Comment