Monday, June 11, 2018

Basics of R- Session 2- Vector, List, data Frame, matrix, labels

Basics of R- Session 2- Vector, List, data Frame, matrix, labels

Session 2

# Creating the same type of vectors

# Case 1: create a data set of five students

# creating the record of the same type of vector

# start with single observation,

# say only a single student

# Record for one student, each field held in a length-one vector.
# R names are case-sensitive, so Per_10 and Name1 are different objects
# from per_10 and name1.
Roll_Number <- 1
Per_10 <- 85
per_12 <- 71
Name1 <- "CHRISTENSON"

Creating vectors with more than one observation; the first three are numeric in nature

# Roll numbers 1 to 20, stored as doubles.
Roll_Number <- as.numeric(1:20)

# Percentage columns, one value per student.
per_10 <- c(
  85, 91.2, 76, 85.5, 85, 91, 96, 80, 79.8, 83.6,
  82, 89.3, 94.3, 94, 75, 85.5, 75, 93, 88, 78
)

per_12 <- c(
  71, 91.2, 82, 73.5, 89, 93, 80, 80, 65.5, 84.2,
  62, 89, 97, 81, 81, 61, 74, 94.8, 65.2, 76
)

# Character (string) vector holding the student names.
name1 <- c(
  "CHRISTENSON", "NAVEEN J", "PRASANTHKUMAR", "SATYAJIT", "SHARDUL",
  "AJAY", "DEEPTI", "SHIRISHA", "ROHINI", "SHELLY",
  "SHIVANI", "VARSHA", "KRISHNA", "SRIPAD", "VIBHAV",
  "TRISHA", "NAMRATA", "SHRUTI", "ANANDA", "ASHISH"
)

Use cbind() to bind them together column-wise into a single R object. To find out what cbind does, use ?cbind

# combining the vectors Roll_Number, per_10, per_12

# Column-bind the three numeric vectors; the column names are spelled out
# and match the vector names.
mba_a <- cbind(
  Roll_Number = Roll_Number,
  per_10 = per_10,
  per_12 = per_12
)

# Inspect the structure of the resulting object.
str(mba_a)
##  num [1:20, 1:3] 1 2 3 4 5 6 7 8 9 10 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr [1:3] "Roll_Number" "per_10" "per_12"
# can use fix(mba_a)

In the same way, vectors can be joined using rbind(); here they are joined row-wise

# Row-bind the same vectors: each vector becomes one row of the result.
mba_ar <- rbind(
  Roll_Number = Roll_Number,
  per_10 = per_10,
  per_12 = per_12
)

# Inspect the structure of the resulting object.
str(mba_ar)
##  num [1:3, 1:20] 1 85 71 2 91.2 91.2 3 76 82 4 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:3] "Roll_Number" "per_10" "per_12"
##   ..$ : NULL
# can use fix(mba_ar)

Case 2

Creating an R object from vectors of different types: numeric (scale) and character

# Numeric vectors (doubles).
Roll_Number <- as.numeric(1:20)

per_10 <- c(
  85, 91.2, 76, 85.5, 85, 91, 96, 80, 79.8, 83.6,
  82, 89.3, 94.3, 94, 75, 85.5, 75, 93, 88, 78
)

per_12 <- c(
  71, 91.2, 82, 73.5, 89, 93, 80, 80, 65.5, 84.2,
  62, 89, 97, 81, 81, 61, 74, 94.8, 65.2, 76
)

# Character vector.
name1 <- c(
  "CHRISTENSON", "NAVEEN J", "PRASANTHKUMAR", "SATYAJIT", "SHARDUL",
  "AJAY", "DEEPTI", "SHIRISHA", "ROHINI", "SHELLY",
  "SHIVANI", "VARSHA", "KRISHNA", "SRIPAD", "VIBHAV",
  "TRISHA", "NAMRATA", "SHRUTI", "ANANDA", "ASHISH"
)

# cbind() yields a matrix, which holds a single type, so mixing numeric and
# character vectors coerces every column to character.
mba_a <- cbind(
  Roll_Number = Roll_Number,
  per_10 = per_10,
  per_12 = per_12,
  name1 = name1
)

# Inspect the structure of the resulting object.
str(mba_a)
##  chr [1:20, 1:4] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr [1:4] "Roll_Number" "per_10" "per_12" "name1"
# All the vectors are converted to a string R object
# problem because vectors are of different types

use dataframes

# A data frame keeps each column's own type, unlike the matrix from cbind().
mba_d <- data.frame(
  Roll_Number = Roll_Number,
  name1 = name1,
  per_10 = per_10,
  per_12 = per_12
)

# Inspect the structure of the data frame.
str(mba_d)
## 'data.frame':    20 obs. of  4 variables:
##  $ Roll_Number: num  1 2 3 4 5 6 7 8 9 10 ...
##  $ name1      : Factor w/ 20 levels "AJAY","ANANDA",..: 4 8 9 11 12 1 5 14 10 13 ...
##  $ per_10     : num  85 91.2 76 85.5 85 91 96 80 79.8 83.6 ...
##  $ per_12     : num  71 91.2 82 73.5 89 93 80 80 65.5 84.2 ...

We can use fix(mba_d) to view or edit it; note that in a data frame all vectors must have the same number of observations

Use a list if the vectors are of unequal length

# Collect the four vectors in an unnamed list; each element keeps its own
# type.
mba_c <- list(
  Roll_Number,
  name1,
  per_10,
  per_12
)

# Inspect the structure of the list.
str(mba_c)
## List of 4
##  $ : num [1:20] 1 2 3 4 5 6 7 8 9 10 ...
##  $ : chr [1:20] "CHRISTENSON" "NAVEEN J" "PRASANTHKUMAR" "SATYAJIT" ...
##  $ : num [1:20] 85 91.2 76 85.5 85 91 96 80 79.8 83.6 ...
##  $ : num [1:20] 71 91.2 82 73.5 89 93 80 80 65.5 84.2 ...
# we can use fix(mba_c)

Creating a Matrix

how to start creating a matrix

# Open the help page for matrix (usage, arguments and examples). The plain
# topic form is used here; `?` followed by a function call is the form
# documented for looking up S4 method documentation.
?matrix

# The call spelled out with the default arguments from the help page:
# produces (and prints) a 1 x 1 matrix containing NA.
matrix(data = NA, nrow = 1, ncol = 1, byrow = FALSE, dimnames = NULL)

# 1 x 1 matrix holding the value 1.
matrix1 <- matrix(1, nrow = 1, ncol = 1, byrow = TRUE)
matrix1
##      [,1]
## [1,]    1
str(matrix1)
##  num [1, 1] 1
#---
# 1 x 2 matrix (a single row).
matrix2 <- matrix(c(1, 1), nrow = 1, ncol = 2, byrow = TRUE)
matrix2
##      [,1] [,2]
## [1,]    1    1
str(matrix2)
##  num [1, 1:2] 1 1
#----

# 3 x 1 matrix (a single column).
matrix3 <- matrix(c(1, 2, 3), nrow = 3, ncol = 1, byrow = TRUE)
matrix3
##      [,1]
## [1,]    1
## [2,]    2
## [3,]    3

Now you can label the rows and columns of the matrix. Remember that the row names and column names are supplied together in the form of a list

# 1 x 2 matrix with row label "a" and column labels "x" and "y"; dimnames
# takes a list of (row names, column names).
matrix4 <- matrix(
  c(1, 1),
  nrow = 1,
  ncol = 2,
  byrow = TRUE,
  dimnames = list("a", c("x", "y"))
)

Check the number of rows and columns: the number of labels assigned must equal the number of rows and columns

# 3 x 3 matrix filled row by row with 1..9 (as doubles), with rows labelled
# a-c and columns labelled x-z.
matrix5 <- matrix(
  as.numeric(1:9),
  nrow = 3,
  ncol = 3,
  byrow = TRUE,
  dimnames = list(
    c("a", "b", "c"),
    c("x", "y", "z")
  )
)

# str() lists the values in storage (column-wise) order.
str(matrix5)
##  num [1:3, 1:3] 1 4 7 2 5 8 3 6 9
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:3] "a" "b" "c"
##   ..$ : chr [1:3] "x" "y" "z"
length(matrix5)
## [1] 9
dim(matrix5)
## [1] 3 3
matrix5
##   x y z
## a 1 2 3
## b 4 5 6
## c 7 8 9
# note all the observations/data should be of the same type

case 3, categorical variable

# character: gender stored as text
gender1 <- c("male", "male", "female", "female", "male")

# numeric: gender stored as the codes 1 and 2
gender2 <- c(1, 1, 2, 2, 1)

# problem---> labels are not given to levels  or code 1 and 2

# for categories use factor

# Usage of factor() -- the signature only, kept as a comment for reference.
# It is not meant to be run: evaluated as a call it fails, because `x` is
# not defined and `levels` would refer to the base function of that name.
#
#   factor(x = character(), levels, labels = levels,
#          exclude = NA, ordered = is.ordered(x), nmax = NA)

giving labels to 1 and 2

# Map code 1 to "Male" and code 2 to "Female".
gender3 <- factor(
  gender2,
  levels = c(1, 2),
  labels = c("Male", "Female")
)

# Inspect the resulting factor.
str(gender3)
##  Factor w/ 2 levels "Male","Female": 1 1 2 2 1

we can change the order of labels, here 2 is male and 1 is female

# Levels listed as (2, 1): code 2 is labelled "Male" and code 1 "Female".
gender3 <- factor(
  gender2,
  levels = c(2, 1),
  labels = c("Male", "Female")
)

exercise

#create a variable week as a categorical variable

# Solution

# Numeric day codes. They are kept under their own name so the raw codes are
# still available after week1 has been converted to a factor.
week_codes <- c(1, 2, 3, 4, 5, 6, 7, 1, 2)
week1 <- week_codes

## giving labels 1,2,3, as mon, tue, ....

week1 <- factor(
  week1,
  levels = c(1, 2, 3, 4, 5, 6, 7),
  labels = c("Mon", "Tue", "Wed", "Thur", "Fri", "Sat", "Sun")
)

week1
## [1] Mon  Tue  Wed  Thur Fri  Sat  Sun  Mon  Tue 
## Levels: Mon Tue Wed Thur Fri Sat Sun
# can be done without levels also; it will take the levels from the sorted
# unique codes 1, 2, 3 and ...
# The factor is rebuilt from the numeric codes (not from the factor created
# above) so that the levels really are inferred from the data.

week1 <- factor(
  week_codes,
  labels = c("Mon", "Tue", "Wed", "Thur", "Fri", "Sat", "Sun")
)

str(week1)
##  Factor w/ 7 levels "Mon","Tue","Wed",..: 1 2 3 4 5 6 7 1 2
# create income group but the codes are not in order

# Income-group codes are not consecutive (1, 3, 5); the levels are listed
# explicitly and paired with their labels in the same order.
Incomegroup <- c(1, 3, 5, 1, 1, 1)
Incomegroup <- factor(
  Incomegroup,
  levels = c(1, 3, 5),
  labels = c("HIG", "MIG", "LIG")
)

Exercise 1- create an R object named "mdp1" (R names are case-sensitive; the solution below uses lowercase),

# which contains the following information

#Rollno- 1 to 10, coursename-mba,bba,mba,bba,mba,bba,mba,bba,mba,bba

#caste-gen, obc, sc,st, data is code as 1,1,2,2,3,4,1,1,2,3



#solution

# Roll numbers 1 to 10 (integer sequence).
rollno <- 1:10

# Course names alternate mba / bba; character values go in double quotes.
coursename <- rep(c("mba", "bba"), times = 5)

# Caste is recorded as numeric codes.
caste <- c(1, 1, 2, 2, 3, 4, 1, 1, 2, 3)

# Turn the codes into a factor; the labels are matched to the sorted unique
# codes 1, 2, 3, 4.
caste <- factor(caste, labels = c("general", "obc", "SC", "ST"))

# Attempt 1: cbind() -- everything is coerced to character.
mdp1 <- cbind(rollno = rollno, coursename = coursename, caste = caste)

str(mdp1)
##  chr [1:10, 1:3] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "mba" "bba" ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr [1:3] "rollno" "coursename" "caste"
# Attempt 2: an unnamed list -- each element keeps its own type.
mdp1 <- list(rollno, coursename, caste)

str(mdp1)
## List of 3
##  $ : int [1:10] 1 2 3 4 5 6 7 8 9 10
##  $ : chr [1:10] "mba" "bba" "mba" "bba" ...
##  $ : Factor w/ 4 levels "general","obc",..: 1 1 2 2 3 4 1 1 2 3
# Attempt 3: a data frame -- one named column per vector.
mdp1 <- data.frame(rollno = rollno, coursename = coursename, caste = caste)

str(mdp1)
## 'data.frame':    10 obs. of  3 variables:
##  $ rollno    : int  1 2 3 4 5 6 7 8 9 10
##  $ coursename: Factor w/ 2 levels "bba","mba": 2 1 2 1 2 1 2 1 2 1
##  $ caste     : Factor w/ 4 levels "general","obc",..: 1 1 2 2 3 4 1 1 2 3
# we can use fix(mdp1)

# labeling the variables: use library(Hmisc)

To know more about labels, use ?label once Hmisc is loaded (?labels opens the help for a different, base R function)

library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
name1  # auto-print the name vector as it is before a label is attached
##  [1] "CHRISTENSON"   "NAVEEN J"      "PRASANTHKUMAR" "SATYAJIT"     
##  [5] "SHARDUL"       "AJAY"          "DEEPTI"        "SHIRISHA"     
##  [9] "ROHINI"        "SHELLY"        "SHIVANI"       "VARSHA"       
## [13] "KRISHNA"       "SRIPAD"        "VIBHAV"        "TRISHA"       
## [17] "NAMRATA"       "SHRUTI"        "ANANDA"        "ASHISH"
# Attach a descriptive label to each vector with label<-, then confirm with
# str() that the "label" attribute is present.
label(name1) <- "name of respondents"

str(name1)
##  'labelled' chr [1:20] "CHRISTENSON" "NAVEEN J" "PRASANTHKUMAR" ...
##  - attr(*, "label")= chr "name of respondents"
label(gender1) <- "gender of the respondents"

str(gender1)
##  'labelled' chr [1:5] "male" "male" "female" "female" "male"
##  - attr(*, "label")= chr "gender of the respondents"
label(Roll_Number) <- "serial no of the respondents"

str(Roll_Number)
##  'labelled' num [1:20] 1 2 3 4 5 6 7 8 9 10 ...
##  - attr(*, "label")= chr "serial no of the respondents"

No comments:

Post a Comment