Basics of R - Session 2 - Vectors, Lists, Data Frames, Matrices, Labels
Dr Manohar Kapse
17 March 2019
Session 2
# Building vectors whose elements all share one type.
# Case 1: a data set of five students.
# Begin with one observation only (a single student); every object below
# is a length-one vector.
# NOTE: R is case-sensitive, so `Per_10` and `Name1` are different objects
# from the lower-case `per_10` and `name1`.
Roll_Number <- 1
Name1 <- "CHRISTENSON"
Per_10 <- 85
per_12 <- 71
Creating vectors with more than one observation; the first three below are numeric in nature.
# Roll numbers 1 to 20, stored as doubles (not integers).
Roll_Number <- as.numeric(seq_len(20))
# First numeric vector: one value per student.
per_10 <- c(
  85, 91.2, 76, 85.5, 85, 91, 96, 80, 79.8, 83.6,
  82, 89.3, 94.3, 94, 75, 85.5, 75, 93, 88, 78
)
# Second numeric vector: one value per student.
per_12 <- c(
  71, 91.2, 82, 73.5, 89, 93, 80, 80, 65.5, 84.2,
  62, 89, 97, 81, 81, 61, 74, 94.8, 65.2, 76
)
# A character (string) vector holding the 20 names.
name1 <- c(
  "CHRISTENSON", "NAVEEN J", "PRASANTHKUMAR", "SATYAJIT", "SHARDUL",
  "AJAY", "DEEPTI", "SHIRISHA", "ROHINI", "SHELLY",
  "SHIVANI", "VARSHA", "KRISHNA", "SRIPAD", "VIBHAV",
  "TRISHA", "NAMRATA", "SHRUTI", "ANANDA", "ASHISH"
)
Use cbind() to bind the vectors together column-wise into a single R object. To learn more about cbind, use ?cbind
# Bind the three numeric vectors side by side, one column per vector;
# the column names are the vector names.
mba_a <- cbind(Roll_Number = Roll_Number, per_10 = per_10, per_12 = per_12)
# Inspect the structure of the object just created.
str(mba_a)
## num [1:20, 1:3] 1 2 3 4 5 6 7 8 9 10 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:3] "Roll_Number" "per_10" "per_12"
# fix(mba_a) can be used to view/edit the object interactively.
In the same way, the vectors can be joined using rbind(); here they are joined row-wise.
# Stack the three numeric vectors on top of each other, one row per vector;
# the row names are the vector names.
mba_ar <- rbind(Roll_Number = Roll_Number, per_10 = per_10, per_12 = per_12)
# Inspect the structure of the object just created.
str(mba_ar)
## num [1:3, 1:20] 1 85 71 2 91.2 91.2 3 76 82 4 ...
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:3] "Roll_Number" "per_10" "per_12"
## ..$ : NULL
# fix(mba_ar) can be used to view/edit the object interactively.
Case 2
Creating an R object from vectors of different types: numeric (scale) and character.
# Three numeric vectors and one character vector, 20 observations each.
Roll_Number <- as.numeric(seq_len(20))
per_10 <- c(
  85, 91.2, 76, 85.5, 85, 91, 96, 80, 79.8, 83.6,
  82, 89.3, 94.3, 94, 75, 85.5, 75, 93, 88, 78
)
per_12 <- c(
  71, 91.2, 82, 73.5, 89, 93, 80, 80, 65.5, 84.2,
  62, 89, 97, 81, 81, 61, 74, 94.8, 65.2, 76
)
name1 <- c(
  "CHRISTENSON", "NAVEEN J", "PRASANTHKUMAR", "SATYAJIT", "SHARDUL",
  "AJAY", "DEEPTI", "SHIRISHA", "ROHINI", "SHELLY",
  "SHIVANI", "VARSHA", "KRISHNA", "SRIPAD", "VIBHAV",
  "TRISHA", "NAMRATA", "SHRUTI", "ANANDA", "ASHISH"
)
# Column-bind all four vectors, including the character one.
mba_a <- cbind(
  Roll_Number = Roll_Number,
  per_10 = per_10,
  per_12 = per_12,
  name1 = name1
)
# Inspect the structure of the object just created.
str(mba_a)
## chr [1:20, 1:4] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:4] "Roll_Number" "per_10" "per_12" "name1"
# Every column has become character: a matrix holds a single type, so
# mixing numeric and character vectors turns the numbers into strings.
Use a data frame instead: each column keeps its own type.
# Build a data frame from the four vectors; each column keeps its own type.
# stringsAsFactors is given explicitly because its default changed from TRUE
# to FALSE in R 4.0.0. With FALSE the names stay a character column on every
# R version instead of being turned into a 20-level factor on older ones.
mba_d <- data.frame(
  Roll_Number, name1, per_10, per_12,
  stringsAsFactors = FALSE
)
# Inspect the structure of the object just created.
str(mba_d)
## 'data.frame': 20 obs. of 4 variables:
## $ Roll_Number: num 1 2 3 4 5 6 7 8 9 10 ...
## $ name1 : chr "CHRISTENSON" "NAVEEN J" "PRASANTHKUMAR" "SATYAJIT" ...
## $ per_10 : num 85 91.2 76 85.5 85 91 96 80 79.8 83.6 ...
## $ per_12 : num 71 91.2 82 73.5 89 93 80 80 65.5 84.2 ...
We can use fix(mba_d) to view the data frame, but all of its columns must have the same number of observations.
Use a list if the vectors are of unequal length.
# Collect the four vectors in a list; each element keeps its own type.
mba_c <- list(Roll_Number, name1, per_10, per_12)
# Inspect the structure of the object just created.
str(mba_c)
## List of 4
## $ : num [1:20] 1 2 3 4 5 6 7 8 9 10 ...
## $ : chr [1:20] "CHRISTENSON" "NAVEEN J" "PRASANTHKUMAR" "SATYAJIT" ...
## $ : num [1:20] 85 91.2 76 85.5 85 91 96 80 79.8 83.6 ...
## $ : num [1:20] 71 91.2 82 73.5 89 93 80 80 65.5 84.2 ...
# fix(mba_c) can be used to view/edit the object interactively.
Creating a Matrix
how to start creating a matrix
# Open the help page for matrix(): usage, arguments and examples.
?matrix()
# matrix() called with its default argument values spelled out;
# the result is a 1 x 1 matrix holding NA.
matrix(data = NA, nrow = 1, ncol = 1, byrow = FALSE, dimnames = NULL)
# A 1 x 1 matrix containing the value 1.
matrix1 <- matrix(1, nrow = 1, ncol = 1, byrow = TRUE)
matrix1
## [,1]
## [1,] 1
str(matrix1)
## num [1, 1] 1
#---
# A row matrix: 1 row, 2 columns.
matrix2 <- matrix(c(1, 1), nrow = 1, ncol = 2, byrow = TRUE)
matrix2
## [,1] [,2]
## [1,] 1 1
str(matrix2)
## num [1, 1:2] 1 1
#----
# A column matrix: 3 rows, 1 column.
matrix3 <- matrix(c(1, 2, 3), nrow = 3, ncol = 1, byrow = TRUE)
matrix3
## [,1]
## [1,] 1
## [2,] 2
## [3,] 3
Now you can label the rows and columns of the matrix. Remember that the row and column names are supplied together as a list: list(row_names, column_names).
# 1 x 2 matrix with row label "a" and column labels "x" and "y";
# dimnames is given as list(row_names, column_names).
matrix4 <- matrix(
  c(1, 1),
  nrow = 1, ncol = 2, byrow = TRUE,
  dimnames = list(c("a"), c("x", "y"))
)
Check the number of rows and columns: the number of labels supplied must equal the number of rows and the number of columns respectively.
# 3 x 3 matrix filled row by row, with rows labelled a-c and columns x-z.
matrix5 <- matrix(
  c(1, 2, 3, 4, 5, 6, 7, 8, 9),
  nrow = 3, ncol = 3, byrow = TRUE,
  dimnames = list(c("a", "b", "c"), c("x", "y", "z"))
)
# str() lists the values in storage (column-wise) order.
str(matrix5)
## num [1:3, 1:3] 1 4 7 2 5 8 3 6 9
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:3] "a" "b" "c"
## ..$ : chr [1:3] "x" "y" "z"
# Total number of cells.
length(matrix5)
## [1] 9
# Number of rows and columns.
dim(matrix5)
## [1] 3 3
matrix5
## x y z
## a 1 2 3
## b 4 5 6
## c 7 8 9
# Note: all the values in a matrix must be of the same type.
Case 3: categorical variables
# Gender stored as text (a character vector).
gender1 <- c("male", "male", "female", "female", "male")
# The same information stored as numeric codes.
gender2 <- c(1, 1, 2, 2, 1)
# Problem: no labels are attached to the codes 1 and 2.
# For categorical data use factor(), which attaches labels to levels.
# Usage of factor(), for reference only. It is kept as a comment because it
# is the function signature, not a runnable call: `levels` and `x` have no
# values here, so executing it stops with an error.
#   factor(x = character(), levels, labels = levels,
#          exclude = NA, ordered = is.ordered(x), nmax = NA)
giving labels to 1 and 2
# Code 1 is labelled "Male" and code 2 is labelled "Female".
gender3 <- factor(gender2, levels = c(1, 2), labels = c("Male", "Female"))
str(gender3)
## Factor w/ 2 levels "Male","Female": 1 1 2 2 1
we can change the order of labels, here 2 is male and 1 is female
# Levels and labels are paired by position: listing level 2 first labels
# code 2 as "Male" and code 1 as "Female".
gender3 <- factor(gender2, levels = c(2, 1), labels = c("Male", "Female"))
exercise
# Exercise: create a variable `week` as a categorical variable.
# Solution
# Keep the raw numeric day codes in their own vector so that both factor()
# calls below start from the codes (not from an already-built factor).
week_codes <- c(1, 2, 3, 4, 5, 6, 7, 1, 2)
day_labels <- c("Mon", "Tue", "Wed", "Thur", "Fri", "Sat", "Sun")
# Give the codes 1, 2, 3, ... the labels Mon, Tue, Wed, ...
week1 <- factor(week_codes, levels = c(1, 2, 3, 4, 5, 6, 7), labels = day_labels)
week1
## [1] Mon Tue Wed Thur Fri Sat Sun Mon Tue
## Levels: Mon Tue Wed Thur Fri Sat Sun
# `levels` can be omitted: factor() then uses the sorted unique values of
# the codes (1, 2, ..., 7 here) as levels. This gives the same result only
# because every code from 1 to 7 occurs at least once in the data.
week1 <- factor(week_codes, labels = day_labels)
str(week1)
## Factor w/ 7 levels "Mon","Tue","Wed",..: 1 2 3 4 5 6 7 1 2
# Income group: the codes (1, 3, 5) are not consecutive, so the levels are
# listed explicitly and paired with their labels by position.
Incomegroup <- c(1, 3, 5, 1, 1, 1)
Incomegroup <- factor(
  Incomegroup,
  levels = c(1, 3, 5),
  labels = c("HIG", "MIG", "LIG")
)
Exercise 1 - create an R object named "mdp1" (R is case-sensitive; the solution below uses lower case),
# which contains the following information
# Rollno: 1 to 10; coursename: mba,bba,mba,bba,mba,bba,mba,bba,mba,bba
# caste: gen, obc, sc, st; the data is coded as 1,1,2,2,3,4,1,1,2,3
# Solution
rollno <- 1:10
# Character values must be written in double quotes.
coursename <- c("mba", "bba", "mba", "bba", "mba", "bba", "mba", "bba", "mba", "bba")
caste <- c(1, 1, 2, 2, 3, 4, 1, 1, 2, 3)
# Convert caste into a factor, i.e. give labels to the codes. The levels are
# listed explicitly so each label stays tied to its code even if some code
# were absent from the data.
caste <- factor(
  caste,
  levels = 1:4,
  labels = c("general", "obc", "SC", "ST")
)
# Attempt 1: cbind() builds a matrix, so everything is coerced to character
# (the factor contributes its internal integer codes, not its labels).
mdp1 <- cbind(rollno, coursename, caste)
str(mdp1)
## chr [1:10, 1:3] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "mba" "bba" ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:3] "rollno" "coursename" "caste"
# Attempt 2: a list keeps each element's own type.
mdp1 <- list(rollno, coursename, caste)
str(mdp1)
## List of 3
## $ : int [1:10] 1 2 3 4 5 6 7 8 9 10
## $ : chr [1:10] "mba" "bba" "mba" "bba" ...
## $ : Factor w/ 4 levels "general","obc",..: 1 1 2 2 3 4 1 1 2 3
# Attempt 3: a data frame keeps each column's own type. stringsAsFactors is
# set explicitly (its default changed to FALSE in R 4.0.0) so that coursename
# becomes a factor, as in the output below, on every R version.
mdp1 <- data.frame(rollno, coursename, caste, stringsAsFactors = TRUE)
str(mdp1)
## 'data.frame': 10 obs. of 3 variables:
## $ rollno : int 1 2 3 4 5 6 7 8 9 10
## $ coursename: Factor w/ 2 levels "bba","mba": 2 1 2 1 2 1 2 1 2 1
## $ caste : Factor w/ 4 levels "general","obc",..: 1 1 2 2 3 4 1 1 2 3
# we can use fix(mdp1)
# Labelling the variables: use library(Hmisc)
To know more about labels, use ?label after loading Hmisc (note: ?labels opens the help for a different, base R function).
# Load Hmisc, which provides label() for attaching a descriptive label
# to a variable.
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
# Print the names before labelling.
name1
## [1] "CHRISTENSON" "NAVEEN J" "PRASANTHKUMAR" "SATYAJIT"
## [5] "SHARDUL" "AJAY" "DEEPTI" "SHIRISHA"
## [9] "ROHINI" "SHELLY" "SHIVANI" "VARSHA"
## [13] "KRISHNA" "SRIPAD" "VIBHAV" "TRISHA"
## [17] "NAMRATA" "SHRUTI" "ANANDA" "ASHISH"
# Attach a label; str() then reports the 'labelled' class and the "label"
# attribute, while the values themselves are unchanged.
label(name1) <- "name of respondents"
str(name1)
## 'labelled' chr [1:20] "CHRISTENSON" "NAVEEN J" "PRASANTHKUMAR" ...
## - attr(*, "label")= chr "name of respondents"
# Label the character gender vector.
label(gender1) <- "gender of the respondents"
str(gender1)
## 'labelled' chr [1:5] "male" "male" "female" "female" "male"
## - attr(*, "label")= chr "gender of the respondents"
# Label the numeric roll-number vector.
label(Roll_Number) <- "serial no of the respondents"
str(Roll_Number)
## 'labelled' num [1:20] 1 2 3 4 5 6 7 8 9 10 ...
## - attr(*, "label")= chr "serial no of the respondents"
No comments:
Post a Comment