Learn R: July 2018

Monday, July 30, 2018

Basics of R Session 13 Conjoint Analysis

library(conjoint)

Case study- Beauty Bar Soap

The case study has three attributes:

1) colour, with three levels (red, blue and yellow);
2) shape, with three levels (cubic, cylindrical and spherical); and
3) aroma, with two levels (scented and unscented).

Create an R object attribute1 for the factors with levels

# Attributes of the soap case study and their levels:
# 3 colours x 3 shapes x 2 aromas.
attribute1 <- list(
  colour = c("red", "blue", "yellow"),
  shape = c("cubic", "cylindrical", "spherical"),
  aroma = c("scented", "unscented")
)

attribute1

# fix() opens an editor on the object, so it is only meaningful in an
# interactive session; skip it when the script is run in batch mode.
if (interactive()) {
  fix(attribute1)
}

All the possible profiles for the three factors (colour and shape with three levels each, aroma with two levels): 3*3*2 = 18

# One row per combination of the attribute levels held in attribute1.
profiles1 <- expand.grid(attribute1)
profiles1

# fix() opens an editor, so only call it in an interactive session.
if (interactive()) {
  fix(profiles1)
}

# length() of a data frame is its number of columns (the attributes);
# dim() reports rows (profiles) and columns together.
length(profiles1)
dim(profiles1)

Orthogonal design: the number of cards is equal to 3*3*2 = 18, i.e. all the possible profiles.

# Build the factorial design from the candidate profiles, asking for 18 cards
# (the same number as there are rows in profiles1).
design1 <- caFactorialDesign(data = profiles1, type = "fractional", cards = 18)

print(design1)

# fix() opens an editor, so only call it in an interactive session.
if (interactive()) {
  fix(design1)
}

load the files with the names of all the levels of the three factors

# Read the level names; header = FALSE because the file has no column-name row.
possiblefactors <- read.csv(
  file = "file:///E:/2 presentation for class/inurture Lectures/1 multivariate data analysis/1 Multivariate Data Analysis PPts Self/conjoint Analysis/Beauty Bar Soap Case IIM/profile.csv",
  header = FALSE
)

possiblefactors

load the files with the perception of the respondents on the 3 factors and their sublevels

# Read the respondents' preference data; the first row of the file is used as
# the column names. TRUE is spelled out because T is an ordinary variable that
# can be reassigned.
perception1 <- read.csv(
  "file:///E:/2 presentation for class/inurture Lectures/1 multivariate data analysis/1 Multivariate Data Analysis PPts Self/conjoint Analysis/Beauty Bar Soap Case IIM/perception.csv",
  header = TRUE
)
Here the respondents are asked to rank the levels of each of the factors.

# fix() opens an editor, so only call it in an interactive session.
if (interactive()) {
  fix(perception1)
}

# Run the conjoint analysis on the preference data, the design and the level
# names.
# NOTE(review): the call relies on Conjoint()'s default response type; if
# perception.csv holds rankings rather than scores, confirm against the
# conjoint package documentation whether a different setting is needed.
Conjoint(perception1, design1, possiblefactors)

Tuesday, July 10, 2018

Basics of R- Session 12- Market Basket Analysis

Market Basket Analysis

library(arules)
library(arulesViz)

# file mbaper: read the survey data used for the market basket analysis
mbaper <- read.csv("E:/2 presentation for class/R/1 R Open elective/data set/mbadata.csv")

# fix() opens an editor, so only call it in an interactive session.
if (interactive()) {
  fix(mbaper)
}

# show the type of every column
str(mbaper)

# Remove the first four variables, which are scale (numeric) in nature.
# Drop columns 1 to 4 and keep the rest.
mbaper <- mbaper[, -(1:4)]

# study the categorical variables using tables and cross tab and find the variables which are having associations or some sort of relations

# Convert the data set into the "transactions" type; all the factor / categorical variables are used in this analysis.

# Coerce the data frame to the "transactions" class.
mbaper <- as(object = mbaper, Class = "transactions")

# Check the data set obtained after converting it to transactions.

# Print every transaction, then only the first six.
inspect(mbaper)

inspect(head(mbaper, n = 6L))

# First rows of the item metadata slot.
head(mbaper@itemInfo, n = 6L)

# First elements of the itemset metadata and item metadata combined with c().
head(c(mbaper@itemsetInfo, mbaper@itemInfo), n = 6L)

# create apriori rules for the data set

# list the arguments that apriori() accepts
args(apriori)

# first with the default parameters ...
rules1 <- apriori(mbaper)

# ... then with explicit support / confidence thresholds
rules1 <- apriori(
  mbaper,
  parameter = list(supp = 0.10, conf = 0.80)
)
rules1 <- apriori(
  mbaper,
  parameter = list(supp = 0.50, conf = 0.80)
)

# Number of items in a rule: minlen = 1 and maxlen = 4 mean that a rule
# contains at most 4 items. This last assignment is the rules1 used below.
rules1 <- apriori(
  mbaper,
  parameter = list(minlen = 1, supp = 0.10, conf = 0.80, maxlen = 4)
)

#----------------------------#
# check the frequency distribution of the transactions
itemFrequency(mbaper)

# we can use other methods as well
itemFrequency(mbaper, type = "absolute")

# plot the item frequencies of the transactions
# (itemFrequencyPlot() works on transactions / item matrices; handing it the
# rules object directly fails, so the transactions are plotted here)
itemFrequencyPlot(mbaper)

# to plot the items that occur in the rules, extract them with items() first
items(rules1)
itemFrequencyPlot(items(rules1))

#--------------------------#

# inspect the rules created

inspect(rules1)

# first three rules
# head() returns the same rules as rules1[1:3] but does not fail when fewer
# than three rules were mined
inspect(head(rules1, n = 3))

# first ten rules
inspect(head(rules1, n = 10))

#-----------------------------------------------------#
# top rules ranked by each quality measure
inspect(head(sort(rules1, by = "support"), n = 10))

inspect(head(sort(rules1, by = "confidence"), n = 10))

inspect(head(sort(rules1, by = "lift"), n = 100))
#---------------------------------------------------------------#

#when we want to study some specific combination of LHS and RHS
# rules with RHS containing "perceivedscorecat=required skills" only

# Mine rules whose right-hand side is "perceivedscorecat=required skills".
# default = "lhs" states explicitly that every other item may only appear on
# the left-hand side. The earlier variants of this call (without `default`,
# and with default = "rhs") were overwritten before rules2 was used, so only
# this final call is kept.
rules2 <- apriori(
  mbaper,
  parameter = list(minlen = 1, supp = 0.001, conf = 0.51, maxlen = 4),
  appearance = list(
    rhs = c("perceivedscorecat=required skills"),
    default = "lhs"
  )
)

# ten rules with the highest lift
inspect(head(sort(rules2, by = "lift"), n = 10))

#--------------------------------------------------#

# rules with LHS restricted to "perceivedscorecat=required skills";
# all other items are left to the right-hand side (default = "rhs")

rules2 <- apriori(
  mbaper,
  parameter = list(minlen = 1, supp = 0.05, conf = 0.8),
  appearance = list(
    default = "rhs",
    lhs = "perceivedscorecat=required skills"
  )
)

# ten rules with the highest lift
inspect(head(sort(rules2, by = "lift"), n = 10))

#___________________________________________________________________________#

# visualise the rules with arulesViz
library(arulesViz)
plot(rules2)
plot(rules1, method = "graph")
plot(rules1, method = "grouped")

# plotly_arules() is deprecated in arulesViz; the plotly version of the plot
# is requested through the engine argument of plot() instead
plot(rules1, engine = "plotly")

#----------------#

rules3 <- apriori(mbaper, parameter = list(supp = 0.50, conf = 0.80))

# the `interactive = TRUE` argument is deprecated in favour of `engine`
plot(rules3, method = "graph", engine = "interactive", shading = NA)

Monday, July 9, 2018

Basics of R- Session 11- Factor Analysis

Factor Analysis

library(psych)
library(Hmisc)

# import data set
# NOTE(review): the first column used to be addressed as `ï..Resp`, which is
# what a UTF-8 byte-order mark looks like when it leaks into a column name;
# fileEncoding = "UTF-8-BOM" is assumed to be the right encoding for this
# file - confirm against the CSV.
factoranalysis <- read.csv(
  "E:/2 presentation for class/R/1 R Open elective/data set/FACTOR ANALYSIS.csv",
  fileEncoding = "UTF-8-BOM"
)

# check the structure of the data set, here we are using only 10 Variables

# labeling the variables
# The respondent column is labelled by position so the label does not depend
# on how the first header cell was decoded.
label(factoranalysis[[1]]) <- "Respondent"
label(factoranalysis$X1) <- "refreshing"
label(factoranalysis$X2) <- "bad for health"
label(factoranalysis$X3) <- "very convenient to serve"
label(factoranalysis$X4) <- "avoided with age"
label(factoranalysis$X5) <- "very tasty"
label(factoranalysis$X6) <- "not good for children"
label(factoranalysis$X7) <- "consumed occasionally"
label(factoranalysis$X8) <- "not be taken in large quantity"
label(factoranalysis$X9) <- "not as good as energy drinks"
label(factoranalysis$X10) <- "better than fruit juices"
label(factoranalysis$S) <- "Recommending aerated drinks to others"

str(factoranalysis)

# fix() and View() open an editor / viewer, so only call them in an
# interactive session.
if (interactive()) {
  fix(factoranalysis)
  View(factoranalysis)
}

#-----------------#

#--------------------#

#----------------------#

# here the library(psych) will be used

# Bartlett's test of sphericity

# the data frame should contain only those variables to which the factor
# analysis is applied

# remove those variables which are not included

# fix() opens an editor, so only call it in an interactive session.
if (interactive()) {
  fix(factoranalysis)
}

# Keep the ten rating items by name. Unlike dropping columns 1 and 12 by
# position, this gives the same result if the line is run a second time.
# NOTE(review): assumes the columns are the respondent id, X1..X10 and S,
# in that order - confirm against the CSV.
factoranalysis <- factoranalysis[, paste0("X", 1:10)]

if (interactive()) {
  fix(factoranalysis)
}

cortest.bartlett(factoranalysis) # data set

# KMO test

KMO(factoranalysis)

#-----------------#

#--------------------#

#----------------------#

# Usage of principal() in factor analysis (shown for reference only; it is not meant to be run as-is):
#
# principal(r, nfactors = 1, residuals = FALSE, rotate = "varimax", n.obs = NA, covar = FALSE,
#           scores = TRUE, missing = FALSE, impute = "median", oblique.scores = TRUE, method = "regression", ...)
#
# Possible values for the rotate argument:
# "none", "varimax", "quartimax", "promax", "oblimin", "simplimax", and "cluster"

# Principal components on the item data, using principal()'s defaults.
PCA2 <- principal(r = factoranalysis)

# Look at individual pieces of the result object.
PCA2$rotation     # rotation
PCA2$values       # values
PCA2$communality  # communality
PCA2$factors      # factors
PCA2$scores       # scores

# for screeplot
# X11 holds the values returned by principal(), Y11 their position (1, 2, ...).
# seq_along() is used instead of 1:length() so an empty vector gives an empty
# sequence.

X11 <- PCA2$values

Y11 <- seq_along(PCA2$values)

# A scree plot puts the component number on the x-axis and the eigenvalue on
# the y-axis; the original call had the two axes swapped.
plot(Y11, X11, type = "l", xlab = "Component number", ylab = "Eigenvalue")

# scree plot
VSS.scree(factoranalysis)

In the printed output, the column h2 gives the communalities and u2 gives the uniquenesses.

Communalities refer to shared variance with the other items, while uniqueness is variance not explained by the other items, but that could be explained by the latent variable as well as measurement error.

Tuesday, July 3, 2018

Basics of R- Session 10- Data Visualization-4 Tree map

library(treemap)

# index: the categorical variable(s) defining the rectangles;
# vSize: the scale variable that sets the size of each rectangle.
# NOTE(review): mbaper is passed as the data set here - presumably the
# data frame as read from the CSV, not the transactions object; confirm.
treemap(
  mbaper,
  index = "Gender_MF",
  vSize = "Percentage_in_10_Class"
)

# adding more categorical variables
treemap(
  mbaper,
  index = c("Gender_MF", "Previous_Degree"),
  vSize = "Percentage_in_10_Class"
)

# interactive treemap using itreemap, with the same index / vSize arguments
treemap::itreemap(
  mbaper,
  index = "Gender_MF",
  vSize = "Percentage_in_10_Class"
)