Monday, July 30, 2018

Basics of R Session 13 Conjoint Analysis


library(conjoint)

Case study- Beauty Bar Soap

The case study has three attributes:

1) colour, with three levels (red, blue and yellow)
2) shape, with three levels (cubic, cylindrical and spherical), and
3) aroma, with two levels (scented and unscented)

Create an R object attribute1 for the factors with levels

# Attributes of the soap and the levels each attribute can take.
attribute1 <- list(
  colour = c("red", "blue", "yellow"),
  shape  = c("cubic", "cylindrical", "spherical"),
  aroma  = c("scented", "unscented")
)

# Show the list, then open it in the interactive editor.
print(attribute1)
fix(attribute1)

All the possible profiles for the three attributes (two with three levels, one with two levels): 3*3*2 = 18

# Cross every level of every attribute to obtain the full set of profiles.
profiles1 <- expand.grid(attribute1)
print(profiles1)

# Open the profiles in the editor, then report their size:
# length() of a data frame is its number of columns, dim() is rows x columns.
fix(profiles1)
length(profiles1)
dim(profiles1)

Factorial design (type "fractional" is requested below); the number of cards is 3*3*2 = 18

# Build the design from the full profile set, asking for 18 cards.
design1 <- caFactorialDesign(
  data  = profiles1,
  type  = "fractional",
  cards = 18
)

# Show the design, then open it in the interactive editor.
design1
fix(design1)

Load the file with the names of all the levels of the three factors

# Names of the factor levels; the file is read without a header row.
possiblefactors <- read.csv(
  "file:///E:/2 presentation for class/inurture Lectures/1 multivariate data analysis/1 Multivariate Data Analysis PPts Self/conjoint Analysis/Beauty Bar Soap Case IIM/profile.csv",
  header = FALSE
)

print(possiblefactors)


Load the file with the respondents' perceptions of the three factors and their levels

# Respondents' perception data; the first row of the file holds the column names.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
perception1 <- read.csv(
  "file:///E:/2 presentation for class/inurture Lectures/1 multivariate data analysis/1 Multivariate Data Analysis PPts Self/conjoint Analysis/Beauty Bar Soap Case IIM/perception.csv",
  header = TRUE
)
Here the respondents are asked to rank the levels of each of the factors.



# Look over the responses in the editor before fitting.
fix(perception1)

# Fit the conjoint model from the responses (y), the design (x) and the
# level names (z).
# NOTE(review): the notes describe the responses as rankings, while Conjoint()
# is called with its default y.type -- confirm whether y.type = "rank" is needed.
Conjoint(y = perception1, x = design1, z = possiblefactors)



Tuesday, July 10, 2018

Basics of R- Session 12- Market Basket Analysis


Market Basket Analysis

library(arules)
library(arulesViz)

# file mbaper
# stringsAsFactors = TRUE keeps text columns as factors (the default before
# R 4.0), which the conversion to transactions below relies on.
mbaper <- read.csv(
  "E:/2 presentation for class/R/1 R Open elective/data set/mbadata.csv",
  stringsAsFactors = TRUE
)
fix(mbaper)
str(mbaper)

# remove the first four variables, which are scale (numeric) in nature
mbaper <- mbaper[, -(1:4)]

# study the categorical variables using tables and cross tabs and find the
# variables which have associations or some sort of relation

# convert the data into the "transactions" type; all the factor / categorical
# variables are used in this analysis
mbaper <- as(mbaper, "transactions")

# check the data set created after creating transactions
inspect(mbaper)

inspect(head(mbaper))

head(mbaper@itemInfo)

head(c(mbaper@itemsetInfo, mbaper@itemInfo))

# Mine association rules with apriori.
# args() lists the arguments the function accepts.
args(apriori)

# Default thresholds first, then explicit support / confidence cut-offs.
rules1 <- apriori(mbaper)

rules1 <- apriori(mbaper, parameter = list(supp = 0.10, conf = 0.80))
rules1 <- apriori(mbaper, parameter = list(supp = 0.50, conf = 0.80))

# minlen = 1 and maxlen = 4 bound the number of items per rule, so a rule
# contains at most 4 items.
rules1 <- apriori(
  mbaper,
  parameter = list(minlen = 1, supp = 0.10, conf = 0.80, maxlen = 4)
)

#----------------------------#
# check the frequency distribution of the items in the transactions
itemFrequency(mbaper)

# absolute counts can be requested instead
itemFrequency(mbaper, type = "absolute")

# plot the item frequencies of the transactions; itemFrequencyPlot() cannot
# be applied to a rules object directly, so the transactions are passed here
itemFrequencyPlot(mbaper)

# to plot item frequencies for the mined rules, extract their items first
items(rules1)
itemFrequencyPlot(items(rules1))


#--------------------------#

# inspect the rules created
inspect(rules1)

# first three rules; head() is used rather than rules1[1:3] so the call
# still works when fewer rules were mined than requested
inspect(head(rules1, n = 3))

# first ten rules
inspect(head(rules1, n = 10))


#-----------------------------------------------------#
# top rules after sorting by each quality measure
inspect(head(sort(rules1, by = "support"), n = 10))

inspect(head(sort(rules1, by = "confidence"), n = 10))

inspect(head(sort(rules1, by = "lift"), n = 100))
#---------------------------------------------------------------#

# when we want to study some specific combination of LHS and RHS
# rules with RHS containing "perceivedscorecat=required skills" only

# variant 1: only the rhs item is named; `default` is left unspecified
rules2 <- apriori(
  mbaper,
  parameter  = list(minlen = 1, supp = 0.001, conf = 0.51, maxlen = 4),
  appearance = list(rhs = c("perceivedscorecat=required skills"))
)

# variant 2: same rhs item, with default = "rhs" for all remaining items
rules2 <- apriori(
  mbaper,
  parameter  = list(minlen = 1, supp = 0.001, conf = 0.51, maxlen = 4),
  appearance = list(rhs = c("perceivedscorecat=required skills"), default = "rhs")
)

# variant 3 (the result inspected below): same rhs item, with
# default = "lhs" for all remaining items
rules2 <- apriori(
  mbaper,
  parameter  = list(minlen = 1, supp = 0.001, conf = 0.51, maxlen = 4),
  appearance = list(rhs = c("perceivedscorecat=required skills"), default = "lhs")
)

inspect(head(sort(rules2, by = "lift"), n = 10))

#--------------------------------------------------#


# rules with "perceivedscorecat=required skills" named as the LHS item and
# default = "rhs" for all remaining items
rules2 <- apriori(
  mbaper,
  parameter  = list(minlen = 1, supp = 0.05, conf = 0.8),
  appearance = list(default = "rhs", lhs = "perceivedscorecat=required skills")
)

# ten rules with the highest lift
inspect(head(sort(rules2, by = "lift"), n = 10))

#___________________________________________________________________________#


library(arulesViz)

# default plot of the rules, then the graph and grouped views
plot(rules2)
plot(rules1, method = "graph")
plot(rules1, method = "grouped")

# interactive plotly version; plotly_arules() is deprecated in favour of
# plot(..., engine = "plotly")
plot(rules1, engine = "plotly")


#----------------#

rules3 <- apriori(mbaper, parameter = list(supp = 0.50, conf = 0.80))

# interactive graph; the `interactive` argument is deprecated, the engine
# argument selects the interactive renderer instead
plot(rules3, method = "graph", engine = "interactive", shading = NA)


Monday, July 9, 2018

Basics of R- Session 11- Factor Analysis

Factor Analysis

library(psych)
library(Hmisc)

# import data set
factoranalysis <- read.csv("E:/2 presentation for class/R/1 R Open elective/data set/FACTOR ANALYSIS.csv")

# The first column name came through as "ï..Resp" (a prefix typical of a
# UTF-8 byte-order mark being read as text). Rename it by position so the
# code below does not depend on how the header was decoded.
names(factoranalysis)[1] <- "Resp"

# check the structure of the data set, here we are using only 10 variables

# labelling the variables
label(factoranalysis$Resp) <- "Respondent"
label(factoranalysis$X1) <- "refreshing"
label(factoranalysis$X2) <- "bad for health"
label(factoranalysis$X3) <- "very convenient to serve"
label(factoranalysis$X4) <- "avoided with age"
label(factoranalysis$X5) <- "very tasty"
label(factoranalysis$X6) <- "not good for children"
label(factoranalysis$X7) <- "consumed occasionally"
label(factoranalysis$X8) <- "not be taken in large quantity"
label(factoranalysis$X9) <- "not as good as energy drinks"
label(factoranalysis$X10) <- "better than fruit juices"
label(factoranalysis$S) <- "Recommending aerated drinks to others"

str(factoranalysis)

fix(factoranalysis)
View(factoranalysis)

#-----------------#
#--------------------#
#----------------------#
# the functions below come from library(psych)

# Bartlett's test of sphericity
# the data should contain only the variables that take part in the factor
# analysis, so the first and the twelfth columns are dropped
fix(factoranalysis)
factoranalysis <- factoranalysis[, -c(1, 12)]
fix(factoranalysis)

cortest.bartlett(factoranalysis)  # data set


# KMO test
KMO(factoranalysis)

#-----------------#
#--------------------#
#----------------------#

# usage of principal() in factor analysis (reference only, not meant to be run):
#
# principal(r, nfactors = 1, residuals = FALSE,rotate="varimax",n.obs=NA, covar=FALSE,
#  scores=TRUE,missing=FALSE,impute="median",oblique.scores=TRUE,method="regression",...)
#
# Rotation-->
# "none", "varimax", "quartimax", "promax", "oblimin", "simplimax", and "cluster"

# fit with the default arguments, then look at individual parts of the result
PCA2 <- principal(factoranalysis)
PCA2$rotation
PCA2$values
PCA2$communality
PCA2$factors
PCA2$scores

# for screeplot: the values stored in PCA2$values go on the y axis and their
# position (1, 2, 3, ...) on the x axis
eigenvalues <- PCA2$values
component <- seq_along(eigenvalues)
plot(component, eigenvalues, type = "l",
     xlab = "Component number", ylab = "Eigenvalue")

# scree plot
VSS.scree(factoranalysis)

The column h2 gives the communalities and the column u2 gives the uniquenesses (u2 = 1 - h2).

Communality is the share of an item's variance that is explained by the extracted factors, i.e. the variance it shares with the other items. Uniqueness is the remaining share, which the factors do not explain: variance specific to the item plus measurement error.



Tuesday, July 3, 2018

Basics of R- Session 10- Data Visualization-4 Tree map


library(treemap)

# index: the categorical variable (or variables) that define the rectangles
# vSize: the scale variable that sets the size of each rectangle
treemap(
  mbaper,
  index = "Gender_MF",
  vSize = "Percentage_in_10_Class"
)

# two categorical variables in the index
treemap(
  mbaper,
  index = c("Gender_MF", "Previous_Degree"),
  vSize = "Percentage_in_10_Class"
)


# interactive treemap using itreemap; index and vSize have the same meaning
# as above
treemap::itreemap(
  mbaper,
  index = "Gender_MF",
  vSize = "Percentage_in_10_Class"
)

Tuesday, June 19, 2018

Basics of R- Session 9- Data Visualization-3 using facets for adding layers in ggplot2


# Use of facet in ggplot for adding layers
# ggplot2 is loaded here because nothing earlier in this script loads it.
library(ggplot2)

# Columns are referred to by bare name inside aes() and facet_grid(); they are
# looked up in the `mbaper` data frame passed to ggplot(), and the axis labels
# then show the plain column name.

# dot plot of the class-10 percentage, one panel per gender
ggplot(mbaper, aes(x = Percentage_in_10_Class)) +
  geom_dotplot() +
  facet_grid(~ Gender_MF)

# panels for each gender / previous degree combination
ggplot(mbaper, aes(x = Percentage_in_10_Class)) +
  geom_dotplot() +
  facet_grid(~ Gender_MF + Previous_Degree)

# gender in rows, previous degree and place in columns
ggplot(mbaper, aes(x = Percentage_in_10_Class)) +
  geom_dotplot() +
  facet_grid(Gender_MF ~ Previous_Degree + Place_you_belong_to)

# the same three layouts for a bar chart of perceivedscorecat
ggplot(mbaper, aes(x = perceivedscorecat)) +
  geom_bar() +
  facet_grid(~ Gender_MF)

ggplot(mbaper, aes(x = perceivedscorecat)) +
  geom_bar() +
  facet_grid(~ Gender_MF + Previous_Degree)

ggplot(mbaper, aes(x = perceivedscorecat)) +
  geom_bar() +
  facet_grid(Gender_MF ~ Previous_Degree + Place_you_belong_to)

Basics of R- session 8- data visualization-2

# ggplot2 is loaded here because nothing earlier in this script loads it.
library(ggplot2)

# Columns are referred to by bare name inside aes(); they are looked up in the
# `mbaper` data frame passed to ggplot().

# one categorical and one scale variable
# x axis categorical, y axis scale
# box plot
ggplot(mbaper, aes(x = perceivedscorecat, y = Percentage_in_12_Class)) +
  geom_boxplot()
ggplot(mbaper, aes(x = perceivedscorecat, y = Percentage_in_12_Class)) +
  geom_col()
ggplot(mbaper, aes(x = perceivedscorecat, y = Percentage_in_12_Class)) +
  geom_count()
ggplot(mbaper, aes(x = perceivedscorecat, y = Percentage_in_12_Class)) +
  geom_bin2d()
ggplot(mbaper, aes(x = perceivedscorecat, y = Percentage_in_12_Class)) +
  geom_jitter()
ggplot(mbaper, aes(x = perceivedscorecat, y = Percentage_in_12_Class)) +
  geom_violin()


# one categorical and one scale variable
# x axis scale, y axis categorical
# box plot
ggplot(mbaper, aes(x = Percentage_in_12_Class, y = perceivedscorecat)) +
  geom_bin2d()
ggplot(mbaper, aes(x = Percentage_in_12_Class, y = perceivedscorecat)) +
  geom_jitter()

# better to have x axis as categorical

# multiple variables
# 3 variables
# 2 scale and one categorical
ggplot(mbaper, aes(x = Percentage_in_12_Class, y = Percentage_in_10_Class)) +
  geom_point()

# add more layer in terms of colour
ggplot(mbaper, aes(x = Percentage_in_12_Class, y = Percentage_in_10_Class)) +
  geom_point(aes(color = perceivedscorecat))

ggplot(mbaper, aes(x = Percentage_in_12_Class, y = Percentage_in_10_Class)) +
  geom_point(aes(color = Previous_Degree))

# add more layer in terms of shape
ggplot(mbaper, aes(x = Percentage_in_12_Class, y = Percentage_in_10_Class)) +
  geom_point(aes(shape = perceivedscorecat))

# divide it into parts use facets
# facet_wrap()
ggplot(mbaper, aes(x = Percentage_in_12_Class, y = Percentage_in_10_Class)) +
  geom_point() +
  facet_wrap(~ perceivedscorecat)

# extra facet
ggplot(mbaper, aes(x = Percentage_in_12_Class, y = Percentage_in_10_Class)) +
  geom_point() +
  facet_wrap(~ perceivedscorecat + Marital_status)

ggplot(mbaper, aes(x = Percentage_in_12_Class, y = Percentage_in_10_Class)) +
  geom_point() +
  facet_wrap(perceivedscorecat ~ Marital_status)

# we can add colour also
ggplot(mbaper, aes(x = Percentage_in_12_Class, y = Percentage_in_10_Class)) +
  geom_point(aes(color = Previous_Degree)) +
  facet_wrap(~ perceivedscorecat)

Basics of R session 7- Data Visualization Mosaic Plot

# Use library vcd
# mosaic will be used for categorical variables
# Import the file MBAdata.csv and save it as an R object mbaper
# import the file from the location-

mbaper <- read.csv("D:/1 Teaching Material/R/importfile/MBAdata.csv")

library(vcd)
## Loading required package: grid
# here the data set used is mbaper, and the variable used is Marital_status
# NOTE(review): other sessions refer to a column named Gender_MF; `mbaper$Gender`
# can resolve through partial matching of `$`, but the `data = mbaper` formula
# form needs the exact column name -- confirm against the CSV.

mosaic(~mbaper$Marital_status)
mosaic(~Marital_status, data = mbaper)
# here the output will be different in terms of labelling: the blocks are
# labelled with the variable Marital_status, and the title is the name of
# the data

# adding variables
mosaic(~mbaper$Gender + mbaper$Marital_status + mbaper$Place_you_belong_to)
mosaic(~Gender + Marital_status + Place_you_belong_to, data = mbaper)

# cross tab mosaic
mosaic(mbaper$Gender ~ mbaper$Marital_status)

# adding variables
mosaic(mbaper$Gender ~ mbaper$Marital_status + mbaper$perceivedscorecat)
mosaic(mbaper$Gender ~ mbaper$Marital_status + mbaper$perceivedscorecat + mbaper$Place_you_belong_to)
mosaic(mbaper$Gender ~ mbaper$Marital_status)

# rotating the labels for better visibility
mosaic(~mbaper$Gender + mbaper$Marital_status, labeling = labeling_border(rot_labels = c(45, 45, 45, 45)))

# if there are null blocks or no observations in a combination cell, nothing
# will be displayed if zero_size = 0
mosaic(~mbaper$Gender + mbaper$STATE, zero_size = 0)

# for colouring the mosaic with specific colours
mosaic(~mbaper$Marital_status, gp = gpar(fill = c("red", "green")))

# different types of shading based on residuals
mosaic(~mbaper$Gender + mbaper$Marital_status, gp = shading_hcl)
mosaic(~mbaper$Gender + mbaper$Marital_status, gp = shading_hsv)
mosaic(~mbaper$Gender + mbaper$Marital_status, gp = shading_max)
mosaic(~mbaper$Gender + mbaper$Marital_status, gp = shading_Friendly)
mosaic(~mbaper$Gender + mbaper$Marital_status, gp = shading_Friendly2)
mosaic(~mbaper$Gender + mbaper$Marital_status, gp = shading_sieve)
mosaic(~mbaper$Gender + mbaper$Marital_status, gp = shading_binary)