# Market Basket Analysis
library(arules)
library(arulesViz)
# Load the source data into the data frame `mbaper`.
# stringsAsFactors = TRUE is set explicitly: since R 4.0 read.csv() leaves text
# columns as character by default, while the later coercion to "transactions"
# is meant to work on factor/categorical columns.
mbaper <- read.csv(
  "E:/2 presentation for class/R/1 R Open elective/data set/mbadata.csv",
  stringsAsFactors = TRUE
)
# Browse the data read-only, and only in an interactive session. fix() opens an
# editor and writes its result back to `mbaper`, so it can alter the data by
# accident and cannot run when the script is sourced non-interactively.
if (interactive()) {
  View(mbaper)
}
str(mbaper)
# Remove the first four variables, which are scale in nature.
# drop = FALSE keeps a data frame even if only one column remains.
mbaper <- mbaper[, -(1:4), drop = FALSE]
# Study the categorical variables using tables and cross-tabs to find the
# variables that have associations or some other kind of relationship.
# Convert the data frame into the "transactions" type, using all the
# factor/categorical variables in this analysis.
mbaper <- as(mbaper, "transactions")
# Check the data set after converting it to transactions.
# Prints every transaction; can be long for large data sets.
inspect(mbaper)
# Only the first few transactions.
inspect(head(mbaper))
# First rows of the item-level metadata slot.
head(mbaper@itemInfo)
# First elements of the transaction-level and item-level metadata combined.
head(c(mbaper@itemsetInfo, mbaper@itemInfo))
# Mine association rules from the transactions with apriori().
# Show the arguments that apriori() accepts.
args(apriori)
# First run: package-default mining parameters.
rules1 <- apriori(mbaper)
# Explicit thresholds: support 0.10, confidence 0.80.
rules1 <- apriori(
  mbaper,
  parameter = list(support = 0.10, confidence = 0.80)
)
# Higher support threshold: 0.50.
rules1 <- apriori(
  mbaper,
  parameter = list(support = 0.50, confidence = 0.80)
)
# minlen = 1 and maxlen = 4 bound the number of items per rule, so a rule
# contains at most 4 items. Each call above overwrites rules1; the result of
# this last call is the one used from here on.
rules1 <- apriori(
  mbaper,
  parameter = list(minlen = 1, support = 0.10, confidence = 0.80, maxlen = 4)
)
#----------------------------#
# Check the frequency distribution of the items in the transactions.
itemFrequency(mbaper)
# The same frequencies as absolute counts.
itemFrequency(mbaper, type = "absolute")
# Plot the item frequencies of the transactions.
# itemFrequencyPlot() is called on the transactions here, not on rules1:
# passing the rules object directly fails and would stop a sourced run.
itemFrequencyPlot(mbaper)
# To plot frequencies for the items used in the rules, extract them with
# items() first.
items(rules1)
itemFrequencyPlot(items(rules1))
#--------------------------#
# Inspect the rules created.
inspect(rules1)
# First three rules. head() is used instead of rules1[1:3] so the call still
# works when fewer rules were mined than requested.
inspect(head(rules1, n = 3))
# First ten rules.
inspect(head(rules1, n = 10))
#-----------------------------------------------------#
# Top rules ranked by each quality measure.
inspect(head(sort(rules1, by = "support"), n = 10))
inspect(head(sort(rules1, by = "confidence"), n = 10))
inspect(head(sort(rules1, by = "lift"), n = 100))
#---------------------------------------------------------------#
# When we want to study some specific combination of LHS and RHS:
# rules with RHS containing "perceivedscorecat=required skills" only.
# Each call below overwrites rules2; only the last result is inspected.
# (An exact duplicate of the first call was removed; its result was
# overwritten anyway.)
# RHS item named, `default` left at the package default.
rules2 <- apriori(
  mbaper,
  parameter = list(minlen = 1, support = 0.001, confidence = 0.51, maxlen = 4),
  appearance = list(rhs = c("perceivedscorecat=required skills"))
)
# Same RHS item, with default = "rhs" for all items not listed.
rules2 <- apriori(
  mbaper,
  parameter = list(minlen = 1, support = 0.001, confidence = 0.51, maxlen = 4),
  appearance = list(
    rhs = c("perceivedscorecat=required skills"),
    default = "rhs"
  )
)
# Same RHS item, with default = "lhs" for all items not listed.
rules2 <- apriori(
  mbaper,
  parameter = list(minlen = 1, support = 0.001, confidence = 0.51, maxlen = 4),
  appearance = list(
    rhs = c("perceivedscorecat=required skills"),
    default = "lhs"
  )
)
inspect(head(sort(rules2, by = "lift"), n = 10))
#--------------------------------------------------#
# Rules whose LHS is restricted to "perceivedscorecat=required skills";
# all other items default to the RHS. This overwrites the earlier rules2.
rules2 <- apriori(
  mbaper,
  parameter = list(minlen = 1, support = 0.05, confidence = 0.8),
  appearance = list(
    default = "rhs",
    lhs = "perceivedscorecat=required skills"
  )
)
# Ten rules with the highest lift.
inspect(head(sort(rules2, by = "lift"), n = 10))
#___________________________________________________________________________#
# Visualise the mined rules with arulesViz.
library(arulesViz)
# Default plot of rules2.
plot(rules2)
# Graph and grouped views of rules1.
plot(rules1, method = "graph")
plot(rules1, method = "grouped")
# Interactive plotly plot. plotly_arules() is deprecated in arulesViz; the
# plot() engine argument is the supported way to request plotly.
plot(rules1, engine = "plotly")
#----------------#
# High-support rules, shown as an interactive graph without shading.
rules3 <- apriori(mbaper, parameter = list(supp = 0.50, conf = 0.80))
# `interactive = TRUE` is deprecated in arulesViz in favour of `engine`.
# NOTE(review): if the installed arulesViz no longer offers the "interactive"
# engine for graphs, engine = "htmlwidget" is the likely substitute - confirm.
plot(rules3, method = "graph", engine = "interactive", shading = NA)