###################################################################################################################
########    A simulation study based on the Iris dataset for comparing several classification methods     #########
########    The results are given in (the new) Section 6 of the paper                                     ######### 
###################################################################################################################

getwd()
setwd("\\\\filestore.soton.ac.uk/users/wl/mydocuments/mydocuments/MyResearch/My papers/inProgress/Classification/2018-8/R code")
getwd()  #To make sure that the dataset is in the right folder/directory

iris<-read.csv("iris.csv",head=T)

# Variables used = Sepal length and width
irism<-iris[,c(1,2,5)]
attach(irism)


sigma1<-cov(irism[Class=="Iris-setosa",1:2])
sigma2<-cov(irism[Class=="Iris-versicolor",1:2])
sigma3<-cov(irism[Class=="Iris-virginica",1:2])


#### Select one of the three cases below to run #### 
## Case 1: All population distributions are close to each other
mu1<-colMeans(irism[Class=="Iris-setosa",1:2])+c(0,-0.5)
mu2<-colMeans(irism[Class=="Iris-versicolor",1:2])
mu3<-colMeans(irism[Class=="Iris-virginica",1:2])

## Case 2: 2 population distributions are close + 1 is reasonablely Separate
mu1<-colMeans(irism[Class=="Iris-setosa",1:2])
mu2<-colMeans(irism[Class=="Iris-versicolor",1:2])
mu3<-colMeans(irism[Class=="Iris-virginica",1:2])


## Case 3: All 3 population distributions are reasonablely Separate
mu1<-colMeans(irism[Class=="Iris-setosa",1:2])
mu2<-colMeans(irism[Class=="Iris-versicolor",1:2])
mu3<-colMeans(irism[Class=="Iris-virginica",1:2])+c(1,0.5)


#### The function for the confidence-set classifier ####
classifierCS<-function(x,data,lambda)
  # x:      the vector of measurements for new case 
  # data:   the training dataset matrix with the Class given in the 1st column 
  # alpha:  1 - alpha is the proportion of correct classification of all future cases
  # lambda: the critical constant value for classification         
  #               (=9.175 for Q=10000, S=10000, p=2,c=3, ni=50)        
  #                which is computed using the program ConfidenceSetClassifier.R Part 2 
{
  p<- length(x)
  
  #group1
  Mx1<-data[data[,1]==1,2:(p+1)]
  mu1<-apply(Mx1,2,mean)
  sigma1<-cov(Mx1)
  N1<-dim(Mx1)[1]
  
  #group2
  Mx2<-data[data[,1]==2,2:(p+1)]
  mu2<-apply(Mx2,2,mean)
  sigma2<-cov(Mx2)
  N2<-dim(Mx2)[1] 
  
  #group3
  Mx3<-data[data[,1]==3,2:(p+1)]
  mu3<-apply(Mx3,2,mean)
  sigma3<-cov(Mx3)
  N3<-dim(Mx3)[1]
  
  A1<- t(x-mu1)%*%solve(sigma1)%*%(x-mu1)-lambda
  A2<- t(x-mu2)%*%solve(sigma2)%*%(x-mu2)-lambda
  A3<- t(x-mu3)%*%solve(sigma3)%*%(x-mu3)-lambda  
  
  if (A1>=0 & A2>=0 & A3>=0)
  {set<-9}      #{set<-"{ }"}, i.e. the set is empty
  if (A1<0 & A2>=0 & A3>=0)
  {set<-1}      #{set<-"{1}"}, i.e. the set contains 1 only
  if (A1>=0 & A2<0 & A3>=0)
  {set<-2}      #{set<-"{2}"}, i.e. the set contains 2 only
  if (A1>=0 & A2>=0 & A3<0)
  {set<-3}      #{set<-"{3}"}, i.e. the set contains 3 only
  if (A1<0 & A2<0 & A3>=0)
  {set<-12}     #{set<-"{1,2}"}, i.e. the set contains 1 and 2 only
  if (A1<0 & A2>=0 & A3<0)
  {set<-13}     #{set<-"{1,3}"}, i.e. the set contains 1 and 3 only
  if (A1>=0 & A2<0 & A3<0)
  {set<-23}     #{set<-"{2,3}"}, i.e. the set contains 2 and 3 only
  if (A1<0 & A2<0 & A3<0)
  {set<-123}     #{set<-"{1,2,3}"}, i.e. the set contains 1 and 2 and 3
  set
}


set.seed(101)
library(MASS) # simulate multivariate normal

alpha<-0.05
sim<-100 # number of simulation set
cases<-1000 # number of new cases for each class
lambda<-9.175     # This critical constant is computed using ConfidenceSetClassifier.R (Part 2)
                  # 9.257609 (1000*1000) seeds 101
                  # 9.187527  (10000*10000) seeds 101
                  # 8.18818   ((1000*1000) gamma=.80)

#####################################################
#  vector of % correct classified by each method    #
#  for each simulation set                          #
#####################################################
library("tree")
library("nnet")
library("e1071")

probconf<-c() 
probtree<-c()
problogit<-c()
probbayes<-c()
probsvm<-c()
sizeconf<-c()

# Start the clock!
ptm <- proc.time()


# One loop i for each simulated training dataset
for (j in 1:sim)
{

#############################################################################
##     simulate the training data for building the classifiers             ##
#############################################################################

y1<-mvrnorm(n = 50, mu1, sigma1)
y2<-mvrnorm(n = 50, mu2, sigma2)
y3<-mvrnorm(n = 50, mu3, sigma3)
yclass<-as.data.frame(rbind(y1,y2,y3))
yclass$Class<-as.factor(c(rep(1,50),rep(2,50),rep(3,50)))

data<-cbind(yclass$Class,yclass$SepalLength,yclass$SepalWidth)


###################################################################################################
##                  simulate 3000 future cases, 1000 from each distribution                      ##
###################################################################################################

y1<-mvrnorm(n = cases, mu1, sigma1)
y2<-mvrnorm(n = cases, mu2, sigma2)
y3<-mvrnorm(n = cases, mu3, sigma3)
ycase<-as.data.frame(rbind(y1,y2,y3))
ycase$Class<-c(rep(1,cases),rep(2,cases),rep(3,cases))
ny<-nrow(ycase)


####################################################
## Classification by Classification Tree          ##
####################################################

#library("tree")
modtree<-tree(Class~SepalLength+SepalWidth,data=yclass)  ##An automatic selection of the best set of variables is involved
#plot(modtree)
#text(modtree)

################################################################
## Classification by  Multinomial Logistic regression         ##
################################################################

#library("nnet")
modlogit<-multinom(Class~SepalLength+SepalWidth,data=yclass)  ##No selection of the best set of variables is involved
#modlogit<-step(mod1)

###############################
##     Bayes Classifier      ##
###############################

#library("e1071")
modbayes<-naiveBayes(Class~SepalLength+SepalWidth,data=yclass)  ##No selection of the best set of variables is involved

###############################
##  Classification by SVM    ##
###############################

#library(e1071)
y <- yclass$Class
x <- subset(yclass,select=c(SepalLength,SepalWidth))
modsvm <- svm(x, y)   ##An automatic selection of the best kernal is involved

###########################################################################
##   Confidence Set method is given by the function classifierCS above   ##
###########################################################################


## applying all classifiers to each new case                   
for (i in 1:ny)
{
  ycase$conf[i]<-classifierCS(c(ycase$SepalLength[i],ycase$SepalWidth[i]),data,lambda=lambda)
  ycase$tree[i]<-predict(modtree,newdata=ycase[i,1:2],type="class")
  ycase$logit[i]<-predict(modlogit,newdata=ycase[i,1:2],type="class")
  ycase$bayes[i]<-predict(modbayes,type="class",newdata=ycase[i,1:2])
  ycase$svmm[i]<- predict(modsvm, ycase[i,1:2],type="class")
  
  ####### for Calculate the size of confidence set
  if (ycase$conf[i]==9){ycase$size[i]<-0}
  if (ycase$conf[i]<9){ycase$size[i]<-1}
  if (ycase$conf[i]>9 && ycase$conf[i]<123 ){ycase$size[i]<-2}
  if (ycase$conf[i]==123){ycase$size[i]<-3}
  }

#######################################################
####   Calculate prob.of correct classified   #########
#######################################################

##### Confidence Set Method #####
##### Selected correct classified 
correct1 <- nrow(subset(ycase, (Class==1 & conf==1)|(Class==1 & conf==12)|(Class==1 & conf==13)|(Class==1 & conf==123)))
correct2 <- nrow(subset(ycase, (Class==2 & conf==2)|(Class==2 & conf==12)|(Class==2 & conf==23)|(Class==2 & conf==123)))
correct3 <- nrow(subset(ycase, (Class==3 & conf==3)|(Class==3 & conf==13)|(Class==3 & conf==23)|(Class==3 & conf==123)))

####### Calculate the probability 
probconf[j]<-(correct1+correct2+correct3)/ny
####### Calculate the size of confidence set
sizeconf[j]<-mean(ycase$size)


##### Classification Tree  ##
##### Selected correct classified
correct1 <- nrow(subset(ycase, (Class==1 & tree==1)))
correct2 <- nrow(subset(ycase, (Class==2 & tree==2)))
correct3 <- nrow(subset(ycase, (Class==3 & tree==3)))

####### Calculate the probability 
probtree[j]<-(correct1+correct2+correct3)/ny

##### Logistic Regression  ##
##### Selected correct classified
correct1 <- nrow(subset(ycase, (Class==1 & logit==1)))
correct2 <- nrow(subset(ycase, (Class==2 & logit==2)))
correct3 <- nrow(subset(ycase, (Class==3 & logit==3)))

####### Calculate the probability 
problogit[j]<-(correct1+correct2+correct3)/ny

##### Bayesian Method  ##
##### Selected correct classified
correct1 <- nrow(subset(ycase, (Class==1 & bayes==1)))
correct2 <- nrow(subset(ycase, (Class==2 & bayes==2)))
correct3 <- nrow(subset(ycase, (Class==3 & bayes==3)))

####### Calculate the probability 
probbayes[j]<-(correct1+correct2+correct3)/ny

##### Support Vector Machine  ##
##### Selected correct classified
correct1 <- nrow(subset(ycase, (Class==1 & svmm==1)))
correct2 <- nrow(subset(ycase, (Class==2 & svmm==2)))
correct3 <- nrow(subset(ycase, (Class==3 & svmm==3)))

####### Calculate the probability 
probsvm[j]<-(correct1+correct2+correct3)/ny

## waiting time indication chart
pie(c(j,sim-j),radius=1,clockwise=T)
}

# Stop the clock
time<-proc.time() - ptm
time


sim
cases
mu1
mu2
mu3
mean(sizeconf)  ## \bar{M} in Table 2 for CS classifier
sd(sizeconf)
sum(probconf>(1-alpha))/sim  ## This gives the \hat{\gamma} in Table 2
mean(probconf)  ## This gives the \bar{p} in Table 2
sd(probconf)
sum(probtree>(1-alpha))/sim  ## This gives the \hat{\gamma} for Tree Classifier
mean(probtree)
sd(probtree)
sum(problogit>(1-alpha))/sim  ## This gives the \hat{\gamma}
mean(problogit)
sd(problogit)
sum(probbayes>(1-alpha))/sim  ## This gives the \hat{\gamma}
mean(probbayes)
sd(probbayes)
sum(probsvm>(1-alpha))/sim  ## This gives the \hat{\gamma}
mean(probsvm)
sd(probsvm)


table(ycase$Class,ycase$conf,dnn=c("Observed","Predicted"))
table(ycase$Class,ycase$tree,dnn=c("Observed","Predicted"))
table(ycase$Class,ycase$logit,dnn=c("Observed","Predicted"))
table(ycase$Class,ycase$bayes,dnn=c("Observed","Predicted"))
table(ycase$Class,ycase$svmm,dnn=c("Observed","Predicted"))


# plot proportion of correct classification over the "sim" simulations for all the classification methods
plot(probconf,ylim=c(0.6,1),main=paste("Simulation =",  sim, ", New cases =",cases, " per class", sep=" "), xlab ="",ylab ="")
points(probtree,col="red")
points(problogit,col="green")
points(probbayes,col="blue")
points(probsvm,col="yellow")
legend("bottomright", inset=.01, title="Classification Methods",
       c("Confidence Set Method","Classification Tree","Logistic Regression","Bayesian Method","Support Vector Machine"),
       col=c("black","red","green","blue","yellow"),pch=1,cex=0.8)