#############################################################################################################
####       Classification Method using Exact confidence Set                                              ####
#### Part 1: read in the training data set cT                                                            ####
####        cc: number of classes                                                                        ####
####        pp: feature vector dimension                                                                 ####
####        nni: number of training observations from the i-th class, i=1, ..., cc                       ####
#### Part 2: computer the critical constant lambda, which depends on                                     ####
####        alpha: 1-alpha is the proportion of correct classifications of future objects                ####
####        gamma: confidence about the randomness in training data set cT                               ####
####        cc, pp, nn1,...,nncc: specified in Part 1 above                                              ####                                                    
####        SS: number of outlayer simulations (affect the accuracy of lambda due to simul randomness)   ####
####        QQ: number of innerlayer simulations (affect the accuracy of lambda due to simul randomness) ####
####        VECTr: proportions of the future objects in the cc classes                                   ####
#### Part 3: Classify a future observaed object, with inputs                                             ####
####        yy : the vector of measurement(s) for new case                                               ####
#############################################################################################################

################
#### Part 1 ####
################
getwd()
setwd("\\\\filestore.soton.ac.uk/users/wl/mydocuments/mydocuments/MyResearch/My papers/inProgress/Classification/Paper2/R")
getwd()  #To make sure that the dataset is in the right folder/directory

iris<-read.csv("iris.csv",head=T)

# Data used = Sepal length and width only
irism<-iris[,c(1,2,5)]
attach(irism)

mean1<-apply(irism[Class=="Iris-setosa",1:2],2,mean)
mean2<-apply(irism[Class=="Iris-versicolor",1:2],2,mean)
mean3<-apply(irism[Class=="Iris-virginica",1:2],2,mean)
sigma1<-cov(irism[Class=="Iris-setosa",1:2])
sigma2<-cov(irism[Class=="Iris-versicolor",1:2])
sigma3<-cov(irism[Class=="Iris-virginica",1:2])


################
#### Part 2 ####
################
cc <- 3   # number of classes
pp <- 2   # dimension of feature measurement
nn = c(50,50,50)  # samples from the classes
alpha <- 0.05  # 1- alpha  is the proportion of correct classification
gamma <- 0.95  # confidence level about the randomness in the training data
SS <- 10000  # number of outer simulations - one may increase SS to improve the accuracy of lambda due to simulation randomness
QQ <- 10000  # number of inner simulations - one may increase QQ to improve the accuracy of lambda due to simulation randomness

VECTr <- c(0.1, 0.8, 0.1) #c(1/3, 1/3, 1/3) #c(0.1, 0.45, 0.45)  #c(0.1, 0.7, 0.2)  #c(0.3, 0.4, 0.3)  #proportions of the future objects in the cc classes

##Define the function that is the left-side of (10)##
LeftSide10 <- function(lambdaS,VECTr, UlS, INVsumvvtS, WlQ, cc, pp, nn, QQ){ #the function in (10)
  y <- 0
  for (ll in 1:cc){
    VECTval <- rep(0,QQ)
    
    for (qq in 1:QQ){
      VECTval[qq] <- t(WlQ[,ll,qq]-UlS[,ll])%*%INVsumvvtS[,,ll]%*%(WlQ[,ll,qq]-UlS[,ll])  #the expression in the prob sign for class l in (10)
    }
    
    y <- y + VECTr[ll]*mean(VECTval<=lambdaS)
  }
  return(y)
} 


library("MASS") # simulate mutivariate normal random vectors

Ip <- diag(x = 1, nrow=pp, ncol=pp)

# Start the clock!
ptm <- proc.time()

set.seed(10) #set.seed(5)  #set.seed(18)  #set.seed(100)
#1. S loop
lambdaS <- c()  # record the lambdaS for each of the SS simulations
iteRec <- c()   # record the number of iterations in the bi-section method to find lambdaS for each of the SS simulations
for (s in 1:SS ){

  UlS <- matrix(0, nrow=pp, ncol=cc)  #generate the random u, v and w required in each s loop
  INVsumvvtS <- array(0, c(pp,pp,cc))
  for(ll in 1:cc){
    UlS[,ll] <- mvrnorm(n=1,rep(0,pp),Ip/nn[ll])
    
    sumvvt <- matrix(0,nrow=pp,ncol=pp)
    for (i in 1:(nn[ll]-1) ){
      v <- mvrnorm(n=1,rep(0,pp),Ip) 
      sumvvt <- sumvvt + v%*%t(v)
    }
    INVsumvvtS[ , ,ll] <- solve(sumvvt / (nn[ll]-1))
  }
  
  WlQ <- array(0,c(pp,cc,QQ))
  for(qq in 1:QQ){
    for(ll in 1:cc){
      WlQ[,ll,qq] <- mvrnorm(n=1,rep(0,pp),Ip) 
    }
  }
  
    
  #Find the lambda_s for this simulation repeat using the bisection method
  clow = qchisq(1-alpha, df=pp, ncp=0)/2  #the chi-square quantile is the lambda for the case of known mu and Sigma in Liu et al (2019) 
  c <- clow
  flow <- LeftSide10(c,VECTr=VECTr, UlS=UlS, INVsumvvtS=INVsumvvtS, WlQ=WlQ, cc=cc, pp=pp, nn=nn, QQ=QQ)  #Make sure this <  1-alpha 
  if( flow > (1-alpha) ) cat("the lower bound of root",c,"is not small enough","\n")
  cupp = 9.175*1.5  # 9.175 is the conservative lambda in Liu et al (2019)   
  c <- cupp
  fupp <- LeftSide10(c,VECTr=VECTr, UlS=UlS, INVsumvvtS=INVsumvvtS, WlQ=WlQ, cc=cc, pp=pp, nn=nn, QQ=QQ)  #Make sure this > 1-alpha
  if( fupp < (1-alpha) ) cat("the upper bound of root",c,"is not large enough","\n")
  
  tol <- 0.0001 #absolute error of the solution c
  ite <- 0 #Number of iterations
  cmid <- (clow+cupp)/2
  while ( (cupp-clow)>tol ){
    cmid <- (clow+cupp)/2
    c <- cmid
    fmid <- LeftSide10(c,VECTr=VECTr, UlS=UlS, INVsumvvtS=INVsumvvtS, WlQ=WlQ, cc=cc, pp=pp, nn=nn, QQ=QQ)
    if ( fmid == 1-alpha ){
      lambdaS[s] = cmid
      break
    } else if ( (flow-1+alpha)*(fmid-1+alpha) <0){
      cupp <- cmid
      fupp <- fmid
    } else {
      clow <- cmid
      flow <- fmid
    }
    ite <- ite+1
#    if( ite > 50 ) cat("at iteration",ite,"the root is not found","\n")
    if( ite > 50 ){
      lambdaS[s] = cmid
      ite <- 99
      break
    } 
  }
  lambdaS[s] = cmid
  #LeftSide10(cmid,VECTr=VECTr, UlS=UlS, INVsumvvtS=INVsumvvtS, WlQ=WlQ, cc=cc, pp=pp, nn=nn, QQ=QQ)  #Make sure this is close to 0
  iteRec[s] <- ite
}  
    
# ordered lampdaS_[s]
lambdaS <- sort(lambdaS)
lambdaEXA <- lambdaS[round((gamma)*SS)] 
lambdaEXA

lambdaCON <- 9.175  # the conservative lambda given in Liu et al (2019)

# Stop the clock
time<-proc.time() - ptm
time

sum(iteRec >= 50)


################
#### Part 3 ####
################
### Classify a given yy by checking whether it is in each of the cc acceptance sets ###

yy <- c(4.79, 2.35)  #Observed feature of a future object

sigma1M <- solve(sigma1)
sigma2M <- solve(sigma2)
sigma3M <- solve(sigma3)

A1CON<- sign( lambdaCON - t(yy-mean1)%*%sigma1M%*%(yy-mean1) )  # +ve sign means "in the conservative acceptance set of liu et al (2019)"
A2CON<- sign( lambdaCON - t(yy-mean2)%*%sigma2M%*%(yy-mean2) )
A3CON<- sign( lambdaCON - t(yy-mean3)%*%sigma3M%*%(yy-mean3) ) 
c(A1CON,A2CON,A3CON) 

A1EXA<- sign( lambdaEXA - t(yy-mean1)%*%sigma1M%*%(yy-mean1) )  # +ve sign means "in the newly proposed exact acceptance set "
A2EXA<- sign( lambdaEXA - t(yy-mean2)%*%sigma2M%*%(yy-mean2) )
A3EXA<- sign( lambdaEXA - t(yy-mean3)%*%sigma3M%*%(yy-mean3) ) 
c(A1EXA,A2EXA,A3EXA) 

## When pp=2, the acceptance sets can be drawn as contours ##
xliml = 3.93
xlimu = 8.53
yliml = 1.75
ylimu = 4.59

plot(yy[1],yy[2],type="p",pch=18,xlim=c(xliml,xlimu),ylim=c(yliml,ylimu),xlab="", ylab="")  # Draw this future object yy
#plot(yy[1],yy[2],type="p",pch=18,xlim=c(xliml,xlimu),ylim=c(yliml,ylimu))  # Draw this future object yy
text(yy[1],yy[2]-0.2,"(4.79, 2.35)")

##Contour mesh
x1 = seq(xliml, xlimu, 0.02)
x2 = seq(yliml, ylimu, 0.02)
I = length(x1)
J = length(x2)
Zz = matrix(0, nrow=I, ncol=J)

##Contour of A1
#Function fA1#
fA1 <- function(yy){   
  val <- t(yy-mean1)%*%sigma1M%*%(yy-mean1)
  return(val)
} 

for (i in 1:I){
  for (j in 1:J){
    Zz[i,j] = fA1(c(x1[i],x2[j]))
  }
}

vCON = c(lambdaCON)    # conservative one
fA1CONCont <- contourLines(x1,x2,Zz,levels=vCON)
lines(fA1CONCont[[1]][[2]],fA1CONCont[[1]][[3]],type="l", lty=2)
#contour(x1,x2,Zz,levels=v,  xlab="x_1", ylab="x_2")

vEXA = c(lambdaEXA)   # exact one
fA1EXACont <- contourLines(x1,x2,Zz,levels=vEXA)
lines(fA1EXACont[[1]][[2]],fA1EXACont[[1]][[3]],type="l", lty=1, col="red")

text(mean1[1],mean1[2]+0.2,"Class 1")
points(mean1[1],mean1[2],type="p", pch=3)   #Centre of the ellips

##Contour of A2
#Function fA2#
fA2 <- function(yy){   
  val <- t(yy-mean2)%*%sigma2M%*%(yy-mean2)
  return(val)
} 

for (i in 1:I){
  for (j in 1:J){
    Zz[i,j] = fA2(c(x1[i],x2[j]))
  }
}

fA2CONCont <- contourLines(x1,x2,Zz,levels=vCON)    # conservative one
lines(fA2CONCont[[1]][[2]],fA2CONCont[[1]][[3]],type="l", lty=2)

fA2EXACont <- contourLines(x1,x2,Zz,levels=vEXA)   # exact one
lines(fA2EXACont[[1]][[2]],fA2EXACont[[1]][[3]],type="l", lty=1, col="red")

text(mean2[1],mean2[2]-0.2,"Class 2")
points(mean2[1],mean2[2],type="p", pch=3)   #Centre of the ellips

##Contour of A3
#Function fA3#
fA3 <- function(yy){   
  val <- t(yy-mean3)%*%sigma3M%*%(yy-mean3)
  return(val)
} 

for (i in 1:I){
  for (j in 1:J){
    Zz[i,j] = fA3(c(x1[i],x2[j]))
  }
}

fA3CONCont <- contourLines(x1,x2,Zz,levels=vCON)    # conservative one
lines(fA3CONCont[[1]][[2]],fA3CONCont[[1]][[3]],type="l", lty=2)

fA3EXACont <- contourLines(x1,x2,Zz,levels=vEXA)   # exact one
lines(fA3EXACont[[1]][[2]],fA3EXACont[[1]][[3]],type="l", lty=1, col="red")

text(mean3[1],mean3[2]+0.2,"Class 3")
points(mean3[1],mean3[2],type="p", pch=3)   #Centre of the ellips