############################################################################################################# #### Classification Method using Exact confidence Set #### #### Part 1: read in the training data set cT #### #### cc: number of classes #### #### pp: feature vector dimension #### #### nni: number of training observations from the i-th class, i=1, ..., cc #### #### Part 2: computer the critical constant lambda, which depends on #### #### alpha: 1-alpha is the proportion of correct classifications of future objects #### #### gamma: confidence about the randomness in training data set cT #### #### cc, pp, nn1,...,nncc: specified in Part 1 above #### #### SS: number of outlayer simulations (affect the accuracy of lambda due to simul randomness) #### #### QQ: number of innerlayer simulations (affect the accuracy of lambda due to simul randomness) #### #### VECTr: proportions of the future objects in the cc classes #### #### Part 3: Classify a future observaed object, with inputs #### #### yy : the vector of measurement(s) for new case #### ############################################################################################################# ################ #### Part 1 #### ################ getwd() setwd("\\\\filestore.soton.ac.uk/users/wl/mydocuments/mydocuments/MyResearch/My papers/inProgress/Classification/Paper2/R") getwd() #To make sure that the dataset is in the right folder/directory iris<-read.csv("iris.csv",head=T) # Data used = Sepal length and width only irism<-iris[,c(1,2,5)] attach(irism) mean1<-apply(irism[Class=="Iris-setosa",1:2],2,mean) mean2<-apply(irism[Class=="Iris-versicolor",1:2],2,mean) mean3<-apply(irism[Class=="Iris-virginica",1:2],2,mean) sigma1<-cov(irism[Class=="Iris-setosa",1:2]) sigma2<-cov(irism[Class=="Iris-versicolor",1:2]) sigma3<-cov(irism[Class=="Iris-virginica",1:2]) ################ #### Part 2 #### ################ cc <- 3 # number of classes pp <- 2 # dimension of feature measurement nn = c(50,50,50) # samples from the classes alpha <- 0.05 # 1- alpha is the proportion of correct classification gamma <- 0.95 # confidence level about the randomness in the training data SS <- 10000 # number of outer simulations - one may increase SS to improve the accuracy of lambda due to simulation randomness QQ <- 10000 # number of inner simulations - one may increase QQ to improve the accuracy of lambda due to simulation randomness VECTr <- c(0.1, 0.8, 0.1) #c(1/3, 1/3, 1/3) #c(0.1, 0.45, 0.45) #c(0.1, 0.7, 0.2) #c(0.3, 0.4, 0.3) #proportions of the future objects in the cc classes ##Define the function that is the left-side of (10)## LeftSide10 <- function(lambdaS,VECTr, UlS, INVsumvvtS, WlQ, cc, pp, nn, QQ){ #the function in (10) y <- 0 for (ll in 1:cc){ VECTval <- rep(0,QQ) for (qq in 1:QQ){ VECTval[qq] <- t(WlQ[,ll,qq]-UlS[,ll])%*%INVsumvvtS[,,ll]%*%(WlQ[,ll,qq]-UlS[,ll]) #the expression in the prob sign for class l in (10) } y <- y + VECTr[ll]*mean(VECTval<=lambdaS) } return(y) } library("MASS") # simulate mutivariate normal random vectors Ip <- diag(x = 1, nrow=pp, ncol=pp) # Start the clock! ptm <- proc.time() set.seed(10) #set.seed(5) #set.seed(18) #set.seed(100) #1. S loop lambdaS <- c() # record the lambdaS for each of the SS simulations iteRec <- c() # record the number of iterations in the bi-section method to find lambdaS for each of the SS simulations for (s in 1:SS ){ UlS <- matrix(0, nrow=pp, ncol=cc) #generate the random u, v and w required in each s loop INVsumvvtS <- array(0, c(pp,pp,cc)) for(ll in 1:cc){ UlS[,ll] <- mvrnorm(n=1,rep(0,pp),Ip/nn[ll]) sumvvt <- matrix(0,nrow=pp,ncol=pp) for (i in 1:(nn[ll]-1) ){ v <- mvrnorm(n=1,rep(0,pp),Ip) sumvvt <- sumvvt + v%*%t(v) } INVsumvvtS[ , ,ll] <- solve(sumvvt / (nn[ll]-1)) } WlQ <- array(0,c(pp,cc,QQ)) for(qq in 1:QQ){ for(ll in 1:cc){ WlQ[,ll,qq] <- mvrnorm(n=1,rep(0,pp),Ip) } } #Find the lambda_s for this simulation repeat using the bisection method clow = qchisq(1-alpha, df=pp, ncp=0)/2 #the chi-square quantile is the lambda for the case of known mu and Sigma in Liu et al (2019) c <- clow flow <- LeftSide10(c,VECTr=VECTr, UlS=UlS, INVsumvvtS=INVsumvvtS, WlQ=WlQ, cc=cc, pp=pp, nn=nn, QQ=QQ) #Make sure this < 1-alpha if( flow > (1-alpha) ) cat("the lower bound of root",c,"is not small enough","\n") cupp = 9.175*1.5 # 9.175 is the conservative lambda in Liu et al (2019) c <- cupp fupp <- LeftSide10(c,VECTr=VECTr, UlS=UlS, INVsumvvtS=INVsumvvtS, WlQ=WlQ, cc=cc, pp=pp, nn=nn, QQ=QQ) #Make sure this > 1-alpha if( fupp < (1-alpha) ) cat("the upper bound of root",c,"is not large enough","\n") tol <- 0.0001 #absolute error of the solution c ite <- 0 #Number of iterations cmid <- (clow+cupp)/2 while ( (cupp-clow)>tol ){ cmid <- (clow+cupp)/2 c <- cmid fmid <- LeftSide10(c,VECTr=VECTr, UlS=UlS, INVsumvvtS=INVsumvvtS, WlQ=WlQ, cc=cc, pp=pp, nn=nn, QQ=QQ) if ( fmid == 1-alpha ){ lambdaS[s] = cmid break } else if ( (flow-1+alpha)*(fmid-1+alpha) <0){ cupp <- cmid fupp <- fmid } else { clow <- cmid flow <- fmid } ite <- ite+1 # if( ite > 50 ) cat("at iteration",ite,"the root is not found","\n") if( ite > 50 ){ lambdaS[s] = cmid ite <- 99 break } } lambdaS[s] = cmid #LeftSide10(cmid,VECTr=VECTr, UlS=UlS, INVsumvvtS=INVsumvvtS, WlQ=WlQ, cc=cc, pp=pp, nn=nn, QQ=QQ) #Make sure this is close to 0 iteRec[s] <- ite } # ordered lampdaS_[s] lambdaS <- sort(lambdaS) lambdaEXA <- lambdaS[round((gamma)*SS)] lambdaEXA lambdaCON <- 9.175 # the conservative lambda given in Liu et al (2019) # Stop the clock time<-proc.time() - ptm time sum(iteRec >= 50) ################ #### Part 3 #### ################ ### Classify a given yy by checking whether it is in each of the cc acceptance sets ### yy <- c(4.79, 2.35) #Observed feature of a future object sigma1M <- solve(sigma1) sigma2M <- solve(sigma2) sigma3M <- solve(sigma3) A1CON<- sign( lambdaCON - t(yy-mean1)%*%sigma1M%*%(yy-mean1) ) # +ve sign means "in the conservative acceptance set of liu et al (2019)" A2CON<- sign( lambdaCON - t(yy-mean2)%*%sigma2M%*%(yy-mean2) ) A3CON<- sign( lambdaCON - t(yy-mean3)%*%sigma3M%*%(yy-mean3) ) c(A1CON,A2CON,A3CON) A1EXA<- sign( lambdaEXA - t(yy-mean1)%*%sigma1M%*%(yy-mean1) ) # +ve sign means "in the newly proposed exact acceptance set " A2EXA<- sign( lambdaEXA - t(yy-mean2)%*%sigma2M%*%(yy-mean2) ) A3EXA<- sign( lambdaEXA - t(yy-mean3)%*%sigma3M%*%(yy-mean3) ) c(A1EXA,A2EXA,A3EXA) ## When pp=2, the acceptance sets can be drawn as contours ## xliml = 3.93 xlimu = 8.53 yliml = 1.75 ylimu = 4.59 plot(yy[1],yy[2],type="p",pch=18,xlim=c(xliml,xlimu),ylim=c(yliml,ylimu),xlab="", ylab="") # Draw this future object yy #plot(yy[1],yy[2],type="p",pch=18,xlim=c(xliml,xlimu),ylim=c(yliml,ylimu)) # Draw this future object yy text(yy[1],yy[2]-0.2,"(4.79, 2.35)") ##Contour mesh x1 = seq(xliml, xlimu, 0.02) x2 = seq(yliml, ylimu, 0.02) I = length(x1) J = length(x2) Zz = matrix(0, nrow=I, ncol=J) ##Contour of A1 #Function fA1# fA1 <- function(yy){ val <- t(yy-mean1)%*%sigma1M%*%(yy-mean1) return(val) } for (i in 1:I){ for (j in 1:J){ Zz[i,j] = fA1(c(x1[i],x2[j])) } } vCON = c(lambdaCON) # conservative one fA1CONCont <- contourLines(x1,x2,Zz,levels=vCON) lines(fA1CONCont[[1]][[2]],fA1CONCont[[1]][[3]],type="l", lty=2) #contour(x1,x2,Zz,levels=v, xlab="x_1", ylab="x_2") vEXA = c(lambdaEXA) # exact one fA1EXACont <- contourLines(x1,x2,Zz,levels=vEXA) lines(fA1EXACont[[1]][[2]],fA1EXACont[[1]][[3]],type="l", lty=1, col="red") text(mean1[1],mean1[2]+0.2,"Class 1") points(mean1[1],mean1[2],type="p", pch=3) #Centre of the ellips ##Contour of A2 #Function fA2# fA2 <- function(yy){ val <- t(yy-mean2)%*%sigma2M%*%(yy-mean2) return(val) } for (i in 1:I){ for (j in 1:J){ Zz[i,j] = fA2(c(x1[i],x2[j])) } } fA2CONCont <- contourLines(x1,x2,Zz,levels=vCON) # conservative one lines(fA2CONCont[[1]][[2]],fA2CONCont[[1]][[3]],type="l", lty=2) fA2EXACont <- contourLines(x1,x2,Zz,levels=vEXA) # exact one lines(fA2EXACont[[1]][[2]],fA2EXACont[[1]][[3]],type="l", lty=1, col="red") text(mean2[1],mean2[2]-0.2,"Class 2") points(mean2[1],mean2[2],type="p", pch=3) #Centre of the ellips ##Contour of A3 #Function fA3# fA3 <- function(yy){ val <- t(yy-mean3)%*%sigma3M%*%(yy-mean3) return(val) } for (i in 1:I){ for (j in 1:J){ Zz[i,j] = fA3(c(x1[i],x2[j])) } } fA3CONCont <- contourLines(x1,x2,Zz,levels=vCON) # conservative one lines(fA3CONCont[[1]][[2]],fA3CONCont[[1]][[3]],type="l", lty=2) fA3EXACont <- contourLines(x1,x2,Zz,levels=vEXA) # exact one lines(fA3EXACont[[1]][[2]],fA3EXACont[[1]][[3]],type="l", lty=1, col="red") text(mean3[1],mean3[2]+0.2,"Class 3") points(mean3[1],mean3[2],type="p", pch=3) #Centre of the ellips