#### WL & MCB, 01.05.19; 10.05.19; 10.12.19;  02.03.20

########################### libraries ####
library(tidyverse)
library(KernSmooth)
library(tolerance)
library(metagen)



########################### preliminaries ####
lcols = metagen::cbbPalette[1:4]#[c(2,3,4,1)]
linetype = c("solid", "longdash", "dotdash", "dotted")
lcols6 = metagen::cbbPalette[1:6]#[c(2,3,4,1)]
linetype6 = c("solid", "longdash", "dotdash", "dotted", "twodash", "12345678")
lcols3 = metagen::cbbPalette[1:3]#[c(2,3,4,1)]
linetype3 = c("dotted","longdash","solid")


##############################################################################################
########    This program implements the computations in the paper                    ######### 
#########   Reference range: which statistical intervals to use?                     #########
########    by Liu, Bretz, and Cortina-Borja                                         #########
##############################################################################################

ralpha <- 0.05  #1- ralpha is the proportion of the population to catch
sampleSize <- 2:150  #Size of the sample from the population based on which to construct the reference ranges

z1mHAlpha <- qnorm(1-ralpha/2, mean=0, sd=1)  #percentile of N(0,1)
t1mHAlpha_nu <- qt(1-ralpha/2, df = sampleSize-1)  #percentiles of t_{n-1}
lambda <- sqrt(2/(sampleSize-1))*gamma(sampleSize/2)/gamma((sampleSize-1)/2)

prop1 <- t1mHAlpha_nu*sqrt(1+1/sampleSize)  #The proportional constant of RR_i
prop2 <- rep(z1mHAlpha, length(sampleSize))
prop3 <- z1mHAlpha/lambda
prop4 <- z1mHAlpha*lambda

## Produce Figure 1 ##
plot(sampleSize, prop1, type="l", lty=1, xlim=c(2,150),ylim=c(1.5,3), xlab =expression(n),ylab =expression(c_i))
lines(sampleSize, prop3, lty=2, col="blue")
lines(sampleSize, prop2, lty=4, col="purple")
lines(sampleSize, prop4, lty=6, col="red")

legend("topright", inset=.01, 
       c("c_1","c_3","c_2","c_4"),
       lty=c(1,2,4,6),cex=0.8)

#### Figure 1 in ggplot2  ####
n.sampleSize<- length(sampleSize)
df.Fig1<- data.frame(sampleSize=rep(sampleSize,4), prop=c(prop1,prop2,prop3,prop4),
                     type=factor(rep(1:4, rep(n.sampleSize,4))))
df.Fig1$linetype<- factor(as.numeric(df.Fig1$type), labels=linetype)
df.Fig1$color<- lcols[as.numeric(df.Fig1$type)]

fig1<-
  ggplot(df.Fig1, aes(x=sampleSize, y=prop,  colour=color, linetype=linetype)) +
  geom_line(aes(linetype=type, col=type),size=1.25)  + 
  scale_y_continuous(limits=c(1.5,3)) + 
  scale_x_continuous(limits=c(2,n.sampleSize+1),breaks=seq(0,150,by=25)) + 
  labs(x=expression(~italic(n)), y=expression(~italic("c"["i"]))) + 
  theme_bw() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) +
  theme(legend.position=c(0.8, 0.8)) + 
  theme(legend.text=element_text(colour='black', size=12, face='italic')) + 
  scale_colour_manual(values=lcols,
                      name=" ", breaks=as.character(1:4), 
                      labels=c(expression(~italic("c"["1"])),expression(~italic("c"["2"])),
                               expression(~italic("c"["3"])),expression(~italic("c"["4"])))) +
  scale_linetype_manual(values=linetype,
                        name=" ", breaks=as.character(1:4), 
                        labels=c(expression(~italic("c"["1"])),expression(~italic("c"["2"])),
                                 expression(~italic("c"["3"])),expression(~italic("c"["4"])))) +
  theme(legend.key.width = unit(1.5,"cm")) + 
  NULL

fig1

ggsave(filename='Fig1.pdf', plot=fig1, device='pdf', dpi=1200, width=6, height=4, units='in')
### OK

ggsave(filename='Fig1.jpg', plot=fig1, device='jpeg', dpi=1200, width=6, height=4, units='in')



#####################################################
## Find the prob by simulation and produce figure 2##
sim = 1000000
ralpha <- 0.05
nn <- 150
c1 = qt(1-ralpha/2, df = nn-1)*sqrt(1+1/nn)

set.seed(101)
norml <- rnorm(sim)
chisqu <- rchisq(sim,df=nn-1)
KK1 = pnorm(norml/sqrt(nn)+c1*sqrt(chisqu/(nn-1))) - pnorm(norml/sqrt(nn)-c1*sqrt(chisqu/(nn-1)))
sum((KK1 <= 1-ralpha))/sim  # The prob required
sum((KK1 <= 0.90))/sim

#Plot the kernel density estimate of the pdf of KK1
#based on the simulated  KK1-values by using the package Kernsmooth#
#install.packages("KernSmooth")
#  library("KernSmooth", lib.loc="C:/Apps/RLibraries")

h <- dpik(KK1)
est150 <- bkde(KK1, bandwidth=h, gridsize=1000L)
plot(est150,type="l", xlim =c(0.85,1.0),ylim=c(0,32), xlab="x", ylab="density function",col="red")
#plot(est,type="l", xlim =c(-6,6),ylim=c(0,1.6), xlab="x", ylab="density function",col="red")


#nn <- 1000
#c1 = qt(1-ralpha/2, df = nn-1)*sqrt(1+1/nn)
#
#set.seed(101)
#norml <- rnorm(sim)
#chisqu <- rchisq(sim,df=nn-1)
#KK1 = pnorm(norml/sqrt(nn)+c1*sqrt(chisqu/(nn-1))) - pnorm(norml/sqrt(nn)-c1*sqrt(chisqu/(nn-1)))
#sum((KK1 <= 1-ralpha))/sim  # The prob required
#sum((KK1 <= 0.90))/sim
#
#Plot the kernel density estimate of the pdf of KK1
#based on the simulated  KK1-values by using the package Kernsmooth#
#install.packages("KernSmooth")
#  library("KernSmooth", lib.loc="C:/Apps/RLibraries")
#
#h <- dpik(KK1)
#est1000 <- bkde(KK1, bandwidth=h, gridsize=1000L)
#plot(est1000,type="l", xlim =c(0.85,1.0),ylim=c(0,85), xlab="x", ylab="density function",col="red")
#plot(est,type="l", xlim =c(-6,6),ylim=c(0,1.6), xlab="x", ylab="density function",col="red")


nn <- 100
c1 = qt(1-ralpha/2, df = nn-1)*sqrt(1+1/nn)

set.seed(101)
norml <- rnorm(sim)
chisqu <- rchisq(sim,df=nn-1)
KK1 = pnorm(norml/sqrt(nn)+c1*sqrt(chisqu/(nn-1))) - pnorm(norml/sqrt(nn)-c1*sqrt(chisqu/(nn-1)))
sum((KK1 <= 1-ralpha))/sim  # The prob required
sum((KK1 <= 0.90))/sim

#Plot the kernel density estimate of the pdf of KK1
#based on the simulated  KK1-values by using the package Kernsmooth#
#install.packages("KernSmooth")
#library("KernSmooth", lib.loc="C:/Apps/RLibraries")

h <- dpik(KK1)
est100 <- bkde(KK1, bandwidth=h, gridsize=1000L)
lines(est100,type="l", lty=2, col="purple")
#plot(est,type="l", xlim =c(-6,6),ylim=c(0,1.6), xlab="x", ylab="density function",col="red")


nn <- 50
c1 = qt(1-ralpha/2, df = nn-1)*sqrt(1+1/nn)

set.seed(101)
norml <- rnorm(sim)
chisqu <- rchisq(sim,df=nn-1)
KK1 = pnorm(norml/sqrt(nn)+c1*sqrt(chisqu/(nn-1))) - pnorm(norml/sqrt(nn)-c1*sqrt(chisqu/(nn-1)))
sum((KK1 <= 1-ralpha))/sim  # The prob required
sum((KK1 <= 0.90))/sim

#Plot the kernel density estimate of the pdf of KK1
#based on the simulated  KK1-values by using the package Kernsmooth#
#install.packages("KernSmooth")
#library("KernSmooth", lib.loc="C:/Apps/RLibraries")

h <- dpik(KK1)
est50 <- bkde(KK1, bandwidth=h, gridsize=1000L)
lines(est50,type="l", lty=4,col="blue")
#plot(est,type="l", xlim =c(-6,6),ylim=c(0,1.6), xlab="x", ylab="density function",col="red")


nn <- 20
c1 = qt(1-ralpha/2, df = nn-1)*sqrt(1+1/nn)

set.seed(101)
norml <- rnorm(sim)
chisqu <- rchisq(sim,df=nn-1)
KK1 = pnorm(norml/sqrt(nn)+c1*sqrt(chisqu/(nn-1))) - pnorm(norml/sqrt(nn)-c1*sqrt(chisqu/(nn-1)))
sum((KK1 <= 1-ralpha))/sim  # The prob required
sum((KK1 <= 0.90))/sim

#Plot the kernel density estimate of the pdf of KK1
#based on the simulated  KK1-values by using the package Kernsmooth#
#install.packages("KernSmooth")
#library("KernSmooth", lib.loc="C:/Apps/RLibraries")

h <- dpik(KK1)
est20 <- bkde(KK1, bandwidth=h, gridsize=1000L)
lines(est20,type="l",lty=6,col="black")
#plot(est,type="l", xlim =c(-6,6),ylim=c(0,1.6), xlab="x", ylab="density function",col="red")

legend("topleft", inset=.01, 
       c("n=150","n=100","n=50","n=20"),
       lty=c(1,2,4,6),cex=0.8)

#abline(v = 0.95, col = "black", lwd = 1)
lines(c(0.95,0.95),c(0, 32), col="red")

#### Figure 2 with ggplot2 ####

n.pts<- length(est150$x)
df.Fig2<- data.frame(x=c(est150$x, est100$x, est50$x, est20$x),
                     y=c(est150$y, est100$y, est50$y, est20$y),
                     size=factor(rep(1:4, rep(n.pts, 4))))
df.Fig2$linetype<- factor(as.numeric(df.Fig2$size), labels=linetype)
df.Fig2$color<- lcols[as.numeric(df.Fig2$size)]

fig2<-ggplot(df.Fig2, aes(x=x, y=y,colour=color, linetype=linetype)) +
  geom_line(aes(linetype=size, col=size),size=1.25)  + 
  labs(x=expression(~italic("K"["1"])), y='probability density function')+ 
  scale_y_continuous(limits=c(0,32)) + 
  scale_x_continuous(limits=c(0.85,1)) + 
  theme_bw() + 
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) +
  theme(legend.position=c(0.25, 0.8)) + 
  theme(legend.text.align = 0) + 
  scale_colour_manual(values=lcols,
                      name=" ", breaks=as.character(1:4), 
                      labels=c(expression(~italic("n=150")),
                               expression(~italic("n=100")),
                               expression(~italic("n=50")), 
                               expression(~italic("n=20")))) +
  scale_linetype_manual(values=linetype,
                        name=" ", breaks=as.character(1:4), 
                        labels=c(expression(~italic("n=150")),
                                 expression(~italic("n=100")),
                                 expression(~italic("n=50")), 
                                 expression(~italic("n=20")))) +
  theme(legend.key.width = unit(1.5,"cm")) + 
  theme(legend.text=element_text(colour='black', size=12, face='italic')) + 
  geom_vline(xintercept=0.95, color='gray', size=1, linetype='solid') + 
  NULL

fig2

ggsave(filename='Fig2.pdf', plot=fig2, device='pdf', dpi=1200, width=6, height=4, units='in')

ggsave(filename='Fig2.jpg', plot=fig2, device='jpeg', dpi=1200, width=6, height=4, units='in')


#####################
## Produce figure 3##

ralpha <- 0.05  #1-\alpha is the proportion of the population to catch
sampleSize <- 2:150  #Size of the sample from the population based on which to construct the reference ranges
n.pts<- length(sampleSize)

t1mHAlpha_nu <- qt(1-ralpha/2, df = sampleSize-1)  #percentiles of t_{n-1}
prop1 <- t1mHAlpha_nu*sqrt(1+1/sampleSize)  #The proportional constant of RR_1

plot(sampleSize, prop1, type="l", lty=1, xlim=c(2,150),ylim=c(1.9,4), xlab =expression(n),ylab =expression(c_i))

rgamma = 0.90
c5_90 <- K.factor(sampleSize, f=NULL, alpha=1-rgamma, P = 0.95, side = 2, method = "EXACT", m = 100) #The critical constant for tolerance intervals
lines(sampleSize, c5_90, lty=2, col="blue")

rgamma = 0.95
c5_95 <- K.factor(sampleSize, f=NULL, alpha=1-rgamma, P = 0.95, side = 2, method = "EXACT", m = 100) #The critical constant for tolerance intervals
lines(sampleSize, c5_95, lty=4, col="red")

rgamma = 0.99
c5_99 <- K.factor(sampleSize, f=NULL, alpha=1-rgamma, P = 0.95, side = 2, method = "EXACT", m = 100) #The critical constant for tolerance intervals
lines(sampleSize, c5_99, lty=6, col="purple")


legend("topright", inset=.01, 
       c("c_1","c_5, gamma=0.90","c_5, gamma=0.95","c_5, gamma=0.99"),
       lty=c(1,2,4,6),cex=0.8)


##### Figure 3 with ggplot2 ####

df.Fig3<- data.frame(x=rep(sampleSize,4),
                     ci=c(prop1, c5_90, c5_95, c5_99),
                     type=factor(rep(1:4, rep(n.pts, 4))))

df.Fig3$linetype<- factor(as.numeric(df.Fig3$type), labels=linetype)
df.Fig3$color<- lcols[as.numeric(df.Fig3$type)]


fig3<- ggplot(df.Fig3, aes(x=x, y=ci, colour=color, linetype=linetype)) +
  geom_line(aes(linetype=type, col=type),size=1.25) + 
  scale_color_discrete(guide = "legend") +
  scale_y_continuous(limits=c(1.9,4)) + 
  scale_x_continuous(limits=c(0,150), breaks=seq(0,150,by=25)) + 
  labs(x=expression(~italic(n)), y=expression(~italic("c"["i"]))) + 
  theme_bw() + 
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) +
  theme(legend.position=c(0.75, 0.8)) + 
  scale_colour_manual(values=lcols, name=" ",
                      labels = c(expression(~italic("c"["1"])),
                                 expression(~italic("c"["5"],)~gamma~"=0.90"),
                                 expression(~italic("c"["5"],)~gamma~"=0.95"),
                                 expression(~italic("c"["5"],)~gamma~"=0.99"))) +
  scale_linetype_manual(values=linetype, name=" ",
                        labels = c(expression(~italic("c"["1"])),
                                   expression(~italic("c"["5"],)~gamma~"=0.90"),
                                   expression(~italic("c"["5"],)~gamma~"=0.95"),
                                   expression(~italic("c"["5"],)~gamma~"=0.99"))) +
  theme(legend.key.width = unit(1.5,"cm")) + 
  theme(legend.text=element_text(colour='black', size=12, face='italic')) + 
  theme(legend.text.align = 0) + 
  geom_hline(yintercept=1.96,color='gray', size=1, linetype='solid') + 
  annotate("text", x = 140, y = 1.9, size=4,
           label = expression(~italic("z"[(1+P)/2]))) + 
  NULL
fig3


ggsave(filename='Fig3.pdf', plot=fig3, device='pdf', dpi=1200, width=6, height=4, units='in')
### OK

ggsave(filename='Fig3.jpg', plot=fig3, device='jpeg', dpi=1200, width=6, height=4, units='in')



#########################################################
## Equal-tailed tolerance interval and Produce figure 4##
#install.packages("tolerance")
#library("tolerance", lib.loc="C:/Apps/RLibraries")

## Compute c_6 by simulation
sim = 1000000
vc6 <- matrix(0, nrow = 3, ncol = length(sampleSize)) #Record the simulated critical values c6 

set.seed(10)
vNorml <- abs(rnorm(sim))

for(n in sampleSize) {
  vChisq <- sqrt( (n/(n-1)) * rchisq(sim,df=n-1) )
  vRV <- ( vNorml + sqrt(n)*z1mHAlpha )/vChisq
  vc6[1,n-1] <- quantile(vRV, probs = 0.90 )
  vc6[2,n-1] <- quantile(vRV, probs = 0.95 )
  vc6[3,n-1] <- quantile(vRV, probs = 0.99 )
  
}

## Figure 4
plot(sampleSize, c5_90, type="l", lty=2, 
     col="blue", xlim=c(2,150),ylim=c(2.0,4), xlab =expression(n),ylab =expression(c_i))
lines(sampleSize, c5_95, lty=4, col="red")
lines(sampleSize, c5_99, lty=6, col="green")

lines(sampleSize, vc6[1,], lty=2, col="blue")
lines(sampleSize, vc6[2,], lty=4, col="red")
lines(sampleSize, vc6[3,], lty=6, col="green")

legend("topright", inset=.01, 
       c("c_5, gamma = 0.90","c_5, gamma=0.95","c_5, gamma=0.99","c_6, gamma=0.90","c_6, gamma=0.95","c_6, gamma=0.99"),
       lty=c(2,4,6,2,4,6),cex=0.8)


##### Figure 4 with ggplot2 ####


df.Fig4<-data.frame(x=rep(sampleSize,6),
                    ci=c(c5_90, c5_95, c5_99, vc6[1,], vc6[2,], vc6[3,]),
                    type=factor(rep(1:6, rep(n.pts, 6))))

df.Fig4$linetype<- factor(as.numeric(df.Fig4$type), labels=linetype6)
df.Fig4$color<- lcols6[as.numeric(df.Fig4$type)]

fig4<- ggplot(df.Fig4, aes(x=x, y=ci, colour=color, linetype=linetype)) +
  geom_line(aes(linetype=type, col=type),size=1.25) + 
  scale_color_discrete(guide = "legend") +
  scale_y_continuous(limits=c(1.9,4)) + 
  scale_x_continuous(limits=c(0,150), breaks=seq(0,150,by=25)) + 
  labs(x=expression(~italic(n)), y=expression(~italic("c"["i"]))) + 
  theme_bw() + 
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) +
  theme(legend.position=c(0.75, 0.70)) + 
  scale_colour_manual(values=lcols6, name = " ", 
                      labels = c(expression(~italic("c"["5"])~gamma~"=0.90"),
                                 expression(~italic("c"["5"])~gamma~"=0.95"),
                                 expression(~italic("c"["5"])~gamma~"=0.99"),
                                 expression(~italic("c"["6"])~gamma~"=0.90"),
                                 expression(~italic("c"["6"])~gamma~"=0.95"),
                                 expression(~italic("c"["6"])~gamma~"=0.99"))) +
  scale_linetype_manual(values=linetype6, name=" ",
                        labels = c(expression(~italic("c"["5"])~gamma~"=0.90"),
                                   expression(~italic("c"["5"])~gamma~"=0.95"),
                                   expression(~italic("c"["5"])~gamma~"=0.99"),
                                   expression(~italic("c"["6"])~gamma~"=0.90"),
                                   expression(~italic("c"["6"])~gamma~"=0.95"),
                                   expression(~italic("c"["6"])~gamma~"=0.99"))) +
  theme(legend.key.width = unit(1.5,"cm")) + 
  theme(legend.text=element_text(colour='black', size=12, face='italic')) + 
  theme(legend.text.align = 0) + 
  geom_hline(yintercept=1.96,color='gray', size=1, linetype='solid') + 
  annotate("text", x = 140, y = 1.9, size=4,
           label = expression(~italic("z"[(1+P)/2]))) + 
  NULL

fig4

ggsave(filename='Fig4.pdf', plot=fig4, device='pdf', dpi=1200, width=6, height=4, units='in')

ggsave(filename='Fig4.jpg', plot=fig4, device='jpeg', dpi=1200, width=6, height=4, units='in')



############################################################
## Nonparametric prediction intervals and Produce figure 5##
#install.packages("tolerance")
#library("tolerance", lib.loc="C:/Apps/RLibraries")


ralpha <- 0.05
minSamSiz <- 2/ralpha  ##the minimum sample size required for a given alpha in order to contruct a nonp pred intervals

sampleSize_05 <- minSamSiz + 50*(0:100)  #sample sizes
Jp <- floor( (sampleSize_05+1)*ralpha/2 )  #The j_p for prediction interval
ProbCov_05 <- pbeta(1-ralpha, sampleSize_05-2*Jp+1, 2*Jp, lower.tail = FALSE)

## Figure 5
plot(sampleSize_05, ProbCov_05, type="l", lty=2, 
     col="red", xlim=c(20,5250),ylim=c(0.4,1.0), xlab =expression(n),ylab =expression(Prob))


ralpha <- 0.01
minSamSiz <- 2/ralpha  ##the minimum sample size required for a given alpha in order to contruct a nonp pred intervals

sampleSize_01 <- minSamSiz + 50*(0:100)  #sample sizes
Jp <- floor( (sampleSize_01+1)*ralpha/2 )  #The j_p for prediction interval
ProbCov_01 <- pbeta(1-ralpha, sampleSize_01-2*Jp+1, 2*Jp, lower.tail = FALSE)
lines(sampleSize_01, ProbCov_01, lty=6, col="black")


ralpha <- 0.10
minSamSiz <- 2/ralpha  ##the minimum sample size required for a given alpha in order to contruct a nonp pred intervals

sampleSize_10 <- minSamSiz + 50*(0:100)  #sample sizes
Jp <- floor( (sampleSize_10+1)*ralpha/2 )  #The j_p for prediction interval
ProbCov_10 <- pbeta(1-ralpha, sampleSize_10-2*Jp+1, 2*Jp, lower.tail = FALSE)
lines(sampleSize_10, ProbCov_10, lty=1, col="blue")

abline(h = 0.5, col = "black", lwd = 1)

legend("topright", inset=.01, 
       c("alpha = 0.10","alpha=0.05","alpha=0.01"),
       lty=c(1,2,6),cex=0.8)



##### Figure 5 with ggplot2 ####



df.Fig5<-data.frame(x=c(sampleSize_01,sampleSize_05,sampleSize_10),
                    ci=c(ProbCov_01, ProbCov_05, ProbCov_10),
                    type=factor(rep(1:3, rep(length(sampleSize_01), 3))))


linetype3a<- c('twodash', 'longdash','solid')
df.Fig5$linetype<- factor(as.numeric(df.Fig5$type), labels=linetype3a)
df.Fig5$color<- lcols3[as.numeric(df.Fig5$type)]

fig5<- ggplot(df.Fig5, aes(x=x, y=ci, colour=color, linetype=linetype)) +
  geom_line(aes(linetype=type, col=type),size=1.25) + 
  scale_color_discrete(guide = "legend") +
  scale_y_continuous(limits=c(0.4,1)) + 
  scale_x_continuous(limits=c(0,5300), breaks=seq(0,5400,by=900)) + 
  labs(x=expression(~italic(n)), y='probability') + 
  theme_bw() + 
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), 
        axis.line = element_line(colour = "black")) +
  theme(legend.position=c(0.75, 0.70)) + 
  scale_colour_manual(values=lcols3, name = " ", 
                      labels = c(expression(~italic("P")~"=0.99"),
                      expression(~italic("P")~"=0.95"),
                      expression(~italic("P")~"=0.90"))) + 
  scale_linetype_manual(values=linetype3, name=" ",
                        labels = c(expression(~italic("P")~"=0.99"),
                                   expression(~italic("P")~"=0.95"),
                                   expression(~italic("P")~"=0.90"))) +
  theme(legend.key.width = unit(1.5,"cm")) + 
  theme(legend.text=element_text(colour='black', size=12, face='italic')) + 
  theme(legend.text.align = 0) + 
  geom_hline(yintercept=0.5,color='gray', size=1, linetype='solid') + 
  annotate("text", x = 5000, y = 0.48, size=4,label = 'prob = 0.5') + 
  NULL

fig5

ggsave(filename='Fig5.pdf', plot=fig5, device='pdf', dpi=1200, 
       width=6, height=4, units='in')

ggsave(filename='Fig5.jpg', plot=fig5, device='jpeg', dpi=1200, 
       width=6, height=4, units='in')




############################################################
## Nonparametric tolerance intervals and Produce figure 6##
#install.packages("tolerance")
#library("tolerance", lib.loc="C:/Apps/RLibraries")


ralpha <- 0.05
minSamSizP <- 2/ralpha  ##the minimum sample size required for nonparametric prediction intervals

rgamma <- 0.90  #confidence level for tolerance intervals

# First, find the smallest sample size nn so that  (X_[1], X_[nn]) is a (1-alpha)-content gamma-confidence  tolerance tolerance interval #
nn=2
while ( 1 - ( nn* (1-ralpha)^(nn-1) - (nn-1)* (1-ralpha)^nn ) < rgamma ){
  nn <- nn + 1
}
nn  #The smallest sample size required
1 - ( nn* (1-ralpha)^(nn-1) - (nn-1)* (1-ralpha)^nn )

sampleSize_90 <- max(minSamSizP,nn) + 10*(0:100)  #sample sizes
indexJpJt_90 <- matrix(0, nrow = length(sampleSize_90), ncol=2) #compute the j_p for prediction interval and j_t for tolerance interval 
for(i in 1:length(sampleSize_90)){
  jp <- floor( (sampleSize_90[i] +1)*ralpha/2 )  #The j_p for prediction interval
  jt=1
  while ( pbeta(1-ralpha, sampleSize_90[i]-2*jt+1, 2*jt, lower.tail=FALSE) >= rgamma ){
    jt <- jt + 1
  }
  jt <- jt-1  #the jt required
  indexJpJt_90[i,] <- c(jp,jt)
}

## Figure 6
plot(sampleSize_90, indexJpJt_90[,1], type="l", lty=2, 
     col="red", xlim=c(20,1200),ylim=c(0.0,30), xlab =expression(n),ylab ="j_p and j_t")
lines(sampleSize_90, indexJpJt_90[,2], lty=4, col="black")


rgamma <- 0.95  #confidence level for tolerance intervals

# First, find the smallest sample size nn so that  (X_[1], X_[nn]) is a (1-alpha)-content gamma-confidence  tolerance tolerance interval #
nn=2
while ( 1 - ( nn* (1-ralpha)^(nn-1) - (nn-1)* (1-ralpha)^nn ) < rgamma ){
  nn <- nn + 1
}
nn  #The smallest sample size required
1 - ( nn* (1-ralpha)^(nn-1) - (nn-1)* (1-ralpha)^nn )

sampleSize_95 <- max(minSamSizP,nn) + 10*(0:100)  #sample sizes
indexJpJt_95 <- matrix(0, nrow = length(sampleSize_95), ncol=2) #compute the j_p for prediction interval and j_t for tolerance interval 
for(i in 1:length(sampleSize_95)){
  jp <- floor( (sampleSize_95[i] +1)*ralpha/2 )  #The j_p for prediction interval
  jt=1
  while ( pbeta(1-ralpha, sampleSize_95[i]-2*jt+1, 2*jt, lower.tail=FALSE) >= rgamma ){
    jt <- jt + 1
  }
  jt <- jt-1  #the jt required
  indexJpJt_95[i,] <- c(jp,jt)
}

#lines(sampleSize, indexJpJt[,1], type="l", lty=2, 
#     col="red", xlim=c(20,1200),ylim=c(0.0,30), xlab =expression(n),ylab =expression(j_p, j_t))
lines(sampleSize_95, indexJpJt_95[,2], lty=5, col="blue")


rgamma <- 0.99  #confidence level for tolerance intervals

# First, find the smallest sample size nn so that  (X_[1], X_[nn]) is a (1-alpha)-content gamma-confidence  tolerance tolerance interval #
nn=2
while ( 1 - ( nn* (1-ralpha)^(nn-1) - (nn-1)* (1-ralpha)^nn ) < rgamma ){
  nn <- nn + 1
}
nn  #The smallest sample size required
1 - ( nn* (1-ralpha)^(nn-1) - (nn-1)* (1-ralpha)^nn )

sampleSize_99 <- max(minSamSizP,nn) + 10*(0:100)  #sample sizes
indexJpJt_99 <- matrix(0, nrow = length(sampleSize_99), ncol=2) #compute the j_p for prediction interval and j_t for tolerance interval 
for(i in 1:length(sampleSize_99)){
  jp <- floor( (sampleSize_99[i] +1)*ralpha/2 )  #The j_p for prediction interval
  jt=1
  while ( pbeta(1-ralpha, sampleSize_99[i]-2*jt+1, 2*jt, lower.tail=FALSE) >= rgamma ){
    jt <- jt + 1
  }
  jt <- jt-1  #the jt required
  indexJpJt_99[i,] <- c(jp,jt)
}

#lines(sampleSize, indexJpJt[,1], type="l", lty=2, 
#      col="red", xlim=c(20,1200),ylim=c(0.0,30), xlab =expression(n),ylab =expression(j_p, j_t))
lines(sampleSize_99, indexJpJt_99[,2], lty=7, col="pink")

legend("bottomright", inset=.01, 
       c("alpha = 0.05, j_p","gamma=0.90, j_t","gamma=0.95, j_t","gamma=0.99, j_t"),
       lty=c(2,4,5,7),cex=0.8)



##### Figure 6 with ggplot2 ####

df.Fig6<- data.frame(x=c(sampleSize_90,sampleSize_90,sampleSize_95,sampleSize_99),
                     ci=c(indexJpJt_90[,1],indexJpJt_90[,2],indexJpJt_95[,2], indexJpJt_99[,2]),
                     type=factor(rep(1:4, c(length(sampleSize_90),length(sampleSize_90),length(sampleSize_95),length(sampleSize_99)))))

df.Fig6$linetype<- factor(as.numeric(df.Fig6$type), labels=linetype)
df.Fig6$color<- lcols[as.numeric(df.Fig6$type)]


fig6<- ggplot(df.Fig6, aes(x=x, y=ci, colour=color, linetype=linetype)) +
  geom_line(aes(linetype=type, col=type),size=1.25) + 
  scale_color_discrete(guide = "legend") +
  scale_y_continuous(limits=c(0,30)) + 
  scale_x_continuous(limits=c(0,1200), breaks=seq(0,1200,by=200)) + 
  labs(x=expression(~italic(n)), y=expression(~italic("j"^"(p)")~and~italic("j"^"(t)"))) + 
  theme_bw() + 
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) +
  theme(legend.position=c(0.75, 0.25)) + 
  scale_colour_manual(values=lcols, name=" ",
                      labels = c(expression(~italic("P")~"=0.95,"~italic("j"^"(p)")),
                                 expression(~gamma~"=0.90,"~italic("j"^"(t)")),
                                 expression(~gamma~"=0.95,"~italic("j"^"(t)")),
                                 expression(~gamma~"=0.99,"~italic("j"^"(t)")))) +
  scale_linetype_manual(values=linetype, name=" ",
                        labels = c(expression(~italic("P")~"=0.95,"~italic("j"^"(p)")),
                                   expression(~gamma~"=0.90,"~italic("j"^"(t)")),
                                   expression(~gamma~"=0.95,"~italic("j"^"(t)")),
                                   expression(~gamma~"=0.99,"~italic("j"^"(t)")))) +
  theme(legend.key.width = unit(1.5,"cm")) + 
  theme(legend.text=element_text(colour='black', size=12, face='italic')) + 
  theme(legend.text.align = 0) + 
  NULL
fig6


ggsave(filename='Fig6.pdf', plot=fig6, device='pdf', dpi=1200, width=6, height=4, units='in')
### OK
ggsave(filename='Fig6.jpg', plot=fig6, device='jpeg', dpi=1200, width=6, height=4, units='in')









#################
##Example    ####

#Read the sample into R
sample.data <- read.table("\\\\filestore.soton.ac.uk/users/wl/mydocuments/mydocuments/MyResearch/My papers/inProgress/Tolerance sets/Reference range/R/sample.txt", header=F)
#Alternatively, File -> Import Dataset -> From CSD

#Check it is a data.frame
str(sample.data)
dim(sample.data)
summary(sample.data)

sampData <- sample.data[,1]

hist(sampData)
qqnorm(sampData) 

ralpha <- 0.05 #0.01  #1-ralpha is the proportion of the population to catch
sampleSize <- length(sampData)  #20  #Size of the sample from the population based on which to construct the reference ranges
rgamma <- 0.95  #Confidence level
smean <- mean(sampData) #5.31  #sample mean
sstd <- sd(sampData) #0.41  #sample standard deviation

#### Normal distribution based intervals ####

z1mHAlpha <- qnorm(1-ralpha/2, mean=0, sd=1)  #percentile of N(0,1)
t1mHAlpha_nu <- qt(1-ralpha/2, df = sampleSize-1)  #percentiles of t_{n-1}
lambda <- sqrt(2/(sampleSize-1))*gamma(sampleSize/2)/gamma((sampleSize-1)/2)

prop1 <- t1mHAlpha_nu*sqrt(1+1/sampleSize)  #The proportional constant of RR_i
prop2 <- rep(z1mHAlpha, length(sampleSize))
prop3 <- z1mHAlpha/lambda
prop4 <- z1mHAlpha*lambda
RR_1 <- c( smean-prop1*sstd, smean+prop1*sstd )

## Find the prob P(KK_1 < 1-ralpha) by simulation ##
sim = 1000000
nn <- sampleSize
n <- nn
c1 = qt(1-ralpha/2, df = nn-1)*sqrt(1+1/nn)

set.seed(101)
norml <- rnorm(sim)
chisqu <- rchisq(sim,df=nn-1)
KK1 = pnorm(norml/sqrt(nn)+c1*sqrt(chisqu/(nn-1))) - pnorm(norml/sqrt(nn)-c1*sqrt(chisqu/(nn-1)))
sum((KK1 < 1-ralpha))/sim  # The prob required
#sum((KK1 < 0.93))/sim

## The tolerance interval ##
c5 <- K.factor(sampleSize, f=NULL, alpha=1-rgamma, P = 1-ralpha, side = 2, method = "EXACT", m = 100) #The critical constant for tolerance intervals
RR_5 <- c( smean-c5*sstd, smean+c5*sstd )

vNorml <- abs(rnorm(sim))
vChisq <- sqrt( (n/(n-1)) * rchisq(sim,df=n-1) )
vRV <- ( vNorml + sqrt(n)*z1mHAlpha )/vChisq
sum((vRV < c5))/sim  # The prob that the tolerance interval contains the middle 1-alpha of the population 

## The equal-tailed tolerance interval
set.seed(10)
vNorml <- abs(rnorm(sim))
vChisq <- sqrt( (n/(n-1)) * rchisq(sim,df=n-1) )
vRV <- ( vNorml + sqrt(n)*z1mHAlpha )/vChisq
c6 <- quantile(vRV, probs = rgamma )
RR_6 <- c( smean-c6*sstd, smean+c6*sstd )

set.seed(10)
norml <- rnorm(sim)
chisqu <- rchisq(sim,df=nn-1)
KK6 = pnorm(norml/sqrt(nn)+c6*sqrt(chisqu/(nn-1))) - pnorm(norml/sqrt(nn)-c6*sqrt(chisqu/(nn-1)))
sum((KK6 > 1-ralpha))/sim  # The prob that the interval contains 1-alpha proportion of the population

quantile(KK6, probs = 0.05 )
K.factor(sampleSize, f=NULL, alpha=1-rgamma, P = 0.9570664, side = 2, method = "EXACT", m = 100)

#### Nonparametric intervals ####

## Prediction interval ##
ralpha <- 0.05
minSamSiz <- 2/ralpha  ##the minimum sample size required for a given alpha in order to contruct a nonp pred intervals

Jp <- floor( (sampleSize+1)*ralpha/2 )  #The j_p for prediction interval
sSample = sort(sampData)  # Sorted sample observations
RR_7 <- c( sSample[Jp], sSample[sampleSize-Jp+1]) #The prediction interval
ProbCov <- pbeta(1-ralpha, sampleSize-2*Jp+1, 2*Jp, lower.tail = FALSE) #The chance that RR_7 catches at least 1-alpha of the population
1-ProbCov

## Tolerance interval ##

rgamma <- 0.95  #confidence level for tolerance intervals

# First, find the smallest sample size nn so that  (X_[1], X_[nn]) is a (1-alpha)-content gamma-confidence  tolerance tolerance interval #
nn=2
while ( 1 - ( nn* (1-ralpha)^(nn-1) - (nn-1)* (1-ralpha)^nn ) < rgamma ){
  nn <- nn + 1
}
nn  #The smallest sample size required
1 - ( nn* (1-ralpha)^(nn-1) - (nn-1)* (1-ralpha)^nn )

jt=1
while ( pbeta(1-ralpha, sampleSize-2*jt+1, 2*jt, lower.tail=FALSE) >= rgamma ){
  jt <- jt + 1
}
jt <- jt-1  #the jt required for the tolerance interval
RR_8 <- c( sSample[jt], sSample[sampleSize-jt+1]) #The tolerance interval
pbeta(1-ralpha, sampleSize-2*jt+1, 2*jt, lower.tail=FALSE)  #The true confidence level