#### WL & MCB, 01.05.19; 10.05.19; 10.12.19; 02.03.20 ########################### libraries #### library(tidyverse) library(KernSmooth) library(tolerance) library(metagen) ########################### preliminaries #### lcols = metagen::cbbPalette[1:4]#[c(2,3,4,1)] linetype = c("solid", "longdash", "dotdash", "dotted") lcols6 = metagen::cbbPalette[1:6]#[c(2,3,4,1)] linetype6 = c("solid", "longdash", "dotdash", "dotted", "twodash", "12345678") lcols3 = metagen::cbbPalette[1:3]#[c(2,3,4,1)] linetype3 = c("dotted","longdash","solid") ############################################################################################## ######## This program implements the computations in the paper ######### ######### Reference range: which statistical intervals to use? ######### ######## by Liu, Bretz, and Cortina-Borja ######### ############################################################################################## ralpha <- 0.05 #1- ralpha is the proportion of the population to catch sampleSize <- 2:150 #Size of the sample from the population based on which to construct the reference ranges z1mHAlpha <- qnorm(1-ralpha/2, mean=0, sd=1) #percentile of N(0,1) t1mHAlpha_nu <- qt(1-ralpha/2, df = sampleSize-1) #percentiles of t_{n-1} lambda <- sqrt(2/(sampleSize-1))*gamma(sampleSize/2)/gamma((sampleSize-1)/2) prop1 <- t1mHAlpha_nu*sqrt(1+1/sampleSize) #The proportional constant of RR_i prop2 <- rep(z1mHAlpha, length(sampleSize)) prop3 <- z1mHAlpha/lambda prop4 <- z1mHAlpha*lambda ## Produce Figure 1 ## plot(sampleSize, prop1, type="l", lty=1, xlim=c(2,150),ylim=c(1.5,3), xlab =expression(n),ylab =expression(c_i)) lines(sampleSize, prop3, lty=2, col="blue") lines(sampleSize, prop2, lty=4, col="purple") lines(sampleSize, prop4, lty=6, col="red") legend("topright", inset=.01, c("c_1","c_3","c_2","c_4"), lty=c(1,2,4,6),cex=0.8) #### Figure 1 in ggplot2 #### n.sampleSize<- length(sampleSize) df.Fig1<- data.frame(sampleSize=rep(sampleSize,4), prop=c(prop1,prop2,prop3,prop4), type=factor(rep(1:4, rep(n.sampleSize,4)))) df.Fig1$linetype<- factor(as.numeric(df.Fig1$type), labels=linetype) df.Fig1$color<- lcols[as.numeric(df.Fig1$type)] fig1<- ggplot(df.Fig1, aes(x=sampleSize, y=prop, colour=color, linetype=linetype)) + geom_line(aes(linetype=type, col=type),size=1.25) + scale_y_continuous(limits=c(1.5,3)) + scale_x_continuous(limits=c(2,n.sampleSize+1),breaks=seq(0,150,by=25)) + labs(x=expression(~italic(n)), y=expression(~italic("c"["i"]))) + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) + theme(legend.position=c(0.8, 0.8)) + theme(legend.text=element_text(colour='black', size=12, face='italic')) + scale_colour_manual(values=lcols, name=" ", breaks=as.character(1:4), labels=c(expression(~italic("c"["1"])),expression(~italic("c"["2"])), expression(~italic("c"["3"])),expression(~italic("c"["4"])))) + scale_linetype_manual(values=linetype, name=" ", breaks=as.character(1:4), labels=c(expression(~italic("c"["1"])),expression(~italic("c"["2"])), expression(~italic("c"["3"])),expression(~italic("c"["4"])))) + theme(legend.key.width = unit(1.5,"cm")) + NULL fig1 ggsave(filename='Fig1.pdf', plot=fig1, device='pdf', dpi=1200, width=6, height=4, units='in') ### OK ggsave(filename='Fig1.jpg', plot=fig1, device='jpeg', dpi=1200, width=6, height=4, units='in') ##################################################### ## Find the prob by simulation and produce figure 2## sim = 1000000 ralpha <- 0.05 nn <- 150 c1 = qt(1-ralpha/2, df = nn-1)*sqrt(1+1/nn) set.seed(101) norml <- rnorm(sim) chisqu <- rchisq(sim,df=nn-1) KK1 = pnorm(norml/sqrt(nn)+c1*sqrt(chisqu/(nn-1))) - pnorm(norml/sqrt(nn)-c1*sqrt(chisqu/(nn-1))) sum((KK1 <= 1-ralpha))/sim # The prob required sum((KK1 <= 0.90))/sim #Plot the kernel density estimate of the pdf of KK1 #based on the simulated KK1-values by using the package Kernsmooth# #install.packages("KernSmooth") # library("KernSmooth", lib.loc="C:/Apps/RLibraries") h <- dpik(KK1) est150 <- bkde(KK1, bandwidth=h, gridsize=1000L) plot(est150,type="l", xlim =c(0.85,1.0),ylim=c(0,32), xlab="x", ylab="density function",col="red") #plot(est,type="l", xlim =c(-6,6),ylim=c(0,1.6), xlab="x", ylab="density function",col="red") #nn <- 1000 #c1 = qt(1-ralpha/2, df = nn-1)*sqrt(1+1/nn) # #set.seed(101) #norml <- rnorm(sim) #chisqu <- rchisq(sim,df=nn-1) #KK1 = pnorm(norml/sqrt(nn)+c1*sqrt(chisqu/(nn-1))) - pnorm(norml/sqrt(nn)-c1*sqrt(chisqu/(nn-1))) #sum((KK1 <= 1-ralpha))/sim # The prob required #sum((KK1 <= 0.90))/sim # #Plot the kernel density estimate of the pdf of KK1 #based on the simulated KK1-values by using the package Kernsmooth# #install.packages("KernSmooth") # library("KernSmooth", lib.loc="C:/Apps/RLibraries") # #h <- dpik(KK1) #est1000 <- bkde(KK1, bandwidth=h, gridsize=1000L) #plot(est1000,type="l", xlim =c(0.85,1.0),ylim=c(0,85), xlab="x", ylab="density function",col="red") #plot(est,type="l", xlim =c(-6,6),ylim=c(0,1.6), xlab="x", ylab="density function",col="red") nn <- 100 c1 = qt(1-ralpha/2, df = nn-1)*sqrt(1+1/nn) set.seed(101) norml <- rnorm(sim) chisqu <- rchisq(sim,df=nn-1) KK1 = pnorm(norml/sqrt(nn)+c1*sqrt(chisqu/(nn-1))) - pnorm(norml/sqrt(nn)-c1*sqrt(chisqu/(nn-1))) sum((KK1 <= 1-ralpha))/sim # The prob required sum((KK1 <= 0.90))/sim #Plot the kernel density estimate of the pdf of KK1 #based on the simulated KK1-values by using the package Kernsmooth# #install.packages("KernSmooth") #library("KernSmooth", lib.loc="C:/Apps/RLibraries") h <- dpik(KK1) est100 <- bkde(KK1, bandwidth=h, gridsize=1000L) lines(est100,type="l", lty=2, col="purple") #plot(est,type="l", xlim =c(-6,6),ylim=c(0,1.6), xlab="x", ylab="density function",col="red") nn <- 50 c1 = qt(1-ralpha/2, df = nn-1)*sqrt(1+1/nn) set.seed(101) norml <- rnorm(sim) chisqu <- rchisq(sim,df=nn-1) KK1 = pnorm(norml/sqrt(nn)+c1*sqrt(chisqu/(nn-1))) - pnorm(norml/sqrt(nn)-c1*sqrt(chisqu/(nn-1))) sum((KK1 <= 1-ralpha))/sim # The prob required sum((KK1 <= 0.90))/sim #Plot the kernel density estimate of the pdf of KK1 #based on the simulated KK1-values by using the package Kernsmooth# #install.packages("KernSmooth") #library("KernSmooth", lib.loc="C:/Apps/RLibraries") h <- dpik(KK1) est50 <- bkde(KK1, bandwidth=h, gridsize=1000L) lines(est50,type="l", lty=4,col="blue") #plot(est,type="l", xlim =c(-6,6),ylim=c(0,1.6), xlab="x", ylab="density function",col="red") nn <- 20 c1 = qt(1-ralpha/2, df = nn-1)*sqrt(1+1/nn) set.seed(101) norml <- rnorm(sim) chisqu <- rchisq(sim,df=nn-1) KK1 = pnorm(norml/sqrt(nn)+c1*sqrt(chisqu/(nn-1))) - pnorm(norml/sqrt(nn)-c1*sqrt(chisqu/(nn-1))) sum((KK1 <= 1-ralpha))/sim # The prob required sum((KK1 <= 0.90))/sim #Plot the kernel density estimate of the pdf of KK1 #based on the simulated KK1-values by using the package Kernsmooth# #install.packages("KernSmooth") #library("KernSmooth", lib.loc="C:/Apps/RLibraries") h <- dpik(KK1) est20 <- bkde(KK1, bandwidth=h, gridsize=1000L) lines(est20,type="l",lty=6,col="black") #plot(est,type="l", xlim =c(-6,6),ylim=c(0,1.6), xlab="x", ylab="density function",col="red") legend("topleft", inset=.01, c("n=150","n=100","n=50","n=20"), lty=c(1,2,4,6),cex=0.8) #abline(v = 0.95, col = "black", lwd = 1) lines(c(0.95,0.95),c(0, 32), col="red") #### Figure 2 with ggplot2 #### n.pts<- length(est150$x) df.Fig2<- data.frame(x=c(est150$x, est100$x, est50$x, est20$x), y=c(est150$y, est100$y, est50$y, est20$y), size=factor(rep(1:4, rep(n.pts, 4)))) df.Fig2$linetype<- factor(as.numeric(df.Fig2$size), labels=linetype) df.Fig2$color<- lcols[as.numeric(df.Fig2$size)] fig2<-ggplot(df.Fig2, aes(x=x, y=y,colour=color, linetype=linetype)) + geom_line(aes(linetype=size, col=size),size=1.25) + labs(x=expression(~italic("K"["1"])), y='probability density function')+ scale_y_continuous(limits=c(0,32)) + scale_x_continuous(limits=c(0.85,1)) + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) + theme(legend.position=c(0.25, 0.8)) + theme(legend.text.align = 0) + scale_colour_manual(values=lcols, name=" ", breaks=as.character(1:4), labels=c(expression(~italic("n=150")), expression(~italic("n=100")), expression(~italic("n=50")), expression(~italic("n=20")))) + scale_linetype_manual(values=linetype, name=" ", breaks=as.character(1:4), labels=c(expression(~italic("n=150")), expression(~italic("n=100")), expression(~italic("n=50")), expression(~italic("n=20")))) + theme(legend.key.width = unit(1.5,"cm")) + theme(legend.text=element_text(colour='black', size=12, face='italic')) + geom_vline(xintercept=0.95, color='gray', size=1, linetype='solid') + NULL fig2 ggsave(filename='Fig2.pdf', plot=fig2, device='pdf', dpi=1200, width=6, height=4, units='in') ggsave(filename='Fig2.jpg', plot=fig2, device='jpeg', dpi=1200, width=6, height=4, units='in') ##################### ## Produce figure 3## ralpha <- 0.05 #1-\alpha is the proportion of the population to catch sampleSize <- 2:150 #Size of the sample from the population based on which to construct the reference ranges n.pts<- length(sampleSize) t1mHAlpha_nu <- qt(1-ralpha/2, df = sampleSize-1) #percentiles of t_{n-1} prop1 <- t1mHAlpha_nu*sqrt(1+1/sampleSize) #The proportional constant of RR_1 plot(sampleSize, prop1, type="l", lty=1, xlim=c(2,150),ylim=c(1.9,4), xlab =expression(n),ylab =expression(c_i)) rgamma = 0.90 c5_90 <- K.factor(sampleSize, f=NULL, alpha=1-rgamma, P = 0.95, side = 2, method = "EXACT", m = 100) #The critical constant for tolerance intervals lines(sampleSize, c5_90, lty=2, col="blue") rgamma = 0.95 c5_95 <- K.factor(sampleSize, f=NULL, alpha=1-rgamma, P = 0.95, side = 2, method = "EXACT", m = 100) #The critical constant for tolerance intervals lines(sampleSize, c5_95, lty=4, col="red") rgamma = 0.99 c5_99 <- K.factor(sampleSize, f=NULL, alpha=1-rgamma, P = 0.95, side = 2, method = "EXACT", m = 100) #The critical constant for tolerance intervals lines(sampleSize, c5_99, lty=6, col="purple") legend("topright", inset=.01, c("c_1","c_5, gamma=0.90","c_5, gamma=0.95","c_5, gamma=0.99"), lty=c(1,2,4,6),cex=0.8) ##### Figure 3 with ggplot2 #### df.Fig3<- data.frame(x=rep(sampleSize,4), ci=c(prop1, c5_90, c5_95, c5_99), type=factor(rep(1:4, rep(n.pts, 4)))) df.Fig3$linetype<- factor(as.numeric(df.Fig3$type), labels=linetype) df.Fig3$color<- lcols[as.numeric(df.Fig3$type)] fig3<- ggplot(df.Fig3, aes(x=x, y=ci, colour=color, linetype=linetype)) + geom_line(aes(linetype=type, col=type),size=1.25) + scale_color_discrete(guide = "legend") + scale_y_continuous(limits=c(1.9,4)) + scale_x_continuous(limits=c(0,150), breaks=seq(0,150,by=25)) + labs(x=expression(~italic(n)), y=expression(~italic("c"["i"]))) + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) + theme(legend.position=c(0.75, 0.8)) + scale_colour_manual(values=lcols, name=" ", labels = c(expression(~italic("c"["1"])), expression(~italic("c"["5"],)~gamma~"=0.90"), expression(~italic("c"["5"],)~gamma~"=0.95"), expression(~italic("c"["5"],)~gamma~"=0.99"))) + scale_linetype_manual(values=linetype, name=" ", labels = c(expression(~italic("c"["1"])), expression(~italic("c"["5"],)~gamma~"=0.90"), expression(~italic("c"["5"],)~gamma~"=0.95"), expression(~italic("c"["5"],)~gamma~"=0.99"))) + theme(legend.key.width = unit(1.5,"cm")) + theme(legend.text=element_text(colour='black', size=12, face='italic')) + theme(legend.text.align = 0) + geom_hline(yintercept=1.96,color='gray', size=1, linetype='solid') + annotate("text", x = 140, y = 1.9, size=4, label = expression(~italic("z"[(1+P)/2]))) + NULL fig3 ggsave(filename='Fig3.pdf', plot=fig3, device='pdf', dpi=1200, width=6, height=4, units='in') ### OK ggsave(filename='Fig3.jpg', plot=fig3, device='jpeg', dpi=1200, width=6, height=4, units='in') ######################################################### ## Equal-tailed tolerance interval and Produce figure 4## #install.packages("tolerance") #library("tolerance", lib.loc="C:/Apps/RLibraries") ## Compute c_6 by simulation sim = 1000000 vc6 <- matrix(0, nrow = 3, ncol = length(sampleSize)) #Record the simulated critical values c6 set.seed(10) vNorml <- abs(rnorm(sim)) for(n in sampleSize) { vChisq <- sqrt( (n/(n-1)) * rchisq(sim,df=n-1) ) vRV <- ( vNorml + sqrt(n)*z1mHAlpha )/vChisq vc6[1,n-1] <- quantile(vRV, probs = 0.90 ) vc6[2,n-1] <- quantile(vRV, probs = 0.95 ) vc6[3,n-1] <- quantile(vRV, probs = 0.99 ) } ## Figure 4 plot(sampleSize, c5_90, type="l", lty=2, col="blue", xlim=c(2,150),ylim=c(2.0,4), xlab =expression(n),ylab =expression(c_i)) lines(sampleSize, c5_95, lty=4, col="red") lines(sampleSize, c5_99, lty=6, col="green") lines(sampleSize, vc6[1,], lty=2, col="blue") lines(sampleSize, vc6[2,], lty=4, col="red") lines(sampleSize, vc6[3,], lty=6, col="green") legend("topright", inset=.01, c("c_5, gamma = 0.90","c_5, gamma=0.95","c_5, gamma=0.99","c_6, gamma=0.90","c_6, gamma=0.95","c_6, gamma=0.99"), lty=c(2,4,6,2,4,6),cex=0.8) ##### Figure 4 with ggplot2 #### df.Fig4<-data.frame(x=rep(sampleSize,6), ci=c(c5_90, c5_95, c5_99, vc6[1,], vc6[2,], vc6[3,]), type=factor(rep(1:6, rep(n.pts, 6)))) df.Fig4$linetype<- factor(as.numeric(df.Fig4$type), labels=linetype6) df.Fig4$color<- lcols6[as.numeric(df.Fig4$type)] fig4<- ggplot(df.Fig4, aes(x=x, y=ci, colour=color, linetype=linetype)) + geom_line(aes(linetype=type, col=type),size=1.25) + scale_color_discrete(guide = "legend") + scale_y_continuous(limits=c(1.9,4)) + scale_x_continuous(limits=c(0,150), breaks=seq(0,150,by=25)) + labs(x=expression(~italic(n)), y=expression(~italic("c"["i"]))) + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) + theme(legend.position=c(0.75, 0.70)) + scale_colour_manual(values=lcols6, name = " ", labels = c(expression(~italic("c"["5"])~gamma~"=0.90"), expression(~italic("c"["5"])~gamma~"=0.95"), expression(~italic("c"["5"])~gamma~"=0.99"), expression(~italic("c"["6"])~gamma~"=0.90"), expression(~italic("c"["6"])~gamma~"=0.95"), expression(~italic("c"["6"])~gamma~"=0.99"))) + scale_linetype_manual(values=linetype6, name=" ", labels = c(expression(~italic("c"["5"])~gamma~"=0.90"), expression(~italic("c"["5"])~gamma~"=0.95"), expression(~italic("c"["5"])~gamma~"=0.99"), expression(~italic("c"["6"])~gamma~"=0.90"), expression(~italic("c"["6"])~gamma~"=0.95"), expression(~italic("c"["6"])~gamma~"=0.99"))) + theme(legend.key.width = unit(1.5,"cm")) + theme(legend.text=element_text(colour='black', size=12, face='italic')) + theme(legend.text.align = 0) + geom_hline(yintercept=1.96,color='gray', size=1, linetype='solid') + annotate("text", x = 140, y = 1.9, size=4, label = expression(~italic("z"[(1+P)/2]))) + NULL fig4 ggsave(filename='Fig4.pdf', plot=fig4, device='pdf', dpi=1200, width=6, height=4, units='in') ggsave(filename='Fig4.jpg', plot=fig4, device='jpeg', dpi=1200, width=6, height=4, units='in') ############################################################ ## Nonparametric prediction intervals and Produce figure 5## #install.packages("tolerance") #library("tolerance", lib.loc="C:/Apps/RLibraries") ralpha <- 0.05 minSamSiz <- 2/ralpha ##the minimum sample size required for a given alpha in order to contruct a nonp pred intervals sampleSize_05 <- minSamSiz + 50*(0:100) #sample sizes Jp <- floor( (sampleSize_05+1)*ralpha/2 ) #The j_p for prediction interval ProbCov_05 <- pbeta(1-ralpha, sampleSize_05-2*Jp+1, 2*Jp, lower.tail = FALSE) ## Figure 5 plot(sampleSize_05, ProbCov_05, type="l", lty=2, col="red", xlim=c(20,5250),ylim=c(0.4,1.0), xlab =expression(n),ylab =expression(Prob)) ralpha <- 0.01 minSamSiz <- 2/ralpha ##the minimum sample size required for a given alpha in order to contruct a nonp pred intervals sampleSize_01 <- minSamSiz + 50*(0:100) #sample sizes Jp <- floor( (sampleSize_01+1)*ralpha/2 ) #The j_p for prediction interval ProbCov_01 <- pbeta(1-ralpha, sampleSize_01-2*Jp+1, 2*Jp, lower.tail = FALSE) lines(sampleSize_01, ProbCov_01, lty=6, col="black") ralpha <- 0.10 minSamSiz <- 2/ralpha ##the minimum sample size required for a given alpha in order to contruct a nonp pred intervals sampleSize_10 <- minSamSiz + 50*(0:100) #sample sizes Jp <- floor( (sampleSize_10+1)*ralpha/2 ) #The j_p for prediction interval ProbCov_10 <- pbeta(1-ralpha, sampleSize_10-2*Jp+1, 2*Jp, lower.tail = FALSE) lines(sampleSize_10, ProbCov_10, lty=1, col="blue") abline(h = 0.5, col = "black", lwd = 1) legend("topright", inset=.01, c("alpha = 0.10","alpha=0.05","alpha=0.01"), lty=c(1,2,6),cex=0.8) ##### Figure 5 with ggplot2 #### df.Fig5<-data.frame(x=c(sampleSize_01,sampleSize_05,sampleSize_10), ci=c(ProbCov_01, ProbCov_05, ProbCov_10), type=factor(rep(1:3, rep(length(sampleSize_01), 3)))) linetype3a<- c('twodash', 'longdash','solid') df.Fig5$linetype<- factor(as.numeric(df.Fig5$type), labels=linetype3a) df.Fig5$color<- lcols3[as.numeric(df.Fig5$type)] fig5<- ggplot(df.Fig5, aes(x=x, y=ci, colour=color, linetype=linetype)) + geom_line(aes(linetype=type, col=type),size=1.25) + scale_color_discrete(guide = "legend") + scale_y_continuous(limits=c(0.4,1)) + scale_x_continuous(limits=c(0,5300), breaks=seq(0,5400,by=900)) + labs(x=expression(~italic(n)), y='probability') + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) + theme(legend.position=c(0.75, 0.70)) + scale_colour_manual(values=lcols3, name = " ", labels = c(expression(~italic("P")~"=0.99"), expression(~italic("P")~"=0.95"), expression(~italic("P")~"=0.90"))) + scale_linetype_manual(values=linetype3, name=" ", labels = c(expression(~italic("P")~"=0.99"), expression(~italic("P")~"=0.95"), expression(~italic("P")~"=0.90"))) + theme(legend.key.width = unit(1.5,"cm")) + theme(legend.text=element_text(colour='black', size=12, face='italic')) + theme(legend.text.align = 0) + geom_hline(yintercept=0.5,color='gray', size=1, linetype='solid') + annotate("text", x = 5000, y = 0.48, size=4,label = 'prob = 0.5') + NULL fig5 ggsave(filename='Fig5.pdf', plot=fig5, device='pdf', dpi=1200, width=6, height=4, units='in') ggsave(filename='Fig5.jpg', plot=fig5, device='jpeg', dpi=1200, width=6, height=4, units='in') ############################################################ ## Nonparametric tolerance intervals and Produce figure 6## #install.packages("tolerance") #library("tolerance", lib.loc="C:/Apps/RLibraries") ralpha <- 0.05 minSamSizP <- 2/ralpha ##the minimum sample size required for nonparametric prediction intervals rgamma <- 0.90 #confidence level for tolerance intervals # First, find the smallest sample size nn so that (X_[1], X_[nn]) is a (1-alpha)-content gamma-confidence tolerance tolerance interval # nn=2 while ( 1 - ( nn* (1-ralpha)^(nn-1) - (nn-1)* (1-ralpha)^nn ) < rgamma ){ nn <- nn + 1 } nn #The smallest sample size required 1 - ( nn* (1-ralpha)^(nn-1) - (nn-1)* (1-ralpha)^nn ) sampleSize_90 <- max(minSamSizP,nn) + 10*(0:100) #sample sizes indexJpJt_90 <- matrix(0, nrow = length(sampleSize_90), ncol=2) #compute the j_p for prediction interval and j_t for tolerance interval for(i in 1:length(sampleSize_90)){ jp <- floor( (sampleSize_90[i] +1)*ralpha/2 ) #The j_p for prediction interval jt=1 while ( pbeta(1-ralpha, sampleSize_90[i]-2*jt+1, 2*jt, lower.tail=FALSE) >= rgamma ){ jt <- jt + 1 } jt <- jt-1 #the jt required indexJpJt_90[i,] <- c(jp,jt) } ## Figure 6 plot(sampleSize_90, indexJpJt_90[,1], type="l", lty=2, col="red", xlim=c(20,1200),ylim=c(0.0,30), xlab =expression(n),ylab ="j_p and j_t") lines(sampleSize_90, indexJpJt_90[,2], lty=4, col="black") rgamma <- 0.95 #confidence level for tolerance intervals # First, find the smallest sample size nn so that (X_[1], X_[nn]) is a (1-alpha)-content gamma-confidence tolerance tolerance interval # nn=2 while ( 1 - ( nn* (1-ralpha)^(nn-1) - (nn-1)* (1-ralpha)^nn ) < rgamma ){ nn <- nn + 1 } nn #The smallest sample size required 1 - ( nn* (1-ralpha)^(nn-1) - (nn-1)* (1-ralpha)^nn ) sampleSize_95 <- max(minSamSizP,nn) + 10*(0:100) #sample sizes indexJpJt_95 <- matrix(0, nrow = length(sampleSize_95), ncol=2) #compute the j_p for prediction interval and j_t for tolerance interval for(i in 1:length(sampleSize_95)){ jp <- floor( (sampleSize_95[i] +1)*ralpha/2 ) #The j_p for prediction interval jt=1 while ( pbeta(1-ralpha, sampleSize_95[i]-2*jt+1, 2*jt, lower.tail=FALSE) >= rgamma ){ jt <- jt + 1 } jt <- jt-1 #the jt required indexJpJt_95[i,] <- c(jp,jt) } #lines(sampleSize, indexJpJt[,1], type="l", lty=2, # col="red", xlim=c(20,1200),ylim=c(0.0,30), xlab =expression(n),ylab =expression(j_p, j_t)) lines(sampleSize_95, indexJpJt_95[,2], lty=5, col="blue") rgamma <- 0.99 #confidence level for tolerance intervals # First, find the smallest sample size nn so that (X_[1], X_[nn]) is a (1-alpha)-content gamma-confidence tolerance tolerance interval # nn=2 while ( 1 - ( nn* (1-ralpha)^(nn-1) - (nn-1)* (1-ralpha)^nn ) < rgamma ){ nn <- nn + 1 } nn #The smallest sample size required 1 - ( nn* (1-ralpha)^(nn-1) - (nn-1)* (1-ralpha)^nn ) sampleSize_99 <- max(minSamSizP,nn) + 10*(0:100) #sample sizes indexJpJt_99 <- matrix(0, nrow = length(sampleSize_99), ncol=2) #compute the j_p for prediction interval and j_t for tolerance interval for(i in 1:length(sampleSize_99)){ jp <- floor( (sampleSize_99[i] +1)*ralpha/2 ) #The j_p for prediction interval jt=1 while ( pbeta(1-ralpha, sampleSize_99[i]-2*jt+1, 2*jt, lower.tail=FALSE) >= rgamma ){ jt <- jt + 1 } jt <- jt-1 #the jt required indexJpJt_99[i,] <- c(jp,jt) } #lines(sampleSize, indexJpJt[,1], type="l", lty=2, # col="red", xlim=c(20,1200),ylim=c(0.0,30), xlab =expression(n),ylab =expression(j_p, j_t)) lines(sampleSize_99, indexJpJt_99[,2], lty=7, col="pink") legend("bottomright", inset=.01, c("alpha = 0.05, j_p","gamma=0.90, j_t","gamma=0.95, j_t","gamma=0.99, j_t"), lty=c(2,4,5,7),cex=0.8) ##### Figure 6 with ggplot2 #### df.Fig6<- data.frame(x=c(sampleSize_90,sampleSize_90,sampleSize_95,sampleSize_99), ci=c(indexJpJt_90[,1],indexJpJt_90[,2],indexJpJt_95[,2], indexJpJt_99[,2]), type=factor(rep(1:4, c(length(sampleSize_90),length(sampleSize_90),length(sampleSize_95),length(sampleSize_99))))) df.Fig6$linetype<- factor(as.numeric(df.Fig6$type), labels=linetype) df.Fig6$color<- lcols[as.numeric(df.Fig6$type)] fig6<- ggplot(df.Fig6, aes(x=x, y=ci, colour=color, linetype=linetype)) + geom_line(aes(linetype=type, col=type),size=1.25) + scale_color_discrete(guide = "legend") + scale_y_continuous(limits=c(0,30)) + scale_x_continuous(limits=c(0,1200), breaks=seq(0,1200,by=200)) + labs(x=expression(~italic(n)), y=expression(~italic("j"^"(p)")~and~italic("j"^"(t)"))) + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) + theme(legend.position=c(0.75, 0.25)) + scale_colour_manual(values=lcols, name=" ", labels = c(expression(~italic("P")~"=0.95,"~italic("j"^"(p)")), expression(~gamma~"=0.90,"~italic("j"^"(t)")), expression(~gamma~"=0.95,"~italic("j"^"(t)")), expression(~gamma~"=0.99,"~italic("j"^"(t)")))) + scale_linetype_manual(values=linetype, name=" ", labels = c(expression(~italic("P")~"=0.95,"~italic("j"^"(p)")), expression(~gamma~"=0.90,"~italic("j"^"(t)")), expression(~gamma~"=0.95,"~italic("j"^"(t)")), expression(~gamma~"=0.99,"~italic("j"^"(t)")))) + theme(legend.key.width = unit(1.5,"cm")) + theme(legend.text=element_text(colour='black', size=12, face='italic')) + theme(legend.text.align = 0) + NULL fig6 ggsave(filename='Fig6.pdf', plot=fig6, device='pdf', dpi=1200, width=6, height=4, units='in') ### OK ggsave(filename='Fig6.jpg', plot=fig6, device='jpeg', dpi=1200, width=6, height=4, units='in') ################# ##Example #### #Read the sample into R sample.data <- read.table("\\\\filestore.soton.ac.uk/users/wl/mydocuments/mydocuments/MyResearch/My papers/inProgress/Tolerance sets/Reference range/R/sample.txt", header=F) #Alternatively, File -> Import Dataset -> From CSD #Check it is a data.frame str(sample.data) dim(sample.data) summary(sample.data) sampData <- sample.data[,1] hist(sampData) qqnorm(sampData) ralpha <- 0.05 #0.01 #1-ralpha is the proportion of the population to catch sampleSize <- length(sampData) #20 #Size of the sample from the population based on which to construct the reference ranges rgamma <- 0.95 #Confidence level smean <- mean(sampData) #5.31 #sample mean sstd <- sd(sampData) #0.41 #sample standard deviation #### Normal distribution based intervals #### z1mHAlpha <- qnorm(1-ralpha/2, mean=0, sd=1) #percentile of N(0,1) t1mHAlpha_nu <- qt(1-ralpha/2, df = sampleSize-1) #percentiles of t_{n-1} lambda <- sqrt(2/(sampleSize-1))*gamma(sampleSize/2)/gamma((sampleSize-1)/2) prop1 <- t1mHAlpha_nu*sqrt(1+1/sampleSize) #The proportional constant of RR_i prop2 <- rep(z1mHAlpha, length(sampleSize)) prop3 <- z1mHAlpha/lambda prop4 <- z1mHAlpha*lambda RR_1 <- c( smean-prop1*sstd, smean+prop1*sstd ) ## Find the prob P(KK_1 < 1-ralpha) by simulation ## sim = 1000000 nn <- sampleSize n <- nn c1 = qt(1-ralpha/2, df = nn-1)*sqrt(1+1/nn) set.seed(101) norml <- rnorm(sim) chisqu <- rchisq(sim,df=nn-1) KK1 = pnorm(norml/sqrt(nn)+c1*sqrt(chisqu/(nn-1))) - pnorm(norml/sqrt(nn)-c1*sqrt(chisqu/(nn-1))) sum((KK1 < 1-ralpha))/sim # The prob required #sum((KK1 < 0.93))/sim ## The tolerance interval ## c5 <- K.factor(sampleSize, f=NULL, alpha=1-rgamma, P = 1-ralpha, side = 2, method = "EXACT", m = 100) #The critical constant for tolerance intervals RR_5 <- c( smean-c5*sstd, smean+c5*sstd ) vNorml <- abs(rnorm(sim)) vChisq <- sqrt( (n/(n-1)) * rchisq(sim,df=n-1) ) vRV <- ( vNorml + sqrt(n)*z1mHAlpha )/vChisq sum((vRV < c5))/sim # The prob that the tolerance interval contains the middle 1-alpha of the population ## The equal-tailed tolerance interval set.seed(10) vNorml <- abs(rnorm(sim)) vChisq <- sqrt( (n/(n-1)) * rchisq(sim,df=n-1) ) vRV <- ( vNorml + sqrt(n)*z1mHAlpha )/vChisq c6 <- quantile(vRV, probs = rgamma ) RR_6 <- c( smean-c6*sstd, smean+c6*sstd ) set.seed(10) norml <- rnorm(sim) chisqu <- rchisq(sim,df=nn-1) KK6 = pnorm(norml/sqrt(nn)+c6*sqrt(chisqu/(nn-1))) - pnorm(norml/sqrt(nn)-c6*sqrt(chisqu/(nn-1))) sum((KK6 > 1-ralpha))/sim # The prob that the interval contains 1-alpha proportion of the population quantile(KK6, probs = 0.05 ) K.factor(sampleSize, f=NULL, alpha=1-rgamma, P = 0.9570664, side = 2, method = "EXACT", m = 100) #### Nonparametric intervals #### ## Prediction interval ## ralpha <- 0.05 minSamSiz <- 2/ralpha ##the minimum sample size required for a given alpha in order to contruct a nonp pred intervals Jp <- floor( (sampleSize+1)*ralpha/2 ) #The j_p for prediction interval sSample = sort(sampData) # Sorted sample observations RR_7 <- c( sSample[Jp], sSample[sampleSize-Jp+1]) #The prediction interval ProbCov <- pbeta(1-ralpha, sampleSize-2*Jp+1, 2*Jp, lower.tail = FALSE) #The chance that RR_7 catches at least 1-alpha of the population 1-ProbCov ## Tolerance interval ## rgamma <- 0.95 #confidence level for tolerance intervals # First, find the smallest sample size nn so that (X_[1], X_[nn]) is a (1-alpha)-content gamma-confidence tolerance tolerance interval # nn=2 while ( 1 - ( nn* (1-ralpha)^(nn-1) - (nn-1)* (1-ralpha)^nn ) < rgamma ){ nn <- nn + 1 } nn #The smallest sample size required 1 - ( nn* (1-ralpha)^(nn-1) - (nn-1)* (1-ralpha)^nn ) jt=1 while ( pbeta(1-ralpha, sampleSize-2*jt+1, 2*jt, lower.tail=FALSE) >= rgamma ){ jt <- jt + 1 } jt <- jt-1 #the jt required for the tolerance interval RR_8 <- c( sSample[jt], sSample[sampleSize-jt+1]) #The tolerance interval pbeta(1-ralpha, sampleSize-2*jt+1, 2*jt, lower.tail=FALSE) #The true confidence level