##### Demonstration Script #4e ##### MATH322 ##### ##### Three Goodness-of-Fit tests ##### and more power! ### Preamble # Load a package library(nortest) # If you got this error: # Error in library(nortest) : there is no package called ‘nortest’ # then you will need to install this package. # Import some extended functions source("http://rfs.kvasaheim.com/stat200.R") ### The power against asymmetric distributions # In class, we did the Chi-Square distribution. It is a perfectly # fine distribution to use here. To expand your experiences, I # will use the Gamma distribution as our asymmetric distribution. # Gamma(1,1) x = seq(0,30,length=1e4) y = dgamma(x,shape=1,scale=1) plot(x,y) # Gamma(10,1) x = seq(0,30,length=1e4) y = dgamma(x,shape=10,scale=1) plot(x,y) ## The Anderson-Darling test pwrAD=numeric() for(i in 1:30) { pvalue = numeric() for(j in 1:1e4) { x = rgamma(100, shape=i,scale=1) pvalue[j] = ad.test(x)$p.value } pwrAD[i] = mean(pvalue<0.05) } plot(pwrAD, pch=17,col="green4", ylim=c(0,1)) ### The Shapiro-Wilk test pwrSW=numeric() for(i in 1:30) { pvalue = numeric() for(j in 1:1e4) { x = rgamma(100, shape=i,scale=1) pvalue[j] = shapiro.test(x)$p.value } pwrSW[i] = mean(pvalue<0.05) } points(pwrSW,col=rgb(30:1/30,1:30/30,1:30/30),pch="\u2605") ##### # Since we tend to concern ourselves with getting at least 80% power, # a sample size of n=100 is only enough to detect that a Gamma(6,1) # is non-Normal. # # If you need to ensure that you can detect non-Normality in a Gamma(10,1) # distribution, what sample size will you need? ## The Anderson-Darling test pwrAD=numeric() for(i in 1:20) { pvalue = numeric() for(j in 1:1e4) { x = rgamma(175, shape=i,scale=1) pvalue[j] = ad.test(x)$p.value } pwrAD[i] = mean(pvalue<0.05) } plot(pwrAD, pch=17,col="green4", ylim=c(0,1)) ### The Shapiro-Wilk test pwrSW=numeric() for(i in 1:20) { pvalue = numeric() for(j in 1:1e4) { x = rgamma(175, shape=i,scale=1) pvalue[j] = shapiro.test(x)$p.value } pwrSW[i] = mean(pvalue<0.05) } points(pwrSW,col=rgb(30:1/30,1:30/30,1:30/30),pch=5) ##### Graphics # A better graphic in terms of aesthetics. One nice thing about R # is that you can alter any part of a graphic... as long as you # know how. par(las=1) par(family="serif") par(xaxs="i",yaxs="i") par(mar=c(3,3,0.25,0)+0.5) par(cex.lab=1.25, font.lab=2) plot.new() plot.window( xlim=c(0,21), ylim=c(0,1) ) abline(h=0.80, col="grey", lty=3) abline(v=10, col="grey", lty=3) axis(1) axis(2) title(xlab="Shape Parameter", line=2.5) title(ylab="Power",line=2.5) points(pwrAD, col="green4", pch="\u2709") # Nadine's email symbol points(pwrSW, col=rgb(30:1/30,1:30/30,1:30/30),pch="\u2605") ### So, for this run (which should be pretty close to reality), # a sample size of 175 is more than enough to achieve the power # goals using the Shapiro-Willk test, but not enough for the # Anderson-Darling test (n=200 is sufficient, however).