######################################## # # Script: 13 January 2011 (20110113.R) # ######################################## #################### # Part I: Exploring power in comparing simple hypotheses # Power is the ability to resolve the difference between two # competing hypotheses, H0 and HA. Greater power allows us to # determine which if the two is (most likely) correct. Lower power # means that we cannot resolve the two distributions. Power is the # probability that we select the alternative hypothesis when we should, # and is the opposite of the Type II error rate. # Here, we graph and calculate the power for two Normal distributions # using one-tailed tests. # Set parameters (change these to change the graphs and the power) alpha <- 0.05 # The Type I error rate m0 <- 0 # The mean of your null hypothesis s0 <- 2 # The standard deviation of your null hypothesis mA <- 4 # The mean of your alternative hypothesis sA <- 2 # The standard deviation of your alternative hypothesis # Calculate the curve x <- seq(-5,10,0.01) # Create a sequence of x-values y0 <- dnorm(x, mean=m0,sd=s0) # Calculate the density at each x-value (H0) # Plot the curve plot(x,y0, type="l", xlim=c(-5,10), ylim=c(0,1), ylab="", xlab="",yaxs="i", las=1, lwd=2 ) text(m0,0.3/s0,expression(H[0])) # Use the quantile function to calculate the cv cv <- qnorm(1-alpha, mean=m0,sd=s0) # qnorm is the inverse of pnorm # Indicate the cv segments(cv,0,cv,0.5, lty=2, col=2) # Draw a segment from (cv,0) to (cv,0.5) text(cv,0.52, labels="CV", col=2) # Print some text on the plot axis(1,at=cv,labels=round(cv,3) ) # Place the CV on the x-axis # Shade the curve above the cv xv <- seq(cv,10,0.05) segments(xv,0, xv,dnorm(xv, mean=m0,sd=s0) ) ## Show two curves at once x <- seq(-5,10,0.01) # Create a sequence of x-values y0 <- dnorm(x, mean=m0,sd=s0) # Calculate the density for the null hypothesis yA <- dnorm(x, mean=mA,sd=sA) # Calculate the density for the alternative hypothesis # Plot Curve 0 (the null) plot(x,y0, type="l", xlim=c(-5,10), ylim=c(0,1), ylab="", xlab="",yaxs="i", las=1, lwd=2 ) text(m0,0.3/s0, expression(H[0]) ) # Plot Curve 1 (the alternative) lines(x,yA, col=4, lwd=2) text(mA,0.3/sA, expression(H[A]), col=4 ) # Calculate the critical value (cv) cv <- qnorm(1-alpha, mean=m0,sd=s0) # The cv for the null hypothesis # Indicate the cv segments(cv,0,cv,0.5, lty=2, col=2) # Draw the CV segment text(cv,0.52, labels="CV", col=2) # Write CV on the plot axis(1, at=cv, labels=round(cv,3) ) # Label the CV on the x-axis # Calculate power alpha <- 1-pnorm(cv, mean=m0,sd=s0) # Check that the Type I error rate is correct beta <- pnorm(cv, mean=mA,sd=sA) # Calculate the Type II error rate power <- 1-beta # Calculate the power # Display power on the plot text(0,0.9,paste("Power =",round(power,4))) #################### # Part II: Creating power curves # Creating power curves is simply calculating the power over several # different values of one of the five values that affects it. Doing this # by hand is onerous. Doing it on a computer can show what is involved # in the calculations, as such: # First, let us create function to calculate power powercalc <- function(m0,mA,s0,sA,alpha) { cv <- qnorm(1-alpha, mean=m0,sd=s0) # Calculate the CV (of the null) power <- 1-pnorm(cv, mean=mA,sd=sA) # Calculate 1-beta (of the alternative) return(power) } # This function is what we did in the first part without dealing # with graphing things. Note that the power is the area to the # right of the CV in the Alternative distribution. Also remember # that the CV is always calculated according to the Null distribution. # Also note the use of qnorm and pnorm. Review the difference between # the two and why I use them in this order. # Now, let us define our two competing distributions m0 <- 0 # The mean of your null hypothesis s0 <- 2 # The standard deviation of your null hypothesis mA <- 4 # The mean of your alternative hypothesis sA <- 2 # The standard deviation of your alternative hypothesis # and the Type I error rate alpha <- 0.05 # The next step is to change one of the parameters several times and # calculate the power for each change. A loop construct is ideal for # this. # First, we define our vectors: xval <- numeric() # This will hold the changed values pwr <- numeric() # This will hold the calculated power values for(i in 1:100) { # This will loop 100 times, changing `i' from 1 to 100, in turn alpha <- i/100 # This will be our changing parameter (alpha will change from 0.01 to 1.00 pwr[i] <- powercalc(m0,mA,s0,sA,alpha) # Use the function above to calculate the power and save it xval[i] <- alpha # Also save the alpha value } # Loops are bracketed between braces { and }, so this is the end of the loop # Let us now graph the results plot(xval,pwr, type="l",ylab="Power",las=1,ylim=c(0,1),yaxs="i",xaxs="i",main="Power Curve") axis(2,at=pwr[1],labels=round(pwr[1],2),las=1,cex.axis=0.8) ### # Instead of changing alpha, change the mean of the alternative using: m0 <- 0; s0 <- 2; mA <- 4; sA <- 2; alpha <- 0.05 xval <- numeric() pwr <- numeric() for(i in 1:100) { mA <- i/20 pwr[i] <- powercalc(m0,mA,s0,sA,alpha) xval[i] <- mA } plot(xval,pwr, type="l",ylab="Power",las=1,ylim=c(0,1),yaxs="i",xaxs="i",main="Power Curve") axis(2,at=pwr[1],labels=round(pwr[1],2),las=1,cex.axis=0.8) ### # Or, you can change the alternative's variance: m0 <- 0; s0 <- 2; mA <- 4; sA <- 2; alpha <- 0.05 xval <- numeric() pwr <- numeric() for(i in 1:100) { sA <- i/10 pwr[i] <- powercalc(m0,mA,s0,sA,alpha) xval[i] <- sA } plot(xval,pwr, type="l",ylab="Power",las=1,ylim=c(0,1),yaxs="i",xaxs="i",main="Power Curve") axis(2,at=pwr[1],labels=round(pwr[1],2),las=1,cex.axis=0.8) # Notice that there are only two lines changed in each option # So, in summary, what are the effects on power for each of the following changes? # 1. Increasing alpha? # 2. Increasing the distance between the means? # 3. Increasing the variance of the alternative? #################### # Part III: Calculating probabilities # There are three functions in R that are very useful for dealing with # probability distributions and calculating probabilities. For the # Normal distribution, those three functions are # pnorm(x) gives the F(x), the CDF value # dnorm(x) gives the f(x), the pdf value # qnorm(x) gives the inverse of F(x), the quantile value # Each of the three can take two optional parameters, m (mean) and s (sd). ***** # Thus, # pnorm(x,m=1,sd=10) gives the CDF value for a Normal(1,10) distribution # dnorm(x,m=3,s=1) gives the pdf value for a Normal(3,1) distribution # Finally notice that # pnorm(qnorm(x)) = qnorm(pnorm(x)) = x # as these two are inverses of each other. # Some practice # To calculate the 95th percentile of a standard normal: qnorm(0.95) # To calculate F(3) for a Normal(2,4) distribution: pnorm(3, m=2,s=4) # What is the probability of getting a value less than 10 in a Normal(15,5)? pnorm(10, m=15,s=5) # What is the median of the Normal(3,19)? qnorm(0.500, m=3,s=19) # What is the probability of getting between 3 and 5 in a Normal(2,1) curve? pnorm(5, m=2,s=1) - pnorm(3, m=2,s=1) # Finally, what is the 10th percentile of a Normal(14,14) distribution? qnorm(0.10, m=14,s=14) ### # This also works for other distributions. The key is to remember the p,d,q meanings # and the abbreviation for the distribution. # What is the median of an Exponential(3) distribution? qexp(0.500, rate=3) # Note that, for the Exponential here, rate = 1 / mean # What is the probability of getting a value greater than 40 in an Exponential(0.05)? 1-pexp(40, rate=0.05) # What is the probability of getting more than a 3 in a ChiSquare(df=10)? 1-pchisq(3, df=10) # Straight forward?