########################################
#
#  Script: 13 January 2011 (20110113.R)
#
########################################


####################
# Part I: Exploring power in comparing simple hypotheses

# Power is the ability to resolve the difference between two
# competing hypotheses, H0 and HA. Greater power allows us to
# determine which if the two is (most likely) correct. Lower power
# means that we cannot resolve the two distributions. Power is the
# probability that we select the alternative hypothesis when we should,
# and is the opposite of the Type II error rate.

# Here, we graph and calculate the power for two Normal distributions
# using one-tailed tests.


# Set parameters (change these to change the graphs and the power)
alpha <- 0.05   						# The Type I error rate
m0 <- 0         						# The mean of your null hypothesis
s0 <- 2         						# The standard deviation of your null hypothesis
mA <- 4         						# The mean of your alternative hypothesis
sA <- 2         						# The standard deviation of your alternative hypothesis

# Calculate the curve
x <- seq(-5,10,0.01)					# Create a sequence of x-values
y0 <- dnorm(x, mean=m0,sd=s0)   		# Calculate the density at each x-value (H0)

# Plot the curve
plot(x,y0, type="l", xlim=c(-5,10), ylim=c(0,1), ylab="", xlab="",yaxs="i", las=1, lwd=2 )
text(m0,0.3/s0,expression(H[0]))

# Use the quantile function to calculate the cv
cv <- qnorm(1-alpha, mean=m0,sd=s0)		# qnorm is the inverse of pnorm

# Indicate the cv
segments(cv,0,cv,0.5, lty=2, col=2)		# Draw a segment from (cv,0) to (cv,0.5)
text(cv,0.52, labels="CV", col=2)		# Print some text on the plot
axis(1,at=cv,labels=round(cv,3) )		# Place the CV on the x-axis

# Shade the curve above the cv
xv <- seq(cv,10,0.05)
segments(xv,0, xv,dnorm(xv, mean=m0,sd=s0) )



## Show two curves at once
x <- seq(-5,10,0.01)					# Create a sequence of x-values
y0 <- dnorm(x, mean=m0,sd=s0)			# Calculate the density for the null hypothesis
yA <- dnorm(x, mean=mA,sd=sA)			# Calculate the density for the alternative hypothesis


# Plot Curve 0 (the null)
plot(x,y0, type="l", xlim=c(-5,10), ylim=c(0,1), ylab="", xlab="",yaxs="i", las=1, lwd=2 )
text(m0,0.3/s0, expression(H[0]) )

# Plot Curve 1 (the alternative)
lines(x,yA, col=4, lwd=2)
text(mA,0.3/sA, expression(H[A]), col=4 )

# Calculate the critical value (cv)
cv <- qnorm(1-alpha, mean=m0,sd=s0)		# The cv for the null hypothesis

# Indicate the cv
segments(cv,0,cv,0.5, lty=2, col=2)		# Draw the CV segment
text(cv,0.52, labels="CV", col=2)		# Write CV on the plot
axis(1, at=cv, labels=round(cv,3) )		# Label the CV on the x-axis

# Calculate power
alpha <- 1-pnorm(cv, mean=m0,sd=s0)		# Check that the Type I error rate is correct
beta  <-   pnorm(cv, mean=mA,sd=sA)		# Calculate the Type II error rate
power <- 1-beta							# Calculate the power

# Display power on the plot
text(0,0.9,paste("Power =",round(power,4)))





####################
# Part II: Creating power curves

# Creating power curves is simply calculating the power over several 
# different values of one of the five values that affects it. Doing this
# by hand is onerous. Doing it on a computer can show what is involved
# in the calculations, as such:


# First, let us create function to calculate power
powercalc <- function(m0,mA,s0,sA,alpha) {
	cv <- qnorm(1-alpha, mean=m0,sd=s0)		# Calculate the CV (of the null)
	power <- 1-pnorm(cv, mean=mA,sd=sA)		# Calculate 1-beta (of the alternative)
	return(power)
}
# This function is what we did in the first part without dealing
# with graphing things. Note that the power is the area to the 
# right of the CV in the Alternative distribution. Also remember
# that the CV is always calculated according to the Null distribution.

# Also note the use of qnorm and pnorm. Review the difference between 
# the two and why I use them in this order.

# Now, let us define our two competing distributions
m0 <- 0         						# The mean of your null hypothesis
s0 <- 2         						# The standard deviation of your null hypothesis
mA <- 4         						# The mean of your alternative hypothesis
sA <- 2         						# The standard deviation of your alternative hypothesis

# and the Type I error rate
alpha <- 0.05


# The next step is to change one of the parameters several times and
# calculate the power for each change. A loop construct is ideal for
# this.

# First, we define our vectors:
xval <- numeric()	# This will hold the changed values
pwr  <- numeric()	# This will hold the calculated power values

for(i in 1:100) {	# This will loop 100 times, changing `i' from 1 to 100, in turn
alpha <- i/100		# This will be our changing parameter (alpha will change from 0.01 to 1.00
pwr[i] <- powercalc(m0,mA,s0,sA,alpha)	# Use the function above to calculate the power and save it
xval[i] <- alpha	# Also save the alpha value
}					# Loops are bracketed between braces { and }, so this is the end of the loop

# Let us now graph the results
plot(xval,pwr, type="l",ylab="Power",las=1,ylim=c(0,1),yaxs="i",xaxs="i",main="Power Curve")
axis(2,at=pwr[1],labels=round(pwr[1],2),las=1,cex.axis=0.8)


###
# Instead of changing alpha, change the mean of the alternative using:
m0 <- 0; s0 <- 2; mA <- 4; sA <- 2; alpha <- 0.05
xval <- numeric()
pwr  <- numeric()
for(i in 1:100) {
mA <- i/20
pwr[i] <- powercalc(m0,mA,s0,sA,alpha)
xval[i] <- mA
}
plot(xval,pwr, type="l",ylab="Power",las=1,ylim=c(0,1),yaxs="i",xaxs="i",main="Power Curve")
axis(2,at=pwr[1],labels=round(pwr[1],2),las=1,cex.axis=0.8)


###
# Or, you can change the alternative's variance:
m0 <- 0; s0 <- 2; mA <- 4; sA <- 2; alpha <- 0.05
xval <- numeric()
pwr  <- numeric()
for(i in 1:100) {
sA <- i/10
pwr[i] <- powercalc(m0,mA,s0,sA,alpha)
xval[i] <- sA
}
plot(xval,pwr, type="l",ylab="Power",las=1,ylim=c(0,1),yaxs="i",xaxs="i",main="Power Curve")
axis(2,at=pwr[1],labels=round(pwr[1],2),las=1,cex.axis=0.8)

# Notice that there are only two lines changed in each option


# So, in summary, what are the effects on power for each of the following changes?
#  1. Increasing alpha?
#  2. Increasing the distance between the means?
#  3. Increasing the variance of the alternative?





####################
# Part III: Calculating probabilities

# There are three functions in R that are very useful for dealing with
# probability distributions and calculating probabilities. For the 
# Normal distribution, those three functions are

#   pnorm(x)		gives the F(x), the CDF value
#   dnorm(x)		gives the f(x), the pdf value
#   qnorm(x)		gives the inverse of F(x), the quantile value

# Each of the three can take two optional parameters, m (mean) and s (sd). *****
# Thus,
#   pnorm(x,m=1,sd=10) gives the CDF value for a Normal(1,10) distribution
#   dnorm(x,m=3,s=1)   gives the pdf value for a Normal(3,1) distribution

# Finally notice that 
#              pnorm(qnorm(x)) = qnorm(pnorm(x)) = x
# as these two are inverses of each other.



# Some practice

# To calculate the 95th percentile of a standard normal:
qnorm(0.95)

# To calculate F(3) for a Normal(2,4) distribution:
pnorm(3, m=2,s=4)

# What is the probability of getting a value less than 10 in a Normal(15,5)?
pnorm(10, m=15,s=5)

# What is the median of the Normal(3,19)?
qnorm(0.500, m=3,s=19)

# What is the probability of getting between 3 and 5 in a Normal(2,1) curve?
pnorm(5, m=2,s=1) - pnorm(3, m=2,s=1)

# Finally, what is the 10th percentile of a Normal(14,14) distribution?
qnorm(0.10, m=14,s=14)


###
# This also works for other distributions. The key is to remember the p,d,q meanings
# and the abbreviation for the distribution. 

# What is the median of an Exponential(3) distribution?
qexp(0.500, rate=3)
# Note that, for the Exponential here, rate = 1 / mean

# What is the probability of getting a value greater than 40 in an Exponential(0.05)?
1-pexp(40, rate=0.05)

# What is the probability of getting more than a 3 in a ChiSquare(df=10)?
1-pchisq(3, df=10)


# Straight forward?