##############################
#
# Script: assignment04a.R
#   Answers to the third 
#   homework assignment
#
##############################


# Preamble

library(RFS)



#####
# Problem 1

# Part a: n=30
set.seed(577)
p <- numeric()  # p-values
t <- 1e6        # trials
n <- 30         # sample size

for(i in 1:t) {
  x <- rexp(n,1)
  p[i] <- z.test(x, mu=1, sigmax=1)$p.value
}

png("hist30.png",height=4,width=6,units="in",res=600)
par(cex=0.8, cex.lab=0.8,cex.axis=0.8)
par(mar=c(4.1,0,0,0)+0.1)
hist(p, yaxt="n", xlab="p-value", main="")
abline(h=t/20, col=4, lty=2)
dev.off()

ks.test(p, punif)



# Part b: n=50
set.seed(577)
p <- numeric()  # p-values
t <- 1e6        # trials
n <- 50         # sample size

for(i in 1:t) {
  x <- rexp(n,1)
  p[i] <- z.test(x, mu=1, sigmax=1)$p.value
}

png("hist50.png",height=4,width=6,units="in",res=600)
par(cex=0.8, cex.lab=0.8,cex.axis=0.8)
par(mar=c(4.1,0,0,0)+0.1)
hist(p, yaxt="n", xlab="p-value", main="")
abline(h=t/20, col=4, lty=2)
dev.off()

ks.test(p, punif)





#####
# Problem 2

set.seed(30)
alpha <- 0.05  # Our typical alpha-level
trials <- 1e4  # Number of trials to run

n1 <- 20       # sample size (years) for Station 1
n2 <- 20       # sample size (years) for Station 2
pp <- 0.30     # Pooled proportion

TS <- numeric() # To hold our test statistic

for(i in 1:trials) {
  X <- rbinom(1,size=n1, prob=pp) # Station 1 dist
  Y <- rbinom(1,size=n2, prob=pp) # Station 2 dist
  TS[i] <- X-Y                    # Test stat
}

length( which(TS>2) )/trials * 2    # p-value
quantile(TS,c(alpha/2, 1-alpha/2))  # conf int




# Alternatively (and quicker), we can draw
# multiple times from the distributions at once
trials <- 1e8
X  <- rbinom(trials,size=n1, prob=pp) # Station 1 dist
Y  <- rbinom(trials,size=n2, prob=pp) # Station 2 dist
TS <- X-Y                    # Test stat

length( which(TS>2) )/trials * 2    # p-value
quantile(TS,c(alpha/2, 1-alpha/2))  # conf int


# The histogram
png("TShistogram.png",height=4,width=6,units="in",res=600)
par(cex=0.8, cex.lab=0.8,cex.axis=0.8)
par(mar=c(4.1,0,0,0)+0.1)
hist(TS, breaks=-15.5:15.5, main="", yaxt="n", xlab=expression(mu[1]-mu[2]) )
abline(h=0)
axis(1,at=4,label="TS")
hist(TS[TS > 5], breaks=-15.5:15.5, main="", yaxt="n", xlab=expression(mu[1]-mu[2]), add=TRUE, col=2)
hist(TS[TS < -5], breaks=-15.5:15.5, main="", yaxt="n", xlab=expression(mu[1]-mu[2]), add=TRUE, col=2)
dev.off()