##### Slidedeck e1: Hypothesis Testing Means in R
#####

source("http://rfs.kvasaheim.com/stat200.R")


ito1 = "#E1EBEE"
ito2 = "#00AAE4"

ito = c(ito1,ito2)

set.seed(370)


##### Ex 1: Bainbridge IPE

# The Data
dt = read.csv("http://rfs.kvasaheim.com/data/bainbridge.csv")


# The Data Graph

par(bg="transparent")
par(yaxs="i", yaxt="n")
par(family="serif",las=1)
par(mar=c(3.5,0,0,1))
par(font.lab=2,cex.lab=1.1)

plot.new()
plot.window( xlim=c(0,60000), ylim=c(0,0.00008))

axis(1, at=seq(0,60000,10000), label=prettyNum(seq(0,60000,10000),big.mark=",") ) ;
title(xlab="GDP per Capita [USD]", line=2.25)


tt = hist(dt$gdpcap, plot=F, breaks=seq(0,125000,5000))
for(i in 1:length(tt$density)) {
  rect(tt$breaks[i],0, tt$breaks[i+1],tt$density[i], col=ito[2] )
}
axis(1, at=mean(dt$gdpcap), label="")
mtext(1, at=mean(dt$gdpcap), text=expression(bar(x)), line=0.5 ) ;


### The Bootstrap
# Initialize variables
B  = 1000000                           ## Specify the number of iterations
ts = numeric()                         ## Tell R to set aside memory

# Simple bootstrapping of the mean
for(i in 1:B) {
  x = sample(dt$gdpcap, replace=TRUE)  ## Sample from data
  ts[i] = mean(x)                      ## Calculate mean
}

# Analysis Calculations
2 * mean( ts > 15000 )                 ## p-value
quantile( ts, c(0.025,0.975) )         ## confidence interval


par(bg="transparent")
par(yaxs="i", xpd=NA, yaxt="n")
par(family="serif",las=1)
par(mar=c(3.5,0,0,1.5))
par(font.lab=2,cex.lab=1.1)

plot.new()
plot.window( xlim=c(6000,22000), ylim=c(0,0.00022))

axis(1, at=seq(5000,25000,2500), label=prettyNum( seq(5000,25000,2500), big.mark=","));
axis(1, at=mean(ts), label=" ", cex.axis=0.8)
mtext(1, at=mean(ts), text="obs", cex.axis=0.8, line=0.25)

title(xlab="Sample Average of GDP per Capita [USD]", line=2.25)

tt = hist(ts, plot=F, breaks=seq(5000,25000,500))
for(i in 1:length(tt$density)) {
  rect(tt$breaks[i],0, tt$breaks[i+1],tt$density[i], col=ito1)
}


ci = quantile(ts, c(0.025,0.975))
ci = c(9500,17500) ## Adjusted for niceness


par(bg="transparent")
par(yaxs="i", xpd=NA, yaxt="n")
par(family="serif",las=1)
par(mar=c(3.5,0,0,1.5))
par(font.lab=2,cex.lab=1.1)

plot.new()
plot.window( xlim=c(6000,22000), ylim=c(0,0.00022))

axis(1, at=seq(5000,25000,2500), label=prettyNum( seq(5000,25000,2500), big.mark=","));
axis(1, at=mean(ts), label=" ", cex.axis=0.8)
mtext(1, at=mean(ts), text="obs", cex.axis=0.8, line=0.25)

title(xlab="Sample Average of GDP per Capita [USD]", line=2.25)

tt = hist(ts, plot=F, breaks=seq(5000,25000,500))
for(i in 1:length(tt$density)) {
  rect(tt$breaks[i],0, tt$breaks[i+1],tt$density[i], 
    col=ito[1+(tt$breaks[i]<ci[1] | tt$breaks[i]>=ci[2])])
}


### Test Calculations

hildebrand.rule(dt$gdpcap)
wilcox.test(dt$gdpcap, mu=15000, conf.int=TRUE)

shapiroTest(dt$gdpcap)
t.test(dt$gdpcap, mu=15000)


### End of File