##### Slidedeck e1: Hypothesis Testing Means in R ##### source("http://rfs.kvasaheim.com/stat200.R") ito1 = "#E1EBEE" ito2 = "#00AAE4" ito = c(ito1,ito2) set.seed(370) ##### Ex 1: Bainbridge IPE # The Data dt = read.csv("http://rfs.kvasaheim.com/data/bainbridge.csv") # The Data Graph par(bg="transparent") par(yaxs="i", yaxt="n") par(family="serif",las=1) par(mar=c(3.5,0,0,1)) par(font.lab=2,cex.lab=1.1) plot.new() plot.window( xlim=c(0,60000), ylim=c(0,0.00008)) axis(1, at=seq(0,60000,10000), label=prettyNum(seq(0,60000,10000),big.mark=",") ) ; title(xlab="GDP per Capita [USD]", line=2.25) tt = hist(dt$gdpcap, plot=F, breaks=seq(0,125000,5000)) for(i in 1:length(tt$density)) { rect(tt$breaks[i],0, tt$breaks[i+1],tt$density[i], col=ito[2] ) } axis(1, at=mean(dt$gdpcap), label="") mtext(1, at=mean(dt$gdpcap), text=expression(bar(x)), line=0.5 ) ; ### The Bootstrap # Initialize variables B = 1000000 ## Specify the number of iterations ts = numeric() ## Tell R to set aside memory # Simple bootstrapping of the mean for(i in 1:B) { x = sample(dt$gdpcap, replace=TRUE) ## Sample from data ts[i] = mean(x) ## Calculate mean } # Analysis Calculations 2 * mean( ts > 15000 ) ## p-value quantile( ts, c(0.025,0.975) ) ## confidence interval par(bg="transparent") par(yaxs="i", xpd=NA, yaxt="n") par(family="serif",las=1) par(mar=c(3.5,0,0,1.5)) par(font.lab=2,cex.lab=1.1) plot.new() plot.window( xlim=c(6000,22000), ylim=c(0,0.00022)) axis(1, at=seq(5000,25000,2500), label=prettyNum( seq(5000,25000,2500), big.mark=",")); axis(1, at=mean(ts), label=" ", cex.axis=0.8) mtext(1, at=mean(ts), text="obs", cex.axis=0.8, line=0.25) title(xlab="Sample Average of GDP per Capita [USD]", line=2.25) tt = hist(ts, plot=F, breaks=seq(5000,25000,500)) for(i in 1:length(tt$density)) { rect(tt$breaks[i],0, tt$breaks[i+1],tt$density[i], col=ito1) } ci = quantile(ts, c(0.025,0.975)) ci = c(9500,17500) ## Adjusted for niceness par(bg="transparent") par(yaxs="i", xpd=NA, yaxt="n") par(family="serif",las=1) par(mar=c(3.5,0,0,1.5)) par(font.lab=2,cex.lab=1.1) plot.new() plot.window( xlim=c(6000,22000), ylim=c(0,0.00022)) axis(1, at=seq(5000,25000,2500), label=prettyNum( seq(5000,25000,2500), big.mark=",")); axis(1, at=mean(ts), label=" ", cex.axis=0.8) mtext(1, at=mean(ts), text="obs", cex.axis=0.8, line=0.25) title(xlab="Sample Average of GDP per Capita [USD]", line=2.25) tt = hist(ts, plot=F, breaks=seq(5000,25000,500)) for(i in 1:length(tt$density)) { rect(tt$breaks[i],0, tt$breaks[i+1],tt$density[i], col=ito[1+(tt$breaks[i]=ci[2])]) } ### Test Calculations hildebrand.rule(dt$gdpcap) wilcox.test(dt$gdpcap, mu=15000, conf.int=TRUE) shapiroTest(dt$gdpcap) t.test(dt$gdpcap, mu=15000) ### End of File