######################################## # # Script: 4 April 2011 (preliminary) # ######################################## # Today: # # ** Bootstrapping # # ** Experimental design # # ################################################## ################################################## # Bootstrapping data <- read.csv("http://courses.kvasaheim.com/stat40x3/data/crime.csv", header=TRUE) names(data) m1 <- glm( gspcap00 ~ gspcap90 + vcrime90, family=gaussian(link=identity), data=data ) summary(m1) dim(data) n <- length(data$vcrime00) sam <- sample(1:n, n, replace=TRUE) m2 <- glm( gspcap00 ~ gspcap90 + vcrime90, family=gaussian(link=identity), data=data[sam,] ) summary(m2) m2$coefficients m2$coefficients[1] m2$coefficients[2] m2$coefficients[3] # We can store these values est.int <- m2$coefficients[1] est.gsp <- m2$coefficients[2] est.vcr <- m2$coefficients[3] # However... # Solution # Clear memory for them est.int <- numeric() est.gsp <- numeric() est.vcr <- numeric() # Set number of trials trials <- 1e3 # Start loop for(i in 1:trials) { sam <- sample(1:n, n, replace=TRUE) m <- glm( gspcap00 ~ gspcap90 + vcrime90, family=gaussian(link=identity), data=data[sam,] ) est.int[i] <- m$coefficients[1] est.gsp[i] <- m$coefficients[2] est.vcr[i] <- m$coefficients[3] } # Results mean(est.int) sd(est.int) mean(est.gsp) sd(est.gsp) mean(est.vcr) sd(est.vcr) # Histograms hist(est.int, breaks=21) hist(est.gsp, breaks=21) hist(est.vcr, breaks=21) # Nothing here. =) # Alright, you will need to Konjac dataset. Its URL is # http://courses.kvasaheim.com/stat40x3/data/konjac.csv