######################################## # # Script: 12 April 2011 (20110412.R) # ######################################## # Today: # # ** Analysis of variance (means) # ** Experimental design (blocking) # # Let us create a new dataset (Ex 15.1, page 960) ins <- data.frame( matrix( c(1,1,56, 1,2,48, 1,3,66, 1,4,62, 2,1,83, 2,2,78, 2,3,94, 2,4,93, 3,1,80, 3,2,72, 3,3,83, 3,4,85), ncol=3, byrow=T)) names(ins) <- c("Ins","Plot","Seedlings") ins$Plot <- as.factor(ins$Plot) ins$Ins <- as.factor(ins$Ins) ins <- data.frame(ins) #### And now for something completely different... # Note that the number of seedlings is the dependent variable # Note that insecticide is the independent variable # Note that the plot is a blocking variable model1 <- aov(Seedlings ~ Ins + Plot, data=ins) summary(model1) # Is the insecticide an important variable in determing the # number of seedlings? # Yes (F = 211.39; df1 = 2, df2 = 6; p << 0.0001) # What are the effect sizes of the three insecticides? summary.lm(model1) # For the insecticide, # Expected number of seedlings for Ins 1 = 56 # Expected number of seedlings for Ins 2 = 56+29 = 85 # Expected number of seedlings for Ins 3 = 56+22 = 78 # Is Insecticide 1 statistically worse than the other two? # Yes. Comparing Ins1 to Ins2: p << 0.0001, # and comparing Ins1 to Ins2: p << 0.0001 # Is Insecticide 2 significantly better than Ins3? # Recall t = (29-22)/1.472. Thus, t=4.755. Thus, p <0.05 pt((29-22)/1.472,df=6, lower.tail=FALSE)*2 # The p-value is 0.00314 # Alternatively, we could have used a professor-defined function source("http://courses.kvasaheim.com/stat40x3/R/set.base.R") ins$Ins <- set.base(ins$Ins,'2', data=ins) model2 <- aov(Seedlings ~ Ins + Plot, data=ins) summary.lm(model2) # From this, we can conclude the following order of insecticide # efficacy: 2 > 3 > 1. # Was the blocking necessary? summary(model1) # Yes, as the Plot variable was statistically significant (F=33.69, # df1=3, df2=6, p=0.00037).