##############################
#
# Script: Solutions 2 (assignment02a.R)
#
##############################


####################
# Problem 02.1

# Long form: pull out each conference's scores, then compare the two vectors.
fb <- read.csv("football1.csv", header = TRUE)  # Read in the data as fb
b12 <- fb$score[fb$conference == "Big 12"]      # Keep only the Big 12 scores
sec <- fb$score[fb$conference == "SEC"]         # Keep only the SEC scores
t.test(b12, sec, alternative = "greater")       # One-sided comparison

# In simple cases there is a shortcut: hand t.test a formula (response ~ group).
fb <- read.csv("football1.csv", header = TRUE)
t.test(fb$score ~ fb$conference, alternative = "greater")

# Also available in simple cases: use bare column names and supply the data
# frame through the data argument.
fb <- read.csv("football1.csv", header = TRUE)
t.test(score ~ conference, data = fb, alternative = "greater")

# So, why was the alternative = "greater" necessary?


####################
# Problem 02.2

gdp <- read.csv("gdpcap.csv", header = TRUE)     # Read in the data
g.afr <- gdp$gdpcap[gdp$region == "Africa"]      # Keep only African gdpcaps
g.east <- gdp$gdpcap[gdp$region == "Eastern"]    # Keep only Eastern gdpcaps
t.test(g.afr, g.east, alternative = "greater")   # Perform the t-test

# Why are no shortcut options used here? What is different about this
# data set from the one above? Why is that difference important?
# To be a good researcher, you must know your data!


####################
# Problem 02.3

# Long form: one GPA vector per fraternity, compared with the default
# (two-sided) alternative.
frat <- read.csv("http://courses.kvasaheim.com/stat40x3/data/fraternity.csv")
gpa.ao <- frat$gpa[frat$fraternity == "AO"]
gpa.xp <- frat$gpa[frat$fraternity == "XP"]
t.test(gpa.ao, gpa.xp)

# or, with a formula built from the data-frame columns:
frat <- read.csv("http://courses.kvasaheim.com/stat40x3/data/fraternity.csv")
t.test(frat$gpa ~ frat$fraternity)

# or, with a formula plus the data argument:
frat <- read.csv("http://courses.kvasaheim.com/stat40x3/data/fraternity.csv")
t.test(gpa ~ fraternity, data = frat)

# Notice that this data is pulled straight off the website without first
# being saved to disk. I do not suggest this, unless the data will be
# updated frequently.
# Finally, the boxplot: GPA split by fraternity, taken from the frat data frame.
boxplot(gpa ~ fraternity, data = frat,
        main = "GPA Comparison", xlab = "Fraternity", ylab = "GPA", las = 1)

# This is the typical "Structure F" of all graphing commands. It
# is most usable when there is a dataset involved instead of two
# separate variables that you want to plot.

##### Extra:

# The means are not plotted on the boxplots. If you want the
# means plotted, you will have to add them yourself:
points(1, mean(frat$gpa[frat$fraternity == "AO"]), col = 2, pch = 16)
points(2, mean(frat$gpa[frat$fraternity == "XP"]), col = 2, pch = 16)

# Note that the plotting function is "points" and not "plot". The
# latter will erase your current plot and start a new one. The former
# will add to the previous one. This is a typical "Structure P", the
# alternative format for graphing commands. The variables are included
# in x,y format. Thus, the first point is plotted at the position (1, mAO),
# where 'mAO' is the mean GPA of the AO fraternity.