############################## # # Script: assignment02a.R # Answers to the second # homework assignment # ############################## ##### # Problem 2: Predicting Nationalized Petroleum # Read in the data clf <- read.csv("http://courses.kvasaheim.com/pols6123/data/clf.csv") names(clf) summary(clf) attach(clf) # To make things easier, let us create two short variables from # the data: fsi.n <- fsi[nationalized=="nationalized"] fsi.p <- fsi[nationalized=="privatized"] # I want to use a t-test (two populations). I need # to test its assumptions. # A1: Normality # One test qqnorm(fsi.n) qqline(fsi.n) # Maybe? qqnorm(fsi.p) qqline(fsi.p) # Maybe not? # Another hist(fsi.n) # Maybe? hist(fsi.p) # Maybe not? # Yet another shapiro.test(fsi.n) # yes shapiro.test(fsi.p) # not really, but close for Bonferonni # A2: Equal variances boxplot(fsi~nationalized) # very doubtful bartlett.test(fsi~nationalized) # reasonable fligner.test(fsi~nationalized) # reasonable # Assumptions passed! Therefore, t-test should work: t.test(fsi~nationalized, alternative="less", var.equal=TRUE) t.test(fsi~nationalized, alternative="less", var.equal=FALSE) # not much difference between the two (as expected) # Now to create presentation-level boxplot par(mar=c(4,4,1,1)+0.1) boxplot(fsi~nationalized, las=1, ylab="Failed States Index", xaxt="n",xlab="Petroleum Production Status") axis(1, label=c("Nationalized","Privatized"), at=1:2) ##### # Problem 3: Predicting Nationalized Petroleum # Read in the data bio <- read.csv("http://courses.kvasaheim.com/pols6123/data/biome2.csv") names(bio) summary(bio) attach(bio) # I want to use analysis of variance (five populations). I need # to test its assumptions. # A1: Normality shapiro.test(mfri[biome=="STR"]) # No fail shapiro.test(mfri[biome=="TBF"]) # No fail shapiro.test(mfri[biome=="TCF"]) # No fail shapiro.test(mfri[biome=="XER"]) # No fail shapiro.test(mfri[biome=="TS"]) # No fail # A2: Equal-variance boxplot(mfri~biome) # Fail! bartlett.test(mfri~biome) # Fail! fligner.test(mfri~biome) # Fail # A solution: A transformation may work # So, let us try the log-transform tfri <- log(mfri) boxplot(tfri~biome) # Better bartlett.test(tfri~biome) # No fail fligner.test(tfri~biome) # No fail # Another solution: Monte Carlo might work, # except we do not know the theoretical # distribution of mfri. # A last solution: When all else fails, we use # non-paramteric means. kruskal.test(mfri~biome) # Reject the null hypothesis. # Now to create presentation-level boxplot par(mar=c(4,4,1,1)+0.1) boxplot(mfri~biome, las=1, ylab="Mean Fire Return interval (year)", xlab="Biome Type") # or par(mar=c(4,12,1,1)+0.1) boxplot(mfri~biome, las=1, xlab="Mean Fire Return interval (year)", yaxt="n", ylab="", horizontal=TRUE) axis(2, label=c("Subtropical Rainforest\n(STR)","Temperate Broadleaf Forest\n(TBF)","Temperate Coniferous Forest\n(TCF)","Temperate Steppe\n(TS)","Xeric Shrubland\n(XER)"), at=1:5, las=1)