##############################
#
# Script: assignment02a.R
#   Answers to the second 
#   homework assignment
#
##############################



#####
# Problem 2: Predicting Nationalized Petroleum

# Read in the data
clf <- read.csv("http://courses.kvasaheim.com/pols6123/data/clf.csv")
names(clf)
summary(clf)
attach(clf)

# To make things easier, let us create two short variables from
# the data:

fsi.n <- fsi[nationalized=="nationalized"]
fsi.p <- fsi[nationalized=="privatized"]


# I want to use a t-test (two populations). I need
# to test its assumptions.

# A1: Normality

# One test
qqnorm(fsi.n)
qqline(fsi.n)   # Maybe?
qqnorm(fsi.p)
qqline(fsi.p)   # Maybe not?

# Another
hist(fsi.n)     # Maybe?
hist(fsi.p)     # Maybe not?

# Yet another
shapiro.test(fsi.n)  # yes
shapiro.test(fsi.p)  # not really, but close for Bonferonni


# A2: Equal variances

boxplot(fsi~nationalized)        # very doubtful
bartlett.test(fsi~nationalized)  # reasonable
fligner.test(fsi~nationalized)   # reasonable


# Assumptions passed! Therefore, t-test should work:

t.test(fsi~nationalized, alternative="less", var.equal=TRUE)
t.test(fsi~nationalized, alternative="less", var.equal=FALSE)

# not much difference between the two (as expected)

# Now to create presentation-level boxplot 

par(mar=c(4,4,1,1)+0.1)
boxplot(fsi~nationalized, las=1, ylab="Failed States Index", xaxt="n",xlab="Petroleum Production Status")
axis(1, label=c("Nationalized","Privatized"), at=1:2)




#####
# Problem 3: Predicting Nationalized Petroleum

# Read in the data
bio <- read.csv("http://courses.kvasaheim.com/pols6123/data/biome2.csv")
names(bio)
summary(bio)
attach(bio)






# I want to use analysis of variance (five populations). I need
# to test its assumptions.

# A1: Normality 
shapiro.test(mfri[biome=="STR"])    # No fail
shapiro.test(mfri[biome=="TBF"])    # No fail
shapiro.test(mfri[biome=="TCF"])    # No fail
shapiro.test(mfri[biome=="XER"])    # No fail
shapiro.test(mfri[biome=="TS"])     # No fail


# A2: Equal-variance
boxplot(mfri~biome)                 # Fail!
bartlett.test(mfri~biome)           # Fail!
fligner.test(mfri~biome)            # Fail


# A solution: A transformation may work
#  So, let us try the log-transform
tfri <- log(mfri)
boxplot(tfri~biome)                 # Better
bartlett.test(tfri~biome)           # No fail
fligner.test(tfri~biome)            # No fail


# Another solution: Monte Carlo might work, 
#  except we do not know the theoretical 
#  distribution of mfri.


# A last solution: When all else fails, we use
#  non-paramteric means.

kruskal.test(mfri~biome)

# Reject the null hypothesis.


# Now to create presentation-level boxplot 

par(mar=c(4,4,1,1)+0.1)
boxplot(mfri~biome, las=1, ylab="Mean Fire Return interval (year)", xlab="Biome Type")

# or

par(mar=c(4,12,1,1)+0.1)
boxplot(mfri~biome, las=1, xlab="Mean Fire Return interval (year)", yaxt="n", ylab="", horizontal=TRUE)
axis(2, label=c("Subtropical Rainforest\n(STR)","Temperate Broadleaf Forest\n(TBF)","Temperate Coniferous Forest\n(TCF)","Temperate Steppe\n(TS)","Xeric Shrubland\n(XER)"), at=1:5, las=1)