####################
#
# Filename: 20110823.R
#
####################
#
# Purpose:  Demonstrate t-tests and the use of 
#  random numbers in evaluating tests
#


# Step 1: Create some data

x <- rnorm(n = 10, mean = 10, sd = 12)

# The vector x holds 10 pieces of data (n = 10). We actually
#  know that this data is Normally distributed; we
#  designed it that way (the 'norm' in 'rnorm'). We
#  also know the mean (mean = 10) and the standard deviation
#  (sd = 12) of that population. Why might it be useful to
#  know reality for once?
#
# Note: the arguments are written out in full (mean, sd) rather
#  than abbreviated (m, s), so the call does not depend on R's
#  partial matching of argument names.


# Step 2: State our null hypothesis
#
#     H0: The mean of the population equals 11.


# Step 3: Test the null hypothesis
#
# One-sample t-test: compares the sample mean of x against the
#  hypothesized population mean given in 'mu'.

t.test(x, mu = 11)




# Note that p > 0.05, thus we cannot reject the null hypothesis
#  and we conclude that 11 is a reasonable mean for the population.

# Note, we said REASONABLE.

# Also note, your answer may differ. This is the nature of 
#  random data generation. To ensure that we are on the 'same
#  page,' we can set the random number generator (RNG) seed:

set.seed(111)

#  From this point on, our answers will agree: every random draw
#  made after this call is reproducible.





### Paired-Sample t-test

# Step 0: When do we use this test? Refer to notes/book.


# Step 1: Create some data
#
# Two samples of 10 values each, drawn from Normal populations
#  with means 11 and 20 and a common standard deviation of 12.
#  The argument names are spelled out (mean, sd) so the calls
#  do not rely on partial argument matching.

x.before <- rnorm(n = 10, mean = 11, sd = 12)
x.after  <- rnorm(n = 10, mean = 20, sd = 12)

# These are both valid variable names (and quite clear)


# Step 2: State our null hypothesis
#
#     H0: There is no difference between the pre- and post-tests


# Step 3: Test the null hypothesis
#
# With paired = TRUE the i-th value of x.before is matched with
#  the i-th value of x.after, and the test is carried out on the
#  pairwise differences.

t.test(x.before, x.after, paired = TRUE)



# Since p < 0.05, we reject the null hypothesis and
#  conclude that there is a significant difference between pre-
#  and post-tests at the alpha=0.05 level.

# We could have also concluded that it was unreasonable at the 
#  alpha=0.05 level to conclude that there is no difference between
#  pre- and post-tests.







### Independent-samples t-test

# Step 0: When do we use this test? Refer to notes/book.


# Step 1: Create some data
set.seed(222)

# Two independent samples of 10 values each, drawn from Normal
#  populations with means 11 and 20 and the same standard
#  deviation (sd = 12). Argument names are spelled out (mean, sd)
#  so the calls do not rely on partial argument matching.
x1 <- rnorm(n = 10, mean = 11, sd = 12)
x2 <- rnorm(n = 10, mean = 20, sd = 12)



# Step 2: State our null hypothesis
#
#     H0: There is no difference in means between the two populations



# Step 3: Test the null hypothesis
#
# By default t.test() does not assume equal variances
#  (var.equal = FALSE), so this is the Welch version of the test.

t.test(x1, x2)


# Note: If we knew that the variance of the two populations was equal
# (as it is here, since we created both with sd = 12), then we could
# have used the pooled-variance version of the test:

t.test(x1, x2, var.equal = TRUE)


# Since p > 0.05, we fail to reject the null hypothesis and
#  conclude that the means of the two populations are 
#  reasonably close to each other (at the alpha=0.05 level).




##################################################

### Extensions:

# Recall that for these tests, we assume that the data are
#  Normally distributed. What if they are not? What if they 
#  are distributed according to, say, an Exponential distribution?
#  Let us see:

# Set the seed
set.seed(seed = 888)


# Step 1: Create some data
#
# Both samples hold 10 draws from the same Exponential
#  distribution (rate = 1), so the two populations share the
#  same mean (1 / rate = 1) but are clearly not Normal.

x1 <- rexp(10, rate = 1)
x2 <- rexp(10, rate = 1)



# Step 2: State our null hypothesis
#
#     H0: The two populations have the same mean



# Step 3: Test the null hypothesis
#
# Same two-sample t-test as before, now applied to data that
#  violate the Normality assumption.

t.test(x = x1, y = x2)


# Note: We *know* that the means of the two populations are equal,
#  since we designed them that way. Thus, we would expect (hope) 
#  to fail to reject the null hypothesis and conclude that it is
#  reasonable for the two population means to be equal. Is this
#  what happened?