# Chapter 11 Problem 22
bearings <- read.table(
  "http://math.cmc.edu/moneill/Math152/Handouts/BEARINGS.txt",
  header = TRUE
)
# Extract the two samples explicitly rather than attach()-ing the data frame;
# attach() puts the columns on the search path where they can be silently
# masked. All code below refers to I and II directly.
I <- bearings$I
II <- bearings$II

# a)
# Since I and II are the same size (n = 10 each), the pooled sample variance
# ((n1-1)s1^2 + (n2-1)s2^2) / (n1 + n2 - 2) reduces to the plain average of
# the two sample variances.
psv <- var(I) / 2 + var(II) / 2
# Estimated standard error of the difference in sample means:
# sqrt(psv * (1/10 + 1/10)) = sqrt(psv) * sqrt(1/5)
esedsm <- sqrt(psv) * sqrt(1 / 5)
# The value of our test statistic is
(mean(I) - mean(II)) / esedsm
#[1] 2.072309
# Notice that the sample variances are different
var(I)
#[1] 23.22551
var(II)
#[1] 12.97749
# Under the null hypothesis, the test statistic is approximately distributed
# as a t distribution with degrees of freedom given by the
# Welch-Satterthwaite formula
df <- (var(I) / 10 + var(II) / 10)^2 /
  ((var(I) / 10)^2 / 9 + (var(II) / 10)^2 / 9)
df
#[1] 16.66467
df <- 17
# The two sided p-value of the test is therefore
2 * (1 - pt(2.072309, df = 17))
#[1] 0.05375894
# The null hypothesis is rejected at the 6% level but not at the 5% level.
# The built in tests give similar results but use a continuous family of
# distributions to resolve the problem of 16.665... degrees of freedom
t.test(I, II)
# Welch Two Sample t-test
#data: I and II
#t = 2.0723, df = 16.665, p-value = 0.05408
#alternative hypothesis: true difference in means is not equal to 0
#95 percent confidence interval:
# -0.07752643 7.96352643
#sample estimates:
#mean of x mean of y
# 10.693 6.750
# Finally, if we suppose that the two distributions have the same variance,
# we do not get much different results
t.test(I, II, var.equal = TRUE)
# Two Sample t-test
#data: I and II
#t = 2.0723, df = 18, p-value = 0.05288
#alternative hypothesis: true difference in means is not equal to 0
#95 percent confidence interval:
# -0.05444249 7.94044249
#sample estimates:
#mean of x mean of y
# 10.693 6.750

# b)
?dwilcox
#Details:
# This distribution is obtained as follows. Let 'x' and 'y' be two
# random, independent samples of size 'm' and 'n'. Then the Wilcoxon
# rank sum statistic is the number of all pairs '(x[i], y[j])' for
# which 'y[j]' is not greater than 'x[i]'. This statistic takes
# values between '0' and 'm * n', and its mean and variance are
# 'm * n / 2' and 'm * n * (m + n + 1) / 12', respectively.
sum(rank(c(I, II))[1:10])
#[1] 130
# From the description provided by the R help page and the result from
# lecture last time (pg 406) we see that we need to subtract 10*11/2 from
# 130 to get the value of the test statistic.
# We have to use a little caution in computing the two sided p-value
?pwilcox
#lower.tail: logical; if TRUE (default), probabilities are P[X <= x],
# otherwise, P[X > x].
# The help page tells us that to compute the probability that our test
# statistic is greater than or equal to 75, we must compute
1 - pwilcox(74, 10, 10)
# Then to get the 2 sided p-value, we need
2 * (1 - pwilcox(74, 10, 10))
#[1] 0.06301284
# Again, the built in function is very convenient once we know what it is
# doing.
wilcox.test(I, II)
#Wilcoxon rank sum test
#data: I and II
#W = 75, p-value = 0.06301
#alternative hypothesis: true mu is not equal to 0
# With p = 0.063 the null hypothesis is rejected at the 7% level but not
# at the 6% level.

# c)
# We might suspect that lifetimes are distributed exponentially (a skewed
# distribution) and our sample size is small. The non-parametric test is
# more appropriate in this case.

# d)
# The sum of the ranks of the type I bearings in the combined data is
sum(rank(c(I, II))[1:10])
#[1] 130
# To get an estimate of the probability that I lasts longer than II we
# compute W / (m * n), where W = rank sum - 10*11/2 is the rank-sum
# statistic and m = n = 10.
(1 / 100) * (sum(rank(c(I, II))[1:10]) - 10 * 11 / 2)
#[1] 0.75

# e)
# In the above computation, replace I and II by resamples with replacement.
# The argument i is ignored; it only lets the function drive vapply().
f <- function(i) {
  (1 / 100) *
    (sum(rank(c(sample(I, replace = TRUE),
                sample(II, replace = TRUE)))[1:10]) - 10 * 11 / 2)
}
d <- vapply(seq_len(10000), f, numeric(1))
hist(d)
sd(d)
#[1] 0.1158202

# f)
# Basic ("pivotal") bootstrap interval: 2 * estimate minus the bootstrap
# percentiles; sort(d)[k] is the k-th smallest of the 10000 resampled
# statistics, so these are the empirical 5th and 95th percentiles.
2 * 0.75 - sort(d)[500]
#[1] 0.95
2 * 0.75 - sort(d)[9500]
#[1] 0.57
# so an approximate 90% confidence interval is c(0.57, 0.95)