# Chapter 11 Problem 22
bearings <- read.table(
  "http://math.cmc.edu/moneill/Math152/Handouts/BEARINGS.txt",
  header = TRUE
)
# Extract the two samples explicitly rather than attach()-ing the data frame;
# attach() puts the columns on the search path where they can be silently
# masked. All code below refers to I and II directly.
I <- bearings$I
II <- bearings$II

# a)
# Since I and II are the same size (n = 10 each), the pooled sample variance
# ((n1-1)s1^2 + (n2-1)s2^2) / (n1 + n2 - 2) reduces to the plain average of
# the two sample variances.
psv <- var(I) / 2 + var(II) / 2
# Estimated standard error of the difference in sample means:
# sqrt(psv * (1/10 + 1/10)) = sqrt(psv) * sqrt(1/5)
esedsm <- sqrt(psv) * sqrt(1 / 5)
# The value of our test statistic is
(mean(I) - mean(II)) / esedsm
#[1] 2.072309
# Notice that the sample variances are different
var(I)
#[1] 23.22551
var(II)
#[1] 12.97749
# Under the null hypothesis, the test statistic is approximately distributed
# as a t distribution with degrees of freedom given by the
# Welch-Satterthwaite formula
df <- (var(I) / 10 + var(II) / 10)^2 /
  ((var(I) / 10)^2 / 9 + (var(II) / 10)^2 / 9)
df
#[1] 16.66467
df <- 17
# The two sided p-value of the test is therefore
2 * (1 - pt(2.072309, df = 17))
#[1] 0.05375894
# The null hypothesis is rejected at the 6% level but not at the 5% level.
# The built in tests give similar results but use a continuous family of
# distributions to resolve the problem of 16.665... degrees of freedom
t.test(I, II)
# Welch Two Sample t-test
#data: I and II
#t = 2.0723, df = 16.665, p-value = 0.05408
#alternative hypothesis: true difference in means is not equal to 0
#95 percent confidence interval:
# -0.07752643 7.96352643
#sample estimates:
#mean of x mean of y
# 10.693 6.750
# Finally, if we suppose that the two distributions have the same variance,
# we do not get much different results
t.test(I, II, var.equal = TRUE)
# Two Sample t-test
#data: I and II
#t = 2.0723, df = 18, p-value = 0.05288
#alternative hypothesis: true difference in means is not equal to 0
#95 percent confidence interval:
# -0.05444249 7.94044249
#sample estimates:
#mean of x mean of y
# 10.693 6.750

# b)
?dwilcox
#Details:
# This distribution is obtained as follows. Let 'x' and 'y' be two
# random, independent samples of size 'm' and 'n'. Then the Wilcoxon
# rank sum statistic is the number of all pairs '(x[i], y[j])' for
# which 'y[j]' is not greater than 'x[i]'. This statistic takes
# values between '0' and 'm * n', and its mean and variance are
# 'm * n / 2' and 'm * n * (m + n + 1) / 12', respectively.
sum(rank(c(I, II))[1:10])
#[1] 130
# From the description provided by the R help page and the result from
# lecture last time (pg 406) we see that we need to subtract 10*11/2 from
# 130 to get the value of the test statistic.
# We have to use a little caution in computing the two sided p-value
?pwilcox
#lower.tail: logical; if TRUE (default), probabilities are P[X <= x],
# otherwise, P[X > x].
# The help page tells us that to compute the probability that our test
# statistic is greater than or equal to 75, we must compute
1 - pwilcox(74, 10, 10)
# Then to get the 2 sided p-value, we need
2 * (1 - pwilcox(74, 10, 10))
#[1] 0.06301284
# Again, the built in function is very convenient once we know what it is
# doing.
wilcox.test(I, II)
#Wilcoxon rank sum test
#data: I and II
#W = 75, p-value = 0.06301
#alternative hypothesis: true mu is not equal to 0
# With p = 0.063 the null hypothesis is rejected at the 7% level but not
# at the 6% level.

# c)
# We might suspect that lifetimes are distributed exponentially (a skewed
# distribution) and our sample size is small. The non-parametric test is
# more appropriate in this case.

# d)
# The sum of the ranks of the type I bearings in the combined data is
sum(rank(c(I, II))[1:10])
#[1] 130
# To get an estimate of the probability that I lasts longer than II we
# compute W / (m * n), where W = rank sum - 10*11/2 is the rank-sum
# statistic and m = n = 10.
(1 / 100) * (sum(rank(c(I, II))[1:10]) - 10 * 11 / 2)
#[1] 0.75

# e)
# In the above computation, replace I and II by resamples with replacement.
# The argument i is ignored; it only lets the function drive vapply().
f <- function(i) {
  (1 / 100) *
    (sum(rank(c(sample(I, replace = TRUE),
                sample(II, replace = TRUE)))[1:10]) - 10 * 11 / 2)
}
d <- vapply(seq_len(10000), f, numeric(1))
hist(d)
sd(d)
#[1] 0.1158202

# f)
# Basic ("pivotal") bootstrap interval: 2 * estimate minus the bootstrap
# percentiles; sort(d)[k] is the k-th smallest of the 10000 resampled
# statistics, so these are the empirical 5th and 95th percentiles.
2 * 0.75 - sort(d)[500]
#[1] 0.95
2 * 0.75 - sort(d)[9500]
#[1] 0.57
# so an approximate 90% confidence interval is c(0.57, 0.95)