# VITAMINC data: compare non-schizophrenic (Group == 1) and schizophrenic
# (Group == 2) subjects on plasma vitamin C and urinary excretion measures.
vitaminc <- read.table("http://math.cmc.edu/moneill/Math152/Handouts/VITAMINC.txt",
                       header = TRUE, na.strings = "*")
# NOTE(review): Group, h0, h2, UrTot and URmpk are used unqualified below,
# which requires the data frame on the search path; the attach() appears to
# have been lost from the original — confirm against the source handout.
attach(vitaminc)

# a) Split the 0-hour and 2-hour plasma measurements by group.
NSh0 <- h0[Group == 1]
NSh0
NSh2 <- h2[Group == 1]
Sh0 <- h0[Group == 2]
Sh2 <- h2[Group == 2]
par(mfrow = c(2, 3))
hist(Sh0, xlim = c(0, 2), prob = TRUE)
hist(Sh2, xlim = c(0, 2.5), prob = TRUE)
hist(Sh2 - Sh0, xlim = c(0, 2), prob = TRUE)
hist(NSh0, xlim = c(0, 2), ylim = c(0, 2), prob = TRUE, breaks = 10)
hist(NSh2, xlim = c(0, 2.5), prob = TRUE, breaks = 10)
hist(NSh2 - NSh0, xlim = c(0, 2), ylim = c(0, 1.5), prob = TRUE, breaks = 10)
par(mfrow = c(3, 1))
boxplot(Sh0, NSh0)
boxplot(Sh2, NSh2)
boxplot(Sh2 - Sh0, NSh2 - NSh0)
par(mfrow = c(1, 1))
# The non-schizophrenic group appears to be higher in all three comparisons.

# b) Two-sample t-tests (Welch by default).
t.test(Sh0, NSh0)
t.test(Sh2, NSh2)
t.test(Sh2 - Sh0, NSh2 - NSh0)

# c) The corresponding rank-based tests.
wilcox.test(Sh0, NSh0)
wilcox.test(Sh2, NSh2)
wilcox.test(Sh2 - Sh0, NSh2 - NSh0)

# d) Urinary excretion: total (UrTot) and milligrams per kilogram (URmpk).
NST <- UrTot[Group == 1]
ST <- UrTot[Group == 2]
NSmpk <- URmpk[Group == 1]
Smpk <- URmpk[Group == 2]
summary(Smpk)
summary(NSmpk)
summary(NST)
summary(ST)
boxplot(Smpk, NSmpk)
boxplot(ST, NST)
par(mfrow = c(2, 2))
hist(Smpk)
hist(ST)
hist(NSmpk)
hist(NST)
# The data do not look normally distributed. The boxplots and histograms are
# skewed. It appears that the non-schizophrenic group has a higher excretion
# rate in both comparisons.

# e)
t.test(Smpk, NSmpk)
t.test(ST, NST)
# The sample sizes are small and the distributions are skewed. The
# non-parametric methods are more appropriate here even though the t.test may
# be somewhat robust against the departure from normality. Normality should
# certainly not be assumed.

# f)
wilcox.test(Smpk, NSmpk)
wilcox.test(ST, NST)
# Neither t.test would have rejected the null hypothesis that the mean values
# were the same at the 5% level. The Wilcoxon test rejects both null
# hypotheses at the 2% level and backs up the graphical observation that the
# schizophrenic group has a lower average rate of urinary excretion of
# vitamin C.
# g) Plasma and urine concentrations; NAs are dropped from the schizophrenic
#    group (Group == 2) subsets.
SP <- Plasma[Group == 2 & !is.na(Plasma)]
SU <- Urine[Group == 2 & !is.na(Urine)]
CP <- Plasma[Group == 1]
CU <- Urine[Group == 1]
par(mfrow = c(2, 1))
hist(SP)
hist(CP)
hist(SU, xlim = c(0, 300))
hist(CU)
boxplot(SP, CP)
boxplot(SU, CU)
shapiro.test(SP)
shapiro.test(SU)
shapiro.test(CP)
shapiro.test(CU)
# From the graphics and the Shapiro test, it seems safe to use the t.test,
# i.e. the data do not depart too far from normality. Note: this type of use
# of the Shapiro test for small samples should be made in conjunction with
# graphical evidence. The power of the test may be low.
# It seems from the graphics that the plasma concentrations are similar but
# the urinary concentrations are significantly different. There is a
# noticeable difference in the variability of the quantities between the
# groups.

# h)
t.test(SP, CP)
t.test(SU, CU)

# i)
wilcox.test(SP, CP)
wilcox.test(SU, CU)
# The tests reach the same conclusions. It seems that the ranks come out
# perfectly for the plasma concentrations. The schizophrenic group has a
# significantly lower concentration of vitamin C in urine.
sum(rank(c(SP, CP))[1:15])   #[1] 232
sum(rank(c(SP, CP))[16:30])  #[1] 233
# I don't know enough biology to know what this all means. Maybe
# schizophrenics sweat more on the average?

# IRON pg. 428
iron <- read.table("http://math.cmc.edu/moneill/Math152/Handouts/IRON.txt",
                   header = TRUE, na.strings = "*")
iron
# In this example we compare the 1.2 millimolar concentrations of the (3+)
# form U2 with the same concentration of the (2+) form V2.
attach(iron)
op <- par(no.readonly = TRUE)       # save graphics state so it can be restored
par(mfcol = c(2, 2), pin = c(2, 2))
qqnorm(U2)
qqnorm(V2)
hist(U2)
hist(V2)
par(op)
# Both distributions appear to be skewed and may contain outliers.
boxplot(U2, V2)
t.test(U2, V2)

# Log transformation to reduce the skewness.
boxplot(log(U2), log(V2))
op <- par(no.readonly = TRUE)
par(mfcol = c(2, 2), pin = c(2, 2))
qqnorm(log(U2))
qqnorm(log(V2))
hist(log(U2))
hist(log(V2))
par(op)
t.test(log(U2), log(V2))
# Saved output:
#         Welch Two Sample t-test
# data:  log(U2) and log(V2)
# t = -0.9173, df = 33.373, p-value = 0.3656
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
#  -0.6074408  0.2298013
# sample estimates:
# mean of x mean of y
#  1.901225  2.090045
# Assuming the distributions of U2 and V2 are the same, (-.61, .23) is a 95
# percent C.I. for log(mu_U2) - log(mu_V2); therefore:
c(exp(-.61), exp(.23))
# is an approximate 95% C.I. for the ratio (mu_U2/mu_V2).
mean(U2) / mean(V2)

# The nonparametric method (see page 442).
wilcox.test(U2, V2, conf.int = TRUE)
# Saved output:
#         Wilcoxon rank sum test
# data:  U2 and V2
# W = 133, p-value = 0.3717
# alternative hypothesis: true mu is not equal to 0
# 95 percent confidence interval:
#  -3.67  1.64
# sample estimates:
# difference in location  -1.115

#------------------------------------------------------------------------------
# MERCURY: paired comparison of two measurement methods.
# NOTE(review): the two reads below are machine-specific alternates that are
# immediately overwritten and would abort the script when the path/URL is
# unavailable, so they are kept only as commented-out references.
#mercury<-read.csv("C:/Documents and Settings/Mike/Desktop/Math 152/MERCURY.txt",header=T)
#mercury<-read.csv("http://math.cmc.edu/moneill/Math152/Handouts/MERCURY.txt",header=T)
mercury <- read.table("fishmercury.txt", header = TRUE)
# Note: one of the data points may have a misplaced decimal.
mercury
attach(mercury)
P <- permanganate
SR <- reduction
Diff <- P - SR
Diff
#[1]  0.07  0.07  0.00 -0.04  0.10 -0.05  0.11  0.35  0.36  0.19 -0.05 -0.01
#[13] 0.02 -0.01 -0.02 0.01 0.08 -0.02 0.03 -0.13 0.02 -0.04 0.01 0.04
#[25] -0.08
# Signed ranks of the nonzero differences (zeros are dropped, as in the
# Wilcoxon signed-rank test).
srank <- sign(Diff[Diff != 0]) * rank(abs(Diff[Diff != 0]))
srank
#[1] 16.0 15.0 -10.5 19.0 -13.5 20.0 23.0 24.0 22.0 -13.5 -2.5 5.0
#[13] -2.5 -7.0 2.5 17.5 -7.0 9.0 -21.0 7.0 -12.0 2.5 10.5 -17.5
# I don't know why R won't average ALL the tied values, but it should not
# change the statistic significantly.
# Signed-rank statistic by hand: sums of the positive and negative signed ranks.
sum(srank[srank > 0])  #[1] 193
sum(srank[srank < 0])  #[1] -107
wilcox.test(SR, P, paired = TRUE, correct = FALSE)
# Saved output:
#         Wilcoxon signed rank test
# data:  SR and P
# V = 107, p-value = 0.2188
# alternative hypothesis: true mu is not equal to 0
# Warning messages:
# 1: cannot compute exact p-value with ties in:
#    wilcox.test.default(SR, P, paired = T, correct = F)
# 2: cannot compute exact p-value with zeroes in:
#    wilcox.test.default(SR, P, paired = T, correct = F)

# What about the t-test?
op <- par(no.readonly = TRUE)
par(mfcol = c(2, 2), pin = c(2, 2))
qqnorm(SR)
qqnorm(P)
hist(SR)
hist(P)
par(op)
shapiro.test(SR)
shapiro.test(P)
# The P data has a heavy right tail but the sample size is large enough that
# we may still expect the t.test to be useful. (See lecture notes for some
# general rules of thumb.)
t.test(SR, P, paired = TRUE)
# Saved output:
#         Paired t-test
# data:  SR and P
# t = -1.7448, df = 24, p-value = 0.09382
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
#  -0.088189837  0.007389837
# sample estimates:
# mean of the differences
#  -0.0404
# We may not conclude that the methods are systematically different from
# either of the tests. But the heavy-tailed distribution of P may be more
# than just an aberration.
plot(P, Diff)  # difference vs. level for the mercury data

#------------------------------------------------------------------------------
# Problem 22 chapter 11 — CALCIUM: paired comparison of two assay methods.
calcium <- read.table("http://math.cmc.edu/moneill/Math152/Handouts/CALCIUM.txt",
                      header = TRUE, na.strings = "*")
attach(calcium)
# Plot the two methods' sorted values against each other (a Q-Q style plot).
plot(Oxalate[order(Oxalate)], Flame[order(Flame)])
# Identity function, used to overlay the reference line y = x.
id <- function(x) {
  x
}
curve(id, add = TRUE)
op <- par(no.readonly = TRUE)
par(mfcol = c(2, 1), pin = c(2, 2))
hist(Oxalate)
hist(Flame)
hist(log(Oxalate))
hist(log(Flame))
par(op)
boxplot(Oxalate, Flame)
summary(Oxalate)
summary(Flame)
# Paired differences in sample order. seq_along replaces the hard-coded
# 1:118 so the plot stays correct whatever the sample size is.
plot(seq_along(Oxalate), Oxalate - Flame)
barplot(Oxalate - Flame)
t.test(Oxalate, Flame, paired = TRUE)
t.test(log(Oxalate), log(Flame), paired = TRUE)
wilcox.test(Oxalate, Flame, conf.int = TRUE, paired = TRUE)
# There is a systematic difference in the two methods, with the Oxalate
# method tending to produce values which are larger on the average by
# about .035.

#------------------------------------------------------------------------------
# Problem 29 chapter 11 and the haiku data from the Amabile study.
#haiku<-read.csv("U:/Math 152/haiku.txt",header=T)
#haiku<-read.csv("C:/Documents and Settings/Mike/Desktop/Math 152/haiku.txt",header=T)
haiku <- read.csv("http://math.cmc.edu/moneill/Math152/Handouts/haiku.txt",
                  header = TRUE)
attach(haiku)
names(haiku)
score.int <- SCORE[TREATMENT == "INTRINSIC"]
score.ext <- SCORE[TREATMENT == "EXTRINSIC"]
boxplot(SCORE ~ TREATMENT)
t.test(score.int, score.ext, var.equal = TRUE)
wilcox.test(score.int, score.ext, conf.int = TRUE)

# A permutation test: difference of group means under random relabelling.
# (Assumes 47 scores in all with 24 in the first group — confirm against the
# data before reuse.)
mean(score.int) - mean(score.ext)
f <- function(x) {
  l <- sample(1:47, 47)  # a random permutation of the 47 indices
  mean(SCORE[l][1:24]) - mean(SCORE[l][25:47])
}
d <- sapply(1:10000, f)
hist(d)
# Empirical 99.5th and 0.5th percentiles of the permutation distribution.
d[order(d)][9950]
d[order(d)][50]

#------------------------------------------------------------------------------
# Problem 35 chapter 11 — RATOZONE: weight gain under ozone vs. control.
ratozone <- read.table("http://math.cmc.edu/moneill/Math152/Handouts/RATOZONE.txt",
                       header = TRUE, na.strings = "*")
names(ratozone)
attach(ratozone)
Oz <- Oz[!is.na(Oz)]  # drop the missing values from the ozone group
op <- par(no.readonly = TRUE)
par(mfcol = c(2, 1), pin = c(2.5, 2.5))
hist(Oz)
hist(C)
par(op)
boxplot(Oz, C)
qqnorm(Oz)
qqnorm(C)
# Shapiro test on the middle order statistics of C — trims one point from
# each tail, presumably to judge normality of the bulk (assumes C has at
# least 23 observations — confirm).
shapiro.test(C[order(C)][2:23])
shapiro.test(Oz)
wilcox.test(Oz, C, conf.int = TRUE)
t.test(Oz, C)
qqplot(C, Oz)
curve(id, add = TRUE)  # reference line y = x (id defined in the CALCIUM section)
# There is little doubt of a treatment effect. Ozone exposure appears to
# cause a decrease in weight gain but strangely enough seems to increase
# weight gain for the subjects with the largest gains. The treatment effect
# certainly does not seem to be additive, but see the discussion in Doksum
# and Sievers (available on my webpage).

#------------------------------------------------------------------------------
# Problem 47 chapter 11 — FRANCE cloud-seeding data.
france <- read.table("http://math.cmc.edu/moneill/Math152/Handouts/FRANCE.txt",
                     header = TRUE, na.strings = "*")
names(france)
# NOTE(review): the original called detach(telephon) here, but no `telephon`
# object is attached anywhere in this file, so the call would error and abort
# a sourced run; it is disabled pending confirmation it is not needed.
#detach(telephon)
# Target, Control and Group are used unqualified below, so the data frame
# must be on the search path — the attach() appears to have been omitted.
attach(france)
france
Control
seedtarg <- Target[Group == 1]
unseedtarg <- Target[Group == 2]
seedcon <- Control[Group == 1]
unseedcon <- Control[Group == 2]
op <- par(no.readonly = TRUE)
par(mfcol = c(2, 2), pin = c(2, 2))
hist(seedtarg)
hist(unseedtarg)
hist(seedcon)
hist(unseedcon)
par(op)
t.test(seedtarg, unseedtarg)
wilcox.test(seedtarg, unseedtarg)
op <- par(no.readonly = TRUE)
par(mfcol = c(2, 1), pin = c(2, 2))
hist(seedtarg - seedcon, xlim = c(-15, 50))
hist(unseedtarg - unseedcon, xlim = c(-15, 50))
par(op)
sd(seedtarg)
sd(unseedtarg)
sd(seedtarg - seedcon)
sd(unseedtarg - unseedcon)
t.test(seedtarg - seedcon, unseedtarg - unseedcon)
wilcox.test(seedtarg - seedcon, unseedtarg - unseedcon, conf.int = TRUE)
# The seeding seems to have had no observable effect. The non-parametric
# method is more appropriate here because of the skewed distributions of the
# rainfall. The investigators tried to alleviate the skewness by testing the
# square roots of the data. Histograms show that this may have been
# effective.
# Remark: whether the tests tell us anything or not, the other possible
# combinations of two-sample tests in this problem present some examples of
# wide variation between the results of the parametric and non-parametric
# methods. The reason for the large difference is the skewness of the
# distributions.
# In most cases one should prefer non-parametric tests in the presence of
# moderately large but skewed datasets.
# For some interesting commentary on cloud seeding see e.g.
# http://rams.atmos.colostate.edu/gkss.html
# http://www.nawcinc.com/wmfaq.html
#------------------------------------------------------------------------------
# Problem 54 Chapter 11 — STEEL: normality diagnostics for the Ox column.
steel <- read.table("http://math.cmc.edu/moneill/Math152/Handouts/STEEL.txt",
                    header = TRUE, na.strings = "*")
attach(steel)
hist(Ox)
mean(Ox)
# Is this data normally distributed?
# Histogram with the fitted normal density overlaid.
hist(Ox, prob = TRUE)
curve(dnorm(x, mean = mean(Ox), sd = sd(Ox)), add = TRUE)
# Normal probability plot.
qqnorm(Ox)
# ...and by bare hands (the plotting positions assume 40 observations —
# confirm with length(Ox) just below).
length(Ox)
plot(qnorm((1/41) * (1:40)), Ox[order(Ox)])
# Checking the correlation coefficient for this plot.
cor(qnorm((1/41) * (1:40)), Ox[order(Ox)])
# From Filliben, this is between the 75th and 90th percentile.
# The similar built-in test named for Shapiro.
shapiro.test(Ox)
# Hanging rootogram and chigram: expected bin probabilities and counts for
# bins of width 0.5 starting at 3 under the fitted normal.
hist(Ox)
phat <- pnorm((3 + .5*(1:9) - mean(Ox))/sd(Ox)) - pnorm((3 + .5*(0:8) - mean(Ox))/sd(Ox))
nhat <- length(Ox)*phat
# NOTE(review): the definition of n() continues beyond this chunk; the
# fragment below is preserved verbatim.
n<-function(j){length(Ox[3 + (j-1)*.5