#Problem 9.15
f1<-function(x){exp(-x^2/2)/(exp(-x^2/2) + exp(-(x-1)^2/2))}
f2<-function(x){exp(-x^2/8)/(exp(-x^2/8) + exp(-(x-1)^2/8))}
op<-par(no.readonly=TRUE)
par(mfcol=c(2,1),pin=c(1.8,.9))
#Plot each normal tail probability together with the corresponding f on (-3,3):
curve(1-pnorm(x),xlim=c(-3,3))
curve(f1(x),add=T)
abline(v=0,lty=2)
abline(v=3,lty=2)
curve(1-pnorm(x/2),xlim=c(-3,3))
curve(f2(x),add=T)
abline(v=0,lty=2)
abline(v=3,lty=2)
par(op)

#Problem 45
#Using the statistic developed in Problem 41 (as demonstrated in class):
fr<-c(7,45,181,478,829,1112,1343,1033,670,286,104,24,3)
phat<-sum(fr*(0:12))/sum(12*fr)
phat
sum(fr*((0:12)-12*phat)^2/(12*phat*(1-phat)))
1-pchisq(7122.813,df=6114)
#Normal approximation to the chi-squared distribution with df=6114:
(7122.813-6114)/sqrt(2*6114)
#[1] 9.1229
1-pnorm((7122.813-6114)/sqrt(2*6114))
#[1] 0
#The p-value is negligible.
#Or, using the Pearson chi-squared statistic:
dbinom(0:12,12,phat)
#The expected values under the null hypothesis are:
e<-6115*dbinom(0:12,12,phat)
e
#Pooling the appropriate cells to get at least 5
#expected observations per cell:
e<-c(e[1]+e[2],e[3:11],e[12]+e[13])
#and pooling the observed frequencies in the same way:
fr<-c(fr[1]+fr[2],fr[3:11],fr[12]+fr[13])
fr
#[1] 52 181 478 829 1112 1343 1033 670 286 104 27
#The chi-squared statistic is:
sum((fr-e)^2/e)
#[1] 105.7913
1-pchisq(105.79,df=9)
#[1] 0
#and the p-value is, again, negligible.
#To understand why the model fails, it may be useful to see where the large
#contributions to the chi-squared value are coming from:
((fr-e)^2/e)
#[1] 19.5414269 17.4636749 11.2735366 0.7461465 18.6486299 0.4311387
#[7] 2.5119152 2.8013183 2.9311070 14.4371869 15.0052503
#In particular, the contributions from the "tails" (families with 0, 1, or 2
#boys, or with 10, 11, or 12 boys) are too large. The independence model in
#the hypothesis may be incorrect. Perhaps these families kept growing until
#they reached a desired number of either girls or boys.
#-------------------------------------------------------------------------
#Problem 9.52
potassium<-read.table("http://math.cmc.edu/moneill/Math152/Handouts/POTASS.txt",header=T)
attach(potassium)
names(potassium)
hist(X39,breaks=50,prob=T)
curve(dnorm(x,mean=mean(X39),sd=sd(X39)),add=T)
#Notice the presence of the outlying data point in the attached histogram.
#With the outlier deleted, we have (also attached):
hist(X39[X39<13.9],breaks=20,prob=T)
curve(dnorm(x,mean=mean(X39[X39<13.9]),sd=sd(X39[X39<13.9])),add=T)
#The outlier is evident in the probability plot as well (attached):
length(X39)
plot(qnorm((1/79)*(1:78)),X39[order(X39)])
#Here is the probability plot without the outlier:
plot(qnorm((1/78)*(1:77)),X39[X39<13.9][order(X39[X39<13.9])])
#Now we apply our correlation test:
cor(qnorm((1/78)*(1:77)),X39[X39<13.9][order(X39[X39<13.9])])
#Simulate the null distribution of the correlation for a normal sample:
f<-function(x){
  l<-rnorm(78)
  cor(qnorm((1/79)*(1:78)),l[order(l)])
}
d<-sapply(1:10000,f)
#Order statistics of d approximating its .005, .01, .025, .05, .1, .2, .25,
#.5, .75, .9, .95, .975, .99, and .995 quantiles:
d[order(d)[50]]
d[order(d)[100]]
d[order(d)[250]]
d[order(d)[500]]
d[order(d)[1000]]
d[order(d)[2000]]
d[order(d)[2500]]
d[order(d)[5000]]
d[order(d)[7500]]
d[order(d)[9000]]
d[order(d)[9500]]
d[order(d)[9750]]
d[order(d)[9900]]
d[order(d)[9950]]
#With the outlier deleted, our test has a p-value of about .09.
#With the outlier included:
cor(qnorm((1/79)*(1:78)),X39[order(X39)])
#We would clearly reject the null hypothesis on the basis of
#this numerical value alone.
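#A compact alternative (a sketch, not part of the original write-up; r.del and
#r.all are names introduced here for illustration): the Monte Carlo p-value is
#simply the proportion of simulated null correlations at or below the observed
#one. Note that d was simulated for a sample of size 78, matching the
#comparison made above.
r.del<-cor(qnorm((1/78)*(1:77)),X39[X39<13.9][order(X39[X39<13.9])])  #outlier deleted
r.all<-cor(qnorm((1/79)*(1:78)),X39[order(X39)])                      #outlier included
mean(d<=r.del)  #should agree with the reading of about .09 above
mean(d<=r.all)  #should be very close to 0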
#We now do the same analysis for the other isotope:
hist(X41,prob=T)
curve(dnorm(x,mean=mean(X41),sd=sd(X41)),add=T)
#The right-hand tail may contain an outlier:
hist(X41[X41<579],prob=T)
curve(dnorm(x,mean=mean(X41[X41<579]),sd=sd(X41[X41<579])),add=T)
#Histograms and fitted densities with and without the possible
#outlier are attached.
#The probability plot is more revealing:
plot(qnorm((1/79)*(1:78),mean=mean(X41),sd=sd(X41)),X41[order(X41)])
cor(qnorm((1/79)*(1:78),mean=mean(X41),sd=sd(X41)),X41[order(X41)])
#[1] 0.9835837
#roughly the 5% level of the simulated null distribution
cor(qnorm((1/78)*(1:77)),X41[X41<579][order(X41[X41<579])])
#[1] 0.9961584
#roughly the 75% level
#If we delete the single data point at 580,
#the p-value (for normality) changes from .05 to .75.

#Problem 54
octopods<-read.table("http://math.cmc.edu/moneill/Math152/Handouts/OCTO.txt",header=T)
attach(octopods)
mu<-mean(log(dl))
sig<-sd(log(dl))
#The histogram with the fitted lognormal density is attached:
hist(dl,prob=T,breaks=20)
curve(dnorm(log(x),mean=mu,sd=sig)*(1/x),add=T)
length(dl)
#[1] 94
#The built-in normality test, which is similar to the one we developed in
#class, gives a p-value of 80% for the hypothesis that the log
#of the dorsal lengths is normally distributed:
shapiro.test(log(dl))
#The probability plot is also attached:
plot(qnorm((1/95)*(1:94)),log(dl)[order(dl)])
#Now we apply the test for normality based on the correlation coefficient
#of the probability plot:
cor(qnorm((1/95)*(1:94)),log(dl)[order(dl)])
#Simulate the null distribution of the correlation for a normal sample of size 94:
f<-function(x){
  l<-rnorm(94)
  cor(qnorm((1/95)*(1:94)),l[order(l)])
}
d<-sapply(1:10000,f)
#Order statistics of d approximating its .005, .01, .025, .05, .1, .2, .25,
#.5, .75, .9, .95, .975, .99, and .995 quantiles:
d[order(d)[50]]
d[order(d)[100]]
d[order(d)[250]]
d[order(d)[500]]
d[order(d)[1000]]
d[order(d)[2000]]
d[order(d)[2500]]
d[order(d)[5000]]
d[order(d)[7500]]
d[order(d)[9000]]
d[order(d)[9500]]
d[order(d)[9750]]
d[order(d)[9900]]
d[order(d)[9950]]
#We see that the p-value is slightly less than .95. The lognormal model fits
#quite well, and if anything, perhaps a little too well.
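#As a sketch (not part of the original solution), the same conclusion can be
#read off without listing individual order statistics; r.obs is a name
#introduced here for illustration, and note that quantile() interpolates, so
#its output differs slightly from d[order(d)[k]].
r.obs<-cor(qnorm((1/95)*(1:94)),log(dl)[order(dl)])  #observed correlation
mean(d<=r.obs)  #one-sided Monte Carlo p-value; slightly under .95 per above
quantile(d,c(.005,.05,.5,.95,.995))  #selected quantiles of the null distribution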