#Problem 9.15
f1<-function(x){exp(-x^2/2)/(exp(-x^2/2) + exp(-(x-1)^2/2))}
f2<-function(x){exp(-x^2/8)/(exp(-x^2/8) + exp(-(x-1)^2/8))}
op<-par(no.readonly=TRUE)
par(mfcol=c(2,1),pin=c(1.8,.9))
#Plot each normal tail probability together with the corresponding f on (-3,3):
curve(1-pnorm(x),xlim=c(-3,3))
curve(f1(x),add=T)
abline(v=0,lty=2)
abline(v=3,lty=2)
curve(1-pnorm(x/2),xlim=c(-3,3))
curve(f2(x),add=T)
abline(v=0,lty=2)
abline(v=3,lty=2)
par(op)

#Problem 45
#Using the statistic developed in Problem 41 (as demonstrated in class):
fr<-c(7,45,181,478,829,1112,1343,1033,670,286,104,24,3)
phat<-sum(fr*(0:12))/sum(12*fr)
phat
sum(fr*((0:12)-12*phat)^2/(12*phat*(1-phat)))
1-pchisq(7122.813,df=6114)
#Normal approximation to the chi-squared distribution with df=6114:
(7122.813-6114)/sqrt(2*6114)
#[1] 9.1229
1-pnorm((7122.813-6114)/sqrt(2*6114))
#[1] 0
#The p-value is negligible.
#Or, using the Pearson chi-squared statistic:
dbinom(0:12,12,phat)
#The expected values under the null hypothesis are:
e<-6115*dbinom(0:12,12,phat)
e
#Pooling the appropriate cells to get at least 5
#expected observations per cell:
e<-c(e[1]+e[2],e[3:11],e[12]+e[13])
#and pooling the observed frequencies in the same way:
fr<-c(fr[1]+fr[2],fr[3:11],fr[12]+fr[13])
fr
#[1] 52 181 478 829 1112 1343 1033 670 286 104 27
#The chi-squared statistic is:
sum((fr-e)^2/e)
#[1] 105.7913
1-pchisq(105.79,df=9)
#[1] 0
#and the p-value is, again, negligible.
#To understand why the model fails, it may be useful to see where the large
#contributions to the chi-squared value are coming from:
((fr-e)^2/e)
#[1] 19.5414269 17.4636749 11.2735366 0.7461465 18.6486299 0.4311387
#[7] 2.5119152 2.8013183 2.9311070 14.4371869 15.0052503
#In particular, the contributions from the "tails" (families with 0, 1, or 2
#boys, or with 10, 11, or 12 boys) are too large. The independence model in
#the hypothesis may be incorrect. Perhaps these families kept growing until
#they reached a desired number of either girls or boys.
#-------------------------------------------------------------------------
#Problem 9.52
potassium<-read.table("http://math.cmc.edu/moneill/Math152/Handouts/POTASS.txt",header=T)
attach(potassium)
names(potassium)
hist(X39,breaks=50,prob=T)
curve(dnorm(x,mean=mean(X39),sd=sd(X39)),add=T)
#Notice the presence of the outlying data point in the attached histogram.
#With the outlier deleted, we have (also attached):
hist(X39[X39<13.9],breaks=20,prob=T)
curve(dnorm(x,mean=mean(X39[X39<13.9]),sd=sd(X39[X39<13.9])),add=T)
#The outlier is evident in the probability plot as well (attached):
length(X39)
plot(qnorm((1/79)*(1:78)),X39[order(X39)])
#Here is the probability plot without the outlier:
plot(qnorm((1/78)*(1:77)),X39[X39<13.9][order(X39[X39<13.9])])
#Now we apply our correlation test:
cor(qnorm((1/78)*(1:77)),X39[X39<13.9][order(X39[X39<13.9])])
#Simulate the null distribution of the correlation for a normal sample:
f<-function(x){
  l<-rnorm(78)
  cor(qnorm((1/79)*(1:78)),l[order(l)])
}
d<-sapply(1:10000,f)
#Order statistics of d approximating its .005, .01, .025, .05, .1, .2, .25,
#.5, .75, .9, .95, .975, .99, and .995 quantiles:
d[order(d)[50]]
d[order(d)[100]]
d[order(d)[250]]
d[order(d)[500]]
d[order(d)[1000]]
d[order(d)[2000]]
d[order(d)[2500]]
d[order(d)[5000]]
d[order(d)[7500]]
d[order(d)[9000]]
d[order(d)[9500]]
d[order(d)[9750]]
d[order(d)[9900]]
d[order(d)[9950]]
#With the outlier deleted, our test has a p-value of about .09.
#With the outlier included:
cor(qnorm((1/79)*(1:78)),X39[order(X39)])
#We would clearly reject the null hypothesis on the basis of
#this numerical value alone.
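#A compact alternative (a sketch, not part of the original write-up; r.del and
#r.all are names introduced here for illustration): the Monte Carlo p-value is
#simply the proportion of simulated null correlations at or below the observed
#one. Note that d was simulated for a sample of size 78, matching the
#comparison made above.
r.del<-cor(qnorm((1/78)*(1:77)),X39[X39<13.9][order(X39[X39<13.9])])  #outlier deleted
r.all<-cor(qnorm((1/79)*(1:78)),X39[order(X39)])                      #outlier included
mean(d<=r.del)  #should agree with the reading of about .09 above
mean(d<=r.all)  #should be very close to 0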
#We now do the same analysis for the other isotope:
hist(X41,prob=T)
curve(dnorm(x,mean=mean(X41),sd=sd(X41)),add=T)
#The right-hand tail may contain an outlier:
hist(X41[X41<579],prob=T)
curve(dnorm(x,mean=mean(X41[X41<579]),sd=sd(X41[X41<579])),add=T)
#Histograms and fitted densities with and without the possible
#outlier are attached.
#The probability plot is more revealing:
plot(qnorm((1/79)*(1:78),mean=mean(X41),sd=sd(X41)),X41[order(X41)])
cor(qnorm((1/79)*(1:78),mean=mean(X41),sd=sd(X41)),X41[order(X41)])
#[1] 0.9835837
#roughly the 5% level of the simulated null distribution
cor(qnorm((1/78)*(1:77)),X41[X41<579][order(X41[X41<579])])
#[1] 0.9961584
#roughly the 75% level
#If we delete the single data point at 580,
#the p-value (for normality) changes from .05 to .75.

#Problem 54
octopods<-read.table("http://math.cmc.edu/moneill/Math152/Handouts/OCTO.txt",header=T)
attach(octopods)
mu<-mean(log(dl))
sig<-sd(log(dl))
#The histogram with the fitted lognormal density is attached:
hist(dl,prob=T,breaks=20)
curve(dnorm(log(x),mean=mu,sd=sig)*(1/x),add=T)
length(dl)
#[1] 94
#The built-in normality test, which is similar to the one we developed in
#class, gives a p-value of 80% for the hypothesis that the log
#of the dorsal lengths is normally distributed:
shapiro.test(log(dl))
#The probability plot is also attached:
plot(qnorm((1/95)*(1:94)),log(dl)[order(dl)])
#Now we apply the test for normality based on the correlation coefficient
#of the probability plot:
cor(qnorm((1/95)*(1:94)),log(dl)[order(dl)])
#Simulate the null distribution of the correlation for a normal sample of size 94:
f<-function(x){
  l<-rnorm(94)
  cor(qnorm((1/95)*(1:94)),l[order(l)])
}
d<-sapply(1:10000,f)
#Order statistics of d approximating its .005, .01, .025, .05, .1, .2, .25,
#.5, .75, .9, .95, .975, .99, and .995 quantiles:
d[order(d)[50]]
d[order(d)[100]]
d[order(d)[250]]
d[order(d)[500]]
d[order(d)[1000]]
d[order(d)[2000]]
d[order(d)[2500]]
d[order(d)[5000]]
d[order(d)[7500]]
d[order(d)[9000]]
d[order(d)[9500]]
d[order(d)[9750]]
d[order(d)[9900]]
d[order(d)[9950]]
#We see that the p-value is slightly less than .95. The lognormal model fits
#quite well, and if anything, perhaps a little too well.
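#As a sketch (not part of the original solution), the same conclusion can be
#read off without listing individual order statistics; r.obs is a name
#introduced here for illustration, and note that quantile() interpolates, so
#its output differs slightly from d[order(d)[k]].
r.obs<-cor(qnorm((1/95)*(1:94)),log(dl)[order(dl)])  #observed correlation
mean(d<=r.obs)  #one-sided Monte Carlo p-value; slightly under .95 per above
quantile(d,c(.005,.05,.5,.95,.995))  #selected quantiles of the null distribution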