glauxbtl<-read.table("http://math.cmc.edu/moneill/Math152/Handouts/GLAUXBTL.txt",header=T,na.strings="*")

#glauxbtl<-read.table("U:/Math 152/GLAUXBTL.txt",header=T,na.strings="*")

attach(glauxbtl)

glauxbtl

Glaux

G<-glauxbtl$Glaux[!is.na(glauxbtl$Glaux)]

Gcount<-Count[!is.na(glauxbtl$Glaux)]

#Poisson for Glaux

ob<-G
npq<-Gcount
lambda<-sum(ob*npq)/sum(ob)
lambda
#[1] 5.796


vhat<-(1/sum(ob))*sum(ob*(npq-(lambda))^2)
vhat
#[1] 6.418384

e<-(1/factorial(0:13))*((lambda)^(0:13))*exp(-lambda)
e<-c(e,1-sum(e))
sum(ob)*e

#[1]  1.519845  8.809019 25.528538 49.321136 71.466325 82.843764 80.027076
 #[8] 66.262419 48.007123 30.916587 17.919254  9.441818  4.560398  2.033236
#[15]  1.343461

#examining the last output, we see that we should pool the first two rows and also the last three rows.

e<-c(sum(ob)*(e[1] + e[2]),sum(ob)*e[3:12],sum(ob)*sum(e[13:15]))
ob<-c(ob[1] + ob[2],ob[3:12],sum(ob[13:15]))
sum((ob-e)^2/e)
#[1] 17.61617

1-pchisq(.Last.value,df=10)
#[1] 0.06179398

#We conclude that if the Poisson model is correct, then the data are a bit unusual.
#Only six percent of such experiments are expected to show worse agreement with the model.

#negative binomial for Glaux

ob<-G

mhat<-lambda

khat<-(mhat^2/(vhat-mhat))

#See page 305 in your text for the recursive formula used in the following two lines:

p<-(1+mhat/khat)^(-khat) 

for (j in 1:14) p<-c(p,((khat + j-1)/j)*(mhat/(mhat + khat))*p[length(p)])

#p is now defined as the vector of negative binomial probabilities
#Of course one can just write them out since there are only 15.


nbdm<-p*sum(ob)

nbdm<-c(nbdm[1] + nbdm[2],nbdm[3:13],nbdm[14]+nbdm[15])

ob<-c(ob[1] + ob[2],ob[3:13],ob[14] + ob[15])

sum((ob-nbdm)^2/nbdm)
#[1] 13.69613


1-pchisq(.Last.value,df=10)
#[1] 0.1873085

#If the negative binomial model is correct than about 19% of such experiments would show worse agreement with the model.
# The negative binomial is the more plausible model.

#Poisson for Potato

ob<-Potato
npq<-Count
lambda<-sum(ob*npq)/sum(ob)
lambda
#[1] 4.736545


vhat<-(1/sum(ob))*sum(ob*(npq-(lambda))^2)

vhat
#[1] 14.99526

#It is already clear from comparing the variance to the mean that the Poisson will not fit the data.

e<-(1/factorial(0:27))*((lambda)^(0:27))*exp(-lambda)
e<-c(e,1-sum(e))
sum(ob)*e
#[1] 2.020352e+01 9.569489e+01 2.266316e+02 3.578169e+02 4.237040e+02
 #[6] 4.013786e+02 3.168580e+02 2.144017e+02 1.269404e+02 6.680657e+01
#[11] 3.164323e+01 1.362542e+01 5.378117e+00 1.959515e+00 6.629522e-01
#[16] 2.093402e-01 6.197183e-02 1.726661e-02 4.543560e-03 1.132673e-03
#21] 2.682477e-04 6.050321e-05 1.302619e-05 2.682571e-06 5.294217e-07
#[26] 1.003052e-07 1.827308e-08 3.205602e-09 6.471623e-10


#By inspection alone we can tell that the data has vitually no chance to have come from a 
#Poisson distribution. A count of 28 has approximate prob. 2.8 *10^-13.
# If we pool cells as usual, we might put rows 19,20,21,22 together and rows 23 through 28 together to 
#get a chi square distribution with 21 -1-1=19 degrees of freedom. The contribution of the last cell to 
#the chi square would be approx. 3.4 *10^10

#Negative binomial for Potato


mhat<-lambda

khat<-(mhat^2/(vhat-mhat))

p<-(1+mhat/khat)^(-khat) 

for (j in 1:29) p<-c(p,((khat + j-1)/j)*(mhat/(mhat + khat))*p[length(p)])

nbdm<-p*sum(ob)

nbdm<-c(nbdm[1:19],nbdm[20]+nbdm[21],sum(nbdm[22:28]))

ob<-c(ob[1:19],ob[20] + ob[21],sum(ob[22:28]))

sum((ob-nbdm)^2/nbdm)
#[1] 21.05272


1-pchisq(.Last.value,df=19)
#[1] 0.3338953
#The negative binomial is a plausible model for this data.