#Asbestos fibers (Example A section 8.4 and section 9.6)
o<-c(31,29,19,18,31,28,34,27,34,30,16,18,26,27,27,18,24,22,28,24,21,17,24)


sum(o)/23
#[1] 24.91304

#Poisson dispersion test
sum((o-24.91304)^2/24.91304)
[1] 26.56545

#likelihood ratio test
2*sum(o*log(o/24.91304) )
#[1] 27.10593

1-pchisq(26.56545,df=22)
#[1] 0.2281974

#----------------------------------------------------

#Bacterial clumps (example B sections 9.5 and 9.6)

#first with the Pearson chi square test

nps<-c(0:10,19)
fr<-c(56,104,80,62,42,27,9,9,5,3,2,1)

lambdahat<-sum(nps*fr)/sum(fr)

o<-c(fr[1:7],9+5+3+2+1)
e<-c(exp(-lambdahat)*lambdahat^(0:6)/factorial(0:6))
e<-c(e,1-sum(e))*sum(o)
e
#[1]  34.864341  85.068991 103.784169  84.411124  51.490786  25.127503  10.218518
#[8]   5.034568

(o-e)^2/e
#[1] 12.8129800  4.2128524  5.4506068  5.9501457  1.7493424  0.1395381  0.1453035
#[8] 44.4852787

sum((o-e)^2/e)
#[1] 74.94605

 1-pchisq(74.94605,df=6)
#[1] 3.941292e-14


#Now with the Poisson dispersion test

#There are 400 observations and the sample variance is:

(1/400)*sum(nps^2*fr) - lambdahat^2
#[1] 4.5914

#The test statistic is:

400*(4.5914)/lambdahat
#[1] 752.6885
#and this is the value of a random variable with approximate distribution which is chi square with 399
#degrees of freedom

1-pchisq(752.6885,399)
#[1] 0

# The computer dismisses the possibility, but we  further note that a chi square with 399 degrees of freedom
#is approximately normally distributed with mean 399 and variance 798

1-pnorm((752.7-399)/sqrt(798))
#[1] 0
#See the lecture notes from 3/20 for an estimate.
# The p-value is less than

(1/sqrt(2 pi))*(1/12.5)*exp(-12.5^2/2)
*[1] 3.756156e-36
#------------------------------------------------------------------


#serum potassium level data

spl<-read.table("http://math.cmc.edu/moneill/Math152/Handouts/SPL.txt",header=T)
attach(spl)
names(spl)
rep(IM,F)
hist(rep(IM,F))
hist(rep(IM,F),breaks=30)

#Figure 9.3 a) Histogram

barplot(F)


ser.lev<-rep(IM,F)

muhat<-mean(ser.lev)
sighat<-sqrt(mean(ser.lev^2)-mean(ser.lev)^2)

phat<- pnorm((3.15 + .1*(1:22) -muhat)/sighat) - pnorm((3.15 + .1*(0:21) -muhat)/sighat)
nhat<-length(ser.lev)*phat


op<-par(no.readonly=TRUE)

par(mfcol=c(1,3),pin=c(2,.9))


#Figure 9.3 b) Hanging histogram

barplot(F-nhat)

#Figure 9.3 c)  Hanging rootogram

barplot(sqrt(F) -sqrt(nhat),ylim=c(-3,3))

#Figure 9.3 d)  Hanging chi-gram

barplot((F-nhat)/sqrt(nhat))

par(op)

#-------------------------------------------

#figure 9.4 uniform-uniform probability plot

l<-runif(100)
plot((1/101)*(1:100),l[order(l)])
abline(0,1,add=T)

#-----------------------------------------------

#figure 9.4 uniform-triangular probability plot

l1<-runif(100)
l2<-runif(100)
l3<-(l1+l2)/2

plot((1/101)*(1:100),l3[order(l3)])
abline(0,1,add=T)


#how far from linear?

cor((1/101)*(1:100),l3[order(l3)])

f<-function(x){l<-runif(100)
 cor((1/101)*(1:100),l[order(l)])
 }

 d<-sapply(1:1000,f)
 sd(d)
#[1] 0.00253484

 mean(d)
#[1] 0.996038

 hist(d)
 d<-sapply(1:10000,f)
 hist(d)

#------------------------------------------------

#Michelson data(figure 9.6)

light<-read.table("http://math.cmc.edu/moneill/Math152/Handouts/LIGHTVEL.txt",header=T)
attach(light)
names(light)
mean(V)
sqrt(mean(V^2)-mean(V)^2)
plot(qnorm((1/101)*(1:100),mean=852.4,sd=78.6145),V[order(V)])

cor(qnorm((1/101)*(1:100),mean=852.4,sd=78.6145),V[order(V)])
cor(qnorm((1/101)*(1:100)),V[order(V)])
plot(qnorm((1/101)*(1:100)),V[order(V)])


#The following is a simulation which reproduces the n=100 row of the table in the paper by Filliben.
#From it we see that the Michelson data produce a correlation value 
#which is between the 20th and 25th percentiles.

f<-function(x){l<-rnorm(100)
cor(qnorm((1/101)*(1:100)),l[order(l)])
}


d<-sapply(1:10000,f)
d[order(d)[50]]
d[order(d)[100]]
d[order(d)[250]]
d[order(d)[500]]
d[order(d)[1000]]
d[order(d)[2000]]
d[order(d)[2500]]
d[order(d)[5000]]
d[order(d)[7500]]
d[order(d)[9000]]
d[order(d)[9500]]
d[order(d)[9750]]
d[order(d)[9900]]
d[order(d)[9950]]


#----------------------------------------------------------

#Normal probability plot of a double exponential(figure 9.7)

de<-(2*rbinom(500,1,.5)-1)*(-log(runif(500)))
 hist(de)
 plot(qnorm((1/501)*(1:500),mean=mean(de),sd=sd(de)),de[order(de)])

f<-function(x) x
curve(f,add=T)

n<-rnorm(500,mean=mean(de),sd=sd(de))
 plot(qnorm((1/501)*(1:500),mean=mean(de),sd=sd(de)),n[order(n)])
 curve(f,add=T)

#---------------------------------------------------------------

#Normal probability plot of a gamma distribution(figure 9.8)

gam<-rgamma(500,shape=5,scale=1)
plot(qnorm((1/501)*(1:500),mean=mean(gam),sd=sd(gam)),gam[order(gam)])

#figure 9.6
plot(qnorm((1/501)*(1:500)),gam[order(gam)])

#------------------------------------------------------------------
#Gamma probability plot of rainfall distribution(figure 9.9)

illrain<-read.table("http://math.cmc.edu/moneill/Math152/Handouts/ILLRAIN.txt",header=F,na.strings="*")



attach(illrain)
names(illrain)
rain<-c(V1,V2,V3,V4,V5)
rain<-rain[!is.na(rain)]
length(rain)
plot(qgamma((1/228)*(1:227),shape=.471,scale=1),rain[order(rain)])

#----------------------------------------------------------------
#Normal probability plot of serum potassium data (figure 9.10)


spl<-read.table("http://math.cmc.edu/moneill/Math152/Handouts/SPL.txt",header=T)

attach(spl)
names(spl)
F
N<-function(j) {sum(F[1:j])}
length(F)
sapply(1:length(F),N)
N(22)


plot(qnorm(sapply(1:length(F),N)/153),3.15 + .1*(1:22))