#Asbestos fibers (Example A section 8.4 and section 9.6) o<-c(31,29,19,18,31,28,34,27,34,30,16,18,26,27,27,18,24,22,28,24,21,17,24) sum(o)/23 #[1] 24.91304 #Poisson dispersion test sum((o-24.91304)^2/24.91304) [1] 26.56545 #likelihood ratio test 2*sum(o*log(o/24.91304) ) #[1] 27.10593 1-pchisq(26.56545,df=22) #[1] 0.2281974 #---------------------------------------------------- #Bacterial clumps (example B sections 9.5 and 9.6) #first with the Pearson chi square test nps<-c(0:10,19) fr<-c(56,104,80,62,42,27,9,9,5,3,2,1) lambdahat<-sum(nps*fr)/sum(fr) o<-c(fr[1:7],9+5+3+2+1) e<-c(exp(-lambdahat)*lambdahat^(0:6)/factorial(0:6)) e<-c(e,1-sum(e))*sum(o) e #[1] 34.864341 85.068991 103.784169 84.411124 51.490786 25.127503 10.218518 #[8] 5.034568 (o-e)^2/e #[1] 12.8129800 4.2128524 5.4506068 5.9501457 1.7493424 0.1395381 0.1453035 #[8] 44.4852787 sum((o-e)^2/e) #[1] 74.94605 1-pchisq(74.94605,df=6) #[1] 3.941292e-14 #Now with the Poisson dispersion test #There are 400 observations and the sample variance is: (1/400)*sum(nps^2*fr) - lambdahat^2 #[1] 4.5914 #The test statistic is: 400*(4.5914)/lambdahat #[1] 752.6885 #and this is the value of a random variable with approximate distribution which is chi square with 399 #degrees of freedom 1-pchisq(752.6885,399) #[1] 0 # The computer dismisses the possibility, but we further note that a chi square with 399 degrees of freedom #is approximately normally distributed with mean 399 and variance 798 1-pnorm((752.7-399)/sqrt(798)) #[1] 0 #See the lecture notes from 3/20 for an estimate. # The p-value is less than (1/sqrt(2 pi))*(1/12.5)*exp(-12.5^2/2) *[1] 3.756156e-36 #------------------------------------------------------------------ #serum potassium level data spl<-read.table("http://math.cmc.edu/moneill/Math152/Handouts/SPL.txt",header=T) attach(spl) names(spl) rep(IM,F) hist(rep(IM,F)) hist(rep(IM,F),breaks=30) #Figure 9.3 a) Histogram barplot(F) ser.lev<-rep(IM,F) muhat<-mean(ser.lev) sighat<-sqrt(mean(ser.lev^2)-mean(ser.lev)^2) phat<- pnorm((3.15 + .1*(1:22) -muhat)/sighat) - pnorm((3.15 + .1*(0:21) -muhat)/sighat) nhat<-length(ser.lev)*phat op<-par(no.readonly=TRUE) par(mfcol=c(1,3),pin=c(2,.9)) #Figure 9.3 b) Hanging histogram barplot(F-nhat) #Figure 9.3 c) Hanging rootogram barplot(sqrt(F) -sqrt(nhat),ylim=c(-3,3)) #Figure 9.3 d) Hanging chi-gram barplot((F-nhat)/sqrt(nhat)) par(op) #------------------------------------------- #figure 9.4 uniform-uniform probability plot l<-runif(100) plot((1/101)*(1:100),l[order(l)]) abline(0,1,add=T) #----------------------------------------------- #figure 9.4 uniform-triangular probability plot l1<-runif(100) l2<-runif(100) l3<-(l1+l2)/2 plot((1/101)*(1:100),l3[order(l3)]) abline(0,1,add=T) #how far from linear? cor((1/101)*(1:100),l3[order(l3)]) f<-function(x){l<-runif(100) cor((1/101)*(1:100),l[order(l)]) } d<-sapply(1:1000,f) sd(d) #[1] 0.00253484 mean(d) #[1] 0.996038 hist(d) d<-sapply(1:10000,f) hist(d) #------------------------------------------------ #Michelson data(figure 9.6) light<-read.table("http://math.cmc.edu/moneill/Math152/Handouts/LIGHTVEL.txt",header=T) attach(light) names(light) mean(V) sqrt(mean(V^2)-mean(V)^2) plot(qnorm((1/101)*(1:100),mean=852.4,sd=78.6145),V[order(V)]) cor(qnorm((1/101)*(1:100),mean=852.4,sd=78.6145),V[order(V)]) cor(qnorm((1/101)*(1:100)),V[order(V)]) plot(qnorm((1/101)*(1:100)),V[order(V)]) #The following is a simulation which reproduces the n=100 row of the table in the paper by Filliben. #From it we see that the Michelson data produce a correlation value #which is between the 20th and 25th percentiles. f<-function(x){l<-rnorm(100) cor(qnorm((1/101)*(1:100)),l[order(l)]) } d<-sapply(1:10000,f) d[order(d)[50]] d[order(d)[100]] d[order(d)[250]] d[order(d)[500]] d[order(d)[1000]] d[order(d)[2000]] d[order(d)[2500]] d[order(d)[5000]] d[order(d)[7500]] d[order(d)[9000]] d[order(d)[9500]] d[order(d)[9750]] d[order(d)[9900]] d[order(d)[9950]] #---------------------------------------------------------- #Normal probability plot of a double exponential(figure 9.7) de<-(2*rbinom(500,1,.5)-1)*(-log(runif(500))) hist(de) plot(qnorm((1/501)*(1:500),mean=mean(de),sd=sd(de)),de[order(de)]) f<-function(x) x curve(f,add=T) n<-rnorm(500,mean=mean(de),sd=sd(de)) plot(qnorm((1/501)*(1:500),mean=mean(de),sd=sd(de)),n[order(n)]) curve(f,add=T) #--------------------------------------------------------------- #Normal probability plot of a gamma distribution(figure 9.8) gam<-rgamma(500,shape=5,scale=1) plot(qnorm((1/501)*(1:500),mean=mean(gam),sd=sd(gam)),gam[order(gam)]) #figure 9.6 plot(qnorm((1/501)*(1:500)),gam[order(gam)]) #------------------------------------------------------------------ #Gamma probability plot of rainfall distribution(figure 9.9) illrain<-read.table("http://math.cmc.edu/moneill/Math152/Handouts/ILLRAIN.txt",header=F,na.strings="*") attach(illrain) names(illrain) rain<-c(V1,V2,V3,V4,V5) rain<-rain[!is.na(rain)] length(rain) plot(qgamma((1/228)*(1:227),shape=.471,scale=1),rain[order(rain)]) #---------------------------------------------------------------- #Normal probability plot of serum potassium data (figure 9.10) spl<-read.table("http://math.cmc.edu/moneill/Math152/Handouts/SPL.txt",header=T) attach(spl) names(spl) F N<-function(j) {sum(F[1:j])} length(F) sapply(1:length(F),N) N(22) plot(qnorm(sapply(1:length(F),N)/153),3.15 + .1*(1:22))