lecture 2 For most of what follows I assume that the data frame "hospitals" is attached and that functions from rnotes1.txt are available. exercise 14 chapter 7 length(discharges[discharges<1000])/393 phat<-function(x){sample(discharges,25)->s;length(s[s<1000])/25} hist(sapply(1:500,phat),prob=T) curve(dnorm(x,mean=.654,sd=sqrt(.654*.346/25)),xlim=c(0,1),add=T) ----------------------------------------------------------------------------------- exercise 15 chapter 7 f<-function(x){2*(1-pnorm((200/589.7)*sqrt(x)*sqrt(392/(393-x))))} plot(20:100,sapply(20:100,f)) f<-function(x,c){qnorm(1-(c/2))*(589.7/sqrt(x))*(sqrt((393-x)/392))} f(c(20,40,80),.1) f(c(20,40,80),.5) ----------------------------------------------------------------------------------- Ratio Estimates Note the strong relationship between number of beds and number of discharges: plot(beds,discharges) Compute one ratio estimate: s<-sample(1:393,64) mean(hospitals$beds)* mean(hospitals$discharges[s])/mean(hospitals$beds[s]) Write a function that does the same thing: k<-function(x){ s<-sample(1:393,64) mean(hospitals$beds)* mean(hospitals$discharges[s])/mean(hospitals$beds[s]) Generalize the previous function: h<-function(m,n){ k<-function(x){ s<-sample(1:393,n) mean(hospitals$beds)* mean(hospitals$discharges[s])/mean(hospitals$beds[s]) } sapply(1:m,k) } Compare histograms for the two methods of estimating the population mean (simple random sampling vs. ratio estimate, note that you need the function "g" defined in lecture1.txt) op<-par(no.readonly=TRUE) par(mfcol=c(2,1),pin=c(1.8,.9)) hist(g(500,64,hospitals$discharges),xlim=c(500,1100)) w<- h(500,64) hist(w,xlim=c(500,1100)) par(op) Compare the standard deviations: sd(w) sd(g(500,64,hospitals$discharges)) ------------------------------------------------------------------------------------ Stratified Sampling how to stratify: o<-order(beds) mu1<-mean(discharges[o][1:98]) sig1<-sd(discharges[o][1:98]) mu2<-mean(discharges[o][99:196]) sig2<-sd(discharges[o][99:196]) mu3<-mean(discharges[o][197:294]) sig3<-sd(discharges[o][197:294]) mu4<-mean(discharges[o][295:393]) sig4<-sd(discharges[o][295:393]) computing a variance : v1<-c(.249^2,.249^2,.249^2,(.251^2)) v2<-c(sig1^2,sig2^2,sig3^2,sig4^2) 4*sum(v1*v2) Comparing proportional stratification with simple random sampling in a histogram: f1<-function(x){o<-order(hospitals$beds) k<-function(l){mean(sample(hospitals$discharges[o][l],10))} .249*(k(1:98) + k(99:196) + k(197:294)) + .251*k(295:393) } op<-par(no.readonly=TRUE) par(mfcol=c(2,1),pin=c(1.8,.9)) hist(g(500,40,hospitals$discharges),xlim=c(500,1100)) hist(sapply(1:500,f1),xlim=c(500,1100)) par(op) Comparing proportional, optimal and simple: weights: 40*.249*sig1/(sum(sqrt(v1)*sqrt(v2))) 40*.249*sig2/(sum(sqrt(v1)*sqrt(v2))) 40*.249*sig3/(sum(sqrt(v1)*sqrt(v2))) 40*.251*sig4/(sum(sqrt(v1)*sqrt(v2))) f2<-function(x){o<-order(hospitals$beds) k<-function(l,m){mean(sample(hospitals$discharges[o][l],m))} .249*(k(1:98,4) + k(99:196,8) + k(197:294,10)) + .251*k(295:393,18) } op<-par(no.readonly=TRUE) par(mfcol=c(3,1),pin=c(1.8,.9)) hist(g(500,40,hospitals$discharges),xlim=c(500,1100)) hist(sapply(1:500,f1),xlim=c(500,1100)) hist(sapply(1:500,f2),xlim=c(500,1100)) par(op)