lecture 2

For most of what follows I assume that the data frame "hospitals" is attached and that 
functions from rnotes1.txt are available.



 exercise 14 chapter 7

length(discharges[discharges<1000])/393

  phat<-function(x){sample(discharges,25)->s;length(s[s<1000])/25}

  hist(sapply(1:500,phat),prob=T)

  curve(dnorm(x,mean=.654,sd=sqrt(.654*.346/25)),xlim=c(0,1),add=T)
-----------------------------------------------------------------------------------
exercise 15 chapter 7

f<-function(x){2*(1-pnorm((200/589.7)*sqrt(x)*sqrt(392/(393-x))))}

plot(20:100,sapply(20:100,f))

f<-function(x,c){qnorm(1-(c/2))*(589.7/sqrt(x))*(sqrt((393-x)/392))}
f(c(20,40,80),.1)
f(c(20,40,80),.5)
-----------------------------------------------------------------------------------

Ratio Estimates

Note the strong relationship between number of beds and number of discharges:

plot(beds,discharges)


Compute one ratio estimate:

s<-sample(1:393,64)

mean(hospitals$beds)* mean(hospitals$discharges[s])/mean(hospitals$beds[s])


Write a function that does the same thing:

k<-function(x){
 s<-sample(1:393,64)
  mean(hospitals$beds)* mean(hospitals$discharges[s])/mean(hospitals$beds[s])


Generalize the previous function:


h<-function(m,n){
 
 k<-function(x){
 s<-sample(1:393,n)
  mean(hospitals$beds)* mean(hospitals$discharges[s])/mean(hospitals$beds[s])
 }
 
 
 
 sapply(1:m,k)
 }

Compare histograms for the two methods of estimating the population mean
(simple random sampling vs. ratio estimate, note that you need the function "g"
defined in lecture1.txt)

op<-par(no.readonly=TRUE)

par(mfcol=c(2,1),pin=c(1.8,.9))


hist(g(500,64,hospitals$discharges),xlim=c(500,1100))

w<- h(500,64)

hist(w,xlim=c(500,1100))

par(op)

Compare the standard deviations:

sd(w)
sd(g(500,64,hospitals$discharges))
------------------------------------------------------------------------------------
Stratified Sampling

how to stratify:

o<-order(beds)

mu1<-mean(discharges[o][1:98])

sig1<-sd(discharges[o][1:98])

mu2<-mean(discharges[o][99:196])

sig2<-sd(discharges[o][99:196])

mu3<-mean(discharges[o][197:294])

sig3<-sd(discharges[o][197:294])

mu4<-mean(discharges[o][295:393])

sig4<-sd(discharges[o][295:393])

computing a variance :

v1<-c(.249^2,.249^2,.249^2,(.251^2))


v2<-c(sig1^2,sig2^2,sig3^2,sig4^2)

4*sum(v1*v2)

Comparing proportional stratification with simple random sampling in a histogram:

f1<-function(x){o<-order(hospitals$beds)
                
                k<-function(l){mean(sample(hospitals$discharges[o][l],10))}

   .249*(k(1:98) + k(99:196) + k(197:294)) + .251*k(295:393)

}

op<-par(no.readonly=TRUE)

par(mfcol=c(2,1),pin=c(1.8,.9))


hist(g(500,40,hospitals$discharges),xlim=c(500,1100))

hist(sapply(1:500,f1),xlim=c(500,1100))

par(op)


Comparing proportional, optimal and simple:

weights:

40*.249*sig1/(sum(sqrt(v1)*sqrt(v2)))
40*.249*sig2/(sum(sqrt(v1)*sqrt(v2)))
40*.249*sig3/(sum(sqrt(v1)*sqrt(v2)))
40*.251*sig4/(sum(sqrt(v1)*sqrt(v2)))



f2<-function(x){o<-order(hospitals$beds)
                
                k<-function(l,m){mean(sample(hospitals$discharges[o][l],m))}

   .249*(k(1:98,4) + k(99:196,8) + k(197:294,10)) + .251*k(295:393,18)

}

op<-par(no.readonly=TRUE)

par(mfcol=c(3,1),pin=c(1.8,.9))


hist(g(500,40,hospitals$discharges),xlim=c(500,1100))

hist(sapply(1:500,f1),xlim=c(500,1100))

hist(sapply(1:500,f2),xlim=c(500,1100))


par(op)