#This file is still being edited 4/30 #Bismuth example (I.II) and problem 32 (II.I) (for class) #bismuth<-read.table("http://math.mckenna.edu/moneill/Math152/Handouts/BISMUTH.txt",header=T) bismuth<-read.table("C:/Documents and Settings/Mike/Desktop/Math 152/BISMUTH.txt",header=T) attach(bismuth) #example from the text #First we'll do some of the work by "bare hands". #This is a purely illustrative exercise. It is not the way the actual computation proceeds in general because #inversion of the design matrix (dm) is a famously difficult numerical problem. #See problems 6 and 7. #Create the matrix of 1's and x's and its transpose. X<-model.matrix(I.II~TEMP) X t(X) #Matrix multiplication is given by %*% t(X)%*%X dm<-t(X)%*%X #The solve function solves systems of linear equations. But when applied to a matrix with no other #arguments, it returns the inverse matrix. solve(dm) sqrt(solve(dm)) # (Intercept) TEMP #(Intercept) 0.6389184 NaN #TEMP NaN 0.0199233 #Warning message: #NaNs produced in: sqrt(solve(dm)) #The NaN's come from trying to take the square root of a negative number. #Compute the estimated standard error. We showed in lecture that the square of this quantity is an unbiased #estimate of the variance of the model errors. s<-sqrt(sum((I.II -X%*%solve(dm)%*%t(X)%*%I.II)^2)/21) #Now we can compute the standard error estimates for the coefficients as from Theorem B on pg. 538. .6389*s .0199233*s #Here are the estimated coefficients solve(dm)%*%t(X)%*%I.II #Here are the fitted pressures X%*%solve(dm)%*%t(X)%*%I.II #Now let's do the work with the built in function "lm". #Compare the results with those above. lm<-lm(I.II~TEMP) summary(lm) fitted(lm) #There is probably a built in confidence interval generator but here it is from scratch. c(-41.2835 -.0199233*s*qt(.975,df=21),-41.2835 +.0199233*s*qt(.975,df=21)) # The standardized residuals are given by the function rstandard. Note once again that the theoretical #description of the standardized residuals given in your text is correct, but it does not describe the #actual computation. Any efficient and accurate numerical algorithm is careful to avoid inverting the design #matrix t(X)%*%X. In practice it is t(R)%*%R which might be inverted, where R is the upper triangular factor from #the Cholesky or QR decomposition (see problems 6 and 7). Typically the whole inverse is not needed and # only a single or a few entries of the inverse are computed or estimated. SR<-rstandard(lm) cbind(I.II,TEMP,SR) plot(SR,TEMP) # As described in your text, the residual plot reveals problems with the data which call into question the #assumption about independence of the errors in the observations. #Without a better description of what happened in the laboratory, the estimates for the coefficients and the #confidence interval generated above can not be considered reliable. ############################################## #Problem 32 plot(II.I~TEMP) lm<-lm(II.I~TEMP) par(mfrow=c(2,1)) #In the following pair of plots, notice that the difference is basically a change of scale plot(resid(lm)~TEMP) plot(rstandard(lm)~TEMP) #Comparison with the table below raises he question of thoe order of the data collection. #If the data were collected in the order they are recorded, then just as in the previous example, #the techniques used in the laboratory must be examined. In particular, it appears as though systematic #errors may have been produced by recalibration of equipment between measurements at different temperatures. SR<-rstandard(lm) cbind(II.I,TEMP,SR) #Plotting the residuals in the order that they were (probably) recorded shows an upward trend between observations #3-11 and a downward trend in the last 8 observations. Assumptions about randmoness and independence of #observation errors are again in doubt. plot(1:23,rstandard(lm)) par(mfrow=c(1,1)) #---------------------------------------------- #Catheter example (for class) catheter<-read.table("http://math.mckenna.edu/moneill/Math152/Handouts/catheter.txt",header=T) names(catheter) rm(w) rm(h) attach(catheter) #Check the variables to see that they represent the correct data before continuing. # # The function "pairs" produces an array of plots of each pairing of variables. #The plots below reproduce figures 14.21,14.22 and 14.23 in the text. #The summary's pf the linear models follow the discussion in section 14.5 which should be read #in conjunction with this sample code. pairs(dpa~h+w) #Simple linear predictions by height and weight alone and plots of their standardized residuals. summary(lm(dpa~h)) summary(lm(dpa~w)) par(mfrow=c(2,1)) plot(rstandard(lm(dpa~h))~h) plot(rstandard(lm(dpa~w))~w) par(mfrow=c(1,1)) #Multiple regression using height and weight together. #Since the height and weight are so well correlated, there is not much extra predictive value. #Note the large variablity of the coefficients in the multiple regression estimates. #See the discussion on pages 547 and 548. summary(lm(dpa~h+w)) par(mfrow=c(2,1)) plot(rstandard(lm(dpa~h +w))~h) plot(rstandard(lm(dpa~h+w))~w) par(mfrow=c(1,1)) #------------------------------------------------- #Problem 33 #barium<-read.table("http://math.mckenna.edu/moneill/Math152/Handouts/barium.txt",header=T) barium<-read.table("C:/Documents and Settings/Mike/Desktop/Math 152/barium.txt",header=T) #The physics predicts a linear relationship between the log of the pressure and #the reciprocal of the temperature plot(1/Temp,log(Pr)) x=1/Temp lm<-lm(log(Pr)~x) abline(lm,lty=2,lwd=2) plot(resid(lm)~x) plot(1:32,resid(lm)) #The above residual plots give no cause for serious doubt about the #random nature of the observation errors. summary(lm) ?#Call: #lm(formula = log(Pr) ~ x) #Residuals: # Min 1Q Median 3Q Max #-0.158596 -0.087968 0.004495 0.061145 0.293036 #Coefficients: # Estimate Std. Error t value Pr(>|t|) #(Intercept) 1.818e+01 1.419e-01 128.2 <2e-16 *** #x -2.126e+04 1.335e+02 -159.3 <2e-16 *** #--- #Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 #Residual standard error: 0.1164 on 30 degrees of freedom #Multiple R-Squared: 0.9988, Adjusted R-squared: 0.9988 #F-statistic: 2.538e+04 on 1 and 30 DF, p-value: < 2.2e-16 #Intercept confidence interval c(18.18 -.1419*qt(.975,df=30),18.18 +.1419*qt(.975,df=30)) #[1] 17.8902 18.4698 #slope confidence interval c(-21260 - 133.5*qt(.975,df=30),-21260 + 133.5*qt(.975,df=30)) #[1] -21532.64 -20987.36 #---------------------------------------------------------------------------------- #Problem 34 (on last assignment) #sapphire<-read.table("http://math.mckenna.edu/moneill/Math152/Handouts/SAPPHIRE.txt",header=T) sapphire<-read.table("C:/Documents and Settings/Mike/Desktop/Math 152/SAPPHIRE.txt",header=T) lm<-lm(YM~Temp) summary(lm) #Call: #lm(formula = YM ~ Temp) #Residuals: Min 1Q Median 3Q Max #-16.731 -13.164 4.427 7.059 20.692 #Coefficients: # Estimate Std. Error t value Pr(>|t|) #(Intercept) 628.62878 13.42758 46.82 <2e-16 *** #Temp -0.61369 0.01761 -34.85 <2e-16 *** #--- #Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 #Residual standard error: 11.72 on 22 degrees of freedom #Multiple R-Squared: 0.9822, Adjusted R-squared: 0.9814 #F-statistic: 1214 on 1 and 22 DF, p-value: < 2.2e-16 plot(YM~Temp) abline(lm,lty=2,lwd=2) plot(resid(lm)~Temp) plot(rstandard(lm)~Temp) #------------------------------------------------------------------ #Problem 35 #nuclear<-read.table("http://math.mckenna.edu/moneill/Math152/Handouts/NUCLEAR.txt",header=T) nuclear<-read.table("C:/Documents and Settings/Mike/Desktop/Math 152/NUCLEAR.txt",header=T) plot(Pressure~Volume) lm<-lm(Pressure~Volume) abline(lm,lty=1,lwd=2) plot(rstandard(lm)~Volume) lm<-lm(Pressure~Volume + I(Volume^2)) plot(resid(lm)~Volume) #Neither model fits very well. Adding two more terms produces random looking residuals. #------------------------------------------------------------------- #Problem 36 #provring<-read.table("http://math.mckenna.edu/moneill/Math152/Handouts/PROVRING.txt",header=T) provring<-read.table("C:/Documents and Settings/Mike/Desktop/Math 152/PROVRING.txt",header=T) attach(provring) names(provring) plot(Deflection~Load) lm<-lm(Deflection~Load) abline(lm,lty=1,lwd=2) plot(rstandard(lm)~Load) lm<-lm(Deflection~Load+I(Load^2)) plot(rstandard(lm)~Load) #------------------------------------------------------------------------ #Problem 37 #------------------------------------------------------------------------ #Problem 38 v<-c(20.5,20.5,30.5,40.5,48.8,57.8) y<-c(15.4,13.3,33.9,73.1,113.0,142.6) lm<-lm(y~v) plot(y~v) abline(lm) plot(resid(lm)~v) lm<-lm(y~v +I(v^2)) summary(lm) Call: lm(formula = y ~ v + I(v^2)) Residuals: 1 2 3 4 5 6 2.5226 0.4226 -6.1225 -0.3490 7.0365 -3.5102 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -23.13270 21.94412 -1.054 0.369 v 1.11275 1.28836 0.864 0.451 I(v^2) 0.03141 0.01684 1.865 0.159 Residual standard error: 5.944 on 3 degrees of freedom Multiple R-Squared: 0.9927, Adjusted R-squared: 0.9878 F-statistic: 203.6 on 2 and 3 DF, p-value: 0.0006254 curve(-23.13270 + 1.11275*x + 0.03141*x^2,add=T) lm<-lm(sqrt(y)~0+v) summary(lm) curve(.20672^2 * x^2,add=T) plot(resid(lm)~v) #------------------------------------------------------------------------------ #Problem 39 #cysts<-read.table("http://math.mckenna.edu/moneill/Math152/Handouts/CYSTS.txt",header=T) cysts<-read.table("C:/Documents and Settings/Mike/Desktop/Math 152/CYSTS.txt",header=T) attach(cysts) cysts plot(T10~Diameter) lm<-lm(T10~Diameter,weights=c(2,3,11,17,8,5,2)) abline(lm) lm<-lm(T10~Diameter) abline(lm) lm<-lm(sqrt(T10)~Diameter,weights=c(2,3,11,17,8,5,2)) plot(resid(lm)~Diameter) lm<-lm(T10~Diameter+I(Diameter^2)) summary(lm) Call: lm(formula = T10 ~ Diameter + I(Diameter^2)) Residuals: 1 2 3 4 5 6 7 2.3688 -1.9976 -3.6584 0.6124 3.5638 1.1756 -2.0647 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 776.3795 41.3473 18.777 4.74e-05 *** Diameter -67.3338 5.3309 -12.631 0.000226 *** I(Diameter^2) 1.6082 0.1675 9.604 0.000657 *** --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 Residual standard error: 3.229 on 4 degrees of freedom Multiple R-Squared: 0.9974, Adjusted R-squared: 0.9962 F-statistic: 781.6 on 2 and 4 DF, p-value: 6.514e-06 plot(resid(lm)~Diameter) plot(T10~Diameter) curve(776.3795 -67.338*x + 1.6082*x^2,add=T) lm<-lm(log(T10)~log(Diameter)) summary(lm) lm<-lm(log(T10)~log(Diameter),weights=c(2,3,11,17,8,5,2)) summary(lm) lm<-lm(T25~I(1/Diameter^2),weights=c(1,3,13,11,6,4,1)) summary(lm) lm<-lm(T28~I(1/Diameter^2),weights=c(2,4,11,18,6,4,2)) summary(lm) #------------------------------------------------------------- #Problem 41 #reading<-read.table("http://math.mckenna.edu/moneill/Math152/Handouts/READING.txt",header=T) reading<-read.table("C:/Documents and Settings/Mike/Desktop/Math 152/READING.txt",header=T) attach(reading) plot(Y83~Y82) lm<-lm(Y83~Y82) abline(lm) abline(a=0,b=1,lty=2) qqnorm(resid(lm)) plot(resid(lm),Y83) #----------------------------------------------------------------------------------------- #Problem 44 #------------------------------------------------------------------------- #Problem 45 vapor<-read.table("http://math.mckenna.edu/moneill/Math152/Handouts/vapor.txt",header=T) vapor hold.out<-sample(1:125,40) # hold.out<-c( 8, 35 , 14, 120, 85 , 51, 13, 1, 122, 31, 24, 123 , 78, 32 , 21 , 41 , 44 ,121, 15, # 124, 107, 54 ,64, 70 , 73 , 77, 48 ,106, 55, 108, 34, 16, 46, 98 , 80 , 52, 27 , 9, # 93 , 88) keep.in<-setdiff(1:125,hold.out) vapor.keep<-vapor[keep.in,] attach(vapor.keep) pairs(vapor.keep) lm<-lm(Y~X1+X2+X3+X4) summary(lm) lm<-lm(Y~X2+X3+X4) summary(lm) lm<-lm(Y~X3+X4) summary(lm) par(mfrow=c(2,1)) plot(resid(lm(Y~X4))~X4) plot(resid(lm(Y~sqrt(X4)))~X4) par(mfrow=c(1,1)) lm<-lm(Y~sqrt(X4)+sqrt(X3) + X2 + X1) summary(lm) lm<-lm(Y~sqrt(X4) + X2 + X1 ) summary(lm) lm1<-lm(Y~X2+X3+X4) summary(lm1) p1<-predict(lm1,newdata=vapor.out) sqrt((1/40)*sum((vapor.out$Y -p1)^2)) lm2<-lm(Y~sqrt(X4) + X2 + X1) summary(lm2) p2<-predict(lm2,newdata=vapor.out) sqrt((1/40)*sum((vapor.out$Y -p2)^2)) #-----------------------------------------------------------------------------