#This file is still being edited 4/30


#Bismuth example (I.II) and problem 32 (II.I)   (for class)

#bismuth<-read.table("http://math.mckenna.edu/moneill/Math152/Handouts/BISMUTH.txt",header=T)
bismuth<-read.table("C:/Documents and Settings/Mike/Desktop/Math 152/BISMUTH.txt",header=T)

attach(bismuth)

#example from the text

#First we'll do some of the work by "bare hands".
#This is a purely illustrative exercise. It is not the way the actual computation proceeds in general because #inversion of the design matrix (dm) is a famously difficult numerical problem.
#See problems 6 and 7.

#Create the  matrix of 1's and x's and its transpose.

X<-model.matrix(I.II~TEMP)

X

t(X)

#Matrix multiplication is given by %*%

t(X)%*%X
dm<-t(X)%*%X

#The solve function solves systems of linear equations. But when applied to a matrix with no other
#arguments, it returns the inverse matrix.

solve(dm)
sqrt(solve(dm))

#            (Intercept)      TEMP
#(Intercept)   0.6389184       NaN
#TEMP                NaN 0.0199233
#Warning message:
#NaNs produced in: sqrt(solve(dm)) 

#The NaN's come from trying to take the square root of a negative number.

#Compute the estimated standard error. We showed in lecture that the square of this quantity is an unbiased #estimate of the variance of the model errors.

s<-sqrt(sum((I.II -X%*%solve(dm)%*%t(X)%*%I.II)^2)/21)

#Now we can compute the standard error estimates for the coefficients as from Theorem B on pg. 538.
.6389*s

.0199233*s

#Here are the estimated coefficients

solve(dm)%*%t(X)%*%I.II

#Here are the fitted pressures

X%*%solve(dm)%*%t(X)%*%I.II

#Now let's do the work with the built in function "lm".
#Compare the results with those above.

lm<-lm(I.II~TEMP)
 summary(lm)

fitted(lm)

#There is probably a built in confidence interval generator but here it is from scratch.

c(-41.2835 -.0199233*s*qt(.975,df=21),-41.2835 +.0199233*s*qt(.975,df=21))

# The standardized residuals are given by the function rstandard. Note once again that the theoretical 
#description of the standardized residuals given in your text is correct, but it does not describe the 
#actual computation. Any efficient and accurate numerical algorithm is careful to avoid inverting the design #matrix t(X)%*%X. In practice it is t(R)%*%R which might be inverted, where R is the upper triangular factor from #the Cholesky or QR decomposition (see problems 6 and 7). Typically the whole inverse is not needed and
# only a single or a few entries of the inverse are computed or estimated.

SR<-rstandard(lm)
cbind(I.II,TEMP,SR)

plot(SR,TEMP)

# As described in your text, the residual plot reveals problems with the data which call into question the #assumption  about independence of the errors in the observations.
#Without a better description of what happened in the laboratory, the estimates for the coefficients and the #confidence interval generated above can not be considered reliable.
##############################################
#Problem 32 

plot(II.I~TEMP)
lm<-lm(II.I~TEMP)

par(mfrow=c(2,1))

#In the following pair of plots, notice that the difference is basically a change of scale

plot(resid(lm)~TEMP)
plot(rstandard(lm)~TEMP)

#Comparison with the table below raises he question of thoe order of the data collection.
#If the data were collected in the order they are recorded, then just as in the previous example, 
#the techniques used in the laboratory must be examined. In particular, it appears as though systematic
#errors may have been produced by recalibration of equipment between measurements at different temperatures.

SR<-rstandard(lm)
cbind(II.I,TEMP,SR)

#Plotting the residuals in the order that they were (probably) recorded shows an upward trend between observations #3-11 and a downward trend in the last 8 observations. Assumptions about randmoness and independence of #observation errors are again in doubt.

plot(1:23,rstandard(lm))


par(mfrow=c(1,1))
#----------------------------------------------

#Catheter example  (for class)

catheter<-read.table("http://math.mckenna.edu/moneill/Math152/Handouts/catheter.txt",header=T)
names(catheter)

rm(w)
rm(h)

attach(catheter)

#Check the variables to see that they represent the correct data before continuing.
#

# The function "pairs" produces an array of plots of each pairing of variables.
#The plots below reproduce figures 14.21,14.22 and 14.23 in the text.
#The summary's pf the linear models follow the discussion in section 14.5 which should be read 
#in conjunction with this sample code.


pairs(dpa~h+w)

#Simple linear predictions by height and weight alone and plots of their standardized residuals.

summary(lm(dpa~h))

summary(lm(dpa~w))

par(mfrow=c(2,1))
plot(rstandard(lm(dpa~h))~h)
plot(rstandard(lm(dpa~w))~w)
par(mfrow=c(1,1))

#Multiple regression using height and weight together.

#Since the height and weight are so well correlated, there is not much extra predictive value.
#Note the large variablity of the coefficients in the multiple regression estimates.
#See the discussion on pages 547 and 548.

summary(lm(dpa~h+w))

par(mfrow=c(2,1))
plot(rstandard(lm(dpa~h +w))~h)
plot(rstandard(lm(dpa~h+w))~w)
par(mfrow=c(1,1))

#-------------------------------------------------

#Problem 33

#barium<-read.table("http://math.mckenna.edu/moneill/Math152/Handouts/barium.txt",header=T)
barium<-read.table("C:/Documents and Settings/Mike/Desktop/Math 152/barium.txt",header=T)

#The physics predicts a linear relationship between the log of the pressure and 
#the reciprocal of the temperature

plot(1/Temp,log(Pr))

x=1/Temp

lm<-lm(log(Pr)~x)

abline(lm,lty=2,lwd=2)

plot(resid(lm)~x)

plot(1:32,resid(lm))

#The above residual plots give no cause for serious doubt about the 
#random nature of the observation errors.

summary(lm)

?#Call:
#lm(formula = log(Pr) ~ x)

#Residuals:
#     Min        1Q    Median        3Q       Max 
#-0.158596 -0.087968  0.004495  0.061145  0.293036 

#Coefficients:
#              Estimate Std. Error t value Pr(>|t|)    
#(Intercept)  1.818e+01  1.419e-01   128.2   <2e-16 ***
#x           -2.126e+04  1.335e+02  -159.3   <2e-16 ***
#---
#Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 

#Residual standard error: 0.1164 on 30 degrees of freedom
#Multiple R-Squared: 0.9988,     Adjusted R-squared: 0.9988 
#F-statistic: 2.538e+04 on 1 and 30 DF,  p-value: < 2.2e-16 

#Intercept confidence interval


c(18.18 -.1419*qt(.975,df=30),18.18 +.1419*qt(.975,df=30))
#[1] 17.8902 18.4698

#slope confidence interval

c(-21260 - 133.5*qt(.975,df=30),-21260 + 133.5*qt(.975,df=30))
#[1] -21532.64 -20987.36

#----------------------------------------------------------------------------------

#Problem 34 (on last assignment)

#sapphire<-read.table("http://math.mckenna.edu/moneill/Math152/Handouts/SAPPHIRE.txt",header=T)
sapphire<-read.table("C:/Documents and Settings/Mike/Desktop/Math 152/SAPPHIRE.txt",header=T)

lm<-lm(YM~Temp)
summary(lm)

#Call:
#lm(formula = YM ~ Temp)

#Residuals:
    Min      1Q  Median      3Q     Max 
#-16.731 -13.164   4.427   7.059  20.692 

#Coefficients:
#             Estimate Std. Error t value Pr(>|t|)    
#(Intercept) 628.62878   13.42758   46.82   <2e-16 ***
#Temp         -0.61369    0.01761  -34.85   <2e-16 ***
#---
#Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 

#Residual standard error: 11.72 on 22 degrees of freedom
#Multiple R-Squared: 0.9822,     Adjusted R-squared: 0.9814 
#F-statistic:  1214 on 1 and 22 DF,  p-value: < 2.2e-16 

 plot(YM~Temp)
 abline(lm,lty=2,lwd=2)
 plot(resid(lm)~Temp)
 plot(rstandard(lm)~Temp)

 
#------------------------------------------------------------------ 

#Problem 35
 
 #nuclear<-read.table("http://math.mckenna.edu/moneill/Math152/Handouts/NUCLEAR.txt",header=T)
nuclear<-read.table("C:/Documents and Settings/Mike/Desktop/Math 152/NUCLEAR.txt",header=T)
 
plot(Pressure~Volume)

lm<-lm(Pressure~Volume)

abline(lm,lty=1,lwd=2)


plot(rstandard(lm)~Volume)

lm<-lm(Pressure~Volume + I(Volume^2))
 plot(resid(lm)~Volume)

#Neither model fits very well. Adding two more terms produces random looking residuals.

#-------------------------------------------------------------------

#Problem 36

#provring<-read.table("http://math.mckenna.edu/moneill/Math152/Handouts/PROVRING.txt",header=T)
provring<-read.table("C:/Documents and Settings/Mike/Desktop/Math 152/PROVRING.txt",header=T)

attach(provring)

names(provring)

 plot(Deflection~Load)


lm<-lm(Deflection~Load)
 abline(lm,lty=1,lwd=2)

plot(rstandard(lm)~Load)

lm<-lm(Deflection~Load+I(Load^2))
 plot(rstandard(lm)~Load)

#------------------------------------------------------------------------
#Problem 37


#------------------------------------------------------------------------

#Problem 38


v<-c(20.5,20.5,30.5,40.5,48.8,57.8)

y<-c(15.4,13.3,33.9,73.1,113.0,142.6)

lm<-lm(y~v)

plot(y~v)

abline(lm)
 

plot(resid(lm)~v)  

lm<-lm(y~v +I(v^2))

summary(lm)

Call:
lm(formula = y ~ v + I(v^2))

Residuals:
      1       2       3       4       5       6 
 2.5226  0.4226 -6.1225 -0.3490  7.0365 -3.5102 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)
(Intercept) -23.13270   21.94412  -1.054    0.369
v             1.11275    1.28836   0.864    0.451
I(v^2)        0.03141    0.01684   1.865    0.159

Residual standard error: 5.944 on 3 degrees of freedom
Multiple R-Squared: 0.9927,     Adjusted R-squared: 0.9878 
F-statistic: 203.6 on 2 and 3 DF,  p-value: 0.0006254 

curve(-23.13270 + 1.11275*x + 0.03141*x^2,add=T)

lm<-lm(sqrt(y)~0+v)

summary(lm)

curve(.20672^2 * x^2,add=T)

plot(resid(lm)~v)

#------------------------------------------------------------------------------
#Problem 39

#cysts<-read.table("http://math.mckenna.edu/moneill/Math152/Handouts/CYSTS.txt",header=T)
cysts<-read.table("C:/Documents and Settings/Mike/Desktop/Math 152/CYSTS.txt",header=T)

attach(cysts)

cysts

plot(T10~Diameter)

lm<-lm(T10~Diameter,weights=c(2,3,11,17,8,5,2))

abline(lm)

lm<-lm(T10~Diameter)
 abline(lm)

lm<-lm(sqrt(T10)~Diameter,weights=c(2,3,11,17,8,5,2))
 plot(resid(lm)~Diameter)

lm<-lm(T10~Diameter+I(Diameter^2))
 summary(lm)

Call:
lm(formula = T10 ~ Diameter + I(Diameter^2))

Residuals:
      1       2       3       4       5       6       7 
 2.3688 -1.9976 -3.6584  0.6124  3.5638  1.1756 -2.0647 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)   776.3795    41.3473  18.777 4.74e-05 ***
Diameter      -67.3338     5.3309 -12.631 0.000226 ***
I(Diameter^2)   1.6082     0.1675   9.604 0.000657 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 

Residual standard error: 3.229 on 4 degrees of freedom
Multiple R-Squared: 0.9974,     Adjusted R-squared: 0.9962 
F-statistic: 781.6 on 2 and 4 DF,  p-value: 6.514e-06 

plot(resid(lm)~Diameter)

plot(T10~Diameter)
curve(776.3795 -67.338*x + 1.6082*x^2,add=T)

lm<-lm(log(T10)~log(Diameter))
summary(lm)

lm<-lm(log(T10)~log(Diameter),weights=c(2,3,11,17,8,5,2))
summary(lm)

lm<-lm(T25~I(1/Diameter^2),weights=c(1,3,13,11,6,4,1))
summary(lm)

lm<-lm(T28~I(1/Diameter^2),weights=c(2,4,11,18,6,4,2))
summary(lm)

#-------------------------------------------------------------

#Problem 41

#reading<-read.table("http://math.mckenna.edu/moneill/Math152/Handouts/READING.txt",header=T)
reading<-read.table("C:/Documents and Settings/Mike/Desktop/Math 152/READING.txt",header=T)

attach(reading)

plot(Y83~Y82)
lm<-lm(Y83~Y82)
abline(lm)
abline(a=0,b=1,lty=2)
qqnorm(resid(lm))
plot(resid(lm),Y83)

#-----------------------------------------------------------------------------------------
#Problem 44

#-------------------------------------------------------------------------
#Problem 45

vapor<-read.table("http://math.mckenna.edu/moneill/Math152/Handouts/vapor.txt",header=T)
 
vapor

hold.out<-sample(1:125,40)

# hold.out<-c(  8,  35 , 14, 120,  85 , 51,  13,   1, 122,  31,  24, 123 , 78,  32 , 21 , 41 , 44 ,121,  15,
# 124, 107,  54  ,64,  70 , 73 , 77,  48 ,106,  55, 108,  34,  16,  46,  98 , 80 , 52,  27 ,  9,
#  93 , 88)


keep.in<-setdiff(1:125,hold.out)

vapor.keep<-vapor[keep.in,]

attach(vapor.keep)

pairs(vapor.keep)

lm<-lm(Y~X1+X2+X3+X4)
 summary(lm)

lm<-lm(Y~X2+X3+X4)
 summary(lm)

lm<-lm(Y~X3+X4)
 summary(lm)


par(mfrow=c(2,1))

plot(resid(lm(Y~X4))~X4)
plot(resid(lm(Y~sqrt(X4)))~X4)

par(mfrow=c(1,1))

lm<-lm(Y~sqrt(X4)+sqrt(X3) + X2 + X1)
 summary(lm)

lm<-lm(Y~sqrt(X4) + X2 + X1 )
 summary(lm)

lm1<-lm(Y~X2+X3+X4)
 summary(lm1)

 p1<-predict(lm1,newdata=vapor.out)
 sqrt((1/40)*sum((vapor.out$Y -p1)^2))



lm2<-lm(Y~sqrt(X4) + X2 + X1)
 summary(lm2)

p2<-predict(lm2,newdata=vapor.out)
 sqrt((1/40)*sum((vapor.out$Y -p2)^2))
#-----------------------------------------------------------------------------