module test !need the regeression subroutine to see this real, allocatable :: Xhat(:,:), b(:,:) end module program Jackknife_Cross_Validation !************************************************************************ ! This program uses a jackknife cross validation sum of squared ! residuals method for finding the appropriate regression coefficients ! in a multiple linear regression. Each data point is held out in turn ! and the distance between the line drawn through the other data points ! to the withheld datapoint makes up the critical distance (the sum of ! these distances, squared is the quantity we wish to minimize.) ! Version of 1/30/02 DRH !************************************************************************ !This program took me a long time to make work. The problem had to do !with the parentheses in the JKSS calculation line. Sometimes I hate !fortran. It is pretty messy and I didn't include any graphics. !However, if you look at the output, which shows the included variables !and the corresponding values of JKSS, you can recognize the U shape. !If anyone ever looks at this and has questions about how it works, !ask me. USE DFIMSLSS USE IMSLf90 use test implicit none integer, parameter :: rows=59 !max size of data arrays integer, parameter :: p=7 !look at your data file and check the # of predictors integer, external :: choose integer :: i,j,k,l,g,n,q,o,ioerror,trial(p),place,col integer :: incl_vars(p) real :: X(rows,p),y(rows,1), yhat(rows-1,1) ! real :: JKSS !Jackknife cross validation sum of squared residuals real :: min,inter,yavg,Xavg,sumX,slope,intercept,LCI,UCI,Delta character (len=12) :: model(p),template(p) data template /'JanTemp','JulyTemp','Rain','PopDensity',& 'Education','NonWhite','logHC'/ open (4,file='JKCVSS.txt') !output file open (20,file='Mortality_dataset.txt',status='old') !datafile !read in data file n=0 ioerror=0 do i=1,rows n=n+1 read (20,*,iostat=ioerror) y(i,1),(X(i,j),j=1,p) if(ioerror==-1) then n=n-1 write(6,'(a,i3)') 'end of file reached, n= ',n exit else if (ioerror/=0) then write(6,'(a,i3)') 'read error!, line ',i endif !write(4,'(
(F9.2,1x))') (X(i,j) ,j=1,p)
enddo
!Automated Stepwise regression
trial = 0 !these are variables used to track which col's
place = 0 !have been tested and which have been accepted
do o=1,p !o represents the order of the data matrix being tested
!o=1
min=9999999999999999.0
do k=1,p !run through each of the variables
JKSS=0.0 !this is the jackknife sum of squared residuals
if(trial(k)==0) then !skip this variable if it is already in model
allocate(Xhat(n-1,o),b(o,1)) !the data matrix and vector of
!regression coefficients have to be dynamically allocated
!because the number of col's in X is not constant.
do i=1,n !run through all data rows
yhat=0.0 !zero response vector
Xhat=0.0 !zero data matrix
b=0.0 !etc...
!fill regression data matrix, first determine which variables
!will go in...
col=1
incl_vars=0 !after we have excepted a variable
!it is no longer a straight forward calculation to
!find Yhat of make Xhat. We need to use the original data matrix
!because we are leaving out one observation and the
!included col's might not be sequential. This tracks
!which cols go in.
do j=1,p !this loop makes Xhat, the data matrix
!excluding one observation.
if(trial(j)==1) then
call add_variable(n-1,i,col,X(:,j))
incl_vars(col)=j
col=col+1
else
continue
end if
end do
call add_variable(n-1,i,col,X(:,k)) ! column k always goes in
incl_vars(col)=k !track the vars that have been included
call add_variableY(n-1,i,y,yhat) !we also need a special
!response vector, since obs i is left out
call Regression(n-1,o,yhat) !calculate the regression coef's
!find the difference between yhat and y for this regression
!Find intercept. The regression line must pass through the
!mean of y. The intercept will be equal to the difference
!between mean y and the corresponding point on the regression
!line.
inter = 0.0 !intercept changes each time
call average(n,yhat,yavg)
inter = yavg !intercept = yavg - sum(Xbar(i) * b(i))
sumX=0.0
do j=1,o
call average(n-1,Xhat(:,j),Xavg)
inter = inter - Xavg*b(j,1) !finish calculating intercept
sumX = sumX + X(i,incl_vars(j))*b(j,1) !This is the
!sum of the x's multiplied by their coefficients, at
!point i
end do
!here is our criteria for calculating the best
!variables for predicting our response
JKSS = JKSS +( (y(i,1) - (sumX + inter) )**2.0)
enddo !i loop leaving out each successive variable
!write(4,'(/,f12.2)') JKSS
!$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
!JKSS=JKSS/real(n) !test effect of averaging JKSS
!$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
if(JKSS