module test !need the regeression subroutine to see this real, allocatable :: Xhat(:,:), b(:,:) end module program Jackknife_Cross_Validation !************************************************************************ ! This program uses a jackknife cross validation sum of squared ! residuals method for finding the appropriate regression coefficients ! in a multiple linear regression. Each data point is held out in turn ! and the distance between the line drawn through the other data points ! to the withheld datapoint makes up the critical distance (the sum of ! these distances, squared is the quantity we wish to minimize.) ! Version of 1/30/02 DRH !************************************************************************ !This program took me a long time to make work. The problem had to do !with the parentheses in the JKSS calculation line. Sometimes I hate !fortran. It is pretty messy and I didn't include any graphics. !However, if you look at the output, which shows the included variables !and the corresponding values of JKSS, you can recognize the U shape. !If anyone ever looks at this and has questions about how it works, !ask me. USE DFIMSLSS USE IMSLf90 use test implicit none integer, parameter :: rows=59 !max size of data arrays integer, parameter :: p=7 !look at your data file and check the # of predictors integer, external :: choose integer :: i,j,k,l,g,n,q,o,ioerror,trial(p),place,col integer :: incl_vars(p) real :: X(rows,p),y(rows,1), yhat(rows-1,1) ! real :: JKSS !Jackknife cross validation sum of squared residuals real :: min,inter,yavg,Xavg,sumX,slope,intercept,LCI,UCI,Delta character (len=12) :: model(p),template(p) data template /'JanTemp','JulyTemp','Rain','PopDensity',& 'Education','NonWhite','logHC'/ open (4,file='JKCVSS.txt') !output file open (20,file='Mortality_dataset.txt',status='old') !datafile !read in data file n=0 ioerror=0 do i=1,rows n=n+1 read (20,*,iostat=ioerror) y(i,1),(X(i,j),j=1,p) if(ioerror==-1) then n=n-1 write(6,'(a,i3)') 'end of file reached, n= ',n exit else if (ioerror/=0) then write(6,'(a,i3)') 'read error!, line ',i endif !write(4,'(

(F9.2,1x))') (X(i,j) ,j=1,p) enddo !Automated Stepwise regression trial = 0 !these are variables used to track which col's place = 0 !have been tested and which have been accepted do o=1,p !o represents the order of the data matrix being tested !o=1 min=9999999999999999.0 do k=1,p !run through each of the variables JKSS=0.0 !this is the jackknife sum of squared residuals if(trial(k)==0) then !skip this variable if it is already in model allocate(Xhat(n-1,o),b(o,1)) !the data matrix and vector of !regression coefficients have to be dynamically allocated !because the number of col's in X is not constant. do i=1,n !run through all data rows yhat=0.0 !zero response vector Xhat=0.0 !zero data matrix b=0.0 !etc... !fill regression data matrix, first determine which variables !will go in... col=1 incl_vars=0 !after we have excepted a variable !it is no longer a straight forward calculation to !find Yhat of make Xhat. We need to use the original data matrix !because we are leaving out one observation and the !included col's might not be sequential. This tracks !which cols go in. do j=1,p !this loop makes Xhat, the data matrix !excluding one observation. if(trial(j)==1) then call add_variable(n-1,i,col,X(:,j)) incl_vars(col)=j col=col+1 else continue end if end do call add_variable(n-1,i,col,X(:,k)) ! column k always goes in incl_vars(col)=k !track the vars that have been included call add_variableY(n-1,i,y,yhat) !we also need a special !response vector, since obs i is left out call Regression(n-1,o,yhat) !calculate the regression coef's !find the difference between yhat and y for this regression !Find intercept. The regression line must pass through the !mean of y. The intercept will be equal to the difference !between mean y and the corresponding point on the regression !line. inter = 0.0 !intercept changes each time call average(n,yhat,yavg) inter = yavg !intercept = yavg - sum(Xbar(i) * b(i)) sumX=0.0 do j=1,o call average(n-1,Xhat(:,j),Xavg) inter = inter - Xavg*b(j,1) !finish calculating intercept sumX = sumX + X(i,incl_vars(j))*b(j,1) !This is the !sum of the x's multiplied by their coefficients, at !point i end do !here is our criteria for calculating the best !variables for predicting our response JKSS = JKSS +( (y(i,1) - (sumX + inter) )**2.0) enddo !i loop leaving out each successive variable !write(4,'(/,f12.2)') JKSS !$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ !JKSS=JKSS/real(n) !test effect of averaging JKSS !$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ if(JKSS(a12),F16.2)') (model(j),j=1,o), min !out put end do !outer, number of variables loop ,o stop 'normal completion' end program Jackknife_Cross_Validation !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% subroutine add_variable(n,i,col,vec) ! a routine to fill a column (col of data matrix called Xhat, !made available from module test. Fills with n data points from !vector vec(n+1) leaving out row i. use test integer n,i,j,col,k real :: vec(n+1) k=1 do j=1,n+1 if(j/=i) then Xhat(k,col) = vec(j) k=k+1 else continue end if end do return end subroutine add_variable !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% subroutine add_variableY(n,i,vec,newvec) ! a routine to fill a column (col of data matrix called Xhat, !made available from module test. Fills with n data points from !vector vec(n+1) leaving out row i. use test integer n,i,j,col,k real :: vec(n+1),newvec(n) k=1 do j=1,n+1 if(j/=i) then newvec(k) = vec(j) k=k+1 else continue end if end do return end subroutine add_variableY