%function runOlsJK %runOlsJK A program to demonstrate Jackknife residual sum of squares. % Load real data: %AllData = load('d:\tomosFolders\classes\stat510(consulting)\sat.txt'); [AllData, Header] = readtable('d:\tomosFolders\classes\multiVariateSpring2002\Mortality_dataset.txt'); %[AllData, Header] = readtable('d:\tomosFolders\classes\multiVariateSpring2002\sat.txt'); headerS = header2strings(Header); ind1013 = find(double(headerS)==10 | double(headerS) == 13); % replace 10 and 13 into 32 headerS(ind1013) = 32; indHeaderS = headerS(2:end, :); data = AllData(:, 1:end); [nrData, ncData] = size(data); % OR % create independent variables: %nObs = 50; % a great study with n = 50; %nRVs = 10; % Use nRVs underlying variables %nVars = 5; % to create nVars random variables. %[data, covData, invCov] = multipleRegression(nObs, nVars, nRVs); % create a set of parameters (one is intercept): %betaTrue = normrnd(3, 6, (ncData+1), 1); % add the intercept term and arbitrary terms: X = [ones(nrData, 1), data(:, 2:end), normrnd(4,2,nrData,3)]; indHeaderS = char(indHeaderS, 'var1', 'var2', 'var3'); %X = [ones(nrData, 1), data(:, 2:end)]; dataX = X(:, 2:end); % without the intercept term [nrX, ncX] = size(X); % # columns = # vars + intercept nVars = ncX - 1; origVar = [1:nVars]; % index for the original variables. % initialize the output vector for residual SS. outSSvec = []; % create a dependent variable: %y = X*betaTrue + normrnd(0, 5, nrData, 1); % or pick up the column y = AllData(:, 1); varsIn = 0; % # variables in the model % initialize the intermediate data matrix X0 = X(:, 1); % It should have at least an intercept. while varsIn < nVars, jkSsVec = zeros(1, nVars - varsIn); % initialize the ss vector. % Calculate jackknife estimates. Add one variable at a time. for i = 1:(nVars - varsIn), X1 = []; X1 = [X0, dataX(:, i)]; % add one column at a time. jkSsVec(i) = olsJackknife(X1, y); % compute the JK Res. SS. end % Find the minimum JKss for selecting the first variable. bestVar = find(jkSsVec == nanmin(jkSsVec)); X0 = [X0, dataX(:, bestVar)]; % add the best variable into X0. outSSvec(varsIn+1) = nanmin(jkSsVec); % record the SS fprintf(1, 'Added variable was %d: %s\n', origVar(bestVar), indHeaderS(origVar(bestVar), :)); % Error checking lines ***************************************************** % fprintf(1, 'The first value was %.4f\n', data(1, 1+origVar(bestVar))); % jkSsVec % outSSvec % pause; % Error checking lines ***************************************************** % Pull out the variable from X matrix if bestVar < (nVars - varsIn) & bestVar > 1; dataX = [dataX(:, 1:(bestVar-1)), dataX(:, (bestVar+1):end)]; origVar = [origVar(1:(bestVar-1)), origVar((bestVar+1):end)]; % fprintf(1, '1\n'); elseif bestVar == nVars - varsIn, dataX = dataX(:, 1:(bestVar-1)); origVar = origVar(1:bestVar-1); % fprintf(1, '2\n'); else dataX = dataX(:, 2:end); origVar = origVar(2:end); % fprintf(1, '3\n'); end varsIn = varsIn + 1; % pause; end plot([1:nVars], outSSvec); xlabel('Number of variables in the model'); ylabel('Residual sum of squares'); [betaHats, aHats] = olsEstimates(X, y)