Contents
clear all
close all
clc
s = RandStream('mt19937ar','Seed',sum(round(clock)));
RandStream.setGlobalStream(s);
1 Classic Efron - does Aspirin prevent heart attacks
nA = 11037; 
nHA = 104; 
nP = 11034; 
nHP = 189; 
nSamples = 1e5; 
ratio = zeros(nSamples,1); 
A1 = ones(nHA,1); 
A2 = zeros(nA-nHA,1);
P1 = ones(nHP,1); 
P2 = zeros(nP-nHP,1); 
A = [A1;A2]; 
P = [P1;P2]; 
for ii = 1:nSamples 
indicesA = randi(nA,[nA 1]); 
indicesP = randi(nP,[nP 1]); 
ratio(ii) = sum(A(indicesA))/sum(P(indicesP)); 
end
sortedRatio = sort(ratio); 
lowerBoundIndex = round(nSamples/100*2.5); 
upperBoundIndex = round(nSamples/100*97.5); 
lowerBound = sortedRatio(lowerBoundIndex); 
upperBound = sortedRatio(upperBoundIndex); 
figure
histogram(ratio,50); 
xlabel('Ratio')
line([mean(ratio) mean(ratio)], [min(ylim) max(ylim)],'color','r','linewidth',2)
line([lowerBound lowerBound], [min(ylim) max(ylim)],'color','g','linewidth',2)
line([upperBound upperBound], [min(ylim) max(ylim)],'color','g','linewidth',2)
title('Bootstrapped distribution of heart attack ratios Aspirin vs. Placebo')
 
 2 Using resampling methods for cross-validation
numPoints = 8; 
noiseMagnitude = 2; 
x = -5:0.01:5; 
y = x.^2 + noiseMagnitude .* randn(1,length(x)); 
figure
plot(x,y)
title('A noisy parabola')
samplingIndices = randi(length(x),[numPoints,1 ]) ;
hold on;
plot(x(samplingIndices),y(samplingIndices),'.','color','r','markersize',20)
RMSE = []; 
figure
for ii = 1:numPoints
    subplot(2,4,ii)
    p = polyfit(x(samplingIndices),y(samplingIndices),ii); 
    
    yHat = polyval(p,x); 
    plot(x,yHat)
    hold on
    plot(x(samplingIndices),y(samplingIndices),'.','color','r','markersize',20)
    title(num2str(ii))
    RMSE(ii) = sqrt(mean((y(samplingIndices)-yHat(samplingIndices)).^2));
end
figure
plot(1:numPoints,RMSE)
title('RMSE as a function of degree of polynomial')
xlabel('Degree of polynomial')
ylabel('RMSE')
numSamples = 1e2; 
RMSE = zeros(numSamples,numPoints-1); 
for ii = 1:numSamples 
testIndex = randi(numPoints,1); 
testSet = samplingIndices(testIndex); 
trainingSet = samplingIndices; 
trainingSet(testIndex,:) = []; 
    for jj = 1:numPoints-1
    p = polyfit(x(trainingSet),y(trainingSet),jj); 
    yHat = polyval(p,x); 
    RMSE(ii,jj) = sqrt((y(testSet)-yHat(testSet)).^2);
    end
end
figure
plot(1:numPoints-1,mean(RMSE))
title('REAL RMSE as a function of degree of polynomial')
xlabel('Degree of polynomial')
ylabel('RMSE')
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
Warning: Polynomial is not unique; degree >= number of data points. 
 
  
  
  
 3 Shortcut - stability of sample mean by hand and with MATLAB's built in bootstrp function
nRepeats = 1e3; 
y = random('unif',0,1,[100 1]) 
m1 = zeros(nRepeats,1); 
for ii = 1:nRepeats 
    indices = randi(length(y),[length(y) 1]) ; 
    m1(ii,1) = mean(y(indices)); 
end
m2 = bootstrp(nRepeats, @mean, y); 
figure
subplot(2,1,1)
hist(m1,50)
subplot(2,1,2)
hist(m2,50)
mean(m1)
mean(m2)
std(m1)
std(m2)
y =
    0.0076
    0.1253
    0.7569
    0.2094
    0.4729
    0.0316
    0.9912
    0.2786
    0.8140
    0.4911
    0.3587
    0.4710
    0.4195
    0.5091
    0.6741
    0.7100
    0.4897
    0.5014
    0.0007
    0.3830
    0.3474
    0.6258
    0.9185
    0.9014
    0.6600
    0.8927
    0.0038
    0.4455
    0.5090
    0.3830
    0.7089
    0.6247
    0.8909
    0.2380
    0.0854
    0.1631
    0.0758
    0.4118
    0.4839
    0.6714
    0.1564
    0.6374
    0.5745
    0.5900
    0.2622
    0.9397
    0.6651
    0.8160
    0.7631
    0.8537
    0.6834
    0.9245
    0.8710
    0.1642
    0.9891
    0.1098
    0.1353
    0.9855
    0.0413
    0.5222
    0.1700
    0.9170
    0.2860
    0.6094
    0.3714
    0.0693
    0.5561
    0.0323
    0.4145
    0.8976
    0.5232
    0.2807
    0.2814
    0.5201
    0.3245
    0.4115
    0.6242
    0.6617
    0.3796
    0.6700
    0.8352
    0.0667
    0.4345
    0.1021
    0.0417
    0.3144
    0.2083
    0.9490
    0.7538
    0.7655
    0.1035
    0.5466
    0.5532
    0.0749
    0.8732
    0.1990
    0.2426
    0.6686
    0.6568
    0.1033
ans =
    0.4784
ans =
    0.4791
ans =
    0.0282
ans =
    0.0288
 
 4 Comparing means with permutation tests
K = [117 123 111 101 121] 
C = [98 104 106 92 88] 
numMCS = 1e5; 
nK = length(K); 
nC = length(C); 
testStatistic = sum(K)-sum(C)
dTS = zeros(numMCS,1); 
allData = [K C]; 
for ii=1:numMCS 
    indices = randperm(nK+nC); 
    
    
    tempDATA = allData(indices); 
    group1 = tempDATA(1:nK); 
    tempDATA(:,1:nK) = []; 
    group2 = tempDATA; 
    dTS(ii) = sum(group1)-sum(group2); 
end
figure
histogram(dTS,40)
exactP = sum((dTS>=testStatistic))/length(dTS); 
line([testStatistic testStatistic],[min(ylim) max(ylim)],'color','k','linewidth',3)
xlabel('Distribution of simulated test statistics in absence of effect')
title(['Distribution of test statistics. Exact p = ', num2str(exactP,'%1.4f')])
K =
   117   123   111   101   121
C =
    98   104   106    92    88
testStatistic =
    85
