**CODE**

% Here's an example dataset.

x = 1:100;

y = evalgaussian1d([40 10 10 2],x) + 4*randn(1,100);

figure(999); clf; hold on;

h1 = scatter(x,y,'k.');

xlabel('x'); ylabel('y');

% Let's presume that the model that describes these data is

% a one-dimensional Gaussian function. (Indeed, that's what

% we used to generate the data; see above.) Let's fit this

% model to the data using bootstrapping. Specifically, let's

% draw 100 bootstraps from the data points and fit the

% Gaussian model to each bootstrap.

model = {[30 20 5 0] [-Inf 0 -Inf -Inf; Inf Inf Inf Inf] @(pp,xx) evalgaussian1d(pp,xx)};

rec = fitprfstatic(x',y',model,[],[],100,[],[],[],@calccod);

modelfits = [];

for p=1:size(rec.params,1)

modelfits(p,:) = evalgaussian1d(rec.params(p,:),x);

end

h2 = plot(modelfits','r-');

uistack(h1,'top');

% Each red line is the model fit obtained from one of

% the bootstraps. By inspecting the variability of the

% red lines across bootstraps, we get a sense of the

% reliability of the model.

% For better visualization, let's compute the mean and standard

% deviation across bootstraps. This gives us the mean model

% fit and its standard error.

delete(h2);

mn = mean(modelfits,1);

se = std(modelfits,[],1);

h3 = errorbar3(x,mn,se,'v',[1 .7 .7]);

h4 = plot(x,mn,'r-');

uistack(h1,'top');

% So far we have been examining the variability of the model fit.

% But we may be interested in the variability of the model parameters

% (which underlie the model fit). Let's look at how model parameters

% vary from bootstrap to bootstrap.

figure(998); clf; hold on;

np = size(rec.params,2);

bar(1:np,median(rec.params,1));

for p=1:size(rec.params,2)

set(scatter(p*ones(1,100) - 0.1,rec.params(:,p),'r.'),'CData',[1 .7 .7]);

end

errorbar2(1:np,median(rec.params,1),stdquartile(rec.params,1,1),'v','r-','LineWidth',2);

set(gca,'XTick',1:np,'XTickLabel',{'Mean' 'Std Dev' 'Gain' 'Offset'});

xlabel('Parameter'); ylabel('Value');

% In this figure, the light dots indicate the parameters obtained in

% different bootstraps, the black bars indicate the median across

% bootstraps, and the red error bars indicate the 68% confidence

% intervals calculated using percentiles (these confidence intervals

% are analogous to plus and minus one standard error in the case

% of Gaussian distributions).

% There are two basic factors that determine the reliability of

% an estimated model: how many data points there are to fit the

% model and how noisy each individual data point is. First,

% let's look at an example that illustrates the impact of the

% number of data points.

nns = [20 80 320 1280];

noiselevels = [4 4 4 4];

figure(997); setfigurepos([100 100 500 400]); clf;

for p=1:length(nns)

x = 1 + rand(1,nns(p))*99;

y = evalgaussian1d([40 10 10 2],x) + noiselevels(p)*randn(1,nns(p));

subplot(2,2,p); hold on;

h1 = scatter(x,y,'k.');

xlabel('x'); ylabel('y');

if p == 1

ax = axis;

end

model = {[30 20 5 0] [-Inf 0 -Inf -Inf; Inf Inf Inf Inf] @(pp,xx) evalgaussian1d(pp,xx)};

rec = fitprfstatic(x',y',model,[],[],50,[],[],[],@calccod);

modelfits = [];

xx = 1:100;

for q=1:size(rec.params,1)

modelfits(q,:) = evalgaussian1d(rec.params(q,:),xx);

end

mn = mean(modelfits,1);

se = std(modelfits,[],1);

h3 = errorbar3(xx,mn,se,'v',[1 .7 .7]);

h4 = plot(xx,mn,'r-');

if p <= 2

uistack(h1,'top');

end

axis(ax);

title(sprintf('Number of data points = %d, noise level = %.1f',nns(p),noiselevels(p)));

end

% Notice that as the number of data points increases, the reliability

% of the estimated model increases. Next, let's look at an example

% that illustrates the impact of the noise level.

nns = [80 80 80 80];

noiselevels = [4 2 1 0.5];

% Notice that as the noise level decreases, the reliability of the

% estimated model increases.

Bootstrapping is been very useful for many web developer in fact even i am a data analysis expert i understand a little bit about this and it was really useful.Lucky i am to understand it more here.

ReplyDelete