Commit b500a1d5 authored by Riccardo La Placa's avatar Riccardo La Placa
Browse files

Added functions to calculate Wilson and Clopper-Pearson confidence intervals

parent 64e8a7ad
Loading
Loading
Loading
Loading

functions/conf_int.m

0 → 100644
+45 −0
Original line number Diff line number Diff line
function CP_ci = conf_int(x,n,alpha)
%CONF_INT Clopper-Pearson confidence intervals on binned (discrete) data.
%   Returns the lower and upper bound of the 100(1-alpha) percent 
%   confidence interval given the data, x successful extractions in n total
%   trials. If x is a vector, CP_ci is a matrix with size (length(x),2). 
%
%   By default, the optional parameter alpha is set to 0.05 (corresponding 
%   to 95%-confidence intervals.

% %--------------
if nargin < 3 
    alpha = 0.05;
end

if n<0 || n~=round(n) || isinf(n) || any(x>n)
    disp('n must be a non-negative integer at least as large as x.')
    disp('THE RESULTS WILL MAKE NO SENSE.')
end
if any(x<0)
    disp('All of x must not be negative.')
    disp('THE RESULTS WILL MAKE NO SENSE.')
end

CP_ci = zeros(length(x),2);
CP_ci(:,2) = 1;
%Compute Wilson Score Intervals
% phat = x./n;
% z=sqrt(2).*erfcinv(alpha);
% den=1+(z^2./n);
% xc=(phat+(z^2)./(2*n))./den;
% halfwidth=(z*sqrt((phat.*(1-phat)./n)+(z^2./(4*(n.^2)))))./den;
% wsi=[xc(:) xc(:)]+[-halfwidth(:) halfwidth(:)];
maskxpu = logical(x~=0 & x~=n);
maskx0 = logical(x==0);
maskxn = logical(x==n);
xpu = x(maskxpu);

CP_ci(maskxpu,2) = icdf('Beta',1-alpha/2,xpu+1,n-xpu);
CP_ci(maskxpu,1) = icdf('Beta',alpha/2,xpu,n-xpu+1);
CP_ci(maskx0,2) = 1-((alpha./2).^(1./n));
CP_ci(maskxn,1) = ((alpha./2).^(1./n));

CP_ci = CP_ci.*n;

end
 No newline at end of file
+2 −2
Original line number Diff line number Diff line
function counts = fastcounts(times,tedges)
%FASTCOUNTS Histogram of array of times in bins with tedges (use on RAM!)
%FASTCOUNTS Histogram of array of times in bins with tedges (use on CPU!)
%   For longish arrays it's much faster than hisctounts and uses only a
%   fraction of its memory; operating on sorted times are where it shines
%   fraction of its memory; operating on sorted times is where it shines
%   but it still is faster in  unsorted ones. 
%   The tedges can be non-uniformly spaced but the algorithm doesn't
%   rescale counts based on width: if you want that use histcounts
+7 −5
Original line number Diff line number Diff line
@@ -65,7 +65,7 @@ figure('Units','centimeters','Position', [0.75 0.75 19 20.5])
t = tiledlayout(3,1);
nexttile
loglog(F0,Y0)
title('Power spectrum (PS) up to Nyq. freq.')
title('Power spectrum (PS) up to the Nyquist frequency')
xlabel('Frequency [Hz]')
ylabel('Leahy-normalized power (no corr.)')

@@ -74,7 +74,7 @@ yme = mean(Y0(round(Hfreq*Tseg)+1:end));
loglog(Fqlmid,Yqlbin)
xlim([10 NyqFreq+1000])
ylim([yme*0.8 yme*1.2])
yline(yme,'--',['Avg. pow. for f > ',num2str(fix(Hfreq)),' Hz'])
yline(yme,'--',['Avg. power for f > ',num2str(fix(Hfreq)),' Hz'])
xline(Hfreq,':')
xlabel('Frequency [Hz]')
ylabel('Leahy-normalized power (no corr.)')
@@ -189,15 +189,17 @@ extrtot1 = N.*ptotk(p1_1,extrpoiss);
blindpoiss = N.*poisspdf((1:maxcount),mean(xtemp));
t = tiledlayout(2,1);
nexttile
semilogy(1:maxcount,extrtot1,1:maxcount,extrtot4,1:maxcount,extrtot8,1:maxcount,blindpoiss,1:maxcount,datadist,'+')
semilogy(1:maxcount,extrtot1,1:maxcount,extrtot4,1:maxcount,extrtot8,1:maxcount,blindpoiss,0:maxcount,[N*ptot0,datadist],'+')
legend({'1-pixel crosstalk','4-pixel crosstalk','8-pixel crosstalk','Pure Poissonian','Data distribution'},'Location','southwest')
text(0.8,0.870,['$\epsilon_{ct} = $',num2str(epscr)])
text(0.84,0.920,['$\epsilon_{\mathrm{ct}} = $ ',num2str(epscr,3)], 'Interpreter', 'latex', 'Units','normalized')
xlim([0 maxcount+1])
xlabel('Counts')
ylabel('Number of bins with x counts')
title('Extrapolated generalized Poisson distributions vs data')
babbo = nexttile;
semilogy(1:maxcount,abs(extrtot1-datadist)./datadist,'x',1:maxcount,abs(extrtot4-datadist)./datadist,'o',1:maxcount,abs(extrtot8-datadist)./datadist,'*',1:maxcount,abs(blindpoiss-datadist)./datadist,'square')
ylim([0.0001 2])
xlim([0 maxcount+1])
% legend('abs(extrtot1-datadist)/datadist','abs(extrtot4-datadist)/datadist','abs(extrtot8-datadist)/datadist')
yline([0.01 0.1 0.5 1.0],'--')
babbo.YGrid = 'on';

functions/wilson.m

0 → 100644
+67 −0
Original line number Diff line number Diff line
function [xc, wsi] = wilson(x,n,alpha)
%WILSON Centers and Wilson Score Intervals for binomial data.
%   XC = WILSON(X,N) Returns the center of the Wilson Score Interval for the
%   binomial distribution. X and N are scalars containing the number of
%   successes and the number of trials, respectively.  If X and N are vectors,
%   WILSON returns a vector of estimates whose I-th element is the parameter
%   estimate for X(I) and N(I).  A scalar value for X or N is expanded to the
%   same size as the other input.
%
%   [XC, WSI] = WILSON(X,N,ALPHA) gives the centers and 100(1-ALPHA) 
%   percent Score Intervals given the data. Each row of WSI contains
%   the lower and upper bounds for the corresponding element of XC.
%   By default, the optional parameter ALPHA = 0.05 corresponding to 95%
%   score interval.
%
%   See also BINOFIT. 

%   Mike Sheppard
%   MIT Lincoln Laboratory
%   michael.sheppard@ll.mit.edu
%   Original: 2-Jan-2011


%The error catching is the same as BINOFIT. 
%Included here, in slightly modified form, so the Statistics Toolbox is 
%not required for the Wilson Score Interval function. Only the error
%catching part of the code is used.
%--------------
if nargin < 3 
    alpha = 0.05;
end
% Initialize params to zero.
[row, col] = size(x);
if min(row,col) ~= 1
   error('WILSON:VectorRequired','First argument must be a vector.');
end
[r1,c1] = size(n);
if ~isscalar(n)
   if row ~= r1 || col ~= c1
      error('WILSON:InputSizeMismatch',...
            'The first two inputs must match in size.');
   end
end
if ~isfloat(x)
   x = double(x);
end
if any(n<0) || any(n~=round(n)) || any(isinf(n)) || any(x>n)
    error('WILSON:InvalidN',...
          'All N values must be non-negative integers at least as large as X.')
end
if any(x<0)
    error('WILSON:InvalidX','X must not be negative.')
end
%--------------



%Compute Wilson Score Intervals
phat = x./n;
z=sqrt(2).*erfcinv(alpha);
den=1+(z^2./n);
xc=(phat+(z^2)./(2*n))./den;
halfwidth=(z*sqrt((phat.*(1-phat)./n)+(z^2./(4*(n.^2)))))./den;
wsi=[xc(:) xc(:)]+[-halfwidth(:) halfwidth(:)];


end
 No newline at end of file