3 %>@brief Weighted subsampling
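5 %> Works by integrating (cumulatively summing) the weights and then performing an approximate, "soft" (closest-match) binary search on the cumulative sum. Selected items are marked, and garbage collection takes place once in a while, after which the remaining weights are integrated again, and so on.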
7 %> @param no_sel Number of observations to select
8 %> @param weights Observation weights
11 no_weights = numel(weights);
13 % These variables will have items removed at "garbage collection"
14 weirem = weights; % Remaining weights
15 map = 1:no_weights; % Original indexes of remaining weights
16 weiyi = cumsum(weirem); % Accumulated sum of remaining weights
17 flag_gone = zeros(1, no_weights);
20 maiyi = weiyi(end); % End of cumulative sum of weights to scale random number generation
22 wei_gone_threshold = maiyi/2;
25 % it_no_gone_threshold = max(100, numel(weiyi)/3);
27 idxs = zeros(1, no_sel);
32 ii = bsearch(weiyi, rand()*maiyi, 1); % Monotonic inverse function lookup (finds x-value based on random y-value)
42 % it_no_gone = it_no_gone+1;
43 wei_gone = wei_gone+weights(map(ii));
45 if wei_gone >= wei_gone_threshold %it_no_gone >= it_no_gone_threshold
46 % Garbage collection is triggered when half the "total weight area" has been covered
47 % "total weight area" is defined as the sum of all the weights
48 weirem = weirem(~flag_gone);
49 map = map(~flag_gone);
50 weiyi = cumsum(weirem);
51 flag_gone = zeros(1, numel(weiyi));
55 wei_gone_threshold = maiyi/2;
57 % it_no_gone_threshold = max(100, numel(weiyi)/3);
63 % % Takes item out of the box
64 % maiyi = maiyi-weights(map(ii));
65 % weiyi(ii+1:end) = weiyi(ii+1:end)-weights(map(ii));
function weightedsubsampling(in no_sel, in weights)
Analysis Session (AS) base class.
function bsearch(in x, in var, in flag_bin)