IRootLab
An Open-Source MATLAB toolbox for vibrational biospectroscopy
weightedsubsampling.m
Go to the documentation of this file.
%>@ingroup maths
%>@file
%>@brief Weighted subsampling (selection without repetition)
%>
%> Works by integrating the weights (cumulative sum), then drawing a uniform random number scaled to
%> the total weight and looking it up in the cumulative sum through an approximate "soft" (closest match)
%> binary search. Selected items are only marked as gone; once in a while a "garbage collection" physically
%> removes the marked items, after which the remaining weights are integrated again, and so on.
%>
%> Draws that land on an item already marked as gone are simply discarded and repeated.
%
%> @param no_sel Number of observations to select. Values <= 0 give an empty result; values greater than
%>        <code>numel(weights)</code> raise an error, because items are never selected twice
%> @param weights Observation weights
%> @return idxs Row vector (1 x no_sel) of selected indexes into @c weights, in order of selection
function idxs = weightedsubsampling(no_sel, weights)

no_weights = numel(weights);

% Nothing to select: return an empty row vector without touching the random number generator
if no_sel <= 0
    idxs = zeros(1, 0);
    return;
end;

% Selection is without repetition, so the request cannot exceed the number of available items.
% (Without this check the pool would run dry and fail on an obscure indexing error.)
if no_sel > no_weights
    error('Cannot select %d observations: only %d weights were provided!', no_sel, no_weights);
end;

% These variables will have items removed at "garbage collection"
weirem = weights; % Remaining weights
map = 1:no_weights; % Original indexes of remaining weights
weiyi = cumsum(weirem); % Accumulated sum of remaining weights
flag_gone = false(1, no_weights); % Marks remaining items that have already been selected

maiyi = weiyi(end); % End of cumulative sum of weights to scale random number generation
wei_gone = 0; % Sum of weights selected since last garbage collection
wei_gone_threshold = maiyi/2; % Garbage collection triggers when wei_gone reaches this

idxs = zeros(1, no_sel);

i = 1; % Position in idxs to be filled in next
while i <= no_sel
    % Monotonic inverse function lookup (finds x-value based on random y-value)
    % NOTE(review): bsearch() is defined elsewhere; its third argument (flag_bin) is passed as 1, as in
    % the original code - confirm its exact meaning against bsearch.m
    ii = bsearch(weiyi, rand()*maiyi, 1);

    if flag_gone(ii)
        % Item was already selected: draws again
        continue;
    end;

    idxs(i) = map(ii);
    i = i+1;
    if i > no_sel
        % Done; no need for bookkeeping after the last selection
        break;
    end;

    flag_gone(ii) = true;
    wei_gone = wei_gone+weirem(ii); % weirem(ii) is the same as weights(map(ii))

    if wei_gone >= wei_gone_threshold
        % Garbage collection is triggered when half the "total weight area" has been covered
        % "total weight area" is defined as the sum of all the (remaining) weights
        flag_keep = ~flag_gone;
        weirem = weirem(flag_keep);
        map = map(flag_keep);
        weiyi = cumsum(weirem);
        flag_gone = false(1, numel(weiyi));

        maiyi = weiyi(end);
        wei_gone = 0;
        wei_gone_threshold = maiyi/2;
    end;
end;
70 
function weightedsubsampling(in no_sel, in weights)
Analysis Session (AS) base class.
Definition: as.m:6
function bsearch(in x, in var, in flag_bin)