IRootLab
An Open-Source MATLAB toolbox for vibrational biospectroscopy
reptt_bag.m
Go to the documentation of this file.
1 %> @brief REPTT for bagging classifiers.
2 %>
3 %> Implements a nested loop: external is just a repetition ("bagging repetitions"); internal is a cross-validation loop.
4 %>
5 %> The particularity of a bagging train-test session is that each iteration of the bagging loop will do exactly the
6 %> same thing: train the classifiers with exactly the same data at each iteration.
7 %>
8 %>
9 %> <h3>Usage</h3>
10 %> Calling go() will build the @ref blocks and @ref logs properties.
11 %>
12 %> The @ref blocks property will be a [no_datasets]x[no_blocks] matrix, where no_datasets is the number of sub-samples generated by the SGS;
13 %> @ref no_blocks is the number of elements of the @ref block_mold property. It is expected that the blocks are
14 %> classifiers (@ref clssr).
15 %>
16 %> The @ref reptt_bag::sgs property defines a number of sub-datasets. To each row of the @ref blocks property
17 %> corresponds one dataset, i.e., at every "bagging repetition" (bag_rep()), all the classifiers in the row will be
18 %> re-trained on their corresponding dataset. This is why this class is designed for bagging classifiers (@ref
19 %> aggr_bag). Internally, such a classifier will draw sub-datasets (sub-sub-datasets, if you consider that each training
20 %> dataset is already a sub-dataset) to train its component classifiers.
21 %>
22 %> The @ref logs property is a 3D matrix, [no_logs]x[no_blocks]x[@ref no_bagreps]. Inspecting the logs individually has
23 %> little practical meaning.
24 %>
25 %> The "results" are obtained through extract_curves(), which generates one dataset per log in the @ref log_mold
26 %> property. The dataset x-axis corresponds to "bagging repetitions" and the values are classification rates. Only one
27 %> row will be generated per classifier in the @ref block_mold property. It is expected that the classification rate
28 %> will rise with the bagging repetitions.
29 %>
30 %> Another "results" option is to use extract_as_dsperc_x_rate().
31 %>
32 %> @todo temporarily deactivated.
33 %>
34 %> @sa uip_reptt_bag.m, demo_reptt_bag.m
classdef reptt_bag < reptt
    properties
        %> Sub-dataset Generation Specification object. boot() calls its get_obsidxs() method on the input dataset
        %> to obtain the observation indexes that define the sub-datasets.
        sgs;
        %> Number of bagging repetitions.
        no_bagreps = 100;
    end

    properties(SetAccess=protected)
        %> Observation indexes returned by <code>sgs.get_obsidxs(data)</code>; one row per sub-dataset.
        obsidxs;
        %> Result of <code>data.split_map(obsidxs(:, 1:2))</code>. Column 1 is used for training, column 2 for testing.
        datasets;
        %> Counter of bagging repetitions already performed (reset to 0 by boot()).
        i_bagrep;
        %> Deserves a property because in this way, dataset may be cleaned, and this property still remains
        no_datasets;
    end

    methods
        %> Constructor: sets the class title and the list of actions; the class is not published in the GUI
        %> (flag_ui = 0).
        function o = reptt_bag()
            o.classtitle = 'Bagging';
            o.moreactions = {'go', 'extract_logs', 'extract_curves', 'extract_log_celldata'};
            o.flag_ui = 0;
        end
    end

%{
    % ------------------------------------------------------------------------------------------------------------
    % Everything below is temporarily deactivated (see @todo in the class header). It is kept inside this block
    % comment on purpose; do not remove the block-comment delimiters without re-validating against the reptt base class.
    % ------------------------------------------------------------------------------------------------------------

    methods(Access=protected)
        %> Allocates cell of blocks (no_datasets)X(no_blocks), where no_datasets is the number of sub-samples generated by the
        %> SGS, and no_blocks is the number of elements of the @c block_mold property.
        %>
        %> Every element of @c block_mold must be of class @c aggr_bag, otherwise an error is raised.
        %>
        %> This function must be called BEFORE allocate_logs()
        function o = allocate_blocks(o)
            if iscell(o.block_mold)
                mold = o.block_mold;
            else
                mold = {o.block_mold};
            end
            no_blocks = numel(mold);

            % Checks if classifiers are from the right class
            % This is really important, otherwise it is pointless.
            for j = 1:no_blocks
                cll = class(mold{j});
                if ~strcmp(cll, 'aggr_bag')
                    irerror(sprintf('Element in block_mold is of class %s, must be an aggr_bag!', cll));
                end
            end

            % One freshly booted copy of each mold element per sub-dataset
            o.blocks = cell(o.no_datasets, no_blocks);
            for i = 1:o.no_datasets
                for j = 1:no_blocks
                    o.blocks{i, j} = mold{j}.boot();
                end
            end
        end


        %> Allocates cell of logs (no_logs)X(no_blocks)X(o.no_bagreps), each one allocated with no_datasets slots, where no_logs is the
        %> number of elements in the @c log_mold property, and @c no_blocks is the number of elements in the @c
        %> block_mold property.
        %>
        %> This method must be called AFTER allocate_blocks()
        function o = allocate_logs(o)
            if iscell(o.block_mold)
                bmold = o.block_mold;
            else
                bmold = {o.block_mold};
            end
            % Counted AFTER normalising to a cell, consistently with allocate_blocks()
            nb = numel(bmold);

            if iscell(o.log_mold)
                mold = o.log_mold;
            else
                mold = {o.log_mold};
            end
            no_logs = numel(mold);

            o.logs = cell(no_logs, nb, o.no_bagreps);
            for i = 1:no_logs
                for j = 1:nb
                    for k = 1:o.no_bagreps
                        o.logs{i, j, k} = mold{i}.allocate(o.no_datasets);
                        o.logs{i, j, k}.title = ['From classifier ', bmold{j}.get_description()];
                    end
                end
            end
        end
    end

    methods
        %> Prepares the session: obtains the sub-dataset indexes from the SGS, allocates blocks, splits the dataset
        %> and allocates the logs.
        %>
        %> @param data Dataset to be split by the @ref sgs. Mandatory.
        function o = boot(o, data)
            if nargin < 2
                % Fail with a clear message instead of "Not enough input arguments" further down
                irerror('boot() needs a dataset!');
            end

            o = o.boot_postpr(); % from reptt

            o.obsidxs = o.sgs.get_obsidxs(data);
            o.no_datasets = size(o.obsidxs, 1);

            % Called before split_map() so that a wrong block_mold is reported before anything time-consuming is done
            o = o.allocate_blocks();

            o.datasets = data.split_map(o.obsidxs(:, 1:2));

            o = o.allocate_logs();

            o.i_bagrep = 0;
            o.flag_booted = 1;
        end

        %> Raises an error if boot() has not been called yet.
        %>
        %> Automatic booting is not possible here because boot() needs the dataset, which this method does not have.
        function o = assert_booted(o)
            if ~o.flag_booted
                irerror('Must call boot() first!');
            end
        end


        %> Boots the object and then calls @c do_bagrep() @c no_bagreps times
        %>
        %> @param data Dataset passed on to boot(). Declared optional only to keep the <code>go(o)</code> call form
        %>             syntactically valid; the session cannot run without it.
        %>
        %> NOTE(review): if the reptt base class stores the dataset in a property, that property should probably
        %> be used as the fallback when @c data is omitted -- confirm against reptt.m.
        function o = go(o, data)
            if nargin < 2
                irerror('go() needs a dataset to boot the bagging session!');
            end
            o = o.boot(data);

            for i = 1:o.no_bagreps
                o = o.do_bagrep();
            end
        end


        %> Performs one bagging repetition: for each sub-dataset, re-trains every block of the corresponding row on
        %> the training part, uses it on the test part and records the outcome in the logs of the current repetition.
        function o = do_bagrep(o)
            o = o.assert_booted();

            o.i_bagrep = o.i_bagrep+1;

            [nl, nb, nbr] = size(o.logs);
            if o.i_bagrep > nbr
                irerror('Number of bagging repetitions exceeded');
            end

            ipro = progress2_open('REPTT_BAG', [], 0, o.no_datasets);
            for i_rep = 1:o.no_datasets
                for i = 1:nb
                    bl = o.blocks{i_rep, i};
                    bl = bl.train(o.datasets(i_rep, 1));
                    o.blocks{i_rep, i} = bl; % keeps the trained state for the next bagging repetition
                    est = bl.use(o.datasets(i_rep, 2));

                    if ~isempty(o.postpr_est)
                        est = o.postpr_est.use(est);
                    end
                    if isempty(est.classes)
                        irerror('Estimation post-processing did not assign classes!');
                    end

                    if ~isempty(o.postpr_test)
                        ds_test = o.postpr_test.use(o.datasets(i_rep, 2));
                    else
                        ds_test = o.datasets(i_rep, 2);
                    end

                    pars = struct('est', {est}, 'ds_test', {ds_test}, 'clssr', {bl});
                    for j = 1:nl
                        o.logs{j, i, o.i_bagrep} = o.logs{j, i, o.i_bagrep}.record(pars);
                    end
                end
                ipro = progress2_change(ipro, [], [], i_rep);
            end
            progress2_close(ipro);
        end


        %> Generates as many datasets as there are elements in the @c log_mold property.
        %>
        %> Each dataset will have one curve per element in the @ref block_mold property X per sub-dataset. Each element in @ref block_mold
        %> gives a different class. Each element in @ref log_mold gives a different dataset.
        %>
        %> @return Cell (1)x(no_logs) of irdata objects
        function out = extract_curves(o)
            [no_logs, no_blocks, nbr] = size(o.logs);

            % Block descriptions do not depend on the log, so they are collected only once
            blocktitles = cell(1, no_blocks);
            for i = 1:no_blocks
                blocktitles{i} = o.blocks{1, i}.get_description();
            end

            out = cell(1, no_logs);
            for l = 1:no_logs
                % Determines how many rows the dataset will have, for pre-allocation
                no = no_blocks*o.no_datasets;
                X = zeros(no, nbr);
                classes = zeros(no, 1);

                for i = 1:no_blocks
                    rows = (i-1)*o.no_datasets+1:i*o.no_datasets;
                    classes(rows, 1) = i-1; % class index is zero-based; one class per block
                    for j = 1:nbr
                        X(rows, j) = o.logs{l, i, j}.get_rates();
                    end
                end

                % ... 1D rates dataset...
                d = irdata();
                d.fea_x = 1:nbr;
                d.xname = 'Bagging repetitions'; %> @todo this needs to have other names
                d.xunit = '';
                d.yname = o.logs{l, 1, 1}.get_legend();
                d.yunit = o.logs{l, 1, 1}.get_unit();
                d.X = X;
                d.classes = classes;
                d.classlabels = blocktitles;
                d.title = ['Derived from ', o.logs{l, 1, 1}.get_description()];

                out{l} = d;
            end
        end
    end
%}
end
Base Sub-dataset Generation Specification (SGS) class.
Definition: sgs.m:6
function progress2_change(in prgrss, in title, in perc, in i, in n)
function progress2_open(in title, in perc, in i, in n)
function irerror(in s)
Dataset class.
Definition: irdata.m:30
REPTT for bagging classifiers.
Definition: reptt_bag.m:35
Pre-processing block base class.
Definition: pre.m:2
Bagging ensemble.
Definition: aggr_bag.m:9
Classifiers base class.
Definition: clssr.m:6
Property flag_ui
(GUI setting) Whether to "publish" in blockmenu and datatool. Note that a class can be "published" wi...
Definition: irobj.m:60
function progress2_close(in prgrss)
REPeated Train-Test.
Definition: reptt.m:8
Analysis Session (AS) base class.
Definition: as.m:6