IRootLab
An Open-Source MATLAB toolbox for vibrational biospectroscopy
clssr_tree.m
Go to the documentation of this file.
%> @brief Binary Decision Tree Classifier
%>
%> Builds a binary CART-style tree: each internal node tests one feature against a
%> threshold chosen by the @c fsgt object; observations with value <= threshold go
%> to the left child, otherwise to the right child.
%>
%> @sa uip_clssr_tree.m, demo_clssr_tree.m
classdef clssr_tree < clssr
    properties
        %> @c =fsgt_infgain(). FSGT object to obtain the feature to be used at each node.
        fsgt = fsgt_infgain();
        %> =1 . Pruning type: 0 - no pruning; 1 - maximum number of levels; 2 - backward pruning; 3 - Quinlan's chi-squared test
        pruningtype = 1;
        %> =5 . Maximum number of levels. The number of nodes is 2^(numberOfLevels)-1
        no_levels_max = 5;
        %> Chi-squared test threshold for @c pruningtype = 3. 0 - no pruning; 10 - heavy pruning
        chi2threshold = 3;
    end;

    properties(SetAccess=protected)
        %> Struct array storing the tree in flattened form (element 1 is the root). Fields:
        %> @arg feaidx    index of the feature tested at this node (empty for a leaf)
        %> @arg probs     per-class probabilities of the training observations that reached this node
        %> @arg flag_leaf 1 if the node is a leaf; 0 otherwise
        %> @arg idx_left  index (into @c nodes) of the left child (feature value <= threshold)
        %> @arg idx_right index (into @c nodes) of the right child (feature value > threshold)
        %> @arg occur     per-class occurrences recorded during the last call to use()
        %> @arg occur0    per-class occurrences recorded during training
        %> @arg threshold decision threshold applied to feature @c feaidx
        nodes;
    end;

    methods
        function o = clssr_tree(o)
            o.classtitle = 'Binary Decision Tree';
            o.short = 'BTree';
        end;

        %> Backward pruning (@c pruningtype = 2). Placeholder: not implemented yet.
        function o = prune(o)
        end;

        %> Returns a multi-line textual description of the tree, starting at the root node.
        function s = get_treedescription(o)
            nind = 0; % initial indentation level
            ptr = 1;  % root node index
            s = o.get_treedescription_(ptr, nind);
        end;
    end;

    methods(Access=protected)
        %> Recursively renders the sub-tree rooted at node @c ptr, indented by @c nind levels (4 spaces each).
        function s = get_treedescription_(o, ptr, nind)
            s = '';
            node = o.nodes(ptr);
            if node.flag_leaf
                % A leaf prints its majority class, with training count and probability
                [vv, ii] = max(node.probs);
                s = [s, 10, 32*ones(1, nind*4), sprintf('class %s (%d: %g%%)', o.classlabels{ii}, node.occur0(ii), vv*100)];
            else
                s = [s, 10, 32*ones(1, nind*4), sprintf('if fea%d <= %g', node.feaidx, node.threshold)];
                s = [s, o.get_treedescription_(node.idx_left, nind+1)];
                s = [s, 10, 32*ones(1, nind*4), 'else'];
                s = [s, o.get_treedescription_(node.idx_right, nind+1)];
            end;
        end;

        %> Recursively grows the decision tree.
        %>
        %> @param X         Observation matrix of the current node (rows are observations)
        %> @param classes   Class indexes of the observations in @c X
        %> @param nc        Total number of classes
        %> @param idxptr    Index that the current node will occupy in the flattened @c nodes array
        %> @param no_leaves Running count of leaves created so far
        %> @param level     Current depth (root = 1); used by @c pruningtype = 1
        %> @return [out, idxptr, no_leaves] where @c out is the flattened struct array of this sub-tree
        function [out, idxptr, no_leaves] = make_tree(o, X, classes, nc, idxptr, no_leaves, level)
            node = struct('feaidx', {[]}, 'probs', {[]}, 'flag_leaf', {0}, 'idx_left', {[]}, 'idx_right', {[]}, 'occur', {[]}, 'occur0', {[]}, 'threshold', {0});

            % Stop if the node is pure, or maximum depth has been reached (pruningtype 1)
            flag_leaf = all(classes == classes(1)) || ...
                (o.pruningtype == 1 && level >= o.no_levels_max);

            idxs = 1:size(X, 2); % Feature index map (currently identity)

            if ~flag_leaf
                [grades, idx, threshold] = o.fsgt.test(X, classes);

                if isempty(threshold)
                    % FSGT could not find a split point
                    flag_leaf = 1;
                elseif o.pruningtype == 2
                    % hypothesis test criterion
                    irerror('Hypothesis test criterion not implemented yet!');
                elseif o.pruningtype == 3
                    % Quinlan's chi-squared early-stopping test
                    chi2 = o.quinlantest(X(:, idx), classes, threshold);
                    if chi2 <= o.chi2threshold
                        flag_leaf = 1;
                    end;
                end

                if ~flag_leaf
                    % Splits datasets for the branches
                    ii1 = X(:, idxs(idx)) <= threshold;
                    ii2 = X(:, idxs(idx)) > threshold;

                    if sum(ii1) == 0 || sum(ii2) == 0
                        % The FSGT test may (rarely) yield a threshold that sends every
                        % observation to one side; make this node a leaf instead.
                        flag_leaf = 1;
                    else
                        node.flag_leaf = 0;
                        node.threshold = threshold;
                        node.feaidx = idxs(idx);
                        [node.probs, node.occur0] = get_probs(classes, nc); % Per-class probabilities from occurrence counts

                        idxptr = idxptr+1;
                        node.idx_left = idxptr;
                        [tree_left, idxptr, no_leaves] = o.make_tree(X(ii1, :), classes(ii1), nc, idxptr, no_leaves, level+1); % Left branch

                        node.idx_right = idxptr;
                        [tree_right, idxptr, no_leaves] = o.make_tree(X(ii2, :), classes(ii2), nc, idxptr, no_leaves, level+1); % Right branch

                        out = [node, tree_left, tree_right];
                    end;
                end;
            end

            if flag_leaf
                [node.probs, node.occur0] = get_probs(classes, nc);
                node.flag_leaf = 1;
                out = node;
                idxptr = idxptr+1;
                no_leaves = no_leaves+1;
            end;
        end;

        %> Quinlan's Chi-square test for early stopping
        %>
        %> Computes the Chi-square test described by Quinlan [1].
        %>
        %> [1] J.R. Quinlan, Simplifying Decision Trees,
        %> Int. J. Man - Machine Studies, vol. 27, 1987, pp. 221-234.
        %>
        %> Credits:
        %>
        %> Guido te Brake, TWI/SSOR, TU Delft.
        %> Copyright: R.P.W. Duin, duin@ph.tn.tudelft.nl
        %> Faculty of Applied Physics, Delft University of Technology
        %> P.O. Box 5046, 2600 GA Delft, The Netherlands
        %>
        %> @param Xj        Column of the candidate feature
        %> @param classes   Class indexes of the observations
        %> @param threshold Candidate split threshold
        %> @return Chi-square statistic of the candidate split
        function xi2 = quinlantest(o, Xj, classes, threshold)
            m = numel(Xj);
            ELAB = classes2boolean(classes);
            % 0.001 avoids division by zero for empty branches
            L = sum(ELAB(Xj <= threshold,:),1) + 0.001;
            R = sum(ELAB(Xj > threshold,:),1) + 0.001;
            LL = (L+R) * sum(L) / m; % Expected left counts
            RR = (L+R) * sum(R) / m; % Expected right counts
            xi2 = sum(((L-LL).^2)./LL + ((R-RR).^2)./RR);
        end;

        %> Trains the tree and records training time.
        function o = do_train(o, data)
            o.classlabels = data.classlabels;
            tic;
            [o.nodes, dummy1, dummy2] = o.make_tree(data.X, data.classes, data.nc, 1, 0, 1);
            o.time_train = toc;
        end;

        %> Routes each observation down the tree; the leaf's class probabilities become
        %> the estimate. Number of occurrences per class is recorded at each node.
        function est = do_use(o, data)
            est = estimato();
            est.classlabels = o.classlabels;
            est = est.copy_from_data(data);

            t = tic();
            est.X = zeros(data.no, numel(o.classlabels));

            classes = renumber_classes(data.classes, data.classlabels, o.classlabels);
            boolclasses = classes2boolean(classes, numel(o.classlabels));
            flag_record_occurences = ~isempty(boolclasses);
            obsnodes = ones(data.no, 1); % Current node index for each observation; starts at the root
            for i = 1:numel(o.nodes)
                ii = find(obsnodes == i);
                if flag_record_occurences
                    % Explicit dim 1 keeps per-class counts even when ii has a single element
                    % (otherwise sum() would collapse along the row, yielding a scalar)
                    o.nodes(i).occur = sum(boolclasses(ii,:), 1);
                end
                if ~o.nodes(i).flag_leaf
                    % Send each observation to the left or right child
                    ii_left = ii(data.X(ii, o.nodes(i).feaidx) <= o.nodes(i).threshold);
                    ii_right = ii(data.X(ii,o.nodes(i).feaidx) > o.nodes(i).threshold);
                    obsnodes(ii_left) = o.nodes(i).idx_left*ones(1,length(ii_left));
                    obsnodes(ii_right) = o.nodes(i).idx_right*ones(1,length(ii_right));
                else
                    % Leaf reached: mark observations as done and assign class probabilities
                    obsnodes(ii) = inf * ones(1,length(ii));
                    est.X(ii, :) = repmat(o.nodes(i).probs, numel(ii), 1);
                end;
            end;

            o.time_use = toc(t);
        end;

        %> Report body: MATLAB console output plus the textual tree description.
        function s = do_get_report(o)
            s = [get_matlab_output(o), 10, o.get_treedescription()];
        end;

        %> Draws the decision boundaries over a 2-D domain plot (features 1 and 2 only).
        function o = do_draw_domain(o, params)
            do_draw_domain@clssr(o, params);

            if ~isempty(o.nodes)
                o.draw_lines(1, params.x_range, params.y_range);
            end;
        end;

        %> Recursively draws the split line of each internal node, restricting the
        %> drawing range of each child to its side of the parent's threshold.
        function o = draw_lines(o, nodeidx, x_range, y_range)
            node = o.nodes(nodeidx);
            if ~node.flag_leaf
                if node.feaidx == 1
                    % Vertical split on feature 1
                    xx = [1, 1]*node.threshold;
                    yy = y_range;
                    o.draw_lines(node.idx_left, [x_range(1), node.threshold], y_range);
                    o.draw_lines(node.idx_right, [node.threshold, x_range(2)], y_range);
                else
                    % Horizontal split on feature 2
                    xx = x_range;
                    yy = [1, 1]*node.threshold;
                    o.draw_lines(node.idx_left, x_range, [y_range(1), node.threshold]);
                    o.draw_lines(node.idx_right, x_range, [node.threshold, y_range(2)]);
                end;
                plot3(xx, yy, [2, 2], 'k', 'LineWidth', 2); % z = 2 keeps the line above the domain surface
            end;
        end;
    end;
end
function classes2boolean(in classes, in no_different)
Binary Decision Tree Classifier.
Definition: clssr_tree.m:4
Classifiers base class.
Definition: clssr.m:6
Dataset representing estimation.
Definition: estimato.m:4
function renumber_classes(in classes_orig, in classlabels_orig, in classlabels_ref)
function get_matlab_output(in o, in flag_cell)
Definition: fsgt.m:5