1 %> @brief Binary Decision Tree Classifier
3 %> @sa uip_clssr_tree.m, demo_clssr_tree.m
6 %> @c =fsgt_infgain(). FSGT object to obtain the feature to be used at each node.
8 %> =1 . Pruning type: 0 - no pruning; 1 - maximum number of levels; 2 - backward pruning; 3 - Quinlan's chi-squared test
10 %> =10 . Maximum number of levels. The number of nodes is 2^(numberOfLevels)-1
12 %> Chi-squared test threshold for @c pruningtype = 3. 0 - no pruning; 10 - heavy pruning
16 properties(SetAccess=protected)
17 %> Structure with the following fields:
30 function o = clssr_tree(o)
%> @brief Constructor: sets the human-readable class title.
% Fixed: the string literal below was broken across two lines by the
% extraction; rejoined into a single valid MATLAB statement.
31 o.classtitle = 'Binary Decision Tree';
35 % function s = get_description(o)
37 % if ~isempty(o.nodes)
38 % no_leaves = sum([o.nodes.flag_leaf]);
40 % s = [get_description@clssr(o), '; number of leaves: ', int2str(no_leaves)];
45 % NOTE: with this many features it is unclear whether growing the tree to full depth is practical.
%> @brief Returns a multi-line text description of the trained tree.
%>
%> Delegates to the recursive get_treedescription_() starting at the root.
%> NOTE(review): ptr and nind are not defined in the visible lines; the
%> elided lines presumably initialize them (root node index / indent level)
%> -- confirm against the full source.
51 function s = get_treedescription(o)
54 s = o.get_treedescription_(ptr, nind);
59 methods(Access=protected)
%> @brief Recursively renders one node (and its subtree) as indented text.
%>
%> Leaf nodes print the winning class with its occurrence count and
%> probability; internal nodes print the split rule, then recurse into the
%> left ("if") and right ("else") children. Indentation is 4 spaces per
%> level (char 32); lines are separated by char 10.
%>
%> NOTE(review): several original lines are elided from this view (e.g.
%> the node lookup from ptr and the leaf/internal branching), and the
%> sprintf format strings below appear broken across lines by the
%> extraction -- keep code byte-identical here; verify against full source.
60 function s = get_treedescription_(o, ptr, nind)
% Leaf case: most probable class, its raw count and percentage
64 [vv, ii] = max(node.probs);
65 s = [s, 10, 32*ones(1, nind*4), sprintf('class %s (%d: %g%%)
', o.classlabels{ii}, node.occur0(ii), vv*100)];
% Internal case: decision rule followed by both subtrees
67 s = [s, 10, 32*ones(1, nind*4), sprintf('if fea%d <= %g
', node.feaidx, node.threshold)];
68 s = [s, o.get_treedescription_(node.idx_left, nind+1)];
69 s = [s, 10, 32*ones(1, nind*4), 'else'];
70 s = [s, o.get_treedescription_(node.idx_right, nind+1)];
%> @brief Recursively grows the binary decision tree.
%>
%> Returns a struct array of nodes laid out depth-first (node, left
%> subtree, right subtree). A node becomes a leaf when all observations
%> share one class or, for pruningtype 1, when the level cap is reached;
%> pruningtype 3 stops splitting when Quinlan's chi-squared statistic is
%> below chi2threshold; pruningtype 2 is not implemented.
%>
%> NOTE(review): many original lines are elided from this view, and
%> several string literals below appear broken across lines by the
%> extraction -- kept byte-identical here; verify against full source.
75 function [out, idxptr, no_leaves] = make_tree(o, X, classes, nc, idxptr, no_leaves, level)
% Template node struct; fields filled in below depending on leaf/internal
76 node = struct('feaidx
', {[]}, 'probs
', {[]}, 'flag_leaf
', {0}, 'idx_left
', {[]}, 'idx_right
', {[]}, 'occur
', {[]}, 'occur0
', {[]}, 'threshold
', {0});
% Commented-out debug guard (Portuguese message, roughly: "oh no,
% something went wrong")
79 % if numel(classes) == 0
80 % disp('Viche nego, fudeu alguma coisa
');
% Leaf test: pure node, or (pruningtype 1) maximum level reached
84 flag_leaf = all(classes == classes(1)) || ... % All observations have same class
85 (o.pruningtype == 1 && level >= o.no_levels_max);
86 % (o.pruningtype == 1 && no_leaves >= o.maxleaves-1); % Reached maximum number of leaves
90 idxs = 1:size(X, 2); % This is gonna change at some point
% FSGT test grades features and yields the split feature index + threshold
93 [grades, idx, threshold] = o.fsgt.test(X, classes);
97 % elseif o.pruningtype == 1 && idxptr > o.maxleaves
98 elseif o.pruningtype == 2
99 % hypothesis test criterion
100 irerror('Hypothesis test criterion not implemented yet!
');
101 elseif o.pruningtype == 3
% Quinlan chi-squared pre-pruning: stop when the split is insignificant
102 chi2 = o.quinlantest(X(:, idx), classes, threshold);
103 if chi2 <= o.chi2threshold
109 % Splits datasets for the branches
110 ii1 = X(:, idxs(idx)) <= threshold;
111 ii2 = X(:, idxs(idx)) > threshold;
% Degenerate split: every observation fell on one side; treated as a leaf
113 if sum(ii1) == 0 || sum(ii2) == 0
114 % For some reason, the FSGT test may yield a threshold that completely isolates the points into one side ...
115 % This is happening very rarely
119 node.threshold = threshold;
120 node.feaidx = idxs(idx);
121 [node.probs, node.occur0] = get_probs(classes, nc); % Gets per-class probabilities based on number of occurences for each class
% Children appended depth-first: left subtree first, then right subtree;
% idxptr tracks the next free node index across recursive calls
124 node.idx_left = idxptr;
125 [tree_left, idxptr, no_leaves] = o.make_tree(X(ii1, :), classes(ii1), nc, idxptr, no_leaves, level+1); % Left branch
127 node.idx_right = idxptr;
128 [tree_right, idxptr, no_leaves] = o.make_tree(X(ii2, :), classes(ii2), nc, idxptr, no_leaves, level+1); % Right branch
130 out = [node, tree_left, tree_right];
% Leaf path: store class probabilities/counts and bump the leaf counter
136 [node.probs, node.occur0] = get_probs(classes, nc);
140 no_leaves = no_leaves+1;
145 %> Quinlan's Chi-square test for early stopping
148 %> Computes the Chi-square test described by Quinlan [1].
150 %> [1] J.R. Quinlan, Simplifying Decision Trees,
151 %> Int. J. Man - Machine Studies, vol. 27, 1987, pp. 221-234.
155 %> Guido te Brake, TWI/SSOR, TU Delft.
156 %> Copyright: R.P.W. Duin, duin@ph.tn.tudelft.nl
157 %> Faculty of Applied Physics, Delft University of Technology
158 %> P.O. Box 5046, 2600 GA Delft, The Netherlands
160 function xi2 = quinlantest(o, Xj, classes, threshold)
%> NOTE(review): ELAB and m are defined on lines elided from this view;
%> presumably ELAB is a boolean observations-by-classes membership matrix
%> and m the number of observations -- confirm against the full source.
% Per-class counts on each side of the split; +0.001 avoids division by zero
163 L = sum(ELAB(Xj <= threshold,:),1) + 0.001;
164 R = sum(ELAB(Xj > threshold,:),1) + 0.001;
% Expected per-class counts under independence of split side and class
165 LL = (L+R) * sum(L) / m;
166 RR = (L+R) * sum(R) / m;
% Chi-squared statistic: sum of (observed-expected)^2/expected, both sides
167 xi2 = sum(((L-LL).^2)./LL + ((R-RR).^2)./RR);
%> @brief Trains the classifier: stores class labels and grows the tree.
173 function o = do_train(o, data)
174 o.classlabels = data.classlabels;
% Root call: node pointer starts at 1, zero leaves so far, level 1
176 [o.nodes, dummy1, dummy2] = o.make_tree(data.X, data.classes, data.nc, 1, 0, 1);
181 %> Number of occurrences per class are registered at each node
%> @brief Classifies each row of data.X by routing it through the tree.
%>
%> Fills est.X with, per observation, the class probabilities of the node
%> where that observation ends up. When the dataset carries known classes,
%> also records per-node class occurrence counts (o.nodes(i).occur).
%>
%> NOTE(review): the lines creating est and boolclasses are elided from
%> this view -- confirm against the full source.
182 function est = do_use(o, data)
184 est.classlabels = o.classlabels;
185 est = est.copy_from_data(data);
188 est.X = zeros(data.no, numel(o.classlabels));
% Occurrence recording is only possible when class membership is known
193 flag_record_occurences = ~isempty(boolclasses);
% obsnodes(k): index of the node currently holding observation k (root = 1)
194 obsnodes = ones(data.no, 1);
195 for i = 1:numel(o.nodes)
196 ii = find(obsnodes == i);
197 if flag_record_occurences
198 o.nodes(i).occur = sum(boolclasses(ii,:));
200 if ~o.nodes(i).flag_leaf
% Route this node's observations to the left/right child via the split test
201 ii_left = ii(data.X(ii, o.nodes(i).feaidx) <= o.nodes(i).threshold);
202 ii_right = ii(data.X(ii,o.nodes(i).feaidx) > o.nodes(i).threshold);
203 obsnodes(ii_left) = o.nodes(i).idx_left*ones(1,length(ii_left));
204 obsnodes(ii_right) = o.nodes(i).idx_right*ones(1,length(ii_right));
% Leaf: mark observations as settled and emit the leaf's class probabilities
206 obsnodes(ii) = inf * ones(1,length(ii));
207 est.X(ii, :) = repmat(o.nodes(i).probs, numel(ii), 1);
218 function s = do_get_report(o)
225 function o = do_draw_domain(o, params)
%> @brief Draws the classifier domain, then overlays the tree's split lines.
%>
%> Calls the superclass drawing first, then recursively draws the decision
%> boundaries starting from the root node (index 1).
% Fixed: the superclass call below was broken across two lines by the
% extraction; rejoined into a single valid MATLAB statement.
227 do_draw_domain@clssr(o, params);
230 o.draw_lines(1, params.x_range, params.y_range);
234 function o = draw_lines(o, nodeidx, x_range, y_range)
%> @brief Recursively plots the split thresholds of a (2-D) trained tree.
%>
%> NOTE(review): the branch selection (presumably on node.feaidx) between
%> the vertical-line case and the horizontal-line case is elided from this
%> view -- the two xx/yy assignments below belong to different branches.
235 node = o.nodes(nodeidx);
% Vertical split line at x = threshold; recurse with the x-range narrowed
238 xx = [1, 1]*node.threshold;
240 o.draw_lines(node.idx_left, [x_range(1), node.threshold], y_range);
241 o.draw_lines(node.idx_right, [node.threshold, x_range(2)], y_range);
% Horizontal split line at y = threshold; recurse with the y-range narrowed
244 yy = [1, 1]*node.threshold;
245 o.draw_lines(node.idx_left, x_range, [y_range(1), node.threshold]);
246 o.draw_lines(node.idx_right, x_range, [node.threshold, y_range(2)]);
% Draw at z = 2 so the split line sits above the domain surface
248 plot3(xx, yy, [2, 2], 'k', 'LineWidth', 2);
function classes2boolean(in classes, in no_different)
Binary Decision Tree Classifier.
Dataset representing estimation.
function renumber_classes(in classes_orig, in classlabels_orig, in classlabels_ref)
function get_matlab_output(in o, in flag_cell)