IRootLab
An Open-Source MATLAB toolbox for vibrational biospectroscopy
clssr_ls.m
Go to the documentation of this file.
1 %> @brief Least-squares classifier
2 %>
3 %>
4 %>
5 classdef clssr_ls < clssr
6  properties
7  %> =1. Whether or not to use priors. If set, will calculate the prior class-conditional probabilities based on the amount of
8  %> training data for each class. NOTE(review): not read by any method visible in this file -- confirm that it has an effect.
9  flag_use_priors = 1;
10  %> =0. Whether to weight the observations (for unbalanced classes). When set, do_train() scales the rows of each class by the weights returned by get_weights()
11  flag_weighted = 0;
12  %> =[]. Number of features to keep. If specified, do_train() removes, one at a time, the feature with the smallest absolute coefficient until nf_select features remain
13  nf_select = [];
14 % nregions_select = [];
15  end;
16 
17  properties(SetAccess=protected)
18  %> "Intercepts" (account for features whose mean is not zero); first row of the fitted coefficient matrix, one value per class
19  intercepts;
20  %> "Loadings matrix" [nf]x[nc]; the remaining rows of the fitted coefficient matrix
21  L;
22  %> Recording of each stage of the feature reduction: S(stage, feature, class) holds the absolute coefficient at that stage (zero for features already eliminated). Only filled when nf_select is set
23  S;
24 
25  end;
26 
27 
28  methods
29  function o = clssr_ls() % Constructor: only fills in descriptive properties that are not declared in this class (presumably inherited from clssr)
30  o.classtitle = 'Least-Squares'; % Full name of the classifier
31  o.short = 'LS'; % Abbreviated name
32  o.flag_ui = 0; % NOTE(review): presumably signals that there is no GUI for this class -- confirm against clssr
33  end;
34 
35 % %> If title is not empty, will not mess with description too much
36 % function s = get_description(o)
37 % if ~isempty(o.title)
38 % s = get_description@clssr(o);
39 % else
40 % s = [get_description@clssr(o), ' type = ', o.type];
41 % end;
42 % end;
43  end;
44 
45 
46  methods(Static)
47 % %> Thresholds L_ to have maximum nf non-zero coefficients
48 % function [L_, threshs] = nfthreshold(L_, maxnf)
49 % [ro, co] = size(L_);
50 %
51 % if maxnf >= ro
52 % %> nf maximum is less restrictive than number of existing features
53 % threshs = zeros(1, co);
54 % return;
55 % end;
56 %
57 % for i = co:-1:1
58 %
59 % y_ = abs(L_(:, i));
60 % y = sort(y_);
61 %
62 % % Binary search
63 % ma = ro;
64 % mi = 1;
65 % while 1
66 % me = round(mean([ma, mi]));
67 % thresh = y(me);
68 % num = numel(find(y_ <= thresh));
69 % if num == ro-maxnf
70 % break;
71 % elseif num < ro-maxnf
72 % mi = me;
73 % else
74 % ma = me;
75 % end;
76 %
77 % if ma == mi
78 % break;
79 % end;
80 % end;
81 %
82 % L_(y_ <= thresh, i) = 0;
83 % threshs(i) = thresh;
84 % end;
85 % end;
86 
87  %> Returns one weight per class, inversely proportional to the square root of the number of observations in that class.
88  %> Weights are scaled so that sum(w_c*N_c) = N, where w_c is the weight for class c, N_c is the number of observations in class c, and N is the total number of observations.
89  function w = get_weights(data)
90  root_counts = zeros(1, data.nc);
91  for c = 1:data.nc
92  root_counts(c) = sqrt(sum(data.classes == c-1)); % Classes are numbered from zero
93  end;
94  scale = data.no/sum(root_counts);
95  w = (1./root_counts)*scale;
96  end;
97  end;
98 
99  methods(Access=protected)
100  %{
101  function o = do_train(o, data)
102  o.classlabels = data.classlabels;
103 
104  t = tic();
105 
106  X = [ones(data.no, 1), data.X];
107 
108  if o.flag_weighted
109  w = data.get_weights(1); % Calculates weights for the classes
110 
111  for i = 1:data.nc
112  X(data.classes == i-1, :) = X(data.classes == i-1, :)*w(i);
113  end;
114  end;
115 
116  Y = classes2boolean(data.classes);
117 
118  L0 = (X'*X)\(X'*Y); % Least-squares formula
119 
120 
121  % Separates the intercept and loadings; this is for interpretability, wouldn't be needed otherwise
122  o.L = L0(2:end, :);
123  o.intercepts = L0(1, :);
124 
125  if ~isempty(o.nf_select) && o.nf_select > 0
126  [o.L, o.thresholds] = o.nfthreshold(o.L, o.nf_select);
127  end;
128 
129 
130  o.time_train = toc(t);
131  end;
132 
133 
134 
135 
136 
137 
138 pars.x_range = [1, 6];
139 pars.y_range = [3, 8];
140 % % % function o = do_train(o, data)
141 % % % o.classlabels = data.classlabels;
142 % % %
143 % % % t = tic();
144 % % %
145 % % % X = [ones(data.no, 1), data.X];
146 % % %
147 % % % if o.flag_weighted
148 % % % w = data.get_weights(1); % Calculates weights for the classes
149 % % %
150 % % % for i = 1:data.nc
151 % % % X(data.classes == i-1, :) = X(data.classes == i-1, :)*w(i);
152 % % %
153 % % % end;
154 % % % end;
155 % % %
156 % % % Y = classes2boolean(data.classes);
157 % % %
158 % % % L0 = (X'*X)\(X'*Y); % Least-squares formula
159 % % %
160 % % %
161 % % % % Separates the intercept and loadings; this is for interpretability, wouldn't be needed otherwise
162 % % % o.L = L0(2:end, :);
163 % % % o.intercepts = L0(1, :);
164 % % %
165 % % % if ~isempty(o.nf_select) && o.nf_select > 0
166 % % % [o.L, o.thresholds] = o.nfthreshold(o.L, o.nf_select);
167 % % %
168 % % % o.v = o.L > 0; % This is 2-class but just for a while...
169 % % %
170 % % %
171 % % %
172 % % %
173 % % % X = [ones(data.no, 1), data.X(:, o.v(:, 1))];
174 % % %
175 % % % if o.flag_weighted
176 % % % w = data.get_weights(1); % Calculates weights for the classes
177 % % %
178 % % % for i = 1:data.nc
179 % % % X(data.classes == i-1, :) = X(data.classes == i-1, :)*w(i);
180 % % % end;
181 % % % end;
182 % % %
183 % % % Y = classes2boolean(data.classes);
184 % % %
185 % % % L0 = (X'*X)\(X'*Y); % Least-squares formula
186 % % %
187 % % %
188 % % % % Separates the intercept and loadings; this is for interpretability, wouldn't be needed otherwise
189 % % % o.L = L0(2:end, :);
190 % % % o.intercepts = L0(1, :);
191 % % %
192 % % %
193 % % %
194 % % %
195 % % %
196 % % %
197 % % %
198 % % %
199 % % %
200 % % %
201 % % %
202 % % %
203 % % %
204 % % % end;
205 % % %
206 % % %
207 % % % o.time_train = toc(t);
208 % % % end;
209 % % %
210 
211 %}
212 
213  function o = do_train(o, data)
214  %> Fits one least-squares linear model per class on a design matrix with a leading column of ones.
215  %> @param data Training dataset; this method reads .X, .classes, .classlabels, .no, .nf and .nc
216  %> @return o Classifier with classlabels, L, intercepts, S and time_train set
217  %> Without nf_select: one joint solve. With nf_select: per-class backward elimination that
218  %> repeatedly drops the feature with the smallest absolute coefficient until nf_select remain.
219  o.classlabels = data.classlabels;
220  o.time_train = 0;
221  o.S = []; % Discards elimination history from a previous training, whose size may not match
222 
223  X0 = [ones(data.no, 1), data.X];
224  if o.flag_weighted
225  w = o.get_weights(data); % One weight per class
226  % NOTE(review): only the design-matrix rows are scaled, Y is not; this differs from textbook weighted least squares -- confirm it is intended
227  for i = 1:data.nc
228  X0(data.classes == i-1, :) = X0(data.classes == i-1, :)*w(i);
229  end;
230  end;
231 
232  Y = classes2boolean(data.classes);
233 
234  flag_reduce = ~isempty(o.nf_select);
235  if ~flag_reduce
236  % Single solve for all classes; timed so that time_train is not left at zero in this branch
237  t = tic;
238  L0 = (X0'*X0)\(X0'*Y); % Least-squares formula
239  o.time_train = toc(t);
240  else
241  if o.nf_select > data.nf
242  error('clssr_ls:nf_select', 'nf_select (%d) cannot exceed the number of features (%d)', o.nf_select, data.nf);
243  end;
244 
245  nc = data.nc;
246  L0 = zeros(data.nf+1, data.nc);
247 
248  for ic = 1:nc
249  vin = 1:data.nf; % Features still selected
250 
251  % One row per elimination stage, from nf down to nf_select inclusive, hence the +1
252  Sc = zeros(data.nf-o.nf_select+1, data.nf);
253  Yc = Y(:, ic);
254 
255  for i = data.nf:-1:o.nf_select
256  X = X0(:, [1, vin+1]);
257 
258  % Only the final fit (the one with nf_select features) is timed
259  if i == o.nf_select
260  t = tic;
261  end;
262 
263  L1 = (X'*X)\(X'*Yc); % Least-squares formula
264 
265  if i == o.nf_select
266  tt = toc(t);
267  fprintf('%d fea, toc toc %.6f\n', i, tt);
268  o.time_train = o.time_train+tt;
269  end;
270 
271  temp = zeros(1, data.nf);
272  temp(vin) = abs(L1(2:end));
273  Sc(i-o.nf_select+1, :) = temp;
274 
275  if i == o.nf_select
276  break;
277  end;
278 
279  % Drops the remaining feature whose coefficient has the smallest magnitude
280  [va, in] = min(abs(L1(2:end, 1)));
281  vin(in) = [];
282  end;
283 
284  % Eliminated features keep a zero coefficient
285  L0([1, vin+1], ic) = L1;
286  o.S(:, :, ic) = Sc;
287  end;
288  fprintf('---Time train was %.6f seconds\n', o.time_train());
289  end;
290 
291  % Separates the intercept (first row) from the loadings; this is for interpretability
292  o.L = L0(2:end, :);
293  o.intercepts = L0(1, :);
294 
295  end;
296 
297 
298 %{
299  %> With bits from MATLAB classify()
300  function est = do_use(o, data)
301  est = estimato();
302  est.classlabels = o.classlabels;
303  est = est.copy_from_data(data);
304 
305  t = tic();
306 
307  X = data.X(:, o.v); % Temporary
308 
309 
310  X = [ones(data.no, 1), X];
311  posteriors = X*[o.intercepts; o.L];
312  posteriors = normalize_rows(posteriors);
313 
314  est.X = posteriors;
315  o.time_use = toc(t);
316  end;
317 %}
318 
319 
320  function est = do_use(o, data)
321  %> Scores data with the trained linear model and returns the result in an estimato object.
322  %> @param data Dataset to classify; its .X and .no fields are read here
323  %> @return est estimato object whose X holds the output of normalize_rows
324  est = estimato();
325  est.classlabels = o.classlabels;
326  est = est.copy_from_data(data);
327 
328  t = tic();
329 
330  % Intercepts stacked over loadings pair up with a leading column of ones
331  coefficients = [o.intercepts; o.L];
332  augmented = [ones(data.no, 1), data.X];
333  scores = augmented*coefficients;
334 
335  % Scores are exponentiated and squared before being handed to normalize_rows
336  est.X = normalize_rows(exp(scores).^2);
337  % NOTE(review): o is not an output, so this only persists if clssr is a handle class -- confirm
338  o.time_use = toc(t);
339  end;
340 
341 
342 
343 
344  end;
345 
346 
347 end
function classes2boolean(in classes, in no_different)
function scaled(in i)
Classifiers base class.
Definition: clssr.m:6
Least-squares classifier.
Definition: clssr_ls.m:5