IRootLab
An Open-Source MATLAB toolbox for vibrational biospectroscopy
subsetsprocessor.m
Go to the documentation of this file.
1 %>@ingroup maths
2 %>@file
3 %
4 %>@brief Processor of a set of subsets of features
5 %>
6 %> @arg Histogram generation
7 %> @arg "grades" vector calculation from histogram
8 %>
9 %> <h3>Usage</h3>
10 %> Set the @ref subsets and @ref nf properties, and call go().
11 %>
12 %> Of course there is a lot of customization. @ref nf4gradesmode, @ref nf4grades, @ref stabilitythreshold, @ref weightmode, and @ref stabilitytype are
13 %> all properties that affect the way in which @ref grades is calculated.
14 %>
15 %> @ref gradethreshold is applied after @ref grades has been calculated.
17  properties
18  %> Number of position-related histograms (rows of the hits matrix) to add up to form the grades. If not specified, will use the maximum
19  nf4grades = [];
20 
21  %> ='fixed'. Determines how the number of histograms used for the grades is obtained.
22  %> @arg @c 'fixed' will apply the number specified by the @ref nf4grades property
23  %> @arg @c 'stability' will use a stability threshold (@ref stabilitythreshold property).
24  %> Per-position stabilities will be calculated according to @ref stabilitytype
25  nf4gradesmode = 'fixed';
26 
27  %> =0.05 (5%). Stability threshold specified as a fraction of the maximum stability found.
28  stabilitythreshold = .05;
29 
30  %> ='uniform'. This is how the hits will be weighted in order to generate an overall histogram
31  %> @arg @c 'uniform' All hits will have weight 1
32  %> @arg @c 'stability' Hits will have weights depending on the univariate stability index (Kuncheva index when @ref stabilitytype is 'kun') of the selection position
33  %> (i.e., 1st feature to be selected, 2nd feature to be selected etc)
34  %>
35  %> I honestly think that 'uniform' is the best option to keep things simple.
36  weightmode = 'uniform';
37 
38  %> ='kun'. Stability type to pass to the function @ref featurestability.m
39  stabilitytype = 'kun';
40 
41  % Post-processing of the histograms after they have been generated
42 
43  %> =0. Minimum number of hits within the histogram matrix. All values will be trimmed to zero below that. It is expressed
44  %> as a fraction of the total number of hits of each position-wise histogram.
%> NOTE(review): get_hitss() derives the cut-off from the total of the first position-wise histogram only - confirm this is intended.
45  minhits_perc = 0;
46  end;
47 
48  methods
function o = subsetsprocessor()
    % Constructor: labels the block and declares which input class it accepts.
    o.classtitle = 'Feature subsets processor';  % human-readable block title
    o.inputclass = 'log_fselrepeater';           % class name of the expected input
end
53  end;
54 
55 
56  % Calculations
57  methods(Access=protected)
function log = do_use(o, input)
    % Builds a log_hist from the input: the position-wise hits matrix, the
    % number of leading rows used for the grades, and the grades themselves
    % (column-wise sum of those leading rows).
    log = log_hist();
    log.hitss = o.get_hitss(input);

    % Number of position-wise histograms that contribute to the grades
    nrows = o.get_nf4grades(input);
    log.nf4grades = nrows;
    log.grades = sum(log.hitss(1:nrows, :), 1);

    % x-axis description is copied from the input; y-axis is the hit count
    log.xname = input.xname;
    log.xunit = input.xunit;
    log.fea_x = input.fea_x;
    log.yname = 'Hits';
    log.yunit = '';
end
72 
73  end;
74 
75  methods
function n = get_nf4grades(o, input)
    % Returns the number of position-wise histograms (leading rows of the
    % hits matrix) that are added up to form the grades.
    %
    % 'fixed' mode:     n is the nf4grades property capped at the maximum
    %                   subset size; if nf4grades is empty, the maximum
    %                   subset size is used.
    % 'stability' mode: n is the number of positions before the first one
    %                   whose stability, relative to the maximum stability,
    %                   falls below stabilitythreshold; if none does, the
    %                   maximum subset size is used.
    %
    % Any other nf4gradesmode is reported through irerror().
    switch o.nf4gradesmode
        case 'fixed'
            n = o.get_nf_select(input);
            % Cap only when nf4grades was actually specified: min([], n)
            % would return [] and discard the maximum.
            if ~isempty(o.nf4grades)
                n = min(o.nf4grades, n);
            end
        case 'stability'
            w = o.get_stabilities(input);
            wmax = max(w);
            % First position whose relative stability is under the threshold
            ifirst = find(w/wmax < o.stabilitythreshold, 1);
            if isempty(ifirst)
                n = o.get_nf_select(input);
            else
                n = ifirst-1;
            end
        otherwise
            irerror(sprintf('nf4gradesmode "%s" invalid', o.nf4gradesmode));
    end
end
97 
98  %> Calculates the number of features to be selected as the maximum subset size
function n = get_nf_select(o, input)
    % Size of the largest subset found in input.subsets.
    subsetsizes = cellfun(@numel, input.subsets);
    n = max(subsetsizes);
end
102 
103 
104  %> Returns the "Hit Weights".
105  %>
106  %> Hit weights are used to give, when assembling the histograms, more importance to variables that are selected first
function w = get_positionweights(o, input)
    % Returns the weight given to a hit at each selection position, chosen
    % by the weightmode property:
    %   'uniform'   - a row of ones, one per position up to the maximum
    %                 subset size
    %   'stability' - whatever get_stabilities(input) returns
    %   'lin', 'exp', 'sig' - reserved names, not implemented
    %
    % Any other weightmode is reported through irerror() instead of
    % falling through with w unassigned.
    nnf = o.get_nf_select(input);

    switch o.weightmode
        case 'uniform'
            w = ones(1, nnf);
        case {'lin', 'exp', 'sig'}
            irerror(sprintf('"%s" not implemented yet', o.weightmode));
        case 'stability'
            w = o.get_stabilities(input);
        otherwise
            irerror(sprintf('weightmode "%s" invalid', o.weightmode));
    end
end
123 
124 
125  %> Calculates the hitss matrix from the subsets carried by the input.
function H = get_hitss(o, input)
    % Assembles the (selection position) x (feature) hits matrix.
    %
    % For every subset in input.subsets, the feature index found at
    % position p adds the position weight w(p) to H(p, feature). When
    % minhits_perc > 0, entries below a cut-off are set to zero.
    sets = input.subsets;
    w = o.get_positionweights(input);

    H = zeros(o.get_nf_select(input), numel(input.fea_x));

    for irep = 1:numel(sets)
        subset = sets{irep};
        for pos = 1:numel(subset)
            ifea = subset(pos);
            H(pos, ifea) = H(pos, ifea)+w(pos);
        end
    end

    % Post-processing
    % NOTE(review): the cut-off is derived from the total of the first row
    % only and applied to the whole matrix - confirm this is intended.
    if o.minhits_perc > 0
        cutoff = sum(H(1, :))*o.minhits_perc;
        H(H < cutoff) = 0;
    end
end
149 
150  %> Returns a (feature position)x(stability curve)
151  %>
152  %> Calculates according to the @ref stabilitytype property
function z = get_stabilities(o, input)
    % Delegates to input.get_stabilities(), passing the stabilitytype
    % property and the fixed option 'uni'.
    stype = o.stabilitytype;
    z = input.get_stabilities(stype, 'uni');
end
156  end;
157 end
Generated by fselrepeater, carries subsets of features.
Other blocks.
Definition: blbl.m:3
Processor of a set of subsets of features.
Analysis Session (AS) base class.
Definition: as.m:6