IRootLab
An Open-Source MATLAB toolbox for vibrational biospectroscopy
data_draw_scatter_1d.m
Go to the documentation of this file.
%>@ingroup datasettools
%>@file
%>@brief Draws 1-D scatter plot with optional per-class distributions
%>
%> Each class is drawn as a row of markers at its own height, with a larger
%> marker at the class mean. A distribution estimate may be drawn just above
%> each row.
%
%> @param data dataset
%> @param idx_fea What feature to use. Number points to 1 column in @c data.X. If an array is passed, only its first element is used
%> @param type_distr (default: 1; an empty value also selects the default) controls drawing the distributions on top of the scores
%> @arg @c 0 no distributions are drawn
%> @arg @c 1 estimates distribution by gaussian kernel estimation (each point contributes with a small gaussian bump)
%> @arg @c 2 histogram distribution estimation - each bar in the histogram represents a quantile, therefore has same area, therefore has variable width
%> @param threshold Works only for two classes. If specified (and not empty), illustrates "misclassified" regions of each class
function data_draw_scatter_1d(data, idx_fea, type_distr, threshold)
global SCALE;

% An empty type_distr selects the default as well, so that a threshold can
% be given without having to spell out the distribution type.
if nargin < 3 || isempty(type_distr)
    type_distr = 1;
end;

flag_threshold = nargin > 3 && ~isempty(threshold);

% One dataset per class; a dataset without classes is drawn as a single row
if data.nc < 1
    pieces = data;
else
    pieces = data_split_classes(data);
end;
no_classes = size(pieces, 2);

% Only one feature can be drawn
idx_fea = idx_fea(1, 1);

if idx_fea < 1
    irerror(sprintf('Attempt to plot feature %d, but feature indexes start at 1!', idx_fea));
end;
if idx_fea > data.nf
    irerror(sprintf('Attempt to plot feature %d, but dataset has only %d feature(s)!', idx_fea, data.nf));
end;


% determines point range across all classes
xmin = +Inf;
xmax = -Inf;
for i = 1:no_classes
    column = pieces(i).X(:, idx_fea);
    lowest = min(column);
    highest = max(column);

    % For an empty class min()/max() return [], and both tests below are false
    if lowest < xmin
        xmin = lowest;
    end;
    if highest > xmax
        xmax = highest;
    end;
end;

% No point was found in any class: falls back to a degenerate range, which
% is widened before the axis limits are set at the end of the function.
% Otherwise XLim would receive [Inf, -Inf].
if xmin > xmax
    xmin = 0;
    xmax = 0;
end;


% First thing to draw is the threshold stuff: line and hachure
if flag_threshold
    if data.nc ~= 2
        irwarning('Dataset does not have two classes, threshold will be ignored');
    else
        % flag_right: the hachure for the current class goes at the right of
        % the threshold. It starts true when class 1 has the lower mean and
        % is flipped for class 2.
        flag_right = mean(pieces(1).X(:, idx_fea)) < mean(pieces(2).X(:, idx_fea));
        for i = 1:2

            % Hachures to mark "misclassified area"
            if flag_right
                draw_hachure([threshold, 3-i-0.1, xmax-threshold, 0.9]);
            else
                draw_hachure([xmin, 3-i-0.1, threshold-xmin, 0.9]);
            end;
            hold on;

            flag_right = ~flag_right;
        end;

        % Vertical line
        plot([1, 1]*threshold, [0.5, 3], 'LineWidth', scaled(3), 'LineStyle', '--', 'Color', [0, 0, 0]);
        hold on;
    end;
end;

% Note on the kernel estimates (type_distr == 1): every class "mountain" is
% normalized to the same height. Scaling the heights by the number of points
% per class was dropped, because a class with much fewer points than the
% others would get a mountain which is too flat.

hh = []; % Handles for the legend, one per class
for i = 1:no_classes
    y_offset = no_classes+1-i; % First class is drawn at the top
    x = pieces(i).X(:, idx_fea);
    no_x = size(x, 1);
    if no_x <= 0
        % Just for the legend
        hh(end+1) = plot(1e-10, 0, 'Color', find_color(i), 'Marker', find_marker(i), 'LineStyle', 'none', 'MarkerSize', 10*SCALE, 'LineWidth', scaled(2)); %#ok<AGROW>
        hold on;
        continue;
    end;

    y = y_offset*ones(no_x, 1); % All points of the class share the same height


    % Distribution, drawn slightly above the points
    switch type_distr
        case 1
            [xdistr, ydistr] = distribution(x, 200);
            ydistr = ydistr/max(ydistr)*.75;
            plot(xdistr, ydistr+y_offset+.05, 'Color', [1, 1, 1]*.4, 'LineWidth', scaled(2));
            hold on;
        case 2
            % Quantile landmarks: 0 < q(1) < ... < q(end) = 1
            no_quants = ceil(sqrt(no_x))+1;
            quants = linspace(0, 1, no_quants);
            quants = quants(2:end);

            xsorted = sort(x);
            I = (1:length(xsorted))/length(xsorted);

            % Smooths the empirical quantile function and evaluates it at the
            % landmarks (good but uses the spline toolbox: spaps(), fnval()).
            % NOTE(review): presumably needs more than one point per class - confirm
            w = ones(1, length(I)); w([1, end]) = 300;
            sp = spaps(I, xsorted, 1e-15, w, 1);
            t = fnval(sp, quants);
            t = [xsorted(1) t]; % no_quants bar centres, starting at the minimum

            % Narrowest gap between consecutive centres; used to scale the bar heights
            hmult = min(diff(t));

            for j = 1:no_quants
                % Bar widths at both sides of the current point. The first and
                % last bars have a neighbour at one side only, so they are made
                % symmetric (no_quants >= 2 always holds here).
                if j < no_quants
                    wright = t(j+1)-t(j);
                end;
                if j > 1
                    wleft = t(j)-t(j-1);
                else
                    wleft = wright;
                end;
                if j == no_quants
                    wright = wleft;
                end;

                t1 = t(j)-wleft/2;
                t2 = t(j)+wright/2;
                v1 = [t1, t2, t2, t1, t1];
                if t2-t1 > 0
                    h = 1/(t2-t1)*hmult; % Height inversely proportional to the width
                else
                    h = 1;
                end;
                h = h*.75;
                v2 = [0, 0, h, h, 0]+y_offset+.05;

                plot(v1, v2, 'Color', [1, 1, 1]*.4, 'LineWidth', scaled(2));
                hold on;
            end;
    end;


    % The points themselves, then a bigger marker at the class mean
    hh(end+1) = plot(x, y, 'Color', find_color(i), 'Marker', find_marker(i), 'LineStyle', 'none', 'MarkerSize', 10*SCALE, 'LineWidth', scaled(2)); %#ok<AGROW>
    hold on;
    plot(mean(x(:)), y(1), 'Color', find_color(i), 'Marker', find_marker(i), 'LineWidth', scaled(3), 'MarkerSize', 15*SCALE);
end


% Horizontal limits: point range plus a 5% margin at each side
extent = xmax-xmin;
k = 0.05;
xmin = xmin-extent*k;
xmax = xmax+extent*k;
if xmin == xmax
    xmax = xmax+.001; % This is just an error prevention when there is no scatter at all.
    xmin = xmin-.001;
end;
set(gca(), 'XLim', [xmin, xmax]);

set(gca(), 'YTick', [])
set(gca(), 'YLim', [0.5 no_classes+1]);

feanames = data.get_fea_names(idx_fea);
xlabel(feanames{1});

legend(hh, data_get_legend(data));
format_frank([], SCALE);
function find_marker(in i)
function find_color(in i)
function irerror(in s)
function data_get_legend(in data)
function data_draw_scatter_1d(in data, in idx_fea, in type_distr, in threshold)
function distribution(in x, in no_points, in range, in wid)
function scaled(in i)
function data_split_classes(in data, in hierarchy)
function format_frank(in F, in scale, in handles)
function integrate(in X)
function quantile_landmarks(in I, in par2, in t_range)
function draw_hachure(in position)
function irwarning(in s)