1 %>@ingroup datasettools
3 %>@brief Draws 1-D scatter plot with optional per-class distributions
6 %> @param idx_fea Which feature to use: the index of one column in @c data.X
7 %> @param type_distr: controls drawing the distributions on top of the scores
8 %> @arg @c 0 no distributions are drawn
9 %> @arg @c 1 estimates the distribution by Gaussian kernel estimation (each point contributes a small Gaussian bump)
10 %> @arg @c 2 histogram distribution estimation - each bar in the histogram represents a quantile, so all bars have the same area and therefore variable width. Looks cool
11 %> @param threshold Works only for two classes. If specified, illustrates the "misclassified" regions of each class
15 if ~exist('type_distr', 'var')
19 flag_threshold = nargin > 3 && ~isempty(threshold);
26 no_classes = size(pieces, 2);
28 idx_fea = idx_fea(1, 1);
31 irerror(sprintf('Attempt to plot feature %d, but dataset has only %d feature(s)!', idx_fea, data.nf));
35 % determines point range
39 mintemp = min(pieces(i).X(:, idx_fea(1)));
40 maxtemp = max(pieces(i).X(:, idx_fea(1)));
51 % First thing to draw is the threshold stuff: line and hachure
54 irwarning('Dataset does not have two classes, threshold will be ignored');
56 flag_right = mean(pieces(1).X(:, idx_fea)) < mean(pieces(2).X(:, idx_fea));
59 % Hachures to mark "misclassified area"
67 flag_right = ~flag_right;
71 plot([1, 1]*threshold, [0.5, 3], 'LineWidth',
scaled(3), 'LineStyle', '--', 'Color', [0, 0, 0]);
78 % Determines the distribution multiplication factor. Classes with fewer points
79 % will have a lower mountain. This gives a better visual feeling of the
80 % outcome of a Bayesian classifier.
82 % I have dropped this because, if there is a huge difference in the number
83 % of points between classes, the one with fewer points will have a
84 % mountain which is too flat! Taking log() would make things more
85 % presentable, however wrong.
88 distrmult = zeros(1, no_classes);
90 distrmult(i) = size(pieces(i).X, 1);
94 distrmult = log(distrmult);
96 distrmult = distrmult/max(distrmult);
98 distrmult = ones(1, no_classes);
105 y_offset = no_classes+1-i;
106 x = pieces(i).X(:, idx_fea(1));
109 % Just for the legend
110 hh(end+1) = plot(1e-10, 0, 'Color',
find_color(i), 'Marker',
find_marker(i), 'LineStyle', 'none', 'MarkerSize', 10*SCALE, 'LineWidth', scaled(2));
115 no_quants = ceil(sqrt(no_x))+1;
116 quants = linspace(0, 1, no_quants);
117 quants = quants(2:end);
120 y = y_offset*ones(no_x, 1); % zero 1-column vector
127 ydistr = distrmult(i)*ydistr/max(ydistr)*.75;
128 plot(xdistr, ydistr+y_offset+.05, 'Color', [1, 1, 1]*.4, 'LineWidth', scaled(2));
132 I = (1:length(xsorted))/length(xsorted);
134 % attempt 2 (good but uses the spline toolbox)
135 w = ones(1, length(I)); w([1, end]) = 300;
136 sp = spaps(I, xsorted, 1e-15, w, 1); % perfect!!!
137 t = fnval(sp, quants);
143 [hmult, idx] = min(tdiff);
147 % This logic here finds bar widths at both sides of the current point
161 wright = t(j+1)-t(j);
169 v1 = [t1, t2, t2, t1, t1];
176 v2 = [0, 0, h, h, 0]+y_offset+.05;
178 plot(v1, v2, 'Color', [1, 1, 1]*.4, 'LineWidth', scaled(2));
184 hh(end+1) = plot(x, y, 'Color',
find_color(i), 'Marker',
find_marker(i), 'LineStyle', 'none', 'MarkerSize', 10*SCALE, 'LineWidth', scaled(2));
186 plot(mean(x(:)), y(1), 'Color',
find_color(i), 'Marker',
find_marker(i), 'LineWidth', scaled(3), 'MarkerSize', 15*SCALE);
192 xmin = xmin-extent*k;
193 xmax = xmax+extent*k;
195 xmax = xmax+.001; % This is just an error prevention when there is no scatter at all.
198 set(gca(), 'XLim', [xmin, xmax]);
199 % xmaxabs = max(abs([xmin, xmax]));
200 % set(gca(), 'XLim', [-xmaxabs, xmaxabs]);
202 set(gca(), 'YTick', [])
203 set(gca(), 'YLim', [0.5 no_classes+1]);
205 feanames = data.get_fea_names(idx_fea);
function find_marker(in i)
function find_color(in i)
function data_get_legend(in data)
function data_draw_scatter_1d(in data, in idx_fea, in type_distr, in threshold)
function distribution(in x, in no_points, in range, in wid)
function data_split_classes(in data, in hierarchy)
function quantile_landmarks(in I, in par2, in t_range)
function draw_hachure(in position)