IRootLab
An Open-Source MATLAB toolbox for vibrational biospectroscopy
blmisc_rowsout_uni.m
Go to the documentation of this file.
1 %> @brief Univariate Outlier removal
2 %>
3 %> This block generates two datasets. The first one contains the inliers and the second one the outliers.
4 %> The map of inlier rows is derived at the training stage. \c train() and \c use() don't need to be called with the
5 %> same dataset, but the datasets do need to have the same number of rows.
properties (SetAccess = protected)
    %> Row vector with the values of feature @c idx_fea, one per row of the dataset's @c X (set by @c calculate_distances())
    distances = [];
    %> Histogram counts of @c distances, one per bin (set by @c calculate_hits())
    hits = [];
    %> Histogram bin edges, @c no_bins+1 values (set by @c calculate_hits())
    edges = [];
end
12 
properties
    %> =1. Index of the feature (column of the dataset's @c X) to consider
    idx_fea = 1;
    %> =100. Number of bins for the histogram calculation
    no_bins = 100;
    %> [no_ranges][2] matrix. Univariate outlier removal is range-based: an element of @c distances that satisfies
    %> <code>ranges(i, 1) <= value < ranges(i, 2)</code> for any row @c i is left out of the map.
    %> Descendants may have different ways to fill in this property.
    ranges = [];
end
21 
methods
    %> @brief Constructor: sets the class title to 'Univariate'
    function o = blmisc_rowsout_uni(o)
        o.classtitle = 'Univariate';
    end
end
27 
28  methods
%> @brief Returns the index of the first bin that already starts above the 50%
%>
%> The input is passed through @c integrate() and the result is compared against half of its last element.
%> NOTE(review): @c integrate() is defined elsewhere; presumably it returns a cumulative integral of @c x - confirm.
%>
%> @param x Vector (e.g. histogram counts)
%> @return z Linear index of the first element of <code>integrate(x)</code> that is greater than half of its last
%>           element. Empty if no element satisfies the condition.
function z = get_idx_50plus(o, x)
    I = integrate(x);
    % Single-output find() yields linear indices, so the result is right for both row and column vectors.
    % The former two-output form returned (row, col) subscripts, which gave 1 for any column vector.
    % Limiting the search to the first match also avoids indexing into an empty result.
    z = find(I > I(end)/2, 1, 'first');
end
35 
%> @brief Fills @c map with the indexes of the elements of @c distances that lie in none of the @c ranges
%>
%> Calls @c calculate_ranges() first; uses @c distances as currently stored in the object.
function o = calculate_map(o, data)
    o = o.calculate_ranges(data);

    d = o.distances;
    % Flags elements that fall inside at least one [lower, upper) range
    in_any_range = false(1, length(d));
    for k = 1:size(o.ranges, 1)
        lo = o.ranges(k, 1);
        hi = o.ranges(k, 2);
        in_any_range = in_any_range | ((d >= lo) & (d < hi));
    end

    o.map = find(~in_any_range);
end
44 
%> @brief Fills @c distances with column @c idx_fea of <code>data.X</code>, stored as a row
%>
%> @c distances is set to empty unless <code>data.no > 0</code>.
function o = calculate_distances(o, data)
    if ~(data.no > 0)
        o.distances = [];
        return;
    end
    o.distances = data.X(:, o.idx_fea)';
end
52 
%> @brief Abstract. Meant to be overridden by descendants to fill in the @c ranges property.
%>
%> This base implementation returns the object unchanged.
function o = calculate_ranges(o, data)
    % Intentionally empty
end
56 
%> @brief Calculates the @c edges and @c hits properties. Depends on @c calculate_distances() having been called first.
%>
%> The histogram spans from slightly below the minimum to slightly above the maximum of @c distances, split into
%> @c no_bins bins. If @c distances is empty, both @c hits and @c edges are set to empty.
function o = calculate_hits(o)
    if isempty(o.distances)
        o.hits = [];
        o.edges = [];
        return;
    end

    maxdist = max(o.distances);
    mindist = min(o.distances);
    span = maxdist-mindist;
    if span > 0
        offset = span*.00001;
    else
        % All distances are equal (e.g. a single row). A zero offset would collapse every edge onto one
        % value and all points would land in the discarded last bin, so a small nonzero margin is used.
        offset = max(abs(mindist), 1)*.00001;
    end

    o.edges = linspace(mindist-offset, maxdist+offset, o.no_bins+1);
    o.hits = histc(o.distances, o.edges);
    % histc's last element counts values exactly equal to the last edge; the offset keeps every value
    % below that edge, so this element is always zero and is dropped to leave no_bins counts.
    o.hits = o.hits(1:end-1);
end
71 
%> @brief Draws hachures to signal the ranges, and the histogram of @c distances
%>
%> Recalculates @c hits / @c edges through @c calculate_hits() and draws nothing if @c hits is empty.
function o = draw_histogram(o)
    o = o.calculate_hits();
    if isempty(o.hits)
        return;
    end

    % Replaces infinities in ranges by finite values one bin width outside the histogram
    ra = o.ranges;
    di = o.edges(2)-o.edges(1);
    ra(ra == -Inf) = o.edges(1)-di;
    ra(ra == +Inf) = o.edges(end)+di;

    % Bin centers
    x = sum([o.edges(1:end-1); o.edges(2:end)], 1)/2;
    maxy = max(o.hits)*1.1;
    if ~(maxy > 0)
        % All counts are zero: ylim() needs increasing limits, so fall back to a unit height
        maxy = 1;
    end

    for i = 1:size(ra, 1)
        if ra(i, 2) > ra(i, 1)
            draw_hachure([ra(i, 1), 0, ra(i, 2)-ra(i, 1), maxy]);
        end
        % Ranges with non-positive width are ignored in drawing
        hold on;
    end

    bar(x, o.hits, 'FaceColor', [.3, .3, 1]);
    hold on;

    % xlim() needs increasing limits; skipped if the edges are degenerate
    if o.edges(end) > o.edges(1)
        xlim([o.edges(1), o.edges(end)]);
    end
    ylim([0, maxy]);
    format_frank();
end
103  end;
104 end
105 
Outlier Removal base class.
Base Block class.
Definition: block.m:2
Univariate Outlier removal.
function format_frank(in F, in scale, in handles)
function integrate(in X)
function draw_hachure(in position)