IRootLab
An Open-Source MATLAB toolbox for vibrational biospectroscopy
normaliz.m
Go to the documentation of this file.
1 %>@ingroup maths
2 %> @file
3 %> @brief Normalization
4 %>
5 %> <table>
6 %> <tr><td>Direction</td><td>Type</td><td>@c type parameter</td><td>Description</td></tr>
7 %> <tr><td>row-wise</td><td>Max</td><td>@c v</td><td>"max" normalization across each row (if present, will be first task).
8 %> @c idxs_fea affects only this option</td></tr>
9 %> <tr><td>col-wise</td><td>Mean-centering</td><td>@c c</td><td>centers variables</td></tr>
10 %> <tr><td>row-wise</td><td>Vector</td><td>@c n</td><td>Normalized to Euclidean norm (\a aka "Vector Normalization")</td></tr>
11 %> <tr><td>row-wise</td><td>Area</td><td>@c a</td><td>Normalizes to total area (makes area to be unity)</td></tr>
12 %> <tr><td>row-wise</td><td>Amide I</td><td>@c 1</td><td>per-row normalization to Amide I peak</td></tr>
13 %> <tr><td>row-wise</td><td>Amide II</td><td>@c 2</td><td>per-row normalization starting at wavenumber 1585 to search max (Amide II)</td></tr>
14 %> <tr><td>col-wise</td><td>Standardization</td><td>@c s</td><td>centers and forces all variable variances to 1 (so-called "standardization")</td></tr>
15 %> <tr><td>col-wise</td><td>0-1 range</td><td>@c r</td><td>forces each variable range to [0, 1]</td></tr>
16 %> </table>
17 %> <h3>References</h3>
18 %> [1] Pirouette (Infometrix Inc.) Help Documentation.
19 %>
20 %> [2] Bruker Optik GmbH, OPUS 5 Reference Manual. Ettlingen: Bruker, 2004.
21 %>
22 %> [3] J. G. Kelly et al., “Biospectroscopy to metabolically profile biomolecular structure: a multistage approach linking computational analysis with biomarkers,” J. Proteome Res., vol. 10, no. 4, pp. 1437-1448, Apr. 2011.
23 %
24 %> @param X data matrix containing rows as observations, columns as features.
25 %> @param x x-axis values of the columns of X. unused in most cases, essential for '1' or '2' normalization.
26 %> @param types see table
27 %> @param idxs_fea optional idxs_fea for max normalization (@c types = @c 'v'). It is the FULL RANGE, NOT limits
28 %> @return Normalized X
function X = normaliz(X, x, types, idxs_fea)
% NORMALIZ  Row-wise and/or column-wise normalization of a data matrix.
%
%   X = normaliz(X, x, types, idxs_fea)
%
%   X         data matrix, rows = observations, columns = features.
%   x         x-axis values for the columns of X; only read for the
%             '1' (Amide I) and '2' (Amide II) options.
%   types     char array of option letters. Row-wise options are mutually
%             exclusive and tried in this order: 'a' (area), 'n' (Euclidean
%             norm), 'l' (logit-like transform log(1/(1-X))), then the
%             "max" family 'v' / '1' / '2'. Column-wise options are applied
%             afterwards with precedence 'c' (center) > 's' (standardize) >
%             'r' (0-1 range). Defaults to '' (no-op) when omitted.
%   idxs_fea  optional column indexes searched for the row maximum when
%             types contains 'v'; defaults to all columns.
%
%   Calls irerror() when standardization is requested and some column has
%   zero variance, or when x lies completely outside the requested Amide
%   band.

[no, nf] = size(X);

% Default 'types' whenever it was not supplied (covers 1- and 2-argument calls).
if nargin < 3
    types = '';
end

flag_var = any(types == 's');
flag_center = any(types == 'c');
flag_range = any(types == 'r');

% Checked on the raw input, before any row-wise step changes the data.
if flag_var && any(var(X) == 0)
    irerror('Can''t standardize data because there are variables with ZERO variance!');
end

%> HHH Horizontal normalization
flag_area = any(types == 'a');
flag_norm2 = any(types == 'n');
flag_logit = any(types == 'l');

if flag_area
    % The x-axis range is taken as unity, hence the division by nf:
    % after this step each row sums to nf.
    for i = 1:no
        a = sum(X(i, :))/nf;
        X(i, :) = X(i, :)/a;
    end

elseif flag_norm2
    for i = 1:no
        X(i, :) = X(i, :)/norm(X(i, :));
    end

elseif flag_logit
    X = log(1./(1-X));

elseif any(ismember(types, 'v12'))
    % "max" family: divide each row by its maximum inside a column window.
    if any(types == 'v') || any(types == 'r')
        % NOTE(review): 'r' also selects the full-range window here, so
        % combining 'r' with '1'/'2' bypasses the peak window - confirm
        % whether this is intended.
        if nargin < 4 || isempty(idxs_fea)
            idxs_fea = 1:size(X, 2);
        end
    elseif any(types == '1')
        % Amide I peak window: 1610..1680
        if all(x < 1610) || all(x > 1680)
            irerror('Cannot normalize to Amide I peak; x axis out of Amide I limits!');
        end
        i1 = v_x2ind(1680, x);
        i2 = v_x2ind(1610, x);
        % min/max makes the window valid for ascending and descending x alike
        idxs_fea = min(i1, i2):max(i1, i2);
    elseif any(types == '2')
        % Amide II peak window: 1470..1570
        if all(x < 1470) || all(x > 1570)
            irerror('Cannot normalize to Amide II peak; x axis out of Amide II limits!');
        end
        i1 = v_x2ind(1570, x);
        i2 = v_x2ind(1470, x);
        idxs_fea = min(i1, i2):max(i1, i2);
    end

    for i = 1:no
        X(i, :) = X(i, :)/max(X(i, idxs_fea));
    end
end

%> VVV Vertical normalization
if flag_center || flag_var || flag_range
    means = mean(X, 1); % per-column means
    vars = var(X, 1);   % per-column variances (second argument 1 = normalize by N)
    for icol = 1:nf
        if flag_center
            X(:, icol) = X(:, icol)-means(icol);
        elseif flag_var
            % center and force variance to 1
            X(:, icol) = (X(:, icol)-means(icol))/sqrt(vars(icol));
        else
            % flag_range: force column range to [0, 1]
            mi = min(X(:, icol));
            ma = max(X(:, icol));
            X(:, icol) = (X(:, icol)-mi)/(ma-mi);
        end
    end
end
113 
function v_x2ind(in v, in x)
function irerror(in s)
Analysis Session (AS) base class.
Definition: as.m:6
function normaliz(in X, in x, in types, in idxs_fea)