IRootLab
An Open-Source MATLAB toolbox for vibrational biospectroscopy
dataio_txt_irootlab.m
Go to the documentation of this file.
1 %> @brief IRootLab TXT loader/saver
2 %>
3 %> This file type is recommended if you want to edit the dataset in a spreadsheet program (e.g., Excel); it is capable of storing all the properties of the dataset.
4 %>
5 %> See Figure 1 for example.
6 %>
7 %> @image html dataformat_iroot.png
8 %> <center>Figure 1 - Example of IRootLab TXT file open in a spreadsheet editing program.</center>
10  properties(SetAccess=protected)
11  %> Affects saving only (load() uses its own local flag of the same name): when nonzero, save() writes the "classes" column as class label strings and omits the "classlabels" line; when zero, numeric class codes are written.
12  %> This is set to 1 in dataio_txt_irootlab2
13  flag_stringclasses = 0;
14  end;
15  methods
%> @brief Loads a dataset from an IRootLab TXT file
%>
%> The file is read line by line. Lines before the table are "property" lines whose first cell
%> names the property ("classlabels", "fea_x", "height", "direction", "title"); unrecognized
%> property lines are ignored. A line whose first cell is "table" announces that the next line
%> is the table header, which names the row field of each column; all remaining lines are the
%> table rows.
%>
%> The "classes" column may contain either numbers or strings. If any entry is not a number,
%> the whole column is treated as class labels, and data.classlabels / data.classes are
%> rebuilt from it.
%>
%> @return data An irdata object filled from the file
function data = load(o)
    data = irdata();

    [no_cols, deli] = get_no_cols_deli(o.filename);
    mask = repmat('%q', 1, no_cols);

    fid = fopen(o.filename);
    if fid < 0
        irerror(sprintf('Could not open file ''%s''!', o.filename));
    end;
    % Guarantees that the file is closed when this function exits, either normally or by error
    file_closer = onCleanup(@() fclose(fid)); %#ok<NASGU>

    flag_exp_header = 0; % Next line is expected to be the table header
    flag_exp_table = 0;  % Rest of the file is expected to be the table rows
    fieldsfound = struct('name', {}, 'flag_cell', {}, 'idxs', {}, 'flag_classes', {});
    flag_stringclasses = 0; % Whether classes are specified as strings or numbers

    while 1
        if flag_exp_table % expecting table
            % reads everything, i.e., row fields
            cc = textscan(fid, newmask, 'Delimiter', deli, 'CollectOutput', 0);

            for i = 1:length(fieldsfound)
                fn = fieldsfound(i).name;
                idxs = fieldsfound(i).idxs;
                if fieldsfound(i).flag_classes
                    % str2double is used instead of str2num because the latter evaluates its
                    % argument as MATLAB code, which is unsafe and misclassifies labels that
                    % happen to be valid expressions (e.g., "pi", "i")
                    if any(cellfun(@(x) isnan(str2double(x)), cc{idxs}, 'UniformOutput', 1))
                        % Classes are expressed as strings
                        flag_stringclasses = 1; %#ok<*PROP>
                        clalpha = cc{idxs};
                        data.classlabels = unique_appear(clalpha');

                        no_obs = numel(clalpha);
                        data.classes(no_obs, 1) = -1; % pre-allocates
                        for k = 1:no_obs
                            % class numbers are zero-based indexes into classlabels
                            data.classes(k) = find(strcmp(data.classlabels, clalpha{k}))-1;
                        end;
                    else
                        % Classes are expressed as numbers
                        data.(fn) = cellfun(@str2double, cc{idxs}, 'UniformOutput', 1);
                    end;
                elseif fieldsfound(i).flag_cell
                    data.(fn) = cc{idxs};
                else
                    data.(fn) = cell2mat(cc(idxs));
                end;
            end;
            break;
        else
            % reads one line only
            cc = textscan(fid, mask, 1, 'Delimiter', deli, 'CollectOutput', 1);
            cc = cc{1};
            if isempty(cc)
                break; % end of file reached without a table
            end;
        end;

        if flag_exp_header
            % goes through header to find which columns contain what
            newmask = '';
            num_fields = 0;
            fn_now = '@#$!*%@!'; % Current field name (initialized with an impossible name)
            for i = 1:no_cols
                s = strip_quotes(cc{i});
                if ~strcmp(s, fn_now)
                    % A new field starts at this column
                    b = strcmp(s, data.rowfieldnames);
                    if any(b)
                        j = find(b); j = j(1);
                        num_fields = num_fields+1;
                        fieldsfound(num_fields).name = s;
                        flag_cell = data.flags_cell(j);
                        fieldsfound(num_fields).flag_cell = flag_cell;
                        fieldsfound(num_fields).flag_classes = strcmp(s, 'classes');
                        fn_now = s;
                    else
                        irerror(sprintf('Unknown field: "%s"', s));
                    end;
                end;
                % Consecutive columns with the same name belong to the same field
                fieldsfound(num_fields).idxs(end+1) = i;
                % Classes and cell fields are read as strings; everything else as numbers
                newmask = [newmask, '%', iif(strcmp(s, 'classes') || flag_cell, 'q', 'f')]; %#ok<AGROW>
            end;
            flag_exp_header = 0;
            flag_exp_table = 1;
        else
            s = strip_quotes(cc{1});
            if strcmp(s, 'classlabels')
                try
                    % NOTE(review): eval() executes text taken from the file; only load files
                    % from trusted sources. Kept because the saver writes this property with
                    % cell2str().
                    data.classlabels = eval(strip_quotes(cc{2}));
                catch ME
                    irerror(['Error trying to parse the "classlabels" property!', 10, 10, 'Original error:', 10, ME.message]);
                end;

            elseif strcmp(s, 'fea_x')
                % discards empty elements at the end of the cell
                for i = length(cc):-1:1
                    if ~isempty(cc{i})
                        break;
                    end;
                end;
                cc = cc(2:i);
                data.fea_x = str2double(cc);
            elseif strcmp(s, 'table')
                flag_exp_header = 1;
            elseif strcmp(s, 'direction')
                data.direction = strip_quotes(cc{2});
            elseif strcmp(s, 'height')
                data.height = str2num(strip_quotes(cc{2})); %#ok<*ST2NM>
            elseif strcmp(s, 'title')
                % The title is free text (the saver writes it verbatim), so it must not be
                % converted to a number
                data.title = strip_quotes(cc{2});
            else
                % Never mind
            end;
        end;
    end;


    if ~flag_stringclasses
        % Makes sure classlabels is correct
        ncc = max(data.classes)+1;
        if ncc > numel(data.classlabels)
            irverbose('WARNING: Number of classlabels lower than number of classes', 2);
            nl = data.get_no_levels();
            suffix = repmat('|1', 1, nl-1);
            for i = numel(data.classlabels)+1:ncc
                % Missing labels are generated from the (zero-based) class number
                data.classlabels{i} = [int2str(i-1), suffix];
            end;
        end;

        data = data.eliminate_unused_classlabels();
    end;

    data.assert_not_nan();
    data.filename = o.filename;
    data.filetype = 'txt_irootlab';
    data = data.make_groupnumbers();
end;
148 
149 
150  %------------------------------------------------------------------
151  % Saver
%> @brief Saves a dataset as an IRootLab TXT file (tab-delimited)
%>
%> Writes a version line, then the property lines ("title", "classlabels", "fea_x", "height",
%> "direction"), then a "table" line. If any of the data.rowfieldnames fields is non-empty,
%> this is followed by a table header line and one line per dataset row. Every line before
%> the table is padded with tabs.
%>
%> When o.flag_stringclasses is set, the "classlabels" line is omitted and the "classes"
%> column is written as class labels instead of class numbers.
%>
%> @param data Dataset to save
%> @return o The object itself, unchanged
function o = save(o, data)

    h = fopen(o.filename, 'w');
    if h < 1
        irerror(sprintf('Could not create file ''%s''!', o.filename));
    end;
    % Guarantees that the file is closed even if an error occurs while writing
    file_closer = onCleanup(@() fclose(h));

    fieldidxs = [];
    fieldcols = [];
    no_cols = 0;
    no_fields = 0;
    flag_table = 1; % Whether any of the data.rowfieldnames is in use, otherwise table part of file won't be saved

    % goes through possible fields to find which ones are being used, and to determine number of columns for CSV
    % file
    for i = 1:length(data.rowfieldnames)
        if ~isempty(data.(data.rowfieldnames{i}))
            fieldidxs(end+1) = i; %#ok<AGROW>
            fieldcols(end+1) = size(data.(data.rowfieldnames{i}), 2); %#ok<AGROW>
            no_cols = no_cols+fieldcols(end);
            no_fields = no_fields+1;
        end;
    end;
    if no_cols == 0
        % No table: the widest line is the "fea_x" one, and every property line needs at least
        % two columns (name and value)
        flag_table = 0;
        no_cols = length(data.fea_x)+1;
        if no_cols < 2
            no_cols = 2;
        end;
    end;

    tab = sprintf('\t');
    newl = sprintf('\n');

    % Property lines, each one padded with tabs
    fwrite(h, ['IRootLab ' irootlab_version() repmat(tab, 1, no_cols-1) newl]);
    fwrite(h, ['title' tab data.title repmat(tab, 1, no_cols-2) newl]);
    if ~o.flag_stringclasses
        fwrite(h, ['classlabels' tab cell2str(data.classlabels) repmat(tab, 1, no_cols-2) newl]);
    end;
    % NOTE(review): '%g' keeps only 6 significant digits by default, here and in the table rows
    % below; confirm that this precision is acceptable for the stored values.
    temp = sprintf(['%g' tab], data.fea_x);
    fwrite(h, ['fea_x' tab temp(1:end-1) repmat(tab, 1, no_cols-data.nf-1) newl]);
    fwrite(h, ['height' tab int2str(data.height) repmat(tab, 1, no_cols-2) newl]);
    fwrite(h, ['direction' tab data.direction repmat(tab, 1, no_cols-2) newl]);
    fwrite(h, ['table' repmat(tab, 1, no_cols-1) newl]);



    if flag_table
        % Output is accumulated in a character buffer of nominally 1024 characters, which is
        % flushed to disk whenever it is close to full. A line longer than the buffer simply
        % makes the buffer grow.
        buflen = 1024;
        buffer = repmat(' ', 1, buflen);
        ptr = 1; % next free position in the buffer

        % table header
        for i = 1:no_fields
            s = repmat([data.rowfieldnames{fieldidxs(i)} tab], 1, fieldcols(i));
            buffer(ptr:ptr+length(s)-1) = s;
            ptr = ptr+length(s);
            if i == no_fields
                ptr = ptr-1;% last tab won't count
            end;
        end;
        buffer(ptr) = newl;
        ptr = ptr+1;
        flag_buffer = 1; % Whether the buffer holds anything not yet written to disk

        if o.flag_stringclasses
            labels = classes2labels(data.classes, data.classlabels);
        end;


        rowptr = 1;
        rowlen = 0; % average row length
        while 1
            if rowptr > data.no
                break;
            end;

            % data row
            ptr_save = ptr;
            for i = 1:no_fields
                fn = data.rowfieldnames{fieldidxs(i)};
                if strcmp(fn, 'classes') && o.flag_stringclasses
                    s = sprintf('%s\t', labels{rowptr});
                else
                    if data.flags_cell(fieldidxs(i))
                        s = sprintf('%s\t', data.(fn){rowptr, :});
                    else
                        s = sprintf('%g\t', data.(fn)(rowptr, :));
                    end;
                end;
                buffer(ptr:ptr+length(s)-1) = s;
                ptr = ptr+length(s);
                if i == no_fields
                    ptr = ptr-1;% last tab won't count
                end;
            end;
            buffer(ptr) = newl;
            ptr = ptr+1;
            % running average of the row lengths seen so far
            rowlen = (rowlen*(rowptr-1)+(ptr-ptr_save))/rowptr;
            flag_buffer = 1;

            % tolerance of rowlen not to blow buffer
            if ptr+2*rowlen > buflen
                fwrite(h, buffer(1:ptr-1));
                ptr = 1;
                flag_buffer = 0;
            end;

            rowptr = rowptr+1;
        end;

        if flag_buffer
            fwrite(h, buffer(1:ptr-1));
        end;
    end;

    % Closes the file now (triggers the cleanup object), before reporting success
    clear file_closer;

    irverbose(sprintf('Just saved file "%s"', o.filename), 2);
end;
273  end
274 end
function irverbose(in s, in level)
function cell2str(in c)
Dataset class.
Definition: irdata.m:30
IRootLab TXT that saves classes column as labels, not numbers.
Dataset loader/saver common class.
Definition: dataio.m:2
Analysis Session (AS) base class.
Definition: as.m:6
function unique_appear(in classlabels)
function get_no_cols_deli(in filename)
function irootlab_version()
IRootLab TXT loader/saver.