ProteoWizard
SpectrumListCacheTest.cpp
Go to the documentation of this file.
1 //
2 // $Id: SpectrumListCacheTest.cpp 4129 2012-11-20 00:05:37Z chambm $
3 //
4 //
5 // Original author: Matt Chambers <matt.chambers <a.t> vanderbilt.edu>
6 //
7 // Copyright 2008 Vanderbilt University - Nashville, TN 37232
8 //
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 //
13 // http://www.apache.org/licenses/LICENSE-2.0
14 //
15 // Unless required by applicable law or agreed to in writing, software
16 // distributed under the License is distributed on an "AS IS" BASIS,
17 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 // See the License for the specific language governing permissions and
19 // limitations under the License.
20 //
21 
22 
24 #include "MSDataFile.hpp"
25 #include "MemoryMRUCache.hpp"
26 #include "SpectrumListCache.hpp"
28 #include "Serializer_MGF.hpp"
29 
30 
31 using namespace pwiz::util;
32 using namespace pwiz::cv;
33 using namespace pwiz::msdata;
34 //using namespace pwiz::analysis;
35 
36 
// Optional stream for verbose diagnostics. It starts out null; main() points it at
// cout when the first command-line argument is "-v", and the tests only write to it
// after checking that it is non-null.
37 ostream* os_ = 0;
38 
39 
40 namespace std {
41 
42 ostream& operator<< (ostream& os, SpectrumListCache::CacheType& cache)
43 {
44  os << "Spectrum cache indices (from MRU to LRU):";
45  for (SpectrumListCache::CacheType::iterator itr = cache.begin(); itr != cache.end(); ++itr)
46  os << " " << itr->second->index;
47  return os;
48 }
49 
50 } // namespace std
51 
52 
// NOTE(review): the signature of this function and the declaration of `cache`
// (listing lines 53, 55 and 57) are not visible in this listing — confirm against
// the original file before editing.
//
// Checks the insert/eviction ordering of an MRU cache keyed by integer, storing
// empty SpectrumPtr values. The assertions expect the size never to exceed 2, so
// presumably `cache` is constructed with a capacity of 2.
54 {
56 
58  unit_assert(cache.empty());
59  unit_assert_operator_equal(0, cache.size());
60 
61  cache.insert(make_pair(0, SpectrumPtr()));
62 
63  unit_assert(!cache.empty());
64  unit_assert_operator_equal(1, cache.size());
65 
66  cache.insert(make_pair(1, SpectrumPtr()));
67 
// the newest key (1) is expected to be the MRU entry, the older key (0) the LRU entry
68  unit_assert_operator_equal(2, cache.size());
69  unit_assert_operator_equal(1, cache.mru().first);
70  unit_assert_operator_equal(0, cache.lru().first);
71 
// re-inserting an existing key is expected to promote it to MRU without growing the cache
72  cache.insert(make_pair(0, SpectrumPtr()));
73 
74  unit_assert_operator_equal(2, cache.size());
75  unit_assert_operator_equal(0, cache.mru().first);
76  unit_assert_operator_equal(1, cache.lru().first);
77 
// inserting a third distinct key is expected to evict the LRU key (1)
78  cache.insert(make_pair(2, SpectrumPtr()));
79 
80  unit_assert_operator_equal(2, cache.size());
81  unit_assert_operator_equal(2, cache.mru().first);
82  unit_assert_operator_equal(0, cache.lru().first);
83 }
84 
85 
86 SpectrumPtr makeSpectrumPtr(size_t index, const string& id)
87 {
88  SpectrumPtr spectrum(new Spectrum);
89  spectrum->id = id;
90  spectrum->index = index;
91  spectrum->set(MS_MSn_spectrum);
92  spectrum->set(MS_ms_level, 2);
93  spectrum->precursors.push_back(Precursor(123.4));
94  spectrum->setMZIntensityArrays(vector<double>(), vector<double>(), MS_number_of_counts);
95  BinaryDataArray& mzArray = *spectrum->getMZArray();
96  BinaryDataArray& intensityArray = *spectrum->getIntensityArray();
97  for (size_t i=0; i < (index+1)*10; ++i)
98  {
99  mzArray.data.push_back(i);
100  intensityArray.data.push_back(i*100);
101  }
102  spectrum->defaultArrayLength = mzArray.data.size();
103  return spectrum;
104 }
105 
// NOTE(review): the signature (listing line 106) is not visible in this listing;
// presumably this is the spectrumHasMetadata(const Spectrum&) helper called by the
// tests below — confirm against the original file.
//
// Returns true if `s` has at least one of: a data-processing pointer, a source-file
// pointer, a non-empty scan list, precursors, param groups, CV params or user params.
107 {
108  return s.dataProcessingPtr.get() ||
109  s.sourceFilePtr.get() ||
110  !s.scanList.empty() ||
111  !s.precursors.empty() ||
112  !s.paramGroupPtrs.empty() ||
113  !s.cvParams.empty() ||
114  !s.userParams.empty();
115 }
116 
// NOTE(review): the signature (listing line 117) is not visible in this listing;
// presumably this is the spectrumHasBinaryData helper called by the tests below —
// confirm against the original file.
//
// Thin wrapper that forwards to s.hasBinaryData().
118 {
119  return s.hasBinaryData();
120 }
121 
// NOTE(review): the signature (listing line 122) and the declarations of `slc` and
// `cache` (listing lines 136-137) are not visible in this listing; presumably this
// is testModeOff(), which test() calls — confirm against the original file.
//
// Builds a four-spectrum in-memory list, requests each spectrum through `slc`
// (alternating without/with binary data) and asserts that `cache` is empty both
// before and after the accesses.
123 {
124  // initialize list
125  shared_ptr<SpectrumListSimple> sl(new SpectrumListSimple);
126  sl->spectra.push_back(makeSpectrumPtr(0, "S1"));
127  sl->spectra.push_back(makeSpectrumPtr(1, "S2"));
128  sl->spectra.push_back(makeSpectrumPtr(2, "S3"));
129  sl->spectra.push_back(makeSpectrumPtr(3, "S4"));
130 
131  // access a series of spectra and make sure the cache behaves appropriately:
132  // in off mode, the cache should always be empty
133 
134  SpectrumPtr s;
135 
138 
139  unit_assert(cache.empty());
140 
141  s = slc.spectrum(0, false);
142  s = slc.spectrum(1, true);
143  s = slc.spectrum(2, false);
144  s = slc.spectrum(3, true);
145 
// dump the cache contents only when verbose output was requested
146  if (os_) *os_ << cache << endl;
147  unit_assert(cache.empty());
148 }
149 
150 
// NOTE(review): the signature (listing line 151) and the declarations of `slc` and
// `cache` (listing lines 176-177) are not visible in this listing; the in-body
// comment describes this as the metadata-only cache mode test — confirm the name
// and the cache construction arguments against the original file.
//
// Round-trips a four-spectrum list through the MGF serializer, then accesses
// spectra through `slc` and asserts, after each access, the cache size, the index
// of the MRU/LRU entries, and that cached entries have metadata but no binary data.
// The assertions expect the cache to hold at most 2 entries.
152 {
153  // initialize list
154  MSData msd;
155  shared_ptr<SpectrumListSimple> sl(new SpectrumListSimple);
156  sl->spectra.push_back(makeSpectrumPtr(0, "S1"));
157  sl->spectra.push_back(makeSpectrumPtr(1, "S2"));
158  sl->spectra.push_back(makeSpectrumPtr(2, "S3"));
159  sl->spectra.push_back(makeSpectrumPtr(3, "S4"));
160  msd.run.spectrumListPtr = sl;
161 
162  // SpectrumListSimple returns the same shared_ptrs regardless of caching;
163  // serializing to MGF and back will produce different shared_ptrs
164  boost::shared_ptr<stringstream> ss(new stringstream);
165  Serializer_MGF serializer;
166  serializer.write(*ss, msd, 0);
167  serializer.read(ss, msd);
168 
169  // access a series of spectra and make sure the cache behaves appropriately:
170  // in metadata-only mode, entries in the cache should:
171  // - always have metadata
172  // - never have binary data
173 
174  SpectrumPtr s;
175 
178 
179  unit_assert(cache.empty());
181 
182  s = slc.spectrum(0, false);
183 
184  // pointers should be equal
185  unit_assert_operator_equal(slc.spectrum(0, false), s);
186 
187  if (os_) *os_ << cache << endl;
188  unit_assert(!cache.empty());
189  unit_assert_operator_equal(1, cache.size());
190  unit_assert_operator_equal(0, cache.mru().second->index);
191 
192  // with-binary-data access should return the binary data, but only cache the metadata
193  s = slc.spectrum(1, true);
194 
195  if (os_) *os_ << cache << endl;
196  unit_assert_operator_equal(2, cache.size());
197  unit_assert_operator_equal(1, cache.mru().second->index);
198  unit_assert(spectrumHasMetadata(*cache.mru().second));
199  unit_assert(!spectrumHasBinaryData(*cache.mru().second));
200  unit_assert(spectrumHasMetadata(*cache.lru().second));
201  unit_assert_operator_equal(0, cache.lru().second->index);
202 
// a third distinct spectrum is expected to push index 0 out of the cache
203  s = slc.spectrum(2, false);
204 
205  // pointers should be equal
206  unit_assert_operator_equal(slc.spectrum(2, false), s);
207 
208  if (os_) *os_ << cache << endl;
209  unit_assert_operator_equal(2, cache.size());
210  unit_assert_operator_equal(2, cache.mru().second->index);
211  unit_assert(spectrumHasMetadata(*cache.mru().second));
212  unit_assert(!spectrumHasBinaryData(*cache.mru().second));
213  unit_assert_operator_equal(1, cache.lru().second->index);
214 
215  s = slc.spectrum(3, true);
216 
217  if (os_) *os_ << cache << endl;
218  unit_assert_operator_equal(2, cache.size());
219  unit_assert_operator_equal(3, cache.mru().second->index);
220  unit_assert(spectrumHasMetadata(*cache.mru().second));
221  unit_assert(!spectrumHasBinaryData(*cache.mru().second));
222  unit_assert_operator_equal(2, cache.lru().second->index);
223  unit_assert(spectrumHasMetadata(*cache.lru().second));
224 
// re-accessing an already-cached index (2) is expected to promote it back to MRU
225  s = slc.spectrum(2, true);
226 
227  if (os_) *os_ << cache << endl;
228  unit_assert_operator_equal(2, cache.size());
229  unit_assert_operator_equal(2, cache.mru().second->index);
230  unit_assert(spectrumHasMetadata(*cache.mru().second));
231  unit_assert(!spectrumHasBinaryData(*cache.mru().second));
232  unit_assert_operator_equal(3, cache.lru().second->index);
233  unit_assert(spectrumHasMetadata(*cache.lru().second));
234 }
235 
236 
// NOTE(review): the signature (listing line 237) and the declarations of `slc` and
// `cache` (listing lines 262-263) are not visible in this listing; the in-body
// comment describes this as the binary-data-only cache mode test — confirm the name
// and the cache construction arguments against the original file.
//
// Round-trips a four-spectrum list through the MGF serializer, then accesses
// spectra through `slc` and asserts that only with-binary-data requests populate
// the cache and that cached entries have binary data but no metadata. The
// assertions expect the cache to hold at most 2 entries.
238 {
239  // initialize list
240  MSData msd;
241  shared_ptr<SpectrumListSimple> sl(new SpectrumListSimple);
242  sl->spectra.push_back(makeSpectrumPtr(0, "S1"));
243  sl->spectra.push_back(makeSpectrumPtr(1, "S2"));
244  sl->spectra.push_back(makeSpectrumPtr(2, "S3"));
245  sl->spectra.push_back(makeSpectrumPtr(3, "S4"));
246  msd.run.spectrumListPtr = sl;
247 
248  // SpectrumListSimple returns the same shared_ptrs regardless of caching;
249  // serializing to MGF and back will produce different shared_ptrs
250  boost::shared_ptr<stringstream> ss(new stringstream);
251  Serializer_MGF serializer;
252  serializer.write(*ss, msd, 0);
253  serializer.read(ss, msd);
254 
255  // access a series of spectra and make sure the cache behaves appropriately:
256  // in binary-data-only mode, entries in the cache should:
257  // - never have metadata
258  // - always have binary data
259 
260  SpectrumPtr s;
261 
264 
265  unit_assert(cache.empty());
267 
268  // metadata-only access should not affect the cache
269  s = slc.spectrum(0, false);
270 
271  if (os_) *os_ << cache << endl;
272  unit_assert(cache.empty());
273  unit_assert_operator_equal(0, cache.size());
274 
275  // with-binary-data access should be cached without the metadata
276  s = slc.spectrum(1, true);
277 
278  if (os_) *os_ << cache << endl;
279  unit_assert_operator_equal(1, cache.size());
280  unit_assert_operator_equal(1, cache.mru().second->index);
281  unit_assert(!spectrumHasMetadata(*cache.mru().second));
282  unit_assert(spectrumHasBinaryData(*cache.mru().second));
283 
// another metadata-only access: the cache is expected to be unchanged (still only index 1)
284  s = slc.spectrum(2, false);
285 
286  if (os_) *os_ << cache << endl;
287  unit_assert_operator_equal(1, cache.size());
288  unit_assert_operator_equal(1, cache.mru().second->index);
289  unit_assert(!spectrumHasMetadata(*cache.mru().second));
290  unit_assert(spectrumHasBinaryData(*cache.mru().second));
291 
292  s = slc.spectrum(3, true);
293 
294  if (os_) *os_ << cache << endl;
295  unit_assert_operator_equal(2, cache.size());
296  unit_assert_operator_equal(3, cache.mru().second->index);
297  unit_assert(!spectrumHasMetadata(*cache.mru().second));
298  unit_assert(spectrumHasBinaryData(*cache.mru().second));
299  unit_assert_operator_equal(1, cache.lru().second->index);
300  unit_assert(!spectrumHasMetadata(*cache.lru().second));
301  unit_assert(spectrumHasBinaryData(*cache.lru().second));
302 
// re-accessing cached index 1 is expected to swap the MRU/LRU positions
303  s = slc.spectrum(1, true);
304 
305  if (os_) *os_ << cache << endl;
306  unit_assert_operator_equal(2, cache.size());
307  unit_assert_operator_equal(1, cache.mru().second->index);
308  unit_assert(!spectrumHasMetadata(*cache.mru().second));
309  unit_assert(spectrumHasBinaryData(*cache.mru().second));
310  unit_assert_operator_equal(3, cache.lru().second->index);
311  unit_assert(!spectrumHasMetadata(*cache.lru().second));
312  unit_assert(spectrumHasBinaryData(*cache.lru().second));
313 }
314 
315 
// NOTE(review): the signature (listing line 316) and the declaration of `cache`
// (listing line 342) are not visible in this listing; the in-body comment describes
// this as the metadata-and-binary-data cache mode test — confirm against the
// original file.
//
// Round-trips a four-spectrum list through the MGF serializer, wraps it in a
// SpectrumListCache built with MemoryMRUCacheMode_MetaDataAndBinaryData and a size
// argument of 2, then asserts that only with-binary-data requests populate the
// cache and that cached entries carry both metadata and binary data.
317 {
318  // initialize list
319  MSData msd;
320  shared_ptr<SpectrumListSimple> sl(new SpectrumListSimple);
321  sl->spectra.push_back(makeSpectrumPtr(0, "S1"));
322  sl->spectra.push_back(makeSpectrumPtr(1, "S2"));
323  sl->spectra.push_back(makeSpectrumPtr(2, "S3"));
324  sl->spectra.push_back(makeSpectrumPtr(3, "S4"));
325  msd.run.spectrumListPtr = sl;
326 
327  // SpectrumListSimple returns the same shared_ptrs regardless of caching;
328  // serializing to MGF and back will produce different shared_ptrs
329  boost::shared_ptr<stringstream> ss(new stringstream);
330  Serializer_MGF serializer;
331  serializer.write(*ss, msd, 0);
332  serializer.read(ss, msd);
333 
334  // access a series of spectra and make sure the cache behaves appropriately:
335  // in metadata-and-binary-data mode, entries in the cache should:
336  // - always have metadata
337  // - always have binary data
338 
339  SpectrumPtr s;
340 
341  SpectrumListCache slc(msd.run.spectrumListPtr, MemoryMRUCacheMode_MetaDataAndBinaryData, 2);
343 
344  unit_assert(cache.empty());
346 
347  // metadata-only access should not affect the cache
348  s = slc.spectrum(0, false);
349 
350  if (os_) *os_ << cache << endl;
351  unit_assert(cache.empty());
352  unit_assert_operator_equal(0, cache.size());
353 
354  s = slc.spectrum(1, true);
355 
356  // pointers should be equal
357  unit_assert_operator_equal(slc.spectrum(1, true), s);
358 
359  if (os_) *os_ << cache << endl;
360  unit_assert_operator_equal(1, cache.size());
361  unit_assert_operator_equal(1, cache.mru().second->index);
362  unit_assert(spectrumHasMetadata(*cache.mru().second));
363  unit_assert(spectrumHasBinaryData(*cache.mru().second));
364 
// another metadata-only access: the cache is expected to be unchanged (still only index 1)
365  s = slc.spectrum(2, false);
366 
367  if (os_) *os_ << cache << endl;
368  unit_assert_operator_equal(1, cache.size());
369  unit_assert_operator_equal(1, cache.mru().second->index);
370  unit_assert(spectrumHasMetadata(*cache.mru().second));
371  unit_assert(spectrumHasBinaryData(*cache.mru().second));
372 
373  s = slc.spectrum(3, true);
374 
375  // pointers should be equal
376  unit_assert_operator_equal(slc.spectrum(3, true), s);
377 
378  if (os_) *os_ << cache << endl;
379  unit_assert_operator_equal(2, cache.size());
380  unit_assert_operator_equal(3, cache.mru().second->index);
381  unit_assert(spectrumHasMetadata(*cache.mru().second));
382  unit_assert(spectrumHasBinaryData(*cache.mru().second));
383  unit_assert_operator_equal(1, cache.lru().second->index);
384  unit_assert(spectrumHasMetadata(*cache.lru().second));
385  unit_assert(spectrumHasBinaryData(*cache.lru().second));
386 
// a third with-binary-data spectrum is expected to evict the LRU entry (index 1)
387  s = slc.spectrum(2, true);
388 
389  if (os_) *os_ << cache << endl;
390  unit_assert_operator_equal(2, cache.size());
391  unit_assert_operator_equal(2, cache.mru().second->index);
392  unit_assert(spectrumHasMetadata(*cache.mru().second));
393  unit_assert(spectrumHasBinaryData(*cache.mru().second));
394  unit_assert_operator_equal(3, cache.lru().second->index);
395  unit_assert(spectrumHasMetadata(*cache.lru().second));
396  unit_assert(spectrumHasBinaryData(*cache.lru().second));
397 }
398 
// Compares spectrum index 3 of an example data file as read directly from a
// spectrum list against the same spectrum read through `cache`, first without and
// then with binary data.
//
// filename: name of a file inside the "example_data/" directory, which is located
//           by truncating __FILE__ at the last occurrence of "pwiz".
//
// NOTE(review): the declaration of `cache` (listing line 406) is not visible in
// this listing; presumably it wraps msd1's spectrum list — confirm against the
// original file.
// NOTE(review): if __FILE__ does not contain "pwiz", rfind() returns npos and
// resize(npos) throws std::length_error; consider an explicit check with a clearer
// error message.
399 void testFileReads(const char *filename) {
400  std::string srcparent(__FILE__); // locate test data relative to this source file
401  // something like \ProteoWizard\pwiz\pwiz\data\msdata\SpectrumListCacheTest.cpp
402  size_t pos = srcparent.rfind("pwiz");
403  srcparent.resize(pos);
404  std::string example_data_dir = srcparent + "example_data/";
405  pwiz::msdata::MSDataFile msd1(example_data_dir + filename);
407  pwiz::msdata::MSDataFile msd2(example_data_dir + filename);
408  // test logic for efficient delayed read of binary data -
409  // we try to avoid reparsing the header since we have that cached
410  // mzML and mzXML readers can do this, others could probably be made to
411  int index = 3;
// first pass: metadata only — neither copy may carry binary data
412  SpectrumPtr s=msd2.run.spectrumListPtr->spectrum(index, false);
413  SpectrumPtr c=cache.spectrum(index, false);
414  unit_assert(*s==*c);
415  unit_assert(!s->hasBinaryData());
416  unit_assert(!c->hasBinaryData());
// second pass: same index with binary data — both copies must now carry it
417  s=msd2.run.spectrumListPtr->spectrum(index, true);
418  c=cache.spectrum(index, true);
419  unit_assert(*s==*c);
420  unit_assert(s->hasBinaryData());
421  unit_assert(c->hasBinaryData());
// NOTE(review): element 0 of the first array is read without an emptiness check,
// unlike the second array below
422  unit_assert(s->binaryDataArrayPtrs[0]->data[0]==
423  c->binaryDataArrayPtrs[0]->data[0]);
424  unit_assert(!s->binaryDataArrayPtrs[1]->data.empty());
425  unit_assert(!c->binaryDataArrayPtrs[1]->data.empty());
426  unit_assert(s->binaryDataArrayPtrs[1]->data[0]==
427  c->binaryDataArrayPtrs[1]->data[0]);
428 }
429 
430 
// Runs the test cases in sequence: the cache-off test followed by the file-read
// comparison for three example files.
// NOTE(review): listing lines 433 and 435-437 are not visible in this listing, so
// further test calls may exist in the original file — confirm before editing.
431 void test()
432 {
434  testModeOff();
438  // check the delayed-binary-read
439  // logic for mzML and mzXML readers
440  testFileReads("tiny.pwiz.mzXML");
441  testFileReads("tiny.pwiz.1.0.mzML");
442  testFileReads("tiny.pwiz.1.1.mzML");
443 }
444 
445 
// Test entry point. Passing "-v" as the first argument routes verbose output to
// cout via os_. Any exception escaping test() is reported through TEST_FAILED.
// NOTE(review): TEST_PROLOG / TEST_FAILED are macros defined outside this file, and
// listing line 464 (just before the closing brace) is not visible in this listing —
// confirm against the original file.
446 int main(int argc, char* argv[])
447 {
448  TEST_PROLOG(argc, argv)
449 
450  try
451  {
452  if (argc>1 && !strcmp(argv[1],"-v")) os_ = &cout;
453  test();
454  }
455  catch (exception& e)
456  {
457  TEST_FAILED(e.what())
458  }
459  catch (...)
460  {
461  TEST_FAILED("Caught unknown exception.")
462  }
463 
465 }