ProteoWizard
FeatureDetectorTuningTest.cpp
Go to the documentation of this file.
1 //
2 // $Id: FeatureDetectorTuningTest.cpp 4129 2012-11-20 00:05:37Z chambm $
3 //
4 //
5 // Original author: Darren Kessner <darren@proteowizard.org>
6 //
7 // Copyright 2009 Center for Applied Molecular Medicine
8 // University of Southern California, Los Angeles, CA
9 //
10 // Licensed under the Apache License, Version 2.0 (the "License");
11 // you may not use this file except in compliance with the License.
12 // You may obtain a copy of the License at
13 //
14 // http://www.apache.org/licenses/LICENSE-2.0
15 //
16 // Unless required by applicable law or agreed to in writing, software
17 // distributed under the License is distributed on an "AS IS" BASIS,
18 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 // See the License for the specific language governing permissions and
20 // limitations under the License.
21 //
22 
23 
24 #include "PeakExtractor.hpp"
25 #include "PeakelGrower.hpp"
26 #include "PeakelPicker.hpp"
30 #include "boost/filesystem/path.hpp"
32 
33 using namespace pwiz::util;
34 using namespace pwiz::analysis;
35 using namespace pwiz::data;
36 using namespace pwiz::data::peakdata;
37 using namespace pwiz::msdata;
38 namespace bfs = boost::filesystem;
39 
40 
41 ostream* os_ = 0;
42 
43 
44 shared_ptr<PeakExtractor> createPeakExtractor()
45 {
46  shared_ptr<NoiseCalculator> noiseCalculator(new NoiseCalculator_2Pass);
47 
48  PeakFinder_SNR::Config pfsnrConfig;
49  pfsnrConfig.windowRadius = 2;
50  pfsnrConfig.zValueThreshold = 3;
51 
52  shared_ptr<PeakFinder> peakFinder(new PeakFinder_SNR(noiseCalculator, pfsnrConfig));
53 
55  pfpConfig.windowRadius = 1; // (windowRadius != 1) is not good for real data
56  shared_ptr<PeakFitter> peakFitter(new PeakFitter_Parabola(pfpConfig));
57 
58  return shared_ptr<PeakExtractor>(new PeakExtractor(peakFinder, peakFitter));
59 }
60 
61 
63 {
64  double rt;
65  SetRetentionTime(double _rt) : rt(_rt) {}
66  void operator()(Peak& peak) {peak.retentionTime = rt;}
67 };
68 
69 
70 vector< vector<Peak> > extractPeaks(const MSData& msd, const PeakExtractor& peakExtractor)
71 {
72  MSDataCache msdCache;
73  msdCache.open(msd);
74 
75  const size_t spectrumCount = msdCache.size();
76  vector< vector<Peak> > result(spectrumCount);
77 
78  for (size_t index=0; index<spectrumCount; index++)
79  {
80  const SpectrumInfo& spectrumInfo = msdCache.spectrumInfo(index, true);
81 
82  vector<Peak>& peaks = result[index];
83  peakExtractor.extractPeaks(spectrumInfo.data, peaks);
84  for_each(peaks.begin(), peaks.end(), SetRetentionTime(spectrumInfo.retentionTime));
85 
86  if (os_)
87  {
88  *os_ << "index: " << index << endl;
89  *os_ << "peaks: " << peaks.size() << endl;
90  copy(peaks.begin(), peaks.end(), ostream_iterator<Peak>(*os_, "\n"));
91  }
92  }
93 
94  return result;
95 }
96 
97 
98 shared_ptr<PeakelGrower> createPeakelGrower()
99 {
101  config.mzTolerance = .01;
102  config.rtTolerance = 10; // seconds
103 
104  return shared_ptr<PeakelGrower>(new PeakelGrower_Proximity(config));
105 }
106 
107 
108 void print(ostream& os, const string& label, vector<PeakelPtr> v)
109 {
110  os << label << ":\n";
111  for (vector<PeakelPtr>::const_iterator it=v.begin(); it!=v.end(); ++it)
112  os << **it << endl;
113 }
114 
115 
116 void verifyBombesinPeakels(const PeakelField& peakelField)
117 {
118  // TODO: assert # peaks/peakel, verify metadata
119 
120  // charge state 2
121 
122  vector<PeakelPtr> bombesin_2_0 = peakelField.find(810.41, .01, RTMatches_Contains<Peakel>(1870));
123  if (os_) print(*os_, "bombesin_2_0", bombesin_2_0);
124  unit_assert(bombesin_2_0.size() == 1);
125 
126  vector<PeakelPtr> bombesin_2_1 = peakelField.find(810.91, .01, RTMatches_Contains<Peakel>(1870));
127  if (os_) print(*os_, "bombesin_2_1", bombesin_2_1);
128  unit_assert(bombesin_2_1.size() == 1);
129 
130  vector<PeakelPtr> bombesin_2_2 = peakelField.find(811.41, .01, RTMatches_Contains<Peakel>(1870));
131  if (os_) print(*os_, "bombesin_2_2", bombesin_2_2);
132  unit_assert(bombesin_2_2.size() == 1);
133 
134  vector<PeakelPtr> bombesin_2_3 = peakelField.find(811.91, .01, RTMatches_Contains<Peakel>(1870,10));
135  if (os_) print(*os_, "bombesin_2_3", bombesin_2_3);
136  unit_assert(bombesin_2_3.size() == 1);
137 
138  // charge state 3
139 
140  vector<PeakelPtr> bombesin_3_0 = peakelField.find(540.61, .01, RTMatches_Contains<Peakel>(1870));
141  if (os_) print(*os_, "bombesin_3_0", bombesin_3_0);
142  unit_assert(bombesin_3_0.size() == 1);
143 
144  vector<PeakelPtr> bombesin_3_1 = peakelField.find(540.61 + 1./3., .02, RTMatches_Contains<Peakel>(1865));
145  if (os_) print(*os_, "bombesin_3_1", bombesin_3_1);
146  unit_assert(bombesin_3_1.size() == 1);
147 
148  vector<PeakelPtr> bombesin_3_2 = peakelField.find(540.61 + 2./3., .02, RTMatches_Contains<Peakel>(1865,5));
149  if (os_) print(*os_, "bombesin_3_2", bombesin_3_2);
150  unit_assert(bombesin_3_2.size() == 1);
151  // TODO: verify peaks.size() == 1
152 }
153 
154 
155 shared_ptr<PeakelPicker> createPeakelPicker()
156 {
158  //config.log = os_;
159  config.minMonoisotopicPeakelSize = 2;
160 
161  return shared_ptr<PeakelPicker>(new PeakelPicker_Basic(config));
162 }
163 
164 
165 void verifyBombesinFeatures(const FeatureField& featureField)
166 {
167  const double epsilon = .01;
168 
169  const double mz_bomb2 = 810.415;
170  vector<FeaturePtr> bombesin_2_found = featureField.find(mz_bomb2, epsilon,
172  unit_assert(bombesin_2_found.size() == 1);
173  const Feature& bombesin_2 = *bombesin_2_found[0];
174  unit_assert(bombesin_2.charge == 2);
175  unit_assert(bombesin_2.peakels.size() == 5);
176  unit_assert_equal(bombesin_2.peakels[0]->mz, mz_bomb2, epsilon);
177  unit_assert_equal(bombesin_2.peakels[1]->mz, mz_bomb2+.5, epsilon);
178  unit_assert_equal(bombesin_2.peakels[2]->mz, mz_bomb2+1, epsilon);
179  unit_assert_equal(bombesin_2.peakels[3]->mz, mz_bomb2+1.5, epsilon);
180  unit_assert_equal(bombesin_2.peakels[4]->mz, mz_bomb2+2, epsilon);
181  //TODO: verify feature metadata
182 
183  const double mz_bomb3 = 540.612;
184  vector<FeaturePtr> bombesin_3_found = featureField.find(mz_bomb3, epsilon,
186  unit_assert(bombesin_3_found.size() == 1);
187  const Feature& bombesin_3 = *bombesin_3_found[0];
188  unit_assert(bombesin_3.charge == 3);
189  unit_assert(bombesin_3.peakels.size() == 3);
190  unit_assert_equal(bombesin_3.peakels[0]->mz, mz_bomb3, epsilon);
191  unit_assert_equal(bombesin_3.peakels[1]->mz, mz_bomb3+1./3, epsilon);
192  unit_assert_equal(bombesin_3.peakels[2]->mz, mz_bomb3+2./3, epsilon);
193  //TODO: verify feature metadata
194 }
195 
196 
197 void testBombesin(const string& filename)
198 {
199  if (os_) *os_ << "testBombesin()" << endl;
200 
201  // open data file and check sanity
202 
203  MSDataFile msd(filename);
204  unit_assert(msd.run.spectrumListPtr.get());
205  unit_assert(msd.run.spectrumListPtr->size() == 8);
206 
207  // instantiate PeakExtractor and extract peaks
208 
209  shared_ptr<PeakExtractor> peakExtractor = createPeakExtractor();
210  vector< vector<Peak> > peaks = extractPeaks(msd, *peakExtractor);
211  unit_assert(peaks.size() == 8);
212 
213  // grow peakels
214  shared_ptr<PeakelGrower> peakelGrower = createPeakelGrower();
215  PeakelField peakelField;
216  peakelGrower->sowPeaks(peakelField, peaks);
217 
218  if (os_) *os_ << "peakelField:\n" << peakelField << endl;
219  verifyBombesinPeakels(peakelField);
220 
221  // pick peakels
222 
223  shared_ptr<PeakelPicker> peakelPicker = createPeakelPicker();
224  FeatureField featureField;
225  peakelPicker->pick(peakelField, featureField);
226 
227  if (os_) *os_ << "featureField:\n" << featureField << endl;
228  verifyBombesinFeatures(featureField);
229 }
230 
231 
232 void test(const bfs::path& datadir)
233 {
234  testBombesin((datadir / "FeatureDetectorTest_Bombesin.mzML").string());
235 }
236 
237 
238 int main(int argc, char* argv[])
239 {
240  TEST_PROLOG(argc, argv)
241 
242  try
243  {
244  bfs::path datadir = ".";
245 
246  for (int i=1; i<argc; i++)
247  {
248  if (!strcmp(argv[i],"-v"))
249  os_ = &cout;
250  else
251  // hack to allow running unit test from a different directory:
252  // Jamfile passes full path to specified input file.
253  // we want the path, so we can ignore filename
254  datadir = bfs::path(argv[i]).branch_path();
255  }
256 
257  test(datadir);
258  }
259  catch (exception& e)
260  {
261  TEST_FAILED(e.what())
262  }
263  catch (...)
264  {
265  TEST_FAILED("Caught unknown exception.")
266  }
267 
269 }
270