ProteoWizard
MSData.hpp
Go to the documentation of this file.
1 //
2 // $Id: MSData.hpp 3543 2012-04-17 20:50:44Z pcbrefugee $
3 //
4 //
5 // Original author: Darren Kessner <darren@proteowizard.org>
6 //
7 // Copyright 2007 Spielberg Family Center for Applied Proteomics
8 // Cedars-Sinai Medical Center, Los Angeles, California 90048
9 //
10 // Licensed under the Apache License, Version 2.0 (the "License");
11 // you may not use this file except in compliance with the License.
12 // You may obtain a copy of the License at
13 //
14 // http://www.apache.org/licenses/LICENSE-2.0
15 //
16 // Unless required by applicable law or agreed to in writing, software
17 // distributed under the License is distributed on an "AS IS" BASIS,
18 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 // See the License for the specific language governing permissions and
20 // limitations under the License.
21 //
22 
23 
24 #ifndef _MSDATA_HPP_
25 #define _MSDATA_HPP_
26 
27 
30 #include "boost/shared_ptr.hpp"
31 #include "boost/iostreams/positioning.hpp"
32 #include <vector>
33 #include <string>
34 #include <map>
35 
36 
37 namespace pwiz {
38 namespace msdata {
39 
40 
41  using namespace pwiz::data;
42 
43 
44 PWIZ_API_DECL std::vector<CV> defaultCVList();
45 
46 
47 /// This summarizes the different types of spectra that can be expected in the file. This is expected to aid processing software in skipping files that do not contain appropriate spectrum types for it.
49 
50 
51 /// Description of the source file, including location and type.
53 {
54  /// an identifier for this file.
55  std::string id;
56 
57  /// name of the source file, without reference to location (either URI or local path).
58  std::string name;
59 
60  /// URI-formatted location where the file was retrieved.
61  std::string location;
62 
63  SourceFile(const std::string _id = "",
64  const std::string _name = "",
65  const std::string _location = "");
66 
67 
68  /// returns true iff the element contains no params and all members are empty or null
69  bool empty() const;
70 };
71 
72 
73 /// Shared-ownership pointer (boost::shared_ptr) to a SourceFile.
74 typedef boost::shared_ptr<SourceFile> SourceFilePtr;
75 
76 
77 /// Structure allowing the use of a controlled (cvParam) or uncontrolled vocabulary (userParam), or a reference to a predefined set of these in this mzML file (paramGroupRef).
79 
80 
81 /// Information pertaining to the entire mzML file (i.e. not specific to any part of the data set) is stored here.
83 {
84  /// this summarizes the different types of spectra that can be expected in the file. This is expected to aid processing software in skipping files that do not contain appropriate spectrum types for it.
86 
87  /// list and descriptions of the source files this mzML document was generated or derived from.
88  std::vector<SourceFilePtr> sourceFilePtrs;
89 
90  /// structure allowing the use of a controlled (cvParam) or uncontrolled vocabulary (userParam), or a reference to a predefined set of these in this mzML file (paramGroupRef)
91  std::vector<Contact> contacts;
92 
93  /// returns true iff all members are empty or null
94  bool empty() const;
95 };
96 
97 
98 /// Expansible description of the sample used to generate the dataset, named in sampleName.
100 {
101  /// a unique identifier across the samples with which to reference this sample description.
102  std::string id;
103 
104  /// an optional name for the sample description, mostly intended as a quick mnemonic.
105  std::string name;
106 
107  Sample(const std::string _id = "",
108  const std::string _name = "");
109 
110 
111  /// returns true iff the element contains no params and all members are empty or null
112  bool empty() const;
113 };
114 
115 
116 typedef boost::shared_ptr<Sample> SamplePtr;
117 
118 
120 {
125 };
126 
127 
128 /// A component of an instrument corresponding to a source (i.e. ion source), an analyzer (i.e. mass analyzer), or a detector (i.e. ion detector)
130 {
131  /// the type of component (Source, Analyzer, or Detector)
133 
134  /// this attribute MUST be used to indicate the order in which the components are encountered from source to detector (e.g., in a Q-TOF, the quadrupole would have the lower order number, and the TOF the higher number of the two).
135  int order;
136 
     /// default constructor: type is ComponentType_Unknown and order is 0.
137  Component() : type(ComponentType_Unknown), order(0) {}
     /// constructs a component from an explicit type and order; the parameters
     /// share the members' names and are copied into them by the initializer list.
138  Component(ComponentType type, int order) : type(type), order(order) {}
     /// constructs a component by forwarding cvid and order to define().
     /// NOTE(review): this constructor has no member initializer list, so type and
     /// order hold defined values only if define() assigns them -- confirm in the
     /// implementation file.
139  Component(CVID cvid, int order) { define(cvid, order); }
140 
141  void define(CVID cvid, int order);
142 
143  /// returns true iff the element contains no params and all members are empty or null
144  bool empty() const;
145 };
146 
147 
148 //struct PWIZ_API_DECL Source : public Component {};
149 //struct PWIZ_API_DECL Analyzer : public Component {};
150 //struct PWIZ_API_DECL Detector : public Component {};
151 
152 
153 /// List with the different components used in the mass spectrometer. At least one source, one mass analyzer and one detector need to be specified.
    ///
    /// The list is a std::vector<Component> extended with lookup helpers, one per
    /// component kind (source, analyzer, detector), each declared in a mutable and
    /// a const flavour. The index argument is zero-based: index 0 selects the
    /// component with ordinal 1.
    /// NOTE(review): what happens when no matching component exists is not visible
    /// in this header -- confirm in the implementation file before relying on it.
154 struct PWIZ_API_DECL ComponentList : public std::vector<Component>
155 {
156  /// returns the source component with ordinal <index+1>
157  Component& source(size_t index);
158 
159  /// returns the analyzer component with ordinal <index+1>
160  Component& analyzer(size_t index);
161 
162  /// returns the detector component with ordinal <index+1>
163  Component& detector(size_t index);
164 
165  /// returns the source component with ordinal <index+1> (read-only overload)
166  const Component& source(size_t index) const;
167 
168  /// returns the analyzer component with ordinal <index+1> (read-only overload)
169  const Component& analyzer(size_t index) const;
170 
171  /// returns the detector component with ordinal <index+1> (read-only overload)
172  const Component& detector(size_t index) const;
173 };
174 
175 
176 /// A piece of software.
178 {
179  /// an identifier for this software that is unique across all SoftwareTypes.
180  std::string id;
181 
182  /// the software version.
183  std::string version;
184 
185  Software(const std::string& _id = "");
186 
187  Software(const std::string& _id,
188  const CVParam& _param,
189  const std::string& _version);
190 
191  /// returns true iff all members are empty or null
192  bool empty() const;
193 };
194 
195 
196 typedef boost::shared_ptr<Software> SoftwarePtr;
197 
198 
199 /// TODO
201 
202 
203 /// Description of the acquisition settings of the instrument prior to the start of the run.
205 {
206  /// a unique identifier for this acquisition setting.
207  std::string id;
208 
209  /// container for a list of source file references.
210  std::vector<SourceFilePtr> sourceFilePtrs;
211 
212  /// target list (or 'inclusion list') configured prior to the run.
213  std::vector<Target> targets;
214 
215  ScanSettings(const std::string& _id = "");
216 
217 
218  /// returns true iff the element contains no params and all members are empty or null
219  bool empty() const;
220 };
221 
222 
223 typedef boost::shared_ptr<ScanSettings> ScanSettingsPtr;
224 
225 
226 /// Description of a particular hardware configuration of a mass spectrometer. Each configuration MUST have one (and only one) of the three different components used for an analysis. For hybrid instruments, such as an LTQ-FT, there MUST be one configuration for each permutation of the components that is used in the document. For software configuration, reference the appropriate ScanSettings element.
228 {
229  /// an identifier for this instrument configuration.
230  std::string id;
231 
232  /// list with the different components used in the mass spectrometer. At least one source, one mass analyzer and one detector need to be specified.
234 
235  /// reference to a previously defined software element.
237 
238  /// reference to a scan settings element defining global scan settings used by this configuration
240 
241  InstrumentConfiguration(const std::string& _id = "");
242 
243  /// returns true iff the element contains no params and all members are empty or null
244  bool empty() const;
245 };
246 
247 
248 typedef boost::shared_ptr<InstrumentConfiguration> InstrumentConfigurationPtr;
249 
250 
251 /// Description of the default peak processing method. This element describes the base method used in the generation of a particular mzML file. Variable methods should be described in the appropriate acquisition section - if no acquisition-specific details are found, then this information serves as the default.
253 {
254  /// this attribute allows a series of consecutive steps to be placed in the correct order.
255  int order;
256 
257  /// this attribute MUST reference the 'id' of the appropriate SoftwareType.
259 
260  ProcessingMethod() : order(0) {}
261 
262  /// returns true iff the element contains no params and all members are empty or null
263  bool empty() const;
264 };
265 
266 
267 typedef boost::shared_ptr<ProcessingMethod> ProcessingMethodPtr;
268 
269 
270 /// Description of the way in which a particular software was used.
272 {
273  /// a unique identifier for this data processing that is unique across all DataProcessingTypes.
274  std::string id;
275 
276  /// description of the default peak processing method(s). This element describes the base method used in the generation of a particular mzML file. Variable methods should be described in the appropriate acquisition section - if no acquisition-specific details are found, then this information serves as the default.
277  std::vector<ProcessingMethod> processingMethods;
278 
279  DataProcessing(const std::string& _id = "");
280 
281  /// returns true iff the element contains no params and all members are empty or null
282  bool empty() const;
283 };
284 
285 
286 typedef boost::shared_ptr<DataProcessing> DataProcessingPtr;
287 
288 
289 /// This element captures the isolation (or 'selection') window configured to isolate one or more precursors.
291 
292 
293 /// TODO
295 {
297  explicit SelectedIon(double mz);
298  explicit SelectedIon(double mz, double intensity, CVID intensityUnit);
299  explicit SelectedIon(double mz, int chargeState);
300  explicit SelectedIon(double mz, double intensity, int chargeState, CVID intensityUnit);
301 };
302 
303 
304 /// The type and energy level used for activation.
306 
307 
308 /// The method of precursor ion selection and activation
310 {
311  /// for precursor spectra that are external to this document, this attribute MUST reference the 'id' attribute of a sourceFile representing that external document.
312  /// note: this attribute is mutually exclusive with spectrumID; i.e. use one or the other but not both
314 
315  /// for precursor spectra that are external to this document, this string MUST correspond to the 'id' attribute of a spectrum in the external document indicated by 'sourceFileRef'.
316  /// note: this attribute is mutually exclusive with spectrumID; i.e. use one or the other but not both
317  std::string externalSpectrumID;
318 
319  /// reference to the id attribute of the spectrum from which the precursor was selected.
320  /// note: this attribute is mutually exclusive with externalSpectrumID; i.e. use one or the other but not both
321  std::string spectrumID;
322 
323  /// this element captures the isolation (or 'selection') window configured to isolate one or more precursors.
325 
326  /// the list of precursor ions that were selected.
327  std::vector<SelectedIon> selectedIons;
328 
329  /// the type and energy level used for activation.
331 
333  explicit Precursor(double mz);
334  explicit Precursor(double mz, double intensity, CVID intensityUnit);
335  explicit Precursor(double mz, int chargeState);
336  explicit Precursor(double mz, double intensity, int chargeState, CVID intensityUnit);
337 
338 
339  /// returns true iff the element contains no params and all members are empty or null
340  bool empty() const;
341 };
342 
343 
344 /// product ion information
346 {
347  /// this element captures the isolation (or 'selection') window configured to isolate one or more precursors.
349 
350  /// returns true iff the element contains no params and all members are empty or null
351  bool empty() const;
352 
353  /// returns true iff this product's isolation window is equal to that product's
354  bool operator==(const Product& that) const;
355 };
356 
357 
358 /// TODO
360 {
362  ScanWindow(double low, double high, CVID unit);
363 };
364 
365 
366 /// Scan or acquisition from original raw file used to create this peak list, as specified in sourceFile.
368 {
369  /// if this attribute is set, it must reference the 'id' attribute of a sourceFile representing the external document containing the spectrum referred to by 'externalSpectrumID'.
370  /// note: this attribute is mutually exclusive with spectrumID; i.e. use one or the other but not both
372 
373  /// for scans that are external to this document, this string must correspond to the 'id' attribute of a spectrum in the external document indicated by 'sourceFileRef'.
374  /// note: this attribute is mutually exclusive with spectrumID; i.e. use one or the other but not both
375  std::string externalSpectrumID;
376 
377  /// for scans that are local to this document, this attribute can be used to reference the 'id' attribute of the spectrum corresponding to the scan.
378  /// note: this attribute is mutually exclusive with externalSpectrumID; i.e. use one or the other but not both
379  std::string spectrumID;
380 
381  /// this attribute MUST reference the 'id' attribute of the appropriate instrument configuration.
383 
384  /// container for a list of scan windows.
385  std::vector<ScanWindow> scanWindows;
386 
387  /// returns true iff the element contains no params and all members are empty or null
388  bool empty() const;
389 };
390 
391 
392 /// List and descriptions of scans.
394 {
395  std::vector<Scan> scans;
396 
397  bool empty() const;
398 };
399 
400 
401 /// The structure into which encoded binary data goes. Byte ordering is always little endian (Intel style). Computers using a different endian style MUST convert to/from little endian when writing/reading mzML
403 {
404  /// this optional attribute may reference the 'id' attribute of the appropriate dataProcessing.
406 
407  /// the binary data.
408  std::vector<double> data;
409 
410  /// returns true iff the element contains no params and all members are empty or null
411  bool empty() const;
412 };
413 
414 
415 typedef boost::shared_ptr<BinaryDataArray> BinaryDataArrayPtr;
416 
417 
418 #pragma pack(1)
419 /// The data point type of a mass spectrum.
421 {
422  double mz;
423  double intensity;
424 
426  : mz(0), intensity(0)
427  {}
428 
429  MZIntensityPair(double mz, double intensity)
430  : mz(mz), intensity(intensity)
431  {}
432 
433  /// returns true iff mz and intensity are pairwise equal
434  bool operator==(const MZIntensityPair& that) const;
435 };
436 #pragma pack()
437 
438 
439 PWIZ_API_DECL std::ostream& operator<<(std::ostream& os, const MZIntensityPair& mzi);
440 
441 
442 #pragma pack(1)
443 /// The data point type of a chromatogram.
445 {
446  double time;
447  double intensity;
448 
450  : time(0), intensity(0)
451  {}
452 
453  TimeIntensityPair(double time, double intensity)
454  : time(time), intensity(intensity)
455  {}
456 
457  /// returns true iff time and intensity are pairwise equal
458  bool operator==(const TimeIntensityPair& that) const;
459 };
460 #pragma pack()
461 
462 
463 PWIZ_API_DECL std::ostream& operator<<(std::ostream& os, const TimeIntensityPair& ti);
464 
465 const size_t IDENTITY_INDEX_NONE = (size_t)-1; ///< sentinel index (the largest size_t value); the default constructors of SpectrumIdentity and ChromatogramIdentity use it as the initial value of index.
466 
467 /// Identifying information for a spectrum
469 {
470  /// the zero-based, consecutive index of the spectrum in the SpectrumList.
471  size_t index;
472 
473  /// a unique identifier for this spectrum. It should be expected that external files may use this identifier together with the mzML filename or accession to reference a particular spectrum.
474  std::string id;
475 
476  /// the identifier for the spot from which this spectrum was derived, if a MALDI or similar run.
477  std::string spotID;
478 
479  /// for file-based MSData implementations, this attribute may refer to the spectrum's position in the file
480  boost::iostreams::stream_offset sourceFilePosition;
481 
482 
     /// default constructor: index starts as IDENTITY_INDEX_NONE and
     /// sourceFilePosition as -1 (explicitly cast to stream_offset); id and spotID
     /// are not listed in the initializer list and are therefore empty strings.
483  SpectrumIdentity() : index(IDENTITY_INDEX_NONE), sourceFilePosition((boost::iostreams::stream_offset)-1) {}
484 };
485 
486 /// Identifying information for a chromatogram
488 {
489  /// the zero-based, consecutive index of the chromatogram in the ChromatogramList.
490  size_t index;
491 
492  /// a unique identifier for this chromatogram. It should be expected that external files may use this identifier together with the mzML filename or accession to reference a particular chromatogram.
493  std::string id;
494 
495  /// for file-based MSData implementations, this attribute may refer to the chromatogram's position in the file
496  boost::iostreams::stream_offset sourceFilePosition;
497 
     /// default constructor: index starts as IDENTITY_INDEX_NONE and
     /// sourceFilePosition as -1 (implicitly converted to stream_offset); id is
     /// not listed in the initializer list and is therefore an empty string.
498  ChromatogramIdentity() : index(IDENTITY_INDEX_NONE), sourceFilePosition(-1) {}
499 };
500 
501 
502 /// The structure that captures the generation of a peak list (including the underlying acquisitions)
504 {
505  /// default length of binary data arrays contained in this element.
507 
508  /// this attribute can optionally reference the 'id' of the appropriate dataProcessing.
510 
511  /// this attribute can optionally reference the 'id' of the appropriate sourceFile.
513 
514  /// list of scans
516 
517  /// list and descriptions of precursors to the spectrum currently being described.
518  std::vector<Precursor> precursors;
519 
520  /// list and descriptions of product ion information
521  std::vector<Product> products;
522 
523  /// list of binary data arrays.
524  std::vector<BinaryDataArrayPtr> binaryDataArrayPtrs;
525 
526 
527  Spectrum() : defaultArrayLength(0) {}
528 
529  /// returns true iff the element contains no params and all members are empty or null
530  bool empty() const;
531 
532  /// returns true iff has nonnull and nonempty BinaryDataArrayPtr
     /// - only the first entry of binaryDataArrayPtrs is examined: the list must be
     ///   non-empty, its first pointer non-null, and that array's data vector non-empty
     /// - any further arrays in the list are not inspected
533  bool hasBinaryData() const {
534  return binaryDataArrayPtrs.size() &&
535  binaryDataArrayPtrs[0] &&
536  !binaryDataArrayPtrs[0]->data.empty();
537  }; // note: the ';' after the closing brace is redundant but harmless
538 
539  /// copy binary data arrays into m/z-intensity pair array
540  void getMZIntensityPairs(std::vector<MZIntensityPair>& output) const;
541 
542  /// copy binary data arrays into m/z-intensity pair array
543  /// note: this overload is to allow client to allocate own buffer; the client
544  /// must determine the correct size beforehand, or an exception will be thrown
545  void getMZIntensityPairs(MZIntensityPair* output, size_t expectedSize) const;
546 
547  /// get m/z array (may be null)
548  BinaryDataArrayPtr getMZArray() const;
549 
550  /// get intensity array (may be null)
551  BinaryDataArrayPtr getIntensityArray() const;
552 
553  /// set binary data arrays
554  void setMZIntensityPairs(const std::vector<MZIntensityPair>& input, CVID intensityUnits);
555 
556  /// set binary data arrays
557  void setMZIntensityPairs(const MZIntensityPair* input, size_t size, CVID intensityUnits);
558 
559  /// set m/z and intensity arrays separately (they must be the same size)
560  void setMZIntensityArrays(const std::vector<double>& mzArray, const std::vector<double>& intensityArray, CVID intensityUnits);
561 };
562 
563 
564 typedef boost::shared_ptr<Spectrum> SpectrumPtr;
565 
566 
567 /// A single chromatogram.
569 {
570  /// default length of binary data arrays contained in this element.
572 
573  /// this attribute can optionally reference the 'id' of the appropriate dataProcessing.
575 
576  /// description of precursor ion information (i.e. Q1 settings)
578 
579  /// description of product ion information (i.e. Q3 settings)
581 
582  /// list of binary data arrays.
583  std::vector<BinaryDataArrayPtr> binaryDataArrayPtrs;
584 
585  Chromatogram() : defaultArrayLength(0) {}
586 
587  /// returns true iff the element contains no params and all members are empty or null
588  bool empty() const;
589 
590  /// copy binary data arrays into time-intensity pair array
591  void getTimeIntensityPairs(std::vector<TimeIntensityPair>& output) const;
592 
593  /// copy binary data arrays into time-intensity pair array
594  /// note: this overload is to allow client to allocate own buffer; the client
595  /// must determine the correct size beforehand, or an exception will be thrown
596  void getTimeIntensityPairs(TimeIntensityPair* output, size_t expectedSize) const;
597 
598  /// get time array (may be null)
599  BinaryDataArrayPtr getTimeArray() const;
600 
601  /// get intensity array (may be null)
602  BinaryDataArrayPtr getIntensityArray() const;
603 
604  /// set binary data arrays
605  void setTimeIntensityPairs(const std::vector<TimeIntensityPair>& input, CVID timeUnits, CVID intensityUnits);
606 
607  /// set binary data arrays
608  void setTimeIntensityPairs(const TimeIntensityPair* input, size_t size, CVID timeUnits, CVID intensityUnits);
609 
610  /// set time and intensity arrays separately (they must be the same size)
611  void setTimeIntensityArrays(const std::vector<double>& timeArray, const std::vector<double>& intensityArray, CVID timeUnits, CVID intensityUnits);
612 };
613 
614 
615 typedef boost::shared_ptr<Chromatogram> ChromatogramPtr;
616 
617 
618 // note: derived container to support dynamic linking on Windows
    // - adds no members of its own to std::vector<size_t>
    // - it is the return type of SpectrumList::findNameValue() and SpectrumList::findSpotID()
619 class IndexList : public std::vector<size_t> {};
620 
622 {
627 };
628 
629 ///
630 /// Interface for accessing spectra, which may be stored in memory
631 /// or backed by a data file (RAW, mzXML, mzML).
632 ///
633 /// Implementation notes:
634 ///
635 /// - Implementations are expected to keep a spectrum index in the form of
636 /// vector<SpectrumIdentity> or equivalent. The default find*() functions search
637 /// the index linearly. Implementations may provide constant time indexing.
638 ///
639 /// - The semantics of spectrum() may vary slightly with implementation. In particular,
640 /// a SpectrumList implementation that is backed by a file may choose either to cache
641 /// or discard the SpectrumPtrs for future access, with the caveat that the client
642 /// may write to the underlying data.
643 ///
644 /// - It is the implementation's responsibility to return a valid SpectrumPtr from spectrum().
645 /// If this cannot be done, an exception must be thrown.
646 ///
647 /// - The 'getBinaryData' flag is a hint if false : implementations may provide valid
648 /// BinaryDataArrayPtrs on spectrum(index, false); implementations *must* provide
649 /// valid BinaryDataArrayPtrs on spectrum(index, true).
650 ///
652 {
653  public:
654 
655  /// returns the number of spectra
656  virtual size_t size() const = 0;
657 
658  /// returns true iff (size() == 0) and (dataProcessingPtr.get() == NULL)
659  virtual bool empty() const;
660 
661  /// access to a spectrum index
662  virtual const SpectrumIdentity& spectrumIdentity(size_t index) const = 0;
663 
664  /// find id in the spectrum index (returns size() on failure)
665  virtual size_t find(const std::string& id) const;
666 
667  /// find all spectrum indexes with specified name/value pair
668  virtual IndexList findNameValue(const std::string& name, const std::string& value) const;
669 
670  /// find all spectrum indexes with spotID (returns empty vector on failure)
671  virtual IndexList findSpotID(const std::string& spotID) const;
672 
673  /// retrieve a spectrum by index
674  /// - binary data arrays will be provided if (getBinaryData == true);
675  /// - client may assume the underlying Spectrum* is valid
676  virtual SpectrumPtr spectrum(size_t index, bool getBinaryData = false) const = 0;
677 
678  /// get a copy of the seed spectrum, optionally with its binary data populated
679  /// this is useful for formats like mzML that can delay loading of binary data
680  /// - client may assume the underlying Spectrum* is valid
     /// - this default implementation reads only seed->index and forwards to
     ///   spectrum(index, getBinaryData); nothing else from seed is used here
     /// - NOTE(review): seed is dereferenced without a null check, so a null
     ///   SpectrumPtr is not handled by this default -- confirm callers never pass one
681  virtual SpectrumPtr spectrum(const SpectrumPtr &seed, bool getBinaryData) const {
682  return spectrum(seed->index, getBinaryData); // default implementation
683  }; // note: the ';' after the closing brace is redundant but harmless
684 
685  /// retrieve a spectrum by index
686  /// - detailLevel determines what fields are guaranteed present on the spectrum after the call
687  /// - client may assume the underlying Spectrum* is valid
     /// - default behavior: DetailLevel_FastMetadata and DetailLevel_InstantMetadata yield a
     ///   newly allocated, default-constructed Spectrum (index is not consulted in that case);
     ///   every other level forwards to spectrum(index, getBinaryData), with getBinaryData
     ///   true only for DetailLevel_FullData
688  virtual SpectrumPtr spectrum(size_t index, DetailLevel detailLevel) const
689  {
690  // By default faster metadata access is not implemented
691  if (detailLevel == DetailLevel_FastMetadata || detailLevel == DetailLevel_InstantMetadata)
692  return SpectrumPtr(new Spectrum);
693 
     // remaining levels fall back to the index/getBinaryData overload
694  return spectrum(index, detailLevel == DetailLevel_FullData);
695  }
696 
697  /// returns the data processing affecting spectra retrieved through this interface
698  /// - may return a null shared pointer
699  virtual const boost::shared_ptr<const DataProcessing> dataProcessingPtr() const;
700 
701  virtual ~SpectrumList(){}
702 };
703 
704 
705 typedef boost::shared_ptr<SpectrumList> SpectrumListPtr;
706 
707 
708 /// Simple writeable in-memory implementation of SpectrumList.
709 /// Note: This spectrum() implementation returns internal SpectrumPtrs.
711 {
712  std::vector<SpectrumPtr> spectra;
714 
715  // SpectrumList implementation
716 
     /// returns the number of entries in the spectra vector
717  virtual size_t size() const {return spectra.size();}
     /// returns true only when the spectra vector is empty and dp.get() is null
     /// NOTE(review): the declaration of dp is not present in this listing -- confirm its type
718  virtual bool empty() const {return spectra.empty() && !dp.get();}
719  virtual const SpectrumIdentity& spectrumIdentity(size_t index) const;
720  virtual SpectrumPtr spectrum(size_t index, bool getBinaryData) const;
721  virtual const boost::shared_ptr<const DataProcessing> dataProcessingPtr() const;
722 };
723 
724 
725 typedef boost::shared_ptr<SpectrumListSimple> SpectrumListSimplePtr;
726 
727 
728 ///
729 /// Interface for accessing chromatograms, which may be stored in memory
730 /// or backed by a data file (RAW, mzXML, mzML).
731 ///
732 /// Implementation notes:
733 ///
734 /// - Implementations are expected to keep a chromatogram index in the form of
735 /// vector<ChromatogramIdentity> or equivalent. The default find*() functions search
736 /// the index linearly. Implementations may provide constant time indexing.
737 ///
738 /// - The semantics of chromatogram() may vary slightly with implementation. In particular,
739 /// a ChromatogramList implementation that is backed by a file may choose either to cache
740 /// or discard the ChromatogramPtrs for future access, with the caveat that the client
741 /// may write to the underlying data.
742 ///
743 /// - It is the implementation's responsibility to return a valid ChromatogramPtr from chromatogram().
744 /// If this cannot be done, an exception must be thrown.
745 ///
746 /// - The 'getBinaryData' flag is a hint if false : implementations may provide valid
747 /// BinaryDataArrayPtrs on chromatogram(index, false); implementations *must* provide
748 /// valid BinaryDataArrayPtrs on chromatogram(index, true).
749 ///
751 {
752  public:
753 
754  /// returns the number of chromatograms
755  virtual size_t size() const = 0;
756 
757  /// returns true iff (size() == 0) and (dataProcessingPtr.get() == NULL)
     /// - declared virtual, matching SpectrumList::empty(), so that an override such as
     ///   ChromatogramListSimple::empty() is the one invoked through a ChromatogramList
     ///   pointer or reference
     /// - note: adding a virtual function changes the class's vtable, so code compiled
     ///   against the previous declaration must be rebuilt
758  virtual bool empty() const;
759 
760  /// access to a chromatogram index
761  virtual const ChromatogramIdentity& chromatogramIdentity(size_t index) const = 0;
762 
763  /// find id in the chromatogram index (returns size() on failure)
764  virtual size_t find(const std::string& id) const;
765 
766  /// retrieve a chromatogram by index
767  /// - binary data arrays will be provided if (getBinaryData == true);
768  /// - client may assume the underlying Chromatogram* is valid
769  virtual ChromatogramPtr chromatogram(size_t index, bool getBinaryData = false) const = 0;
770 
771  /// returns the data processing affecting chromatograms retrieved through this interface
772  /// - may return a null shared pointer
773  virtual const boost::shared_ptr<const DataProcessing> dataProcessingPtr() const;
774 
775  virtual ~ChromatogramList(){}
776 };
777 
778 
779 typedef boost::shared_ptr<ChromatogramList> ChromatogramListPtr;
780 
781 
782 /// Simple writeable in-memory implementation of ChromatogramList.
783 /// Note: This chromatogram() implementation returns internal ChromatogramPtrs.
785 {
786  std::vector<ChromatogramPtr> chromatograms;
788 
789  // ChromatogramList implementation
790 
     /// returns the number of entries in the chromatograms vector
791  virtual size_t size() const {return chromatograms.size();}
     /// returns true only when the chromatograms vector is empty and dp.get() is null
     /// NOTE(review): the declaration of dp is not present in this listing -- confirm its type
792  virtual bool empty() const {return chromatograms.empty() && !dp.get();}
793  virtual const ChromatogramIdentity& chromatogramIdentity(size_t index) const;
794  virtual ChromatogramPtr chromatogram(size_t index, bool getBinaryData) const;
795  virtual const boost::shared_ptr<const DataProcessing> dataProcessingPtr() const;
796 };
797 
798 
799 typedef boost::shared_ptr<ChromatogramListSimple> ChromatogramListSimplePtr;
800 
801 
802 /// A run in mzML should correspond to a single, consecutive and coherent set of scans on an instrument.
804 {
805  /// a unique identifier for this run.
806  std::string id;
807 
808  /// this attribute MUST reference the 'id' of the default instrument configuration. If a scan does not reference an instrument configuration, it implicitly refers to this configuration.
810 
811  /// this attribute MUST reference the 'id' of the appropriate sample.
813 
814  /// the optional start timestamp of the run, in UT.
815  std::string startTimeStamp;
816 
817  /// default source file reference
819 
820  /// all mass spectra and the acquisitions underlying them are described and attached here. Subsidiary data arrays are also both described and attached here.
822 
823  /// all chromatograms for this run.
825 
826  Run(){}
827  bool empty() const;
828 
829  private:
830  // no copying - any implementation must handle:
831  // - SpectrumList cloning
832  // - internal cross-references to heap-allocated objects
833  Run(const Run&);
834  Run& operator=(const Run&);
835 };
836 
837 
838 namespace IO {struct HandlerMSData;} // forward declaration for friend
839 
840 
841 /// This is the root element of ProteoWizard; it represents the mzML element, defined as:
842 /// intended to capture the use of a mass spectrometer, the data generated, and the initial processing of that data (to the level of the peak list).
844 {
845  /// an optional accession number for the mzML document.
846  std::string accession;
847 
848  /// an optional id for the mzML document. It is recommended to use LSIDs when possible.
849  std::string id;
850 
851  /// container for one or more controlled vocabulary definitions.
852  /// note: one of the <cv> elements in this list MUST be the PSI MS controlled vocabulary. All <cvParam> elements in the document MUST refer to one of the <cv> elements in this list.
853  std::vector<CV> cvs;
854 
855  /// information pertaining to the entire mzML file (i.e. not specific to any part of the data set) is stored here.
857 
858  /// container for a list of referenceableParamGroups
859  std::vector<ParamGroupPtr> paramGroupPtrs;
860 
861  /// list and descriptions of samples.
862  std::vector<SamplePtr> samplePtrs;
863 
864  /// list and descriptions of software used to acquire and/or process the data in this mzML file.
865  std::vector<SoftwarePtr> softwarePtrs;
866 
867  /// list with the descriptions of the acquisition settings applied prior to the start of data acquisition.
868  std::vector<ScanSettingsPtr> scanSettingsPtrs;
869 
870  /// list and descriptions of instrument configurations.
871  std::vector<InstrumentConfigurationPtr> instrumentConfigurationPtrs;
872 
873  /// list and descriptions of data processing applied to this data.
874  std::vector<DataProcessingPtr> dataProcessingPtrs;
875 
876  /// return dataProcessingPtrs augmented by the dataProcessingPtr() set in SpectrumList and/or ChromatogramList
877  std::vector<DataProcessingPtr> allDataProcessingPtrs() const;
878 
879  /// a run in mzML should correspond to a single, consecutive and coherent set of scans on an instrument.
881 
882  MSData();
883  virtual ~MSData();
884  bool empty() const;
885 
886  /// returns the version of this mzML document;
887  /// for a document created programmatically, the version is the current release version of mzML;
888  /// for a document created from a file/stream, the version is the schema version read from the file/stream
889  const std::string& version() const;
890 
891  private:
892  // no copying
893  MSData(const MSData&);
894  MSData& operator=(const MSData&);
895 
896  protected:
897  std::string version_; // schema version read from the file/stream
898  friend struct IO::HandlerMSData;
899 };
900 
901 
902 typedef boost::shared_ptr<MSData> MSDataPtr;
903 
904 
905 namespace id {
906 
907 /// parses an id string into a map<string,string>
908 PWIZ_API_DECL std::map<std::string,std::string> parse(const std::string& id);
909 
910 /// convenience function to extract a named value from an id string
911 PWIZ_API_DECL std::string value(const std::string& id, const std::string& name);
912 
913 /// templated convenience function to extract a named value from an id string
    /// - looks the name up with value(id, name) and converts the resulting text to
    ///   value_type via boost::lexical_cast
    /// - when value() returns an empty string, the result is
    ///   boost::lexical_cast<value_type>(0) instead (for std::string that is the text "0",
    ///   not an empty string)
    /// - NOTE(review): boost::lexical_cast reports unconvertible text by throwing
    ///   boost::bad_lexical_cast; nothing here catches it, so it would reach the caller --
    ///   confirm this is intended
914 template<typename value_type>
915 value_type valueAs(const std::string& id, const std::string& name)
916 {
917  std::string result = value(id, name);
918  return !result.empty() ? boost::lexical_cast<value_type>(result)
919  : boost::lexical_cast<value_type>(0);
920 }
921 
922 /// returns the nativeID format from the defaultSourceFilePtr if set,
923 /// or from sourceFilePtrs[0] if the list isn't empty,
924 /// or CVID_Unknown
926 
927 /// translates a "scan number" to a string that is correct for the given nativeID format;
928 /// semantic validity requires that scanNumber be parseable as an integer;
929 /// some nativeID formats cannot be translated to and will always return an empty string
930 /// currently supported formats: Thermo, Bruker/Agilent YEP, Bruker BAF, mzXML, MGF, and mzData
931 PWIZ_API_DECL std::string translateScanNumberToNativeID(CVID nativeIDFormat, const std::string& scanNumber);
932 
933 /// translates a nativeID in the given nativeID format to a simple integer "scan number";
934 /// some nativeID formats cannot be translated from and will always return an empty string
935 /// currently supported formats: Thermo, Bruker/Agilent YEP, Bruker BAF, mzXML, MGF, and mzData
936 PWIZ_API_DECL std::string translateNativeIDToScanNumber(CVID nativeIDFormat, const std::string& id);
937 
938 /// abbreviates a nativeID ("name1=value1 name2=value2" translates to "value1.value2")
939 PWIZ_API_DECL std::string abbreviate(const std::string& id, char delimiter = '.');
940 
941 } // namespace id
942 
943 
944 } // namespace msdata
945 } // namespace pwiz
946 
947 
948 #endif // _MSDATA_HPP_
949