ProteoWizard
ramp.h
Go to the documentation of this file.
1 // $Id: ramp.h 3397 2012-03-19 19:20:57Z pcbrefugee $
2 /***************************************************************************
3  RAMP
4 
5 
6 Non sequential parser for mzXML files
7 and mzData files, too!
8 and mzML, if you have the PWIZ library from Spielberg Family Proteomics Center
9 
10  -------------------
11  begin : Wed Oct 10
12  copyright : (C) 2003 by Pedrioli Patrick, ISB, Proteomics
13  email : ppatrick@student.ethz.ch
14  additional work for C++, >2GB files in WIN32, and portability (C) 2004 by Brian Pratt, Insilicos LLC
15  ***************************************************************************/
16 
17 /***************************************************************************
18 * *
19 * This program is free software; you can redistribute it and/or modify *
20 * it under the terms of the GNU Library or "Lesser" General Public *
21 * License (LGPL) as published by the Free Software Foundation; *
22 * either version 2 of the License, or (at your option) any later *
23 * version. *
24 ***************************************************************************/
25 
26 #ifndef _RAMP_H
27 #define _RAMP_H
28 
29 #include <stdio.h>
30 #include <stdlib.h>
31 
32 #ifdef TPPLIB
33 #include "common/sysdepend.h" // tpp lib system dependencies handling
34 #ifdef _MSC_VER
35 #include <inttypes.h>
36 #endif
37 #else
38 // copied this code from TPP's sysdepend.h to make RAMP stand alone for other uses
39 #if defined(_MSC_VER) || defined(__MINGW32__) // MSVC or MinGW
40 #ifndef WINDOWS_NATIVE
41 #define WINDOWS_NATIVE
42 #endif
43 #endif
44 #if defined(_MSC_VER) && !defined(S_ISREG)
45 #define S_ISREG(mode) ((mode)&_S_IFREG)
46 #endif
47 #endif
48 
49 #ifdef _MSC_VER
50 #define atoll(a) _atoi64(a)
51 #endif
52 
53 #ifdef TPPLIB
54 #define HAVE_PWIZ_MZML_LIB 1 // define this to enable use of Spielberg Proteomics Center's pwiz mzML reader
55 #endif
56 #ifdef HAVE_PWIZ_MZML_LIB
57 #define RAMP_HAVE_GZ_INPUT 1 // can read mzxml.gz, mzdata.gz - depends on pwiz lib
58 #endif
59 
60 #ifdef WINDOWS_NATIVE // MSVC or MinGW
61 #include <winsock2.h>
62 #include <sys/types.h>
63 #include <fcntl.h>
64 #include <io.h>
65 #else
66 #include <stdint.h>
67 #include <netinet/in.h>
68 #endif
69 #include <sys/stat.h>
70 
// Low-level file handle type, selected per build configuration:
//   - RAMP_HAVE_GZ_INPUT defined : random_access_gzFile * (for reading .mzxml.gz)
//   - MSVC without gzip support  : int, and RAMP_NONNATIVE_LONGFILE is defined
//                                  so the MSFT 64 bit file pointer API is used
//   - anything else              : FILE * (can use fopen, fseek etc)
#ifdef RAMP_HAVE_GZ_INPUT
#include "random_access_gzFile.h" // for reading .mzxml.gz
typedef random_access_gzFile *ramp_filehandle_t;
#elif defined(_MSC_VER) // no gzip support: use MSFT API for 64 bit file pointers
#define RAMP_NONNATIVE_LONGFILE
typedef int ramp_filehandle_t;
#else // no gzip support, not MSVC
typedef FILE *ramp_filehandle_t; // can use fopen, fseek etc
#endif
82 
// set mz and intensity precision: double by default, float when the build
// defines RAMPREAL_FLOAT
#ifdef RAMPREAL_FLOAT
typedef float RAMPREAL;
#else
typedef double RAMPREAL;
#endif
89 
90 #include "ramp_base64.h"
// content type selector; enumerators take the values 0..3 in declaration order
typedef enum {
   mzInt = 0,
   mzRuler,
   mzOnly,
   intensityOnly
} e_contentType;
92 #ifdef SWIG
93 %apply long long {ramp_fileoffset_t};
94 #else
95 #ifndef RAMP_STRUCT_DECL_ONLY // useful for pwiz, which only wants to mimic ramp structs
96 #ifdef HAVE_PWIZ_MZML_LIB
97 namespace pwiz { // forward ref
98  namespace msdata { // forward ref
99  class RAMPAdapter; // forward ref
100  }
101 }
102 #endif
103 
104 //
105 // we use this struct instead of FILE* so we can track what kind of files we're parsing
106 //
107 typedef struct {
109 #ifdef HAVE_PWIZ_MZML_LIB
110  pwiz::msdata::RAMPAdapter *mzML; // if nonNULL, then we're reading mzML
111 #endif
112  int bIsMzData; // if not mzML, then is it mzXML or mzData?
113 } RAMPFILE;
114 #endif // RAMP_STRUCT_DECL_ONLY
// File I/O wrappers. Each branch maps ramp_fgets / ramp_feof / ramp_fseek /
// ramp_fread / ramp_ftell onto the backend matching the handle type, always
// going through (handle)->fileHandle, and declares the ramp_fileoffset_t type.
115 #ifdef RAMP_HAVE_GZ_INPUT
// gzip-capable build: forward to the random_access_gz* routines
// NOTE(review): listing line 121 is absent from this copy and this branch, as
// shown, never declares ramp_fileoffset_t although both other branches do --
// presumably the dropped line was that declaration; confirm against the real ramp.h.
116 #define ramp_fgets(buf,len,handle) random_access_gzgets((handle)->fileHandle, buf, len )
117 #define ramp_feof(a) random_access_gzeof((a)->fileHandle)
118 #define ramp_fseek(a,b,c) random_access_gzseek((a)->fileHandle,b,c)
119 #define ramp_fread(buf,len,handle) random_access_gzread((handle)->fileHandle,buf,len)
120 #define ramp_ftell(a) random_access_gztell((a)->fileHandle)
122 #elif defined(RAMP_NONNATIVE_LONGFILE) // use MSFT API for 64 bit file pointers
123 typedef __int64 ramp_fileoffset_t;
124 #define ramp_fseek(a,b,c) _lseeki64((a)->fileHandle,b,c)
// tell is expressed as a zero-length seek from the current position
125 #define ramp_ftell(a) _lseeki64((a)->fileHandle,0,SEEK_CUR)
126 #define ramp_fread(buf,len,handle) read((handle)->fileHandle,buf,len)
// in this branch ramp_fgets is a real function (only declared here), not a macro
127 #ifndef RAMP_STRUCT_DECL_ONLY // useful for pwiz, which only wants to mimic ramp structs
128 char *ramp_fgets(char *buf,int len,RAMPFILE *handle);
129 #endif
130 #define ramp_feof(handle) eof((handle)->fileHandle)
131 #else // can use fopen for long files
// stdio build: ramp_fread reads len items of size 1, i.e. len bytes
132 #define ramp_fread(buf,len,handle) fread(buf,1,len,(handle)->fileHandle)
133 #define ramp_fgets(buf,len,handle) fgets(buf, len, (handle)->fileHandle)
134 #define ramp_feof(handle) feof((handle)->fileHandle)
135 #ifdef __MINGW32__
// MinGW: use the explicit 64 bit offset type and seek/tell variants
136 typedef off64_t ramp_fileoffset_t;
137 #define ramp_fseek(a,b,c) fseeko64((a)->fileHandle,b,c)
138 #define ramp_ftell(a) ftello64((a)->fileHandle)
139 #else // a real OS with real file handling
140 typedef off_t ramp_fileoffset_t;
141 #define ramp_fseek(a,b,c) fseeko((a)->fileHandle,b,c)
142 #define ramp_ftell(a) ftello((a)->fileHandle)
143 #endif
144 #endif
145 #endif // not SWIG
146 
147 
148 #include <string.h>
149 #include <string>
150 #include <ctype.h>
151 
// array-length constants for the structs declared in this header
// (CHARGEARRAY_LENGTH sizes possibleChargesArray in the scan header)
#define INSTRUMENT_LENGTH 2000
#define SCANTYPE_LENGTH 32
#define CHARGEARRAY_LENGTH 128
155 
// Per-scan header record.
// NOTE(review): this copy of the listing is missing the declaration line that
// should precede the opening brace (listing line 156) as well as listing lines
// 161-162, 165-168 and 175-178, so the member list below is incomplete.
// Prototypes in this header take `struct ScanHeaderStruct *` and talk about
// merged scans, so this is presumably struct ScanHeaderStruct -- restore the
// missing lines from the real ramp.h before compiling against it.
157 {
158  int seqNum; // number in sequence observed file (1-based)
159  int acquisitionNum; // scan number as declared in File (may be gaps)
160  int msLevel;
163  double retentionTime; /* in seconds */
164  double basePeakMZ;
169  double lowMZ;
170  double highMZ;
171  int precursorScanNum; /* only if MS level > 1 */
172  double precursorMZ; /* only if MS level > 1 */
173  int precursorCharge; /* only if MS level > 1 */
174  double precursorIntensity; /* only if MS level > 1 */
179  bool possibleChargesArray[CHARGEARRAY_LENGTH]; /* NOTE: does NOT include "precursorCharge" information; only from "possibleCharges" */
180  int mergedScan; /* only if MS level > 1 */
181  int mergedResultScanNum; /* scan number of the resultant merged scan */
182  int mergedResultStartScanNum; /* smallest scan number of the scanOrigin for merged scan */
183  int mergedResultEndScanNum; /* largest scan number of the scanOrigin for merged scan */
184  std::string filterLine;
185  ramp_fileoffset_t filePosition; /* where in the file is this header? */
186 };
187 
// Run-level summary record: m/z bounds and start/end times.
// NOTE(review): the declaration line before the opening brace (listing line 188)
// and listing line 190 are missing from this copy. readRunHeader() and
// readMSRun() take `struct RunHeaderStruct *`, so this is presumably
// struct RunHeaderStruct -- confirm and restore from the real ramp.h.
189 {
191  double lowMZ;
192  double highMZ;
193  double startMZ;
194  double endMZ;
195  double dStartTime;
196  double dEndTime;
197 };
198 
// NOTE(review): this definition is truncated in this copy -- listing lines
// 201-205 and 207 (the members, the closing brace and the typedef name) are
// missing, so the struct never closes here. Restore from the real ramp.h.
199 typedef struct InstrumentStruct
200 {
206  //char msType[INSTRUMENT_LENGTH];
208 
209 #ifndef RAMP_STRUCT_DECL_ONLY // useful for pwiz, which only wants to mimic ramp structs
210 // file open/close
211 RAMPFILE *rampOpenFile(const char *filename);
212 void rampCloseFile(RAMPFILE *pFI);
213 
// construct a filename in buf from a basename, adding .mzXML or .mzData
// as exists, or .mzXML if neither exists. returns buf, or NULL if buflen
// is too short
std::string rampConstructInputFileName(const std::string &basename);
char *rampConstructInputFileName(char *buf, int buflen, const char *basename);
char *rampConstructInputPath(
   char *buf,             // put the result here
   int inbuflen,          // max result length
   const char *dir_in,    // use this as a directory hint if basename does not contain valid dir info
   const char *basename); // we'll try adding various filename extensions to this
223 
// construct a filename in inbuf from a basename and taking hints from a named
// spectrum, adding .mzXML or .mzData as exists
// return true on success
int rampValidateOrDeriveInputFilename(char *inbuf, int inbuflen, char *spectrumName);
228 
// trim a filename of its .mzData or .mzXML extension
// return trimmed buffer, or null if no proper .ext found
char *rampTrimBaseName(char *buf);

// locate the .mzData or .mzXML extension in the buffer
// return pointer to extension, or NULL if not found
char *rampValidFileType(const char *buf);
236 
// returns a null-terminated array of const ptrs
const char **rampListSupportedFileTypes();

// exercise at least some of the ramp interface - return non-0 on failure
// if filename is non-null we'll exercise reader with it
int rampSelfTest(char *filename);
242 
245  ramp_fileoffset_t indexOffset,
246  int *iLastScan);
247 void readHeader(RAMPFILE *pFI,
248  ramp_fileoffset_t lScanIndex, // read from this file position
249  struct ScanHeaderStruct *scanHeader);
250 int readMsLevel(RAMPFILE *pFI,
251  ramp_fileoffset_t lScanIndex);
252 double readStartMz(RAMPFILE *pFI,
253  ramp_fileoffset_t lScanIndex);
254 double readEndMz(RAMPFILE *pFI,
255  ramp_fileoffset_t lScanIndex);
256 int readPeaksCount(RAMPFILE *pFI,
257  ramp_fileoffset_t lScanIndex);
259  ramp_fileoffset_t lScanIndex);
260 void readRunHeader(RAMPFILE *pFI,
261  ramp_fileoffset_t *pScanIndex,
262  struct RunHeaderStruct *runHeader,
263  int iLastScan);
264 void readMSRun(RAMPFILE *pFI,
265  struct RunHeaderStruct *runHeader);
266 
268 
269 // for MS/MS averaged scan
// Option flags passed to setRampOption().
// NOTE(review): listing lines 272-273, 278 and 280 are missing from this copy.
// BIT_AVERAGE_SCANS and BIT_ORIGIN_SCANS are referenced below but not defined
// in what is visible, and the enumerator that the "'real' scan + merged
// resultant scan" comment describes is also absent -- presumably all three
// were on the dropped lines; restore from the real ramp.h.
270 enum {
271  MASK_SCANS_TYPE = 0x0003,
274  OPTION_AVERAGE_SCANS = BIT_AVERAGE_SCANS, // return scan including merged resultant scan
275  // but exclude 'real' scan via peaksCount=0
276  OPTION_ORIGIN_SCANS = BIT_ORIGIN_SCANS, // return 'real' scan
277  // but exclude merged resultant scan via peaksCount=0
279  // return 'real' scan + merged resultant scan
281 };
void setRampOption(long option);

// return 0 if the scan has not been used in merged scan
//        1 otherwise
int isScanAveraged(struct ScanHeaderStruct *scanHeader);

// return 1 if the scan is generated by merging other scans
//        0 otherwise
int isScanMergedResult(struct ScanHeaderStruct *scanHeader);

// return the scan range for a "raw" scan or merged scan
// return (<scan num>,<scan num>) in the case of "raw" (i.e. non-merged) scan
// return (<smallest scan num>,<highest scan num>) in the case of merged scan
void getScanSpanRange(const struct ScanHeaderStruct *scanHeader,
                      int *startScanNum,
                      int *endScanNum);
// END - for MS/MS averaged scan
294 
295 // Caching support
296 // Useful for working with a range of MS1 scans. Code can just ask for scan
297 // headers and peaks as normal, and the cache takes care of shifting its range.
298 
// Scan cache bookkeeping record.
// NOTE(review): the declaration line before the opening brace (listing line 299)
// and listing lines 303-304 are missing from this copy. The cache functions in
// this header take `struct ScanCacheStruct *`, so this is presumably
// struct ScanCacheStruct, and the dropped members presumably hold the cached
// data itself -- confirm and restore from the real ramp.h.
300 {
301  int seqNumStart; // scan at which the cache starts
302  int size; // number of scans in the cache
305 };
306 
307 // create a chache struct
308 struct ScanCacheStruct *getScanCache(int size);
309 
310 // free all memory held by a cache struct
311 void freeScanCache(struct ScanCacheStruct* cache);
312 
313 void clearScanCache(struct ScanCacheStruct* cache);
314 
315 // cached versions of standard ramp functions
316 const struct ScanHeaderStruct* readHeaderCached(struct ScanCacheStruct* cache, int seqNum, RAMPFILE* pFI, ramp_fileoffset_t lScanIndex);
317 int readMsLevelCached(struct ScanCacheStruct* cache, int seqNum, RAMPFILE* pFI, ramp_fileoffset_t lScanIndex);
318 const RAMPREAL *readPeaksCached(struct ScanCacheStruct* cache, int seqNum, RAMPFILE* pFI, ramp_fileoffset_t lScanIndex);
319 
320 #endif // ifndef RAMP_STRUCT_DECL_ONLY useful for pwiz, which only wants to mimic ramp structs
321 
322 #endif
323