Commit 71776f8f authored by mirandaa's avatar mirandaa

init

parents
Copyright 2006-2010
LipidXplorer is free software; you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free Software
Foundation; either version 2 of the License, or (at your option) any later
version.
LipidXplorer is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
LipidXplorer; if not, write to the Free Software Foundation, Inc., 51 Franklin St,
Fifth Floor, Boston, MA 02110-1301 USA or find it under http://www.gnu.org/licenses/gpl.html
ELEMENTTREE is released under the terms of the Python (MIT style) license:
http://pypi.python.org/pypi/elementtree/
LXML is released under the terms of the BSD license:
http://lxml.de/
PLY is released under the terms of the BSD license:
http://www.dabeaz.com/ply/
mzAPI is part of the multiplerz project and released under
the terms of the GNU Lesser General Public License:
http://blais.dfci.harvard.edu/index.php?id=63
from lx.gui import lpdxGUI
import wx
class MyApp(wx.App):
    """wxPython application wrapper for the LipidXplorer GUI.

    Creates the main LpdxFrame, centers it on screen and registers it
    as the application's top window.
    """

    def OnInit(self):
        # Build the main window: raw import disabled, full LipidXplorer mode.
        main_frame = lpdxGUI.LpdxFrame(
            None,
            -1,
            "",
            rawimport=False,
            lipidxplorer=True,
            version="1.2.7",
        )
        self.frame = main_frame
        main_frame.Show(True)
        main_frame.Center()
        self.SetTopWindow(main_frame)
        return True
def main():
    """Create the wx application object and enter its event loop."""
    application = MyApp(0)
    application.MainLoop()


## end of the software
if __name__ == "__main__":
    main()
from lx.gui import lpdxGUI
import wx
class MyApp(wx.App):
    """wxPython application wrapper for the LipidXplorer GUI.

    Builds the main LpdxFrame, shows and centers it, then registers it
    as the top window of the application.
    """

    def OnInit(self):
        # Main window for release 1.2.6; raw import is switched off.
        frame = lpdxGUI.LpdxFrame(
            None,
            -1,
            "",
            rawimport=False,
            lipidxplorer=True,
            version="1.2.6",
        )
        self.frame = frame
        frame.Show(True)
        frame.Center()
        self.SetTopWindow(frame)
        return True
def main():
    """Start the GUI: instantiate the application and run its event loop."""
    gui_app = MyApp(0)
    gui_app.MainLoop()


## end of the software
if __name__ == "__main__":
    main()
# -*- mode: python -*-
# PyInstaller build specification for LipidXplorer (one-folder build).
# Analysis/PYZ/EXE/COLLECT are injected by the PyInstaller runtime when
# this spec is executed via `pyinstaller LipidXplorer.spec`.

block_cipher = None  # no bytecode encryption

a = Analysis(
    ['LipidXplorer.py'],
    pathex=['D:\\tmp\\LipidXplorer-1.2.7'],
    binaries=[],
    # Bundle everything under lx\stuff into the distribution.
    # FIX: was 'lx\\stuff\*' — '\*' is an invalid escape sequence
    # (DeprecationWarning since Python 3.6, SyntaxWarning/error later).
    # '\\*' spells the identical glob string 'lx\stuff\*'.
    datas=[('lx\\stuff\\*', 'lx\\stuff')],
    hiddenimports=[],
    hookspath=[],
    runtime_hooks=[],
    excludes=[],
    win_no_prefer_redirects=False,
    win_private_assemblies=False,
    cipher=block_cipher,
)

pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)

exe = EXE(
    pyz,
    a.scripts,
    exclude_binaries=True,
    name='LipidXplorer',
    debug=False,
    strip=False,
    upx=False,
    console=False,  # GUI application: suppress the console window
)

coll = COLLECT(
    exe,
    a.binaries,
    a.zipfiles,
    a.datas,
    strip=False,
    # NOTE(review): EXE is built with upx=False but COLLECT uses upx=True —
    # confirm whether UPX compression of collected binaries is intended.
    upx=True,
    name='LipidXplorer',
)
Metadata-Version: 1.0
Name: LipidXplorer
Version: 1.2.7
Summary: UNKNOWN
Home-page: https://wiki.mpi-cbg.de/wiki/lipidx/index.php/Main_Page
Author: Ronny Herzog
Author-email: herzog@mpi-cbg.de
License: UNKNOWN
Description: ...
Platform: UNKNOWN
12.08.2008
- new function isStandard(<variable>, "<sample>", "<scope>"). Place it in SUCHTHAT section.
A scan is started to find <variable> in <sample>. Once found, it is declared as the standard and the
intensities of all other peaks in <scope> are recalculated as ratios to it.
Example: isStandard(varStandard, "hilde01", "MS1+")
- new function for addressing groups of samples with the help of placeholders:
Patterns are Unix shell style:
* matches everything
? matches any single character
[seq] matches any character in seq
[!seq] matches any char not in seq
Example: FA1.intensity["*hilde0[1-9]*"] > FA1.intensity["*hilde1[0-9]*"]
- also possible: column(FA1, "*hilde0[1-9]*")
13.08.2008
- semi-new function: isStandard(<variable>, "<scope>"). With this syntax the standard is
calculated for every single sample.
14.08.2008
- every peak (either MS or MS/MS) has now additionally the following information:
. peak mean
. peak median
. peak variance
. peak standard deviation
all this info will be put out in the dump
22.08.2008
- isStandard() function works now for MS1+/- (! no MS2+/- !). The function should be placed in the SUCHTHAT
section and has as attributes 1) a variable containing the marked standard and 2) the scope of
the standard (MS1+, MS1-, MS2+ or MS2-). Next an example of an MFQL script identifying
PC with calculating the standard:
QUERYNAME = Phosphatidylcholine;
DEFINE PR = 'C[36..50] H[30..200] N[1] O[8] P[1]' WITH DBR = (1.5,7.5), CHG = 1;
DEFINE DietherPC = 'C44 H93 O6 N1 P1' WITH CHG = 1;
DEFINE DietherPE = 'C45 H95 O6 N1 P1' WITH CHG = 1;
IDENTIFY Phosphatidylcholine WHERE
# marking
PR IN MS1+ WITH TOLERANCE = 4 ppm OR
DietherPC IN MS1+
SUCHTHAT
isEven(PR.chemsc[C]) AND
isStandard(DietherPC, "MS1+")
REPORT
MASS = "%4.4f" % "(PR.mass)";
NAME = "PC [%d:%d]" % "((PR.chemsc)[C] - 8, (PR.chemsc)[db] - 1.5)";
PRECURINTENS = PR.intensity;;
2.09.2008
- de-isotoping was implemented for MS mode. The algorithm uses the sum compositions
which were calculated with the applied MFQL scripts. So, only molecular species of interest
are considered for the de-isotoping. The algorithm is the following:
1) sort MS spectrum increasingly.
2) for every mass m which has a chemical sum composition assigned:
3) check whether there are masses i1, i2, i3 or i4 which could be isotopes, i.e.
is there a mass i1 = m + 1.0033,
is there a mass i2 = m + 2 * (1.0033), and so on ...
4) calculate the isotopic distribution of m for 13C only. Isotopes of other elements
contribute so little for lipids that we leave them aside. The distribution is binomial
with a probability of 0.01082 that a 13C occurs <insert citation here>.
5) subtract the calculated isotopic percentage from i1-4
- de-isotoping of MS/MS was revisited. The algorithm is the following:
1) generate artificial PIS spectra P1, ..., Pn for fragments f1, ..., fn by collecting
precursor masses which have f1 (f2, ..., fn) in their MS/MS spectrum.
2) for every PIS spectrum Pi:
3) for every mass m in Pi:
4) check whether there are masses i1, i2, i3 or i4 which could be isotopes
5) calculate the isotopic distribution of the neutral loss of m according to
<insert citation here>
6) subtract the calculated isotopic percentage from i1-4
1.10.2008
- implemented a debugging window in the GUI.
- GUI looks a bit more compact now
- update of the merging algorithm (for *.mzXML import). Average masses are now calculated
by intensity weighted average. This yields more accurate spectra.
9.10.2008
- switched off the cleaning procedure for *.mzXML imported files
- if more than one sum composition is found for a precursor mass, it will be reported
ordered by identification error.
- Bug with *.mzXML file, where only MS/MS spectra are given is fixed.
23.10.2008
- new Function: complementMasterScan. Switch it on with a checkbox on the
Run-Panel. It will produce the "complement MasterScan" of the current
query, i.e. a MasterScan with all peaks which were not identified in
the current run. It will be saved as <original MasterScan name>-complement.sc.
- the purpose is to do blind queries for unknown or not expected sum compositions.
18.11.2008
- new Function: DEFINE allows now to define a list of same variables with
different names in one line. The user just writes a list:
old:
DEFINE FA1 = 'C[14..22] H[20..60] O[2] N[1]' WITH DBR = (0.0,6.0), CHG = 0;
DEFINE FA2 = 'C[14..22] H[20..60] O[2] N[1]' WITH DBR = (0.0,6.0), CHG = 0;
DEFINE FA3 = 'C[14..22] H[20..60] O[2] N[1]' WITH DBR = (0.0,6.0), CHG = 0;
new:
DEFINE (FA1, FA2, FA3) = 'C[14..22] H[20..60] O[2] N[1]' WITH DBR = (0.0,6.0), CHG = 0;
- new Function: DEFINE allow the definition of a list of fragments. For example:
DEFINE FA = ('C14 H27 O2 N1',
'C14 H29 O2 N1',
'C14 H31 O2 N1',
'C15 H21 O2 N1',
'C15 H31 O2 N1');
2009-03-06
- adding a new Variable to TypeMFQL: currVars, which carries the current variables
and is accessible for all functions
- adding a new function: avg(), which calculates the average of the attribute (which
should be an intensity vector by now)
2009-07-02
- new function in the import which allows grouping the samples by letting the user put
them into group folders. This does only work for *.mzXML until now.
2009-12-01
* fixed massrange error when using masses in DEFINE
* fixed problem with order of the *.mfql scripts in the Run Panel
2009-12-03
* implemented multiple charge recognition !NOTE!: it is to be tested!
Lipids with a charge other than 1, 0 or -1 cannot be taken into account for
isotopic correction.
* upgrade of the Exception handling.
[FAS_LTQ_30000MS1]
msmsminoccupation = 0.5
msresolutiondelta = -14
msmsresolution = 300
msmscalibration =
mscalibration =
msmassrange = (450,1200)
mstolerance = 20 ppm
timerange = (0,1500)
msresolution = 30000
msmstolerance = 0.3 Da
selectionwindow = 0.5
forcesinglecharge = 1
msminoccupation = 0.5
msmsthreshold = 1
msmsmassrange = (50,2000)
msthreshold = 6000
msmsresolutiondelta = 1
[FAS_MS1_100000_MS2_15000]
msmsminoccupation = 0.5
msresolutiondelta = -53.4
msmscalibration =
mscalibration =
msmassrange = (450,1200)
mstolerance = 5 ppm
timerange = (0,3600)
msresolution = 110000
msmstolerance = 0.005 Da
msthreshold = 400
precursormassshift = 0
selectionwindow = 0.5
msmsresolution = 25000
msminoccupation = 0.5
msmsthreshold = 75
msmsmassrange = (50,2000)
forcesinglecharge = 1
msmsresolutiondelta = -20
[FAS_LTQ_7500MS1]
msmsminoccupation = 0
msresolutiondelta = -3
msmsresolution = 7500
msmscalibration =
mscalibration =
msmassrange = (450,1200)
mstolerance = 75 ppm
timerange = (0,1500)
msresolution = 7500
msmsmassrange = (50,2000)
msmstolerance = 0.3 Da
precursormassshift = 0
selectionwindow = 0.5
forcesinglecharge = 1
msminoccupation = 0.5
msmsthreshold = 1
precursormassshiftorbi = 0
msthreshold = 1
msmsresolutiondelta = -3
[FAS_LTQ_100000MS1]
mscalibration = 718.5392, 852.5396
timerange = (0,1500)
msmsresolution = 300
forcesinglecharge = 1
msthreshold = 2500
msmsresolutiondelta = 1
msmsminoccupation = 0.5
msresolutiondelta = -53.4
msmsthresholdtype = absolute
selectionwindow = 0.5
msmsthreshold = 1
msmsmassrange = (50,2000)
msthresholdtype = absolute
msmscalibration =
msmassrange = (450,1000)
mstolerance = 5 ppm
msresolution = 110000
precursormassshift = 0
msmstolerance = 0.3 Da
msminoccupation = 0.5
precursormassshiftorbi = 0
[FAS_LTQ]
msmsminoccupation = 0.5
msresolutiondelta = 1
msmsresolution = 300
msmscalibration =
mscalibration =
msmassrange = (450,1200)
mstolerance = 0.3 Da
timerange = (0,1500)
msresolution = 700
msmsmassrange = (50,2000)
msmstolerance = 0.3 Da
precursormassshift = 0
selectionwindow = 0.5
forcesinglecharge = 1
msminoccupation = 0.5
msmsthreshold = 5
precursormassshiftorbi = 0
msthreshold = 100
msmsresolutiondelta = 1
[HCD_7500_7500]
msmsminoccupation = 0
msresolutiondelta = 0
msmscalibration =
mscalibration =
msmassrange = (450,1200)
mstolerance = 75 ppm
timerange = (0,120)
msresolution = 7500
precursormassshiftorbi = 0
msmstolerance = 75 ppm
msthreshold = 1
precursormassshift = 0
selectionwindow = 0.5
msmsresolution = 7500
msminoccupation = 0
msmsthreshold = 1
msmsmassrange = (50,2000)
forcesinglecharge = 1
msmsresolutiondelta = 0
This diff is collapsed.
#!/usr/bin/python
import os, sys, math
from optparse import OptionParser
sys.path.append('..' + os.sep + 'lib')
from lpdxDataStr import *
from lpdxSCTools import *
from lpdxTools import *
from lpdxUITools import *
from lpdxParser import *
# -c / --csv : the comma separated file for output
# -d / --dump : the scan dump file
def main():
optParser = OptionParser(usage="Usage: lpdxSCC.py <subcommand> [options] [args]\n\n\
Availible subcommands:\n\
\t msc (mass to sum composition) [mass] [sf-constraint]\n\
\t scm (sum composition to mass) [sum composition]\n\
\t sfsc (sf-constraint to sum composition) [sum composition]\n\
\t corrDP (compare 2 mass spectra with the dot-product correlation) [*.csv1] [*.csv2] [tolerance in ppm]\n\
\t corrPC (compare 2 mass spectra with the Pearson correlation) [*.csv1] [*.csv2] [tolerance in ppm]\n")
optParser.add_option("-a", "--accuracy", dest="accuracy",
help="Set accuracy for sum composition calculation. Default is 5 ppm")
(options, args) = optParser.parse_args()
# open input mfql file
if len(args) > 0:
if options.accuracy:
accuracy = 1000000 / float(options.accuracy)
else:
accuracy = 1000000 / 5
if not args[0] or not args[1]:
print "You forgot the arguments/subcommands!"
else:
if args[0] == "msc":
elscp = parseElemSeq(args[2])
rslt = lpdxSCTools.calcSFbyMass(float(args[1]), elscp, accuracy)
rsltlist = []
for i in rslt:
rsltlist.append((i.getWeight(), i))
# rsltlist = sorted(rsltlist)
if rsltlist == []:
print "No sum composition found for %s with m/z %2.f" % (elscp, float(args[1]))
for mass, scp in rsltlist:
print "%.4f" % mass, scp, "error: %.4f" % (float(args[1]) - scp.getWeight())
elif args[0] == "scm":
print "Check, if you did not forget to add the charge!"
elscp = parseElemSeq(args[1])
rslt = elscp.getWeight()
print "Weight is:", rslt, "; Double Bounds are:", elscp.get_DB()
elif args[0] == "sfsc":
elscp = parseElemSeq(args[1])
for i in elscp.get_norangeElemSeq():
print i, i.getWeight()
elif args[0] == "corrDP":
res = 1000000 / float(args[3])
# open the spectra files
s1 = open(args[1], 'r')
spec1 = []
for line in s1.readlines():
spec1.append([float(line.split(',')[0]), float(line.split(',')[1])])
s1.close()
s2 = open(args[2], 'r')
spec2 = []
for line in s2.readlines():
spec2.append([float(line.split(',')[0]), float(line.split(',')[1])])
s2.close()
# match both spectra. The result are vectors VSpec1/2 with the same dimension
spec1.sort(cmp = lambda x, y: cmp(x[0], y[0]), reverse = False)
spec2.sort(cmp = lambda x, y: cmp(x[0], y[0]), reverse = False)
vSpec1 = []
vSpec2 = []
sum = 0
peak = 0
while peak < max(len(spec1), len(spec2)) - 1:
t = spec1[peak][0] / res
if spec2[peak][0] - t < spec1[peak][0] and spec1[peak][0] < spec2[peak][0] + t:
if peak < len(spec1) - 1 and peak < len(spec2) - 1:
if not (spec2[peak][0] - t < spec1[peak + 1][0] and spec1[peak + 1][0] < spec2[peak][0] + t):
vSpec1.append(spec1[peak])
vSpec2.append(spec2[peak])
else:
if abs(spec1[peak][0] - spec2[peak][0]) > abs(spec1[peak + 1][0] - spec2[peak][0]):
vSpec1.append(spec1[peak])
vSpec2.append([0.0, 0.0])
spec2.insert(peak, [0.0, 0.0])
else:
vSpec1.append(spec1[peak])
vSpec2.append(spec2[peak])
elif spec1[peak][0] < spec2[peak][0]:
vSpec1.append(spec1[peak])
vSpec2.append([0.0, 0.0])
spec2.insert(peak, [0.0, 0.0])
elif spec1[peak][0] > spec2[peak][0]:
vSpec1.append([0.0, 0.0])
vSpec2.append(spec2[peak])
spec1.insert(peak, [0.0, 0.0])
sum += abs(spec1[peak][0] - spec2[peak][0])
peak += 1
# calc mean of vectors vSpec1/2 (which is the expectation value)
sumInt = 0.0
for p in vSpec1:
sumInt += p[1]
meanVSpec1 = sumInt / len(vSpec1)
sumInt = 0.0
for p in vSpec2:
sumInt += p[1]
meanVSpec2 = sumInt / len(vSpec2)
# substract mean from the intensities of vSpec1/2 to center the 2 vectors
for p in vSpec1:
p[1] = p[1] - meanVSpec1
for p in vSpec2:
p[1] = p[1] - meanVSpec2
# calc geometrical length of vectors vSpec1/2
sum = 0.0
for p in vSpec1:
sum += p[1] * p[1]
lenghtVSpec1 = math.sqrt(sum)
sum = 0.0
for p in vSpec2:
sum += p[1] * p[1]
lenghtVSpec2 = math.sqrt(sum)
# calc the dot product
sum = 0.0
for index in range(len(vSpec1)):
sum += vSpec1[index][1] * vSpec2[index][1]
# calc the ankle
phi = math.acos(sum / (lenghtVSpec1 * lenghtVSpec2))
print 'dot product: %.4f, similarity: %.2f %%' % (sum, 100 - (phi * 100) / math.pi)
elif args[0] == "corrPC":
res = 1000000 / float(args[3])
# open the spectra files
s1 = open(args[1], 'r')
spec1 = []
for line in s1.readlines():
spec1.append([float(line.split(',')[0]), float(line.split(',')[1])])
s1.close()
s2 = open(args[2], 'r')
spec2 = []
for line in s2.readlines():
spec2.append([float(line.split(',')[0]), float(line.split(',')[1])])
s2.close()
# match both spectra. The result are vectors VSpec1/2 with the same dimension
spec1.sort(cmp = lambda x, y: cmp(x[0], y[0]), reverse = False)
spec2.sort(cmp = lambda x, y: cmp(x[0], y[0]), reverse = False)
vSpec1 = []
vSpec2 = []
sum = 0
peak = 0
while peak < max(len(spec1), len(spec2)) - 1:
t = spec1[peak][0] / res
if spec2[peak][0] - t < spec1[peak][0] and spec1[peak][0] < spec2[peak][0] + t:
if peak < len(spec1) - 1 and peak < len(spec2) - 1:
if not (spec2[peak][0] - t < spec1[peak + 1][0] and spec1[peak + 1][0] < spec2[peak][0] + t):
vSpec1.append(spec1[peak])
vSpec2.append(spec2[peak])
else:
if abs(spec1[peak][0] - spec2[peak][0]) > abs(spec1[peak + 1][0] - spec2[peak][0]):
vSpec1.append(spec1[peak])
vSpec2.append([0.0, 0.0])
spec2.insert(peak, [0.0, 0.0])
else:
vSpec1.append(spec1[peak])
vSpec2.append(spec2[peak])
elif spec1[peak][0] < spec2[peak][0]:
vSpec1.append(spec1[peak])
vSpec2.append([0.0, 0.0])
spec2.insert(peak, [0.0, 0.0])
elif spec1[peak][0] > spec2[peak][0]:
vSpec1.append([0.0, 0.0])
vSpec2.append(spec2[peak])
spec1.insert(peak, [0.0, 0.0])
sum += abs(spec1[peak][0] - spec2[peak][0])
peak += 1
# calc mean of vectors vSpec1/2 (which is the expectation value)
sumVSpec1 = 0.0
for p in vSpec1:
sumVSpec1 += p[1]
meanVSpec1 = sumVSpec1 / len(vSpec1)
sumVSpec2 = 0.0
for p in vSpec2:
sumVSpec2 += p[1]
meanVSpec2 = sumVSpec2 / len(vSpec2)
# calc standard deviation sVSpec
sumVSpec1quad = 0.0
for p in vSpec1:
sumVSpec1quad += p[1] * p[1]
sumVSpec2quad = 0.0
for p in vSpec2:
sumVSpec2quad += p[1] * p[1]
sVSpec1 = math.sqrt(len(vSpec1) * sumVSpec1quad - (sumVSpec1 * sumVSpec1))
sVSpec2 = math.sqrt(len(vSpec2) * sumVSpec2quad - (sumVSpec2 * sumVSpec2))
# substract mean from the intensities of vSpec1/2 to center the 2 vectors
for p in vSpec1:
p[1] = p[1] - meanVSpec1