% Journal article: Ruttkies, Neumann and Posch (2019), BMC Bioinformatics 20.
@Article{IPB-2443,
  author   = {Ruttkies, C. and Neumann, S. and Posch, S.},
  title    = {Improving {MetFrag} with statistical learning of fragment annotations},
  journal  = {BMC Bioinformatics},
  year     = {2019},
  volume   = {20},
  pages    = {376},
  doi      = {10.1186/s12859-019-2954-7},
  url      = {https://doi.org/10.1186/s12859-019-2954-7},
  abstract = {Background: Molecule identification is a crucial step in metabolomics and environmental sciences. Besides in silico fragmentation, as performed by MetFrag, also machine learning and statistical methods evolved, showing an improvement in molecule annotation based on MS/MS data. In this work we present a new statistical scoring method where annotations of m/z fragment peaks to fragment-structures are learned in a training step. Based on a Bayesian model, two additional scoring terms are integrated into the new MetFrag2.4.5 and evaluated on the test data set of the CASMI 2016 contest. Results: The results on the 87 MS/MS spectra from positive and negative mode show a substantial improvement of the results compared to submissions made by the former MetFrag approach. Top1 rankings increased from 5 to 21 and Top10 rankings from 39 to 55 both showing higher values than for CSI:IOKR, the winner of the CASMI 2016 contest. For the negative mode spectra, MetFrag's statistical scoring outperforms all other participants which submitted results for this type of spectra. Conclusions: This study shows how statistical learning can improve molecular structure identification based on MS/MS data compared on the same method using combinatorial in silico fragmentation only. MetFrag2.4.5 shows especially in negative mode a better performance compared to the other participating approaches.},
}