diff --git a/chemclipse/plugins/org.eclipse.chemclipse.msd.converter.supplier.amdis.ui/src/org/eclipse/chemclipse/msd/converter/supplier/amdis/ui/preferences/PreferencePage.java b/chemclipse/plugins/org.eclipse.chemclipse.msd.converter.supplier.amdis.ui/src/org/eclipse/chemclipse/msd/converter/supplier/amdis/ui/preferences/PreferencePage.java index ecda324eed..00b434d548 100644 --- a/chemclipse/plugins/org.eclipse.chemclipse.msd.converter.supplier.amdis.ui/src/org/eclipse/chemclipse/msd/converter/supplier/amdis/ui/preferences/PreferencePage.java +++ b/chemclipse/plugins/org.eclipse.chemclipse.msd.converter.supplier.amdis.ui/src/org/eclipse/chemclipse/msd/converter/supplier/amdis/ui/preferences/PreferencePage.java @@ -41,6 +41,7 @@ public void createFieldEditors() { addField(new BooleanFieldEditor(PreferenceSupplier.P_NORMALIZE_INTENSITIES, "Normalize intensities", getFieldEditorParent())); addField(new BooleanFieldEditor(PreferenceSupplier.P_EXPORT_INTENSITIES_AS_INTEGER, "Export intensities as Integer", getFieldEditorParent())); addField(new BooleanFieldEditor(PreferenceSupplier.P_PARSE_COMPOUND_INFORMATION, "Parse Compound Information (*.CID)", getFieldEditorParent())); + addField(new BooleanFieldEditor(PreferenceSupplier.P_PARSE_MOL_INFORMATION, "Parse MOL Information (*.MOL)", getFieldEditorParent())); } /* diff --git a/chemclipse/plugins/org.eclipse.chemclipse.msd.converter.supplier.amdis/src/org/eclipse/chemclipse/msd/converter/supplier/amdis/converter/misc/ConverterCID.java b/chemclipse/plugins/org.eclipse.chemclipse.msd.converter.supplier.amdis/src/org/eclipse/chemclipse/msd/converter/supplier/amdis/converter/misc/ConverterCID.java index 6fa66527f7..fd43963561 100644 --- a/chemclipse/plugins/org.eclipse.chemclipse.msd.converter.supplier.amdis/src/org/eclipse/chemclipse/msd/converter/supplier/amdis/converter/misc/ConverterCID.java +++ b/chemclipse/plugins/org.eclipse.chemclipse.msd.converter.supplier.amdis/src/org/eclipse/chemclipse/msd/converter/supplier/amdis/converter/misc/ConverterCID.java @@ -116,34 +116,6 @@ public static void transfer(List compoundList, IMassSpectra } } - private static int getRetentionTime(CompoundInformation compoundInformation) { - - String retentionTime = compoundInformation.getRetentionTime(); - if(retentionTime != null && !retentionTime.isEmpty()) { - try { - return (int)(Double.parseDouble(compoundInformation.getRetentionTime()) * IChromatogram.MINUTE_CORRELATION_FACTOR); - } catch(NumberFormatException e) { - logger.warn(e); - } - } - // - return 0; - } - - private static float getRetentionIndex(CompoundInformation compoundInformation) { - - String retentionIndex = compoundInformation.getRetentionIndex(); - if(retentionIndex != null && !retentionIndex.isEmpty()) { - try { - return Float.parseFloat(compoundInformation.getRetentionIndex()); - } catch(NumberFormatException e) { - logger.warn(e); - } - } - // - return 0.0f; - } - /** * Converts e.g. the file * --- @@ -229,4 +201,32 @@ private static CompoundInformation getCompoundInformation(IRegularLibraryMassSpe // return null; } + + private static int getRetentionTime(CompoundInformation compoundInformation) { + + String retentionTime = compoundInformation.getRetentionTime(); + if(retentionTime != null && !retentionTime.isEmpty()) { + try { + return (int)(Double.parseDouble(compoundInformation.getRetentionTime()) * IChromatogram.MINUTE_CORRELATION_FACTOR); + } catch(NumberFormatException e) { + logger.warn(e); + } + } + // + return 0; + } + + private static float getRetentionIndex(CompoundInformation compoundInformation) { + + String retentionIndex = compoundInformation.getRetentionIndex(); + if(retentionIndex != null && !retentionIndex.isEmpty()) { + try { + return Float.parseFloat(compoundInformation.getRetentionIndex()); + } catch(NumberFormatException e) { + logger.warn(e); + } + } + // + return 0.0f; + } } \ No newline at end of file diff --git a/chemclipse/plugins/org.eclipse.chemclipse.msd.converter.supplier.amdis/src/org/eclipse/chemclipse/msd/converter/supplier/amdis/converter/misc/ConverterMOL.java b/chemclipse/plugins/org.eclipse.chemclipse.msd.converter.supplier.amdis/src/org/eclipse/chemclipse/msd/converter/supplier/amdis/converter/misc/ConverterMOL.java new file mode 100644 index 0000000000..24b964431b --- /dev/null +++ b/chemclipse/plugins/org.eclipse.chemclipse.msd.converter.supplier.amdis/src/org/eclipse/chemclipse/msd/converter/supplier/amdis/converter/misc/ConverterMOL.java @@ -0,0 +1,156 @@ +/******************************************************************************* + * Copyright (c) 2021 Lablicate GmbH. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * Philip Wenig - initial API and implementation + *******************************************************************************/ +package org.eclipse.chemclipse.msd.converter.supplier.amdis.converter.misc; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.commons.io.FilenameUtils; +import org.eclipse.chemclipse.logging.core.Logger; +import org.eclipse.chemclipse.model.cas.CasSupport; +import org.eclipse.chemclipse.model.identifier.ILibraryInformation; +import org.eclipse.chemclipse.msd.model.core.IMassSpectra; +import org.eclipse.chemclipse.msd.model.core.IRegularLibraryMassSpectrum; +import org.eclipse.chemclipse.msd.model.core.IScanMSD; + +public class ConverterMOL { + + private static final Logger logger = Logger.getLogger(ConverterMOL.class); + // + private static final String MARKER_END = "M END"; + private static final Pattern patternCAS = Pattern.compile("(CAS)(\\s+)(rn)(\\s+)(=)(\\s+)((\\d+)(-?)(\\d+)(-?)(\\d+))"); + + public static Map convert(File file) { + + Map moleculeStructureMap = new HashMap<>(); + try (BufferedReader bufferedReader = new BufferedReader(new FileReader(file))) { + /* + * Header + */ + String line = null; + String name = null; + StringBuilder builder = new StringBuilder(); + // + while((line = bufferedReader.readLine()) != null) { + if(line.trim().isEmpty()) { + continue; + } else if(line.trim().equals(MARKER_END)) { + if(name != null) { + moleculeStructureMap.put(name.toUpperCase(), builder.toString().trim()); + } + name = null; + builder = new StringBuilder(); + } else { + if(name == null) { + name = line.trim(); + } + builder.append(line); + builder.append("\n"); + } + } + } catch(IOException e) { + logger.warn(e); + } + // + return moleculeStructureMap; + } + + /** + * Converts e.g. the file + * --- + * /home/.../library.msl + * to + * /home/.../library.mol + * --- + * If the file doesn't exist, null is returned. + * + * @param file + * @return File + */ + public static File getFileMOL(File file) { + + File fileMOL = null; + if(file.isFile()) { + String path = file.getParentFile().getAbsolutePath(); + String fileBaseName = FilenameUtils.getBaseName(file.getName()); + // + fileMOL = new File(path + File.separator + fileBaseName + ".MOL"); + if(!fileMOL.exists()) { + fileMOL = new File(path + File.separator + fileBaseName + ".mol"); + if(!fileMOL.exists()) { + fileMOL = null; + } + } + } + // + return fileMOL; + } + + public static void transfer(Map moleculeStructureMap, IMassSpectra massSpectra) { + + Map casNumberMap = new HashMap<>(); + for(String moleculeStructure : moleculeStructureMap.values()) { + String casNumber = extractCASNumber(moleculeStructure); + if(!casNumber.isEmpty()) { + casNumberMap.put(CasSupport.format(casNumber), moleculeStructure); + } + } + // + for(IScanMSD massSpectrum : massSpectra.getList()) { + if(massSpectrum instanceof IRegularLibraryMassSpectrum) { + IRegularLibraryMassSpectrum libraryMassSpectrum = (IRegularLibraryMassSpectrum)massSpectrum; + ILibraryInformation libraryInformation = libraryMassSpectrum.getLibraryInformation(); + /* + * Get the molecule structure. + */ + String moleculeStructure = moleculeStructureMap.get(libraryInformation.getName().toUpperCase()); + if(moleculeStructure == null) { + /* + * Use the CAS# instead. + */ + moleculeStructure = casNumberMap.get(CasSupport.format(libraryInformation.getCasNumber())); + } + /* + * Set if not null or empty. + */ + if(moleculeStructure != null && !moleculeStructure.isEmpty()) { + libraryInformation.setMoleculeStructure(moleculeStructure); + } + } + } + } + + /** + * Returns the CAS# or "" if none is available. + * ... CAS rn = 19906720, ... + * ... CAS rn = 19906-72-0, ... + * + * @return String + */ + public static String extractCASNumber(String moleculeStructure) { + + if(moleculeStructure != null && !moleculeStructure.isEmpty()) { + Matcher matcher = patternCAS.matcher(moleculeStructure); + if(matcher.find()) { + return matcher.group(7); + } + } + // + return ""; + } +} \ No newline at end of file diff --git a/chemclipse/plugins/org.eclipse.chemclipse.msd.converter.supplier.amdis/src/org/eclipse/chemclipse/msd/converter/supplier/amdis/io/MSLReader.java b/chemclipse/plugins/org.eclipse.chemclipse.msd.converter.supplier.amdis/src/org/eclipse/chemclipse/msd/converter/supplier/amdis/io/MSLReader.java index f35e5549c7..3af6132b0e 100644 --- a/chemclipse/plugins/org.eclipse.chemclipse.msd.converter.supplier.amdis/src/org/eclipse/chemclipse/msd/converter/supplier/amdis/io/MSLReader.java +++ b/chemclipse/plugins/org.eclipse.chemclipse.msd.converter.supplier.amdis/src/org/eclipse/chemclipse/msd/converter/supplier/amdis/io/MSLReader.java @@ -20,6 +20,7 @@ import java.nio.charset.Charset; import java.util.ArrayList; import java.util.List; +import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -32,6 +33,7 @@ import org.eclipse.chemclipse.msd.converter.io.IMassSpectraReader; import org.eclipse.chemclipse.msd.converter.supplier.amdis.converter.misc.CompoundInformation; import org.eclipse.chemclipse.msd.converter.supplier.amdis.converter.misc.ConverterCID; +import org.eclipse.chemclipse.msd.converter.supplier.amdis.converter.misc.ConverterMOL; import org.eclipse.chemclipse.msd.converter.supplier.amdis.model.IVendorLibraryMassSpectrum; import org.eclipse.chemclipse.msd.converter.supplier.amdis.model.VendorLibraryMassSpectrum; import org.eclipse.chemclipse.msd.converter.supplier.amdis.preferences.PreferenceSupplier; @@ -84,6 +86,16 @@ public IMassSpectra read(File file, IProgressMonitor monitor) throws FileNotFoun ConverterCID.transfer(compoundList, massSpectra); } } + /* + * MOL Information (*.MOL) + */ + if(PreferenceSupplier.isParseMolInformation()) { + File fileMOL = ConverterMOL.getFileMOL(file); + if(fileMOL != null) { + Map moleculeStructureMap = ConverterMOL.convert(fileMOL); + ConverterMOL.transfer(moleculeStructureMap, massSpectra); + } + } // return massSpectra; } diff --git a/chemclipse/plugins/org.eclipse.chemclipse.msd.converter.supplier.amdis/src/org/eclipse/chemclipse/msd/converter/supplier/amdis/io/MSPReader.java b/chemclipse/plugins/org.eclipse.chemclipse.msd.converter.supplier.amdis/src/org/eclipse/chemclipse/msd/converter/supplier/amdis/io/MSPReader.java index 8875e8eec5..80e8146c3a 100644 --- a/chemclipse/plugins/org.eclipse.chemclipse.msd.converter.supplier.amdis/src/org/eclipse/chemclipse/msd/converter/supplier/amdis/io/MSPReader.java +++ b/chemclipse/plugins/org.eclipse.chemclipse.msd.converter.supplier.amdis/src/org/eclipse/chemclipse/msd/converter/supplier/amdis/io/MSPReader.java @@ -22,6 +22,7 @@ import java.util.ArrayList; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -36,6 +37,7 @@ import org.eclipse.chemclipse.msd.converter.io.IMassSpectraReader; import org.eclipse.chemclipse.msd.converter.supplier.amdis.converter.misc.CompoundInformation; import org.eclipse.chemclipse.msd.converter.supplier.amdis.converter.misc.ConverterCID; +import org.eclipse.chemclipse.msd.converter.supplier.amdis.converter.misc.ConverterMOL; import org.eclipse.chemclipse.msd.converter.supplier.amdis.model.IVendorLibraryMassSpectrum; import org.eclipse.chemclipse.msd.converter.supplier.amdis.model.VendorLibraryMassSpectrum; import org.eclipse.chemclipse.msd.converter.supplier.amdis.preferences.PreferenceSupplier; @@ -91,6 +93,16 @@ public IMassSpectra read(File file, IProgressMonitor monitor) throws FileNotFoun ConverterCID.transfer(compoundList, massSpectra); } } + /* + * MOL Information (*.MOL) + */ + if(PreferenceSupplier.isParseMolInformation()) { + File fileMOL = ConverterMOL.getFileMOL(file); + if(fileMOL != null) { + Map moleculeStructureMap = ConverterMOL.convert(fileMOL); + ConverterMOL.transfer(moleculeStructureMap, massSpectra); + } + } // return massSpectra; } diff --git a/chemclipse/plugins/org.eclipse.chemclipse.msd.converter.supplier.amdis/src/org/eclipse/chemclipse/msd/converter/supplier/amdis/preferences/PreferenceSupplier.java b/chemclipse/plugins/org.eclipse.chemclipse.msd.converter.supplier.amdis/src/org/eclipse/chemclipse/msd/converter/supplier/amdis/preferences/PreferenceSupplier.java index 168ca96732..42ff11f777 100644 --- a/chemclipse/plugins/org.eclipse.chemclipse.msd.converter.supplier.amdis/src/org/eclipse/chemclipse/msd/converter/supplier/amdis/preferences/PreferenceSupplier.java +++ b/chemclipse/plugins/org.eclipse.chemclipse.msd.converter.supplier.amdis/src/org/eclipse/chemclipse/msd/converter/supplier/amdis/preferences/PreferenceSupplier.java @@ -36,6 +36,8 @@ public class PreferenceSupplier implements IPreferenceSupplier { public static final boolean DEF_EXPORT_INTENSITIES_AS_INTEGER = true; public static final String P_PARSE_COMPOUND_INFORMATION = "parseCompoundInformation"; public static final boolean DEF_PARSE_COMPOUND_INFORMATION = true; + public static final String P_PARSE_MOL_INFORMATION = "parseMolInformation"; + public static final boolean DEF_PARSE_MOL_INFORMATION = true; // private static IPreferenceSupplier preferenceSupplier; @@ -70,6 +72,7 @@ public Map getDefaultValues() { defaultValues.put(P_NORMALIZE_INTENSITIES, Boolean.toString(DEF_NORMALIZE_INTENSITIES)); defaultValues.put(P_EXPORT_INTENSITIES_AS_INTEGER, Boolean.toString(DEF_EXPORT_INTENSITIES_AS_INTEGER)); defaultValues.put(P_PARSE_COMPOUND_INFORMATION, Boolean.toString(DEF_PARSE_COMPOUND_INFORMATION)); + defaultValues.put(P_PARSE_MOL_INFORMATION, Boolean.toString(DEF_PARSE_MOL_INFORMATION)); return defaultValues; } @@ -120,4 +123,10 @@ public static boolean isParseCompoundInformation() { IEclipsePreferences preferences = INSTANCE().getPreferences(); return preferences.getBoolean(P_PARSE_COMPOUND_INFORMATION, DEF_PARSE_COMPOUND_INFORMATION); } + + public static boolean isParseMolInformation() { + + IEclipsePreferences preferences = INSTANCE().getPreferences(); + return preferences.getBoolean(P_PARSE_MOL_INFORMATION, DEF_PARSE_MOL_INFORMATION); + } } diff --git a/chemclipse/tests/org.eclipse.chemclipse.msd.converter.supplier.amdis.fragment.test/src/org/eclipse/chemclipse/msd/converter/supplier/amdis/converter/misc/ConverterMOL_Test.java b/chemclipse/tests/org.eclipse.chemclipse.msd.converter.supplier.amdis.fragment.test/src/org/eclipse/chemclipse/msd/converter/supplier/amdis/converter/misc/ConverterMOL_Test.java new file mode 100644 index 0000000000..3c603bf5d0 --- /dev/null +++ b/chemclipse/tests/org.eclipse.chemclipse.msd.converter.supplier.amdis.fragment.test/src/org/eclipse/chemclipse/msd/converter/supplier/amdis/converter/misc/ConverterMOL_Test.java @@ -0,0 +1,42 @@ +/******************************************************************************* + * Copyright (c) 2021 Lablicate GmbH. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * Philip Wenig - initial API and implementation + *******************************************************************************/ +package org.eclipse.chemclipse.msd.converter.supplier.amdis.converter.misc; + +import junit.framework.TestCase; + +public class ConverterMOL_Test extends TestCase { + + public void test1() { + + assertEquals("19906720", ConverterMOL.extractCASNumber(" CAS rn = 19906720, ")); + } + + public void test2() { + + assertEquals("19906-72-0", ConverterMOL.extractCASNumber(" CAS rn = 19906-72-0, ")); + } + + public void test3() { + + assertEquals("", ConverterMOL.extractCASNumber("")); + } + + public void test4() { + + assertEquals("", ConverterMOL.extractCASNumber(null)); + } + + public void test5() { + + assertEquals("", ConverterMOL.extractCASNumber(" CAS rn = ABC, ")); + } +} \ No newline at end of file