Skip to content

Commit

Permalink
Fixed #716 - Libraries *.msl, *.msp - parse mol file (*.MOL)
Browse files Browse the repository at this point in the history
  • Loading branch information
eselmeister committed Aug 23, 2021
1 parent 25a0695 commit 9f466b4
Show file tree
Hide file tree
Showing 7 changed files with 260 additions and 28 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ public void createFieldEditors() {
addField(new BooleanFieldEditor(PreferenceSupplier.P_NORMALIZE_INTENSITIES, "Normalize intensities", getFieldEditorParent()));
addField(new BooleanFieldEditor(PreferenceSupplier.P_EXPORT_INTENSITIES_AS_INTEGER, "Export intensities as Integer", getFieldEditorParent()));
addField(new BooleanFieldEditor(PreferenceSupplier.P_PARSE_COMPOUND_INFORMATION, "Parse Compound Information (*.CID)", getFieldEditorParent()));
addField(new BooleanFieldEditor(PreferenceSupplier.P_PARSE_MOL_INFORMATION, "Parse MOL Information (*.MOL)", getFieldEditorParent()));
}

/*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,34 +116,6 @@ public static void transfer(List<CompoundInformation> compoundList, IMassSpectra
}
}

/**
 * Parses the retention time text of the compound information and scales it
 * by {@link IChromatogram#MINUTE_CORRELATION_FACTOR}.
 *
 * @param compoundInformation
 *            the compound entry to read the retention time from
 * @return the scaled retention time truncated to an int, or 0 if the text
 *         is null, empty or not a number
 */
private static int getRetentionTime(CompoundInformation compoundInformation) {

	String value = compoundInformation.getRetentionTime();
	if(value == null || value.isEmpty()) {
		return 0;
	}
	//
	int retentionTime = 0;
	try {
		double parsed = Double.parseDouble(value);
		retentionTime = (int)(parsed * IChromatogram.MINUTE_CORRELATION_FACTOR);
	} catch(NumberFormatException e) {
		// Unparsable text is only reported, the default of 0 is kept.
		logger.warn(e);
	}
	//
	return retentionTime;
}

/**
 * Parses the retention index text of the compound information.
 *
 * @param compoundInformation
 *            the compound entry to read the retention index from
 * @return the parsed retention index, or 0.0f if the text is null, empty or
 *         not a number
 */
private static float getRetentionIndex(CompoundInformation compoundInformation) {

	String value = compoundInformation.getRetentionIndex();
	if(value == null || value.isEmpty()) {
		return 0.0f;
	}
	//
	float retentionIndex = 0.0f;
	try {
		retentionIndex = Float.parseFloat(value);
	} catch(NumberFormatException e) {
		// Unparsable text is only reported, the default of 0.0f is kept.
		logger.warn(e);
	}
	//
	return retentionIndex;
}

/**
* Converts e.g. the file
* ---
Expand Down Expand Up @@ -229,4 +201,32 @@ private static CompoundInformation getCompoundInformation(IRegularLibraryMassSpe
//
return null;
}

/**
 * Parses the retention time text of the compound information and scales it
 * by {@link IChromatogram#MINUTE_CORRELATION_FACTOR}.
 *
 * @param compoundInformation
 *            the compound entry to read the retention time from
 * @return the scaled retention time truncated to an int, or 0 if the text
 *         is null, empty or not a number
 */
private static int getRetentionTime(CompoundInformation compoundInformation) {

	String value = compoundInformation.getRetentionTime();
	if(value == null || value.isEmpty()) {
		return 0;
	}
	//
	int retentionTime = 0;
	try {
		double parsed = Double.parseDouble(value);
		retentionTime = (int)(parsed * IChromatogram.MINUTE_CORRELATION_FACTOR);
	} catch(NumberFormatException e) {
		// Unparsable text is only reported, the default of 0 is kept.
		logger.warn(e);
	}
	//
	return retentionTime;
}

/**
 * Parses the retention index text of the compound information.
 *
 * @param compoundInformation
 *            the compound entry to read the retention index from
 * @return the parsed retention index, or 0.0f if the text is null, empty or
 *         not a number
 */
private static float getRetentionIndex(CompoundInformation compoundInformation) {

	String value = compoundInformation.getRetentionIndex();
	if(value == null || value.isEmpty()) {
		return 0.0f;
	}
	//
	float retentionIndex = 0.0f;
	try {
		retentionIndex = Float.parseFloat(value);
	} catch(NumberFormatException e) {
		// Unparsable text is only reported, the default of 0.0f is kept.
		logger.warn(e);
	}
	//
	return retentionIndex;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
/*******************************************************************************
* Copyright (c) 2021 Lablicate GmbH.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Philip Wenig - initial API and implementation
*******************************************************************************/
package org.eclipse.chemclipse.msd.converter.supplier.amdis.converter.misc;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.FilenameUtils;
import org.eclipse.chemclipse.logging.core.Logger;
import org.eclipse.chemclipse.model.cas.CasSupport;
import org.eclipse.chemclipse.model.identifier.ILibraryInformation;
import org.eclipse.chemclipse.msd.model.core.IMassSpectra;
import org.eclipse.chemclipse.msd.model.core.IRegularLibraryMassSpectrum;
import org.eclipse.chemclipse.msd.model.core.IScanMSD;

/**
 * Reads the molecule structures of a *.MOL file that sits next to a library
 * file and assigns them to the matching library mass spectra, either by
 * compound name or by CAS number.
 */
public class ConverterMOL {

	private static final Logger logger = Logger.getLogger(ConverterMOL.class);
	/*
	 * End marker of a record. Molfiles write the terminator as "M  END" (two
	 * spaces), hence any amount of whitespace between "M" and "END" is
	 * accepted instead of comparing against one fixed literal.
	 */
	private static final Pattern patternEnd = Pattern.compile("M\\s+END");
	private static final Pattern patternCAS = Pattern.compile("(CAS)(\\s+)(rn)(\\s+)(=)(\\s+)((\\d+)(-?)(\\d+)(-?)(\\d+))");

	/**
	 * Parses the given file into a map of molecule structures.
	 * <p>
	 * A record starts at the first non-blank line and stops at the end marker
	 * line. The first non-blank line of a record is used as its name. The map
	 * key is the trimmed, upper-cased name, the value is the trimmed text of
	 * the record without blank lines and without the end marker line. A
	 * trailing record that has no end marker is discarded. If two records
	 * share a name, the later one wins.
	 * <p>
	 * NOTE(review): blank lines are dropped from the stored text. If the
	 * stored value is handed to a strict molfile parser, the missing blank
	 * header lines could matter - confirm against the consumers.
	 *
	 * @param file
	 *            the *.MOL file to read
	 * @return the parsed structures; on an I/O error the problem is logged
	 *         and the entries collected so far are returned
	 */
	public static Map<String, String> convert(File file) {

		Map<String, String> moleculeStructureMap = new HashMap<>();
		try (BufferedReader bufferedReader = new BufferedReader(new FileReader(file))) {
			String line;
			String name = null;
			StringBuilder builder = new StringBuilder();
			//
			while((line = bufferedReader.readLine()) != null) {
				String content = line.trim();
				if(content.isEmpty()) {
					continue;
				}
				//
				if(patternEnd.matcher(content).matches()) {
					/*
					 * The record is complete, store it and start a new one.
					 */
					if(name != null) {
						moleculeStructureMap.put(normalizeName(name), builder.toString().trim());
					}
					name = null;
					builder = new StringBuilder();
				} else {
					if(name == null) {
						name = content;
					}
					/*
					 * Keep the untrimmed line, the column layout of the
					 * record is preserved.
					 */
					builder.append(line);
					builder.append("\n");
				}
			}
		} catch(IOException e) {
			logger.warn(e);
		}
		//
		return moleculeStructureMap;
	}

	/**
	 * Converts e.g. the file
	 * ---
	 * /home/.../library.msl
	 * to
	 * /home/.../library.mol
	 * ---
	 * The upper case extension (*.MOL) is probed first, then the lower case
	 * one (*.mol). If the given file is null, is not a regular file or no
	 * *.MOL file exists next to it, null is returned.
	 *
	 * @param file
	 *            the library file
	 * @return File the accompanying *.MOL file or null
	 */
	public static File getFileMOL(File file) {

		if(file == null || !file.isFile()) {
			return null;
		}
		/*
		 * Resolve to an absolute file first. getParentFile() returns null for
		 * a relative path without a directory part, e.g. "library.msl".
		 */
		File directory = file.getAbsoluteFile().getParentFile();
		String fileBaseName = FilenameUtils.getBaseName(file.getName());
		//
		for(String extension : new String[]{".MOL", ".mol"}) {
			File fileMOL = new File(directory, fileBaseName + extension);
			if(fileMOL.exists()) {
				return fileMOL;
			}
		}
		//
		return null;
	}

	/**
	 * Assigns the molecule structures to the library mass spectra.
	 * <p>
	 * Only spectra of type {@link IRegularLibraryMassSpectrum} are handled.
	 * The structure is looked up by the name of the library information
	 * first. If that fails, the CAS number of the library information is
	 * matched against the CAS numbers extracted from the structures. Both
	 * sides are passed through {@code CasSupport.format} before they are
	 * compared. Nothing is set if no non-empty structure is found.
	 *
	 * @param moleculeStructureMap
	 *            the structures as returned by {@link #convert(File)}
	 * @param massSpectra
	 *            the spectra whose library information shall be completed
	 */
	public static void transfer(Map<String, String> moleculeStructureMap, IMassSpectra massSpectra) {

		/*
		 * Index the structures by their CAS number.
		 */
		Map<String, String> casNumberMap = new HashMap<>();
		for(String moleculeStructure : moleculeStructureMap.values()) {
			String casNumber = extractCASNumber(moleculeStructure);
			if(!casNumber.isEmpty()) {
				casNumberMap.put(CasSupport.format(casNumber), moleculeStructure);
			}
		}
		//
		for(IScanMSD massSpectrum : massSpectra.getList()) {
			if(massSpectrum instanceof IRegularLibraryMassSpectrum) {
				IRegularLibraryMassSpectrum libraryMassSpectrum = (IRegularLibraryMassSpectrum)massSpectrum;
				ILibraryInformation libraryInformation = libraryMassSpectrum.getLibraryInformation();
				/*
				 * Get the molecule structure.
				 * A missing name must not abort the transfer.
				 */
				String name = libraryInformation.getName();
				String moleculeStructure = (name != null) ? moleculeStructureMap.get(normalizeName(name)) : null;
				if(moleculeStructure == null) {
					/*
					 * Use the CAS# instead.
					 */
					moleculeStructure = casNumberMap.get(CasSupport.format(libraryInformation.getCasNumber()));
				}
				/*
				 * Set if not null or empty.
				 */
				if(moleculeStructure != null && !moleculeStructure.isEmpty()) {
					libraryInformation.setMoleculeStructure(moleculeStructure);
				}
			}
		}
	}

	/**
	 * Returns the CAS# or "" if none is available.
	 * ... CAS rn = 19906720, ...
	 * ... CAS rn = 19906-72-0, ...
	 *
	 * @param moleculeStructure
	 *            the text of a molecule structure, may be null
	 * @return String the CAS number as written in the text, with or without
	 *         dashes
	 */
	public static String extractCASNumber(String moleculeStructure) {

		if(moleculeStructure != null && !moleculeStructure.isEmpty()) {
			Matcher matcher = patternCAS.matcher(moleculeStructure);
			if(matcher.find()) {
				// Group 7 spans the complete number including optional dashes.
				return matcher.group(7);
			}
		}
		//
		return "";
	}

	/**
	 * Builds the map key for a compound name, so that storing and looking up
	 * a structure use the same normalization.
	 */
	private static String normalizeName(String name) {

		return name.trim().toUpperCase();
	}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

Expand All @@ -32,6 +33,7 @@
import org.eclipse.chemclipse.msd.converter.io.IMassSpectraReader;
import org.eclipse.chemclipse.msd.converter.supplier.amdis.converter.misc.CompoundInformation;
import org.eclipse.chemclipse.msd.converter.supplier.amdis.converter.misc.ConverterCID;
import org.eclipse.chemclipse.msd.converter.supplier.amdis.converter.misc.ConverterMOL;
import org.eclipse.chemclipse.msd.converter.supplier.amdis.model.IVendorLibraryMassSpectrum;
import org.eclipse.chemclipse.msd.converter.supplier.amdis.model.VendorLibraryMassSpectrum;
import org.eclipse.chemclipse.msd.converter.supplier.amdis.preferences.PreferenceSupplier;
Expand Down Expand Up @@ -84,6 +86,16 @@ public IMassSpectra read(File file, IProgressMonitor monitor) throws FileNotFoun
ConverterCID.transfer(compoundList, massSpectra);
}
}
/*
* MOL Information (*.MOL)
*/
if(PreferenceSupplier.isParseMolInformation()) {
File fileMOL = ConverterMOL.getFileMOL(file);
if(fileMOL != null) {
Map<String, String> moleculeStructureMap = ConverterMOL.convert(fileMOL);
ConverterMOL.transfer(moleculeStructureMap, massSpectra);
}
}
//
return massSpectra;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
Expand All @@ -36,6 +37,7 @@
import org.eclipse.chemclipse.msd.converter.io.IMassSpectraReader;
import org.eclipse.chemclipse.msd.converter.supplier.amdis.converter.misc.CompoundInformation;
import org.eclipse.chemclipse.msd.converter.supplier.amdis.converter.misc.ConverterCID;
import org.eclipse.chemclipse.msd.converter.supplier.amdis.converter.misc.ConverterMOL;
import org.eclipse.chemclipse.msd.converter.supplier.amdis.model.IVendorLibraryMassSpectrum;
import org.eclipse.chemclipse.msd.converter.supplier.amdis.model.VendorLibraryMassSpectrum;
import org.eclipse.chemclipse.msd.converter.supplier.amdis.preferences.PreferenceSupplier;
Expand Down Expand Up @@ -91,6 +93,16 @@ public IMassSpectra read(File file, IProgressMonitor monitor) throws FileNotFoun
ConverterCID.transfer(compoundList, massSpectra);
}
}
/*
* MOL Information (*.MOL)
*/
if(PreferenceSupplier.isParseMolInformation()) {
File fileMOL = ConverterMOL.getFileMOL(file);
if(fileMOL != null) {
Map<String, String> moleculeStructureMap = ConverterMOL.convert(fileMOL);
ConverterMOL.transfer(moleculeStructureMap, massSpectra);
}
}
//
return massSpectra;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ public class PreferenceSupplier implements IPreferenceSupplier {
public static final boolean DEF_EXPORT_INTENSITIES_AS_INTEGER = true;
public static final String P_PARSE_COMPOUND_INFORMATION = "parseCompoundInformation";
public static final boolean DEF_PARSE_COMPOUND_INFORMATION = true;
public static final String P_PARSE_MOL_INFORMATION = "parseMolInformation";
public static final boolean DEF_PARSE_MOL_INFORMATION = true;
//
private static IPreferenceSupplier preferenceSupplier;

Expand Down Expand Up @@ -70,6 +72,7 @@ public Map<String, String> getDefaultValues() {
defaultValues.put(P_NORMALIZE_INTENSITIES, Boolean.toString(DEF_NORMALIZE_INTENSITIES));
defaultValues.put(P_EXPORT_INTENSITIES_AS_INTEGER, Boolean.toString(DEF_EXPORT_INTENSITIES_AS_INTEGER));
defaultValues.put(P_PARSE_COMPOUND_INFORMATION, Boolean.toString(DEF_PARSE_COMPOUND_INFORMATION));
defaultValues.put(P_PARSE_MOL_INFORMATION, Boolean.toString(DEF_PARSE_MOL_INFORMATION));
return defaultValues;
}

Expand Down Expand Up @@ -120,4 +123,10 @@ public static boolean isParseCompoundInformation() {
IEclipsePreferences preferences = INSTANCE().getPreferences();
return preferences.getBoolean(P_PARSE_COMPOUND_INFORMATION, DEF_PARSE_COMPOUND_INFORMATION);
}

/**
 * Reads the preference stored under {@link #P_PARSE_MOL_INFORMATION}.
 *
 * @return the stored value, or {@link #DEF_PARSE_MOL_INFORMATION} if none
 *         is stored
 */
public static boolean isParseMolInformation() {

	return INSTANCE().getPreferences().getBoolean(P_PARSE_MOL_INFORMATION, DEF_PARSE_MOL_INFORMATION);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*******************************************************************************
* Copyright (c) 2021 Lablicate GmbH.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Philip Wenig - initial API and implementation
*******************************************************************************/
package org.eclipse.chemclipse.msd.converter.supplier.amdis.converter.misc;

import junit.framework.TestCase;

/**
 * Checks the extraction of the CAS number from the text of a molecule
 * structure.
 */
public class ConverterMOL_Test extends TestCase {

	/*
	 * CAS number without dashes.
	 */
	public void test1() {

		String casNumber = ConverterMOL.extractCASNumber(" CAS rn = 19906720, ");
		assertEquals("19906720", casNumber);
	}

	/*
	 * CAS number with dashes, the dashes are kept.
	 */
	public void test2() {

		String casNumber = ConverterMOL.extractCASNumber(" CAS rn = 19906-72-0, ");
		assertEquals("19906-72-0", casNumber);
	}

	/*
	 * Empty text.
	 */
	public void test3() {

		String casNumber = ConverterMOL.extractCASNumber("");
		assertEquals("", casNumber);
	}

	/*
	 * Missing text.
	 */
	public void test4() {

		String casNumber = ConverterMOL.extractCASNumber(null);
		assertEquals("", casNumber);
	}

	/*
	 * The value is not numeric.
	 */
	public void test5() {

		String casNumber = ConverterMOL.extractCASNumber(" CAS rn = ABC, ");
		assertEquals("", casNumber);
	}
}

0 comments on commit 9f466b4

Please sign in to comment.