Skip to content

Commit

Permalink
feat: implement SmilesParser to allow SMARTS features (#156)
Browse files Browse the repository at this point in the history
Closes: #151
  • Loading branch information
targos authored Feb 24, 2023
1 parent 9d2dd13 commit 2ca12ed
Show file tree
Hide file tree
Showing 7 changed files with 319 additions and 11 deletions.
144 changes: 144 additions & 0 deletions __tests__/SmilesParser.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
'use strict';

const { SmilesParser, Molecule } = require('../minimal');

// Plain SMILES parsed with a default parser must yield a complete molecule
// (not a query fragment) with the expected number of atoms.
it.each([
  ['COCO', { atoms: 4 }],
  ['CC(=O)O', { atoms: 4 }],
])('should parse normal SMILES %s', (input, expected) => {
  const molecule = new SmilesParser().parseMolecule(input);
  expect(molecule.isFragment()).toBe(false);
  expect(molecule.getAllAtoms()).toBe(expected.atoms);
});

// In explicit SMARTS mode every input, even one that is also valid SMILES,
// is expected to come back flagged as a fragment.
it.each([
  ['COCO', { atoms: 4 }],
  ['[C,c]', { atoms: 1 }],
  ['[R0]', { atoms: 1 }],
])('should parse SMARTS %s', (pattern, expected) => {
  const smartsParser = new SmilesParser({ smartsMode: 'smarts' });
  const query = smartsParser.parseMolecule(pattern);
  expect(query.isFragment()).toBe(true);
  expect(query.getAllAtoms()).toBe(expected.atoms);
});

// In 'guess' mode a single parser decides per input whether it is SMILES or SMARTS.
it('should guess SMARTS', () => {
  const guessingParser = new SmilesParser({ smartsMode: 'guess' });
  // Each entry: [input, whether the parsed result must be a fragment].
  const cases = [
    ['COCO', false],
    ['[C,c]', true],
  ];
  for (const [input, shouldBeFragment] of cases) {
    expect(guessingParser.parseMolecule(input).isFragment()).toBe(
      shouldBeFragment,
    );
  }
});

// The same CACTVS-flavoured pattern must parse by default and be rejected
// (with an error mentioning 'z') once `noCactvs` is enabled.
it('should optionally not parse CACTVS', () => {
  const pattern = '[C;z3]';

  const lenientParser = new SmilesParser({
    smartsMode: 'smarts',
  });
  expect(lenientParser.parseMolecule(pattern).getAllAtoms()).toBe(1);

  const strictParser = new SmilesParser({
    smartsMode: 'smarts',
    noCactvs: true,
  });
  expect(() => strictParser.parseMolecule(pattern)).toThrow(/'z'/);
});

// Parsing cubane with and without coordinate templates (same seed, same
// target molecule) is expected to give different ID coordinates.
it('should optionally skip coordinate templates', () => {
  const cubane = 'C12C3C4C1C5C2C3C45';
  const target = new Molecule(0, 0);

  // Seeds the given parser, parses cubane into the shared molecule and
  // returns the resulting ID coordinates.
  const layoutWith = (parser) => {
    parser.setRandomSeed(1);
    parser.parseMolecule(cubane, { molecule: target });
    return target.getIDCoordinates();
  };

  const templatedCoords = layoutWith(new SmilesParser());
  const untemplatedCoords = layoutWith(
    new SmilesParser({
      skipCoordinateTemplates: true,
    }),
  );
  expect(untemplatedCoords).not.toBe(templatedCoords);
});

// '[CH4]' counts as one atom by default and as five atoms once
// `makeHydrogenExplicit` is enabled. Both parses reuse the same molecule.
it('should optionally make hydrogens explicit', () => {
  const methane = '[CH4]';
  const target = new Molecule(0, 0);

  const implicitParser = new SmilesParser({ smartsMode: 'smarts' });
  implicitParser.parseMolecule(methane, { molecule: target });
  expect(target.getAllAtoms()).toBe(1);

  const explicitParser = new SmilesParser({
    smartsMode: 'smarts',
    makeHydrogenExplicit: true,
  });
  explicitParser.parseMolecule(methane, { molecule: target });
  expect(target.getAllAtoms()).toBe(5);
});

// Checks that repeated parses give identical ID coordinates, both before and
// after a seed has been set on the parser.
it('should allow to set random seed', () => {
  const smiles = 'C1CN2CCN1CC2';
  const parser = new SmilesParser();
  const parseToIDCoordinates = () =>
    parser.parseMolecule(smiles).getIDCoordinates();

  const unseededFirst = parseToIDCoordinates();
  const unseededSecond = parseToIDCoordinates();
  // TODO: Find a SMILES that goes through the random branch of coordinate invention.
  // expect(unseededFirst).not.toBe(unseededSecond);
  expect(unseededFirst).toBe(unseededSecond);

  parser.setRandomSeed(1);
  const seededFirst = parseToIDCoordinates();
  const seededSecond = parseToIDCoordinates();
  expect(seededFirst).toBe(seededSecond);
});

// The warning text is empty unless `createSmartsWarnings` was requested when
// the parser was constructed.
it('should create smarts warnings', () => {
  const pattern = '[R9]';

  const silentParser = new SmilesParser({
    smartsMode: 'smarts',
  });
  silentParser.parseMolecule(pattern);
  expect(silentParser.getSmartsWarning()).toBe('');

  const warningParser = new SmilesParser({
    smartsMode: 'smarts',
    createSmartsWarnings: true,
  });
  warningParser.parseMolecule(pattern);
  const warning = warningParser.getSmartsWarning();
  expect(warning).toBe('Unresolved SMARTS features: R9');
});

// When a target molecule is supplied, parseMolecule must fill and return
// that very instance.
it('should parse into the passed molecule', () => {
  const parser = new SmilesParser();
  const target = new Molecule(0, 0);
  const returned = parser.parseMolecule('COCO', { molecule: target });
  expect(returned.toSmiles()).toBe('COCO');
  expect(returned).toBe(target);
});

// Parses the same SMILES twice into one molecule: first with coordinate
// invention (atom 0 gets a non-zero x), then with `noCoordinates: true`
// (atom 0 stays at x = 0).
it('should optionally not invent coordinates', () => {
  const parser = new SmilesParser();
  const molecule = new Molecule(0, 0);
  parser.parseMolecule('COCO', { molecule, noCoordinates: false });
  expect(molecule.getAtomX(0)).not.toBe(0);
  parser.parseMolecule('COCO', { molecule, noCoordinates: true });
  expect(molecule.getAtomX(0)).toBe(0);
});

// Parses a SMILES with double-bond stereo markers twice into one molecule.
// The ID code obtained with stereo parsing must differ from the one obtained
// with `noStereo: true`.
it('should optionally not parse stereo features', () => {
  const parser = new SmilesParser();
  const vitaminA = 'C/C(=C\\CO)/C=C/C=C(/C)\\C=C\\C1=C(C)CCCC1(C)C';
  const molecule = new Molecule(0, 0);
  parser.parseMolecule(vitaminA, { molecule, noStereo: false });
  const idCodeWithStereo = molecule.getIDCode();
  parser.parseMolecule(vitaminA, { molecule, noStereo: true });
  const idCodeWithoutStereo = molecule.getIDCode();
  expect(idCodeWithStereo).not.toBe(idCodeWithoutStereo);
});

// A reaction SMILES must round-trip through parseReaction/toSmiles and report
// the expected reactant, product and catalyst values.
it('should parse reactions', () => {
  const reactionSmiles = 'COCO>>COC.O';
  const rxn = new SmilesParser().parseReaction(reactionSmiles);
  expect(rxn.getProducts()).toBe(1);
  expect(rxn.getReactants()).toBe(1);
  expect(rxn.getCatalysts()).toBe(0);
  expect(rxn.toSmiles()).toBe('COCO>>COC.O');
});
11 changes: 11 additions & 0 deletions __tests__/__snapshots__/library.js.snap
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,15 @@ exports[`prototype properties of SSSearcherWithIndex 1`] = `

exports[`prototype properties of SVGRenderer 1`] = `[]`;

exports[`prototype properties of SmilesParser 1`] = `
[
"getSmartsWarning",
"parseMolecule",
"parseReaction",
"setRandomSeed",
]
`;

exports[`prototype properties of StructureEditor 1`] = `
[
"getIDCode",
Expand Down Expand Up @@ -719,6 +728,8 @@ exports[`static properties of SVGRenderer 1`] = `
]
`;

exports[`static properties of SmilesParser 1`] = `[]`;

exports[`static properties of StructureEditor 1`] = `
[
"addPasteHandler",
Expand Down
1 change: 1 addition & 0 deletions __tests__/library.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ const minimalAPI = [
'Reaction',
'RingCollection',
'SDFileParser',
'SmilesParser',
'SSSearcher',
'SSSearcherWithIndex',
'Util',
Expand Down
3 changes: 3 additions & 0 deletions minimal.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ export {
IDepictorOptions,
Reaction,
SDFileParser,
ISmilesParserOptions,
ISmilesParserParseMoleculeOptions,
SmilesParser,
SSSearcher,
SSSearcherWithIndex,
Util,
Expand Down
16 changes: 7 additions & 9 deletions src/com/actelion/research/gwt/minimal/JSMolecule.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@ public static native JSMolecule fromSmiles(String smiles, JavaScriptObject optio
throws Exception
/*-{
options = options || {};
var coordinates = !options.noCoordinates;
var stereo = !options.noStereo;
return @com.actelion.research.gwt.minimal.JSMolecule::fromSmiles(Ljava/lang/String;ZZ)(smiles, coordinates, stereo);
var createCoordinates = !options.noCoordinates;
var readStereoFeatures = !options.noStereo;
return @com.actelion.research.gwt.minimal.JSMolecule::fromSmiles(Ljava/lang/String;ZZ)(smiles, createCoordinates, readStereoFeatures);
}-*/;

public static JSMolecule fromMolfile(String molfile) throws Exception {
Expand Down Expand Up @@ -248,13 +248,11 @@ private void addImplicitHydrogens(int atomNumber) {
}

@JsIgnore
public static JSMolecule fromSmiles(String smiles, boolean ensure2DCoordinates,
boolean readStereoFeatures) throws Exception {
public static JSMolecule fromSmiles(String smiles, boolean createCoordinates,
boolean readStereoFeatures) throws Exception {
SmilesParser parser = new SmilesParser();
JSMolecule mol = new JSMolecule();
new SmilesParser().parse(mol.oclMolecule, smiles.getBytes(), false, readStereoFeatures);
if (ensure2DCoordinates) {
mol.inventCoordinates();
}
parser.parse(mol.oclMolecule, smiles.getBytes(), createCoordinates, readStereoFeatures);
return mol;
}

Expand Down
78 changes: 78 additions & 0 deletions src/com/actelion/research/gwt/minimal/JSSmilesParser.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
package com.actelion.research.gwt.minimal;

import com.actelion.research.chem.*;
import com.google.gwt.core.client.JavaScriptObject;
import jsinterop.annotations.*;

/**
 * JavaScript-facing wrapper around the underlying Java {@code SmilesParser},
 * exported to JavaScript under the name {@code SmilesParser}.
 */
@JsType(name = "SmilesParser")
public class JSSmilesParser {
  private SmilesParser oclParser;

  /**
   * Creates a parser configured from a JavaScript options object.
   *
   * @param options object that may carry {@code smartsMode},
   *     {@code createSmartsWarnings}, {@code skipCoordinateTemplates},
   *     {@code makeHydrogenExplicit} and {@code noCactvs}; a falsy value is
   *     treated as an empty object
   */
  public JSSmilesParser(JavaScriptObject options) {
    init(options);
  }

  /**
   * Reads the options on the JavaScript side, applying the defaults
   * ({@code 'smiles'} for {@code smartsMode}, {@code false} for every flag),
   * and forwards them to the Java {@code init} overload.
   */
  private native void init(JavaScriptObject options)
  /*-{
    options = options || {};
    var smartsMode = options.smartsMode || 'smiles';
    var createSmartsWarnings = options.createSmartsWarnings || false;
    var skipCoordinateTemplates = options.skipCoordinateTemplates || false;
    var makeHydrogenExplicit = options.makeHydrogenExplicit || false;
    var noCactvs = options.noCactvs || false;
    this.@com.actelion.research.gwt.minimal.JSSmilesParser::init(Ljava/lang/String;ZZZZ)(smartsMode, createSmartsWarnings, skipCoordinateTemplates, makeHydrogenExplicit, noCactvs);
  }-*/;

  /**
   * Translates the individual options into the parser's mode bit field and
   * creates the wrapped parser.
   *
   * @param smartsMode {@code "smarts"} or {@code "guess"}; any other value
   *     keeps the SMILES mode
   * @param createSmartsWarnings passed to the {@code SmilesParser} constructor
   * @param skipCoordinateTemplates adds {@code MODE_SKIP_COORDINATE_TEMPLATES}
   * @param makeHydrogenExplicit adds {@code MODE_MAKE_HYDROGEN_EXPLICIT}
   * @param noCactvs adds {@code MODE_NO_CACTUS_SYNTAX}
   */
  private void init(String smartsMode, boolean createSmartsWarnings,
      boolean skipCoordinateTemplates, boolean makeHydrogenExplicit,
      boolean noCactvs) {
    int mode = SmilesParser.SMARTS_MODE_IS_SMILES;
    switch (smartsMode) {
      case "smarts":
        mode = SmilesParser.SMARTS_MODE_IS_SMARTS;
        break;
      case "guess":
        mode = SmilesParser.SMARTS_MODE_GUESS;
        break;
    }
    if (makeHydrogenExplicit) {
      mode |= SmilesParser.MODE_MAKE_HYDROGEN_EXPLICIT;
    }
    if (skipCoordinateTemplates) {
      mode |= SmilesParser.MODE_SKIP_COORDINATE_TEMPLATES;
    }
    if (noCactvs) {
      mode |= SmilesParser.MODE_NO_CACTUS_SYNTAX;
    }
    oclParser = new SmilesParser(mode, createSmartsWarnings);
  }

  /**
   * Sets the random seed on the wrapped parser.
   *
   * @param seed the seed; widened to {@code long} before being passed on
   */
  public void setRandomSeed(int seed) {
    oclParser.setRandomSeed((long)seed);
  }

  /**
   * Parses a SMILES string into a molecule.
   *
   * <p>Options read on the JavaScript side: {@code molecule} (target to parse
   * into; a new {@code JSMolecule} is created when absent),
   * {@code noCoordinates} and {@code noStereo} (both negated before being
   * forwarded).
   *
   * @param smiles the SMILES string to parse
   * @param options JavaScript options object; a falsy value is treated as empty
   * @return the molecule that was parsed into
   */
  public native JSMolecule parseMolecule(String smiles, JavaScriptObject options)
  /*-{
    options = options || {};
    var molecule = options.molecule || @com.actelion.research.gwt.minimal.JSMolecule::new()();
    var createCoordinates = !options.noCoordinates;
    var readStereoFeatures = !options.noStereo;
    return this.@com.actelion.research.gwt.minimal.JSSmilesParser::parseMolecule(Lcom/actelion/research/gwt/minimal/JSMolecule;Ljava/lang/String;ZZ)(molecule, smiles, createCoordinates, readStereoFeatures);
  }-*/;

  /**
   * Runs the wrapped parser on {@code smiles}, writing the result into the
   * stereo molecule held by {@code molecule}.
   *
   * @return the same {@code molecule} instance that was passed in
   */
  private JSMolecule parseMolecule(JSMolecule molecule, String smiles,
      boolean createCoordinates, boolean readStereoFeatures) throws Exception {
    oclParser.parse(molecule.getStereoMolecule(), smiles.getBytes(), createCoordinates, readStereoFeatures);
    return molecule;
  }

  /**
   * Parses a reaction SMILES and wraps the result in a {@code JSReaction}.
   *
   * @param smiles the reaction SMILES to parse
   */
  public JSReaction parseReaction(String smiles) throws Exception {
    return new JSReaction(oclParser.parseReaction(smiles));
  }

  /** Returns the SMARTS warning string reported by the wrapped parser. */
  public String getSmartsWarning() {
    return oclParser.getSmartsWarning();
  }
}
77 changes: 75 additions & 2 deletions types.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@

export interface IMoleculeFromSmilesOptions {
/**
* Disable coordinate invention.
* @default `false`
*/
noCoordinates?: boolean;

/**
* Disable stereo features parsing.
* @default `false`
*/
noStereo?: boolean;
}
Expand Down Expand Up @@ -2573,6 +2575,77 @@ export declare class Molecule {
setAssignParitiesToNitrogen(b: boolean): void;
}

/**
* Options accepted by the `SmilesParser` constructor.
*/
export interface ISmilesParserOptions {
/**
* Enable SMARTS parsing with `'smarts'` or `'guess'`.
* @default `'smiles'`
*/
smartsMode?: 'smiles' | 'smarts' | 'guess';

/**
* Record the SMARTS features that could not be interpreted, so that they can
* be retrieved with `getSmartsWarning()` after parsing.
* @default `false`
*/
createSmartsWarnings?: boolean;

/**
* Skip the use of coordinate templates when coordinates are invented.
* @default `false`
*/
skipCoordinateTemplates?: boolean;

/**
* Make hydrogens explicit atoms of the parsed molecule
* (e.g. `[CH4]` is then parsed as five atoms instead of one).
* @default `false`
*/
makeHydrogenExplicit?: boolean;

/**
* Disable parsing of CACTVS syntax.
* @default `false`
*/
noCactvs?: boolean;
}

/**
* Options accepted by `SmilesParser.parseMolecule`.
*/
export interface ISmilesParserParseMoleculeOptions {
/**
* Molecule to parse into. If omitted, a new molecule is created.
*/
molecule?: Molecule;

/**
* Disable coordinate invention.
* @default `false`
*/
noCoordinates?: boolean;

/**
* Disable stereo features parsing.
* @default `false`
*/
noStereo?: boolean;
}

/**
* Configurable SMILES parser that can also parse SMARTS patterns and reaction
* SMILES.
*/
export declare class SmilesParser {
/**
* Create a SMILES parser.
* @param options - Parser configuration. All options are optional.
*/
constructor(options?: ISmilesParserOptions);

/**
* Set the random seed used to invent coordinates.
* @param seed - Seed value (handled as an integer by the implementation).
*/
setRandomSeed(seed: number): void;

/**
* Parse a SMILES string and return a molecule.
* @param smiles - The SMILES (or SMARTS, depending on `smartsMode`) to parse.
* @param options - Per-call options. When `options.molecule` is given, the
* result is parsed into that molecule and the same instance is returned.
*/
parseMolecule(
smiles: string,
options?: ISmilesParserParseMoleculeOptions,
): Molecule;

/**
* Parse a SMILES string and return a reaction.
* @param smiles - The reaction SMILES to parse, e.g. `'COCO>>COC.O'`.
*/
parseReaction(smiles: string): Reaction;

/**
* If `createSmartsWarnings` was passed as `true` to the constructor, this
* method returns a message naming all SMARTS features that could not be
* interpreted in the most recently parsed SMILES/SMARTS pattern
* (e.g. `'Unresolved SMARTS features: R9'`).
* @returns The warning message as a single string; an empty string when
* `createSmartsWarnings` was not enabled.
*/
getSmartsWarning(): string;
}

export interface MolecularFormula {
absoluteWeight: number;

Expand Down

0 comments on commit 2ca12ed

Please sign in to comment.