MolVS icon indicating copy to clipboard operation
MolVS copied to clipboard

phosphinic acid SMARTS closes #20

Open gjgetzinger opened this issue 5 years ago • 0 comments

Updates the SMARTS definitions for phosphinic acids. Tautomerizing phosphinic acids now requires the phosphorus atom to have 3 explicit connections (D3) and 3 total connections (X3). The new behavior properly handles compounds whose phosphorus has 4 connections (e.g., phosphates, phosphonic acids).

from rdkit import Chem
from molvs.tautomer import TautomerCanonicalizer, TautomerTransform
import pandas as pd

# Custom tautomer transforms for P-OH / P(=O)H interconversion. Both SMARTS
# patterns pin the phosphorus atom to D3 and X3 so that, per the description
# accompanying this example, phosphorus centres with 4 connections
# (phosphates, phosphonic acids) are not tautomerized.
my_transforms = (
    TautomerTransform('phosphonic acid f', '[OH]-[PD3X3H0]', bonds='='),
    TautomerTransform('phosphonic acid r', '[PD3X3H1]=[O]', bonds='-'),
)

# Test compounds: names and their input SMILES, kept in matching order.
cpds = ['methylphosphinic acid', 'methylphosphonous acid', 'methylphosphonic acid', 'NADPH']
smiles = ['CP(=O)O', 'CP(O)O', 'CP(=O)(O)O', 'NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](N4C=NC5=C4N=CN=C5N)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1']
mols = [Chem.MolFromSmiles(smi) for smi in smiles]

# Build the canonicalizer once and reuse it for every molecule; it is
# configured with only the custom transforms defined above.
canonicalizer = TautomerCanonicalizer(transforms=my_transforms)
can_taut = [canonicalizer.canonicalize(mol) for mol in mols]
smiles_taut = [Chem.MolToSmiles(mol) for mol in can_taut]

# One row per compound: name, input SMILES, canonical-tautomer SMILES.
df = pd.DataFrame({'cpd': cpds, 'smi': smiles, 'taut_smi': smiles_taut})

	cpd	smi	taut_smi
0	methylphosphinic acid	CP(=O)O	C[PH](=O)O
1	methylphosphonous acid	CP(O)O	C[PH](=O)O
2	methylphosphonic acid	CP(=O)(O)O	CP(=O)(O)O
3	NADPH	NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](N4C=NC5=C4N=CN=C5N)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1	NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1

gjgetzinger avatar Apr 16 '20 16:04 gjgetzinger