MolVS
MolVS copied to clipboard
phosphinic acid SMARTS closes #20
Updates SMARTS definitions for phosphinic acids. Requires 3 explicit (D3) and 3 total (X3) connections for tautomerizing phosphinic acids. New behavior properly handles compounds with 4 connections (e.g., phosphates, phosphonic acids).
import pandas as pd
from molvs.tautomer import TautomerCanonicalizer, TautomerTransform
from rdkit import Chem
# Demo: canonicalize a few phosphorus compounds using only the two custom
# phosphorus tautomer rules below, then tabulate input vs. canonical SMILES.

# Replacement rule set handed to TautomerCanonicalizer.
# The forward rule targets a hydroxyl on a hydrogen-free phosphorus; the
# reverse rule targets a P=O whose phosphorus carries one hydrogen.
# NOTE(review): in SMARTS, D<n> counts explicit connections and X<n> counts
# total connections (implicit hydrogens included). With an implicit H on P,
# 'D3X3H1' looks unsatisfiable (D3 + one implicit H gives X4) -- confirm
# whether the reverse rule should read '[PD3X4H1]=[O]'.
my_transforms = (
    TautomerTransform('phosphonic acid f', '[OH]-[PD3X3H0]', bonds='='),
    TautomerTransform('phosphonic acid r', '[PD3X3H1]=[O]', bonds='-'),
)

# Compound names and their input SMILES, index-aligned.
cpds = ['methylphosphinic acid','methylphosphonous acid','methylphosphonic acid','NADPH']
smiles = ['CP(=O)O','CP(O)O','CP(=O)(O)O','NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](N4C=NC5=C4N=CN=C5N)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1']

mols = [Chem.MolFromSmiles(smi) for smi in smiles]

# Build the canonicalizer once; it is configured identically for every
# molecule, so there is no need to re-create it per input.
canonicalizer = TautomerCanonicalizer(transforms=my_transforms)
can_taut = [canonicalizer.canonicalize(mol) for mol in mols]
smiles_taut = [Chem.MolToSmiles(mol) for mol in can_taut]

# One row per compound: name, input SMILES, canonical-tautomer SMILES.
df = pd.DataFrame({'cpd':cpds,'smi':smiles,'taut_smi':smiles_taut})
cpd smi taut_smi
0 methylphosphinic acid CP(=O)O C[PH](=O)O
1 methylphosphonous acid CP(O)O C[PH](=O)O
2 methylphosphonic acid CP(=O)(O)O CP(=O)(O)O
3 NADPH NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](N4C=NC5=C4N=CN=C5N)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1 NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1