Substituting Functional Groups in Molecules

Build functionalized molecules by adding ligands to a substrate, then generate and visualize the resulting products in 3D and 2D.

Downloads: Notebook | Script ?

Related examples
Related tutorials
Related documentation
../_images/molecule_substitution_19_0_428350dc.png

Initial imports

import scm.plams as plams
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import AllChem
from typing import List, Tuple

IPythonConsole.ipython_useSVG = True
IPythonConsole.molSize = 250, 250

Helper class and function

The MoleculeConnector class and substitute() method below are convenient to use.

class MoleculeConnector:
    def __init__(self, molecule, connector, name="molecule"):
        self.name = name
        self.molecule = molecule
        self.molecule.properties.name = name
        self.connector = connector  # 2-tuple of integers, unlike the Molecule.substitute() method which uses two atoms

    def __str__(self):
        return f"""
Name: {self.name}
{self.molecule}
Connector: {self.connector}. This means that when substitution is performed atom {self.connector[0]} will be kept in the substituted molecule. Atom {self.connector[1]}, and anything connected to it, will NOT be kept.
        """


def substitute(substrate: MoleculeConnector, ligand: MoleculeConnector):
    """
    Returns: Molecule with the ligand added to the substrate, replacing the respective connector bonds.
    """
    molecule = substrate.molecule.copy()
    molecule.substitute(
        connector=(molecule[substrate.connector[0]], molecule[substrate.connector[1]]),
        ligand=ligand.molecule,
        ligand_connector=(
            ligand.molecule[ligand.connector[0]],
            ligand.molecule[ligand.connector[1]],
        ),
    )
    return molecule


def set_atom_indices(rdmol: Chem.rdchem.Mol, start=0):
    for atom in rdmol.GetAtoms():
        atom.SetAtomMapNum(atom.GetIdx() + start)  # give 1-based index
    return rdmol


def to_lewis(molecule: plams.Molecule, template=None, regenerate: bool = True):
    if isinstance(molecule, Chem.rdchem.Mol):
        rdmol = molecule
    else:
        rdmol = plams.to_rdmol(molecule)
    if regenerate:
        rdmol = Chem.RemoveHs(rdmol)
        smiles = Chem.MolToSmiles(rdmol)
        rdmol = Chem.MolFromSmiles(smiles)
    if template is not None:
        AllChem.GenerateDepictionMatching2DStructure(rdmol, template)
    try:
        if molecule.properties.name:
            rdmol.SetProp("_Name", molecule.properties.name)
    except AttributeError:
        pass
    return rdmol


def smiles2template(smiles: str):
    template = Chem.MolFromSmiles(smiles)
    AllChem.Compute2DCoords(template)
    return template


def draw_lewis_grid(
    molecules: List[plams.Molecule],
    molsPerRow: int = 4,
    template_smiles: str = None,
    regenerate: bool = False,
    draw_atom_indices: bool = False,
    draw_legend: bool = True,
):
    template = None
    if template_smiles:
        template = smiles2template(template_smiles)

    rdmols = [to_lewis(x, template=template, regenerate=regenerate) for x in molecules]
    if draw_atom_indices:
        for rdmol in rdmols:
            set_atom_indices(rdmol, start=1)
    legends = None
    if draw_legend:
        try:
            legends = [x.properties.name or f"mol{i}" for i, x in enumerate(molecules)]
        except AttributeError:
            pass

    return Draw.MolsToGridImage(rdmols, molsPerRow=molsPerRow, legends=legends)

Generate substrate molecule

substrate_smiles = "c1ccccc1"
substrate = plams.from_smiles(substrate_smiles, forcefield="uff")
substrate.properties.name = "benzene"

Find out which bond to cleave

In the molecule you need to define which bond to cleave. To find out the bonds, run for example:

for b in substrate.bonds:
    el1 = b.atom1.symbol
    el2 = b.atom2.symbol
    idx1, idx2 = substrate.index(b)
    print(f"{el1}({idx1})--{el2}({idx2})")
C(1)--C(2)
C(2)--C(3)
C(3)--C(4)
C(4)--C(5)
C(5)--C(6)
C(6)--C(1)
C(1)--H(7)
C(2)--H(8)
C(3)--H(9)
C(4)--H(10)
C(5)--H(11)
C(6)--H(12)

to find that atoms 6 (C) and 12 (H) are bonded. We will choose this bond to cleave.

Alternatively, we can inspect the molecule inside a Jupyter notebook, visualizing with AMSView, to also find that atoms 6 (C) and 12 (H) are bonded.

plams.view(
    substrate, padding=-0.5, show_atom_labels=True, atom_label_type="Name", width=300, height=300
)
[10.04|11:46:11] Starting Xvfb...
[10.04|11:46:11] Xvfb started
image generated from notebook
substrate_connector = MoleculeConnector(
    substrate, (6, 12), "phenyl"
)  # benzene becomes phenyl when bond between atoms 6,12 is cleaved

Define ligands

Perform the same steps for the ligands.

Note: The ligands below have an extra hydrogen or even more atoms compared to the name that they’re given.

ligands = [
    MoleculeConnector(
        plams.from_smiles("CCOC(=O)C", forcefield="uff"), (3, 2), "acetate"
    ),  # ethyl acetate, bond from O to C cleaved
    MoleculeConnector(
        plams.from_smiles("O=NO", forcefield="uff"), (3, 4), "nitrite"
    ),  # nitrous acid, bond from O to H cleaved
    MoleculeConnector(
        plams.from_smiles("Cl", forcefield="uff"), (1, 2), "chloride"
    ),  # hydrogen chloride, bond from Cl to H cleaved
    MoleculeConnector(
        plams.from_smiles("c1ccccc1", forcefield="uff"), (6, 12), "phenyl"
    ),  # benzene, bond to C to H cleaved
]

ligand_molecules = [ligand.molecule for ligand in ligands]

images = {
    ligand.name: plams.view(
        ligand.molecule, width=400, show_atom_labels=True, atom_label_type="Name"
    )
    for ligand in ligands
}

plams.plot_image_grid(images);
image generated from notebook

Above we see that cleaving the bonds from O(3)-C(2), O(3)-H(4), Cl(1)-H(2), and C(6)-H(12) will give the acetate, nitrite, chloride, and phenyl substituents, respectively.

Generate substituted molecules

mols = []

for ligand in ligands:
    mol = substitute(substrate_connector, ligand)
    mol.properties.name = f"{substrate_connector.name}--{ligand.name}"
    mols.append(mol)
    print(f"Writing {mol.properties.name}.xyz")
    mol.write(f"{mol.properties.name}.xyz")
    print(f"{mol.properties.name} formula: {mol.get_formula(as_dict=True)}")
Writing phenyl--acetate.xyz
phenyl--acetate formula: {'C': 8, 'H': 8, 'O': 2}
Writing phenyl--nitrite.xyz
phenyl--nitrite formula: {'C': 6, 'H': 5, 'O': 2, 'N': 1}
Writing phenyl--chloride.xyz
phenyl--chloride formula: {'C': 6, 'H': 5, 'Cl': 1}
Writing phenyl--phenyl.xyz
phenyl--phenyl formula: {'C': 12, 'H': 10}

Plot 3D structures with PLAMS

images = {mol.properties.name: plams.view(mol, width=400) for mol in mols}

plams.plot_image_grid(images, rows=1);
image generated from notebook

Plot 2D Lewis structures with RDKit

The molecules can be aligned by using a benzene template. The regenerate option regenerates the molecule with RDkit to clean up the atomic positions.

draw_lewis_grid(mols, template_smiles=substrate_smiles, regenerate=True)
image generated from notebook

See also

Python Script

#!/usr/bin/env python
# coding: utf-8

# ## Initial imports

import scm.plams as plams
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import AllChem
from typing import List, Tuple

IPythonConsole.ipython_useSVG = True
IPythonConsole.molSize = 250, 250


# ## Helper class and function
#
# The ``MoleculeConnector`` class and ``substitute()`` method below are convenient to use.


class MoleculeConnector:
    def __init__(self, molecule, connector, name="molecule"):
        self.name = name
        self.molecule = molecule
        self.molecule.properties.name = name
        self.connector = connector  # 2-tuple of integers, unlike the Molecule.substitute() method which uses two atoms

    def __str__(self):
        return f"""
Name: {self.name}
{self.molecule}
Connector: {self.connector}. This means that when substitution is performed atom {self.connector[0]} will be kept in the substituted molecule. Atom {self.connector[1]}, and anything connected to it, will NOT be kept.
        """


def substitute(substrate: MoleculeConnector, ligand: MoleculeConnector):
    """
    Returns: Molecule with the ligand added to the substrate, replacing the respective connector bonds.
    """
    molecule = substrate.molecule.copy()
    molecule.substitute(
        connector=(molecule[substrate.connector[0]], molecule[substrate.connector[1]]),
        ligand=ligand.molecule,
        ligand_connector=(
            ligand.molecule[ligand.connector[0]],
            ligand.molecule[ligand.connector[1]],
        ),
    )
    return molecule


def set_atom_indices(rdmol: Chem.rdchem.Mol, start=0):
    for atom in rdmol.GetAtoms():
        atom.SetAtomMapNum(atom.GetIdx() + start)  # give 1-based index
    return rdmol


def to_lewis(molecule: plams.Molecule, template=None, regenerate: bool = True):
    if isinstance(molecule, Chem.rdchem.Mol):
        rdmol = molecule
    else:
        rdmol = plams.to_rdmol(molecule)
    if regenerate:
        rdmol = Chem.RemoveHs(rdmol)
        smiles = Chem.MolToSmiles(rdmol)
        rdmol = Chem.MolFromSmiles(smiles)
    if template is not None:
        AllChem.GenerateDepictionMatching2DStructure(rdmol, template)
    try:
        if molecule.properties.name:
            rdmol.SetProp("_Name", molecule.properties.name)
    except AttributeError:
        pass
    return rdmol


def smiles2template(smiles: str):
    template = Chem.MolFromSmiles(smiles)
    AllChem.Compute2DCoords(template)
    return template


def draw_lewis_grid(
    molecules: List[plams.Molecule],
    molsPerRow: int = 4,
    template_smiles: str = None,
    regenerate: bool = False,
    draw_atom_indices: bool = False,
    draw_legend: bool = True,
):
    template = None
    if template_smiles:
        template = smiles2template(template_smiles)

    rdmols = [to_lewis(x, template=template, regenerate=regenerate) for x in molecules]
    if draw_atom_indices:
        for rdmol in rdmols:
            set_atom_indices(rdmol, start=1)
    legends = None
    if draw_legend:
        try:
            legends = [x.properties.name or f"mol{i}" for i, x in enumerate(molecules)]
        except AttributeError:
            pass

    return Draw.MolsToGridImage(rdmols, molsPerRow=molsPerRow, legends=legends)


# ## Generate substrate molecule

substrate_smiles = "c1ccccc1"
substrate = plams.from_smiles(substrate_smiles, forcefield="uff")
substrate.properties.name = "benzene"


# ## Find out which bond to cleave

# In the molecule you need to define which bond to cleave. To find out the bonds, run for example:

for b in substrate.bonds:
    el1 = b.atom1.symbol
    el2 = b.atom2.symbol
    idx1, idx2 = substrate.index(b)
    print(f"{el1}({idx1})--{el2}({idx2})")


# to find that atoms 6 (C) and 12 (H) are bonded. We will choose this bond to cleave.
#
# Alternatively, we can inspect the molecule inside a Jupyter notebook, visualizing with AMSView, to also find that atoms 6 (C) and 12 (H) are bonded.

plams.view(
    substrate,
    padding=-0.5,
    show_atom_labels=True,
    atom_label_type="Name",
    width=300,
    height=300,
    picture_path="picture1.png",
)


substrate_connector = MoleculeConnector(
    substrate, (6, 12), "phenyl"
)  # benzene becomes phenyl when bond between atoms 6,12 is cleaved


# ## Define ligands

# Perform the same steps for the ligands.
#
# **Note**: The ligands below have an extra hydrogen  or even more atoms compared to the name that they're given.

ligands = [
    MoleculeConnector(
        plams.from_smiles("CCOC(=O)C", forcefield="uff"), (3, 2), "acetate"
    ),  # ethyl acetate, bond from O to C cleaved
    MoleculeConnector(
        plams.from_smiles("O=NO", forcefield="uff"), (3, 4), "nitrite"
    ),  # nitrous acid, bond from O to H cleaved
    MoleculeConnector(
        plams.from_smiles("Cl", forcefield="uff"), (1, 2), "chloride"
    ),  # hydrogen chloride, bond from Cl to H cleaved
    MoleculeConnector(
        plams.from_smiles("c1ccccc1", forcefield="uff"), (6, 12), "phenyl"
    ),  # benzene, bond to C to H cleaved
]

ligand_molecules = [ligand.molecule for ligand in ligands]

images = {
    ligand.name: plams.view(ligand.molecule, width=400, show_atom_labels=True, atom_label_type="Name")
    for ligand in ligands
}

plams.plot_image_grid(images, save_path="picture2.png")
# Above we see that cleaving the bonds from O(3)-C(2), O(3)-H(4), Cl(1)-H(2), and C(6)-H(12) will give the acetate, nitrite, chloride, and phenyl substituents, respectively.

# ## Generate substituted molecules

mols = []

for ligand in ligands:
    mol = substitute(substrate_connector, ligand)
    mol.properties.name = f"{substrate_connector.name}--{ligand.name}"
    mols.append(mol)
    print(f"Writing {mol.properties.name}.xyz")
    mol.write(f"{mol.properties.name}.xyz")
    print(f"{mol.properties.name} formula: {mol.get_formula(as_dict=True)}")


# ## Plot 3D structures with PLAMS

images = {mol.properties.name: plams.view(mol, width=400) for mol in mols}

plams.plot_image_grid(images, rows=1, save_path="picture3.png")
# ## Plot 2D Lewis structures with RDKit
#
# The molecules can be aligned by using a benzene template. The ``regenerate`` option regenerates the molecule with RDkit to clean up the atomic positions.

draw_lewis_grid(mols, template_smiles=substrate_smiles, regenerate=True)