Complete Python code

#!/usr/bin/env amspython
# coding: utf-8

# ## Initial imports

from scm.plams import *
import matplotlib.pyplot as plt
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import AllChem
from typing import List

IPythonConsole.ipython_useSVG = True
IPythonConsole.molSize = 250, 250

# this line is not required in AMS2025+
init()


# ## Helper class and function
#
# The ``MoleculeConnector`` class and ``substitute()`` method below are convenient to use.


class MoleculeConnector:
    def __init__(self, molecule, connector, name="molecule"):
        self.name = name
        self.molecule = molecule
        self.molecule.properties.name = name
        self.connector = connector  # 2-tuple of integers, unlike the Molecule.substitute() method which uses two atoms

    def __str__(self):
        return f"""
Name: {self.name}
{self.molecule}
Connector: {self.connector}. This means that when substitution is performed atom {self.connector[0]} will be kept in the substituted molecule. Atom {self.connector[1]}, and anything connected to it, will NOT be kept.
        """


def substitute(substrate: MoleculeConnector, ligand: MoleculeConnector):
    """
    Returns: Molecule with the ligand added to the substrate, replacing the respective connector bonds.
    """
    molecule = substrate.molecule.copy()
    molecule.substitute(
        connector=(molecule[substrate.connector[0]], molecule[substrate.connector[1]]),
        ligand=ligand.molecule,
        ligand_connector=(ligand.molecule[ligand.connector[0]], ligand.molecule[ligand.connector[1]]),
    )
    return molecule


def set_atom_indices(rdmol: Chem.rdchem.Mol, start=0):
    for atom in rdmol.GetAtoms():
        atom.SetAtomMapNum(atom.GetIdx() + start)  # give 1-based index
    return rdmol


def to_lewis(molecule: Molecule, template=None, regenerate: bool = True):
    if isinstance(molecule, Chem.rdchem.Mol):
        rdmol = molecule
    else:
        rdmol = to_rdmol(molecule)
    if regenerate:
        rdmol = Chem.RemoveHs(rdmol)
        smiles = Chem.MolToSmiles(rdmol)
        rdmol = Chem.MolFromSmiles(smiles)
    if template is not None:
        AllChem.GenerateDepictionMatching2DStructure(rdmol, template)
    try:
        if molecule.properties.name:
            rdmol.SetProp("_Name", molecule.properties.name)
    except AttributeError:
        pass
    return rdmol


def smiles2template(smiles: str):
    template = Chem.MolFromSmiles(smiles)
    AllChem.Compute2DCoords(template)
    return template


def draw_lewis_grid(
    molecules: List[Molecule],
    molsPerRow: int = 4,
    template_smiles: str = None,
    regenerate: bool = False,
    draw_atom_indices: bool = False,
    draw_legend: bool = True,
):
    template = None
    if template_smiles:
        template = smiles2template(template_smiles)

    rdmols = [to_lewis(x, template=template, regenerate=regenerate) for x in molecules]
    if draw_atom_indices:
        for rdmol in rdmols:
            set_atom_indices(rdmol, start=1)
    legends = None
    if draw_legend:
        try:
            legends = [x.properties.name or f"mol{i}" for i, x in enumerate(molecules)]
        except AttributeError:
            pass

    return Draw.MolsToGridImage(rdmols, molsPerRow=molsPerRow, legends=legends)


# ## Generate substrate molecule

substrate_smiles = "c1ccccc1"
substrate = from_smiles(substrate_smiles, forcefield="uff")
substrate.properties.name = "benzene"

plot_molecule(substrate)
plt.title(substrate.properties.name)


# ## Find out which bond to cleave

# In the molecule you need to define which bond to cleave. To find out, run for example

substrate.write("substrate.xyz")


# Then open ``substrate.xyz`` in the AMS GUI and find that atoms 6 (C) and 12 (H) are bonded. We will choose this bond to cleave.
#
# Alternatively, we can plot the molecule inside a Jupyter notebook with RDkit to also find that atoms 6 (C) and 12 (H) are bonded.

draw_lewis_grid([substrate], draw_atom_indices=True, draw_legend=False)


substrate_connector = MoleculeConnector(
    substrate, (6, 12), "phenyl"
)  # benzene becomes phenyl when bond between atoms 6,12 is cleaved


# ## Define ligands

# Similarly for the ligand, if you do not know which bond to cleave, write the molecule to a .xyz file and find out.
#
# Or plot it with rdkit in the Jupyter notebook.
#
# **Note**: The ligands below have an extra hydrogen  or even more atoms compared to the name that they're given.

ligands = [
    MoleculeConnector(
        from_smiles("CCOC(=O)C", forcefield="uff"), (3, 2), "acetate"
    ),  # ethyl acetate, bond from O to C cleaved
    MoleculeConnector(
        from_smiles("O=NO", forcefield="uff"), (3, 4), "nitrite"
    ),  # nitrous acid, bond from O to H cleaved
    MoleculeConnector(
        from_smiles("Cl", forcefield="uff"), (1, 2), "chloride"
    ),  # hydrogen chloride, bond from Cl to H cleaved
    MoleculeConnector(from_smiles("c1ccccc1", forcefield="uff"), (6, 12), "phenyl"),  # benzene, bond to C to H cleaved
]

ligand_molecules = [ligand.molecule for ligand in ligands]

fig, axes = plt.subplots(1, len(ligands), figsize=(15, 3))

for ax, ligand in zip(axes, ligands):
    plot_molecule(ligand.molecule, ax=ax)
    ax.set_title(ligand.name)


draw_lewis_grid(ligand_molecules, draw_atom_indices=True, draw_legend=False, molsPerRow=4)


# Above we see that cleaving the bonds from O(3)-C(2), O(3)-H(4), Cl(1)-H(2), and C(6)-H(12) will give the acetate, nitrite, chloride, and phenyl substituents, respectively.

# ## Generate substituted molecules

mols = []

for ligand in ligands:
    mol = substitute(substrate_connector, ligand)
    mol.properties.name = f"{substrate_connector.name}--{ligand.name}"
    mols.append(mol)
    print(f"Writing {mol.properties.name}.xyz")
    mol.write(f"{mol.properties.name}.xyz")
    print(f"{mol.properties.name} formula: {mol.get_formula(as_dict=True)}")


# ## Plot 3D structures with PLAMS / ASE

fig, axes = plt.subplots(1, len(mols), figsize=(15, 3))

for ax, mol in zip(axes, mols):
    plot_molecule(mol, ax=ax)
    ax.set_title(mol.properties.name)


# ## Plot 2D Lewis structures with RDKit
#
# The molecules can be aligned by using a benzene template. The ``regenerate`` option regenerates the molecule with RDkit to clean up the atomic positions.

draw_lewis_grid(mols, template_smiles=substrate_smiles, regenerate=True)