#!/usr/bin/env amspython
# coding: utf-8
# ## Initial imports
from scm.plams import *
import matplotlib.pyplot as plt
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import AllChem
from typing import List
IPythonConsole.ipython_useSVG = True
IPythonConsole.molSize = 250, 250
# this line is not required in AMS2025+
init()
# ## Helper class and function
#
# The ``MoleculeConnector`` class and ``substitute()`` method below are convenient to use.
class MoleculeConnector:
def __init__(self, molecule, connector, name="molecule"):
self.name = name
self.molecule = molecule
self.molecule.properties.name = name
self.connector = connector # 2-tuple of integers, unlike the Molecule.substitute() method which uses two atoms
def __str__(self):
return f"""
Name: {self.name}
{self.molecule}
Connector: {self.connector}. This means that when substitution is performed atom {self.connector[0]} will be kept in the substituted molecule. Atom {self.connector[1]}, and anything connected to it, will NOT be kept.
"""
def substitute(substrate: MoleculeConnector, ligand: MoleculeConnector):
"""
Returns: Molecule with the ligand added to the substrate, replacing the respective connector bonds.
"""
molecule = substrate.molecule.copy()
molecule.substitute(
connector=(molecule[substrate.connector[0]], molecule[substrate.connector[1]]),
ligand=ligand.molecule,
ligand_connector=(ligand.molecule[ligand.connector[0]], ligand.molecule[ligand.connector[1]]),
)
return molecule
def set_atom_indices(rdmol: Chem.rdchem.Mol, start=0):
for atom in rdmol.GetAtoms():
atom.SetAtomMapNum(atom.GetIdx() + start) # give 1-based index
return rdmol
def to_lewis(molecule: Molecule, template=None, regenerate: bool = True):
if isinstance(molecule, Chem.rdchem.Mol):
rdmol = molecule
else:
rdmol = to_rdmol(molecule)
if regenerate:
rdmol = Chem.RemoveHs(rdmol)
smiles = Chem.MolToSmiles(rdmol)
rdmol = Chem.MolFromSmiles(smiles)
if template is not None:
AllChem.GenerateDepictionMatching2DStructure(rdmol, template)
try:
if molecule.properties.name:
rdmol.SetProp("_Name", molecule.properties.name)
except AttributeError:
pass
return rdmol
def smiles2template(smiles: str):
template = Chem.MolFromSmiles(smiles)
AllChem.Compute2DCoords(template)
return template
def draw_lewis_grid(
molecules: List[Molecule],
molsPerRow: int = 4,
template_smiles: str = None,
regenerate: bool = False,
draw_atom_indices: bool = False,
draw_legend: bool = True,
):
template = None
if template_smiles:
template = smiles2template(template_smiles)
rdmols = [to_lewis(x, template=template, regenerate=regenerate) for x in molecules]
if draw_atom_indices:
for rdmol in rdmols:
set_atom_indices(rdmol, start=1)
legends = None
if draw_legend:
try:
legends = [x.properties.name or f"mol{i}" for i, x in enumerate(molecules)]
except AttributeError:
pass
return Draw.MolsToGridImage(rdmols, molsPerRow=molsPerRow, legends=legends)
# ## Generate substrate molecule
substrate_smiles = "c1ccccc1"
substrate = from_smiles(substrate_smiles, forcefield="uff")
substrate.properties.name = "benzene"
plot_molecule(substrate)
plt.title(substrate.properties.name)
# ## Find out which bond to cleave
# In the molecule you need to define which bond to cleave. To find out, run for example
substrate.write("substrate.xyz")
# Then open ``substrate.xyz`` in the AMS GUI and find that atoms 6 (C) and 12 (H) are bonded. We will choose this bond to cleave.
#
# Alternatively, we can plot the molecule inside a Jupyter notebook with RDkit to also find that atoms 6 (C) and 12 (H) are bonded.
draw_lewis_grid([substrate], draw_atom_indices=True, draw_legend=False)
substrate_connector = MoleculeConnector(
substrate, (6, 12), "phenyl"
) # benzene becomes phenyl when bond between atoms 6,12 is cleaved
# ## Define ligands
# Similarly for the ligand, if you do not know which bond to cleave, write the molecule to a .xyz file and find out.
#
# Or plot it with rdkit in the Jupyter notebook.
#
# **Note**: The ligands below have an extra hydrogen or even more atoms compared to the name that they're given.
ligands = [
MoleculeConnector(
from_smiles("CCOC(=O)C", forcefield="uff"), (3, 2), "acetate"
), # ethyl acetate, bond from O to C cleaved
MoleculeConnector(
from_smiles("O=NO", forcefield="uff"), (3, 4), "nitrite"
), # nitrous acid, bond from O to H cleaved
MoleculeConnector(
from_smiles("Cl", forcefield="uff"), (1, 2), "chloride"
), # hydrogen chloride, bond from Cl to H cleaved
MoleculeConnector(from_smiles("c1ccccc1", forcefield="uff"), (6, 12), "phenyl"), # benzene, bond to C to H cleaved
]
ligand_molecules = [ligand.molecule for ligand in ligands]
fig, axes = plt.subplots(1, len(ligands), figsize=(15, 3))
for ax, ligand in zip(axes, ligands):
plot_molecule(ligand.molecule, ax=ax)
ax.set_title(ligand.name)
draw_lewis_grid(ligand_molecules, draw_atom_indices=True, draw_legend=False, molsPerRow=4)
# Above we see that cleaving the bonds from O(3)-C(2), O(3)-H(4), Cl(1)-H(2), and C(6)-H(12) will give the acetate, nitrite, chloride, and phenyl substituents, respectively.
# ## Generate substituted molecules
mols = []
for ligand in ligands:
mol = substitute(substrate_connector, ligand)
mol.properties.name = f"{substrate_connector.name}--{ligand.name}"
mols.append(mol)
print(f"Writing {mol.properties.name}.xyz")
mol.write(f"{mol.properties.name}.xyz")
print(f"{mol.properties.name} formula: {mol.get_formula(as_dict=True)}")
# ## Plot 3D structures with PLAMS / ASE
fig, axes = plt.subplots(1, len(mols), figsize=(15, 3))
for ax, mol in zip(axes, mols):
plot_molecule(mol, ax=ax)
ax.set_title(mol.properties.name)
# ## Plot 2D Lewis structures with RDKit
#
# The molecules can be aligned by using a benzene template. The ``regenerate`` option regenerates the molecule with RDkit to clean up the atomic positions.
draw_lewis_grid(mols, template_smiles=substrate_smiles, regenerate=True)