from typing import List, Literal, Optional, overload, TYPE_CHECKING, Sequence, Dict, Any
import random
import sys
import copy
from warnings import warn
from scm.plams.core.functions import add_to_class, log, requires_optional_package
from scm.plams.mol.atom import Atom
from scm.plams.mol.bond import Bond
from scm.plams.mol.molecule import Molecule
from scm.plams.core.errors import PlamsError
if TYPE_CHECKING:
from rdkit import Mol as RDKitMol
__all__ = [
"add_Hs",
"apply_reaction_smarts",
"apply_template",
"gen_coords_rdmol",
"get_backbone_atoms",
"modify_atom",
"to_rdmol",
"from_rdmol",
"from_sequence",
"from_smiles",
"from_smarts",
"to_smiles",
"partition_protein",
"readpdb",
"writepdb",
"get_substructure",
"get_conformations",
"yield_coords",
"canonicalize_mol",
"to_image",
"get_reaction_image",
]
[docs]@requires_optional_package("rdkit")
def from_rdmol(rdkit_mol: "RDKitMol", confid: int = -1, properties: bool = True) -> Molecule:
"""
Translate an RDKit molecule into a PLAMS molecule type.
RDKit properties will be unpickled if their name ends with '_pickled'.
:parameter rdkit_mol: RDKit molecule
:type rdkit_mol: rdkit.Chem.Mol
:parameter int confid: conformer identifier from which to take coordinates
:parameter bool properties: If all Chem.Mol, Chem.Atom and Chem.Bond properties should be converted from RDKit to PLAMS format.
:return: a PLAMS molecule
:rtype: |Molecule|
"""
from rdkit import Chem
if isinstance(rdkit_mol, Molecule):
return rdkit_mol
# Create PLAMS molecule
plams_mol = Molecule()
total_charge = 0
try:
Chem.Kekulize(rdkit_mol)
except Exception:
pass
conf = rdkit_mol.GetConformer(id=confid)
# Add atoms and assign properties to the PLAMS atom if *properties* = True
for rd_atom in rdkit_mol.GetAtoms():
pos = conf.GetAtomPosition(rd_atom.GetIdx())
ch = rd_atom.GetFormalCharge()
pl_atom = Atom(rd_atom.GetAtomicNum(), coords=(pos.x, pos.y, pos.z), rdkit={"charge": ch})
if properties and rd_atom.GetPDBResidueInfo():
pl_atom.properties.rdkit.pdb_info = get_PDBResidueInfo(rd_atom)
plams_mol.add_atom(pl_atom)
total_charge += ch
# Check for R/S information
stereo = str(rd_atom.GetChiralTag())
if stereo == "CHI_TETRAHEDRAL_CCW":
pl_atom.properties.rdkit.stereo = "counter-clockwise"
elif stereo == "CHI_TETRAHEDRAL_CW":
pl_atom.properties.rdkit.stereo = "clockwise"
# Add bonds to the PLAMS molecule
for bond in rdkit_mol.GetBonds():
at1 = plams_mol.atoms[bond.GetBeginAtomIdx()]
at2 = plams_mol.atoms[bond.GetEndAtomIdx()]
plams_mol.add_bond(Bond(at1, at2, bond.GetBondTypeAsDouble()))
# Check for cis/trans information
stereo, bond_dir = str(bond.GetStereo()), str(bond.GetBondDir())
if stereo == "STEREOZ" or stereo == "STEREOCIS":
plams_mol.bonds[-1].properties.rdkit.stereo = "Z"
elif stereo == "STEREOE" or stereo == "STEREOTRANS":
plams_mol.bonds[-1].properties.rdkit.stereo = "E"
elif bond_dir == "ENDUPRIGHT":
plams_mol.bonds[-1].properties.rdkit.stereo = "up"
elif bond_dir == "ENDDOWNRIGHT":
plams_mol.bonds[-1].properties.rdkit.stereo = "down"
# Set charge and assign properties to PLAMS molecule and bonds if *properties* = True
plams_mol.properties.charge = total_charge
if properties:
prop_from_rdmol(plams_mol, rdkit_mol)
for rd_atom, plams_atom in zip(rdkit_mol.GetAtoms(), plams_mol):
prop_from_rdmol(plams_atom, rd_atom)
for rd_bond, plams_bond in zip(rdkit_mol.GetBonds(), plams_mol.bonds):
prop_from_rdmol(plams_bond, rd_bond)
return plams_mol
[docs]@requires_optional_package("rdkit")
def to_rdmol(
plams_mol: Molecule,
sanitize: bool = True,
properties: bool = True,
assignChirality: bool = False,
presanitize: bool = False,
) -> "RDKitMol":
"""
Translate a PLAMS molecule into an RDKit molecule type.
PLAMS |Molecule|, |Atom| or |Bond| properties are pickled if they are neither booleans, floats,
integers, floats nor strings, the resulting property names are appended with '_pickled'.
:parameter plams_mol: A PLAMS molecule
:parameter bool sanitize: Kekulize, check valencies, set aromaticity, conjugation and hybridization
:parameter bool properties: If all |Molecule|, |Atom| and |Bond| properties should be converted from PLAMS to RDKit format.
:parameter bool assignChirality: Assign R/S and cis/trans information, insofar as this was not yet present in the PLAMS molecule.
:parameter bool presanitize: Iteratively adjust bonding and atomic charges, to avoid failure of sanitization.
Only relevant is sanitize is set to True.
:type plams_mol: |Molecule|
:return: an RDKit molecule
:rtype: rdkit.Chem.Mol
"""
from rdkit import Chem, Geometry
if isinstance(plams_mol, Chem.Mol):
return plams_mol
# Create rdkit molecule
e = Chem.EditableMol(Chem.Mol())
# Add atoms and assign properties to the RDKit atom if *properties* = True
for pl_atom in plams_mol.atoms:
rd_atom = Chem.Atom(int(pl_atom.atnum))
if "rdkit" in pl_atom.properties:
if "charge" in pl_atom.properties.rdkit:
rd_atom.SetFormalCharge(pl_atom.properties.rdkit.charge)
if properties:
if "rdkit" in pl_atom.properties:
if "pdb_info" in pl_atom.properties.rdkit:
set_PDBresidueInfo(rd_atom, pl_atom.properties.rdkit.pdb_info)
for prop in pl_atom.properties.rdkit:
if prop not in ("charge", "pdb_info", "stereo"):
prop_to_rdmol(rd_atom, prop, pl_atom.properties.rdkit.get(prop))
prop_dic = {}
for prop in pl_atom.properties:
if prop != "rdkit":
prop_dic[prop] = pl_atom.properties.get(prop)
if len(prop_dic) > 0:
prop_to_rdmol(rd_atom, "plams", prop_dic)
# Check for R/S information
if pl_atom.properties.rdkit.stereo:
stereo = pl_atom.properties.rdkit.stereo.lower()
if stereo == "counter-clockwise":
rd_atom.SetChiralTag(Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CCW)
elif stereo == "clockwise":
rd_atom.SetChiralTag(Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CW)
e.AddAtom(rd_atom)
# Mapping of PLAMS bond orders to RDKit bond types:
def plams_to_rd_bonds(bo):
if 1.4 < bo < 1.6:
return 12 # bond type for aromatic bond
else:
return int(bo)
# Add bonds to the RDKit molecule
for bond in plams_mol.bonds:
a1 = plams_mol.atoms.index(bond.atom1)
a2 = plams_mol.atoms.index(bond.atom2)
e.AddBond(a1, a2, Chem.BondType(plams_to_rd_bonds(bond.order)))
rdmol = e.GetMol()
# Check for cis/trans information
for pl_bond, rd_bond in zip(plams_mol.bonds, rdmol.GetBonds()):
if pl_bond.properties.rdkit.stereo:
stereo = pl_bond.properties.rdkit.stereo.lower()
if stereo == "e" or stereo == "trans":
rd_bond.SetStereo(Chem.rdchem.BondStereo.STEREOE)
elif stereo == "z" or stereo == "cis":
rd_bond.SetStereo(Chem.rdchem.BondStereo.STEREOZ)
elif stereo == "up":
rd_bond.SetBondDir(Chem.rdchem.BondDir.ENDUPRIGHT)
elif stereo == "down":
rd_bond.SetBondDir(Chem.rdchem.BondDir.ENDDOWNRIGHT)
# Assign properties to RDKit molecule and bonds if *properties* = True
# All properties will be taken from 'rdkit' subsettings, except the molecular charge
if properties:
prop_dic = {}
for prop in plams_mol.properties:
if prop == "rdkit":
for rdprop in plams_mol.properties.rdkit:
prop_to_rdmol(rdmol, rdprop, plams_mol.properties.rdkit.get(rdprop))
else:
# prop_dic[prop] = {'plams':plams_mol.properties.get(prop)}
prop_dic[prop] = plams_mol.properties.get(prop)
if len(prop_dic) > 0:
prop_to_rdmol(rdmol, "plams", prop_dic)
prop_dic = {}
for pl_bond, rd_bond in zip(plams_mol.bonds, rdmol.GetBonds()):
for prop in pl_bond.properties:
if prop == "rdkit":
for rdprop in pl_bond.properties.rdkit:
if rdprop != "stereo":
prop_to_rdmol(rd_bond, rdprop, pl_bond.properties.rdkit.get(rdprop))
else:
prop_dic[prop] = pl_bond.properties.get(prop)
if len(prop_dic) > 0:
prop_to_rdmol(rd_bond, "plams", prop_dic)
if sanitize:
try:
if presanitize:
rdmol = _presanitize(plams_mol, rdmol)
else:
Chem.SanitizeMol(rdmol)
except ValueError as exc:
log("RDKit Sanitization Error.")
text = "Most likely this is a problem with the assigned bond orders: "
text += "Use chemical insight to adjust them."
log(text)
log("Note that the atom indices below start at zero, while the AMS-GUI indices start at 1.")
raise exc
conf = Chem.Conformer()
for i, atom in enumerate(plams_mol.atoms):
xyz = Geometry.Point3D(atom.x, atom.y, atom.z)
conf.SetAtomPosition(i, xyz)
rdmol.AddConformer(conf)
# REB: Assign all stereochemistry, if it wasn't already there
if assignChirality:
Chem.rdmolops.AssignAtomChiralTagsFromStructure(rdmol, confId=conf.GetId(), replaceExistingTags=False)
try:
Chem.AssignStereochemistryFrom3D(rdmol, confId=conf.GetId(), replaceExistingTags=False)
except AttributeError:
pass
return rdmol
[docs]@requires_optional_package("rdkit")
def to_smiles(plams_mol: Molecule, short_smiles: bool = True, **kwargs) -> str:
"""
Returns the RDKit-generated SMILES string of a PLAMS molecule.
Note: SMILES strings are generated based on the molecule's connectivity. If the input PLAMS molecule does not contain any bonds, "guessed bonds" will be used.
:parameter plams_mol: A PLAMS |Molecule|
:parameter bool short_smiles: whether or not to use some RDKit sanitization to get shorter smiles (e.g. for a water molecule, short_smiles=True -> "O", short_smiles=False -> [H]O[H])
:parameter \**kwargs: With 'kwargs' you can provide extra optional parameters to the rdkit.Chem method 'MolToSmiles'. See the rdkit documentation for more info.
:return: the SMILES string
"""
from rdkit import Chem
if len(plams_mol.bonds) > 0:
mol_with_bonds = plams_mol
else:
mol_with_bonds = plams_mol.copy()
mol_with_bonds.guess_bonds()
rd_mol = to_rdmol(mol_with_bonds, sanitize=False)
# This sanitization black magic is needed for getting the "short, nice and clean" SMILES string.
# Without this, the SMILES string for water would be "[H]O[H]". With this is just "O"
if short_smiles:
s = Chem.rdmolops.SanitizeFlags
rdkitSanitizeOptions = (
s.SANITIZE_ADJUSTHS
or s.SANITIZE_CLEANUP
or s.SANITIZE_CLEANUPCHIRALITY
or s.SANITIZE_FINDRADICALS
or s.SANITIZE_PROPERTIES
or s.SANITIZE_SETAROMATICITY
or s.SANITIZE_SETCONJUGATION
or s.SANITIZE_SETHYBRIDIZATION
or s.SANITIZE_SYMMRINGS
)
Chem.rdmolops.AssignRadicals(rd_mol)
rd_mol = Chem.rdmolops.RemoveHs(rd_mol, updateExplicitCount=True, sanitize=False)
Chem.rdmolops.SanitizeMol(rd_mol, rdkitSanitizeOptions)
smiles = Chem.MolToSmiles(rd_mol, **kwargs)
return smiles
pdb_residue_info_items = [
"AltLoc",
"ChainId",
"InsertionCode",
"IsHeteroAtom",
"Name",
"Occupancy",
"ResidueName",
"ResidueNumber",
"SecondaryStructure",
"SegmentNumber",
"SerialNumber",
"TempFactor",
]
# 'MonomerType' was excluded because it is an rdkit type that cannot easilty be serialized
def get_PDBResidueInfo(rdkit_atom):
pdb_info = {}
for item in pdb_residue_info_items:
get_function = "Get" + item
pdb_info[item] = rdkit_atom.GetPDBResidueInfo().__getattribute__(get_function)()
return pdb_info
@requires_optional_package("rdkit")
def set_PDBresidueInfo(rdkit_atom, pdb_info):
from rdkit import Chem
atom_pdb_residue_info = Chem.AtomPDBResidueInfo()
for item, value in pdb_info.items():
set_function = "Set" + item
atom_pdb_residue_info.__getattribute__(set_function)(value)
rdkit_atom.SetMonomerInfo(atom_pdb_residue_info)
def prop_to_rdmol(rd_obj, propkey, propvalue):
"""
Convert a single PLAMS property into an RDKit property.
:paramter pl_obj: A PLAMS object.
:type pl_obj: |Molecule|, |Atom| or |Bond|.
:parameter rd_obj: An RDKit object.
:type rd_obj: rdkit.Chem.Mol, rdkit.Chem.Atom or rdkit.Chem.Bond
:parameter str propkey: The |Settings| key of the PLAMS property.
"""
try:
import dill as pickle
except ImportError:
import pickle
obj = type(propvalue)
obj_dict = {bool: rd_obj.SetBoolProp, float: rd_obj.SetDoubleProp, int: rd_obj.SetIntProp, str: rd_obj.SetProp}
if obj_dict.get(obj):
obj_dict[obj](propkey, propvalue)
else:
name = propkey + "_pickled"
try:
rd_obj.SetProp(name, pickle.dumps(propvalue, 0).decode())
except (Exception, pickle.PicklingError):
pass
def prop_from_rdmol(pl_obj, rd_obj):
"""
Convert one or more RDKit properties into PLAMS properties.
:paramter pl_obj: A PLAMS object.
:type pl_obj: |Molecule|, |Atom| or |Bond|.
:parameter rd_obj: An RDKit object.
:type rd_obj: rdkit.Chem.Mol, rdkit.Chem.Atom or rdkit.Chem.Bond
"""
try:
import dill as pickle
except ImportError:
import pickle
prop_dict = rd_obj.GetPropsAsDict()
for propname in prop_dict.keys():
if propname == "plams_pickled":
plams_props = pickle.loads(prop_dict[propname].encode())
if not isinstance(plams_props, dict):
raise Exception("PLAMS property not properly stored in RDKit")
for key, value in plams_props.items():
pl_obj.properties[key] = value
else:
if propname == "__computedProps":
continue
if "_pickled" not in propname:
pl_obj.properties.rdkit[propname] = prop_dict[propname]
else:
prop = prop_dict[propname]
propname = propname.rsplit("_pickled", 1)[0]
propvalue = pickle.loads(prop.encode())
pl_obj.properties.rdkit[propname] = propvalue
@overload
def from_smiles(
smiles: str, nconfs: Literal[1] = ..., name: Optional[str] = ..., forcefield: Optional[str] = ..., rms: float = ...
) -> Molecule: ...
@overload
def from_smiles(
smiles: str, nconfs: int = ..., name: Optional[str] = ..., forcefield: Optional[str] = ..., rms: float = ...
) -> List[Molecule]: ...
[docs]@requires_optional_package("rdkit")
def from_smiles(
smiles: str, nconfs: int = 1, name: Optional[str] = None, forcefield: Optional[str] = None, rms: float = 0.1
):
"""
Generates PLAMS molecule(s) from a smiles strings.
:parameter str smiles: A smiles string
:parameter int nconfs: Number of conformers to be generated
:parameter str name: A name for the molecule
:parameter str forcefield: Choose 'uff' or 'mmff' forcefield for geometry optimization
and ranking of conformations. The default value None results in skipping of the
geometry optimization step.
:parameter float rms: Root Mean Square deviation threshold for
removing similar/equivalent conformations
:return: A molecule with hydrogens and 3D coordinates or a list of molecules if nconfs > 1
:rtype: |Molecule| or list of PLAMS Molecules
"""
from rdkit import Chem
smiles = str(smiles.split()[0])
smiles = Chem.CanonSmiles(smiles)
rdkit_mol = Chem.AddHs(Chem.MolFromSmiles(smiles))
rdkit_mol.SetProp("smiles", smiles)
return get_conformations(rdkit_mol, nconfs, name, forcefield, rms)
[docs]@requires_optional_package("rdkit")
def from_smarts(
smarts: str, nconfs: int = 1, name: Optional[str] = None, forcefield: Optional[str] = None, rms: float = 0.1
):
"""
Generates PLAMS molecule(s) from a smarts strings.
This allows for example to define hydrogens explicitly.
However it is less suitable for aromatic molecules (use from_smiles in that case).
:parameter str smarts: A smarts string
:parameter int nconfs: Number of conformers to be generated
:parameter str name: A name for the molecule
:parameter str forcefield: Choose 'uff' or 'mmff' forcefield for geometry
optimization and ranking of comformations. The default value None results
in skipping of the geometry optimization step.
:parameter float rms: Root Mean Square deviation threshold for removing
similar/equivalent conformations.
:return: A molecule with hydrogens and 3D coordinates or a list of molecules if nconfs > 1
:rtype: |Molecule| or list of PLAMS Molecules
"""
from rdkit import Chem
smiles = str(smarts.split()[0])
mol = Chem.MolFromSmarts(smiles)
Chem.SanitizeMol(mol)
molecule = Chem.AddHs(mol)
molecule.SetProp("smiles", smiles)
return get_conformations(molecule, nconfs, name, forcefield, rms)
[docs]@requires_optional_package("rdkit")
def from_sequence(sequence, nconfs=1, name=None, forcefield=None, rms=0.1):
"""
Generates PLAMS molecule from a peptide sequence.
Includes explicit hydrogens and 3D coordinates.
:parameter str sequence: A peptide sequence, e.g. 'HAG'
:parameter int nconfs: Number of conformers to be generated
:parameter str name: A name for the molecule
:parameter str forcefield: Choose 'uff' or 'mmff' forcefield for geometry
optimization and ranking of comformations. The default value None results
in skipping of the geometry optimization step.
:parameter float rms: Root Mean Square deviation threshold for removing
similar/equivalent conformations.
:return: A peptide molecule with hydrogens and 3D coordinates
or a list of molecules if nconfs > 1
:rtype: |Molecule| or list of PLAMS Molecules
"""
from rdkit import Chem
rdkit_mol = Chem.AddHs(Chem.MolFromSequence(sequence))
rdkit_mol.SetProp("sequence", sequence)
return get_conformations(rdkit_mol, nconfs, name, forcefield, rms)
@requires_optional_package("rdkit")
def calc_rmsd(mol1, mol2):
"""
Superimpose two molecules and calculate the root-mean-squared deviations of
the atomic positions.
The molecules should be identical, but the ordering of the atoms may differ.
:param mol1: Molecule 1
:param mol2: Molecule 2
:return: The rmsd after superposition
:rtype: float
"""
from rdkit.Chem import AllChem
rdkit_mol1 = to_rdmol(mol1)
rdkit_mol2 = to_rdmol(mol2)
try:
return AllChem.GetBestRMS(rdkit_mol1, rdkit_mol2)
except:
return -999
[docs]@requires_optional_package("rdkit")
def modify_atom(mol, idx, element):
"""
Change atom "idx" in molecule "mol" to "element" and add or remove hydrogens accordingly
:parameter mol: molecule to be modified
:type mol: |Molecule| or rdkit.Chem.Mol
:parameter int idx: index of the atom to be modified
:parameter str element:
:return: Molecule with new element and possibly added or removed hydrogens
:rtype: |Molecule|
"""
from rdkit import Chem
rdmol = to_rdmol(mol)
if rdmol.GetAtomWithIdx(idx).GetSymbol() == element:
return mol
else:
e = Chem.EditableMol(rdmol)
for neighbor in reversed(rdmol.GetAtomWithIdx(idx - 1).GetNeighbors()):
if neighbor.GetSymbol() == "H":
e.RemoveAtom(neighbor.GetIdx())
e.ReplaceAtom(idx - 1, Chem.Atom(element))
newmol = e.GetMol()
Chem.SanitizeMol(newmol)
newmol = Chem.AddHs(newmol, addCoords=True)
return from_rdmol(newmol)
[docs]@requires_optional_package("rdkit")
def apply_template(mol, template):
"""
Modifies bond orders in PLAMS molecule according template smiles structure.
:parameter mol: molecule to be modified
:type mol: |Molecule| or rdkit.Chem.Mol
:parameter str template: smiles string defining the correct chemical structure
:return: Molecule with correct chemical structure and provided 3D coordinates
:rtype: |Molecule|
"""
from rdkit import Chem
rdmol = to_rdmol(mol, sanitize=False)
template_mol = Chem.AddHs(Chem.MolFromSmiles(template))
newmol = Chem.AllChem.AssignBondOrdersFromTemplate(template_mol, rdmol)
return from_rdmol(newmol)
[docs]@requires_optional_package("rdkit")
def apply_reaction_smarts(mol, reaction_smarts, complete=False, forcefield=None, return_rdmol=False):
"""
Applies reaction smirks and returns product.
If returned as a PLAMS molecule, thismolecule.properties.orig_atoms
is a list of indices of atoms that have not been changed
(which can for example be used partially optimize new atoms only with the freeze keyword)
:parameter mol: molecule to be modified
:type mol: |Molecule| or rdkit.Chem.Mol
:parameter str reactions_smarts: Reactions smarts to be applied to molecule
:parameter complete: Apply reaction until no further changes occur or given
fraction of reaction centers have been modified
:type complete: bool or float (value between 0 and 1)
:parameter forcefield: Specify 'uff' or 'mmff' to apply forcefield based
geometry optimization of product structures.
:type forcefield: str
:param bool return_rdmol: return a RDKit molecule if true, otherwise a PLAMS molecule
:return: (product molecule, list of unchanged atoms)
:rtype: (|Molecule|, list of int)
"""
from rdkit import Chem
from rdkit.Chem import AllChem
def react(reactant, reaction):
"""Apply reaction to reactant and return products"""
ps = reaction.RunReactants([reactant])
# if reaction doesn't apply, return the reactant
if len(ps) == 0:
return [(reactant, range(reactant.GetNumAtoms()))]
full = len(ps)
while complete: # when complete is True
# apply reaction until no further changes
r = random.randint(0, len(ps) - 1)
reactant = ps[r][0]
ps = reaction.RunReactants([reactant])
if len(ps) == 0 or len(ps) / full < (1 - complete):
ps = [[reactant]]
break
# add hydrogens and generate coordinates for new atoms
products = []
for p in ps[0]:
Chem.SanitizeMol(p)
q = Chem.AddHs(p)
Chem.SanitizeMol(q)
u = gen_coords_rdmol(q) # These are the atoms that have not changed
products.append((q, u))
return products
mol = to_rdmol(mol)
reaction = AllChem.ReactionFromSmarts(reaction_smarts)
# RDKit removes fragments that are disconnected from the reaction center
# In order to keep these, the molecule is first split in separate fragments
# and the results, including non-reacting parts, are re-combined afterwards
frags = Chem.GetMolFrags(mol, asMols=True)
product = Chem.Mol()
unchanged = [] # List of atoms that have not changed
for frag in frags:
for p, u in react(frag, reaction):
unchanged += [product.GetNumAtoms() + i for i in u]
product = Chem.CombineMols(product, p)
if forcefield:
optimize_coordinates(product, forcefield, fixed=unchanged)
# The molecule is returned together with a list of atom indices of the atoms
# that are identical to those
# in the reactants. This list can be used in subsequent partial optimization of the molecule
if not return_rdmol:
product = from_rdmol(product)
product.properties.orig_atoms = [a + 1 for a in unchanged]
return product
def gen_coords(plamsmol):
"""Calculate 3D positions only for atoms without coordinates"""
rdmol = to_rdmol(plamsmol)
unchanged = gen_coords_rdmol(rdmol)
conf = rdmol.GetConformer()
for a in range(len(plamsmol.atoms)):
pos = conf.GetAtomPosition(a)
atom = plamsmol.atoms[a]
atom._setx(pos.x)
atom._sety(pos.y)
atom._setz(pos.z)
return [a + 1 for a in unchanged]
@requires_optional_package("rdkit")
def gen_coords_rdmol(rdmol):
from rdkit.Chem import AllChem
ref = rdmol.__copy__()
conf = rdmol.GetConformer()
coordDict = {}
unchanged = []
maps = []
# Put known coordinates in coordDict
for i in range(rdmol.GetNumAtoms()):
pos = conf.GetAtomPosition(i)
if (-0.0001 < pos.x < 0.0001) and (-0.0001 < pos.y < 0.0001) and (-0.0001 < pos.z < 0.0001):
continue # atom without coordinates
coordDict[i] = pos
unchanged.append(i)
maps.append((i, i))
# compute coordinates for new atoms, keeping known coordinates
rms = 1
rs = 1
# repeat embedding and alignment until the rms of mapped atoms is sufficiently small
if rdmol.GetNumAtoms() > len(maps):
while rms > 0.1:
AllChem.EmbedMolecule(rdmol, coordMap=coordDict, randomSeed=rs, useBasicKnowledge=True)
# align new molecule to original coordinates
rms = AllChem.AlignMol(rdmol, ref, atomMap=maps)
rs += 1
return unchanged
@requires_optional_package("rdkit")
def optimize_coordinates(rdkit_mol, forcefield, fixed=[]):
from rdkit import Chem
from rdkit.Chem import AllChem
def MMFFminimize():
ff = AllChem.MMFFGetMoleculeForceField(rdkit_mol, AllChem.MMFFGetMoleculeProperties(rdkit_mol))
for f in fixed:
ff.AddFixedPoint(f)
try:
ff.Minimize()
except:
warn("MMFF geometry optimization failed for molecule: " + Chem.MolToSmiles(rdkit_mol))
def UFFminimize():
ff = AllChem.UFFGetMoleculeForceField(rdkit_mol, ignoreInterfragInteractions=True)
for f in fixed:
ff.AddFixedPoint(f)
try:
ff.Minimize()
except:
warn("UFF geometry optimization failed for molecule: " + Chem.MolToSmiles(rdkit_mol))
optimize_molecule = {"uff": UFFminimize, "mmff": MMFFminimize}[forcefield]
Chem.SanitizeMol(rdkit_mol)
optimize_molecule()
return
@requires_optional_package("rdkit")
def write_molblock(plams_mol, file=sys.stdout):
from rdkit import Chem
file.write(Chem.MolToMolBlock(to_rdmol(plams_mol)))
[docs]@requires_optional_package("rdkit")
def readpdb(pdb_file, sanitize=True, removeHs=False, proximityBonding=False, return_rdmol=False):
"""
Generate a molecule from a PDB file
:param pdb_file: The PDB file to read
:type pdb_file: path- or file-like
:param bool sanitize:
:param bool removeHs: Hydrogens are removed if True
:param bool return_rdmol: return a RDKit molecule if true, otherwise a PLAMS molecule
:return: The molecule
:rtype: |Molecule| or rdkit.Chem.Mol
"""
from rdkit import Chem
try:
pdb_file = open(pdb_file, "r")
except TypeError:
pass # pdb_file is a file-like object... hopefully
pdb_mol = Chem.MolFromPDBBlock(pdb_file.read(), sanitize=sanitize, removeHs=removeHs)
return pdb_mol if return_rdmol else from_rdmol(pdb_mol)
[docs]@requires_optional_package("rdkit")
def writepdb(mol, pdb_file=sys.stdout):
"""
Write a PDB file from a molecule
:parameter mol: molecule to be exported to PDB
:type mol: |Molecule| or rdkit.Chem.Mol
:param pdb_file: The PDB file to write to, or a filename
:type pdb_file: path- or file-like
"""
from rdkit import Chem
try:
pdb_file = open(pdb_file, "w")
except TypeError:
pass # pdb_file is a file-like object... hopefully
mol = to_rdmol(mol, sanitize=False)
pdb_file.write(Chem.MolToPDBBlock(mol))
[docs]@requires_optional_package("rdkit")
def add_Hs(mol, forcefield=None, return_rdmol=False):
"""
Add hydrogens to protein molecules read from PDB.
Makes sure that the hydrogens get the correct PDBResidue info.
:param mol: Molecule to be protonated
:type mol: |Molecule| or rdkit.Chem.Mol
:param str forcefield: Specify 'uff' or 'mmff' to apply forcefield based
geometry optimization on new atoms.
:param bool return_rdmol: return a RDKit molecule if true, otherwise a PLAMS molecule
:return: A molecule with explicit hydrogens added
:rtype: |Molecule| or rdkit.Chem.Mol
"""
from rdkit import Chem
mol = to_rdmol(mol)
retmol = Chem.AddHs(mol)
for atom in retmol.GetAtoms():
if atom.GetPDBResidueInfo() is None and atom.GetSymbol() == "H":
bond = atom.GetBonds()[0]
if bond.GetBeginAtom().GetIdx() == atom.GetIdx:
connected_atom = bond.GetEndAtom()
else:
connected_atom = bond.GetBeginAtom()
try:
ResInfo = connected_atom.GetPDBResidueInfo()
if ResInfo is None:
continue # Segmentation faults are raised if ResInfo is None
atom.SetMonomerInfo(ResInfo)
atomname = "H" + atom.GetPDBResidueInfo().GetName()[1:]
atom.GetPDBResidueInfo().SetName(atomname)
except:
pass
unchanged = gen_coords_rdmol(retmol)
if forcefield:
optimize_coordinates(retmol, forcefield, fixed=unchanged)
return retmol if return_rdmol else from_rdmol(retmol)
@requires_optional_package("rdkit")
def add_fragment(rwmol, frag, rwmol_atom_idx=None, frag_atom_idx=None, bond_order=None):
from rdkit import Chem
molconf = rwmol.GetConformer()
fragconf = frag.GetConformer()
new_indices = []
for a in frag.GetAtoms():
new_index = rwmol.AddAtom(a)
new_indices.append(new_index)
molconf.SetAtomPosition(new_index, fragconf.GetAtomPosition(a.GetIdx()))
for b in frag.GetBonds():
ba = b.GetBeginAtomIdx()
ea = b.GetEndAtomIdx()
rwmol.AddBond(new_indices[ba], new_indices[ea], b.GetBondType())
if bond_order:
rwmol.AddBond(rwmol_atom_idx, new_indices[frag_atom_idx], Chem.BondType.values[bond_order])
rwmol.GetAtomWithIdx(new_indices[frag_atom_idx]).SetNumRadicalElectrons(0)
@requires_optional_package("rdkit")
def get_fragment(mol, indices, incl_expl_Hs=True, neutralize=True):
from rdkit import Chem
molconf = mol.GetConformer()
fragment = Chem.RWMol(Chem.Mol())
fragconf = Chem.Conformer()
# Put atoms in fragment
for i in indices:
atom = mol.GetAtomWithIdx(i)
new_index = fragment.AddAtom(atom)
pos = molconf.GetAtomPosition(i)
fragconf.SetAtomPosition(new_index, pos)
# Put bonds in fragment
for b in mol.GetBonds():
ba = b.GetBeginAtomIdx()
ea = b.GetEndAtomIdx()
if ba in indices and ea in indices:
fragment.AddBond(indices.index(ba), indices.index(ea), b.GetBondType())
continue
if not incl_expl_Hs:
continue
if ba in indices and mol.GetAtomWithIdx(ea).GetSymbol() == "H":
hi = fragment.AddAtom(mol.GetAtomWithIdx(ea))
fragconf.SetAtomPosition(hi, molconf.GetAtomPosition(ea))
fragment.AddBond(indices.index(ba), hi, Chem.BondType.SINGLE)
continue
if ea in indices and mol.GetAtomWithIdx(ba).GetSymbol() == "H":
hi = fragment.AddAtom(mol.GetAtomWithIdx(ba))
fragconf.SetAtomPosition(hi, molconf.GetAtomPosition(ba))
fragment.AddBond(indices.index(ea), hi, Chem.BondType.SINGLE)
ret_frag = fragment.GetMol()
Chem.SanitizeMol(ret_frag)
if neutralize:
for atom in ret_frag.GetAtoms():
nrad = atom.GetNumRadicalElectrons()
if nrad > 0:
atom.SetNumExplicitHs(atom.GetNumExplicitHs() + nrad)
atom.SetNumRadicalElectrons(0)
Chem.SanitizeMol(ret_frag)
ret_frag.AddConformer(fragconf)
return ret_frag
[docs]@requires_optional_package("rdkit")
def partition_protein(mol, residue_bonds=None, split_heteroatoms=True, return_rdmol=False):
"""
Splits a protein molecule into capped amino acid fragments and caps.
:param mol: A protein molecule
:type mol: |Molecule| or rdkit.Chem.Mol
:param tuple residue_bonds: a tuple of pairs of residue number indicating which
peptide bonds to split. If none, split all peptide bonds.
:param bool split_heteroatoms: if True, all bonds between a heteroatom and
a non-heteroatom across residues are removed
:return: list of fragments, list of caps
"""
from rdkit import Chem
mol = to_rdmol(mol)
caps = []
em = Chem.RWMol(mol)
if split_heteroatoms:
for bond in mol.GetBonds():
resinfa = bond.GetBeginAtom().GetPDBResidueInfo()
resinfb = bond.GetEndAtom().GetPDBResidueInfo()
if resinfa.GetIsHeteroAtom() is not resinfb.GetIsHeteroAtom():
if resinfa.GetResidueNumber() != resinfb.GetResidueNumber():
em.RemoveBond(bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())
# Split peptide bonds
pept_bond = Chem.MolFromSmarts("[C;X4;H1,H2][CX3](=O)[NX3][C;X4;H1,H2][CX3](=O)")
for match in mol.GetSubstructMatches(pept_bond):
if residue_bonds:
resa = mol.GetAtomWithIdx(match[1]).GetPDBResidueInfo().GetResidueNumber()
resb = mol.GetAtomWithIdx(match[3]).GetPDBResidueInfo().GetResidueNumber()
if (resa, resb) not in residue_bonds and (resb, resa) not in residue_bonds:
continue
cap = get_fragment(mol, match[0:5])
cap = add_Hs(cap, return_rdmol=True)
caps.append(cap if return_rdmol else from_rdmol(cap))
cap_o_ind = cap.GetSubstructMatch(Chem.MolFromSmarts("[C;X4][CX3]=O"))
cap_o = get_fragment(cap, cap_o_ind, neutralize=False)
cap_n_ind = cap.GetSubstructMatch(Chem.MolFromSmarts("O=[CX3][NX3][C;X4]"))[2:]
cap_n = get_fragment(cap, cap_n_ind, neutralize=False)
em.RemoveBond(match[1], match[3])
add_fragment(em, cap_o, match[3], 1, 1)
add_fragment(em, cap_n, match[1], 0, 1)
# Split disulfide bonds
ss_bond = Chem.MolFromSmarts("[C;X4;H1,H2]SS[C;X4;H1,H2]")
for match in mol.GetSubstructMatches(ss_bond):
cap = get_fragment(mol, match[0:5])
cap = add_Hs(cap, return_rdmol=True)
caps.append(cap if return_rdmol else from_rdmol(cap))
cap_s_ind = cap.GetSubstructMatch(Chem.MolFromSmarts("[C;X4]SS[C;X4]"))
cap_s1 = get_fragment(cap, cap_s_ind[0:2], neutralize=False)
cap_s2 = get_fragment(cap, cap_s_ind[2:4], neutralize=False)
em.RemoveBond(match[1], match[2])
add_fragment(em, cap_s1, match[2], 1, 1)
add_fragment(em, cap_s2, match[1], 0, 1)
frags = Chem.GetMolFrags(em.GetMol(), asMols=True, sanitizeFrags=False)
if not return_rdmol:
frags = [from_rdmol(frag) for frag in frags]
return frags, caps
@requires_optional_package("rdkit")
def charge_AAs(mol, return_rdmol=False):
from rdkit import Chem
ionizations = {"ARG_NH2": 1, "LYS_NZ": 1, "GLU_OE2": -1, "ASP_OD2": -1}
mol = to_rdmol(mol)
for atom in mol.GetAtoms():
resinfo = atom.GetPDBResidueInfo()
res_atom = resinfo.GetResidueName() + "_" + resinfo.GetName().strip()
try:
atom.SetFormalCharge(ionizations[res_atom])
Chem.SanitizeMol(mol)
except KeyError:
pass
Chem.SanitizeMol(mol)
return mol if return_rdmol else from_rdmol(mol)
[docs]def get_backbone_atoms(mol):
"""
Return a list of atom indices corresponding to the backbone atoms in a peptide molecule.
This function assumes PDB information in properties.pdb_info of each atom, which is the case
if the molecule is generated with the "readpdb" or "from_sequence" functions.
:parameter mol: a peptide molecule
:type mol: |Molecule| or rdkit.Chem.Mol
:return: a list of atom indices
:rtype: list
"""
mol = from_rdmol(mol)
backbone = ["N", "CA", "C", "O"]
return [a for a in range(1, len(mol) + 1) if str(mol[a].properties.pdb_info.Name).strip() in backbone]
[docs]@requires_optional_package("rdkit")
def get_substructure(mol, func_list):
"""
Search for functional groups within a molecule based on a list of reference functional groups.
SMILES strings, PLAMS and/or RDKit molecules can be used interchangeably in "func_list".
Example:
.. code:: python
>>> mol = from_smiles('OCCO') # Ethylene glycol
>>> func_list = ['[H]O', 'C[N+]', 'O=PO']
>>> get_substructure(mol, func_list)
{'[H]O': [(<scm.plams.mol.atom.Atom at 0x125183518>,
<scm.plams.mol.atom.Atom at 0x1251836a0>),
(<scm.plams.mol.atom.Atom at 0x125183550>,
<scm.plams.mol.atom.Atom at 0x125183240>)]}
:parameter mol: A PLAMS molecule.
:type mol: |Molecule|
:parameter list func_list: A list of functional groups.
Functional groups can be represented by SMILES strings, PLAMS and/or RDKit molecules.
:return: A dictionary with functional groups from "func_list" as keys and a list of n-tuples
with matching PLAMS |Atom| as values.
"""
from rdkit import Chem
def _to_rdmol(functional_group):
"""Turn a SMILES strings, RDKit or PLAMS molecules into an RDKit molecule."""
if isinstance(functional_group, str):
# RDKit tends to remove explicit hydrogens if SANITIZE_ADJUSTHS is enabled
sanitize = Chem.SanitizeFlags.SANITIZE_ALL ^ Chem.SanitizeFlags.SANITIZE_ADJUSTHS
ret = Chem.MolFromSmiles(functional_group, sanitize=False)
Chem.rdmolops.SanitizeMol(ret, sanitizeOps=sanitize)
return ret
elif isinstance(functional_group, Molecule):
return to_rdmol(functional_group)
elif isinstance(functional_group, Chem.Mol):
return functional_group
raise TypeError(
"get_substructure: "
+ str(type(functional_group))
+ " is not a supported \
object type"
)
def _get_match(mol, rdmol, functional_group):
"""Perform a substructure match on "mol".
If a match is found, return a list of n-tuples consisting PLAMS |Atom|.
Otherwise return False."""
matches = rdmol.GetSubstructMatches(functional_group)
if matches:
return [tuple(mol[j + 1] for j in idx_tup) for idx_tup in matches]
return False
rdmol = to_rdmol(mol)
rdmol_func_list = [_to_rdmol(i) for i in func_list]
gen = (_get_match(mol, rdmol, i) for i in rdmol_func_list)
return {key: value for key, value in zip(func_list, gen) if value}
[docs]def yield_coords(rdmol, id=-1):
"""Take an rdkit molecule and yield its coordinates as 3-tuples.
.. code-block:: python
>>> from scm.plams import yield_coords
>>> from rdkit import Chem
>>> rdmol = Chem.Mol(...) # e.g. Methane
>>> for xyz in yield_coords(rdmol):
... print(xyz)
(-0.0, -0.0, -0.0)
(0.6405, 0.6405, -0.6405)
(0.6405, -0.6405, 0.6405)
(-0.6405, 0.6405, 0.6405)
(-0.6405, -0.6405, -0.6405)
The iterator produced by this function can, for example, be passed to
:meth:`Molecule.from_array()<scm.plams.mol.molecule.Molecule.from_array>`
the update the coordinates of a PLAMS Molecule in-place.
.. code-block:: python
>>> from scm.plams import Molecule
>>> mol = Molecule(...)
>>> xyz_iterator = yield_coords(rdmol)
>>> mol.from_array(xyz_iterator)
:parameter rdmol: An RDKit mol.
:type rdmol: rdkit.Chem.Mol
:parameter int id: The ID of the desired conformer.
:return: An iterator yielding 3-tuples with *rdmol*'s Cartesian coordinates.
:rtype: iterator
"""
conf = rdmol.GetConformer(id=id)
for atom in rdmol.GetAtoms():
pos = conf.GetAtomPosition(atom.GetIdx())
yield (pos.x, pos.y, pos.z)
@add_to_class(Molecule)
def assign_chirality(self):
"""
Assigns stereo-info to PLAMS molecule by invoking RDKIT
"""
rd_mol = to_rdmol(self, assignChirality=True)
pl_mol = from_rdmol(rd_mol)
# Add R/S info to self
for iat, pl_atom in enumerate(pl_mol.atoms):
# Check for R/S information
if pl_atom.properties.rdkit.stereo:
self.atoms[iat].properties.rdkit.stereo = pl_atom.properties.rdkit.stereo
# Add cis/trans information to self
for ibond, pl_bond in enumerate(pl_mol.bonds):
if pl_bond.properties.rdkit.stereo:
self.bonds[ibond] = pl_bond.properties.rdkit.stereo
@add_to_class(Molecule)
@requires_optional_package("rdkit")
def get_chirality(self):
"""
Returns the chirality of the atoms
"""
from rdkit import Chem
rd_mol = to_rdmol(self, assignChirality=True)
return Chem.FindMolChiralCenters(rd_mol, force=True, includeUnassigned=True)
[docs]@requires_optional_package("rdkit")
def canonicalize_mol(mol, inplace=False, **kwargs):
r"""Take a PLAMS molecule and sort its atoms based on their canonical rank.
Example:
.. code:: python
>>> from scm.plams import Molecule, canonicalize_mol
# Methane
>>> mol: Molecule = ...
>>> print(mol)
Atoms:
1 H 0.640510 0.640510 -0.640510
2 H 0.640510 -0.640510 0.640510
3 C 0.000000 0.000000 0.000000
4 H -0.640510 0.640510 0.640510
5 H -0.640510 -0.640510 -0.640510
>>> print(canonicalize_mol(mol))
Atoms:
1 C 0.000000 0.000000 0.000000
2 H -0.640510 -0.640510 -0.640510
3 H -0.640510 0.640510 0.640510
4 H 0.640510 -0.640510 0.640510
5 H 0.640510 0.640510 -0.640510
:parameter mol: The to-be canonicalized molecule.
:type mol: |Molecule|
:parameter bool inplace: Whether to sort the atoms inplace or to return a new molecule.
:parameter \**kwargs: Further keyword arguments for rdkit.Chem.CanonicalRankAtoms_.
:return: Either ``None`` or a newly sorted molecule, depending on the value of ``inplace``.
:rtype: None or |Molecule|
.. _rdkit.Chem.CanonicalRankAtoms: https://www.rdkit.org/docs/source/rdkit.Chem.rdmolfiles.html#rdkit.Chem.rdmolfiles.CanonicalRankAtoms
"""
from rdkit import Chem
if not isinstance(mol, Molecule):
raise TypeError("`mol` expected a plams Molecule")
rdmol = to_rdmol(mol)
idx_rank = Chem.CanonicalRankAtoms(rdmol, **kwargs)
if inplace:
mol.atoms = [at for _, at in sorted(zip(idx_rank, mol.atoms), reverse=True)]
return None
else:
ret = mol.copy()
ret.atoms = [at for _, at in sorted(zip(idx_rank, ret.atoms), reverse=True)]
return ret
[docs]@requires_optional_package("rdkit")
@requires_optional_package("PIL")
def to_image(
mol: Molecule,
remove_hydrogens: bool = True,
filename: Optional[str] = None,
fmt: str = "svg",
size: Sequence[int] = (200, 100),
as_string: bool = True,
):
"""
Convert single molecule to single image object
* ``mol`` -- PLAMS Molecule object
* ``remove_hydrogens`` -- Wether or not to remove the H-atoms from the image
* ``filename`` -- Optional: Name of image file to be created.
* ``fmt`` -- One of "svg", "png", "eps", "pdf", "jpeg"
* ``size`` -- Tuple/list containing width and height of image in pixels.
* ``as_string`` -- Returns the image as a string or bytestring. If set to False, the original format
will be returned, which can be either a PIL image or SVG text
We do this because after converting a PIL image to a bytestring it is not possible
to further edit it (with our version of PIL).
* Returns -- Image text file / binary of image text file / PIL Image object.
"""
from io import BytesIO
from PIL import Image
from rdkit.Chem import Draw
from rdkit.Chem.Draw import rdMolDraw2D
from rdkit.Chem.Draw import MolsToGridImage
extensions = ["svg", "png", "eps", "pdf", "jpeg"]
classes: Dict[str, Any] = {}
classes["svg"] = _MolsToGridSVG
for ext in extensions[1:]:
classes[ext] = None
# PNG can only be created in this way with later version of RDKit
if hasattr(rdMolDraw2D, "MolDraw2DCairo"):
for ext in extensions[1:]:
classes[ext] = MolsToGridImage
# Determine the type of image file
if filename is not None:
if "." in filename:
extension = filename.split(".")[-1]
if extension in classes.keys():
fmt = extension
else:
msg = [f"Image type {extension} not available."]
msg += [f"Available extensions are: {' '.join(extensions)}"]
raise Exception("\n".join(msg))
else:
filename = ".".join([filename, fmt])
if fmt not in classes.keys():
raise Exception(f"Image type {fmt} not available.")
rdmol = _rdmol_for_image(mol, remove_hydrogens)
# Draw the image
if classes[fmt.lower()] is None:
# With AMS version of RDKit MolsToGridImage fails for eps (because of paste)
img = Draw.MolToImage(rdmol, size=size)
buf = BytesIO()
img.save(buf, format=fmt)
img_text = buf.getvalue()
else:
# This fails for a C=C=O molecule, with AMS rdkit version
img = classes[fmt.lower()]([rdmol], molsPerRow=1, subImgSize=size)
img_text = img
if isinstance(img, Image.Image):
buf = BytesIO()
img.save(buf, format=fmt)
img_text = buf.getvalue()
# If I do not make this correction to the SVG text, it is not readable in JupyterLab
if fmt.lower() == "svg":
img_text = _correct_svg(img_text)
# Write to file, if required
if filename is not None:
mode = "w"
if isinstance(img_text, bytes):
mode = "wb"
with open(filename, mode) as outfile:
outfile.write(img_text)
if as_string:
img = img_text
return img
[docs]@requires_optional_package("rdkit")
@requires_optional_package("PIL")
def get_reaction_image(
reactants: Sequence[Molecule],
products: Sequence[Molecule],
filename: Optional[str] = None,
fmt: str = "svg",
size: Sequence[int] = (200, 100),
as_string: bool = True,
):
"""
Create a 2D reaction image from reactants and products (PLAMS molecules)
* ``reactants`` -- Iterable of PLAMS Molecule objects representing the reactants.
* ``products`` -- Iterable of PLAMS Molecule objects representing the products.
* ``filename`` -- Optional: Name of image file to be created.
* ``fmt`` -- The format of the image (svg, png, eps, pdf, jpeg).
The extension in the filename, if provided, takes precedence.
* ``size`` -- Tuple/list containing width and height of image in pixels.
* ``as_string`` -- Returns the image as a string or bytestring. If set to False, the original format
will be returned, which can be either a PIL image or SVG text
* Returns -- SVG image text file.
"""
extensions = ["svg", "png", "eps", "pdf", "jpeg"]
# Determine the type of image file
if filename is not None:
if "." in filename:
extension = filename.split(".")[-1]
if extension in extensions:
fmt = extension
else:
msg = [f"Image type {extension} not available."]
msg += [f"Available extensions are: {' '.join(extensions)}"]
raise Exception("\n".join(msg))
else:
filename = ".".join([filename, fmt])
if fmt.lower() not in extensions:
raise Exception(f"Image type {fmt} not available.")
# Get the actual image
width = size[0]
height = size[1]
if fmt.lower() == "svg":
img_text = _get_reaction_image_svg(reactants, products, width, height)
else:
img_text = _get_reaction_image_pil(reactants, products, fmt, width, height, as_string=as_string)
# Write to file, if required
if filename is not None:
mode = "w"
if isinstance(img_text, bytes):
mode = "wb"
with open(filename, mode) as outfile:
outfile.write(img_text)
return img_text
def _get_reaction_image_svg(
reactants: Sequence[Molecule], products: Sequence[Molecule], width: int = 200, height: int = 100
):
"""
Create a 2D reaction image from reactants and products (PLAMS molecules)
* ``reactants`` -- Iterable of PLAMS Molecule objects representing the reactants.
* ``products`` -- Iterable of PLAMS Molecule objects representing the products.
* Returns -- SVG image text file.
"""
from rdkit import Chem
def svg_arrow(x1, y1, x2, y2, prefix=""):
"""
The reaction arrow in html format
"""
# The arrow head
l = ['<%sdefs> <%smarker id="arrow" viewBox="0 0 10 10" refX="5" refY="5" ' % (prefix, prefix)]
l += ['markerWidth="6" markerHeight="6" ']
l += ['orient="auto-start-reverse"> ']
l += ['<%spath d="M 0 0 L 10 5 L 0 10 z" /></%smarker></%sdefs>' % (prefix, prefix, prefix)]
arrow = "".join(l)
# The line
l = ['<%sline x1="%i" y1="%i" x2="%i" y2="%i" ' % (prefix, x1, y1, x2, y2)]
l += ['stroke="black" marker-end="url(#arrow)" />']
line = "".join(l)
return [arrow, line]
def add_plus_signs_svg(img_text, width, height, nmols, nreactants, prefix=""):
"""
Add the lines with + signs to the SVG image
"""
y = int(0.55 * height)
t = []
for i in range(nmols - 1):
x = int(((i + 1) * width) - (0.1 * width))
if i + 1 in (nreactants, nreactants + 1):
continue
t += ['<%stext x="%i" y="%i" font-size="16">+</%stext>' % (prefix, x, y, prefix)]
lines = img_text.split("\n")
lines = lines[:-2] + t + lines[-2:]
return "\n".join(lines)
def add_arrow_svg(img_text, width, height, nreactants, prefix=""):
"""
Add the arrow to the SVG image
"""
y = int(0.5 * height)
x1 = int((nreactants * width) + (0.3 * width))
x2 = int((nreactants * width) + (0.7 * width))
t = svg_arrow(x1, y, x2, y, prefix)
lines = img_text.split("\n")
lines = lines[:-2] + t + lines[-2:]
return "\n".join(lines)
# Get the rdkit molecules
rdmols = [_rdmol_for_image(mol) for mol in reactants]
rdmols += [Chem.Mol()] # This is where the arrow will go
rdmols += [_rdmol_for_image(mol) for mol in products]
nmols = len(rdmols)
# Place the molecules in a row of images
subimg_size = [width, height]
kwargs = {"legendFontSize": 16} # ,"legendFraction":0.1}
img_text = _MolsToGridSVG(rdmols, molsPerRow=nmols, subImgSize=subimg_size, **kwargs)
img_text = _correct_svg(img_text)
# Add + and =>
nreactants = len(reactants)
img_text = add_plus_signs_svg(img_text, width, height, nmols, nreactants)
img_text = add_arrow_svg(img_text, width, height, nreactants)
return img_text
def _get_reaction_image_pil(
reactants: Sequence[Molecule],
products: Sequence[Molecule],
fmt: str,
width: int = 200,
height: int = 100,
as_string: bool = True,
):
"""
Create a 2D reaction image from reactants and products (PLAMS molecules)
* ``reactants`` -- Iterable of PLAMS Molecule objects representing the reactants.
* ``products`` -- Iterable of PLAMS Molecule objects representing the products.
* Returns -- SVG image text file.
"""
from io import BytesIO
from PIL import Image
from PIL import ImageDraw
from rdkit import Chem
from rdkit.Chem.Draw import rdMolDraw2D, MolsToGridImage
def add_arrow_pil(img, width, height, nreactants):
"""
Add the arrow to the PIL image
"""
y1 = int(0.5 * height)
y2 = y1
x1 = int((nreactants * width) + (0.3 * width))
x2 = int((nreactants * width) + (0.7 * width))
# Draw a line
black = (0, 0, 0)
draw = ImageDraw.Draw(img)
draw.line(((x1, y1), (x2, y2)), fill=128)
# Draw the arrow head
headscale = 20
xshift = width // headscale
yshift = img.size[1] // headscale
p1 = (x2, y2 + yshift)
p2 = (x2, y2 - yshift)
p3 = (x2 + xshift, y2)
draw.polygon((p1, p2, p3), fill=black)
return img
def add_plus_signs_pil(img, width, height, nmols, nreactants):
"""
Add the lines with + signs to the SVG image
"""
black = (0, 0, 0)
I1 = ImageDraw.Draw(img)
y = int(0.5 * height)
# myfont = ImageFont.truetype('FreeMono.ttf', 25)
for i in range(nmols):
x = int(((i + 1) * width) - (0.05 * width))
if i + 1 in (nreactants, nreactants + 1):
continue
# I1.text((x, y), "+", font=myfont, fill=black)
I1.text((x, y), "+", fill=black)
return img
def join_pil_images(pil_images):
"""
Create a new image which connects the ones above with text
"""
white = (255, 255, 255)
widths = [img.width for img in pil_images]
width = sum(widths)
height = max([img.height for img in pil_images])
final_img = Image.new("RGB", (width, height), white)
# Concatenate the PIL images
for i, img in enumerate(pil_images):
pos = sum(widths[:i])
h = int((height - img.height) / 2)
final_img.paste(img, (pos, h))
return final_img
nreactants = len(reactants)
nmols = nreactants + len(products)
if not hasattr(rdMolDraw2D, "MolDraw2DCairo"):
# We are working with the old AMS version of RDKit
white = (255, 255, 255)
rimages = [to_image(mol, fmt=fmt, as_string=False) for i, mol in enumerate(reactants)]
pimages = [to_image(mol, fmt=fmt, as_string=False) for i, mol in enumerate(products)]
blanc = Image.new("RGB", (width, height), white)
# Get the image (with arrow)
nreactants = len(reactants)
all_images = rimages + [blanc] + pimages
img = join_pil_images(all_images)
else:
# We have a later version of RDKit that can regulate the font sizes
rdmols = [_rdmol_for_image(mol) for mol in reactants]
rdmols += [Chem.Mol()] # This is where the arrow will go
rdmols += [_rdmol_for_image(mol) for mol in products]
# Place the molecules in a row of images
subimg_size = [width, height]
kwargs = {"legendFontSize": 16} # ,"legendFraction":0.1}
img = MolsToGridImage(rdmols, molsPerRow=nmols + 1, subImgSize=subimg_size, **kwargs)
# Add + and =>
img = add_plus_signs_pil(img, width, height, nmols, nreactants)
img = add_arrow_pil(img, width, height, nreactants)
# Get the bytestring
img_text = img
if as_string:
buf = BytesIO()
img.save(buf, format=fmt)
img_text = buf.getvalue()
return img_text
def _correct_svg(image):
"""
Correct for a bug in the AMS rdkit created SVG file
"""
if not "svg:" in image:
return image
image = image.replace("svg:", "")
lines = image.split("\n")
for iline, line in enumerate(lines):
if "xmlns:svg=" in line:
lines[iline] = line.replace("xmlns:svg", "xmlns")
break
image = "\n".join(lines)
return image
def _presanitize(mol, rdmol):
"""
Change bonding and atom charges to avoid failed sanitization
Note: Used by to_rdmol
"""
from rdkit import Chem
mol = mol.copy()
for i in range(10):
try:
rdmol_test = copy.deepcopy(rdmol)
Chem.SanitizeMol(rdmol_test)
stored_exc = None
break
except ValueError as exc:
stored_exc = exc
text = repr(exc)
# Fix the problem
rdmol, bonds, charges = _kekulize(mol, rdmol, text)
# print ("REB bonds charges: ",bonds, charges)
# print (rdmol.Debug())
# rdmol = to_rdmol(mol, sanitize=False)
# print (rdmol.Debug())
if stored_exc is not None:
raise stored_exc
else:
rdmol = rdmol_test
return rdmol
def _kekulize(mol, rdmol, text, use_dfs=True):
"""
Kekulize the atoms indicated as problematic by RDKit
* ``mol`` - PLAMS molecule
* ``text`` - Sanitation error produced by RDKit
Note: Returns the changes in bond orders and atomic charges, that will make sanitation succeed.
"""
from rdkit import Chem
# Find the indices of the problematic atoms
indices = _find_aromatic_sequence(rdmol, text)
if indices is None:
return rdmol, {}, {}
# Set the bond orders along the chain to 2, 1, 2, 1,...
altered_bonds = {}
if len(indices) > 1:
emol = Chem.RWMol(rdmol)
if use_dfs:
# It may be better to create the tree first, and then start at a leaf
altered_bonds = _alter_aromatic_bonds(emol, indices[0])
indices = list(set([ind for tup in altered_bonds.keys() for ind in tup]))
else:
indices = _order_atom_indices(emol, indices)
altered_bonds = _alter_bonds_along_chain(emol, indices)
_adjust_atom_aromaticity(emol, altered_bonds)
rdmol = emol.GetMol()
# Adjust the PLAMS molecule as well
for (iat, jat), order in altered_bonds.items():
for bond in mol.atoms[iat].bonds:
if mol.index(bond.other_end(mol.atoms[iat])) - 1 == jat:
bond.order = order
break
# If the atom at the chain end has the wrong bond order, give it a charge.
altered_charge = {}
atom_indices, dangling_bonds = _get_charged_atoms(rdmol, indices)
if len(atom_indices) > 0:
iat = atom_indices[0]
ndangling = dangling_bonds[iat]
altered_charge = _guess_atomic_charge(iat, ndangling, rdmol, mol)
rdmol.GetAtomWithIdx(iat).SetFormalCharge(altered_charge[iat])
for k, v in altered_charge.items():
mol.atoms[k].properties.rdkit.charge = v
return rdmol, altered_bonds, altered_charge
def _find_aromatic_sequence(rdmol, text):
"""
Find the sequence of atoms with 1.5 bond orders
"""
indices = None
lines = text.split("\n")
line = lines[-1]
if "Unkekulized atoms:" in line:
text = line.split("Unkekulized atoms:")[-1].split("\\n")[0]
if '"' in text:
text = text.split('"')[0]
indices = [int(w) for w in text.split()]
if len(indices) > 1:
return indices
line = "atom %i marked aromatic" % (indices[0])
iat: Any
if "marked aromatic" in line:
iat = int(line.split("atom")[-1].split()[0])
indices = [iat]
while iat is not None:
icurrent = iat
iat = None
for bond in rdmol.GetAtomWithIdx(icurrent).GetBonds():
if str(bond.GetBondType()) == "AROMATIC":
iat = bond.GetOtherAtomIdx(icurrent)
if iat in indices:
iat = None
continue
indices.append(iat)
break
elif "Explicit valence for atom" in line:
iat = int(line.split("#")[-1].split()[0])
indices = [iat]
return indices
def _alter_aromatic_bonds(emol, iat, depth=0, double_first=False):
"""
Switch all thearomitic bonds to single/double, starting at iat
* ``emol`` -- RDKit EditableMol type, for which bond orders will be changed
* ``iat`` -- Starting point for depth first search
"""
from collections import OrderedDict
from rdkit import Chem
from scm.plams import PeriodicTable as PT
# Use OrderedDict, so that the leaves of the tree
# will be at the end
bonds_changed = OrderedDict()
at = emol.GetAtomWithIdx(iat)
valence = PT.get_connectors(at.GetAtomicNum())
bonds = at.GetBonds()
are_aromatic = [str(b.GetBondType()) == "AROMATIC" for b in bonds]
int_orders = sum([b.GetBondTypeAsDouble() for i, b in enumerate(bonds) if not are_aromatic[i]])
numbonds = len([b for i, b in enumerate(bonds) if are_aromatic[i]])
valence -= int(int_orders)
# Here I place the double bond first.
# I could also start with a single bond instead.
orders = [1 for i in range(numbonds)]
if valence > numbonds:
for i in range(min(numbonds, valence - numbonds)):
if double_first:
orders[i] = 2
else:
orders[numbonds - i - 1] = 2
for i, bond in enumerate(bonds):
jat = bond.GetOtherAtomIdx(iat)
if are_aromatic[i]:
pair = tuple(sorted([iat, jat]))
order = orders.pop(0)
bond.SetBondType(Chem.BondType(order))
bond.SetIsAromatic(False) # This is necessary with the newer RDKit
bonds_changed[pair] = order
d = _alter_aromatic_bonds(emol, jat, depth + 1)
bonds_changed.update(d)
return bonds_changed
def _alter_bonds_along_chain(emol, indices):
"""
Along the chain of atoms (indices), alternate double and single bonds
"""
from scm.plams import PeriodicTable as PT
from rdkit import Chem
if len(indices) > 1:
# The first bond order is set to 2, if aromic
# Else it is flipped
first_bond = emol.GetBondBetweenAtoms(indices[0], indices[1])
if str(first_bond.GetBondType()) == "AROMATIC":
new_order = 2
else:
new_order = first_bond.GetBondTypeAsDouble()
new_order = (new_order % 2) + 1
# Unless this breaks valence rules.
iat = indices[0]
at = emol.GetAtomWithIdx(iat)
valence = PT.get_connectors(at.GetAtomicNum())
orders = [b.GetBondTypeAsDouble() for b in at.GetBonds()]
jat = indices[1]
at_next = emol.GetAtomWithIdx(jat)
valence_next = PT.get_connectors(at_next.GetAtomicNum())
orders_next = [b.GetBondTypeAsDouble() for b in at_next.GetBonds()]
if sum(orders) > valence or sum(orders_next) > valence_next:
new_order = 1
# Set the bond orders along the chain to 2, 1, 2, 1,...
altered_bonds = {}
for i, iat in enumerate(indices[:-1]):
bond = emol.GetBondBetweenAtoms(iat, indices[i + 1])
bond.SetBondType(Chem.BondType(new_order))
bond.SetIsAromatic(False) # This is necessary with the newer RDKit versions
altered_bonds[iat, indices[i + 1]] = new_order
new_order = ((new_order) % 2) + 1
return altered_bonds
def _order_atom_indices(rdmol, indices):
"""
Order the atomic indices so that they are consecutive along a bonded chain
"""
# Order the indices, so that they are consecutive in the molecule
start = 0
for i, iat in enumerate(indices):
at = rdmol.GetAtomWithIdx(iat)
neighbors = [b.GetOtherAtomIdx(iat) for b in at.GetBonds()]
relevant_neighbors = [jat for jat in neighbors if jat in indices]
if len(relevant_neighbors) == 1:
start = i
break
iat = indices[start]
atoms = [iat]
while 1:
at = rdmol.GetAtomWithIdx(iat)
neighbors = [b.GetOtherAtomIdx(iat) for b in at.GetBonds()]
relevant_neighbors = [jat for jat in neighbors if jat in indices and not jat in atoms]
if len(relevant_neighbors) == 0:
break
iat = relevant_neighbors[0]
atoms.append(iat)
if len(atoms) < len(indices):
raise Exception("The unkekulized atoms are not in a consecutive chain")
indices = atoms
return indices
def _adjust_atom_aromaticity(emol, altered_bonds):
"""
Assign aromaticity to the atoms based on the new bond orders
"""
indices = list(set([ind for tup in altered_bonds.keys() for ind in tup]))
for ind in indices:
at = emol.GetAtomWithIdx(ind)
if not at.GetIsAromatic():
continue
# Only change this if we are sure the atom is not aromatic anymore
aromatic = False
for bond in at.GetBonds():
if str(bond.GetBondType()) == "AROMATIC":
aromatic = True
break
if not aromatic:
at.SetIsAromatic(False)
def _get_charged_atoms(rdmol, indices):
"""
Locate the atoms that need a charge
"""
from scm.plams import PeriodicTable as PT
atom_indices = []
dangling_bonds = {}
for iat in indices[::-1]:
at = rdmol.GetAtomWithIdx(iat)
valence = PT.get_connectors(at.GetAtomicNum())
bonds = at.GetBonds()
orders = [b.GetBondTypeAsDouble() for b in bonds]
ndangling = int(valence - sum(orders))
if ndangling != 0:
dangling_bonds[iat] = ndangling
atom_indices.append(iat)
return atom_indices, dangling_bonds
def _guess_atomic_charge(iat, ndangling, rdmol, mol):
"""
Guess the best atomic charge for atom iat
"""
# Get the total charge from atomic charges already set
charges = [at.GetFormalCharge() for at in rdmol.GetAtoms()]
totcharge = sum(charges) - charges[iat]
# Here I hope that the estimated charge will be more reliable than the
# actual (user defined) system charge, but am not sure
est_charges = mol.guess_atomic_charges(adjust_to_systemcharge=False, depth=0)
molcharge = int(sum(est_charges))
molcharge = molcharge - totcharge
# Adjust the sign based on the estimated charge
sign = ndangling / (abs(ndangling))
if molcharge != 0:
sign = molcharge / (abs(molcharge))
elif est_charges[iat] != 0:
sign = est_charges[iat] / abs(est_charges[iat])
# Then use the over/under valence with the new sign as the atomic charge
charge = int(sign * abs(ndangling))
# Perhaps we tried this already
if charge == charges[iat]:
charge = -charge
altered_charge = {iat: charge}
return altered_charge
def _rdmol_for_image(mol, remove_hydrogens=True):
"""
Convert PLAMS molecule to an RDKit molecule specifically for a 2D image
"""
from rdkit.Chem import AllChem
from rdkit.Chem import RemoveHs
rdmol = to_rdmol(mol, presanitize=True)
# Flatten the molecule
AllChem.Compute2DCoords(rdmol)
# Remove the Hs only if there are carbon atoms in this system
# Otherwise this will turn an OH radical into a water molecule.
carbons = [i for i, at in enumerate(mol.atoms) if at.symbol in ["C", "Si"]]
if remove_hydrogens and len(carbons) > 0:
rdmol = RemoveHs(rdmol)
else:
for atom in rdmol.GetAtoms():
atom.SetNoImplicit(True)
ids = [c.GetId() for c in rdmol.GetConformers()]
for cid in ids:
rdmol.RemoveConformer(cid)
return rdmol
def _MolsToGridSVG(
mols,
molsPerRow=3,
subImgSize=(200, 200),
legends=None,
highlightAtomLists=None,
highlightBondLists=None,
drawOptions=None,
**kwargs,
):
"""
Replaces the old version of this function in our RDKit for a more recent one, with more options
"""
from rdkit.Chem.Draw import rdMolDraw2D
if legends is None:
legends = [""] * len(mols)
nRows = len(mols) // molsPerRow
if len(mols) % molsPerRow:
nRows += 1
fullSize = (molsPerRow * subImgSize[0], nRows * subImgSize[1])
d2d = rdMolDraw2D.MolDraw2DSVG(fullSize[0], fullSize[1], subImgSize[0], subImgSize[1])
if drawOptions is not None:
d2d.SetDrawOptions(drawOptions)
else:
dops = d2d.drawOptions()
for k, v in list(kwargs.items()):
if hasattr(dops, k):
setattr(dops, k, v)
del kwargs[k]
d2d.DrawMolecules(
list(mols),
legends=legends or None,
highlightAtoms=highlightAtomLists or [],
highlightBonds=highlightBondLists or [],
**kwargs,
)
d2d.FinishDrawing()
res = d2d.GetDrawingText()
return res