from typing import (
List,
Literal,
Optional,
overload,
TYPE_CHECKING,
Sequence,
Dict,
Any,
Union,
Tuple,
IO,
Iterable,
Generator,
)
import random
import sys
import copy
from warnings import warn
from collections import OrderedDict
from scm.plams.core.functions import log, requires_optional_package
from scm.plams.mol.atom import Atom
from scm.plams.mol.bond import Bond
from scm.plams.mol.molecule import Molecule
from scm.plams.core.errors import PlamsError
try:
from scm.base import ChemicalSystem
_has_scm_chemsys = True
except ImportError:
_has_scm_chemsys = False
if TYPE_CHECKING:
from rdkit.Chem import Mol as RDKitMol
from rdkit.Chem import Atom as RDKitAtom
from rdkit.Chem import Bond as RDKitBond
from rdkit.Chem import EditableMol as RDKitEdMol
from rdkit.Chem.AllChem import ChemicalReaction as RDKitReaction
from rdkit.Chem import Draw
from PIL import Image
__all__ = [
"add_Hs",
"apply_reaction_smarts",
"apply_template",
"gen_coords_rdmol",
"get_backbone_atoms",
"modify_atom",
"to_rdmol",
"from_rdmol",
"from_sequence",
"from_smiles",
"from_smarts",
"to_smiles",
"partition_protein",
"readpdb",
"writepdb",
"get_substructure",
"get_conformations",
"yield_coords",
"canonicalize_mol",
"to_image",
"get_reaction_image",
]
[docs]@requires_optional_package("rdkit")
def from_rdmol(rdkit_mol: "RDKitMol", confid: int = -1, properties: bool = True) -> Molecule:
"""
Translate an RDKit molecule into a PLAMS molecule type.
RDKit properties will be unpickled if their name ends with '_pickled'.
:parameter rdkit_mol: RDKit molecule
:type rdkit_mol: rdkit.Chem.Mol
:parameter int confid: conformer identifier from which to take coordinates
:parameter bool properties: If all Chem.Mol, Chem.Atom and Chem.Bond properties should be converted from RDKit to PLAMS format.
:return: a PLAMS molecule
:rtype: |Molecule|
"""
from rdkit import Chem
if isinstance(rdkit_mol, Molecule):
return rdkit_mol
# Create PLAMS molecule
plams_mol = Molecule()
total_charge = 0
try:
Chem.Kekulize(rdkit_mol)
except Exception:
pass
conf = rdkit_mol.GetConformer(id=confid)
# Add atoms and assign properties to the PLAMS atom if *properties* = True
for rd_atom in rdkit_mol.GetAtoms():
pos = conf.GetAtomPosition(rd_atom.GetIdx())
ch = rd_atom.GetFormalCharge()
pl_atom = Atom(rd_atom.GetAtomicNum(), coords=(pos.x, pos.y, pos.z), rdkit={"charge": ch})
if properties and rd_atom.GetPDBResidueInfo():
pl_atom.properties.rdkit.pdb_info = get_PDBResidueInfo(rd_atom)
plams_mol.add_atom(pl_atom)
total_charge += ch
# Check for R/S information
stereo = str(rd_atom.GetChiralTag())
if stereo == "CHI_TETRAHEDRAL_CCW":
pl_atom.properties.rdkit.stereo = "counter-clockwise"
elif stereo == "CHI_TETRAHEDRAL_CW":
pl_atom.properties.rdkit.stereo = "clockwise"
# Add bonds to the PLAMS molecule
for bond in rdkit_mol.GetBonds():
at1 = plams_mol.atoms[bond.GetBeginAtomIdx()]
at2 = plams_mol.atoms[bond.GetEndAtomIdx()]
plams_mol.add_bond(Bond(at1, at2, bond.GetBondTypeAsDouble()))
# Check for cis/trans information
stereo, bond_dir = str(bond.GetStereo()), str(bond.GetBondDir())
if stereo == "STEREOZ" or stereo == "STEREOCIS":
plams_mol.bonds[-1].properties.rdkit.stereo = "Z"
elif stereo == "STEREOE" or stereo == "STEREOTRANS":
plams_mol.bonds[-1].properties.rdkit.stereo = "E"
elif bond_dir == "ENDUPRIGHT":
plams_mol.bonds[-1].properties.rdkit.stereo = "up"
elif bond_dir == "ENDDOWNRIGHT":
plams_mol.bonds[-1].properties.rdkit.stereo = "down"
# Set charge and assign properties to PLAMS molecule and bonds if *properties* = True
plams_mol.properties.charge = total_charge
if properties:
prop_from_rdmol(plams_mol, rdkit_mol)
for rd_atom, plams_atom in zip(rdkit_mol.GetAtoms(), plams_mol):
prop_from_rdmol(plams_atom, rd_atom)
for rd_bond, plams_bond in zip(rdkit_mol.GetBonds(), plams_mol.bonds):
prop_from_rdmol(plams_bond, rd_bond)
return plams_mol
[docs]@requires_optional_package("rdkit")
def to_rdmol(
plams_mol: Molecule,
sanitize: bool = True,
properties: bool = True,
assignChirality: bool = False,
presanitize: bool = False,
) -> "RDKitMol":
"""
Translate a PLAMS molecule into an RDKit molecule type.
PLAMS |Molecule|, |Atom| or |Bond| properties are pickled if they are neither booleans, floats,
integers, floats nor strings, the resulting property names are appended with '_pickled'.
:parameter plams_mol: A PLAMS molecule
:parameter bool sanitize: Kekulize, check valencies, set aromaticity, conjugation and hybridization
:parameter bool properties: If all |Molecule|, |Atom| and |Bond| properties should be converted from PLAMS to RDKit format.
:parameter bool assignChirality: Assign R/S and cis/trans information, insofar as this was not yet present in the PLAMS molecule.
:parameter bool presanitize: Iteratively adjust bonding and atomic charges, to avoid failure of sanitization.
Only relevant is sanitize is set to True.
:type plams_mol: |Molecule|
:return: an RDKit molecule
:rtype: rdkit.Chem.Mol
"""
from rdkit import Chem, Geometry
if isinstance(plams_mol, Chem.Mol):
return plams_mol
# Create rdkit molecule
e = Chem.EditableMol(Chem.Mol())
# Add atoms and assign properties to the RDKit atom if *properties* = True
for pl_atom in plams_mol.atoms:
rd_atom = Chem.Atom(int(pl_atom.atnum))
if "rdkit" in pl_atom.properties:
if "charge" in pl_atom.properties.rdkit:
rd_atom.SetFormalCharge(pl_atom.properties.rdkit.charge)
if properties:
if "rdkit" in pl_atom.properties:
if "pdb_info" in pl_atom.properties.rdkit:
set_PDBresidueInfo(rd_atom, pl_atom.properties.rdkit.pdb_info)
for prop in pl_atom.properties.rdkit:
if prop not in ("charge", "pdb_info", "stereo"):
prop_to_rdmol(rd_atom, prop, pl_atom.properties.rdkit.get(prop))
prop_dic = {}
for prop in pl_atom.properties:
if prop != "rdkit":
prop_dic[prop] = pl_atom.properties.get(prop)
if len(prop_dic) > 0:
prop_to_rdmol(rd_atom, "plams", prop_dic)
# Check for R/S information
if pl_atom.properties.rdkit.stereo:
stereo = pl_atom.properties.rdkit.stereo.lower()
if stereo == "counter-clockwise":
rd_atom.SetChiralTag(Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CCW)
elif stereo == "clockwise":
rd_atom.SetChiralTag(Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CW)
e.AddAtom(rd_atom)
# Mapping of PLAMS bond orders to RDKit bond types:
def plams_to_rd_bonds(bo: float) -> int:
if 1.4 < bo < 1.6:
return 12 # bond type for aromatic bond
else:
return int(bo)
# Add bonds to the RDKit molecule
for bond in plams_mol.bonds:
a1 = plams_mol.atoms.index(bond.atom1)
a2 = plams_mol.atoms.index(bond.atom2)
e.AddBond(a1, a2, Chem.BondType(plams_to_rd_bonds(bond.order)))
rdmol = e.GetMol()
# Check for cis/trans information
for pl_bond, rd_bond in zip(plams_mol.bonds, rdmol.GetBonds()):
if pl_bond.properties.rdkit.stereo:
stereo = pl_bond.properties.rdkit.stereo.lower()
if stereo == "e" or stereo == "trans":
rd_bond.SetStereo(Chem.rdchem.BondStereo.STEREOE)
elif stereo == "z" or stereo == "cis":
rd_bond.SetStereo(Chem.rdchem.BondStereo.STEREOZ)
elif stereo == "up":
rd_bond.SetBondDir(Chem.rdchem.BondDir.ENDUPRIGHT)
elif stereo == "down":
rd_bond.SetBondDir(Chem.rdchem.BondDir.ENDDOWNRIGHT)
# Assign properties to RDKit molecule and bonds if *properties* = True
# All properties will be taken from 'rdkit' subsettings, except the molecular charge
if properties:
prop_dic = {}
for prop in plams_mol.properties:
if prop == "rdkit":
for rdprop in plams_mol.properties.rdkit:
prop_to_rdmol(rdmol, rdprop, plams_mol.properties.rdkit.get(rdprop))
else:
# prop_dic[prop] = {'plams':plams_mol.properties.get(prop)}
prop_dic[prop] = plams_mol.properties.get(prop)
if len(prop_dic) > 0:
prop_to_rdmol(rdmol, "plams", prop_dic)
prop_dic = {}
for pl_bond, rd_bond in zip(plams_mol.bonds, rdmol.GetBonds()):
for prop in pl_bond.properties:
if prop == "rdkit":
for rdprop in pl_bond.properties.rdkit:
if rdprop != "stereo":
prop_to_rdmol(rd_bond, rdprop, pl_bond.properties.rdkit.get(rdprop))
else:
prop_dic[prop] = pl_bond.properties.get(prop)
if len(prop_dic) > 0:
prop_to_rdmol(rd_bond, "plams", prop_dic)
if sanitize:
try:
if presanitize:
rdmol = _presanitize(plams_mol, rdmol)
else:
Chem.SanitizeMol(rdmol)
except ValueError as exc:
log("RDKit Sanitization Error.")
text = "Most likely this is a problem with the assigned bond orders: "
text += "Use chemical insight to adjust them."
log(text)
log("Note that the atom indices below start at zero, while the AMS-GUI indices start at 1.")
raise exc
conf = Chem.Conformer()
for i, atom in enumerate(plams_mol.atoms):
xyz = Geometry.Point3D(atom.x, atom.y, atom.z)
conf.SetAtomPosition(i, xyz)
rdmol.AddConformer(conf)
# REB: Assign all stereochemistry, if it wasn't already there
if assignChirality:
Chem.rdmolops.AssignAtomChiralTagsFromStructure(rdmol, confId=conf.GetId(), replaceExistingTags=False)
try:
Chem.AssignStereochemistryFrom3D(rdmol, confId=conf.GetId(), replaceExistingTags=False)
except AttributeError:
pass
return rdmol
[docs]@requires_optional_package("rdkit")
def to_smiles(plams_mol: Molecule, short_smiles: bool = True, **kwargs: Any) -> str:
"""
Returns the RDKit-generated SMILES string of a PLAMS molecule.
Note: SMILES strings are generated based on the molecule's connectivity. If the input PLAMS molecule does not contain any bonds, "guessed bonds" will be used.
:parameter plams_mol: A PLAMS |Molecule|
:parameter bool short_smiles: whether or not to use some RDKit sanitization to get shorter smiles (e.g. for a water molecule, short_smiles=True -> "O", short_smiles=False -> [H]O[H])
:parameter kwargs: With 'kwargs' you can provide extra optional parameters to the rdkit.Chem method 'MolToSmiles'. See the rdkit documentation for more info.
:return: the SMILES string
"""
from rdkit import Chem
if len(plams_mol.bonds) > 0:
mol_with_bonds = plams_mol
else:
mol_with_bonds = plams_mol.copy()
mol_with_bonds.guess_bonds()
rd_mol = to_rdmol(mol_with_bonds, sanitize=False)
# This sanitization black magic is needed for getting the "short, nice and clean" SMILES string.
# Without this, the SMILES string for water would be "[H]O[H]". With this is just "O"
if short_smiles:
s = Chem.rdmolops.SanitizeFlags
rdkitSanitizeOptions = (
s.SANITIZE_ADJUSTHS
or s.SANITIZE_CLEANUP
or s.SANITIZE_CLEANUPCHIRALITY
or s.SANITIZE_FINDRADICALS
or s.SANITIZE_PROPERTIES
or s.SANITIZE_SETAROMATICITY
or s.SANITIZE_SETCONJUGATION
or s.SANITIZE_SETHYBRIDIZATION
or s.SANITIZE_SYMMRINGS
)
Chem.rdmolops.AssignRadicals(rd_mol)
rd_mol = Chem.rdmolops.RemoveHs(rd_mol, updateExplicitCount=True, sanitize=False)
Chem.rdmolops.SanitizeMol(rd_mol, rdkitSanitizeOptions)
smiles = Chem.MolToSmiles(rd_mol, **kwargs)
return smiles
pdb_residue_info_items = [
"AltLoc",
"ChainId",
"InsertionCode",
"IsHeteroAtom",
"Name",
"Occupancy",
"ResidueName",
"ResidueNumber",
"SecondaryStructure",
"SegmentNumber",
"SerialNumber",
"TempFactor",
]
# 'MonomerType' was excluded because it is an rdkit type that cannot easilty be serialized
def get_PDBResidueInfo(rdkit_atom: "RDKitAtom") -> Dict[str, Any]:
pdb_info = {}
for item in pdb_residue_info_items:
get_function = "Get" + item
pdb_info[item] = rdkit_atom.GetPDBResidueInfo().__getattribute__(get_function)()
return pdb_info
@requires_optional_package("rdkit")
def set_PDBresidueInfo(rdkit_atom: "RDKitAtom", pdb_info: Dict[str, Any]) -> None:
from rdkit import Chem
atom_pdb_residue_info = Chem.AtomPDBResidueInfo()
for item, value in pdb_info.items():
set_function = "Set" + item
atom_pdb_residue_info.__getattribute__(set_function)(value)
rdkit_atom.SetMonomerInfo(atom_pdb_residue_info)
def prop_to_rdmol(rd_obj: Union["RDKitMol", "RDKitAtom", "RDKitBond"], propkey: str, propvalue: Any) -> None:
"""
Convert a single PLAMS property into an RDKit property.
:paramter pl_obj: A PLAMS object.
:type pl_obj: |Molecule|, |Atom| or |Bond|.
:parameter rd_obj: An RDKit object.
:type rd_obj: rdkit.Chem.Mol, rdkit.Chem.Atom or rdkit.Chem.Bond
:parameter str propkey: The |Settings| key of the PLAMS property.
"""
try:
import dill as pickle
except ImportError:
import pickle
obj = type(propvalue)
obj_dict = {bool: rd_obj.SetBoolProp, float: rd_obj.SetDoubleProp, int: rd_obj.SetIntProp, str: rd_obj.SetProp}
if obj_dict.get(obj):
obj_dict[obj](propkey, propvalue) # type: ignore[operator]
else:
name = propkey + "_pickled"
try:
rd_obj.SetProp(name, pickle.dumps(propvalue, 0).decode())
except (Exception, pickle.PicklingError):
pass
# ToDo: remove type ignore once mypy_path is enabled
@overload
def prop_from_rdmol(pl_obj: Bond, rd_obj: "RDKitBond") -> None: ...
@overload
def prop_from_rdmol(pl_obj: Atom, rd_obj: "RDKitAtom") -> None: ...
@overload
def prop_from_rdmol(pl_obj: Molecule, rd_obj: "RDKitMol") -> None: ...
def prop_from_rdmol(pl_obj: Union[Molecule, Atom, Bond], rd_obj: Union["RDKitMol", "RDKitAtom", "RDKitBond"]) -> None:
"""
Convert one or more RDKit properties into PLAMS properties.
:paramter pl_obj: A PLAMS object.
:type pl_obj: |Molecule|, |Atom| or |Bond|.
:parameter rd_obj: An RDKit object.
:type rd_obj: rdkit.Chem.Mol, rdkit.Chem.Atom or rdkit.Chem.Bond
"""
try:
import dill as pickle
except ImportError:
import pickle
prop_dict = rd_obj.GetPropsAsDict()
for propname in prop_dict.keys():
if propname == "plams_pickled":
plams_props = pickle.loads(prop_dict[propname].encode())
if not isinstance(plams_props, dict):
raise Exception("PLAMS property not properly stored in RDKit")
for key, value in plams_props.items():
pl_obj.properties[key] = value
else:
if propname == "__computedProps":
continue
if "_pickled" not in propname:
pl_obj.properties.rdkit[propname] = prop_dict[propname]
else:
prop = prop_dict[propname]
propname = propname.rsplit("_pickled", 1)[0]
propvalue = pickle.loads(prop.encode())
pl_obj.properties.rdkit[propname] = propvalue
@overload
def from_smiles(
smiles: str, nconfs: Literal[1] = ..., name: Optional[str] = ..., forcefield: Optional[str] = ..., rms: float = ...
) -> Molecule: ...
@overload
def from_smiles(
smiles: str, nconfs: int = ..., name: Optional[str] = ..., forcefield: Optional[str] = ..., rms: float = ...
) -> Union[Molecule, List[Molecule]]: ...
[docs]@requires_optional_package("rdkit")
def from_smiles(
smiles: str, nconfs: int = 1, name: Optional[str] = None, forcefield: Optional[str] = None, rms: float = 0.1
) -> Union[Molecule, List[Molecule]]:
"""
Generates PLAMS molecule(s) from a smiles strings.
:parameter str smiles: A smiles string
:parameter int nconfs: Number of conformers to be generated
:parameter str name: A name for the molecule
:parameter str forcefield: Choose 'uff' or 'mmff' forcefield for geometry optimization
and ranking of conformations. The default value None results in skipping of the
geometry optimization step.
:parameter float rms: Root Mean Square deviation threshold for
removing similar/equivalent conformations
:return: A molecule with hydrogens and 3D coordinates or a list of molecules if nconfs > 1
:rtype: |Molecule| or list of PLAMS Molecules
"""
from rdkit import Chem
smiles = str(smiles.split()[0])
smiles = Chem.CanonSmiles(smiles)
rdkit_mol = Chem.AddHs(Chem.MolFromSmiles(smiles))
rdkit_mol.SetProp("smiles", smiles)
return get_conformations(rdkit_mol, nconfs, name, forcefield, rms)
@overload
def from_smarts(
smarts: str, nconfs: Literal[1] = ..., name: Optional[str] = ..., forcefield: Optional[str] = ..., rms: float = ...
) -> Molecule: ...
@overload
def from_smarts(
smarts: str, nconfs: int = ..., name: Optional[str] = ..., forcefield: Optional[str] = ..., rms: float = ...
) -> Union[Molecule, List[Molecule]]: ...
[docs]@requires_optional_package("rdkit")
def from_smarts(
smarts: str, nconfs: int = 1, name: Optional[str] = None, forcefield: Optional[str] = None, rms: float = 0.1
) -> Union[Molecule, List[Molecule]]:
"""
Generates PLAMS molecule(s) from a smarts strings.
This allows for example to define hydrogens explicitly.
However it is less suitable for aromatic molecules (use from_smiles in that case).
:parameter str smarts: A smarts string
:parameter int nconfs: Number of conformers to be generated
:parameter str name: A name for the molecule
:parameter str forcefield: Choose 'uff' or 'mmff' forcefield for geometry
optimization and ranking of comformations. The default value None results
in skipping of the geometry optimization step.
:parameter float rms: Root Mean Square deviation threshold for removing
similar/equivalent conformations.
:return: A molecule with hydrogens and 3D coordinates or a list of molecules if nconfs > 1
:rtype: |Molecule| or list of PLAMS Molecules
"""
from rdkit import Chem
smiles = str(smarts.split()[0])
mol = Chem.MolFromSmarts(smiles)
Chem.SanitizeMol(mol)
molecule = Chem.AddHs(mol)
molecule.SetProp("smiles", smiles)
return get_conformations(molecule, nconfs, name, forcefield, rms)
@overload
@requires_optional_package("rdkit")
def get_conformations(
mol: Union[Molecule, "RDKitMol"],
nconfs: Literal[1] = 1,
name: Optional[str] = None,
forcefield: Optional[str] = None,
rms: float = -1,
enforceChirality: bool = False,
useExpTorsionAnglePrefs: str = "default",
constraint_ats: Optional[List[int]] = None,
EmbedParameters: str = "EmbedParameters",
randomSeed: int = 1,
best_rms: float = -1,
) -> Molecule: ...
@overload
@requires_optional_package("rdkit")
def get_conformations(
mol: Union[Molecule, "RDKitMol"],
nconfs: int = ...,
name: Optional[str] = None,
forcefield: Optional[str] = None,
rms: float = -1,
enforceChirality: bool = False,
useExpTorsionAnglePrefs: str = "default",
constraint_ats: Optional[List[int]] = None,
EmbedParameters: str = "EmbedParameters",
randomSeed: int = 1,
best_rms: float = -1,
) -> Union[List[Molecule], Molecule]: ...
@overload
@requires_optional_package("rdkit")
def from_sequence(
sequence: str,
nconfs: Literal[1] = 1,
name: Optional[str] = None,
forcefield: Optional[str] = None,
rms: float = 0.1,
) -> Molecule: ...
@overload
@requires_optional_package("rdkit")
def from_sequence(
sequence: str, nconfs: int, name: Optional[str] = None, forcefield: Optional[str] = None, rms: float = 0.1
) -> Union[List[Molecule], Molecule]: ...
[docs]@requires_optional_package("rdkit")
def from_sequence(
sequence: str, nconfs: int = 1, name: Optional[str] = None, forcefield: Optional[str] = None, rms: float = 0.1
) -> Union[List[Molecule], Molecule]:
"""
Generates PLAMS molecule from a peptide sequence.
Includes explicit hydrogens and 3D coordinates.
:parameter str sequence: A peptide sequence, e.g. 'HAG'
:parameter int nconfs: Number of conformers to be generated
:parameter str name: A name for the molecule
:parameter str forcefield: Choose 'uff' or 'mmff' forcefield for geometry
optimization and ranking of comformations. The default value None results
in skipping of the geometry optimization step.
:parameter float rms: Root Mean Square deviation threshold for removing
similar/equivalent conformations.
:return: A peptide molecule with hydrogens and 3D coordinates
or a list of molecules if nconfs > 1
:rtype: |Molecule| or list of PLAMS Molecules
"""
from rdkit import Chem
rdkit_mol = Chem.AddHs(Chem.MolFromSequence(sequence))
rdkit_mol.SetProp("sequence", sequence)
return get_conformations(rdkit_mol, nconfs, name, forcefield, rms)
@requires_optional_package("rdkit")
def calc_rmsd(mol1: Molecule, mol2: Molecule) -> float:
"""
Superimpose two molecules and calculate the root-mean-squared deviations of
the atomic positions.
The molecules should be identical, but the ordering of the atoms may differ.
:param mol1: Molecule 1
:param mol2: Molecule 2
:return: The rmsd after superposition
:rtype: float
"""
from rdkit.Chem import AllChem
rdkit_mol1 = to_rdmol(mol1)
rdkit_mol2 = to_rdmol(mol2)
try:
return AllChem.GetBestRMS(rdkit_mol1, rdkit_mol2) # type: ignore[attr-defined]
except:
return -999
[docs]@requires_optional_package("rdkit")
def modify_atom(mol: Molecule, idx: int, element: str) -> Molecule:
"""
Change atom "idx" in molecule "mol" to "element" and add or remove hydrogens accordingly
:parameter mol: molecule to be modified
:type mol: |Molecule| or rdkit.Chem.Mol
:parameter int idx: index of the atom to be modified
:parameter str element:
:return: Molecule with new element and possibly added or removed hydrogens
:rtype: |Molecule|
"""
from rdkit import Chem
rdmol = to_rdmol(mol)
if rdmol.GetAtomWithIdx(idx).GetSymbol() == element:
return mol
else:
e = Chem.EditableMol(rdmol)
for neighbor in reversed(rdmol.GetAtomWithIdx(idx - 1).GetNeighbors()):
if neighbor.GetSymbol() == "H":
e.RemoveAtom(neighbor.GetIdx())
e.ReplaceAtom(idx - 1, Chem.Atom(element))
newmol = e.GetMol()
Chem.SanitizeMol(newmol)
newmol = Chem.AddHs(newmol, addCoords=True)
return from_rdmol(newmol)
[docs]@requires_optional_package("rdkit")
def apply_template(mol: Molecule, template: str) -> Molecule:
"""
Modifies bond orders in PLAMS molecule according template smiles structure.
:parameter mol: molecule to be modified
:type mol: |Molecule| or rdkit.Chem.Mol
:parameter str template: smiles string defining the correct chemical structure
:return: Molecule with correct chemical structure and provided 3D coordinates
:rtype: |Molecule|
"""
from rdkit import Chem
rdmol = to_rdmol(mol, sanitize=False)
template_mol = Chem.AddHs(Chem.MolFromSmiles(template))
newmol = Chem.AllChem.AssignBondOrdersFromTemplate(template_mol, rdmol)
return from_rdmol(newmol)
@overload
@requires_optional_package("rdkit")
def apply_reaction_smarts(
mol: Union[Molecule, "RDKitMol"],
reaction_smarts: str,
complete: bool = False,
forcefield: Optional[str] = None,
return_rdmol: Literal[False] = False,
) -> Molecule: ...
@overload
@requires_optional_package("rdkit")
def apply_reaction_smarts(
mol: Union[Molecule, "RDKitMol"],
reaction_smarts: str,
complete: bool = False,
forcefield: Optional[str] = None,
return_rdmol: Literal[True] = ...,
) -> "RDKitMol": ...
@overload
@requires_optional_package("rdkit")
def apply_reaction_smarts(
mol: Union[Molecule, "RDKitMol"],
reaction_smarts: str,
complete: bool = False,
forcefield: Optional[str] = None,
return_rdmol: bool = ...,
) -> Union[Molecule, "RDKitMol"]: ...
[docs]@requires_optional_package("rdkit")
def apply_reaction_smarts(
mol: Union[Molecule, "RDKitMol"],
reaction_smarts: str,
complete: bool = False,
forcefield: Optional[str] = None,
return_rdmol: bool = False,
) -> Union[Molecule, "RDKitMol"]:
"""
Applies reaction smirks and returns product.
If returned as a PLAMS molecule, thismolecule.properties.orig_atoms
is a list of indices of atoms that have not been changed
(which can for example be used partially optimize new atoms only with the freeze keyword)
:parameter mol: molecule to be modified
:type mol: |Molecule| or rdkit.Chem.Mol
:parameter str reactions_smarts: Reactions smarts to be applied to molecule
:parameter complete: Apply reaction until no further changes occur or given
fraction of reaction centers have been modified
:type complete: bool or float (value between 0 and 1)
:parameter forcefield: Specify 'uff' or 'mmff' to apply forcefield based
geometry optimization of product structures.
:type forcefield: str
:param bool return_rdmol: return a RDKit molecule if true, otherwise a PLAMS molecule
:return: (product molecule, list of unchanged atoms)
:rtype: (|Molecule|, list of int)
"""
from rdkit import Chem
from rdkit.Chem import AllChem
def react(reactant: "RDKitMol", reaction: "RDKitReaction") -> List[Tuple["RDKitMol", Iterable[int]]]:
"""Apply reaction to reactant and return products"""
ps = reaction.RunReactants([reactant])
# if reaction doesn't apply, return the reactant
if len(ps) == 0:
return [(reactant, range(reactant.GetNumAtoms()))]
full = len(ps)
while complete: # when complete is True
# apply reaction until no further changes
r = random.randint(0, len(ps) - 1)
reactant = ps[r][0]
ps = reaction.RunReactants([reactant])
if len(ps) == 0 or len(ps) / full < (1 - complete):
ps = [[reactant]]
break
# add hydrogens and generate coordinates for new atoms
products: List[Tuple["RDKitMol", Iterable[int]]] = []
for p in ps[0]:
Chem.SanitizeMol(p)
q = Chem.AddHs(p)
Chem.SanitizeMol(q)
u = gen_coords_rdmol(q) # These are the atoms that have not changed
products.append((q, u))
return products
mol = to_rdmol(mol)
reaction = AllChem.ReactionFromSmarts(reaction_smarts) # type: ignore[attr-defined]
# RDKit removes fragments that are disconnected from the reaction center
# In order to keep these, the molecule is first split in separate fragments
# and the results, including non-reacting parts, are re-combined afterwards
frags = Chem.GetMolFrags(mol, asMols=True)
product = Chem.Mol()
unchanged = [] # List of atoms that have not changed
for frag in frags:
for p, u in react(frag, reaction):
unchanged += [product.GetNumAtoms() + i for i in u]
product = Chem.CombineMols(product, p)
if forcefield:
optimize_coordinates(product, forcefield, fixed=unchanged)
# The molecule is returned together with a list of atom indices of the atoms
# that are identical to those
# in the reactants. This list can be used in subsequent partial optimization of the molecule
if not return_rdmol:
product_mol = from_rdmol(product)
product_mol.properties.orig_atoms = [a + 1 for a in unchanged]
return product_mol
return product
def gen_coords(plamsmol: Molecule) -> List[int]:
"""Calculate 3D positions only for atoms without coordinates"""
rdmol = to_rdmol(plamsmol)
unchanged = gen_coords_rdmol(rdmol)
conf = rdmol.GetConformer()
for a in range(len(plamsmol.atoms)):
pos = conf.GetAtomPosition(a)
atom = plamsmol.atoms[a]
atom._setx(pos.x) # type: ignore[attr-defined]
atom._sety(pos.y) # type: ignore[attr-defined]
atom._setz(pos.z) # type: ignore[attr-defined]
return [a + 1 for a in unchanged]
@requires_optional_package("rdkit")
def gen_coords_rdmol(rdmol: "RDKitMol") -> List[int]:
from rdkit.Chem import AllChem
ref = rdmol.__copy__()
conf = rdmol.GetConformer()
coordDict = {}
unchanged: List[int] = []
maps = []
# Put known coordinates in coordDict
for i in range(rdmol.GetNumAtoms()):
pos = conf.GetAtomPosition(i)
if (-0.0001 < pos.x < 0.0001) and (-0.0001 < pos.y < 0.0001) and (-0.0001 < pos.z < 0.0001):
continue # atom without coordinates
coordDict[i] = pos
unchanged.append(i)
maps.append((i, i))
# compute coordinates for new atoms, keeping known coordinates
rms = 1
rs = 1
# repeat embedding and alignment until the rms of mapped atoms is sufficiently small
if rdmol.GetNumAtoms() > len(maps):
while rms > 0.1:
AllChem.EmbedMolecule(rdmol, coordMap=coordDict, randomSeed=rs, useBasicKnowledge=True) # type: ignore[attr-defined]
# align new molecule to original coordinates
rms = AllChem.AlignMol(rdmol, ref, atomMap=maps) # type: ignore[attr-defined]
rs += 1
return unchanged
@requires_optional_package("rdkit")
def optimize_coordinates(rdkit_mol: "RDKitMol", forcefield: str, fixed: Sequence = []) -> None:
from rdkit import Chem
from rdkit.Chem import AllChem
def MMFFminimize() -> None:
ff = AllChem.MMFFGetMoleculeForceField(rdkit_mol, AllChem.MMFFGetMoleculeProperties(rdkit_mol)) # type: ignore[attr-defined]
for f in fixed:
ff.AddFixedPoint(f)
try:
ff.Minimize()
except:
warn("MMFF geometry optimization failed for molecule: " + Chem.MolToSmiles(rdkit_mol))
def UFFminimize() -> None:
ff = AllChem.UFFGetMoleculeForceField(rdkit_mol, ignoreInterfragInteractions=True) # type: ignore[attr-defined]
for f in fixed:
ff.AddFixedPoint(f)
try:
ff.Minimize()
except:
warn("UFF geometry optimization failed for molecule: " + Chem.MolToSmiles(rdkit_mol))
optimize_molecule = {"uff": UFFminimize, "mmff": MMFFminimize}[forcefield]
Chem.SanitizeMol(rdkit_mol)
optimize_molecule()
return
@requires_optional_package("rdkit")
def write_molblock(plams_mol: Molecule, file: IO = sys.stdout) -> None:
from rdkit import Chem
file.write(Chem.MolToMolBlock(to_rdmol(plams_mol)))
@overload
@requires_optional_package("rdkit")
def readpdb(
pdb_file: Union[str, IO],
sanitize: bool = True,
removeHs: bool = False,
proximityBonding: bool = False,
return_rdmol: Literal[False] = False,
) -> Molecule: ...
@overload
@requires_optional_package("rdkit")
def readpdb(
pdb_file: Union[str, IO],
sanitize: bool = True,
removeHs: bool = False,
proximityBonding: bool = False,
return_rdmol: Literal[True] = ...,
) -> "RDKitMol": ...
@overload
@requires_optional_package("rdkit")
def readpdb(
pdb_file: Union[str, IO],
sanitize: bool = True,
removeHs: bool = False,
proximityBonding: bool = False,
return_rdmol: bool = ...,
) -> Union["RDKitMol", Molecule]: ...
[docs]@requires_optional_package("rdkit")
def readpdb(
pdb_file: Union[str, IO],
sanitize: bool = True,
removeHs: bool = False,
proximityBonding: bool = False,
return_rdmol: bool = False,
) -> Union["RDKitMol", Molecule]:
"""
Generate a molecule from a PDB file
:param pdb_file: The PDB file to read
:type pdb_file: path- or file-like
:param bool sanitize:
:param bool removeHs: Hydrogens are removed if True
:param bool return_rdmol: return a RDKit molecule if true, otherwise a PLAMS molecule
:return: The molecule
:rtype: |Molecule| or rdkit.Chem.Mol
"""
from rdkit import Chem
if isinstance(pdb_file, str):
with open(pdb_file) as f:
contents = f.read()
else:
contents = pdb_file.read()
pdb_mol = Chem.MolFromPDBBlock(contents, sanitize=sanitize, removeHs=removeHs)
return pdb_mol if return_rdmol else from_rdmol(pdb_mol)
[docs]@requires_optional_package("rdkit")
def writepdb(mol: Union[Molecule, "RDKitMol"], pdb_file: Union[str, IO] = sys.stdout) -> None:
"""
Write a PDB file from a molecule
:parameter mol: molecule to be exported to PDB
:type mol: |Molecule| or rdkit.Chem.Mol
:param pdb_file: The PDB file to write to, or a filename
:type pdb_file: path- or file-like
"""
from rdkit import Chem
mol = to_rdmol(mol, sanitize=False)
if isinstance(pdb_file, str):
with open(pdb_file, "w") as f:
f.write(Chem.MolToPDBBlock(mol))
else:
pdb_file.write(Chem.MolToPDBBlock(mol))
@overload
@requires_optional_package("rdkit")
def add_Hs(
mol: Union[Molecule, "RDKitMol"], forcefield: Optional[str] = None, return_rdmol: Literal[False] = False
) -> Molecule: ...
@overload
@requires_optional_package("rdkit")
def add_Hs(
mol: Union[Molecule, "RDKitMol"], forcefield: Optional[str] = None, return_rdmol: Literal[True] = ...
) -> "RDKitMol": ...
@overload
@requires_optional_package("rdkit")
def add_Hs(
mol: Union[Molecule, "RDKitMol"], forcefield: Optional[str] = None, return_rdmol: bool = ...
) -> Union[Molecule, "RDKitMol"]: ...
[docs]@requires_optional_package("rdkit")
def add_Hs(
mol: Union[Molecule, "RDKitMol"], forcefield: Optional[str] = None, return_rdmol: bool = False
) -> Union[Molecule, "RDKitMol"]:
"""
Add hydrogens to protein molecules read from PDB.
Makes sure that the hydrogens get the correct PDBResidue info.
:param mol: Molecule to be protonated
:type mol: |Molecule| or rdkit.Chem.Mol
:param str forcefield: Specify 'uff' or 'mmff' to apply forcefield based
geometry optimization on new atoms.
:param bool return_rdmol: return a RDKit molecule if true, otherwise a PLAMS molecule
:return: A molecule with explicit hydrogens added
:rtype: |Molecule| or rdkit.Chem.Mol
"""
from rdkit import Chem
mol = to_rdmol(mol)
retmol = Chem.AddHs(mol)
for atom in retmol.GetAtoms():
if atom.GetPDBResidueInfo() is None and atom.GetSymbol() == "H":
bond = atom.GetBonds()[0]
if bond.GetBeginAtom().GetIdx() == atom.GetIdx:
connected_atom = bond.GetEndAtom()
else:
connected_atom = bond.GetBeginAtom()
try:
ResInfo = connected_atom.GetPDBResidueInfo()
if ResInfo is None:
continue # Segmentation faults are raised if ResInfo is None
atom.SetMonomerInfo(ResInfo)
atomname = "H" + atom.GetPDBResidueInfo().GetName()[1:]
atom.GetPDBResidueInfo().SetName(atomname)
except:
pass
unchanged = gen_coords_rdmol(retmol)
if forcefield:
optimize_coordinates(retmol, forcefield, fixed=unchanged)
return retmol if return_rdmol else from_rdmol(retmol)
@requires_optional_package("rdkit")
def add_fragment(
rwmol: "RDKitMol",
frag: "RDKitMol",
rwmol_atom_idx: Optional[int] = None,
frag_atom_idx: Optional[int] = None,
bond_order: Optional[int] = None,
) -> None:
from rdkit import Chem
molconf = rwmol.GetConformer()
fragconf = frag.GetConformer()
new_indices = []
for a in frag.GetAtoms():
new_index = rwmol.AddAtom(a)
new_indices.append(new_index)
molconf.SetAtomPosition(new_index, fragconf.GetAtomPosition(a.GetIdx()))
for b in frag.GetBonds():
ba = b.GetBeginAtomIdx()
ea = b.GetEndAtomIdx()
rwmol.AddBond(new_indices[ba], new_indices[ea], b.GetBondType())
if bond_order and rwmol_atom_idx and frag_atom_idx:
rwmol.AddBond(rwmol_atom_idx, new_indices[frag_atom_idx], Chem.BondType.values[bond_order])
rwmol.GetAtomWithIdx(new_indices[frag_atom_idx]).SetNumRadicalElectrons(0)
@requires_optional_package("rdkit")
def get_fragment(
mol: "RDKitMol", indices: Sequence[int], incl_expl_Hs: bool = True, neutralize: bool = True
) -> "RDKitMol":
from rdkit import Chem
molconf = mol.GetConformer()
fragment = Chem.RWMol(Chem.Mol())
fragconf = Chem.Conformer()
# Put atoms in fragment
for i in indices:
atom = mol.GetAtomWithIdx(i)
new_index = fragment.AddAtom(atom)
pos = molconf.GetAtomPosition(i)
fragconf.SetAtomPosition(new_index, pos)
# Put bonds in fragment
for b in mol.GetBonds():
ba = b.GetBeginAtomIdx()
ea = b.GetEndAtomIdx()
if ba in indices and ea in indices:
fragment.AddBond(indices.index(ba), indices.index(ea), b.GetBondType())
continue
if not incl_expl_Hs:
continue
if ba in indices and mol.GetAtomWithIdx(ea).GetSymbol() == "H":
hi = fragment.AddAtom(mol.GetAtomWithIdx(ea))
fragconf.SetAtomPosition(hi, molconf.GetAtomPosition(ea))
fragment.AddBond(indices.index(ba), hi, Chem.BondType.SINGLE)
continue
if ea in indices and mol.GetAtomWithIdx(ba).GetSymbol() == "H":
hi = fragment.AddAtom(mol.GetAtomWithIdx(ba))
fragconf.SetAtomPosition(hi, molconf.GetAtomPosition(ba))
fragment.AddBond(indices.index(ea), hi, Chem.BondType.SINGLE)
ret_frag = fragment.GetMol()
Chem.SanitizeMol(ret_frag)
if neutralize:
for atom in ret_frag.GetAtoms():
nrad = atom.GetNumRadicalElectrons()
if nrad > 0:
atom.SetNumExplicitHs(atom.GetNumExplicitHs() + nrad)
atom.SetNumRadicalElectrons(0)
Chem.SanitizeMol(ret_frag)
ret_frag.AddConformer(fragconf)
return ret_frag
@overload
@requires_optional_package("rdkit")
def partition_protein(
mol: Union[Molecule, "RDKitMol"],
residue_bonds: Optional[Iterable[Tuple[int, int]]] = None,
split_heteroatoms: bool = True,
return_rdmol: Literal[False] = False,
) -> Tuple[List[Molecule], List[Molecule]]: ...
@overload
@requires_optional_package("rdkit")
def partition_protein(
mol: Union[Molecule, "RDKitMol"],
residue_bonds: Optional[Iterable[Tuple[int, int]]] = None,
split_heteroatoms: bool = True,
return_rdmol: Literal[True] = ...,
) -> Tuple[List["RDKitMol"], List["RDKitMol"]]: ...
@overload
@requires_optional_package("rdkit")
def partition_protein(
mol: Union[Molecule, "RDKitMol"],
residue_bonds: Optional[Iterable[Tuple[int, int]]] = None,
split_heteroatoms: bool = True,
return_rdmol: bool = ...,
) -> Tuple[List[Union[Molecule, "RDKitMol"]], List[Union[Molecule, "RDKitMol"]]]: ...
[docs]@requires_optional_package("rdkit")
def partition_protein(
mol: Union[Molecule, "RDKitMol"],
residue_bonds: Optional[Iterable[Tuple[int, int]]] = None,
split_heteroatoms: bool = True,
return_rdmol: bool = False,
) -> Tuple[Union[List[Molecule], List["RDKitMol"]], Union[List[Molecule], List["RDKitMol"]]]:
"""
Splits a protein molecule into capped amino acid fragments and caps.
:param mol: A protein molecule
:type mol: |Molecule| or rdkit.Chem.Mol
:param tuple residue_bonds: a tuple of pairs of residue number indicating which
peptide bonds to split. If none, split all peptide bonds.
:param bool split_heteroatoms: if True, all bonds between a heteroatom and
a non-heteroatom across residues are removed
:return: list of fragments, list of caps
"""
from rdkit import Chem
mol = to_rdmol(mol)
caps = []
em = Chem.RWMol(mol)
if split_heteroatoms:
for bond in mol.GetBonds():
resinfa = bond.GetBeginAtom().GetPDBResidueInfo()
resinfb = bond.GetEndAtom().GetPDBResidueInfo()
if resinfa.GetIsHeteroAtom() is not resinfb.GetIsHeteroAtom():
if resinfa.GetResidueNumber() != resinfb.GetResidueNumber():
em.RemoveBond(bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())
# Split peptide bonds
pept_bond = Chem.MolFromSmarts("[C;X4;H1,H2][CX3](=O)[NX3][C;X4;H1,H2][CX3](=O)")
for match in mol.GetSubstructMatches(pept_bond):
if residue_bonds:
resa = mol.GetAtomWithIdx(match[1]).GetPDBResidueInfo().GetResidueNumber()
resb = mol.GetAtomWithIdx(match[3]).GetPDBResidueInfo().GetResidueNumber()
if (resa, resb) not in residue_bonds and (resb, resa) not in residue_bonds:
continue
cap = get_fragment(mol, match[0:5])
cap = add_Hs(cap, return_rdmol=True)
caps.append(cap if return_rdmol else from_rdmol(cap))
cap_o_ind = cap.GetSubstructMatch(Chem.MolFromSmarts("[C;X4][CX3]=O"))
cap_o = get_fragment(cap, cap_o_ind, neutralize=False)
cap_n_ind = cap.GetSubstructMatch(Chem.MolFromSmarts("O=[CX3][NX3][C;X4]"))[2:]
cap_n = get_fragment(cap, cap_n_ind, neutralize=False)
em.RemoveBond(match[1], match[3])
add_fragment(em, cap_o, match[3], 1, 1)
add_fragment(em, cap_n, match[1], 0, 1)
# Split disulfide bonds
ss_bond = Chem.MolFromSmarts("[C;X4;H1,H2]SS[C;X4;H1,H2]")
for match in mol.GetSubstructMatches(ss_bond):
cap = get_fragment(mol, match[0:5])
cap = add_Hs(cap, return_rdmol=True)
caps.append(cap if return_rdmol else from_rdmol(cap))
cap_s_ind = cap.GetSubstructMatch(Chem.MolFromSmarts("[C;X4]SS[C;X4]"))
cap_s1 = get_fragment(cap, cap_s_ind[0:2], neutralize=False)
cap_s2 = get_fragment(cap, cap_s_ind[2:4], neutralize=False)
em.RemoveBond(match[1], match[2])
add_fragment(em, cap_s1, match[2], 1, 1)
add_fragment(em, cap_s2, match[1], 0, 1)
frags: List["RDKitMol"] = [f for f in Chem.GetMolFrags(em.GetMol(), asMols=True, sanitizeFrags=False)]
if not return_rdmol:
return [from_rdmol(frag) for frag in frags], caps
return frags, caps
@overload
@requires_optional_package("rdkit")
def charge_AAs(mol: Molecule, return_rdmol: Literal[False] = False) -> Molecule: ...
@overload
@requires_optional_package("rdkit")
def charge_AAs(mol: Molecule, return_rdmol: Literal[True] = ...) -> Union[Molecule, "RDKitMol"]: ...
@overload
@requires_optional_package("rdkit")
def charge_AAs(mol: Molecule, return_rdmol: bool = ...) -> Union[Molecule, "RDKitMol"]: ...
@requires_optional_package("rdkit")
def charge_AAs(mol: Molecule, return_rdmol: bool = False) -> Union[Molecule, "RDKitMol"]:
from rdkit import Chem
ionizations = {"ARG_NH2": 1, "LYS_NZ": 1, "GLU_OE2": -1, "ASP_OD2": -1}
mol = to_rdmol(mol)
for atom in mol.GetAtoms(): # type: ignore[attr-defined]
resinfo = atom.GetPDBResidueInfo()
res_atom = resinfo.GetResidueName() + "_" + resinfo.GetName().strip()
try:
atom.SetFormalCharge(ionizations[res_atom])
Chem.SanitizeMol(mol)
except KeyError:
pass
Chem.SanitizeMol(mol)
return mol if return_rdmol else from_rdmol(mol)
[docs]def get_backbone_atoms(mol: Union[Molecule, "RDKitMol"]) -> List[int]:
"""
Return a list of atom indices corresponding to the backbone atoms in a peptide molecule.
This function assumes PDB information in properties.pdb_info of each atom, which is the case
if the molecule is generated with the "readpdb" or "from_sequence" functions.
:parameter mol: a peptide molecule
:type mol: |Molecule| or rdkit.Chem.Mol
:return: a list of atom indices
:rtype: list
"""
if not isinstance(mol, Molecule):
mol = from_rdmol(mol)
backbone = ["N", "CA", "C", "O"]
return [a for a in range(1, len(mol) + 1) if str(mol[a].properties.pdb_info.Name).strip() in backbone]
[docs]@requires_optional_package("rdkit")
def get_substructure(
mol: Molecule, func_list: Sequence[Union[str, Molecule, "RDKitMol"]]
) -> Dict[Union[str, Molecule, "RDKitMol"], Tuple[Atom, ...]]:
"""
Search for functional groups within a molecule based on a list of reference functional groups.
SMILES strings, PLAMS and/or RDKit molecules can be used interchangeably in "func_list".
Example:
.. code:: python
>>> mol = from_smiles('OCCO') # Ethylene glycol
>>> func_list = ['[H]O', 'C[N+]', 'O=PO']
>>> get_substructure(mol, func_list)
{'[H]O': [(<scm.plams.mol.atom.Atom at 0x125183518>,
<scm.plams.mol.atom.Atom at 0x1251836a0>),
(<scm.plams.mol.atom.Atom at 0x125183550>,
<scm.plams.mol.atom.Atom at 0x125183240>)]}
:parameter mol: A PLAMS molecule.
:type mol: |Molecule|
:parameter list func_list: A list of functional groups.
Functional groups can be represented by SMILES strings, PLAMS and/or RDKit molecules.
:return: A dictionary with functional groups from "func_list" as keys and a list of n-tuples
with matching PLAMS |Atom| as values.
"""
from rdkit import Chem
def _to_rdmol(functional_group: Union[str, Molecule, "RDKitMol"]) -> "RDKitMol":
"""Turn a SMILES strings, RDKit or PLAMS molecules into an RDKit molecule."""
if isinstance(functional_group, str):
# RDKit tends to remove explicit hydrogens if SANITIZE_ADJUSTHS is enabled
sanitize = Chem.SanitizeFlags.SANITIZE_ALL ^ Chem.SanitizeFlags.SANITIZE_ADJUSTHS
ret = Chem.MolFromSmiles(functional_group, sanitize=False)
Chem.rdmolops.SanitizeMol(ret, sanitizeOps=sanitize)
return ret
elif isinstance(functional_group, Molecule):
return to_rdmol(functional_group)
elif isinstance(functional_group, Chem.Mol):
return functional_group
raise TypeError(
"get_substructure: "
+ str(type(functional_group))
+ " is not a supported \
object type"
)
def _get_match(
mol: Molecule, rdmol: "RDKitMol", functional_group: Union[str, Molecule, "RDKitMol"]
) -> Union[List[Tuple[Atom]], bool]:
"""Perform a substructure match on "mol".
If a match is found, return a list of n-tuples consisting PLAMS |Atom|.
Otherwise return False."""
matches = rdmol.GetSubstructMatches(functional_group) # type: ignore[arg-type]
if matches:
return [tuple(mol[j + 1] for j in idx_tup) for idx_tup in matches]
return False
rdmol = to_rdmol(mol)
rdmol_func_list = [_to_rdmol(i) for i in func_list]
gen = (_get_match(mol, rdmol, i) for i in rdmol_func_list)
return {key: value for key, value in zip(func_list, gen) if value} # type: ignore[misc]
[docs]def yield_coords(rdmol: "RDKitMol", id: int = -1) -> Generator[Tuple[float, float, float], None, None]:
"""Take an rdkit molecule and yield its coordinates as 3-tuples.
.. code-block:: python
>>> from scm.plams import yield_coords
>>> from rdkit import Chem
>>> rdmol = Chem.Mol(...) # e.g. Methane
>>> for xyz in yield_coords(rdmol):
... print(xyz)
(-0.0, -0.0, -0.0)
(0.6405, 0.6405, -0.6405)
(0.6405, -0.6405, 0.6405)
(-0.6405, 0.6405, 0.6405)
(-0.6405, -0.6405, -0.6405)
The iterator produced by this function can, for example, be passed to
:meth:`Molecule.from_array()<scm.plams.mol.molecule.Molecule.from_array>`
the update the coordinates of a PLAMS Molecule in-place.
.. code-block:: python
>>> from scm.plams import Molecule
>>> mol = Molecule(...)
>>> xyz_iterator = yield_coords(rdmol)
>>> mol.from_array(xyz_iterator)
:parameter rdmol: An RDKit mol.
:type rdmol: rdkit.Chem.Mol
:parameter int id: The ID of the desired conformer.
:return: An iterator yielding 3-tuples with *rdmol*'s Cartesian coordinates.
:rtype: iterator
"""
conf = rdmol.GetConformer(id=id)
for atom in rdmol.GetAtoms():
pos = conf.GetAtomPosition(atom.GetIdx())
yield (pos.x, pos.y, pos.z)
@overload
@requires_optional_package("rdkit")
def canonicalize_mol(mol: Molecule, inplace: Literal[True], **kwargs: Any) -> None: ...
@overload
@requires_optional_package("rdkit")
def canonicalize_mol(mol: Molecule, inplace: Literal[False] = False, **kwargs: Any) -> Molecule: ...
[docs]@requires_optional_package("rdkit")
def canonicalize_mol(mol: Molecule, inplace: bool = False, **kwargs: Any) -> Optional[Molecule]:
r"""Take a PLAMS molecule and sort its atoms based on their canonical rank.
Example:
.. code:: python
>>> from scm.plams import Molecule, canonicalize_mol
# Methane
>>> mol: Molecule = ...
>>> print(mol)
Atoms:
1 H 0.640510 0.640510 -0.640510
2 H 0.640510 -0.640510 0.640510
3 C 0.000000 0.000000 0.000000
4 H -0.640510 0.640510 0.640510
5 H -0.640510 -0.640510 -0.640510
>>> print(canonicalize_mol(mol))
Atoms:
1 C 0.000000 0.000000 0.000000
2 H -0.640510 -0.640510 -0.640510
3 H -0.640510 0.640510 0.640510
4 H 0.640510 -0.640510 0.640510
5 H 0.640510 0.640510 -0.640510
:parameter mol: The to-be canonicalized molecule.
:type mol: |Molecule|
:parameter bool inplace: Whether to sort the atoms inplace or to return a new molecule.
:parameter \**kwargs: Further keyword arguments for rdkit.Chem.CanonicalRankAtoms_.
:return: Either ``None`` or a newly sorted molecule, depending on the value of ``inplace``.
:rtype: None or |Molecule|
.. _rdkit.Chem.CanonicalRankAtoms: https://www.rdkit.org/docs/source/rdkit.Chem.rdmolfiles.html#rdkit.Chem.rdmolfiles.CanonicalRankAtoms
"""
from rdkit import Chem
if not isinstance(mol, Molecule):
raise TypeError("`mol` expected a plams Molecule")
rdmol = to_rdmol(mol)
idx_rank = Chem.CanonicalRankAtoms(rdmol, **kwargs)
if inplace:
mol.atoms = [at for _, at in sorted(zip(idx_rank, mol.atoms), reverse=True)]
return None
else:
ret = mol.copy()
ret.atoms = [at for _, at in sorted(zip(idx_rank, ret.atoms), reverse=True)]
return ret
[docs]@requires_optional_package("rdkit")
@requires_optional_package("PIL")
def to_image(
mol: Molecule,
remove_hydrogens: bool = True,
filename: Optional[str] = None,
fmt: str = "svg",
size: Sequence[int] = (200, 100),
as_string: bool = True,
) -> Union[str, bytes, "Image.Image"]:
"""
Convert single molecule to single image object
* ``mol`` -- PLAMS Molecule object
* ``remove_hydrogens`` -- Wether or not to remove the H-atoms from the image
* ``filename`` -- Optional: Name of image file to be created.
* ``fmt`` -- One of "svg", "png", "eps", "pdf", "jpeg"
* ``size`` -- Tuple/list containing width and height of image in pixels.
* ``as_string`` -- Returns the image as a string or bytestring. If set to False, the original format
will be returned, which can be either a PIL image or SVG text
We do this because after converting a PIL image to a bytestring it is not possible
to further edit it (with our version of PIL).
* Returns -- Image text file / binary of image text file / PIL Image object.
"""
from io import BytesIO
from PIL import Image
from rdkit.Chem import Draw
from rdkit.Chem.Draw import rdMolDraw2D
from rdkit.Chem.Draw import MolsToGridImage
extensions = ["svg", "png", "eps", "pdf", "jpeg"]
classes: Dict[str, Any] = {}
classes["svg"] = _MolsToGridSVG
for ext in extensions[1:]:
classes[ext] = None
# PNG can only be created in this way with later version of RDKit
if hasattr(rdMolDraw2D, "MolDraw2DCairo"):
for ext in extensions[1:]:
classes[ext] = MolsToGridImage
# Determine the type of image file
if filename is not None:
if "." in filename:
extension = filename.split(".")[-1]
if extension in classes.keys():
fmt = extension
else:
msg = [f"Image type {extension} not available."]
msg += [f"Available extensions are: {' '.join(extensions)}"]
raise Exception("\n".join(msg))
else:
filename = ".".join([filename, fmt])
if fmt not in classes.keys():
raise Exception(f"Image type {fmt} not available.")
rdmol = _rdmol_for_image(mol, remove_hydrogens)
# Draw the image
if classes[fmt.lower()] is None:
# With AMS version of RDKit MolsToGridImage fails for eps (because of paste)
img = Draw.MolToImage(rdmol, size=size)
buf = BytesIO()
img.save(buf, format=fmt)
img_text = buf.getvalue()
else:
# This fails for a C=C=O molecule, with AMS rdkit version
img = classes[fmt.lower()]([rdmol], molsPerRow=1, subImgSize=size)
img_text = img
if isinstance(img, Image.Image):
buf = BytesIO()
img.save(buf, format=fmt)
img_text = buf.getvalue()
# If I do not make this correction to the SVG text, it is not readable in JupyterLab
if fmt.lower() == "svg" and isinstance(img_text, str):
img_text = _correct_svg(img_text)
# Write to file, if required
if filename is not None:
mode = "w"
if isinstance(img_text, bytes):
mode = "wb"
with open(filename, mode) as outfile:
outfile.write(img_text)
if as_string:
img = img_text
return img
[docs]@requires_optional_package("rdkit")
@requires_optional_package("PIL")
def get_reaction_image(
reactants: Sequence[Molecule],
products: Sequence[Molecule],
filename: Optional[str] = None,
fmt: str = "svg",
size: Sequence[int] = (200, 100),
as_string: bool = True,
) -> Union[str, bytes, "Image.Image"]:
"""
Create a 2D reaction image from reactants and products (PLAMS molecules)
* ``reactants`` -- Iterable of PLAMS Molecule objects representing the reactants.
* ``products`` -- Iterable of PLAMS Molecule objects representing the products.
* ``filename`` -- Optional: Name of image file to be created.
* ``fmt`` -- The format of the image (svg, png, eps, pdf, jpeg).
The extension in the filename, if provided, takes precedence.
* ``size`` -- Tuple/list containing width and height of image in pixels.
* ``as_string`` -- Returns the image as a string or bytestring. If set to False, the original format
will be returned, which can be either a PIL image or SVG text
* Returns -- SVG image text file.
"""
extensions = ["svg", "png", "eps", "pdf", "jpeg"]
# Determine the type of image file
if filename is not None:
if "." in filename:
extension = filename.split(".")[-1]
if extension in extensions:
fmt = extension
else:
msg = [f"Image type {extension} not available."]
msg += [f"Available extensions are: {' '.join(extensions)}"]
raise Exception("\n".join(msg))
else:
filename = ".".join([filename, fmt])
if fmt.lower() not in extensions:
raise Exception(f"Image type {fmt} not available.")
# Get the actual image
width = size[0]
height = size[1]
img_text: Union[str, bytes, "Image.Image"]
if fmt.lower() == "svg":
img_text = _get_reaction_image_svg(reactants, products, width, height)
else:
img_text = _get_reaction_image_pil(reactants, products, fmt, width, height, as_string=as_string)
# Write to file, if required
if filename is not None:
mode = "w"
if isinstance(img_text, bytes):
mode = "wb"
with open(filename, mode) as outfile:
outfile.write(img_text)
return img_text
def _get_reaction_image_svg(
reactants: Sequence[Molecule], products: Sequence[Molecule], width: int = 200, height: int = 100
) -> str:
"""
Create a 2D reaction image from reactants and products (PLAMS molecules)
* ``reactants`` -- Iterable of PLAMS Molecule objects representing the reactants.
* ``products`` -- Iterable of PLAMS Molecule objects representing the products.
* Returns -- SVG image text file.
"""
from rdkit import Chem
def svg_arrow(x1: float, y1: float, x2: float, y2: float, prefix: str = "") -> List[str]:
"""
The reaction arrow in html format
"""
# The arrow head
l = ['<%sdefs> <%smarker id="arrow" viewBox="0 0 10 10" refX="5" refY="5" ' % (prefix, prefix)]
l += ['markerWidth="6" markerHeight="6" ']
l += ['orient="auto-start-reverse"> ']
l += ['<%spath d="M 0 0 L 10 5 L 0 10 z" /></%smarker></%sdefs>' % (prefix, prefix, prefix)]
arrow = "".join(l)
# The line
l = ['<%sline x1="%i" y1="%i" x2="%i" y2="%i" ' % (prefix, x1, y1, x2, y2)]
l += ['stroke="black" marker-end="url(#arrow)" />']
line = "".join(l)
return [arrow, line]
def add_plus_signs_svg(
img_text: str, width: float, height: float, nmols: int, nreactants: int, prefix: str = ""
) -> str:
"""
Add the lines with + signs to the SVG image
"""
y = int(0.55 * height)
t = []
for i in range(nmols - 1):
x = int(((i + 1) * width) - (0.1 * width))
if i + 1 in (nreactants, nreactants + 1):
continue
t += ['<%stext x="%i" y="%i" font-size="16">+</%stext>' % (prefix, x, y, prefix)]
lines = img_text.split("\n")
lines = lines[:-2] + t + lines[-2:]
return "\n".join(lines)
def add_arrow_svg(img_text: str, width: float, height: float, nreactants: int, prefix: str = "") -> str:
"""
Add the arrow to the SVG image
"""
y = int(0.5 * height)
x1 = int((nreactants * width) + (0.3 * width))
x2 = int((nreactants * width) + (0.7 * width))
t = svg_arrow(x1, y, x2, y, prefix)
lines = img_text.split("\n")
lines = lines[:-2] + t + lines[-2:]
return "\n".join(lines)
# Get the rdkit molecules
rdmols = [_rdmol_for_image(mol) for mol in reactants]
rdmols += [Chem.Mol()] # This is where the arrow will go
rdmols += [_rdmol_for_image(mol) for mol in products]
nmols = len(rdmols)
# Place the molecules in a row of images
kwargs = {"legendFontSize": 16} # ,"legendFraction":0.1}
img_text = _MolsToGridSVG(rdmols, molsPerRow=nmols, subImgSize=(width, height), **kwargs) # type: ignore[arg-type]
img_text = _correct_svg(img_text)
# Add + and =>
nreactants = len(reactants)
img_text = add_plus_signs_svg(img_text, width, height, nmols, nreactants)
img_text = add_arrow_svg(img_text, width, height, nreactants)
return img_text
def _get_reaction_image_pil(
reactants: Sequence[Molecule],
products: Sequence[Molecule],
fmt: str,
width: int = 200,
height: int = 100,
as_string: bool = True,
) -> Union[bytes, "Image.Image"]:
"""
Create a 2D reaction image from reactants and products (PLAMS molecules)
* ``reactants`` -- Iterable of PLAMS Molecule objects representing the reactants.
* ``products`` -- Iterable of PLAMS Molecule objects representing the products.
* Returns -- SVG image text file.
"""
from io import BytesIO
from PIL import Image
from PIL import ImageDraw
from rdkit import Chem
from rdkit.Chem.Draw import rdMolDraw2D, MolsToGridImage
def add_arrow_pil(img: "Image.Image", width: float, height: float, nreactants: int) -> "Image.Image":
"""
Add the arrow to the PIL image
"""
y1 = int(0.5 * height)
y2 = y1
x1 = int((nreactants * width) + (0.3 * width))
x2 = int((nreactants * width) + (0.7 * width))
# Draw a line
black = (0, 0, 0)
draw = ImageDraw.Draw(img)
draw.line(((x1, y1), (x2, y2)), fill=128)
# Draw the arrow head
headscale = 20
xshift = width // headscale
yshift = img.size[1] // headscale
p1 = (x2, y2 + yshift)
p2 = (x2, y2 - yshift)
p3 = (x2 + xshift, y2)
draw.polygon((p1, p2, p3), fill=black)
return img
def add_plus_signs_pil(
img: "Image.Image", width: float, height: float, nmols: int, nreactants: int
) -> "Image.Image":
"""
Add the lines with + signs to the SVG image
"""
black = (0, 0, 0)
I1 = ImageDraw.Draw(img)
y = int(0.5 * height)
# myfont = ImageFont.truetype('FreeMono.ttf', 25)
for i in range(nmols):
x = int(((i + 1) * width) - (0.05 * width))
if i + 1 in (nreactants, nreactants + 1):
continue
# I1.text((x, y), "+", font=myfont, fill=black)
I1.text((x, y), "+", fill=black)
return img
def join_pil_images(pil_images: List["Image.Image"]) -> "Image.Image":
"""
Create a new image which connects the ones above with text
"""
white = (255, 255, 255)
widths = [img.width for img in pil_images]
width = sum(widths)
height = max([img.height for img in pil_images])
final_img = Image.new("RGB", (width, height), white)
# Concatenate the PIL images
for i, img in enumerate(pil_images):
pos = sum(widths[:i])
h = int((height - img.height) / 2)
final_img.paste(img, (pos, h))
return final_img
nreactants = len(reactants)
nmols = nreactants + len(products)
if not hasattr(rdMolDraw2D, "MolDraw2DCairo"):
# We are working with the old AMS version of RDKit
white = (255, 255, 255)
rimages = [to_image(mol, fmt=fmt, as_string=False) for i, mol in enumerate(reactants)]
pimages = [to_image(mol, fmt=fmt, as_string=False) for i, mol in enumerate(products)]
blanc = Image.new("RGB", (width, height), white)
# Get the image (with arrow)
nreactants = len(reactants)
all_images = rimages + [blanc] + pimages # type: ignore[arg-type]
img = join_pil_images(all_images) # type: ignore[arg-type]
else:
# We have a later version of RDKit that can regulate the font sizes
rdmols = [_rdmol_for_image(mol) for mol in reactants]
rdmols += [Chem.Mol()] # This is where the arrow will go
rdmols += [_rdmol_for_image(mol) for mol in products]
# Place the molecules in a row of images
subimg_size = [width, height]
kwargs = {"legendFontSize": 16} # ,"legendFraction":0.1}
img = MolsToGridImage(rdmols, molsPerRow=nmols + 1, subImgSize=subimg_size, **kwargs)
# Add + and =>
img = add_plus_signs_pil(img, width, height, nmols, nreactants)
img = add_arrow_pil(img, width, height, nreactants)
# Get the img/bytestring
if as_string:
buf = BytesIO()
img.save(buf, format=fmt)
img_bytes = buf.getvalue()
return img_bytes
else:
return img
def _correct_svg(image: str) -> str:
"""
Correct for a bug in the AMS rdkit created SVG file
"""
if not "svg:" in image:
return image
image = image.replace("svg:", "")
lines = image.split("\n")
for iline, line in enumerate(lines):
if "xmlns:svg=" in line:
lines[iline] = line.replace("xmlns:svg", "xmlns")
break
image = "\n".join(lines)
return image
def _presanitize(mol: Molecule, rdmol: "RDKitMol") -> "RDKitMol":
"""
Change bonding and atom charges to avoid failed sanitization
Note: Used by to_rdmol
"""
from rdkit import Chem
mol = mol.copy()
for i in range(10):
try:
rdmol_test = copy.deepcopy(rdmol)
Chem.SanitizeMol(rdmol_test)
stored_exc = None
break
except ValueError as exc:
stored_exc = exc
text = repr(exc)
# Fix the problem
rdmol, bonds, charges = _kekulize(mol, rdmol, text)
# print ("REB bonds charges: ",bonds, charges)
# print (rdmol.Debug())
# rdmol = to_rdmol(mol, sanitize=False)
# print (rdmol.Debug())
if stored_exc is not None:
raise stored_exc
else:
rdmol = rdmol_test
return rdmol
def _kekulize(
mol: Molecule, rdmol: "RDKitMol", text: str, use_dfs: bool = True
) -> Tuple["RDKitMol", Dict[Tuple[int, int], int], Dict[int, int]]:
"""
Kekulize the atoms indicated as problematic by RDKit
* ``mol`` - PLAMS molecule
* ``text`` - Sanitation error produced by RDKit
Note: Returns the changes in bond orders and atomic charges, that will make sanitation succeed.
"""
from rdkit import Chem
# Find the indices of the problematic atoms
indices = _find_aromatic_sequence(rdmol, text)
if indices is None:
return rdmol, {}, {}
# Set the bond orders along the chain to 2, 1, 2, 1,...
altered_bonds: Dict[Tuple[int, int], int] = {}
if len(indices) > 1:
emol = Chem.RWMol(rdmol)
if use_dfs:
# It may be better to create the tree first, and then start at a leaf
altered_bonds = _alter_aromatic_bonds(emol, indices[0])
indices = list(set([ind for tup in altered_bonds.keys() for ind in tup]))
else:
indices = _order_atom_indices(emol, indices)
altered_bonds = _alter_bonds_along_chain(emol, indices)
_adjust_atom_aromaticity(emol, altered_bonds)
rdmol = emol.GetMol()
# Adjust the PLAMS molecule as well
for (iat, jat), order in altered_bonds.items():
for bond in mol.atoms[iat].bonds:
if mol.index(bond.other_end(mol.atoms[iat])) - 1 == jat:
bond.order = order
break
# If the atom at the chain end has the wrong bond order, give it a charge.
altered_charge = {}
atom_indices, dangling_bonds = _get_charged_atoms(rdmol, indices)
if len(atom_indices) > 0:
iat = atom_indices[0]
ndangling = dangling_bonds[iat]
altered_charge = _guess_atomic_charge(iat, ndangling, rdmol, mol)
rdmol.GetAtomWithIdx(iat).SetFormalCharge(altered_charge[iat])
for k, v in altered_charge.items():
mol.atoms[k].properties.rdkit.charge = v
return rdmol, altered_bonds, altered_charge
def _find_aromatic_sequence(rdmol: "RDKitMol", text: str) -> Optional[List[int]]:
"""
Find the sequence of atoms with 1.5 bond orders
"""
indices: Optional[List[int]] = None
lines = text.split("\n")
line = lines[-1]
if "Unkekulized atoms:" in line:
text = line.split("Unkekulized atoms:")[-1].split("\\n")[0]
if '"' in text:
text = text.split('"')[0]
indices = [int(w) for w in text.split()]
if len(indices) > 1:
return indices
line = "atom %i marked aromatic" % (indices[0])
iat: Any
if "marked aromatic" in line:
iat = int(line.split("atom")[-1].split()[0])
indices = [iat]
while iat is not None:
icurrent = iat
iat = None
for bond in rdmol.GetAtomWithIdx(icurrent).GetBonds():
if str(bond.GetBondType()) == "AROMATIC":
iat = bond.GetOtherAtomIdx(icurrent)
if iat in indices:
iat = None
continue
indices.append(iat)
break
elif "Explicit valence for atom" in line:
iat = int(line.split("#")[-1].split()[0])
indices = [iat]
return indices
def _alter_aromatic_bonds(
emol: "RDKitEdMol", iat: int, depth: int = 0, double_first: bool = False
) -> "OrderedDict[Tuple[int, int], int]":
"""
Switch all thearomitic bonds to single/double, starting at iat
* ``emol`` -- RDKit EditableMol type, for which bond orders will be changed
* ``iat`` -- Starting point for depth first search
"""
from rdkit import Chem
from scm.plams import PeriodicTable as PT
# Use OrderedDict, so that the leaves of the tree
# will be at the end
bonds_changed: OrderedDict[Tuple[int, int], int] = OrderedDict()
at = emol.GetAtomWithIdx(iat)
valence = PT.get_connectors(at.GetAtomicNum())
bonds = at.GetBonds()
are_aromatic = [str(b.GetBondType()) == "AROMATIC" for b in bonds]
int_orders = sum([b.GetBondTypeAsDouble() for i, b in enumerate(bonds) if not are_aromatic[i]])
numbonds = len([b for i, b in enumerate(bonds) if are_aromatic[i]])
valence -= int(int_orders)
# Here I place the double bond first.
# I could also start with a single bond instead.
orders = [1 for i in range(numbonds)]
if valence > numbonds:
for i in range(min(numbonds, valence - numbonds)):
if double_first:
orders[i] = 2
else:
orders[numbonds - i - 1] = 2
for i, bond in enumerate(bonds):
jat = bond.GetOtherAtomIdx(iat)
if are_aromatic[i]:
pair: Tuple[int, int] = tuple(sorted([iat, jat]))
order = orders.pop(0)
bond.SetBondType(Chem.BondType(order))
bond.SetIsAromatic(False) # This is necessary with the newer RDKit
bonds_changed[pair] = order
d = _alter_aromatic_bonds(emol, jat, depth + 1)
bonds_changed.update(d)
return bonds_changed
def _alter_bonds_along_chain(emol: "RDKitEdMol", indices: Sequence[int]) -> Dict[Tuple[int, int], int]:
"""
Along the chain of atoms (indices), alternate double and single bonds
"""
from scm.plams import PeriodicTable as PT
from rdkit import Chem
if len(indices) > 1:
# The first bond order is set to 2, if aromic
# Else it is flipped
first_bond = emol.GetBondBetweenAtoms(indices[0], indices[1])
if str(first_bond.GetBondType()) == "AROMATIC":
new_order = 2
else:
new_order = first_bond.GetBondTypeAsDouble()
new_order = (new_order % 2) + 1
# Unless this breaks valence rules.
iat = indices[0]
at = emol.GetAtomWithIdx(iat)
valence = PT.get_connectors(at.GetAtomicNum())
orders = [b.GetBondTypeAsDouble() for b in at.GetBonds()]
jat = indices[1]
at_next = emol.GetAtomWithIdx(jat)
valence_next = PT.get_connectors(at_next.GetAtomicNum())
orders_next = [b.GetBondTypeAsDouble() for b in at_next.GetBonds()]
if sum(orders) > valence or sum(orders_next) > valence_next:
new_order = 1
# Set the bond orders along the chain to 2, 1, 2, 1,...
altered_bonds: Dict[Tuple[int, int], int] = {}
for i, iat in enumerate(indices[:-1]):
bond = emol.GetBondBetweenAtoms(iat, indices[i + 1])
bond.SetBondType(Chem.BondType(new_order))
bond.SetIsAromatic(False) # This is necessary with the newer RDKit versions
altered_bonds[iat, indices[i + 1]] = new_order
new_order = ((new_order) % 2) + 1
return altered_bonds
def _order_atom_indices(rdmol: "RDKitMol", indices: Sequence[int]) -> List[int]:
"""
Order the atomic indices so that they are consecutive along a bonded chain
"""
# Order the indices, so that they are consecutive in the molecule
start = 0
for i, iat in enumerate(indices):
at = rdmol.GetAtomWithIdx(iat)
neighbors = [b.GetOtherAtomIdx(iat) for b in at.GetBonds()]
relevant_neighbors = [jat for jat in neighbors if jat in indices]
if len(relevant_neighbors) == 1:
start = i
break
iat = indices[start]
atoms = [iat]
while 1:
at = rdmol.GetAtomWithIdx(iat)
neighbors = [b.GetOtherAtomIdx(iat) for b in at.GetBonds()]
relevant_neighbors = [jat for jat in neighbors if jat in indices and not jat in atoms]
if len(relevant_neighbors) == 0:
break
iat = relevant_neighbors[0]
atoms.append(iat)
if len(atoms) < len(indices):
raise Exception("The unkekulized atoms are not in a consecutive chain")
indices = atoms
return indices
def _adjust_atom_aromaticity(emol: "RDKitEdMol", altered_bonds: Dict[Tuple[int, int], int]) -> None:
"""
Assign aromaticity to the atoms based on the new bond orders
"""
indices = list(set([ind for tup in altered_bonds.keys() for ind in tup]))
for ind in indices:
at = emol.GetAtomWithIdx(ind)
if not at.GetIsAromatic():
continue
# Only change this if we are sure the atom is not aromatic anymore
aromatic = False
for bond in at.GetBonds():
if str(bond.GetBondType()) == "AROMATIC":
aromatic = True
break
if not aromatic:
at.SetIsAromatic(False)
def _get_charged_atoms(rdmol: "RDKitMol", indices: Sequence[int]) -> Tuple[List[int], Dict[int, int]]:
"""
Locate the atoms that need a charge
"""
from scm.plams import PeriodicTable as PT
atom_indices: List[int] = []
dangling_bonds: Dict[int, int] = {}
for iat in indices[::-1]:
at = rdmol.GetAtomWithIdx(iat)
valence = PT.get_connectors(at.GetAtomicNum())
bonds = at.GetBonds()
orders = [b.GetBondTypeAsDouble() for b in bonds]
ndangling = int(valence - sum(orders))
if ndangling != 0:
dangling_bonds[iat] = ndangling
atom_indices.append(iat)
return atom_indices, dangling_bonds
def _guess_atomic_charge(iat: int, ndangling: int, rdmol: "RDKitMol", mol: Molecule) -> Dict[int, int]:
"""
Guess the best atomic charge for atom iat
"""
# Get the total charge from atomic charges already set
charges = [at.GetFormalCharge() for at in rdmol.GetAtoms()]
totcharge = sum(charges) - charges[iat]
# Here I hope that the estimated charge will be more reliable than the
# actual (user defined) system charge, but am not sure
est_charges = mol.guess_atomic_charges(adjust_to_systemcharge=False, depth=0)
molcharge = int(sum(est_charges))
molcharge = molcharge - totcharge
# Adjust the sign based on the estimated charge
sign = ndangling / (abs(ndangling))
if molcharge != 0:
sign = molcharge / (abs(molcharge))
elif est_charges[iat] != 0:
sign = est_charges[iat] / abs(est_charges[iat])
# Then use the over/under valence with the new sign as the atomic charge
charge = int(sign * abs(ndangling))
# Perhaps we tried this already
if charge == charges[iat]:
charge = -charge
altered_charge = {iat: charge}
return altered_charge
def _rdmol_for_image(mol: Union[Molecule, "ChemicalSystem"], remove_hydrogens: bool = True) -> "RDKitMol":
"""
Convert PLAMS molecule to an RDKit molecule specifically for a 2D image
"""
from rdkit.Chem import AllChem
from rdkit.Chem import RemoveHs
if _has_scm_chemsys and isinstance(mol, ChemicalSystem):
rdmol = mol.to_rdkit_mol()
else:
rdmol = to_rdmol(mol, presanitize=True)
# Flatten the molecule
AllChem.Compute2DCoords(rdmol) # type: ignore[attr-defined]
# Remove the Hs only if there are carbon atoms in this system
# Otherwise this will turn an OH radical into a water molecule.
carbons = [i for i, at in enumerate(mol.atoms) if at.symbol in ["C", "Si"]]
if remove_hydrogens and len(carbons) > 0:
rdmol = RemoveHs(rdmol)
else:
for atom in rdmol.GetAtoms():
atom.SetNoImplicit(True)
ids = [c.GetId() for c in rdmol.GetConformers()]
for cid in ids:
rdmol.RemoveConformer(cid)
return rdmol
def _MolsToGridSVG(
mols: Sequence[Molecule],
molsPerRow: int = 3,
subImgSize: Tuple[int, int] = (200, 200),
legends: Optional[Sequence[str]] = None,
highlightAtomLists: Optional[Sequence[int]] = None,
highlightBondLists: Optional[Sequence[int]] = None,
drawOptions: Optional["Draw.MolDrawOptions"] = None,
**kwargs: Any,
) -> str:
"""
Replaces the old version of this function in our RDKit for a more recent one, with more options
"""
from rdkit.Chem.Draw import rdMolDraw2D
if legends is None:
legends = [""] * len(mols)
nRows = len(mols) // molsPerRow
if len(mols) % molsPerRow:
nRows += 1
fullSize = (molsPerRow * subImgSize[0], nRows * subImgSize[1])
d2d = rdMolDraw2D.MolDraw2DSVG(fullSize[0], fullSize[1], subImgSize[0], subImgSize[1])
if drawOptions is not None:
d2d.SetDrawOptions(drawOptions)
else:
dops = d2d.drawOptions()
for k, v in list(kwargs.items()):
if hasattr(dops, k):
setattr(dops, k, v)
del kwargs[k]
d2d.DrawMolecules(
list(mols),
legends=legends or None,
highlightAtoms=highlightAtomLists or [],
highlightBonds=highlightBondLists or [],
**kwargs,
)
d2d.FinishDrawing()
res = d2d.GetDrawingText()
return res