Source code for scm.plams.interfaces.molecule.rdkit

from typing import (
    List,
    Literal,
    Optional,
    overload,
    TYPE_CHECKING,
    Sequence,
    Dict,
    Any,
    Union,
    Tuple,
    IO,
    Iterable,
    Generator,
)
import random
import sys
import copy
from warnings import warn
from collections import OrderedDict

from scm.plams.core.functions import log, requires_optional_package
from scm.plams.mol.atom import Atom
from scm.plams.mol.bond import Bond
from scm.plams.mol.molecule import Molecule
from scm.plams.core.errors import PlamsError

try:
    from scm.base import ChemicalSystem

    _has_scm_chemsys = True
except ImportError:
    _has_scm_chemsys = False

if TYPE_CHECKING:
    from rdkit.Chem import Mol as RDKitMol
    from rdkit.Chem import Atom as RDKitAtom
    from rdkit.Chem import Bond as RDKitBond
    from rdkit.Chem import EditableMol as RDKitEdMol
    from rdkit.Chem.AllChem import ChemicalReaction as RDKitReaction
    from rdkit.Chem import Draw
    from PIL import Image

__all__ = [
    "add_Hs",
    "apply_reaction_smarts",
    "apply_template",
    "gen_coords_rdmol",
    "get_backbone_atoms",
    "modify_atom",
    "to_rdmol",
    "from_rdmol",
    "from_sequence",
    "from_smiles",
    "from_smarts",
    "to_smiles",
    "partition_protein",
    "readpdb",
    "writepdb",
    "get_substructure",
    "get_conformations",
    "yield_coords",
    "canonicalize_mol",
    "to_image",
    "get_reaction_image",
]


[docs]@requires_optional_package("rdkit") def from_rdmol(rdkit_mol: "RDKitMol", confid: int = -1, properties: bool = True) -> Molecule: """ Translate an RDKit molecule into a PLAMS molecule type. RDKit properties will be unpickled if their name ends with '_pickled'. :parameter rdkit_mol: RDKit molecule :type rdkit_mol: rdkit.Chem.Mol :parameter int confid: conformer identifier from which to take coordinates :parameter bool properties: If all Chem.Mol, Chem.Atom and Chem.Bond properties should be converted from RDKit to PLAMS format. :return: a PLAMS molecule :rtype: |Molecule| """ from rdkit import Chem if isinstance(rdkit_mol, Molecule): return rdkit_mol # Create PLAMS molecule plams_mol = Molecule() total_charge = 0 try: Chem.Kekulize(rdkit_mol) except Exception: pass conf = rdkit_mol.GetConformer(id=confid) # Add atoms and assign properties to the PLAMS atom if *properties* = True for rd_atom in rdkit_mol.GetAtoms(): pos = conf.GetAtomPosition(rd_atom.GetIdx()) ch = rd_atom.GetFormalCharge() pl_atom = Atom(rd_atom.GetAtomicNum(), coords=(pos.x, pos.y, pos.z), rdkit={"charge": ch}) if properties and rd_atom.GetPDBResidueInfo(): pl_atom.properties.rdkit.pdb_info = get_PDBResidueInfo(rd_atom) plams_mol.add_atom(pl_atom) total_charge += ch # Check for R/S information stereo = str(rd_atom.GetChiralTag()) if stereo == "CHI_TETRAHEDRAL_CCW": pl_atom.properties.rdkit.stereo = "counter-clockwise" elif stereo == "CHI_TETRAHEDRAL_CW": pl_atom.properties.rdkit.stereo = "clockwise" # Add bonds to the PLAMS molecule for bond in rdkit_mol.GetBonds(): at1 = plams_mol.atoms[bond.GetBeginAtomIdx()] at2 = plams_mol.atoms[bond.GetEndAtomIdx()] plams_mol.add_bond(Bond(at1, at2, bond.GetBondTypeAsDouble())) # Check for cis/trans information stereo, bond_dir = str(bond.GetStereo()), str(bond.GetBondDir()) if stereo == "STEREOZ" or stereo == "STEREOCIS": plams_mol.bonds[-1].properties.rdkit.stereo = "Z" elif stereo == "STEREOE" or stereo == "STEREOTRANS": plams_mol.bonds[-1].properties.rdkit.stereo = "E" elif bond_dir == "ENDUPRIGHT": plams_mol.bonds[-1].properties.rdkit.stereo = "up" elif bond_dir == "ENDDOWNRIGHT": plams_mol.bonds[-1].properties.rdkit.stereo = "down" # Set charge and assign properties to PLAMS molecule and bonds if *properties* = True plams_mol.properties.charge = total_charge if properties: prop_from_rdmol(plams_mol, rdkit_mol) for rd_atom, plams_atom in zip(rdkit_mol.GetAtoms(), plams_mol): prop_from_rdmol(plams_atom, rd_atom) for rd_bond, plams_bond in zip(rdkit_mol.GetBonds(), plams_mol.bonds): prop_from_rdmol(plams_bond, rd_bond) return plams_mol
[docs]@requires_optional_package("rdkit") def to_rdmol( plams_mol: Molecule, sanitize: bool = True, properties: bool = True, assignChirality: bool = False, presanitize: bool = False, ) -> "RDKitMol": """ Translate a PLAMS molecule into an RDKit molecule type. PLAMS |Molecule|, |Atom| or |Bond| properties are pickled if they are neither booleans, floats, integers, floats nor strings, the resulting property names are appended with '_pickled'. :parameter plams_mol: A PLAMS molecule :parameter bool sanitize: Kekulize, check valencies, set aromaticity, conjugation and hybridization :parameter bool properties: If all |Molecule|, |Atom| and |Bond| properties should be converted from PLAMS to RDKit format. :parameter bool assignChirality: Assign R/S and cis/trans information, insofar as this was not yet present in the PLAMS molecule. :parameter bool presanitize: Iteratively adjust bonding and atomic charges, to avoid failure of sanitization. Only relevant is sanitize is set to True. :type plams_mol: |Molecule| :return: an RDKit molecule :rtype: rdkit.Chem.Mol """ from rdkit import Chem, Geometry if isinstance(plams_mol, Chem.Mol): return plams_mol # Create rdkit molecule e = Chem.EditableMol(Chem.Mol()) # Add atoms and assign properties to the RDKit atom if *properties* = True for pl_atom in plams_mol.atoms: rd_atom = Chem.Atom(int(pl_atom.atnum)) if "rdkit" in pl_atom.properties: if "charge" in pl_atom.properties.rdkit: rd_atom.SetFormalCharge(pl_atom.properties.rdkit.charge) if properties: if "rdkit" in pl_atom.properties: if "pdb_info" in pl_atom.properties.rdkit: set_PDBresidueInfo(rd_atom, pl_atom.properties.rdkit.pdb_info) for prop in pl_atom.properties.rdkit: if prop not in ("charge", "pdb_info", "stereo"): prop_to_rdmol(rd_atom, prop, pl_atom.properties.rdkit.get(prop)) prop_dic = {} for prop in pl_atom.properties: if prop != "rdkit": prop_dic[prop] = pl_atom.properties.get(prop) if len(prop_dic) > 0: prop_to_rdmol(rd_atom, "plams", prop_dic) # Check for R/S information if pl_atom.properties.rdkit.stereo: stereo = pl_atom.properties.rdkit.stereo.lower() if stereo == "counter-clockwise": rd_atom.SetChiralTag(Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CCW) elif stereo == "clockwise": rd_atom.SetChiralTag(Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CW) e.AddAtom(rd_atom) # Mapping of PLAMS bond orders to RDKit bond types: def plams_to_rd_bonds(bo: float) -> int: if 1.4 < bo < 1.6: return 12 # bond type for aromatic bond else: return int(bo) # Add bonds to the RDKit molecule for bond in plams_mol.bonds: a1 = plams_mol.atoms.index(bond.atom1) a2 = plams_mol.atoms.index(bond.atom2) e.AddBond(a1, a2, Chem.BondType(plams_to_rd_bonds(bond.order))) rdmol = e.GetMol() # Check for cis/trans information for pl_bond, rd_bond in zip(plams_mol.bonds, rdmol.GetBonds()): if pl_bond.properties.rdkit.stereo: stereo = pl_bond.properties.rdkit.stereo.lower() if stereo == "e" or stereo == "trans": rd_bond.SetStereo(Chem.rdchem.BondStereo.STEREOE) elif stereo == "z" or stereo == "cis": rd_bond.SetStereo(Chem.rdchem.BondStereo.STEREOZ) elif stereo == "up": rd_bond.SetBondDir(Chem.rdchem.BondDir.ENDUPRIGHT) elif stereo == "down": rd_bond.SetBondDir(Chem.rdchem.BondDir.ENDDOWNRIGHT) # Assign properties to RDKit molecule and bonds if *properties* = True # All properties will be taken from 'rdkit' subsettings, except the molecular charge if properties: prop_dic = {} for prop in plams_mol.properties: if prop == "rdkit": for rdprop in plams_mol.properties.rdkit: prop_to_rdmol(rdmol, rdprop, plams_mol.properties.rdkit.get(rdprop)) else: # prop_dic[prop] = {'plams':plams_mol.properties.get(prop)} prop_dic[prop] = plams_mol.properties.get(prop) if len(prop_dic) > 0: prop_to_rdmol(rdmol, "plams", prop_dic) prop_dic = {} for pl_bond, rd_bond in zip(plams_mol.bonds, rdmol.GetBonds()): for prop in pl_bond.properties: if prop == "rdkit": for rdprop in pl_bond.properties.rdkit: if rdprop != "stereo": prop_to_rdmol(rd_bond, rdprop, pl_bond.properties.rdkit.get(rdprop)) else: prop_dic[prop] = pl_bond.properties.get(prop) if len(prop_dic) > 0: prop_to_rdmol(rd_bond, "plams", prop_dic) if sanitize: try: if presanitize: rdmol = _presanitize(plams_mol, rdmol) else: Chem.SanitizeMol(rdmol) except ValueError as exc: log("RDKit Sanitization Error.") text = "Most likely this is a problem with the assigned bond orders: " text += "Use chemical insight to adjust them." log(text) log("Note that the atom indices below start at zero, while the AMS-GUI indices start at 1.") raise exc conf = Chem.Conformer() for i, atom in enumerate(plams_mol.atoms): xyz = Geometry.Point3D(atom.x, atom.y, atom.z) conf.SetAtomPosition(i, xyz) rdmol.AddConformer(conf) # REB: Assign all stereochemistry, if it wasn't already there if assignChirality: Chem.rdmolops.AssignAtomChiralTagsFromStructure(rdmol, confId=conf.GetId(), replaceExistingTags=False) try: Chem.AssignStereochemistryFrom3D(rdmol, confId=conf.GetId(), replaceExistingTags=False) except AttributeError: pass return rdmol
[docs]@requires_optional_package("rdkit") def to_smiles(plams_mol: Molecule, short_smiles: bool = True, **kwargs: Any) -> str: """ Returns the RDKit-generated SMILES string of a PLAMS molecule. Note: SMILES strings are generated based on the molecule's connectivity. If the input PLAMS molecule does not contain any bonds, "guessed bonds" will be used. :parameter plams_mol: A PLAMS |Molecule| :parameter bool short_smiles: whether or not to use some RDKit sanitization to get shorter smiles (e.g. for a water molecule, short_smiles=True -> "O", short_smiles=False -> [H]O[H]) :parameter kwargs: With 'kwargs' you can provide extra optional parameters to the rdkit.Chem method 'MolToSmiles'. See the rdkit documentation for more info. :return: the SMILES string """ from rdkit import Chem if len(plams_mol.bonds) > 0: mol_with_bonds = plams_mol else: mol_with_bonds = plams_mol.copy() mol_with_bonds.guess_bonds() rd_mol = to_rdmol(mol_with_bonds, sanitize=False) # This sanitization black magic is needed for getting the "short, nice and clean" SMILES string. # Without this, the SMILES string for water would be "[H]O[H]". With this is just "O" if short_smiles: s = Chem.rdmolops.SanitizeFlags rdkitSanitizeOptions = ( s.SANITIZE_ADJUSTHS or s.SANITIZE_CLEANUP or s.SANITIZE_CLEANUPCHIRALITY or s.SANITIZE_FINDRADICALS or s.SANITIZE_PROPERTIES or s.SANITIZE_SETAROMATICITY or s.SANITIZE_SETCONJUGATION or s.SANITIZE_SETHYBRIDIZATION or s.SANITIZE_SYMMRINGS ) Chem.rdmolops.AssignRadicals(rd_mol) rd_mol = Chem.rdmolops.RemoveHs(rd_mol, updateExplicitCount=True, sanitize=False) Chem.rdmolops.SanitizeMol(rd_mol, rdkitSanitizeOptions) smiles = Chem.MolToSmiles(rd_mol, **kwargs) return smiles
pdb_residue_info_items = [ "AltLoc", "ChainId", "InsertionCode", "IsHeteroAtom", "Name", "Occupancy", "ResidueName", "ResidueNumber", "SecondaryStructure", "SegmentNumber", "SerialNumber", "TempFactor", ] # 'MonomerType' was excluded because it is an rdkit type that cannot easilty be serialized def get_PDBResidueInfo(rdkit_atom: "RDKitAtom") -> Dict[str, Any]: pdb_info = {} for item in pdb_residue_info_items: get_function = "Get" + item pdb_info[item] = rdkit_atom.GetPDBResidueInfo().__getattribute__(get_function)() return pdb_info @requires_optional_package("rdkit") def set_PDBresidueInfo(rdkit_atom: "RDKitAtom", pdb_info: Dict[str, Any]) -> None: from rdkit import Chem atom_pdb_residue_info = Chem.AtomPDBResidueInfo() for item, value in pdb_info.items(): set_function = "Set" + item atom_pdb_residue_info.__getattribute__(set_function)(value) rdkit_atom.SetMonomerInfo(atom_pdb_residue_info) def prop_to_rdmol(rd_obj: Union["RDKitMol", "RDKitAtom", "RDKitBond"], propkey: str, propvalue: Any) -> None: """ Convert a single PLAMS property into an RDKit property. :paramter pl_obj: A PLAMS object. :type pl_obj: |Molecule|, |Atom| or |Bond|. :parameter rd_obj: An RDKit object. :type rd_obj: rdkit.Chem.Mol, rdkit.Chem.Atom or rdkit.Chem.Bond :parameter str propkey: The |Settings| key of the PLAMS property. """ try: import dill as pickle except ImportError: import pickle obj = type(propvalue) obj_dict = {bool: rd_obj.SetBoolProp, float: rd_obj.SetDoubleProp, int: rd_obj.SetIntProp, str: rd_obj.SetProp} if obj_dict.get(obj): obj_dict[obj](propkey, propvalue) # type: ignore[operator] else: name = propkey + "_pickled" try: rd_obj.SetProp(name, pickle.dumps(propvalue, 0).decode()) except (Exception, pickle.PicklingError): pass # ToDo: remove type ignore once mypy_path is enabled @overload def prop_from_rdmol(pl_obj: Bond, rd_obj: "RDKitBond") -> None: ... @overload def prop_from_rdmol(pl_obj: Atom, rd_obj: "RDKitAtom") -> None: ... @overload def prop_from_rdmol(pl_obj: Molecule, rd_obj: "RDKitMol") -> None: ... def prop_from_rdmol(pl_obj: Union[Molecule, Atom, Bond], rd_obj: Union["RDKitMol", "RDKitAtom", "RDKitBond"]) -> None: """ Convert one or more RDKit properties into PLAMS properties. :paramter pl_obj: A PLAMS object. :type pl_obj: |Molecule|, |Atom| or |Bond|. :parameter rd_obj: An RDKit object. :type rd_obj: rdkit.Chem.Mol, rdkit.Chem.Atom or rdkit.Chem.Bond """ try: import dill as pickle except ImportError: import pickle prop_dict = rd_obj.GetPropsAsDict() for propname in prop_dict.keys(): if propname == "plams_pickled": plams_props = pickle.loads(prop_dict[propname].encode()) if not isinstance(plams_props, dict): raise Exception("PLAMS property not properly stored in RDKit") for key, value in plams_props.items(): pl_obj.properties[key] = value else: if propname == "__computedProps": continue if "_pickled" not in propname: pl_obj.properties.rdkit[propname] = prop_dict[propname] else: prop = prop_dict[propname] propname = propname.rsplit("_pickled", 1)[0] propvalue = pickle.loads(prop.encode()) pl_obj.properties.rdkit[propname] = propvalue @overload def from_smiles( smiles: str, nconfs: Literal[1] = ..., name: Optional[str] = ..., forcefield: Optional[str] = ..., rms: float = ... ) -> Molecule: ... @overload def from_smiles( smiles: str, nconfs: int = ..., name: Optional[str] = ..., forcefield: Optional[str] = ..., rms: float = ... ) -> Union[Molecule, List[Molecule]]: ...
[docs]@requires_optional_package("rdkit") def from_smiles( smiles: str, nconfs: int = 1, name: Optional[str] = None, forcefield: Optional[str] = None, rms: float = 0.1 ) -> Union[Molecule, List[Molecule]]: """ Generates PLAMS molecule(s) from a smiles strings. :parameter str smiles: A smiles string :parameter int nconfs: Number of conformers to be generated :parameter str name: A name for the molecule :parameter str forcefield: Choose 'uff' or 'mmff' forcefield for geometry optimization and ranking of conformations. The default value None results in skipping of the geometry optimization step. :parameter float rms: Root Mean Square deviation threshold for removing similar/equivalent conformations :return: A molecule with hydrogens and 3D coordinates or a list of molecules if nconfs > 1 :rtype: |Molecule| or list of PLAMS Molecules """ from rdkit import Chem smiles = str(smiles.split()[0]) smiles = Chem.CanonSmiles(smiles) rdkit_mol = Chem.AddHs(Chem.MolFromSmiles(smiles)) rdkit_mol.SetProp("smiles", smiles) return get_conformations(rdkit_mol, nconfs, name, forcefield, rms)
@overload def from_smarts( smarts: str, nconfs: Literal[1] = ..., name: Optional[str] = ..., forcefield: Optional[str] = ..., rms: float = ... ) -> Molecule: ... @overload def from_smarts( smarts: str, nconfs: int = ..., name: Optional[str] = ..., forcefield: Optional[str] = ..., rms: float = ... ) -> Union[Molecule, List[Molecule]]: ...
[docs]@requires_optional_package("rdkit") def from_smarts( smarts: str, nconfs: int = 1, name: Optional[str] = None, forcefield: Optional[str] = None, rms: float = 0.1 ) -> Union[Molecule, List[Molecule]]: """ Generates PLAMS molecule(s) from a smarts strings. This allows for example to define hydrogens explicitly. However it is less suitable for aromatic molecules (use from_smiles in that case). :parameter str smarts: A smarts string :parameter int nconfs: Number of conformers to be generated :parameter str name: A name for the molecule :parameter str forcefield: Choose 'uff' or 'mmff' forcefield for geometry optimization and ranking of comformations. The default value None results in skipping of the geometry optimization step. :parameter float rms: Root Mean Square deviation threshold for removing similar/equivalent conformations. :return: A molecule with hydrogens and 3D coordinates or a list of molecules if nconfs > 1 :rtype: |Molecule| or list of PLAMS Molecules """ from rdkit import Chem smiles = str(smarts.split()[0]) mol = Chem.MolFromSmarts(smiles) Chem.SanitizeMol(mol) molecule = Chem.AddHs(mol) molecule.SetProp("smiles", smiles) return get_conformations(molecule, nconfs, name, forcefield, rms)
@overload @requires_optional_package("rdkit") def get_conformations( mol: Union[Molecule, "RDKitMol"], nconfs: Literal[1] = 1, name: Optional[str] = None, forcefield: Optional[str] = None, rms: float = -1, enforceChirality: bool = False, useExpTorsionAnglePrefs: str = "default", constraint_ats: Optional[List[int]] = None, EmbedParameters: str = "EmbedParameters", randomSeed: int = 1, best_rms: float = -1, ) -> Molecule: ... @overload @requires_optional_package("rdkit") def get_conformations( mol: Union[Molecule, "RDKitMol"], nconfs: int = ..., name: Optional[str] = None, forcefield: Optional[str] = None, rms: float = -1, enforceChirality: bool = False, useExpTorsionAnglePrefs: str = "default", constraint_ats: Optional[List[int]] = None, EmbedParameters: str = "EmbedParameters", randomSeed: int = 1, best_rms: float = -1, ) -> Union[List[Molecule], Molecule]: ...
[docs]@requires_optional_package("rdkit") def get_conformations( mol: Union[Molecule, "RDKitMol"], nconfs: int = 1, name: Optional[str] = None, forcefield: Optional[str] = None, rms: float = -1, enforceChirality: bool = False, useExpTorsionAnglePrefs: str = "default", constraint_ats: Optional[List[int]] = None, EmbedParameters: str = "EmbedParameters", randomSeed: int = 1, best_rms: float = -1, ) -> Union[List[Molecule], Molecule]: """ Generates 3D conformation(s) for an rdkit_mol or a PLAMS Molecule :parameter mol: RDKit or PLAMS Molecule :type mol: rdkit.Chem.Mol or |Molecule| :parameter int nconfs: Number of conformers to be generated :parameter str name: A name for the molecule :parameter str forcefield: Choose 'uff' or 'mmff' forcefield for geometry optimization and ranking of comformations. The default value None results in skipping of the geometry optimization step :parameter float rms: Root Mean Square deviation threshold for removing similar/equivalent conformations. :parameter float best_rms: Root Mean Square deviation of best atomic permutation for removing similar/equivalent conformations. :parameter bool enforceChirality: Enforce the correct chirality if chiral centers are present :parameter str useExpTorsionAnglePrefs: Use experimental torsion angles preferences for the conformer generation by rdkit :parameter list constraint_ats: List of atom indices to be constrained :parameter str EmbedParameters: Name of RDKit EmbedParameters class ('EmbedParameters', 'ETKDG') :parameter int randomSeed: The seed for the random number generator. If set to None the generated conformers will be non-deterministic. :return: A molecule with hydrogens and 3D coordinates or a list of molecules if nconfs > 1 :rtype: |Molecule| or list of PLAMS Molecules """ from rdkit import Chem from rdkit.Chem import AllChem if isinstance(mol, Molecule): if not mol.bonds: mol.guess_bonds() rdkit_mol = to_rdmol(mol, assignChirality=enforceChirality) else: rdkit_mol = mol def constrained_embedding( rdkit_mol: "RDKitMol", nconfs: int, param_obj: bool, template_mol: "RDKitMol", randomSeed: int ) -> List[int]: """ Use RDKit ConstrainedEmbed to add conformers to rdkit_mol (EmbedMultipleConfs does not constrain) Note: ConstrainedEmbed seems very sensitive to the randomseed supplied High seeds lead to badly constrained coordinates """ # ConstrainedEmbed overwrites conformers (does not add) # So I need to do that in a separate molecule, and add the resulting confs later. rdmol_orig = copy.deepcopy(rdkit_mol) if randomSeed: seed = randomSeed # Will crash with high seed if randomSeed > 1e9: seed = randomSeed - (int((randomSeed / 1e9)) * int(1e9)) random_ints = [i + seed for i in range(nconfs)] else: base = random.getrandbits(32) random_ints = [i + base for i in range(nconfs)] conformers = [] for seed in random_ints: rdmol_orig.RemoveAllConformers() rdmol_orig = AllChem.ConstrainedEmbed(rdmol_orig, template_mol, param_obj, randomseed=seed) conf = rdmol_orig.GetConformer(0) newconf = Chem.Conformer() for i, pos in enumerate(conf.GetPositions()): newconf.SetAtomPosition(i, pos) conformers.append(newconf) rdkit_mol.AddConformer(newconf, True) cids = [c.GetId() for c in rdkit_mol.GetConformers()] return cids def MMFFenergy(cid: int) -> float: ff = AllChem.MMFFGetMoleculeForceField(rdkit_mol, AllChem.MMFFGetMoleculeProperties(rdkit_mol), confId=cid) # type: ignore[attr-defined] try: energy = ff.CalcEnergy() except: msg = ( "MMFF energy calculation failed for molecule: " + Chem.MolToSmiles(rdkit_mol) + "\nNo geometry optimization was performed." ) warn(msg) energy = 1e9 return energy def UFFenergy(cid: int) -> float: ff = AllChem.UFFGetMoleculeForceField(rdkit_mol, confId=cid) # type: ignore[attr-defined] try: energy = ff.CalcEnergy() except: msg = ( "MMFF energy calculation failed for molecule: " + Chem.MolToSmiles(rdkit_mol) + "\nNo geometry optimization was performed." ) warn(msg) energy = 1e9 return energy def remove_some_Hs(m: "RDKitMol") -> "RDKitMol": res = Chem.RWMol(m) c_hs = [x[0] for x in m.GetSubstructMatches(Chem.MolFromSmarts("[#1;$([#1]-[#6])]"))] c_hs.sort(reverse=True) for aid in c_hs: res.RemoveAtom(aid) return res.GetMol() if name: rdkit_mol.SetProp("name", name) if best_rms > 0: if rms > 0: raise PlamsError("Cannot set both rms and best_rms") rms = best_rms # if enforceChirality : # # This is how chirality is enforced in the GUI. The argument is not passed to AllChem.EmbedMultipleConfs # Chem.AssignAtomChiralTagsFromStructure(rdkit_mol) # param_obj = AllChem.ETKDG() param_obj = getattr(AllChem, EmbedParameters)() param_obj.pruneRmsThresh = rms param_obj.enforceChirality = enforceChirality if useExpTorsionAnglePrefs != "default": # The default (True of False) changes with rdkit versions param_obj.useExpTorsionAnglePrefs = True if constraint_ats is not None: # Just adding the coordMap and using EmbedMultipleConfs does not seem to work (in this version) # coordMap = {} # for i, iat in enumerate(constraint_ats): # coordMap[iat] = rdkit_mol.GetConformer(0).GetAtomPosition(iat) # param_obj.coordMap = coordMap emol = Chem.RWMol(rdkit_mol) indices = [i for i in range(rdkit_mol.GetNumAtoms()) if not i in constraint_ats][::-1] for iat in indices: emol.RemoveAtom(iat) template_mol = emol.GetMol() else: param_obj.randomSeed = randomSeed if randomSeed is not None else random.getrandbits(31) try: if constraint_ats is not None: cids = constrained_embedding(rdkit_mol, nconfs, param_obj, template_mol, randomSeed) else: param_obj.randomSeed = randomSeed if randomSeed is not None else random.getrandbits(31) cids = list(AllChem.EmbedMultipleConfs(rdkit_mol, nconfs, param_obj)) # type: ignore[attr-defined] except Exception: # ``useRandomCoords = True`` prevents (poorly documented) crash for large systems param_obj.useRandomCoords = True if constraint_ats is not None: cids = constrained_embedding(rdkit_mol, nconfs, param_obj, template_mol, randomSeed) else: cids = list(AllChem.EmbedMultipleConfs(rdkit_mol, nconfs, param_obj)) # type: ignore[attr-defined] if len(cids) == 0: # Sometimes rdkit does not crash (for large systems), but simply doe snot create conformers param_obj.useRandomCoords = True if constraint_ats is not None: cids = constrained_embedding(rdkit_mol, nconfs, param_obj, template_mol, randomSeed) else: cids = list(AllChem.EmbedMultipleConfs(rdkit_mol, nconfs, param_obj)) # type: ignore[attr-defined] if forcefield: # Select the forcefield (UFF or MMFF) optimize_molecule, energy = { "uff": [AllChem.UFFOptimizeMolecule, UFFenergy], # type: ignore[attr-defined] "mmff": [AllChem.MMFFOptimizeMolecule, MMFFenergy], # type: ignore[attr-defined] }[forcefield] # Optimize and sort conformations for cid in cids: optimize_molecule(rdkit_mol, confId=cid) cids.sort(key=energy) # Remove duplicate conformations based on RMS if best_rms > 0 or forcefield: rdmol_local = rdkit_mol rms_function = AllChem.AlignMol # type: ignore[attr-defined] if best_rms > 0: # Remove the H atoms, and prepare to use the more expensive RDKit function rdmol_local = remove_some_Hs(rdkit_mol) rms_function = AllChem.GetBestRMS # type: ignore[attr-defined] keep = [cids[0]] for cid in cids[1:]: for idx in keep: try: # r = AllChem.AlignMol(rdkit_mol, rdkit_mol, cid, idx) r = rms_function(rdmol_local, rdmol_local, cid, idx) except Exception: r = rms + 1 message = "Alignment failed in multiple conformation generation: " message += Chem.MolToSmiles(rdkit_mol) message += "\nAssuming different conformations." warn(message) if r < rms: break else: keep.append(cid) cids = keep if nconfs == 1: return from_rdmol(rdkit_mol) else: return [from_rdmol(rdkit_mol, cid) for cid in cids]
@overload @requires_optional_package("rdkit") def from_sequence( sequence: str, nconfs: Literal[1] = 1, name: Optional[str] = None, forcefield: Optional[str] = None, rms: float = 0.1, ) -> Molecule: ... @overload @requires_optional_package("rdkit") def from_sequence( sequence: str, nconfs: int, name: Optional[str] = None, forcefield: Optional[str] = None, rms: float = 0.1 ) -> Union[List[Molecule], Molecule]: ...
[docs]@requires_optional_package("rdkit") def from_sequence( sequence: str, nconfs: int = 1, name: Optional[str] = None, forcefield: Optional[str] = None, rms: float = 0.1 ) -> Union[List[Molecule], Molecule]: """ Generates PLAMS molecule from a peptide sequence. Includes explicit hydrogens and 3D coordinates. :parameter str sequence: A peptide sequence, e.g. 'HAG' :parameter int nconfs: Number of conformers to be generated :parameter str name: A name for the molecule :parameter str forcefield: Choose 'uff' or 'mmff' forcefield for geometry optimization and ranking of comformations. The default value None results in skipping of the geometry optimization step. :parameter float rms: Root Mean Square deviation threshold for removing similar/equivalent conformations. :return: A peptide molecule with hydrogens and 3D coordinates or a list of molecules if nconfs > 1 :rtype: |Molecule| or list of PLAMS Molecules """ from rdkit import Chem rdkit_mol = Chem.AddHs(Chem.MolFromSequence(sequence)) rdkit_mol.SetProp("sequence", sequence) return get_conformations(rdkit_mol, nconfs, name, forcefield, rms)
@requires_optional_package("rdkit") def calc_rmsd(mol1: Molecule, mol2: Molecule) -> float: """ Superimpose two molecules and calculate the root-mean-squared deviations of the atomic positions. The molecules should be identical, but the ordering of the atoms may differ. :param mol1: Molecule 1 :param mol2: Molecule 2 :return: The rmsd after superposition :rtype: float """ from rdkit.Chem import AllChem rdkit_mol1 = to_rdmol(mol1) rdkit_mol2 = to_rdmol(mol2) try: return AllChem.GetBestRMS(rdkit_mol1, rdkit_mol2) # type: ignore[attr-defined] except: return -999
[docs]@requires_optional_package("rdkit") def modify_atom(mol: Molecule, idx: int, element: str) -> Molecule: """ Change atom "idx" in molecule "mol" to "element" and add or remove hydrogens accordingly :parameter mol: molecule to be modified :type mol: |Molecule| or rdkit.Chem.Mol :parameter int idx: index of the atom to be modified :parameter str element: :return: Molecule with new element and possibly added or removed hydrogens :rtype: |Molecule| """ from rdkit import Chem rdmol = to_rdmol(mol) if rdmol.GetAtomWithIdx(idx).GetSymbol() == element: return mol else: e = Chem.EditableMol(rdmol) for neighbor in reversed(rdmol.GetAtomWithIdx(idx - 1).GetNeighbors()): if neighbor.GetSymbol() == "H": e.RemoveAtom(neighbor.GetIdx()) e.ReplaceAtom(idx - 1, Chem.Atom(element)) newmol = e.GetMol() Chem.SanitizeMol(newmol) newmol = Chem.AddHs(newmol, addCoords=True) return from_rdmol(newmol)
[docs]@requires_optional_package("rdkit") def apply_template(mol: Molecule, template: str) -> Molecule: """ Modifies bond orders in PLAMS molecule according template smiles structure. :parameter mol: molecule to be modified :type mol: |Molecule| or rdkit.Chem.Mol :parameter str template: smiles string defining the correct chemical structure :return: Molecule with correct chemical structure and provided 3D coordinates :rtype: |Molecule| """ from rdkit import Chem rdmol = to_rdmol(mol, sanitize=False) template_mol = Chem.AddHs(Chem.MolFromSmiles(template)) newmol = Chem.AllChem.AssignBondOrdersFromTemplate(template_mol, rdmol) return from_rdmol(newmol)
@overload @requires_optional_package("rdkit") def apply_reaction_smarts( mol: Union[Molecule, "RDKitMol"], reaction_smarts: str, complete: bool = False, forcefield: Optional[str] = None, return_rdmol: Literal[False] = False, ) -> Molecule: ... @overload @requires_optional_package("rdkit") def apply_reaction_smarts( mol: Union[Molecule, "RDKitMol"], reaction_smarts: str, complete: bool = False, forcefield: Optional[str] = None, return_rdmol: Literal[True] = ..., ) -> "RDKitMol": ... @overload @requires_optional_package("rdkit") def apply_reaction_smarts( mol: Union[Molecule, "RDKitMol"], reaction_smarts: str, complete: bool = False, forcefield: Optional[str] = None, return_rdmol: bool = ..., ) -> Union[Molecule, "RDKitMol"]: ...
[docs]@requires_optional_package("rdkit") def apply_reaction_smarts( mol: Union[Molecule, "RDKitMol"], reaction_smarts: str, complete: bool = False, forcefield: Optional[str] = None, return_rdmol: bool = False, ) -> Union[Molecule, "RDKitMol"]: """ Applies reaction smirks and returns product. If returned as a PLAMS molecule, thismolecule.properties.orig_atoms is a list of indices of atoms that have not been changed (which can for example be used partially optimize new atoms only with the freeze keyword) :parameter mol: molecule to be modified :type mol: |Molecule| or rdkit.Chem.Mol :parameter str reactions_smarts: Reactions smarts to be applied to molecule :parameter complete: Apply reaction until no further changes occur or given fraction of reaction centers have been modified :type complete: bool or float (value between 0 and 1) :parameter forcefield: Specify 'uff' or 'mmff' to apply forcefield based geometry optimization of product structures. :type forcefield: str :param bool return_rdmol: return a RDKit molecule if true, otherwise a PLAMS molecule :return: (product molecule, list of unchanged atoms) :rtype: (|Molecule|, list of int) """ from rdkit import Chem from rdkit.Chem import AllChem def react(reactant: "RDKitMol", reaction: "RDKitReaction") -> List[Tuple["RDKitMol", Iterable[int]]]: """Apply reaction to reactant and return products""" ps = reaction.RunReactants([reactant]) # if reaction doesn't apply, return the reactant if len(ps) == 0: return [(reactant, range(reactant.GetNumAtoms()))] full = len(ps) while complete: # when complete is True # apply reaction until no further changes r = random.randint(0, len(ps) - 1) reactant = ps[r][0] ps = reaction.RunReactants([reactant]) if len(ps) == 0 or len(ps) / full < (1 - complete): ps = [[reactant]] break # add hydrogens and generate coordinates for new atoms products: List[Tuple["RDKitMol", Iterable[int]]] = [] for p in ps[0]: Chem.SanitizeMol(p) q = Chem.AddHs(p) Chem.SanitizeMol(q) u = gen_coords_rdmol(q) # These are the atoms that have not changed products.append((q, u)) return products mol = to_rdmol(mol) reaction = AllChem.ReactionFromSmarts(reaction_smarts) # type: ignore[attr-defined] # RDKit removes fragments that are disconnected from the reaction center # In order to keep these, the molecule is first split in separate fragments # and the results, including non-reacting parts, are re-combined afterwards frags = Chem.GetMolFrags(mol, asMols=True) product = Chem.Mol() unchanged = [] # List of atoms that have not changed for frag in frags: for p, u in react(frag, reaction): unchanged += [product.GetNumAtoms() + i for i in u] product = Chem.CombineMols(product, p) if forcefield: optimize_coordinates(product, forcefield, fixed=unchanged) # The molecule is returned together with a list of atom indices of the atoms # that are identical to those # in the reactants. This list can be used in subsequent partial optimization of the molecule if not return_rdmol: product_mol = from_rdmol(product) product_mol.properties.orig_atoms = [a + 1 for a in unchanged] return product_mol return product
def gen_coords(plamsmol: Molecule) -> List[int]: """Calculate 3D positions only for atoms without coordinates""" rdmol = to_rdmol(plamsmol) unchanged = gen_coords_rdmol(rdmol) conf = rdmol.GetConformer() for a in range(len(plamsmol.atoms)): pos = conf.GetAtomPosition(a) atom = plamsmol.atoms[a] atom._setx(pos.x) # type: ignore[attr-defined] atom._sety(pos.y) # type: ignore[attr-defined] atom._setz(pos.z) # type: ignore[attr-defined] return [a + 1 for a in unchanged] @requires_optional_package("rdkit") def gen_coords_rdmol(rdmol: "RDKitMol") -> List[int]: from rdkit.Chem import AllChem ref = rdmol.__copy__() conf = rdmol.GetConformer() coordDict = {} unchanged: List[int] = [] maps = [] # Put known coordinates in coordDict for i in range(rdmol.GetNumAtoms()): pos = conf.GetAtomPosition(i) if (-0.0001 < pos.x < 0.0001) and (-0.0001 < pos.y < 0.0001) and (-0.0001 < pos.z < 0.0001): continue # atom without coordinates coordDict[i] = pos unchanged.append(i) maps.append((i, i)) # compute coordinates for new atoms, keeping known coordinates rms = 1 rs = 1 # repeat embedding and alignment until the rms of mapped atoms is sufficiently small if rdmol.GetNumAtoms() > len(maps): while rms > 0.1: AllChem.EmbedMolecule(rdmol, coordMap=coordDict, randomSeed=rs, useBasicKnowledge=True) # type: ignore[attr-defined] # align new molecule to original coordinates rms = AllChem.AlignMol(rdmol, ref, atomMap=maps) # type: ignore[attr-defined] rs += 1 return unchanged @requires_optional_package("rdkit") def optimize_coordinates(rdkit_mol: "RDKitMol", forcefield: str, fixed: Sequence = []) -> None: from rdkit import Chem from rdkit.Chem import AllChem def MMFFminimize() -> None: ff = AllChem.MMFFGetMoleculeForceField(rdkit_mol, AllChem.MMFFGetMoleculeProperties(rdkit_mol)) # type: ignore[attr-defined] for f in fixed: ff.AddFixedPoint(f) try: ff.Minimize() except: warn("MMFF geometry optimization failed for molecule: " + Chem.MolToSmiles(rdkit_mol)) def UFFminimize() -> None: ff = AllChem.UFFGetMoleculeForceField(rdkit_mol, ignoreInterfragInteractions=True) # type: ignore[attr-defined] for f in fixed: ff.AddFixedPoint(f) try: ff.Minimize() except: warn("UFF geometry optimization failed for molecule: " + Chem.MolToSmiles(rdkit_mol)) optimize_molecule = {"uff": UFFminimize, "mmff": MMFFminimize}[forcefield] Chem.SanitizeMol(rdkit_mol) optimize_molecule() return @requires_optional_package("rdkit") def write_molblock(plams_mol: Molecule, file: IO = sys.stdout) -> None: from rdkit import Chem file.write(Chem.MolToMolBlock(to_rdmol(plams_mol))) @overload @requires_optional_package("rdkit") def readpdb( pdb_file: Union[str, IO], sanitize: bool = True, removeHs: bool = False, proximityBonding: bool = False, return_rdmol: Literal[False] = False, ) -> Molecule: ... @overload @requires_optional_package("rdkit") def readpdb( pdb_file: Union[str, IO], sanitize: bool = True, removeHs: bool = False, proximityBonding: bool = False, return_rdmol: Literal[True] = ..., ) -> "RDKitMol": ... @overload @requires_optional_package("rdkit") def readpdb( pdb_file: Union[str, IO], sanitize: bool = True, removeHs: bool = False, proximityBonding: bool = False, return_rdmol: bool = ..., ) -> Union["RDKitMol", Molecule]: ...
[docs]@requires_optional_package("rdkit") def readpdb( pdb_file: Union[str, IO], sanitize: bool = True, removeHs: bool = False, proximityBonding: bool = False, return_rdmol: bool = False, ) -> Union["RDKitMol", Molecule]: """ Generate a molecule from a PDB file :param pdb_file: The PDB file to read :type pdb_file: path- or file-like :param bool sanitize: :param bool removeHs: Hydrogens are removed if True :param bool return_rdmol: return a RDKit molecule if true, otherwise a PLAMS molecule :return: The molecule :rtype: |Molecule| or rdkit.Chem.Mol """ from rdkit import Chem if isinstance(pdb_file, str): with open(pdb_file) as f: contents = f.read() else: contents = pdb_file.read() pdb_mol = Chem.MolFromPDBBlock(contents, sanitize=sanitize, removeHs=removeHs) return pdb_mol if return_rdmol else from_rdmol(pdb_mol)
[docs]@requires_optional_package("rdkit") def writepdb(mol: Union[Molecule, "RDKitMol"], pdb_file: Union[str, IO] = sys.stdout) -> None: """ Write a PDB file from a molecule :parameter mol: molecule to be exported to PDB :type mol: |Molecule| or rdkit.Chem.Mol :param pdb_file: The PDB file to write to, or a filename :type pdb_file: path- or file-like """ from rdkit import Chem mol = to_rdmol(mol, sanitize=False) if isinstance(pdb_file, str): with open(pdb_file, "w") as f: f.write(Chem.MolToPDBBlock(mol)) else: pdb_file.write(Chem.MolToPDBBlock(mol))
@overload @requires_optional_package("rdkit") def add_Hs( mol: Union[Molecule, "RDKitMol"], forcefield: Optional[str] = None, return_rdmol: Literal[False] = False ) -> Molecule: ... @overload @requires_optional_package("rdkit") def add_Hs( mol: Union[Molecule, "RDKitMol"], forcefield: Optional[str] = None, return_rdmol: Literal[True] = ... ) -> "RDKitMol": ... @overload @requires_optional_package("rdkit") def add_Hs( mol: Union[Molecule, "RDKitMol"], forcefield: Optional[str] = None, return_rdmol: bool = ... ) -> Union[Molecule, "RDKitMol"]: ...
[docs]@requires_optional_package("rdkit") def add_Hs( mol: Union[Molecule, "RDKitMol"], forcefield: Optional[str] = None, return_rdmol: bool = False ) -> Union[Molecule, "RDKitMol"]: """ Add hydrogens to protein molecules read from PDB. Makes sure that the hydrogens get the correct PDBResidue info. :param mol: Molecule to be protonated :type mol: |Molecule| or rdkit.Chem.Mol :param str forcefield: Specify 'uff' or 'mmff' to apply forcefield based geometry optimization on new atoms. :param bool return_rdmol: return a RDKit molecule if true, otherwise a PLAMS molecule :return: A molecule with explicit hydrogens added :rtype: |Molecule| or rdkit.Chem.Mol """ from rdkit import Chem mol = to_rdmol(mol) retmol = Chem.AddHs(mol) for atom in retmol.GetAtoms(): if atom.GetPDBResidueInfo() is None and atom.GetSymbol() == "H": bond = atom.GetBonds()[0] if bond.GetBeginAtom().GetIdx() == atom.GetIdx: connected_atom = bond.GetEndAtom() else: connected_atom = bond.GetBeginAtom() try: ResInfo = connected_atom.GetPDBResidueInfo() if ResInfo is None: continue # Segmentation faults are raised if ResInfo is None atom.SetMonomerInfo(ResInfo) atomname = "H" + atom.GetPDBResidueInfo().GetName()[1:] atom.GetPDBResidueInfo().SetName(atomname) except: pass unchanged = gen_coords_rdmol(retmol) if forcefield: optimize_coordinates(retmol, forcefield, fixed=unchanged) return retmol if return_rdmol else from_rdmol(retmol)
@requires_optional_package("rdkit") def add_fragment( rwmol: "RDKitMol", frag: "RDKitMol", rwmol_atom_idx: Optional[int] = None, frag_atom_idx: Optional[int] = None, bond_order: Optional[int] = None, ) -> None: from rdkit import Chem molconf = rwmol.GetConformer() fragconf = frag.GetConformer() new_indices = [] for a in frag.GetAtoms(): new_index = rwmol.AddAtom(a) new_indices.append(new_index) molconf.SetAtomPosition(new_index, fragconf.GetAtomPosition(a.GetIdx())) for b in frag.GetBonds(): ba = b.GetBeginAtomIdx() ea = b.GetEndAtomIdx() rwmol.AddBond(new_indices[ba], new_indices[ea], b.GetBondType()) if bond_order and rwmol_atom_idx and frag_atom_idx: rwmol.AddBond(rwmol_atom_idx, new_indices[frag_atom_idx], Chem.BondType.values[bond_order]) rwmol.GetAtomWithIdx(new_indices[frag_atom_idx]).SetNumRadicalElectrons(0) @requires_optional_package("rdkit") def get_fragment( mol: "RDKitMol", indices: Sequence[int], incl_expl_Hs: bool = True, neutralize: bool = True ) -> "RDKitMol": from rdkit import Chem molconf = mol.GetConformer() fragment = Chem.RWMol(Chem.Mol()) fragconf = Chem.Conformer() # Put atoms in fragment for i in indices: atom = mol.GetAtomWithIdx(i) new_index = fragment.AddAtom(atom) pos = molconf.GetAtomPosition(i) fragconf.SetAtomPosition(new_index, pos) # Put bonds in fragment for b in mol.GetBonds(): ba = b.GetBeginAtomIdx() ea = b.GetEndAtomIdx() if ba in indices and ea in indices: fragment.AddBond(indices.index(ba), indices.index(ea), b.GetBondType()) continue if not incl_expl_Hs: continue if ba in indices and mol.GetAtomWithIdx(ea).GetSymbol() == "H": hi = fragment.AddAtom(mol.GetAtomWithIdx(ea)) fragconf.SetAtomPosition(hi, molconf.GetAtomPosition(ea)) fragment.AddBond(indices.index(ba), hi, Chem.BondType.SINGLE) continue if ea in indices and mol.GetAtomWithIdx(ba).GetSymbol() == "H": hi = fragment.AddAtom(mol.GetAtomWithIdx(ba)) fragconf.SetAtomPosition(hi, molconf.GetAtomPosition(ba)) fragment.AddBond(indices.index(ea), hi, Chem.BondType.SINGLE) ret_frag = fragment.GetMol() Chem.SanitizeMol(ret_frag) if neutralize: for atom in ret_frag.GetAtoms(): nrad = atom.GetNumRadicalElectrons() if nrad > 0: atom.SetNumExplicitHs(atom.GetNumExplicitHs() + nrad) atom.SetNumRadicalElectrons(0) Chem.SanitizeMol(ret_frag) ret_frag.AddConformer(fragconf) return ret_frag @overload @requires_optional_package("rdkit") def partition_protein( mol: Union[Molecule, "RDKitMol"], residue_bonds: Optional[Iterable[Tuple[int, int]]] = None, split_heteroatoms: bool = True, return_rdmol: Literal[False] = False, ) -> Tuple[List[Molecule], List[Molecule]]: ... @overload @requires_optional_package("rdkit") def partition_protein( mol: Union[Molecule, "RDKitMol"], residue_bonds: Optional[Iterable[Tuple[int, int]]] = None, split_heteroatoms: bool = True, return_rdmol: Literal[True] = ..., ) -> Tuple[List["RDKitMol"], List["RDKitMol"]]: ... @overload @requires_optional_package("rdkit") def partition_protein( mol: Union[Molecule, "RDKitMol"], residue_bonds: Optional[Iterable[Tuple[int, int]]] = None, split_heteroatoms: bool = True, return_rdmol: bool = ..., ) -> Tuple[List[Union[Molecule, "RDKitMol"]], List[Union[Molecule, "RDKitMol"]]]: ...
[docs]@requires_optional_package("rdkit") def partition_protein( mol: Union[Molecule, "RDKitMol"], residue_bonds: Optional[Iterable[Tuple[int, int]]] = None, split_heteroatoms: bool = True, return_rdmol: bool = False, ) -> Tuple[Union[List[Molecule], List["RDKitMol"]], Union[List[Molecule], List["RDKitMol"]]]: """ Splits a protein molecule into capped amino acid fragments and caps. :param mol: A protein molecule :type mol: |Molecule| or rdkit.Chem.Mol :param tuple residue_bonds: a tuple of pairs of residue number indicating which peptide bonds to split. If none, split all peptide bonds. :param bool split_heteroatoms: if True, all bonds between a heteroatom and a non-heteroatom across residues are removed :return: list of fragments, list of caps """ from rdkit import Chem mol = to_rdmol(mol) caps = [] em = Chem.RWMol(mol) if split_heteroatoms: for bond in mol.GetBonds(): resinfa = bond.GetBeginAtom().GetPDBResidueInfo() resinfb = bond.GetEndAtom().GetPDBResidueInfo() if resinfa.GetIsHeteroAtom() is not resinfb.GetIsHeteroAtom(): if resinfa.GetResidueNumber() != resinfb.GetResidueNumber(): em.RemoveBond(bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()) # Split peptide bonds pept_bond = Chem.MolFromSmarts("[C;X4;H1,H2][CX3](=O)[NX3][C;X4;H1,H2][CX3](=O)") for match in mol.GetSubstructMatches(pept_bond): if residue_bonds: resa = mol.GetAtomWithIdx(match[1]).GetPDBResidueInfo().GetResidueNumber() resb = mol.GetAtomWithIdx(match[3]).GetPDBResidueInfo().GetResidueNumber() if (resa, resb) not in residue_bonds and (resb, resa) not in residue_bonds: continue cap = get_fragment(mol, match[0:5]) cap = add_Hs(cap, return_rdmol=True) caps.append(cap if return_rdmol else from_rdmol(cap)) cap_o_ind = cap.GetSubstructMatch(Chem.MolFromSmarts("[C;X4][CX3]=O")) cap_o = get_fragment(cap, cap_o_ind, neutralize=False) cap_n_ind = cap.GetSubstructMatch(Chem.MolFromSmarts("O=[CX3][NX3][C;X4]"))[2:] cap_n = get_fragment(cap, cap_n_ind, neutralize=False) em.RemoveBond(match[1], match[3]) add_fragment(em, cap_o, match[3], 1, 1) add_fragment(em, cap_n, match[1], 0, 1) # Split disulfide bonds ss_bond = Chem.MolFromSmarts("[C;X4;H1,H2]SS[C;X4;H1,H2]") for match in mol.GetSubstructMatches(ss_bond): cap = get_fragment(mol, match[0:5]) cap = add_Hs(cap, return_rdmol=True) caps.append(cap if return_rdmol else from_rdmol(cap)) cap_s_ind = cap.GetSubstructMatch(Chem.MolFromSmarts("[C;X4]SS[C;X4]")) cap_s1 = get_fragment(cap, cap_s_ind[0:2], neutralize=False) cap_s2 = get_fragment(cap, cap_s_ind[2:4], neutralize=False) em.RemoveBond(match[1], match[2]) add_fragment(em, cap_s1, match[2], 1, 1) add_fragment(em, cap_s2, match[1], 0, 1) frags: List["RDKitMol"] = [f for f in Chem.GetMolFrags(em.GetMol(), asMols=True, sanitizeFrags=False)] if not return_rdmol: return [from_rdmol(frag) for frag in frags], caps return frags, caps
@overload @requires_optional_package("rdkit") def charge_AAs(mol: Molecule, return_rdmol: Literal[False] = False) -> Molecule: ... @overload @requires_optional_package("rdkit") def charge_AAs(mol: Molecule, return_rdmol: Literal[True] = ...) -> Union[Molecule, "RDKitMol"]: ... @overload @requires_optional_package("rdkit") def charge_AAs(mol: Molecule, return_rdmol: bool = ...) -> Union[Molecule, "RDKitMol"]: ... @requires_optional_package("rdkit") def charge_AAs(mol: Molecule, return_rdmol: bool = False) -> Union[Molecule, "RDKitMol"]: from rdkit import Chem ionizations = {"ARG_NH2": 1, "LYS_NZ": 1, "GLU_OE2": -1, "ASP_OD2": -1} mol = to_rdmol(mol) for atom in mol.GetAtoms(): # type: ignore[attr-defined] resinfo = atom.GetPDBResidueInfo() res_atom = resinfo.GetResidueName() + "_" + resinfo.GetName().strip() try: atom.SetFormalCharge(ionizations[res_atom]) Chem.SanitizeMol(mol) except KeyError: pass Chem.SanitizeMol(mol) return mol if return_rdmol else from_rdmol(mol)
[docs]def get_backbone_atoms(mol: Union[Molecule, "RDKitMol"]) -> List[int]: """ Return a list of atom indices corresponding to the backbone atoms in a peptide molecule. This function assumes PDB information in properties.pdb_info of each atom, which is the case if the molecule is generated with the "readpdb" or "from_sequence" functions. :parameter mol: a peptide molecule :type mol: |Molecule| or rdkit.Chem.Mol :return: a list of atom indices :rtype: list """ if not isinstance(mol, Molecule): mol = from_rdmol(mol) backbone = ["N", "CA", "C", "O"] return [a for a in range(1, len(mol) + 1) if str(mol[a].properties.pdb_info.Name).strip() in backbone]
[docs]@requires_optional_package("rdkit") def get_substructure( mol: Molecule, func_list: Sequence[Union[str, Molecule, "RDKitMol"]] ) -> Dict[Union[str, Molecule, "RDKitMol"], Tuple[Atom, ...]]: """ Search for functional groups within a molecule based on a list of reference functional groups. SMILES strings, PLAMS and/or RDKit molecules can be used interchangeably in "func_list". Example: .. code:: python >>> mol = from_smiles('OCCO') # Ethylene glycol >>> func_list = ['[H]O', 'C[N+]', 'O=PO'] >>> get_substructure(mol, func_list) {'[H]O': [(<scm.plams.mol.atom.Atom at 0x125183518>, <scm.plams.mol.atom.Atom at 0x1251836a0>), (<scm.plams.mol.atom.Atom at 0x125183550>, <scm.plams.mol.atom.Atom at 0x125183240>)]} :parameter mol: A PLAMS molecule. :type mol: |Molecule| :parameter list func_list: A list of functional groups. Functional groups can be represented by SMILES strings, PLAMS and/or RDKit molecules. :return: A dictionary with functional groups from "func_list" as keys and a list of n-tuples with matching PLAMS |Atom| as values. """ from rdkit import Chem def _to_rdmol(functional_group: Union[str, Molecule, "RDKitMol"]) -> "RDKitMol": """Turn a SMILES strings, RDKit or PLAMS molecules into an RDKit molecule.""" if isinstance(functional_group, str): # RDKit tends to remove explicit hydrogens if SANITIZE_ADJUSTHS is enabled sanitize = Chem.SanitizeFlags.SANITIZE_ALL ^ Chem.SanitizeFlags.SANITIZE_ADJUSTHS ret = Chem.MolFromSmiles(functional_group, sanitize=False) Chem.rdmolops.SanitizeMol(ret, sanitizeOps=sanitize) return ret elif isinstance(functional_group, Molecule): return to_rdmol(functional_group) elif isinstance(functional_group, Chem.Mol): return functional_group raise TypeError( "get_substructure: " + str(type(functional_group)) + " is not a supported \ object type" ) def _get_match( mol: Molecule, rdmol: "RDKitMol", functional_group: Union[str, Molecule, "RDKitMol"] ) -> Union[List[Tuple[Atom]], bool]: """Perform a substructure match on "mol". If a match is found, return a list of n-tuples consisting PLAMS |Atom|. Otherwise return False.""" matches = rdmol.GetSubstructMatches(functional_group) # type: ignore[arg-type] if matches: return [tuple(mol[j + 1] for j in idx_tup) for idx_tup in matches] return False rdmol = to_rdmol(mol) rdmol_func_list = [_to_rdmol(i) for i in func_list] gen = (_get_match(mol, rdmol, i) for i in rdmol_func_list) return {key: value for key, value in zip(func_list, gen) if value} # type: ignore[misc]
[docs]def yield_coords(rdmol: "RDKitMol", id: int = -1) -> Generator[Tuple[float, float, float], None, None]: """Take an rdkit molecule and yield its coordinates as 3-tuples. .. code-block:: python >>> from scm.plams import yield_coords >>> from rdkit import Chem >>> rdmol = Chem.Mol(...) # e.g. Methane >>> for xyz in yield_coords(rdmol): ... print(xyz) (-0.0, -0.0, -0.0) (0.6405, 0.6405, -0.6405) (0.6405, -0.6405, 0.6405) (-0.6405, 0.6405, 0.6405) (-0.6405, -0.6405, -0.6405) The iterator produced by this function can, for example, be passed to :meth:`Molecule.from_array()<scm.plams.mol.molecule.Molecule.from_array>` the update the coordinates of a PLAMS Molecule in-place. .. code-block:: python >>> from scm.plams import Molecule >>> mol = Molecule(...) >>> xyz_iterator = yield_coords(rdmol) >>> mol.from_array(xyz_iterator) :parameter rdmol: An RDKit mol. :type rdmol: rdkit.Chem.Mol :parameter int id: The ID of the desired conformer. :return: An iterator yielding 3-tuples with *rdmol*'s Cartesian coordinates. :rtype: iterator """ conf = rdmol.GetConformer(id=id) for atom in rdmol.GetAtoms(): pos = conf.GetAtomPosition(atom.GetIdx()) yield (pos.x, pos.y, pos.z)
@overload @requires_optional_package("rdkit") def canonicalize_mol(mol: Molecule, inplace: Literal[True], **kwargs: Any) -> None: ... @overload @requires_optional_package("rdkit") def canonicalize_mol(mol: Molecule, inplace: Literal[False] = False, **kwargs: Any) -> Molecule: ...
[docs]@requires_optional_package("rdkit") def canonicalize_mol(mol: Molecule, inplace: bool = False, **kwargs: Any) -> Optional[Molecule]: r"""Take a PLAMS molecule and sort its atoms based on their canonical rank. Example: .. code:: python >>> from scm.plams import Molecule, canonicalize_mol # Methane >>> mol: Molecule = ... >>> print(mol) Atoms: 1 H 0.640510 0.640510 -0.640510 2 H 0.640510 -0.640510 0.640510 3 C 0.000000 0.000000 0.000000 4 H -0.640510 0.640510 0.640510 5 H -0.640510 -0.640510 -0.640510 >>> print(canonicalize_mol(mol)) Atoms: 1 C 0.000000 0.000000 0.000000 2 H -0.640510 -0.640510 -0.640510 3 H -0.640510 0.640510 0.640510 4 H 0.640510 -0.640510 0.640510 5 H 0.640510 0.640510 -0.640510 :parameter mol: The to-be canonicalized molecule. :type mol: |Molecule| :parameter bool inplace: Whether to sort the atoms inplace or to return a new molecule. :parameter \**kwargs: Further keyword arguments for rdkit.Chem.CanonicalRankAtoms_. :return: Either ``None`` or a newly sorted molecule, depending on the value of ``inplace``. :rtype: None or |Molecule| .. _rdkit.Chem.CanonicalRankAtoms: https://www.rdkit.org/docs/source/rdkit.Chem.rdmolfiles.html#rdkit.Chem.rdmolfiles.CanonicalRankAtoms """ from rdkit import Chem if not isinstance(mol, Molecule): raise TypeError("`mol` expected a plams Molecule") rdmol = to_rdmol(mol) idx_rank = Chem.CanonicalRankAtoms(rdmol, **kwargs) if inplace: mol.atoms = [at for _, at in sorted(zip(idx_rank, mol.atoms), reverse=True)] return None else: ret = mol.copy() ret.atoms = [at for _, at in sorted(zip(idx_rank, ret.atoms), reverse=True)] return ret
[docs]@requires_optional_package("rdkit") @requires_optional_package("PIL") def to_image( mol: Molecule, remove_hydrogens: bool = True, filename: Optional[str] = None, fmt: str = "svg", size: Sequence[int] = (200, 100), as_string: bool = True, ) -> Union[str, bytes, "Image.Image"]: """ Convert single molecule to single image object * ``mol`` -- PLAMS Molecule object * ``remove_hydrogens`` -- Wether or not to remove the H-atoms from the image * ``filename`` -- Optional: Name of image file to be created. * ``fmt`` -- One of "svg", "png", "eps", "pdf", "jpeg" * ``size`` -- Tuple/list containing width and height of image in pixels. * ``as_string`` -- Returns the image as a string or bytestring. If set to False, the original format will be returned, which can be either a PIL image or SVG text We do this because after converting a PIL image to a bytestring it is not possible to further edit it (with our version of PIL). * Returns -- Image text file / binary of image text file / PIL Image object. """ from io import BytesIO from PIL import Image from rdkit.Chem import Draw from rdkit.Chem.Draw import rdMolDraw2D from rdkit.Chem.Draw import MolsToGridImage extensions = ["svg", "png", "eps", "pdf", "jpeg"] classes: Dict[str, Any] = {} classes["svg"] = _MolsToGridSVG for ext in extensions[1:]: classes[ext] = None # PNG can only be created in this way with later version of RDKit if hasattr(rdMolDraw2D, "MolDraw2DCairo"): for ext in extensions[1:]: classes[ext] = MolsToGridImage # Determine the type of image file if filename is not None: if "." in filename: extension = filename.split(".")[-1] if extension in classes.keys(): fmt = extension else: msg = [f"Image type {extension} not available."] msg += [f"Available extensions are: {' '.join(extensions)}"] raise Exception("\n".join(msg)) else: filename = ".".join([filename, fmt]) if fmt not in classes.keys(): raise Exception(f"Image type {fmt} not available.") rdmol = _rdmol_for_image(mol, remove_hydrogens) # Draw the image if classes[fmt.lower()] is None: # With AMS version of RDKit MolsToGridImage fails for eps (because of paste) img = Draw.MolToImage(rdmol, size=size) buf = BytesIO() img.save(buf, format=fmt) img_text = buf.getvalue() else: # This fails for a C=C=O molecule, with AMS rdkit version img = classes[fmt.lower()]([rdmol], molsPerRow=1, subImgSize=size) img_text = img if isinstance(img, Image.Image): buf = BytesIO() img.save(buf, format=fmt) img_text = buf.getvalue() # If I do not make this correction to the SVG text, it is not readable in JupyterLab if fmt.lower() == "svg" and isinstance(img_text, str): img_text = _correct_svg(img_text) # Write to file, if required if filename is not None: mode = "w" if isinstance(img_text, bytes): mode = "wb" with open(filename, mode) as outfile: outfile.write(img_text) if as_string: img = img_text return img
[docs]@requires_optional_package("rdkit") @requires_optional_package("PIL") def get_reaction_image( reactants: Sequence[Molecule], products: Sequence[Molecule], filename: Optional[str] = None, fmt: str = "svg", size: Sequence[int] = (200, 100), as_string: bool = True, ) -> Union[str, bytes, "Image.Image"]: """ Create a 2D reaction image from reactants and products (PLAMS molecules) * ``reactants`` -- Iterable of PLAMS Molecule objects representing the reactants. * ``products`` -- Iterable of PLAMS Molecule objects representing the products. * ``filename`` -- Optional: Name of image file to be created. * ``fmt`` -- The format of the image (svg, png, eps, pdf, jpeg). The extension in the filename, if provided, takes precedence. * ``size`` -- Tuple/list containing width and height of image in pixels. * ``as_string`` -- Returns the image as a string or bytestring. If set to False, the original format will be returned, which can be either a PIL image or SVG text * Returns -- SVG image text file. """ extensions = ["svg", "png", "eps", "pdf", "jpeg"] # Determine the type of image file if filename is not None: if "." in filename: extension = filename.split(".")[-1] if extension in extensions: fmt = extension else: msg = [f"Image type {extension} not available."] msg += [f"Available extensions are: {' '.join(extensions)}"] raise Exception("\n".join(msg)) else: filename = ".".join([filename, fmt]) if fmt.lower() not in extensions: raise Exception(f"Image type {fmt} not available.") # Get the actual image width = size[0] height = size[1] img_text: Union[str, bytes, "Image.Image"] if fmt.lower() == "svg": img_text = _get_reaction_image_svg(reactants, products, width, height) else: img_text = _get_reaction_image_pil(reactants, products, fmt, width, height, as_string=as_string) # Write to file, if required if filename is not None: mode = "w" if isinstance(img_text, bytes): mode = "wb" with open(filename, mode) as outfile: outfile.write(img_text) return img_text
def _get_reaction_image_svg( reactants: Sequence[Molecule], products: Sequence[Molecule], width: int = 200, height: int = 100 ) -> str: """ Create a 2D reaction image from reactants and products (PLAMS molecules) * ``reactants`` -- Iterable of PLAMS Molecule objects representing the reactants. * ``products`` -- Iterable of PLAMS Molecule objects representing the products. * Returns -- SVG image text file. """ from rdkit import Chem def svg_arrow(x1: float, y1: float, x2: float, y2: float, prefix: str = "") -> List[str]: """ The reaction arrow in html format """ # The arrow head l = ['<%sdefs> <%smarker id="arrow" viewBox="0 0 10 10" refX="5" refY="5" ' % (prefix, prefix)] l += ['markerWidth="6" markerHeight="6" '] l += ['orient="auto-start-reverse"> '] l += ['<%spath d="M 0 0 L 10 5 L 0 10 z" /></%smarker></%sdefs>' % (prefix, prefix, prefix)] arrow = "".join(l) # The line l = ['<%sline x1="%i" y1="%i" x2="%i" y2="%i" ' % (prefix, x1, y1, x2, y2)] l += ['stroke="black" marker-end="url(#arrow)" />'] line = "".join(l) return [arrow, line] def add_plus_signs_svg( img_text: str, width: float, height: float, nmols: int, nreactants: int, prefix: str = "" ) -> str: """ Add the lines with + signs to the SVG image """ y = int(0.55 * height) t = [] for i in range(nmols - 1): x = int(((i + 1) * width) - (0.1 * width)) if i + 1 in (nreactants, nreactants + 1): continue t += ['<%stext x="%i" y="%i" font-size="16">+</%stext>' % (prefix, x, y, prefix)] lines = img_text.split("\n") lines = lines[:-2] + t + lines[-2:] return "\n".join(lines) def add_arrow_svg(img_text: str, width: float, height: float, nreactants: int, prefix: str = "") -> str: """ Add the arrow to the SVG image """ y = int(0.5 * height) x1 = int((nreactants * width) + (0.3 * width)) x2 = int((nreactants * width) + (0.7 * width)) t = svg_arrow(x1, y, x2, y, prefix) lines = img_text.split("\n") lines = lines[:-2] + t + lines[-2:] return "\n".join(lines) # Get the rdkit molecules rdmols = [_rdmol_for_image(mol) for mol in reactants] rdmols += [Chem.Mol()] # This is where the arrow will go rdmols += [_rdmol_for_image(mol) for mol in products] nmols = len(rdmols) # Place the molecules in a row of images kwargs = {"legendFontSize": 16} # ,"legendFraction":0.1} img_text = _MolsToGridSVG(rdmols, molsPerRow=nmols, subImgSize=(width, height), **kwargs) # type: ignore[arg-type] img_text = _correct_svg(img_text) # Add + and => nreactants = len(reactants) img_text = add_plus_signs_svg(img_text, width, height, nmols, nreactants) img_text = add_arrow_svg(img_text, width, height, nreactants) return img_text def _get_reaction_image_pil( reactants: Sequence[Molecule], products: Sequence[Molecule], fmt: str, width: int = 200, height: int = 100, as_string: bool = True, ) -> Union[bytes, "Image.Image"]: """ Create a 2D reaction image from reactants and products (PLAMS molecules) * ``reactants`` -- Iterable of PLAMS Molecule objects representing the reactants. * ``products`` -- Iterable of PLAMS Molecule objects representing the products. * Returns -- SVG image text file. """ from io import BytesIO from PIL import Image from PIL import ImageDraw from rdkit import Chem from rdkit.Chem.Draw import rdMolDraw2D, MolsToGridImage def add_arrow_pil(img: "Image.Image", width: float, height: float, nreactants: int) -> "Image.Image": """ Add the arrow to the PIL image """ y1 = int(0.5 * height) y2 = y1 x1 = int((nreactants * width) + (0.3 * width)) x2 = int((nreactants * width) + (0.7 * width)) # Draw a line black = (0, 0, 0) draw = ImageDraw.Draw(img) draw.line(((x1, y1), (x2, y2)), fill=128) # Draw the arrow head headscale = 20 xshift = width // headscale yshift = img.size[1] // headscale p1 = (x2, y2 + yshift) p2 = (x2, y2 - yshift) p3 = (x2 + xshift, y2) draw.polygon((p1, p2, p3), fill=black) return img def add_plus_signs_pil( img: "Image.Image", width: float, height: float, nmols: int, nreactants: int ) -> "Image.Image": """ Add the lines with + signs to the SVG image """ black = (0, 0, 0) I1 = ImageDraw.Draw(img) y = int(0.5 * height) # myfont = ImageFont.truetype('FreeMono.ttf', 25) for i in range(nmols): x = int(((i + 1) * width) - (0.05 * width)) if i + 1 in (nreactants, nreactants + 1): continue # I1.text((x, y), "+", font=myfont, fill=black) I1.text((x, y), "+", fill=black) return img def join_pil_images(pil_images: List["Image.Image"]) -> "Image.Image": """ Create a new image which connects the ones above with text """ white = (255, 255, 255) widths = [img.width for img in pil_images] width = sum(widths) height = max([img.height for img in pil_images]) final_img = Image.new("RGB", (width, height), white) # Concatenate the PIL images for i, img in enumerate(pil_images): pos = sum(widths[:i]) h = int((height - img.height) / 2) final_img.paste(img, (pos, h)) return final_img nreactants = len(reactants) nmols = nreactants + len(products) if not hasattr(rdMolDraw2D, "MolDraw2DCairo"): # We are working with the old AMS version of RDKit white = (255, 255, 255) rimages = [to_image(mol, fmt=fmt, as_string=False) for i, mol in enumerate(reactants)] pimages = [to_image(mol, fmt=fmt, as_string=False) for i, mol in enumerate(products)] blanc = Image.new("RGB", (width, height), white) # Get the image (with arrow) nreactants = len(reactants) all_images = rimages + [blanc] + pimages # type: ignore[arg-type] img = join_pil_images(all_images) # type: ignore[arg-type] else: # We have a later version of RDKit that can regulate the font sizes rdmols = [_rdmol_for_image(mol) for mol in reactants] rdmols += [Chem.Mol()] # This is where the arrow will go rdmols += [_rdmol_for_image(mol) for mol in products] # Place the molecules in a row of images subimg_size = [width, height] kwargs = {"legendFontSize": 16} # ,"legendFraction":0.1} img = MolsToGridImage(rdmols, molsPerRow=nmols + 1, subImgSize=subimg_size, **kwargs) # Add + and => img = add_plus_signs_pil(img, width, height, nmols, nreactants) img = add_arrow_pil(img, width, height, nreactants) # Get the img/bytestring if as_string: buf = BytesIO() img.save(buf, format=fmt) img_bytes = buf.getvalue() return img_bytes else: return img def _correct_svg(image: str) -> str: """ Correct for a bug in the AMS rdkit created SVG file """ if not "svg:" in image: return image image = image.replace("svg:", "") lines = image.split("\n") for iline, line in enumerate(lines): if "xmlns:svg=" in line: lines[iline] = line.replace("xmlns:svg", "xmlns") break image = "\n".join(lines) return image def _presanitize(mol: Molecule, rdmol: "RDKitMol") -> "RDKitMol": """ Change bonding and atom charges to avoid failed sanitization Note: Used by to_rdmol """ from rdkit import Chem mol = mol.copy() for i in range(10): try: rdmol_test = copy.deepcopy(rdmol) Chem.SanitizeMol(rdmol_test) stored_exc = None break except ValueError as exc: stored_exc = exc text = repr(exc) # Fix the problem rdmol, bonds, charges = _kekulize(mol, rdmol, text) # print ("REB bonds charges: ",bonds, charges) # print (rdmol.Debug()) # rdmol = to_rdmol(mol, sanitize=False) # print (rdmol.Debug()) if stored_exc is not None: raise stored_exc else: rdmol = rdmol_test return rdmol def _kekulize( mol: Molecule, rdmol: "RDKitMol", text: str, use_dfs: bool = True ) -> Tuple["RDKitMol", Dict[Tuple[int, int], int], Dict[int, int]]: """ Kekulize the atoms indicated as problematic by RDKit * ``mol`` - PLAMS molecule * ``text`` - Sanitation error produced by RDKit Note: Returns the changes in bond orders and atomic charges, that will make sanitation succeed. """ from rdkit import Chem # Find the indices of the problematic atoms indices = _find_aromatic_sequence(rdmol, text) if indices is None: return rdmol, {}, {} # Set the bond orders along the chain to 2, 1, 2, 1,... altered_bonds: Dict[Tuple[int, int], int] = {} if len(indices) > 1: emol = Chem.RWMol(rdmol) if use_dfs: # It may be better to create the tree first, and then start at a leaf altered_bonds = _alter_aromatic_bonds(emol, indices[0]) indices = list(set([ind for tup in altered_bonds.keys() for ind in tup])) else: indices = _order_atom_indices(emol, indices) altered_bonds = _alter_bonds_along_chain(emol, indices) _adjust_atom_aromaticity(emol, altered_bonds) rdmol = emol.GetMol() # Adjust the PLAMS molecule as well for (iat, jat), order in altered_bonds.items(): for bond in mol.atoms[iat].bonds: if mol.index(bond.other_end(mol.atoms[iat])) - 1 == jat: bond.order = order break # If the atom at the chain end has the wrong bond order, give it a charge. altered_charge = {} atom_indices, dangling_bonds = _get_charged_atoms(rdmol, indices) if len(atom_indices) > 0: iat = atom_indices[0] ndangling = dangling_bonds[iat] altered_charge = _guess_atomic_charge(iat, ndangling, rdmol, mol) rdmol.GetAtomWithIdx(iat).SetFormalCharge(altered_charge[iat]) for k, v in altered_charge.items(): mol.atoms[k].properties.rdkit.charge = v return rdmol, altered_bonds, altered_charge def _find_aromatic_sequence(rdmol: "RDKitMol", text: str) -> Optional[List[int]]: """ Find the sequence of atoms with 1.5 bond orders """ indices: Optional[List[int]] = None lines = text.split("\n") line = lines[-1] if "Unkekulized atoms:" in line: text = line.split("Unkekulized atoms:")[-1].split("\\n")[0] if '"' in text: text = text.split('"')[0] indices = [int(w) for w in text.split()] if len(indices) > 1: return indices line = "atom %i marked aromatic" % (indices[0]) iat: Any if "marked aromatic" in line: iat = int(line.split("atom")[-1].split()[0]) indices = [iat] while iat is not None: icurrent = iat iat = None for bond in rdmol.GetAtomWithIdx(icurrent).GetBonds(): if str(bond.GetBondType()) == "AROMATIC": iat = bond.GetOtherAtomIdx(icurrent) if iat in indices: iat = None continue indices.append(iat) break elif "Explicit valence for atom" in line: iat = int(line.split("#")[-1].split()[0]) indices = [iat] return indices def _alter_aromatic_bonds( emol: "RDKitEdMol", iat: int, depth: int = 0, double_first: bool = False ) -> "OrderedDict[Tuple[int, int], int]": """ Switch all thearomitic bonds to single/double, starting at iat * ``emol`` -- RDKit EditableMol type, for which bond orders will be changed * ``iat`` -- Starting point for depth first search """ from rdkit import Chem from scm.plams import PeriodicTable as PT # Use OrderedDict, so that the leaves of the tree # will be at the end bonds_changed: OrderedDict[Tuple[int, int], int] = OrderedDict() at = emol.GetAtomWithIdx(iat) valence = PT.get_connectors(at.GetAtomicNum()) bonds = at.GetBonds() are_aromatic = [str(b.GetBondType()) == "AROMATIC" for b in bonds] int_orders = sum([b.GetBondTypeAsDouble() for i, b in enumerate(bonds) if not are_aromatic[i]]) numbonds = len([b for i, b in enumerate(bonds) if are_aromatic[i]]) valence -= int(int_orders) # Here I place the double bond first. # I could also start with a single bond instead. orders = [1 for i in range(numbonds)] if valence > numbonds: for i in range(min(numbonds, valence - numbonds)): if double_first: orders[i] = 2 else: orders[numbonds - i - 1] = 2 for i, bond in enumerate(bonds): jat = bond.GetOtherAtomIdx(iat) if are_aromatic[i]: pair: Tuple[int, int] = tuple(sorted([iat, jat])) order = orders.pop(0) bond.SetBondType(Chem.BondType(order)) bond.SetIsAromatic(False) # This is necessary with the newer RDKit bonds_changed[pair] = order d = _alter_aromatic_bonds(emol, jat, depth + 1) bonds_changed.update(d) return bonds_changed def _alter_bonds_along_chain(emol: "RDKitEdMol", indices: Sequence[int]) -> Dict[Tuple[int, int], int]: """ Along the chain of atoms (indices), alternate double and single bonds """ from scm.plams import PeriodicTable as PT from rdkit import Chem if len(indices) > 1: # The first bond order is set to 2, if aromic # Else it is flipped first_bond = emol.GetBondBetweenAtoms(indices[0], indices[1]) if str(first_bond.GetBondType()) == "AROMATIC": new_order = 2 else: new_order = first_bond.GetBondTypeAsDouble() new_order = (new_order % 2) + 1 # Unless this breaks valence rules. iat = indices[0] at = emol.GetAtomWithIdx(iat) valence = PT.get_connectors(at.GetAtomicNum()) orders = [b.GetBondTypeAsDouble() for b in at.GetBonds()] jat = indices[1] at_next = emol.GetAtomWithIdx(jat) valence_next = PT.get_connectors(at_next.GetAtomicNum()) orders_next = [b.GetBondTypeAsDouble() for b in at_next.GetBonds()] if sum(orders) > valence or sum(orders_next) > valence_next: new_order = 1 # Set the bond orders along the chain to 2, 1, 2, 1,... altered_bonds: Dict[Tuple[int, int], int] = {} for i, iat in enumerate(indices[:-1]): bond = emol.GetBondBetweenAtoms(iat, indices[i + 1]) bond.SetBondType(Chem.BondType(new_order)) bond.SetIsAromatic(False) # This is necessary with the newer RDKit versions altered_bonds[iat, indices[i + 1]] = new_order new_order = ((new_order) % 2) + 1 return altered_bonds def _order_atom_indices(rdmol: "RDKitMol", indices: Sequence[int]) -> List[int]: """ Order the atomic indices so that they are consecutive along a bonded chain """ # Order the indices, so that they are consecutive in the molecule start = 0 for i, iat in enumerate(indices): at = rdmol.GetAtomWithIdx(iat) neighbors = [b.GetOtherAtomIdx(iat) for b in at.GetBonds()] relevant_neighbors = [jat for jat in neighbors if jat in indices] if len(relevant_neighbors) == 1: start = i break iat = indices[start] atoms = [iat] while 1: at = rdmol.GetAtomWithIdx(iat) neighbors = [b.GetOtherAtomIdx(iat) for b in at.GetBonds()] relevant_neighbors = [jat for jat in neighbors if jat in indices and not jat in atoms] if len(relevant_neighbors) == 0: break iat = relevant_neighbors[0] atoms.append(iat) if len(atoms) < len(indices): raise Exception("The unkekulized atoms are not in a consecutive chain") indices = atoms return indices def _adjust_atom_aromaticity(emol: "RDKitEdMol", altered_bonds: Dict[Tuple[int, int], int]) -> None: """ Assign aromaticity to the atoms based on the new bond orders """ indices = list(set([ind for tup in altered_bonds.keys() for ind in tup])) for ind in indices: at = emol.GetAtomWithIdx(ind) if not at.GetIsAromatic(): continue # Only change this if we are sure the atom is not aromatic anymore aromatic = False for bond in at.GetBonds(): if str(bond.GetBondType()) == "AROMATIC": aromatic = True break if not aromatic: at.SetIsAromatic(False) def _get_charged_atoms(rdmol: "RDKitMol", indices: Sequence[int]) -> Tuple[List[int], Dict[int, int]]: """ Locate the atoms that need a charge """ from scm.plams import PeriodicTable as PT atom_indices: List[int] = [] dangling_bonds: Dict[int, int] = {} for iat in indices[::-1]: at = rdmol.GetAtomWithIdx(iat) valence = PT.get_connectors(at.GetAtomicNum()) bonds = at.GetBonds() orders = [b.GetBondTypeAsDouble() for b in bonds] ndangling = int(valence - sum(orders)) if ndangling != 0: dangling_bonds[iat] = ndangling atom_indices.append(iat) return atom_indices, dangling_bonds def _guess_atomic_charge(iat: int, ndangling: int, rdmol: "RDKitMol", mol: Molecule) -> Dict[int, int]: """ Guess the best atomic charge for atom iat """ # Get the total charge from atomic charges already set charges = [at.GetFormalCharge() for at in rdmol.GetAtoms()] totcharge = sum(charges) - charges[iat] # Here I hope that the estimated charge will be more reliable than the # actual (user defined) system charge, but am not sure est_charges = mol.guess_atomic_charges(adjust_to_systemcharge=False, depth=0) molcharge = int(sum(est_charges)) molcharge = molcharge - totcharge # Adjust the sign based on the estimated charge sign = ndangling / (abs(ndangling)) if molcharge != 0: sign = molcharge / (abs(molcharge)) elif est_charges[iat] != 0: sign = est_charges[iat] / abs(est_charges[iat]) # Then use the over/under valence with the new sign as the atomic charge charge = int(sign * abs(ndangling)) # Perhaps we tried this already if charge == charges[iat]: charge = -charge altered_charge = {iat: charge} return altered_charge def _rdmol_for_image(mol: Union[Molecule, "ChemicalSystem"], remove_hydrogens: bool = True) -> "RDKitMol": """ Convert PLAMS molecule to an RDKit molecule specifically for a 2D image """ from rdkit.Chem import AllChem from rdkit.Chem import RemoveHs if _has_scm_chemsys and isinstance(mol, ChemicalSystem): rdmol = mol.to_rdkit_mol() else: rdmol = to_rdmol(mol, presanitize=True) # Flatten the molecule AllChem.Compute2DCoords(rdmol) # type: ignore[attr-defined] # Remove the Hs only if there are carbon atoms in this system # Otherwise this will turn an OH radical into a water molecule. carbons = [i for i, at in enumerate(mol.atoms) if at.symbol in ["C", "Si"]] if remove_hydrogens and len(carbons) > 0: rdmol = RemoveHs(rdmol) else: for atom in rdmol.GetAtoms(): atom.SetNoImplicit(True) ids = [c.GetId() for c in rdmol.GetConformers()] for cid in ids: rdmol.RemoveConformer(cid) return rdmol def _MolsToGridSVG( mols: Sequence[Molecule], molsPerRow: int = 3, subImgSize: Tuple[int, int] = (200, 200), legends: Optional[Sequence[str]] = None, highlightAtomLists: Optional[Sequence[int]] = None, highlightBondLists: Optional[Sequence[int]] = None, drawOptions: Optional["Draw.MolDrawOptions"] = None, **kwargs: Any, ) -> str: """ Replaces the old version of this function in our RDKit for a more recent one, with more options """ from rdkit.Chem.Draw import rdMolDraw2D if legends is None: legends = [""] * len(mols) nRows = len(mols) // molsPerRow if len(mols) % molsPerRow: nRows += 1 fullSize = (molsPerRow * subImgSize[0], nRows * subImgSize[1]) d2d = rdMolDraw2D.MolDraw2DSVG(fullSize[0], fullSize[1], subImgSize[0], subImgSize[1]) if drawOptions is not None: d2d.SetDrawOptions(drawOptions) else: dops = d2d.drawOptions() for k, v in list(kwargs.items()): if hasattr(dops, k): setattr(dops, k, v) del kwargs[k] d2d.DrawMolecules( list(mols), legends=legends or None, highlightAtoms=highlightAtomLists or [], highlightBonds=highlightBondLists or [], **kwargs, ) d2d.FinishDrawing() res = d2d.GetDrawingText() return res