Source code for scm.plams.interfaces.molecule.rdkit

from typing import List, Literal, Optional, overload, TYPE_CHECKING, Sequence, Dict, Any
import random
import sys
import copy
from warnings import warn

from scm.plams.core.functions import add_to_class, log, requires_optional_package
from scm.plams.mol.atom import Atom
from scm.plams.mol.bond import Bond
from scm.plams.mol.molecule import Molecule
from scm.plams.core.errors import PlamsError

if TYPE_CHECKING:
    from rdkit import Mol as RDKitMol

__all__ = [
    "add_Hs",
    "apply_reaction_smarts",
    "apply_template",
    "gen_coords_rdmol",
    "get_backbone_atoms",
    "modify_atom",
    "to_rdmol",
    "from_rdmol",
    "from_sequence",
    "from_smiles",
    "from_smarts",
    "to_smiles",
    "partition_protein",
    "readpdb",
    "writepdb",
    "get_substructure",
    "get_conformations",
    "yield_coords",
    "canonicalize_mol",
    "to_image",
    "get_reaction_image",
]


[docs]@requires_optional_package("rdkit")
def from_rdmol(rdkit_mol: "RDKitMol", confid: int = -1, properties: bool = True) -> Molecule:
    """
    Translate an RDKit molecule into a PLAMS molecule type.
    RDKit properties will be unpickled if their name ends with '_pickled'.

    :parameter rdkit_mol: RDKit molecule
    :type rdkit_mol: rdkit.Chem.Mol
    :parameter int confid: conformer identifier from which to take coordinates
    :parameter bool properties: If all Chem.Mol, Chem.Atom and Chem.Bond properties should be converted from RDKit to PLAMS format.
    :return: a PLAMS molecule
    :rtype: |Molecule|
    """
    from rdkit import Chem

    if isinstance(rdkit_mol, Molecule):
        return rdkit_mol
    # Create PLAMS molecule
    plams_mol = Molecule()
    total_charge = 0
    try:
        Chem.Kekulize(rdkit_mol)
    except Exception:
        pass
    conf = rdkit_mol.GetConformer(id=confid)

    # Add atoms and assign properties to the PLAMS atom if *properties* = True
    for rd_atom in rdkit_mol.GetAtoms():
        pos = conf.GetAtomPosition(rd_atom.GetIdx())
        ch = rd_atom.GetFormalCharge()
        pl_atom = Atom(rd_atom.GetAtomicNum(), coords=(pos.x, pos.y, pos.z), rdkit={"charge": ch})
        if properties and rd_atom.GetPDBResidueInfo():
            pl_atom.properties.rdkit.pdb_info = get_PDBResidueInfo(rd_atom)
        plams_mol.add_atom(pl_atom)
        total_charge += ch

        # Check for R/S information
        stereo = str(rd_atom.GetChiralTag())
        if stereo == "CHI_TETRAHEDRAL_CCW":
            pl_atom.properties.rdkit.stereo = "counter-clockwise"
        elif stereo == "CHI_TETRAHEDRAL_CW":
            pl_atom.properties.rdkit.stereo = "clockwise"

    # Add bonds to the PLAMS molecule
    for bond in rdkit_mol.GetBonds():
        at1 = plams_mol.atoms[bond.GetBeginAtomIdx()]
        at2 = plams_mol.atoms[bond.GetEndAtomIdx()]
        plams_mol.add_bond(Bond(at1, at2, bond.GetBondTypeAsDouble()))

        # Check for cis/trans information
        stereo, bond_dir = str(bond.GetStereo()), str(bond.GetBondDir())
        if stereo == "STEREOZ" or stereo == "STEREOCIS":
            plams_mol.bonds[-1].properties.rdkit.stereo = "Z"
        elif stereo == "STEREOE" or stereo == "STEREOTRANS":
            plams_mol.bonds[-1].properties.rdkit.stereo = "E"
        elif bond_dir == "ENDUPRIGHT":
            plams_mol.bonds[-1].properties.rdkit.stereo = "up"
        elif bond_dir == "ENDDOWNRIGHT":
            plams_mol.bonds[-1].properties.rdkit.stereo = "down"

    # Set charge and assign properties to PLAMS molecule and bonds if *properties* = True
    plams_mol.properties.charge = total_charge
    if properties:
        prop_from_rdmol(plams_mol, rdkit_mol)
        for rd_atom, plams_atom in zip(rdkit_mol.GetAtoms(), plams_mol):
            prop_from_rdmol(plams_atom, rd_atom)
        for rd_bond, plams_bond in zip(rdkit_mol.GetBonds(), plams_mol.bonds):
            prop_from_rdmol(plams_bond, rd_bond)
    return plams_mol


[docs]@requires_optional_package("rdkit")
def to_rdmol(
    plams_mol: Molecule,
    sanitize: bool = True,
    properties: bool = True,
    assignChirality: bool = False,
    presanitize: bool = False,
) -> "RDKitMol":
    """
    Translate a PLAMS molecule into an RDKit molecule type.
    PLAMS |Molecule|, |Atom| or |Bond| properties are pickled if they are neither booleans, floats,
    integers, floats nor strings, the resulting property names are appended with '_pickled'.

    :parameter plams_mol: A PLAMS molecule
    :parameter bool sanitize: Kekulize, check valencies, set aromaticity, conjugation and hybridization
    :parameter bool properties: If all |Molecule|, |Atom| and |Bond| properties should be converted from PLAMS to RDKit format.
    :parameter bool assignChirality: Assign R/S and cis/trans information, insofar as this was not yet present in the PLAMS molecule.
    :parameter bool presanitize: Iteratively adjust bonding and atomic charges, to avoid failure of sanitization.
                                 Only relevant is sanitize is set to True.
    :type plams_mol: |Molecule|
    :return: an RDKit molecule
    :rtype: rdkit.Chem.Mol
    """
    from rdkit import Chem, Geometry

    if isinstance(plams_mol, Chem.Mol):
        return plams_mol
    # Create rdkit molecule
    e = Chem.EditableMol(Chem.Mol())

    # Add atoms and assign properties to the RDKit atom if *properties* = True
    for pl_atom in plams_mol.atoms:
        rd_atom = Chem.Atom(int(pl_atom.atnum))
        if "rdkit" in pl_atom.properties:
            if "charge" in pl_atom.properties.rdkit:
                rd_atom.SetFormalCharge(pl_atom.properties.rdkit.charge)
        if properties:
            if "rdkit" in pl_atom.properties:
                if "pdb_info" in pl_atom.properties.rdkit:
                    set_PDBresidueInfo(rd_atom, pl_atom.properties.rdkit.pdb_info)
                for prop in pl_atom.properties.rdkit:
                    if prop not in ("charge", "pdb_info", "stereo"):
                        prop_to_rdmol(rd_atom, prop, pl_atom.properties.rdkit.get(prop))
            prop_dic = {}
            for prop in pl_atom.properties:
                if prop != "rdkit":
                    prop_dic[prop] = pl_atom.properties.get(prop)
            if len(prop_dic) > 0:
                prop_to_rdmol(rd_atom, "plams", prop_dic)

        # Check for R/S information
        if pl_atom.properties.rdkit.stereo:
            stereo = pl_atom.properties.rdkit.stereo.lower()
            if stereo == "counter-clockwise":
                rd_atom.SetChiralTag(Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CCW)
            elif stereo == "clockwise":
                rd_atom.SetChiralTag(Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CW)
        e.AddAtom(rd_atom)

    # Mapping of PLAMS bond orders to RDKit bond types:
    def plams_to_rd_bonds(bo):
        if 1.4 < bo < 1.6:
            return 12  # bond type for aromatic bond
        else:
            return int(bo)

    # Add bonds to the RDKit molecule
    for bond in plams_mol.bonds:
        a1 = plams_mol.atoms.index(bond.atom1)
        a2 = plams_mol.atoms.index(bond.atom2)
        e.AddBond(a1, a2, Chem.BondType(plams_to_rd_bonds(bond.order)))
    rdmol = e.GetMol()

    # Check for cis/trans information
    for pl_bond, rd_bond in zip(plams_mol.bonds, rdmol.GetBonds()):
        if pl_bond.properties.rdkit.stereo:
            stereo = pl_bond.properties.rdkit.stereo.lower()
            if stereo == "e" or stereo == "trans":
                rd_bond.SetStereo(Chem.rdchem.BondStereo.STEREOE)
            elif stereo == "z" or stereo == "cis":
                rd_bond.SetStereo(Chem.rdchem.BondStereo.STEREOZ)
            elif stereo == "up":
                rd_bond.SetBondDir(Chem.rdchem.BondDir.ENDUPRIGHT)
            elif stereo == "down":
                rd_bond.SetBondDir(Chem.rdchem.BondDir.ENDDOWNRIGHT)

    # Assign properties to RDKit molecule and bonds if *properties* = True
    # All properties will be taken from 'rdkit' subsettings, except the molecular charge
    if properties:
        prop_dic = {}
        for prop in plams_mol.properties:
            if prop == "rdkit":
                for rdprop in plams_mol.properties.rdkit:
                    prop_to_rdmol(rdmol, rdprop, plams_mol.properties.rdkit.get(rdprop))
            else:
                # prop_dic[prop] = {'plams':plams_mol.properties.get(prop)}
                prop_dic[prop] = plams_mol.properties.get(prop)
        if len(prop_dic) > 0:
            prop_to_rdmol(rdmol, "plams", prop_dic)
        prop_dic = {}
        for pl_bond, rd_bond in zip(plams_mol.bonds, rdmol.GetBonds()):
            for prop in pl_bond.properties:
                if prop == "rdkit":
                    for rdprop in pl_bond.properties.rdkit:
                        if rdprop != "stereo":
                            prop_to_rdmol(rd_bond, rdprop, pl_bond.properties.rdkit.get(rdprop))
                else:
                    prop_dic[prop] = pl_bond.properties.get(prop)
        if len(prop_dic) > 0:
            prop_to_rdmol(rd_bond, "plams", prop_dic)

    if sanitize:
        try:
            if presanitize:
                rdmol = _presanitize(plams_mol, rdmol)
            else:
                Chem.SanitizeMol(rdmol)
        except ValueError as exc:
            log("RDKit Sanitization Error.")
            text = "Most likely this is a problem with the assigned bond orders: "
            text += "Use chemical insight to adjust them."
            log(text)
            log("Note that the atom indices below start at zero, while the AMS-GUI indices start at 1.")
            raise exc
    conf = Chem.Conformer()
    for i, atom in enumerate(plams_mol.atoms):
        xyz = Geometry.Point3D(atom.x, atom.y, atom.z)
        conf.SetAtomPosition(i, xyz)
    rdmol.AddConformer(conf)
    # REB: Assign all stereochemistry, if it wasn't already there
    if assignChirality:
        Chem.rdmolops.AssignAtomChiralTagsFromStructure(rdmol, confId=conf.GetId(), replaceExistingTags=False)
        try:
            Chem.AssignStereochemistryFrom3D(rdmol, confId=conf.GetId(), replaceExistingTags=False)
        except AttributeError:
            pass
    return rdmol


[docs]@requires_optional_package("rdkit")
def to_smiles(plams_mol: Molecule, short_smiles: bool = True, **kwargs) -> str:
    """
    Returns the RDKit-generated SMILES string of a PLAMS molecule.

    Note: SMILES strings are generated based on the molecule's connectivity. If the input PLAMS molecule does not contain any bonds, "guessed bonds" will be used.

    :parameter plams_mol: A PLAMS |Molecule|
    :parameter bool short_smiles: whether or not to use some RDKit sanitization to get shorter smiles (e.g. for a water molecule, short_smiles=True -> "O", short_smiles=False -> [H]O[H])
    :parameter \**kwargs: With 'kwargs' you can provide extra optional parameters to the rdkit.Chem method 'MolToSmiles'. See the rdkit documentation for more info.

    :return: the SMILES string
    """
    from rdkit import Chem

    if len(plams_mol.bonds) > 0:
        mol_with_bonds = plams_mol
    else:
        mol_with_bonds = plams_mol.copy()
        mol_with_bonds.guess_bonds()

    rd_mol = to_rdmol(mol_with_bonds, sanitize=False)

    # This sanitization black magic is needed for getting the "short, nice and clean" SMILES string.
    # Without this, the SMILES string for water would be "[H]O[H]". With this is just "O"
    if short_smiles:
        s = Chem.rdmolops.SanitizeFlags
        rdkitSanitizeOptions = (
            s.SANITIZE_ADJUSTHS
            or s.SANITIZE_CLEANUP
            or s.SANITIZE_CLEANUPCHIRALITY
            or s.SANITIZE_FINDRADICALS
            or s.SANITIZE_PROPERTIES
            or s.SANITIZE_SETAROMATICITY
            or s.SANITIZE_SETCONJUGATION
            or s.SANITIZE_SETHYBRIDIZATION
            or s.SANITIZE_SYMMRINGS
        )
        Chem.rdmolops.AssignRadicals(rd_mol)
        rd_mol = Chem.rdmolops.RemoveHs(rd_mol, updateExplicitCount=True, sanitize=False)
        Chem.rdmolops.SanitizeMol(rd_mol, rdkitSanitizeOptions)
    smiles = Chem.MolToSmiles(rd_mol, **kwargs)
    return smiles


pdb_residue_info_items = [
    "AltLoc",
    "ChainId",
    "InsertionCode",
    "IsHeteroAtom",
    "Name",
    "Occupancy",
    "ResidueName",
    "ResidueNumber",
    "SecondaryStructure",
    "SegmentNumber",
    "SerialNumber",
    "TempFactor",
]
# 'MonomerType' was excluded because it is an rdkit type that cannot easilty be serialized


def get_PDBResidueInfo(rdkit_atom):
    pdb_info = {}
    for item in pdb_residue_info_items:
        get_function = "Get" + item
        pdb_info[item] = rdkit_atom.GetPDBResidueInfo().__getattribute__(get_function)()
    return pdb_info


@requires_optional_package("rdkit")
def set_PDBresidueInfo(rdkit_atom, pdb_info):
    from rdkit import Chem

    atom_pdb_residue_info = Chem.AtomPDBResidueInfo()
    for item, value in pdb_info.items():
        set_function = "Set" + item
        atom_pdb_residue_info.__getattribute__(set_function)(value)
    rdkit_atom.SetMonomerInfo(atom_pdb_residue_info)


def prop_to_rdmol(rd_obj, propkey, propvalue):
    """
    Convert a single PLAMS property into an RDKit property.

    :paramter pl_obj: A PLAMS object.
    :type pl_obj: |Molecule|, |Atom| or |Bond|.
    :parameter rd_obj: An RDKit object.
    :type rd_obj: rdkit.Chem.Mol, rdkit.Chem.Atom or rdkit.Chem.Bond
    :parameter str propkey: The |Settings| key of the PLAMS property.
    """
    try:
        import dill as pickle
    except ImportError:
        import pickle

    obj = type(propvalue)
    obj_dict = {bool: rd_obj.SetBoolProp, float: rd_obj.SetDoubleProp, int: rd_obj.SetIntProp, str: rd_obj.SetProp}
    if obj_dict.get(obj):
        obj_dict[obj](propkey, propvalue)
    else:
        name = propkey + "_pickled"
        try:
            rd_obj.SetProp(name, pickle.dumps(propvalue, 0).decode())
        except (Exception, pickle.PicklingError):
            pass


def prop_from_rdmol(pl_obj, rd_obj):
    """
    Convert one or more RDKit properties into PLAMS properties.

    :paramter pl_obj: A PLAMS object.
    :type pl_obj: |Molecule|, |Atom| or |Bond|.
    :parameter rd_obj: An RDKit object.
    :type rd_obj: rdkit.Chem.Mol, rdkit.Chem.Atom or rdkit.Chem.Bond
    """
    try:
        import dill as pickle
    except ImportError:
        import pickle

    prop_dict = rd_obj.GetPropsAsDict()
    for propname in prop_dict.keys():
        if propname == "plams_pickled":
            plams_props = pickle.loads(prop_dict[propname].encode())
            if not isinstance(plams_props, dict):
                raise Exception("PLAMS property not properly stored in RDKit")
            for key, value in plams_props.items():
                pl_obj.properties[key] = value
        else:
            if propname == "__computedProps":
                continue
            if "_pickled" not in propname:
                pl_obj.properties.rdkit[propname] = prop_dict[propname]
            else:
                prop = prop_dict[propname]
                propname = propname.rsplit("_pickled", 1)[0]
                propvalue = pickle.loads(prop.encode())
                pl_obj.properties.rdkit[propname] = propvalue


@overload
def from_smiles(
    smiles: str, nconfs: Literal[1] = ..., name: Optional[str] = ..., forcefield: Optional[str] = ..., rms: float = ...
) -> Molecule: ...


@overload
def from_smiles(
    smiles: str, nconfs: int = ..., name: Optional[str] = ..., forcefield: Optional[str] = ..., rms: float = ...
) -> List[Molecule]: ...


[docs]@requires_optional_package("rdkit")
def from_smiles(
    smiles: str, nconfs: int = 1, name: Optional[str] = None, forcefield: Optional[str] = None, rms: float = 0.1
):
    """
    Generates PLAMS molecule(s) from a smiles strings.

    :parameter str smiles: A smiles string
    :parameter int nconfs: Number of conformers to be generated
    :parameter str name: A name for the molecule
    :parameter str forcefield: Choose 'uff' or 'mmff' forcefield for geometry optimization
        and ranking of conformations. The default value None results in skipping of the
        geometry optimization step.
    :parameter float rms: Root Mean Square deviation threshold for
        removing similar/equivalent conformations
    :return: A molecule with hydrogens and 3D coordinates or a list of molecules if nconfs > 1
    :rtype: |Molecule| or list of PLAMS Molecules
    """
    from rdkit import Chem

    smiles = str(smiles.split()[0])
    smiles = Chem.CanonSmiles(smiles)
    rdkit_mol = Chem.AddHs(Chem.MolFromSmiles(smiles))
    rdkit_mol.SetProp("smiles", smiles)
    return get_conformations(rdkit_mol, nconfs, name, forcefield, rms)


[docs]@requires_optional_package("rdkit")
def from_smarts(
    smarts: str, nconfs: int = 1, name: Optional[str] = None, forcefield: Optional[str] = None, rms: float = 0.1
):
    """
    Generates PLAMS molecule(s) from a smarts strings.
    This allows for example to define hydrogens explicitly.
    However it is less suitable for aromatic molecules (use from_smiles in that case).

    :parameter str smarts: A smarts string
    :parameter int nconfs: Number of conformers to be generated
    :parameter str name: A name for the molecule
    :parameter str forcefield: Choose 'uff' or 'mmff' forcefield for geometry
        optimization and ranking of comformations. The default value None results
        in skipping of the geometry optimization step.
    :parameter float rms: Root Mean Square deviation threshold for removing
        similar/equivalent conformations.
    :return: A molecule with hydrogens and 3D coordinates or a list of molecules if nconfs > 1
    :rtype: |Molecule| or list of PLAMS Molecules
    """
    from rdkit import Chem

    smiles = str(smarts.split()[0])
    mol = Chem.MolFromSmarts(smiles)
    Chem.SanitizeMol(mol)
    molecule = Chem.AddHs(mol)
    molecule.SetProp("smiles", smiles)
    return get_conformations(molecule, nconfs, name, forcefield, rms)


[docs]@requires_optional_package("rdkit")
def get_conformations(
    mol,
    nconfs=1,
    name=None,
    forcefield=None,
    rms=-1,
    enforceChirality=False,
    useExpTorsionAnglePrefs="default",
    constraint_ats=None,
    EmbedParameters="EmbedParameters",
    randomSeed=1,
    best_rms=-1,
):
    """
    Generates 3D conformation(s) for an rdkit_mol or a PLAMS Molecule

    :parameter mol: RDKit or PLAMS Molecule
    :type mol: rdkit.Chem.Mol or |Molecule|
    :parameter int nconfs: Number of conformers to be generated
    :parameter str name: A name for the molecule
    :parameter str forcefield: Choose 'uff' or 'mmff' forcefield for geometry
        optimization and ranking of comformations. The default value None results
        in skipping of the geometry optimization step
    :parameter float rms: Root Mean Square deviation threshold for removing
        similar/equivalent conformations.
    :parameter float best_rms: Root Mean Square deviation of best atomic permutation for removing
        similar/equivalent conformations.
    :parameter bool enforceChirality: Enforce the correct chirality if chiral centers are present
    :parameter str useExpTorsionAnglePrefs: Use experimental torsion angles preferences for the conformer generation by rdkit
    :parameter list constraint_ats: List of atom indices to be constrained
    :parameter str EmbedParameters: Name of RDKit EmbedParameters class ('EmbedParameters', 'ETKDG')
    :parameter int randomSeed: The seed for the random number generator. If set to None the generated conformers will be non-deterministic.
    :return: A molecule with hydrogens and 3D coordinates or a list of molecules if nconfs > 1
    :rtype: |Molecule| or list of PLAMS Molecules
    """
    from rdkit import Chem
    from rdkit.Chem import AllChem

    if isinstance(mol, Molecule):
        if not mol.bonds:
            mol.guess_bonds()
        rdkit_mol = to_rdmol(mol, assignChirality=enforceChirality)
    else:
        rdkit_mol = mol

    def constrained_embedding(rdkit_mol, nconfs, param_obj, template_mol, randomSeed):
        """
        Use RDKit ConstrainedEmbed to add conformers to rdkit_mol (EmbedMultipleConfs does not constrain)

        Note: ConstrainedEmbed seems very sensitive to the randomseed supplied
              High seeds lead to badly constrained coordinates
        """
        # ConstrainedEmbed overwrites conformers (does not add)
        # So I need to do that in a separate molecule, and add the resulting confs later.
        rdmol_orig = copy.deepcopy(rdkit_mol)
        if randomSeed:
            seed = randomSeed
            # Will crash with high seed
            if randomSeed > 1e9:
                seed = randomSeed - (int((randomSeed / 1e9)) * int(1e9))
            random_ints = [i + seed for i in range(nconfs)]
        else:
            base = random.getrandbits(32)
            random_ints = [i + base for i in range(nconfs)]
        conformers = []
        for seed in random_ints:
            rdmol_orig.RemoveAllConformers()
            rdmol_orig = AllChem.ConstrainedEmbed(rdmol_orig, template_mol, param_obj, randomseed=seed)
            conf = rdmol_orig.GetConformer(0)
            newconf = Chem.Conformer()
            for i, pos in enumerate(conf.GetPositions()):
                newconf.SetAtomPosition(i, pos)
            conformers.append(newconf)
            rdkit_mol.AddConformer(newconf, True)
            cids = [c.GetId() for c in rdkit_mol.GetConformers()]
        return cids

    def MMFFenergy(cid):
        ff = AllChem.MMFFGetMoleculeForceField(rdkit_mol, AllChem.MMFFGetMoleculeProperties(rdkit_mol), confId=cid)
        try:
            energy = ff.CalcEnergy()
        except:
            msg = (
                "MMFF energy calculation failed for molecule: "
                + Chem.MolToSmiles(rdkit_mol)
                + "\nNo geometry optimization was performed."
            )
            warn(msg)
            energy = 1e9
        return energy

    def UFFenergy(cid):
        ff = AllChem.UFFGetMoleculeForceField(rdkit_mol, confId=cid)
        try:
            energy = ff.CalcEnergy()
        except:
            msg = (
                "MMFF energy calculation failed for molecule: "
                + Chem.MolToSmiles(rdkit_mol)
                + "\nNo geometry optimization was performed."
            )
            warn(msg)
            energy = 1e9
        return energy

    def remove_some_Hs(m):
        res = Chem.RWMol(m)
        c_hs = [x[0] for x in m.GetSubstructMatches(Chem.MolFromSmarts("[#1;$([#1]-[#6])]"))]
        c_hs.sort(reverse=True)
        for aid in c_hs:
            res.RemoveAtom(aid)
        return res.GetMol()

    if name:
        rdkit_mol.SetProp("name", name)

    if best_rms > 0:
        if rms > 0:
            raise PlamsError("Cannot set both rms and best_rms")
        rms = best_rms

    # if enforceChirality :
    #    # This is how chirality is enforced in the GUI. The argument is not passed to AllChem.EmbedMultipleConfs
    #    Chem.AssignAtomChiralTagsFromStructure(rdkit_mol)
    # param_obj = AllChem.ETKDG()
    param_obj = getattr(AllChem, EmbedParameters)()
    param_obj.pruneRmsThresh = rms
    param_obj.enforceChirality = enforceChirality
    if useExpTorsionAnglePrefs != "default":  # The default (True of False) changes with rdkit versions
        param_obj.useExpTorsionAnglePrefs = True
    if constraint_ats is not None:
        # Just adding the coordMap and using EmbedMultipleConfs does not seem to work (in this version)
        # coordMap = {}
        # for i, iat in enumerate(constraint_ats):
        #     coordMap[iat] = rdkit_mol.GetConformer(0).GetAtomPosition(iat)
        # param_obj.coordMap = coordMap
        emol = Chem.RWMol(rdkit_mol)
        indices = [i for i in range(rdkit_mol.GetNumAtoms()) if not i in constraint_ats][::-1]
        for iat in indices:
            emol.RemoveAtom(iat)
        template_mol = emol.GetMol()
    else:
        param_obj.randomSeed = randomSeed if randomSeed is not None else random.getrandbits(31)
    try:
        if constraint_ats is not None:
            cids = constrained_embedding(rdkit_mol, nconfs, param_obj, template_mol, randomSeed)
        else:
            param_obj.randomSeed = randomSeed if randomSeed is not None else random.getrandbits(31)
            cids = list(AllChem.EmbedMultipleConfs(rdkit_mol, nconfs, param_obj))
    except Exception:
        # ``useRandomCoords = True`` prevents (poorly documented) crash for large systems
        param_obj.useRandomCoords = True
        if constraint_ats is not None:
            cids = constrained_embedding(rdkit_mol, nconfs, param_obj, template_mol, randomSeed)
        else:
            cids = list(AllChem.EmbedMultipleConfs(rdkit_mol, nconfs, param_obj))
    if len(cids) == 0:
        # Sometimes rdkit does not crash (for large systems), but simply doe snot create conformers
        param_obj.useRandomCoords = True
        if constraint_ats is not None:
            cids = constrained_embedding(rdkit_mol, nconfs, param_obj, template_mol, randomSeed)
        else:
            cids = list(AllChem.EmbedMultipleConfs(rdkit_mol, nconfs, param_obj))

    if forcefield:
        # Select the forcefield (UFF or MMFF)
        optimize_molecule, energy = {
            "uff": [AllChem.UFFOptimizeMolecule, UFFenergy],
            "mmff": [AllChem.MMFFOptimizeMolecule, MMFFenergy],
        }[forcefield]

        # Optimize and sort conformations
        for cid in cids:
            optimize_molecule(rdkit_mol, confId=cid)
        cids.sort(key=energy)

    # Remove duplicate conformations based on RMS
    if best_rms > 0 or forcefield:
        rdmol_local = rdkit_mol
        rms_function = AllChem.AlignMol
        if best_rms > 0:
            # Remove the H atoms, and prepare to use the more expensive RDKit function
            rdmol_local = remove_some_Hs(rdkit_mol)
            rms_function = AllChem.GetBestRMS
        keep = [cids[0]]
        for cid in cids[1:]:
            for idx in keep:
                try:
                    # r = AllChem.AlignMol(rdkit_mol, rdkit_mol, cid, idx)
                    r = rms_function(rdmol_local, rdmol_local, cid, idx)
                except Exception:
                    r = rms + 1
                    message = "Alignment failed in multiple conformation generation: "
                    message += Chem.MolToSmiles(rdkit_mol)
                    message += "\nAssuming different conformations."
                    warn(message)
                if r < rms:
                    break
            else:
                keep.append(cid)
        cids = keep

    if nconfs == 1:
        return from_rdmol(rdkit_mol)
    else:
        return [from_rdmol(rdkit_mol, cid) for cid in cids]


[docs]@requires_optional_package("rdkit")
def from_sequence(sequence, nconfs=1, name=None, forcefield=None, rms=0.1):
    """
    Generates PLAMS molecule from a peptide sequence.
    Includes explicit hydrogens and 3D coordinates.

    :parameter str sequence: A peptide sequence, e.g. 'HAG'
    :parameter int nconfs: Number of conformers to be generated
    :parameter str name: A name for the molecule
    :parameter str forcefield: Choose 'uff' or 'mmff' forcefield for geometry
        optimization and ranking of comformations. The default value None results
        in skipping of the geometry optimization step.
    :parameter float rms: Root Mean Square deviation threshold for removing
        similar/equivalent conformations.
    :return: A peptide molecule with hydrogens and 3D coordinates
        or a list of molecules if nconfs > 1
    :rtype: |Molecule| or list of PLAMS Molecules
    """
    from rdkit import Chem

    rdkit_mol = Chem.AddHs(Chem.MolFromSequence(sequence))
    rdkit_mol.SetProp("sequence", sequence)
    return get_conformations(rdkit_mol, nconfs, name, forcefield, rms)


@requires_optional_package("rdkit")
def calc_rmsd(mol1, mol2):
    """
    Superimpose two molecules and calculate the root-mean-squared deviations of
    the atomic positions.
    The molecules should be identical, but the ordering of the atoms may differ.

    :param mol1: Molecule 1
    :param mol2: Molecule 2
    :return: The rmsd after superposition
    :rtype: float
    """
    from rdkit.Chem import AllChem

    rdkit_mol1 = to_rdmol(mol1)
    rdkit_mol2 = to_rdmol(mol2)
    try:
        return AllChem.GetBestRMS(rdkit_mol1, rdkit_mol2)
    except:
        return -999


[docs]@requires_optional_package("rdkit")
def modify_atom(mol, idx, element):
    """
    Change atom "idx" in molecule "mol" to "element" and add or remove hydrogens accordingly

    :parameter mol: molecule to be modified
    :type mol: |Molecule| or rdkit.Chem.Mol
    :parameter int idx: index of the atom to be modified
    :parameter str element:
    :return: Molecule with new element and possibly added or removed hydrogens
    :rtype: |Molecule|
    """
    from rdkit import Chem

    rdmol = to_rdmol(mol)
    if rdmol.GetAtomWithIdx(idx).GetSymbol() == element:
        return mol
    else:
        e = Chem.EditableMol(rdmol)
        for neighbor in reversed(rdmol.GetAtomWithIdx(idx - 1).GetNeighbors()):
            if neighbor.GetSymbol() == "H":
                e.RemoveAtom(neighbor.GetIdx())
        e.ReplaceAtom(idx - 1, Chem.Atom(element))
        newmol = e.GetMol()
        Chem.SanitizeMol(newmol)
        newmol = Chem.AddHs(newmol, addCoords=True)
        return from_rdmol(newmol)


[docs]@requires_optional_package("rdkit")
def apply_template(mol, template):
    """
    Modifies bond orders in PLAMS molecule according template smiles structure.

    :parameter mol: molecule to be modified
    :type mol: |Molecule| or rdkit.Chem.Mol
    :parameter str template: smiles string defining the correct chemical structure
    :return: Molecule with correct chemical structure and provided 3D coordinates
    :rtype: |Molecule|
    """
    from rdkit import Chem

    rdmol = to_rdmol(mol, sanitize=False)
    template_mol = Chem.AddHs(Chem.MolFromSmiles(template))
    newmol = Chem.AllChem.AssignBondOrdersFromTemplate(template_mol, rdmol)
    return from_rdmol(newmol)


[docs]@requires_optional_package("rdkit")
def apply_reaction_smarts(mol, reaction_smarts, complete=False, forcefield=None, return_rdmol=False):
    """
    Applies reaction smirks and returns product.
    If returned as a PLAMS molecule, thismolecule.properties.orig_atoms
    is a list of indices of atoms that have not been changed
    (which can for example be used partially optimize new atoms only with the freeze keyword)

    :parameter mol: molecule to be modified
    :type mol: |Molecule| or rdkit.Chem.Mol
    :parameter str reactions_smarts: Reactions smarts to be applied to molecule
    :parameter complete: Apply reaction until no further changes occur or given
        fraction of reaction centers have been modified
    :type complete: bool or float (value between 0 and 1)
    :parameter forcefield: Specify 'uff' or 'mmff' to apply forcefield based
        geometry optimization of product structures.
    :type forcefield: str
    :param bool return_rdmol: return a RDKit molecule if true, otherwise a PLAMS molecule
    :return: (product molecule, list of unchanged atoms)
    :rtype: (|Molecule|, list of int)
    """
    from rdkit import Chem
    from rdkit.Chem import AllChem

    def react(reactant, reaction):
        """Apply reaction to reactant and return products"""
        ps = reaction.RunReactants([reactant])
        # if reaction doesn't apply, return the reactant
        if len(ps) == 0:
            return [(reactant, range(reactant.GetNumAtoms()))]
        full = len(ps)
        while complete:  # when complete is True
            # apply reaction until no further changes
            r = random.randint(0, len(ps) - 1)
            reactant = ps[r][0]
            ps = reaction.RunReactants([reactant])
            if len(ps) == 0 or len(ps) / full < (1 - complete):
                ps = [[reactant]]
                break
        # add hydrogens and generate coordinates for new atoms
        products = []
        for p in ps[0]:
            Chem.SanitizeMol(p)
            q = Chem.AddHs(p)
            Chem.SanitizeMol(q)
            u = gen_coords_rdmol(q)  # These are the atoms that have not changed
            products.append((q, u))
        return products

    mol = to_rdmol(mol)
    reaction = AllChem.ReactionFromSmarts(reaction_smarts)
    # RDKit removes fragments that are disconnected from the reaction center
    # In order to keep these, the molecule is first split in separate fragments
    # and the results, including non-reacting parts, are re-combined afterwards
    frags = Chem.GetMolFrags(mol, asMols=True)
    product = Chem.Mol()
    unchanged = []  # List of atoms that have not changed
    for frag in frags:
        for p, u in react(frag, reaction):
            unchanged += [product.GetNumAtoms() + i for i in u]
            product = Chem.CombineMols(product, p)
    if forcefield:
        optimize_coordinates(product, forcefield, fixed=unchanged)
    # The molecule is returned together with a list of atom indices of the atoms
    # that are identical to those
    # in the reactants. This list can be used in subsequent partial optimization of the molecule
    if not return_rdmol:
        product = from_rdmol(product)
        product.properties.orig_atoms = [a + 1 for a in unchanged]
    return product


def gen_coords(plamsmol):
    """Calculate 3D positions only for atoms without coordinates"""
    rdmol = to_rdmol(plamsmol)
    unchanged = gen_coords_rdmol(rdmol)
    conf = rdmol.GetConformer()
    for a in range(len(plamsmol.atoms)):
        pos = conf.GetAtomPosition(a)
        atom = plamsmol.atoms[a]
        atom._setx(pos.x)
        atom._sety(pos.y)
        atom._setz(pos.z)
    return [a + 1 for a in unchanged]


@requires_optional_package("rdkit")
def gen_coords_rdmol(rdmol):
    from rdkit.Chem import AllChem

    ref = rdmol.__copy__()
    conf = rdmol.GetConformer()
    coordDict = {}
    unchanged = []
    maps = []
    # Put known coordinates in coordDict
    for i in range(rdmol.GetNumAtoms()):
        pos = conf.GetAtomPosition(i)
        if (-0.0001 < pos.x < 0.0001) and (-0.0001 < pos.y < 0.0001) and (-0.0001 < pos.z < 0.0001):
            continue  # atom without coordinates
        coordDict[i] = pos
        unchanged.append(i)
        maps.append((i, i))
    # compute coordinates for new atoms, keeping known coordinates
    rms = 1
    rs = 1
    # repeat embedding and alignment until the rms of mapped atoms is sufficiently small
    if rdmol.GetNumAtoms() > len(maps):
        while rms > 0.1:
            AllChem.EmbedMolecule(rdmol, coordMap=coordDict, randomSeed=rs, useBasicKnowledge=True)
            # align new molecule to original coordinates
            rms = AllChem.AlignMol(rdmol, ref, atomMap=maps)
            rs += 1
    return unchanged


@requires_optional_package("rdkit")
def optimize_coordinates(rdkit_mol, forcefield, fixed=[]):
    from rdkit import Chem
    from rdkit.Chem import AllChem

    def MMFFminimize():
        ff = AllChem.MMFFGetMoleculeForceField(rdkit_mol, AllChem.MMFFGetMoleculeProperties(rdkit_mol))
        for f in fixed:
            ff.AddFixedPoint(f)
        try:
            ff.Minimize()
        except:
            warn("MMFF geometry optimization failed for molecule: " + Chem.MolToSmiles(rdkit_mol))

    def UFFminimize():
        ff = AllChem.UFFGetMoleculeForceField(rdkit_mol, ignoreInterfragInteractions=True)
        for f in fixed:
            ff.AddFixedPoint(f)
        try:
            ff.Minimize()
        except:
            warn("UFF geometry optimization failed for molecule: " + Chem.MolToSmiles(rdkit_mol))

    optimize_molecule = {"uff": UFFminimize, "mmff": MMFFminimize}[forcefield]
    Chem.SanitizeMol(rdkit_mol)
    optimize_molecule()
    return


@requires_optional_package("rdkit")
def write_molblock(plams_mol, file=sys.stdout):
    from rdkit import Chem

    file.write(Chem.MolToMolBlock(to_rdmol(plams_mol)))


[docs]@requires_optional_package("rdkit")
def readpdb(pdb_file, sanitize=True, removeHs=False, proximityBonding=False, return_rdmol=False):
    """
    Generate a molecule from a PDB file

    :param pdb_file: The PDB file to read
    :type pdb_file: path- or file-like
    :param bool sanitize:
    :param bool removeHs: Hydrogens are removed if True
    :param bool return_rdmol: return a RDKit molecule if true, otherwise a PLAMS molecule
    :return: The molecule
    :rtype: |Molecule| or rdkit.Chem.Mol
    """
    from rdkit import Chem

    try:
        pdb_file = open(pdb_file, "r")
    except TypeError:
        pass  # pdb_file is a file-like object... hopefully

    pdb_mol = Chem.MolFromPDBBlock(pdb_file.read(), sanitize=sanitize, removeHs=removeHs)
    return pdb_mol if return_rdmol else from_rdmol(pdb_mol)


[docs]@requires_optional_package("rdkit")
def writepdb(mol, pdb_file=sys.stdout):
    """
    Write a PDB file from a molecule

    :parameter mol: molecule to be exported to PDB
    :type mol: |Molecule| or rdkit.Chem.Mol
    :param pdb_file: The PDB file to write to, or a filename
    :type pdb_file: path- or file-like
    """
    from rdkit import Chem

    try:
        pdb_file = open(pdb_file, "w")
    except TypeError:
        pass  # pdb_file is a file-like object... hopefully

    mol = to_rdmol(mol, sanitize=False)
    pdb_file.write(Chem.MolToPDBBlock(mol))


[docs]@requires_optional_package("rdkit")
def add_Hs(mol, forcefield=None, return_rdmol=False):
    """
    Add hydrogens to protein molecules read from PDB.
    Makes sure that the hydrogens get the correct PDBResidue info.

    :param mol: Molecule to be protonated
    :type mol: |Molecule| or rdkit.Chem.Mol
    :param str forcefield: Specify 'uff' or 'mmff' to apply forcefield based
        geometry optimization on new atoms.
    :param bool return_rdmol: return a RDKit molecule if true, otherwise a PLAMS molecule
    :return: A molecule with explicit hydrogens added
    :rtype: |Molecule| or rdkit.Chem.Mol
    """
    from rdkit import Chem

    mol = to_rdmol(mol)
    retmol = Chem.AddHs(mol)
    for atom in retmol.GetAtoms():
        if atom.GetPDBResidueInfo() is None and atom.GetSymbol() == "H":
            bond = atom.GetBonds()[0]
            if bond.GetBeginAtom().GetIdx() == atom.GetIdx:
                connected_atom = bond.GetEndAtom()
            else:
                connected_atom = bond.GetBeginAtom()
            try:
                ResInfo = connected_atom.GetPDBResidueInfo()
                if ResInfo is None:
                    continue  # Segmentation faults are raised if ResInfo is None
                atom.SetMonomerInfo(ResInfo)
                atomname = "H" + atom.GetPDBResidueInfo().GetName()[1:]
                atom.GetPDBResidueInfo().SetName(atomname)
            except:
                pass
    unchanged = gen_coords_rdmol(retmol)
    if forcefield:
        optimize_coordinates(retmol, forcefield, fixed=unchanged)
    return retmol if return_rdmol else from_rdmol(retmol)


@requires_optional_package("rdkit")
def add_fragment(rwmol, frag, rwmol_atom_idx=None, frag_atom_idx=None, bond_order=None):
    from rdkit import Chem

    molconf = rwmol.GetConformer()
    fragconf = frag.GetConformer()
    new_indices = []
    for a in frag.GetAtoms():
        new_index = rwmol.AddAtom(a)
        new_indices.append(new_index)
        molconf.SetAtomPosition(new_index, fragconf.GetAtomPosition(a.GetIdx()))
    for b in frag.GetBonds():
        ba = b.GetBeginAtomIdx()
        ea = b.GetEndAtomIdx()
        rwmol.AddBond(new_indices[ba], new_indices[ea], b.GetBondType())
    if bond_order:
        rwmol.AddBond(rwmol_atom_idx, new_indices[frag_atom_idx], Chem.BondType.values[bond_order])
        rwmol.GetAtomWithIdx(new_indices[frag_atom_idx]).SetNumRadicalElectrons(0)


@requires_optional_package("rdkit")
def get_fragment(mol, indices, incl_expl_Hs=True, neutralize=True):
    from rdkit import Chem

    molconf = mol.GetConformer()
    fragment = Chem.RWMol(Chem.Mol())
    fragconf = Chem.Conformer()
    # Put atoms in fragment
    for i in indices:
        atom = mol.GetAtomWithIdx(i)
        new_index = fragment.AddAtom(atom)
        pos = molconf.GetAtomPosition(i)
        fragconf.SetAtomPosition(new_index, pos)
    # Put bonds in fragment
    for b in mol.GetBonds():
        ba = b.GetBeginAtomIdx()
        ea = b.GetEndAtomIdx()
        if ba in indices and ea in indices:
            fragment.AddBond(indices.index(ba), indices.index(ea), b.GetBondType())
            continue
        if not incl_expl_Hs:
            continue
        if ba in indices and mol.GetAtomWithIdx(ea).GetSymbol() == "H":
            hi = fragment.AddAtom(mol.GetAtomWithIdx(ea))
            fragconf.SetAtomPosition(hi, molconf.GetAtomPosition(ea))
            fragment.AddBond(indices.index(ba), hi, Chem.BondType.SINGLE)
            continue
        if ea in indices and mol.GetAtomWithIdx(ba).GetSymbol() == "H":
            hi = fragment.AddAtom(mol.GetAtomWithIdx(ba))
            fragconf.SetAtomPosition(hi, molconf.GetAtomPosition(ba))
            fragment.AddBond(indices.index(ea), hi, Chem.BondType.SINGLE)
    ret_frag = fragment.GetMol()
    Chem.SanitizeMol(ret_frag)
    if neutralize:
        for atom in ret_frag.GetAtoms():
            nrad = atom.GetNumRadicalElectrons()
            if nrad > 0:
                atom.SetNumExplicitHs(atom.GetNumExplicitHs() + nrad)
                atom.SetNumRadicalElectrons(0)
    Chem.SanitizeMol(ret_frag)
    ret_frag.AddConformer(fragconf)
    return ret_frag


[docs]@requires_optional_package("rdkit")
def partition_protein(mol, residue_bonds=None, split_heteroatoms=True, return_rdmol=False):
    """
    Splits a protein molecule into capped amino acid fragments and caps.

    :param mol: A protein molecule
    :type mol: |Molecule| or rdkit.Chem.Mol
    :param tuple residue_bonds: a tuple of pairs of residue number indicating which
        peptide bonds to split. If none, split all peptide bonds.
    :param bool split_heteroatoms: if True, all bonds between a heteroatom and
        a non-heteroatom across residues are removed
    :return: list of fragments, list of caps
    """
    from rdkit import Chem

    mol = to_rdmol(mol)
    caps = []
    em = Chem.RWMol(mol)
    if split_heteroatoms:
        for bond in mol.GetBonds():
            resinfa = bond.GetBeginAtom().GetPDBResidueInfo()
            resinfb = bond.GetEndAtom().GetPDBResidueInfo()
            if resinfa.GetIsHeteroAtom() is not resinfb.GetIsHeteroAtom():
                if resinfa.GetResidueNumber() != resinfb.GetResidueNumber():
                    em.RemoveBond(bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())
    # Split peptide bonds
    pept_bond = Chem.MolFromSmarts("[C;X4;H1,H2][CX3](=O)[NX3][C;X4;H1,H2][CX3](=O)")
    for match in mol.GetSubstructMatches(pept_bond):
        if residue_bonds:
            resa = mol.GetAtomWithIdx(match[1]).GetPDBResidueInfo().GetResidueNumber()
            resb = mol.GetAtomWithIdx(match[3]).GetPDBResidueInfo().GetResidueNumber()
            if (resa, resb) not in residue_bonds and (resb, resa) not in residue_bonds:
                continue
        cap = get_fragment(mol, match[0:5])
        cap = add_Hs(cap, return_rdmol=True)
        caps.append(cap if return_rdmol else from_rdmol(cap))
        cap_o_ind = cap.GetSubstructMatch(Chem.MolFromSmarts("[C;X4][CX3]=O"))
        cap_o = get_fragment(cap, cap_o_ind, neutralize=False)
        cap_n_ind = cap.GetSubstructMatch(Chem.MolFromSmarts("O=[CX3][NX3][C;X4]"))[2:]
        cap_n = get_fragment(cap, cap_n_ind, neutralize=False)
        em.RemoveBond(match[1], match[3])
        add_fragment(em, cap_o, match[3], 1, 1)
        add_fragment(em, cap_n, match[1], 0, 1)
    # Split disulfide bonds
    ss_bond = Chem.MolFromSmarts("[C;X4;H1,H2]SS[C;X4;H1,H2]")
    for match in mol.GetSubstructMatches(ss_bond):
        cap = get_fragment(mol, match[0:5])
        cap = add_Hs(cap, return_rdmol=True)
        caps.append(cap if return_rdmol else from_rdmol(cap))
        cap_s_ind = cap.GetSubstructMatch(Chem.MolFromSmarts("[C;X4]SS[C;X4]"))
        cap_s1 = get_fragment(cap, cap_s_ind[0:2], neutralize=False)
        cap_s2 = get_fragment(cap, cap_s_ind[2:4], neutralize=False)
        em.RemoveBond(match[1], match[2])
        add_fragment(em, cap_s1, match[2], 1, 1)
        add_fragment(em, cap_s2, match[1], 0, 1)
    frags = Chem.GetMolFrags(em.GetMol(), asMols=True, sanitizeFrags=False)
    if not return_rdmol:
        frags = [from_rdmol(frag) for frag in frags]
    return frags, caps


@requires_optional_package("rdkit")
def charge_AAs(mol, return_rdmol=False):
    from rdkit import Chem

    ionizations = {"ARG_NH2": 1, "LYS_NZ": 1, "GLU_OE2": -1, "ASP_OD2": -1}
    mol = to_rdmol(mol)
    for atom in mol.GetAtoms():
        resinfo = atom.GetPDBResidueInfo()
        res_atom = resinfo.GetResidueName() + "_" + resinfo.GetName().strip()
        try:
            atom.SetFormalCharge(ionizations[res_atom])
            Chem.SanitizeMol(mol)
        except KeyError:
            pass
        Chem.SanitizeMol(mol)
    return mol if return_rdmol else from_rdmol(mol)


[docs]def get_backbone_atoms(mol):
    """
    Return a list of atom indices corresponding to the backbone atoms in a peptide molecule.
    This function assumes PDB information in properties.pdb_info of each atom, which is the case
    if the molecule is generated with the "readpdb" or "from_sequence" functions.

    :parameter mol: a peptide molecule
    :type mol: |Molecule| or rdkit.Chem.Mol
    :return: a list of atom indices
    :rtype: list
    """
    mol = from_rdmol(mol)
    backbone = ["N", "CA", "C", "O"]
    return [a for a in range(1, len(mol) + 1) if str(mol[a].properties.pdb_info.Name).strip() in backbone]


[docs]@requires_optional_package("rdkit")
def get_substructure(mol, func_list):
    """
    Search for functional groups within a molecule based on a list of reference functional groups.
    SMILES strings, PLAMS and/or RDKit molecules can be used interchangeably in "func_list".

    Example:

    .. code:: python

        >>> mol = from_smiles('OCCO')  # Ethylene glycol
        >>> func_list = ['[H]O', 'C[N+]', 'O=PO']
        >>> get_substructure(mol, func_list)

        {'[H]O': [(<scm.plams.mol.atom.Atom at 0x125183518>,
                   <scm.plams.mol.atom.Atom at 0x1251836a0>),
                  (<scm.plams.mol.atom.Atom at 0x125183550>,
                   <scm.plams.mol.atom.Atom at 0x125183240>)]}

    :parameter mol: A PLAMS molecule.
    :type mol: |Molecule|
    :parameter list func_list: A list of functional groups.
        Functional groups can be represented by SMILES strings, PLAMS and/or RDKit molecules.
    :return: A dictionary with functional groups from "func_list" as keys and a list of n-tuples
        with matching PLAMS |Atom| as values.
    """
    from rdkit import Chem

    def _to_rdmol(functional_group):
        """Turn a SMILES strings, RDKit or PLAMS molecules into an RDKit molecule."""
        if isinstance(functional_group, str):
            # RDKit tends to remove explicit hydrogens if SANITIZE_ADJUSTHS is enabled
            sanitize = Chem.SanitizeFlags.SANITIZE_ALL ^ Chem.SanitizeFlags.SANITIZE_ADJUSTHS
            ret = Chem.MolFromSmiles(functional_group, sanitize=False)
            Chem.rdmolops.SanitizeMol(ret, sanitizeOps=sanitize)
            return ret
        elif isinstance(functional_group, Molecule):
            return to_rdmol(functional_group)
        elif isinstance(functional_group, Chem.Mol):
            return functional_group
        raise TypeError(
            "get_substructure: "
            + str(type(functional_group))
            + " is not a supported \
                        object type"
        )

    def _get_match(mol, rdmol, functional_group):
        """Perform a substructure match on "mol".
        If a match is found, return a list of n-tuples consisting PLAMS |Atom|.
        Otherwise return False."""
        matches = rdmol.GetSubstructMatches(functional_group)
        if matches:
            return [tuple(mol[j + 1] for j in idx_tup) for idx_tup in matches]
        return False

    rdmol = to_rdmol(mol)
    rdmol_func_list = [_to_rdmol(i) for i in func_list]
    gen = (_get_match(mol, rdmol, i) for i in rdmol_func_list)
    return {key: value for key, value in zip(func_list, gen) if value}


[docs]def yield_coords(rdmol, id=-1):
    """Take an rdkit molecule and yield its coordinates as 3-tuples.

    .. code-block:: python

        >>> from scm.plams import yield_coords
        >>> from rdkit import Chem

        >>> rdmol = Chem.Mol(...)  # e.g. Methane
        >>> for xyz in yield_coords(rdmol):
        ...     print(xyz)
        (-0.0, -0.0, -0.0)
        (0.6405, 0.6405, -0.6405)
        (0.6405, -0.6405, 0.6405)
        (-0.6405, 0.6405, 0.6405)
        (-0.6405, -0.6405, -0.6405)


    The iterator produced by this function can, for example, be passed to
    :meth:`Molecule.from_array()<scm.plams.mol.molecule.Molecule.from_array>`
    the update the coordinates of a PLAMS Molecule in-place.

    .. code-block:: python

        >>> from scm.plams import Molecule

        >>> mol = Molecule(...)

        >>> xyz_iterator = yield_coords(rdmol)
        >>> mol.from_array(xyz_iterator)


    :parameter rdmol: An RDKit mol.
    :type rdmol: rdkit.Chem.Mol
    :parameter int id: The ID of the desired conformer.
    :return: An iterator yielding 3-tuples with *rdmol*'s Cartesian coordinates.
    :rtype: iterator
    """
    conf = rdmol.GetConformer(id=id)
    for atom in rdmol.GetAtoms():
        pos = conf.GetAtomPosition(atom.GetIdx())
        yield (pos.x, pos.y, pos.z)


@add_to_class(Molecule)
def assign_chirality(self):
    """
    Assigns stereo-info to PLAMS molecule by invoking RDKIT
    """
    rd_mol = to_rdmol(self, assignChirality=True)
    pl_mol = from_rdmol(rd_mol)

    # Add R/S info to self
    for iat, pl_atom in enumerate(pl_mol.atoms):
        # Check for R/S information
        if pl_atom.properties.rdkit.stereo:
            self.atoms[iat].properties.rdkit.stereo = pl_atom.properties.rdkit.stereo

    # Add cis/trans information to self
    for ibond, pl_bond in enumerate(pl_mol.bonds):
        if pl_bond.properties.rdkit.stereo:
            self.bonds[ibond] = pl_bond.properties.rdkit.stereo


@add_to_class(Molecule)
@requires_optional_package("rdkit")
def get_chirality(self):
    """
    Returns the chirality of the atoms
    """
    from rdkit import Chem

    rd_mol = to_rdmol(self, assignChirality=True)
    return Chem.FindMolChiralCenters(rd_mol, force=True, includeUnassigned=True)


[docs]@requires_optional_package("rdkit")
def canonicalize_mol(mol, inplace=False, **kwargs):
    r"""Take a PLAMS molecule and sort its atoms based on their canonical rank.

    Example:

    .. code:: python

        >>> from scm.plams import Molecule, canonicalize_mol

        # Methane
        >>> mol: Molecule = ...
        >>> print(mol)
        Atoms:
            1         H      0.640510      0.640510     -0.640510
            2         H      0.640510     -0.640510      0.640510
            3         C      0.000000      0.000000      0.000000
            4         H     -0.640510      0.640510      0.640510
            5         H     -0.640510     -0.640510     -0.640510

        >>> print(canonicalize_mol(mol))
        Atoms:
            1         C      0.000000      0.000000      0.000000
            2         H     -0.640510     -0.640510     -0.640510
            3         H     -0.640510      0.640510      0.640510
            4         H      0.640510     -0.640510      0.640510
            5         H      0.640510      0.640510     -0.640510

    :parameter mol: The to-be canonicalized molecule.
    :type mol: |Molecule|
    :parameter bool inplace: Whether to sort the atoms inplace or to return a new molecule.
    :parameter \**kwargs: Further keyword arguments for rdkit.Chem.CanonicalRankAtoms_.
    :return: Either ``None`` or a newly sorted molecule, depending on the value of ``inplace``.
    :rtype: None or |Molecule|

    .. _rdkit.Chem.CanonicalRankAtoms: https://www.rdkit.org/docs/source/rdkit.Chem.rdmolfiles.html#rdkit.Chem.rdmolfiles.CanonicalRankAtoms

    """
    from rdkit import Chem

    if not isinstance(mol, Molecule):
        raise TypeError("`mol` expected a plams Molecule")
    rdmol = to_rdmol(mol)
    idx_rank = Chem.CanonicalRankAtoms(rdmol, **kwargs)

    if inplace:
        mol.atoms = [at for _, at in sorted(zip(idx_rank, mol.atoms), reverse=True)]
        return None
    else:
        ret = mol.copy()
        ret.atoms = [at for _, at in sorted(zip(idx_rank, ret.atoms), reverse=True)]
        return ret


[docs]@requires_optional_package("rdkit")
@requires_optional_package("PIL")
def to_image(
    mol: Molecule,
    remove_hydrogens: bool = True,
    filename: Optional[str] = None,
    fmt: str = "svg",
    size: Sequence[int] = (200, 100),
    as_string: bool = True,
):
    """
    Convert single molecule to single image object

    * ``mol`` -- PLAMS Molecule object
    * ``remove_hydrogens`` -- Wether or not to remove the H-atoms from the image
    * ``filename`` -- Optional: Name of image file to be created.
    * ``fmt`` -- One of "svg", "png", "eps", "pdf", "jpeg"
    * ``size`` -- Tuple/list containing width and height of image in pixels.
    * ``as_string`` -- Returns the image as a string or bytestring. If set to False, the original format
                       will be returned, which can be either a PIL image or SVG text
                       We do this because after converting a PIL image to a bytestring it is not possible
                       to further edit it (with our version of PIL).
    * Returns -- Image text file / binary of image text file / PIL Image object.
    """
    from io import BytesIO
    from PIL import Image
    from rdkit.Chem import Draw
    from rdkit.Chem.Draw import rdMolDraw2D
    from rdkit.Chem.Draw import MolsToGridImage

    extensions = ["svg", "png", "eps", "pdf", "jpeg"]

    classes: Dict[str, Any] = {}
    classes["svg"] = _MolsToGridSVG
    for ext in extensions[1:]:
        classes[ext] = None
    # PNG can only be created in this way with later version of RDKit
    if hasattr(rdMolDraw2D, "MolDraw2DCairo"):
        for ext in extensions[1:]:
            classes[ext] = MolsToGridImage

    # Determine the type of image file
    if filename is not None:
        if "." in filename:
            extension = filename.split(".")[-1]
            if extension in classes.keys():
                fmt = extension
            else:
                msg = [f"Image type {extension} not available."]
                msg += [f"Available extensions are: {' '.join(extensions)}"]
                raise Exception("\n".join(msg))
        else:
            filename = ".".join([filename, fmt])

    if fmt not in classes.keys():
        raise Exception(f"Image type {fmt} not available.")

    rdmol = _rdmol_for_image(mol, remove_hydrogens)

    # Draw the image
    if classes[fmt.lower()] is None:
        # With AMS version of RDKit MolsToGridImage fails for eps (because of paste)
        img = Draw.MolToImage(rdmol, size=size)
        buf = BytesIO()
        img.save(buf, format=fmt)
        img_text = buf.getvalue()
    else:
        # This fails for a C=C=O molecule, with AMS rdkit version
        img = classes[fmt.lower()]([rdmol], molsPerRow=1, subImgSize=size)
        img_text = img
        if isinstance(img, Image.Image):
            buf = BytesIO()
            img.save(buf, format=fmt)
            img_text = buf.getvalue()
    # If I do not make this correction to the SVG text, it is not readable in JupyterLab
    if fmt.lower() == "svg":
        img_text = _correct_svg(img_text)

    # Write to file, if required
    if filename is not None:
        mode = "w"
        if isinstance(img_text, bytes):
            mode = "wb"
        with open(filename, mode) as outfile:
            outfile.write(img_text)

    if as_string:
        img = img_text
    return img


[docs]@requires_optional_package("rdkit")
@requires_optional_package("PIL")
def get_reaction_image(
    reactants: Sequence[Molecule],
    products: Sequence[Molecule],
    filename: Optional[str] = None,
    fmt: str = "svg",
    size: Sequence[int] = (200, 100),
    as_string: bool = True,
):
    """
    Create a 2D reaction image from reactants and products (PLAMS molecules)

    * ``reactants`` -- Iterable of PLAMS Molecule objects representing the reactants.
    * ``products`` -- Iterable of PLAMS Molecule objects representing the products.
    * ``filename`` -- Optional: Name of image file to be created.
    * ``fmt``  -- The format of the image (svg, png, eps, pdf, jpeg).
                     The extension in the filename, if provided, takes precedence.
    * ``size`` -- Tuple/list containing width and height of image in pixels.
    * ``as_string`` -- Returns the image as a string or bytestring. If set to False, the original format
                       will be returned, which can be either a PIL image or SVG text
    *      Returns -- SVG image text file.
    """
    extensions = ["svg", "png", "eps", "pdf", "jpeg"]

    # Determine the type of image file
    if filename is not None:
        if "." in filename:
            extension = filename.split(".")[-1]
            if extension in extensions:
                fmt = extension
            else:
                msg = [f"Image type {extension} not available."]
                msg += [f"Available extensions are: {' '.join(extensions)}"]
                raise Exception("\n".join(msg))
        else:
            filename = ".".join([filename, fmt])

    if fmt.lower() not in extensions:
        raise Exception(f"Image type {fmt} not available.")

    # Get the actual image
    width = size[0]
    height = size[1]
    if fmt.lower() == "svg":
        img_text = _get_reaction_image_svg(reactants, products, width, height)
    else:
        img_text = _get_reaction_image_pil(reactants, products, fmt, width, height, as_string=as_string)

    # Write to file, if required
    if filename is not None:
        mode = "w"
        if isinstance(img_text, bytes):
            mode = "wb"
        with open(filename, mode) as outfile:
            outfile.write(img_text)
    return img_text


def _get_reaction_image_svg(
    reactants: Sequence[Molecule], products: Sequence[Molecule], width: int = 200, height: int = 100
):
    """
    Create a 2D reaction image from reactants and products (PLAMS molecules)

    * ``reactants`` -- Iterable of PLAMS Molecule objects representing the reactants.
    * ``products`` -- Iterable of PLAMS Molecule objects representing the products.
    *      Returns -- SVG image text file.
    """
    from rdkit import Chem

    def svg_arrow(x1, y1, x2, y2, prefix=""):
        """
        The reaction arrow in html format
        """
        # The arrow head
        l = ['<%sdefs> <%smarker id="arrow" viewBox="0 0 10 10" refX="5" refY="5" ' % (prefix, prefix)]
        l += ['markerWidth="6" markerHeight="6" ']
        l += ['orient="auto-start-reverse"> ']
        l += ['<%spath d="M 0 0 L 10 5 L 0 10 z" /></%smarker></%sdefs>' % (prefix, prefix, prefix)]
        arrow = "".join(l)
        # The line
        l = ['<%sline x1="%i" y1="%i" x2="%i" y2="%i" ' % (prefix, x1, y1, x2, y2)]
        l += ['stroke="black" marker-end="url(#arrow)" />']
        line = "".join(l)
        return [arrow, line]

    def add_plus_signs_svg(img_text, width, height, nmols, nreactants, prefix=""):
        """
        Add the lines with + signs to the SVG image
        """
        y = int(0.55 * height)
        t = []
        for i in range(nmols - 1):
            x = int(((i + 1) * width) - (0.1 * width))
            if i + 1 in (nreactants, nreactants + 1):
                continue
            t += ['<%stext x="%i" y="%i" font-size="16">+</%stext>' % (prefix, x, y, prefix)]
        lines = img_text.split("\n")
        lines = lines[:-2] + t + lines[-2:]
        return "\n".join(lines)

    def add_arrow_svg(img_text, width, height, nreactants, prefix=""):
        """
        Add the arrow to the SVG image
        """
        y = int(0.5 * height)
        x1 = int((nreactants * width) + (0.3 * width))
        x2 = int((nreactants * width) + (0.7 * width))
        t = svg_arrow(x1, y, x2, y, prefix)
        lines = img_text.split("\n")
        lines = lines[:-2] + t + lines[-2:]
        return "\n".join(lines)

    # Get the rdkit molecules
    rdmols = [_rdmol_for_image(mol) for mol in reactants]
    rdmols += [Chem.Mol()]  # This is where the arrow will go
    rdmols += [_rdmol_for_image(mol) for mol in products]
    nmols = len(rdmols)

    # Place the molecules in a row of images
    subimg_size = [width, height]
    kwargs = {"legendFontSize": 16}  # ,"legendFraction":0.1}
    img_text = _MolsToGridSVG(rdmols, molsPerRow=nmols, subImgSize=subimg_size, **kwargs)
    img_text = _correct_svg(img_text)

    # Add + and =>
    nreactants = len(reactants)
    img_text = add_plus_signs_svg(img_text, width, height, nmols, nreactants)
    img_text = add_arrow_svg(img_text, width, height, nreactants)

    return img_text


def _get_reaction_image_pil(
    reactants: Sequence[Molecule],
    products: Sequence[Molecule],
    fmt: str,
    width: int = 200,
    height: int = 100,
    as_string: bool = True,
):
    """
    Create a 2D reaction image from reactants and products (PLAMS molecules)

    * ``reactants`` -- Iterable of PLAMS Molecule objects representing the reactants.
    * ``products`` -- Iterable of PLAMS Molecule objects representing the products.
    *      Returns -- SVG image text file.
    """
    from io import BytesIO
    from PIL import Image
    from PIL import ImageDraw
    from rdkit import Chem
    from rdkit.Chem.Draw import rdMolDraw2D, MolsToGridImage

    def add_arrow_pil(img, width, height, nreactants):
        """
        Add the arrow to the PIL image
        """
        y1 = int(0.5 * height)
        y2 = y1
        x1 = int((nreactants * width) + (0.3 * width))
        x2 = int((nreactants * width) + (0.7 * width))

        # Draw a line
        black = (0, 0, 0)
        draw = ImageDraw.Draw(img)
        draw.line(((x1, y1), (x2, y2)), fill=128)

        # Draw the arrow head
        headscale = 20
        xshift = width // headscale
        yshift = img.size[1] // headscale
        p1 = (x2, y2 + yshift)
        p2 = (x2, y2 - yshift)
        p3 = (x2 + xshift, y2)
        draw.polygon((p1, p2, p3), fill=black)

        return img

    def add_plus_signs_pil(img, width, height, nmols, nreactants):
        """
        Add the lines with + signs to the SVG image
        """
        black = (0, 0, 0)

        I1 = ImageDraw.Draw(img)
        y = int(0.5 * height)
        # myfont = ImageFont.truetype('FreeMono.ttf', 25)
        for i in range(nmols):
            x = int(((i + 1) * width) - (0.05 * width))
            if i + 1 in (nreactants, nreactants + 1):
                continue
            # I1.text((x, y), "+", font=myfont, fill=black)
            I1.text((x, y), "+", fill=black)
        return img

    def join_pil_images(pil_images):
        """
        Create a new image which connects the ones above with text
        """
        white = (255, 255, 255)

        widths = [img.width for img in pil_images]
        width = sum(widths)
        height = max([img.height for img in pil_images])
        final_img = Image.new("RGB", (width, height), white)

        # Concatenate the PIL images
        for i, img in enumerate(pil_images):
            pos = sum(widths[:i])
            h = int((height - img.height) / 2)
            final_img.paste(img, (pos, h))

        return final_img

    nreactants = len(reactants)
    nmols = nreactants + len(products)

    if not hasattr(rdMolDraw2D, "MolDraw2DCairo"):
        # We are working with the old AMS version of RDKit
        white = (255, 255, 255)
        rimages = [to_image(mol, fmt=fmt, as_string=False) for i, mol in enumerate(reactants)]
        pimages = [to_image(mol, fmt=fmt, as_string=False) for i, mol in enumerate(products)]
        blanc = Image.new("RGB", (width, height), white)

        # Get the image (with arrow)
        nreactants = len(reactants)
        all_images = rimages + [blanc] + pimages
        img = join_pil_images(all_images)

    else:
        # We have a later version of RDKit that can regulate the font sizes
        rdmols = [_rdmol_for_image(mol) for mol in reactants]
        rdmols += [Chem.Mol()]  # This is where the arrow will go
        rdmols += [_rdmol_for_image(mol) for mol in products]

        # Place the molecules in a row of images
        subimg_size = [width, height]
        kwargs = {"legendFontSize": 16}  # ,"legendFraction":0.1}
        img = MolsToGridImage(rdmols, molsPerRow=nmols + 1, subImgSize=subimg_size, **kwargs)

    # Add + and =>
    img = add_plus_signs_pil(img, width, height, nmols, nreactants)
    img = add_arrow_pil(img, width, height, nreactants)

    # Get the bytestring
    img_text = img
    if as_string:
        buf = BytesIO()
        img.save(buf, format=fmt)
        img_text = buf.getvalue()

    return img_text


def _correct_svg(image):
    """
    Correct for a bug in the AMS rdkit created SVG file
    """
    if not "svg:" in image:
        return image
    image = image.replace("svg:", "")
    lines = image.split("\n")
    for iline, line in enumerate(lines):
        if "xmlns:svg=" in line:
            lines[iline] = line.replace("xmlns:svg", "xmlns")
            break
    image = "\n".join(lines)
    return image


def _presanitize(mol, rdmol):
    """
    Change bonding and atom charges to avoid failed sanitization

    Note: Used by to_rdmol
    """
    from rdkit import Chem

    mol = mol.copy()
    for i in range(10):
        try:
            rdmol_test = copy.deepcopy(rdmol)
            Chem.SanitizeMol(rdmol_test)
            stored_exc = None
            break
        except ValueError as exc:
            stored_exc = exc
            text = repr(exc)
        # Fix the problem
        rdmol, bonds, charges = _kekulize(mol, rdmol, text)
        # print ("REB bonds charges: ",bonds, charges)
        # print (rdmol.Debug())
        # rdmol = to_rdmol(mol, sanitize=False)
        # print (rdmol.Debug())

    if stored_exc is not None:
        raise stored_exc
    else:
        rdmol = rdmol_test
    return rdmol


def _kekulize(mol, rdmol, text, use_dfs=True):
    """
    Kekulize the atoms indicated as problematic by RDKit

    * ``mol`` - PLAMS molecule
    * ``text`` - Sanitation error produced by RDKit

    Note: Returns the changes in bond orders and atomic charges, that will make sanitation succeed.
    """
    from rdkit import Chem

    # Find the indices of the problematic atoms
    indices = _find_aromatic_sequence(rdmol, text)
    if indices is None:
        return rdmol, {}, {}

    # Set the bond orders along the chain to 2, 1, 2, 1,...
    altered_bonds = {}
    if len(indices) > 1:
        emol = Chem.RWMol(rdmol)
        if use_dfs:
            # It may be better to create the tree first, and then start at a leaf
            altered_bonds = _alter_aromatic_bonds(emol, indices[0])
            indices = list(set([ind for tup in altered_bonds.keys() for ind in tup]))
        else:
            indices = _order_atom_indices(emol, indices)
            altered_bonds = _alter_bonds_along_chain(emol, indices)
        _adjust_atom_aromaticity(emol, altered_bonds)
        rdmol = emol.GetMol()
        # Adjust the PLAMS molecule as well
        for (iat, jat), order in altered_bonds.items():
            for bond in mol.atoms[iat].bonds:
                if mol.index(bond.other_end(mol.atoms[iat])) - 1 == jat:
                    bond.order = order
                    break

    # If the atom at the chain end has the wrong bond order, give it a charge.
    altered_charge = {}
    atom_indices, dangling_bonds = _get_charged_atoms(rdmol, indices)
    if len(atom_indices) > 0:
        iat = atom_indices[0]
        ndangling = dangling_bonds[iat]
        altered_charge = _guess_atomic_charge(iat, ndangling, rdmol, mol)
        rdmol.GetAtomWithIdx(iat).SetFormalCharge(altered_charge[iat])
        for k, v in altered_charge.items():
            mol.atoms[k].properties.rdkit.charge = v

    return rdmol, altered_bonds, altered_charge


def _find_aromatic_sequence(rdmol, text):
    """
    Find the sequence of atoms with 1.5 bond orders
    """
    indices = None
    lines = text.split("\n")
    line = lines[-1]
    if "Unkekulized atoms:" in line:
        text = line.split("Unkekulized atoms:")[-1].split("\\n")[0]
        if '"' in text:
            text = text.split('"')[0]
        indices = [int(w) for w in text.split()]
        if len(indices) > 1:
            return indices
        line = "atom %i marked aromatic" % (indices[0])
    iat: Any
    if "marked aromatic" in line:
        iat = int(line.split("atom")[-1].split()[0])
        indices = [iat]
        while iat is not None:
            icurrent = iat
            iat = None
            for bond in rdmol.GetAtomWithIdx(icurrent).GetBonds():
                if str(bond.GetBondType()) == "AROMATIC":
                    iat = bond.GetOtherAtomIdx(icurrent)
                    if iat in indices:
                        iat = None
                        continue
                    indices.append(iat)
                    break
    elif "Explicit valence for atom" in line:
        iat = int(line.split("#")[-1].split()[0])
        indices = [iat]
    return indices


def _alter_aromatic_bonds(emol, iat, depth=0, double_first=False):
    """
    Switch all thearomitic bonds to single/double, starting at iat

    * ``emol`` -- RDKit EditableMol type, for which bond orders will be changed
    * ``iat``  -- Starting point for depth first search
    """
    from collections import OrderedDict
    from rdkit import Chem
    from scm.plams import PeriodicTable as PT

    # Use OrderedDict, so that the leaves of the tree
    # will be at the end
    bonds_changed = OrderedDict()

    at = emol.GetAtomWithIdx(iat)
    valence = PT.get_connectors(at.GetAtomicNum())
    bonds = at.GetBonds()
    are_aromatic = [str(b.GetBondType()) == "AROMATIC" for b in bonds]
    int_orders = sum([b.GetBondTypeAsDouble() for i, b in enumerate(bonds) if not are_aromatic[i]])
    numbonds = len([b for i, b in enumerate(bonds) if are_aromatic[i]])
    valence -= int(int_orders)
    # Here I place the double bond first.
    # I could also start with a single bond instead.
    orders = [1 for i in range(numbonds)]
    if valence > numbonds:
        for i in range(min(numbonds, valence - numbonds)):
            if double_first:
                orders[i] = 2
            else:
                orders[numbonds - i - 1] = 2

    for i, bond in enumerate(bonds):
        jat = bond.GetOtherAtomIdx(iat)
        if are_aromatic[i]:
            pair = tuple(sorted([iat, jat]))
            order = orders.pop(0)
            bond.SetBondType(Chem.BondType(order))
            bond.SetIsAromatic(False)  # This is necessary with the newer RDKit
            bonds_changed[pair] = order
            d = _alter_aromatic_bonds(emol, jat, depth + 1)
            bonds_changed.update(d)
    return bonds_changed


def _alter_bonds_along_chain(emol, indices):
    """
    Along the chain of atoms (indices), alternate double and single bonds
    """
    from scm.plams import PeriodicTable as PT
    from rdkit import Chem

    if len(indices) > 1:
        # The first bond order is set to 2, if aromic
        # Else it is flipped
        first_bond = emol.GetBondBetweenAtoms(indices[0], indices[1])
        if str(first_bond.GetBondType()) == "AROMATIC":
            new_order = 2
        else:
            new_order = first_bond.GetBondTypeAsDouble()
            new_order = (new_order % 2) + 1
        # Unless this breaks valence rules.
        iat = indices[0]
        at = emol.GetAtomWithIdx(iat)
        valence = PT.get_connectors(at.GetAtomicNum())
        orders = [b.GetBondTypeAsDouble() for b in at.GetBonds()]
        jat = indices[1]
        at_next = emol.GetAtomWithIdx(jat)
        valence_next = PT.get_connectors(at_next.GetAtomicNum())
        orders_next = [b.GetBondTypeAsDouble() for b in at_next.GetBonds()]
        if sum(orders) > valence or sum(orders_next) > valence_next:
            new_order = 1
    # Set the bond orders along the chain to 2, 1, 2, 1,...
    altered_bonds = {}
    for i, iat in enumerate(indices[:-1]):
        bond = emol.GetBondBetweenAtoms(iat, indices[i + 1])
        bond.SetBondType(Chem.BondType(new_order))
        bond.SetIsAromatic(False)  # This is necessary with the newer RDKit versions
        altered_bonds[iat, indices[i + 1]] = new_order
        new_order = ((new_order) % 2) + 1
    return altered_bonds


def _order_atom_indices(rdmol, indices):
    """
    Order the atomic indices so that they are consecutive along a bonded chain
    """
    # Order the indices, so that they are consecutive in the molecule
    start = 0
    for i, iat in enumerate(indices):
        at = rdmol.GetAtomWithIdx(iat)
        neighbors = [b.GetOtherAtomIdx(iat) for b in at.GetBonds()]
        relevant_neighbors = [jat for jat in neighbors if jat in indices]
        if len(relevant_neighbors) == 1:
            start = i
            break
    iat = indices[start]
    atoms = [iat]
    while 1:
        at = rdmol.GetAtomWithIdx(iat)
        neighbors = [b.GetOtherAtomIdx(iat) for b in at.GetBonds()]
        relevant_neighbors = [jat for jat in neighbors if jat in indices and not jat in atoms]
        if len(relevant_neighbors) == 0:
            break
        iat = relevant_neighbors[0]
        atoms.append(iat)
    if len(atoms) < len(indices):
        raise Exception("The unkekulized atoms are not in a consecutive chain")
    indices = atoms
    return indices


def _adjust_atom_aromaticity(emol, altered_bonds):
    """
    Assign aromaticity to the atoms based on the new bond orders
    """
    indices = list(set([ind for tup in altered_bonds.keys() for ind in tup]))
    for ind in indices:
        at = emol.GetAtomWithIdx(ind)
        if not at.GetIsAromatic():
            continue
        # Only change this if we are sure the atom is not aromatic anymore
        aromatic = False
        for bond in at.GetBonds():
            if str(bond.GetBondType()) == "AROMATIC":
                aromatic = True
                break
        if not aromatic:
            at.SetIsAromatic(False)


def _get_charged_atoms(rdmol, indices):
    """
    Locate the atoms that need a charge
    """
    from scm.plams import PeriodicTable as PT

    atom_indices = []
    dangling_bonds = {}
    for iat in indices[::-1]:
        at = rdmol.GetAtomWithIdx(iat)
        valence = PT.get_connectors(at.GetAtomicNum())
        bonds = at.GetBonds()
        orders = [b.GetBondTypeAsDouble() for b in bonds]
        ndangling = int(valence - sum(orders))
        if ndangling != 0:
            dangling_bonds[iat] = ndangling
            atom_indices.append(iat)
    return atom_indices, dangling_bonds


def _guess_atomic_charge(iat, ndangling, rdmol, mol):
    """
    Guess the best atomic charge for atom iat
    """
    # Get the total charge from atomic charges already set
    charges = [at.GetFormalCharge() for at in rdmol.GetAtoms()]
    totcharge = sum(charges) - charges[iat]

    # Here I hope that the estimated charge will be more reliable than the
    # actual (user defined) system charge, but am not sure
    est_charges = mol.guess_atomic_charges(adjust_to_systemcharge=False, depth=0)
    molcharge = int(sum(est_charges))
    molcharge = molcharge - totcharge

    # Adjust the sign based on the estimated charge
    sign = ndangling / (abs(ndangling))
    if molcharge != 0:
        sign = molcharge / (abs(molcharge))
    elif est_charges[iat] != 0:
        sign = est_charges[iat] / abs(est_charges[iat])

    # Then use the over/under valence with the new sign as the atomic charge
    charge = int(sign * abs(ndangling))
    # Perhaps we tried this already
    if charge == charges[iat]:
        charge = -charge
    altered_charge = {iat: charge}
    return altered_charge


def _rdmol_for_image(mol, remove_hydrogens=True):
    """
    Convert PLAMS molecule to an RDKit molecule specifically for a 2D image
    """
    from rdkit.Chem import AllChem
    from rdkit.Chem import RemoveHs

    rdmol = to_rdmol(mol, presanitize=True)

    # Flatten the molecule
    AllChem.Compute2DCoords(rdmol)
    # Remove the Hs only if there are carbon atoms in this system
    # Otherwise this will turn an OH radical into a water molecule.
    carbons = [i for i, at in enumerate(mol.atoms) if at.symbol in ["C", "Si"]]
    if remove_hydrogens and len(carbons) > 0:
        rdmol = RemoveHs(rdmol)
    else:
        for atom in rdmol.GetAtoms():
            atom.SetNoImplicit(True)

    ids = [c.GetId() for c in rdmol.GetConformers()]
    for cid in ids:
        rdmol.RemoveConformer(cid)
    return rdmol


def _MolsToGridSVG(
    mols,
    molsPerRow=3,
    subImgSize=(200, 200),
    legends=None,
    highlightAtomLists=None,
    highlightBondLists=None,
    drawOptions=None,
    **kwargs,
):
    """
    Replaces the old version of this function in our RDKit for a more recent one, with more options
    """
    from rdkit.Chem.Draw import rdMolDraw2D

    if legends is None:
        legends = [""] * len(mols)

    nRows = len(mols) // molsPerRow
    if len(mols) % molsPerRow:
        nRows += 1

    fullSize = (molsPerRow * subImgSize[0], nRows * subImgSize[1])

    d2d = rdMolDraw2D.MolDraw2DSVG(fullSize[0], fullSize[1], subImgSize[0], subImgSize[1])
    if drawOptions is not None:
        d2d.SetDrawOptions(drawOptions)
    else:
        dops = d2d.drawOptions()
        for k, v in list(kwargs.items()):
            if hasattr(dops, k):
                setattr(dops, k, v)
                del kwargs[k]

    d2d.DrawMolecules(
        list(mols),
        legends=legends or None,
        highlightAtoms=highlightAtomLists or [],
        highlightBonds=highlightBondLists or [],
        **kwargs,
    )
    d2d.FinishDrawing()
    res = d2d.GetDrawingText()
    return res
Electronic Structure

ADF

Periodic DFT

DFTB & MOPAC

Interatomic Potentials

ReaxFF

Machine Learning Potentials

Force Fields

kMC and Microkinetics

Bumblebee: OLED stacks

Fluid Thermodynamics

COSMO-RS

Workflows and Utilities

OLED workflows

ChemTraYzer2

Conformers

Reactions Discovery

AMS Driver

Properties

PES Exploration

Molecular Dynamics

Monte Carlo

Interfaces

ParAMS

PLAMS

GUI

VASP

Downloads

Windows

Mac

Linux

Documentation

Overview

Tutorials

Installation Manual

Brochures

Other Resources

Changelog

Webinars

Workshops

Knowledgebank

FAQ

Pricing and licensing

Source code for scm.plams.interfaces.molecule.rdkit