Source code for scm.plams.interfaces.molecule.rdkit


__all__ = ['add_Hs', 'apply_reaction_smarts', 'apply_template',
           'gen_coords_rdmol', 'get_backbone_atoms', 'modify_atom',
           'to_rdmol', 'from_rdmol', 'from_sequence', 'from_smiles', 'from_smarts',
           'partition_protein', 'readpdb', 'writepdb', 'get_substructure']

"""
@author: Lars Ridder
@description: A set of functions to manipulate molecules based on RDKit

This is a series of functions that apply RDKit functionality on PLAMS molecules
"""

import sys
import random
from warnings import warn
try:
    import dill as pickle
except ImportError:
    import pickle

try:
    from rdkit import Chem, Geometry
    from rdkit.Chem import AllChem
except ImportError:
    __all__ = []

from ...mol.bond import Bond
from ...mol.atom import Atom
from ...mol.molecule import Molecule


[docs]def from_rdmol(rdkit_mol, confid=-1, properties=True): """ Translate an RDKit molecule into a PLAMS molecule type. RDKit properties will be unpickled if their name ends with '_pickled'. :parameter rdkit_mol: RDKit molecule :type rdkit_mol: rdkit.Chem.Mol :parameter int confid: conformer identifier from which to take coordinates :parameter bool properties: If all Chem.Mol, Chem.Atom and Chem.Bond properties should be converted from RDKit to PLAMS format. :return: a PLAMS molecule :rtype: |Molecule| """ if isinstance(rdkit_mol, Molecule): return rdkit_mol # Create PLAMS molecule plams_mol = Molecule() total_charge = 0 try: Chem.Kekulize(rdkit_mol) except Exception: pass conf = rdkit_mol.GetConformer(id=confid) # Add atoms and assign properties to the PLAMS atom if *properties* = True for rd_atom in rdkit_mol.GetAtoms(): pos = conf.GetAtomPosition(rd_atom.GetIdx()) ch = rd_atom.GetFormalCharge() pl_atom = Atom( rd_atom.GetAtomicNum(), coords=(pos.x, pos.y, pos.z), charge=ch) if properties and rd_atom.GetPDBResidueInfo(): pl_atom.properties.pdb_info = get_PDBResidueInfo(rd_atom) plams_mol.add_atom(pl_atom) total_charge += ch # Check for R/S information stereo = str(rd_atom.GetChiralTag()) if stereo == 'CHI_TETRAHEDRAL_CCW': pl_atom.properties.stereo = 'counter-clockwise' elif stereo == 'CHI_TETRAHEDRAL_CW': pl_atom.properties.stereo = 'clockwise' # Add bonds to the PLAMS molecule for bond in rdkit_mol.GetBonds(): at1 = plams_mol.atoms[bond.GetBeginAtomIdx()] at2 = plams_mol.atoms[bond.GetEndAtomIdx()] plams_mol.add_bond(Bond(at1, at2, bond.GetBondTypeAsDouble())) # Check for cis/trans information stereo, bond_dir = str(bond.GetStereo()), str(bond.GetBondDir()) if stereo == 'STEREOZ' or stereo == 'STEREOCIS': plams_mol.bonds[-1].properties.stereo = 'Z' elif stereo == 'STEREOE' or stereo == 'STEREOTRANS': plams_mol.bonds[-1].properties.stereo = 'E' elif bond_dir == 'ENDUPRIGHT': plams_mol.bonds[-1].properties.stereo = 'up' elif bond_dir == 'ENDDOWNRIGHT': plams_mol.bonds[-1].properties.stereo = 'down' # Set charge and assign properties to PLAMS molecule and bonds if *properties* = True plams_mol.charge = total_charge if properties: prop_from_rdmol(plams_mol, rdkit_mol) for rd_atom, plams_atom in zip(rdkit_mol.GetAtoms(), plams_mol): prop_from_rdmol(plams_atom, rd_atom) for rd_bond, plams_bond in zip(rdkit_mol.GetBonds(), plams_mol.bonds): prop_from_rdmol(plams_bond, rd_bond) return plams_mol
[docs]def to_rdmol(plams_mol, sanitize=True, properties=True): """ Translate a PLAMS molecule into an RDKit molecule type. PLAMS |Molecule|, |Atom| or |Bond| properties are pickled if they are neither booleans, floats, integers, floats nor strings, the resulting property names are appended with '_pickled'. :parameter plams_mol: A PLAMS molecule :parameter bool sanitize: Kekulize, check valencies, set aromaticity, conjugation and hybridization :parameter bool properties: If all |Molecule|, |Atom| and |Bond| properties should be converted from PLAMS to RDKit format. :type plams_mol: |Molecule| :return: an RDKit molecule :rtype: rdkit.Chem.Mol """ if isinstance(plams_mol, Chem.Mol): return plams_mol # Create rdkit molecule e = Chem.EditableMol(Chem.Mol()) # Add atoms and assign properties to the RDKit atom if *properties* = True for pl_atom in plams_mol.atoms: rd_atom = Chem.Atom(pl_atom.atnum) if 'charge' in pl_atom.properties: rd_atom.SetFormalCharge(pl_atom.properties.charge) if properties: if 'pdb_info' in pl_atom.properties: set_PDBresidueInfo(rd_atom, pl_atom.properties.pdb_info) for prop in pl_atom.properties: if prop not in ('charge', 'pdb_info', 'stereo'): prop_to_rdmol(pl_atom, rd_atom, prop) # Check for R/S information if pl_atom.properties.stereo: stereo = pl_atom.properties.stereo.lower() if stereo == 'counter-clockwise': rd_atom.SetChiralTag(Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CCW) elif stereo == 'clockwise': rd_atom.SetChiralTag(Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CW) e.AddAtom(rd_atom) # Add bonds to the RDKit molecule for bond in plams_mol.bonds: a1 = plams_mol.atoms.index(bond.atom1) a2 = plams_mol.atoms.index(bond.atom2) e.AddBond(a1, a2, Chem.BondType(bond.order)) rdmol = e.GetMol() # Check for cis/trans information for pl_bond, rd_bond in zip(plams_mol.bonds, rdmol.GetBonds()): if pl_bond.properties.stereo: stereo = pl_bond.properties.stereo.lower() if stereo == 'e' or stereo == 'trans': rd_bond.SetStereo(Chem.rdchem.BondStereo.STEREOE) elif stereo == 'z' or stereo == 'cis': rd_bond.SetStereo(Chem.rdchem.BondStereo.STEREOZ) elif stereo == 'up': rd_bond.SetBondDir(Chem.rdchem.BondDir.ENDUPRIGHT) elif stereo == 'down': rd_bond.SetBondDir(Chem.rdchem.BondDir.ENDDOWNRIGHT) # Assign properties to RDKit molecule and bonds if *properties* = True if properties: for prop in plams_mol.properties: prop_to_rdmol(plams_mol, rdmol, prop) for pl_bond, rd_bond in zip(plams_mol.bonds, rdmol.GetBonds()): for prop in pl_bond.properties: if prop != 'stereo': prop_to_rdmol(pl_bond, rd_bond, prop) if sanitize: Chem.SanitizeMol(rdmol) conf = Chem.Conformer() for i, atom in enumerate(plams_mol.atoms): xyz = Geometry.Point3D(atom._getx(), atom._gety(), atom._getz()) conf.SetAtomPosition(i, xyz) rdmol.AddConformer(conf) return rdmol
pdb_residue_info_items = [ 'AltLoc', 'ChainId', 'InsertionCode', 'IsHeteroAtom', 'Name', 'Occupancy', 'ResidueName', 'ResidueNumber', 'SecondaryStructure', 'SegmentNumber', 'SerialNumber', 'TempFactor'] # 'MonomerType' was excluded because it is an rdkit type that cannot easilty be serialized def get_PDBResidueInfo(rdkit_atom): pdb_info = {} for item in pdb_residue_info_items: get_function = 'Get' + item pdb_info[item] = rdkit_atom.GetPDBResidueInfo().__getattribute__(get_function)() return pdb_info def set_PDBresidueInfo(rdkit_atom, pdb_info): atom_pdb_residue_info = Chem.AtomPDBResidueInfo() for item, value in pdb_info.items(): set_function = 'Set' + item atom_pdb_residue_info.__getattribute__(set_function)(value) rdkit_atom.SetMonomerInfo(atom_pdb_residue_info) def prop_to_rdmol(pl_obj, rd_obj, prop): """ Convert a single PLAMS property into an RDKit property. :paramter pl_obj: A PLAMS object. :type pl_obj: |Molecule|, |Atom| or |Bond|. :parameter rd_obj: An RDKit object. :type rd_obj: rdkit.Chem.Mol, rdkit.Chem.Atom or rdkit.Chem.Bond :parameter str prop: The |Settings| key of the PLAMS property. """ obj = type(pl_obj.properties.get(prop)) obj_dict = {bool: rd_obj.SetBoolProp, float: rd_obj.SetDoubleProp, int: rd_obj.SetIntProp, str: rd_obj.SetProp} if obj_dict.get(obj): obj_dict[obj](prop, pl_obj.properties.get(prop)) else: name = prop + '_pickled' try: rd_obj.SetProp(name, pickle.dumps(pl_obj.properties.get(prop), 0).decode()) except (Exception, pickle.PicklingError): pass def prop_from_rdmol(pl_obj, rd_obj): """ Convert one or more RDKit properties into PLAMS properties. :paramter pl_obj: A PLAMS object. :type pl_obj: |Molecule|, |Atom| or |Bond|. :parameter rd_obj: An RDKit object. :type rd_obj: rdkit.Chem.Mol, rdkit.Chem.Atom or rdkit.Chem.Bond """ prop_dict = rd_obj.GetPropsAsDict() for propname in prop_dict.keys(): if propname == '__computedProps': continue if '_pickled' not in propname: pl_obj.properties[propname] = prop_dict[propname] else: prop = prop_dict[propname] propname = propname.rsplit('_pickled', 1)[0] pl_obj.properties[propname] = pickle.loads(prop.encode())
[docs]def from_smiles(smiles, nconfs=1, name=None, forcefield=None, rms=0.1): """ Generates PLAMS molecule(s) from a smiles strings. :parameter str smiles: A smiles string :parameter int nconfs: Number of conformers to be generated :parameter str name: A name for the molecule :parameter str forcefield: Choose 'uff' or 'mmff' forcefield for geometry optimization and ranking of comformations. The default value None results in skipping of the geometry optimization step. :parameter float rms: Root Mean Square deviation threshold for removing similar/equivalent conformations :return: A molecule with hydrogens and 3D coordinates or a list of molecules if nconfs > 1 :rtype: |Molecule| or list of PLAMS Molecules """ smiles = str(smiles.split()[0]) smiles = Chem.CanonSmiles(smiles) rdkit_mol = Chem.AddHs(Chem.MolFromSmiles(smiles)) rdkit_mol.SetProp('smiles', smiles) return get_conformations(rdkit_mol, nconfs, name, forcefield, rms)
[docs]def from_smarts(smarts, nconfs=1, name=None, forcefield=None, rms=0.1): """ Generates PLAMS molecule(s) from a smarts strings. This allows for example to define hydrogens explicitly. However it is less suitable for aromatic molecules (use from_smiles in that case). :parameter str smarts: A smarts string :parameter int nconfs: Number of conformers to be generated :parameter str name: A name for the molecule :parameter str forcefield: Choose 'uff' or 'mmff' forcefield for geometry optimization and ranking of comformations. The default value None results in skipping of the geometry optimization step. :parameter float rms: Root Mean Square deviation threshold for removing similar/equivalent conformations. :return: A molecule with hydrogens and 3D coordinates or a list of molecules if nconfs > 1 :rtype: |Molecule| or list of PLAMS Molecules """ smiles = str(smarts.split()[0]) mol = Chem.MolFromSmarts(smiles) Chem.SanitizeMol(mol) molecule = Chem.AddHs(mol) molecule.SetProp('smiles', smiles) return get_conformations(molecule, nconfs, name, forcefield, rms)
def get_conformations(rdkit_mol, nconfs=1, name=None, forcefield=None, rms=-1): """ Generates 3D conformation(s) for an rdkit_mol :parameter rdkit_mol: RDKit molecule :type rdkit_mol: rdkit.Chem.Mol :parameter int nconfs: Number of conformers to be generated :parameter str name: A name for the molecule :parameter str forcefield: Choose 'uff' or 'mmff' forcefield for geometry optimization and ranking of comformations. The default value None results in skipping of the geometry optimization step :parameter float rms: Root Mean Square deviation threshold for removing similar/equivalent conformations. :return: A molecule with hydrogens and 3D coordinates or a list of molecules if nconfs > 1 :rtype: |Molecule| or list of PLAMS Molecules """ def MMFFenergy(cid): ff = AllChem.MMFFGetMoleculeForceField( rdkit_mol, AllChem.MMFFGetMoleculeProperties(rdkit_mol), confId=cid) try: energy = ff.CalcEnergy() except: msg = "MMFF energy calculation failed for molecule: " + Chem.MolToSmiles(rdkit_mol) + \ "\nNo geometry optimization was performed." warn(msg) energy = 1e9 return energy def UFFenergy(cid): ff = AllChem.UFFGetMoleculeForceField(rdkit_mol, confId=cid) try: energy = ff.CalcEnergy() except: msg = "MMFF energy calculation failed for molecule: " + Chem.MolToSmiles(rdkit_mol) + \ "\nNo geometry optimization was performed." warn(msg) energy = 1e9 return energy if name: rdkit_mol.SetProp('name', name) try: cids = list(AllChem.EmbedMultipleConfs(rdkit_mol, nconfs, pruneRmsThresh=rms, randomSeed=1)) except: # ``useRandomCoords = True`` prevents (poorly documented) crash for large systems cids = list(AllChem.EmbedMultipleConfs(rdkit_mol, nconfs, pruneRmsThresh=rms, randomSeed=1, useRandomCoords=True)) if forcefield: # Select the forcefield (UFF or MMFF) optimize_molecule, energy = { 'uff': [AllChem.UFFOptimizeMolecule, UFFenergy], 'mmff': [AllChem.MMFFOptimizeMolecule, MMFFenergy], }[forcefield] # Optimize and sort conformations for cid in cids: optimize_molecule(rdkit_mol, confId=cid) cids.sort(key=energy) # Remove duplicate conformations based on RMS if rms > 0: keep = [cids[0]] for cid in cids[1:]: for idx in keep: try: r = AllChem.AlignMol(rdkit_mol, rdkit_mol, cid, idx) except: r = rms + 1 message = "Alignment failed in multiple conformation generation: " message += Chem.MolToSmiles(rdkit_mol) message += "\nAssuming different conformations." warn(message) if r < rms: break else: keep.append(cid) cids = keep if nconfs == 1: return from_rdmol(rdkit_mol) else: return [from_rdmol(rdkit_mol, cid) for cid in cids]
[docs]def from_sequence(sequence, nconfs=1, name=None, forcefield=None, rms=0.1): """ Generates PLAMS molecule from a peptide sequence. Includes explicit hydrogens and 3D coordinates. :parameter str sequence: A peptide sequence, e.g. 'HAG' :parameter int nconfs: Number of conformers to be generated :parameter str name: A name for the molecule :parameter str forcefield: Choose 'uff' or 'mmff' forcefield for geometry optimization and ranking of comformations. The default value None results in skipping of the geometry optimization step. :parameter float rms: Root Mean Square deviation threshold for removing similar/equivalent conformations. :return: A peptide molecule with hydrogens and 3D coordinates or a list of molecules if nconfs > 1 :rtype: |Molecule| or list of PLAMS Molecules """ rdkit_mol = Chem.AddHs(Chem.MolFromSequence(sequence)) rdkit_mol.SetProp('sequence', sequence) return get_conformations(rdkit_mol, nconfs, name, forcefield, rms)
def calc_rmsd(mol1, mol2): """ Superimpose two molecules and calculate the root-mean-squared deviations of the atomic positions. The molecules should be identical, but the ordering of the atoms may differ. :param mol1: Molecule 1 :param mol2: Molecule 2 :return: The rmsd after superposition :rtype: float """ rdkit_mol1 = to_rdmol(mol1) rdkit_mol2 = to_rdmol(mol2) try: return AllChem.GetBestRMS(rdkit_mol1, rdkit_mol2) except: return -999
[docs]def modify_atom(mol, idx, element): """ Change atom "idx" in molecule "mol" to "element" and add or remove hydrogens accordingly :parameter mol: molecule to be modified :type mol: |Molecule| or rdkit.Chem.Mol :parameter int idx: index of the atom to be modified :parameter str element: :return: Molecule with new element and possibly added or removed hydrogens :rtype: |Molecule| """ rdmol = to_rdmol(mol) if rdmol.GetAtomWithIdx(idx).GetSymbol() == element: return mol else: e = Chem.EditableMol(rdmol) for neighbor in reversed(rdmol.GetAtomWithIdx(idx - 1).GetNeighbors()): if neighbor.GetSymbol() == 'H': e.RemoveAtom(neighbor.GetIdx()) e.ReplaceAtom(idx - 1, Chem.Atom(element)) newmol = e.GetMol() Chem.SanitizeMol(newmol) newmol = Chem.AddHs(newmol, addCoords=True) return from_rdmol(newmol)
[docs]def apply_template(mol, template): """ Modifies bond orders in PLAMS molecule according template smiles structure. :parameter mol: molecule to be modified :type mol: |Molecule| or rdkit.Chem.Mol :parameter str template: smiles string defining the correct chemical structure :return: Molecule with correct chemical structure and provided 3D coordinates :rtype: |Molecule| """ rdmol = to_rdmol(mol, sanitize=False) template_mol = Chem.AddHs(Chem.MolFromSmiles(template)) newmol = Chem.AllChem.AssignBondOrdersFromTemplate(template_mol, rdmol) return from_rdmol(newmol)
[docs]def apply_reaction_smarts( mol, reaction_smarts, complete=False, forcefield=None, return_rdmol=False): """ Applies reaction smirks and returns product. If returned as a PLAMS molecule, thismolecule.properties.orig_atoms is a list of indices of atoms that have not been changed (which can for example be used partially optimize new atoms only with the freeze keyword) :parameter mol: molecule to be modified :type mol: |Molecule| or rdkit.Chem.Mol :parameter str reactions_smarts: Reactions smarts to be applied to molecule :parameter complete: Apply reaction until no further changes occur or given fraction of reaction centers have been modified :type complete: bool or float (value between 0 and 1) :parameter forcefield: Specify 'uff' or 'mmff' to apply forcefield based geometry optimization of product structures. :type forcefield: str :param bool return_rdmol: return a RDKit molecule if true, otherwise a PLAMS molecule :return: (product molecule, list of unchanged atoms) :rtype: (|Molecule|, list of int) """ def react(reactant, reaction): """ Apply reaction to reactant and return products """ ps = reaction.RunReactants([reactant]) # if reaction doesn't apply, return the reactant if len(ps) == 0: return [(reactant, range(reactant.GetNumAtoms()))] full = len(ps) while complete: # when complete is True # apply reaction until no further changes r = random.randint(0, len(ps) - 1) reactant = ps[r][0] ps = reaction.RunReactants([reactant]) if len(ps) == 0 or len(ps) / full < (1 - complete): ps = [[reactant]] break # add hydrogens and generate coordinates for new atoms products = [] for p in ps[0]: Chem.SanitizeMol(p) q = Chem.AddHs(p) Chem.SanitizeMol(q) u = gen_coords_rdmol(q) # These are the atoms that have not changed products.append((q, u)) return products mol = to_rdmol(mol) reaction = AllChem.ReactionFromSmarts(reaction_smarts) # RDKit removes fragments that are disconnected from the reaction center # In order to keep these, the molecule is first split in separate fragments # and the results, including non-reacting parts, are re-combined afterwards frags = (Chem.GetMolFrags(mol, asMols=True)) product = Chem.Mol() unchanged = [] # List of atoms that have not changed for frag in frags: for p, u in react(frag, reaction): unchanged += [product.GetNumAtoms() + i for i in u] product = Chem.CombineMols(product, p) if forcefield: optimize_coordinates(product, forcefield, fixed=unchanged) # The molecule is returned together with a list of atom indices of the atoms # that are identical to those # in the reactants. This list can be used in subsequent partial optimization of the molecule if not return_rdmol: product = from_rdmol(product) product.properties.orig_atoms = [a + 1 for a in unchanged] return product
def gen_coords(plamsmol): """ Calculate 3D positions only for atoms without coordinates """ rdmol = to_rdmol(plamsmol) unchanged = gen_coords_rdmol(rdmol) conf = rdmol.GetConformer() for a in range(len(plamsmol.atoms)): pos = conf.GetAtomPosition(a) atom = plamsmol.atoms[a] atom._setx(pos.x) atom._sety(pos.y) atom._setz(pos.z) return [a + 1 for a in unchanged] def gen_coords_rdmol(rdmol): ref = rdmol.__copy__() conf = rdmol.GetConformer() coordDict = {} unchanged = [] maps = [] # Put known coordinates in coordDict for i in range(rdmol.GetNumAtoms()): pos = conf.GetAtomPosition(i) if (-0.0001 < pos.x < 0.0001) and (-0.0001 < pos.y < 0.0001) and \ (-0.0001 < pos.z < 0.0001): continue # atom without coordinates coordDict[i] = pos unchanged.append(i) maps.append((i, i)) # compute coordinates for new atoms, keeping known coordinates rms = 1 rs = 1 # repeat embedding and alignment until the rms of mapped atoms is sufficiently small if rdmol.GetNumAtoms() > len(maps): while rms > 0.1: AllChem.EmbedMolecule(rdmol, coordMap=coordDict, randomSeed=rs, useBasicKnowledge=True) # align new molecule to original coordinates rms = AllChem.AlignMol(rdmol, ref, atomMap=maps) rs += 1 return unchanged def optimize_coordinates(rdkit_mol, forcefield, fixed=[]): def MMFFminimize(): ff = AllChem.MMFFGetMoleculeForceField( rdkit_mol, AllChem.MMFFGetMoleculeProperties(rdkit_mol)) for f in fixed: ff.AddFixedPoint(f) try: ff.Minimize() except: warn("MMFF geometry optimization failed for molecule: " + Chem.MolToSmiles(rdkit_mol)) def UFFminimize(): ff = AllChem.UFFGetMoleculeForceField(rdkit_mol, ignoreInterfragInteractions=True) for f in fixed: ff.AddFixedPoint(f) try: ff.Minimize() except: warn("UFF geometry optimization failed for molecule: " + Chem.MolToSmiles(rdkit_mol)) optimize_molecule = { 'uff': UFFminimize, 'mmff': MMFFminimize}[forcefield] Chem.SanitizeMol(rdkit_mol) optimize_molecule() return def write_molblock(plams_mol, file=sys.stdout): file.write(Chem.MolToMolBlock(to_rdmol(plams_mol)))
[docs]def readpdb(pdb_file, removeHs=False, proximityBonding=False, return_rdmol=False): """ Generate a molecule from a PDB file :param pdb_file: The PDB file to read :type pdb_file: str or file :param bool removeHs: Hydrogens are removed if True :param bool proximityBonding: Enables automatic proximity bonding :param bool return_rdmol: return a RDKit molecule if true, otherwise a PLAMS molecule :return: The molecule :rtype: |Molecule| or rdkit.Chem.Mol """ if isinstance(pdb_file, str): pdb_file = open(pdb_file, 'r') pdb_mol = Chem.MolFromPDBBlock(pdb_file.read(), removeHs=removeHs, proximityBonding=proximityBonding) return pdb_mol if return_rdmol else from_rdmol(pdb_mol)
[docs]def writepdb(mol, pdb_file=sys.stdout): """ Write a PDB file from a molecule :parameter mol: molecule to be exported to PDB :type mol: |Molecule| or rdkit.Chem.Mol :param pdb_file: The PDB file to write to, or a filename :type pdb_file: str or file """ mol = to_rdmol(mol) if isinstance(pdb_file, str): pdb_file = open(pdb_file, 'w') pdb_file.write(Chem.MolToPDBBlock(mol))
[docs]def add_Hs(mol, forcefield=None, return_rdmol=False): """ Add hydrogens to protein molecules read from PDB. Makes sure that the hydrogens get the correct PDBResidue info. :param mol: Molecule to be protonated :type mol: |Molecule| or rdkit.Chem.Mol :param str forcefield: Specify 'uff' or 'mmff' to apply forcefield based geometry optimization on new atoms. :param bool return_rdmol: return a RDKit molecule if true, otherwise a PLAMS molecule :return: A molecule with explicit hydrogens added :rtype: |Molecule| or rdkit.Chem.Mol """ mol = to_rdmol(mol) retmol = Chem.AddHs(mol) for atom in retmol.GetAtoms(): if atom.GetPDBResidueInfo() is None and atom.GetSymbol() == 'H': bond = atom.GetBonds()[0] if bond.GetBeginAtom().GetIdx() == atom.GetIdx: connected_atom = bond.GetEndAtom() else: connected_atom = bond.GetBeginAtom() try: ResInfo = connected_atom.GetPDBResidueInfo() atom.SetMonomerInfo(ResInfo) atomname = 'H' + atom.GetPDBResidueInfo().GetName()[1:] atom.GetPDBResidueInfo().SetName(atomname) except: pass unchanged = gen_coords_rdmol(retmol) if forcefield: optimize_coordinates(retmol, forcefield, fixed=unchanged) return retmol if return_rdmol else from_rdmol(retmol)
def add_fragment(rwmol, frag, rwmol_atom_idx=None, frag_atom_idx=None, bond_order=None): molconf = rwmol.GetConformer() fragconf = frag.GetConformer() new_indices = [] for a in frag.GetAtoms(): new_index = rwmol.AddAtom(a) new_indices.append(new_index) molconf.SetAtomPosition(new_index, fragconf.GetAtomPosition(a.GetIdx())) for b in frag.GetBonds(): ba = b.GetBeginAtomIdx() ea = b.GetEndAtomIdx() rwmol.AddBond(new_indices[ba], new_indices[ea], b.GetBondType()) if bond_order: rwmol.AddBond(rwmol_atom_idx, new_indices[frag_atom_idx], Chem.BondType.values[bond_order]) rwmol.GetAtomWithIdx(new_indices[frag_atom_idx]).SetNumRadicalElectrons(0) def get_fragment(mol, indices, incl_expl_Hs=True, neutralize=True): molconf = mol.GetConformer() fragment = Chem.RWMol(Chem.Mol()) fragconf = Chem.Conformer() # Put atoms in fragment for i in indices: atom = mol.GetAtomWithIdx(i) new_index = fragment.AddAtom(atom) pos = molconf.GetAtomPosition(i) fragconf.SetAtomPosition(new_index, pos) # Put bonds in fragment for b in mol.GetBonds(): ba = b.GetBeginAtomIdx() ea = b.GetEndAtomIdx() if ba in indices and ea in indices: fragment.AddBond(indices.index(ba), indices.index(ea), b.GetBondType()) continue if not incl_expl_Hs: continue if ba in indices and mol.GetAtomWithIdx(ea).GetSymbol() == 'H': hi = fragment.AddAtom(mol.GetAtomWithIdx(ea)) fragconf.SetAtomPosition(hi, molconf.GetAtomPosition(ea)) fragment.AddBond(indices.index(ba), hi, Chem.BondType.SINGLE) continue if ea in indices and mol.GetAtomWithIdx(ba).GetSymbol() == 'H': hi = fragment.AddAtom(mol.GetAtomWithIdx(ba)) fragconf.SetAtomPosition(hi, molconf.GetAtomPosition(ba)) fragment.AddBond(indices.index(ea), hi, Chem.BondType.SINGLE) ret_frag = fragment.GetMol() Chem.SanitizeMol(ret_frag) if neutralize: for atom in ret_frag.GetAtoms(): nrad = atom.GetNumRadicalElectrons() if nrad > 0: atom.SetNumExplicitHs(atom.GetNumExplicitHs() + nrad) atom.SetNumRadicalElectrons(0) Chem.SanitizeMol(ret_frag) ret_frag.AddConformer(fragconf) return ret_frag
[docs]def partition_protein(mol, residue_bonds=None, split_heteroatoms=True, return_rdmol=False): """ Splits a protein molecule into capped amino acid fragments and caps. :param mol: A protein molecule :type mol: |Molecule| or rdkit.Chem.Mol :param tuple residue_bonds: a tuple of pairs of residue number indicating which peptide bonds to split. If none, split all peptide bonds. :param bool split_heteroatoms: if True, all bonds between a heteroatom and a non-heteroatom across residues are removed :return: list of fragments, list of caps """ mol = to_rdmol(mol) caps = [] em = Chem.RWMol(mol) if split_heteroatoms: for bond in mol.GetBonds(): resinfa = bond.GetBeginAtom().GetPDBResidueInfo() resinfb = bond.GetEndAtom().GetPDBResidueInfo() if resinfa.GetIsHeteroAtom() is not resinfb.GetIsHeteroAtom(): if resinfa.GetResidueNumber() != resinfb.GetResidueNumber(): em.RemoveBond(bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()) # Split peptide bonds pept_bond = Chem.MolFromSmarts('[C;X4;H1,H2][CX3](=O)[NX3][C;X4;H1,H2][CX3](=O)') for match in mol.GetSubstructMatches(pept_bond): if residue_bonds: resa = mol.GetAtomWithIdx(match[1]).GetPDBResidueInfo().GetResidueNumber() resb = mol.GetAtomWithIdx(match[3]).GetPDBResidueInfo().GetResidueNumber() if (resa, resb) not in residue_bonds and (resb, resa) not in residue_bonds: continue cap = get_fragment(mol, match[0:5]) cap = add_Hs(cap, return_rdmol=True) caps.append(cap if return_rdmol else from_rdmol(cap)) cap_o_ind = cap.GetSubstructMatch(Chem.MolFromSmarts('[C;X4][CX3]=O')) cap_o = get_fragment(cap, cap_o_ind, neutralize=False) cap_n_ind = cap.GetSubstructMatch(Chem.MolFromSmarts('O=[CX3][NX3][C;X4]'))[2:] cap_n = get_fragment(cap, cap_n_ind, neutralize=False) em.RemoveBond(match[1], match[3]) add_fragment(em, cap_o, match[3], 1, 1) add_fragment(em, cap_n, match[1], 0, 1) # Split disulfide bonds ss_bond = Chem.MolFromSmarts('[C;X4;H1,H2]SS[C;X4;H1,H2]') for match in mol.GetSubstructMatches(ss_bond): cap = get_fragment(mol, match[0:5]) cap = add_Hs(cap, return_rdmol=True) caps.append(cap if return_rdmol else from_rdmol(cap)) cap_s_ind = cap.GetSubstructMatch(Chem.MolFromSmarts('[C;X4]SS[C;X4]')) cap_s1 = get_fragment(cap, cap_s_ind[0:2], neutralize=False) cap_s2 = get_fragment(cap, cap_s_ind[2:4], neutralize=False) em.RemoveBond(match[1], match[2]) add_fragment(em, cap_s1, match[2], 1, 1) add_fragment(em, cap_s2, match[1], 0, 1) frags = Chem.GetMolFrags(em.GetMol(), asMols=True, sanitizeFrags=False) if not return_rdmol: frags = [from_rdmol(frag) for frag in frags] return frags, caps
def charge_AAs(mol, return_rdmol=False): ionizations = { 'ARG_NH2': 1, 'LYS_NZ': 1, 'GLU_OE2': -1, 'ASP_OD2': -1} mol = to_rdmol(mol) for atom in mol.GetAtoms(): resinfo = atom.GetPDBResidueInfo() res_atom = resinfo.GetResidueName() + '_' + resinfo.GetName().strip() try: atom.SetFormalCharge(ionizations[res_atom]) Chem.SanitizeMol(mol) except KeyError: pass Chem.SanitizeMol(mol) return mol if return_rdmol else from_rdmol(mol)
[docs]def get_backbone_atoms(mol): """ Return a list of atom indices corresponding to the backbone atoms in a peptide molecule. This function assumes PDB information in properties.pdb_info of each atom, which is the case if the molecule is generated with the "readpdb" or "from_sequence" functions. :parameter mol: a peptide molecule :type mol: |Molecule| or rdkit.Chem.Mol :return: a list of atom indices :rtype: list """ mol = from_rdmol(mol) backbone = ['N', 'CA', 'C', 'O'] return [a for a in range(1, len(mol) + 1) if str(mol[a].properties.pdb_info.Name).strip() in backbone]
[docs]def get_substructure(mol, func_list): """ Search for functional groups within a molecule based on a list of reference functional groups. SMILES strings, PLAMS and/or RDKit molecules can be used interchangeably in "func_list". Example: .. code:: python >>> mol = from_smiles('OCCO') # Ethylene glycol >>> func_list = ['[H]O', 'C[N+]', 'O=PO'] >>> get_substructure(mol, func_list) {'[H]O': [(<scm.plams.mol.atom.Atom at 0x125183518>, <scm.plams.mol.atom.Atom at 0x1251836a0>), (<scm.plams.mol.atom.Atom at 0x125183550>, <scm.plams.mol.atom.Atom at 0x125183240>)]} :parameter mol: A PLAMS molecule. :type mol: |Molecule| :parameter list func_list: A list of functional groups. Functional groups can be represented by SMILES strings, PLAMS and/or RDKit molecules. :return: A dictionary with functional groups from "func_list" as keys and a list of n-tuples with matching PLAMS |Atom| as values. """ def _to_rdmol(functional_group): """ Turn a SMILES strings, RDKit or PLAMS molecules into an RDKit molecule. """ if isinstance(functional_group, str): # RDKit tends to remove explicit hydrogens if SANITIZE_ADJUSTHS is enabled sanitize = Chem.SanitizeFlags.SANITIZE_ALL^Chem.SanitizeFlags.SANITIZE_ADJUSTHS ret = Chem.MolFromSmiles(functional_group, sanitize=False) Chem.rdmolops.SanitizeMol(ret, sanitizeOps=sanitize) return ret elif isinstance(functional_group, Molecule): return to_rdmol(functional_group) elif isinstance(functional_group, Chem.Mol): return functional_group raise TypeError('get_substructure: ' + str(type(functional_group)) + ' is not a supported \ object type') def _get_match(mol, rdmol, functional_group): """ Perform a substructure match on "mol". If a match is found, return a list of n-tuples consisting PLAMS |Atom|. Otherwise return False. """ matches = rdmol.GetSubstructMatches(functional_group) if matches: return [tuple(mol[j+1] for j in idx_tup) for idx_tup in matches] return False rdmol = to_rdmol(mol) rdmol_func_list = [_to_rdmol(i) for i in func_list] gen = (_get_match(mol, rdmol, i) for i in rdmol_func_list) return {key: value for key, value in zip(func_list, gen) if value}