Hydrogen bonds from MD

SCM Python tools define hydrogen bonds as X1-H—X2 patterns with an maximum X1-X2 distance of 3.2 Angstrom, a maximum H-X distance of 2.5 Angstrom, and maximum angle of 36 degrees. Here, we present a script that extracts the number of hydrogen bonds for a selected set of atoms along a trajectory, and creates a histogram. The atoms are selected via an input file named indices.txt.

Example usage: (Download get_hbonds.py)

The script get_hbonds.py accepts the name of an MD trajectory RKF file, and the name of a text file with the indices (and/or element names) of the atoms for which the number of hydrogen bonds are to be computed. The script can be called from the command line as follows.

amspython get_hbonds.py path/to/ams.rkf path/to/indices.txt

In its simplest form, the input file indices.txt will contain only one or more element names.

O C

A more complicated file will contain specific atom indices. At the bottom of this page an example script (get_water_indices.py) is presented that creates such a file for all oxygen atoms that belong to a water molecule.

The script get_hbonds.py.

import sys
from io import StringIO
import numpy
from scm.plams import RKFTrajectoryFile
from scm.flexmd import PDBMolecule


def main():
    """
    Main sctipt
    """
    if sys.argv[1] == "-h":
        raise SystemExit("amspython get_hbonds.py path/to/ams.rkf path/to/indices.txt")

    # Deal with the input
    filename = sys.argv[1]
    indexfile = sys.argv[2]
    indices, elements = read_indices(indexfile)

    # Open the trajectory file
    rkf = RKFTrajectoryFile(filename)
    mol = rkf.get_plamsmol()
    nsteps = len(rkf)
    print("NSteps: ", nsteps)
    for i, at in enumerate(mol.atoms):
        if at.symbol in elements:
            if not i in indices:
                indices.append(i)

    # Create the PDB object
    print("Creating the PDB object")
    f = StringIO()
    mol.writexyz(f)
    f.seek(0)
    text = f.read()
    f.close()
    pdb = PDBMolecule(xyzstring=text, bonddetectorflag=False)
    print("PDB created")

    # Get the actual number of HBonds per selected atom
    heavy_atoms = [i for i, at in enumerate(mol.atoms) if not at.symbol == "H"]
    hydrogens = [i for i, at in enumerate(mol.atoms) if at.symbol == "H"]
    values = []
    print("%8s %8s %s" % ("Step", "Atom", "Neighbors"))
    for istep in range(nsteps):
        crd, cell = rkf.read_frame(istep, molecule=mol)
        # Create neighborlists
        d_indices, boxlist = pdb.divide_into_cubes(range(len(mol)))
        pdb.coords = crd
        pdb.set_cellvectors(cell)
        for iat in indices:
            atomlists = (heavy_atoms, hydrogens)
            atoms, hs = pdb.find_neighbours_using_cubes(iat, d_indices, boxlist, atomlists)
            hbonds = pdb.get_hbonds(iat, atoms, hs)
            print("%8i %8i %s" % (istep, iat, str(hbonds)))
            values.append(len(hbonds))

    # Compute the histogram
    bins = [i for i in range(max(values) + 1)]
    yvalues, xvalues = numpy.histogram(values, bins=bins)

    # Write to output
    outfile = open("hist.txt", "w")
    for x, y in zip(xvalues, yvalues):
        outfile.write("%20.10f %20.10f\n" % (x, y / nsteps))
    outfile.close()


def read_indices(indexfilename):
    """
    Read atom indices from a file
    """
    infile = open(indexfilename)
    lines = infile.readlines()
    infile.close()
    indices = []
    elements = set()
    for line in lines:
        words = line.split()
        if len(words) == 0:
            continue
        digits = [w.isdigit() for w in words]
        for w in words:
            if w.isdigit():
                indices.append(int(w))
            else:
                elements.add(w)
    return indices, elements


if __name__ == "__main__":
    main()

(Download get_water_indices.py)

The script get_water_indices can create the file indices.txt that is required by the get_hbonds.py script, for the specific case where the indices of all water oxygen atoms are required. The script accepts the name of the MD trajectory RKF file.

import sys
from scm.plams import RKFTrajectoryFile


def main():
    """
    Main body of the script
    """
    filename = sys.argv[1]

    rkf = RKFTrajectoryFile(filename)
    mol = rkf.get_plamsmol()
    rkf.close()

    oxygens = get_water_oxygens(mol)

    outfile = open("indices.txt", "w")
    for i, io in enumerate(oxygens):
        if i % 10 == 0 and i != 0:
            outfile.write("\n")
        outfile.write("%8i " % (io))
    outfile.close()


def get_water_oxygens(mol):
    """
    Select the oxygens in water only
    """
    wo = []
    for iat, at in enumerate(mol.atoms):
        if at.symbol != "O":
            continue
        neighbors = [n.symbol for n in mol.neighbors(at)]
        if neighbors == ["H", "H"]:
            wo.append(iat)
    return wo


if __name__ == "__main__":
    main()