#!/usr/bin/env python
from scm.plams.core.errors import TrajectoryError
import numpy
from scm.plams.mol.molecule import Molecule
from scm.plams.tools.geometry import cell_shape, cellvectors_from_shape
from scm.plams.trajectories.trajectoryfile import TrajectoryFile
__all__ = ["XYZTrajectoryFile", "create_xyz_string"]
[docs]class XYZTrajectoryFile(TrajectoryFile):
"""
Class representing an XYZ file containing a molecular trajectory
An instance of this class has the following attributes:
* ``file_object`` -- A Python :py:class:`file` object, referring to the actual XYZ file
* ``position`` -- The frame to which the cursor is currently pointing in the XYZ file
* ``mode`` -- Designates whether the file is in read or write mode ('r' or 'w')
* ``ntap`` -- The number of atoms in the molecular system (needs to be constant throughout)
* ``elements`` -- The elements of the atoms in the system (needs to be constant throughout)
An |XYZTrajectoryFile| object behaves very similar to a regular file object.
It has read and write methods (:meth:`read_next` and :meth:`write_next`)
that read and write from/to the position of the cursor in the ``file_object`` attribute.
If the file is in read mode, an additional method :meth:`read_frame` can be used that moves
the cursor to any frame in the file and reads from there.
The amount of information stored in memory is kept to a minimum, as only information from the current frame
is ever stored.
Reading and writing to and from the files can be done as follows::
>>> from scm.plams import XYZTrajectoryFile
>>> xyz = XYZTrajectoryFile('old.xyz')
>>> mol = xyz.get_plamsmol()
>>> xyzout = XYZTrajectoryFile('new.xyz',mode='w')
>>> for i in range(xyz.get_length()) :
>>> crd,cell = xyz.read_frame(i,molecule=mol)
>>> xyzout.write_next(molecule=mol)
The above script reads information from the XYZ file ``old.xyz`` into the |Molecule| object ``mol``
in a step-by-step manner.
The |Molecule| object is then passed to the :meth:`write_next` method of the new |XYZTrajectoryFile|
object corresponding to the new xyz file ``new.xyz``.
The exact same result can also be achieved by iterating over the instance as a callable
>>> xyz = XYZTrajectoryFile('old.xyz')
>>> mol = xyz.get_plamsmol()
>>> xyzout = XYZTrajectoryFile('new.xyz',mode='w')
>>> for crd,cell in xyz(mol) :
>>> xyzout.write_next(molecule=mol)
This procedure requires all coordinate information to be passed to and from the |Molecule| object
for each frame, which can be time-consuming.
It is therefore also possible to bypass the |Molecule| object when reading through the frames::
>>> xyz = XYZTrajectoryFile('old.xyz')
>>> xyzout = XYZTrajectoryFile('new.xyz',mode='w')
>>> xyzout.set_elements(xyz.get_elements())
>>> for crd,cell in xyz :
>>> xyzout.write_next(coords=crd)
>>> xyzout.close()
By default the write mode will create a minimal version of the XYZ file, containing only elements
and coordinates.
Additional information can be written to the file by supplying additional arguments
to the :meth:`write_next` method.
The additional keywords `step` and `energy` trigger the writing of a remark containing
the molecule name, the step number, the energy, and the lattice vectors.
>>> mol = Molecule('singleframe.xyz')
>>> xyzout = XYZTrajectoryFile('new.xyz',mode='w')
>>> xyzout.set_name('MyMol')
>>> xyzout.write_next(molecule=mol, step=0, energy=5.)
"""
[docs] def __init__(self, filename, mode="r", fileobject=None, ntap=None):
"""
Initiates an XYZTrajectoryFile object
* ``filename`` -- The path to the XYZ file
* ``mode`` -- The mode in which to open the XYZ file ('r' or 'w')
* ``fileobject`` -- Optionally, a file object can be passed instead (filename needs to be set to None)
* ``ntap`` -- If the file is in write mode, the number of atoms needs to be passed here
"""
TrajectoryFile.__init__(self, filename, mode, fileobject, ntap)
# XYZ specific attributes
self.name = "PlamsMol"
# Specific XYZ stuff
self.include_historydata = False
self.historydata = None
self.nveclines = 0
# Required setup before frames can be read/written
if self.mode == "r":
self._read_header()
elif self.mode == "a":
self._move_cursor_to_append_pos()
[docs] def store_historydata(self):
"""
Additional data should be read from/written to file
"""
self.include_historydata = True
[docs] def set_name(self, name):
"""
Sets the name of the system, in case an extensive write is requested
* ``name`` -- A string containing the name of the molecule
"""
self.name = name
def _read_header(self):
"""
Set up info required for reading frames
"""
line = self.file_object.readline()
self.ntap = int(line.split()[0])
if self.coords.shape == (0, 3):
self.coords = numpy.zeros((self.ntap, 3))
self.file_object.readline()
elements = []
for i in range(self.ntap):
line = self.file_object.readline()
elements.append(line.split()[0])
self.elements = elements
# See if there are any vector lines
while True:
line = self.file_object.readline()
if len(line) > 0 and line.split()[0][:3] == "VEC":
self.nveclines += 1
else:
break
self.file_object.seek(0)
[docs] def read_next(self, molecule=None, read=True):
"""
Reads coordinates from the current position of the cursor and returns it
* ``molecule`` -- |Molecule| object in which the new coordinates need to be stored
* ``read`` -- If set to False the cursor will move to the next frame without reading
"""
if not read and not self.firsttime:
return self._move_cursor_without_reading()
cell = None
# Read the coordinates
crd, cell = self._read_coordinates(molecule)
if crd is None:
return None, None # End of file is reached
if self.firsttime:
self.firsttime = False
self.position += 1
return self.coords, cell
def _read_coordinates(self, molecule):
"""
Read the coordinates from file, and place them in the molecule
"""
cell = None
for i in range(2):
line = self.file_object.readline()
if i == 0 and len(line.split()) > 1:
raise TrajectoryError("Number of atoms changes. Try XYZHistoryFile")
elif i == 0 and int(line.split()[0]) != self.ntap:
raise TrajectoryError("Number of atoms changes. Try XYZHistoryFile")
if len(line) == 0:
return None, None # End of file is reached
# Handle the comment line
historydata = data_from_xyzcomment(line)
if "Lattice" in historydata:
cell = historydata["Lattice"]
del historydata["Lattice"]
if self.include_historydata:
self.historydata = historydata
# Reade coordinates
for i in range(self.ntap):
line = self.file_object.readline()
self.coords[i, :] = [float(w) for w in line.split()[1:4]]
# Possibly read lattice
lattice = []
for i in range(self.nveclines):
line = self.file_object.readline()
words = line.split()
lattice.append([float(w) for w in words[1:]])
if cell is None and len(lattice) > 0:
cell = lattice
if isinstance(molecule, Molecule):
self._set_plamsmol(self.coords, cell, molecule)
return self.coords, cell
def _is_endoffile(self):
"""
If the end of file is reached, return coords and cell as None
"""
end = False
for i in range(self.ntap + 2 + self.nveclines):
line = self.file_object.readline()
if len(line) == 0:
end = True
break
return end
[docs] def write_next(self, coords=None, molecule=None, cell=[0.0, 0.0, 0.0], conect=None, historydata=None):
"""
Write frame to next position in trajectory file
* ``coords`` -- A list or numpy array of (``ntap``,3) containing the system coordinates
* ``molecule`` -- A molecule object to read the molecular data from
* ``cell`` -- A set of lattice vectors or cell diameters
* ``conect`` -- A dictionary containing connectivity info (not used)
* ``historydata`` -- A dictionary containing additional variables to be written to the comment line
The ``historydata`` dictionary can contain for example:
('Step','Energy'), the frame number and the energy respectively
.. note::
Either ``coords`` or ``molecule`` are mandatory arguments
"""
if isinstance(molecule, Molecule):
coords, cell, elements = self._read_plamsmol(molecule)[:3]
if self.position == 0:
self.elements = elements
cell = self._convert_cell(cell)
self._write_moldata(coords, cell, historydata)
self.position += 1
def _write_moldata(self, coords, cell, historydata):
"""
Write all molecular info to file
"""
if historydata is None:
historydata = {}
if self.include_historydata and len(historydata) > 0:
step = self.position
if "Step" in historydata:
step = historydata["Step"]
energy = 0.0
if "Energy" in historydata:
energy = historydata["Energy"]
box = None
if cell is not None:
# box = PDBMolecule().box_from_vectors(cell)
box = cell_shape(cell)
name = self.name
if "Name" in historydata:
name = historydata["Name"]
line = None
if "Line" in historydata:
line = historydata["Line"]
block = create_xyz_string(self.elements, coords, energy, box, step, name, line)
else:
block = create_xyz_string(self.elements, coords)
self.file_object.write(block)
def _rewind_to_first_frame(self):
"""
Rewind the file to the first frame
"""
self.file_object.seek(0)
self.firsttime = True
self.position = 0
def _rewind_n_frames(self, nframes):
"""
Rewind the file by nframes frames
"""
new_frame = self.position - nframes
self._rewind_to_first_frame()
for i in range(new_frame):
self.read_next(read=False)
def create_xyz_string(elements, coords, energy=None, box=None, step=None, name="PlamsMol", line=None):
"""
Write an XYZ file based on the elements and the coordinates of the atoms
"""
block = "%i\n" % (len(elements))
if line is not None:
block += "%s" % (line)
elif step is not None:
if energy is None:
energy = 0.0
comment = "%-40s%6i %16.6f" % (name, step, energy)
if box is not None:
for value in box:
comment += "%7.2f" % (value)
block += comment
block += "\n"
for el, crd in zip(elements, coords):
block += "%8s " % (el)
for x in crd:
block += "%20.10f " % (x)
block += "\n"
return block
def data_from_xyzcomment(line):
"""
Convert XYZ comment line (cell angles are in radians)
"""
words = line.split()
if len(words) == 0:
return {}
if len(words) < 3:
return {"Line": line}
xyzdic = {}
xyzdic["Name"] = words[0]
try:
xyzdic["Step"] = int(words[1])
xyzdic["Energy"] = float(words[2])
except ValueError:
xyzdic["Line"] = line
return xyzdic
if len(words) >= 6:
try:
box = [float(w) for w in words[3:6]]
except ValueError:
return xyzdic
if len(words) >= 9:
try:
angles = [float(w) for w in words[6:9]]
except ValueError:
return xyzdic
angles = [a for a in angles]
box += angles
lattice = cellvectors_from_shape(box)
xyzdic["Lattice"] = lattice
return xyzdic