{ "cells": [ { "cell_type": "markdown", "id": "d9f04f8d-949a-421d-b65d-1b43878cc057", "metadata": {}, "source": [ "## Complete guide to storing and converting PLAMS Molecules between Python libraries and file formats" ] }, { "cell_type": "code", "execution_count": 1, "id": "f609f4c1-5dab-4349-9250-d4bb89ddb656", "metadata": {}, "outputs": [], "source": [ "import os\n", "from os.path import expandvars\n", "from pathlib import Path\n", "\n", "# Make sure to source amsbashrc.sh before launching this example so that\n", "# the AMSHOME environment variable is set. Requires AMS2025+ to run this example.\n", "\n", "AMSHOME = os.environ[\"AMSHOME\"]\n", "cif_file = f\"{AMSHOME}/atomicdata/Molecules/IZA-Zeolites/ABW.cif\"\n", "xyz_file = f\"{AMSHOME}/scripting/scm/params/examples/benchmark/ISOL6/e_13.xyz\"\n", "badxyz_file = f\"{AMSHOME}/scripting/scm/plams/unit_tests/xyz/reactant2.xyz\"\n", "\n", "assert Path(cif_file).exists(), f\"{cif_file} does not exist.\"\n", "assert Path(xyz_file).exists(), f\"{xyz_file} does not exist.\"\n", "\n", "\n", "def head(filename, n: int = 4):\n", " \"\"\"Print the first ``n`` lines of a file\"\"\"\n", " with open(filename, \"r\") as f:\n", " lines = f.readlines()\n", " lines = lines[: min(n, len(lines))]\n", " print(\"\".join(lines))" ] }, { "cell_type": "markdown", "id": "4b30b7c2-f933-4d69-a5c3-7e19b8bfb447", "metadata": {}, "source": [ "### SMILES" ] }, { "cell_type": "markdown", "id": "03c12f87-f53b-42b7-974e-1b6271e68019", "metadata": {}, "source": [ "#### Load PLAMS Molecule from SMILES string" ] }, { "cell_type": "code", "execution_count": 2, "id": "9ad1c2c4-29e9-4320-bf8b-1df2df7b7831", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "type(mol)=\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from scm.plams import from_smiles, Molecule, plot_molecule\n", "\n", "mol = from_smiles(\"CCCCO\")\n", "print(f\"{type(mol)=}\")\n", "plot_molecule(mol);" ] }, { "cell_type": "markdown", "id": "df623aba-c463-4734-aedb-86b259ec52b9", "metadata": {}, "source": [ "#### Convert PLAMS Molecule to SMILES string\n", "\n", "Note: This requires that bonds are defined in the PLAMS Molecule." ] }, { "cell_type": "code", "execution_count": 3, "id": "8f21e769-031a-414a-ac65-cb5cfe5a193b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CCCCO\n" ] } ], "source": [ "from scm.plams import to_smiles\n", "\n", "smiles = to_smiles(mol)\n", "print(smiles)" ] }, { "cell_type": "markdown", "id": "b9840965-c354-41fc-8cab-2306fc10b26f", "metadata": {}, "source": [ "### .xyz files" ] }, { "cell_type": "markdown", "id": "cb2fdb27-47ad-4835-acbb-d93d6df4bcf2", "metadata": {}, "source": [ "#### Load PLAMS Molecule from .xyz file" ] }, { "cell_type": "code", "execution_count": 4, "id": "c50ddd01-c601-4bb1-855c-5c57370123b7", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "type(mol)=\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from scm.plams import Molecule, plot_molecule\n", "\n", "mol = Molecule(xyz_file)\n", "print(f\"{type(mol)=}\")\n", "plot_molecule(mol);" ] }, { "cell_type": "markdown", "id": "d2967ded-7cad-4d4c-92d2-d0cead4ebaf4", "metadata": { "tags": [] }, "source": [ "#### Write PLAMS Molecule to .xyz file" ] }, { "cell_type": "code", "execution_count": 5, "id": "01e5758f-cefa-4d80-8c75-2ec08fc0ff33", "metadata": {}, "outputs": [], "source": [ "mol.properties.comment = \"The comment line (2nd line after the number of atoms)\"\n", "mol.write(\"out.xyz\")" ] }, { "cell_type": "code", "execution_count": 6, "id": "e6299460-fde0-4f9f-9df2-5d9cdf61a3fa", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "26\n", "The comment line (2nd line after the number of atoms)\n", " C -1.47627000 -1.15316000 -0.29279600\n", " C -2.86892000 -1.15881000 -0.29469200\n", "\n" ] } ], "source": [ "head(\"out.xyz\")" ] }, { "cell_type": "markdown", "id": "40ad8899-7d61-4f3f-a865-17edae3fc99c", "metadata": {}, "source": [ "### .cif files" ] }, { "cell_type": "markdown", "id": "34858f49-4e5b-44d5-8b25-1722e3cb3e05", "metadata": {}, "source": [ "#### Load PLAMS Molecule from .cif file\n", "\n", "PLAMS cannot natively read .cif files. Instead, go through another library, for example ASE or pymatgen." ] }, { "cell_type": "code", "execution_count": 7, "id": "bd212e28-b10d-46c3-b1e8-e5022d19efee", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "type(mol)=\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/Users/ormrodmorley/Documents/code/ams/amshome/bin.release/python3.8/lib/python3.8/site-packages/ase/io/cif.py:401: UserWarning: crystal system 'orthorhombic' is not interpreted for space group Spacegroup(74, setting=1). This may result in wrong setting!\n", " warnings.warn(\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from ase.io import read\n", "from scm.plams import fromASE\n", "\n", "mol: Molecule = fromASE(read(cif_file))\n", "print(f\"{type(mol)=}\")\n", "plot_molecule(mol);" ] }, { "cell_type": "markdown", "id": "d826de39-6929-4c6b-a1f7-488b1b0003cf", "metadata": {}, "source": [ "#### Write PLAMS Molecule to .cif file\n", "\n", "PLAMS cannot natively export to .cif files. Instead, go through another library, for example ASE or pymatgen.\n", "\n", "ASE can be used to write many file formats. See https://wiki.fysik.dtu.dk/ase/ase/io/io.html" ] }, { "cell_type": "code", "execution_count": 8, "id": "4de4e588-9f26-48b8-a306-c7ad498ef511", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "data_image0\n", "_chemical_formula_structural O16Si8\n", "_chemical_formula_sum \"O16 Si8\"\n", "_cell_length_a 9.873\n", "\n" ] } ], "source": [ "from scm.plams import toASE\n", "\n", "toASE(mol).write(\"out.cif\")\n", "head(\"out.cif\")" ] }, { "cell_type": "markdown", "id": "3b10bce0-fd11-4803-bb72-fe614ff435b7", "metadata": {}, "source": [ "### AMS .in system block format\n", "\n", "#### Write PLAMS Molecule to AMS .in system file" ] }, { "cell_type": "code", "execution_count": 9, "id": "2026498d-f78e-4337-993a-109a05094a8d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "System\n", " Atoms\n", " O 4.9365000000 1.3135000000 3.0896710000\n", " O 0.0000000000 3.9405000000 7.4746710000\n", "\n" ] } ], "source": [ "mol.write(\"ams_system_block.in\")\n", "head(\"ams_system_block.in\")" ] }, { "cell_type": "markdown", "id": "c0788fef-dd4d-4851-919f-4f423eb15f43", "metadata": {}, "source": [ "#### Load PLAMS Molecule from AMS .in system file" ] }, { "cell_type": "code", "execution_count": 10, "id": "1846a459-afdc-48d6-98c6-3741c41d1453", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from scm.plams import Molecule\n", "\n", "mol = Molecule(\"ams_system_block.in\")\n", "plot_molecule(mol);" ] }, { "cell_type": "markdown", "id": "4e3960d8-74af-41a6-92bf-b0ff798bef38", "metadata": {}, "source": [ "### POSCAR/CONTCAR (VASP input format)" ] }, { "cell_type": "markdown", "id": "e1d58bbd-46f0-42d0-91c4-a781e52fcf74", "metadata": {}, "source": [ "#### Write PLAMS Molecule to POSCAR/CONTCAR (VASP input format)\n", "\n", "ASE can be used to write many file formats. See https://wiki.fysik.dtu.dk/ase/ase/io/io.html" ] }, { "cell_type": "code", "execution_count": 11, "id": "13ba6b79-71e3-4cdb-bd9c-cebbfb25c9e6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " O Si \n", " 1.0000000000000000\n", " 9.8729999999999993 0.0000000000000000 0.0000000000000000\n", " 0.0000000000000000 5.2539999999999996 0.0000000000000000\n", " 0.0000000000000000 0.0000000000000000 8.7699999999999996\n", " O Si \n", " 16 8\n", "Cartesian\n", " 4.9364999999999997 1.3134999999999999 3.0896710000000001\n", " 0.0000000000000000 3.9405000000000001 7.4746709999999998\n", "\n" ] } ], "source": [ "from scm.plams import toASE\n", "\n", "toASE(mol).write(\"POSCAR\")\n", "head(\"POSCAR\", 10)" ] }, { "cell_type": "markdown", "id": "da19ccd8-8f02-4fb7-a8dd-daf850ace16d", "metadata": {}, "source": [ "#### Load PLAMS Molecule from POSCAR/CONTCAR (VASP input format)" ] }, { "cell_type": "code", "execution_count": 12, "id": "9d4538fe-ebf2-483e-9cb9-2bb79e74265d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "type(mol)=\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from scm.plams import fromASE\n", "from ase.io import read\n", "\n", "mol: Molecule = fromASE(read(\"POSCAR\"))\n", "\n", "print(f\"{type(mol)=}\")\n", "plot_molecule(mol);" ] }, { "cell_type": "markdown", "id": "22da54f8-642f-4251-bfca-19ff3f29ebf8", "metadata": {}, "source": [ "### ASE Atoms Python class" ] }, { "cell_type": "markdown", "id": "81ca9d0c-230c-4bde-ba32-7b84444e7d37", "metadata": {}, "source": [ "#### Convert PLAMS Molecule to ASE Atoms" ] }, { "cell_type": "code", "execution_count": 13, "id": "f475f91d-b65c-4566-a35d-dba3fd11c041", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "type(mol)=\n", "mol.get_formula()='O16Si8'\n", "type(ase_atoms)=\n", "ase_atoms.get_chemical_formula()='O16Si8'\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from scm.plams import toASE\n", "from ase import Atoms\n", "from ase.visualize.plot import plot_atoms\n", "import matplotlib.pyplot as plt\n", "\n", "print(f\"{type(mol)=}\")\n", "print(f\"{mol.get_formula()=}\")\n", "\n", "ase_atoms: Atoms = toASE(mol)\n", "print(f\"{type(ase_atoms)=}\")\n", "print(f\"{ase_atoms.get_chemical_formula()=}\")\n", "\n", "_, ax = plt.subplots(figsize=(2, 2))\n", "plot_atoms(ase_atoms, rotation=\"-85x,5y,0z\", ax=ax);" ] }, { "cell_type": "markdown", "id": "3627b652-b523-4113-a2f6-1cf716efbda7", "metadata": {}, "source": [ "#### Convert ASE Atoms to PLAMS Molecule" ] }, { "cell_type": "code", "execution_count": 14, "id": "3cf52bd7-5892-48f7-bc6b-9a5f24ca6ae2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "type(mol)=\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from scm.plams import fromASE, plot_molecule, Molecule\n", "\n", "mol: Molecule = fromASE(ase_atoms)\n", "print(f\"{type(mol)=}\")\n", "plot_molecule(mol, rotation=\"-85x,5y,0z\");" ] }, { "cell_type": "markdown", "id": "af5218b8-d203-4ceb-8d91-c4097068716a", "metadata": {}, "source": [ "### RDKit Mol Python class" ] }, { "cell_type": "markdown", "id": "9dd59753-79ca-45e8-9ba5-701e1010fb07", "metadata": {}, "source": [ "#### Convert PLAMS Molecule to RDKit Mol" ] }, { "cell_type": "code", "execution_count": 15, "id": "a9d0f944-cfcf-40bd-b874-0d4b081788af", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "type(rdkit_mol)=\n" ] }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAPoAAAD6CAIAAAAHjs1qAAAGV0lEQVR4nO3d0ZKbOBBAUbOVP3Y+w3wz++DaicuQmQWrW1Lfe55mU1mDxhchY+Ms27bdJIZ/eu+AlMfcBWLuAjF3gZi7QMxdIOYuEHMXiLkLxNwFYu4CMXeBmLtAzF0g5i4QcxeIuQvE3AVi7gIxd4GYu0DMXSDmLhBzF4i5C8TcBWLuAjF3gZi7QMxdIOYuEHMXiLkLxNwFYu4CMXeBmLtAzF0g5i4QcxeIuQvE3AVi7gIxd4GYu0DMXSDmLhBzF4i5C8TcBWLuAjF3gZi7QMxdIOYuEHMXiLkLxNwFYu4CMXeBmLtAzF0g5i4QcxeIuQvE3AVi7gIxd4GYu0DMXSDmLhBzF4i5C8TcBWLuAjF3gZi7QMxdIOYuEHMXiLkLxNwFYu4CMXeBmLtAzF0g5i4QcxfI3Lmv63r4c6UdIIwxzdy5S6f86r0Dn+o+3yTsAGGMOabP/X6/P3/o9ZQk7ABhjDlczAjE3EWy1fJ4PIptqOOmO44xiLP7Feu6fi1n893v99nX0L1Uy90UWul7SAeplnuCETrwqL6mYO6m8LkRDukIBXMPNU4HHtUXLNu29d6HEMsSMrSgh72GMMa2nN1PGK2DbduWZem9FzMpm7spXDPaId1W5bHdbre2xY/5uyKMsZXpPyL2jeYT1YAzH2GMDZVdzEh7ZXOPmKVGez1AGGNbNXOvfUbWZZXX7q+uHQD7/+s5+Y1wLO1348NZ+evRxhljc+1n9+538h52cO3JOzyzD3u6//DDsW8PNeYYP1RtMVN1WnoDGWZzIYuZoT7L8WEZh2f2vqf7nE2XXNKEzO73/0Q8+DeCnp6qZ/Yf5Q88eiVcbTHzJm5+6nUM1JtxM9W5MtPwFeregEuaHPljjF0Jf/hy/kc5t/ceDqT56PYPmPAL7Li5/I2+1hJRTvhiptddCDnvOO7/pJ60MSZsJGPt/vt37GvW0GXMODoOKqH4dV0TBpeRe+gwkiPoNcGXPID3Xq/mRVzZm/6X2GVqz9/oCLmPsA8fmvtCZIEn4P8YZJgFXqhMnPthBJnvOH7/JxrQxLnvZc6Cidcrhpjan2Y/qmd6m2k/ofbak0OnUri2813S/3Gju1F/95f7PmmdZ46f8viu77enoXsKp3bg8oGRPMy/rRhf//PC/izLn+5ff46WN7sfjvCncZ74NXQ5bi9v9N
SBMdR5rNX+ZFb+JXXt3nbVN9Q6Mi3KzFGHDmrbMt5GfZOae5cRzm6oq0CjnWrOmvvKzCAT/OwRHEp4Ky1/+svI/fUjYs1HOEjxofIn+MS1WcJG/ugwuxebByFvbLUd5usjPX/OGUr4lZlluW3b/XY0wla2frdZ9F3GBA28y8eQHo/1dgu/2zN8ds+JgbmkidDrAM65L6LOa6z852mQLbbdjdo3D8x9ZeYVdoJvOPBKZR+qk3uyXmVM/Qq1u1K5Eyb4Q00GXnsZ81TtOyJrv557iljS1Cv7UKnZPQekjJLDLPgdkfur0a2+CXoch1fcL1+GJyxjnkp9R+SXtzP7+5frnPT6yON08LclzbWHarFHDfgdkco2ziHd3Ew3750S8Qb7aB2UHGPoSrh97tHfjKPavpqJ6L7s7H4LuCg51NT+RBhjQ5Vzr3ei3yOMsaGyY8v5ZGx3kGG24pWZE0b7kELhLoPUzH1d16AOHo9HxMNeE7Qz27YN9Y/JNVQz9zi9/nWGvXVdvfB1VsHc7eBz4xzVbRXMPdoIKXhIX1MtdztoZYSjurlquefom4KH9GVeyRKIs7tAzF0g5i4QcxeIuQvE3AUyd+59v9MmZwcIY0wzd+7SKdPfzdR9vknYAcIYc0yfe+idvIPsAGGMOVzMCMTcBeJHxATi7C4QcxeIuQvE3AVi7gIxd4GYu0DMXSDmLhBzF4i5C8TcBWLuAjF3gZi7QMxdIOYuEHMXiLkLxNwFYu4CMXeBmLtAzF0g5i4QcxeIuQvE3AVi7gIxd4GYu0DMXSDmLhBzF4i5C8TcBWLuAjF3gZi7QMxdIOYuEHMXiLkLxNwFYu4CMXeBmLtAzF0g5i4QcxeIuQvE3AVi7gIxd4GYu0DMXSDmLhBzF4i5C8TcBWLuAjF3gZi7QMxdIOYuEHMXiLkLxNwFYu4CMXeBmLtAzF0g5i4QcxeIuQvE3AVi7gIxd4GYu0DMXSDmLpB/AeNaeyPreSNuAAAAAElFTkSuQmCC", "image/svg+xml": [ "\n", "\n", " \n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "N\n", "N\n", "H\n", "H\n", "H\n", "H\n", "H\n", "H\n", "H\n", "H\n", "H\n", "H\n", "H\n", "H\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from scm.plams import to_rdmol, Molecule\n", "from rdkit.Chem import Draw\n", "from rdkit.Chem.Draw import IPythonConsole\n", "\n", "IPythonConsole.ipython_useSVG = True\n", "IPythonConsole.molSize = 250, 250\n", "\n", "plams_mol = Molecule(xyz_file)\n", "# guess bonds, the bonds will be included in the RDKit molecule\n", "plams_mol.guess_bonds()\n", "\n", "rdkit_mol = to_rdmol(plams_mol)\n", "print(f\"{type(rdkit_mol)=}\")\n", "rdkit_mol" ] }, { "cell_type": "markdown", "id": 
"ca39cbe8-38fe-4b81-a427-5411b356ddcb", "metadata": {}, "source": [ "#### Convert RDKit Mol to PLAMS Molecule" ] }, { "cell_type": "code", "execution_count": 16, "id": "2154888a-8305-423b-8d99-341e5a52f44a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "type(rdkit_mol)=\n", "type(mol)=\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from scm.plams import from_rdmol, plot_molecule, Molecule\n", "\n", "mol: Molecule = from_rdmol(rdkit_mol)\n", "\n", "print(f\"{type(rdkit_mol)=}\")\n", "print(f\"{type(mol)=}\")\n", "plot_molecule(mol);" ] }, { "cell_type": "markdown", "id": "dde8fe19-556c-4c9e-b6cf-2d0dd5a34ef3", "metadata": {}, "source": [ "#### Convert problematic PLAMS Molecule to RDKit Mol" ] }, { "cell_type": "code", "execution_count": 17, "id": "7da2ed9d-f9b5-4fec-86d6-c37af291b3ff", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "mol = Molecule(badxyz_file)\n", "mol.guess_bonds()\n", "plot_molecule(mol);" ] }, { "cell_type": "markdown", "id": "c84e4100-2347-4490-b216-cd23a48b78e2", "metadata": {}, "source": [ "This molecule will fail to convert to an RDKit Mol object, because RDKit cannot kekulize the molecule with the bond orders (double bonds) assigned by `guess_bonds()`." ] }, { "cell_type": "code", "execution_count": 18, "id": "8fe9140c-8de7-4c01-a64b-49ca37ca97a4", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[25.02|10:29:13] RDKit Sanitization Error.\n", "[25.02|10:29:13] Most likely this is a problem with the assigned bond orders: Use chemical insight to adjust them.\n", "[25.02|10:29:13] Note that the atom indices below start at zero, while the AMS-GUI indices start at 1.\n", "Failed to convert\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "RDKit ERROR: [10:29:13] Can't kekulize mol. Unkekulized atoms: 10 11 12 13 14\n", "RDKit ERROR: \n", "[10:29:13] Can't kekulize mol. Unkekulized atoms: 10 11 12 13 14\n", "\n" ] } ], "source": [ "try:\n", " rdkit_mol = to_rdmol(mol)\n", "except ValueError as exc:\n", " print(\"Failed to convert\")" ] }, { "cell_type": "markdown", "id": "dce54ed2-5913-4230-9990-7f8f16c66fb9", "metadata": {}, "source": [ "The problem can be fixed by passing `presanitize=True` to the `to_rdmol` function." ] }, { "cell_type": "code", "execution_count": 19, "id": "be1209b5-08a8-490f-a28c-961f423d9b9b", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "RDKit ERROR: [10:29:13] Can't kekulize mol. Unkekulized atoms: 10 11 12 13 14\n", "RDKit ERROR: \n", "[10:29:13] Can't kekulize mol. 
Unkekulized atoms: 10 11 12 13 14\n", "\n" ] }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAPoAAAD6CAIAAAAHjs1qAAAGPElEQVR4nO3dXXLaShBA4VEqO5aXYdasPJByCPoxYDTd0+d8dR+u45Qt4qNGGoQ1LcvSJIZf0Rsg9WPuAjF3gZi7QMxdIOYuEHMXiLkLxNwFYu4CMXeBmLtAzF0g5i4QcxeIuQvE3AVi7gIxd4GYu0DMXSDmLhBzF4i5C8TcBWLuAjF3gZi7QMxdIOYuEHMXiLkLxNwFYu4CMXeBmLtAzF0g5i4QcxeIuQvE3AVi7gIxd4GYu0DMXSDmLhBzF4i5C8TcBWLuAjF3gZi7QMxdIOYuEHMXiLkLxNwFYu4CMXeBmLtAzF0g5i4QcxeIuQvkd/QGqLXW2jT9+/9liduO4sw9gWn6L/G7D/U+HsxEW8e9LP8Ne71P2dwvl8u3fyKasrlLa+YuEHMXSOWVmTEO1q8npq7MdFE593mebz/MW//dUoytn6Zy7iMx8S48do/gsnoQcxfItPg02plnonGc7ml4hHM+c+/L0R4Kl3vS5Uh3gy5wuc/zHFa8TUfD5Z6Ru0EvxNxjBrxNJ0DMPcTusou7QUfQ3COP4BUHmntn07Tzcp6jvS9u7hkGvC8sdYa+iGB36Pb6Ln02QF+40z2crfeHzn1ZlunkK1VsOhV07oHcDULQcz91wNt0NvTcQ7gbRPG9qn8H/LP9Pfic8PXX7DsDc//r2UOap/K9/eKO9kDm3qO/155A9HYeu/dm9IHouXcbuh3W+PUteu5CQefe+XjaAR8Onfseo6yKu1ywN9rPHvnDLNFU/C2tLkRqS9HfwQ09mIka7W2II/i6d0eD5i4mYu6Bo/1qgAF/bJoGHfbE3PVTy3I9vLlcLuHv933KIKsE7xM+2kO+3XM2T0z3z1avxd/dGignp3uM1Mcz6xPTw5WZeZ6/Wk9+92bWQmSS0T6AondHY+WeSvargtNu2A+ADmYc7XK6R8o+4F+S6mD9DiV3R/ujfny9QOa7N4MOZnJKvURTDiJ3R/ujqlwKtgeRe3IO+G7qj7chRnuKjak+2pvTPYn41hmK5z7EaE8BMNpb+dylW1lyP+PSIkf7oxijveXJXepgjFdVDyb9x8fH3qcc7Q/BjPY2Su4Hbx04+JRlP2JqjfNvlCj3PhdXJN8Bbl9vir0rYEmJcn/7pUXDXW94t7VjbfwQPFXNYh332a0Dd6fiuXs5im4Vz117gKO9ES4Ra4P8aI/vJX/74d1fO/7sC9+usESnqtpz3OVx/Zt/7at1WvSI3IdYollv5Gvb/OBTBFP2CN4lf+5Xndfd2zj/Mm+BmO5tkAHfzkx8iId/Nldm6FBrtaDcUT/XNfjDvwLlnlZ4hZw9gZU75+e6Cf7wGy33hJKcQUL2BFzukJ/rnoOHH7/PnQ+XeypJRjsHMXcH/PbDr3I3yQPE3JMIHO3YJxRo7vABv6v6gIfmHi74qL161nu4uTvgt5XeE7i5B0qxIFM66z2UKyI3bV4m+eDIj+/1PNc9oeIDROe+6fE3v7321VKM9qu6We+h5/7ydfBZkj1J0UfnsXtXiUb7FewI3txdogEx937SjfarhJt0GnNvzQGPQT9VvbVZ/LvmcdLRDmPurT3zG7y+ZdOZmfs3ns33kV/ipSjm/ubDjL075Lzr6/dwu7W19lJz72GUX+rU2upWTbVedqWvzAxTYR/ruGu9DkXPvRvXOjNA5+5op0Hn/vn52fPbOeDDcXO/XC4H92RVSdzcQyzL0uf2sS9an5jWWpmBLkQ62nfdFV+o9eZ072+e59QDvrW2LP/+q4WYu6Mdy5W4GO5yIYjTXVjmHmOAI/iKPJgRiNNdIOYuEHMXiLkLpGzu63UPV0JUNndpzdwj+RTUmbkLpPIFwE5K3amc+901WNavyrkPwZ2wJ3MP5lNQT56qCqRs7u
s3T/h2CpXNXVrzeneBON0FYu4CMXeBmLtAzF0g5i4QcxeIuQvE3AVi7gIxd4GYu0DMXSDmLhBzF4i5C8TcBWLuAjF3gZi7QMxdIOYuEHMXiLkLxNwFYu4CMXeBmLtAzF0g5i4QcxeIuQvE3AVi7gIxd4GYu0DMXSDmLhBzF4i5C8TcBWLuAjF3gZi7QMxdIOYuEHMXiLkLxNwFYu4CMXeBmLtAzF0g5i4QcxeIuQvE3AVi7gIxd4GYu0D+AMFvrvyJOO0wAAAAAElFTkSuQmCC", "image/svg+xml": [ "\n", "\n", " \n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "O\n", "O\n", "H\n", "H\n", "O\n", "H\n", "H\n", "H\n", "C-\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rdkit_mol = to_rdmol(mol, presanitize=True)\n", "rdkit_mol" ] }, { "cell_type": "markdown", "id": "3d18c8c5-a1f6-4ef7-bb63-44d14faa3333", "metadata": {}, "source": [ "### SCM libbase UnifiedChemicalSystem Python class\n", "\n", "#### Convert PLAMS Molecule to UnifiedChemicalSystem" ] }, { "cell_type": "code", "execution_count": 20, "id": "99d4138e-9dac-4f70-bdd3-be072b5dfc34", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "type(chemsys)=\n", "System\n", " Atoms\n", " C -1.47627 -1.15316 -0.292796\n", " C -2.86892 -1.15881 -0.294692\n", " C -3.59226 0.00184956 0.0382389\n", " C -2.86824 1.16159 0.37328\n", " C -1.47569 1.1545 0.373398\n", " C -0.739311 0.000234896 0.0410634\n", " C 1.47627 1.15316 -0.292796\n", " C 0.739311 -0.000234896 0.0410634\n", " C 1.47569 -1.1545 0.373398\n", " C 2.86824 -1.16159 0.37328\n", " C 3.59226 -0.00184956 0.0382389\n", " C 2.86892 1.15881 -0.294692\n", " N 4.99096 0.0138526 0.0923716\n", " N -4.99096 -0.0138526 0.0923716\n", " H -0.945575 -2.05785 -0.582591\n", " H -3.40776 -2.06659 -0.565539\n", " H -3.40667 2.06804 0.649043\n", " H -0.944434 2.05992 0.659738\n", " H 0.945575 2.05785 -0.582591\n", " H 0.944434 -2.05992 0.659738\n", " H 3.40667 -2.06804 0.649043\n", " H 3.40776 2.06659 -0.565539\n", " H 5.43534 -0.887836 -0.024637\n", " H 
5.434540000000001 0.731266 -0.466878\n", " H -5.434540000000001 -0.731266 -0.466878\n", " H -5.43534 0.887836 -0.024637\n", " End\n", "End\n" ] } ], "source": [ "from scm.utils.conversions import plams_molecule_to_chemsys, chemsys_to_plams_molecule\n", "from scm.plams import Molecule\n", "from scm.libbase import UnifiedChemicalSystem\n", "\n", "mol = Molecule(xyz_file)\n", "chemsys = plams_molecule_to_chemsys(mol)\n", "print(f\"{type(chemsys)=}\")\n", "print(chemsys)" ] }, { "cell_type": "markdown", "id": "b50a29db-0a73-45e4-b15d-e181522819d6", "metadata": {}, "source": [ "#### Convert UnifiedChemicalSystem to PLAMS Molecule" ] }, { "cell_type": "code", "execution_count": 21, "id": "33026182-f8d0-4950-a9f4-58f2ff784f9f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "type(chemsys)=\n", "type(mol)=\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from scm.utils.conversions import plams_molecule_to_chemsys, chemsys_to_plams_molecule\n", "from scm.plams import Molecule\n", "from scm.libbase import UnifiedChemicalSystem\n", "\n", "mol = chemsys_to_plams_molecule(chemsys)\n", "print(f\"{type(chemsys)=}\")\n", "print(f\"{type(mol)=}\")\n", "plot_molecule(mol);" ] }, { "cell_type": "markdown", "id": "6101a3ee-e500-4624-b5a8-fb737f55fb31", "metadata": {}, "source": [ "### pymatgen Structure and Molecule Python classes" ] }, { "cell_type": "markdown", "id": "52de8abe-c88e-43cb-adaa-f93a55e8b1de", "metadata": {}, "source": [ "Note that for this part of the example, the `pymatgen` package needs to be installed. This can be done via `amspackages`." ] }, { "cell_type": "code", "execution_count": 22, "id": "bc5b7a22-520e-48e7-b36b-e7e6390e2569", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requested packages are already installed.\n" ] } ], "source": [ "!\"${AMSBIN}/amspackages\" install pymatgen" ] }, { "cell_type": "markdown", "id": "3bc035d5-2b0a-4c6b-a611-a4372f820b17", "metadata": {}, "source": [ "#### Convert PLAMS Molecule to pymatgen Structure (periodic)\n", "\n", "There is no built-in converter between PLAMS Molecule and pymatgen Structure (periodic crystal). 
Instead, you need to go through the ASE interface to both packages:" ] }, { "cell_type": "code", "execution_count": 23, "id": "3cbe2fad-76f0-40cb-8d6a-ecde2b2b5be2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "type(mol)=\n", "type(pymatgen_structure)=\n", "Full Formula (Si8 O16)\n", "Reduced Formula: SiO2\n", "abc : 9.873000 5.254000 8.770000\n", "angles: 90.000000 90.000000 90.000000\n", "pbc : True True True\n", "Sites (24)\n", " # SP a b c\n", "--- ---- ------ ---- ------\n", " 0 O 0.5 0.25 0.3523\n", " 1 O 0 0.75 0.8523\n", " 2 O 0.5 0.75 0.6477\n", " 3 O 0 0.25 0.1477\n", " 4 O 0.31 0 0.5\n", " 5 O 0.81 0.5 0\n", " 6 O 0.69 0 0.5\n", " 7 O 0.19 0.5 0\n", " 8 O 0.31 0.5 0.5\n", " 9 O 0.81 0 0\n", " 10 O 0.69 0.5 0.5\n", " 11 O 0.19 0 0\n", " 12 O 0.25 0.25 0.25\n", " 13 O 0.75 0.75 0.75\n", " 14 O 0.75 0.25 0.25\n", " 15 O 0.25 0.75 0.75\n", " 16 Si 0.3428 0.25 0.4007\n", " 17 Si 0.8428 0.75 0.9007\n", " 18 Si 0.6572 0.25 0.4007\n", " 19 Si 0.1572 0.75 0.9007\n", " 20 Si 0.6572 0.75 0.5993\n", " 21 Si 0.1572 0.25 0.0993\n", " 22 Si 0.3428 0.75 0.5993\n", " 23 Si 0.8428 0.25 0.0993\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/Users/ormrodmorley/Documents/code/ams/amshome_fix2025/bin.auto/python3.8/lib/python3.8/site-packages/ase/io/cif.py:401: UserWarning: crystal system 'orthorhombic' is not interpreted for space group Spacegroup(74, setting=1). 
This may result in wrong setting!\n", " warnings.warn(\n" ] } ], "source": [ "from pymatgen.core.structure import Structure\n", "from pymatgen.io.ase import AseAtomsAdaptor\n", "import scm.plams\n", "from scm.plams import fromASE, toASE, Molecule\n", "from ase.io import read\n", "\n", "\n", "def convert_plams_molecule_to_pymatgen_structure(mol: Molecule) -> Structure:\n", " return AseAtomsAdaptor().get_structure(toASE(mol))\n", "\n", "\n", "mol: scm.plams.Molecule = fromASE(read(cif_file))\n", "\n", "pymatgen_structure: Structure = convert_plams_molecule_to_pymatgen_structure(mol)\n", "\n", "print(f\"{type(mol)=}\")\n", "print(f\"{type(pymatgen_structure)=}\")\n", "print(pymatgen_structure)" ] }, { "cell_type": "markdown", "id": "e5cbf591-feb1-4ce9-b1b4-78d916809637", "metadata": {}, "source": [ "#### Convert pymatgen Structure (periodic) to PLAMS Molecule\n", "\n", "Go through the ASE interface:" ] }, { "cell_type": "code", "execution_count": 24, "id": "ec807839-44a4-4add-9be0-7879045dc747", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "type(pymatgen_structure)=\n", "type(mol)=\n" ] } ], "source": [ "from pymatgen.io.ase import AseAtomsAdaptor\n", "from pymatgen.core.structure import Structure\n", "from scm.plams import fromASE\n", "from scm.plams import Molecule\n", "\n", "\n", "def pymatgen_structure_to_plams_molecule(pymatgen_structure: Structure) -> Molecule:\n", " return fromASE(AseAtomsAdaptor().get_atoms(pymatgen_structure))\n", "\n", "\n", "print(f\"{type(pymatgen_structure)=}\")\n", "\n", "mol = pymatgen_structure_to_plams_molecule(pymatgen_structure)\n", "print(f\"{type(mol)=}\")" ] }, { "cell_type": "markdown", "id": "149b468f-a1f5-482b-a366-a2628fb179f3", "metadata": {}, "source": [ "#### Convert PLAMS Molecule to pymatgen Molecule (non-periodic)\n", "\n", "pymatgen has a special ``Molecule`` class for non-periodic systems. In PLAMS, the ``Molecule`` class is used for both periodic and non-periodic systems." 
] }, { "cell_type": "code", "execution_count": 25, "id": "1e76a470-1c6c-4c51-85d1-4ffd423f6cbf", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "type(plams_molecule)=\n", "type(pymatgen_molecule)=\n", "Full Formula (H12 C12 N2)\n", "Reduced Formula: H6C6N\n", "Charge = 0, Spin Mult = 1\n", "Sites (26)\n", "0 C -1.476270 -1.153160 -0.292796\n", "1 C -2.868920 -1.158810 -0.294692\n", "2 C -3.592260 0.001850 0.038239\n", "3 C -2.868240 1.161590 0.373280\n", "4 C -1.475690 1.154500 0.373398\n", "5 C -0.739311 0.000235 0.041063\n", "6 C 1.476270 1.153160 -0.292796\n", "7 C 0.739311 -0.000235 0.041063\n", "8 C 1.475690 -1.154500 0.373398\n", "9 C 2.868240 -1.161590 0.373280\n", "10 C 3.592260 -0.001850 0.038239\n", "11 C 2.868920 1.158810 -0.294692\n", "12 N 4.990960 0.013853 0.092372\n", "13 N -4.990960 -0.013853 0.092372\n", "14 H -0.945575 -2.057850 -0.582591\n", "15 H -3.407760 -2.066590 -0.565539\n", "16 H -3.406670 2.068040 0.649043\n", "17 H -0.944434 2.059920 0.659738\n", "18 H 0.945575 2.057850 -0.582591\n", "19 H 0.944434 -2.059920 0.659738\n", "20 H 3.406670 -2.068040 0.649043\n", "21 H 3.407760 2.066590 -0.565539\n", "22 H 5.435340 -0.887836 -0.024637\n", "23 H 5.434540 0.731266 -0.466878\n", "24 H -5.434540 -0.731266 -0.466878\n", "25 H -5.435340 0.887836 -0.024637\n" ] } ], "source": [ "import pymatgen.core.structure\n", "import scm.plams\n", "from pymatgen.io.ase import AseAtomsAdaptor\n", "from scm.plams import toASE\n", "\n", "\n", "def convert_plams_molecule_to_pymatgen_molecule(\n", " mol: scm.plams.Molecule,\n", ") -> pymatgen.core.structure.Molecule:\n", " return AseAtomsAdaptor().get_molecule(toASE(mol))\n", "\n", "\n", "plams_molecule = scm.plams.Molecule(xyz_file)\n", "\n", "pymatgen_molecule: pymatgen.core.structure.Molecule = convert_plams_molecule_to_pymatgen_molecule(plams_molecule)\n", "\n", "print(f\"{type(plams_molecule)=}\")\n", "print(f\"{type(pymatgen_molecule)=}\")\n", "print(pymatgen_molecule)" ] }, 
{ "cell_type": "markdown", "id": "8fbfb59a-6e33-4cc3-8ca7-bd3e21b4030f", "metadata": {}, "source": [ "#### Convert pymatgen Molecule (non-periodic) to PLAMS Molecule" ] }, { "cell_type": "code", "execution_count": 26, "id": "bce3f898-2f36-4027-85cf-ce0a2f76b6d3", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "type(pymatgen_molecule)=\n", "type(mol)=\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from pymatgen.io.ase import AseAtomsAdaptor\n", "import pymatgen.core.structure\n", "from scm.plams import fromASE\n", "from scm.plams import Molecule\n", "\n", "\n", "def pymatgen_molecule_to_plams_molecule(\n", " pymatgen_molecule: pymatgen.core.structure.Molecule,\n", ") -> scm.plams.Molecule:\n", " return fromASE(AseAtomsAdaptor().get_atoms(pymatgen_molecule))\n", "\n", "\n", "print(f\"{type(pymatgen_molecule)=}\")\n", "\n", "mol = pymatgen_molecule_to_plams_molecule(pymatgen_molecule)\n", "print(f\"{type(mol)=}\")\n", "plot_molecule(mol);" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.16" } }, "nbformat": 4, "nbformat_minor": 5 }