{ "cells": [ { "cell_type": "markdown", "id": "c68ca2c1-a940-4eda-9ee4-1e0660e7cc67", "metadata": {}, "source": [ "## Create Example Jobs" ] }, { "cell_type": "markdown", "id": "ffa8d650-61fb-4b8f-8e60-38348d96cf51", "metadata": {}, "source": [ "To begin with, create a variety of AMS jobs with different settings, engines and calculation types." ] }, { "cell_type": "code", "execution_count": 1, "id": "d2c2776a-5100-41c7-a4d7-3a7b308b9a59", "metadata": {}, "outputs": [], "source": [ "from scm.plams import from_smiles, AMSJob, PlamsError, Settings, Molecule, Atom\n", "from scm.libbase import UnifiedChemicalSystem as ChemicalSystem\n", "from scm.input_classes.drivers import AMS\n", "from scm.input_classes.engines import DFTB\n", "from scm.utils.conversions import plams_molecule_to_chemsys\n", "\n", "\n", "def example_job_dftb(smiles, task, use_chemsys=False):\n", " # Generate molecule from smiles\n", " mol = from_smiles(smiles)\n", " if use_chemsys:\n", " mol = plams_molecule_to_chemsys(mol)\n", "\n", " # Set up calculation settings using PISA\n", " sett = Settings()\n", " sett.runscript.nproc = 1\n", " driver = AMS()\n", " driver.Task = task\n", " driver.Engine = DFTB()\n", " sett.input = driver\n", " return AMSJob(molecule=mol, settings=sett, name=\"dftb\")\n", "\n", "\n", "def example_job_adf(smiles, task, basis, gga=None, use_chemsys=False):\n", " # Generate molecule from smiles\n", " mol = from_smiles(smiles)\n", " if use_chemsys:\n", " mol = plams_molecule_to_chemsys(mol)\n", "\n", " # Set up calculation settings using standard settings\n", " sett = Settings()\n", " sett.runscript.nproc = 1\n", " sett.input.AMS.Task = task\n", " sett.input.ADF.Basis.Type = basis\n", " if gga:\n", " sett.input.ADF.XC.GGA = gga\n", " return AMSJob(molecule=mol, settings=sett, name=\"adf\")\n", "\n", "\n", "def example_job_neb(iterations, use_chemsys=False):\n", " # Set up molecules\n", " main_molecule = Molecule()\n", " main_molecule.add_atom(Atom(symbol=\"C\", coords=(0, 0, 
0)))\n", " main_molecule.add_atom(Atom(symbol=\"N\", coords=(1.18, 0, 0)))\n", " main_molecule.add_atom(Atom(symbol=\"H\", coords=(2.196, 0, 0)))\n", " final_molecule = main_molecule.copy()\n", " final_molecule.atoms[1].x = 1.163\n", " final_molecule.atoms[2].x = -1.078\n", "\n", " mol = {\"\": main_molecule, \"final\": final_molecule}\n", "\n", " if use_chemsys:\n", " mol = {k: plams_molecule_to_chemsys(v) for k, v in mol.items()}\n", "\n", " # Set up calculation settings\n", " sett = Settings()\n", " sett.runscript.nproc = 1\n", " sett.input.ams.Task = \"NEB\"\n", " sett.input.ams.NEB.Images = 9\n", " sett.input.ams.NEB.Iterations = iterations\n", " sett.input.DFTB\n", "\n", " return AMSJob(molecule=mol, settings=sett, name=\"neb\")" ] }, { "cell_type": "markdown", "id": "aec4b300-ba2f-4883-bb72-a3dd303f4d22", "metadata": {}, "source": [ "Now, run a selection of them." ] }, { "cell_type": "code", "execution_count": 2, "id": "b41d6fac-5199-4e2f-987c-8d31e37b20e9", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[15.05|09:44:38] JOB dftb STARTED\n", "[15.05|09:44:38] JOB adf STARTED\n", "[15.05|09:44:38] JOB adf STARTED\n", "[15.05|09:44:38] JOB dftb STARTED\n", "[15.05|09:44:38] Renaming job adf to adf.002\n", "[15.05|09:44:38] JOB adf STARTED\n", "[15.05|09:44:38] JOB adf STARTED\n", "[15.05|09:44:38] JOB dftb STARTED\n", "[15.05|09:44:38] JOB adf STARTED\n", "[15.05|09:44:38] JOB dftb RUNNING\n", "[15.05|09:44:38] JOB adf STARTED\n", "[15.05|09:44:38] JOB adf RUNNING\n", "[15.05|09:44:38] Renaming job adf to adf.003\n", "[15.05|09:44:38] Renaming job dftb to dftb.002\n", "[15.05|09:44:38] Renaming job dftb to dftb.003\n", "[15.05|09:44:38] Renaming job adf to adf.004\n", "[15.05|09:44:38] JOB adf.002 RUNNING\n", "[15.05|09:44:38] Renaming job adf to adf.005\n", "[15.05|09:44:38] JOB dftb.002 RUNNING\n", "[15.05|09:44:38] JOB adf.004 RUNNING\n", "[15.05|09:44:38] JOB adf.003 RUNNING\n", "[15.05|09:44:38] JOB adf.005 
RUNNING\n", "[15.05|09:44:38] JOB dftb.003 RUNNING\n", "[15.05|09:44:38] JOB dftb FINISHED\n", "[15.05|09:44:38] JOB dftb SUCCESSFUL\n", "[15.05|09:44:38] Renaming job adf to adf.006\n", "[15.05|09:44:38] JOB dftb STARTED\n", "[15.05|09:44:38] JOB adf.006 RUNNING\n", "[15.05|09:44:39] JOB dftb.002 FINISHED\n", "[15.05|09:44:39] JOB dftb.002 SUCCESSFUL\n", "[15.05|09:44:39] Renaming job dftb to dftb.004\n", "[15.05|09:44:39] JOB adf STARTED\n", "[15.05|09:44:39] JOB dftb.004 RUNNING\n", "[15.05|09:44:39] JOB dftb.003 FINISHED\n", "[15.05|09:44:39] JOB dftb.003 SUCCESSFUL\n", "[15.05|09:44:39] Renaming job adf to adf.007\n", "[15.05|09:44:39] JOB adf STARTED\n", "[15.05|09:44:39] JOB adf.007 RUNNING\n", "[15.05|09:44:39] JOB dftb.004 FINISHED\n", "[15.05|09:44:39] JOB dftb.004 SUCCESSFUL\n", "[15.05|09:44:39] Renaming job adf to adf.008\n", "[15.05|09:44:39] JOB dftb STARTED\n", "[15.05|09:44:39] JOB adf.008 RUNNING\n", "[15.05|09:44:41] JOB adf.005 FINISHED\n", "[15.05|09:44:41] JOB adf.005 SUCCESSFUL\n", "[15.05|09:44:41] Renaming job dftb to dftb.005\n", "[15.05|09:44:41] JOB adf STARTED\n", "[15.05|09:44:41] JOB dftb.005 RUNNING\n", "[15.05|09:44:41] JOB dftb.005 FINISHED\n", "[15.05|09:44:41] JOB dftb.005 SUCCESSFUL\n", "[15.05|09:44:41] Renaming job adf to adf.009\n", "[15.05|09:44:41] JOB adf STARTED\n", "[15.05|09:44:41] JOB adf.009 RUNNING\n", "[15.05|09:44:42] JOB adf FINISHED\n", "[15.05|09:44:42] JOB adf SUCCESSFUL\n", "[15.05|09:44:42] Renaming job adf to adf.010\n", "[15.05|09:44:42] JOB dftb STARTED\n", "[15.05|09:44:42] JOB adf.010 RUNNING\n", "[15.05|09:44:43] JOB adf.006 FINISHED\n", "[15.05|09:44:43] JOB adf.006 SUCCESSFUL\n", "[15.05|09:44:43] Renaming job dftb to dftb.006\n", "[15.05|09:44:43] JOB adf STARTED\n", "[15.05|09:44:43] JOB dftb.006 RUNNING\n", "[15.05|09:44:43] JOB adf.009 FINISHED\n", "[15.05|09:44:43] JOB adf.009 SUCCESSFUL\n", "[15.05|09:44:43] Renaming job adf to adf.011\n", "[15.05|09:44:43] JOB adf STARTED\n", "[15.05|09:44:43] 
JOB adf.011 RUNNING\n", "[15.05|09:44:43] JOB dftb.006 FINISHED\n", "[15.05|09:44:43] JOB dftb.006 SUCCESSFUL\n", "[15.05|09:44:43] Renaming job adf to adf.012\n", "[15.05|09:44:43] JOB dftb STARTED\n", "[15.05|09:44:43] JOB adf.012 RUNNING\n", "[15.05|09:44:44] JOB adf.010 FINISHED\n", "[15.05|09:44:44] JOB adf.010 SUCCESSFUL\n", "[15.05|09:44:44] Renaming job dftb to dftb.007\n", "[15.05|09:44:44] JOB adf STARTED\n", "[15.05|09:44:44] JOB dftb.007 RUNNING\n", "[15.05|09:44:44] JOB dftb.007 FINISHED\n", "[15.05|09:44:45] JOB dftb.007 SUCCESSFUL\n", "[15.05|09:44:45] Renaming job adf to adf.013\n", "[15.05|09:44:45] JOB adf STARTED\n", "[15.05|09:44:45] JOB adf.013 RUNNING\n", "[15.05|09:44:45] JOB adf.007 FINISHED\n", "[15.05|09:44:45] JOB adf.007 SUCCESSFUL\n", "[15.05|09:44:45] Renaming job adf to adf.014\n", "[15.05|09:44:45] JOB dftb STARTED\n", "[15.05|09:44:45] JOB adf.014 RUNNING\n", "[15.05|09:44:46] JOB adf.011 FINISHED\n", "[15.05|09:44:46] JOB adf.011 SUCCESSFUL\n", "[15.05|09:44:46] Renaming job dftb to dftb.008\n", "[15.05|09:44:46] JOB adf STARTED\n", "[15.05|09:44:46] JOB dftb.008 RUNNING\n", "[15.05|09:44:47] JOB dftb.008 FINISHED\n", "[15.05|09:44:47] JOB dftb.008 SUCCESSFUL\n", "[15.05|09:44:47] JOB adf.002 FINISHED\n", "[15.05|09:44:47] Renaming job adf to adf.015\n", "[15.05|09:44:47] JOB adf STARTED\n", "[15.05|09:44:47] JOB adf.015 RUNNING\n", "[15.05|09:44:47] JOB adf.002 SUCCESSFUL\n", "[15.05|09:44:47] Renaming job adf to adf.016\n", "[15.05|09:44:47] JOB neb STARTED\n", "[15.05|09:44:47] JOB adf.016 RUNNING\n", "[15.05|09:44:48] JOB adf.012 FINISHED\n", "[15.05|09:44:48] JOB adf.012 SUCCESSFUL\n", "[15.05|09:44:48] JOB neb STARTED\n", "[15.05|09:44:48] JOB neb RUNNING\n", "[15.05|09:44:48] JOB adf.013 FINISHED\n", "[15.05|09:44:48] JOB adf.013 SUCCESSFUL\n", "[15.05|09:44:48] Renaming job neb to neb.002\n" ] } ], "source": [ "from scm.plams import config, JobRunner\n", "\n", "config.default_jobrunner = JobRunner(parallel=True, 
maxthreads=8)\n", "\n", "smiles = [\"CC\", \"C\", \"O\", \"CO\"]\n", "tasks = [\"SinglePoint\", \"GeometryOptimization\"]\n", "engines = [\"DFTB\", \"ADF\"]\n", "jobs = []\n", "for i, s in enumerate(smiles):\n", " for j, t in enumerate(tasks):\n", " job_dftb = example_job_dftb(s, t, use_chemsys=i % 2)\n", " job_adf1 = example_job_adf(s, t, \"DZ\", use_chemsys=True)\n", " job_adf2 = example_job_adf(s, t, \"TZP\", \"PBE\")\n", " jobs += [job_dftb, job_adf1, job_adf2]\n", "\n", "job_neb1 = example_job_neb(10)\n", "job_neb2 = example_job_neb(100, use_chemsys=True)\n", "jobs += [job_neb1, job_neb2]\n", "\n", "for j in jobs:\n", " j.run()" ] }, { "cell_type": "markdown", "id": "0d7e3247-a30d-4520-975a-4619ce927d71", "metadata": {}, "source": [ "## Job Analysis" ] }, { "cell_type": "markdown", "id": "402352bc-3f54-42ae-ac94-70c56f7a7953", "metadata": {}, "source": [ "### Adding and Loading Jobs\n", "\n", "Jobs can be loaded by passing job objects directly, or loading from a path." ] }, { "cell_type": "code", "execution_count": 3, "id": "4ebf6505-6489-4e29-af55-60b2e3d71663", "metadata": {}, "outputs": [], "source": [ "from scm.plams import JobAnalysis" ] }, { "cell_type": "code", "execution_count": 4, "id": "c60b2999-dda2-4dbd-8b37-96303407b68e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[15.05|09:44:48] Waiting for job adf.003 to finish\n", "[15.05|09:44:49] JOB neb FINISHED\n", "[15.05|09:44:49] Job neb reported errors. Please check the output\n", "[15.05|09:44:49] JOB neb FAILED\n", "[15.05|09:44:49] Job neb reported errors. Please check the output\n", "[15.05|09:44:49] Error message for job neb was:\n", "\tNEB optimization did NOT converge\n", "[15.05|09:44:49] Job neb reported errors. Please check the output\n", "[15.05|09:44:49] Job neb reported errors. 
Please check the output\n", "[15.05|09:44:49] JOB neb.002 FINISHED\n", "[15.05|09:44:49] JOB neb.002 SUCCESSFUL\n", "[15.05|09:44:52] JOB adf.014 FINISHED\n", "[15.05|09:44:52] JOB adf.014 SUCCESSFUL\n", "[15.05|09:44:53] JOB adf.008 FINISHED\n", "[15.05|09:44:53] JOB adf.008 SUCCESSFUL\n", "[15.05|09:44:57] JOB adf.015 FINISHED\n", "[15.05|09:44:57] JOB adf.015 SUCCESSFUL\n", "[15.05|09:45:00] JOB adf.003 FINISHED\n", "[15.05|09:45:00] JOB adf.003 SUCCESSFUL\n", "[15.05|09:45:00] Waiting for job adf.004 to finish\n", "[15.05|09:45:08] JOB adf.016 FINISHED\n", "[15.05|09:45:08] JOB adf.016 SUCCESSFUL\n", "[15.05|09:45:23] JOB adf.004 FINISHED\n", "[15.05|09:45:23] JOB adf.004 SUCCESSFUL\n" ] } ], "source": [ "ja = JobAnalysis(jobs=jobs[:10], paths=[j.path for j in jobs[10:-2]])" ] }, { "cell_type": "markdown", "id": "dbf478f5-474c-4913-b082-b7d23030b61b", "metadata": {}, "source": [ "Jobs can also be added or removed after initialization." ] }, { "cell_type": "code", "execution_count": 5, "id": "6d77b6f7-ccbd-4bac-b406-272d19386d64", "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "| Path | Name | OK | Check | ErrorMsg |\n", "|---------------------------------------------------------|----------|-------|-------|-----------------------------------|\n", "| /path/plams/examples/JobAnalysis/plams_workdir/dftb | dftb | True | True | None |\n", "| /path/plams/examples/JobAnalysis/plams_workdir/adf | adf | True | True | None |\n", "| /path/plams/examples/JobAnalysis/plams_workdir/adf.002 | adf.002 | True | True | None |\n", "| /path/plams/examples/JobAnalysis/plams_workdir/dftb.002 | dftb.002 | True | True | None |\n", "| /path/plams/examples/JobAnalysis/plams_workdir/adf.003 | adf.003 | True | True | None |\n", "| /path/plams/examples/JobAnalysis/plams_workdir/adf.004 | adf.004 | True | True | None |\n", "| /path/plams/examples/JobAnalysis/plams_workdir/dftb.003 | dftb.003 | True | True | None |\n", "| /path/plams/examples/JobAnalysis/plams_workdir/adf.005 | 
adf.005 | True | True | None |\n", "| /path/plams/examples/JobAnalysis/plams_workdir/adf.006 | adf.006 | True | True | None |\n", "| /path/plams/examples/JobAnalysis/plams_workdir/dftb.004 | dftb.004 | True | True | None |\n", "| /path/plams/examples/JobAnalysis/plams_workdir/adf.007 | adf.007 | True | True | None |\n", "| /path/plams/examples/JobAnalysis/plams_workdir/adf.008 | adf.008 | True | True | None |\n", "| /path/plams/examples/JobAnalysis/plams_workdir/dftb.005 | dftb.005 | True | True | None |\n", "| /path/plams/examples/JobAnalysis/plams_workdir/adf.009 | adf.009 | True | True | None |\n", "| /path/plams/examples/JobAnalysis/plams_workdir/adf.010 | adf.010 | True | True | None |\n", "| /path/plams/examples/JobAnalysis/plams_workdir/dftb.006 | dftb.006 | True | True | None |\n", "| /path/plams/examples/JobAnalysis/plams_workdir/adf.011 | adf.011 | True | True | None |\n", "| /path/plams/examples/JobAnalysis/plams_workdir/adf.012 | adf.012 | True | True | None |\n", "| /path/plams/examples/JobAnalysis/plams_workdir/dftb.007 | dftb.007 | True | True | None |\n", "| /path/plams/examples/JobAnalysis/plams_workdir/adf.013 | adf.013 | True | True | None |\n", "| /path/plams/examples/JobAnalysis/plams_workdir/adf.014 | adf.014 | True | True | None |\n", "| /path/plams/examples/JobAnalysis/plams_workdir/dftb.008 | dftb.008 | True | True | None |\n", "| /path/plams/examples/JobAnalysis/plams_workdir/adf.015 | adf.015 | True | True | None |\n", "| /path/plams/examples/JobAnalysis/plams_workdir/adf.016 | adf.016 | True | True | None |\n", "| /path/plams/examples/JobAnalysis/plams_workdir/neb | neb | False | False | NEB optimization did NOT converge |\n", "| /path/plams/examples/JobAnalysis/plams_workdir/neb.002 | neb.002 | True | True | None |" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "ja = ja.add_job(jobs[-2]).load_job(jobs[-1].path)\n", "ja.display_table()" ] }, { "cell_type": "markdown", "id": 
"730eb6b2-411f-4022-840b-d686a5608055", "metadata": {}, "source": [ "### Adding and Removing Fields" ] }, { "cell_type": "markdown", "id": "43418de2-3abf-49b8-b00a-0f9d249638e2", "metadata": {}, "source": [ "A range of common standard fields can be added with the `add_standard_field(s)` methods. In addition, fields deriving from the job settings can be added with the `add_settings_input_fields` method, and fields from the output rkfs with the `add_rkf_field` method. Custom fields can also be added with the `add_field` method, by defining a field key, value accessor and optional arguments like display name and value formatting.\n", "\n", "Fields can be removed by calling `remove_field` with the corresponding field key." ] }, { "cell_type": "code", "execution_count": 6, "id": "42073591-bc82-4577-b08b-652fd10baf07", "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "| Name | OK | Check | ErrorMsg | Formula | Smiles | CPUTime | SysTime | AmsGeneralEngine | InputAmsTask | InputAdfBasisType | InputAdfXcGga | InputAmsNebImages | InputAmsNebIterations | Energy [kJ/mol] |\n", "|---------|-------|-------|-----------------------------------|-------------------|-------------------|-----------|----------|------------------|----------------------|-------------------|---------------|-------------------|-----------------------|-----------------|\n", "| dftb | True | True | None | C2H6 | CC | 0.189760 | 0.047422 | dftb | SinglePoint | None | None | None | None | -19594.01 |\n", "| adf | True | True | None | C2H6 | CC | 3.702566 | 0.122056 | adf | SinglePoint | DZ | None | None | None | -3973.29 |\n", "| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... 
|\n", "| adf.016 | True | True | None | CH4O | CO | 18.829835 | 0.833251 | adf | GeometryOptimization | TZP | PBE | None | None | -2900.38 |\n", "| neb | False | False | NEB optimization did NOT converge | : CHN, final: CHN | : C=N, final: C#N | 0.471924 | 0.050081 | dftb | NEB | None | None | 9 | 10 | None |\n", "| neb.002 | True | True | None | : CHN, final: CHN | : C=N, final: C#N | 0.730961 | 0.090290 | dftb | NEB | None | None | 9 | 100 | -14936.53 |" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "ja = (\n", " ja.remove_field(\"Path\")\n", " .add_standard_fields([\"Formula\", \"Smiles\", \"CPUTime\", \"SysTime\"])\n", " .add_rkf_field(\"General\", \"engine\")\n", " .add_settings_input_fields()\n", " .add_field(\"Energy\", lambda j: j.results.get_energy(unit=\"kJ/mol\"), display_name=\"Energy [kJ/mol]\", fmt=\".2f\")\n", ")\n", "ja.display_table(max_rows=5)" ] }, { "cell_type": "markdown", "id": "f9641dfa-a5d3-4555-a587-1837ad21e1ef", "metadata": {}, "source": [ "In addition to the fluent syntax, both dictionary and dot syntaxes are also supported for adding and removing fields." 
] }, { "cell_type": "code", "execution_count": 7, "id": "bb4da583-2c81-4fb8-9bdc-d1ffc9fcdb4f", "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "| Name | OK | ErrorMsg | Formula | Smiles | CPUTime | AmsGeneralEngine | InputAmsTask | InputAdfBasisType | InputAdfXcGga | InputAmsNebImages | InputAmsNebIterations | Energy [kJ/mol] | AtomType | Charge | AtomCoords |\n", "|---------|-------|-----------------------------------|-------------------|-------------------|-----------|------------------|----------------------|-------------------|---------------|-------------------|-----------------------|-----------------|-----------------------------------|-----------------------------------|-----------------------------------|\n", "| dftb | True | None | C2H6 | CC | 0.189760 | dftb | SinglePoint | None | None | None | None | -19594.01 | ['C', 'C', 'H', 'H', 'H', 'H',... | [-0.07293185 -0.07372966 0.02... | [array([-0.74763668, 0.041837... |\n", "| adf | True | None | C2H6 | CC | 3.702566 | adf | SinglePoint | DZ | None | None | None | -3973.29 | ['C', 'C', 'H', 'H', 'H', 'H',... | [-0.83243445 -0.83187828 0.27... | [array([-0.74763668, 0.041837... |\n", "| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |\n", "| adf.016 | True | None | CH4O | CO | 18.829835 | adf | GeometryOptimization | TZP | PBE | None | None | -2900.38 | ['C', 'O', 'H', 'H', 'H', 'H'] | [ 0.58673094 -0.60299606 -0.10... | [array([-0.36298962, -0.021487... |\n", "| neb | False | NEB optimization did NOT conve... | : CHN, final: CHN | : C=N, final: C#N | 0.471924 | dftb | NEB | None | None | 9 | 10 | None | ['C', 'N', 'H'] | None | [array([0.46884763, 0.20209473... |\n", "| neb.002 | True | None | : CHN, final: CHN | : C=N, final: C#N | 0.730961 | dftb | NEB | None | None | 9 | 100 | -14936.53 | ['C', 'N', 'H'] | [-0.00732595 -0.21157426 0.21... | [array([0.56218708, 0.20551051... 
|" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import numpy as np\n", "\n", "ja[\"AtomType\"] = lambda j: [at.symbol for at in j.results.get_main_molecule()]\n", "ja.Charge = lambda j: j.results.get_charges()\n", "ja.AtomCoords = lambda j: [np.array(at.coords) for at in j.results.get_main_molecule()]\n", "\n", "del ja[\"Check\"]\n", "del ja.SysTime\n", "\n", "ja.display_table(max_rows=5, max_col_width=30)" ] }, { "cell_type": "markdown", "id": "3bce481d-494e-43c2-9a64-7d608d6e7e7f", "metadata": {}, "source": [ "### Processing Data" ] }, { "cell_type": "markdown", "id": "d40411a2-53d4-4df6-972a-bbdc788e4e5c", "metadata": {}, "source": [ "Once an initial analysis has been created, the data can be further processed, depending on the use case.\n", "For example, to inspect the difference between failed and successful jobs, jobs can be filtered down and irrelevant fields removed." ] }, { "cell_type": "code", "execution_count": 8, "id": "2b381c2f-f057-4443-b05a-d38adf18beb3", "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "| Name | OK | CPUTime | InputAmsNebIterations |\n", "|---------|-------|----------|-----------------------|\n", "| neb | False | 0.471924 | 10 |\n", "| neb.002 | True | 0.730961 | 100 |" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "ja_neb = (\n", " ja.filter_jobs(lambda data: data[\"InputAmsTask\"] == \"NEB\")\n", " .remove_field(\"AtomCoords\")\n", " .remove_uniform_fields(ignore_empty=True)\n", ")\n", "\n", "ja_neb.display_table()" ] }, { "cell_type": "markdown", "id": "8cc4f829-07c6-419c-97ff-110b28dd876f", "metadata": {}, "source": [ "Another use case may be to analyze the results from one or more jobs.\n", "For this, it can be useful to utilize the `expand` functionality to convert job(s) to multiple rows.\n", "During this process, fields selected for expansion will have their values extracted into individual rows, whilst other fields have 
their values duplicated." ] }, { "cell_type": "code", "execution_count": 9, "id": "1f298dff-948c-4b45-91b1-2670286f46d7", "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "| Name | CPUTime | InputAdfBasisType | InputAdfXcGga | Energy [kJ/mol] | AtomType | Charge | AtomCoords |\n", "|---------|----------|-------------------|---------------|-----------------|----------|---------------------|---------------------------------------------------|\n", "| adf.011 | 2.697854 | DZ | None | -1316.30 | O | -0.8416865250737331 | [-2.17062120e-04 3.82347777e-01 0.00000000e+00] |\n", "| adf.011 | 2.697854 | DZ | None | -1316.30 | H | 0.42084716070260286 | [-0.81250923 -0.19167629 0. ] |\n", "| adf.011 | 2.697854 | DZ | None | -1316.30 | H | 0.4208393643711281 | [ 0.8127263 -0.19067148 0. ] |\n", "| adf.012 | 4.089876 | TZP | PBE | -1363.77 | O | -0.6739805275850443 | [-2.46726007e-04 4.01580956e-01 0.00000000e+00] |\n", "| adf.012 | 4.089876 | TZP | PBE | -1363.77 | H | 0.33698188085180536 | [-0.76455997 -0.2012764 0. ] |\n", "| adf.012 | 4.089876 | TZP | PBE | -1363.77 | H | 0.33699864673323343 | [ 0.76480669 -0.20030455 0. ] |" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "ja_adf_expanded = (\n", " ja.filter_jobs(\n", " lambda data: data[\"InputAmsTask\"] == \"GeometryOptimization\"\n", " and data[\"InputAdfBasisType\"] is not None\n", " and data[\"Smiles\"] == \"O\"\n", " )\n", " .expand_field(\"AtomType\")\n", " .expand_field(\"Charge\")\n", " .expand_field(\"AtomCoords\")\n", " .remove_uniform_fields()\n", ")\n", "\n", "ja_adf_expanded.display_table()" ] }, { "cell_type": "markdown", "id": "7d0cd5a1-ab6d-47b5-a249-ebf6c5677d6b", "metadata": {}, "source": [ "For more nested values, the depth of expansion can also be selected to further flatten the data." 
] }, { "cell_type": "code", "execution_count": 10, "id": "764614dd-b085-4fa5-8ac4-08830b8778e7", "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "| Name | CPUTime | InputAdfBasisType | InputAdfXcGga | Energy [kJ/mol] | AtomType | Charge | AtomCoords | Coord |\n", "|---------|----------|-------------------|---------------|-----------------|----------|---------------------|-------------------------|-------|\n", "| adf.011 | 2.697854 | DZ | None | -1316.30 | O | -0.8416865250737331 | -0.00021706211955194217 | x |\n", "| adf.011 | 2.697854 | DZ | None | -1316.30 | O | -0.8416865250737331 | 0.38234777653349844 | y |\n", "| adf.011 | 2.697854 | DZ | None | -1316.30 | O | -0.8416865250737331 | 0.0 | z |\n", "| adf.011 | 2.697854 | DZ | None | -1316.30 | H | 0.42084716070260286 | -0.8125092343354401 | x |\n", "| adf.011 | 2.697854 | DZ | None | -1316.30 | H | 0.42084716070260286 | -0.19167629390344054 | y |\n", "| adf.011 | 2.697854 | DZ | None | -1316.30 | H | 0.42084716070260286 | 0.0 | z |\n", "| adf.011 | 2.697854 | DZ | None | -1316.30 | H | 0.4208393643711281 | 0.8127262964549918 | x |\n", "| adf.011 | 2.697854 | DZ | None | -1316.30 | H | 0.4208393643711281 | -0.19067148263005784 | y |\n", "| adf.011 | 2.697854 | DZ | None | -1316.30 | H | 0.4208393643711281 | 0.0 | z |\n", "| adf.012 | 4.089876 | TZP | PBE | -1363.77 | O | -0.6739805275850443 | -0.00024672600727009935 | x |\n", "| adf.012 | 4.089876 | TZP | PBE | -1363.77 | O | -0.6739805275850443 | 0.40158095623473306 | y |\n", "| adf.012 | 4.089876 | TZP | PBE | -1363.77 | O | -0.6739805275850443 | 0.0 | z |\n", "| adf.012 | 4.089876 | TZP | PBE | -1363.77 | H | 0.33698188085180536 | -0.7645599672263915 | x |\n", "| adf.012 | 4.089876 | TZP | PBE | -1363.77 | H | 0.33698188085180536 | -0.2012764045590436 | y |\n", "| adf.012 | 4.089876 | TZP | PBE | -1363.77 | H | 0.33698188085180536 | 0.0 | z |\n", "| adf.012 | 4.089876 | TZP | PBE | -1363.77 | H | 0.33699864673323343 | 0.7648066932336616 | x |\n", 
"| adf.012 | 4.089876 | TZP | PBE | -1363.77 | H | 0.33699864673323343 | -0.20030455167568945 | y |\n", "| adf.012 | 4.089876 | TZP | PBE | -1363.77 | H | 0.33699864673323343 | 0.0 | z |" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "ja_adf_expanded2 = ja_adf_expanded.add_field(\n", " \"Coord\", lambda j: [(\"x\", \"y\", \"z\") for _ in j.results.get_main_molecule()], expansion_depth=2\n", ").expand_field(\"AtomCoords\", depth=2)\n", "\n", "ja_adf_expanded2.display_table()" ] }, { "cell_type": "markdown", "id": "77d0c620-3ca7-4dfb-969f-c5a5abbad76b", "metadata": {}, "source": [ "Expansion can be undone with the corresponding `collapse` method. \n", "\n", "Fields can also be further filtered, modified or reordered to customize the analysis." ] }, { "cell_type": "code", "execution_count": 11, "id": "e15126b0-4689-413b-b374-1545ba1b737b", "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "| AtomType | Charge | Energy [kJ/mol] | CPUTime | Basis | InputAdfXcGga |\n", "|----------|---------|-----------------|---------|-------|---------------|\n", "| O | -0.8417 | -1316.30 | 2.70 | DZ | None |\n", "| H | 0.4208 | -1316.30 | 2.70 | DZ | None |\n", "| H | 0.4208 | -1316.30 | 2.70 | DZ | None |\n", "| O | -0.6740 | -1363.77 | 4.09 | TZP | PBE |\n", "| H | 0.3370 | -1363.77 | 4.09 | TZP | PBE |\n", "| H | 0.3370 | -1363.77 | 4.09 | TZP | PBE |" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "ja_adf = (\n", " ja_adf_expanded2.collapse_field(\"AtomCoords\")\n", " .collapse_field(\"Coord\")\n", " .filter_fields(lambda vals: all([not isinstance(v, list) for v in vals])) # remove arrays\n", " .remove_field(\"Name\")\n", " .format_field(\"CPUTime\", \".2f\")\n", " .format_field(\"Charge\", \".4f\")\n", " .rename_field(\"InputAdfBasisType\", \"Basis\")\n", " .reorder_fields([\"AtomType\", \"Charge\", \"Energy\"])\n", ")\n", "ja_adf.display_table()" ] }, { "cell_type": "markdown", "id": 
"62361885-1b81-44b4-b944-176acb33d17a", "metadata": {}, "source": [ "### Extracting Analysis Data" ] }, { "cell_type": "markdown", "id": "90b34dca-a351-4ec1-8646-1f38dbeed906", "metadata": {}, "source": [ "Analysis data can be extracted in a variety of ways.\n", "\n", "As has been demonstrated, a visual representation of the table can be easily generated using the `to_table` method (or `display_table` in a notebook).\n", "The format can be selected as markdown, html or rst. This will return the data with the specified display names and formatting." ] }, { "cell_type": "code", "execution_count": 12, "id": "1258b6ef-c2fb-42c2-a8c5-33cbf87ffbed", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+----------+---------+-----------------+---------+-------+---------------+\n", "| AtomType | Charge | Energy [kJ/mol] | CPUTime | Basis | InputAdfXcGga |\n", "+==========+=========+=================+=========+=======+===============+\n", "| O | -0.8417 | -1316.30 | 2.70 | DZ | None |\n", "+----------+---------+-----------------+---------+-------+---------------+\n", "| H | 0.4208 | -1316.30 | 2.70 | DZ | None |\n", "+----------+---------+-----------------+---------+-------+---------------+\n", "| H | 0.4208 | -1316.30 | 2.70 | DZ | None |\n", "+----------+---------+-----------------+---------+-------+---------------+\n", "| O | -0.6740 | -1363.77 | 4.09 | TZP | PBE |\n", "+----------+---------+-----------------+---------+-------+---------------+\n", "| H | 0.3370 | -1363.77 | 4.09 | TZP | PBE |\n", "+----------+---------+-----------------+---------+-------+---------------+\n", "| H | 0.3370 | -1363.77 | 4.09 | TZP | PBE |\n", "+----------+---------+-----------------+---------+-------+---------------+\n" ] } ], "source": [ "print(ja_adf.to_table(fmt=\"rst\"))" ] }, { "cell_type": "markdown", "id": "b1ef86ba-79e9-486f-8adc-ab81f23c949b", "metadata": {}, "source": [ "Alternatively, raw data can be retrieved via the `get_analysis` method, which 
returns a dictionary of analysis keys to values." ] }, { "cell_type": "code", "execution_count": 13, "id": "cce8f9c5-13e7-423d-999d-cd9df8f22a72", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'AtomType': ['O', 'H', 'H', 'O', 'H', 'H'], 'Charge': [-0.8416865250737331, 0.42084716070260286, 0.4208393643711281, -0.6739805275850443, 0.33698188085180536, 0.33699864673323343], 'Energy': [-1316.2997406426532, -1316.2997406426532, -1316.2997406426532, -1363.766294275197, -1363.766294275197, -1363.766294275197], 'CPUTime': [2.697854, 2.697854, 2.697854, 4.089876, 4.089876, 4.089876], 'InputAdfBasisType': ['DZ', 'DZ', 'DZ', 'TZP', 'TZP', 'TZP'], 'InputAdfXcGga': [None, None, None, 'PBE', 'PBE', 'PBE']}\n" ] } ], "source": [ "print(ja_adf.get_analysis())" ] }, { "cell_type": "markdown", "id": "c95fe2f9-585b-46cc-9122-f0afbfc034da", "metadata": {}, "source": [ "Data can also be easily written to a csv file using `to_csv_file`, to be exported to another program." ] }, { "cell_type": "code", "execution_count": 14, "id": "cd5031bb-08f8-4a8f-b0a0-724baeca56d1", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "AtomType,Charge,Energy,CPUTime,InputAdfBasisType,InputAdfXcGga\n", "O,-0.8416865250737331,-1316.2997406426532,2.697854,DZ,\n", "H,0.42084716070260286,-1316.2997406426532,2.697854,DZ,\n", "H,0.4208393643711281,-1316.2997406426532,2.697854,DZ,\n", "O,-0.6739805275850443,-1363.766294275197,4.089876,TZP,PBE\n", "H,0.33698188085180536,-1363.766294275197,4.089876,TZP,PBE\n", "H,0.33699864673323343,-1363.766294275197,4.089876,TZP,PBE\n", "\n" ] } ], "source": [ "csv_name = \"./tmp.csv\"\n", "ja_adf.to_csv_file(csv_name)\n", "\n", "with open(csv_name) as csv:\n", " print(csv.read())" ] }, { "cell_type": "markdown", "id": "ae737a2a-7490-4750-ae01-21b79c862d0c", "metadata": {}, "source": [ "Finally, for more complex data analysis, the results can be converted to a [pandas](https://pandas.pydata.org) dataframe. 
This is recommended for more involved data manipulations. Pandas can be installed using amspackages, i.e. with the command: `\"${AMSBIN}/amspackages\" install pandas`." ] }, { "cell_type": "code", "execution_count": 15, "id": "35abbc0f-8565-441d-a894-f36152cddb48", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " AtomType Charge Energy CPUTime InputAdfBasisType InputAdfXcGga\n", "0 O -0.841687 -1316.299741 2.697854 DZ None\n", "1 H 0.420847 -1316.299741 2.697854 DZ None\n", "2 H 0.420839 -1316.299741 2.697854 DZ None\n", "3 O -0.673981 -1363.766294 4.089876 TZP PBE\n", "4 H 0.336982 -1363.766294 4.089876 TZP PBE\n", "5 H 0.336999 -1363.766294 4.089876 TZP PBE\n" ] } ], "source": [ "try:\n", " import pandas\n", "\n", " df = ja_adf.to_dataframe()\n", " print(df)\n", "\n", "except ImportError:\n", "\n", " print(\n", " \"Pandas not available. Please install with amspackages to run this example '${AMSBIN}/amspackages install pandas'\"\n", " )" ] }, { "cell_type": "markdown", "id": "e8109745-0d11-4e40-a146-323b2d381c28", "metadata": {}, "source": [ "### Additional Analysis Methods" ] }, { "cell_type": "markdown", "id": "cbf488cc-7ca1-443c-b7c9-db4a317bb84f", "metadata": {}, "source": [ "The `JobAnalysis` class also has some additional built-in methods to aid with job analysis.\n", "\n", "For example, the `get_timeline` and `display_timeline` methods show pictorially when jobs started, how long they took to run and what their status is.\n", "\n", "This can be useful for visualizing the dependencies of jobs. Here you can see that the first 8 jobs started running in parallel, due to the `maxthreads` constraint, and the remaining jobs waited before starting. You can also see that the penultimate job failed." 
] }, { "cell_type": "code", "execution_count": 16, "id": "f5d5ab1e-56f9-46f0-b519-f4936490355d", "metadata": {}, "outputs": [ { "data": { "text/markdown": [ " +----------+----------------------+----------------------+----------------------+----------------------+----------------------+--------------+-------------+---------------+\n", " | JobName | ↓2025-05-15 09:44:37 | ↓2025-05-15 09:44:49 | ↓2025-05-15 09:45:00 | ↓2025-05-15 09:45:12 | ↓2025-05-15 09:45:23 | WaitDuration | RunDuration | TotalDuration |\n", " +==========+======================+======================+======================+======================+======================+==============+=============+===============+\n", " | dftb | ==> | | | | | 0s | 0s | 1s |\n", " +----------+----------------------+----------------------+----------------------+----------------------+----------------------+--------------+-------------+---------------+\n", " | adf | ========> | | | | | 0s | 4s | 4s |\n", " +----------+----------------------+----------------------+----------------------+----------------------+----------------------+--------------+-------------+---------------+\n", " | adf.002 | ================> | | | | | 0s | 9s | 9s |\n", " +----------+----------------------+----------------------+----------------------+----------------------+----------------------+--------------+-------------+---------------+\n", " | dftb.002 | ==> | | | | | 0s | 0s | 1s |\n", " +----------+----------------------+----------------------+----------------------+----------------------+----------------------+--------------+-------------+---------------+\n", " | adf.003 | ==================== | ===================> | | | | 0s | 22s | 22s |\n", " +----------+----------------------+----------------------+----------------------+----------------------+----------------------+--------------+-------------+---------------+\n", " | adf.004 | ==================== | ==================== | ==================== | ===================* | > | 0s | 45s | 
45s |\n", " +----------+----------------------+----------------------+----------------------+----------------------+----------------------+--------------+-------------+---------------+\n", " | dftb.003 | ===> | | | | | 0s | 1s | 1s |\n", " +----------+----------------------+----------------------+----------------------+----------------------+----------------------+--------------+-------------+---------------+\n", " | adf.005 | ======> | | | | | 0s | 3s | 3s |\n", " +----------+----------------------+----------------------+----------------------+----------------------+----------------------+--------------+-------------+---------------+\n", " | adf.006 | --=======> | | | | | 0s | 5s | 5s |\n", " +----------+----------------------+----------------------+----------------------+----------------------+----------------------+--------------+-------------+---------------+\n", " | dftb.004 | ..=> | | | | | 1s | 0s | 1s |\n", " +----------+----------------------+----------------------+----------------------+----------------------+----------------------+--------------+-------------+---------------+\n", " | adf.007 | ..-===========> | | | | | 1s | 6s | 8s |\n", " +----------+----------------------+----------------------+----------------------+----------------------+----------------------+--------------+-------------+---------------+\n", " | adf.008 | ...+================ | =======> | | | | 1s | 13s | 15s |\n", " +----------+----------------------+----------------------+----------------------+----------------------+----------------------+--------------+-------------+---------------+\n", " | dftb.005 | ...---=> | | | | | 2s | 1s | 3s |\n", " +----------+----------------------+----------------------+----------------------+----------------------+----------------------+--------------+-------------+---------------+\n", " | adf.009 | ......-===> | | | | | 3s | 2s | 5s |\n", " 
+----------+----------------------+----------------------+----------------------+----------------------+----------------------+--------------+-------------+---------------+\n", " | adf.010 | .......-====> | | | | | 4s | 2s | 6s |\n", " +----------+----------------------+----------------------+----------------------+----------------------+----------------------+--------------+-------------+---------------+\n", " | dftb.006 | ........-=> | | | | | 4s | 1s | 5s |\n", " +----------+----------------------+----------------------+----------------------+----------------------+----------------------+--------------+-------------+---------------+\n", " | adf.011 | .........-=====> | | | | | 5s | 3s | 8s |\n", " +----------+----------------------+----------------------+----------------------+----------------------+----------------------+--------------+-------------+---------------+\n", " | adf.012 | ..........========> | | | | | 5s | 4s | 10s |\n", " +----------+----------------------+----------------------+----------------------+----------------------+----------------------+--------------+-------------+---------------+\n", " | dftb.007 | ..........--> | | | | | 5s | 1s | 6s |\n", " +----------+----------------------+----------------------+----------------------+----------------------+----------------------+--------------+-------------+---------------+\n", " | adf.013 | ............-=====> | | | | | 6s | 3s | 10s |\n", " +----------+----------------------+----------------------+----------------------+----------------------+----------------------+--------------+-------------+---------------+\n", " | adf.014 | .............-====== | ======> | | | | 7s | 7s | 14s |\n", " +----------+----------------------+----------------------+----------------------+----------------------+----------------------+--------------+-------------+---------------+\n", " | dftb.008 | ..............-=> | | | | | 7s | 1s | 9s |\n", " 
+----------+----------------------+----------------------+----------------------+----------------------+----------------------+--------------+-------------+---------------+\n", " | adf.015 | ...............-==== | ===============> | | | | 8s | 11s | 19s |\n", " +----------+----------------------+----------------------+----------------------+----------------------+----------------------+--------------+-------------+---------------+\n", " | adf.016 | ................==== | ==================== | =============> | | | 9s | 20s | 29s |\n", " +----------+----------------------+----------------------+----------------------+----------------------+----------------------+--------------+-------------+---------------+\n", " | neb | ................--== | X | | | | 9s | 1s | 10s |\n", " +----------+----------------------+----------------------+----------------------+----------------------+----------------------+--------------+-------------+---------------+\n", " | neb.002 | ..................== | > | | | | 10s | 1s | 11s |\n", " +----------+----------------------+----------------------+----------------------+----------------------+----------------------+--------------+-------------+---------------+" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "ja.display_timeline(fmt=\"rst\")" ] }, { "cell_type": "code", "execution_count": null, "id": "2fc077a1-5398-49f4-a9f9-e6772b56f512", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.16" } }, "nbformat": 4, "nbformat_minor": 5 }