Complete Python code

#!/usr/bin/env amspython
# coding: utf-8

# ## Create Example Jobs

# To begin with, create a variety of AMS jobs with different settings, engines and calculation types.

from scm.plams import from_smiles, AMSJob, PlamsError, Settings, Molecule, Atom
from scm.libbase import UnifiedChemicalSystem as ChemicalSystem
from scm.input_classes.drivers import AMS
from scm.input_classes.engines import DFTB
from scm.utils.conversions import plams_molecule_to_chemsys


def example_job_dftb(smiles, task, use_chemsys=False):
    # Generate molecule from smiles
    mol = from_smiles(smiles)
    if use_chemsys:
        mol = plams_molecule_to_chemsys(mol)

    # Set up calculation settings using PISA
    sett = Settings()
    sett.runscript.nproc = 1
    driver = AMS()
    driver.Task = task
    driver.Engine = DFTB()
    sett.input = driver
    return AMSJob(molecule=mol, settings=sett, name="dftb")


def example_job_adf(smiles, task, basis, gga=None, use_chemsys=False):
    # Generate molecule from smiles
    mol = from_smiles(smiles)
    if use_chemsys:
        mol = plams_molecule_to_chemsys(mol)

    # Set up calculation settings using standard settings
    sett = Settings()
    sett.runscript.nproc = 1
    sett.input.AMS.Task = task
    sett.input.ADF.Basis.Type = basis
    if gga:
        sett.input.ADF.XC.GGA = gga
    return AMSJob(molecule=mol, settings=sett, name="adf")


def example_job_neb(iterations, use_chemsys=False):
    # Set up molecules
    main_molecule = Molecule()
    main_molecule.add_atom(Atom(symbol="C", coords=(0, 0, 0)))
    main_molecule.add_atom(Atom(symbol="N", coords=(1.18, 0, 0)))
    main_molecule.add_atom(Atom(symbol="H", coords=(2.196, 0, 0)))
    final_molecule = main_molecule.copy()
    final_molecule.atoms[1].x = 1.163
    final_molecule.atoms[2].x = -1.078

    mol = {"": main_molecule, "final": final_molecule}

    if use_chemsys:
        mol = {k: plams_molecule_to_chemsys(v) for k, v in mol.items()}

    # Set up calculation settings
    sett = Settings()
    sett.runscript.nproc = 1
    sett.input.ams.Task = "NEB"
    sett.input.ams.NEB.Images = 9
    sett.input.ams.NEB.Iterations = iterations
    sett.input.DFTB  # accessing the key creates an empty DFTB block, i.e. selects the DFTB engine with default settings

    return AMSJob(molecule=mol, settings=sett, name="neb")


# Now, create and run a selection of these jobs in parallel.

from scm.plams import config, JobRunner

config.default_jobrunner = JobRunner(parallel=True, maxthreads=8)

smiles = ["CC", "C", "O", "CO"]
tasks = ["SinglePoint", "GeometryOptimization"]
engines = ["DFTB", "ADF"]
jobs = []
for i, s in enumerate(smiles):
    for j, t in enumerate(tasks):
        job_dftb = example_job_dftb(s, t, use_chemsys=i % 2)
        job_adf1 = example_job_adf(s, t, "DZ", use_chemsys=True)
        job_adf2 = example_job_adf(s, t, "TZP", "PBE")
        jobs += [job_dftb, job_adf1, job_adf2]

job_neb1 = example_job_neb(10)
job_neb2 = example_job_neb(100, use_chemsys=True)
jobs += [job_neb1, job_neb2]

for j in jobs:
    j.run()


# ## Job Analysis

# ### Adding and Loading Jobs
#
# Jobs can be added to the analysis by passing job objects directly, or loaded from the path of a previously run job.

from scm.plams import JobAnalysis


ja = JobAnalysis(jobs=jobs[:10], paths=[j.path for j in jobs[10:-2]])


# Jobs can also be added or removed after initialization.

ja.add_job(jobs[-2]).load_job(jobs[-1].path).display_table()
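

# As noted, jobs can likewise be removed again. A minimal sketch, assuming a `remove_job`
# method that mirrors `add_job`; here the removal is performed on a copy, so the main
# analysis is left untouched.

ja_copy = ja.copy()
ja_copy.remove_job(jobs[-2])
ja_copy.display_table(max_rows=5)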


# ### Adding and Removing Fields

# A range of common standard fields can be added with the `add_standard_field` and `add_standard_fields` methods.
# Custom fields can be added with the `add_field` method, by providing a field key, a value accessor and optional arguments such as a display name and value formatting.
# Fields can be removed by calling `remove_field` with the corresponding field key.

(
    ja.remove_field("Path")
    .add_standard_fields(["Formula", "Smiles", "CPUTime", "SysTime"])
    .add_settings_input_fields()
    .add_field("Energy", lambda j: j.results.get_energy(unit="kJ/mol"), display_name="Energy [kJ/mol]", fmt=".2f")
    .display_table(max_rows=5)
)


# In addition to the fluent syntax, dictionary and dot syntaxes are supported for adding and removing fields.

import numpy as np

ja["AtomType"] = lambda j: [at.symbol for at in j.results.get_main_molecule()]
ja.Charge = lambda j: j.results.get_charges()
ja.AtomCoords = lambda j: [np.array(at.coords) for at in j.results.get_main_molecule()]

del ja["Check"]
del ja.SysTime

ja.display_table(max_rows=5, max_col_width=30)


# ### Processing Data

# Once an initial analysis has been created, the data can be further processed, depending on the use case.
# For example, to inspect the difference between failed and successful jobs, jobs can be filtered down and irrelevant fields removed.

ja_neb = (
    ja.copy()
    .filter_jobs(lambda data: data["InputAmsTask"] == "NEB")
    .remove_field("AtomCoords")
    .remove_uniform_fields(ignore_empty=True)
)

ja_neb.display_table()


# Another use case is to analyze the results of one or more jobs.
# For this, the `expand` functionality can be used to convert a single job into multiple rows.
# During this process, fields selected for expansion have their values extracted into individual rows, whilst other fields have their values duplicated.

ja_adf = (
    ja.copy()
    .filter_jobs(
        lambda data: data["InputAmsTask"] == "GeometryOptimization"
        and data["InputAdfBasisType"] is not None
        and data["Smiles"] == "O"
    )
    .expand_field("AtomType")
    .expand_field("Charge")
    .expand_field("AtomCoords")
    .remove_uniform_fields()
)

ja_adf.display_table()


# For more deeply nested values, the depth of expansion can be selected to further flatten the data.

(
    ja_adf.add_field("Coord", lambda j: [("x", "y", "z") for _ in j.results.get_main_molecule()], expansion_depth=2)
    .expand_field("AtomCoords", depth=2)
    .display_table()
)


# Expansion can be undone with the corresponding `collapse` method.
#
# Fields can also be further filtered, modified or reordered to customize the analysis.

(
    ja_adf.collapse_field("AtomCoords")
    .collapse_field("Coord")
    .filter_fields(lambda vals: all(not isinstance(v, list) for v in vals))  # remove list-valued fields
    .remove_field("Name")
    .format_field("CPUTime", ".2f")
    .format_field("Charge", ".4f")
    .rename_field("InputAdfBasisType", "Basis")
    .reorder_fields(["AtomType", "Charge", "Energy"])
    .display_table()
)


# ### Extracting Analysis Data

# Analysis data can be extracted in a variety of ways.
#
# As demonstrated above, a formatted table can be generated using the `to_table` method (or displayed directly with `display_table` in a notebook).
# The table format can be selected as markdown, html or rst; the returned table uses the specified display names and value formatting.

print(ja_adf.to_table(fmt="rst"))
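

# The same table can also be produced in one of the other formats mentioned above,
# for example markdown (assuming the format name is passed in the same way as `rst`):

print(ja_adf.to_table(fmt="markdown"))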


# Alternatively, raw data can be retrieved via the `get_analysis` method, which returns a dictionary of analysis keys to values.

print(ja_adf.get_analysis())
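

# Individual fields can then be read straight from this dictionary. A small sketch, assuming
# it is keyed by the field keys and holds one value per row (here the "Energy" field added earlier):

energies = ja_adf.get_analysis()["Energy"]
print(energies)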


# Data can also be written to a csv file using `to_csv_file`, for export to other programs.

csv_name = "./tmp.csv"
ja_adf.to_csv_file(csv_name)

with open(csv_name) as csv_file:
    print(csv_file.read())
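

# As a quick check, the exported file can be read back with Python's built-in csv module.
# A minimal sketch, assuming the default comma delimiter:

import csv

with open(csv_name, newline="") as csv_file:
    rows = list(csv.DictReader(csv_file))
print(rows[0])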


# Finally, for more complex data analysis, the results can be converted to a [pandas](https://pandas.pydata.org) dataframe. Pandas is recommended for more involved data manipulation, and can be installed using amspackages, e.g. with the command: `"${AMSBIN}/amspackages" install pandas`.

try:
    import pandas

    df = ja_adf.to_dataframe()
    print(df)

except ImportError:
    print(
        "Pandas not available. Please install it with amspackages to run this example: '${AMSBIN}/amspackages install pandas'"
    )


# ### Additional Analysis Methods

# The `JobAnalysis` class also provides some additional built-in methods to aid with job analysis.
#
# For example, the `get_timeline` and `display_timeline` methods show pictorially when jobs started, how long they took to run and what their final status was.
#
# This can be useful for visualising the dependencies of jobs. Here you can see that only the first 8 jobs started running in parallel, due to the `maxthreads` constraint, and that the remaining jobs had to wait before starting. It also shows that the penultimate job failed.

ja.display_timeline(fmt="rst")
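

# The timeline is also available as a plain string via `get_timeline`, e.g. for writing to a
# log file. A minimal sketch, assuming `get_timeline` accepts the same `fmt` argument as
# `display_timeline` (the output filename here is just an example):

timeline = ja.get_timeline(fmt="rst")
with open("./timeline.rst", "w") as timeline_file:
    timeline_file.write(timeline)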