#!/usr/bin/env amspython
# coding: utf-8

# ## Create Example Jobs
# To begin with, create a variety of AMS jobs with different settings, engines and calculation types.

from scm.plams import from_smiles, AMSJob, PlamsError, Settings, Molecule, Atom
from scm.libbase import UnifiedChemicalSystem as ChemicalSystem
from scm.input_classes.drivers import AMS
from scm.input_classes.engines import DFTB
from scm.utils.conversions import plams_molecule_to_chemsys


def example_job_dftb(smiles, task, use_chemsys=False):
    """Create an unstarted DFTB job for a molecule generated from a SMILES string.

    :param smiles: SMILES string for the input molecule.
    :param task: AMS driver task, e.g. "SinglePoint" or "GeometryOptimization".
    :param use_chemsys: when truthy, convert the PLAMS molecule to a UnifiedChemicalSystem.
    :return: an AMSJob named "dftb" (not yet run).
    """
    # Generate molecule from smiles
    mol = from_smiles(smiles)
    if use_chemsys:
        mol = plams_molecule_to_chemsys(mol)

    # Set up calculation settings using PISA (the typed python input system)
    sett = Settings()
    sett.runscript.nproc = 1
    driver = AMS()
    driver.Task = task
    driver.Engine = DFTB()
    sett.input = driver

    return AMSJob(molecule=mol, settings=sett, name="dftb")


def example_job_adf(smiles, task, basis, gga=None, use_chemsys=False):
    """Create an unstarted ADF job for a molecule generated from a SMILES string.

    :param smiles: SMILES string for the input molecule.
    :param task: AMS driver task, e.g. "SinglePoint" or "GeometryOptimization".
    :param basis: ADF basis set type, e.g. "DZ" or "TZP".
    :param gga: optional GGA exchange-correlation functional, e.g. "PBE".
    :param use_chemsys: when truthy, convert the PLAMS molecule to a UnifiedChemicalSystem.
    :return: an AMSJob named "adf" (not yet run).
    """
    # Generate molecule from smiles
    mol = from_smiles(smiles)
    if use_chemsys:
        mol = plams_molecule_to_chemsys(mol)

    # Set up calculation settings using standard (string-keyed) settings
    sett = Settings()
    sett.runscript.nproc = 1
    sett.input.AMS.Task = task
    sett.input.ADF.Basis.Type = basis
    if gga:
        sett.input.ADF.XC.GGA = gga

    return AMSJob(molecule=mol, settings=sett, name="adf")


def example_job_neb(iterations, use_chemsys=False):
    """Create an unstarted HCN <-> HNC nudged-elastic-band (NEB) job with the DFTB engine.

    :param iterations: maximum number of NEB iterations.
    :param use_chemsys: when truthy, convert both endpoint molecules to UnifiedChemicalSystems.
    :return: an AMSJob named "neb" (not yet run).
    """
    # Set up initial and final geometries by hand
    main_molecule = Molecule()
    main_molecule.add_atom(Atom(symbol="C", coords=(0, 0, 0)))
    main_molecule.add_atom(Atom(symbol="N", coords=(1.18, 0, 0)))
    main_molecule.add_atom(Atom(symbol="H", coords=(2.196, 0, 0)))
    final_molecule = main_molecule.copy()
    final_molecule.atoms[1].x = 1.163
    final_molecule.atoms[2].x = -1.078

    # The "" key is the main system, "final" the NEB end point
    mol = {"": main_molecule, "final": final_molecule}
    if use_chemsys:
        mol = {k: plams_molecule_to_chemsys(v) for k, v in mol.items()}

    # Set up calculation settings.
    # NOTE: AMS input keys are case-insensitive; "AMS" is used here for
    # consistency with example_job_adf above.
    sett = Settings()
    sett.runscript.nproc = 1
    sett.input.AMS.Task = "NEB"
    sett.input.AMS.NEB.Images = 9
    sett.input.AMS.NEB.Iterations = iterations
    # Bare attribute access auto-creates an empty DFTB branch in the Settings,
    # which selects the DFTB engine with default options.
    sett.input.DFTB

    # Fix: in the original source the `return` was separated from the AMSJob(...)
    # expression, which would have returned None.
    return AMSJob(molecule=mol, settings=sett, name="neb")


# Now, run a selection of them.
from scm.plams import config, JobRunner

config.default_jobrunner = JobRunner(parallel=True, maxthreads=8)

smiles = ["CC", "C", "O", "CO"]
tasks = ["SinglePoint", "GeometryOptimization"]
engines = ["DFTB", "ADF"]  # illustrative list of the engines exercised below

jobs = []
for i, s in enumerate(smiles):
    for t in tasks:
        # Alternate the chemsys conversion to exercise both molecule representations
        job_dftb = example_job_dftb(s, t, use_chemsys=bool(i % 2))
        job_adf1 = example_job_adf(s, t, "DZ", use_chemsys=True)
        job_adf2 = example_job_adf(s, t, "TZP", "PBE")
        jobs += [job_dftb, job_adf1, job_adf2]

job_neb1 = example_job_neb(10)
job_neb2 = example_job_neb(100, use_chemsys=True)
jobs += [job_neb1, job_neb2]

for job in jobs:
    job.run()

# ## Job Analysis
# ### Adding and Loading Jobs
#
# Jobs can be loaded by passing job objects directly, or loading from a path.
from scm.plams import JobAnalysis

ja = JobAnalysis(jobs=jobs[:10], paths=[j.path for j in jobs[10:-2]])

# Jobs can also be added or removed after initialization.
ja.add_job(jobs[-2]).load_job(jobs[-1].path).display_table()

# ### Adding and Removing Fields
# A range of common standard fields can be added with the `add_standard_field(s)` methods.
# Custom fields can also be added with the `add_field` method, by defining a field key, value accessor and optional arguments like display name and value formatting.
# Fields can be removed by calling `remove_field` with the corresponding field key.
(
    ja.remove_field("Path")
    .add_standard_fields(["Formula", "Smiles", "CPUTime", "SysTime"])
    .add_settings_input_fields()
    .add_field("Energy", lambda j: j.results.get_energy(unit="kJ/mol"), display_name="Energy [kJ/mol]", fmt=".2f")
    .display_table(max_rows=5)
)

# In addition to the fluent syntax, both dictionary and dot syntaxes are also supported for adding and removing fields.
import numpy as np

# Dictionary syntax adds a field keyed by string ...
ja["AtomType"] = lambda job: [at.symbol for at in job.results.get_main_molecule()]
# ... and dot syntax works just as well.
ja.Charge = lambda job: job.results.get_charges()
ja.AtomCoords = lambda job: [np.array(at.coords) for at in job.results.get_main_molecule()]
# Fields are removed with `del`, again via either syntax.
del ja["Check"]
del ja.SysTime
ja.display_table(max_rows=5, max_col_width=30)

# ### Processing Data
# Once an initial analysis has been created, the data can be further processed, depending on the use case.
# For example, to inspect the difference between failed and successful jobs, jobs can be filtered
# down and irrelevant fields removed. The fluent methods mutate the analysis in place, so they can
# equally be written as sequential statements.
ja_neb = ja.copy()
ja_neb.filter_jobs(lambda row: row["InputAmsTask"] == "NEB")
ja_neb.remove_field("AtomCoords")
ja_neb.remove_uniform_fields(ignore_empty=True)
ja_neb.display_table()

# Another use case may be to analyze the results from one or more jobs.
# For this, it can be useful to utilize the `expand` functionality to convert job(s) to multiple rows.
# During this process, fields selected for expansion will have their values extracted into individual
# rows, whilst other fields have their values duplicated.
def _is_water_adf_optimization(row):
    # Keep geometry optimizations of water that ran with an ADF basis set.
    return (
        row["InputAmsTask"] == "GeometryOptimization"
        and row["InputAdfBasisType"] is not None
        and row["Smiles"] == "O"
    )

ja_adf = ja.copy()
ja_adf.filter_jobs(_is_water_adf_optimization)
for per_atom_field in ("AtomType", "Charge", "AtomCoords"):
    ja_adf.expand_field(per_atom_field)
ja_adf.remove_uniform_fields()
ja_adf.display_table()

# For more nested values, the depth of expansion can also be selected to further flatten the data.
ja_adf.add_field(
    "Coord",
    lambda job: [("x", "y", "z") for _ in job.results.get_main_molecule()],
    expansion_depth=2,
)
ja_adf.expand_field("AtomCoords", depth=2)
ja_adf.display_table()

# Expansion can be undone with the corresponding `collapse` method.
#
# Fields can be also further filtered, modified or reordered to customize the analysis.
# Undo the per-coordinate expansion, then tidy the table down to scalar columns.
ja_adf.collapse_field("AtomCoords")
ja_adf.collapse_field("Coord")
# Drop any field whose values are lists (i.e. still array-valued after collapsing).
ja_adf.filter_fields(lambda values: not any(isinstance(v, list) for v in values))
ja_adf.remove_field("Name")
ja_adf.format_field("CPUTime", ".2f")
ja_adf.format_field("Charge", ".4f")
ja_adf.rename_field("InputAdfBasisType", "Basis")
ja_adf.reorder_fields(["AtomType", "Charge", "Energy"])
ja_adf.display_table()

# ### Extracting Analysis Data
# Analysis data can be extracted in a variety of ways.
#
# As has been demonstrated, a visual representation of the table can be easily generated using the `to_table` method (or `display_table` in a notebook).
# The format can be selected as markdown, html or rst. This will return the data with the specified display names and formatting.
print(ja_adf.to_table(fmt="rst"))

# Alternatively, raw data can be retrieved via the `get_analysis` method, which returns a dictionary of analysis keys to values.
print(ja_adf.get_analysis())

# Data can also be easily written to a csv file using `to_csv_file`, to be exported to another program.
csv_name = "./tmp.csv"
ja_adf.to_csv_file(csv_name)
with open(csv_name) as csv_file:
    print(csv_file.read())

# Finally, for more complex data analysis, the results can be converted to a [pandas](https://pandas.pydata.org) dataframe. This is recommended for more involved data manipulations, and can be installed using amspackages i.e. using the command: `"${AMSBIN}/amspackages" install pandas`.
try:
    import pandas

    frame = ja_adf.to_dataframe()
    print(frame)
except ImportError:
    print(
        "Pandas not available. Please install with amspackages to run this example '${AMSBIN}/amspackages install pandas'"
    )

# ### Additional Analysis Methods
# The `JobAnalysis` class does have some additional built in methods to aid with job analysis.
#
# For example, the `get_timeline` and `display_timeline` methods show pictorially when jobs started, how long they took to run and what their status is.
#
# This can be useful for visualising the dependencies of jobs.
# Here you can see that the first 8 jobs started running in parallel, due to the `maxthreads`
# constraint, and the remaining jobs waited before starting. Also that the penultimate job failed.
# Fix: this explanatory text had lost its leading `#` markers, making it a syntax error as code.
ja.display_timeline(fmt="rst")