#!/usr/bin/env python
# coding: utf-8

# ## Create Example Jobs

# To begin with, we create a variety of AMS jobs with different settings, tasks, engines and calculation types.
#
# This allows us to generate diverse example single point/geometry optimization calculations with DFTB, ADF etc.

from scm.plams import from_smiles, AMSJob, PlamsError, Settings, Molecule, Atom
from scm.base import ChemicalSystem
from scm.input_classes.drivers import AMS
from scm.input_classes.engines import DFTB
from scm.utils.conversions import plams_molecule_to_chemsys


def example_job_dftb(smiles, task, use_chemsys=False):
    """Create a serial DFTB ``AMSJob`` for the molecule built from *smiles*.

    :param smiles: SMILES string used to generate the input molecule
    :param task: AMS task name, e.g. "SinglePoint" or "GeometryOptimization"
    :param use_chemsys: when True, convert the PLAMS molecule to a ChemicalSystem
    :return: configured ``AMSJob`` named "dftb"
    """
    # Generate molecule from smiles
    mol = from_smiles(smiles)
    if use_chemsys:
        mol = plams_molecule_to_chemsys(mol)

    # Set up calculation settings using PISA (the typed input classes)
    sett = Settings()
    sett.runscript.nproc = 1
    driver = AMS()
    driver.Task = task
    driver.Engine = DFTB()
    sett.input = driver

    return AMSJob(molecule=mol, settings=sett, name="dftb")


def example_job_adf(smiles, task, basis, gga=None, use_chemsys=False):
    """Create a serial ADF ``AMSJob`` for the molecule built from *smiles*.

    :param smiles: SMILES string used to generate the input molecule
    :param task: AMS task name, e.g. "SinglePoint" or "GeometryOptimization"
    :param basis: ADF basis set type, e.g. "DZ" or "TZP"
    :param gga: optional GGA exchange-correlation functional, e.g. "PBE"
    :param use_chemsys: when True, convert the PLAMS molecule to a ChemicalSystem
    :return: configured ``AMSJob`` named "adf"
    """
    # Generate molecule from smiles
    mol = from_smiles(smiles)
    if use_chemsys:
        mol = plams_molecule_to_chemsys(mol)

    # Set up calculation settings using standard (string-keyed) settings
    sett = Settings()
    sett.runscript.nproc = 1
    sett.input.AMS.Task = task
    sett.input.ADF.Basis.Type = basis
    if gga:
        sett.input.ADF.XC.GGA = gga

    return AMSJob(molecule=mol, settings=sett, name="adf")


def example_job_neb(iterations, use_chemsys=False):
    """Create a DFTB nudged elastic band (NEB) job for the HCN isomerization.

    The initial and final images differ only in the x-coordinates of the
    N and H atoms.

    :param iterations: maximum number of NEB iterations
    :param use_chemsys: when True, convert the PLAMS molecules to ChemicalSystems
    :return: configured ``AMSJob`` named "neb"
    """
    # Set up initial and final molecules
    main_molecule = Molecule()
    main_molecule.add_atom(Atom(symbol="C", coords=(0, 0, 0)))
    main_molecule.add_atom(Atom(symbol="N", coords=(1.18, 0, 0)))
    main_molecule.add_atom(Atom(symbol="H", coords=(2.196, 0, 0)))
    final_molecule = main_molecule.copy()
    final_molecule.atoms[1].x = 1.163
    final_molecule.atoms[2].x = -1.078
    mol = {"": main_molecule, "final": final_molecule}
    if use_chemsys:
        mol = {k: plams_molecule_to_chemsys(v) for k, v in mol.items()}

    # Set up calculation settings
    sett = Settings()
    sett.runscript.nproc = 1
    sett.input.ams.Task = "NEB"
    sett.input.ams.NEB.Images = 9
    sett.input.ams.NEB.Iterations = iterations
    # Bare access is deliberate: Settings autovivifies missing keys, so this
    # creates an empty DFTB block, selecting the DFTB engine with defaults.
    sett.input.DFTB

    return AMSJob(molecule=mol, settings=sett, name="neb")


# Now, we create a selection of jobs covering different systems and settings:

from scm.plams import config, JobRunner

config.default_jobrunner = JobRunner(parallel=True, maxthreads=8)

smiles = ["CC", "C", "O", "CO"]
tasks = ["SinglePoint", "GeometryOptimization"]
engines = ["DFTB", "ADF"]

jobs = []
for i, s in enumerate(smiles):
    for t in tasks:
        # Alternate between PLAMS Molecule and ChemicalSystem inputs
        job_dftb = example_job_dftb(s, t, use_chemsys=bool(i % 2))
        job_adf1 = example_job_adf(s, t, "DZ", use_chemsys=True)
        job_adf2 = example_job_adf(s, t, "TZP", "PBE")
        jobs += [job_dftb, job_adf1, job_adf2]

job_neb1 = example_job_neb(10)
job_neb2 = example_job_neb(100, use_chemsys=True)
jobs += [job_neb1, job_neb2]

# Launch all jobs (in parallel, per the job runner configured above) ...
for j in jobs:
    j.run()

# ... then block until each one has finished
for j in jobs:
    j.ok()


# ## Job Analysis

# The `JobAnalysis` tool can be used to extract data from a large number of jobs, and analyse the results.

# ### Adding and Loading Jobs
#
# Jobs can be loaded by passing job objects directly to the `JobAnalysis`, or alternatively loading from a path. This latter option is useful for loading jobs run previously in other scripts.

from scm.plams import JobAnalysis

ja = JobAnalysis(jobs=jobs)
# ja = JobAnalysis(paths=[j.path for j in jobs])  # alternatively load jobs from a set of paths

# Additional jobs can also be added or removed after initialization of the `JobAnalysis` tool.

extra_job = example_job_dftb("CCC", "SinglePoint")
extra_job.run()
extra_job.ok()

ja = ja.add_job(extra_job)

# The loaded jobs and the initial analysis fields can be shown by displaying the `JobAnalysis` table:

print(ja.to_table())

# ### Adding and Removing Fields

# On initialization, some analysis fields are automatically included in the analysis (`Path`, `Name`, `OK`, `Check` and `ErrorMsg`). These are useful to see which jobs were loaded, and whether they succeeded.
# However, one or more of these fields can be removed with the `remove_field` method.

ja = ja.remove_field("Path")

# A range of other common fields can be added with the `add_standard_field(s)` method.

ja = ja.add_standard_fields(["Formula", "Smiles", "CPUTime", "SysTime"])

# In addition, all fields deriving from the job input settings can be added with the `add_settings_input_fields` method. By default, these will have names corresponding to the concatenated settings entries. Individual settings field can be added with the `add_settings_field` method. This is useful to see the differences in the input settings of various jobs which may have succeeded/failed.

ja = ja.add_settings_input_fields()

# For output results, fields from the rkfs can be added with the `add_rkf_field` method, using a specified rkf file (default `ams.rkf`), section and variable.

ja = ja.add_rkf_field("General", "engine")

# Finally, custom fields can also be added with the `add_field` method, by defining a field key, value accessor and optional arguments like the display name and value formatting. This is most useful to extract results from jobs using built-in methods on the job results class.

# The methods return the analysis object itself, so custom fields can be
# attached in a single fluent chain:
ja = (
    ja.add_field(
        "Energy",
        lambda job: job.results.get_energy(unit="kJ/mol"),
        display_name="Energy [kJ/mol]",
        fmt=".2f",
    )
    .add_field("AtomType", lambda job: [at.symbol for at in job.results.get_main_molecule()])
    .add_field("Charge", lambda job: job.results.get_charges())
)

print(ja.to_table(max_rows=5))

# ### Processing Data

# Once an initial analysis has been created, the data can be further processed, depending on the use case.

# For example, to inspect the difference between failed and successful jobs, jobs can be filtered down and irrelevant fields removed.
#
# Here we first filter the jobs to those which have the `NEB` task:

ja_neb = ja.filter_jobs(lambda data: data["InputAmsTask"] == "NEB")

# Then we remove the "uniform fields" i.e. fields where all the values are the same.
# This lets us remove the noise and focus on the fields which have differences.

ja_neb = ja_neb.remove_uniform_fields(ignore_empty=True)

print(ja_neb.to_table())

# Another use case may be to analyze the results from one or more jobs.

# For this, it can be useful to utilize the `expand` functionality to convert job(s) to multiple rows.

# During this process, fields selected for expansion will have their values extracted into individual rows, whilst other fields have their values duplicated.
#
# First we filter to a single job, the geometry optimization of water:

ja_adf_water = ja.filter_jobs(
    lambda data: (
        data["InputAmsTask"] == "GeometryOptimization"
        and data["InputAdfBasisType"] is not None
        and data["Smiles"] == "O"
    )
)

print(ja_adf_water.to_table())

# Then we "expand" a given field to flatten the arrays and have one row per entry in the array. This lets us see the charge per atom for each job:

ja_adf_water_expanded = ja_adf_water.expand_field("AtomType").expand_field("Charge").remove_uniform_fields()

print(ja_adf_water_expanded.to_table())

# Expansion can be undone with the corresponding `collapse` method.
#
# Fields can be also further filtered, modified or reordered to customize the analysis. This example also illustrates the "fluent" syntax of the `JobAnalysis` tool, whereby multiple operations can be chained together.

ja_adf = (
    ja_adf_water_expanded.remove_field("Name")
    .format_field("CPUTime", ".2f")
    .format_field("Charge", ".4f")
    .rename_field("InputAdfBasisType", "Basis")
    # NOTE(review): this previously renamed "InputAdfBasisType" a second time,
    # which no longer exists after the rename above. The GGA field generated by
    # add_settings_input_fields from ADF.XC.GGA is "InputAdfXcGga" — confirm
    # against the table output above.
    .rename_field("InputAdfXcGga", "GGA")
    .reorder_fields(["AtomType", "Charge", "Energy"])
)

print(ja_adf.to_table())

# ### Extracting Analysis Data

# Analysis data can be extracted in a variety of ways.
#
# As has been demonstrated, a visual representation of the table can be easily generated using the `to_table` method (or `display_table` in a notebook).

# The format can be selected as markdown, html or rst. This will return the data with the specified display names and formatting.
print(ja_adf.to_table(fmt="rst"))

# Alternatively, raw data can be retrieved via the `get_analysis` method, which returns a dictionary of analysis keys to values.

print(ja_adf.get_analysis())

# Data can also be easily written to a csv file using `to_csv_file`, to be exported to another program.

# Write the analysis out, then echo the file contents for inspection.
csv_name = "./tmp.csv"
ja_adf.to_csv_file(csv_name)

with open(csv_name) as csv_file:
    contents = csv_file.read()
print(contents)

# Finally, for more complex data analysis, the results can be converted to a [pandas](https://pandas.pydata.org) dataframe. This is recommended for more involved data manipulations.

df = ja_adf.to_dataframe()

print(df)