Benchmarking Reaction Energies across Basis Sets

Run reaction-energy workflow with multiple basis sets and compare the resulting trends in a reproducible PLAMS benchmark.

Initial Imports

import sys
import multiprocessing
from scm.plams import (
    JobRunner,
    config,
    from_smiles,
    Settings,
    AMSJob,
    JobAnalysis,
    jobs_in_directory,
    view,
    plot_image_grid,
)
import numpy as np

Set Up Job Runner

Set up job runner, running as many jobs as possible in parallel.

config.default_jobrunner = JobRunner(parallel=True, maxjobs=multiprocessing.cpu_count())
config.job.runscript.nproc = 1

Set Up Molecules

Create the molecules we want to use in our benchmark from SMILES.

# The molecules we want to use in our benchmark:
mol_smiles = {"Methane": "C", "Ethane": "C-C", "Ethylene": "C=C", "Acetylene": "C#C"}
molecules = {}
for name, smiles in mol_smiles.items():
    # Compute 10 conformers, optimize with UFF and pick the lowest in energy.
    molecules[name] = from_smiles(smiles, nconfs=10, forcefield="uff")[0]

plot_image_grid({k: view(v) for k, v in molecules.items()}, rows=1);
image generated from notebook

Initialize Calculation Settings

Set up the settings which are common across jobs. The basis type is added later for each job.

common_settings = Settings()
common_settings.input.ams.Task = "SinglePoint"
common_settings.input.ams.System.Symmetrize = "Yes"
common_settings.input.adf.Basis.Core = "None"
basis = ["QZ4P", "TZ2P", "TZP", "DZP", "DZ", "SZ"]
reference_basis = "QZ4P"

Run Calculations

results = {}
jobs = []
for bas in basis:
    for name, molecule in molecules.items():
        with jobs_in_directory(name):
            settings = common_settings.copy()
            settings.input.adf.Basis.Type = bas
            job = AMSJob(name=f"{name}_{bas}", molecule=molecule, settings=settings)
            jobs.append(job)
            results[(name, bas)] = job.run()

for job in jobs:
    job.ok()
[02.04|11:36:45] JOB Methane_QZ4P STARTED
[02.04|11:36:45] JOB Ethane_QZ4P STARTED
[02.04|11:36:45] JOB Ethylene_QZ4P STARTED
[02.04|11:36:45] JOB Acetylene_QZ4P STARTED
[02.04|11:36:45] JOB Methane_TZ2P STARTED
[02.04|11:36:45] JOB Ethane_TZ2P STARTED
[02.04|11:36:45] JOB Ethylene_TZ2P STARTED
[02.04|11:36:45] JOB Acetylene_TZ2P STARTED
[02.04|11:36:45] JOB Methane_QZ4P RUNNING
[02.04|11:36:45] JOB Methane_TZP STARTED
... output trimmed ....
[02.04|11:36:55] Waiting for job Acetylene_DZP to finish
[02.04|11:36:55] JOB Acetylene_DZP FINISHED
[02.04|11:36:55] JOB Acetylene_DZP SUCCESSFUL
[02.04|11:36:55] Waiting for job Ethylene_DZ to finish
[02.04|11:36:55] JOB Ethane_SZ FINISHED
[02.04|11:36:55] JOB Ethane_SZ SUCCESSFUL
[02.04|11:36:55] JOB Acetylene_SZ FINISHED
[02.04|11:36:55] JOB Acetylene_SZ SUCCESSFUL
[02.04|11:36:56] JOB Ethylene_DZ FINISHED
[02.04|11:36:56] JOB Ethylene_DZ SUCCESSFUL

Results

Extract the energy from each calculation. Calculate the average absolute error in bond energy per atom for each basis set.

import pandas as pd

results = []
for job in jobs:
    mol = job.results.get_main_molecule()
    results.append(
        {
            "Formula": mol.get_formula(),
            "Basis": job.settings.input.adf.basis.type,
            "NAtoms": len(mol),
            "Energy": job.results.get_energy(unit="kcal/mol"),
        }
    )

results_df = pd.DataFrame(results)
results_df

Formula

Basis

NAtoms

Energy

0

CH4

QZ4P

5

-572.626762

1

C2H6

QZ4P

8

-973.019461

2

C2H4

QZ4P

6

-770.413350

3

C2H2

QZ4P

4

-556.759053

4

CH4

TZ2P

5

-572.110159

5

C2H6

TZ2P

8

-971.882019

6

C2H4

TZ2P

6

-769.432903

7

C2H2

TZ2P

4

-555.667290

8

CH4

TZP

5

-571.044897

9

C2H6

TZP

8

-970.075889

10

C2H4

TZP

6

-767.327518

11

C2H2

TZP

4

-552.956286

12

CH4

DZP

5

-569.119016

13

C2H6

DZP

8

-966.091644

14

C2H4

DZP

6

-764.413298

15

C2H2

DZP

4

-550.646181

16

CH4

DZ

5

-560.934431

17

C2H6

DZ

8

-951.166697

18

C2H4

DZ

6

-750.174511

19

C2H2

DZ

4

-537.100802

20

CH4

SZ

5

-723.550123

21

C2H6

SZ

8

-1216.914233

22

C2H4

SZ

6

-934.655820

23

C2H2

SZ

4

-647.502984

ref_results_df = results_df[results_df["Basis"] == "QZ4P"]
comp_results_df = results_df[results_df["Basis"] != "QZ4P"]
comp_results_df = comp_results_df.merge(ref_results_df, on="Formula", suffixes=["", " (Ref)"])
comp_results_df["Average Error"] = (
    abs(comp_results_df["Energy"] - comp_results_df["Energy (Ref)"]) / comp_results_df["NAtoms"]
)
comp_results_df

Formula

Basis

NAtoms

Energy

Basis (Ref)

NAtoms (Ref)

Energy (Ref)

Average Error

0

CH4

TZ2P

5

-572.110159

QZ4P

5

-572.626762

0.103321

1

CH4

TZP

5

-571.044897

QZ4P

5

-572.626762

0.316373

2

CH4

DZP

5

-569.119016

QZ4P

5

-572.626762

0.701549

3

CH4

DZ

5

-560.934431

QZ4P

5

-572.626762

2.338466

4

CH4

SZ

5

-723.550123

QZ4P

5

-572.626762

30.184672

5

C2H6

TZ2P

8

-971.882019

QZ4P

8

-973.019461

0.142180

6

C2H6

TZP

8

-970.075889

QZ4P

8

-973.019461

0.367947

7

C2H6

DZP

8

-966.091644

QZ4P

8

-973.019461

0.865977

8

C2H6

DZ

8

-951.166697

QZ4P

8

-973.019461

2.731595

9

C2H6

SZ

8

-1216.914233

QZ4P

8

-973.019461

30.486847

10

C2H4

TZ2P

6

-769.432903

QZ4P

6

-770.413350

0.163408

11

C2H4

TZP

6

-767.327518

QZ4P

6

-770.413350

0.514305

12

C2H4

DZP

6

-764.413298

QZ4P

6

-770.413350

1.000009

13

C2H4

DZ

6

-750.174511

QZ4P

6

-770.413350

3.373140

14

C2H4

SZ

6

-934.655820

QZ4P

6

-770.413350

27.373745

15

C2H2

TZ2P

4

-555.667290

QZ4P

4

-556.759053

0.272941

16

C2H2

TZP

4

-552.956286

QZ4P

4

-556.759053

0.950692

17

C2H2

DZP

4

-550.646181

QZ4P

4

-556.759053

1.528218

18

C2H2

DZ

4

-537.100802

QZ4P

4

-556.759053

4.914563

19

C2H2

SZ

4

-647.502984

QZ4P

4

-556.759053

22.685983

benchmark_results_df = comp_results_df.groupby("Basis", as_index=False)["Average Error"].mean()
benchmark_results_df.rename(columns={"Average Error": "MAE [kcal/mol]"}, inplace=True)
benchmark_results_df.sort_values(by="MAE [kcal/mol]", inplace=True, ignore_index=True)
benchmark_results_df

Basis

MAE [kcal/mol]

0

TZ2P

0.170462

1

TZP

0.537329

2

DZP

1.023938

3

DZ

3.339441

4

SZ

27.682812

See also

Python Script

#!/usr/bin/env python
# coding: utf-8

# ## Initial Imports

import sys
import multiprocessing
from scm.plams import (
    JobRunner,
    config,
    from_smiles,
    Settings,
    AMSJob,
    JobAnalysis,
    jobs_in_directory,
    view,
    plot_image_grid,
)
import numpy as np


# ## Set Up Job Runner
# Set up job runner, running as many jobs as possible in parallel.

config.default_jobrunner = JobRunner(parallel=True, maxjobs=multiprocessing.cpu_count())
config.job.runscript.nproc = 1


# ## Set Up Molecules
# Create the molecules we want to use in our benchmark from SMILES.

# The molecules we want to use in our benchmark:
mol_smiles = {"Methane": "C", "Ethane": "C-C", "Ethylene": "C=C", "Acetylene": "C#C"}
molecules = {}
for name, smiles in mol_smiles.items():
    # Compute 10 conformers, optimize with UFF and pick the lowest in energy.
    molecules[name] = from_smiles(smiles, nconfs=10, forcefield="uff")[0]

plot_image_grid({k: view(v) for k, v in molecules.items()}, rows=1, save_path="picture1.png")
# ## Initialize Calculation Settings
# Set up the settings which are common across jobs. The basis type is added later for each job.

common_settings = Settings()
common_settings.input.ams.Task = "SinglePoint"
common_settings.input.ams.System.Symmetrize = "Yes"
common_settings.input.adf.Basis.Core = "None"


basis = ["QZ4P", "TZ2P", "TZP", "DZP", "DZ", "SZ"]
reference_basis = "QZ4P"


# ## Run Calculations

results = {}
jobs = []
for bas in basis:
    for name, molecule in molecules.items():
        with jobs_in_directory(name):
            settings = common_settings.copy()
            settings.input.adf.Basis.Type = bas
            job = AMSJob(name=f"{name}_{bas}", molecule=molecule, settings=settings)
            jobs.append(job)
            results[(name, bas)] = job.run()

for job in jobs:
    job.ok()


# ## Results
# Extract the energy from each calculation. Calculate the average absolute error in bond energy per atom for each basis set.

import pandas as pd

results = []
for job in jobs:
    mol = job.results.get_main_molecule()
    results.append(
        {
            "Formula": mol.get_formula(),
            "Basis": job.settings.input.adf.basis.type,
            "NAtoms": len(mol),
            "Energy": job.results.get_energy(unit="kcal/mol"),
        }
    )

results_df = pd.DataFrame(results)
print(results_df)


ref_results_df = results_df[results_df["Basis"] == "QZ4P"]
comp_results_df = results_df[results_df["Basis"] != "QZ4P"]
comp_results_df = comp_results_df.merge(ref_results_df, on="Formula", suffixes=["", " (Ref)"])
comp_results_df["Average Error"] = (
    abs(comp_results_df["Energy"] - comp_results_df["Energy (Ref)"]) / comp_results_df["NAtoms"]
)
print(comp_results_df)


benchmark_results_df = comp_results_df.groupby("Basis", as_index=False)["Average Error"].mean()
benchmark_results_df.rename(columns={"Average Error": "MAE [kcal/mol]"}, inplace=True)
benchmark_results_df.sort_values(by="MAE [kcal/mol]", inplace=True, ignore_index=True)
print(benchmark_results_df)