Benchmarking Reaction Energies across Basis Sets¶
Run reaction-energy workflow with multiple basis sets and compare the resulting trends in a reproducible PLAMS benchmark.
Initial Imports¶
import sys
import multiprocessing
from scm.plams import (
JobRunner,
config,
from_smiles,
Settings,
AMSJob,
JobAnalysis,
jobs_in_directory,
view,
plot_image_grid,
)
import numpy as np
Set Up Job Runner¶
Set up job runner, running as many jobs as possible in parallel.
config.default_jobrunner = JobRunner(parallel=True, maxjobs=multiprocessing.cpu_count())
config.job.runscript.nproc = 1
Set Up Molecules¶
Create the molecules we want to use in our benchmark from SMILES.
# The molecules we want to use in our benchmark:
mol_smiles = {"Methane": "C", "Ethane": "C-C", "Ethylene": "C=C", "Acetylene": "C#C"}
molecules = {}
for name, smiles in mol_smiles.items():
# Compute 10 conformers, optimize with UFF and pick the lowest in energy.
molecules[name] = from_smiles(smiles, nconfs=10, forcefield="uff")[0]
plot_image_grid({k: view(v) for k, v in molecules.items()}, rows=1);
Initialize Calculation Settings¶
Set up the settings which are common across jobs. The basis type is added later for each job.
common_settings = Settings()
common_settings.input.ams.Task = "SinglePoint"
common_settings.input.ams.System.Symmetrize = "Yes"
common_settings.input.adf.Basis.Core = "None"
basis = ["QZ4P", "TZ2P", "TZP", "DZP", "DZ", "SZ"]
reference_basis = "QZ4P"
Run Calculations¶
results = {}
jobs = []
for bas in basis:
for name, molecule in molecules.items():
with jobs_in_directory(name):
settings = common_settings.copy()
settings.input.adf.Basis.Type = bas
job = AMSJob(name=f"{name}_{bas}", molecule=molecule, settings=settings)
jobs.append(job)
results[(name, bas)] = job.run()
for job in jobs:
job.ok()
[02.04|11:36:45] JOB Methane_QZ4P STARTED
[02.04|11:36:45] JOB Ethane_QZ4P STARTED
[02.04|11:36:45] JOB Ethylene_QZ4P STARTED
[02.04|11:36:45] JOB Acetylene_QZ4P STARTED
[02.04|11:36:45] JOB Methane_TZ2P STARTED
[02.04|11:36:45] JOB Ethane_TZ2P STARTED
[02.04|11:36:45] JOB Ethylene_TZ2P STARTED
[02.04|11:36:45] JOB Acetylene_TZ2P STARTED
[02.04|11:36:45] JOB Methane_QZ4P RUNNING
[02.04|11:36:45] JOB Methane_TZP STARTED
... output trimmed ....
[02.04|11:36:55] Waiting for job Acetylene_DZP to finish
[02.04|11:36:55] JOB Acetylene_DZP FINISHED
[02.04|11:36:55] JOB Acetylene_DZP SUCCESSFUL
[02.04|11:36:55] Waiting for job Ethylene_DZ to finish
[02.04|11:36:55] JOB Ethane_SZ FINISHED
[02.04|11:36:55] JOB Ethane_SZ SUCCESSFUL
[02.04|11:36:55] JOB Acetylene_SZ FINISHED
[02.04|11:36:55] JOB Acetylene_SZ SUCCESSFUL
[02.04|11:36:56] JOB Ethylene_DZ FINISHED
[02.04|11:36:56] JOB Ethylene_DZ SUCCESSFUL
Results¶
Extract the energy from each calculation. Calculate the average absolute error in bond energy per atom for each basis set.
import pandas as pd
results = []
for job in jobs:
mol = job.results.get_main_molecule()
results.append(
{
"Formula": mol.get_formula(),
"Basis": job.settings.input.adf.basis.type,
"NAtoms": len(mol),
"Energy": job.results.get_energy(unit="kcal/mol"),
}
)
results_df = pd.DataFrame(results)
results_df
Formula | Basis | NAtoms | Energy | |
|---|---|---|---|---|
0 | CH4 | QZ4P | 5 | -572.626762 |
1 | C2H6 | QZ4P | 8 | -973.019461 |
2 | C2H4 | QZ4P | 6 | -770.413350 |
3 | C2H2 | QZ4P | 4 | -556.759053 |
4 | CH4 | TZ2P | 5 | -572.110159 |
5 | C2H6 | TZ2P | 8 | -971.882019 |
6 | C2H4 | TZ2P | 6 | -769.432903 |
7 | C2H2 | TZ2P | 4 | -555.667290 |
8 | CH4 | TZP | 5 | -571.044897 |
9 | C2H6 | TZP | 8 | -970.075889 |
10 | C2H4 | TZP | 6 | -767.327518 |
11 | C2H2 | TZP | 4 | -552.956286 |
12 | CH4 | DZP | 5 | -569.119016 |
13 | C2H6 | DZP | 8 | -966.091644 |
14 | C2H4 | DZP | 6 | -764.413298 |
15 | C2H2 | DZP | 4 | -550.646181 |
16 | CH4 | DZ | 5 | -560.934431 |
17 | C2H6 | DZ | 8 | -951.166697 |
18 | C2H4 | DZ | 6 | -750.174511 |
19 | C2H2 | DZ | 4 | -537.100802 |
20 | CH4 | SZ | 5 | -723.550123 |
21 | C2H6 | SZ | 8 | -1216.914233 |
22 | C2H4 | SZ | 6 | -934.655820 |
23 | C2H2 | SZ | 4 | -647.502984 |
ref_results_df = results_df[results_df["Basis"] == "QZ4P"]
comp_results_df = results_df[results_df["Basis"] != "QZ4P"]
comp_results_df = comp_results_df.merge(ref_results_df, on="Formula", suffixes=["", " (Ref)"])
comp_results_df["Average Error"] = (
abs(comp_results_df["Energy"] - comp_results_df["Energy (Ref)"]) / comp_results_df["NAtoms"]
)
comp_results_df
Formula | Basis | NAtoms | Energy | Basis (Ref) | NAtoms (Ref) | Energy (Ref) | Average Error | |
|---|---|---|---|---|---|---|---|---|
0 | CH4 | TZ2P | 5 | -572.110159 | QZ4P | 5 | -572.626762 | 0.103321 |
1 | CH4 | TZP | 5 | -571.044897 | QZ4P | 5 | -572.626762 | 0.316373 |
2 | CH4 | DZP | 5 | -569.119016 | QZ4P | 5 | -572.626762 | 0.701549 |
3 | CH4 | DZ | 5 | -560.934431 | QZ4P | 5 | -572.626762 | 2.338466 |
4 | CH4 | SZ | 5 | -723.550123 | QZ4P | 5 | -572.626762 | 30.184672 |
5 | C2H6 | TZ2P | 8 | -971.882019 | QZ4P | 8 | -973.019461 | 0.142180 |
6 | C2H6 | TZP | 8 | -970.075889 | QZ4P | 8 | -973.019461 | 0.367947 |
7 | C2H6 | DZP | 8 | -966.091644 | QZ4P | 8 | -973.019461 | 0.865977 |
8 | C2H6 | DZ | 8 | -951.166697 | QZ4P | 8 | -973.019461 | 2.731595 |
9 | C2H6 | SZ | 8 | -1216.914233 | QZ4P | 8 | -973.019461 | 30.486847 |
10 | C2H4 | TZ2P | 6 | -769.432903 | QZ4P | 6 | -770.413350 | 0.163408 |
11 | C2H4 | TZP | 6 | -767.327518 | QZ4P | 6 | -770.413350 | 0.514305 |
12 | C2H4 | DZP | 6 | -764.413298 | QZ4P | 6 | -770.413350 | 1.000009 |
13 | C2H4 | DZ | 6 | -750.174511 | QZ4P | 6 | -770.413350 | 3.373140 |
14 | C2H4 | SZ | 6 | -934.655820 | QZ4P | 6 | -770.413350 | 27.373745 |
15 | C2H2 | TZ2P | 4 | -555.667290 | QZ4P | 4 | -556.759053 | 0.272941 |
16 | C2H2 | TZP | 4 | -552.956286 | QZ4P | 4 | -556.759053 | 0.950692 |
17 | C2H2 | DZP | 4 | -550.646181 | QZ4P | 4 | -556.759053 | 1.528218 |
18 | C2H2 | DZ | 4 | -537.100802 | QZ4P | 4 | -556.759053 | 4.914563 |
19 | C2H2 | SZ | 4 | -647.502984 | QZ4P | 4 | -556.759053 | 22.685983 |
benchmark_results_df = comp_results_df.groupby("Basis", as_index=False)["Average Error"].mean()
benchmark_results_df.rename(columns={"Average Error": "MAE [kcal/mol]"}, inplace=True)
benchmark_results_df.sort_values(by="MAE [kcal/mol]", inplace=True, ignore_index=True)
benchmark_results_df
Basis | MAE [kcal/mol] | |
|---|---|---|
0 | TZ2P | 0.170462 |
1 | TZP | 0.537329 |
2 | DZP | 1.023938 |
3 | DZ | 3.339441 |
4 | SZ | 27.682812 |
See also¶
Python Script¶
#!/usr/bin/env python
# coding: utf-8
# ## Initial Imports
import sys
import multiprocessing
from scm.plams import (
JobRunner,
config,
from_smiles,
Settings,
AMSJob,
JobAnalysis,
jobs_in_directory,
view,
plot_image_grid,
)
import numpy as np
# ## Set Up Job Runner
# Set up job runner, running as many jobs as possible in parallel.
config.default_jobrunner = JobRunner(parallel=True, maxjobs=multiprocessing.cpu_count())
config.job.runscript.nproc = 1
# ## Set Up Molecules
# Create the molecules we want to use in our benchmark from SMILES.
# The molecules we want to use in our benchmark:
mol_smiles = {"Methane": "C", "Ethane": "C-C", "Ethylene": "C=C", "Acetylene": "C#C"}
molecules = {}
for name, smiles in mol_smiles.items():
# Compute 10 conformers, optimize with UFF and pick the lowest in energy.
molecules[name] = from_smiles(smiles, nconfs=10, forcefield="uff")[0]
plot_image_grid({k: view(v) for k, v in molecules.items()}, rows=1, save_path="picture1.png")
# ## Initialize Calculation Settings
# Set up the settings which are common across jobs. The basis type is added later for each job.
common_settings = Settings()
common_settings.input.ams.Task = "SinglePoint"
common_settings.input.ams.System.Symmetrize = "Yes"
common_settings.input.adf.Basis.Core = "None"
basis = ["QZ4P", "TZ2P", "TZP", "DZP", "DZ", "SZ"]
reference_basis = "QZ4P"
# ## Run Calculations
results = {}
jobs = []
for bas in basis:
for name, molecule in molecules.items():
with jobs_in_directory(name):
settings = common_settings.copy()
settings.input.adf.Basis.Type = bas
job = AMSJob(name=f"{name}_{bas}", molecule=molecule, settings=settings)
jobs.append(job)
results[(name, bas)] = job.run()
for job in jobs:
job.ok()
# ## Results
# Extract the energy from each calculation. Calculate the average absolute error in bond energy per atom for each basis set.
import pandas as pd
results = []
for job in jobs:
mol = job.results.get_main_molecule()
results.append(
{
"Formula": mol.get_formula(),
"Basis": job.settings.input.adf.basis.type,
"NAtoms": len(mol),
"Energy": job.results.get_energy(unit="kcal/mol"),
}
)
results_df = pd.DataFrame(results)
print(results_df)
ref_results_df = results_df[results_df["Basis"] == "QZ4P"]
comp_results_df = results_df[results_df["Basis"] != "QZ4P"]
comp_results_df = comp_results_df.merge(ref_results_df, on="Formula", suffixes=["", " (Ref)"])
comp_results_df["Average Error"] = (
abs(comp_results_df["Energy"] - comp_results_df["Energy (Ref)"]) / comp_results_df["NAtoms"]
)
print(comp_results_df)
benchmark_results_df = comp_results_df.groupby("Basis", as_index=False)["Average Error"].mean()
benchmark_results_df.rename(columns={"Average Error": "MAE [kcal/mol]"}, inplace=True)
benchmark_results_df.sort_values(by="MAE [kcal/mol]", inplace=True, ignore_index=True)
print(benchmark_results_df)