Benchmarking Reaction Energies across Basis Sets¶

Run reaction-energy workflow with multiple basis sets and compare the resulting trends in a reproducible PLAMS benchmark.

Downloads: Notebook | Script ?

Requires: AMS2026 or later

../_images/basis_set_benchmark_5_0_8de33cd9.png

Initial Imports¶

import sys
import multiprocessing
from scm.plams import (
    JobRunner,
    config,
    from_smiles,
    Settings,
    AMSJob,
    JobAnalysis,
    jobs_in_directory,
    view,
    plot_image_grid,
)
import numpy as np

Set Up Job Runner¶

Set up job runner, running as many jobs as possible in parallel.

config.default_jobrunner = JobRunner(parallel=True, maxjobs=multiprocessing.cpu_count())
config.job.runscript.nproc = 1

Set Up Molecules¶

Create the molecules we want to use in our benchmark from SMILES.

# The molecules we want to use in our benchmark:
mol_smiles = {"Methane": "C", "Ethane": "C-C", "Ethylene": "C=C", "Acetylene": "C#C"}
molecules = {}
for name, smiles in mol_smiles.items():
    # Compute 10 conformers, optimize with UFF and pick the lowest in energy.
    molecules[name] = from_smiles(smiles, nconfs=10, forcefield="uff")[0]

plot_image_grid({k: view(v) for k, v in molecules.items()}, rows=1);

Initialize Calculation Settings¶

Set up the settings which are common across jobs. The basis type is added later for each job.

common_settings = Settings()
common_settings.input.ams.Task = "SinglePoint"
common_settings.input.ams.System.Symmetrize = "Yes"
common_settings.input.adf.Basis.Core = "None"

basis = ["QZ4P", "TZ2P", "TZP", "DZP", "DZ", "SZ"]
reference_basis = "QZ4P"

Run Calculations¶

results = {}
jobs = []
for bas in basis:
    for name, molecule in molecules.items():
        with jobs_in_directory(name):
            settings = common_settings.copy()
            settings.input.adf.Basis.Type = bas
            job = AMSJob(name=f"{name}_{bas}", molecule=molecule, settings=settings)
            jobs.append(job)
            results[(name, bas)] = job.run()

for job in jobs:
    job.ok()

[02.04|11:36:45] JOB Methane_QZ4P STARTED
[02.04|11:36:45] JOB Ethane_QZ4P STARTED
[02.04|11:36:45] JOB Ethylene_QZ4P STARTED
[02.04|11:36:45] JOB Acetylene_QZ4P STARTED
[02.04|11:36:45] JOB Methane_TZ2P STARTED
[02.04|11:36:45] JOB Ethane_TZ2P STARTED
[02.04|11:36:45] JOB Ethylene_TZ2P STARTED
[02.04|11:36:45] JOB Acetylene_TZ2P STARTED
[02.04|11:36:45] JOB Methane_QZ4P RUNNING
[02.04|11:36:45] JOB Methane_TZP STARTED
... output trimmed ....
[02.04|11:36:55] Waiting for job Acetylene_DZP to finish
[02.04|11:36:55] JOB Acetylene_DZP FINISHED
[02.04|11:36:55] JOB Acetylene_DZP SUCCESSFUL
[02.04|11:36:55] Waiting for job Ethylene_DZ to finish
[02.04|11:36:55] JOB Ethane_SZ FINISHED
[02.04|11:36:55] JOB Ethane_SZ SUCCESSFUL
[02.04|11:36:55] JOB Acetylene_SZ FINISHED
[02.04|11:36:55] JOB Acetylene_SZ SUCCESSFUL
[02.04|11:36:56] JOB Ethylene_DZ FINISHED
[02.04|11:36:56] JOB Ethylene_DZ SUCCESSFUL

Results¶

Extract the energy from each calculation. Calculate the average absolute error in bond energy per atom for each basis set.

import pandas as pd

results = []
for job in jobs:
    mol = job.results.get_main_molecule()
    results.append(
        {
            "Formula": mol.get_formula(),
            "Basis": job.settings.input.adf.basis.type,
            "NAtoms": len(mol),
            "Energy": job.results.get_energy(unit="kcal/mol"),
        }
    )

results_df = pd.DataFrame(results)
results_df

	Formula	Basis	NAtoms	Energy
0	CH4	QZ4P	5	-572.626762
1	C2H6	QZ4P	8	-973.019461
2	C2H4	QZ4P	6	-770.413350
3	C2H2	QZ4P	4	-556.759053
4	CH4	TZ2P	5	-572.110159
5	C2H6	TZ2P	8	-971.882019
6	C2H4	TZ2P	6	-769.432903
7	C2H2	TZ2P	4	-555.667290
8	CH4	TZP	5	-571.044897
9	C2H6	TZP	8	-970.075889
10	C2H4	TZP	6	-767.327518
11	C2H2	TZP	4	-552.956286
12	CH4	DZP	5	-569.119016
13	C2H6	DZP	8	-966.091644
14	C2H4	DZP	6	-764.413298
15	C2H2	DZP	4	-550.646181
16	CH4	DZ	5	-560.934431
17	C2H6	DZ	8	-951.166697
18	C2H4	DZ	6	-750.174511
19	C2H2	DZ	4	-537.100802
20	CH4	SZ	5	-723.550123
21	C2H6	SZ	8	-1216.914233
22	C2H4	SZ	6	-934.655820
23	C2H2	SZ	4	-647.502984

ref_results_df = results_df[results_df["Basis"] == "QZ4P"]
comp_results_df = results_df[results_df["Basis"] != "QZ4P"]
comp_results_df = comp_results_df.merge(ref_results_df, on="Formula", suffixes=["", " (Ref)"])
comp_results_df["Average Error"] = (
    abs(comp_results_df["Energy"] - comp_results_df["Energy (Ref)"]) / comp_results_df["NAtoms"]
)
comp_results_df

	Formula	Basis	NAtoms	Energy	Basis (Ref)	NAtoms (Ref)	Energy (Ref)	Average Error
0	CH4	TZ2P	5	-572.110159	QZ4P	5	-572.626762	0.103321
1	CH4	TZP	5	-571.044897	QZ4P	5	-572.626762	0.316373
2	CH4	DZP	5	-569.119016	QZ4P	5	-572.626762	0.701549
3	CH4	DZ	5	-560.934431	QZ4P	5	-572.626762	2.338466
4	CH4	SZ	5	-723.550123	QZ4P	5	-572.626762	30.184672
5	C2H6	TZ2P	8	-971.882019	QZ4P	8	-973.019461	0.142180
6	C2H6	TZP	8	-970.075889	QZ4P	8	-973.019461	0.367947
7	C2H6	DZP	8	-966.091644	QZ4P	8	-973.019461	0.865977
8	C2H6	DZ	8	-951.166697	QZ4P	8	-973.019461	2.731595
9	C2H6	SZ	8	-1216.914233	QZ4P	8	-973.019461	30.486847
10	C2H4	TZ2P	6	-769.432903	QZ4P	6	-770.413350	0.163408
11	C2H4	TZP	6	-767.327518	QZ4P	6	-770.413350	0.514305
12	C2H4	DZP	6	-764.413298	QZ4P	6	-770.413350	1.000009
13	C2H4	DZ	6	-750.174511	QZ4P	6	-770.413350	3.373140
14	C2H4	SZ	6	-934.655820	QZ4P	6	-770.413350	27.373745
15	C2H2	TZ2P	4	-555.667290	QZ4P	4	-556.759053	0.272941
16	C2H2	TZP	4	-552.956286	QZ4P	4	-556.759053	0.950692
17	C2H2	DZP	4	-550.646181	QZ4P	4	-556.759053	1.528218
18	C2H2	DZ	4	-537.100802	QZ4P	4	-556.759053	4.914563
19	C2H2	SZ	4	-647.502984	QZ4P	4	-556.759053	22.685983

benchmark_results_df = comp_results_df.groupby("Basis", as_index=False)["Average Error"].mean()
benchmark_results_df.rename(columns={"Average Error": "MAE [kcal/mol]"}, inplace=True)
benchmark_results_df.sort_values(by="MAE [kcal/mol]", inplace=True, ignore_index=True)
benchmark_results_df

	Basis	MAE [kcal/mol]
0	TZ2P	0.170462
1	TZP	0.537329
2	DZP	1.023938
3	DZ	3.339441
4	SZ	27.682812

Python Script¶

#!/usr/bin/env python
# coding: utf-8

# ## Initial Imports

import sys
import multiprocessing
from scm.plams import (
    JobRunner,
    config,
    from_smiles,
    Settings,
    AMSJob,
    JobAnalysis,
    jobs_in_directory,
    view,
    plot_image_grid,
)
import numpy as np


# ## Set Up Job Runner
# Set up job runner, running as many jobs as possible in parallel.

config.default_jobrunner = JobRunner(parallel=True, maxjobs=multiprocessing.cpu_count())
config.job.runscript.nproc = 1


# ## Set Up Molecules
# Create the molecules we want to use in our benchmark from SMILES.

# The molecules we want to use in our benchmark:
mol_smiles = {"Methane": "C", "Ethane": "C-C", "Ethylene": "C=C", "Acetylene": "C#C"}
molecules = {}
for name, smiles in mol_smiles.items():
    # Compute 10 conformers, optimize with UFF and pick the lowest in energy.
    molecules[name] = from_smiles(smiles, nconfs=10, forcefield="uff")[0]

plot_image_grid({k: view(v) for k, v in molecules.items()}, rows=1, save_path="picture1.png")
# ## Initialize Calculation Settings
# Set up the settings which are common across jobs. The basis type is added later for each job.

common_settings = Settings()
common_settings.input.ams.Task = "SinglePoint"
common_settings.input.ams.System.Symmetrize = "Yes"
common_settings.input.adf.Basis.Core = "None"


basis = ["QZ4P", "TZ2P", "TZP", "DZP", "DZ", "SZ"]
reference_basis = "QZ4P"


# ## Run Calculations

results = {}
jobs = []
for bas in basis:
    for name, molecule in molecules.items():
        with jobs_in_directory(name):
            settings = common_settings.copy()
            settings.input.adf.Basis.Type = bas
            job = AMSJob(name=f"{name}_{bas}", molecule=molecule, settings=settings)
            jobs.append(job)
            results[(name, bas)] = job.run()

for job in jobs:
    job.ok()


# ## Results
# Extract the energy from each calculation. Calculate the average absolute error in bond energy per atom for each basis set.

import pandas as pd

results = []
for job in jobs:
    mol = job.results.get_main_molecule()
    results.append(
        {
            "Formula": mol.get_formula(),
            "Basis": job.settings.input.adf.basis.type,
            "NAtoms": len(mol),
            "Energy": job.results.get_energy(unit="kcal/mol"),
        }
    )

results_df = pd.DataFrame(results)
print(results_df)


ref_results_df = results_df[results_df["Basis"] == "QZ4P"]
comp_results_df = results_df[results_df["Basis"] != "QZ4P"]
comp_results_df = comp_results_df.merge(ref_results_df, on="Formula", suffixes=["", " (Ref)"])
comp_results_df["Average Error"] = (
    abs(comp_results_df["Energy"] - comp_results_df["Energy (Ref)"]) / comp_results_df["NAtoms"]
)
print(comp_results_df)


benchmark_results_df = comp_results_df.groupby("Basis", as_index=False)["Average Error"].mean()
benchmark_results_df.rename(columns={"Average Error": "MAE [kcal/mol]"}, inplace=True)
benchmark_results_df.sort_values(by="MAE [kcal/mol]", inplace=True, ignore_index=True)
print(benchmark_results_df)

Benchmarking Reaction Energies across Basis Sets¶

Initial Imports¶

Set Up Job Runner¶

Set Up Molecules¶

Initialize Calculation Settings¶

Run Calculations¶

Results¶

See also¶

Related examples¶

Related tutorials¶

Related documentation¶

Python Script¶