Benchmark accuracy of basis sets¶
To follow along, either
Download
BasisSetBenchmark.py
(run as$AMSBIN/amspython BasisSetBenchmark.py
).Download
BasisSetBenchmark.ipynb
(see also: how to install Jupyterlab in AMS)
Worked Example¶
Initial Imports¶
import sys
import multiprocessing
from scm.plams import JobRunner, config, from_smiles, Settings, AMSJob, init
import numpy as np
# this line is not required in AMS2025+
init();
PLAMS working folder: /path/plams/examples/BasisSetBenchmark/plams_workdir
Set Up Job Runner¶
Set up job runner, running as many jobs as possible in parallel.
config.default_jobrunner = JobRunner(parallel=True, maxjobs=multiprocessing.cpu_count())
config.job.runscript.nproc = 1
Set Up Molecules¶
Create the molecules we want to use in our benchmark from SMILES.
# The molecules we want to use in our benchmark:
mol_smiles = {"Methane": "C", "Ethane": "C-C", "Ethylene": "C=C", "Acetylene": "C#C"}
molecules = {}
for name, smiles in mol_smiles.items():
# Compute 10 conformers, optimize with UFF and pick the lowest in energy.
molecules[name] = from_smiles(smiles, nconfs=10, forcefield="uff")[0]
print(name, molecules[name])
Methane Atoms:
1 C 0.000000 0.000000 0.000000
2 H 0.538912 0.762358 -0.599295
3 H 0.731244 -0.596616 0.583182
4 H -0.567129 -0.670302 -0.678108
5 H -0.703028 0.504560 0.694220
Bonds:
(1)--1.0--(2)
(1)--1.0--(3)
(1)--1.0--(4)
(1)--1.0--(5)
Ethane Atoms:
1 C -0.757196 -0.040522 0.044605
2 C 0.757196 0.040522 -0.044605
3 H -1.205222 0.185290 -0.945970
4 H -1.130281 0.694397 0.788688
5 H -1.061719 -1.061491 0.357407
6 H 1.205222 -0.185290 0.945971
7 H 1.130281 -0.694396 -0.788689
8 H 1.061719 1.061491 -0.357406
Bonds:
(1)--1.0--(2)
(1)--1.0--(3)
(1)--1.0--(4)
(1)--1.0--(5)
(2)--1.0--(6)
(2)--1.0--(7)
(2)--1.0--(8)
Ethylene Atoms:
1 C 0.664485 0.027988 -0.023685
2 C -0.664485 -0.027988 0.023685
3 H 1.253433 -0.878614 0.070299
4 H 1.167038 0.980564 -0.156575
5 H -1.253433 0.878614 -0.070299
6 H -1.167038 -0.980564 0.156575
Bonds:
(1)--2.0--(2)
(1)--1.0--(3)
(1)--1.0--(4)
(2)--1.0--(5)
(2)--1.0--(6)
Acetylene Atoms:
1 C -0.587409 0.175060 -0.002211
2 C 0.587409 -0.094463 0.002211
3 H -1.618985 0.411721 -0.006095
4 H 1.618985 -0.331124 0.006094
Bonds:
(1)--3.0--(2)
(1)--1.0--(3)
(2)--1.0--(4)
Initialize Calculation Settings¶
Set up the settings which are common across jobs. The basis type is added later for each job.
common_settings = Settings()
common_settings.input.ams.Task = "SinglePoint"
common_settings.input.ams.System.Symmetrize = "Yes"
common_settings.input.adf.Basis.Core = "None"
basis = ["QZ4P", "TZ2P", "TZP", "DZP", "DZ", "SZ"]
reference_basis = "QZ4P"
Run Calculations¶
results = {}
jobs = []
for bas in basis:
for name, molecule in molecules.items():
settings = common_settings.copy()
settings.input.adf.Basis.Type = bas
job = AMSJob(name=name + "_" + bas, molecule=molecule, settings=settings)
jobs.append(job)
results[(name, bas)] = job.run()
[25.02|15:23:48] JOB Methane_QZ4P STARTED
[25.02|15:23:48] JOB Ethane_QZ4P STARTED
[25.02|15:23:48] JOB Ethylene_QZ4P STARTED
[25.02|15:23:48] JOB Acetylene_QZ4P STARTED
[25.02|15:23:48] JOB Methane_TZ2P STARTED
[25.02|15:23:48] JOB Ethane_TZ2P STARTED
[25.02|15:23:48] JOB Ethylene_TZ2P STARTED
[25.02|15:23:48] JOB Acetylene_TZ2P STARTED
[25.02|15:23:48] JOB Methane_TZP STARTED
[25.02|15:23:48] JOB Ethane_TZP STARTED
... (PLAMS log lines truncated) ...
Results¶
Extract the energy from each calculation. Calculate the average absolute error in bond energy per atom for each basis set.
try:
# For AMS2025+ can use JobAnalysis class to perform results analysis
from scm.plams import JobAnalysis
ja = (
JobAnalysis(jobs=jobs, standard_fields=["Formula", "Smiles"])
.add_settings_field(("Input", "ADF", "Basis", "Type"), display_name="Basis")
.add_field("NAtoms", lambda j: len(j.molecule))
.add_field(
"Energy", lambda j: j.results.get_energy(unit="kcal/mol"), display_name="Energy [kcal/mol]", fmt=".2f"
)
.sort_jobs(["NAtoms", "Energy"])
)
ref_ja = ja.copy().filter_jobs(lambda data: data["InputAdfBasisType"] == "QZ4P")
ref_energies = {f: e for f, e in zip(ref_ja.Formula, ref_ja.Energy)}
def get_average_error(job):
return abs(job.results.get_energy(unit="kcal/mol") - ref_energies[job.molecule.get_formula()]) / len(
job.molecule
)
ja.add_field("AvErr", get_average_error, display_name="Average Error [kcal/mol]", fmt=".2f")
# Pretty-print if running in a notebook
if "ipykernel" in sys.modules:
ja.display_table()
else:
print(ja.to_table())
except ImportError:
average_errors = {}
for bas in basis:
if bas != reference_basis:
errors = []
for name, molecule in molecules.items():
reference_energy = results[(name, reference_basis)].get_energy(unit="kcal/mol")
energy = results[(name, bas)].get_energy(unit="kcal/mol")
errors.append(abs(energy - reference_energy) / len(molecule))
print("Energy for {} using {} basis set: {} [kcal/mol]".format(name, bas, energy))
average_errors[bas] = sum(errors) / len(errors)
[25.02|15:23:48] Waiting for job Methane_QZ4P to finish
[25.02|15:23:48] JOB Ethylene_TZ2P RUNNING
[25.02|15:23:48] JOB Acetylene_TZ2P RUNNING
[25.02|15:23:48] JOB Ethane_TZ2P RUNNING
[25.02|15:23:48] JOB Acetylene_TZP RUNNING
[25.02|15:23:48] JOB Acetylene_DZ RUNNING
[25.02|15:23:48] JOB Ethane_DZP RUNNING
[25.02|15:23:48] JOB Ethylene_DZP RUNNING
[25.02|15:23:48] JOB Methane_DZP RUNNING
[25.02|15:23:48] JOB Methane_SZ RUNNING
[25.02|15:23:48] JOB Ethane_TZP RUNNING
... (PLAMS log lines truncated) ...
[25.02|15:23:50] Waiting for job Ethane_QZ4P to finish
[25.02|15:23:52] Waiting for job Methane_TZP to finish
[25.02|15:23:54] Waiting for job Ethane_DZP to finish
Formula |
Smiles |
Basis |
NAtoms |
Energy [kcal/mol] |
Average Error [kcal/mol] |
---|---|---|---|---|---|
C2H2 |
C#C |
DZ |
4 |
-537.10 |
4.91 |
C2H2 |
C#C |
DZP |
4 |
-550.65 |
1.53 |
C2H2 |
C#C |
TZP |
4 |
-552.96 |
0.95 |
C2H2 |
C#C |
TZ2P |
4 |
-555.67 |
0.27 |
C2H2 |
C#C |
QZ4P |
4 |
-556.76 |
0.00 |
C2H2 |
C#C |
SZ |
4 |
-647.50 |
22.69 |
CH4 |
C |
DZ |
5 |
-560.93 |
2.34 |
CH4 |
C |
DZP |
5 |
-569.12 |
0.70 |
CH4 |
C |
TZP |
5 |
-571.04 |
0.32 |
CH4 |
C |
TZ2P |
5 |
-572.11 |
0.10 |
CH4 |
C |
QZ4P |
5 |
-572.63 |
0.00 |
CH4 |
C |
SZ |
5 |
-723.55 |
30.18 |
C2H4 |
C=C |
DZ |
6 |
-750.17 |
3.37 |
C2H4 |
C=C |
DZP |
6 |
-764.41 |
1.00 |
C2H4 |
C=C |
TZP |
6 |
-767.33 |
0.51 |
C2H4 |
C=C |
TZ2P |
6 |
-769.43 |
0.16 |
C2H4 |
C=C |
QZ4P |
6 |
-770.41 |
0.00 |
C2H4 |
C=C |
SZ |
6 |
-934.66 |
27.37 |
C2H6 |
CC |
SZ |
8 |
-1216.91 |
30.49 |
C2H6 |
CC |
DZ |
8 |
-951.17 |
2.73 |
C2H6 |
CC |
DZP |
8 |
-966.09 |
0.87 |
C2H6 |
CC |
TZP |
8 |
-970.08 |
0.37 |
C2H6 |
CC |
TZ2P |
8 |
-971.88 |
0.14 |
C2H6 |
CC |
QZ4P |
8 |
-973.02 |
0.00 |
print("== Results ==")
print("Average absolute error in bond energy per atom")
for bas in basis:
if bas != reference_basis:
if ja:
av = np.average(ja.copy().filter_jobs(lambda data: data["InputAdfBasisType"] == bas).AvErr)
else:
av = average_errors[bas]
print("Error for basis set {:<4}: {:>10.3f} [kcal/mol]".format(bas, av))
== Results ==
Average absolute error in bond energy per atom
Error for basis set TZ2P: 0.170 [kcal/mol]
Error for basis set TZP : 0.537 [kcal/mol]
Error for basis set DZP : 1.024 [kcal/mol]
Error for basis set DZ : 3.339 [kcal/mol]
Error for basis set SZ : 27.683 [kcal/mol]
Complete Python code¶
#!/usr/bin/env amspython
# coding: utf-8
# ## Initial Imports
import sys
import multiprocessing
from scm.plams import JobRunner, config, from_smiles, Settings, AMSJob, init
import numpy as np
# this line is not required in AMS2025+
init()
# ## Set Up Job Runner
# Set up job runner, running as many jobs as possible in parallel.
config.default_jobrunner = JobRunner(parallel=True, maxjobs=multiprocessing.cpu_count())
config.job.runscript.nproc = 1
# ## Set Up Molecules
# Create the molecules we want to use in our benchmark from SMILES.
# The molecules we want to use in our benchmark:
mol_smiles = {"Methane": "C", "Ethane": "C-C", "Ethylene": "C=C", "Acetylene": "C#C"}
molecules = {}
for name, smiles in mol_smiles.items():
# Compute 10 conformers, optimize with UFF and pick the lowest in energy.
molecules[name] = from_smiles(smiles, nconfs=10, forcefield="uff")[0]
print(name, molecules[name])
# ## Initialize Calculation Settings
# Set up the settings which are common across jobs. The basis type is added later for each job.
common_settings = Settings()
common_settings.input.ams.Task = "SinglePoint"
common_settings.input.ams.System.Symmetrize = "Yes"
common_settings.input.adf.Basis.Core = "None"
basis = ["QZ4P", "TZ2P", "TZP", "DZP", "DZ", "SZ"]
reference_basis = "QZ4P"
# ## Run Calculations
results = {}
jobs = []
for bas in basis:
for name, molecule in molecules.items():
settings = common_settings.copy()
settings.input.adf.Basis.Type = bas
job = AMSJob(name=name + "_" + bas, molecule=molecule, settings=settings)
jobs.append(job)
results[(name, bas)] = job.run()
# ## Results
# Extract the energy from each calculation. Calculate the average absolute error in bond energy per atom for each basis set.
try:
# For AMS2025+ can use JobAnalysis class to perform results analysis
from scm.plams import JobAnalysis
ja = (
JobAnalysis(jobs=jobs, standard_fields=["Formula", "Smiles"])
.add_settings_field(("Input", "ADF", "Basis", "Type"), display_name="Basis")
.add_field("NAtoms", lambda j: len(j.molecule))
.add_field(
"Energy", lambda j: j.results.get_energy(unit="kcal/mol"), display_name="Energy [kcal/mol]", fmt=".2f"
)
.sort_jobs(["NAtoms", "Energy"])
)
ref_ja = ja.copy().filter_jobs(lambda data: data["InputAdfBasisType"] == "QZ4P")
ref_energies = {f: e for f, e in zip(ref_ja.Formula, ref_ja.Energy)}
def get_average_error(job):
return abs(job.results.get_energy(unit="kcal/mol") - ref_energies[job.molecule.get_formula()]) / len(
job.molecule
)
ja.add_field("AvErr", get_average_error, display_name="Average Error [kcal/mol]", fmt=".2f")
# Pretty-print if running in a notebook
if "ipykernel" in sys.modules:
ja.display_table()
else:
print(ja.to_table())
except ImportError:
average_errors = {}
for bas in basis:
if bas != reference_basis:
errors = []
for name, molecule in molecules.items():
reference_energy = results[(name, reference_basis)].get_energy(unit="kcal/mol")
energy = results[(name, bas)].get_energy(unit="kcal/mol")
errors.append(abs(energy - reference_energy) / len(molecule))
print("Energy for {} using {} basis set: {} [kcal/mol]".format(name, bas, energy))
average_errors[bas] = sum(errors) / len(errors)
print("== Results ==")
print("Average absolute error in bond energy per atom")
for bas in basis:
if bas != reference_basis:
if ja:
av = np.average(ja.copy().filter_jobs(lambda data: data["InputAdfBasisType"] == bas).AvErr)
else:
av = average_errors[bas]
print("Error for basis set {:<4}: {:>10.3f} [kcal/mol]".format(bas, av))