Benchmark accuracy of basis sets¶

To follow along, either

Download BasisSetBenchmark.py (run as $AMSBIN/amspython BasisSetBenchmark.py).
Download BasisSetBenchmark.ipynb (see also: how to install Jupyterlab in AMS)

Note that this example requires AMS2026+ to be able to run without modification.

Worked Example¶

Initial Imports¶

import sys
import multiprocessing
from scm.plams import JobRunner, config, from_smiles, Settings, AMSJob, init, JobAnalysis, jobs_in_directory
import numpy as np

# this line is not required in AMS2025+
init();

PLAMS working folder: /path/plams/examples/BasisSetBenchmark/plams_workdir

Set Up Job Runner¶

Set up job runner, running as many jobs as possible in parallel.

config.default_jobrunner = JobRunner(parallel=True, maxjobs=multiprocessing.cpu_count())
config.job.runscript.nproc = 1

Set Up Molecules¶

Create the molecules we want to use in our benchmark from SMILES.

# The molecules we want to use in our benchmark:
mol_smiles = {"Methane": "C", "Ethane": "C-C", "Ethylene": "C=C", "Acetylene": "C#C"}
molecules = {}
for name, smiles in mol_smiles.items():
    # Compute 10 conformers, optimize with UFF and pick the lowest in energy.
    molecules[name] = from_smiles(smiles, nconfs=10, forcefield="uff")[0]
    print(name, molecules[name])

Methane   Atoms:
    1         C       0.000000       0.000000       0.000000
    2         H       0.538912       0.762358      -0.599295
    3         H       0.731244      -0.596616       0.583182
    4         H      -0.567129      -0.670302      -0.678108
    5         H      -0.703028       0.504560       0.694220
  Bonds:
   (1)--1.0--(2)
   (1)--1.0--(3)
   (1)--1.0--(4)
   (1)--1.0--(5)

Ethane   Atoms:
    1         C      -0.757196      -0.040522       0.044605
    2         C       0.757196       0.040522      -0.044605
    3         H      -1.205222       0.185290      -0.945970
    4         H      -1.130281       0.694397       0.788688
    5         H      -1.061719      -1.061491       0.357407
    6         H       1.205222      -0.185290       0.945971
    7         H       1.130281      -0.694396      -0.788689
    8         H       1.061719       1.061491      -0.357406
  Bonds:
   (1)--1.0--(2)
   (1)--1.0--(3)
   (1)--1.0--(4)
   (1)--1.0--(5)
   (2)--1.0--(6)
   (2)--1.0--(7)
   (2)--1.0--(8)

Ethylene   Atoms:
    1         C       0.664485       0.027988      -0.023685
    2         C      -0.664485      -0.027988       0.023685
    3         H       1.253433      -0.878614       0.070299
    4         H       1.167038       0.980564      -0.156575
    5         H      -1.253433       0.878614      -0.070299
    6         H      -1.167038      -0.980564       0.156575
  Bonds:
   (1)--2.0--(2)
   (1)--1.0--(3)
   (1)--1.0--(4)
   (2)--1.0--(5)
   (2)--1.0--(6)

Acetylene   Atoms:
    1         C      -0.587409       0.175060      -0.002211
    2         C       0.587409      -0.094463       0.002211
    3         H      -1.618985       0.411721      -0.006095
    4         H       1.618985      -0.331124       0.006094
  Bonds:
   (1)--3.0--(2)
   (1)--1.0--(3)
   (2)--1.0--(4)

Initialize Calculation Settings¶

Set up the settings which are common across jobs. The basis type is added later for each job.

common_settings = Settings()
common_settings.input.ams.Task = "SinglePoint"
common_settings.input.ams.System.Symmetrize = "Yes"
common_settings.input.adf.Basis.Core = "None"

basis = ["QZ4P", "TZ2P", "TZP", "DZP", "DZ", "SZ"]
reference_basis = "QZ4P"

Run Calculations¶

results = {}
jobs = []
for bas in basis:
    for name, molecule in molecules.items():
        with jobs_in_directory(name):
            settings = common_settings.copy()
            settings.input.adf.Basis.Type = bas
            job = AMSJob(name=f"{name}_{bas}", molecule=molecule, settings=settings)
            jobs.append(job)
            results[(name, bas)] = job.run()

[04.06|12:22:39] JOB Methane_QZ4P STARTED
[04.06|12:22:39] JOB Ethane_QZ4P STARTED
[04.06|12:22:39] JOB Ethylene_QZ4P STARTED
[04.06|12:22:39] JOB Acetylene_QZ4P STARTED
[04.06|12:22:39] JOB Methane_TZ2P STARTED
[04.06|12:22:39] JOB Ethane_TZ2P STARTED
[04.06|12:22:39] JOB Ethylene_TZ2P STARTED
[04.06|12:22:39] JOB Methane_QZ4P RUNNING
[04.06|12:22:39] JOB Acetylene_TZ2P STARTED
[04.06|12:22:39] JOB Methane_TZP STARTED
... (PLAMS log lines truncated) ...

Results¶

Extract the energy from each calculation. Calculate the average absolute error in bond energy per atom for each basis set.

# For AMS2025+ can use JobAnalysis class to perform results analysis
from scm.plams import JobAnalysis

ja = (
    JobAnalysis(jobs=jobs, standard_fields=["Formula", "Smiles"])
    .add_settings_field(("Input", "ADF", "Basis", "Type"), display_name="Basis")
    .add_field("NAtoms", lambda j: len(j.molecule))
    .add_field("Energy", lambda j: j.results.get_energy(unit="kcal/mol"), display_name="Energy [kcal/mol]", fmt=".2f")
    .sort_jobs(["NAtoms", "Energy"])
)

ref_ja = ja.filter_jobs(lambda data: data["InputAdfBasisType"] == "QZ4P")

ref_energies = {f: e for f, e in zip(ref_ja.Formula, ref_ja.Energy)}


def get_average_error(job):
    return abs(job.results.get_energy(unit="kcal/mol") - ref_energies[job.molecule.get_formula()]) / len(job.molecule)


ja = ja.add_field("AvErr", get_average_error, display_name="Average Error [kcal/mol]", fmt=".2f")

[04.06|12:22:39] Waiting for job Methane_QZ4P to finish
[04.06|12:22:43] Waiting for job Ethane_QZ4P to finish
[04.06|12:22:47] Waiting for job Ethylene_TZP to finish
[04.06|12:22:48] Waiting for job Methane_DZP to finish
[04.06|12:22:48] Waiting for job Ethane_DZP to finish
[04.06|12:22:48] Waiting for job Methane_SZ to finish

# Pretty-print if running in a notebook
if "ipykernel" in sys.modules:
    ja.display_table()
else:
    print(ja.to_table())

Formula	Smiles	Basis	NAtoms	Energy [kcal/mol]	Average Error [kcal/mol]
C2H2	C#C	DZ	4	-537.10	4.91
C2H2	C#C	DZP	4	-550.65	1.53
C2H2	C#C	TZP	4	-552.96	0.95
C2H2	C#C	TZ2P	4	-555.67	0.27
C2H2	C#C	QZ4P	4	-556.76	0.00
C2H2	C#C	SZ	4	-647.50	22.69
CH4	C	DZ	5	-560.93	2.34
CH4	C	DZP	5	-569.12	0.70
CH4	C	TZP	5	-571.04	0.32
CH4	C	TZ2P	5	-572.11	0.10
CH4	C	QZ4P	5	-572.63	0.00
CH4	C	SZ	5	-723.55	30.18
C2H4	C=C	DZ	6	-750.17	3.37
C2H4	C=C	DZP	6	-764.41	1.00
C2H4	C=C	TZP	6	-767.33	0.51
C2H4	C=C	TZ2P	6	-769.43	0.16
C2H4	C=C	QZ4P	6	-770.41	0.00
C2H4	C=C	SZ	6	-934.66	27.37
C2H6	CC	SZ	8	-1216.91	30.49
C2H6	CC	DZ	8	-951.17	2.73
C2H6	CC	DZP	8	-966.09	0.87
C2H6	CC	TZP	8	-970.08	0.37
C2H6	CC	TZ2P	8	-971.88	0.14
C2H6	CC	QZ4P	8	-973.02	0.00

print("== Results ==")
print("Average absolute error in bond energy per atom")
for bas in basis:
    if bas != reference_basis:
        av = np.average(ja.filter_jobs(lambda data: data["InputAdfBasisType"] == bas).AvErr)
        print("Error for basis set {:<4}: {:>10.3f} [kcal/mol]".format(bas, av))

== Results ==
Average absolute error in bond energy per atom
Error for basis set TZ2P:      0.170 [kcal/mol]
Error for basis set TZP :      0.537 [kcal/mol]
Error for basis set DZP :      1.024 [kcal/mol]
Error for basis set DZ  :      3.339 [kcal/mol]
Error for basis set SZ  :     27.683 [kcal/mol]

Complete Python code¶

#!/usr/bin/env amspython
# coding: utf-8

# ## Initial Imports

import sys
import multiprocessing
from scm.plams import JobRunner, config, from_smiles, Settings, AMSJob, init, JobAnalysis, jobs_in_directory
import numpy as np

# this line is not required in AMS2025+
init()


# ## Set Up Job Runner
# Set up job runner, running as many jobs as possible in parallel.

config.default_jobrunner = JobRunner(parallel=True, maxjobs=multiprocessing.cpu_count())
config.job.runscript.nproc = 1


# ## Set Up Molecules
# Create the molecules we want to use in our benchmark from SMILES.

# The molecules we want to use in our benchmark:
mol_smiles = {"Methane": "C", "Ethane": "C-C", "Ethylene": "C=C", "Acetylene": "C#C"}
molecules = {}
for name, smiles in mol_smiles.items():
    # Compute 10 conformers, optimize with UFF and pick the lowest in energy.
    molecules[name] = from_smiles(smiles, nconfs=10, forcefield="uff")[0]
    print(name, molecules[name])


# ## Initialize Calculation Settings
# Set up the settings which are common across jobs. The basis type is added later for each job.

common_settings = Settings()
common_settings.input.ams.Task = "SinglePoint"
common_settings.input.ams.System.Symmetrize = "Yes"
common_settings.input.adf.Basis.Core = "None"


basis = ["QZ4P", "TZ2P", "TZP", "DZP", "DZ", "SZ"]
reference_basis = "QZ4P"


# ## Run Calculations

results = {}
jobs = []
for bas in basis:
    for name, molecule in molecules.items():
        with jobs_in_directory(name):
            settings = common_settings.copy()
            settings.input.adf.Basis.Type = bas
            job = AMSJob(name=f"{name}_{bas}", molecule=molecule, settings=settings)
            jobs.append(job)
            results[(name, bas)] = job.run()


# ## Results
# Extract the energy from each calculation. Calculate the average absolute error in bond energy per atom for each basis set.

# For AMS2025+ can use JobAnalysis class to perform results analysis
from scm.plams import JobAnalysis

ja = (
    JobAnalysis(jobs=jobs, standard_fields=["Formula", "Smiles"])
    .add_settings_field(("Input", "ADF", "Basis", "Type"), display_name="Basis")
    .add_field("NAtoms", lambda j: len(j.molecule))
    .add_field("Energy", lambda j: j.results.get_energy(unit="kcal/mol"), display_name="Energy [kcal/mol]", fmt=".2f")
    .sort_jobs(["NAtoms", "Energy"])
)

ref_ja = ja.filter_jobs(lambda data: data["InputAdfBasisType"] == "QZ4P")

ref_energies = {f: e for f, e in zip(ref_ja.Formula, ref_ja.Energy)}


def get_average_error(job):
    return abs(job.results.get_energy(unit="kcal/mol") - ref_energies[job.molecule.get_formula()]) / len(job.molecule)


ja = ja.add_field("AvErr", get_average_error, display_name="Average Error [kcal/mol]", fmt=".2f")


# Pretty-print if running in a notebook
if "ipykernel" in sys.modules:
    ja.display_table()
else:
    print(ja.to_table())


print("== Results ==")
print("Average absolute error in bond energy per atom")
for bas in basis:
    if bas != reference_basis:
        av = np.average(ja.filter_jobs(lambda data: data["InputAdfBasisType"] == bas).AvErr)
        print("Error for basis set {:<4}: {:>10.3f} [kcal/mol]".format(bas, av))