#!/usr/bin/env python
# coding: utf-8
# ## Benchmarking Hydrocarbon Isomerization Energies
# This benchmark aims to evaluate the accuracy of a range of machine learning potentials included in AMS, using the ISOC7 dataset from the paper: A. Karton and E. Semidalas, “Benchmarking Isomerization Energies for C5–C7 Hydrocarbons: The ISOC7 Database,” J. of Comp. Chem. 47(2) (2026): e70296, https://doi.org/10.1002/jcc.70296.
#
# The ISOC7 dataset contains a diverse range of hydrocarbons from simple alkanes to highly strained systems featuring tetrahedrane units, as well as alkanes, alkenes, alkynes, and cyclic compounds. It has reference data to CCSD(T)/CBS level of accuracy and therefore provides a good benchmark for evaluating the accuracy of ML potentials for small organic systems.
# ### Extract Reference ISOC7 Database
# We first extract the ISOC7 database:
import os
import tarfile
from pathlib import Path
# Location of the bundled ISOC7 archive within the AMS installation.
isoc7_tar = Path(os.environ["AMSHOME"]) / "atomicdata" / "Benchmarks" / "ISOC7.tar.gz"
isoc7 = Path("./ISOC7")
# Extract only once: skip when the target directory already exists.
if not isoc7.exists():
    with tarfile.open(isoc7_tar, "r:gz") as archive:
        archive.extractall(path=Path.cwd())
# ### Load XYZ Data
# Then we load all the xyz files as PLAMS molecules:
import scm.plams as plams

molecules = plams.read_molecules(isoc7, formats=["xyz"])
# We can see the number of loaded structures:
print(f"Number of loaded structures: {len(molecules)}")
# And view a selection of them, with their corresponding unique id:
imgs = {}
for mol_id, mol in list(molecules.items())[:4]:
    imgs[mol_id] = plams.view(mol, width=300, height=300, guess_bonds=True)
plams.plot_image_grid(imgs, rows=1, save_path="picture1.png")
# ### Load Reference Data
# Next we load the reference CCSD(T)/CBS energies from the benchmark:
import pandas as pd

# Only the columns needed to define each isomerization reaction are read.
ref_columns = [
    "reaction_name",
    "species1",
    "species2",
    "W1-F12_isomerization_energy (CCSD(T)/CBS, kcal/mol)",
]
reference_data = pd.read_csv(isoc7 / "ISOC7_W1-F12_Ref_Values.csv", usecols=ref_columns)
reference_data
# ### Set up Data Sets with ParAMS
# To run our benchmark, we are going to use ParAMS. First we set up the JobCollection. This contains the information on the input structures.
import scm.params as params

results_importer = params.ResultsImporter(settings={"units": {"energy": "kcal/mol"}})
# One single-point job entry per loaded structure, keyed by its unique id.
for mol_id, mol in molecules.items():
    entry = params.JCEntry()
    entry.settings.input.AMS.Task = "SinglePoint"
    entry.molecule = mol
    entry.metadata["Source"] = "https://doi.org/10.1002/jcc.70296"
    results_importer.job_collection.add_entry(mol_id, entry)
print(results_importer.job_collection.from_jobids(["C5H10_1"]))
# Next we set up the dataset containing the information on each isomerization reaction and the reference energy:
ha_to_kcal_mol = plams.Units.conversion_ratio("Ha", "kcal/mol")
reaction_columns = ["species1", "species2", "W1-F12_isomerization_energy (CCSD(T)/CBS, kcal/mol)"]
# Each dataset entry is the energy difference product - reactant, with the
# CCSD(T)/CBS value from the paper as the reference.
for _, (reactant, product, ref_energy) in reference_data[reaction_columns].iterrows():
    results_importer.data_set.add_entry(
        f"energy('{product}')-energy('{reactant}')",
        reference=ref_energy,
        unit=("kcal/mol", ha_to_kcal_mol),
        metadata={
            "Source": "https://doi.org/10.1002/jcc.70296",
            "reference_method": "CCSD(T)/CBS",
        },
    )
print(results_importer.data_set.from_jobids(["C5H6_1", "C5H6_20"]))
# We can also visualize one of these isomerization reactions, for example the first one with a high isomerization energy:
imgs = {}
for species in ("C5H6_20", "C5H6_1"):
    imgs[species] = plams.view(molecules[species], width=300, height=300, guess_bonds=True)
plams.plot_image_grid(imgs, rows=1, save_path="picture2.png")
# Now we store the yaml files needed to run the ParAMS calculations:
benchmark_dir = "./isoc7_benchmark"
results_importer.store(benchmark_dir)
# Note that the dataset can also be viewed using the ParAMS GUI:
# !params -gui ./isoc7_benchmark/job_collection.yaml
# ### Run Benchmarks with ParAMS
# Now we set up a benchmark job for each ML model of interest, and run them. We include all models suitable for small organic molecules, as well as some trained on materials/other domains for comparison.
models = [
    "AIMNet2-B973c",
    "AIMNet2-wB97MD3",
    "ANI-1ccx",
    "ANI-1x",
    "ANI-2x",
    "eSEN-S-Con-OMol",
    "M3GNet-UP-2022",
    "MACE-MPA-0",
    "UMA-S-1.2-ODAC",
    "UMA-S-1.2-OMat",
    "UMA-S-1.2-OMol",
]
jobs = {}
for model in models:
    benchmark_job = params.ParAMSJob.from_yaml("./isoc7_benchmark", name=f"benchmark_{model}")
    # One process, one OpenMP thread, one job at a time per benchmark.
    benchmark_job.settings.runscript.nproc = 1
    benchmark_job.settings.runscript.preamble_lines = ["export OMP_NUM_THREADS=1"]
    benchmark_job.settings.input.parallellevels.jobs = 1
    benchmark_job.settings.input.mlpotential.model = model
    benchmark_job.settings.input.task = "SinglePoint"
    jobs[model] = benchmark_job
for benchmark_job in jobs.values():
    benchmark_job.run()
    benchmark_job.ok()
# Each completed job now has results for the errors between the model calculations and the reference. For example, with AIMNet2-wB97MD3:
evaluator_text = str(jobs["AIMNet2-wB97MD3"].results.get_data_set_evaluator())
print("\n".join(evaluator_text.split("\n")[:20]))
# Results can alternatively be displayed in a correlation plot of the reference vs. predicted energies:
ax = jobs["AIMNet2-wB97MD3"].results.plot_simple_correlation("energy", validation_set=False)
ax
ax.figure.savefig("picture3.png")
# Note that all these can also again be viewed in the ParAMS GUI:
# !params -gui ./plams_workdir/benchmark_AIMNet2-wB97MD3/results
# ## Evaluating Benchmark Results
# To evaluate the results, we extract the Mean Absolute Error (MAE) and Root Mean Squared Error (RMSE) from the job results:
# Evaluate each job once and reuse the result for both error metrics
# (the original called get_data_set_evaluator() twice per model).
metrics = {m: jobs[m].results.get_data_set_evaluator().results for m in models}
results = pd.DataFrame({"Model": models})
results["MAE"] = [metrics[m].mae for m in models]
results["RMSE"] = [metrics[m].rmse for m in models]
results.sort_values(by="MAE", inplace=True)
results.rename(
    columns={
        "MAE": "MAE*",
        "RMSE": "RMSE*",
    },
    inplace=True,
)
# NOTE(review): the caption string literals were broken across lines in the
# original (a SyntaxError); reconstructed as implicitly concatenated strings
# with an HTML <br> line break, which the pandas Styler caption renders.
results.style.hide(axis="index").background_gradient(
    subset=["MAE*", "RMSE*"], cmap="Blues", vmin=0, vmax=10
).format("{:.2f}", subset=["MAE*", "RMSE*"]).set_caption(
    "Benchmark Results for ISOC7 Dataset Isomerization Energies"
    "<br>* Reference method CCSD(T)/CBS, in kcal/mol"
).set_table_styles(
    [
        {"selector": "th", "props": [("border-bottom", "2px solid black")]},
        {"selector": "th, td", "props": [("min-width", "100px")]},
        {"selector": "td", "props": [("border-bottom", "1px solid lightgray")]},
        {"selector": "table", "props": [("border-collapse", "collapse")]},
    ]
)
# From these results, we see that `AIMNet2-wB97MD3` performs best, achieving the lowest errors (MAE = 1.29 kcal/mol, RMSE = 1.60 kcal/mol), clearly outperforming all other models. In a broader context, this is on a par with the best performing DFT functionals, at a fraction of the computational cost.
#
# Other models targeted at small molecules also perform well, including `eSEN-S-Con-OMol` and `ANI-1ccx`, both with MAE < 5 kcal/mol.
#
# We find that, as expected, the worst performers in this case are models trained primarily on materials data such as `M3GNet-UP-2022` and `MACE-MPA-0`, which are better suited to other chemical domains.
#
# One surprising result is for `UMA-S-1.2-OMol`, which would be expected to perform better for these systems than other UMA variants. We can compare and contrast the correlation plots for `UMA-S-1.2-OMol` with the previous plot for the best performing model, `AIMNet2-wB97MD3`:
ax = jobs["UMA-S-1.2-OMol"].results.plot_simple_correlation("energy", validation_set=False)
ax
ax.figure.savefig("picture4.png")
# This shows there are some structures with a very high energy difference with respect to the reference. We can inspect these structures:
uma_summary = str(jobs["UMA-S-1.2-OMol"].results.get_data_set_evaluator())
print("\n".join(uma_summary.split("\n")[:9]))
imgs = {}
for species in ("C6H6_2", "C7H8_3", "C7H8_5", "C6H6_21"):
    imgs[species] = plams.view(molecules[species], width=300, height=300, guess_bonds=True)
plams.plot_image_grid(imgs, rows=1, save_path="picture5.png")
# As noted in the original paper, we find many of the systems with high deviations compared to the reference values contain a tetrahedrane core, which is also a common issue with many of the tested DFT functionals.