#!/usr/bin/env python
# coding: utf-8

# ## Benchmarking Hydrocarbon Isomerization Energies
#
# This benchmark aims to evaluate the accuracy of a range of machine learning
# potentials included in AMS, using the ISOC7 dataset from the paper:
# A. Karton and E. Semidalas, "Benchmarking Isomerization Energies for C5-C7
# Hydrocarbons: The ISOC7 Database," J. of Comp. Chem. 47(2) (2026): e70296,
# https://doi.org/10.1002/jcc.70296.
#
# The ISOC7 dataset contains a diverse range of hydrocarbons from simple
# alkanes to highly strained systems featuring tetrahedrane units, as well as
# alkanes, alkenes, alkynes, and cyclic compounds. It has reference data to
# CCSD(T)/CBS level of accuracy and therefore provides a good benchmark for
# evaluating the accuracy of ML potentials for small organic systems.

# ### Extract Reference ISOC7 Database
# We first extract the ISOC7 database:

import os
import tarfile
from pathlib import Path

isoc7_tar = Path(os.environ["AMSHOME"]) / "atomicdata" / "Benchmarks" / "ISOC7.tar.gz"
isoc7 = Path("./ISOC7")

if not isoc7.exists():
    with tarfile.open(isoc7_tar, "r:gz") as tar:
        # Use the "data" extraction filter (PEP 706) to guard against path
        # traversal / unsafe members in the archive. Fall back to the legacy
        # behavior on Python versions that predate the `filter` argument.
        try:
            tar.extractall(path=Path.cwd(), filter="data")
        except TypeError:
            tar.extractall(path=Path.cwd())

# ### Load XYZ Data
# Then we load all the xyz files as PLAMS molecules:

import scm.plams as plams

molecules = plams.read_molecules(isoc7, formats=["xyz"])

# We can see the number of loaded structures:
print(f"Number of loaded structures: {len(molecules)}")

# And view a selection of them, with their corresponding unique id:
imgs = {k: plams.view(v, width=300, height=300, guess_bonds=True) for k, v in list(molecules.items())[:4]}
plams.plot_image_grid(imgs, rows=1, save_path="picture1.png")

# ### Load Reference Data
# Next we load the reference CCSD(T)/CBS energies from the benchmark:

import pandas as pd

reference_data = pd.read_csv(
    isoc7 / "ISOC7_W1-F12_Ref_Values.csv",
    usecols=[
        "reaction_name",
        "species1",
        "species2",
        "W1-F12_isomerization_energy (CCSD(T)/CBS, kcal/mol)",
    ],
)
reference_data

# ### Set up Data Sets with ParAMS
# To run our benchmark, we are
# going to use ParAMS. First we set up the JobCollection. This contains the
# information on the input structures.

import scm.params as params

results_importer = params.ResultsImporter(settings={"units": {"energy": "kcal/mol"}})

# One single-point job entry per loaded structure, tagged with its data source.
for name, mol in molecules.items():
    entry = params.JCEntry()
    entry.settings.input.AMS.Task = "SinglePoint"
    entry.molecule = mol
    entry.metadata["Source"] = "https://doi.org/10.1002/jcc.70296"
    results_importer.job_collection.add_entry(name, entry)

print(results_importer.job_collection.from_jobids(["C5H10_1"]))

# Next we set up the dataset containing the information on each isomerization
# reaction and the reference energy:
ha_to_kcal_mol = plams.Units.conversion_ratio("Ha", "kcal/mol")

ref_columns = ["species1", "species2", "W1-F12_isomerization_energy (CCSD(T)/CBS, kcal/mol)"]
for s1, s2, ref_e in reference_data[ref_columns].itertuples(index=False):
    results_importer.data_set.add_entry(
        f"energy('{s2}')-energy('{s1}')",
        reference=ref_e,
        unit=("kcal/mol", ha_to_kcal_mol),
        metadata={
            "Source": "https://doi.org/10.1002/jcc.70296",
            "reference_method": "CCSD(T)/CBS",
        },
    )

print(results_importer.data_set.from_jobids(["C5H6_1", "C5H6_20"]))

# We can also visualize one of these isomerization reactions, for example the
# first one with a high isomerization energy:
imgs = {}
for mol_id in ["C5H6_20", "C5H6_1"]:
    imgs[mol_id] = plams.view(molecules[mol_id], width=300, height=300, guess_bonds=True)
plams.plot_image_grid(imgs, rows=1, save_path="picture2.png")

# Now we store the yaml files needed to run the ParAMS calculations:
results_importer.store("./isoc7_benchmark")

# Note that the dataset can also be viewed using the ParAMS GUI:
# !params -gui ./isoc7_benchmark/job_collection.yaml

# ### Run Benchmarks with ParAMS
# Now we set up a benchmark job for each ML model of interest, and run them.
# We include all models suitable for small organic molecules, as well as some
# trained on materials/other domains for comparison.
models = [
    "AIMNet2-B973c",
    "AIMNet2-wB97MD3",
    "ANI-1ccx",
    "ANI-1x",
    "ANI-2x",
    "eSEN-S-Con-OMol",
    "M3GNet-UP-2022",
    "MACE-MPA-0",
    "UMA-S-1.2-ODAC",
    "UMA-S-1.2-OMat",
    "UMA-S-1.2-OMol",
]

# One single-point ParAMS benchmark job per ML potential, pinned to a single
# core so the timings/results are reproducible.
jobs = {}
for model in models:
    job = params.ParAMSJob.from_yaml("./isoc7_benchmark", name=f"benchmark_{model}")
    job.settings.runscript.nproc = 1
    job.settings.runscript.preamble_lines = ["export OMP_NUM_THREADS=1"]
    job.settings.input.parallellevels.jobs = 1
    job.settings.input.mlpotential.model = model
    job.settings.input.task = "SinglePoint"
    jobs[model] = job

for name, job in jobs.items():
    job.run()
    # Surface failures instead of silently discarding job.ok()'s return value.
    if not job.ok():
        print(f"Warning: benchmark job '{name}' did not finish successfully")

# Each completed job now has results for the errors between the model
# calculations and the reference. For example, with AIMNet2-wB97MD3:
print("\n".join(str(jobs["AIMNet2-wB97MD3"].results.get_data_set_evaluator()).split("\n")[:20]))

# Results can alternatively be displayed in a correlation plot of the
# reference vs. predicted energies:
ax = jobs["AIMNet2-wB97MD3"].results.plot_simple_correlation("energy", validation_set=False)
ax
ax.figure.savefig("picture3.png")

# Note that all these can also again be viewed in the ParAMS GUI:
# !params -gui ./plams_workdir/benchmark_AIMNet2-wB97MD3/results

# ## Evaluating Benchmark Results
# To evaluate the results, we extract the Mean Absolute Error (MAE) and Root
# Mean Squared Error (RMSE) from the job results. Evaluate each job's data set
# once and reuse it, instead of re-running the evaluator for every statistic.
evaluations = {m: jobs[m].results.get_data_set_evaluator().results for m in models}

results = pd.DataFrame({"Model": models})
results["MAE"] = [evaluations[m].mae for m in models]
results["RMSE"] = [evaluations[m].rmse for m in models]
results.sort_values(by="MAE", inplace=True)
results.rename(
    columns={
        "MAE": "MAE*",
        "RMSE": "RMSE*",
    },
    inplace=True,
)
results.style.hide(axis="index").background_gradient(
    subset=["MAE*", "RMSE*"], cmap="Blues", vmin=0, vmax=10
).format("{:.2f}", subset=["MAE*", "RMSE*"]).set_caption(
    "Benchmark Results for ISOC7 Dataset Isomerization Energies"
    " * Reference method CCSD(T)/CBS, in kcal/mol"
).set_table_styles(
    [
        {"selector": "th", "props": [("border-bottom", "2px solid black")]},
        {"selector": "th, td", "props": [("min-width", "100px")]},
        {"selector": "td", "props": [("border-bottom", "1px solid lightgray")]},
        {"selector": "table", "props": [("border-collapse", "collapse")]},
    ]
)

# From these results, we see that `AIMNet2-wB97MD3` performs best, achieving
# the lowest errors (MAE = 1.29 kcal/mol, RMSE = 1.60 kcal/mol), clearly
# outperforming all other models. In a broader context, this is on a par with
# the best performing DFT functionals, at a fraction of the computational cost.
#
# Other models targeted at small molecules also perform well, including
# `eSEN-S-Con-OMol` and `ANI-1ccx`, all with MAE < 5 kcal/mol.
#
# We find that, as expected, the worst performers in this case are models
# trained primarily on materials data such as `M3GNet-UP-2022` and
# `MACE-MPA-0`, which are better suited to other chemical domains.
#
# One surprising result is for `UMA-S-1.2-OMol`, which would be expected to
# perform better for these systems than other UMA variants. We can compare and
# contrast the correlation plots for `UMA-S-1.2-OMol` with the previous plot
# for the best performing model, `AIMNet2-wB97MD3`:
ax = jobs["UMA-S-1.2-OMol"].results.plot_simple_correlation("energy", validation_set=False)
ax
ax.figure.savefig("picture4.png")

# This shows there are some structures with a very high energy difference
# with respect to the reference.
# We can inspect these structures:
print("\n".join(str(jobs["UMA-S-1.2-OMol"].results.get_data_set_evaluator()).split("\n")[:9]))

# Render the four worst-offending structures side by side.
outlier_ids = ["C6H6_2", "C7H8_3", "C7H8_5", "C6H6_21"]
imgs = {}
for mol_id in outlier_ids:
    imgs[mol_id] = plams.view(molecules[mol_id], width=300, height=300, guess_bonds=True)
plams.plot_image_grid(imgs, rows=1, save_path="picture5.png")

# As noted in the original paper, we find many of the systems with high
# deviations compared to the reference values contain a tetrahedrane core,
# which is also a common issue with many of the tested DFT functionals.