Active Learning: Ru/H Part 4, Initial Training¶
Train the first custom M3GNet model for the Ru/H case study from the accumulated reference data. Run the earlier Ru/H parts first.
Downloads: Notebook | Script
Additional files required by this example:
common_ru_h.py
Requires: AMS2026 or later
Required AMS Packages: m3gnet
Related examples
Related tutorials
Related documentation
Initial imports¶
import scm.plams as plams
from scm.params import ResultsImporter, ParAMSJob
from scm.plams import Settings, AMSJob, log, Molecule, packmol_on_slab
from pathlib import Path
import matplotlib.pyplot as plt
# common_ru_h.py must exist in the current working directory
from common_ru_h import rotation, check_installation
# register dependencies for AMSjobs, to support submitting this notebook directly to a cluster in AMS2025+
# dependency: {} common_ru_h.py
# dependency: {} reference_data_3
Initialize PLAMS working directory¶
# Reference results accumulated in the earlier Ru/H parts (part 3 output).
old_ref_dir = "reference_data_3"
# check_installation() comes from common_ru_h.py; presumably validates that the
# required packages and the reference directory exist -- see that file for details.
check_installation(old_ref_dir)
# Destination for the training/validation split created below.
new_ref_dir = "reference_data_4"
plams.init()
Current AMS version: 2024.102
05-31 15:46:43 m3gnet is installed: M3GNet ML Backend v[0.2.4] - build:0 [06668e0a45ce742d8f66ff23484b8a1e]
05-31 15:46:43 qe is installed: Quantum ESPRESSO (AMSPIPE) v[7.1] - build:115 [777d72eb480fe4d632a003cc62e9c1cb]
PLAMS working folder: /path/to/plams_workdir.004
Perform training/validation split¶
## Create a training/validation split
# Load the accumulated reference data and carve off a small validation set.
importer = ResultsImporter.from_yaml(old_ref_dir)
log("Performing training/validation split")
# Split by job ids (whole jobs, not individual entries), 95%/5%.
# The fixed seed makes the split reproducible across reruns.
train_part, valid_part = importer.get_data_set("training_set").split_by_jobids(
    0.95, 0.05, seed=314
)
importer.data_sets = {"training_set": train_part, "validation_set": valid_part}
log(f"{len(train_part)} training set entries; {len(valid_part)} validation set entries.")
log(f"Storing in {new_ref_dir}")
# Trailing ';' suppresses the notebook echo of the return value.
importer.store(new_ref_dir);
[31.05|15:46:45] Performing training/validation split
[31.05|15:46:45] 126 training set entries; 8 validation set entries.
[31.05|15:46:45] Storing in reference_data_4
Create a ParAMS Job for transfer learning on the M3GNet universal potential¶
# Build the ParAMS job from the freshly stored train/validation data sets.
job = ParAMSJob.from_yaml(new_ref_dir)
job.name = "initial_training"

inp = job.settings.input
inp.Task = "MachineLearning"

# Alias the nested MachineLearning branch to avoid repeating the dotted path.
ml = inp.MachineLearning
ml.CommitteeSize = 1  # train only a single model, no committee
ml.MaxEpochs = 250
ml.LossCoeffs.Energy = 10.0
ml.LossCoeffs.Forces = 1.0
ml.Backend = "M3GNet"
ml.M3GNet.LearningRate = 1e-3
ml.M3GNet.Model = "UniversalPotential"
# Values must be the strings "Yes"/"No", not Python booleans.
# NOTE(review): these flags presumably select which layers of the universal
# potential are retrained during transfer learning -- confirm in the ParAMS docs.
ml.M3GNet.UniversalPotential = Settings(
    Featurizer="No",
    ThreeDInteractions1="No",
    GraphLayer1="No",
    ThreeDInteractions2="No",
    GraphLayer2="No",
    ThreeDInteractions3="Yes",
    GraphLayer3="Yes",
    Final="Yes",
)
ml.Target.Forces.Enabled = "Yes"
ml.Target.Forces.MAE = 0.05
ml.RunAMSAtEnd = "Yes"

# Larger batch sizes require more (GPU) memory but typically train faster;
# memory use also grows with the number of atoms per structure, so pick an
# appropriate number for the hardware at hand.
inp.DataSet[0].BatchSize = 10  # training set batch size
inp.DataSet[1].BatchSize = 10  # validation set batch size

print(job.get_input())
Task MachineLearning
DataSet
BatchSize 10
Name training_set
... output trimmed ....
End
End
end
# Run the training; trailing ';' suppresses the notebook echo of the return value.
job.run();
[31.05|15:46:45] JOB initial_training STARTED
[31.05|15:46:45] JOB initial_training RUNNING
[31.05|15:56:49] JOB initial_training FINISHED
[31.05|15:56:49] JOB initial_training SUCCESSFUL
Plot some results of the training¶
# Correlation (predicted vs. reference) plot for the force components.
job.results.plot_simple_correlation("forces");
# Plot the potential-energy-surface scans from the results.
job.results.plot_all_pes()
plt.subplots_adjust(top=2, hspace=0.5);  # extra vertical spacing between subplots
Copy the results directory to a known place¶
import shutil

# Preserve the trained model outside the PLAMS working folder so that the
# upcoming active-learning parts can reference it at a stable path.
src_dir = str(job.results.path)
dst_dir = Path("initial_training_results").resolve()
log(f"Copying {src_dir} to {dst_dir}")
shutil.copytree(src_dir, dst_dir, dirs_exist_ok=True)
log(f"Use {dst_dir} as the LoadModel in upcoming active learning.")
[31.05|15:56:50] Copying /path/to/plams_workdir.004/initial_training/results to /home/hellstrom/SALRuH/fix2024/initial_training_results
[31.05|15:56:50] Use /home/hellstrom/SALRuH/fix2024/initial_training_results as the LoadModel in upcoming active learning.
See also¶
Python Script¶
#!/usr/bin/env python
# coding: utf-8
# ## Initial imports
import scm.plams as plams
from scm.params import ResultsImporter, ParAMSJob
from scm.plams import Settings, AMSJob, log, Molecule, packmol_on_slab
from pathlib import Path
import matplotlib.pyplot as plt
# common_ru_h.py must exist in the current working directory
from common_ru_h import rotation, check_installation
# register dependencies for AMSjobs, to support submitting this notebook directly to a cluster in AMS2025+
# dependency: {} common_ru_h.py
# dependency: {} reference_data_3
# ## Initialize PLAMS working directory
old_ref_dir = "reference_data_3"  # reference data accumulated in part 3
check_installation(old_ref_dir)  # from common_ru_h.py; validates the setup
new_ref_dir = "reference_data_4"  # destination for the train/validation split
plams.init()

# ## Perform training/validation split
# Split by job ids (whole jobs), 95%/5%; fixed seed for reproducibility.
importer = ResultsImporter.from_yaml(old_ref_dir)
log("Performing training/validation split")
train_part, valid_part = importer.get_data_set("training_set").split_by_jobids(0.95, 0.05, seed=314)
importer.data_sets = {"training_set": train_part, "validation_set": valid_part}
log(f"{len(train_part)} training set entries; {len(valid_part)} validation set entries.")
log(f"Storing in {new_ref_dir}")
importer.store(new_ref_dir)

# ## Create a ParAMS Job for transfer learning on the M3GNet universal potential
job = ParAMSJob.from_yaml(new_ref_dir)
job.name = "initial_training"
inp = job.settings.input
inp.Task = "MachineLearning"
ml = inp.MachineLearning  # alias the nested branch to avoid repeating the path
ml.CommitteeSize = 1  # train only a single model, no committee
ml.MaxEpochs = 250
ml.LossCoeffs.Energy = 10.0
ml.LossCoeffs.Forces = 1.0
ml.Backend = "M3GNet"
ml.M3GNet.LearningRate = 1e-3
ml.M3GNet.Model = "UniversalPotential"
# Values must be the strings "Yes"/"No", not Python booleans.
# NOTE(review): these flags presumably select which layers of the universal
# potential are retrained during transfer learning -- confirm in the ParAMS docs.
ml.M3GNet.UniversalPotential = Settings(
    Featurizer="No",
    ThreeDInteractions1="No",
    GraphLayer1="No",
    ThreeDInteractions2="No",
    GraphLayer2="No",
    ThreeDInteractions3="Yes",
    GraphLayer3="Yes",
    Final="Yes",
)
ml.Target.Forces.Enabled = "Yes"
ml.Target.Forces.MAE = 0.05
ml.RunAMSAtEnd = "Yes"
# Larger batch sizes require more (GPU) memory but typically train faster;
# memory use also grows with the number of atoms per structure.
inp.DataSet[0].BatchSize = 10  # training set batch size
inp.DataSet[1].BatchSize = 10  # validation set batch size
print(job.get_input())
job.run()

# ## Plot some results of the training
job.results.plot_simple_correlation("forces")
job.results.plot_all_pes()
plt.subplots_adjust(top=2, hspace=0.5)
plt.gcf().savefig("picture1.png")
plt.clf()

# ## Copy the results directory to a known place
import shutil

# Preserve the trained model outside the PLAMS working folder so that the
# upcoming active-learning parts can reference it at a stable path.
src_dir = str(job.results.path)
dst_dir = Path("initial_training_results").resolve()
log(f"Copying {src_dir} to {dst_dir}")
shutil.copytree(src_dir, dst_dir, dirs_exist_ok=True)
log(f"Use {dst_dir} as the LoadModel in upcoming active learning.")