#!/usr/bin/env amspython
# coding: utf-8
"""Transfer-learning workflow: split reference data, retrain the M3GNet
universal potential with ParAMS, plot the fit, and stash the results for
the upcoming active-learning step."""

# ## Initial imports
import shutil
from pathlib import Path

import matplotlib.pyplot as plt

import scm.plams as plams
from scm.params import ResultsImporter, ParAMSJob
from scm.plams import Settings, AMSJob, log, Molecule, packmol_on_slab

# common_ru_h.py must exist in the current working directory
from common_ru_h import rotation, check_installation

# ## Initialize PLAMS working directory
old_ref_dir = "reference_data_3"
check_installation(old_ref_dir)
new_ref_dir = "reference_data_4"
plams.init()

# ## Perform training/validation split
# Create a training/validation split
ri = ResultsImporter.from_yaml(old_ref_dir)
log("Performing training/validation split")
# 95/5 split by job id; fixed seed keeps the split reproducible across runs
training_set, validation_set = ri.get_data_set("training_set").split_by_jobids(0.95, 0.05, seed=314)
ri.data_sets = {"training_set": training_set, "validation_set": validation_set}
log(f"{len(training_set)} training set entries; {len(validation_set)} validation set entries.")
log(f"Storing in {new_ref_dir}")
ri.store(new_ref_dir)

# ## Create a ParAMS Job for transfer learning on the M3GNet universal potential
job = ParAMSJob.from_yaml(new_ref_dir)
job.name = "initial_training"
inp = job.settings.input
inp.Task = "MachineLearning"
inp.MachineLearning.CommitteeSize = 1  # train only a single model
inp.MachineLearning.MaxEpochs = 250
inp.MachineLearning.LossCoeffs.Energy = 10.0
inp.MachineLearning.LossCoeffs.Forces = 1.0
inp.MachineLearning.Backend = "M3GNet"
inp.MachineLearning.M3GNet.LearningRate = 1e-3
inp.MachineLearning.M3GNet.Model = "UniversalPotential"
# Freeze the early layers of the universal potential; only the last
# interaction/graph layers and the final layer are retrained.
inp.MachineLearning.M3GNet.UniversalPotential = Settings(
    Featurizer="No",  # must use strings here, not Python booleans
    ThreeDInteractions1="No",
    GraphLayer1="No",
    ThreeDInteractions2="No",
    GraphLayer2="No",
    ThreeDInteractions3="Yes",
    GraphLayer3="Yes",
    Final="Yes",
)
inp.MachineLearning.Target.Forces.Enabled = "Yes"
inp.MachineLearning.Target.Forces.MAE = 0.05
inp.MachineLearning.RunAMSAtEnd = "Yes"

# Larger batch sizes require more (GPU) memory but will also typically train faster
# The amount of memory also depends on the number of atoms in the structures
# So set the batch size to some appropriate number
inp.DataSet[0].BatchSize = 10  # training set batch size
inp.DataSet[1].BatchSize = 10  # validation set batch size

print(job.get_input())
job.run()

# ## Plot some results of the training
job.results.plot_simple_correlation("forces")
job.results.plot_all_pes()
plt.subplots_adjust(top=2, hspace=0.5)

# ## Copy the results directory to a known place
orig_training_results_dir = str(job.results.path)
new_training_results_dir = Path("initial_training_results").resolve()
log(f"Copying {orig_training_results_dir} to {new_training_results_dir}")
shutil.copytree(orig_training_results_dir, new_training_results_dir, dirs_exist_ok=True)
log(f"Use {new_training_results_dir} as the LoadModel in upcoming active learning.")