#!/usr/bin/env amspython
from scm.plams import *
from scm.params import *
import numpy as np
import os
import matplotlib.pyplot as plt


def running_loss(job):
    """Plot log10 of the running loss vs. evaluation number.

    Plots both the training-set and validation-set running loss from the
    finished ParAMS job and saves the figure to running_loss.png in the
    current directory.
    """
    train_eval, train_loss = job.results.get_running_loss()
    val_eval, val_loss = job.results.get_running_loss(data_set="validation_set")
    plt.clf()
    # log10 makes the typical orders-of-magnitude loss decay readable
    plt.plot(train_eval, np.log10(train_loss))
    plt.plot(val_eval, np.log10(val_loss))
    plt.legend(["Training set", "Validation set"])
    plt.xlabel("Evaluation")
    plt.ylabel("log10(loss)")
    plt.savefig("running_loss.png")


def predictions_for_best_parameters_on_validation_set(job):
    """Scatter-plot predicted vs. reference forces for the parameters that
    scored best on the validation set.

    Evaluates those parameters on both the training and validation sets and
    saves the figure to forces_scatter_plot.png in the current directory.
    """
    # first sanity-check that the parameter interface in
    # results/training_set_results/validation_set_best_parameters and
    # results/validation_set_results/best
    # are the same
    best_val_interf = job.results.get_parameter_interface(source="best", data_set="validation_set")
    val_set_best_interf = job.results.get_parameter_interface(
        source="validation_set_best_parameters", data_set="training_set"
    )
    assert best_val_interf == val_set_best_interf

    train_forces_results = job.results.get_data_set_evaluator(
        source="validation_set_best_parameters", data_set="training_set"
    ).results["forces"]
    val_forces_results = job.results.get_data_set_evaluator(source="best", data_set="validation_set").results["forces"]

    eval_id = job.results.get_evaluation_number(source="best", data_set="validation_set")
    # Lennard-Jones parameters of the best interface, used in the plot title
    rmin = best_val_interf["rmin"].value
    eps = best_val_interf["eps"].value
    title = f"Evaluation {eval_id}, rmin = {rmin:.2f}, eps = {eps:.6f}"
    unit = train_forces_results.unit

    plt.clf()
    plt.title(title)
    plt.plot(train_forces_results.reference_values, train_forces_results.predictions, ".")
    plt.plot(val_forces_results.reference_values, val_forces_results.predictions, "x")
    plt.legend(["Training set", "Validation set"])
    plt.xlabel(f"Reference force ({unit})")
    plt.ylabel(f"Predicted force ({unit})")
    plt.savefig("forces_scatter_plot.png")


def data_set_random_split():
    """Demonstrate a random train/validation split of a ParAMS DataSet.

    Reads the LJ_Ar example data set, splits it 60/40 with a fixed seed
    (so the split is reproducible), prints the resulting entries, and
    stores the two subsets as YAML files in the current directory.
    """
    # get training set from LJ_Ar example (5 entries)
    data_set = DataSet(os.path.expandvars("$AMSHOME/scripting/scm/params/examples/LJ_Ar/training_set.yaml"))
    subsets = data_set.split(0.6, 0.4, seed=319)
    training_set = subsets[0]
    validation_set = subsets[1]
    print("Results from random train/validation split:")
    print(f"Training set #entries: {len(training_set)}, entries: {training_set.keys()}")
    print(f"Validation set #entries: {len(validation_set)}, entries: {validation_set.keys()}")
    training_set.store("training_set_from_random_split.yaml")
    validation_set.store("validation_set_from_random_split.yaml")


def main():
    """Run the LJ_Ar_validation_set example job and post-process its results."""
    init()
    inputfile = os.path.expandvars("$AMSHOME/scripting/scm/params/examples/LJ_Ar_validation_set/params.in")
    job = ParAMSJob.from_inputfile(inputfile, name="LJ_Ar_with_validation_set")
    job.run()
    running_loss(job)
    predictions_for_best_parameters_on_validation_set(job)
    # demonstrate how to do a random split of a dataset into training and validation sets
    data_set_random_split()
    finish()


if __name__ == "__main__":
    main()