#!/usr/bin/env amspython
from scm.plams import *
from scm.params import *
import numpy as np
import os
import matplotlib.pyplot as plt


def running_loss(job):
    # Plot log10 of the running loss versus evaluation number for both
    # the training set and the validation set.
    train_eval, train_loss = job.results.get_running_loss()
    val_eval, val_loss = job.results.get_running_loss(data_set='validation_set')
    plt.clf()
    plt.plot(train_eval, np.log10(train_loss))
    plt.plot(val_eval, np.log10(val_loss))
    plt.legend(["Training set", "Validation set"])
    plt.xlabel("Evaluation")
    plt.ylabel("log10(loss)")
    plt.savefig("running_loss.png")


def predictions_for_best_parameters_on_validation_set(job):
    # First sanity-check that the parameter interfaces in
    # results/training_set_results/validation_set_best_parameters and
    # results/validation_set_results/best
    # are the same.
    best_val_interf = job.results.get_parameter_interface(source='best', data_set='validation_set')
    val_set_best_interf = job.results.get_parameter_interface(source='validation_set_best_parameters', data_set='training_set')
    assert best_val_interf == val_set_best_interf

    # Evaluate the forces with the parameters that gave the lowest validation loss,
    # on both the training set and the validation set.
    train_forces_results = job.results.get_data_set_evaluator(source='validation_set_best_parameters', data_set='training_set').results['forces']
    val_forces_results = job.results.get_data_set_evaluator(source='best', data_set='validation_set').results['forces']

    eval_id = job.results.get_evaluation_number(source='best', data_set='validation_set')
    rmin = best_val_interf['rmin'].value
    eps = best_val_interf['eps'].value
    title = f"Evaluation {eval_id}, rmin = {rmin:.2f}, eps = {eps:.6f}"
    unit = train_forces_results.unit

    # Scatter plot of predicted vs. reference forces.
    plt.clf()
    plt.title(title)
    plt.plot(train_forces_results.reference_values, train_forces_results.predictions, '.')
    plt.plot(val_forces_results.reference_values, val_forces_results.predictions, 'x')
    plt.legend(["Training set", "Validation set"])
    plt.xlabel(f"Reference force ({unit})")
    plt.ylabel(f"Predicted force ({unit})")
    plt.savefig("forces_scatter_plot.png")


def data_set_random_split():
    # Get the training set from the LJ_Ar example (5 entries).
    data_set = DataSet(os.path.expandvars('$AMSHOME/scripting/scm/params/examples/LJ_Ar/training_set.yaml'))

    # Randomly split it 60/40 into a training set and a validation set.
    subsets = data_set.split(0.6, 0.4, seed=319)
    training_set = subsets[0]
    validation_set = subsets[1]

    print("Results from random train/validation split:")
    print(f"Training set #entries: {len(training_set)}, entries: {training_set.keys()}")
    print(f"Validation set #entries: {len(validation_set)}, entries: {validation_set.keys()}")

    training_set.store('training_set_from_random_split.yaml')
    validation_set.store('validation_set_from_random_split.yaml')


def main():
    init()

    inputfile = os.path.expandvars('$AMSHOME/scripting/scm/params/examples/LJ_Ar_validation_set/params.in')
    job = ParAMSJob.from_inputfile(inputfile, name="LJ_Ar_with_validation_set")
    job.run()

    running_loss(job)
    predictions_for_best_parameters_on_validation_set(job)

    # Demonstrate how to do a random split of a data set into training and validation sets.
    data_set_random_split()

    finish()


if __name__ == '__main__':
    main()