#!/usr/bin/env amspython
# coding: utf-8

# ## Initial imports
from scm.plams import plot_molecule
import scm.plams as plams
from scm.params import ParAMSJob, ResultsImporter
import glob
import matplotlib.pyplot as plt

# ## Initialize PLAMS environment
plams.init()

# ## Run a quick reference MD job for liquid Ar
#
# This step only generates some simple reference data.
#
# Skip it if you
#
# * already have reference data in the ParAMS .yaml format, or
# * have reference data in the ASE .xyz or .db format — convert it to the ParAMS
#   .yaml format instead (see the section "Convert ASE format to ParAMS" below).

# A single Ar atom (Z = 18), packed into a periodic box of 32 atoms.
ar_atom = plams.Molecule(numbers=[18], positions=[(0, 0, 0)])
box = plams.packmol(ar_atom, n_atoms=32, density=1.4, tolerance=3.0)
plot_molecule(box)

# Cheap UFF force field serves as the "reference" engine for this demo.
ref_settings = plams.Settings()
ref_settings.runscript.nproc = 1
ref_settings.input.ForceField.Type = "UFF"

# NVT MD at 500 K; gradients are written so forces can be imported later.
md_job = plams.AMSNVTJob(
    settings=ref_settings,
    molecule=box,
    name="uff_md",
    nsteps=5000,
    temperature=500,
    writeenginegradients=True,
    samplingfreq=500,
)
md_job.run()

# ## Import reference results with ParAMS ResultsImporter
#
# The ``add_trajectory_singlepoints`` results importer turns every sampled MD
# frame into a single-point energy/forces entry. For more details about usage
# of the results importers, see the corresponding tutorials.
ri = ResultsImporter(settings={"units": {"energy": "eV", "forces": "eV/angstrom"}})
ri.add_trajectory_singlepoints(md_job.results.rkfpath(), properties=["energy", "forces"])
# feel free to add other trajectories as well:
# ri.add_trajectory_singlepoints(job2.results.rkfpath(), properties=["energy", "forces"])
# etc...

# ## Optional: split into training/validation sets
#
# Machine learning potentials in ParAMS can only be trained if there is both a
# training set and a validation set.
#
# If you do not specify a validation set, the training set will automatically be
# split into a training and validation set when the parametrization starts.
# Here, we will manually split the data set ourselves.
#
# Let's first print the information in the current ResultsImporter training set:


def print_data_set_summary(data_set, title):
    """Print a short summary of *data_set*: entry count plus the job IDs it covers."""
    ids = data_set.jobids
    print(f"{title}:")
    print(f"  number of entries: {len(data_set)}")
    print(f"  number of jobids: {len(ids)}")
    print(f"  jobids: {ids}")


print_data_set_summary(ri.data_sets["training_set"], "Original training set")

# Above, the number of entries is twice the number of jobids because the
# ``energy`` and ``forces`` extractors are separate entries.
#
# The energy and force extractors for a given structure (e.g. frame006) must
# belong to the same data set, so the split is done per job id with
# ``split_by_jobids`` (80% training, 20% validation, fixed seed for
# reproducibility).
train_part, valid_part = ri.data_sets["training_set"].split_by_jobids(0.8, 0.2, seed=314)
ri.data_sets["training_set"] = train_part
ri.data_sets["validation_set"] = valid_part

print_data_set_summary(ri.data_sets["training_set"], "New training set")
print_data_set_summary(ri.data_sets["validation_set"], "New validation set")

# ## Store the reference results in ParAMS yaml format
#
# ``ResultsImporter.store()`` writes all the data in the results importer to
# the ParAMS .yaml format:
yaml_dir = "yaml_ref_data"
ri.store(yaml_dir, backup=False)

# print the contents of the directory
for entry in glob.glob(f"{yaml_dir}/*"):
    print(entry)

# ## Set up and run a ParAMSJob for training ML Potentials
#
# See the ParAMS MachineLearning documentation for all available input options.
#
# Training the model may take a few minutes.
# Build the ParAMS training job from the stored reference data.
job = ParAMSJob.from_yaml(yaml_dir)
job.name = "params_training_ml_potential"
job.settings.input.Task = "MachineLearning"

# Alias the nested MachineLearning branch of the Settings tree; mutations on
# ``ml`` modify ``job.settings`` in place.
ml = job.settings.input.MachineLearning
ml.CommitteeSize = 1  # train only a single model
ml.MaxEpochs = 200
ml.LossCoeffs.Energy = 10
ml.Backend = "M3GNet"
ml.M3GNet.Model = "UniversalPotential"
ml.Target.Forces.Enabled = "No"
ml.RunAMSAtEnd = "Yes"

job.run()

# ## Results of the ML potential training
#
# ``job.results.get_running_loss()`` returns the loss value as a function of
# epoch; plot it for both data sets.
for set_name in ("training_set", "validation_set"):
    epoch, loss = job.results.get_running_loss(data_set=set_name)
    plt.plot(epoch, loss)
plt.legend(["training loss", "validation loss"])

# If you set ``MachineLearning%RunAMSAtEnd`` (it is on by default), this will
# run the ML potential through AMS at the end of the fitting procedure, similar
# to the ParAMS SinglePoint task.
#
# This will give you access to more results, for example the
# predicted-vs-reference energy and forces for all entries in the training and
# validation set.
# Plot them in a scatter plot like this:
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(8, 6))
for i, data_set in enumerate(["training_set", "validation_set"]):
    # Hoisted out of the inner loop: the evaluator depends only on the data
    # set, not on the plotted property, so fetch it once per data set instead
    # of once per (data set, property) pair.
    dse = job.results.get_data_set_evaluator(data_set=data_set, source="best")
    for j, key in enumerate(["energy", "forces"]):
        data = dse.results[key]
        ax = axes[i][j]
        ax.plot(data.reference_values, data.predictions, ".")
        ax.set_xlabel(f"Reference {key} ({data.unit})")
        ax.set_ylabel(f"Predicted {key} ({data.unit})")
        ax.set_title(f"{data_set}\n{key} MAE: {data.mae:.3f} {data.unit}")
        # Freeze the autoscaled limits, then draw a wide y = x guide line
        # behind the data without letting it rescale the axes.
        ax.set_xlim(auto=True)
        ax.autoscale(False)
        ax.plot([-10, 10], [-10, 10], linewidth=5, zorder=-1, alpha=0.3, c="red")
plt.subplots_adjust(hspace=0.6, wspace=0.4)

# ## Get the engine settings for production jobs
#
# First, let's find the path to where the trained m3gnet model resides using
# ``get_deployed_model_paths()``. This function returns a list of paths to the
# trained models. In this case we only trained one model, so we access the
# first element of the list with ``[0]``:
print(job.results.get_deployed_model_paths()[0])

# The above is the path we need to give as the ``ParameterDir`` input option in
# the AMS MLPotential engine. For other backends it might instead be the
# ``ParameterFile`` option.
#
# To get the complete engine settings as a PLAMS Settings object, use the
# method ``get_production_engine_settings()``:
production_engine_settings = job.results.get_production_engine_settings()
print(plams.AMSJob(settings=production_engine_settings).get_input())

# ## Run a short MD simulation with the trained potential
production_engine_settings.runscript.nproc = 1  # run AMS Driver in serial
new_md_job = plams.AMSNVTJob(
    settings=production_engine_settings,
    molecule=box,
    nsteps=5000,
    temperature=300,
    samplingfreq=100,
    name="production_md",
    timestep=1.0,
)
new_md_job.run(watch=True)

# ## Open trajectory file in AMSmovie
# With the production trajectory you can run analysis tools in AMSmovie, or
# access them from Python.
# See the AMS manual for details.

# Path to the production trajectory file; open it in AMSmovie (or feed it to
# the Python analysis tools) for post-processing.
trajectory_file = new_md_job.results.rkfpath()

# ## Finish PLAMS
plams.finish()