#!/usr/bin/env amspython
# coding: utf-8

# ## Initial imports

import scm.plams as plams
from scm.params import ParAMSJob, ResultsImporter
import matplotlib.pyplot as plt


# ## Initialize PLAMS environment

plams.init()


# ## Initialize results importer

ri = ResultsImporter()


# ## Initial water molecule structure

molecule = plams.from_smiles("O")
for at in molecule:
    at.properties = plams.Settings()

plams.plot_molecule(molecule)


# ## Reference bond scan #1: O-H

s = plams.Settings()
s.input.ams.Task = "PESScan"
s.input.ams.PESScan.ScanCoordinate = [plams.Settings()]
# Scan O-H bond from 0.8 to 1.2 angstrom in 7 steps
s.input.ams.PESScan.ScanCoordinate[0].Distance = ["1 2 0.8 1.2"]
s.input.ams.PESScan.ScanCoordinate[0].nPoints = 7
# CalcPropertiesAtPESPoints is required to be able to extract the forces later
s.input.ams.PESScan.CalcPropertiesAtPESPoints = "Yes"
s.input.ForceField.Type = "UFF"
s.runscript.nproc = 1


refjob1 = plams.AMSJob(settings=s, molecule=molecule, name="OH_scan")
refjob1.run()


# ``add_pesscan_singlepoints`` will give points that ARE used for training/validation:

ri.add_pesscan_singlepoints(refjob1, properties=["energy", "forces"])


# ``add_singlejob`` with ``task="PESScan"`` gives a job that is used neither for training nor validation. Instead it will be part of the "test set", no matter if it's given in the training or validation sets in the input.

ri.add_singlejob(refjob1, task="PESScan", properties="pes")


# ## Reference bond scan #2: H-H

s = plams.Settings()
s.input.ams.Task = "PESScan"
s.input.ams.PESScan.ScanCoordinate = [plams.Settings()]
# Scan H-H distance from 0.8 to 1.2 angstrom in 7 steps
s.input.ams.PESScan.ScanCoordinate[0].Distance = ["3 2 1.2 1.8"]
s.input.ams.PESScan.ScanCoordinate[0].nPoints = 7
# CalcPropertiesAtPESPoints is required to be able to extract the forces later
s.input.ams.PESScan.CalcPropertiesAtPESPoints = "Yes"
s.input.ForceField.Type = "UFF"
s.runscript.nproc = 1
refjob2 = plams.AMSJob(settings=s, molecule=molecule, name="HH_scan")
refjob2.run()


# For the H-H, we will *only* add it to the "test set" with task PESScan.
#
# This will clearly illustrate that while the O-H bond scan is well reproduced by the trained model, the H-H scan will not be well reproduced.

ri.add_singlejob(refjob2, task="PESScan", properties="pes")


# ## Reference geometry optimization

s = plams.Settings()
s.input.ams.Task = "GeometryOptimization"
s.input.ForceField.Type = "UFF"
s.runscript.nproc = 1


refjob3 = plams.AMSJob(settings=s, molecule=molecule, name="geometry_optimization")
refjob3.run()


# ``add_trajectory_singlepoints`` will give points that ARE used for training/validation:

ri.add_trajectory_singlepoints(refjob3, properties=["energy", "forces"], N=5, data_set="validation_set")


# ``add_singlejob`` with ``task="GeometryOptimization"`` gives a job that is used neither for training nor validation. Instead it will be part of the "test set", no matter if it's given in the training or validation sets in the input.

ri.add_singlejob(refjob3, task="GeometryOptimization", properties=["energy", "distance(0,1)"])


# ## Store the reference results in ParAMS yaml format
#
# Use ``ResultsImporter.store()`` to store all the data in the results importer in the ParAMS .yaml format:

yaml_dir = "yaml_ref_data"
ri.store(yaml_dir, backup=False)


# ## Set up and run a ParAMSJob for training ML Potentials
#
# See the ParAMS MachineLearning documentation for all available input options.
#
# Training the model may take a few minutes.

job = ParAMSJob.from_yaml(yaml_dir)
job.name = "params_training_ml_potential"
job.settings.input.Task = "MachineLearning"
job.settings.input.MachineLearning.CommitteeSize = 1  # train only a single model
job.settings.input.MachineLearning.MaxEpochs = 50
job.settings.input.MachineLearning.LossCoeffs.Energy = 10
job.settings.input.MachineLearning.Backend = "M3GNet"
job.settings.input.MachineLearning.M3GNet.Model = "UniversalPotential"
job.settings.input.MachineLearning.Target.Forces.Enabled = "No"
job.settings.input.MachineLearning.RunAMSAtEnd = "Yes"


job.run()


# ## Results of the ML potential training

epoch, training_loss = job.results.get_running_loss(data_set="training_set")
plt.plot(epoch, training_loss)

epoch, validation_loss = job.results.get_running_loss(data_set="validation_set")
plt.plot(epoch, validation_loss)
plt.legend(["training loss", "validation loss"])


# If you set ``MachineLearning%RunAMSAtEnd`` (it is on by default), this will run the ML potential through AMS at the end of the fitting procedure, similar to the ParAMS SinglePoint task.
#
# This means that also entries that are not used during the training/validation will be evaluated, for example the "pes" (energy vs. bond length) extractor and the "distance(0, 1)" (O-H optimized bond length) extractor.

# ## Plot results for actual training/validation sets

job.results.plot_simple_correlation("forces")


# ## Plot results for "test set"
# These jobs or results were NOT used during the training/validation, even if the extractors were part of the training or validation sets!
#
# The jobs were simply run at the end of the parametrization ("RunAMSAtEnd") and the results extracted at that points.
#
# First let's plot the **energy vs. bond length** curves:

job.results.plot_all_pes()
plt.subplots_adjust(top=2, hspace=0.5)


# Above we clearly see that the O-H scan is perfectly well reproduced by the trained model but not the H-H scan. This is because we called ``add_pesscan_singlepoints`` on the O-H bond scan reference job.
#
# Then let's get the **reference and predicted optimized O-H bond length in water**:

print(job.results.get_data_set_evaluator().results["distance"].detailed_string())


# ## Finish PLAMS

plams.finish()