#!/usr/bin/env amspython
# coding: utf-8
"""Transfer-learning workflow: split reference data, retrain the M3GNet
universal potential with ParAMS, plot the fit, and stash the results for
the upcoming active-learning step."""

# ## Initial imports
import shutil
from pathlib import Path

import matplotlib.pyplot as plt

import scm.plams as plams
from scm.params import ResultsImporter, ParAMSJob
from scm.plams import Settings, AMSJob, log, Molecule, packmol_on_slab

# common_ru_h.py must exist in the current working directory
from common_ru_h import rotation, check_installation

# ## Initialize PLAMS working directory
old_ref_dir = "reference_data_3"
check_installation(old_ref_dir)
new_ref_dir = "reference_data_4"
plams.init()

# ## Perform training/validation split
# Create a training/validation split
ri = ResultsImporter.from_yaml(old_ref_dir)
log("Performing training/validation split")
# 95/5 split by job id; fixed seed keeps the split reproducible across runs
training_set, validation_set = ri.get_data_set("training_set").split_by_jobids(0.95, 0.05, seed=314)
ri.data_sets = {"training_set": training_set, "validation_set": validation_set}
log(f"{len(training_set)} training set entries; {len(validation_set)} validation set entries.")
log(f"Storing in {new_ref_dir}")
ri.store(new_ref_dir)

# ## Create a ParAMS Job for transfer learning on the M3GNet universal potential
job = ParAMSJob.from_yaml(new_ref_dir)
job.name = "initial_training"
inp = job.settings.input
inp.Task = "MachineLearning"
inp.MachineLearning.CommitteeSize = 1  # train only a single model
inp.MachineLearning.MaxEpochs = 250
inp.MachineLearning.LossCoeffs.Energy = 10.0
inp.MachineLearning.LossCoeffs.Forces = 1.0
inp.MachineLearning.Backend = "M3GNet"
inp.MachineLearning.M3GNet.LearningRate = 1e-3
inp.MachineLearning.M3GNet.Model = "UniversalPotential"
# Freeze the early layers of the universal potential; only the last
# interaction/graph layers and the final layer are retrained.
inp.MachineLearning.M3GNet.UniversalPotential = Settings(
    Featurizer="No",  # must use strings here, not Python booleans
    ThreeDInteractions1="No",
    GraphLayer1="No",
    ThreeDInteractions2="No",
    GraphLayer2="No",
    ThreeDInteractions3="Yes",
    GraphLayer3="Yes",
    Final="Yes",
)
inp.MachineLearning.Target.Forces.Enabled = "Yes"
inp.MachineLearning.Target.Forces.MAE = 0.05
inp.MachineLearning.RunAMSAtEnd = "Yes"

# Larger batch sizes require more (GPU) memory but will also typically train faster
# The amount of memory also depends on the number of atoms in the structures
# So set the batch size to some appropriate number
inp.DataSet[0].BatchSize = 10  # training set batch size
inp.DataSet[1].BatchSize = 10  # validation set batch size

print(job.get_input())
job.run()

# ## Plot some results of the training
job.results.plot_simple_correlation("forces")
job.results.plot_all_pes()
plt.subplots_adjust(top=2, hspace=0.5)

# ## Copy the results directory to a known place
orig_training_results_dir = str(job.results.path)
new_training_results_dir = Path("initial_training_results").resolve()
log(f"Copying {orig_training_results_dir} to {new_training_results_dir}")
shutil.copytree(orig_training_results_dir, new_training_results_dir, dirs_exist_ok=True)
log(f"Use {new_training_results_dir} as the LoadModel in upcoming active learning.")