#!/usr/bin/env amspython
# coding: utf-8

# ## Load a data_set_predictions.yaml file
#
# The most common way to use a DataSetEvaluator is to load the ``data_set_predictions.yaml`` file produced during an optimization.

from scm.params import *
import os

# if you go via ParAMSJob:
# job = ParAMSJob.load_external('/path/results/')
# dse = job.results.get_data_set_evaluator()

# to just load the .yaml file:
yaml_file = os.path.expandvars(
    "$AMSHOME/scripting/scm/params/examples/ZnS_ReaxFF/example_output/best/data_set_predictions.yaml"
)
dse = DataSetEvaluator(yaml_file)

# ### Summary statistics (stats.txt)
#
# The results can be grouped in different ways. By default, the data is grouped first by ``'Extractor'`` and then by ``'Expression'``. To get a file like ``stats.txt``, simply call the ``.str()`` method:

print(dse.str())

# Note that the extractor names for the various expressions are not shown if there are no arguments to the extractor. This makes the output more readable.
#
# You can access individual entries from the above table as follows:

print(len(dse.results["charges"].residuals))  # the N for the charges
print(dse.results["charges"]["zincblende_sp"].mae)  # MAE for an expression
print(dse.results["forces"].rmse)  # RMSE for an extractor
print(dse.results["forces"].unit)  # unit for an extractor
print(
    dse.results["charges"]["wurtzite_sp"].weight
)  # the weight is returned as a scalar, even for array reference values
print(
    dse.results["energy"]["1.0*zincblende_sp-0.5*wurtzite_sp"].my_loss
)  # "my_loss" refers to the loss of the individual entry
print(dse.results["forces"].contribution)  # fractional contribution to the weighted loss function
print(dse.results.total_loss)  # total loss function value
print(dse.results.loss_type)  # type of loss function

# You can also just print a summary of a part of the table:

print(dse.results["forces"].str())

# You can also modify the grouping to only go one level deep:

dse.group_by(("Extractor",))  # the default is group_by(('Extractor', 'Expression'))
print(dse.str())

# If there is metadata attached to the training set entries, you can also group by those. For example, when creating a training set with a ``ResultsImporter``, the ``Group`` and ``SubGroup`` metadata are automatically set:

dse.group_by(("Group", "SubGroup"))
print(dse.str())
print(dse.results["Forces"].mae)  # capital F in the Group metadata

# ### Access individual predictions and reference values (scatter_plots/)
#
# Call the ``.detailed_string()`` method to get files similar to ``scatter_plots/forces.txt`` etc.

dse.group_by(("Extractor", "Expression"))  # reset to the original grouping
results = dse.results["pes"]  # look at the results for the pes extractor
print(results.detailed_string())

print(results.reference_values)  # list of reference values
print(results.predictions)  # list of predicted values
print(results.unit)  # the unit
print(results.accuracies)  # the Sigma values (per expression)
print(results.weights)  # the Weights (per reference/prediction)
print(results.contributions)  # list of individual contributions (per expression)
print(results.expressions)  # list of expressions

# Note that the number of reference values is different from the number of expressions when the reference values are arrays. To get the reference values per expression:

for e in results.expressions:
    print(f"Expression: {e}, Ref. values: {results[e].reference_values}")