#!/usr/bin/env amspython
# coding: utf-8

# ## Load a data_set_predictions.yaml file
#
# The most common way to use a DataSetEvaluator is to load the ``data_set_predictions.yaml`` file produced during an optimization.

from scm.params import *
import os

# if you go via ParAMSJob:
# job = ParAMSJob.load_external('/path/results/')
# dse = job.results.get_data_set_evaluator()

# to just load the .yaml file:
yaml_file = os.path.expandvars(
    "$AMSHOME/scripting/scm/params/examples/ZnS_ReaxFF/example_output/best/data_set_predictions.yaml"
)
dse = DataSetEvaluator(yaml_file)

# ### Summary statistics (stats.txt)
#
# The results can be grouped in different ways. By default, the data is grouped first by ``'Extractor'`` and then by ``'Expression'``. To get a file like ``stats.txt``, simply call the ``.str()`` method:

print(dse.str())

# Note that the extractor names for the various expressions are not shown if there are no arguments to the extractor. This makes the output more readable.
#
# You can access individual entries from the above table as follows:

print(len(dse.results["charges"].residuals))  # the N for the charges
print(dse.results["charges"]["zincblende_sp"].mae)  # MAE for an expression
print(dse.results["forces"].rmse)  # RMSE for an extractor
print(dse.results["forces"].unit)  # unit for an extractor
print(
    dse.results["charges"]["wurtzite_sp"].weight
)  # the weight is returned as a scalar, even for array reference values
print(
    dse.results["energy"]["1.0*zincblende_sp-0.5*wurtzite_sp"].my_loss
)  # "my_loss" refers to the loss of the individual entry
print(dse.results["forces"].contribution)  # fractional contribution to the weighted loss function
print(dse.results.total_loss)  # total loss function value
print(dse.results.loss_type)  # type of loss function

# You can also just print a summary of a part of the table:

print(dse.results["forces"].str())

# You can also modify the grouping to only go one level deep:

dse.group_by(("Extractor",))  # the default is group_by(('Extractor', 'Expression'))
print(dse.str())

# If there is metadata attached to the training set entries, you can also group by those. For example, when creating a training set with a ``ResultsImporter``, the ``Group`` and ``SubGroup`` metadata are automatically set:

dse.group_by(("Group", "SubGroup"))
print(dse.str())
print(dse.results["Forces"].mae)  # capital F in the Group metadata

# ### Access individual predictions and reference values (scatter_plots/)
#
# Call the ``.detailed_string()`` method to get files similar to ``scatter_plots/forces.txt`` etc.

dse.group_by(("Extractor", "Expression"))  # reset to the original grouping
results = dse.results["pes"]  # look at the results for the pes extractor
print(results.detailed_string())

print(results.reference_values)  # list of reference values
print(results.predictions)  # list of predicted values
print(results.unit)  # the unit
print(results.accuracies)  # the Sigma values (per expression)
print(results.weights)  # the Weights (per reference/prediction)
print(results.contributions)  # list of individual contributions (per expression)
print(results.expressions)  # list of expressions

# Note that the number of reference values is different from the number of expressions when the reference values are arrays. To get the reference values per expression:

for e in results.expressions:
    print(f"Expression: {e}, Ref. values: {results[e].reference_values}")