#!/usr/bin/env python
# coding: utf-8

# ## Create Example Jobs

# To begin with, we create a variety of AMS jobs with different settings, tasks, engines and calculation types.
#
# This allows us to generate diverse example single point/geometry optimization calculations with DFTB, ADF etc.

from scm.plams import from_smiles, AMSJob, PlamsError, Settings, Molecule, Atom
from scm.base import ChemicalSystem
from scm.input_classes.drivers import AMS
from scm.input_classes.engines import DFTB
from scm.utils.conversions import plams_molecule_to_chemsys


def example_job_dftb(smiles, task, use_chemsys=False):
    """Create a serial DFTB ``AMSJob`` for the molecule built from *smiles*.

    :param smiles: SMILES string used to generate the input molecule
    :param task: AMS task name, e.g. "SinglePoint" or "GeometryOptimization"
    :param use_chemsys: when True, convert the PLAMS molecule to a ChemicalSystem
    :return: configured ``AMSJob`` named "dftb"
    """
    # Generate molecule from smiles
    mol = from_smiles(smiles)
    if use_chemsys:
        mol = plams_molecule_to_chemsys(mol)

    # Set up calculation settings using PISA (the typed input classes)
    sett = Settings()
    sett.runscript.nproc = 1
    driver = AMS()
    driver.Task = task
    driver.Engine = DFTB()
    sett.input = driver

    return AMSJob(molecule=mol, settings=sett, name="dftb")


def example_job_adf(smiles, task, basis, gga=None, use_chemsys=False):
    """Create a serial ADF ``AMSJob`` for the molecule built from *smiles*.

    :param smiles: SMILES string used to generate the input molecule
    :param task: AMS task name, e.g. "SinglePoint" or "GeometryOptimization"
    :param basis: ADF basis set type, e.g. "DZ" or "TZP"
    :param gga: optional GGA exchange-correlation functional, e.g. "PBE"
    :param use_chemsys: when True, convert the PLAMS molecule to a ChemicalSystem
    :return: configured ``AMSJob`` named "adf"
    """
    # Generate molecule from smiles
    mol = from_smiles(smiles)
    if use_chemsys:
        mol = plams_molecule_to_chemsys(mol)

    # Set up calculation settings using standard (string-keyed) settings
    sett = Settings()
    sett.runscript.nproc = 1
    sett.input.AMS.Task = task
    sett.input.ADF.Basis.Type = basis
    if gga:
        sett.input.ADF.XC.GGA = gga

    return AMSJob(molecule=mol, settings=sett, name="adf")


def example_job_neb(iterations, use_chemsys=False):
    """Create a DFTB nudged elastic band (NEB) job for the HCN isomerization.

    The initial and final images differ only in the x-coordinates of the
    N and H atoms.

    :param iterations: maximum number of NEB iterations
    :param use_chemsys: when True, convert the PLAMS molecules to ChemicalSystems
    :return: configured ``AMSJob`` named "neb"
    """
    # Set up initial and final molecules
    main_molecule = Molecule()
    main_molecule.add_atom(Atom(symbol="C", coords=(0, 0, 0)))
    main_molecule.add_atom(Atom(symbol="N", coords=(1.18, 0, 0)))
    main_molecule.add_atom(Atom(symbol="H", coords=(2.196, 0, 0)))
    final_molecule = main_molecule.copy()
    final_molecule.atoms[1].x = 1.163
    final_molecule.atoms[2].x = -1.078
    mol = {"": main_molecule, "final": final_molecule}
    if use_chemsys:
        mol = {k: plams_molecule_to_chemsys(v) for k, v in mol.items()}

    # Set up calculation settings
    sett = Settings()
    sett.runscript.nproc = 1
    sett.input.ams.Task = "NEB"
    sett.input.ams.NEB.Images = 9
    sett.input.ams.NEB.Iterations = iterations
    # Bare access is deliberate: Settings autovivifies missing keys, so this
    # creates an empty DFTB block, selecting the DFTB engine with defaults.
    sett.input.DFTB

    return AMSJob(molecule=mol, settings=sett, name="neb")


# Now, we create a selection of jobs covering different systems and settings:

from scm.plams import config, JobRunner

config.default_jobrunner = JobRunner(parallel=True, maxthreads=8)

smiles = ["CC", "C", "O", "CO"]
tasks = ["SinglePoint", "GeometryOptimization"]
engines = ["DFTB", "ADF"]

jobs = []
for i, s in enumerate(smiles):
    for t in tasks:
        # Alternate between PLAMS Molecule and ChemicalSystem inputs
        job_dftb = example_job_dftb(s, t, use_chemsys=bool(i % 2))
        job_adf1 = example_job_adf(s, t, "DZ", use_chemsys=True)
        job_adf2 = example_job_adf(s, t, "TZP", "PBE")
        jobs += [job_dftb, job_adf1, job_adf2]

job_neb1 = example_job_neb(10)
job_neb2 = example_job_neb(100, use_chemsys=True)
jobs += [job_neb1, job_neb2]

# Launch all jobs (in parallel, per the job runner configured above) ...
for j in jobs:
    j.run()

# ... then block until each one has finished
for j in jobs:
    j.ok()


# ## Job Analysis

# The `JobAnalysis` tool can be used to extract data from a large number of jobs, and analyse the results.

# ### Adding and Loading Jobs
#
# Jobs can be loaded by passing job objects directly to the `JobAnalysis`, or alternatively loading from a path. This latter option is useful for loading jobs run previously in other scripts.

from scm.plams import JobAnalysis

ja = JobAnalysis(jobs=jobs)
# ja = JobAnalysis(paths=[j.path for j in jobs])  # alternatively load jobs from a set of paths

# Additional jobs can also be added or removed after initialization of the `JobAnalysis` tool.

extra_job = example_job_dftb("CCC", "SinglePoint")
extra_job.run()
extra_job.ok()

ja = ja.add_job(extra_job)

# The loaded jobs and the initial analysis fields can be shown by displaying the `JobAnalysis` table:

print(ja.to_table())

# ### Adding and Removing Fields

# On initialization, some analysis fields are automatically included in the analysis (`Path`, `Name`, `OK`, `Check` and `ErrorMsg`). These are useful to see which jobs were loaded, and whether they succeeded.
# However, one or more of these fields can be removed with the `remove_field` method.

ja = ja.remove_field("Path")

# A range of other common fields can be added with the `add_standard_field(s)` method.

ja = ja.add_standard_fields(["Formula", "Smiles", "CPUTime", "SysTime"])

# In addition, all fields deriving from the job input settings can be added with the `add_settings_input_fields` method. By default, these will have names corresponding to the concatenated settings entries. Individual settings field can be added with the `add_settings_field` method. This is useful to see the differences in the input settings of various jobs which may have succeeded/failed.

ja = ja.add_settings_input_fields()

# For output results, fields from the rkfs can be added with the `add_rkf_field` method, using a specified rkf file (default `ams.rkf`), section and variable.

ja = ja.add_rkf_field("General", "engine")

# Finally, custom fields can also be added with the `add_field` method, by defining a field key, value accessor and optional arguments like the display name and value formatting. This is most useful to extract results from jobs using built-in methods on the job results class.

# The methods return the analysis object itself, so custom fields can be
# attached in a single fluent chain:
ja = (
    ja.add_field(
        "Energy",
        lambda job: job.results.get_energy(unit="kJ/mol"),
        display_name="Energy [kJ/mol]",
        fmt=".2f",
    )
    .add_field("AtomType", lambda job: [at.symbol for at in job.results.get_main_molecule()])
    .add_field("Charge", lambda job: job.results.get_charges())
)

print(ja.to_table(max_rows=5))

# ### Processing Data

# Once an initial analysis has been created, the data can be further processed, depending on the use case.

# For example, to inspect the difference between failed and successful jobs, jobs can be filtered down and irrelevant fields removed.
#
# Here we first filter the jobs to those which have the `NEB` task:

ja_neb = ja.filter_jobs(lambda data: data["InputAmsTask"] == "NEB")

# Then we remove the "uniform fields" i.e. fields where all the values are the same.
# This lets us remove the noise and focus on the fields which have differences.

ja_neb = ja_neb.remove_uniform_fields(ignore_empty=True)

print(ja_neb.to_table())

# Another use case may be to analyze the results from one or more jobs.

# For this, it can be useful to utilize the `expand` functionality to convert job(s) to multiple rows.

# During this process, fields selected for expansion will have their values extracted into individual rows, whilst other fields have their values duplicated.
#
# First we filter to a single job, the geometry optimization of water:

ja_adf_water = ja.filter_jobs(
    lambda data: (
        data["InputAmsTask"] == "GeometryOptimization"
        and data["InputAdfBasisType"] is not None
        and data["Smiles"] == "O"
    )
)

print(ja_adf_water.to_table())

# Then we "expand" a given field to flatten the arrays and have one row per entry in the array. This lets us see the charge per atom for each job:

ja_adf_water_expanded = ja_adf_water.expand_field("AtomType").expand_field("Charge").remove_uniform_fields()

print(ja_adf_water_expanded.to_table())

# Expansion can be undone with the corresponding `collapse` method.
#
# Fields can be also further filtered, modified or reordered to customize the analysis. This example also illustrates the "fluent" syntax of the `JobAnalysis` tool, whereby multiple operations can be chained together.

ja_adf = (
    ja_adf_water_expanded.remove_field("Name")
    .format_field("CPUTime", ".2f")
    .format_field("Charge", ".4f")
    .rename_field("InputAdfBasisType", "Basis")
    # NOTE(review): this previously renamed "InputAdfBasisType" a second time,
    # which no longer exists after the rename above. The GGA field generated by
    # add_settings_input_fields from ADF.XC.GGA is "InputAdfXcGga" — confirm
    # against the table output above.
    .rename_field("InputAdfXcGga", "GGA")
    .reorder_fields(["AtomType", "Charge", "Energy"])
)

print(ja_adf.to_table())

# ### Extracting Analysis Data

# Analysis data can be extracted in a variety of ways.
#
# As has been demonstrated, a visual representation of the table can be easily generated using the `to_table` method (or `display_table` in a notebook).

# The format can be selected as markdown, html or rst. This will return the data with the specified display names and formatting.
print(ja_adf.to_table(fmt="rst"))

# Alternatively, raw data can be retrieved via the `get_analysis` method, which returns a dictionary of analysis keys to values.

print(ja_adf.get_analysis())

# Data can also be easily written to a csv file using `to_csv_file`, to be exported to another program.

# Write the analysis out, then echo the file contents for inspection.
csv_name = "./tmp.csv"
ja_adf.to_csv_file(csv_name)

with open(csv_name) as csv_file:
    contents = csv_file.read()
print(contents)

# Finally, for more complex data analysis, the results can be converted to a [pandas](https://pandas.pydata.org) dataframe. This is recommended for more involved data manipulations.

df = ja_adf.to_dataframe()

print(df)