#!/usr/bin/env plams

import csv

import numpy as np
from scipy.signal import argrelextrema
from scipy.stats import linregress

# extract_diag extracts diagonal elements from upper triangular matrices (IQA2Save)
# extract_off_diag extracts off-diagonal elements from upper triangular matrices (IQA2Save)
# get_indices retrieves original indices (i,j) from off-diagonal elements vector


def extract_diag(Vec, n):
    diag = []
    index = 0
    for i in range(n):
        diag.append(Vec[index])
        index += n - i
    return diag


def extract_off_diag(vec, n):
    non_diagonal_elements = []
    k = 0
    m = n
    for i in range(int(n * (n + 1) / 2)):
        if k != 0:
            non_diagonal_elements.append(vec[i])
        k += 1
        if k == m:
            k = 0
            m -= 1
    return non_diagonal_elements


def get_indices(n, k):
    i = 1
    j = 2
    for m in range(k):
        if j < n:
            j += 1
        else:
            i += 1
            j = i + 1
    return i - 1, j - 1


# Function process_LR to perform linear regression for each IQA descriptor (so far: EiqaTotal, ECTot and EeeX)
# intra is a boolean :  true (diagonal elements in array), false (off diagonal elements in array)


def process_LR(array, label, all_reg, all_r2, all_Pearson, results, labels_list, intra):
    for j in range(array.shape[1]):
        if intra:
            label2 = f"{mol.symbols[j]}{j+1}"
        else:
            k, l = get_indices(numAtoms, j)
            if k != l:
                label2 = f"{mol.symbols[k]}{k+1}{'-'}{''.join([mol.symbols[l], str(l+1)])}"
            else:
                continue
        slope, intercept, r_value, _, _ = linregress(x, array[:, j])
        r2 = r_value**2
        coeff = slope
        pearson = r_value
        result_row = [label, label2, f"{r2:>10.3f}", f"{pearson:>10.3f}", f"{coeff:>10.3f}"]
        results.append(result_row)
        labels_list.append(label + label2)
        all_reg.append(coeff)
        all_r2.append(r2)
        all_Pearson.append(pearson)


# Make sure this script was called correctly
if "resultsdir" not in globals():
    print("\nUSAGE: $AMSBIN/plams REGScan.py -v resultsdir=[path to .results folder]")
    print("For instance: $AMSBIN/plams REGScan.py -v resultsdir=./dimerH2O.results\n")
    quit()

# Names of the two output files
results_filename = resultsdir[:-8] + "_REGScan.txt"
csv_filename = resultsdir[:-8] + "_REGScan.csv"

# Load the job associated with that .results folder
amsrkf = AMSJob.load_external(resultsdir)

# Get the number of scan coordinates
nCoords = amsrkf.results.readrkf("PESScan", "nPoints")

# Get the number of atoms
mol = amsrkf.results.get_input_molecule()
numAtoms = len(mol)

# Get the scan coordinates and their values
coordValues = amsrkf.results.readrkf("PESScan", "PESCoords")
nbCoord = len(coordValues)

# Get the converged bond energies
beValues = amsrkf.results.readrkf("PESScan", "PES")

# Create empty lists to store IQA descriptors
iqaKin_matrix = []
iqaeN_matrix = []
iqaeeC_matrix = []
iqaeeX_matrix = []
iqaeeDisp_matrix = []
iqaeeTotal_matrix = []
iqaCTotal_matrix = []
iqaTotal_matrix = []
totalEnergyValues = []

# Read IQA data (and coordinate values + converged energies)
# A loop over all points in the scan - Some data are unused so far but can be needed later...

for i in range(nCoords):
    filename = "PESPoint({:d})".format(i + 1)
    iqaKin_point = amsrkf.results.readrkf("Properties", "IQA kinetic energy", file=filename)
    iqaKin_matrix.append(iqaKin_point)
    iqaeN_point = amsrkf.results.readrkf("Properties", "IQA electron-nucleus", file=filename)
    iqaeN_matrix.append(iqaeN_point)
    iqaeeC_point = amsrkf.results.readrkf("Properties", "IQA e-e Coulomb", file=filename)
    iqaeeC_matrix.append(iqaeeC_point)
    iqaeeX_point = amsrkf.results.readrkf("Properties", "IQA e-e exchange", file=filename)
    iqaeeX_matrix.append(extract_off_diag(iqaeeX_point, numAtoms))
    iqaeeDisp_point = amsrkf.results.readrkf("Properties", "IQA pairs disp", file=filename)
    iqaeeDisp_matrix.append(iqaeeDisp_point)
    iqaeeTotal_point = amsrkf.results.readrkf("Properties", "IQA e-e total", file=filename)
    iqaeeTotal_matrix.append(iqaeeTotal_point)
    iqaCTotal_point = amsrkf.results.readrkf("Properties", "IQA Coulomb total", file=filename)
    iqaCTotal_matrix.append(extract_off_diag(iqaCTotal_point, numAtoms))
    iqaTotal_point = amsrkf.results.readrkf(
        "Properties", "IQA atom-atom total", file=filename
    )  # Diagonal : intra terms
    iqaTotal_matrix.append(extract_diag(iqaTotal_point, numAtoms))
    totalEnergyValue_point = amsrkf.results.readrkf("Total Energy", "Total energy", file=filename)
    totalEnergyValues.append(totalEnergyValue_point)

# End Reading IQA DATA

# Convert lists to np.arrays
iqaKin_matrix = np.array(iqaKin_matrix)
iqaeN_matrix = np.array(iqaeN_matrix)
iqaeeC_matrix = np.array(iqaeeC_matrix)
iqaeeX_matrix = np.array(iqaeeX_matrix)
iqaeeDisp_matrix = np.array(iqaeeDisp_matrix)
iqaeeTotal_matrix = np.array(iqaeeTotal_matrix)
iqaCTotal_matrix = np.array(iqaCTotal_matrix)
iqaTotal_matrix = np.array(iqaTotal_matrix)
beValues = np.array(beValues)
coordValues = np.array(coordValues)
totalEnergyValues = np.array(totalEnergyValues)

# Find the local maxima and minima for segmentation
maxima_idx = argrelextrema(beValues, np.greater)
minima_idx = argrelextrema(beValues, np.less)

# Combine the indices and sort them
extrema_idx = np.sort(np.column_stack((maxima_idx, minima_idx)))

# Add the endpoints of the scan as extrema
extrema_idx = np.insert(extrema_idx, 0, 0)
extrema_idx = np.append(extrema_idx, len(coordValues) - 1)

with open(results_filename, "w", encoding="utf-8") as f:
    f.write("****************************************************************\n")
    f.write("REG ANALYSIS ALONG A PATH COORDINATE (1D PES Scan) - version 1.2\n")
    f.write("****************************************************************\n")
    f.write("Written by L. Joubert and V. Tognetti - University of Rouen\n")
    f.write("Original reference for REG Analysis: Thacker, J.C.R., Popelier, P.L.A.\n")
    f.write("The ANANKE relative energy gradient (REG) method to automate IQA analysis\n")
    f.write("over configurational change. Theor Chem Acc 136, 86 (2017).\n\n")
    f.write("IQA terms: Eintra (total intra-atomic energy)\n")
    f.write("           VCl (total coulombic intra- or inter-atomic energy)\n")
    f.write("           VX (intra or inter-atomic exchange energy)\n\n")
    f.write("R2: coefficient of determination / r: Pearson coefficient\n")
    f.write("m(REG): relative energy gradient (slope)\n\n")

    # main loop for each segment
    for i in range(len(extrema_idx) - 1):
        start_idx = extrema_idx[i]
        end_idx = extrema_idx[i + 1]
        f.write(f"Segment {i+1}\n")
        f.write(f"Path coordinate range: {coordValues[start_idx]:.4f} -> {coordValues[end_idx]:.4f}\n")
        nbPoints = end_idx + 1 - start_idx
        f.write(f"{nbPoints} points selected for this segment.\n")
        f.write(f"First point: {start_idx+1}\n")
        f.write(f"Last point: {end_idx+1}\n\n")

        x = totalEnergyValues[start_idx : end_idx + 1]
        y1, y2, y3 = (
            iqaTotal_matrix[start_idx : end_idx + 1, :],
            iqaCTotal_matrix[start_idx : end_idx + 1, :],
            iqaeeX_matrix[start_idx : end_idx + 1, :],
        )

        # Initialization
        results, all_reg, all_r2, all_Pearson = [], [], [], []
        labels_list1, labels_list2, labels_list3 = [], [], []

        # Process linear regressions for the three descriptors
        label = "Eintra "
        process_LR(y1, label, all_reg, all_r2, all_Pearson, results, labels_list1, True)
        label = "VCl    "
        process_LR(y2, label, all_reg, all_r2, all_Pearson, results, labels_list2, False)
        label = "VX     "
        process_LR(y3, label, all_reg, all_r2, all_Pearson, results, labels_list3, False)

        # Sort m(REG) for negative values
        negative_indices = [i for i in range(len(all_reg)) if all_reg[i] < 0]
        negative_sorted_indices = sorted(negative_indices, key=lambda k: all_reg[k])

        # Sort m(REG) for positive values
        positive_indices = [i for i in range(len(all_reg)) if all_reg[i] >= 0]
        positive_sorted_indices = sorted(positive_indices, key=lambda k: all_reg[k], reverse=True)

        # Reorganize lines for results
        sorted_results = [results[i] for i in positive_sorted_indices]
        header = ["IQA term", "Atom(s)", "R2", "r", "m(REG)"]
        formatted_results = [header] + [[r[header.index(col)] for col in header] for r in sorted_results]

        # Write positive results to file
        f.write(
            "\n".join(["{:<10}  {:^10}  {:>10}  {:>10}  {:>10}".format(*row) for row in formatted_results]) + "\n\n"
        )

        sorted_results = [results[i] for i in negative_sorted_indices]
        formatted_results = [header] + [[r[header.index(col)] for col in header] for r in sorted_results]

        # Write negative results to file
        f.write(
            "\n".join(["{:<10}  {:^10}  {:>10}  {:>10}  {:>10}".format(*row) for row in formatted_results]) + "\n\n"
        )

with open(csv_filename, "w", newline="", encoding="utf-8") as file_csv:
    writer = csv.writer(file_csv)
    # Write column headers
    writer.writerow(["path coordinate"] + ["Bond energy", "Total energy"] + labels_list1 + labels_list2 + labels_list3)

    # Write data for each row
    for i in range(len(coordValues)):
        line = (
            [f"{coordValues[i]:.3f}"]
            + [f"{beValues[i]:.6f}", f"{totalEnergyValues[i]:.6f}"]
            + [f"{value:.6f}" for value in iqaTotal_matrix[i, :]]
            + [f"{value:.6f}" for value in iqaCTotal_matrix[i, :]]
            + [f"{value:.6f}" for value in iqaeeX_matrix[i, :]]
        )
        writer.writerow(line)