import os
import numpy as np
import csv
from scipy.signal import argrelextrema
from scipy.stats import pearsonr
from scipy.stats import linregress

# extract_diag extracts diagonal elements from upper triangular matrices (IQA2Save)
# extract_off_diag extracts off-diagonal elements from upper triangular matrices (IQA2Save)
# get_indices retrieves original indices (i,j) from off-diagonal elements vector

def extract_diag(Vec, n):
    diag = []
    index = 0
    for i in range(n):
        diag.append(Vec[index])
        index += (n-i)
    return diag

def extract_off_diag(vec, n):
    non_diagonal_elements = []
    k=0
    m = n
    for i in range(int(n*(n+1)/2)):
        if k !=0:
            non_diagonal_elements.append(vec[i])
        k += 1
        if k == m:
            k = 0
            m -= 1
    return non_diagonal_elements

def get_indices(n, k):
    i=1
    j=2
    for m in range(k):
        if j<n:
            j += 1
        else:
            i += 1
            j = i+1
    return i-1, j-1

# Function process_LR to perform linear regression for each IQA descriptor (so far: EiqaTotal, ECTot and EeeX)
# intra is a boolean :  true (diagonal elements in array), false (off diagonal elements in array)

def process_LR(array, label, all_reg, all_r2, all_Pearson, results, labels_list, intra):
    for j in range(array.shape[1]):
        if intra:
            label2=f"{mol.symbols[j]}{j+1}"
        else:
            k, l = get_indices(numAtoms,j)
            if k != l: 
                label2=f"{mol.symbols[k]}{k+1}{'-'}{''.join([mol.symbols[l], str(l+1)])}"
            else:
               continue
        slope, intercept, r_value, _, _ = linregress(x, array[:,j])
        r2 = r_value**2
        coeff = slope
        pearson = r_value
        result_row = [label,label2, f"{r2:>10.3f}", f"{pearson:>10.3f}",f"{coeff:>10.3f}"]
        results.append(result_row)
        labels_list.append(label+label2)
        all_reg.append(coeff)
        all_r2.append(r2)
        all_Pearson.append(pearson)

# Make sure this script was called correctly
if not 'resultsdir' in globals():
    print("\nUSAGE: $AMSBIN/plams REGScan.py -v resultsdir=[path to .results folder]")
    print("For instance: $AMSBIN/plams REGScan.py -v resultsdir=./dimerH2O.results\n") 
    quit()

# Names of the two output files
results_filename = resultsdir[:-8] + "_REGScan.txt"
csv_filename = resultsdir[:-8] + "_REGScan.csv"

# Load the job associated with that .results folder
amsrkf = AMSJob.load_external(resultsdir)

# Get the number of scan coordinates
nCoords = amsrkf.results.readrkf('PESScan','nPoints')

# Get the number of atoms
mol = amsrkf.results.get_input_molecule()
numAtoms = len(mol)

# Get the scan coordinates and their values
coordValues = amsrkf.results.readrkf('PESScan','PESCoords')
nbCoord=len(coordValues)

# Get the converged bond energies
beValues = amsrkf.results.readrkf('PESScan','PES')

# Create empty lists to store IQA descriptors
iqaKin_matrix = []
iqaeN_matrix = []
iqaeeC_matrix = []
iqaeeX_matrix = []
iqaeeDisp_matrix = []
iqaeeTotal_matrix = []
iqaCTotal_matrix = []
iqaTotal_matrix = []
totalEnergyValues = []

# Read IQA data (and coordinate values + converged energies) 
# A loop over all points in the scan - Some data are unused so far but can be needed later...

for i in range(nCoords):
    filename='PESPoint({:d})'.format(i+1)  
    iqaKin_point = amsrkf.results.readrkf('Properties','IQA kinetic energy',file=filename)            
    iqaKin_matrix.append(iqaKin_point)
    iqaeN_point = amsrkf.results.readrkf('Properties','IQA electron-nucleus',file=filename)
    iqaeN_matrix.append(iqaeN_point)
    iqaeeC_point = amsrkf.results.readrkf('Properties','IQA e-e Coulomb',file=filename)
    iqaeeC_matrix.append(iqaeeC_point)
    iqaeeX_point = amsrkf.results.readrkf('Properties','IQA e-e exchange',file=filename)
    iqaeeX_matrix.append(extract_off_diag(iqaeeX_point,numAtoms))
    iqaeeDisp_point = amsrkf.results.readrkf('Properties','IQA pairs disp',file=filename)
    iqaeeDisp_matrix.append(iqaeeDisp_point)
    iqaeeTotal_point = amsrkf.results.readrkf('Properties','IQA e-e total',file=filename)
    iqaeeTotal_matrix.append(iqaeeTotal_point)
    iqaCTotal_point = amsrkf.results.readrkf('Properties','IQA Coulomb total',file=filename)
    iqaCTotal_matrix.append(extract_off_diag(iqaCTotal_point,numAtoms))
    iqaTotal_point = amsrkf.results.readrkf('Properties','IQA atom-atom total',file=filename) # Diagonal : intra terms
    iqaTotal_matrix.append(extract_diag(iqaTotal_point,numAtoms))
    totalEnergyValue_point = amsrkf.results.readrkf('Total Energy','Total energy',file=filename)
    totalEnergyValues.append(totalEnergyValue_point)

# End Reading IQA DATA

# Convert lists to np.arrays
iqaKin_matrix = np.array(iqaKin_matrix)
iqaeN_matrix = np.array(iqaeN_matrix)
iqaeeC_matrix = np.array(iqaeeC_matrix)
iqaeeX_matrix = np.array(iqaeeX_matrix)
iqaeeDisp_matrix = np.array(iqaeeDisp_matrix)
iqaeeTotal_matrix = np.array(iqaeeTotal_matrix)
iqaCTotal_matrix = np.array(iqaCTotal_matrix)
iqaTotal_matrix = np.array(iqaTotal_matrix)
beValues = np.array(beValues)
coordValues = np.array(coordValues)
totalEnergyValues = np.array(totalEnergyValues)

# Find the local maxima and minima for segmentation
maxima_idx = argrelextrema(beValues, np.greater)
minima_idx = argrelextrema(beValues, np.less)

# Combine the indices and sort them
extrema_idx = np.sort(np.column_stack((maxima_idx,minima_idx)))

# Add the endpoints of the scan as extrema
extrema_idx = np.insert(extrema_idx, 0, 0)
extrema_idx = np.append(extrema_idx, len(coordValues) - 1)

with open(results_filename, 'w', encoding='utf-8') as f:
    f.write(f"****************************************************************\n")
    f.write(f"REG ANALYSIS ALONG A PATH COORDINATE (1D PES Scan) - version 1.2\n")
    f.write(f"****************************************************************\n")
    f.write(f"Written by L. Joubert and V. Tognetti - University of Rouen\n")
    f.write(f"Original reference for REG Analysis: Thacker, J.C.R., Popelier, P.L.A.\n")
    f.write(f"The ANANKE relative energy gradient (REG) method to automate IQA analysis\n")
    f.write(f"over configurational change. Theor Chem Acc 136, 86 (2017).\n\n")
    f.write(f"IQA terms: Eintra (total intra-atomic energy)\n")
    f.write(f"           VCl (total coulombic intra- or inter-atomic energy)\n")
    f.write(f"           VX (intra or inter-atomic exchange energy)\n\n")
    f.write(f"R2: coefficient of determination / r: Pearson coefficient\n")
    f.write(f"m(REG): relative energy gradient (slope)\n\n")

    # main loop for each segment
    for i in range(len(extrema_idx) - 1):
        start_idx = extrema_idx[i]
        end_idx = extrema_idx[i+1]
        f.write(f"Segment {i+1}\n")
        f.write(f"Path coordinate range: {coordValues[start_idx]:.4f} -> {coordValues[end_idx]:.4f}\n") 
        nbPoints=end_idx+1-start_idx
        f.write(f"{nbPoints} points selected for this segment.\n")
        f.write(f"First point: {start_idx+1}\n")
        f.write(f"Last point: {end_idx+1}\n\n")

        x = totalEnergyValues[start_idx:end_idx+1]
        y1, y2, y3 = iqaTotal_matrix[start_idx:end_idx+1,:], iqaCTotal_matrix[start_idx:end_idx+1,:], iqaeeX_matrix[start_idx:end_idx+1,:]

        # Initialization
        results, all_reg, all_r2, all_Pearson = [], [], [], []
        labels_list1, labels_list2,labels_list3 = [], [], []

        # Process linear regressions for the three descriptors
        label='Eintra '
        process_LR(y1, label, all_reg, all_r2, all_Pearson, results, labels_list1, True)
        label='VCl    '
        process_LR(y2, label, all_reg, all_r2, all_Pearson, results, labels_list2, False)
        label='VX     '
        process_LR(y3, label, all_reg, all_r2, all_Pearson, results, labels_list3, False)

        # Sort m(REG) for negative values
        negative_indices = [i for i in range(len(all_reg)) if all_reg[i] < 0]
        negative_sorted_indices = sorted(negative_indices, key=lambda k: all_reg[k])

        # Sort m(REG) for positive values
        positive_indices = [i for i in range(len(all_reg)) if all_reg[i] >= 0]
        positive_sorted_indices = sorted(positive_indices, key=lambda k: all_reg[k], reverse=True)

        # Reorganize lines for results
        sorted_results = [results[i] for i in positive_sorted_indices]
        header = ['IQA term', 'Atom(s)', 'R2', 'r', 'm(REG)']
        formatted_results = [header] + [[r[header.index(col)] for col in header] for r in sorted_results]

        # Write positive results to file
        f.write('\n'.join(['{:<10}  {:^10}  {:>10}  {:>10}  {:>10}'.format(*row) for row in formatted_results]) + '\n\n')

        sorted_results = [results[i] for i in negative_sorted_indices]
        formatted_results = [header] + [[r[header.index(col)] for col in header] for r in sorted_results]

        # Write negative results to file
        f.write('\n'.join(['{:<10}  {:^10}  {:>10}  {:>10}  {:>10}'.format(*row) for row in formatted_results]) + '\n\n')

with open(csv_filename, "w", newline="", encoding='utf-8') as file_csv:
    writer = csv.writer(file_csv)
    # Write column headers
    writer.writerow(['path coordinate'] + ['Bond energy', 'Total energy'] + labels_list1 + labels_list2 + labels_list3)

    # Write data for each row
    for i in range(len(coordValues)):
        line = [f"{coordValues[i]:.3f}"] + [f"{beValues[i]:.6f}", f"{totalEnergyValues[i]:.6f}"] + \
        [f"{value:.6f}" for value in iqaTotal_matrix[i,:]] + \
        [f"{value:.6f}" for value in iqaCTotal_matrix[i,:]] + \
        [f"{value:.6f}" for value in iqaeeX_matrix[i,:]] 
        writer.writerow(line)