#!/usr/bin/env plams import csv import numpy as np from scipy.signal import argrelextrema from scipy.stats import linregress # extract_diag extracts diagonal elements from upper triangular matrices (IQA2Save) # extract_off_diag extracts off-diagonal elements from upper triangular matrices (IQA2Save) # get_indices retrieves original indices (i,j) from off-diagonal elements vector def extract_diag(Vec, n): diag = [] index = 0 for i in range(n): diag.append(Vec[index]) index += n - i return diag def extract_off_diag(vec, n): non_diagonal_elements = [] k = 0 m = n for i in range(int(n * (n + 1) / 2)): if k != 0: non_diagonal_elements.append(vec[i]) k += 1 if k == m: k = 0 m -= 1 return non_diagonal_elements def get_indices(n, k): i = 1 j = 2 for m in range(k): if j < n: j += 1 else: i += 1 j = i + 1 return i - 1, j - 1 # Function process_LR to perform linear regression for each IQA descriptor (so far: EiqaTotal, ECTot and EeeX) # intra is a boolean : true (diagonal elements in array), false (off diagonal elements in array) def process_LR(array, label, all_reg, all_r2, all_Pearson, results, labels_list, intra): for j in range(array.shape[1]): if intra: label2 = f"{mol.symbols[j]}{j+1}" else: k, l = get_indices(numAtoms, j) if k != l: label2 = f"{mol.symbols[k]}{k+1}{'-'}{''.join([mol.symbols[l], str(l+1)])}" else: continue slope, intercept, r_value, _, _ = linregress(x, array[:, j]) r2 = r_value**2 coeff = slope pearson = r_value result_row = [label, label2, f"{r2:>10.3f}", f"{pearson:>10.3f}", f"{coeff:>10.3f}"] results.append(result_row) labels_list.append(label + label2) all_reg.append(coeff) all_r2.append(r2) all_Pearson.append(pearson) # Make sure this script was called correctly if "resultsdir" not in globals(): print("\nUSAGE: $AMSBIN/plams REGScan.py -v resultsdir=[path to .results folder]") print("For instance: $AMSBIN/plams REGScan.py -v resultsdir=./dimerH2O.results\n") quit() # Names of the two output files results_filename = resultsdir[:-8] + "_REGScan.txt" csv_filename = resultsdir[:-8] + "_REGScan.csv" # Load the job associated with that .results folder amsrkf = AMSJob.load_external(resultsdir) # Get the number of scan coordinates nCoords = amsrkf.results.readrkf("PESScan", "nPoints") # Get the number of atoms mol = amsrkf.results.get_input_molecule() numAtoms = len(mol) # Get the scan coordinates and their values coordValues = amsrkf.results.readrkf("PESScan", "PESCoords") nbCoord = len(coordValues) # Get the converged bond energies beValues = amsrkf.results.readrkf("PESScan", "PES") # Create empty lists to store IQA descriptors iqaKin_matrix = [] iqaeN_matrix = [] iqaeeC_matrix = [] iqaeeX_matrix = [] iqaeeDisp_matrix = [] iqaeeTotal_matrix = [] iqaCTotal_matrix = [] iqaTotal_matrix = [] totalEnergyValues = [] # Read IQA data (and coordinate values + converged energies) # A loop over all points in the scan - Some data are unused so far but can be needed later... for i in range(nCoords): filename = "PESPoint({:d})".format(i + 1) iqaKin_point = amsrkf.results.readrkf("Properties", "IQA kinetic energy", file=filename) iqaKin_matrix.append(iqaKin_point) iqaeN_point = amsrkf.results.readrkf("Properties", "IQA electron-nucleus", file=filename) iqaeN_matrix.append(iqaeN_point) iqaeeC_point = amsrkf.results.readrkf("Properties", "IQA e-e Coulomb", file=filename) iqaeeC_matrix.append(iqaeeC_point) iqaeeX_point = amsrkf.results.readrkf("Properties", "IQA e-e exchange", file=filename) iqaeeX_matrix.append(extract_off_diag(iqaeeX_point, numAtoms)) iqaeeDisp_point = amsrkf.results.readrkf("Properties", "IQA pairs disp", file=filename) iqaeeDisp_matrix.append(iqaeeDisp_point) iqaeeTotal_point = amsrkf.results.readrkf("Properties", "IQA e-e total", file=filename) iqaeeTotal_matrix.append(iqaeeTotal_point) iqaCTotal_point = amsrkf.results.readrkf("Properties", "IQA Coulomb total", file=filename) iqaCTotal_matrix.append(extract_off_diag(iqaCTotal_point, numAtoms)) iqaTotal_point = amsrkf.results.readrkf( "Properties", "IQA atom-atom total", file=filename ) # Diagonal : intra terms iqaTotal_matrix.append(extract_diag(iqaTotal_point, numAtoms)) totalEnergyValue_point = amsrkf.results.readrkf("Total Energy", "Total energy", file=filename) totalEnergyValues.append(totalEnergyValue_point) # End Reading IQA DATA # Convert lists to np.arrays iqaKin_matrix = np.array(iqaKin_matrix) iqaeN_matrix = np.array(iqaeN_matrix) iqaeeC_matrix = np.array(iqaeeC_matrix) iqaeeX_matrix = np.array(iqaeeX_matrix) iqaeeDisp_matrix = np.array(iqaeeDisp_matrix) iqaeeTotal_matrix = np.array(iqaeeTotal_matrix) iqaCTotal_matrix = np.array(iqaCTotal_matrix) iqaTotal_matrix = np.array(iqaTotal_matrix) beValues = np.array(beValues) coordValues = np.array(coordValues) totalEnergyValues = np.array(totalEnergyValues) # Find the local maxima and minima for segmentation maxima_idx = argrelextrema(beValues, np.greater) minima_idx = argrelextrema(beValues, np.less) # Combine the indices and sort them extrema_idx = np.sort(np.column_stack((maxima_idx, minima_idx))) # Add the endpoints of the scan as extrema extrema_idx = np.insert(extrema_idx, 0, 0) extrema_idx = np.append(extrema_idx, len(coordValues) - 1) with open(results_filename, "w", encoding="utf-8") as f: f.write("****************************************************************\n") f.write("REG ANALYSIS ALONG A PATH COORDINATE (1D PES Scan) - version 1.2\n") f.write("****************************************************************\n") f.write("Written by L. Joubert and V. Tognetti - University of Rouen\n") f.write("Original reference for REG Analysis: Thacker, J.C.R., Popelier, P.L.A.\n") f.write("The ANANKE relative energy gradient (REG) method to automate IQA analysis\n") f.write("over configurational change. Theor Chem Acc 136, 86 (2017).\n\n") f.write("IQA terms: Eintra (total intra-atomic energy)\n") f.write(" VCl (total coulombic intra- or inter-atomic energy)\n") f.write(" VX (intra or inter-atomic exchange energy)\n\n") f.write("R2: coefficient of determination / r: Pearson coefficient\n") f.write("m(REG): relative energy gradient (slope)\n\n") # main loop for each segment for i in range(len(extrema_idx) - 1): start_idx = extrema_idx[i] end_idx = extrema_idx[i + 1] f.write(f"Segment {i+1}\n") f.write(f"Path coordinate range: {coordValues[start_idx]:.4f} -> {coordValues[end_idx]:.4f}\n") nbPoints = end_idx + 1 - start_idx f.write(f"{nbPoints} points selected for this segment.\n") f.write(f"First point: {start_idx+1}\n") f.write(f"Last point: {end_idx+1}\n\n") x = totalEnergyValues[start_idx : end_idx + 1] y1, y2, y3 = ( iqaTotal_matrix[start_idx : end_idx + 1, :], iqaCTotal_matrix[start_idx : end_idx + 1, :], iqaeeX_matrix[start_idx : end_idx + 1, :], ) # Initialization results, all_reg, all_r2, all_Pearson = [], [], [], [] labels_list1, labels_list2, labels_list3 = [], [], [] # Process linear regressions for the three descriptors label = "Eintra " process_LR(y1, label, all_reg, all_r2, all_Pearson, results, labels_list1, True) label = "VCl " process_LR(y2, label, all_reg, all_r2, all_Pearson, results, labels_list2, False) label = "VX " process_LR(y3, label, all_reg, all_r2, all_Pearson, results, labels_list3, False) # Sort m(REG) for negative values negative_indices = [i for i in range(len(all_reg)) if all_reg[i] < 0] negative_sorted_indices = sorted(negative_indices, key=lambda k: all_reg[k]) # Sort m(REG) for positive values positive_indices = [i for i in range(len(all_reg)) if all_reg[i] >= 0] positive_sorted_indices = sorted(positive_indices, key=lambda k: all_reg[k], reverse=True) # Reorganize lines for results sorted_results = [results[i] for i in positive_sorted_indices] header = ["IQA term", "Atom(s)", "R2", "r", "m(REG)"] formatted_results = [header] + [[r[header.index(col)] for col in header] for r in sorted_results] # Write positive results to file f.write( "\n".join(["{:<10} {:^10} {:>10} {:>10} {:>10}".format(*row) for row in formatted_results]) + "\n\n" ) sorted_results = [results[i] for i in negative_sorted_indices] formatted_results = [header] + [[r[header.index(col)] for col in header] for r in sorted_results] # Write negative results to file f.write( "\n".join(["{:<10} {:^10} {:>10} {:>10} {:>10}".format(*row) for row in formatted_results]) + "\n\n" ) with open(csv_filename, "w", newline="", encoding="utf-8") as file_csv: writer = csv.writer(file_csv) # Write column headers writer.writerow(["path coordinate"] + ["Bond energy", "Total energy"] + labels_list1 + labels_list2 + labels_list3) # Write data for each row for i in range(len(coordValues)): line = ( [f"{coordValues[i]:.3f}"] + [f"{beValues[i]:.6f}", f"{totalEnergyValues[i]:.6f}"] + [f"{value:.6f}" for value in iqaTotal_matrix[i, :]] + [f"{value:.6f}" for value in iqaCTotal_matrix[i, :]] + [f"{value:.6f}" for value in iqaeeX_matrix[i, :]] ) writer.writerow(line)