import os import numpy as np import csv from scipy.signal import argrelextrema from scipy.stats import pearsonr from scipy.stats import linregress # extract_diag extracts diagonal elements from upper triangular matrices (IQA2Save) # extract_off_diag extracts off-diagonal elements from upper triangular matrices (IQA2Save) # get_indices retrieves original indices (i,j) from off-diagonal elements vector def extract_diag(Vec, n): diag = [] index = 0 for i in range(n): diag.append(Vec[index]) index += (n-i) return diag def extract_off_diag(vec, n): non_diagonal_elements = [] k=0 m = n for i in range(int(n*(n+1)/2)): if k !=0: non_diagonal_elements.append(vec[i]) k += 1 if k == m: k = 0 m -= 1 return non_diagonal_elements def get_indices(n, k): i=1 j=2 for m in range(k): if j10.3f}", f"{pearson:>10.3f}",f"{coeff:>10.3f}"] results.append(result_row) labels_list.append(label+label2) all_reg.append(coeff) all_r2.append(r2) all_Pearson.append(pearson) # Make sure this script was called correctly if not 'resultsdir' in globals(): print("\nUSAGE: $AMSBIN/plams REGScan.py -v resultsdir=[path to .results folder]") print("For instance: $AMSBIN/plams REGScan.py -v resultsdir=./dimerH2O.results\n") quit() # Names of the two output files results_filename = resultsdir[:-8] + "_REGScan.txt" csv_filename = resultsdir[:-8] + "_REGScan.csv" # Load the job associated with that .results folder amsrkf = AMSJob.load_external(resultsdir) # Get the number of scan coordinates nCoords = amsrkf.results.readrkf('PESScan','nPoints') # Get the number of atoms mol = amsrkf.results.get_input_molecule() numAtoms = len(mol) # Get the scan coordinates and their values coordValues = amsrkf.results.readrkf('PESScan','PESCoords') nbCoord=len(coordValues) # Get the converged bond energies beValues = amsrkf.results.readrkf('PESScan','PES') # Create empty lists to store IQA descriptors iqaKin_matrix = [] iqaeN_matrix = [] iqaeeC_matrix = [] iqaeeX_matrix = [] iqaeeDisp_matrix = [] iqaeeTotal_matrix = [] iqaCTotal_matrix = [] iqaTotal_matrix = [] totalEnergyValues = [] # Read IQA data (and coordinate values + converged energies) # A loop over all points in the scan - Some data are unused so far but can be needed later... for i in range(nCoords): filename='PESPoint({:d})'.format(i+1) iqaKin_point = amsrkf.results.readrkf('Properties','IQA kinetic energy',file=filename) iqaKin_matrix.append(iqaKin_point) iqaeN_point = amsrkf.results.readrkf('Properties','IQA electron-nucleus',file=filename) iqaeN_matrix.append(iqaeN_point) iqaeeC_point = amsrkf.results.readrkf('Properties','IQA e-e Coulomb',file=filename) iqaeeC_matrix.append(iqaeeC_point) iqaeeX_point = amsrkf.results.readrkf('Properties','IQA e-e exchange',file=filename) iqaeeX_matrix.append(extract_off_diag(iqaeeX_point,numAtoms)) iqaeeDisp_point = amsrkf.results.readrkf('Properties','IQA pairs disp',file=filename) iqaeeDisp_matrix.append(iqaeeDisp_point) iqaeeTotal_point = amsrkf.results.readrkf('Properties','IQA e-e total',file=filename) iqaeeTotal_matrix.append(iqaeeTotal_point) iqaCTotal_point = amsrkf.results.readrkf('Properties','IQA Coulomb total',file=filename) iqaCTotal_matrix.append(extract_off_diag(iqaCTotal_point,numAtoms)) iqaTotal_point = amsrkf.results.readrkf('Properties','IQA atom-atom total',file=filename) # Diagonal : intra terms iqaTotal_matrix.append(extract_diag(iqaTotal_point,numAtoms)) totalEnergyValue_point = amsrkf.results.readrkf('Total Energy','Total energy',file=filename) totalEnergyValues.append(totalEnergyValue_point) # End Reading IQA DATA # Convert lists to np.arrays iqaKin_matrix = np.array(iqaKin_matrix) iqaeN_matrix = np.array(iqaeN_matrix) iqaeeC_matrix = np.array(iqaeeC_matrix) iqaeeX_matrix = np.array(iqaeeX_matrix) iqaeeDisp_matrix = np.array(iqaeeDisp_matrix) iqaeeTotal_matrix = np.array(iqaeeTotal_matrix) iqaCTotal_matrix = np.array(iqaCTotal_matrix) iqaTotal_matrix = np.array(iqaTotal_matrix) beValues = np.array(beValues) coordValues = np.array(coordValues) totalEnergyValues = np.array(totalEnergyValues) # Find the local maxima and minima for segmentation maxima_idx = argrelextrema(beValues, np.greater) minima_idx = argrelextrema(beValues, np.less) # Combine the indices and sort them extrema_idx = np.sort(np.column_stack((maxima_idx,minima_idx))) # Add the endpoints of the scan as extrema extrema_idx = np.insert(extrema_idx, 0, 0) extrema_idx = np.append(extrema_idx, len(coordValues) - 1) with open(results_filename, 'w', encoding='utf-8') as f: f.write(f"****************************************************************\n") f.write(f"REG ANALYSIS ALONG A PATH COORDINATE (1D PES Scan) - version 1.2\n") f.write(f"****************************************************************\n") f.write(f"Written by L. Joubert and V. Tognetti - University of Rouen\n") f.write(f"Original reference for REG Analysis: Thacker, J.C.R., Popelier, P.L.A.\n") f.write(f"The ANANKE relative energy gradient (REG) method to automate IQA analysis\n") f.write(f"over configurational change. Theor Chem Acc 136, 86 (2017).\n\n") f.write(f"IQA terms: Eintra (total intra-atomic energy)\n") f.write(f" VCl (total coulombic intra- or inter-atomic energy)\n") f.write(f" VX (intra or inter-atomic exchange energy)\n\n") f.write(f"R2: coefficient of determination / r: Pearson coefficient\n") f.write(f"m(REG): relative energy gradient (slope)\n\n") # main loop for each segment for i in range(len(extrema_idx) - 1): start_idx = extrema_idx[i] end_idx = extrema_idx[i+1] f.write(f"Segment {i+1}\n") f.write(f"Path coordinate range: {coordValues[start_idx]:.4f} -> {coordValues[end_idx]:.4f}\n") nbPoints=end_idx+1-start_idx f.write(f"{nbPoints} points selected for this segment.\n") f.write(f"First point: {start_idx+1}\n") f.write(f"Last point: {end_idx+1}\n\n") x = totalEnergyValues[start_idx:end_idx+1] y1, y2, y3 = iqaTotal_matrix[start_idx:end_idx+1,:], iqaCTotal_matrix[start_idx:end_idx+1,:], iqaeeX_matrix[start_idx:end_idx+1,:] # Initialization results, all_reg, all_r2, all_Pearson = [], [], [], [] labels_list1, labels_list2,labels_list3 = [], [], [] # Process linear regressions for the three descriptors label='Eintra ' process_LR(y1, label, all_reg, all_r2, all_Pearson, results, labels_list1, True) label='VCl ' process_LR(y2, label, all_reg, all_r2, all_Pearson, results, labels_list2, False) label='VX ' process_LR(y3, label, all_reg, all_r2, all_Pearson, results, labels_list3, False) # Sort m(REG) for negative values negative_indices = [i for i in range(len(all_reg)) if all_reg[i] < 0] negative_sorted_indices = sorted(negative_indices, key=lambda k: all_reg[k]) # Sort m(REG) for positive values positive_indices = [i for i in range(len(all_reg)) if all_reg[i] >= 0] positive_sorted_indices = sorted(positive_indices, key=lambda k: all_reg[k], reverse=True) # Reorganize lines for results sorted_results = [results[i] for i in positive_sorted_indices] header = ['IQA term', 'Atom(s)', 'R2', 'r', 'm(REG)'] formatted_results = [header] + [[r[header.index(col)] for col in header] for r in sorted_results] # Write positive results to file f.write('\n'.join(['{:<10} {:^10} {:>10} {:>10} {:>10}'.format(*row) for row in formatted_results]) + '\n\n') sorted_results = [results[i] for i in negative_sorted_indices] formatted_results = [header] + [[r[header.index(col)] for col in header] for r in sorted_results] # Write negative results to file f.write('\n'.join(['{:<10} {:^10} {:>10} {:>10} {:>10}'.format(*row) for row in formatted_results]) + '\n\n') with open(csv_filename, "w", newline="", encoding='utf-8') as file_csv: writer = csv.writer(file_csv) # Write column headers writer.writerow(['path coordinate'] + ['Bond energy', 'Total energy'] + labels_list1 + labels_list2 + labels_list3) # Write data for each row for i in range(len(coordValues)): line = [f"{coordValues[i]:.3f}"] + [f"{beValues[i]:.6f}", f"{totalEnergyValues[i]:.6f}"] + \ [f"{value:.6f}" for value in iqaTotal_matrix[i,:]] + \ [f"{value:.6f}" for value in iqaCTotal_matrix[i,:]] + \ [f"{value:.6f}" for value in iqaeeX_matrix[i,:]] writer.writerow(line)