Source code for scm.plams.recipes.adfcosmorsconformers

from scm.plams.interfaces.adfsuite.ams import AMSJob
from scm.plams.interfaces.adfsuite.densf import DensfJob
from scm.plams.mol.molecule import Molecule
from scm.plams.core.basejob import MultiJob
from scm.plams.core.results import Results
from scm.plams.core.settings import Settings
from scm.plams.core.functions import log
from scm.plams.recipes.adfcosmorscompound import ADFCOSMORSCompoundJob
from scm.conformers import ConformersJob
from scm.base import KFFile, Units
from typing import List, Optional, Union, Dict, Tuple
from collections import OrderedDict
import os, re
from pathlib import Path
import numpy as np


[docs]class ADFCOSMORSConfFilter: """ This class allows the user to specify criteria on which to filter sets of conformers. Args: max_num_confs : The maximum number of conformers to bring forward to the next step. These are automatically ordered by energy, so the first *n* lowest energy structures are carried carried forward to the next step. max_energy_range : The maximum allowable difference (in kcal/mol) between the lowest energy conformer and the highest energy conformer that will be carried forward. For example, for a max_energy_range of 2 kcal/mol and a lowest energy structure of 1 kcal/mol, any structure with an energy above 3 (1+2) kcal/mol will be filtered out. """ def __init__(self, max_num_confs: Optional[int] = None, max_energy_range: Optional[float] = None): self.max_num_confs = max_num_confs self.max_energy_range = max_energy_range
[docs]class ADFCOSMORSConfResults(Results): pass
[docs]class ADFCOSMORSConfJob(MultiJob): """ Multi-step conformer workflow for generating COSMO-RS `.coskf` files: .. code-block:: text job_0: initial conformer generation (default RDKit generator or user-provided conf_gen) |-- apply first_filter (if provided) v job_i: optional refinement stages from additional (for each (Settings, Filter) tuple, e.g. DFTB) |-- apply its filter after each stage v adf_gas_job: gas-phase ADF conformer job |-- if adf_singlepoint=True: single-point only v final_filter_job: apply final_filter (if provided) v Branch on hbc_from_MESP: |-- False: replay_job -> COSMO results (.coskf) `-- True: per conformer job_cosmo + job_densf `-> COSMO + hydrogen bond center (.coskf) Args: molecule: Input PLAMS molecule. conf_gen: User-provided job for initial conformer generation. initial_conformers: Number of conformers generated by the default `conf_gen` (RDKit). Defaults to `500`. first_filter: Filter applied after initial conformer generation. additional: Optional extra conformer-refinement stages and their filters (e.g., with a fast engine such as DFTB). final_filter: Filter applied after gas-phase ADF conformer generation. adf_singlepoint: If `True`, run gas-phase ADF conformer jobs without optimization. Defaults to `False`. hbc_from_MESP: If `True`, determine the hydrogen-bond center (hbc) via DENSF analysis (COSMOSAC-DHB-MESP). Defaults to `False`. coskf_dir: Output directory for generated `.coskf` files. coskf_name: Base filename for conformers (e.g., `{coskf_name}_{i}.coskf`). Default to `conformer`. mol_info: Extra metadata written to "Compound Data" (e.g. `IUPAC`, `CAS`, `SMILES` and `Other Name`). name: Optional PLAMS job name. Default to `plams`. """ _result_type = ADFCOSMORSConfResults def __init__( self, molecule: Molecule, conf_gen: Optional[ConformersJob] = None, first_filter: Optional[ADFCOSMORSConfFilter] = None, additional: Optional[List[Tuple[Settings, Optional[ADFCOSMORSConfFilter]]]] = None, final_filter: Optional[ADFCOSMORSConfFilter] = None, adf_singlepoint: bool = False, initial_conformers: int = 500, coskf_dir: Optional[str] = None, coskf_name: Optional[str] = None, mol_info: Optional[Dict[str, Union[float, int, str]]] = None, hbc_from_MESP: bool = False, **kwargs, ): MultiJob.__init__(self, children=OrderedDict(), **kwargs) self.mol = molecule self.mol_info, self.atomic_ion = ADFCOSMORSCompoundJob.get_compound_properties(molecule, mol_info) self.initial_conformers = initial_conformers self.adf_singlepoint = adf_singlepoint self.coskf_dir = coskf_dir self.coskf_name = coskf_name if coskf_name is not None else "conformer" if self.coskf_dir is not None and not os.path.exists(self.coskf_dir): os.makedirs(self.coskf_dir, exist_ok=True) self._has_valid_conf_gen = conf_gen is None or isinstance(conf_gen, ConformersJob) if conf_gen is None or self._has_valid_conf_gen: self.job_0 = conf_gen if conf_gen is not None else self._make_default_confgen_job() else: self.job_0 = self._make_default_confgen_job() self.job_settings = [self.job_0.settings] self.filters = [None, first_filter] if additional is not None: for js, f in additional: self.job_settings.append(js) self.filters.append(f) self.final_filter = final_filter self.filters.append(final_filter) self.hbc_from_MESP = hbc_from_MESP self.children["job_0"] = self.job_0 @staticmethod def _get_compound_properties( mol: Molecule, mol_info: Optional[Dict[str, Union[float, int, str]]] = None ) -> Tuple[Dict[str, Union[float, int, str]], bool]: if mol_info is None: mol_info = dict() mol_info["Molar Mass"] = mol.get_mass() mol_info["Formula"] = mol.get_formula() try: rings = mol.locate_rings() flatten_atoms = [atom for subring in rings for atom in subring] nring = len(set(flatten_atoms)) mol_info["Nring"] = int(nring) except Exception as exc: log(f"[ADFCOSMORSConfJob] Nring can not be located correctly: {exc}") atomic_ion = len(mol.atoms) == 1 return mol_info, atomic_ion
[docs] def new_children(self): # job_1 ... job_N: optional intermediate conformer refinement jobs for job_id in range(1, len(self.job_settings)): key = f"job_{job_id}" if key not in self.children: return {key: self._make_intermediate_job(job_id)} # ADF gas-phase conformer job adf_job_id = len(self.job_settings) if "adf_gas_job" not in self.children: return {"adf_gas_job": self._make_adf_job(adf_job_id)} # Optional final filtering step on ADF conformers if self.final_filter is not None and "final_filter_job" not in self.children: return {"final_filter_job": self._make_filter_job(adf_job_id + 1)} # In the MESP branch, jobs_cosmo/jobs_densf are created in postrun(): # the final number of conformers is only known after all filtering is complete. # In the non-MESP branch, COSMO results for all conformers are generated via one replay job. if not self.hbc_from_MESP and "replay_job" not in self.children: replay_id = adf_job_id + (2 if self.final_filter is not None else 1) return {"replay_job": self._make_replay_job(replay_id)} return None
[docs] def prerun(self): if not self._has_valid_conf_gen: msg = ( "[ADFCOSMORSConfJob] Wrong type for argument conf_gen. Expected ConformersJob instance. " "Using the default conformer generator." ) log(msg) if not self._has_valid_filter_settings(): msg = "[ADFCOSMORSConfJob] Wrong type for filter or settings" log(msg) raise TypeError(msg)
[docs] def postrun(self): self.adf_gas_job = self.children["adf_gas_job"] self.final_adf_job = self._get_final_adf_job() self._iterate_conformers()
def _get_final_adf_job(self): if self.final_filter is not None and "final_filter_job" in self.children: return self.children["final_filter_job"] return self.children["adf_gas_job"] def _make_default_confgen_job(self): sett = Settings() sett.input.AMS.Generator.RDKit.InitialNConformers = self.initial_conformers return ConformersJob(name="job0_conformers_uff", molecule=self.mol, settings=sett) def _make_intermediate_job(self, job_id): settings = self.job_settings[job_id].copy() settings.input.AMS.InputConformersSet = self.children[f"job_{job_id - 1}"].results self._add_filter(settings, job_id) return ConformersJob(name=f"job{job_id}_additional", settings=settings) def _make_adf_job(self, job_id): sett = ADFCOSMORSCompoundJob.adf_settings(False, atomic_ion=self.atomic_ion) if not self.adf_singlepoint: sett.input.AMS.Task = "Optimize" sett.input.AMS.GeometryOptimization.UseAMSWorker = "False" # sett.input.ams.GeometryOptimization.ConvergenceQuality = 'Good' else: sett.input.AMS.Task = "Score" sett.input.AMS.InputConformersSet = self.children[f"job_{job_id - 1}"].results if self.hbc_from_MESP: sett.input.AMS.GeometryOptimization.Keep = "All" sett.input.AMS.Output.KeepWorkDir = "Yes" self._add_filter(sett, job_id) return ConformersJob(name=f"job{job_id}_adf", settings=sett) def _make_filter_job(self, job_id): sett = Settings() sett.input.AMS.Task = "Filter" sett.input.AMS.InputConformersSet = self.children["adf_gas_job"].results self._add_filter(sett, job_id) return ConformersJob(name=f"job{job_id}_adf_filter", settings=sett) def _iterate_conformers(self): if self.hbc_from_MESP: self._iterate_conformers_mesp() else: self._iterate_conformers_replay() def _iterate_conformers_replay(self): if self.coskf_dir is None: self.coskf_dir = self.final_adf_job.path self.replay_job = self.children["replay_job"] self.replay_job.results.wait() conformer_E = self.final_adf_job.results.get_energies("Ha") for i, conf_E in enumerate(conformer_E): frame_name = f"Frame{i+1}" cosmo_section = self.replay_job.results.read_rkf_section("COSMO", frame_name) cosmo_section["Gas Phase Bond Energy"] = conf_E coskfname = f"{self.coskf_name}_{i}.coskf" fullname = Path(self.coskf_dir) / coskfname fullname.unlink(missing_ok=True) coskf = KFFile(str(fullname), autosave=False) for key, val in cosmo_section.items(): coskf.write("COSMO", key, val) for key, value in self.mol_info.items(): coskf.write("Compound Data", key, value) coskf.save() def _iterate_conformers_mesp(self): if self.coskf_dir is None: self.coskf_dir = self.final_adf_job.path # prepare gas phase rkf files for per-conformer COSMO + DENSF reruns rkf_path = Path(self.adf_gas_job.results.rkfpath()).parent gas_rkf_path = list(rkf_path.glob("go*")) gas_rkf_path = sorted(gas_rkf_path, key=lambda p: int(re.search(r"go(\d+)", p.as_posix()).group(1))) au2ha = Units.conversion_factor("au", "hartree") gas_rkf_energy = [] for rkf_path in gas_rkf_path: rkf = KFFile(str(rkf_path / "adf.rkf")) egas = rkf.read_real("AMSResults", "Energy") * au2ha gas_rkf_energy.append(egas) conformer_E = self.final_adf_job.results.get_energies("Ha") num_conformer = len(conformer_E) self.jobs_cosmo = [None] * num_conformer self.jobs_densf = [None] * num_conformer conformer_names = [f"{self.coskf_name}_{i}.coskf" for i in range(num_conformer)] used_rkf_idx = set() for i, (conf_E, coskfname) in enumerate(zip(conformer_E, conformer_names)): Path(self.coskf_dir, coskfname).unlink(missing_ok=True) matches = [ idx for idx, ref_E in enumerate(gas_rkf_energy) if idx not in used_rkf_idx and np.isclose(conf_E, ref_E, rtol=1e-12, atol=1e-8) ] if not matches: msg = f"[ADFCOSMORSConfJob] No match found for conf_E={conf_E:.12f}" log(msg) raise FloatingPointError(msg) if len(matches) > 1: rkf_idx = min(matches, key=lambda j: abs(conf_E - gas_rkf_energy[j])) msg = ( f"[ADFCOSMORSConfJob] Multiple matches for conf_E={conf_E:.12f}, " f"choose idx={rkf_idx} with min |dE|={abs(conf_E - gas_rkf_energy[rkf_idx]):.3e}" ) log(msg) else: rkf_idx = matches[0] used_rkf_idx.add(rkf_idx) log(f"[ADFCOSMORSConfJob] matched gas_energy index {rkf_idx} for conformer {i}") self.jobs_cosmo[i] = self._make_cosmo_job(gas_rkf_path[rkf_idx], i) self.jobs_cosmo[i].run() self.jobs_densf[i] = self._make_densf_job(gas_rkf_path[rkf_idx], i) self.jobs_densf[i].run() self._make_conformer_coskf(i, coskfname) def _make_conformer_coskf(self, i, name): self.jobs_cosmo[i].results.wait() t41 = None if self.hbc_from_MESP: self.jobs_densf[i].results.wait() t41 = self.jobs_densf[i].results.kfpath() ADFCOSMORSCompoundJob.convert_to_coskf( rkf_path=self.jobs_cosmo[i].results.rkfpath(file="adf"), coskf_name=name, plams_dir=self.jobs_cosmo[i].path, coskf_dir=self.coskf_dir, mol_info=self.mol_info, densf_path=t41, ) def _make_replay_job(self, job_id): sett = ADFCOSMORSCompoundJob.adf_settings( True, elements=list(set(at.symbol for at in self.mol)), atomic_ion=self.atomic_ion ) sett.input.AMS.Task = "Replay" self._get_final_adf_job().results.wait() sett.input.AMS.Replay.File = self._get_final_adf_job().results["conformers.rkf"] sett.input.AMS.Replay.StoreAllResultFiles = "True" return AMSJob(name=f"job{job_id}_replay", settings=sett) def _make_cosmo_job(self, rkf_path, i): sett = ADFCOSMORSCompoundJob.adf_settings( True, elements=list(set(at.symbol for at in self.mol)), atomic_ion=self.atomic_ion ) sett.input.AMS.Task = "SinglePoint" sett.input.AMS.LoadSystem.File = str(rkf_path / "ams.rkf") sett.input.AMS.EngineRestart = str(rkf_path / "adf.rkf") return AMSJob(name=f"job_cosmo_c{i}", settings=sett) def _make_densf_job(self, rkf_path, i): densf_job = DensfJob(settings=ADFCOSMORSCompoundJob.densf_settings()) densf_job.inputjob = str(rkf_path / "adf.rkf") densf_job.name = f"job_densf_c{i}" return densf_job def _add_filter(self, sett, job_id): if job_id < len(self.filters): filt = self.filters[job_id] else: filt = self.final_filter if filt is not None: if filt.max_num_confs is not None: sett.input.AMS.InputMaxConfs = filt.max_num_confs if filt.max_energy_range is not None: sett.input.AMS.InputMaxEnergy = filt.max_energy_range def _has_valid_filter_settings(self): for js, f in zip(self.job_settings, self.filters): if js is not None and not isinstance(js, Settings): log(f"[ADFCOSMORSConfJob] Wrong type for job_settings : {js}") return False if f is not None and not isinstance(f, ADFCOSMORSConfFilter): log(f"[ADFCOSMORSConfJob] Wrong type for job_settings : {f}") return False return True