Source code for scm.oledtools.utils

import math
from dataclasses import dataclass
from typing import TYPE_CHECKING, List, Optional, Tuple

import numpy as np
from numpy.typing import NDArray

if TYPE_CHECKING:
    import h5py

    from scm.libbase import UnifiedChemicalSystem as ChemicalSystem


def has_bonds(mol: "ChemicalSystem") -> bool:
    return len(mol.bonds) > 0


def has_ff_atom_types(mol: "ChemicalSystem") -> bool:
    return all(atom.forcefield is not None and atom.forcefield.type is not None for atom in mol.atoms)


def has_ff_charges(mol: "ChemicalSystem") -> bool:
    return all(atom.forcefield is not None and atom.forcefield.charge is not None for atom in mol.atoms)


def has_complete_ff_input(mol: "ChemicalSystem") -> bool:
    return has_bonds(mol) and has_ff_atom_types(mol) and has_ff_charges(mol)


def guess_spin_settings(mol: "ChemicalSystem") -> Tuple[bool, int]:
    num_electrons = sum(atom.Z for atom in mol.atoms)
    num_electrons -= round(mol.charge)
    spinpol = num_electrons % 2
    unrestricted = num_electrons % 2 == 1
    return unrestricted, spinpol


def reject_outliers(data: NDArray, m: float = 16):
    d = np.abs(data - np.median(data))
    mdev = 0.6745 * np.median(d)
    s = d / mdev if mdev != 0.0 else 0.0
    # print("Number of discarded outliers:", data.size-data[s<m].size)
    # print("Discarded outliers:", data[s>=m])
    return data[s < m]


@dataclass
class OLEDPropertiesSpeciesSummmary:

    species_name: str
    species_smiles: str
    species_nummol: int

    # Ground state properties:

    IP_mean: float
    IP_sigma: float

    EA_mean: float
    EA_sigma: float

    Dmag_mean: float
    Dmag_sigma: float

    Dorient_mean: float
    Dorient_mean_uncertainty: float

    # Exciton properties:

    S1_mean: Optional[float] = None
    S1_sigma: Optional[float] = None

    T1_mean: Optional[float] = None
    T1_sigma: Optional[float] = None

    TDMmag_mean: Optional[float] = None
    TDMmag_sigma: Optional[float] = None

    TDMorient_mean: Optional[float] = None
    TDMorient_mean_uncertainty: Optional[float] = None

    def __str__(self) -> str:
        ret = f"""\
Summary for species: {self.species_name}
---------------------{"-" * len(self.species_name)}
SMILES: {self.species_smiles}
Number of molecules: {self.species_nummol}
Energies:
    IP: Ionization potential.
        IP = {self.IP_mean:.2f} +- {self.IP_sigma:.3f} eV
    EA: Electron affinity.
        EA = {self.EA_mean:.2f} +- {self.EA_sigma:.3f} eV
Static dipole moments:
    Magnitude:
        |d| = {self.Dmag_mean:.2f} +- {self.Dmag_sigma:.2f} Debye
    Orientation: theta = angle(d, e_z)
        <cos(theta)> = {self.Dorient_mean:.3f}  (uncertainty of mean = {self.Dorient_mean_uncertainty:.3f})
        (in range [-1:1] with 0 being isotropic)
"""
        if (
            self.S1_mean is not None
            and self.S1_sigma is not None
            and self.T1_mean is not None
            and self.T1_sigma is not None
            and self.TDMorient_mean is not None
            and self.TDMorient_mean_uncertainty is not None
        ):
            ret += f"""\
Exciton energies:
    S1: Energy of 1st excited singlet state with respect to the ground state.
        S1 = {self.S1_mean:.2f} +- {self.S1_sigma:.3f} eV
    T1: Energy of 1st excited triplet state with respect to the ground state.
        T1 = {self.T1_mean:.2f} +- {self.T1_sigma:.3f} eV
Transition dipole moments: S0 -> S1
    Magnitude:
        |d| = {self.TDMmag_mean:.2f} +- {self.TDMmag_sigma:.2f} Debye
    Orientation: theta = angle(d, e_z)
        <cos(theta)^2> = {self.TDMorient_mean:.3f}  (uncertainty of mean = {self.TDMorient_mean_uncertainty:.3f})
        (in range [0:1] with 1/3 being isotropic)
"""
        return ret


[docs]class OLEDPropertiesSummary: """A summary of the results of the OLEDProperties workflow."""
[docs] def __init__(self, f: "h5py.File", outlier_Zmax: float = 16): """Calculates a summary of the results on an HDF5 file. :param f: The HDF5 file to summarize. :type f: h5py.File :param outlier_Zmax: The modified Z score above which a datapoint is excluded as an outlier. Defaults to 16. :type outlier_Zmax: float, optional """ self.species_summaries: List[OLEDPropertiesSpeciesSummmary] = [] molSpec = f["molecules/species"][:] IP = f["energies/IP"][:] EA = f["energies/EA"][:] Dmag = np.linalg.norm(f["static_multipole_moments/dipole_moment"][()], axis=1) Dorient = [] for imol in range(f["static_multipole_moments/dipole_moment"].shape[0]): d = f["static_multipole_moments/dipole_moment"][imol, :] Dorient.append(np.dot(d / np.linalg.norm(d), [0, 0, 1])) Dorient = np.array(Dorient) if "exciton_energies" in f: S1 = f["exciton_energies/S1"][:] T1 = f["exciton_energies/T1"][:] TDMmag = np.linalg.norm(f["transition_dipole_moments/S1_S0"][()], axis=1) TDMorient = [] for imol in range(f["transition_dipole_moments/S1_S0"].shape[0]): d = f["transition_dipole_moments/S1_S0"][imol, :] TDMorient.append(np.dot(d / np.linalg.norm(d), [0, 0, 1]) ** 2) TDMorient = np.array(TDMorient) for spec, specName in enumerate(f["species/name"]): # Ground state properties: species_summary = OLEDPropertiesSpeciesSummmary( species_name=specName.decode("ascii"), species_smiles=f["species/smiles"][spec].decode("ascii"), species_nummol=int(((molSpec == spec)).sum()), IP_mean=reject_outliers(IP[(molSpec == spec) & (~np.isnan(IP))], outlier_Zmax).mean(), IP_sigma=reject_outliers(IP[(molSpec == spec) & (~np.isnan(IP))], outlier_Zmax).std(), EA_mean=reject_outliers(EA[(molSpec == spec) & (~np.isnan(EA))], outlier_Zmax).mean(), EA_sigma=reject_outliers(EA[(molSpec == spec) & (~np.isnan(EA))], outlier_Zmax).std(), Dmag_mean=reject_outliers(Dmag[(molSpec == spec) & (~np.isnan(Dmag))], outlier_Zmax).mean(), Dmag_sigma=reject_outliers(Dmag[(molSpec == spec) & (~np.isnan(Dmag))], outlier_Zmax).std(), Dorient_mean=Dorient[(molSpec == spec) & (~np.isnan(Dorient))].mean(), Dorient_mean_uncertainty=( Dorient[(molSpec == spec) & (~np.isnan(Dorient))].std() / math.sqrt(((molSpec == spec) & (~np.isnan(Dorient))).sum()) ), ) # Exciton properties: species_summary.S1_mean = reject_outliers(S1[(molSpec == spec) & (~np.isnan(S1))], outlier_Zmax).mean() species_summary.S1_sigma = reject_outliers(S1[(molSpec == spec) & (~np.isnan(S1))], outlier_Zmax).std() species_summary.T1_mean = reject_outliers(T1[(molSpec == spec) & (~np.isnan(T1))], outlier_Zmax).mean() species_summary.T1_sigma = reject_outliers(T1[(molSpec == spec) & (~np.isnan(T1))], outlier_Zmax).std() species_summary.TDMmag_mean = TDMmag[(molSpec == spec) & (~np.isnan(TDMmag))].mean() species_summary.TDMmag_sigma = TDMmag[(molSpec == spec) & (~np.isnan(TDMmag))].std() species_summary.TDMorient_mean = TDMorient[(molSpec == spec) & (~np.isnan(TDMorient))].mean() species_summary.TDMorient_mean_uncertainty = TDMorient[ (molSpec == spec) & (~np.isnan(TDMorient)) ].std() / math.sqrt(((molSpec == spec) & (~np.isnan(TDMorient))).sum()) self.species_summaries.append(species_summary)
[docs] def __str__(self) -> str: """Produces a human readable summary of the results. The same summary is also printed at the end of an ``oled-properties`` workflow run. """ return "\n".join(str(s) for s in self.species_summaries)
[docs] def as_yaml(self) -> str: """Returns a a summary of the calculation in a YAML format that can be imported into Bumblebee Web.""" ret = """\ --- materials: """ for i, ss in enumerate(self.species_summaries): ret += f"""\ - id: {i+1} name: {ss.species_name} description: {ss.species_smiles} homo: {-ss.IP_mean:.2f} lumo: {-ss.EA_mean:.2f} sigma_homo: {ss.IP_sigma:.3f} sigma_lumo: {ss.EA_sigma:.3f} singlet: {ss.S1_mean:.2f} triplet: {ss.T1_mean:.2f} sigma_singlet: {ss.S1_sigma:.3f} sigma_triplet: {ss.T1_sigma:.3f} dipole_strength: {ss.Dmag_mean:.2f} transition_dipole_anisotropy: {ss.TDMorient_mean:.3f} """ ret += f"""\ compositions: - id: 0 name: {" + ".join(s.species_name for s in self.species_summaries)} (AMS) description: Calculated by the AMS OLED workflows fractions: """ for i, ss in enumerate(self.species_summaries): ret += f"""\ - material_id: {i+1} fraction: {ss.species_nummol / sum(s.species_nummol for s in self.species_summaries)} """ return ret