# Source code for scm.input_classes.drivers.paramsmachinelearning

from __future__ import annotations
from pathlib import Path
from typing import Iterable, Literal, Sequence
from scm.pisa.block import DriverBlock, EngineBlock, FixedBlock, FreeBlock, InputBlock
from scm.pisa.key import (
    BoolKey,
    BoolType,
    FloatKey,
    FloatListKey,
    IntKey,
    IntListKey,
    MultipleChoiceKey,
    PathStringKey,
    StringKey,
)

class ParAMSMachineLearning(DriverBlock):
    r"""
    :ivar EngineCollection: Path to (optional) JobCollection Engines YAML file.
    :vartype EngineCollection: str | StringKey
    :ivar JobCollection: Path to JobCollection YAML file.
    :vartype JobCollection: str | StringKey
    :ivar ResultsDirectory: Directory in which output files will be created.
    :vartype ResultsDirectory: str | Path | StringKey
    :ivar Task: Task to run.

        Available options:

        • MachineLearning: Optimization for machine learning models.
        • Optimization: Global optimization powered by GloMPO
        • Generate Reference: Run jobs with reference engine to get reference values
        • Single Point: Evaluate the current configuration of jobs, training data, and parameters
        • Sensitivity: Measure the sensitivity of the loss function to each of the active parameters
    :vartype Task: Literal["Optimization", "GenerateReference", "SinglePoint", "Sensitivity", "MachineLearning"]
    :ivar DataSet: Configuration settings for each data set in the optimization.
    :vartype DataSet: ParAMSMachineLearning._DataSet
    :ivar MachineLearning: Options for Task MachineLearning.
    :vartype MachineLearning: ParAMSMachineLearning._MachineLearning
    :ivar ParallelLevels: Distribution of threads/processes between the parallelization levels.
    :vartype ParallelLevels: ParAMSMachineLearning._ParallelLevels
    """
    class _DataSet(FixedBlock):
        r"""
        Configuration settings for each data set in the optimization.

        :ivar BatchSize: Number of data set entries to be evaluated per epoch. Default 0 means all entries.
        :vartype BatchSize: int | IntKey
        :ivar EvaluateEvery: This data set is evaluated every n evaluations of the training set. This will always be set to 1 for the training set. For other data sets it will be adjusted to the closest multiple of LoggingInterval%General, i.e., you cannot evaluate an extra data set more frequently than you log it.
        :vartype EvaluateEvery: int | IntKey
        :ivar LossFunction: Loss function used to quantify the error between model and reference values. This becomes the minimization task.

            Available options:

            • mae: Mean absolute error
            • rmse: Root mean squared error
            • sse: Sum of squared errors
            • sae: Sum of absolute errors
        :vartype LossFunction: Literal["mae", "rmse", "sse", "sae"]
        :ivar MaxJobs: Limit each evaluation to a subset of n jobs. Default 0 means all jobs are used.
        :vartype MaxJobs: int | IntKey
        :ivar MaxJobsShuffle: Use a different job subset for every evaluation.
        :vartype MaxJobsShuffle: BoolType | BoolKey
        :ivar Name: Unique data set identifier. The first occurrence of DataSet will always be called training_set. The second will always be called validation_set. These cannot be overwritten. Later occurrences will default to data_set_xx, where xx starts at 03 and increments from there. This field can be used to customize the latter names.
        :vartype Name: str | StringKey
        :ivar Path: Path to DataSet YAML file.
        :vartype Path: str | StringKey
        :ivar UsePipe: Use AMS Pipe for suitable jobs to speed up evaluation.
        :vartype UsePipe: BoolType | BoolKey
        """

        def __post_init__(self):
            self.BatchSize: int | IntKey = IntKey(name='BatchSize', comment='Number of data set entries to be evaluated per epoch. Default 0 means all entries.', default=0)
            self.EvaluateEvery: int | IntKey = IntKey(name='EvaluateEvery', comment='This data set is evaluated every n evaluations of the training set.\n\nThis will always be set to 1 for the training set. For other data sets it will be adjusted to the closest multiple of LoggingInterval%General, i.e., you cannot evaluate an extra data set more frequently than you log it.', default=1)
            self.LossFunction: Literal["mae", "rmse", "sse", "sae"] = MultipleChoiceKey(name='LossFunction', comment='Loss function used to quantify the error between model and reference values. This becomes the minimization task.\n\nAvailable options:\n• mae: Mean absolute error\n• rmse: Root mean squared error\n• sse: Sum of squared errors\n• sae: Sum of absolute errors', default='sse', choices=['mae', 'rmse', 'sse', 'sae'])
            self.MaxJobs: int | IntKey = IntKey(name='MaxJobs', comment='Limit each evaluation to a subset of n jobs. Default 0 means all jobs are used.', default=0)
            self.MaxJobsShuffle: BoolType | BoolKey = BoolKey(name='MaxJobsShuffle', comment='Use a different job subset for every evaluation.', default=False)
            self.Name: str | StringKey = StringKey(name='Name', comment='Unique data set identifier.\n\nThe first occurrence of DataSet will always be called training_set.\nThe second will always be called validation_set.\nThese cannot be overwritten.\n\nLater occurrences will default to data_set_xx, where xx starts at 03 and increments from there.\nThis field can be used to customize the latter names.', default='')
            self.Path: str | StringKey = StringKey(name='Path', comment='Path to DataSet YAML file.')
            self.UsePipe: BoolType | BoolKey = BoolKey(name='UsePipe', comment='Use AMS Pipe for suitable jobs to speed up evaluation.', default=True)
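    # Illustration (hypothetical usage; how repeated blocks are added or indexed
    # is part of the wider PISA API and is not shown in this file): the DataSet
    # block is repeatable, and the first two occurrences are always named
    # training_set and validation_set. A training set read from YAML and
    # evaluated in mini-batches could look like:
    #
    #     driver = ParAMSMachineLearning()
    #     ds = driver.DataSet[0]          # the training_set occurrence
    #     ds.Path = 'training_set.yaml'
    #     ds.BatchSize = 100              # 100 entries per epoch instead of all
    #     ds.LossFunction = 'rmse'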
    class _MachineLearning(FixedBlock):
        r"""
        Options for Task MachineLearning.

        :ivar Backend: The backend to use. You must separately install the backend before running a training job.
        :vartype Backend: Literal["Custom", "M3GNet", "NequIP", "Test"]
        :ivar CommitteeSize: The number of independently trained ML potentials.
        :vartype CommitteeSize: int | IntKey
        :ivar LoadModel: Load a previously fitted model from a ParAMS results directory. A ParAMS results directory should contain two subdirectories ``optimization`` and ``settings_and_initial_data``. This option ignores all settings inside model blocks.
        :vartype LoadModel: str | Path | StringKey
        :ivar MaxEpochs: Set the maximum number of epochs a backend should perform.
        :vartype MaxEpochs: int | IntKey
        :ivar RunAMSAtEnd: Whether to run the (committee) ML potential through AMS at the end. This will create the energy/forces scatter plots for the final trained model.
        :vartype RunAMSAtEnd: BoolType | BoolKey
        :ivar Custom: Set up a custom fitting program within ParAMS.
        :vartype Custom: ParAMSMachineLearning._MachineLearning._Custom
        :ivar LossCoeffs: Modify the coefficients for the machine learning loss function. For backends that support weights, this is on top of the supplied dataset weights and sigmas.
        :vartype LossCoeffs: ParAMSMachineLearning._MachineLearning._LossCoeffs
        :ivar M3GNet: Options for M3GNet fitting.
        :vartype M3GNet: ParAMSMachineLearning._MachineLearning._M3GNet
        :ivar NequIP: Options for NequIP fitting.
        :vartype NequIP: ParAMSMachineLearning._MachineLearning._NequIP
        :ivar Target: Target values for stopping training. If both the training and validation metrics are smaller than the specified values, the training will stop early. Only supported by the M3GNet backend.
        :vartype Target: ParAMSMachineLearning._MachineLearning._Target
        """
[docs] class _Custom(FixedBlock): r""" Set up a custom fitting program within ParAMS :ivar File: Python file containing a function called 'get_fit_job' that returns a subclass of 'FitJob' :vartype File: str | Path | StringKey :ivar Arguments: Pass on keyword arguments to the 'get_fit_job' function. :vartype Arguments: str | Sequence[str] | FreeBlock """
            class _Arguments(FreeBlock):
                r"""
                Pass on keyword arguments to the 'get_fit_job' function.
                """

                def __post_init__(self):
                    pass
            def __post_init__(self):
                self.File: str | Path | StringKey = PathStringKey(name='File', comment="Python file containing a function called 'get_fit_job' that returns a subclass of 'FitJob'", ispath=True)
                self.Arguments: str | Sequence[str] | FreeBlock = self._Arguments(name='Arguments', comment="Pass on keyword arguments to the 'get_fit_job' function.")
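        # Sketch of the file the File key points to (hypothetical: only the
        # contract stated above is known here, i.e. a 'get_fit_job' function
        # returning a subclass of 'FitJob'; the FitJob interface itself and its
        # import location are not defined in this module):
        #
        #     # my_custom_fit.py
        #     def get_fit_job(**kwargs):      # receives the Arguments block as kwargs
        #         return MyFitJob(**kwargs)   # MyFitJob: some FitJob subclass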
        class _LossCoeffs(FixedBlock):
            r"""
            Modify the coefficients for the machine learning loss function. For backends that support weights, this is on top of the supplied dataset weights and sigmas.

            :ivar AverageForcePerAtom: For each force data entry, divide the loss contribution by the number of constituent atoms. This is the same as the behavior for ParAMS Optimization, but it is turned off by default in Task MachineLearning. For machine learning, setting this to 'No' can be better since larger molecules will contribute more to the loss. For backends that support weights, this is on top of the supplied dataset weights and sigmas.
            :vartype AverageForcePerAtom: BoolType | BoolKey
            :ivar Energy: Coefficient for the contribution of loss due to the energy. For backends that support weights, this is on top of the supplied dataset weights and sigmas.
            :vartype Energy: float | FloatKey
            :ivar Forces: Coefficient for the contribution of loss due to the forces. For backends that support weights, this is on top of the supplied dataset weights and sigmas.
            :vartype Forces: float | FloatKey
            """

            def __post_init__(self):
                self.AverageForcePerAtom: BoolType | BoolKey = BoolKey(name='AverageForcePerAtom', comment="For each force data entry, divide the loss contribution by the number of constituent atoms. This is the same as the behavior for ParAMS Optimization, but it is turned off by default in Task MachineLearning. For machine learning, setting this to 'No' can be better since larger molecules will contribute more to the loss. For backends that support weights, this is on top of the supplied dataset weights and sigmas.", default=False)
                self.Energy: float | FloatKey = FloatKey(name='Energy', comment='Coefficient for the contribution of loss due to the energy. For backends that support weights, this is on top of the supplied dataset weights and sigmas.', gui_name='Energy coefficient:', default=10.0)
                self.Forces: float | FloatKey = FloatKey(name='Forces', comment='Coefficient for the contribution of loss due to the forces. For backends that support weights, this is on top of the supplied dataset weights and sigmas.', gui_name='Forces coefficient:', default=1.0)
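        # Illustration of how the defaults above combine (schematic only; the
        # exact expression is backend-specific, and dataset weights/sigmas are
        # applied on top where supported):
        #
        #     loss = 10.0 * loss_energy + 1.0 * loss_forces
        #
        # With AverageForcePerAtom=No (the default for Task MachineLearning),
        # force entries of larger molecules contribute more to loss_forces than
        # those of smaller molecules.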
        class _M3GNet(FixedBlock):
            r"""
            Options for M3GNet fitting.

            :ivar LearningRate: Learning rate for the M3GNet weight optimization.
            :vartype LearningRate: float | FloatKey
            :ivar Model: How to specify the model for the M3GNet backend. Either a Custom model can be made from scratch or an existing model directory can be loaded to obtain the model settings.
            :vartype Model: Literal["UniversalPotential", "Custom", "ModelDir"]
            :ivar ModelDir: Path to the directory defining the model. This folder should contain the files: 'checkpoint', 'm3gnet.data-00000-of-00001', 'm3gnet.index' and 'm3gnet.json'.
            :vartype ModelDir: str | Path | StringKey
            :ivar Custom: Specify a custom M3GNet model.
            :vartype Custom: ParAMSMachineLearning._MachineLearning._M3GNet._Custom
            :ivar UniversalPotential: Settings for (transfer) learning with the M3GNet Universal Potential.
            :vartype UniversalPotential: ParAMSMachineLearning._MachineLearning._M3GNet._UniversalPotential
            """
[docs] class _Custom(FixedBlock): r""" Specify a custom M3GNet model. :ivar Cutoff: Cutoff radius of the graph :vartype Cutoff: float | FloatKey :ivar MaxL: Include spherical components up to order MaxL. Higher gives a better angular resolution, but increases computational cost substantially. :vartype MaxL: int | IntKey :ivar MaxN: Include radial components up to the MaxN'th root of the spherical Bessel function. Higher gives a better radial resolution, but increases computational cost substantially. :vartype MaxN: int | IntKey :ivar NumBlocks: Number of convolution blocks. :vartype NumBlocks: int | IntKey :ivar NumNeurons: Number of neurons in each layer. :vartype NumNeurons: int | IntKey :ivar ThreebodyCutoff: Cutoff radius of the three-body interaction. :vartype ThreebodyCutoff: float | FloatKey """ def __post_init__(self): self.Cutoff: float | FloatKey = FloatKey(name='Cutoff', comment='Cutoff radius of the graph', default=5.0, unit='angstrom') self.MaxL: int | IntKey = IntKey(name='MaxL', comment='Include spherical components up to order MaxL. Higher gives a better angular resolution, but increases computational cost substantially.', default=3) self.MaxN: int | IntKey = IntKey(name='MaxN', comment="Include radial components up to the MaxN'th root of the spherical Bessel function. Higher gives a better radial resolution, but increases computational cost substantially.", default=3) self.NumBlocks: int | IntKey = IntKey(name='NumBlocks', comment='Number of convolution blocks.', gui_name='Number of convolution blocks: ', default=3) self.NumNeurons: int | IntKey = IntKey(name='NumNeurons', comment='Number of neurons in each layer.', gui_name='Number of neurons per layer:', default=64) self.ThreebodyCutoff: float | FloatKey = FloatKey(name='ThreebodyCutoff', comment='Cutoff radius of the three-body interaction.', default=4.0, unit='angstrom')
            class _UniversalPotential(FixedBlock):
                r"""
                Settings for (transfer) learning with the M3GNet Universal Potential.

                :ivar Featurizer: Train the Featurizer layer of the M3GNet universal potential.
                :vartype Featurizer: BoolType | BoolKey
                :ivar Final: Train the Final layer of the M3GNet universal potential.
                :vartype Final: BoolType | BoolKey
                :ivar GraphLayer1: Train the first Graph layer of the M3GNet universal potential.
                :vartype GraphLayer1: BoolType | BoolKey
                :ivar GraphLayer2: Train the second Graph layer of the M3GNet universal potential.
                :vartype GraphLayer2: BoolType | BoolKey
                :ivar GraphLayer3: Train the third Graph layer of the M3GNet universal potential.
                :vartype GraphLayer3: BoolType | BoolKey
                :ivar ThreeDInteractions1: Train the first ThreeDInteractions (three-body terms) layer of the M3GNet universal potential.
                :vartype ThreeDInteractions1: BoolType | BoolKey
                :ivar ThreeDInteractions2: Train the second ThreeDInteractions (three-body terms) layer of the M3GNet universal potential.
                :vartype ThreeDInteractions2: BoolType | BoolKey
                :ivar ThreeDInteractions3: Train the third ThreeDInteractions (three-body terms) layer of the M3GNet universal potential.
                :vartype ThreeDInteractions3: BoolType | BoolKey
                :ivar Version: Which version of the M3GNet Universal Potential to use.
                :vartype Version: Literal["2022"]
                """

                def __post_init__(self):
                    self.Featurizer: BoolType | BoolKey = BoolKey(name='Featurizer', comment='Train the Featurizer layer of the M3GNet universal potential.', gui_name='Train featurizer:', default=False)
                    self.Final: BoolType | BoolKey = BoolKey(name='Final', comment='Train the Final layer of the M3GNet universal potential.', gui_name='Train final layer:', default=True)
                    self.GraphLayer1: BoolType | BoolKey = BoolKey(name='GraphLayer1', comment='Train the first Graph layer of the M3GNet universal potential.', gui_name='Train layer 1 - graph:', default=False)
                    self.GraphLayer2: BoolType | BoolKey = BoolKey(name='GraphLayer2', comment='Train the second Graph layer of the M3GNet universal potential.', gui_name='Train layer 2 - graph:', default=False)
                    self.GraphLayer3: BoolType | BoolKey = BoolKey(name='GraphLayer3', comment='Train the third Graph layer of the M3GNet universal potential.', gui_name='Train layer 3 - graph:', default=True)
                    self.ThreeDInteractions1: BoolType | BoolKey = BoolKey(name='ThreeDInteractions1', comment='Train the first ThreeDInteractions (three-body terms) layer of the M3GNet universal potential.', gui_name='Train layer 1 - 3D interactions:', default=False)
                    self.ThreeDInteractions2: BoolType | BoolKey = BoolKey(name='ThreeDInteractions2', comment='Train the second ThreeDInteractions (three-body terms) layer of the M3GNet universal potential.', gui_name='Train layer 2 - 3D interactions:', default=False)
                    self.ThreeDInteractions3: BoolType | BoolKey = BoolKey(name='ThreeDInteractions3', comment='Train the third ThreeDInteractions (three-body terms) layer of the M3GNet universal potential.', gui_name='Train layer 3 - 3D interactions:', default=True)
                    self.Version: Literal["2022"] = MultipleChoiceKey(name='Version', comment='Which version of the M3GNet Universal Potential to use.', hidden=True, default='2022', choices=['2022'])
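            # Transfer-learning sketch (hypothetical usage on a `driver` instance
            # of ParAMSMachineLearning): the defaults above freeze the featurizer
            # and the first two graph/three-body layers, training only the third
            # layers and the final layer. To unfreeze more of the network:
            #
            #     up = driver.MachineLearning.M3GNet.UniversalPotential
            #     up.GraphLayer2 = True
            #     up.ThreeDInteractions2 = True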
            def __post_init__(self):
                self.LearningRate: float | FloatKey = FloatKey(name='LearningRate', comment='Learning rate for the M3GNet weight optimization.', default=0.001)
                self.Model: Literal["UniversalPotential", "Custom", "ModelDir"] = MultipleChoiceKey(name='Model', comment='How to specify the model for the M3GNet backend. Either a Custom model can be made from scratch or an existing model directory can be loaded to obtain the model settings.', default='UniversalPotential', choices=['UniversalPotential', 'Custom', 'ModelDir'])
                self.ModelDir: str | Path | StringKey = PathStringKey(name='ModelDir', comment="Path to the directory defining the model. This folder should contain the files: 'checkpoint', 'm3gnet.data-00000-of-00001', 'm3gnet.index' and 'm3gnet.json'.", ispath=True, gui_type='file')
                self.Custom: ParAMSMachineLearning._MachineLearning._M3GNet._Custom = self._Custom(name='Custom', comment='Specify a custom M3GNet model.')
                self.UniversalPotential: ParAMSMachineLearning._MachineLearning._M3GNet._UniversalPotential = self._UniversalPotential(name='UniversalPotential', comment='Settings for (transfer) learning with the M3GNet Universal Potential.')
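        # Configuration sketch (hypothetical usage): training a custom M3GNet
        # model from scratch instead of fine-tuning the universal potential.
        # All keys are defined in the block above; the values are arbitrary
        # examples:
        #
        #     m3gnet = driver.MachineLearning.M3GNet
        #     m3gnet.Model = 'Custom'
        #     m3gnet.LearningRate = 0.0005
        #     m3gnet.Custom.NumNeurons = 128
        #     m3gnet.Custom.Cutoff = 5.0      # angstrom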
        class _NequIP(FixedBlock):
            r"""
            Options for NequIP fitting.

            :ivar LearningRate: Learning rate for the NequIP weight optimization.
            :vartype LearningRate: float | FloatKey
            :ivar Model: How to specify the model for the NequIP backend. Either a Custom model can be made from scratch or an existing 'model.pth' file can be loaded to obtain the model settings.
            :vartype Model: Literal["Custom", "ModelFile"]
            :ivar ModelFile: Path to the model.pth file defining the model.
            :vartype ModelFile: str | Path | StringKey
            :ivar UseRescalingFromLoadedModel: When loading a model with LoadModel or NequIP%ModelFile, do not recalculate the dataset rescaling but use the value from the loaded model.
            :vartype UseRescalingFromLoadedModel: BoolType | BoolKey
            :ivar Custom: Specify a custom NequIP model.
            :vartype Custom: ParAMSMachineLearning._MachineLearning._NequIP._Custom
            """
[docs] class _Custom(FixedBlock): r""" Specify a custom NequIP model. :ivar LMax: Maximum L value. 1 is probably high enough. :vartype LMax: int | IntKey :ivar MetricsKey: Which metric to use to generate the 'best' model. :vartype MetricsKey: Literal["training_loss", "validation_loss"] :ivar NumLayers: Number of interaction layers in the NequIP neural network. :vartype NumLayers: int | IntKey :ivar RMax: Distance cutoff for interactions. :vartype RMax: float | FloatKey """ def __post_init__(self): self.LMax: int | IntKey = IntKey(name='LMax', comment='Maximum L value. 1 is probably high enough.', default=1) self.MetricsKey: Literal["training_loss", "validation_loss"] = MultipleChoiceKey(name='MetricsKey', comment="Which metric to use to generate the 'best' model.", default='validation_loss', choices=['training_loss', 'validation_loss']) self.NumLayers: int | IntKey = IntKey(name='NumLayers', comment='Number of interaction layers in the NequIP neural network.', default=4) self.RMax: float | FloatKey = FloatKey(name='RMax', comment='Distance cutoff for interactions.', gui_name='Distance cutoff:', default=3.5, unit='angstrom')
            def __post_init__(self):
                self.LearningRate: float | FloatKey = FloatKey(name='LearningRate', comment='Learning rate for the NequIP weight optimization.', default=0.005)
                self.Model: Literal["Custom", "ModelFile"] = MultipleChoiceKey(name='Model', comment="How to specify the model for the NequIP backend. Either a Custom model can be made from scratch or an existing 'model.pth' file can be loaded to obtain the model settings.", default='Custom', choices=['Custom', 'ModelFile'])
                self.ModelFile: str | Path | StringKey = PathStringKey(name='ModelFile', comment='Path to the model.pth file defining the model.', ispath=True, gui_type='file')
                self.UseRescalingFromLoadedModel: BoolType | BoolKey = BoolKey(name='UseRescalingFromLoadedModel', comment='When loading a model with LoadModel or NequIP%ModelFile, do not recalculate the dataset rescaling but use the value from the loaded model.', default=True)
                self.Custom: ParAMSMachineLearning._MachineLearning._NequIP._Custom = self._Custom(name='Custom', comment='Specify a custom NequIP model.')
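        # Configuration sketch (hypothetical usage), analogous to the M3GNet
        # example above but for the NequIP backend:
        #
        #     driver.MachineLearning.Backend = 'NequIP'
        #     nequip = driver.MachineLearning.NequIP
        #     nequip.Model = 'Custom'
        #     nequip.Custom.NumLayers = 4
        #     nequip.Custom.RMax = 4.0        # angstrom, distance cutoff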
        class _Target(FixedBlock):
            r"""
            Target values for stopping training. If both the training and validation metrics are smaller than the specified values, the training will stop early. Only supported by the M3GNet backend.

            :ivar Forces: Forces (as reported by the backend).
            :vartype Forces: ParAMSMachineLearning._MachineLearning._Target._Forces
            """
            class _Forces(FixedBlock):
                r"""
                Forces (as reported by the backend).

                :ivar Enabled: Whether to use target values for forces.
                :vartype Enabled: BoolType | BoolKey
                :ivar MAE: MAE for forces (as reported by the backend).
                :vartype MAE: float | FloatKey
                """

                def __post_init__(self):
                    self.Enabled: BoolType | BoolKey = BoolKey(name='Enabled', comment='Whether to use target values for forces.', default=True)
                    self.MAE: float | FloatKey = FloatKey(name='MAE', comment='MAE for forces (as reported by the backend).', default=0.05, unit='eV/angstrom')
            def __post_init__(self):
                self.Forces: ParAMSMachineLearning._MachineLearning._Target._Forces = self._Forces(name='Forces', comment='Forces (as reported by the backend)')
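        # Early-stopping illustration (M3GNet backend only): with the defaults
        # above, training stops before MaxEpochs once the force MAE reported by
        # the backend is below 0.05 eV/angstrom on both the training and
        # validation sets. A stricter threshold would be (hypothetical usage):
        #
        #     driver.MachineLearning.Target.Forces.MAE = 0.02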
        def __post_init__(self):
            self.Backend: Literal["Custom", "M3GNet", "NequIP", "Test"] = MultipleChoiceKey(name='Backend', comment='The backend to use. You must separately install the backend before running a training job.', default='M3GNet', choices=['Custom', 'M3GNet', 'NequIP', 'Test'], hiddenchoices=['Custom', 'Test'], gui_type='literal choices')
            self.CommitteeSize: int | IntKey = IntKey(name='CommitteeSize', comment='The number of independently trained ML potentials.', default=1)
            self.LoadModel: str | Path | StringKey = PathStringKey(name='LoadModel', comment='Load a previously fitted model from a ParAMS results directory. A ParAMS results directory should contain two subdirectories ``optimization`` and ``settings_and_initial_data``. This option ignores all settings inside model blocks.', ispath=True, gui_type='directory')
            self.MaxEpochs: int | IntKey = IntKey(name='MaxEpochs', comment='Set the maximum number of epochs a backend should perform.', default=1000)
            self.RunAMSAtEnd: BoolType | BoolKey = BoolKey(name='RunAMSAtEnd', comment='Whether to run the (committee) ML potential through AMS at the end. This will create the energy/forces scatter plots for the final trained model.', gui_name='Run AMS at end:', default=True)
            self.Custom: ParAMSMachineLearning._MachineLearning._Custom = self._Custom(name='Custom', comment='Set up a custom fitting program within ParAMS', hidden=True)
            self.LossCoeffs: ParAMSMachineLearning._MachineLearning._LossCoeffs = self._LossCoeffs(name='LossCoeffs', comment='Modify the coefficients for the machine learning loss function. For backends that support weights, this is on top of the supplied dataset weights and sigmas.')
            self.M3GNet: ParAMSMachineLearning._MachineLearning._M3GNet = self._M3GNet(name='M3GNet', comment='Options for M3GNet fitting.')
            self.NequIP: ParAMSMachineLearning._MachineLearning._NequIP = self._NequIP(name='NequIP', comment='Options for NequIP fitting.')
            self.Target: ParAMSMachineLearning._MachineLearning._Target = self._Target(name='Target', comment='Target values for stopping training. If both the training and validation metrics are smaller than the specified values, the training will stop early. Only supported by the M3GNet backend.')
    class _ParallelLevels(FixedBlock):
        r"""
        Distribution of threads/processes between the parallelization levels.

        :ivar CommitteeMembers: Maximum number of committee member optimizations to run in parallel. If set to zero, it will take the minimum of MachineLearning%CommitteeSize and the number of available cores (NSCM).
        :vartype CommitteeMembers: int | IntKey
        :ivar Cores: Number of cores to use per committee member optimization. By default (0), the available cores (NSCM) are divided equally among committee members. When using GPU offloading, consider setting this to 1.
        :vartype Cores: int | IntKey
        :ivar Jobs: Number of JobCollection jobs to run in parallel for each loss function evaluation.
        :vartype Jobs: int | IntKey
        :ivar Optimizations: Number of independent optimizers to run in parallel.
        :vartype Optimizations: int | IntKey
        :ivar ParameterVectors: Number of parameter vectors to try in parallel for each optimizer iteration. This level of parallelism can only be used with optimizers that support parallel optimization! Default (0) will set this value to the number of cores on the system divided by the number of optimizers run in parallel, i.e., each optimizer will be given an equal share of the resources.
        :vartype ParameterVectors: int | IntKey
        :ivar Processes: Number of processes (MPI ranks) to spawn for each JobCollection job. This effectively sets the NSCM environment variable for each job. A value of `-1` will disable explicit setting of related variables. We recommend a value of `1` in almost all cases. A value greater than 1 would only be useful if you parametrize DFTB with a serial optimizer and have very few jobs in the job collection.
        :vartype Processes: int | IntKey
        :ivar Threads: Number of threads to use for each of the processes. This effectively sets the OMP_NUM_THREADS environment variable. Note that the DFTB engine does not use threads, so the value of this variable would not have any effect. We recommend always leaving it at the default value of 1. Please consult the manual of the engine you are parameterizing. A value of `-1` will disable explicit setting of related variables.
        :vartype Threads: int | IntKey
        """

        def __post_init__(self):
            self.CommitteeMembers: int | IntKey = IntKey(name='CommitteeMembers', comment='Maximum number of committee member optimizations to run in parallel. If set to zero, it will take the minimum of MachineLearning%CommitteeSize and the number of available cores (NSCM).', gui_name='Number of parallel committee members:', default=1)
            self.Cores: int | IntKey = IntKey(name='Cores', comment='Number of cores to use per committee member optimization. By default (0), the available cores (NSCM) are divided equally among committee members. When using GPU offloading, consider setting this to 1.', gui_name='Processes (per Job):', default=0)
            self.Jobs: int | IntKey = IntKey(name='Jobs', comment='Number of JobCollection jobs to run in parallel for each loss function evaluation.', gui_name='Jobs (per loss function evaluation):', default=0)
            self.Optimizations: int | IntKey = IntKey(name='Optimizations', comment='Number of independent optimizers to run in parallel.', gui_name='Number of parallel optimizers:', default=1)
            self.ParameterVectors: int | IntKey = IntKey(name='ParameterVectors', comment='Number of parameter vectors to try in parallel for each optimizer iteration. This level of parallelism can only be used with optimizers that support parallel optimization!\n\nDefault (0) will set this value to the number of cores on the system divided by the number of optimizers run in parallel, i.e., each optimizer will be given an equal share of the resources.', gui_name='Loss function evaluations (per optimizer):', default=0)
            self.Processes: int | IntKey = IntKey(name='Processes', comment='Number of processes (MPI ranks) to spawn for each JobCollection job. This effectively sets the NSCM environment variable for each job.\n\nA value of `-1` will disable explicit setting of related variables. We recommend a value of `1` in almost all cases. A value greater than 1 would only be useful if you parametrize DFTB with a serial optimizer and have very few jobs in the job collection.', gui_name='Processes (per Job):', default=1)
            self.Threads: int | IntKey = IntKey(name='Threads', comment='Number of threads to use for each of the processes. This effectively sets the OMP_NUM_THREADS environment variable.\nNote that the DFTB engine does not use threads, so the value of this variable would not have any effect. We recommend always leaving it at the default value of 1. Please consult the manual of the engine you are parameterizing.\n\nA value of `-1` will disable explicit setting of related variables.', gui_name='Threads (per Process):', default=1)
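    # Parallelism sketch (hypothetical usage): training a committee of 4 ML
    # potentials on a 16-core node, with all 4 members running in parallel on
    # 4 cores each (or Cores = 1 when offloading to a GPU):
    #
    #     driver.MachineLearning.CommitteeSize = 4
    #     driver.ParallelLevels.CommitteeMembers = 4
    #     driver.ParallelLevels.Cores = 4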
    def __post_init__(self):
        self.EngineCollection: str | StringKey = StringKey(name='EngineCollection', comment='Path to (optional) JobCollection Engines YAML file.', default='job_collection_engines.yaml')
        self.JobCollection: str | StringKey = StringKey(name='JobCollection', comment='Path to JobCollection YAML file.', default='job_collection.yaml')
        self.ResultsDirectory: str | Path | StringKey = PathStringKey(name='ResultsDirectory', comment='Directory in which output files will be created.', gui_name='Working directory: ', default='results', ispath=True)
        self.Task: Literal["Optimization", "GenerateReference", "SinglePoint", "Sensitivity", "MachineLearning"] = MultipleChoiceKey(name='Task', comment='Task to run.\n\nAvailable options:\n• MachineLearning: Optimization for machine learning models.\n• Optimization: Global optimization powered by GloMPO\n• Generate Reference: Run jobs with reference engine to get reference values\n• Single Point: Evaluate the current configuration of jobs, training data, and parameters\n• Sensitivity: Measure the sensitivity of the loss function to each of the active parameters', default='Optimization', choices=['Optimization', 'GenerateReference', 'SinglePoint', 'Sensitivity', 'MachineLearning'])
        self.DataSet: ParAMSMachineLearning._DataSet = self._DataSet(name='DataSet', comment='Configuration settings for each data set in the optimization.', unique=False, gui_type='Repeat at least once')
        self.MachineLearning: ParAMSMachineLearning._MachineLearning = self._MachineLearning(name='MachineLearning', comment='Options for Task MachineLearning.')
        self.ParallelLevels: ParAMSMachineLearning._ParallelLevels = self._ParallelLevels(name='ParallelLevels', comment='Distribution of threads/processes between the parallelization levels.', gui_name='Parallelization distribution: ')
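
# Minimal end-to-end sketch (illustration, not part of the generated classes):
# configure a MachineLearning task that fine-tunes the M3GNet universal
# potential on an existing job collection. All keys are defined above;
# serializing the block to a text input and running the job are handled
# elsewhere in the PISA/ParAMS API and are not shown here.
if __name__ == '__main__':
    driver = ParAMSMachineLearning()
    driver.Task = 'MachineLearning'
    driver.JobCollection = 'job_collection.yaml'
    driver.EngineCollection = 'job_collection_engines.yaml'
    driver.ResultsDirectory = 'results'
    driver.MachineLearning.Backend = 'M3GNet'
    driver.MachineLearning.MaxEpochs = 200
    driver.MachineLearning.Target.Forces.Enabled = True
    driver.MachineLearning.Target.Forces.MAE = 0.05   # eV/angstrom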