from __future__ import annotations
from pathlib import Path
from typing import Iterable, Literal, Sequence
from scm.pisa.block import DriverBlock, EngineBlock, FixedBlock, FreeBlock, InputBlock
from scm.pisa.key import BoolKey, FloatKey, FloatListKey, IntKey, IntListKey, MultipleChoiceKey, PathStringKey, StringKey, BoolType
class ParAMSSinglePoint(DriverBlock):
r"""
:ivar EngineCollection: Path to (optional) JobCollection Engines YAML file.
:vartype EngineCollection: str | StringKey
:ivar EvaluateLoss: Evaluate the loss function based on the job results. This will produce the same output files as Task Optimization.
If No, this will be skipped and only the jobs will be run (and saved).
Warning: If both Store Jobs and Evaluate Loss are No then this task will not produce any output.
:vartype EvaluateLoss: BoolType | BoolKey
:ivar JobCollection: Path to JobCollection YAML file.
:vartype JobCollection: str | StringKey
:ivar ParameterInterface: Path to parameter interface YAML file.
:vartype ParameterInterface: str | StringKey
:ivar RestartDirectory: Specify a directory to continue interrupted GenerateReference or SinglePoint calculations. The directory depends on the task:
GenerateReference: results/reference_jobs
SinglePoint: results/single_point/jobs
Note: If you use the GUI this directory will be COPIED into the results folder and the name will be prepended with 'dep-'. This can take up a lot of disk space, so you may want to remove the 'dep-' folder after the job has finished.
:vartype RestartDirectory: str | Path | StringKey
:ivar ResultsDirectory: Directory in which output files will be created.
:vartype ResultsDirectory: str | Path | StringKey
:ivar StoreJobs: Keeps the result files for each of the jobs.
If No, all pipeable jobs will be run through the AMS Pipe and no files will be saved (not even the ones not run through the pipe). If Auto, the pipeable jobs are run through the pipe and the results of nonpipeable jobs are saved to disk. If Yes, no jobs are run through the pipe and all job results are stored on disk.
Warning: If both Store Jobs and Evaluate Loss are No then task SinglePoint will not produce any output.
:vartype StoreJobs: Literal["Auto", "Yes", "No"]
:ivar Task: Task to run.
Available options:
• MachineLearning: Optimization for machine learning models
• Optimization: Global optimization powered by GloMPO
• Generate Reference: Run jobs with reference engine to get reference values
• Single Point: Evaluate the current configuration of jobs, training data, and parameters
• Sensitivity: Measure the sensitivity of the loss function to each of the active parameters
:vartype Task: Literal["Optimization", "GenerateReference", "SinglePoint", "Sensitivity", "MachineLearning"]
:ivar DataSet: Configuration settings for each data set in the optimization.
:vartype DataSet: ParAMSSinglePoint._DataSet
:ivar Engine: If set, use this engine for the ParAMS SinglePoint. Mutually exclusive with EngineCollection.
:vartype Engine: EngineBlock
:ivar ParallelLevels: Distribution of threads/processes between the parallelization levels.
:vartype ParallelLevels: ParAMSSinglePoint._ParallelLevels
"""
class _DataSet(FixedBlock):
r"""
Configuration settings for each data set in the optimization.
:ivar BatchSize: Number of data set entries to be evaluated per epoch. Default 0 means all entries.
:vartype BatchSize: int | IntKey
:ivar EvaluateEvery: This data set is evaluated every n evaluations of the training set.
This will always be set to 1 for the training set. For other data sets it will be adjusted to the closest multiple of LoggingInterval%General, i.e., you cannot evaluate an extra data set more frequently than you log it.
:vartype EvaluateEvery: int | IntKey
:ivar LossFunction: Loss function used to quantify the error between model and reference values. This becomes the minimization task.
Available options:
• mae: Mean absolute error
• rmse: Root mean squared error
• sse: Sum of squared errors
• sae: Sum of absolute errors
:vartype LossFunction: Literal["mae", "rmse", "sse", "sae"]
:ivar MaxJobs: Limit each evaluation to a subset of n jobs. Default 0 means all jobs are used.
:vartype MaxJobs: int | IntKey
:ivar MaxJobsShuffle: Use a different job subset for every evaluation.
:vartype MaxJobsShuffle: BoolType | BoolKey
:ivar Name: Unique data set identifier.
The first occurrence of DataSet will always be called training_set.
The second will always be called validation_set.
These cannot be overwritten.
Later occurrences will default to data_set_xx where xx starts at 03 and increments from there. This field can be used to customize the latter names.
:vartype Name: str | StringKey
:ivar Path: Path to DataSet YAML file.
:vartype Path: str | StringKey
:ivar UsePipe: Use AMS Pipe for suitable jobs to speed up evaluation.
:vartype UsePipe: BoolType | BoolKey
"""
def __post_init__(self):
self.BatchSize: int | IntKey = IntKey(name='BatchSize', comment='Number of data set entries to be evaluated per epoch. Default 0 means all entries.', default=0)
self.EvaluateEvery: int | IntKey = IntKey(name='EvaluateEvery', comment='This data set is evaluated every n evaluations of the training set.\n\nThis will always be set to 1 for the training set. For other data sets it will be adjusted to the closest multiple of LoggingInterval%General, i.e., you cannot evaluate an extra data set more frequently than you log it.', default=1)
self.LossFunction: Literal["mae", "rmse", "sse", "sae"] = MultipleChoiceKey(name='LossFunction', comment='Loss function used to quantify the error between model and reference values. This becomes the minimization task.\n\nAvailable options:\n• mae: Mean absolute error\n• rmse: Root mean squared error\n• sse: Sum of squared errors\n• sae: Sum of absolute errors', default='sse', choices=['mae', 'rmse', 'sse', 'sae'])
self.MaxJobs: int | IntKey = IntKey(name='MaxJobs', comment='Limit each evaluation to a subset of n jobs. Default 0 means all jobs are used.', default=0)
self.MaxJobsShuffle: BoolType | BoolKey = BoolKey(name='MaxJobsShuffle', comment='Use a different job subset for every evaluation.', default=False)
self.Name: str | StringKey = StringKey(name='Name', comment='Unique data set identifier.\n\nThe first occurrence of DataSet will always be called training_set.\nThe second will always be called validation_set.\nThese cannot be overwritten.\n\nLater occurrences will default to data_set_xx where xx starts at 03 and increments from there. This field can be used to customize the latter names.', default='')
self.Path: str | StringKey = StringKey(name='Path', comment='Path to DataSet YAML file.')
self.UsePipe: BoolType | BoolKey = BoolKey(name='UsePipe', comment='Use AMS Pipe for suitable jobs to speed up evaluation.', default=True)
class _Engine(EngineBlock):
r"""
If set, use this engine for the ParAMS SinglePoint. Mutually exclusive with EngineCollection.
"""
def __post_init__(self):
pass
class _ParallelLevels(FixedBlock):
r"""
Distribution of threads/processes between the parallelization levels.
:ivar CommitteeMembers: Maximum number of committee member optimizations to run in parallel. If set to zero, the minimum of MachineLearning%CommitteeSize and the number of available cores (NSCM) will be used.
:vartype CommitteeMembers: int | IntKey
:ivar Cores: Number of cores to use per committee member optimization. By default (0), the available cores (NSCM) are divided equally among the committee members. When using GPU offloading, consider setting this to 1.
:vartype Cores: int | IntKey
:ivar Jobs: Number of JobCollection jobs to run in parallel for each loss function evaluation.
:vartype Jobs: int | IntKey
:ivar Optimizations: Number of independent optimizers to run in parallel.
:vartype Optimizations: int | IntKey
:ivar ParameterVectors: Number of parameter vectors to try in parallel for each optimizer iteration. This level of parallelism can only be used with optimizers that support parallel optimization!
Default (0) will set this value to the number of cores on the system divided by the number of optimizers run in parallel, i.e., each optimizer will be given an equal share of the resources.
:vartype ParameterVectors: int | IntKey
:ivar Processes: Number of processes (MPI ranks) to spawn for each JobCollection job. This effectively sets the NSCM environment variable for each job.
A value of `-1` will disable explicit setting of related variables. We recommend a value of `1` in almost all cases. A value greater than 1 would only be useful if you parametrize DFTB with a serial optimizer and have very few jobs in the job collection.
:vartype Processes: int | IntKey
:ivar Threads: Number of threads to use for each of the processes. This effectively sets the OMP_NUM_THREADS environment variable.
Note that the DFTB engine does not use threads, so the value of this variable would not have any effect. We recommend always leaving it at the default value of 1. Please consult the manual of the engine you are parameterizing.
A value of `-1` will disable explicit setting of related variables.
:vartype Threads: int | IntKey
"""
def __post_init__(self):
self.CommitteeMembers: int | IntKey = IntKey(name='CommitteeMembers', comment='Maximum number of committee member optimizations to run in parallel. If set to zero, the minimum of MachineLearning%CommitteeSize and the number of available cores (NSCM) will be used.', gui_name='Number of parallel committee members:', default=1)
self.Cores: int | IntKey = IntKey(name='Cores', comment='Number of cores to use per committee member optimization. By default (0), the available cores (NSCM) are divided equally among the committee members. When using GPU offloading, consider setting this to 1.', gui_name='Processes (per Job):', default=0)
self.Jobs: int | IntKey = IntKey(name='Jobs', comment='Number of JobCollection jobs to run in parallel for each loss function evaluation.', gui_name='Jobs (per loss function evaluation):', default=0)
self.Optimizations: int | IntKey = IntKey(name='Optimizations', comment='Number of independent optimizers to run in parallel.', gui_name='Number of parallel optimizers:', default=1)
self.ParameterVectors: int | IntKey = IntKey(name='ParameterVectors', comment='Number of parameter vectors to try in parallel for each optimizer iteration. This level of parallelism can only be used with optimizers that support parallel optimization!\n\nDefault (0) will set this value to the number of cores on the system divided by the number of optimizers run in parallel, i.e., each optimizer will be given an equal share of the resources.', gui_name='Loss function evaluations (per optimizer):', default=0)
self.Processes: int | IntKey = IntKey(name='Processes', comment='Number of processes (MPI ranks) to spawn for each JobCollection job. This effectively sets the NSCM environment variable for each job.\n\nA value of `-1` will disable explicit setting of related variables. We recommend a value of `1` in almost all cases. A value greater than 1 would only be useful if you parametrize DFTB with a serial optimizer and have very few jobs in the job collection.', gui_name='Processes (per Job):', default=1)
self.Threads: int | IntKey = IntKey(name='Threads', comment='Number of threads to use for each of the processes. This effectively sets the OMP_NUM_THREADS environment variable.\nNote that the DFTB engine does not use threads, so the value of this variable would not have any effect. We recommend always leaving it at the default value of 1. Please consult the manual of the engine you are parameterizing.\n\nA value of `-1` will disable explicit setting of related variables.', gui_name='Threads (per Process):', default=1)
def __post_init__(self):
self.EngineCollection: str | StringKey = StringKey(name='EngineCollection', comment='Path to (optional) JobCollection Engines YAML file.', default='job_collection_engines.yaml')
self.EvaluateLoss: BoolType | BoolKey = BoolKey(name='EvaluateLoss', comment='Evaluate the loss function based on the job results. This will produce the same output files as Task Optimization. \nIf No, this will be skipped and only the jobs will be run (and saved).\n\nWarning: If both Store Jobs and Evaluate Loss are No then this task will not produce any output.', default=True)
self.JobCollection: str | StringKey = StringKey(name='JobCollection', comment='Path to JobCollection YAML file.', default='job_collection.yaml')
self.ParameterInterface: str | StringKey = StringKey(name='ParameterInterface', comment='Path to parameter interface YAML file.', default='parameter_interface.yaml')
self.RestartDirectory: str | Path | StringKey = PathStringKey(name='RestartDirectory', comment="Specify a directory to continue interrupted GenerateReference or SinglePoint calculations. The directory depends on the task: \n\nGenerateReference: results/reference_jobs\nSinglePoint: results/single_point/jobs\n\nNote: If you use the GUI this directory will be COPIED into the results folder and the name will be prepended with 'dep-'. This can take up a lot of disk space, so you may want to remove the 'dep-' folder after the job has finished.", gui_name='Load jobs from: ', default='', ispath=True, gui_type='directory')
self.ResultsDirectory: str | Path | StringKey = PathStringKey(name='ResultsDirectory', comment='Directory in which output files will be created.', gui_name='Working directory: ', default='results', ispath=True)
self.StoreJobs: Literal["Auto", "Yes", "No"] = MultipleChoiceKey(name='StoreJobs', comment='Keeps the results files for each of the jobs.\nIf No, all pipeable jobs will be run through the AMS Pipe and no files will be saved (not even the ones not run through the pipe). If Auto, the pipeable jobs are run through the pipe and the results of nonpipeable jobs are saved to disk. If Yes, no jobs are run through the pipe and all job results are stored on disk. \n\nWarning: If both Store Jobs and Evaluate Loss are No then task SinglePoint will not produce any output.', default='Auto', choices=['Auto', 'Yes', 'No'])
self.Task: Literal["Optimization", "GenerateReference", "SinglePoint", "Sensitivity", "MachineLearning"] = MultipleChoiceKey(name='Task', comment='Task to run.\n\nAvailable options:\n•MachineLearning: Optimization for machine learning models.\n•Optimization: Global optimization powered by GloMPO\n•Generate Reference: Run jobs with reference engine to get reference values\n•Single Point: Evaluate the current configuration of jobs, training data, and parameters\n•Sensitivity: Measure the sensitivity of the loss function to each of the active parameters', default='Optimization', choices=['Optimization', 'GenerateReference', 'SinglePoint', 'Sensitivity', 'MachineLearning'])
self.DataSet: ParAMSSinglePoint._DataSet = self._DataSet(name='DataSet', comment='Configuration settings for each data set in the optimization.', unique=False, gui_type='Repeat at least once')
self.Engine: EngineBlock = self._Engine(name='Engine', comment='If set, use this engine for the ParAMS SinglePoint. Mutually exclusive with EngineCollection. ', header=True)
self.ParallelLevels: ParAMSSinglePoint._ParallelLevels = self._ParallelLevels(name='ParallelLevels', comment='Distribution of threads/processes between the parallelization levels.', gui_name='Parallelization distribution: ')
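# --- Hypothetical usage sketch (not part of the generated module) ---
# A minimal illustration of how a driver block like this is typically filled in,
# assuming the usual PISA conventions: keys are assigned as plain attributes and
# the block is rendered to a text input with str(). The attribute names and file
# paths below come from the defaults documented above; the workflow itself is
# illustrative only, not the library's documented API.
if __name__ == '__main__':
    driver = ParAMSSinglePoint()
    driver.Task = 'SinglePoint'                              # one of: Optimization, GenerateReference, SinglePoint, Sensitivity, MachineLearning
    driver.JobCollection = 'job_collection.yaml'             # default path documented for JobCollection
    driver.ParameterInterface = 'parameter_interface.yaml'   # default path documented for ParameterInterface
    driver.StoreJobs = 'Yes'                                 # keep per-job result files on disk
    driver.EvaluateLoss = True                               # also evaluate the loss, producing the same output files as Task Optimization
    # Note: at least one DataSet block (the training set) is required; its Path key
    # points to the training-set YAML file. The repeated-block syntax is omitted here.
    print(driver)                                            # assumed: rendering the block as AMS text input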