# This file was automatically generated by build-ipynb_FILES.sh from # the PhaseTransitions-ADP.ipynb file. All changes to this file will be lost # We studied the effect of changing the composition of the gas phase, namely # partial pressures for $O_2$ and $CO$, in the $CO_2$ Turnover frequency (TOF), # in some of the pyZacros tutorials for the Ziff-Gulari-Barshad model (see # especially **Phase Transitions in the ZGB model** and # **Ziff-Gulari-Barshad model: Steady State Conditions**). We used an evenly # distributed grid on the $CO$ gas phase molar fraction in those tutorials. # From the results, it is possible to see that, typically, the lower the $CO_2$ # production, the more difficult it is to achieve a steady state (except when # $CO_2$ production is zero, likely because the surface got poisoned, which # makes the calculation finish quickly). This means that most of the # computational time is spent on points that, from a catalytic standpoint, # are not as interesting as points with higher $CO_2$ production. Thus, to # reduce the overall computational cost, it would be ideal to generate more # points in the more interesting areas automatically. This is the main goal # of the **Adaptive Design Procedure (ADP)**. # # The ADP was created to generate training data for Machine Learning (ML) algorithms, # with a particular emphasis on approximating computationally-intensive # first-principles kinetic models in catalysis. The procedure is based on # function topology and behavior, with the discrete gradient and relative # importance of the independent variables calculated. See more details in the # original publication [Chem. Eng. J 400, (2020), 125469](https://doi.org/10.1016/j.cej.2020.125469). # The authors demonstrate that the procedure could design a dataset (achieving # the same accuracy) using only between 60 and 80% fewer data points as are # needed in an evenly distributed grid. Furthermore, the ADP generates a # **Surrogate Model** of the data based on ML techniques, # allowing interpolation of points not included in the original data set, # which is critical for multiscale simulations of complex chemical reactors. # # In this tutorial, we will likewise examine the effects of altering the gas # phase's composition in the $CO_2$ Turnover frequency (TOF) in the ZGB model, # but we will do so while utilizing the ADP to both suggest the values of the # $CO$ molar fractions to evaluate and generate a surrogate model for this solution. # In practice, the surrogate model is a [PKL file](https://docs.python.org/3/library/pickle.html) # that contains all of the parameters of the ML model, allowing it to be regenerated # and used subsequently. The main goal of this tutorial is to obtain this file. # First of all, we must install the package **adaptiveDesignProcedure**. You # can either follow the procedure described in its GitHub repository # [https://github.com/mbracconi/adaptiveDesignProcedure](https://github.com/mbracconi/adaptiveDesignProcedure), # or if you are using AMS, you can do it as follows by typing in a terminal: # ``` # $ amspackages install adaptivedesignprocedure # ``` # Now, let's start with the script. Foremost, we import all packages we need: import multiprocessing import numpy import scm.plams import scm.pyzacros as pz import scm.pyzacros.models import adaptiveDesignProcedure as adp import warnings warnings.simplefilter("ignore", UserWarning) # The ``import warning`` line is just needed to get clean output messages further down. # The ADP method needs a function that generates the data for the model; # we named it ``get_rate()``. This function accepts a single argument, # a 2d-array containing the conditions to be computed with the shape # ``(number of conditions, number of input variables)``, and returns a # 2d-array containing the calculated values with the shape # ``(number of conditions, number of output variables)``. The number of # conditions will be determined on the fly by the ADP. On the contrary, # we must decide on the number of input and output variables. In our # example, we have ``one input variable``, the molar fraction of CO, and # ``three output variables``, the average coverage for $O*$ and $CO*$ # and the $CO_2$ TOF. # # This ``get_rate()`` function performs a ``ZacrosParametersScanJob`` # calculation. To follow the details, please refer to the example # **Phase Transitions in the ZGB model**. In a nutshell, it configures # the ``ZacrosJob`` calculation with the ZGB predefined model. Then it # configures and runs the ``ZacrosParametersScanJob``, using the # ``ZacrosJob`` defined before as the reference and setting the $CO$ # molar fraction equal to the conditions established by the ADP. Finally, # it retrieves the results for each condition by calling the # ``turnover frequency()`` and ``average coverage()`` functions and # storing them in the output array in the correct order. def get_rate(conditions): print("") print(" Requesting:") for cond in conditions: print(" x_CO = ", cond[0]) print("") # --------------------------------------- # Zacros calculation # --------------------------------------- zgb = pz.models.ZiffGulariBarshad() z_sett = pz.Settings() z_sett.random_seed = 953129 z_sett.temperature = 500.0 z_sett.pressure = 1.0 z_sett.species_numbers = ("time", 0.1) z_sett.max_time = 10.0 z_job = pz.ZacrosJob( settings=z_sett, lattice=zgb.lattice, mechanism=zgb.mechanism, cluster_expansion=zgb.cluster_expansion ) # --------------------------------------- # Parameters scan calculation # --------------------------------------- ps_params = pz.ZacrosParametersScanJob.Parameters() ps_params.add("x_CO", "molar_fraction.CO", [cond[0] for cond in conditions]) ps_params.add("x_O2", "molar_fraction.O2", lambda params: 1.0 - params["x_CO"]) ps_job = pz.ZacrosParametersScanJob(reference=z_job, parameters=ps_params) # --------------------------------------- # Running the calculations # --------------------------------------- results = ps_job.run() if not results.job.ok(): print("Something went wrong!") # --------------------------------------- # Collecting the results # --------------------------------------- data = numpy.nan * numpy.empty((len(conditions), 3)) if results.job.ok(): results_dict = results.turnover_frequency() results_dict = results.average_coverage(last=20, update=results_dict) for i in range(len(results_dict)): data[i, 0] = results_dict[i]["average_coverage"]["O*"] data[i, 1] = results_dict[i]["average_coverage"]["CO*"] data[i, 2] = results_dict[i]["turnover_frequency"]["CO2"] return data # Now, let's look at the script's main part. # As usual, we initialize the pyZacros environment: scm.pyzacros.init() # We'll use the ``plams.JobRunner`` class, which easily allows us to run as many parallel # instances as we request. In this case, we choose to use the maximum number of # simultaneous processes (``maxjobs``) equal to the number of processors in the # machine. Additionally, by setting ``nproc = 1`` we establish that only one # processor will be used for each zacros instance. maxjobs = multiprocessing.cpu_count() scm.plams.config.default_jobrunner = scm.plams.JobRunner(parallel=True, maxjobs=maxjobs) scm.plams.config.job.runscript.nproc = 1 print("Running up to {} jobs in parallel simultaneously".format(maxjobs)) # Firstly, we must define the input and output variables. As previously stated, for # the input variables, we only have the molar fraction of CO (``x_CO``). In terms of # ADP, we must provide a name as well as a range to cover during the execution, # ranging from ``min`` to ``max`` and beginning with ``num`` evenly spaced samples. # Regarding the output variables, we have three: the average coverage for $O*$ and # $CO*$, as well as the $CO_2$ TOF. For them, in ADP, we only need to provide their # names (``ac_O``, ``ac_CO``, ``TOF_CO2``). Notice these names are arbitrary, and # they do not have any effect on the final results. However, the number of input # and output variables should be in correspondence with the array sizes used in the # ``get_rate()`` function. input_var = ({"name": "x_CO", "min": 0.2, "max": 0.8, "num": 5},) output_var = ( {"name": "ac_O"}, {"name": "ac_CO"}, {"name": "TOF_CO2"}, ) # Then, we create an ``adaptativeDesignProcedure`` object by calling its constructor, # which needs the input and output variables, the function to calculate the rates # ``get_rates``. Additionally, for convenience, we set the output directory as a # subdirectory of the pyZacros working directory and set the random seed # (``randomState``) to get reproducible results. The last two parameters are optional. # It is also possible to provide several parameters to control the algorithm using # the keyword ``algorithmParams''. But we will get back to that later. adpML = adp.adaptiveDesignProcedure( input_var, output_var, get_rate, outputDir=scm.pyzacros.workdir() + "/adp.results", randomState=10 ) # Now, we begin the calculation by invoking the method ``createTrainingDataAndML()``, # which, as the name implies, generates the training data as well as the surrogate model # (or ML model). The program runs several cycles, and for each cycle, # increases the number of points and calls the "get rate()" function to evaluate them. # When the Relative Approximation Error and the Out-Of-Bag error are less than # ``algorithmParams['RADth']`` (default=10%) and ``algorithmParams['OOBth']`` (default=0.05), # respectively, the calculation converges. See the original ADP documentation for # more details. adpML.createTrainingDataAndML() # If the execution got up to this point, everything worked as expected. Hooray! # # The results are then collected by accessing the ``trainingData`` attribute of the # ``adpML`` object, and they are presented nicely in a table in the lines that follow. x_CO, ac_O, ac_CO, TOF_CO2 = adpML.trainingData.T print("-------------------------------------------------") print("%4s" % "cond", "%8s" % "x_CO", "%10s" % "ac_O", "%10s" % "ac_CO", "%12s" % "TOF_CO2") print("-------------------------------------------------") for i in range(len(x_CO)): print("%4d" % i, "%8.3f" % x_CO[i], "%10.6f" % ac_O[i], "%10.6f" % ac_CO[i], "%12.6f" % TOF_CO2[i]) # The above results are the final aim of the calculation. However, we # can take advantage of python libraries to visualize them. Here, we # use matplotlib. Please check the matplotlib documentation for more # details at [matplotlib](https://matplotlib.org/). The following lines # of code allow visualizing the effect of changing the $CO$ molar # fraction on the average coverage of $O*$ and $CO*$ and the production # rate of $CO_2$. In the Figure, we show the training data generated by # the Zacros model as points and the Surrogate Model's prediction as lines. import matplotlib.pyplot as plt fig = plt.figure() x_CO_model = numpy.linspace(0.2, 0.8, 201) ac_O_model, ac_CO_model, TOF_CO2_model = adpML.predict(x_CO_model).T ax = plt.axes() ax.set_xlabel("Molar Fraction CO", fontsize=14) ax.set_ylabel("Coverage Fraction (%)", color="blue", fontsize=14) ax.plot(x_CO_model, ac_O_model, color="blue", linestyle="-.", lw=2, zorder=1) ax.plot(x_CO, ac_O, marker="$\u25EF$", color="blue", markersize=4, lw=0, zorder=1) ax.plot(x_CO_model, ac_CO_model, color="blue", linestyle="-", lw=2, zorder=2) ax.plot(x_CO, ac_CO, marker="$\u25EF$", color="blue", markersize=4, lw=0, zorder=1) plt.text(0.3, 0.9, "O", fontsize=18, color="blue") plt.text(0.7, 0.9, "CO", fontsize=18, color="blue") ax2 = ax.twinx() ax2.set_ylabel("TOF (mol/s/site)", color="red", fontsize=14) ax2.plot(x_CO_model, TOF_CO2_model, color="red", linestyle="-", lw=2, zorder=0) ax2.plot(x_CO, TOF_CO2, marker="$\u25EF$", color="red", markersize=4, lw=0, zorder=1) plt.text(0.37, 1.5, "CO$_2$", fontsize=18, color="red") plt.show() # We also found three regions involving the two phase transitions, as shown # in the Figure, which are similar to the ones obtained in the example # **Phase Transitions in the ZGB model**. However, as you can see, the # points are not equidistant here. The ADP generated more points for # $x_\text{CO}$ between 0.35 and 0.55, which is likely to represent the # small oscillations that occur there accurately. It is worth noting that # the Surrogate Model has some difficulty reproducing the narrow peak at a # maximum $CO_2$ TOF. Certainly, the model could perform better. Unfortunately, # the only way to do so is by increasing the number of points in the training # set by tuning the ADP parameters' (``algorithmParams``). However, now that # we have the surrogate model, we can quickly obtain the average coverage for # $O*$ and $CO*$ and the $CO_2$ TOF for any $CO$ molar fraction. # Now, we can close the pyZacros environment: scm.pyzacros.finish() # As a final note, you don't have to recalculate any training points if you load the # surrogate model directly from the disk. As a result, you can apply it to any other # situation, such as e.g., a simulation of reactive computational fluid dynamics (rCFD). # The following code creates the image above exactly, but it starts by reading the # surrogate model from the directory ``plams_workdir/adp.results/``. It can be used # in a separate script. import numpy as np import adaptiveDesignProcedure as adp import matplotlib.pyplot as plt path = "plams_workdir/adp.results/" x_CO_model = np.linspace(0.2, 0.8, 201) ac_O_model, ac_CO_model, TOF_CO2_model = adp.predict(x_CO_model, path).T ax = plt.axes() ax.set_xlabel("Molar Fraction CO", fontsize=14) ax.set_ylabel("Coverage Fraction (%)", color="blue", fontsize=14) ax.plot(x_CO_model, ac_O_model, color="blue", linestyle="-.", lw=2, zorder=1) ax.plot(x_CO_model, ac_CO_model, color="blue", linestyle="-", lw=2, zorder=2) plt.text(0.3, 0.9, "O", fontsize=18, color="blue") plt.text(0.7, 0.9, "CO", fontsize=18, color="blue") ax2 = ax.twinx() ax2.set_ylabel("TOF (mol/s/site)", color="red", fontsize=14) ax2.plot(x_CO_model, TOF_CO2_model, color="red", linestyle="-", lw=2, zorder=0) plt.text(0.37, 1.5, "CO$_2$", fontsize=18, color="red") plt.show() # By default, the Surrogate Model's file is ``ml_ExtraTrees_forCFD.pkl``. The option # ``forestFile`` in the ADP constructor allows you to alter the prefix ``ml_ExtraTrees``. # In the ``adp.predict()`` method, you can provide the complete path to this file, but if # a directory is supplied instead, it will try to discover the proper file inside, # as shown in the lines of code above.