{ "cells": [ { "cell_type": "markdown", "id": "31139769-1daf-4765-803f-321d2eb342d5", "metadata": {}, "source": [ "## Initial imports" ] }, { "cell_type": "code", "execution_count": 1, "id": "f31e99f3-bde3-41b7-abde-54f3b6a3e361", "metadata": {}, "outputs": [], "source": [ "import pyCRS\n", "import matplotlib.pyplot as plt\n", "from rdkit import Chem\n", "from rdkit.Chem.Draw import IPythonConsole\n", "\n", "IPythonConsole.ipython_useSVG = True\n", "IPythonConsole.molSize = 150, 150" ] }, { "cell_type": "markdown", "id": "4cde646d-77e3-498f-ae62-e280172f2525", "metadata": {}, "source": [ "## Property prediction from SMILES (ethyl acetate)" ] }, { "cell_type": "code", "execution_count": 2, "id": "a30a9c33-ce52-44cf-a113-214ac16cc8b9", "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAJYAAACWCAIAAACzY+a1AAACt0lEQVR4nO3dyXLCMBAAUSuV//9l5eCKwx6jbdSi+wQnG55H2EBByjlvRu4regesNgnxSYhPQnwS4pMQn4T4JMQnIT4J8UmIT0J8EuKTEJ+E+CTEJyE+CfFJiE9CfBLikxCfhPgkxCchPgnxSYhPQnwS4pMQn4T4JMQnIT4J8UmIT0J8EuKTEJ+E+CTEJyE+CfFJiE9CfBLikxDfd/QOLFFKf7eH/ziohNWldMV2c7d/LqT4JMQnIT4J8UmIzzPS6nL2ooJf6B9FuJDWdTl/QUnYuuGoEuKTsKLh76U9TEJ8EuKTEJ+ETYt4dZQQn4SlzXE6ukm4QBLik7Cw+PdGf5OwZSGuEuKTsKSU0jx/Ji9hs6Jc5yJMKaUJPkRlNcsXL3a5/Si+vG3/Fkx4zNwl2AGp4pliCB/K3ZRzdhzPNJrwLZU5x3G2/RlEeGbsnrWP41TP2n2Be9idsMli6KL6ol6ENWP3sDkX1RlqT9h1XPL+7fdQxeOhTbIwNDuom4/d643tW+q+oafbv7qKjVWs3fxQubttD1Z89mBjx7GccIplZIjiycM0ahxLtjoF3lFPxXcfaYjiEid4zV8aU9q2LRUdpuOP7yUI95qMY6OjYeQ4LkS41Sm2HuVhimsRFnR8PNnheRizqH4w4aiLy97j+JGEw98Z6Kq4NGHoD1Hc1E9xXcLoX7e7r9NL41xff1q7nPPxqVnDJBxdc0UJA9oVW0FKGFPDRVXCyJoorntGus11UfGiyjPVpQlRFV84upDOUvGiKuFEOYUfmoT4JMQnIT4J8UmIT0J8EuKTEJ+E+CTEJyE+CfFJiE9CfBLikxCfhPgkxCchPgnxSYhPQnwS4pMQn4T4JMQnIT4J8UmIT0J8EuKTEJ+E+CTEJyE+CfFJiE9CfBLikxCfhPgkxCchPgnxSYhPQnw/xXYWAm6Y2C0AAAAASUVORK5CYII=\n", "image/svg+xml": [ "\n", "\n", " \n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "O\n", "O\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 2, "metadata": {}, 
"output_type": "execute_result" } ], "source": [ "# smiles = 'CCO' # ethanol\n", "smiles = \"O=C(OCC)C\" # ethyl acetate\n", "rdkit_mol = Chem.MolFromSmiles(smiles)\n", "rdkit_mol # show the molecule in a Jupyter notebook" ] }, { "cell_type": "markdown", "id": "5e2885e9-b22c-43db-93ff-909be9138bb8", "metadata": {}, "source": [ "### Temperature-independent properties" ] }, { "cell_type": "code", "execution_count": 3, "id": "c26688f6-8254-4e7f-b7cb-4660d76be837", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "SMILES: O=C(OCC)C\n", "\n", "boilingpoint : 339.131 K\n", "criticalpressure : 38.243 bar\n", "criticaltemp : 544.189 K\n", "criticalvol : 0.271 L/mol\n", "density : 0.894 kg/L (298.15 K)\n", "dielectricconstant : 6.834 \n", "entropygas : 382.780 J/(mol K)\n", "flashpoint : 265.005 K\n", "gidealgas : -323.540 kJ/mol\n", "hcombust : -2075.882 kJ/mol\n", "hformstd : -462.595 kJ/mol\n", "hfusion : 11.717 kJ/mol\n", "hidealgas : -437.815 kJ/mol\n", "hsublimation : 55.394 kJ/mol\n", "meltingpoint : 179.420 K\n", "molarvol : 0.098 L/mol\n", "parachor : 215.764 \n", "solubilityparam : 9.069 √(cal/cm^3)\n", "synacc : 1.756 \n", "tpt : 178.434 K\n", "vdwarea : 129.168 Ų\n", "vdwvol : 89.171 ų\n" ] } ], "source": [ "print(f\"SMILES: {smiles}\\n\")\n", "mol = pyCRS.Input.read_smiles(smiles)\n", "\n", "temperatures = [298.15, 308.15, 318.15, 328.15, 338.15]\n", "pyCRS.PropPred.estimate(mol, temperatures=temperatures)\n", "\n", "for prop, value in mol.properties.items():\n", " unit = pyCRS.PropPred.units[prop]\n", " print(f\"{prop:<20s}: {value:.3f} {unit}\")" ] }, { "cell_type": "markdown", "id": "42fe82f4-3c76-45b9-99a9-9992462587fd", "metadata": {}, "source": [ "### Temperature-dependent properties (vapor pressure)" ] }, { "cell_type": "code", "execution_count": 4, "id": "52cd71c8-a973-4a2a-9022-45b85f01ddf5", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "prop = \"vaporpressure\"\n", "unit = pyCRS.PropPred.units[prop]\n", "temperatures_K, vaporpressures = mol.get_tdep_values(prop)\n", "temperatures_C = [t - 273.15 for t in temperatures_K] # convert to Celsius\n", "\n", "plt.figure(figsize=(3, 3))\n", "plt.plot(temperatures_C, vaporpressures)\n", "plt.plot(temperatures_C, vaporpressures, \".\")\n", "plt.xlabel(\"Temperature (degree Celsius)\")\n", "plt.title(f\"SMILES: {smiles}\")\n", "plt.ylabel(f\"{prop} [{unit}]\");" ] }, { "cell_type": "markdown", "id": "54618cc8-2fc0-4481-b733-edee5fe2172f", "metadata": {}, "source": [ "## Create .csv for multiple compounds\n", "\n", "Define a list of compounds by their SMILES strings. This example also shows how to only calculate a subset of all properties.\n", "\n", "Note: The SMILES string 'C' corresponds to methane which is too small to be used with the property prediction tool, so the results are given as 'nan' (not a number)." 
] }, { "cell_type": "code", "execution_count": 5, "id": "0b85d972-3604-4510-8d3b-ad772c9b40ec", "metadata": {}, "outputs": [], "source": [ "smiles_list = [\n", "    \"CCO\",\n", "    \"CCOC\",\n", "    \"OCCCN\",\n", "    \"C\",  # methane is too small to be used with property prediction and will return \"nan\"\n", "    \"C1=CC=C(C=C1)COCC2=CC=CC=C2\",\n", "]\n", "temperatures = list(range(280, 340, 10))\n", "\n", "mols = [pyCRS.Input.read_smiles(s) for s in smiles_list]\n", "\n", "properties = [\"boilingpoint\", \"criticaltemp\", \"hformstd\"]\n", "\n", "# Use a dedicated loop name so the single-molecule `mol` defined earlier is not overwritten.\n", "for compound in mols:\n", "    pyCRS.PropPred.estimate(compound, properties, temperatures=temperatures)" ] }, { "cell_type": "code", "execution_count": 6, "id": "ac4fb39a-ed79-4b7b-942a-1f0063cff42e", "metadata": {}, "outputs": [], "source": [ "def get_csv(mols, properties):\n", "    \"\"\"Build CSV text with one row per molecule and one column per property.\n", "\n", "    Parameters\n", "    ----------\n", "    mols : iterable\n", "        Objects providing a ``smiles`` attribute and a ``properties`` mapping.\n", "    properties : sequence of str\n", "        Property names; each must be a key of ``pyCRS.PropPred.units``.\n", "\n", "    Returns\n", "    -------\n", "    str\n", "        A header line followed by one line per molecule, each terminated by a\n", "        newline. Header cells carry the unit in square brackets when the unit\n", "        is non-empty. Numeric values are written with four decimals; values\n", "        that cannot be formatted as floats are written unchanged, so a\n", "        property missing from a molecule becomes an empty field.\n", "    \"\"\"\n", "    header = [\"SMILES\"]\n", "    for prop in properties:\n", "        unit = pyCRS.PropPred.units[prop]\n", "        header.append(f\"{prop} [{unit}]\" if unit else prop)\n", "    rows = [\",\".join(header)]\n", "\n", "    for mol in mols:\n", "        fields = [f\"{mol.smiles}\"]\n", "        for prop in properties:\n", "            value = mol.properties.get(prop, \"\")\n", "            try:\n", "                fields.append(f\"{value:.4f}\")\n", "            except (TypeError, ValueError):\n", "                # Formatting a str (such as the empty default) with a float\n", "                # spec raises ValueError, while None raises TypeError.\n", "                fields.append(f\"{value}\")\n", "        rows.append(\",\".join(fields))\n", "    return \"\\n\".join(rows) + \"\\n\"" ] }, { "cell_type": "code", "execution_count": 7, "id": "be9db892-4183-4006-a388-e312b095bfe6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "SMILES,boilingpoint [K],criticaltemp [K],hformstd [kJ/mol]\n", "CCO,337.6259,442.0008,-271.9478\n", "CCOC,296.4144,425.7282,-213.6494\n", "OCCCN,432.7489,541.4748,-290.7921\n", "C,nan,nan,nan\n", "C1=CC=C(C=C1)COCC2=CC=CC=C2,558.7903,661.7012,-167.0454\n", "\n" ] } ], "source": [ "csv = get_csv(mols, properties)\n", "print(csv)\n", "\n", "# To write to a .csv file:\n", "# with open('outputfile.csv', 'w') as f:\n", "#     f.write(csv)" ] }, { "cell_type": "markdown", "id": 
"8abe2182-f759-494d-a6c0-3a705e525af8", "metadata": {}, "source": [ "### Bar chart for multiple compounds\n", "\n", "Continuing from the previous example, you can also create e.g. a bar chart with the boiling points:" ] }, { "cell_type": "code", "execution_count": 8, "id": "0372299c-0074-456f-95ec-0ccff98bee7d", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "prop = \"boilingpoint\"\n", "values = [mol.properties.get(prop, None) for mol in mols]\n", "plt.barh(smiles_list, values)\n", "plt.title(\"Boiling point [K]\");" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.12" } }, "nbformat": 4, "nbformat_minor": 5 }