From 56adbcb6f437b82d0ed18376066d0d80fb7898ac Mon Sep 17 00:00:00 2001 From: Marius Ciepluch <11855163+norandom@users.noreply.github.com> Date: Wed, 31 Jul 2024 19:25:57 +0200 Subject: [PATCH] added better model flows, solved issues w pca and scalers --- ...AE_sysmon_dataset_(Excel_implant_C2).ipynb | 8400 ++++++++++++++--- 1 file changed, 7271 insertions(+), 1129 deletions(-) diff --git a/LinFormer_AutoML_on_AE_sysmon_dataset_(Excel_implant_C2).ipynb b/LinFormer_AutoML_on_AE_sysmon_dataset_(Excel_implant_C2).ipynb index 023a117..11c199e 100644 --- a/LinFormer_AutoML_on_AE_sysmon_dataset_(Excel_implant_C2).ipynb +++ b/LinFormer_AutoML_on_AE_sysmon_dataset_(Excel_implant_C2).ipynb @@ -17,7 +17,7 @@ "accelerator": "GPU", "widgets": { "application/vnd.jupyter.widget-state+json": { - "13c7730f61b24661bca4b4406f488fbb": { + "a336f820e5124f5d9e344bb8b0d12ee5": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", @@ -32,14 +32,14 @@ "_view_name": "HBoxView", "box_style": "", "children": [ - "IPY_MODEL_82838b6e6693488682c97ce82ed723bf", - "IPY_MODEL_41b566d613234e03876a7c6a68431d7b", - "IPY_MODEL_dd83f610ca584acbba3738f2c137b2b9" + "IPY_MODEL_150475d8c96148eaa9d5230b85283e64", + "IPY_MODEL_19f3e10b3dd94c71b9add7ee1d8f4c13", + "IPY_MODEL_5aa33fe21abc482c997975ec5712d0da" ], - "layout": "IPY_MODEL_ad4f22541b284758b868c95e5e8ceff0" + "layout": "IPY_MODEL_0b51be1ad43c43fa89041fea2b062768" } }, - "82838b6e6693488682c97ce82ed723bf": { + "150475d8c96148eaa9d5230b85283e64": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", @@ -54,697 +54,13 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_13d9c546446a482ab6634e2c69c70685", + "layout": "IPY_MODEL_5ab992330571497ab2a07cd04841b9d3", "placeholder": "​", - "style": "IPY_MODEL_10c31ef1ded9471cae583be49e7092e5", - "value": "100%" - } - }, - "41b566d613234e03876a7c6a68431d7b": { 
- "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_c1ad9d79f8464e71bd79ae848c103a58", - "max": 8039037, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_0de9cbe7986c49fb99ae4f3abdba42d9", - "value": 8039037 - } - }, - "dd83f610ca584acbba3738f2c137b2b9": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_6b12aceb2e4c46eeb708d0877a9ca639", - "placeholder": "​", - "style": "IPY_MODEL_b6edc45065c7488a8b8be5e1349b27ba", - "value": " 8.04M/8.04M [00:00<00:00, 9.88MiB/s]" - } - }, - "ad4f22541b284758b868c95e5e8ceff0": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - 
"grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "13d9c546446a482ab6634e2c69c70685": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "10c31ef1ded9471cae583be49e7092e5": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - 
"model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "c1ad9d79f8464e71bd79ae848c103a58": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "0de9cbe7986c49fb99ae4f3abdba42d9": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": 
"StyleView", - "bar_color": null, - "description_width": "" - } - }, - "6b12aceb2e4c46eeb708d0877a9ca639": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b6edc45065c7488a8b8be5e1349b27ba": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "6d9d173f139a43e3a6b0b1deb2fb557b": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": 
"@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_34569214f37b44e0802ddd403a4fa1a9", - "IPY_MODEL_2bdfcb2139bb4ae4b670c1fb49db6911", - "IPY_MODEL_b27ff271d01a49e38a32cd3ddcebe77a" - ], - "layout": "IPY_MODEL_92e050af8c674152b4d279fdda960458" - } - }, - "34569214f37b44e0802ddd403a4fa1a9": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_6f00da7475754dad8d5d5ce800ae673d", - "placeholder": "​", - "style": "IPY_MODEL_1a9b0c9e9cb941fbb57529d10d054084", - "value": "100%" - } - }, - "2bdfcb2139bb4ae4b670c1fb49db6911": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_545d68c2b8b742da9dd7983a3321ebf1", - "max": 1716416, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_8956f45722f841dab504335dcc1921bc", - "value": 1716416 - } - }, - "b27ff271d01a49e38a32cd3ddcebe77a": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - 
"model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_1785191337814317989989f0800e1424", - "placeholder": "​", - "style": "IPY_MODEL_d25bea1e539c4d328f05ded6af8b8154", - "value": " 1.72M/1.72M [00:00<00:00, 9.13MiB/s]" - } - }, - "92e050af8c674152b4d279fdda960458": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "6f00da7475754dad8d5d5ce800ae673d": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": 
"@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "1a9b0c9e9cb941fbb57529d10d054084": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "545d68c2b8b742da9dd7983a3321ebf1": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": 
null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "8956f45722f841dab504335dcc1921bc": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "1785191337814317989989f0800e1424": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - 
"grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "d25bea1e539c4d328f05ded6af8b8154": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "bcf5338c159d4d53bc3e39e717885292": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_7276e9b8b79d41ab991296d44521e2ee", - "IPY_MODEL_f2ab86c6413649a595edf4b8feeb6a66", - "IPY_MODEL_decc923894ab4ef4b35fa627b1b2dfb4" - ], - "layout": "IPY_MODEL_9875e07950114beca6741a51962c1f30" - } - }, - "7276e9b8b79d41ab991296d44521e2ee": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": 
"@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_90db1bae92c94a49ab8bc9c12073e552", - "placeholder": "​", - "style": "IPY_MODEL_92a42873a8ef4c00ad24ef47a62b4093", + "style": "IPY_MODEL_4a8897c90c19427fa82afa4fa62f31da", "value": "Optimization Progress: 100%" } }, - "f2ab86c6413649a595edf4b8feeb6a66": { + "19f3e10b3dd94c71b9add7ee1d8f4c13": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", @@ -760,15 +76,15 @@ "bar_style": "", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_1826f0b9003a4f8bb1c074f526560c86", + "layout": "IPY_MODEL_7df01173ffee4067b1c7490fab0b571b", "max": 120, "min": 0, "orientation": "horizontal", - "style": "IPY_MODEL_549831432f244f5080c74eb842bae875", + "style": "IPY_MODEL_6bd42f421a4e4e60b2d7edc8784e07aa", "value": 120 } }, - "decc923894ab4ef4b35fa627b1b2dfb4": { + "5aa33fe21abc482c997975ec5712d0da": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", @@ -783,13 +99,13 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_20be31d5be524d76bc2dbb3baad4edf3", + "layout": "IPY_MODEL_09932ad0a1fe4fd196b38b16dc74f94a", "placeholder": "​", - "style": "IPY_MODEL_fc71d71e57774cb1a0df69e82d3e7c2d", - "value": " 120/120 [02:00<00:00,  1.16pipeline/s]" + "style": "IPY_MODEL_f7354d5ee6e04705968e30d7a43ebbe7", + "value": " 120/120 [01:53<00:00,  1.21pipeline/s]" } }, - "9875e07950114beca6741a51962c1f30": { + "0b51be1ad43c43fa89041fea2b062768": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", @@ -841,7 +157,7 @@ "width": null } }, - "90db1bae92c94a49ab8bc9c12073e552": { + "5ab992330571497ab2a07cd04841b9d3": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": 
"1.2.0", @@ -893,7 +209,7 @@ "width": null } }, - "92a42873a8ef4c00ad24ef47a62b4093": { + "4a8897c90c19427fa82afa4fa62f31da": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", @@ -908,7 +224,7 @@ "description_width": "" } }, - "1826f0b9003a4f8bb1c074f526560c86": { + "7df01173ffee4067b1c7490fab0b571b": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", @@ -960,7 +276,7 @@ "width": null } }, - "549831432f244f5080c74eb842bae875": { + "6bd42f421a4e4e60b2d7edc8784e07aa": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", @@ -976,7 +292,7 @@ "description_width": "" } }, - "20be31d5be524d76bc2dbb3baad4edf3": { + "09932ad0a1fe4fd196b38b16dc74f94a": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", @@ -1028,7 +344,3085 @@ "width": null } }, - "fc71d71e57774cb1a0df69e82d3e7c2d": { + "f7354d5ee6e04705968e30d7a43ebbe7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "feb69d6d248b4e2abf52d6622dd3638b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + 
"IPY_MODEL_712ff38a74994c5082fe487de437ec67", + "IPY_MODEL_81a29faa64d44f2c9a173a9213618f4c", + "IPY_MODEL_35012b6363cf4ca88c4c5221d264ec25" + ], + "layout": "IPY_MODEL_598537621c514f51be27ebaf9bde7d54" + } + }, + "712ff38a74994c5082fe487de437ec67": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_825b3f67b44b4136a36909da95412031", + "placeholder": "​", + "style": "IPY_MODEL_8b121673d208412ab1f426a3ff1698ba", + "value": "Optimization Progress: 100%" + } + }, + "81a29faa64d44f2c9a173a9213618f4c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_02a6753498b14c459016b7ff31c4b6d7", + "max": 120, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_384c150f10e94ba78243d1a260e66887", + "value": 120 + } + }, + "35012b6363cf4ca88c4c5221d264ec25": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + 
"_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_bb1d1e093dec47f393c713548bfc6cc6", + "placeholder": "​", + "style": "IPY_MODEL_05b2a6fb28aa434a94c18c7d64cd55f1", + "value": " 120/120 [06:21<00:00,  1.55s/pipeline]" + } + }, + "598537621c514f51be27ebaf9bde7d54": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": "hidden", + "width": null + } + }, + "825b3f67b44b4136a36909da95412031": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + 
"align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8b121673d208412ab1f426a3ff1698ba": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "02a6753498b14c459016b7ff31c4b6d7": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + 
"grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "384c150f10e94ba78243d1a260e66887": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "bb1d1e093dec47f393c713548bfc6cc6": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + 
"min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "05b2a6fb28aa434a94c18c7d64cd55f1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a2f43127d721436185ba241b84cc87d7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_3618c2d6264c4359a17bbdecc7db70d8", + "IPY_MODEL_286cc95f7d6c47a6863c7fda02f83d3f", + "IPY_MODEL_cabaca33a5cc4d0fb9fa99f42ece4167" + ], + "layout": "IPY_MODEL_153d243e01b24b5cad305ec4d886b922" + } + }, + "3618c2d6264c4359a17bbdecc7db70d8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e8bf60db6d1b47b2a6752fcceebad0e7", + "placeholder": "​", + "style": 
"IPY_MODEL_600f0f18ccaf4c89aa37feaa0b360f13", + "value": "Optimization Progress: 100%" + } + }, + "286cc95f7d6c47a6863c7fda02f83d3f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_33f5284de07e4be29e4f39d5aaaa2d9b", + "max": 120, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_4a850f0d51014dfa9b55c86d04707b24", + "value": 120 + } + }, + "cabaca33a5cc4d0fb9fa99f42ece4167": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ae8a589b32c44836894c6d3b708b896d", + "placeholder": "​", + "style": "IPY_MODEL_a57766432bb54d24a3869e0553c026d1", + "value": " 120/120 [05:13<00:00,  1.83s/pipeline]" + } + }, + "153d243e01b24b5cad305ec4d886b922": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + 
"bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": "hidden", + "width": null + } + }, + "e8bf60db6d1b47b2a6752fcceebad0e7": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + 
"600f0f18ccaf4c89aa37feaa0b360f13": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "33f5284de07e4be29e4f39d5aaaa2d9b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4a850f0d51014dfa9b55c86d04707b24": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "ae8a589b32c44836894c6d3b708b896d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a57766432bb54d24a3869e0553c026d1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "2cf8dcb41b3340cea65c251b3d8681d4": { + "model_module": 
"@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_6e3a5bb9d6d7456982660f0edcc3b8c0", + "IPY_MODEL_0bb2408ba21a481983c0a74f1411ef0a", + "IPY_MODEL_4528e59b16d04228a5cc508fadb56a54" + ], + "layout": "IPY_MODEL_61072176986f4c9fb48f403c25ac3024" + } + }, + "6e3a5bb9d6d7456982660f0edcc3b8c0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9881b9f569b3445897972c0340870318", + "placeholder": "​", + "style": "IPY_MODEL_f09971fa706c4509b3530edabeb47864", + "value": "Optimization Progress: 100%" + } + }, + "0bb2408ba21a481983c0a74f1411ef0a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7510b9c108494e3dbe5ee031bf367e2b", + "max": 120, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_2ed7ace57f89415fba714200fe44c2f9", + 
"value": 120 + } + }, + "4528e59b16d04228a5cc508fadb56a54": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_bb51c5c7834c419a87a30752cd769d91", + "placeholder": "​", + "style": "IPY_MODEL_cc9bf86d64fc48f490a38fae5483ddb2", + "value": " 120/120 [07:12<00:00,  2.45s/pipeline]" + } + }, + "61072176986f4c9fb48f403c25ac3024": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": "hidden", + "width": null + } + }, + "9881b9f569b3445897972c0340870318": { + 
"model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f09971fa706c4509b3530edabeb47864": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "7510b9c108494e3dbe5ee031bf367e2b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2ed7ace57f89415fba714200fe44c2f9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "bb51c5c7834c419a87a30752cd769d91": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, 
+ "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "cc9bf86d64fc48f490a38fae5483ddb2": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ed38c4b5280a4e0da6b3e979ca380db5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_045cf04f33a643e8816a674d659cc252", + "IPY_MODEL_c5b0b6bebd0b49e0b10116770251023e", + "IPY_MODEL_c42b63c849f7469496fa292cc53efae3" + ], + "layout": "IPY_MODEL_a336fafd8e334185a39ca81268ca9f90" + } + }, + "045cf04f33a643e8816a674d659cc252": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + 
"_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_007b3fd39d1c480c959db4ed7abb37fe", + "placeholder": "​", + "style": "IPY_MODEL_5247fe5ea6724b15934a7142b8a8a8b1", + "value": "Optimization Progress: 100%" + } + }, + "c5b0b6bebd0b49e0b10116770251023e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ff008c746112461781825f85f212dd81", + "max": 120, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_c290371939ed4a8999c1411b426448ed", + "value": 120 + } + }, + "c42b63c849f7469496fa292cc53efae3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_09e3fa8828024d1cab95af2452cbf516", + "placeholder": "​", + "style": "IPY_MODEL_b1f5ea5f99b249dc9c8d2b26f43485f0", + "value": " 120/120 [04:12<00:00,  1.85s/pipeline]" + } + }, + "a336fafd8e334185a39ca81268ca9f90": { + "model_module": "@jupyter-widgets/base", + "model_name": 
"LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": "hidden", + "width": null + } + }, + "007b3fd39d1c480c959db4ed7abb37fe": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + 
"justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5247fe5ea6724b15934a7142b8a8a8b1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ff008c746112461781825f85f212dd81": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + 
"overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c290371939ed4a8999c1411b426448ed": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "09e3fa8828024d1cab95af2452cbf516": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b1f5ea5f99b249dc9c8d2b26f43485f0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": 
"1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b41836beb11a4623b7fa6091494c0e85": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_d1fb8e94033f47d3b7123c513473519d", + "IPY_MODEL_0f90f79a7f0d4bcbaa2ada2379b65f53", + "IPY_MODEL_dedf553fc5c24b14a86a779a61a39be5" + ], + "layout": "IPY_MODEL_6566843282bc4bb59c7044ab3b812f40" + } + }, + "d1fb8e94033f47d3b7123c513473519d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_fcefb4debd9a4445a94a9fc5847662e4", + "placeholder": "​", + "style": "IPY_MODEL_bbe6750092e44b2aafc5afb33e71ae88", + "value": "Optimization Progress: 100%" + } + }, + "0f90f79a7f0d4bcbaa2ada2379b65f53": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_50769ac7392e4bccaf83e32f8e5e3754", + "max": 120, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_f202232d07f946ccaf899e672c2b7dd7", + "value": 120 + } + }, + "dedf553fc5c24b14a86a779a61a39be5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_111571775b67440db61746dc6a653380", + "placeholder": "​", + "style": "IPY_MODEL_1fc51b4708134f18995b5dd73631446d", + "value": " 120/120 [05:59<00:00,  2.15s/pipeline]" + } + }, + "6566843282bc4bb59c7044ab3b812f40": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + 
"margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": "hidden", + "width": null + } + }, + "fcefb4debd9a4445a94a9fc5847662e4": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "bbe6750092e44b2aafc5afb33e71ae88": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": 
"StyleView", + "description_width": "" + } + }, + "50769ac7392e4bccaf83e32f8e5e3754": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f202232d07f946ccaf899e672c2b7dd7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "111571775b67440db61746dc6a653380": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + 
"_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1fc51b4708134f18995b5dd73631446d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "f466f42deeac4321b8b9768a11d6a755": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + 
"IPY_MODEL_06b74e83419b4d2b94913aeb490a7cb0", + "IPY_MODEL_de16524e0e5d46f9a8b511fa82e748a8", + "IPY_MODEL_e13204ee59ab400280347f23c0d31705" + ], + "layout": "IPY_MODEL_ad599c9ce9b04059b37c94dcae4b23a8" + } + }, + "06b74e83419b4d2b94913aeb490a7cb0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_aeefdbc3453543baa3d6e72ccd15ee54", + "placeholder": "​", + "style": "IPY_MODEL_3a84a44443a944c399486902d4db6bdc", + "value": "Optimization Progress: 100%" + } + }, + "de16524e0e5d46f9a8b511fa82e748a8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2e55913ae8944ef9923724a8351bacb4", + "max": 120, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_dcc3e817b40643388a5f99a54fe13e43", + "value": 120 + } + }, + "e13204ee59ab400280347f23c0d31705": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + 
"_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_272dc71a7ccf4608bfdea6a17524a886", + "placeholder": "​", + "style": "IPY_MODEL_5a3d943f85e34887aeb64a0e6b189f32", + "value": " 120/120 [09:53<00:00,  1.47s/pipeline]" + } + }, + "ad599c9ce9b04059b37c94dcae4b23a8": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": "hidden", + "width": null + } + }, + "aeefdbc3453543baa3d6e72ccd15ee54": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + 
"align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3a84a44443a944c399486902d4db6bdc": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "2e55913ae8944ef9923724a8351bacb4": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + 
"grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "dcc3e817b40643388a5f99a54fe13e43": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "272dc71a7ccf4608bfdea6a17524a886": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + 
"min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5a3d943f85e34887aeb64a0e6b189f32": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "065f516618c041f1ba57df39d17cc95d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_ca2767bb06444f03a1a31dd9e5216c45", + "IPY_MODEL_1d9710db98c04adeb3855f4062301e24", + "IPY_MODEL_57141f99793f45708db3961cf4178fc9" + ], + "layout": "IPY_MODEL_6c4c02ca0cb64050ac7e201a1a60be72" + } + }, + "ca2767bb06444f03a1a31dd9e5216c45": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b6b5382bb2684e74a2c36b49f423eee3", + "placeholder": "​", + "style": 
"IPY_MODEL_a4a4a02a4332458789866a0e0e7839d3", + "value": "Optimization Progress: 100%" + } + }, + "1d9710db98c04adeb3855f4062301e24": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_871438bb5a0d4d31b3b8a0f18fcdac78", + "max": 120, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_294ffc18963140ea96c01095e3a1300f", + "value": 120 + } + }, + "57141f99793f45708db3961cf4178fc9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6dda83426ca749bd822d2bd0d39744ed", + "placeholder": "​", + "style": "IPY_MODEL_d37293f1d67f45babbe2a875df701c40", + "value": " 120/120 [04:43<00:00,  5.74s/pipeline]" + } + }, + "6c4c02ca0cb64050ac7e201a1a60be72": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + 
"bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": "hidden", + "width": null + } + }, + "b6b5382bb2684e74a2c36b49f423eee3": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + 
"a4a4a02a4332458789866a0e0e7839d3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "871438bb5a0d4d31b3b8a0f18fcdac78": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "294ffc18963140ea96c01095e3a1300f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "6dda83426ca749bd822d2bd0d39744ed": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d37293f1d67f45babbe2a875df701c40": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "2ae5d11987c44076b95c1294af800c28": { + "model_module": 
"@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_75c27d61248b477e9c58211b066ab62f", + "IPY_MODEL_00628b308f4244c4ad533a67cb90ba43", + "IPY_MODEL_851e8257d678458db2612a04803e8c17" + ], + "layout": "IPY_MODEL_8f472688c98745f28345f31e852b361b" + } + }, + "75c27d61248b477e9c58211b066ab62f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f1b8071a183e47af81e8d96290ca7370", + "placeholder": "​", + "style": "IPY_MODEL_18d1acb1cdad46f1832c98ab44f0bc53", + "value": "Optimization Progress: 100%" + } + }, + "00628b308f4244c4ad533a67cb90ba43": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_21abac8483874bb1acdc45f6e8f78b53", + "max": 120, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_b4e4db272e35407899de20c7b30d1551", + 
"value": 120 + } + }, + "851e8257d678458db2612a04803e8c17": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9d4143827e3f4e949cde68384a7beb59", + "placeholder": "​", + "style": "IPY_MODEL_aa805872f24140f6aa6ee2ac72b34b59", + "value": " 120/120 [04:12<00:00,  1.73pipeline/s]" + } + }, + "8f472688c98745f28345f31e852b361b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": "hidden", + "width": null + } + }, + "f1b8071a183e47af81e8d96290ca7370": { + 
"model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "18d1acb1cdad46f1832c98ab44f0bc53": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "21abac8483874bb1acdc45f6e8f78b53": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b4e4db272e35407899de20c7b30d1551": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "9d4143827e3f4e949cde68384a7beb59": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, 
+ "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "aa805872f24140f6aa6ee2ac72b34b59": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ed028f92fe6544159b14dcf220623223": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_2ff14125384a4f3cad97a03f9d2ff55f", + "IPY_MODEL_d70eced834c64f399a3db48dcc19f802", + "IPY_MODEL_b85cfd236fbc4c50b2572f0ff7ddea1c" + ], + "layout": "IPY_MODEL_b4e51caccd1e40baac5143667206d8cb" + } + }, + "2ff14125384a4f3cad97a03f9d2ff55f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + 
"_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8031586f1f8d4dbbaf44708b12f4eb13", + "placeholder": "​", + "style": "IPY_MODEL_a2278a2947354727a76758cb4bf5e682", + "value": "Optimization Progress: 100%" + } + }, + "d70eced834c64f399a3db48dcc19f802": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a2debbdff91649f29c7a579911090df4", + "max": 120, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_7d4b85786ff04c9d843753c91c80f854", + "value": 120 + } + }, + "b85cfd236fbc4c50b2572f0ff7ddea1c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_cb681559245a4d12bd4551bcfea063c4", + "placeholder": "​", + "style": "IPY_MODEL_0967c0fe1d9a4048b4e7b4266a75d893", + "value": " 120/120 [05:41<00:00,  2.27s/pipeline]" + } + }, + "b4e51caccd1e40baac5143667206d8cb": { + "model_module": "@jupyter-widgets/base", + "model_name": 
"LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": "hidden", + "width": null + } + }, + "8031586f1f8d4dbbaf44708b12f4eb13": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + 
"justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a2278a2947354727a76758cb4bf5e682": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a2debbdff91649f29c7a579911090df4": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + 
"overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7d4b85786ff04c9d843753c91c80f854": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "cb681559245a4d12bd4551bcfea063c4": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0967c0fe1d9a4048b4e7b4266a75d893": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": 
"1.5.0", @@ -2752,6 +5146,1374 @@ "_view_name": "StyleView", "description_width": "" } + }, + "d35be0ff26cc4d0e85b498638ec728da": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_6c3982ed895846f2a07befc28277debd", + "IPY_MODEL_a031633d17474670a3055d94249d58ee", + "IPY_MODEL_08d30171bbb44668aea730cc7e2e828a" + ], + "layout": "IPY_MODEL_bda89cfac064435b8de02f41e689e0a3" + } + }, + "6c3982ed895846f2a07befc28277debd": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_603fa104247d4ae8888440acca48221e", + "placeholder": "​", + "style": "IPY_MODEL_bf6290dc87bd447da28102899e57ffe5", + "value": "100%" + } + }, + "a031633d17474670a3055d94249d58ee": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_fe8beb6587574f93a2947c2bcc907091", + "max": 8039037, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_3b5d864ced1149c4979b491cbc3070f7", + "value": 8039037 + } + }, + "08d30171bbb44668aea730cc7e2e828a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e2306dc620194ce485bd2663c7d25738", + "placeholder": "​", + "style": "IPY_MODEL_329389fea5f74dca965f296707c2a8d0", + "value": " 8.04M/8.04M [00:00<00:00, 29.6MiB/s]" + } + }, + "bda89cfac064435b8de02f41e689e0a3": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + 
"overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "603fa104247d4ae8888440acca48221e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "bf6290dc87bd447da28102899e57ffe5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "fe8beb6587574f93a2947c2bcc907091": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + 
"_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3b5d864ced1149c4979b491cbc3070f7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "e2306dc620194ce485bd2663c7d25738": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + 
"align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "329389fea5f74dca965f296707c2a8d0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "9d26aed57a8244ada934039dd928555c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_b04abee189594c62b235e5076deafc84", + "IPY_MODEL_ff0cc2d2cf8f4009bd200fd17d7ba517", + "IPY_MODEL_49f821f89aa44034af5988b888734185" + ], + "layout": "IPY_MODEL_f10f65b1cb0547928b1c7b87c6385de4" + } + }, + "b04abee189594c62b235e5076deafc84": 
{ + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_bdeb6c6df5c049a69c0b613a42a69e35", + "placeholder": "​", + "style": "IPY_MODEL_565b8a229f66455886175a51a975878d", + "value": "100%" + } + }, + "ff0cc2d2cf8f4009bd200fd17d7ba517": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3491f280a58046518c5c28686cd80eb5", + "max": 1716416, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_ef3de5b665104edcab59a1200259b897", + "value": 1716416 + } + }, + "49f821f89aa44034af5988b888734185": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ed1d6f1156f748f8993dda1ee97f5caf", + "placeholder": "​", + "style": "IPY_MODEL_e84db532db0643fd81ff21364b3b2e53", + "value": " 1.72M/1.72M 
[00:00<00:00, 29.0MiB/s]" + } + }, + "f10f65b1cb0547928b1c7b87c6385de4": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "bdeb6c6df5c049a69c0b613a42a69e35": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + 
"grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "565b8a229f66455886175a51a975878d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "3491f280a58046518c5c28686cd80eb5": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + 
"min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ef3de5b665104edcab59a1200259b897": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "ed1d6f1156f748f8993dda1ee97f5caf": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + 
"e84db532db0643fd81ff21364b3b2e53": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "48125358d93747af8d60adf49cb4bed3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_ce66e5dab6a7433498679f44d7922fde", + "IPY_MODEL_037cca2c146a429498219af57498f92d", + "IPY_MODEL_a4dd416d3a3f400a958713c9e1338830" + ], + "layout": "IPY_MODEL_c98d82d69aa5436e86a00b3b24c71dc8" + } + }, + "ce66e5dab6a7433498679f44d7922fde": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3e842df16ae04f7c9fa0500251ea6070", + "placeholder": "​", + "style": "IPY_MODEL_125527f8c490490f93c88276e8da441e", + "value": "100%" + } + }, + "037cca2c146a429498219af57498f92d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + 
"_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_38ea9e2fdf934b299dbf47b478f454cd", + "max": 3288938, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_25123bc15acb4025a468103612181d4b", + "value": 3288938 + } + }, + "a4dd416d3a3f400a958713c9e1338830": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_38eadb841c66408ca51a85054a2da63c", + "placeholder": "​", + "style": "IPY_MODEL_283ef3c448dd4fb0baaa1621123a5ced", + "value": " 3.29M/3.29M [00:00<00:00, 9.27MiB/s]" + } + }, + "c98d82d69aa5436e86a00b3b24c71dc8": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + 
"grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3e842df16ae04f7c9fa0500251ea6070": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "125527f8c490490f93c88276e8da441e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "38ea9e2fdf934b299dbf47b478f454cd": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "25123bc15acb4025a468103612181d4b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "38eadb841c66408ca51a85054a2da63c": { + "model_module": "@jupyter-widgets/base", 
+ "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "283ef3c448dd4fb0baaa1621123a5ced": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "0ee9d77e31c343d2b226c9e1c370899a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + 
"_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_cefbf3ec854e463eac3047d7f2366a9b", + "IPY_MODEL_5d5a04539517439c9a47f725bbfca641", + "IPY_MODEL_487df5716a1f42b9ad08a05300882448" + ], + "layout": "IPY_MODEL_f6f95cf2eb1f43cdbb1be5a1ffc19621" + } + }, + "cefbf3ec854e463eac3047d7f2366a9b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a8b2545d7d94490b8df9d8a63d7fde2d", + "placeholder": "​", + "style": "IPY_MODEL_18b7438e17ef4851aa9c8d0dfc52f2d5", + "value": "Optimization Progress: 100%" + } + }, + "5d5a04539517439c9a47f725bbfca641": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_939ebe26aaf74df78c81c255aefcc7e5", + "max": 20, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_c5677148554a4e65b74ae9fc95ae4c37", + "value": 20 + } + }, + "487df5716a1f42b9ad08a05300882448": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_328be8dce5cf4b2a8418e7693f1829c0", + "placeholder": "​", + "style": "IPY_MODEL_48d9d503aebd44578cb545fdb4b28776", + "value": " 40/40 [02:13<00:00,  2.60s/pipeline]" + } + }, + "f6f95cf2eb1f43cdbb1be5a1ffc19621": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": "hidden", + "width": null + } + }, + "a8b2545d7d94490b8df9d8a63d7fde2d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "18b7438e17ef4851aa9c8d0dfc52f2d5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "939ebe26aaf74df78c81c255aefcc7e5": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + 
"grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c5677148554a4e65b74ae9fc95ae4c37": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "328be8dce5cf4b2a8418e7693f1829c0": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": 
null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "48d9d503aebd44578cb545fdb4b28776": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } } } } @@ -2793,7 +6555,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 8, "metadata": { "id": "ayxWRSxzgwh_" }, @@ -2834,9 +6596,9 @@ "base_uri": "https://localhost:8080/" }, "id": "vyKWa35bkFcG", - "outputId": "61b4bff8-193b-40f0-fb90-d1d1a8c2010c" + "outputId": "afaf2961-374c-4ced-ba84-a8eb74963834" }, - "execution_count": 2, + "execution_count": 9, "outputs": [ { "output_type": "stream", @@ -2944,23 +6706,23 @@ "base_uri": "https://localhost:8080/", "height": 85, "referenced_widgets": [ - "13c7730f61b24661bca4b4406f488fbb", - "82838b6e6693488682c97ce82ed723bf", - "41b566d613234e03876a7c6a68431d7b", - "dd83f610ca584acbba3738f2c137b2b9", - "ad4f22541b284758b868c95e5e8ceff0", - "13d9c546446a482ab6634e2c69c70685", - "10c31ef1ded9471cae583be49e7092e5", - "c1ad9d79f8464e71bd79ae848c103a58", - "0de9cbe7986c49fb99ae4f3abdba42d9", - "6b12aceb2e4c46eeb708d0877a9ca639", - "b6edc45065c7488a8b8be5e1349b27ba" + "d35be0ff26cc4d0e85b498638ec728da", + "6c3982ed895846f2a07befc28277debd", + "a031633d17474670a3055d94249d58ee", + "08d30171bbb44668aea730cc7e2e828a", + "bda89cfac064435b8de02f41e689e0a3", + "603fa104247d4ae8888440acca48221e", + 
"bf6290dc87bd447da28102899e57ffe5", + "fe8beb6587574f93a2947c2bcc907091", + "3b5d864ced1149c4979b491cbc3070f7", + "e2306dc620194ce485bd2663c7d25738", + "329389fea5f74dca965f296707c2a8d0" ] }, "id": "5EEoa3gUmFpn", - "outputId": "83f18cff-926c-467d-dc4e-089e69342895" + "outputId": "806090b8-b3d5-473b-9da5-52de1860cb00" }, - "execution_count": 3, + "execution_count": 10, "outputs": [ { "output_type": "stream", @@ -2978,7 +6740,7 @@ "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, - "model_id": "13c7730f61b24661bca4b4406f488fbb" + "model_id": "d35be0ff26cc4d0e85b498638ec728da" } }, "metadata": {} @@ -3023,23 +6785,23 @@ "base_uri": "https://localhost:8080/", "height": 85, "referenced_widgets": [ - "6d9d173f139a43e3a6b0b1deb2fb557b", - "34569214f37b44e0802ddd403a4fa1a9", - "2bdfcb2139bb4ae4b670c1fb49db6911", - "b27ff271d01a49e38a32cd3ddcebe77a", - "92e050af8c674152b4d279fdda960458", - "6f00da7475754dad8d5d5ce800ae673d", - "1a9b0c9e9cb941fbb57529d10d054084", - "545d68c2b8b742da9dd7983a3321ebf1", - "8956f45722f841dab504335dcc1921bc", - "1785191337814317989989f0800e1424", - "d25bea1e539c4d328f05ded6af8b8154" + "9d26aed57a8244ada934039dd928555c", + "b04abee189594c62b235e5076deafc84", + "ff0cc2d2cf8f4009bd200fd17d7ba517", + "49f821f89aa44034af5988b888734185", + "f10f65b1cb0547928b1c7b87c6385de4", + "bdeb6c6df5c049a69c0b613a42a69e35", + "565b8a229f66455886175a51a975878d", + "3491f280a58046518c5c28686cd80eb5", + "ef3de5b665104edcab59a1200259b897", + "ed1d6f1156f748f8993dda1ee97f5caf", + "e84db532db0643fd81ff21364b3b2e53" ] }, "id": "PA-8JXLyn4Uv", - "outputId": "27f96d7f-c2ef-4809-a075-541d06920852" + "outputId": "1850bb3e-a2bc-4cf0-d988-98f3986e2ab1" }, - "execution_count": 4, + "execution_count": 75, "outputs": [ { "output_type": "stream", @@ -3057,7 +6819,7 @@ "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, - "model_id": "6d9d173f139a43e3a6b0b1deb2fb557b" + "model_id": 
"9d26aed57a8244ada934039dd928555c" } }, "metadata": {} @@ -3071,6 +6833,85 @@ } ] }, + { + "cell_type": "markdown", + "source": [ + "# Download pre-vecorized data from the project" + ], + "metadata": { + "id": "rTw3shjR33yL" + } + }, + { + "cell_type": "code", + "source": [ + "# File name to search for\n", + "file_name = \"lab_logs_blindtest_activity_sysmon_1000samples_july_28_2024_filtered_vectors.parquet\"\n", + "\n", + "# Get the download URL of the specific file\n", + "# download_url = get_specific_file_from_latest_release(github_token, repository_name, file_name)\n", + "download_url = get_specific_file_from_tagged_release(github_token, repository_name, \"lab\", file_name)\n", + "print(download_url)\n", + "\n", + "if download_url:\n", + " local_file_path = file_name\n", + " download_file(download_url, github_token, local_file_path)\n", + "else:\n", + " print(\"File not found.\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 85, + "referenced_widgets": [ + "48125358d93747af8d60adf49cb4bed3", + "ce66e5dab6a7433498679f44d7922fde", + "037cca2c146a429498219af57498f92d", + "a4dd416d3a3f400a958713c9e1338830", + "c98d82d69aa5436e86a00b3b24c71dc8", + "3e842df16ae04f7c9fa0500251ea6070", + "125527f8c490490f93c88276e8da441e", + "38ea9e2fdf934b299dbf47b478f454cd", + "25123bc15acb4025a468103612181d4b", + "38eadb841c66408ca51a85054a2da63c", + "283ef3c448dd4fb0baaa1621123a5ced" + ] + }, + "id": "WdRLtddt378k", + "outputId": "58f22716-5958-4cf4-a2d4-eff7384b87cc" + }, + "execution_count": 11, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "https://api.github.com/repos/norandom/log2ml/releases/assets/182698628\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " 0%| | 0.00/3.29M [00:00 0:\n", + " # Get a random index\n", + " random_index = random.randint(0, bad_messages.height - 1)\n", + "\n", + " # Select the message at that random index\n", + " random_bad_message = 
bad_messages.row(random_index)[0]\n", + " print(random_bad_message)\n", + "else:\n", + " print(\"No bad messages labeled found.\")\n", + "\n", + "print(\"\\n\\n\")\n", + "\n", + "if good_messages.height > 0:\n", + " # Get a random index\n", + " random_index = random.randint(0, good_messages.height - 1)\n", + "\n", + " # Select the message at that random index\n", + " random_good_message = good_messages.row(random_index)[0]\n", + " print(random_good_message)\n", + "else:\n", + " print(\"No good messages labeled found.\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "4SguqPPKSdpJ", - "outputId": "9e9a39cb-10b8-44fb-bc22-65deafe45989" + "outputId": "b873ab30-300f-4b23-bf61-f7ed5ced345d" }, - "execution_count": null, + "execution_count": 123, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "['@timestamp', 'host.hostname', 'host.ip', 'log.level', 'winlog.event_id', 'winlog.task', 'message', 'filtered_message', 'image', 'target_filename', 'parent_image', 'text', 'temp_folder', 'filename', 'label']\n" + "['@timestamp', 'host.hostname', 'host.ip', 'log.level', 'winlog.event_id', 'winlog.task', 'message', 'filtered_message', 'image', 'target_filename', 'parent_image', 'text', 'temp_folder', 'filename', 'label', 'message_vector']\n", + "shape: (13_455, 2)\n", + "┌───────┬──────────────────────────────┐\n", + "│ label ┆ filtered_message │\n", + "│ --- ┆ --- │\n", + "│ str ┆ str │\n", + "╞═══════╪══════════════════════════════╡\n", + "│ good ┆ Network connection detected: │\n", + "│ ┆ Rul… │\n", + "│ good ┆ Network connection detected: │\n", + "│ ┆ Rul… │\n", + "│ good ┆ Process accessed: │\n", + "│ ┆ RuleName: - │\n", + "│ ┆ Ut… │\n", + "│ good ┆ Process accessed: │\n", + "│ ┆ RuleName: - │\n", + "│ ┆ Ut… │\n", + "│ … ┆ … │\n", + "│ good ┆ Process accessed: │\n", + "│ ┆ RuleName: - │\n", + "│ ┆ Ut… │\n", + "│ good ┆ Process accessed: │\n", + "│ ┆ RuleName: - │\n", + "│ ┆ Ut… │\n", + "│ good ┆ Process accessed: │\n", + "│ ┆ 
RuleName: - │\n", + "│ ┆ Ut… │\n", + "│ good ┆ Process Create: │\n", + "│ ┆ RuleName: - │\n", + "│ ┆ UtcT… │\n", + "└───────┴──────────────────────────────┘\n", + "File created:\n", + "RuleName: EXE\n", + "UtcTime: 2024-07-28 21:54:31.900\n", + "ProcessGuid: {18e8265a-be12-66a6-3010-000000004400}\n", + "ProcessId: 1248\n", + "Image: C:\\Program Files\\Microsoft Office\\Root\\Office16\\EXCEL.EXE\n", + "TargetFilename: C:\\Users\\student\\AppData\\Local\\Temp\\file.exe\n", + "CreationUtcTime: 2024-07-23 14:24:50.520\n", + "\n", + "\n", + "\n", + "Dns query:\n", + "RuleName: -\n", + "UtcTime: 2024-07-28 19:05:11.949\n", + "ProcessGuid: {18e8265a-9666-66a6-4f0b-000000004400}\n", + "ProcessId: 3232\n", + "QueryName: messaging\n", + "QueryStatus: 0\n", + "QueryResults: type: 5 prod-campaignaggregator.omexexternallfb.office.net.akadns.net;::ffff:52.109.16.3;\n", + "Image: C:\\Program Files\\Microsoft Office\\root\\Office16\\EXCEL.EXE\n" ] } ] @@ -3846,22 +7762,171 @@ " # Assuming outputs is the tensor of interest\n", " vector = outputs.mean(dim=1).detach() # Detach the tensor from the GPU\n", " return vector.cpu().numpy() # Move tensor back to CPU and convert to numpy\n", - "\n", - "# Assuming `better_columns_df` is a Polars DataFrame with a column \"filtered_message\"\n", + "\n" + ], + "metadata": { + "id": "EyvdFj83SQKI" + }, + "execution_count": 127, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Assuming df_f is a Polars DataFrame with a column \"filtered_message\"\n", "df_f = df_f.with_columns(\n", " pl.col(\"filtered_message\").map_elements(lambda x: vectorize_text(x).flatten(), return_dtype=pl.Object).alias(\"message_vector\")\n", + ")" + ], + "metadata": { + "id": "doS16Nq4S37g" + }, + "execution_count": 15, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# broken\n", + "\n", + "\n", + "import polars as pl\n", + "import numpy as np\n", + "import json\n", + "\n", + "# Function to convert vector to string\n", + "def 
vector_to_string(vec):\n", + " return json.dumps(vec.tolist())\n", + "\n", + "# Function to convert string back to vector\n", + "def string_to_vector(s):\n", + " return np.array(json.loads(s))\n", + "\n", + "# Convert vector column to string representation\n", + "df_f = df_f.with_columns(\n", + " pl.col(\"message_vector\").map_elements(vector_to_string).alias(\"message_vector_str\")\n", ")\n", "\n", - "print(df_f)" + "# Drop the original vector column\n", + "df_f = df_f.drop(\"message_vector\")\n", + "\n", + "# Write to Parquet\n", + "df_f.write_parquet(\"lab_logs_blindtest_activity_sysmon_1000samples_july_28_2024_filtered_with_vectors.parquet\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, - "id": "EyvdFj83SQKI", - "outputId": "2482d1f0-3f47-41e4-df13-5f73325e5e80" + "id": "clEaIJjHXRhM", + "outputId": "35b77b4d-5c35-46af-a0ae-645bd3f47328" }, - "execution_count": null, + "execution_count": 39, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "shape: (5, 1)\n", + "┌────────────────┐\n", + "│ message_vector │\n", + "│ --- │\n", + "│ u32 │\n", + "╞════════════════╡\n", + "│ 30000 │\n", + "│ 30000 │\n", + "│ 30000 │\n", + "│ 30000 │\n", + "│ 30000 │\n", + "└────────────────┘\n", + "Sample vector length: 30000\n", + "Numpy vector shape: (30000,)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# broken\n", + "\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "# Read from Parquet using pandas\n", + "pdf_read = pd.read_parquet(\"lab_logs_blindtest_activity_sysmon_1000samples_july_28_2024_filtered_with_vectors.parquet\")\n", + "\n", + "# Convert lists back to numpy arrays if needed\n", + "pdf_read['message_vector'] = pdf_read['message_vector_str'].apply(np.array)\n", + "\n", + "# Verify the shape of the vector\n", + "print(pdf_read['message_vector'].str.len().head())\n", + "\n", + "# Check a sample vector to ensure dimensionality is preserved\n", + "sample_vector = 
pdf_read['message_vector'].iloc[0]\n", + "print(f\"Sample vector shape: {sample_vector.shape}\")\n", + "\n", + "# If you need to convert back to Polars\n", + "df_read = pl.from_pandas(pdf_read)\n", + "\n", + "# Verify in Polars\n", + "print(df_read.select(pl.col(\"message_vector\").list.len()).head())" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 327 + }, + "id": "981FBJ_rdh-V", + "outputId": "f363fc9e-071a-44ca-c9b5-1a1ce009e23a" + }, + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0 636264\n", + "1 636780\n", + "2 640357\n", + "3 640529\n", + "4 640428\n", + "Name: message_vector, dtype: int64\n" + ] + }, + { + "output_type": "error", + "ename": "AttributeError", + "evalue": "'str' object has no attribute 'shape'", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;31m# Check a sample vector to ensure dimensionality is preserved\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0msample_vector\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpdf_read\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'message_vector'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 15\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"Sample vector shape: {sample_vector.shape}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 16\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[0;31m# If you need to convert back to 
Polars\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mAttributeError\u001b[0m: 'str' object has no attribute 'shape'" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import polars as pl\n", + "import numpy as np\n", + "\n", + "print(df_f)\n", + "\n", + "print()\n", + "# Convert the 'message_vector' column to a NumPy array\n", + "X = np.array(df_f['message_vector'].to_list())\n", + "print(\"Original data shape:\", X.shape)\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "W849gxLgM3vP", + "outputId": "368b0055-d479-405f-b9bd-388f4348a7ef" + }, + "execution_count": 19, "outputs": [ { "output_type": "stream", @@ -3871,200 +7936,57 @@ "┌────────────┬────────────┬────────────┬────────────┬───┬───────────┬──────────┬───────┬───────────┐\n", "│ @timestamp ┆ host.hostn ┆ host.ip ┆ log.level ┆ … ┆ temp_fold ┆ filename ┆ label ┆ message_v │\n", "│ --- ┆ ame ┆ --- ┆ --- ┆ ┆ er ┆ --- ┆ --- ┆ ector │\n", - "│ str ┆ --- ┆ str ┆ str ┆ ┆ --- ┆ str ┆ str ┆ --- │\n", - "│ ┆ str ┆ ┆ ┆ ┆ str ┆ ┆ ┆ object │\n", + "│ datetime[μ ┆ --- ┆ str ┆ str ┆ ┆ --- ┆ str ┆ str ┆ --- │\n", + "│ s, UTC] ┆ str ┆ ┆ ┆ ┆ str ┆ ┆ ┆ object │\n", "╞════════════╪════════════╪════════════╪════════════╪═══╪═══════════╪══════════╪═══════╪═══════════╡\n", - "│ 2024-07-28 ┆ win10 ┆ fe80::c1af ┆ informatio ┆ … ┆ No ┆ ┆ good ┆ [-0.45116 │\n", - "│ T15:08:24. ┆ ┆ :35de:6006 ┆ n ┆ ┆ ┆ ┆ ┆ 934 0.01 │\n", - "│ 277Z ┆ ┆ :d4cf ┆ ┆ ┆ ┆ ┆ ┆ 940297 │\n", - "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ -0.4095… │\n", - "│ 2024-07-28 ┆ win10 ┆ fe80::c1af ┆ informatio ┆ … ┆ No ┆ ┆ good ┆ [-0.45242 │\n", - "│ T15:08:24. ┆ ┆ :35de:6006 ┆ n ┆ ┆ ┆ ┆ ┆ 962 0.02 │\n", - "│ 488Z ┆ ┆ :d4cf ┆ ┆ ┆ ┆ ┆ ┆ 170923 │\n", - "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ -0.3832… │\n", - "│ 2024-07-28 ┆ win10 ┆ fe80::c1af ┆ informatio ┆ … ┆ No ┆ ┆ good ┆ [-0.37145 │\n", - "│ T15:08:25. 
┆ ┆ :35de:6006 ┆ n ┆ ┆ ┆ ┆ ┆ 707 0.04 │\n", - "│ 005Z ┆ ┆ :d4cf ┆ ┆ ┆ ┆ ┆ ┆ 775189 │\n", - "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ -0.2652… │\n", - "│ 2024-07-28 ┆ win10 ┆ fe80::c1af ┆ informatio ┆ … ┆ No ┆ ┆ good ┆ [-0.34181 │\n", - "│ T15:08:25. ┆ ┆ :35de:6006 ┆ n ┆ ┆ ┆ ┆ ┆ 97 0.04 │\n", - "│ 005Z ┆ ┆ :d4cf ┆ ┆ ┆ ┆ ┆ ┆ 779522 │\n", - "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ -0.2722… │\n", + "│ 2024-07-28 ┆ win10 ┆ fe80::c1af ┆ informatio ┆ … ┆ No ┆ ┆ good ┆ [-0.48557 │\n", + "│ 15:08:24.2 ┆ ┆ :35de:6006 ┆ n ┆ ┆ ┆ ┆ ┆ 746 0.19 │\n", + "│ 77 UTC ┆ ┆ :d4cf ┆ ┆ ┆ ┆ ┆ ┆ 012241 │\n", + "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 0.0323… │\n", + "│ 2024-07-28 ┆ win10 ┆ fe80::c1af ┆ informatio ┆ … ┆ No ┆ ┆ good ┆ [-0.44357 │\n", + "│ 15:08:24.4 ┆ ┆ :35de:6006 ┆ n ┆ ┆ ┆ ┆ ┆ 03 0.22 │\n", + "│ 88 UTC ┆ ┆ :d4cf ┆ ┆ ┆ ┆ ┆ ┆ 111408 │\n", + "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 0.0301… │\n", + "│ 2024-07-28 ┆ win10 ┆ fe80::c1af ┆ informatio ┆ … ┆ No ┆ ┆ good ┆ [-0.29139 │\n", + "│ 15:08:25.0 ┆ ┆ :35de:6006 ┆ n ┆ ┆ ┆ ┆ ┆ 94 0.17 │\n", + "│ 05 UTC ┆ ┆ :d4cf ┆ ┆ ┆ ┆ ┆ ┆ 932689 │\n", + "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 0.1016… │\n", + "│ 2024-07-28 ┆ win10 ┆ fe80::c1af ┆ informatio ┆ … ┆ No ┆ ┆ good ┆ [-0.29095 │\n", + "│ 15:08:25.0 ┆ ┆ :35de:6006 ┆ n ┆ ┆ ┆ ┆ ┆ 116 0.16 │\n", + "│ 05 UTC ┆ ┆ :d4cf ┆ ┆ ┆ ┆ ┆ ┆ 983014 │\n", + "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 0.1212… │\n", "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", - "│ 2024-07-28 ┆ win10 ┆ fe80::c1af ┆ informatio ┆ … ┆ No ┆ ┆ good ┆ [-0.35288 │\n", - "│ T23:35:53. ┆ ┆ :35de:6006 ┆ n ┆ ┆ ┆ ┆ ┆ 972 0.03 │\n", - "│ 054Z ┆ ┆ :d4cf ┆ ┆ ┆ ┆ ┆ ┆ 555745 │\n", - "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ -0.2832… │\n", - "│ 2024-07-28 ┆ win10 ┆ fe80::c1af ┆ informatio ┆ … ┆ No ┆ ┆ good ┆ [-0.37340 │\n", - "│ T23:35:54. ┆ ┆ :35de:6006 ┆ n ┆ ┆ ┆ ┆ ┆ 525 0.03 │\n", - "│ 133Z ┆ ┆ :d4cf ┆ ┆ ┆ ┆ ┆ ┆ 428246 │\n", - "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ -0.2805… │\n", - "│ 2024-07-28 ┆ win10 ┆ fe80::c1af ┆ informatio ┆ … ┆ No ┆ ┆ good ┆ [-0.36035 │\n", - "│ T23:35:54. 
┆ ┆ :35de:6006 ┆ n ┆ ┆ ┆ ┆ ┆ 54 0.03 │\n", - "│ 133Z ┆ ┆ :d4cf ┆ ┆ ┆ ┆ ┆ ┆ 481918 │\n", - "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ -0.2802… │\n", - "│ 2024-07-28 ┆ win10 ┆ fe80::c1af ┆ informatio ┆ … ┆ No ┆ ┆ good ┆ [-0.41050 │\n", - "│ T23:41:55. ┆ ┆ :35de:6006 ┆ n ┆ ┆ ┆ ┆ ┆ 297 0.01 │\n", - "│ 301Z ┆ ┆ :d4cf ┆ ┆ ┆ ┆ ┆ ┆ 828452 │\n", - "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ -0.3112… │\n", - "└────────────┴────────────┴────────────┴────────────┴───┴───────────┴──────────┴───────┴───────────┘\n" + "│ 2024-07-28 ┆ win10 ┆ fe80::c1af ┆ informatio ┆ … ┆ No ┆ ┆ good ┆ [-0.27548 │\n", + "│ 23:35:53.0 ┆ ┆ :35de:6006 ┆ n ┆ ┆ ┆ ┆ ┆ 81 0.19 │\n", + "│ 54 UTC ┆ ┆ :d4cf ┆ ┆ ┆ ┆ ┆ ┆ 213162 │\n", + "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 0.1174… │\n", + "│ 2024-07-28 ┆ win10 ┆ fe80::c1af ┆ informatio ┆ … ┆ No ┆ ┆ good ┆ [-0.29359 │\n", + "│ 23:35:54.1 ┆ ┆ :35de:6006 ┆ n ┆ ┆ ┆ ┆ ┆ 23 0.18 │\n", + "│ 33 UTC ┆ ┆ :d4cf ┆ ┆ ┆ ┆ ┆ ┆ 748309 │\n", + "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 0.1262… │\n", + "│ 2024-07-28 ┆ win10 ┆ fe80::c1af ┆ informatio ┆ … ┆ No ┆ ┆ good ┆ [-0.27579 │\n", + "│ 23:35:54.1 ┆ ┆ :35de:6006 ┆ n ┆ ┆ ┆ ┆ ┆ 793 0.18 │\n", + "│ 33 UTC ┆ ┆ :d4cf ┆ ┆ ┆ ┆ ┆ ┆ 788172 │\n", + "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 0.1131… │\n", + "│ 2024-07-28 ┆ win10 ┆ fe80::c1af ┆ informatio ┆ … ┆ No ┆ ┆ good ┆ [-0.35910 │\n", + "│ 23:41:55.3 ┆ ┆ :35de:6006 ┆ n ┆ ┆ ┆ ┆ ┆ 064 │\n", + "│ 01 UTC ┆ ┆ :d4cf ┆ ┆ ┆ ┆ ┆ ┆ 0.1997092 │\n", + "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 0.0654… │\n", + "└────────────┴────────────┴────────────┴────────────┴───┴───────────┴──────────┴───────┴───────────┘\n", + "\n", + "Original data shape: (13455, 30000)\n" ] } ] }, { - "cell_type": "code", + "cell_type": "markdown", "source": [ - "import polars as pl\n", - "import pyarrow as pa\n", - "import pyarrow.parquet as pq\n", - "import numpy as np\n", - "\n", - "# Print the column names and data types\n", - "print(\"Column names and data types:\")\n", - "for col in df_f.columns:\n", - " print(f\"{col}: {df_f[col].dtype}\")\n", - "\n", - "# Create PyArrow arrays for each column\n", - "pa_arrays = []\n", - "pa_field_names = []\n", - 
"\n", - "for col_name in df_f.columns:\n", - " col_data = df_f[col_name].to_list()\n", - "\n", - " if df_f[col_name].dtype == pl.Object:\n", - " # For Object dtype, we'll create a list of float64 arrays\n", - " try:\n", - " pa_array = pa.list_(pa.float64()).from_pandas(col_data)\n", - " except:\n", - " # If conversion fails, store as string\n", - " pa_array = pa.array([str(x) for x in col_data])\n", - " else:\n", - " pa_array = pa.array(col_data)\n", - "\n", - " pa_arrays.append(pa_array)\n", - " pa_field_names.append(col_name)\n", - "\n", - "# Create PyArrow table\n", - "pa_table = pa.Table.from_arrays(pa_arrays, names=pa_field_names)\n", - "\n", - "# Write the PyArrow table to Parquet\n", - "pq.write_table(pa_table, \"lab_logs_blindtest_activity_sysmon_1000samples_july_28_2024_filtered_vectors.parquet\")\n", - "\n", - "print(\"Parquet file written successfully.\")" + "### Read vectors into DataFrame" ], "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "RWRNV5iCTPHB", - "outputId": "ce896f59-95ef-4794-efb1-a50357ff6292" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Column names and data types:\n", - "@timestamp: Datetime(time_unit='us', time_zone='UTC')\n", - "host.hostname: Utf8\n", - "host.ip: Utf8\n", - "log.level: Utf8\n", - "winlog.event_id: Int64\n", - "winlog.task: Utf8\n", - "message: Utf8\n", - "filtered_message: Utf8\n", - "image: Utf8\n", - "target_filename: Utf8\n", - "parent_image: Utf8\n", - "text: Utf8\n", - "temp_folder: Utf8\n", - "filename: Utf8\n", - "label: Utf8\n", - "message_vector_list: Object\n", - "Parquet file written successfully.\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "import pyarrow.parquet as pq\n", - "import numpy as np\n", - "import pandas as pd\n", - "import re\n", - "\n", - "# Read the Parquet file\n", - "table = 
pq.read_table(\"lab_logs_blindtest_activity_sysmon_1000samples_july_28_2024_filtered_vectors.parquet\")\n", - "print(\"Parquet file read successfully\")\n", - "\n", - "# Convert to pandas DataFrame\n", - "df = table.to_pandas()\n", - "print(\"Converted to pandas DataFrame successfully\")\n", - "\n", - "# Function to convert string representation of array to numpy array\n", - "def string_to_array(s):\n", - " # Remove square brackets\n", - " s = s.strip('[]')\n", - " # Split by whitespace, handling the '...' case\n", - " nums = re.split(r'\\s+', s)\n", - " # Convert to float, ignoring '...' and empty strings\n", - " return np.array([float(num) for num in nums if num not in ['...', '']])\n", - "\n", - "# Convert message_vector_list to numpy arrays\n", - "try:\n", - " df['message_vector'] = df['message_vector_list'].apply(string_to_array)\n", - " print(\"Converted message_vector_list to numpy arrays successfully\")\n", - "except Exception as e:\n", - " print(f\"Error converting message_vector_list to numpy arrays: {e}\")\n", - " # Print a few examples of the problematic data\n", - " print(df['message_vector_list'].head())\n", - " raise\n", - "\n", - "# Drop the original message_vector_list column if you don't need it\n", - "df = df.drop(columns=['message_vector_list'])\n", - "\n", - "print(\"Final DataFrame columns:\")\n", - "print(df.columns)\n", - "\n", - "# Print the shape of the first few message vectors to verify\n", - "print(\"Shape of first few message vectors:\")\n", - "print(df['message_vector'].head().apply(lambda x: x.shape))" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "6eQyNWIAW-ts", - "outputId": "7aa26ca7-cbe6-4ee2-982f-e219a01dda18" - }, - "execution_count": 6, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Parquet file read successfully\n", - "Converted to pandas DataFrame successfully\n", - "Converted message_vector_list to numpy arrays successfully\n", - "Final DataFrame 
columns:\n", - "Index(['@timestamp', 'host.hostname', 'host.ip', 'log.level',\n", - " 'winlog.event_id', 'winlog.task', 'message', 'filtered_message',\n", - " 'image', 'target_filename', 'parent_image', 'text', 'temp_folder',\n", - " 'filename', 'label', 'message_vector'],\n", - " dtype='object')\n", - "Shape of first few message vectors:\n", - "0 (6,)\n", - "1 (6,)\n", - "2 (6,)\n", - "3 (6,)\n", - "4 (6,)\n", - "Name: message_vector, dtype: object\n" - ] - } - ] + "id": "oyMG2LKK4LqU" + } }, { "cell_type": "markdown", @@ -4078,51 +8000,123 @@ { "cell_type": "code", "source": [ - "print(\"Polars Df\")\n", - "print(df_f.head())\n", - "print(df_f.schema)\n", - "print()\n", - "print(\"Pandas Df\")\n", - "print(df.info())\n", - "print(df.dtypes)" + "import polars as pl\n", + "import pandas as pd\n", + "\n", + "# Assuming df_f is your Polars DataFrame\n", + "try:\n", + " # Extract data from Polars DataFrame columns\n", + " data = {col: df_f[col].to_list() for col in df_f.columns}\n", + " # Create a Pandas DataFrame from the extracted data\n", + " df = pd.DataFrame(data)\n", + "except Exception as e:\n", + " print(f\"An error occurred: {e}\")\n", + "\n", + "# Display the Pandas DataFrame info and dtypes\n", + "if 'df' in locals():\n", + " print(\"Pandas DataFrame:\")\n", + " print(df.info())\n", + " print(df.dtypes)\n" ], "metadata": { "colab": { - "base_uri": "https://localhost:8080/", - "height": 236 + "base_uri": "https://localhost:8080/" }, "id": "9JjYJzacaD-T", - "outputId": "3f5a9112-2530-4f39-bd8b-8e11d0edb51d" + "outputId": "a80aa8c4-1624-438b-9407-135cf4f3c284" }, - "execution_count": 2, + "execution_count": 28, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "Polars Df\n" - ] - }, - { - "output_type": "error", - "ename": "NameError", - "evalue": "name 'df_f' is not defined", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback 
(most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Polars Df\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf_f\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf_f\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mschema\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Pandas Df\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mNameError\u001b[0m: name 'df_f' is not defined" + "Pandas DataFrame:\n", + "\n", + "RangeIndex: 13455 entries, 0 to 13454\n", + "Data columns (total 16 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 @timestamp 13455 non-null datetime64[ns, UTC]\n", + " 1 host.hostname 13455 non-null object \n", + " 2 host.ip 13455 non-null object \n", + " 3 log.level 13455 non-null object \n", + " 4 winlog.event_id 13455 non-null int64 \n", + " 5 winlog.task 13455 non-null object \n", + " 6 message 13455 non-null object \n", + " 7 filtered_message 13455 non-null object \n", + " 8 image 13455 non-null object \n", + " 9 target_filename 13455 non-null object \n", + " 10 parent_image 13455 non-null object \n", + " 11 text 13455 non-null object \n", + " 12 temp_folder 13455 non-null object \n", + " 13 filename 13455 non-null object \n", + " 14 label 13455 non-null 
object \n", + " 15 message_vector 13455 non-null object \n", + "dtypes: datetime64[ns, UTC](1), int64(1), object(14)\n", + "memory usage: 1.6+ MB\n", + "None\n", + "@timestamp datetime64[ns, UTC]\n", + "host.hostname object\n", + "host.ip object\n", + "log.level object\n", + "winlog.event_id int64\n", + "winlog.task object\n", + "message object\n", + "filtered_message object\n", + "image object\n", + "target_filename object\n", + "parent_image object\n", + "text object\n", + "temp_folder object\n", + "filename object\n", + "label object\n", + "message_vector object\n", + "dtype: object\n" ] } ] }, + { + "cell_type": "code", + "source": [ + "X = np.array(df['message_vector'].tolist())\n", + "print(X.shape)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5NHDYuDAiUis", + "outputId": "2a639822-3cba-4e68-9124-9ec8af95c818" + }, + "execution_count": 30, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(13455, 30000)\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## TPOT for Supervised Learning with Algo stats" + ], + "metadata": { + "id": "Gf4Fu4cDs4pr" + } + }, { "cell_type": "code", "source": [ "from sklearn.model_selection import train_test_split\n", "from sklearn.preprocessing import LabelEncoder\n", "from tpot import TPOTClassifier\n", - "from sklearn.metrics import f1_score\n", + "from sklearn.metrics import recall_score\n", "import numpy as np\n", "import pandas as pd\n", "from collections import defaultdict\n", @@ -4130,7 +8124,6 @@ "import re\n", "from tabulate import tabulate\n", "\n", - "\n", "# Assuming df is already loaded and contains 'message_vector' and 'label' columns\n", "\n", "# Encode labels\n", @@ -4145,7 +8138,7 @@ "results = defaultdict(list)\n", "\n", "# Number of runs\n", - "n_runs = 1\n", + "n_runs = 10\n", "\n", "# Function to extract number of features selected\n", "def get_n_features(pipeline_str):\n", @@ -4155,9 +8148,9 @@ " return int(X.shape[1] * 
percentile / 100)\n", " return X.shape[1] # If no feature selection, return all features\n", "\n", - "# Initialize best_tpot and best_f1\n", + "# Initialize best_tpot and best_recall\n", "best_tpot = None\n", - "best_f1 = 0\n", + "best_recall = 0\n", "\n", "for run in range(n_runs):\n", " print(f\"\\nStarting run {run + 1}/{n_runs}\")\n", @@ -4166,9 +8159,9 @@ " # Split data for this run\n", " X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42 + run)\n", "\n", - " # TPOT classifier with f1 score as the metric\n", + " # TPOT classifier with recall as the metric\n", " tpot = TPOTClassifier(\n", - " scoring='f1_weighted', # Use weighted F1 score for multi-class problems\n", + " scoring='recall', # Use recall for single-label classification\n", " verbosity=2,\n", " generations=5,\n", " population_size=20,\n", @@ -4178,13 +8171,13 @@ " # Fit\n", " tpot.fit(X_train, y_train)\n", "\n", - " # Predict and calculate F1 score\n", + " # Predict and calculate recall score\n", " y_pred = tpot.predict(X_test)\n", - " f1 = f1_score(y_test, y_pred, average='weighted')\n", + " recall = recall_score(y_test, y_pred, average='macro') # 'macro' for single-label multi-class\n", "\n", - " # Update best_tpot if this run has better f1 score\n", - " if f1 > best_f1:\n", - " best_f1 = f1\n", + " # Update best_tpot if this run has better recall score\n", + " if recall > best_recall:\n", + " best_recall = recall\n", " best_tpot = tpot\n", "\n", " # Get pipeline string and extract number of features\n", @@ -4193,44 +8186,154 @@ "\n", " # Store results\n", " results['run'].append(run + 1)\n", - " results['f1_score'].append(f1)\n", + " results['recall_score'].append(recall)\n", " results['best_pipeline'].append(pipeline_str)\n", " results['n_features'].append(n_features)\n", " results['runtime'].append(time.time() - start_time)\n", " results['pipelines_tested'].append(tpot.evaluated_individuals_)\n", "\n", - " print(f\"Run {run + 1} completed. 
F1 Score: {f1:.4f}, Features selected: {n_features}, Pipelines tested: {len(tpot.evaluated_individuals_)}\")\n", - "\n" + " print(f\"Run {run + 1} completed. Recall Score: {recall:.4f}, Features selected: {n_features}, Pipelines tested: {len(tpot.evaluated_individuals_)}\")\n", + "\n", + "# Convert results to DataFrame\n", + "results_df = pd.DataFrame(results)\n", + "\n", + "# Print summary statistics\n", + "print(\"\\nSummary Statistics:\")\n", + "print(results_df.describe())\n", + "\n", + "# Save results to CSV\n", + "results_df.to_csv('tpot_recall_results.csv', index=False)\n", + "\n", + "print(\"\\nResults saved to 'tpot_recall_results.csv'\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 289, + "height": 1000, "referenced_widgets": [ - "bcf5338c159d4d53bc3e39e717885292", - "7276e9b8b79d41ab991296d44521e2ee", - "f2ab86c6413649a595edf4b8feeb6a66", - "decc923894ab4ef4b35fa627b1b2dfb4", - "9875e07950114beca6741a51962c1f30", - "90db1bae92c94a49ab8bc9c12073e552", - "92a42873a8ef4c00ad24ef47a62b4093", - "1826f0b9003a4f8bb1c074f526560c86", - "549831432f244f5080c74eb842bae875", - "20be31d5be524d76bc2dbb3baad4edf3", - "fc71d71e57774cb1a0df69e82d3e7c2d" + "a336f820e5124f5d9e344bb8b0d12ee5", + "150475d8c96148eaa9d5230b85283e64", + "19f3e10b3dd94c71b9add7ee1d8f4c13", + "5aa33fe21abc482c997975ec5712d0da", + "0b51be1ad43c43fa89041fea2b062768", + "5ab992330571497ab2a07cd04841b9d3", + "4a8897c90c19427fa82afa4fa62f31da", + "7df01173ffee4067b1c7490fab0b571b", + "6bd42f421a4e4e60b2d7edc8784e07aa", + "09932ad0a1fe4fd196b38b16dc74f94a", + "f7354d5ee6e04705968e30d7a43ebbe7", + "feb69d6d248b4e2abf52d6622dd3638b", + "712ff38a74994c5082fe487de437ec67", + "81a29faa64d44f2c9a173a9213618f4c", + "35012b6363cf4ca88c4c5221d264ec25", + "598537621c514f51be27ebaf9bde7d54", + "825b3f67b44b4136a36909da95412031", + "8b121673d208412ab1f426a3ff1698ba", + "02a6753498b14c459016b7ff31c4b6d7", + "384c150f10e94ba78243d1a260e66887", + "bb1d1e093dec47f393c713548bfc6cc6", + 
"05b2a6fb28aa434a94c18c7d64cd55f1", + "a2f43127d721436185ba241b84cc87d7", + "3618c2d6264c4359a17bbdecc7db70d8", + "286cc95f7d6c47a6863c7fda02f83d3f", + "cabaca33a5cc4d0fb9fa99f42ece4167", + "153d243e01b24b5cad305ec4d886b922", + "e8bf60db6d1b47b2a6752fcceebad0e7", + "600f0f18ccaf4c89aa37feaa0b360f13", + "33f5284de07e4be29e4f39d5aaaa2d9b", + "4a850f0d51014dfa9b55c86d04707b24", + "ae8a589b32c44836894c6d3b708b896d", + "a57766432bb54d24a3869e0553c026d1", + "2cf8dcb41b3340cea65c251b3d8681d4", + "6e3a5bb9d6d7456982660f0edcc3b8c0", + "0bb2408ba21a481983c0a74f1411ef0a", + "4528e59b16d04228a5cc508fadb56a54", + "61072176986f4c9fb48f403c25ac3024", + "9881b9f569b3445897972c0340870318", + "f09971fa706c4509b3530edabeb47864", + "7510b9c108494e3dbe5ee031bf367e2b", + "2ed7ace57f89415fba714200fe44c2f9", + "bb51c5c7834c419a87a30752cd769d91", + "cc9bf86d64fc48f490a38fae5483ddb2", + "ed38c4b5280a4e0da6b3e979ca380db5", + "045cf04f33a643e8816a674d659cc252", + "c5b0b6bebd0b49e0b10116770251023e", + "c42b63c849f7469496fa292cc53efae3", + "a336fafd8e334185a39ca81268ca9f90", + "007b3fd39d1c480c959db4ed7abb37fe", + "5247fe5ea6724b15934a7142b8a8a8b1", + "ff008c746112461781825f85f212dd81", + "c290371939ed4a8999c1411b426448ed", + "09e3fa8828024d1cab95af2452cbf516", + "b1f5ea5f99b249dc9c8d2b26f43485f0", + "b41836beb11a4623b7fa6091494c0e85", + "d1fb8e94033f47d3b7123c513473519d", + "0f90f79a7f0d4bcbaa2ada2379b65f53", + "dedf553fc5c24b14a86a779a61a39be5", + "6566843282bc4bb59c7044ab3b812f40", + "fcefb4debd9a4445a94a9fc5847662e4", + "bbe6750092e44b2aafc5afb33e71ae88", + "50769ac7392e4bccaf83e32f8e5e3754", + "f202232d07f946ccaf899e672c2b7dd7", + "111571775b67440db61746dc6a653380", + "1fc51b4708134f18995b5dd73631446d", + "f466f42deeac4321b8b9768a11d6a755", + "06b74e83419b4d2b94913aeb490a7cb0", + "de16524e0e5d46f9a8b511fa82e748a8", + "e13204ee59ab400280347f23c0d31705", + "ad599c9ce9b04059b37c94dcae4b23a8", + "aeefdbc3453543baa3d6e72ccd15ee54", + "3a84a44443a944c399486902d4db6bdc", + 
"2e55913ae8944ef9923724a8351bacb4", + "dcc3e817b40643388a5f99a54fe13e43", + "272dc71a7ccf4608bfdea6a17524a886", + "5a3d943f85e34887aeb64a0e6b189f32", + "065f516618c041f1ba57df39d17cc95d", + "ca2767bb06444f03a1a31dd9e5216c45", + "1d9710db98c04adeb3855f4062301e24", + "57141f99793f45708db3961cf4178fc9", + "6c4c02ca0cb64050ac7e201a1a60be72", + "b6b5382bb2684e74a2c36b49f423eee3", + "a4a4a02a4332458789866a0e0e7839d3", + "871438bb5a0d4d31b3b8a0f18fcdac78", + "294ffc18963140ea96c01095e3a1300f", + "6dda83426ca749bd822d2bd0d39744ed", + "d37293f1d67f45babbe2a875df701c40", + "2ae5d11987c44076b95c1294af800c28", + "75c27d61248b477e9c58211b066ab62f", + "00628b308f4244c4ad533a67cb90ba43", + "851e8257d678458db2612a04803e8c17", + "8f472688c98745f28345f31e852b361b", + "f1b8071a183e47af81e8d96290ca7370", + "18d1acb1cdad46f1832c98ab44f0bc53", + "21abac8483874bb1acdc45f6e8f78b53", + "b4e4db272e35407899de20c7b30d1551", + "9d4143827e3f4e949cde68384a7beb59", + "aa805872f24140f6aa6ee2ac72b34b59", + "ed028f92fe6544159b14dcf220623223", + "2ff14125384a4f3cad97a03f9d2ff55f", + "d70eced834c64f399a3db48dcc19f802", + "b85cfd236fbc4c50b2572f0ff7ddea1c", + "b4e51caccd1e40baac5143667206d8cb", + "8031586f1f8d4dbbaf44708b12f4eb13", + "a2278a2947354727a76758cb4bf5e682", + "a2debbdff91649f29c7a579911090df4", + "7d4b85786ff04c9d843753c91c80f854", + "cb681559245a4d12bd4551bcfea063c4", + "0967c0fe1d9a4048b4e7b4266a75d893" ] }, "id": "usxaQgkSbEND", - "outputId": "1f3cbeaa-d197-4877-b4d1-3f0f7d035f27" + "outputId": "7ace5c69-7761-48c4-ab9f-ff2623cf9355" }, - "execution_count": 2, + "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\n", - "Starting run 1/1\n" + "Starting run 1/10\n" ] }, { @@ -4242,7 +8345,7 @@ "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, - "model_id": "bcf5338c159d4d53bc3e39e717885292" + "model_id": "a336f820e5124f5d9e344bb8b0d12ee5" } }, "metadata": {} @@ -4252,22 +8355,359 @@ "name": "stdout", "text": [ 
"\n", - "Generation 1 - Current best internal CV score: 0.9876147693616538\n", + "Generation 1 - Current best internal CV score: 1.0\n", "\n", - "Generation 2 - Current best internal CV score: 0.9876147693616538\n", + "Generation 2 - Current best internal CV score: 1.0\n", "\n", - "Generation 3 - Current best internal CV score: 0.9876147693616538\n", + "Generation 3 - Current best internal CV score: 1.0\n", "\n", - "Generation 4 - Current best internal CV score: 0.9876147693616538\n", + "Generation 4 - Current best internal CV score: 1.0\n", "\n", - "Generation 5 - Current best internal CV score: 0.9876147693616538\n", + "Generation 5 - Current best internal CV score: 1.0\n", "\n", "Best pipeline: XGBClassifier(input_matrix, learning_rate=0.001, max_depth=9, min_child_weight=7, n_estimators=100, n_jobs=1, subsample=0.45, verbosity=0)\n", - "Run 1 completed. F1 Score: 0.9861, Features selected: 6, Pipelines tested: 117\n" + "Run 1 completed. Recall Score: 0.5000, Features selected: 6, Pipelines tested: 117\n", + "\n", + "Starting run 2/10\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Optimization Progress: 0%| | 0/120 [00:00" ], - "image/png": "\n" + "image/png": "\n" }, "metadata": {} } ] }, + { + "cell_type": "code", + "source": [ + "from sklearn.metrics import confusion_matrix, classification_report\n", + "\n", + "# Assuming you have access to the best model and test data\n", + "best_model = best_tpot.fitted_pipeline_\n", + "y_pred = best_model.predict(X_test)\n", + "\n", + "# Print confusion matrix\n", + "print(\"Confusion Matrix:\")\n", + "print(confusion_matrix(y_test, y_pred))\n", + "\n", + "# Print classification report\n", + "print(\"\\nClassification Report:\")\n", + "print(classification_report(y_test, y_pred))\n", + "\n", + "# Check unique predictions\n", + "unique_predictions = np.unique(y_pred)\n", + "print(\"\\nUnique Predictions:\", unique_predictions)\n", + "\n", + "# Check class distribution in test set\n", + 
"unique_classes, class_counts = np.unique(y_test, return_counts=True)\n", + "print(\"\\nClass Distribution in Test Set:\")\n", + "for cls, count in zip(unique_classes, class_counts):\n", + " print(f\"Class {cls}: {count}\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 218 + }, + "id": "8KwJfQvG0ZvJ", + "outputId": "13426649-eaaf-4fc9-8392-f648a79da6fe" + }, + "execution_count": 7, + "outputs": [ + { + "output_type": "error", + "ename": "NameError", + "evalue": "name 'best_tpot' is not defined", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;31m# Assuming you have access to the best model and test data\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mbest_model\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbest_tpot\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfitted_pipeline_\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0my_pred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbest_model\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'best_tpot' is not defined" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Handling Imbalanced Data" + ], + "metadata": { + "id": "RG5bRw2lt0GD" + } + }, + { + "cell_type": "code", + "source": [ + "from imblearn.over_sampling import SMOTE\n", + "from imblearn.under_sampling import RandomUnderSampler\n", + "from imblearn.pipeline import Pipeline\n", + "from sklearn.metrics 
import balanced_accuracy_score, roc_auc_score, average_precision_score\n", + "from sklearn.model_selection import cross_val_score\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.preprocessing import LabelEncoder\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.feature_selection import RFECV\n", + "from sklearn.preprocessing import QuantileTransformer\n", + "from sklearn.decomposition import PCA\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from imblearn.over_sampling import SMOTE\n", + "from imblearn.pipeline import Pipeline as ImbPipeline\n", + "\n", + "\n", + "# Encode labels\n", + "le = LabelEncoder()\n", + "df['label_encoded'] = le.fit_transform(df['label'])\n", + "\n", + "# Build the feature matrix and label vector\n", + "X = np.array(df['message_vector'].tolist())\n", + "y = df['label_encoded'].values\n", + "\n", + "# Hold out 20% of the samples as a test set\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", + "\n", + "\n", + "# Feature selection\n", + "feature_selector = RFECV(\n", + " estimator=RandomForestClassifier(n_estimators=10, class_weight='balanced', random_state=42),\n", + " step=5000,\n", + " min_features_to_select=1000,\n", + " cv=3,\n", + " scoring='balanced_accuracy',\n", + " n_jobs=-1\n", + ")\n", + "\n", + "\n", + "# Quantile transformer\n", + "quantile_transformer = QuantileTransformer(n_quantiles=1000, output_distribution='normal', random_state=42)\n", + "\n", + "# Dimensionality reduction\n", + "pca = PCA(n_components=0.95)\n", + "\n", + "# Create the pipeline\n", + "pipeline = ImbPipeline([\n", + " ('feature_selection', feature_selector),\n", + " ('quantile_transform', quantile_transformer),\n", + " ('pca', pca),\n", + " ('smote', SMOTE(random_state=42)),\n", + " ('classifier', RandomForestClassifier(class_weight='balanced', random_state=42))\n", + "])\n", + "\n", + "# Fit the pipeline and make predictions\n", +
"pipeline.fit(X_train, y_train)\n", + "y_pred = pipeline.predict(X_test)\n", + "\n", + "# Calculate metrics\n", + "balanced_acc = balanced_accuracy_score(y_test, y_pred)\n", + "roc_auc = roc_auc_score(y_test, pipeline.predict_proba(X_test)[:, 1])\n", + "avg_precision = average_precision_score(y_test, pipeline.predict_proba(X_test)[:, 1])\n", + "\n", + "print(f\"Balanced Accuracy: {balanced_acc:.4f}\")\n", + "print(f\"ROC AUC: {roc_auc:.4f}\")\n", + "print(f\"Average Precision: {avg_precision:.4f}\")\n", + "\n", + "# Cross-validation\n", + "cv_scores = cross_val_score(pipeline, X, y, cv=5, scoring='balanced_accuracy')\n", + "print(f\"Cross-validation Balanced Accuracy: {cv_scores.mean():.4f} (+/- {cv_scores.std() * 2:.4f})\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "zh-ULZZ0099E", + "outputId": "9bb9313a-7e4c-40bd-8c64-f78d1c57636d" + }, + "execution_count": 34, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Balanced Accuracy: 0.6000\n", + "ROC AUC: 0.9933\n", + "Average Precision: 0.9999\n", + "Cross-validation Balanced Accuracy: 0.7230 (+/- 0.1013)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Fit the feature selector and print the number of selected features\n", + "feature_selector.fit(X_train, y_train)\n", + "print(f\"Number of features selected: {feature_selector.n_features_}\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "brczGHV083MW", + "outputId": "54081516-0074-47d2-da92-ad921d8500f8" + }, + "execution_count": 60, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Number of features selected: 25000\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Testing the predictions" + ], + "metadata": { + "id": "zCJ3jHyZQIS_" + } + }, + { + "cell_type": "code", + "source": [ + "test_message_benign = random_good_message\n", + "test_message_bad = random_bad_message\n", + "\n", 
+ "\n", + "from tokenizers import Tokenizer\n", + "import torch\n", + "import numpy as np\n", + "import polars as pl\n", + "import numpy as np\n", + "\n", + "# Load the custom tokenizer\n", + "tokenizer = Tokenizer.from_file(\"log_tokenizer.json\")\n", + "\n", + "# Define the device (assuming you're using PyTorch and want to specify CPU or GPU)\n", + "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", + "\n", + "vector_good = vectorize_text(test_message_benign).flatten()\n", + "print(len(vector_good))\n", + "\n", + "\n", + "# Assuming 'vector' is your input\n", + "vector_good = np.array(vector_good)\n", + "vector_2d_good = vector_good.reshape(1, -1)\n", + "\n", + "# Class prediction\n", + "prediction = pipeline.predict(vector_2d_good)\n", + "print(\"Predicted class:\", prediction[0])\n", + "\n", + "# Probability prediction\n", + "probabilities = pipeline.predict_proba(vector_2d_good)\n", + "print(\"Class probabilities:\", probabilities[0])\n", + "\n", + "\n", + "vector_bad = vectorize_text(test_message_bad).flatten()\n", + "print(len(vector_bad))\n", + "\n", + "# Assuming 'vector' is your input\n", + "vector_bad = np.array(vector_bad)\n", + "vector_2d_bad = vector_bad.reshape(1, -1)\n", + "\n", + "# Class prediction\n", + "prediction = pipeline.predict(vector_2d_bad)\n", + "print(\"Predicted class:\", prediction[0])\n", + "\n", + "# Probability prediction\n", + "probabilities = pipeline.predict_proba(vector_2d_bad)\n", + "print(\"Class probabilities:\", probabilities[0])\n", + "\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "3HpxPWlIPHT4", + "outputId": "4501c78a-74d8-49b7-bc3e-0c647ba5e71f" + }, + "execution_count": 53, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "30000\n", + "Predicted class: 1\n", + "Class probabilities: [0. 
1.]\n", + "30000\n", + "Predicted class: 0\n", + "Class probabilities: [0.56 0.44]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Save the Supervised Learning Model (joblib)\n", + "\n", + "Generating the model can take roughly one hour of computation time on a modern 8-core / 64 GB RAM server." + ], + "metadata": { + "id": "YVPPGLSA6uZ-" + } + }, + { + "cell_type": "code", + "source": [ + "import joblib\n", + "import numpy as np\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import f1_score\n", + "\n", + "\n", + "# Save the model using joblib\n", + "model_filename = 'manual_model_123.joblib'\n", + "joblib.dump(pipeline, model_filename)\n", + "\n", + "print(f\"\\nModel exported as joblib: {model_filename}\")\n", + "\n", + "\n", + "# Verify the saved model\n", + "try:\n", + " loaded_model = joblib.load(model_filename)\n", + "except Exception as e:\n", + " print(f\"\\nError verifying saved model: {str(e)}\")\n", + "\n", + "# Print information about how to use the saved model\n", + "print(\"\\nTo use the saved model in the future:\")\n", + "print(\"1. Load the model: loaded_model = joblib.load('best_tpot_nn_model.joblib')\")\n", + "print(\"2. Make predictions: predictions = loaded_model.predict(X)\")\n", + "print(\"Note: Make sure to preprocess your input data (X) the same way as during training.\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "lpbrnU_06Gfi", + "outputId": "007bae7d-2533-477b-b23f-8699f09321bb" + }, + "execution_count": 36, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "Model exported as joblib: manual_model_123.joblib\n", + "\n", + "To use the saved model in the future:\n", + "1. Load the model: loaded_model = joblib.load('best_tpot_nn_model.joblib')\n", + "2.
Make predictions: predictions = loaded_model.predict(X)\n", + "Note: Make sure to preprocess your input data (X) the same way as during training.\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Confusion matrix" + ], + "metadata": { + "id": "l1u70hEaAYgD" + } + }, + { + "cell_type": "code", + "source": [ + "from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# Make predictions\n", + "y_pred = pipeline.predict(X_test)\n", + "\n", + "# Create the confusion matrix\n", + "cm = confusion_matrix(y_test, y_pred)\n", + "\n", + "# Create a ConfusionMatrixDisplay object\n", + "disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['good', 'bad'])\n", + "\n", + "# Plot the confusion matrix\n", + "plt.figure(figsize=(10, 7))\n", + "disp.plot(cmap='Blues', values_format='d')\n", + "plt.title('Confusion Matrix')\n", + "plt.show()\n", + "\n", + "# Print the confusion matrix\n", + "print(\"Confusion Matrix:\")\n", + "print(cm)\n", + "\n", + "# Calculate and print additional metrics\n", + "tn, fp, fn, tp = cm.ravel()\n", + "precision = tp / (tp + fp)\n", + "recall = tp / (tp + fn)\n", + "f1_score = 2 * (precision * recall) / (precision + recall)\n", + "\n", + "print(f\"\\nPrecision: {precision:.4f}\")\n", + "print(f\"Recall: {recall:.4f}\")\n", + "print(f\"F1-score: {f1_score:.4f}\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 617 + }, + "id": "5GaLAsg661Qh", + "outputId": "a546f9f0-9c8f-433c-d5f5-8440a058f840" + }, + "execution_count": 37, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Confusion Matrix:\n", + "[[ 5 20]\n", + " [ 0 2666]]\n", + "\n", + "Precision: 0.9926\n", + "Recall: 1.0000\n", + "F1-score: 0.9963\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## CV scores" + ], + "metadata": { + "id": "0jtLBuvnAeND" + } + }, + { + "cell_type": "code", + "source": [ + "from sklearn.model_selection import StratifiedKFold\n", + "from sklearn.metrics import balanced_accuracy_score, roc_auc_score, average_precision_score\n", + "import numpy as np\n", + "\n", + "# Define the number of folds\n", + "n_splits = 5\n", + "\n", + "# Create a StratifiedKFold object\n", + "skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)\n", + "\n", + "# Initialize lists to store the scores\n", + "balanced_accuracy_scores = []\n", + "roc_auc_scores = []\n", + "average_precision_scores = []\n", + "\n", + "# Perform cross-validation manually\n", + "for fold, (train_index, val_index) in enumerate(skf.split(X, y), 1):\n", + " X_train, X_val = X[train_index], X[val_index]\n", + " y_train, y_val = y[train_index], y[val_index]\n", + "\n", + " # Fit the pipeline\n", + " pipeline.fit(X_train, y_train)\n", + "\n", + " # Make predictions\n", + " y_pred = pipeline.predict(X_val)\n", + " y_pred_proba = pipeline.predict_proba(X_val)[:, 1]\n", + "\n", + " # Calculate scores\n", + " balanced_accuracy_scores.append(balanced_accuracy_score(y_val, y_pred))\n", + " roc_auc_scores.append(roc_auc_score(y_val, y_pred_proba))\n", + " average_precision_scores.append(average_precision_score(y_val, y_pred_proba))\n", + "\n", + " print(f\"Fold {fold}:\")\n", + " print(f\" Balanced Accuracy: {balanced_accuracy_scores[-1]:.4f}\")\n", + " print(f\" ROC AUC: {roc_auc_scores[-1]:.4f}\")\n", + " print(f\" Average Precision: {average_precision_scores[-1]:.4f}\")\n", + "\n", + "# Print the overall results\n", + "print(\"\\nCross-validation
results:\")\n", + "print(f\"Balanced Accuracy: {np.mean(balanced_accuracy_scores):.4f} (+/- {np.std(balanced_accuracy_scores) * 2:.4f})\")\n", + "print(f\"ROC AUC: {np.mean(roc_auc_scores):.4f} (+/- {np.std(roc_auc_scores) * 2:.4f})\")\n", + "print(f\"Average Precision: {np.mean(average_precision_scores):.4f} (+/- {np.std(average_precision_scores) * 2:.4f})\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cZqWaRDr61mL", + "outputId": "d77b060e-d380-4978-c920-97d260f2cf64" + }, + "execution_count": 38, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Fold 1:\n", + " Balanced Accuracy: 0.7493\n", + " ROC AUC: 0.9968\n", + " Average Precision: 1.0000\n", + "Fold 2:\n", + " Balanced Accuracy: 0.7605\n", + " ROC AUC: 0.9944\n", + " Average Precision: 0.9999\n", + "Fold 3:\n", + " Balanced Accuracy: 0.7172\n", + " ROC AUC: 0.9915\n", + " Average Precision: 0.9999\n", + "Fold 4:\n", + " Balanced Accuracy: 0.7170\n", + " ROC AUC: 0.9932\n", + " Average Precision: 0.9999\n", + "Fold 5:\n", + " Balanced Accuracy: 0.6953\n", + " ROC AUC: 0.9957\n", + " Average Precision: 1.0000\n", + "\n", + "Cross-validation results:\n", + "Balanced Accuracy: 0.7278 (+/- 0.0475)\n", + "ROC AUC: 0.9943 (+/- 0.0037)\n", + "Average Precision: 0.9999 (+/- 0.0000)\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Grid search optimization" + ], + "metadata": { + "id": "_XUIxpRdKzf9" + } + }, + { + "cell_type": "code", + "source": [ + "from sklearn.model_selection import cross_val_score\n", + "import itertools\n", + "\n", + "# Define parameter grid\n", + "param_grid = {\n", + " 'n_estimators': [100, 200, 300],\n", + " 'max_depth': [None, 10, 20, 30],\n", + " 'min_samples_split': [2, 5, 10],\n", + " 'min_samples_leaf': [1, 2, 4]\n", + "}\n", + "\n", + "# Generate all combinations of parameters\n", + "param_combinations = list(itertools.product(*param_grid.values()))\n", + "\n", + "best_score = 
-float('inf')\n", + "best_params = None\n", + "\n", + "# Manually perform grid search\n", + "for params in param_combinations:\n", + " # Create a new pipeline with current parameters\n", + " current_pipeline = Pipeline([\n", + " ('smote', SMOTE(sampling_strategy=0.35, random_state=42)),\n", + " ('undersampler', RandomUnderSampler(sampling_strategy=1.0, random_state=42)),\n", + " ('classifier', RandomForestClassifier(\n", + " n_estimators=params[0],\n", + " max_depth=params[1],\n", + " min_samples_split=params[2],\n", + " min_samples_leaf=params[3],\n", + " class_weight='balanced',\n", + " random_state=42\n", + " ))\n", + " ])\n", + "\n", + " # Perform cross-validation\n", + " scores = cross_val_score(current_pipeline, X, y, cv=5, scoring='balanced_accuracy')\n", + " mean_score = scores.mean()\n", + "\n", + " # Update best score and parameters if current is better\n", + " if mean_score > best_score:\n", + " best_score = mean_score\n", + " best_params = params\n", + "\n", + " print(f\"Parameters: {params}, Score: {mean_score}\")\n", + "\n", + "print(f\"\\nBest parameters: {best_params}\")\n", + "print(f\"Best cross-validation score: {best_score}\")\n", + "\n", + "# Create the best pipeline\n", + "best_pipeline = Pipeline([\n", + " ('smote', SMOTE(sampling_strategy=0.35, random_state=42)),\n", + " ('undersampler', RandomUnderSampler(sampling_strategy=1.0, random_state=42)),\n", + " ('classifier', RandomForestClassifier(\n", + " n_estimators=best_params[0],\n", + " max_depth=best_params[1],\n", + " min_samples_split=best_params[2],\n", + " min_samples_leaf=best_params[3],\n", + " class_weight='balanced',\n", + " random_state=42\n", + " ))\n", + "])\n", + "\n", + "# Fit the best pipeline\n", + "best_pipeline.fit(X, y)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "yIeeLfSlBXI7", + "outputId": "ee84da33-1d68-4cc3-8f82-82c20b4c2846" + }, + "execution_count": 61, + "outputs": [ + { + "output_type": "stream", + 
"name": "stdout", + "text": [ + "Parameters: (100, None, 2, 1), Score: 0.6601958996148235\n", + "Parameters: (100, None, 2, 2), Score: 0.6575723238043774\n", + "Parameters: (100, None, 2, 4), Score: 0.6832200924981155\n", + "Parameters: (100, None, 5, 1), Score: 0.6550882823515521\n", + "Parameters: (100, None, 5, 2), Score: 0.6617703512425128\n", + "Parameters: (100, None, 5, 4), Score: 0.6832200924981155\n", + "Parameters: (100, None, 10, 1), Score: 0.6531768364259575\n", + "Parameters: (100, None, 10, 2), Score: 0.6652185568886595\n", + "Parameters: (100, None, 10, 4), Score: 0.6606836474980943\n", + "Parameters: (100, 10, 2, 1), Score: 0.7538128573575538\n", + "Parameters: (100, 10, 2, 2), Score: 0.7490530042380288\n", + "Parameters: (100, 10, 2, 4), Score: 0.7528384007866634\n", + "Parameters: (100, 10, 5, 1), Score: 0.7531383491639173\n", + "Parameters: (100, 10, 5, 2), Score: 0.7575612641582747\n", + "Parameters: (100, 10, 5, 4), Score: 0.7528384007866634\n", + "Parameters: (100, 10, 10, 1), Score: 0.7454542837406529\n", + "Parameters: (100, 10, 10, 2), Score: 0.7450421584057931\n", + "Parameters: (100, 10, 10, 4), Score: 0.7454544241724976\n", + "Parameters: (100, 20, 2, 1), Score: 0.667890060811074\n", + "Parameters: (100, 20, 2, 2), Score: 0.6709738726297466\n", + "Parameters: (100, 20, 2, 4), Score: 0.6855542931725662\n", + "Parameters: (100, 20, 5, 1), Score: 0.6718902772293134\n", + "Parameters: (100, 20, 5, 2), Score: 0.6761462302501122\n", + "Parameters: (100, 20, 5, 4), Score: 0.6855542931725662\n", + "Parameters: (100, 20, 10, 1), Score: 0.6663908946953685\n", + "Parameters: (100, 20, 10, 2), Score: 0.6653415739080288\n", + "Parameters: (100, 20, 10, 4), Score: 0.6765587768805064\n", + "Parameters: (100, 30, 2, 1), Score: 0.659548594346173\n", + "Parameters: (100, 30, 2, 2), Score: 0.6622950046145905\n", + "Parameters: (100, 30, 2, 4), Score: 0.6784974819038248\n", + "Parameters: (100, 30, 5, 1), Score: 0.6547135399737714\n", + "Parameters: (100, 
30, 5, 2), Score: 0.6574599783285577\n", + "Parameters: (100, 30, 5, 4), Score: 0.6784974819038248\n", + "Parameters: (100, 30, 10, 1), Score: 0.6532142895989589\n", + "Parameters: (100, 30, 10, 2), Score: 0.6739245014401413\n", + "Parameters: (100, 30, 10, 4), Score: 0.665106450146976\n", + "Parameters: (200, None, 2, 1), Score: 0.6643938427938523\n", + "Parameters: (200, None, 2, 2), Score: 0.6576473705822248\n", + "Parameters: (200, None, 2, 4), Score: 0.6868933120891442\n", + "Parameters: (200, None, 5, 1), Score: 0.6553131418214051\n", + "Parameters: (200, None, 5, 2), Score: 0.6623325701330676\n", + "Parameters: (200, None, 5, 4), Score: 0.6868933120891442\n", + "Parameters: (200, None, 10, 1), Score: 0.6578994329770638\n", + "Parameters: (200, None, 10, 2), Score: 0.6652935615369535\n", + "Parameters: (200, None, 10, 4), Score: 0.6698392267301753\n", + "Parameters: (200, 10, 2, 1), Score: 0.745267074048384\n", + "Parameters: (200, 10, 2, 2), Score: 0.7449298971890804\n", + "Parameters: (200, 10, 2, 4), Score: 0.7447048832441983\n", + "Parameters: (200, 10, 5, 1), Score: 0.7492776671032991\n", + "Parameters: (200, 10, 5, 2), Score: 0.74897785915789\n", + "Parameters: (200, 10, 5, 4), Score: 0.7447048832441983\n", + "Parameters: (200, 10, 10, 1), Score: 0.7457916712477236\n", + "Parameters: (200, 10, 10, 2), Score: 0.7454544241724976\n", + "Parameters: (200, 10, 10, 4), Score: 0.7451547004861954\n", + "Parameters: (200, 20, 2, 1), Score: 0.6638045192806029\n", + "Parameters: (200, 20, 2, 2), Score: 0.6673279261796262\n", + "Parameters: (200, 20, 2, 4), Score: 0.6721734274046259\n", + "Parameters: (200, 20, 5, 1), Score: 0.6592693009399793\n", + "Parameters: (200, 20, 5, 2), Score: 0.6672631436929786\n", + "Parameters: (200, 20, 5, 4), Score: 0.6721734274046259\n", + "Parameters: (200, 20, 10, 1), Score: 0.6754715254518933\n", + "Parameters: (200, 20, 10, 2), Score: 0.6794446232042534\n", + "Parameters: (200, 20, 10, 4), Score: 0.6768585286531775\n", + 
"Parameters: (200, 30, 2, 1), Score: 0.6599233788535073\n", + "Parameters: (200, 30, 2, 2), Score: 0.6574974595879279\n", + "Parameters: (200, 30, 2, 4), Score: 0.6783100896501577\n", + "Parameters: (200, 30, 5, 1), Score: 0.6550133198328116\n", + "Parameters: (200, 30, 5, 2), Score: 0.6624449717816251\n", + "Parameters: (200, 30, 5, 4), Score: 0.6783100896501577\n", + "Parameters: (200, 30, 10, 1), Score: 0.6581617737062869\n", + "Parameters: (200, 30, 10, 2), Score: 0.6695289298026146\n", + "Parameters: (200, 30, 10, 4), Score: 0.6699142032921004\n", + "Parameters: (300, None, 2, 1), Score: 0.6602333949173783\n", + "Parameters: (300, None, 2, 2), Score: 0.6577598143603358\n", + "Parameters: (300, None, 2, 4), Score: 0.6747867810534257\n", + "Parameters: (300, None, 5, 1), Score: 0.6598858835509525\n", + "Parameters: (300, None, 5, 2), Score: 0.662444971781625\n", + "Parameters: (300, None, 5, 4), Score: 0.6747867810534257\n", + "Parameters: (300, None, 10, 1), Score: 0.6583491800031385\n", + "Parameters: (300, None, 10, 2), Score: 0.6609831745798139\n", + "Parameters: (300, None, 10, 4), Score: 0.6698495331509285\n", + "Parameters: (300, 10, 2, 1), Score: 0.7449297427140511\n", + "Parameters: (300, 10, 2, 2), Score: 0.7497272877407137\n", + "Parameters: (300, 10, 2, 4), Score: 0.7446298083799818\n", + "Parameters: (300, 10, 5, 1), Score: 0.7494274517089357\n", + "Parameters: (300, 10, 5, 2), Score: 0.7490527514607083\n", + "Parameters: (300, 10, 5, 4), Score: 0.7446298083799818\n", + "Parameters: (300, 10, 10, 1), Score: 0.7452669336165393\n", + "Parameters: (300, 10, 10, 2), Score: 0.7453419242216488\n", + "Parameters: (300, 10, 10, 4), Score: 0.7447797474606476\n", + "Parameters: (300, 20, 2, 1), Score: 0.6591841443459611\n", + "Parameters: (300, 20, 2, 2), Score: 0.6669530854995541\n", + "Parameters: (300, 20, 2, 4), Score: 0.6768960941716546\n", + "Parameters: (300, 20, 5, 1), Score: 0.6676651451684832\n", + "Parameters: (300, 20, 5, 2), Score: 
0.6758463520887806\n", + "Parameters: (300, 20, 5, 4), Score: 0.6768960941716546\n", + "Parameters: (300, 20, 10, 1), Score: 0.6711509725058449\n", + "Parameters: (300, 20, 10, 2), Score: 0.6749843392960013\n", + "Parameters: (300, 20, 10, 4), Score: 0.6810939530915764\n", + "Parameters: (300, 30, 2, 1), Score: 0.6600357805020648\n", + "Parameters: (300, 30, 2, 2), Score: 0.6577223471441499\n", + "Parameters: (300, 30, 2, 4), Score: 0.6746743653616838\n", + "Parameters: (300, 30, 5, 1), Score: 0.6596235287785447\n", + "Parameters: (300, 30, 5, 2), Score: 0.662145135749847\n", + "Parameters: (300, 30, 5, 4), Score: 0.6746743653616838\n", + "Parameters: (300, 30, 10, 1), Score: 0.6583491800031385\n", + "Parameters: (300, 30, 10, 2), Score: 0.660870730801703\n", + "Parameters: (300, 30, 10, 4), Score: 0.6699244816264847\n", + "\n", + "Best parameters: (100, 10, 5, 2)\n", + "Best cross-validation score: 0.7575612641582747\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Pipeline(steps=[('smote', SMOTE(random_state=42, sampling_strategy=0.35)),\n", + " ('undersampler',\n", + " RandomUnderSampler(random_state=42, sampling_strategy=1.0)),\n", + " ('classifier',\n", + " RandomForestClassifier(class_weight='balanced', max_depth=10,\n", + " min_samples_leaf=2, min_samples_split=5,\n", + " random_state=42))])" + ], + "text/html": [ + "
Pipeline(steps=[('smote', SMOTE(random_state=42, sampling_strategy=0.35)),\n",
+              "                ('undersampler',\n",
+              "                 RandomUnderSampler(random_state=42, sampling_strategy=1.0)),\n",
+              "                ('classifier',\n",
+              "                 RandomForestClassifier(class_weight='balanced', max_depth=10,\n",
+              "                                        min_samples_leaf=2, min_samples_split=5,\n",
+              "                                        random_state=42))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ] + }, + "metadata": {}, + "execution_count": 61 + } + ] + }, + { + "cell_type": "code", + "source": [ + "\"\"\"\n", + "Pipeline(steps=[('smote', SMOTE(random_state=42, sampling_strategy=0.35)),\n", + " ('undersampler',\n", + " RandomUnderSampler(random_state=42, sampling_strategy=1.0)),\n", + " ('classifier',\n", + " RandomForestClassifier(class_weight='balanced', max_depth=10,\n", + " min_samples_leaf=2, min_samples_split=5,\n", + " random_state=42))])\n", + "\n", + "SMOTE\n", + "SMOTE(random_state=42, sampling_strategy=0.35)\n", + "\n", + "RandomUnderSampler\n", + "RandomUnderSampler(random_state=42, sampling_strategy=1.0)\n", + "\n", + "RandomForestClassifier\n", + "RandomForestClassifier(class_weight='balanced', max_depth=10,\n", + " min_samples_leaf=2, min_samples_split=5,\n", + " random_state=42)\n", + "\"\"\"\n", + "\n", + "from imblearn.pipeline import make_pipeline\n", + "from imblearn.over_sampling import SMOTE\n", + "from imblearn.under_sampling import RandomUnderSampler\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import classification_report, balanced_accuracy_score\n", + "\n", + "# Assuming you have your features X and target y already defined\n", + "# Encode labels\n", + "le = LabelEncoder()\n", + "df['label_encoded'] = le.fit_transform(df['label'])\n", + "\n", + "# Split data\n", + "X = np.array(df['message_vector'].tolist())\n", + "y = df['label_encoded'].values\n", + "\n", + "\n", + "# Split the data into training and testing sets\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", + "\n", + "# Split the data into training and testing sets\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", + "\n", + "# Split the data into training and testing sets\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, 
random_state=42)\n", + "\n", + "# Create the pipeline using make_pipeline\n", + "pipeline = make_pipeline(\n", + " SMOTE(random_state=42, sampling_strategy=0.35),\n", + " RandomUnderSampler(random_state=42, sampling_strategy=1.0),\n", + " RandomForestClassifier(\n", + " class_weight='balanced',\n", + " max_depth=10,\n", + " min_samples_leaf=2,\n", + " min_samples_split=5,\n", + " random_state=42\n", + " )\n", + ")\n", + "\n", + "# Fit the pipeline\n", + "pipeline.fit(X_train, y_train)\n", + "\n", + "# Use the fitted pipeline to make predictions\n", + "y_pred = pipeline.predict(X_test)\n", + "\n", + "# Evaluate the model\n", + "print(classification_report(y_test, y_pred))\n", + "print(f\"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred)}\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "saqEW4G2HbI3", + "outputId": "d149844d-0cf7-4e33-89d9-cadf77890678" + }, + "execution_count": 90, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.78 0.72 0.75 25\n", + " 1 1.00 1.00 1.00 2666\n", + "\n", + " accuracy 1.00 2691\n", + " macro avg 0.89 0.86 0.87 2691\n", + "weighted avg 1.00 1.00 1.00 2691\n", + "\n", + "Balanced Accuracy: 0.8590622655663915\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# Make predictions\n", + "y_pred = pipeline.predict(X_test)\n", + "\n", + "# Create the confusion matrix\n", + "cm = confusion_matrix(y_test, y_pred)\n", + "\n", + "# Label the axes from the fitted encoder: le.classes_ is ordered by encoded value (0, 1), matching the row/column order of cm\n", + "disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=le.classes_)\n", + "\n", + "# Draw on an explicit axes; a bare plt.figure() is left empty because disp.plot() opens its own figure\n", + "fig, ax = plt.subplots(figsize=(10, 7))\n", + "disp.plot(ax=ax, cmap='Blues', values_format='d')\n", + "ax.set_title('Confusion Matrix')\n", + "plt.show()\n", + "\n", + "# Print the confusion
matrix\n", + "print(\"Confusion Matrix:\")\n", + "print(cm)\n", + "\n", + "# Calculate and print additional metrics\n", + "tn, fp, fn, tp = cm.ravel()\n", + "precision = tp / (tp + fp)\n", + "recall = tp / (tp + fn)\n", + "f1_score = 2 * (precision * recall) / (precision + recall)\n", + "\n", + "print(f\"\\nPrecision: {precision:.4f}\")\n", + "print(f\"Recall: {recall:.4f}\")\n", + "print(f\"F1-score: {f1_score:.4f}\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 617 + }, + "id": "raf1OKv4_E23", + "outputId": "6f7ab778-99af-401d-a8e0-2d8bff230902" + }, + "execution_count": 91, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Confusion Matrix:\n", + "[[ 18 7]\n", + " [ 5 2661]]\n", + "\n", + "Precision: 0.9974\n", + "Recall: 0.9981\n", + "F1-score: 0.9978\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from sklearn.model_selection import StratifiedKFold\n", + "from sklearn.metrics import balanced_accuracy_score, roc_auc_score, average_precision_score\n", + "import numpy as np\n", + "\n", + "# Define the number of folds\n", + "n_splits = 5\n", + "\n", + "# Create a StratifiedKFold object\n", + "skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)\n", + "\n", + "# Initialize lists to store the scores\n", + "balanced_accuracy_scores = []\n", + "roc_auc_scores = []\n", + "average_precision_scores = []\n", + "\n", + "# Perform cross-validation manually\n", + "for fold, (train_index, val_index) in enumerate(skf.split(X, y), 1):\n", + " X_train, X_val = X[train_index], X[val_index]\n", + " y_train, y_val = y[train_index], y[val_index]\n", + "\n", + " # Fit the pipeline\n", + " pipeline.fit(X_train, y_train)\n", + "\n", + " # Make predictions\n", + " y_pred = pipeline.predict(X_val)\n", + " y_pred_proba = pipeline.predict_proba(X_val)[:, 1]\n", + "\n", + " # Calculate scores\n", + " balanced_accuracy_scores.append(balanced_accuracy_score(y_val, y_pred))\n", + " roc_auc_scores.append(roc_auc_score(y_val, y_pred_proba))\n", + " average_precision_scores.append(average_precision_score(y_val, y_pred_proba))\n", + "\n", + " print(f\"Fold {fold}:\")\n", + " print(f\" Balanced Accuracy: {balanced_accuracy_scores[-1]:.4f}\")\n", + " print(f\" ROC AUC: {roc_auc_scores[-1]:.4f}\")\n", + " print(f\" Average Precision: {average_precision_scores[-1]:.4f}\")\n", + "\n", + "# Print the overall results\n", + "print(\"\\nCross-validation results:\")\n", + "print(f\"Balanced Accuracy: {np.mean(balanced_accuracy_scores):.4f} (+/- 
{np.std(balanced_accuracy_scores) * 2:.4f})\")\n", + "print(f\"ROC AUC: {np.mean(roc_auc_scores):.4f} (+/- {np.std(roc_auc_scores) * 2:.4f})\")\n", + "print(f\"Average Precision: {np.mean(average_precision_scores):.4f} (+/- {np.std(average_precision_scores) * 2:.4f})\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "WAJiYO5dK7eL", + "outputId": "21d68432-3b20-4fca-9775-bdf302ce8c0e" + }, + "execution_count": 63, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Fold 1:\n", + " Balanced Accuracy: 0.9761\n", + " ROC AUC: 0.9994\n", + " Average Precision: 1.0000\n", + "Fold 2:\n", + " Balanced Accuracy: 0.8679\n", + " ROC AUC: 0.9965\n", + " Average Precision: 1.0000\n", + "Fold 3:\n", + " Balanced Accuracy: 0.8686\n", + " ROC AUC: 0.9984\n", + " Average Precision: 1.0000\n", + "Fold 4:\n", + " Balanced Accuracy: 0.8692\n", + " ROC AUC: 0.9970\n", + " Average Precision: 1.0000\n", + "Fold 5:\n", + " Balanced Accuracy: 0.9115\n", + " ROC AUC: 0.9979\n", + " Average Precision: 1.0000\n", + "\n", + "Cross-validation results:\n", + "Balanced Accuracy: 0.8987 (+/- 0.0843)\n", + "ROC AUC: 0.9979 (+/- 0.0020)\n", + "Average Precision: 1.0000 (+/- 0.0000)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "test_message_benign = random_good_message\n", + "test_message_bad = random_bad_message\n", + "\n", + "\n", + "from tokenizers import Tokenizer\n", + "import torch\n", + "import numpy as np\n", + "import polars as pl\n", + "import numpy as np\n", + "\n", + "# Load the custom tokenizer\n", + "tokenizer = Tokenizer.from_file(\"log_tokenizer.json\")\n", + "\n", + "# Define the device (assuming you're using PyTorch and want to specify CPU or GPU)\n", + "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", + "\n", + "vector_good = vectorize_text(test_message_benign).flatten()\n", + "print(len(vector_good))\n", + "\n", + "\n", + "# Assuming 'vector' is your input\n", + 
"vector_good = np.array(vector_good)\n", + "vector_2d_good = vector_good.reshape(1, -1)\n", + "\n", + "# Class prediction\n", + "prediction = pipeline.predict(vector_2d_good)\n", + "print(\"Predicted class:\", prediction[0])\n", + "\n", + "# Probability prediction\n", + "probabilities = pipeline.predict_proba(vector_2d_good)\n", + "print(\"Class probabilities:\", probabilities[0])\n", + "\n", + "\n", + "vector_bad = vectorize_text(test_message_bad).flatten()\n", + "print(len(vector_bad))\n", + "\n", + "# Assuming 'vector' is your input\n", + "vector_bad = np.array(vector_bad)\n", + "vector_2d_bad = vector_bad.reshape(1, -1)\n", + "\n", + "# Class prediction\n", + "prediction = pipeline.predict(vector_2d_bad)\n", + "print(\"Predicted class:\", prediction[0])\n", + "\n", + "# Probability prediction\n", + "probabilities = pipeline.predict_proba(vector_2d_bad)\n", + "print(\"Class probabilities:\", probabilities[0])\n", + "\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "RbXSx4Go-6pd", + "outputId": "6fbe80e4-4818-42dd-d5b6-895cfbcce41f" + }, + "execution_count": 95, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "30000\n", + "Predicted class: 1\n", + "Class probabilities: [0. 
1.]\n", + "30000\n", + "Predicted class: 0\n", + "Class probabilities: [0.68148054 0.31851946]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Tpot (Supervised)" + ], + "metadata": { + "id": "ZWRH59wKLaAX" + } + }, + { + "cell_type": "code", + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.impute import SimpleImputer\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.metrics import make_scorer, confusion_matrix, classification_report\n", + "from imblearn.over_sampling import SMOTE\n", + "from imblearn.under_sampling import RandomUnderSampler\n", + "from imblearn.pipeline import Pipeline as ImbPipeline\n", + "from tpot import TPOTClassifier\n", + "from sklearn.decomposition import TruncatedSVD\n", + "\n", + "def print_step_info(X, y, step_name):\n", + " print(f\"\\n--- {step_name} ---\")\n", + " print(f\"X shape: {X.shape}\")\n", + " print(f\"y shape: {y.shape}\")\n", + " print(f\"Class distribution: {np.bincount(y)}\")\n", + "\n", + "def custom_scorer(y_true, y_pred):\n", + " # Weighted mean of class-1 recall and class-0 recall (specificity); specificity counts double\n", + " # labels=[0, 1] keeps the matrix 2x2 even if only one class is present, so ravel() always yields four values\n", + " cm = confusion_matrix(y_true, y_pred, labels=[0, 1])\n", + " tn, fp, fn, tp = cm.ravel()\n", + " # No true samples of one class: the score is undefined, report the worst value\n", + " if (tp + fn) == 0 or (tn + fp) == 0:\n", + " return 0.0\n", + " recall = tp / (tp + fn + 1e-8)\n", + " specificity = tn / (tn + fp + 1e-8)\n", + " recall_weight = 1.0\n", + " specificity_weight = 2.0\n", + " score = (recall_weight * recall + specificity_weight * specificity) / (recall_weight + specificity_weight)\n", + " return score\n", + "\n", + "custom_scorer_obj = make_scorer(custom_scorer, greater_is_better=True)\n", + "\n", + "# Assuming you have your features X and target y already defined\n", + "# Encode labels\n", + "le = LabelEncoder()\n", + "df['label_encoded'] = le.fit_transform(df['label'])\n", + "\n", + "# Split data\n", + "X = np.array(df['message_vector'].tolist())\n", + "y =
df['label_encoded'].values\n", + "\n", + "print_step_info(X, y, \"Original Data\")\n", + "\n", + "# Step 1: Preprocessing\n", + "imputer = SimpleImputer(strategy='mean')\n", + "scaler = StandardScaler()\n", + "pca = TruncatedSVD(n_components=100, random_state=42) # Reduce to 100 components\n", + "\n", + "X = imputer.fit_transform(X)\n", + "X = scaler.fit_transform(X)\n", + "X = pca.fit_transform(X)\n", + "\n", + "print_step_info(X, y, \"After Preprocessing\")\n", + "\n", + "# Step 2: Split the data\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)\n", + "print_step_info(X_train, y_train, \"Training Data\")\n", + "\n", + "# Step 3: Initial model on imbalanced data\n", + "initial_model = RandomForestClassifier(n_estimators=100, class_weight='balanced', random_state=42, n_jobs=-1)\n", + "initial_model.fit(X_train, y_train)\n", + "y_pred_initial = initial_model.predict(X_test)\n", + "print(\"\\nInitial Model Performance:\")\n", + "print(classification_report(y_test, y_pred_initial, zero_division=0))\n", + "print(\"Custom Score:\", custom_scorer(y_test, y_pred_initial))\n", + "\n", + "# Step 4: Resampling\n", + "smote = SMOTE(sampling_strategy=0.5, random_state=42)\n", + "rus = RandomUnderSampler(sampling_strategy=0.5, random_state=42)\n", + "X_resampled, y_resampled = rus.fit_resample(*smote.fit_resample(X_train, y_train))\n", + "print_step_info(X_resampled, y_resampled, \"Resampled Data\")\n", + "\n", + "# Step 5: Model on resampled data\n", + "resampled_model = RandomForestClassifier(n_estimators=100, class_weight='balanced', random_state=42, n_jobs=-1)\n", + "resampled_model.fit(X_resampled, y_resampled)\n", + "y_pred_resampled = resampled_model.predict(X_test)\n", + "print(\"\\nResampled Model Performance:\")\n", + "print(classification_report(y_test, y_pred_resampled, zero_division=0))\n", + "print(\"Custom Score:\", custom_scorer(y_test, y_pred_resampled))\n", + "\n", + "# Step 6: Create a new dataset 
with original minority samples and top predicted minority samples\n", + "# Rank by column 0 of predict_proba, i.e. P(class 0) - the minority class picked out by minority_indices below\n", + "y_pred_proba = resampled_model.predict_proba(X_train)[:, 0]\n", + "minority_indices = np.where(y_train == 0)[0]\n", + "predicted_minority_indices = np.argsort(y_pred_proba)[-len(minority_indices)*10:]\n", + "selected_indices = np.union1d(minority_indices, predicted_minority_indices)\n", + "\n", + "X_selected = X_train[selected_indices]\n", + "y_selected = y_train[selected_indices]\n", + "print_step_info(X_selected, y_selected, \"Selected Data for Final Model\")\n", + "\n", + "# Step 7: Final TPOT optimization\n", + "tpot_config = {\n", + " 'sklearn.ensemble.RandomForestClassifier': {\n", + " 'n_estimators': [100, 200, 500],\n", + " 'max_depth': [None, 5, 10, 20],\n", + " 'min_samples_split': [2, 5, 10],\n", + " 'min_samples_leaf': [1, 2, 4],\n", + " 'class_weight': ['balanced', 'balanced_subsample']\n", + " },\n", + " 'xgboost.XGBClassifier': {\n", + " 'n_estimators': [100, 200, 500],\n", + " 'max_depth': [3, 5, 7, 9],\n", + " 'learning_rate': [0.01, 0.1, 0.3],\n", + " 'subsample': [0.5, 0.7, 1.0],\n", + " 'scale_pos_weight': [1, 10, 100]\n", + " },\n", + " 'imblearn.ensemble.BalancedRandomForestClassifier': {\n", + " 'n_estimators': [100, 200, 500],\n", + " 'replacement': [True, False]\n", + " },\n", + " 'imblearn.ensemble.EasyEnsembleClassifier': {\n", + " 'n_estimators': [10, 20, 30, 50],\n", + " 'replacement': [True, False]\n", + " }\n", + "}\n", + "\n", + "tpot = TPOTClassifier(\n", + " config_dict=tpot_config,\n", + " generations=1,\n", + " population_size=20,\n", + " verbosity=2,\n", + " scoring=custom_scorer_obj,\n", + " random_state=42,\n", + " n_jobs=-1,\n", + " max_time_mins=60,\n", + " max_eval_time_mins=10\n", + ")\n", + "\n", + "try:\n", + " tpot.fit(X_selected, y_selected)\n", + " y_pred_tpot = tpot.predict(X_test)\n", + " print(\"\\nTPOT Model Performance:\")\n", + " print(classification_report(y_test, y_pred_tpot, zero_division=0))\n", + " print(\"Custom Score:\",
custom_scorer(y_test, y_pred_tpot))\n", + " print(\"\\nConfusion Matrix:\")\n", + " print(confusion_matrix(y_test, y_pred_tpot))\n", + " tpot.export('tpot_best_pipeline.py')\n", + " print(\"\\nBest pipeline exported to 'tpot_best_pipeline.py'\")\n", + "except Exception as e:\n", + " print(f\"An error occurred during TPOT optimization: {str(e)}\")\n", + "\n", + "# Final evaluation\n", + "print(\"\\nFinal Evaluation:\")\n", + "print(\"Initial Model Custom Score:\", custom_scorer(y_test, y_pred_initial))\n", + "print(\"Resampled Model Custom Score:\", custom_scorer(y_test, y_pred_resampled))\n", + "if 'y_pred_tpot' in locals():\n", + " print(\"TPOT Model Custom Score:\", custom_scorer(y_test, y_pred_tpot))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000, + "referenced_widgets": [ + "0ee9d77e31c343d2b226c9e1c370899a", + "cefbf3ec854e463eac3047d7f2366a9b", + "5d5a04539517439c9a47f725bbfca641", + "487df5716a1f42b9ad08a05300882448", + "f6f95cf2eb1f43cdbb1be5a1ffc19621", + "a8b2545d7d94490b8df9d8a63d7fde2d", + "18b7438e17ef4851aa9c8d0dfc52f2d5", + "939ebe26aaf74df78c81c255aefcc7e5", + "c5677148554a4e65b74ae9fc95ae4c37", + "328be8dce5cf4b2a8418e7693f1829c0", + "48d9d503aebd44578cb545fdb4b28776" + ] + }, + "id": "TnChV008alMz", + "outputId": "2e7242bb-41eb-4657-9aba-80bf41b9e218" + }, + "execution_count": 137, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "--- Original Data ---\n", + "X shape: (13455, 30000)\n", + "y shape: (13455,)\n", + "Class distribution: [ 114 13341]\n", + "\n", + "--- After Preprocessing ---\n", + "X shape: (13455, 100)\n", + "y shape: (13455,)\n", + "Class distribution: [ 114 13341]\n", + "\n", + "--- Training Data ---\n", + "X shape: (10764, 100)\n", + "y shape: (10764,)\n", + "Class distribution: [ 91 10673]\n", + "\n", + "Initial Model Performance:\n", + " precision recall f1-score support\n", + "\n", + " 0 0.00 0.00 0.00 23\n", + " 1 0.99 1.00 1.00 2668\n", + 
"\n", + " accuracy 0.99 2691\n", + " macro avg 0.50 0.50 0.50 2691\n", + "weighted avg 0.98 0.99 0.99 2691\n", + "\n", + "Custom Score: 0.333333333332084\n", + "\n", + "--- Resampled Data ---\n", + "X shape: (16008, 100)\n", + "y shape: (16008,)\n", + "Class distribution: [ 5336 10672]\n", + "\n", + "Resampled Model Performance:\n", + " precision recall f1-score support\n", + "\n", + " 0 1.00 0.22 0.36 23\n", + " 1 0.99 1.00 1.00 2668\n", + "\n", + " accuracy 0.99 2691\n", + " macro avg 1.00 0.61 0.68 2691\n", + "weighted avg 0.99 0.99 0.99 2691\n", + "\n", + "Custom Score: 0.47826086950095603\n", + "\n", + "--- Selected Data for Final Model ---\n", + "X shape: (1001, 100)\n", + "y shape: (1001,)\n", + "Class distribution: [ 91 910]\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Optimization Progress: 0%| | 0/20 [00:00 0 else 0\n", + "recall = tp / (tp + fn) if (tp + fn) > 0 else 0\n", + "f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0\n", + "\n", + "print(f\"\\nPrecision: {precision:.4f}\")\n", + "print(f\"Recall: {recall:.4f}\")\n", + "print(f\"F1-score: {f1_score:.4f}\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 617 + }, + "id": "top9wzRCdAKK", + "outputId": "decb8210-d1fc-4452-bc9c-ca6962caff0f" + }, + "execution_count": 105, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Confusion Matrix:\n", + "[[ 23 0]\n", + " [ 292 2376]]\n", + "\n", + "Precision: 1.0000\n", + "Recall: 0.8906\n", + "F1-score: 0.9421\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from sklearn.model_selection import StratifiedKFold\n", + "from sklearn.metrics import balanced_accuracy_score, roc_auc_score, average_precision_score\n", + "import numpy as np\n", + "\n", + "# Define the number of folds\n", + "n_splits = 5\n", + "\n", + "# Create a StratifiedKFold object\n", + "skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)\n", + "\n", + "# Initialize lists to store the scores\n", + "balanced_accuracy_scores = []\n", + "roc_auc_scores = []\n", + "average_precision_scores = []\n", + "\n", + "# Perform cross-validation\n", + "for fold, (train_index, val_index) in enumerate(skf.split(X, y), 1):\n", + " X_val, y_val = X[val_index], y[val_index]\n", + "\n", + " # Make predictions using the existing TPOT model\n", + " y_pred = tpot.predict(X_val)\n", + " y_pred_proba = tpot.predict_proba(X_val)[:, 1]\n", + "\n", + " # Calculate scores\n", + " balanced_accuracy_scores.append(balanced_accuracy_score(y_val, y_pred))\n", + " roc_auc_scores.append(roc_auc_score(y_val, y_pred_proba))\n", + " average_precision_scores.append(average_precision_score(y_val, y_pred_proba))\n", + "\n", + " print(f\"Fold {fold}:\")\n", + " print(f\" Balanced Accuracy: {balanced_accuracy_scores[-1]:.4f}\")\n", + " print(f\" ROC AUC: {roc_auc_scores[-1]:.4f}\")\n", + " print(f\" Average Precision: {average_precision_scores[-1]:.4f}\")\n", + "\n", + "# Print the overall results\n", + "print(\"\\nCross-validation results:\")\n", + "print(f\"Balanced Accuracy: {np.mean(balanced_accuracy_scores):.4f} (+/- {np.std(balanced_accuracy_scores) * 2:.4f})\")\n", + "print(f\"ROC AUC: {np.mean(roc_auc_scores):.4f} (+/- {np.std(roc_auc_scores) * 2:.4f})\")\n", + 
"print(f\"Average Precision: {np.mean(average_precision_scores):.4f} (+/- {np.std(average_precision_scores) * 2:.4f})\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "nmu5z7OVdQHA", + "outputId": "8193a942-2d4f-45b6-d1dc-e0dfed3e1d6c" + }, + "execution_count": 77, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Fold 1:\n", + " Balanced Accuracy: 0.9507\n", + " ROC AUC: 0.9996\n", + " Average Precision: 1.0000\n", + "Fold 2:\n", + " Balanced Accuracy: 0.9472\n", + " ROC AUC: 0.9963\n", + " Average Precision: 1.0000\n", + "Fold 3:\n", + " Balanced Accuracy: 0.9400\n", + " ROC AUC: 0.9982\n", + " Average Precision: 1.0000\n", + "Fold 4:\n", + " Balanced Accuracy: 0.9457\n", + " ROC AUC: 1.0000\n", + " Average Precision: 1.0000\n", + "Fold 5:\n", + " Balanced Accuracy: 0.9468\n", + " ROC AUC: 0.9956\n", + " Average Precision: 1.0000\n", + "\n", + "Cross-validation results:\n", + "Balanced Accuracy: 0.9461 (+/- 0.0069)\n", + "ROC AUC: 0.9979 (+/- 0.0035)\n", + "Average Precision: 1.0000 (+/- 0.0000)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import numpy as np\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.impute import SimpleImputer\n", + "from sklearn.decomposition import TruncatedSVD\n", + "\n", + "# Assume these are saved from the training process\n", + "saved_imputer = imputer # The SimpleImputer used in training\n", + "saved_scaler = scaler # The StandardScaler used in training\n", + "saved_pca = pca # The TruncatedSVD (PCA) used in training\n", + "\n", + "# Vectorize the messages\n", + "vector1 = vectorize_text(random_good_message)\n", + "vector2 = vectorize_text(random_bad_message)\n", + "\n", + "print(\"Shape of vector1:\", vector1.shape)\n", + "print(\"Shape of vector2:\", vector2.shape)\n", + "\n", + "# Combine the vectors\n", + "X_new = np.vstack((vector1, vector2))\n", + "\n", + "print(\"Shape of X_new before preprocessing:\", 
X_new.shape)\n", + "\n", + "# Apply the saved preprocessing steps\n", + "X_new = saved_imputer.transform(X_new)\n", + "print(\"Shape after imputation:\", X_new.shape)\n", + "\n", + "X_new = saved_scaler.transform(X_new)\n", + "print(\"Shape after scaling:\", X_new.shape)\n", + "\n", + "X_new = saved_pca.transform(X_new)\n", + "print(\"Shape after PCA:\", X_new.shape)\n", + "\n", + "# Ensure X_new has exactly 100 features\n", + "assert X_new.shape[1] == 100, f\"X_new has {X_new.shape[1]} features instead of 100\"\n", + "\n", + "# Make predictions using your TPOT model\n", + "predictions = tpot.predict(X_new)\n", + "probabilities = tpot.predict_proba(X_new)\n", + "\n", + "# Print results\n", + "for i, (pred, prob) in enumerate(zip(predictions, probabilities)):\n", + " print(f\"Message {i+1}:\")\n", + " print(f\"Predicted class: {pred}\")\n", + " print(f\"Class probabilities: {prob}\")\n", + " print()\n", + "\n", + "# Print the shape of X_new for final confirmation\n", + "print(f\"Final shape of X_new: {X_new.shape}\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "_AkVOj7ehjcQ", + "outputId": "b4cd08a9-38e0-4f33-c910-e2cb43610837" + }, + "execution_count": 138, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Shape of vector1: (1, 30000)\n", + "Shape of vector2: (1, 30000)\n", + "Shape of X_new before preprocessing: (2, 30000)\n", + "Shape after imputation: (2, 30000)\n", + "Shape after scaling: (2, 30000)\n", + "Shape after PCA: (2, 100)\n", + "Message 1:\n", + "Predicted class: 1\n", + "Class probabilities: [0.186 0.814]\n", + "\n", + "Message 2:\n", + "Predicted class: 0\n", + "Class probabilities: [0.75 0.25]\n", + "\n", + "Final shape of X_new: (2, 100)\n" + ] + } + ] + }, { "cell_type": "code", "source": [ @@ -4430,42 +10515,23 @@ ], "metadata": { "colab": { - "base_uri": "https://localhost:8080/" + "base_uri": "https://localhost:8080/", + "height": 218 }, "id": "9qalTrUsqpVD", - 
"outputId": "ab89ae33-96cb-41ad-83ab-2d0f7d21a7fe" + "outputId": "38d1ca00-fa5d-4cc8-bbf1-a5386be19c6b" }, - "execution_count": 7, + "execution_count": 111, "outputs": [ { - "output_type": "stream", - "name": "stdout", - "text": [ - "\n", - "Top 10 Pipelines Across All Runs and Evaluations:\n", - "+------------------------+---------+\n", - "| Pipeline | Count |\n", - "+========================+=========+\n", - "| xgbclassifier | 44 |\n", - "+------------------------+---------+\n", - "| extratreesclassifier | 20 |\n", - "+------------------------+---------+\n", - "| bernoullinb | 13 |\n", - "+------------------------+---------+\n", - "| decisiontreeclassifier | 12 |\n", - "+------------------------+---------+\n", - "| gaussiannb | 7 |\n", - "+------------------------+---------+\n", - "| kneighborsclassifier | 7 |\n", - "+------------------------+---------+\n", - "| randomforestclassifier | 6 |\n", - "+------------------------+---------+\n", - "| mlpclassifier | 3 |\n", - "+------------------------+---------+\n", - "| sgdclassifier | 3 |\n", - "+------------------------+---------+\n", - "| multinomialnb | 1 |\n", - "+------------------------+---------+\n" + "output_type": "error", + "ename": "NameError", + "evalue": "name 'results_df' is not defined", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0;31m# Iterate through all runs and all evaluated pipelines\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 16\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrow\u001b[0m \u001b[0;32min\u001b[0m 
\u001b[0mresults_df\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miterrows\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 17\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mpipeline_str\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrow\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'pipelines_tested'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0msimple_pipeline\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msimplify_pipeline\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpipeline_str\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'results_df' is not defined" ] } ] @@ -4508,7 +10574,7 @@ "id": "SGH18PwBSdeE", "outputId": "6bb659aa-bdf4-49fc-ad4a-c9f8968ce3c5" }, - "execution_count": 5, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -4697,7 +10763,7 @@ "id": "iyExRJRjsDBO", "outputId": "dfaff146-db7c-47d5-bd93-535282346bea" }, - "execution_count": 24, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -4793,6 +10859,82 @@ } ] }, + { + "cell_type": "code", + "source": [ + "import joblib\n", + "import numpy as np\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import f1_score\n", + "\n", + "# Find the best run (highest F1 score)\n", + "best_run_index = results_df['f1_score'].idxmax()\n", + "best_pipeline_str = results_df.loc[best_run_index, 'best_pipeline']\n", + "\n", + "# Re-fit the best pipeline on the entire dataset\n", + "X_train_full, X_test_full, y_train_full, y_test_full = train_test_split(X_pca, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded)\n", + "best_model = tpot.fitted_pipeline_\n", + "\n", + "# Save the model using joblib\n", + "model_filename = 'best_tpot_nn_model.joblib'\n", + "joblib.dump(best_model, model_filename)\n", + 
"\n", + "print(f\"\\nBest model exported as joblib: {model_filename}\")\n", + "\n", + "# You can also save the best pipeline as a Python script\n", + "python_filename = 'best_tpot_nn_pipeline.py'\n", + "best_pipeline_script = tpot.export(python_filename)\n", + "\n", + "print(f\"Best pipeline saved as Python script: {python_filename}\")\n", + "\n", + "# Print the best pipeline\n", + "print(f\"\\nBest Pipeline:\\n{best_pipeline_str}\")\n", + "\n", + "# Verify the saved model\n", + "try:\n", + " loaded_model = joblib.load(model_filename)\n", + " y_pred = loaded_model.predict(X_test_full)\n", + " f1 = f1_score(y_test_full, y_pred, average='weighted')\n", + " print(f\"\\nLoaded model verified. F1 Score: {f1:.4f}\")\n", + "except Exception as e:\n", + " print(f\"\\nError verifying saved model: {str(e)}\")\n", + "\n", + "# Print information about how to use the saved model\n", + "print(\"\\nTo use the saved model in the future:\")\n", + "print(\"1. Load the model: loaded_model = joblib.load('best_tpot_nn_model.joblib')\")\n", + "print(\"2. Make predictions: predictions = loaded_model.predict(X)\")\n", + "print(\"Note: Make sure to preprocess your input data (X) the same way as during training.\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "tQ06A8TMgdKw", + "outputId": "8e9cbfe1-8098-437a-c184-9172d0284db7" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "Best model exported as joblib: best_tpot_nn_model.joblib\n", + "Best pipeline saved as Python script: best_tpot_nn_pipeline.py\n", + "\n", + "Best Pipeline:\n", + "Pipeline(steps=[('gaussiannb', GaussianNB())])\n", + "\n", + "Loaded model verified. F1 Score: 0.9988\n", + "\n", + "To use the saved model in the future:\n", + "1. Load the model: loaded_model = joblib.load('best_tpot_nn_model.joblib')\n", + "2. 
Make predictions: predictions = loaded_model.predict(X)\n", + "Note: Make sure to preprocess your input data (X) the same way as during training.\n" + ] + } + ] + }, { "cell_type": "code", "source": [ @@ -4907,7 +11049,7 @@ "id": "5m0mtcVZduol", "outputId": "1d8b8679-4cbc-4574-dc35-6e7ab8b3d1cd" }, - "execution_count": 25, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -5008,7 +11150,7 @@ "id": "3lBpziEae4sf", "outputId": "08837957-1ab6-4acb-ab65-b0f0fca2b56c" }, - "execution_count": 26, + "execution_count": null, "outputs": [ { "output_type": "display_data", @@ -5189,7 +11331,7 @@ "id": "pDB9efndcv9g", "outputId": "922183ba-2287-40a7-8d73-32a476c0557f" }, - "execution_count": 27, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -5362,7 +11504,7 @@ "id": "VgKt6AU28-6f", "outputId": "ab7671a3-1229-42f5-807a-eec12327733a" }, - "execution_count": 28, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -5463,7 +11605,7 @@ "id": "SYRHdR4_YKXA", "outputId": "a25b02c4-ae41-4ef9-c701-a77d227b7ab9" }, - "execution_count": 29, + "execution_count": null, "outputs": [ { "output_type": "display_data",