From f189ee9c9659aa356d1b4e12ea9146d7034d41ad Mon Sep 17 00:00:00 2001 From: Marius Ciepluch <11855163+norandom@users.noreply.github.com> Date: Thu, 1 Aug 2024 10:32:33 +0200 Subject: [PATCH] fixed parquet serialisation with pandas and polars --- ...AE_sysmon_dataset_(Excel_implant_C2).ipynb | 2087 ++++++++++------- 1 file changed, 1207 insertions(+), 880 deletions(-) diff --git a/LinFormer_AutoML_on_AE_sysmon_dataset_(Excel_implant_C2).ipynb b/LinFormer_AutoML_on_AE_sysmon_dataset_(Excel_implant_C2).ipynb index 11c199e..046af77 100644 --- a/LinFormer_AutoML_on_AE_sysmon_dataset_(Excel_implant_C2).ipynb +++ b/LinFormer_AutoML_on_AE_sysmon_dataset_(Excel_implant_C2).ipynb @@ -5147,690 +5147,6 @@ "description_width": "" } }, - "d35be0ff26cc4d0e85b498638ec728da": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_6c3982ed895846f2a07befc28277debd", - "IPY_MODEL_a031633d17474670a3055d94249d58ee", - "IPY_MODEL_08d30171bbb44668aea730cc7e2e828a" - ], - "layout": "IPY_MODEL_bda89cfac064435b8de02f41e689e0a3" - } - }, - "6c3982ed895846f2a07befc28277debd": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_603fa104247d4ae8888440acca48221e", - "placeholder": "​", - "style": "IPY_MODEL_bf6290dc87bd447da28102899e57ffe5", - "value": "100%" - } - }, - "a031633d17474670a3055d94249d58ee": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_fe8beb6587574f93a2947c2bcc907091", - "max": 8039037, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_3b5d864ced1149c4979b491cbc3070f7", - "value": 8039037 - } - }, - "08d30171bbb44668aea730cc7e2e828a": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_e2306dc620194ce485bd2663c7d25738", - "placeholder": "​", - "style": "IPY_MODEL_329389fea5f74dca965f296707c2a8d0", - "value": " 8.04M/8.04M [00:00<00:00, 29.6MiB/s]" - } - }, - "bda89cfac064435b8de02f41e689e0a3": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "603fa104247d4ae8888440acca48221e": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "bf6290dc87bd447da28102899e57ffe5": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "fe8beb6587574f93a2947c2bcc907091": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "3b5d864ced1149c4979b491cbc3070f7": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "e2306dc620194ce485bd2663c7d25738": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "329389fea5f74dca965f296707c2a8d0": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "9d26aed57a8244ada934039dd928555c": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_b04abee189594c62b235e5076deafc84", - "IPY_MODEL_ff0cc2d2cf8f4009bd200fd17d7ba517", - "IPY_MODEL_49f821f89aa44034af5988b888734185" - ], - "layout": "IPY_MODEL_f10f65b1cb0547928b1c7b87c6385de4" - } - }, - "b04abee189594c62b235e5076deafc84": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_bdeb6c6df5c049a69c0b613a42a69e35", - "placeholder": "​", - "style": "IPY_MODEL_565b8a229f66455886175a51a975878d", - "value": "100%" - } - }, - "ff0cc2d2cf8f4009bd200fd17d7ba517": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_3491f280a58046518c5c28686cd80eb5", - "max": 1716416, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_ef3de5b665104edcab59a1200259b897", - "value": 1716416 - } - }, - "49f821f89aa44034af5988b888734185": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_ed1d6f1156f748f8993dda1ee97f5caf", - "placeholder": "​", - "style": "IPY_MODEL_e84db532db0643fd81ff21364b3b2e53", - "value": " 1.72M/1.72M [00:00<00:00, 29.0MiB/s]" - } - }, - "f10f65b1cb0547928b1c7b87c6385de4": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "bdeb6c6df5c049a69c0b613a42a69e35": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "565b8a229f66455886175a51a975878d": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "3491f280a58046518c5c28686cd80eb5": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "ef3de5b665104edcab59a1200259b897": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "ed1d6f1156f748f8993dda1ee97f5caf": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e84db532db0643fd81ff21364b3b2e53": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, "48125358d93747af8d60adf49cb4bed3": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", @@ -6514,6 +5830,690 @@ "_view_name": "StyleView", "description_width": "" } + }, + "2cb39463ffff4f9f8c6c6ece6f0e2769": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_c51ec13acbd346de98bf66fd543c8ddd", + "IPY_MODEL_b325e19f0a7c4baebd5660a78e45131a", + "IPY_MODEL_3878d666a20c45d995dfc79b02c2c165" + ], + "layout": "IPY_MODEL_e2aee7171ace4dc0bb8cd09220db5d12" + } + }, + "c51ec13acbd346de98bf66fd543c8ddd": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_48622890a0254e49b2d26af09626560f", + "placeholder": "​", + "style": "IPY_MODEL_bcc8ab2a76e54b20a36e8b953a78f0e4", + "value": "100%" + } + }, + "b325e19f0a7c4baebd5660a78e45131a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4dcbd8944dcc4ea1b6992ad7740da36e", + "max": 8039037, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_961a05a5d5b446699f82905129b7cd19", + "value": 8039037 + } + }, + "3878d666a20c45d995dfc79b02c2c165": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8454cae7dcf54932a356ad972733775f", + "placeholder": "​", + "style": "IPY_MODEL_5a0d14012c40493081d1cca56b5ffde2", + "value": " 8.04M/8.04M [00:00<00:00, 70.2MiB/s]" + } + }, + "e2aee7171ace4dc0bb8cd09220db5d12": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "48622890a0254e49b2d26af09626560f": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "bcc8ab2a76e54b20a36e8b953a78f0e4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "4dcbd8944dcc4ea1b6992ad7740da36e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "961a05a5d5b446699f82905129b7cd19": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "8454cae7dcf54932a356ad972733775f": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5a0d14012c40493081d1cca56b5ffde2": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "6043ef5c70a3493d8f45d8231726d2a6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_317e803897b64bc58bb4b7d9d0ef2504", + "IPY_MODEL_97270bdece1442719201203cfe72f262", + "IPY_MODEL_ef2ae2c4892247919876aa97f42a3561" + ], + "layout": "IPY_MODEL_e5482a1a80244f039aeadfd8fe87534f" + } + }, + "317e803897b64bc58bb4b7d9d0ef2504": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1c213845078846779f14d6143da25162", + "placeholder": "​", + "style": "IPY_MODEL_805d5e39429847c893d38d02524b6a59", + "value": "100%" + } + }, + "97270bdece1442719201203cfe72f262": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_09e0bcda1a3645268fc0ec1b7540538f", + "max": 1716416, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_07ffb70db96a431cba11b328e65b312d", + "value": 1716416 + } + }, + "ef2ae2c4892247919876aa97f42a3561": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ce339a8ee46c435493635d30526a8f35", + "placeholder": "​", + "style": "IPY_MODEL_2b4516e19f694d9c98a038ac222735be", + "value": " 1.72M/1.72M [00:00<00:00, 29.5MiB/s]" + } + }, + "e5482a1a80244f039aeadfd8fe87534f": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1c213845078846779f14d6143da25162": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "805d5e39429847c893d38d02524b6a59": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "09e0bcda1a3645268fc0ec1b7540538f": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "07ffb70db96a431cba11b328e65b312d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "ce339a8ee46c435493635d30526a8f35": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2b4516e19f694d9c98a038ac222735be": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } } } } @@ -6555,7 +6555,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 1, "metadata": { "id": "ayxWRSxzgwh_" }, @@ -6578,6 +6578,61 @@ "\n" ] }, + { + "cell_type": "markdown", + "source": [ + "## Tpot can use RAPIDS for better GPU support\n", + "\n", + "https://medium.com/rapids-ai/faster-automl-with-tpot-and-rapids-758455cd89e5\n", + "\n", + "https://docs.rapids.ai/api/cuml/stable/" + ], + "metadata": { + "id": "67jTk-_N42-A" + } + }, + { + "cell_type": "code", + "source": [ + "import cuml\n", + "cuml.__version__" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "0Gu0pYEO4pxy", + "outputId": "2b25f0cc-39a9-41f2-ae05-67226ae91040" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'24.04.00'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 2 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Initialize GitHub authentication (token)\n", + "\n", + "GitHub's API doesn't allow anonymous release downloads. We can use a read-only token." + ], + "metadata": { + "id": "OQTe2IE95P7c" + } + }, { "cell_type": "code", "source": [ @@ -6596,9 +6651,9 @@ "base_uri": "https://localhost:8080/" }, "id": "vyKWa35bkFcG", - "outputId": "afaf2961-374c-4ced-ba84-a8eb74963834" + "outputId": "10f15782-6ed1-4669-aa9b-b349408b4578" }, - "execution_count": 9, + "execution_count": 3, "outputs": [ { "output_type": "stream", @@ -6616,9 +6671,16 @@ "\n", "These samples contain Sysmon log activity of Dropper Malware (C2 Dropper, MS Excel VBA, Covenant).\n", "\n", - "No AE campaigns, just the Dropper itself.\n", + "No full AE campaigns with post-exploitation, just the Dropper itself.\n", "\n", - "We are looking at 1000 documents, some malicious and some not. Which ones are malicious? How does the VBA Excel malware behave? How not? Can ML help to find out?" + "The set contains security agent telemetry (aka sysmon lohs) of 1000 documents, some malicious and some not.\n", + "\n", + "## Objectives\n", + "\n", + "1. Which ones are malicious?\n", + "2. How does the VBA Excel malware behave? \n", + "3. Can ML help to find out?\n", + "4. Can Tpot autogen the ML models?" ], "metadata": { "id": "oYVNy4rNojZc" @@ -6706,23 +6768,23 @@ "base_uri": "https://localhost:8080/", "height": 85, "referenced_widgets": [ - "d35be0ff26cc4d0e85b498638ec728da", - "6c3982ed895846f2a07befc28277debd", - "a031633d17474670a3055d94249d58ee", - "08d30171bbb44668aea730cc7e2e828a", - "bda89cfac064435b8de02f41e689e0a3", - "603fa104247d4ae8888440acca48221e", - "bf6290dc87bd447da28102899e57ffe5", - "fe8beb6587574f93a2947c2bcc907091", - "3b5d864ced1149c4979b491cbc3070f7", - "e2306dc620194ce485bd2663c7d25738", - "329389fea5f74dca965f296707c2a8d0" + "2cb39463ffff4f9f8c6c6ece6f0e2769", + "c51ec13acbd346de98bf66fd543c8ddd", + "b325e19f0a7c4baebd5660a78e45131a", + "3878d666a20c45d995dfc79b02c2c165", + "e2aee7171ace4dc0bb8cd09220db5d12", + "48622890a0254e49b2d26af09626560f", + "bcc8ab2a76e54b20a36e8b953a78f0e4", + "4dcbd8944dcc4ea1b6992ad7740da36e", + "961a05a5d5b446699f82905129b7cd19", + "8454cae7dcf54932a356ad972733775f", + "5a0d14012c40493081d1cca56b5ffde2" ] }, "id": "5EEoa3gUmFpn", - "outputId": "806090b8-b3d5-473b-9da5-52de1860cb00" + "outputId": "c2f808b1-a2f2-455d-e3cd-939658d5b658" }, - "execution_count": 10, + "execution_count": 4, "outputs": [ { "output_type": "stream", @@ -6740,7 +6802,7 @@ "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, - "model_id": "d35be0ff26cc4d0e85b498638ec728da" + "model_id": "2cb39463ffff4f9f8c6c6ece6f0e2769" } }, "metadata": {} @@ -6785,23 +6847,23 @@ "base_uri": "https://localhost:8080/", "height": 85, "referenced_widgets": [ - "9d26aed57a8244ada934039dd928555c", - "b04abee189594c62b235e5076deafc84", - "ff0cc2d2cf8f4009bd200fd17d7ba517", - "49f821f89aa44034af5988b888734185", - "f10f65b1cb0547928b1c7b87c6385de4", - "bdeb6c6df5c049a69c0b613a42a69e35", - "565b8a229f66455886175a51a975878d", - "3491f280a58046518c5c28686cd80eb5", - "ef3de5b665104edcab59a1200259b897", - "ed1d6f1156f748f8993dda1ee97f5caf", - "e84db532db0643fd81ff21364b3b2e53" + "6043ef5c70a3493d8f45d8231726d2a6", + "317e803897b64bc58bb4b7d9d0ef2504", + "97270bdece1442719201203cfe72f262", + "ef2ae2c4892247919876aa97f42a3561", + "e5482a1a80244f039aeadfd8fe87534f", + "1c213845078846779f14d6143da25162", + "805d5e39429847c893d38d02524b6a59", + "09e0bcda1a3645268fc0ec1b7540538f", + "07ffb70db96a431cba11b328e65b312d", + "ce339a8ee46c435493635d30526a8f35", + "2b4516e19f694d9c98a038ac222735be" ] }, "id": "PA-8JXLyn4Uv", - "outputId": "1850bb3e-a2bc-4cf0-d988-98f3986e2ab1" + "outputId": "08639279-a742-40ca-b95e-f9cfe9d7b58e" }, - "execution_count": 75, + "execution_count": 5, "outputs": [ { "output_type": "stream", @@ -6819,7 +6881,7 @@ "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, - "model_id": "9d26aed57a8244ada934039dd928555c" + "model_id": "6043ef5c70a3493d8f45d8231726d2a6" } }, "metadata": {} @@ -6880,7 +6942,7 @@ "id": "WdRLtddt378k", "outputId": "58f22716-5958-4cf4-a2d4-eff7384b87cc" }, - "execution_count": 11, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -6939,9 +7001,9 @@ "base_uri": "https://localhost:8080/" }, "id": "SCKEWW-aoNDQ", - "outputId": "058d265e-3989-4641-86fa-3d518efbb04a" + "outputId": "ce3d8ea3-0f89-487f-835a-15365a46dff8" }, - "execution_count": 1, + "execution_count": 6, "outputs": [ { "output_type": "stream", @@ -7019,7 +7081,7 @@ "metadata": { "id": "Dzc4ZwG0NcMQ" }, - "execution_count": 2, + "execution_count": 7, "outputs": [] }, { @@ -7089,9 +7151,9 @@ "base_uri": "https://localhost:8080/" }, "id": "_CJUqGQUqFew", - "outputId": "19a5362a-01dd-4208-f7f0-93f019ac6738" + "outputId": "4aba53a3-25a2-4a49-8a4b-89f4ec5b1783" }, - "execution_count": 3, + "execution_count": 8, "outputs": [ { "output_type": "stream", @@ -7192,9 +7254,9 @@ "base_uri": "https://localhost:8080/" }, "id": "WBy3Rqj_orz_", - "outputId": "619326ed-d925-4816-cae6-c1a0274d8a43" + "outputId": "303db2d0-8fac-48be-b275-5d58409c70c2" }, - "execution_count": 4, + "execution_count": 9, "outputs": [ { "output_type": "stream", @@ -7238,35 +7300,35 @@ "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Ut… │\n", "└───────────┴───────────┴───────────┴───────────┴───┴───────────┴───────────┴───────────┴──────────┘\n", "shape: (41, 2)\n", - "┌─────────────────────┬───────┐\n", - "│ parent_image ┆ count │\n", - "│ --- ┆ --- │\n", - "│ str ┆ u32 │\n", - "╞═════════════════════╪═══════╡\n", - "│ pip.exe ┆ 9 │\n", - "│ Ec2Config.exe ┆ 2 │\n", - "│ CompatTelRunner.exe ┆ 2 │\n", - "│ PLUGScheduler.exe ┆ 2 │\n", - "│ … ┆ … │\n", - "│ AvLaunch.exe ┆ 1 │\n", - "│ AvastSvc.exe ┆ 7 │\n", - "│ EXCEL.EXE ┆ 116 │\n", - "│ Update.exe ┆ 1 │\n", - "└─────────────────────┴───────┘\n", + "┌───────────────────────────────────┬───────┐\n", + "│ parent_image ┆ count │\n", + "│ --- ┆ --- │\n", + "│ str ┆ u32 │\n", + "╞═══════════════════════════════════╪═══════╡\n", + "│ PLUGScheduler.exe ┆ 2 │\n", + "│ MpCmdRun.exe ┆ 1 │\n", + "│ MicrosoftEdge_X64_127.0.2651.74_… ┆ 1 │\n", + "│ csc.exe ┆ 1 │\n", + "│ … ┆ … │\n", + "│ upfc.exe ┆ 1 │\n", + "│ overseer.exe ┆ 4 │\n", + "│ pip.exe ┆ 9 │\n", + "│ smss.exe ┆ 3 │\n", + "└───────────────────────────────────┴───────┘\n", "shape: (91, 2)\n", "┌───────────────────────────────────┬───────┐\n", "│ target_filename ┆ count │\n", "│ --- ┆ --- │\n", "│ str ┆ u32 │\n", "╞═══════════════════════════════════╪═══════╡\n", - "│ C:\\Users\\student\\AppData\\Local\\T… ┆ 1 │\n", - "│ C:\\Users\\student\\AppData\\Local\\T… ┆ 1 │\n", + "│ C:\\Program Files (x86)\\Microsoft… ┆ 1 │\n", "│ C:\\Users\\student\\AppData\\Local\\T… ┆ 1 │\n", "│ C:\\Program Files (x86)\\Microsoft… ┆ 1 │\n", + "│ C:\\Users\\student\\AppData\\Local\\T… ┆ 1 │\n", "│ … ┆ … │\n", "│ C:\\Users\\student\\AppData\\Local\\T… ┆ 1 │\n", - "│ C:\\Program Files\\WindowsApps\\Mic… ┆ 1 │\n", - "│ C:\\Users\\student\\AppData\\Local\\M… ┆ 1 │\n", + "│ C:\\Users\\student\\AppData\\Local\\T… ┆ 1 │\n", + "│ C:\\Users\\student\\AppData\\Local\\T… ┆ 1 │\n", "│ C:\\Users\\student\\AppData\\Local\\T… ┆ 1 │\n", "└───────────────────────────────────┴───────┘\n" ] @@ -7328,9 +7390,9 @@ "base_uri": "https://localhost:8080/" }, "id": "b0wLxKfzsg1e", - "outputId": "6719c91c-42fd-447a-c1f1-d29737af744f" + "outputId": "a08e43b0-a259-4112-b9f6-b62d126a9425" }, - "execution_count": 5, + "execution_count": 10, "outputs": [ { "output_type": "stream", @@ -7376,9 +7438,9 @@ "base_uri": "https://localhost:8080/" }, "id": "uj7NV2wGPHi2", - "outputId": "6cbee9fb-4710-4855-b556-c5fed2dfce7e" + "outputId": "f4baf9a2-4dbd-4fa1-fd61-8529159cd4fa" }, - "execution_count": 6, + "execution_count": 11, "outputs": [ { "output_type": "stream", @@ -7406,7 +7468,7 @@ "metadata": { "id": "ngGJUIsXu5fL" }, - "execution_count": 7, + "execution_count": 12, "outputs": [] }, { @@ -7417,7 +7479,7 @@ "metadata": { "id": "lRQS5xLTvpZv" }, - "execution_count": 8, + "execution_count": 13, "outputs": [] }, { @@ -7454,9 +7516,9 @@ "base_uri": "https://localhost:8080/" }, "id": "X5ptzPnfxP9M", - "outputId": "4ef582a3-b626-466e-e3f7-5adab909eb13" + "outputId": "6bc45ef2-75a9-4055-e30d-83de249c8065" }, - "execution_count": 9, + "execution_count": 14, "outputs": [ { "output_type": "stream", @@ -7492,8 +7554,8 @@ "│ --- ┆ --- │\n", "│ str ┆ u32 │\n", "╞═══════╪═══════╡\n", - "│ bad ┆ 114 │\n", "│ good ┆ 13341 │\n", + "│ bad ┆ 114 │\n", "└───────┴───────┘\n" ] } @@ -7522,9 +7584,9 @@ "base_uri": "https://localhost:8080/" }, "id": "dRH3AReO_i5D", - "outputId": "3f93ab3d-f474-486a-9434-7991efd54c4d" + "outputId": "264a8f1e-5f67-4be4-88a5-a8962df60071" }, - "execution_count": 10, + "execution_count": 15, "outputs": [ { "output_type": "stream", @@ -7570,7 +7632,7 @@ "metadata": { "id": "_Vgi0rjn_RIn" }, - "execution_count": 11, + "execution_count": 16, "outputs": [] }, { @@ -7610,7 +7672,18 @@ "metadata": { "id": "SsW6vlQNQhAy" }, - "execution_count": 12, + "execution_count": 17, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "## The model can get fine-tuned." + ], + "metadata": { + "id": "Q-Y1HQMqzN-5" + }, + "execution_count": null, "outputs": [] }, { @@ -7664,15 +7737,15 @@ "base_uri": "https://localhost:8080/" }, "id": "4SguqPPKSdpJ", - "outputId": "b873ab30-300f-4b23-bf61-f7ed5ced345d" + "outputId": "1dda19c4-66e1-48e2-e3b5-e7ec63e68102" }, - "execution_count": 123, + "execution_count": 18, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "['@timestamp', 'host.hostname', 'host.ip', 'log.level', 'winlog.event_id', 'winlog.task', 'message', 'filtered_message', 'image', 'target_filename', 'parent_image', 'text', 'temp_folder', 'filename', 'label', 'message_vector']\n", + "['@timestamp', 'host.hostname', 'host.ip', 'log.level', 'winlog.event_id', 'winlog.task', 'message', 'filtered_message', 'image', 'target_filename', 'parent_image', 'text', 'temp_folder', 'filename', 'label']\n", "shape: (13_455, 2)\n", "┌───────┬──────────────────────────────┐\n", "│ label ┆ filtered_message │\n", @@ -7705,24 +7778,24 @@ "└───────┴──────────────────────────────┘\n", "File created:\n", "RuleName: EXE\n", - "UtcTime: 2024-07-28 21:54:31.900\n", - "ProcessGuid: {18e8265a-be12-66a6-3010-000000004400}\n", - "ProcessId: 1248\n", + "UtcTime: 2024-07-28 17:05:05.424\n", + "ProcessGuid: {18e8265a-7a3c-66a6-d507-000000004400}\n", + "ProcessId: 3736\n", "Image: C:\\Program Files\\Microsoft Office\\Root\\Office16\\EXCEL.EXE\n", "TargetFilename: C:\\Users\\student\\AppData\\Local\\Temp\\file.exe\n", "CreationUtcTime: 2024-07-23 14:24:50.520\n", "\n", "\n", "\n", - "Dns query:\n", - "RuleName: -\n", - "UtcTime: 2024-07-28 19:05:11.949\n", - "ProcessGuid: {18e8265a-9666-66a6-4f0b-000000004400}\n", - "ProcessId: 3232\n", - "QueryName: messaging\n", - "QueryStatus: 0\n", - "QueryResults: type: 5 prod-campaignaggregator.omexexternallfb.office.net.akadns.net;::ffff:52.109.16.3;\n", - "Image: C:\\Program Files\\Microsoft Office\\root\\Office16\\EXCEL.EXE\n" + "Registry value set:\n", + "RuleName: InvDB-Path\n", + "EventType: SetValue\n", + "UtcTime: 2024-07-28 15:13:19.757\n", + "ProcessGuid: {18e8265a-5f49-66a6-2601-000000004400}\n", + "ProcessId: 10812\n", + "Image: C:\\Windows\\system32\\CompatTelRunner.exe\n", + "TargetObject: \\REGISTRY\\A\\{90cbbb87-bac4-4fa3-1d8b-b1a042a75259}\\Root\\InventoryApplicationFile\\msedge_pwa_launc|326a60d0d6b1ca83\\LowerCaseLongPath\n", + "Details: c:\\program files (x86)\\microsoft\\edgecore\\126.0.2592.113\\msedge_pwa_launcher.exe\n" ] } ] @@ -7740,6 +7813,7 @@ "\n", "# Define the device (assuming you're using PyTorch and want to specify CPU or GPU)\n", "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", + "print(device)\n", "\n", "def vectorize_text(text):\n", " MAX_LENGTH = 700 # Define the maximum length of tokens for the model\n", @@ -7767,7 +7841,7 @@ "metadata": { "id": "EyvdFj83SQKI" }, - "execution_count": 127, + "execution_count": 19, "outputs": [] }, { @@ -7781,15 +7855,23 @@ "metadata": { "id": "doS16Nq4S37g" }, - "execution_count": 15, + "execution_count": 20, "outputs": [] }, + { + "cell_type": "markdown", + "source": [ + "## Saving the vectorized dataset as Parquet\n", + "\n", + "This may require up to 64 GB RAM." + ], + "metadata": { + "id": "ScKF9UeB66LK" + } + }, { "cell_type": "code", "source": [ - "# broken\n", - "\n", - "\n", "import polars as pl\n", "import numpy as np\n", "import json\n", @@ -7813,14 +7895,101 @@ "# Write to Parquet\n", "df_f.write_parquet(\"lab_logs_blindtest_activity_sysmon_1000samples_july_28_2024_filtered_with_vectors.parquet\")" ], + "metadata": { + "id": "clEaIJjHXRhM" + }, + "execution_count": 21, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print(df_f.columns)" + ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, - "id": "clEaIJjHXRhM", - "outputId": "35b77b4d-5c35-46af-a0ae-645bd3f47328" + "id": "iixa8gHy80B6", + "outputId": "cd9ead43-a885-423c-81a2-a7b8cfa4c997" }, - "execution_count": 39, + "execution_count": 22, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "['@timestamp', 'host.hostname', 'host.ip', 'log.level', 'winlog.event_id', 'winlog.task', 'message', 'filtered_message', 'image', 'target_filename', 'parent_image', 'text', 'temp_folder', 'filename', 'label', 'message_vector_str']\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import json\n", + "import polars as pl\n", + "\n", + "# Read from Parquet using pandas\n", + "pdf_read = pd.read_parquet(\"lab_logs_blindtest_activity_sysmon_1000samples_july_28_2024_filtered_with_vectors.parquet\")\n", + "\n", + "# Function to convert JSON-encoded strings back to numpy arrays\n", + "def string_to_vector(s):\n", + " return np.array(json.loads(s))\n", + "\n", + "# Convert JSON strings back to numpy arrays\n", + "pdf_read['message_vector'] = pdf_read['message_vector_str'].apply(string_to_vector)\n", + "\n", + "# Verify the shape of the vector\n", + "print(pdf_read['message_vector'].apply(len).head())\n", + "\n", + "# Check a sample vector to ensure dimensionality is preserved\n", + "sample_vector = pdf_read['message_vector'].iloc[0]\n", + "print(f\"Sample vector shape: {sample_vector.shape}\")\n", + "\n", + "# Convert back to Polars and drop the string column\n", + "df_read = pl.from_pandas(pdf_read.drop(columns='message_vector_str'))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "981FBJ_rdh-V", + "outputId": "828c7fb5-e1e4-4ffd-bbd2-0346625262f6" + }, + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0 30000\n", + "1 30000\n", + "2 30000\n", + "3 30000\n", + "4 30000\n", + "Name: message_vector, dtype: int64\n", + "Sample vector shape: (30000,)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Verify in Polars\n", + "print(df_read.select([pl.col(\"message_vector\").list.len().alias(\"vector_lengths\")]).head())" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ZZ1CPkp0AL-z", + "outputId": "897f19f7-37ab-48a5-b36a-f55ee2c26d4e" + }, + "execution_count": 10, "outputs": [ { "output_type": "stream", @@ -7828,7 +7997,7 @@ "text": [ "shape: (5, 1)\n", "┌────────────────┐\n", - "│ message_vector │\n", + "│ vector_lengths │\n", "│ --- │\n", "│ u32 │\n", "╞════════════════╡\n", @@ -7837,9 +8006,7 @@ "│ 30000 │\n", "│ 30000 │\n", "│ 30000 │\n", - "└────────────────┘\n", - "Sample vector length: 30000\n", - "Numpy vector shape: (30000,)\n" + "└────────────────┘\n" ] } ] @@ -7847,61 +8014,155 @@ { "cell_type": "code", "source": [ - "# broken\n", - "\n", "import pandas as pd\n", "import numpy as np\n", + "import json\n", "\n", "# Read from Parquet using pandas\n", "pdf_read = pd.read_parquet(\"lab_logs_blindtest_activity_sysmon_1000samples_july_28_2024_filtered_with_vectors.parquet\")\n", "\n", - "# Convert lists back to numpy arrays if needed\n", - "pdf_read['message_vector'] = pdf_read['message_vector_str'].apply(np.array)\n", + "# Function to convert JSON-encoded strings back to numpy arrays\n", + "def string_to_vector(s):\n", + " return np.array(json.loads(s))\n", + "\n", + "# Convert JSON strings back to numpy arrays\n", + "pdf_read['message_vector'] = pdf_read['message_vector_str'].apply(string_to_vector)\n", "\n", "# Verify the shape of the vector\n", - "print(pdf_read['message_vector'].str.len().head())\n", + "print(\"Vector lengths:\")\n", + "print(pdf_read['message_vector'].apply(len).head())\n", "\n", "# Check a sample vector to ensure dimensionality is preserved\n", "sample_vector = pdf_read['message_vector'].iloc[0]\n", - "print(f\"Sample vector shape: {sample_vector.shape}\")\n", + "print(f\"\\nSample vector shape: {sample_vector.shape}\")\n", "\n", - "# If you need to convert back to Polars\n", - "df_read = pl.from_pandas(pdf_read)\n", + "# Drop the string column as it's no longer needed\n", + "pdf_read = pdf_read.drop(columns='message_vector_str')\n", "\n", - "# Verify in Polars\n", - "print(df_read.select(pl.col(\"message_vector\").list.len()).head())" + "# Verify vector lengths (equivalent to the Polars operation)\n", + "print(\"\\nVector lengths (pandas equivalent of Polars operation):\")\n", + "print(pdf_read['message_vector'].apply(len).head())\n", + "\n", + "# If you need to see the full DataFrame structure\n", + "print(\"\\nDataFrame info:\")\n", + "pdf_read.info()\n", + "\n", + "# If you want to see the first few rows of the DataFrame\n", + "print(\"\\nFirst few rows of the DataFrame:\")\n", + "print(pdf_read.head())" ], "metadata": { "colab": { - "base_uri": "https://localhost:8080/", - "height": 327 + "base_uri": "https://localhost:8080/" }, - "id": "981FBJ_rdh-V", - "outputId": "f363fc9e-071a-44ca-c9b5-1a1ce009e23a" + "id": "ePLz8udYDHsK", + "outputId": "3935d756-51ba-4359-bb91-5a663145d65f" }, - "execution_count": 4, + "execution_count": 11, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "0 636264\n", - "1 636780\n", - "2 640357\n", - "3 640529\n", - "4 640428\n", - "Name: message_vector, dtype: int64\n" - ] - }, - { - "output_type": "error", - "ename": "AttributeError", - "evalue": "'str' object has no attribute 'shape'", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;31m# Check a sample vector to ensure dimensionality is preserved\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0msample_vector\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpdf_read\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'message_vector'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 15\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"Sample vector shape: {sample_vector.shape}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 16\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[0;31m# If you need to convert back to Polars\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mAttributeError\u001b[0m: 'str' object has no attribute 'shape'" + "Vector lengths:\n", + "0 30000\n", + "1 30000\n", + "2 30000\n", + "3 30000\n", + "4 30000\n", + "Name: message_vector, dtype: int64\n", + "\n", + "Sample vector shape: (30000,)\n", + "\n", + "Vector lengths (pandas equivalent of Polars operation):\n", + "0 30000\n", + "1 30000\n", + "2 30000\n", + "3 30000\n", + "4 30000\n", + "Name: message_vector, dtype: int64\n", + "\n", + "DataFrame info:\n", + "\n", + "RangeIndex: 13455 entries, 0 to 13454\n", + "Data columns (total 16 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 @timestamp 13455 non-null datetime64[us, UTC]\n", + " 1 host.hostname 13455 non-null object \n", + " 2 host.ip 13455 non-null object \n", + " 3 log.level 13455 non-null object \n", + " 4 winlog.event_id 13455 non-null int64 \n", + " 5 winlog.task 13455 non-null object \n", + " 6 message 13455 non-null object \n", + " 7 filtered_message 13455 non-null object \n", + " 8 image 13455 non-null object \n", + " 9 target_filename 13455 non-null object \n", + " 10 parent_image 13455 non-null object \n", + " 11 text 13455 non-null object \n", + " 12 temp_folder 13455 non-null object \n", + " 13 filename 13455 non-null object \n", + " 14 label 13455 non-null object \n", + " 15 message_vector 13455 non-null object \n", + "dtypes: datetime64[us, UTC](1), int64(1), object(14)\n", + "memory usage: 1.6+ MB\n", + "\n", + "First few rows of the DataFrame:\n", + " @timestamp host.hostname host.ip \\\n", + "0 2024-07-28 15:08:24.277000+00:00 win10 fe80::c1af:35de:6006:d4cf \n", + "1 2024-07-28 15:08:24.488000+00:00 win10 fe80::c1af:35de:6006:d4cf \n", + "2 2024-07-28 15:08:25.005000+00:00 win10 fe80::c1af:35de:6006:d4cf \n", + "3 2024-07-28 15:08:25.005000+00:00 win10 fe80::c1af:35de:6006:d4cf \n", + "4 2024-07-28 15:08:25.030000+00:00 win10 fe80::c1af:35de:6006:d4cf \n", + "\n", + " log.level winlog.event_id \\\n", + "0 information 3 \n", + "1 information 3 \n", + "2 information 10 \n", + "3 information 10 \n", + "4 information 10 \n", + "\n", + " winlog.task \\\n", + "0 Network connection detected (rule: NetworkConn... \n", + "1 Network connection detected (rule: NetworkConn... \n", + "2 Process accessed (rule: ProcessAccess) \n", + "3 Process accessed (rule: ProcessAccess) \n", + "4 Process accessed (rule: ProcessAccess) \n", + "\n", + " message \\\n", + "0 Network connection detected:\\nRuleName: -\\nUtc... \n", + "1 Network connection detected:\\nRuleName: -\\nUtc... \n", + "2 Process accessed:\\nRuleName: -\\nUtcTime: 2024-... \n", + "3 Process accessed:\\nRuleName: -\\nUtcTime: 2024-... \n", + "4 Process accessed:\\nRuleName: -\\nUtcTime: 2024-... \n", + "\n", + " filtered_message \\\n", + "0 Network connection detected:\\nRuleName: -\\nUtc... \n", + "1 Network connection detected:\\nRuleName: -\\nUtc... \n", + "2 Process accessed:\\nRuleName: -\\nUtcTime: 2024-... \n", + "3 Process accessed:\\nRuleName: -\\nUtcTime: 2024-... \n", + "4 Process accessed:\\nRuleName: -\\nUtcTime: 2024-... \n", + "\n", + " image target_filename parent_image \\\n", + "0 C:\\Windows\\System32\\svchost.exe \n", + "1 C:\\Windows\\System32\\svchost.exe \n", + "2 C:\\Windows\\system32\\svchost.exe \n", + "3 C:\\Windows\\system32\\svchost.exe \n", + "4 C:\\Windows\\system32\\svchost.exe \n", + "\n", + " text temp_folder filename \\\n", + "0 Network connection detected:\\nRuleName: -\\nUtc... No \n", + "1 Network connection detected:\\nRuleName: -\\nUtc... No \n", + "2 Process accessed:\\nRuleName: -\\nUtcTime: 2024-... No \n", + "3 Process accessed:\\nRuleName: -\\nUtcTime: 2024-... No \n", + "4 Process accessed:\\nRuleName: -\\nUtcTime: 2024-... No \n", + "\n", + " label message_vector \n", + "0 good [-0.2214650958776474, 0.04250260442495346, 0.2... \n", + "1 good [-0.19032716751098633, 0.010175472125411034, 0... \n", + "2 good [-0.26184871792793274, 0.08533265441656113, 0.... \n", + "3 good [-0.25396400690078735, 0.10707883536815643, 0.... \n", + "4 good [-0.24748775362968445, 0.06198200583457947, 0.... \n" ] } ] @@ -7912,6 +8173,8 @@ "import polars as pl\n", "import numpy as np\n", "\n", + "df_f = pdf_read\n", + "\n", "print(df_f)\n", "\n", "print()\n", @@ -7924,55 +8187,119 @@ "base_uri": "https://localhost:8080/" }, "id": "W849gxLgM3vP", - "outputId": "368b0055-d479-405f-b9bd-388f4348a7ef" + "outputId": "d837bee0-508d-44c4-dc40-6bde9175e773" }, - "execution_count": 19, + "execution_count": 13, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "shape: (13_455, 16)\n", - "┌────────────┬────────────┬────────────┬────────────┬───┬───────────┬──────────┬───────┬───────────┐\n", - "│ @timestamp ┆ host.hostn ┆ host.ip ┆ log.level ┆ … ┆ temp_fold ┆ filename ┆ label ┆ message_v │\n", - "│ --- ┆ ame ┆ --- ┆ --- ┆ ┆ er ┆ --- ┆ --- ┆ ector │\n", - "│ datetime[μ ┆ --- ┆ str ┆ str ┆ ┆ --- ┆ str ┆ str ┆ --- │\n", - "│ s, UTC] ┆ str ┆ ┆ ┆ ┆ str ┆ ┆ ┆ object │\n", - "╞════════════╪════════════╪════════════╪════════════╪═══╪═══════════╪══════════╪═══════╪═══════════╡\n", - "│ 2024-07-28 ┆ win10 ┆ fe80::c1af ┆ informatio ┆ … ┆ No ┆ ┆ good ┆ [-0.48557 │\n", - "│ 15:08:24.2 ┆ ┆ :35de:6006 ┆ n ┆ ┆ ┆ ┆ ┆ 746 0.19 │\n", - "│ 77 UTC ┆ ┆ :d4cf ┆ ┆ ┆ ┆ ┆ ┆ 012241 │\n", - "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 0.0323… │\n", - "│ 2024-07-28 ┆ win10 ┆ fe80::c1af ┆ informatio ┆ … ┆ No ┆ ┆ good ┆ [-0.44357 │\n", - "│ 15:08:24.4 ┆ ┆ :35de:6006 ┆ n ┆ ┆ ┆ ┆ ┆ 03 0.22 │\n", - "│ 88 UTC ┆ ┆ :d4cf ┆ ┆ ┆ ┆ ┆ ┆ 111408 │\n", - "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 0.0301… │\n", - "│ 2024-07-28 ┆ win10 ┆ fe80::c1af ┆ informatio ┆ … ┆ No ┆ ┆ good ┆ [-0.29139 │\n", - "│ 15:08:25.0 ┆ ┆ :35de:6006 ┆ n ┆ ┆ ┆ ┆ ┆ 94 0.17 │\n", - "│ 05 UTC ┆ ┆ :d4cf ┆ ┆ ┆ ┆ ┆ ┆ 932689 │\n", - "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 0.1016… │\n", - "│ 2024-07-28 ┆ win10 ┆ fe80::c1af ┆ informatio ┆ … ┆ No ┆ ┆ good ┆ [-0.29095 │\n", - "│ 15:08:25.0 ┆ ┆ :35de:6006 ┆ n ┆ ┆ ┆ ┆ ┆ 116 0.16 │\n", - "│ 05 UTC ┆ ┆ :d4cf ┆ ┆ ┆ ┆ ┆ ┆ 983014 │\n", - "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 0.1212… │\n", - "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", - "│ 2024-07-28 ┆ win10 ┆ fe80::c1af ┆ informatio ┆ … ┆ No ┆ ┆ good ┆ [-0.27548 │\n", - "│ 23:35:53.0 ┆ ┆ :35de:6006 ┆ n ┆ ┆ ┆ ┆ ┆ 81 0.19 │\n", - "│ 54 UTC ┆ ┆ :d4cf ┆ ┆ ┆ ┆ ┆ ┆ 213162 │\n", - "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 0.1174… │\n", - "│ 2024-07-28 ┆ win10 ┆ fe80::c1af ┆ informatio ┆ … ┆ No ┆ ┆ good ┆ [-0.29359 │\n", - "│ 23:35:54.1 ┆ ┆ :35de:6006 ┆ n ┆ ┆ ┆ ┆ ┆ 23 0.18 │\n", - "│ 33 UTC ┆ ┆ :d4cf ┆ ┆ ┆ ┆ ┆ ┆ 748309 │\n", - "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 0.1262… │\n", - "│ 2024-07-28 ┆ win10 ┆ fe80::c1af ┆ informatio ┆ … ┆ No ┆ ┆ good ┆ [-0.27579 │\n", - "│ 23:35:54.1 ┆ ┆ :35de:6006 ┆ n ┆ ┆ ┆ ┆ ┆ 793 0.18 │\n", - "│ 33 UTC ┆ ┆ :d4cf ┆ ┆ ┆ ┆ ┆ ┆ 788172 │\n", - "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 0.1131… │\n", - "│ 2024-07-28 ┆ win10 ┆ fe80::c1af ┆ informatio ┆ … ┆ No ┆ ┆ good ┆ [-0.35910 │\n", - "│ 23:41:55.3 ┆ ┆ :35de:6006 ┆ n ┆ ┆ ┆ ┆ ┆ 064 │\n", - "│ 01 UTC ┆ ┆ :d4cf ┆ ┆ ┆ ┆ ┆ ┆ 0.1997092 │\n", - "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 0.0654… │\n", - "└────────────┴────────────┴────────────┴────────────┴───┴───────────┴──────────┴───────┴───────────┘\n", + " @timestamp host.hostname \\\n", + "0 2024-07-28 15:08:24.277000+00:00 win10 \n", + "1 2024-07-28 15:08:24.488000+00:00 win10 \n", + "2 2024-07-28 15:08:25.005000+00:00 win10 \n", + "3 2024-07-28 15:08:25.005000+00:00 win10 \n", + "4 2024-07-28 15:08:25.030000+00:00 win10 \n", + "... ... ... \n", + "13450 2024-07-28 23:35:53.054000+00:00 win10 \n", + "13451 2024-07-28 23:35:53.054000+00:00 win10 \n", + "13452 2024-07-28 23:35:54.133000+00:00 win10 \n", + "13453 2024-07-28 23:35:54.133000+00:00 win10 \n", + "13454 2024-07-28 23:41:55.301000+00:00 win10 \n", + "\n", + " host.ip log.level winlog.event_id \\\n", + "0 fe80::c1af:35de:6006:d4cf information 3 \n", + "1 fe80::c1af:35de:6006:d4cf information 3 \n", + "2 fe80::c1af:35de:6006:d4cf information 10 \n", + "3 fe80::c1af:35de:6006:d4cf information 10 \n", + "4 fe80::c1af:35de:6006:d4cf information 10 \n", + "... ... ... ... \n", + "13450 fe80::c1af:35de:6006:d4cf information 10 \n", + "13451 fe80::c1af:35de:6006:d4cf information 10 \n", + "13452 fe80::c1af:35de:6006:d4cf information 10 \n", + "13453 fe80::c1af:35de:6006:d4cf information 10 \n", + "13454 fe80::c1af:35de:6006:d4cf information 1 \n", + "\n", + " winlog.task \\\n", + "0 Network connection detected (rule: NetworkConn... \n", + "1 Network connection detected (rule: NetworkConn... \n", + "2 Process accessed (rule: ProcessAccess) \n", + "3 Process accessed (rule: ProcessAccess) \n", + "4 Process accessed (rule: ProcessAccess) \n", + "... ... \n", + "13450 Process accessed (rule: ProcessAccess) \n", + "13451 Process accessed (rule: ProcessAccess) \n", + "13452 Process accessed (rule: ProcessAccess) \n", + "13453 Process accessed (rule: ProcessAccess) \n", + "13454 Process Create (rule: ProcessCreate) \n", + "\n", + " message \\\n", + "0 Network connection detected:\\nRuleName: -\\nUtc... \n", + "1 Network connection detected:\\nRuleName: -\\nUtc... \n", + "2 Process accessed:\\nRuleName: -\\nUtcTime: 2024-... \n", + "3 Process accessed:\\nRuleName: -\\nUtcTime: 2024-... \n", + "4 Process accessed:\\nRuleName: -\\nUtcTime: 2024-... \n", + "... ... \n", + "13450 Process accessed:\\nRuleName: -\\nUtcTime: 2024-... \n", + "13451 Process accessed:\\nRuleName: -\\nUtcTime: 2024-... \n", + "13452 Process accessed:\\nRuleName: -\\nUtcTime: 2024-... \n", + "13453 Process accessed:\\nRuleName: -\\nUtcTime: 2024-... \n", + "13454 Process Create:\\nRuleName: -\\nUtcTime: 2024-07... \n", + "\n", + " filtered_message \\\n", + "0 Network connection detected:\\nRuleName: -\\nUtc... \n", + "1 Network connection detected:\\nRuleName: -\\nUtc... \n", + "2 Process accessed:\\nRuleName: -\\nUtcTime: 2024-... \n", + "3 Process accessed:\\nRuleName: -\\nUtcTime: 2024-... \n", + "4 Process accessed:\\nRuleName: -\\nUtcTime: 2024-... \n", + "... ... \n", + "13450 Process accessed:\\nRuleName: -\\nUtcTime: 2024-... \n", + "13451 Process accessed:\\nRuleName: -\\nUtcTime: 2024-... \n", + "13452 Process accessed:\\nRuleName: -\\nUtcTime: 2024-... \n", + "13453 Process accessed:\\nRuleName: -\\nUtcTime: 2024-... \n", + "13454 Process Create:\\nRuleName: -\\nUtcTime: 2024-07... \n", + "\n", + " image target_filename parent_image \\\n", + "0 C:\\Windows\\System32\\svchost.exe \n", + "1 C:\\Windows\\System32\\svchost.exe \n", + "2 C:\\Windows\\system32\\svchost.exe \n", + "3 C:\\Windows\\system32\\svchost.exe \n", + "4 C:\\Windows\\system32\\svchost.exe \n", + "... ... ... ... \n", + "13450 C:\\Windows\\system32\\svchost.exe \n", + "13451 C:\\Windows\\system32\\svchost.exe \n", + "13452 C:\\Windows\\system32\\svchost.exe \n", + "13453 C:\\Windows\\system32\\svchost.exe \n", + "13454 C:\\Windows\\System32\\svchost.exe services.exe \n", + "\n", + " text temp_folder filename \\\n", + "0 Network connection detected:\\nRuleName: -\\nUtc... No \n", + "1 Network connection detected:\\nRuleName: -\\nUtc... No \n", + "2 Process accessed:\\nRuleName: -\\nUtcTime: 2024-... No \n", + "3 Process accessed:\\nRuleName: -\\nUtcTime: 2024-... No \n", + "4 Process accessed:\\nRuleName: -\\nUtcTime: 2024-... No \n", + "... ... ... ... \n", + "13450 Process accessed:\\nRuleName: -\\nUtcTime: 2024-... No \n", + "13451 Process accessed:\\nRuleName: -\\nUtcTime: 2024-... No \n", + "13452 Process accessed:\\nRuleName: -\\nUtcTime: 2024-... No \n", + "13453 Process accessed:\\nRuleName: -\\nUtcTime: 2024-... No \n", + "13454 Process Create:\\nRuleName: -\\nUtcTime: 2024-07... No \n", + "\n", + " label message_vector \n", + "0 good [-0.2214650958776474, 0.04250260442495346, 0.2... \n", + "1 good [-0.19032716751098633, 0.010175472125411034, 0... \n", + "2 good [-0.26184871792793274, 0.08533265441656113, 0.... \n", + "3 good [-0.25396400690078735, 0.10707883536815643, 0.... \n", + "4 good [-0.24748775362968445, 0.06198200583457947, 0.... \n", + "... ... ... \n", + "13450 good [-0.24104158580303192, 0.07091948390007019, 0.... \n", + "13451 good [-0.23055040836334229, 0.07124319672584534, 0.... \n", + "13452 good [-0.2496212273836136, 0.0811350867152214, 0.21... \n", + "13453 good [-0.2501278519630432, 0.0767292007803917, 0.21... \n", + "13454 good [-0.22038744390010834, 0.07044447958469391, 0.... \n", + "\n", + "[13455 rows x 16 columns]\n", "\n", "Original data shape: (13455, 30000)\n" ] @@ -8023,9 +8350,9 @@ "base_uri": "https://localhost:8080/" }, "id": "9JjYJzacaD-T", - "outputId": "a80aa8c4-1624-438b-9407-135cf4f3c284" + "outputId": "6cc29ed7-604a-4a04-8186-df7e8bb38ad3" }, - "execution_count": 28, + "execution_count": 14, "outputs": [ { "output_type": "stream", @@ -8088,9 +8415,9 @@ "base_uri": "https://localhost:8080/" }, "id": "5NHDYuDAiUis", - "outputId": "2a639822-3cba-4e68-9124-9ec8af95c818" + "outputId": "d91a5815-0930-43ae-eddc-6fef0f5fccc0" }, - "execution_count": 30, + "execution_count": 15, "outputs": [ { "output_type": "stream", @@ -8888,7 +9215,7 @@ "id": "8KwJfQvG0ZvJ", "outputId": "13426649-eaaf-4fc9-8392-f648a79da6fe" }, - "execution_count": 7, + "execution_count": null, "outputs": [ { "output_type": "error", @@ -8994,7 +9321,7 @@ "id": "zh-ULZZ0099E", "outputId": "9bb9313a-7e4c-40bd-8c64-f78d1c57636d" }, - "execution_count": 34, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -9022,7 +9349,7 @@ "id": "brczGHV083MW", "outputId": "54081516-0074-47d2-da92-ad921d8500f8" }, - "execution_count": 60, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -9101,7 +9428,7 @@ "id": "3HpxPWlIPHT4", "outputId": "4501c78a-74d8-49b7-bc3e-0c647ba5e71f" }, - "execution_count": 53, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -9163,7 +9490,7 @@ "id": "lpbrnU_06Gfi", "outputId": "007bae7d-2533-477b-b23f-8699f09321bb" }, - "execution_count": 36, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -9232,7 +9559,7 @@ "id": "5GaLAsg661Qh", "outputId": "a546f9f0-9c8f-433c-d5f5-8440a058f840" }, - "execution_count": 37, + "execution_count": null, "outputs": [ { "output_type": "display_data", @@ -9330,7 +9657,7 @@ "id": "cZqWaRDr61mL", "outputId": "d77b060e-d380-4978-c920-97d260f2cf64" }, - "execution_count": 38, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -9449,7 +9776,7 @@ "id": "yIeeLfSlBXI7", "outputId": "ee84da33-1d68-4cc3-8f82-82c20b4c2846" }, - "execution_count": 61, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -9683,7 +10010,7 @@ "id": "saqEW4G2HbI3", "outputId": "d149844d-0cf7-4e33-89d9-cadf77890678" }, - "execution_count": 90, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -9746,7 +10073,7 @@ "id": "raf1OKv4_E23", "outputId": "6f7ab778-99af-401d-a8e0-2d8bff230902" }, - "execution_count": 91, + "execution_count": null, "outputs": [ { "output_type": "display_data", @@ -9835,7 +10162,7 @@ "id": "WAJiYO5dK7eL", "outputId": "21d68432-3b20-4fca-9775-bdf302ce8c0e" }, - "execution_count": 63, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -9929,7 +10256,7 @@ "id": "RbXSx4Go-6pd", "outputId": "6fbe80e4-4818-42dd-d5b6-895cfbcce41f" }, - "execution_count": 95, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -10127,7 +10454,7 @@ "id": "TnChV008alMz", "outputId": "2e7242bb-41eb-4657-9aba-80bf41b9e218" }, - "execution_count": 137, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -10276,7 +10603,7 @@ "id": "top9wzRCdAKK", "outputId": "decb8210-d1fc-4452-bc9c-ca6962caff0f" }, - "execution_count": 105, + "execution_count": null, "outputs": [ { "output_type": "display_data", @@ -10361,7 +10688,7 @@ "id": "nmu5z7OVdQHA", "outputId": "8193a942-2d4f-45b6-d1dc-e0dfed3e1d6c" }, - "execution_count": 77, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -10455,7 +10782,7 @@ "id": "_AkVOj7ehjcQ", "outputId": "b4cd08a9-38e0-4f33-c910-e2cb43610837" }, - "execution_count": 138, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -10521,7 +10848,7 @@ "id": "9qalTrUsqpVD", "outputId": "38d1ca00-fa5d-4cc8-bbf1-a5386be19c6b" }, - "execution_count": 111, + "execution_count": null, "outputs": [ { "output_type": "error",