{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "eda4ddbf-c146-45b5-8a40-b9ac90f1465f",
   "metadata": {},
   "source": [
    "# Elasticsearch\n",
    "\n",
    "Export every document from the `winlogbeat-*` indices into a pandas DataFrame using the scroll API, then save the frame as JSON."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f8747542-a2d1-4814-8dc2-acf172db2d0c",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import requests\n",
    "import pandas as pd\n",
    "\n",
    "# Elasticsearch base URL\n",
    "base_url = \"http://192.168.20.106:9200\"\n",
    "# Index name\n",
    "index = \"winlogbeat-*\"\n",
    "# Hits requested per scroll page\n",
    "page_size = 10000\n",
    "# Keep-alive for the scroll context between two page requests\n",
    "scroll_keepalive = \"1m\"\n",
    "# Seconds to wait for each HTTP response before giving up\n",
    "request_timeout = 60"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "scroll-export-helper",
   "metadata": {},
   "outputs": [],
   "source": [
    "def fetch_all_documents(base_url, index, page_size=10000, scroll=\"1m\", timeout=60):\n",
    "    \"\"\"Return the `_source` of every document in `index` using the scroll API.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    base_url : str\n",
    "        Root URL of the Elasticsearch node, without a trailing slash.\n",
    "    index : str\n",
    "        Index name or pattern to search.\n",
    "    page_size : int\n",
    "        Number of hits requested per scroll page.\n",
    "    scroll : str\n",
    "        Keep-alive sent with every request; it has to cover the time spent\n",
    "        between two consecutive page requests.\n",
    "    timeout : float\n",
    "        Seconds to wait for each HTTP response.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    list of dict\n",
    "        One `_source` dict per hit, in the order the pages were returned.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    requests.HTTPError\n",
    "        If a search or scroll request is answered with an error status.\n",
    "    \"\"\"\n",
    "    documents = []\n",
    "    scroll_id = None\n",
    "    with requests.Session() as session:\n",
    "        try:\n",
    "            response = session.post(\n",
    "                f\"{base_url}/{index}/_search\",\n",
    "                params={\"scroll\": scroll},\n",
    "                json={\"size\": page_size, \"query\": {\"match_all\": {}}},\n",
    "                timeout=timeout,\n",
    "            )\n",
    "            # Fail here with the HTTP error instead of a KeyError further down.\n",
    "            response.raise_for_status()\n",
    "            page = response.json()\n",
    "\n",
    "            while True:\n",
    "                # The scroll ID may change between pages; always use the latest one.\n",
    "                scroll_id = page.get(\"_scroll_id\", scroll_id)\n",
    "                hits = page[\"hits\"][\"hits\"]\n",
    "                if not hits:\n",
    "                    break\n",
    "                documents.extend(hit[\"_source\"] for hit in hits)\n",
    "                print(f\"Retrieved {len(documents)} documents.\")\n",
    "\n",
    "                response = session.post(\n",
    "                    f\"{base_url}/_search/scroll\",\n",
    "                    json={\"scroll\": scroll, \"scroll_id\": scroll_id},\n",
    "                    timeout=timeout,\n",
    "                )\n",
    "                response.raise_for_status()\n",
    "                page = response.json()\n",
    "        finally:\n",
    "            # Release the server-side scroll context instead of waiting for it to expire.\n",
    "            if scroll_id is not None:\n",
    "                try:\n",
    "                    session.delete(\n",
    "                        f\"{base_url}/_search/scroll\",\n",
    "                        json={\"scroll_id\": scroll_id},\n",
    "                        timeout=timeout,\n",
    "                    )\n",
    "                except requests.RequestException as exc:\n",
    "                    # Cleanup is best-effort; do not mask an error raised above.\n",
    "                    print(f\"Could not clear scroll context: {exc}\")\n",
    "    return documents"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "scroll-export-run",
   "metadata": {},
   "outputs": [],
   "source": [
    "data = fetch_all_documents(\n",
    "    base_url,\n",
    "    index,\n",
    "    page_size=page_size,\n",
    "    scroll=scroll_keepalive,\n",
    "    timeout=request_timeout,\n",
    ")\n",
    "\n",
    "# Track total documents retrieved\n",
    "total_documents_retrieved = len(data)\n",
    "print(f\"Retrieved {total_documents_retrieved} documents in total.\")\n",
    "\n",
    "# Convert data to pandas DataFrame\n",
    "df = pd.DataFrame(data)\n",
    "\n",
    "# Show a preview rather than dumping the whole frame\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "deb60f70-f62d-4802-8928-5ea18bbc7b3e",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "df.to_json(\"lab_logs_normal_activity_may_6_2024.json\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}