diff --git a/Elasticsearch.ipynb b/Elasticsearch.ipynb new file mode 100644 index 0000000..5e60be2 --- /dev/null +++ b/Elasticsearch.ipynb @@ -0,0 +1,126 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "eda4ddbf-c146-45b5-8a40-b9ac90f1465f", + "metadata": {}, + "outputs": [], + "source": [ + "# Elasticsearch \n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f8747542-a2d1-4814-8dc2-acf172db2d0c", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import requests\n", + "import pandas as pd\n", + "\n", + "# Elasticsearch base URL\n", + "base_url = \"http://192.168.20.106:9200\"\n", + "# Index name\n", + "index = \"winlogbeat-*\"\n", + "\n", + "# Initial search request to start scrolling\n", + "initial_response = requests.post(\n", + " f\"{base_url}/{index}/_search?scroll=1m\",\n", + " json={\n", + " \"size\": 10000, # Adjust the size as per your requirement\n", + " \"query\": {\"match_all\": {}}\n", + " }\n", + ").json()\n", + "\n", + "# Extract scroll ID from the initial response\n", + "scroll_id = initial_response[\"_scroll_id\"]\n", + "\n", + "# Process initial batch of documents\n", + "hits = initial_response[\"hits\"][\"hits\"]\n", + "data = [hit[\"_source\"] for hit in hits]\n", + "\n", + "# Track total documents retrieved\n", + "total_documents_retrieved = len(data)\n", + "print(f\"Retrieved {total_documents_retrieved} documents.\")\n", + "\n", + "# Loop to fetch subsequent batches of documents until no more documents are left\n", + "while hits:\n", + " # Fetch next batch of documents using scroll API\n", + " response = requests.post(\n", + " f\"{base_url}/_search/scroll\",\n", + " json={\"scroll\": \"1m\", \"scroll_id\": scroll_id}\n", + " ).json()\n", + " \n", + " # Extract scroll ID from the response\n", + " scroll_id = response[\"_scroll_id\"]\n", + " \n", + " # Process batch of documents\n", + " hits = response[\"hits\"][\"hits\"]\n", + " \n", + " # If no hits, break out of the loop\n", + " if not hits:\n", + " break\n", + " \n", + " # Extend data with new batch of documents\n", + " data.extend([hit[\"_source\"] for hit in hits])\n", + " \n", + " # Update total documents retrieved\n", + " total_documents_retrieved += len(hits)\n", + " print(f\"Retrieved {total_documents_retrieved} documents.\")\n", + "\n", + "# Convert data to pandas DataFrame\n", + "df = pd.DataFrame(data)\n", + "\n", + "# Display DataFrame\n", + "print(df)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "deb60f70-f62d-4802-8928-5ea18bbc7b3e", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "df.to_json(\"lab_logs_normal_activity_may_6_2024.json\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9faae732-f464-4d11-924e-aeba4f293def", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}