mirror of
https://github.com/norandom/log2ml.git
synced 2025-04-19 07:11:27 +00:00
adding GitHub data release manager prototype
This commit is contained in:
parent
8cea2d2eb5
commit
e38d2ab4c8
368
GitHub-Release-and-Secrets.ipynb
Normal file
368
GitHub-Release-and-Secrets.ipynb
Normal file
@ -0,0 +1,368 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "ebb1428f6428646",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
"%pip install pandas==2.2.2\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 7,
|
||||||
|
"id": "b8b55c9e96c43bc3",
|
||||||
|
"metadata": {
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-05-07T11:57:54.752360Z",
|
||||||
|
"start_time": "2024-05-07T11:57:51.973091Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Requirement already satisfied: python-dotenv in /home/marius/miniconda3/envs/llm_langchain/lib/python3.11/site-packages (1.0.1)\r\n",
|
||||||
|
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
"%pip install python-dotenv==1.0.1"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 53,
|
||||||
|
"id": "b68da291930fbf4",
|
||||||
|
"metadata": {
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-05-07T16:43:17.166579Z",
|
||||||
|
"start_time": "2024-05-07T16:43:12.783177Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Collecting PyGithub\r\n",
|
||||||
|
" Downloading PyGithub-2.3.0-py3-none-any.whl.metadata (3.8 kB)\r\n",
|
||||||
|
"Collecting pynacl>=1.4.0 (from PyGithub)\r\n",
|
||||||
|
" Using cached PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl.metadata (8.6 kB)\r\n",
|
||||||
|
"Requirement already satisfied: requests>=2.14.0 in /home/marius/miniconda3/envs/llm_langchain/lib/python3.11/site-packages (from PyGithub) (2.31.0)\r\n",
|
||||||
|
"Collecting pyjwt>=2.4.0 (from pyjwt[crypto]>=2.4.0->PyGithub)\r\n",
|
||||||
|
" Downloading PyJWT-2.8.0-py3-none-any.whl.metadata (4.2 kB)\r\n",
|
||||||
|
"Requirement already satisfied: typing-extensions>=4.0.0 in /home/marius/miniconda3/envs/llm_langchain/lib/python3.11/site-packages (from PyGithub) (4.10.0)\r\n",
|
||||||
|
"Requirement already satisfied: urllib3>=1.26.0 in /home/marius/miniconda3/envs/llm_langchain/lib/python3.11/site-packages (from PyGithub) (2.2.1)\r\n",
|
||||||
|
"Requirement already satisfied: Deprecated in /home/marius/miniconda3/envs/llm_langchain/lib/python3.11/site-packages (from PyGithub) (1.2.14)\r\n",
|
||||||
|
"Requirement already satisfied: cryptography>=3.4.0 in /home/marius/miniconda3/envs/llm_langchain/lib/python3.11/site-packages (from pyjwt[crypto]>=2.4.0->PyGithub) (42.0.5)\r\n",
|
||||||
|
"Requirement already satisfied: cffi>=1.4.1 in /home/marius/miniconda3/envs/llm_langchain/lib/python3.11/site-packages (from pynacl>=1.4.0->PyGithub) (1.16.0)\r\n",
|
||||||
|
"Requirement already satisfied: charset-normalizer<4,>=2 in /home/marius/miniconda3/envs/llm_langchain/lib/python3.11/site-packages (from requests>=2.14.0->PyGithub) (3.3.2)\r\n",
|
||||||
|
"Requirement already satisfied: idna<4,>=2.5 in /home/marius/miniconda3/envs/llm_langchain/lib/python3.11/site-packages (from requests>=2.14.0->PyGithub) (3.6)\r\n",
|
||||||
|
"Requirement already satisfied: certifi>=2017.4.17 in /home/marius/miniconda3/envs/llm_langchain/lib/python3.11/site-packages (from requests>=2.14.0->PyGithub) (2024.2.2)\r\n",
|
||||||
|
"Requirement already satisfied: wrapt<2,>=1.10 in /home/marius/miniconda3/envs/llm_langchain/lib/python3.11/site-packages (from Deprecated->PyGithub) (1.16.0)\r\n",
|
||||||
|
"Requirement already satisfied: pycparser in /home/marius/miniconda3/envs/llm_langchain/lib/python3.11/site-packages (from cffi>=1.4.1->pynacl>=1.4.0->PyGithub) (2.21)\r\n",
|
||||||
|
"Downloading PyGithub-2.3.0-py3-none-any.whl (354 kB)\r\n",
|
||||||
|
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m354.4/354.4 kB\u001b[0m \u001b[31m3.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\r\n",
|
||||||
|
"\u001b[?25hDownloading PyJWT-2.8.0-py3-none-any.whl (22 kB)\r\n",
|
||||||
|
"Downloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (856 kB)\r\n",
|
||||||
|
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m856.7/856.7 kB\u001b[0m \u001b[31m11.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\r\n",
|
||||||
|
"\u001b[?25hInstalling collected packages: pyjwt, pynacl, PyGithub\r\n",
|
||||||
|
"Successfully installed PyGithub-2.3.0 pyjwt-2.8.0 pynacl-1.5.0\r\n",
|
||||||
|
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
"%pip install PyGithub==2.3.0\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"id": "68cdc5d497e208e3",
|
||||||
|
"metadata": {
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-05-11T16:18:46.000392Z",
|
||||||
|
"start_time": "2024-05-11T16:18:45.992401Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from dotenv import load_dotenv\n",
|
||||||
|
"import os\n",
|
||||||
|
"\n",
|
||||||
"load_dotenv(\"thesis_env_ro\", verbose=True)  # take environment variables from the file\n",
"token = os.getenv('GITHUB_PERSONAL_ACCESS_TOKEN')\n",
"if not token:\n",
"    # Stop here: with no token, download_file() would send the literal header 'token None'.\n",
"    raise RuntimeError(\"GITHUB_PERSONAL_ACCESS_TOKEN is not set; check the 'thesis_env_ro' env file\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 7,
|
||||||
|
"id": "1069e0bfa4686f67",
|
||||||
|
"metadata": {
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-05-11T16:23:55.511058Z",
|
||||||
|
"start_time": "2024-05-11T16:23:23.511924Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"https://api.github.com/repos/norandom/log2ml/releases/assets/166259205\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"application/vnd.jupyter.widget-view+json": {
|
||||||
|
"model_id": "7d46bf8efe9e4e0591950e029c353573",
|
||||||
|
"version_major": 2,
|
||||||
|
"version_minor": 0
|
||||||
|
},
|
||||||
|
"text/plain": [
|
||||||
|
" 0%| | 0.00/1.90G [00:00<?, ?iB/s]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "display_data"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"File downloaded successfully and saved as lab_logs_normal_activity_may_11_2024.csv\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"from github import Github\n",
|
||||||
|
"import requests\n",
|
||||||
|
"from tqdm.notebook import tqdm\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
"def get_specific_file_from_tagged_release(token, repo_name, tag_name, filename):\n",
"    \"\"\"Look up an asset by name in the release carrying a given tag.\n",
"\n",
"    Args:\n",
"        token: GitHub access token passed to the PyGithub client.\n",
"        repo_name: Repository in \"owner/name\" form.\n",
"        tag_name: Tag of the release to search.\n",
"        filename: Exact name of the wanted release asset.\n",
"\n",
"    Returns:\n",
"        The asset's `url` attribute for the first match, or None (after\n",
"        printing a hint) when no release with that tag has such an asset.\n",
"    \"\"\"\n",
"    client = Github(token)\n",
"    repository = client.get_repo(repo_name)\n",
"\n",
"    for candidate in repository.get_releases():\n",
"        if candidate.tag_name != tag_name:\n",
"            continue\n",
"        for item in candidate.get_assets():\n",
"            if item.name == filename:\n",
"                return item.url\n",
"\n",
"    print(\"File not found. Try get_specific_file_from_latest_release() instead.\")\n",
"    return None\n",
|
||||||
|
"\n",
|
||||||
"def get_specific_file_from_latest_release(token, repo_name, filename):\n",
"    \"\"\"Look up an asset by name in the repository's latest release.\n",
"\n",
"    Args:\n",
"        token: GitHub access token passed to the PyGithub client.\n",
"        repo_name: Repository in \"owner/name\" form.\n",
"        filename: Exact name of the wanted release asset.\n",
"\n",
"    Returns:\n",
"        The asset's `url` attribute (the API URL, which needs request headers\n",
"        to download) for the first match, or None when nothing matches.\n",
"    \"\"\"\n",
"    latest = Github(token).get_repo(repo_name).get_latest_release()\n",
"    matching_urls = (item.url for item in latest.get_assets() if item.name == filename)\n",
"    return next(matching_urls, None)\n",
|
||||||
|
"\n",
|
||||||
"def download_file(url, token, save_path):\n",
"    \"\"\"Stream a GitHub release asset to disk while showing a progress bar.\n",
"\n",
"    Args:\n",
"        url: API URL of the asset to fetch.\n",
"        token: GitHub access token, sent in the Authorization header.\n",
"        save_path: Local path the response body is written to ('wb' mode,\n",
"            so an existing file is overwritten).\n",
"\n",
"    Raises:\n",
"        requests.HTTPError: If the server answers with an error status.\n",
"        requests.Timeout: If connecting or reading stalls for 60 seconds.\n",
"\n",
"    A mismatch between the bytes written and Content-Length is reported with\n",
"    a printed message, not an exception.\n",
"    \"\"\"\n",
"    headers = {'Authorization': f'token {token}', 'Accept': 'application/octet-stream'}\n",
"    block_size = 1024 * 1024  # 1 MiB; 1 KiB chunks mean millions of iterations on GB-sized assets\n",
"\n",
"    # One request is enough: requests follows the redirect on its own and drops\n",
"    # the Authorization header when the redirect leaves the original host.\n",
"    # Re-requesting the redirected URL with these headers would issue a second\n",
"    # request and hand the token to whichever host serves the file.\n",
"    with requests.get(url, headers=headers, stream=True, timeout=60) as response:\n",
"        response.raise_for_status()\n",
"        total_size_in_bytes = int(response.headers.get('content-length', 0))\n",
"        bytes_written = 0\n",
"\n",
"        # Both context managers close on error, so neither the file handle nor the bar leaks.\n",
"        with open(save_path, 'wb') as file, tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True) as progress_bar:\n",
"            for data in response.iter_content(block_size):\n",
"                file.write(data)\n",
"                bytes_written += len(data)\n",
"                progress_bar.update(len(data))\n",
"\n",
"    if total_size_in_bytes != 0 and bytes_written != total_size_in_bytes:\n",
"        print(\"ERROR, something went wrong\")\n",
"    else:\n",
"        print(f\"File downloaded successfully and saved as {save_path}\")\n",
|
||||||
|
"\n",
|
||||||
"# Token read from the environment in an earlier cell\n",
"github_token = token\n",
"\n",
"# Repository that hosts the release\n",
"repository_name = \"norandom/log2ml\"\n",
"\n",
"# Name of the release asset to look up\n",
"file_name = \"lab_logs_normal_activity_may_6_2024.json\"\n",
"\n",
"# Resolve the asset URL from the release tagged \"foundations\".\n",
"# Lookup against the latest release instead:\n",
"# download_url = get_specific_file_from_latest_release(github_token, repository_name, file_name)\n",
"download_url = get_specific_file_from_tagged_release(github_token, repository_name, \"foundations\", file_name)\n",
"print(download_url)\n",
"\n",
"if not download_url:\n",
"    print(\"File not found.\")\n",
"else:\n",
"    # NOTE(review): the asset requested above is a may_6 .json file, but it is saved\n",
"    # under a may_11 .csv name - confirm which name is intended.\n",
"    local_file_path = \"lab_logs_normal_activity_may_11_2024.csv\"\n",
"    download_file(download_url, github_token, local_file_path)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 9,
|
||||||
|
"id": "393703bd6e7a693f",
|
||||||
|
"metadata": {
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-05-12T07:36:38.888903Z",
|
||||||
|
"start_time": "2024-05-12T07:36:38.468176Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"0 lab_logs_normal_activity_may_11_2024.csv\r\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"!wc -l lab_logs_normal_activity_may_11_2024.csv"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "6b35fdc991ccea39",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Flattening"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "a293810e0531690c",
|
||||||
|
"metadata": {
|
||||||
|
"ExecuteTime": {
|
||||||
|
"start_time": "2024-05-11T15:26:15.019788Z"
|
||||||
|
},
|
||||||
|
"jupyter": {
|
||||||
|
"is_executing": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import json\n",
|
||||||
|
"\n",
|
||||||
"# Flatten the line-delimited JSON log into one CSV whose rows all share the same columns.\n",
"source_path = 'lab_logs_normal_activity_may_11_2024.json'\n",
"flat_path = 'lab_logs_normal_activity_may_11_2024_flat.csv'\n",
"\n",
"\n",
"def iter_flat_frames(path):\n",
"    \"\"\"Yield one flattened DataFrame per non-blank JSON line of `path`.\"\"\"\n",
"    with open(path, 'r') as file:\n",
"        for line in file:\n",
"            if line.strip():  # a blank line would make json.loads raise\n",
"                yield pd.json_normalize(json.loads(line))\n",
"\n",
"\n",
"# Pass 1: collect the union of flattened column names in first-seen order.\n",
"# Records with different keys would otherwise put values in different CSV positions.\n",
"seen_columns = {}\n",
"for frame in iter_flat_frames(source_path):\n",
"    seen_columns.update(dict.fromkeys(frame.columns))\n",
"flat_columns = list(seen_columns)\n",
"\n",
"# Pass 2: write every record against that fixed column list, header first.\n",
"# The output is opened once in 'w' mode, so re-running the cell replaces the\n",
"# file instead of appending duplicate rows to it.\n",
"with open(flat_path, 'w', newline='') as out:\n",
"    for index, frame in enumerate(iter_flat_frames(source_path)):\n",
"        frame.reindex(columns=flat_columns).to_csv(out, header=(index == 0), index=False)\n",
"\n",
"# Preview of the first rows only, for the inspection cell below.\n",
"# NOTE(review): the full file is presumably too large to load at once - confirm.\n",
"flattened_df = pd.read_csv(flat_path, nrows=1000) if flat_columns else pd.DataFrame()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "771af611ba60a456",
|
||||||
|
"metadata": {
|
||||||
|
"jupyter": {
|
||||||
|
"is_executing": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"flattened_df.head(10)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "a4f782c59bb52c3f",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Save the DataFrame to a CSV file\n",
|
||||||
|
"df.to_csv('lab_logs_normal_activity_may_6_2024.csv', index=False)\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"id": "803d4a7af2927bc8",
|
||||||
|
"metadata": {
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-05-07T11:43:06.171446Z",
|
||||||
|
"start_time": "2024-05-07T11:42:52.776821Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Collecting pandas\r\n",
|
||||||
|
" Downloading pandas-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (19 kB)\r\n",
|
||||||
|
"Requirement already satisfied: numpy>=1.23.2 in /home/marius/miniconda3/envs/llm_langchain/lib/python3.11/site-packages (from pandas) (1.26.4)\r\n",
|
||||||
|
"Requirement already satisfied: python-dateutil>=2.8.2 in /home/marius/miniconda3/envs/llm_langchain/lib/python3.11/site-packages (from pandas) (2.9.0)\r\n",
|
||||||
|
"Requirement already satisfied: pytz>=2020.1 in /home/marius/miniconda3/envs/llm_langchain/lib/python3.11/site-packages (from pandas) (2024.1)\r\n",
|
||||||
|
"Collecting tzdata>=2022.7 (from pandas)\r\n",
|
||||||
|
" Using cached tzdata-2024.1-py2.py3-none-any.whl.metadata (1.4 kB)\r\n",
|
||||||
|
"Requirement already satisfied: six>=1.5 in /home/marius/miniconda3/envs/llm_langchain/lib/python3.11/site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\r\n",
|
||||||
|
"Downloading pandas-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.0 MB)\r\n",
|
||||||
|
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.0/13.0 MB\u001b[0m \u001b[31m26.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\r\n",
|
||||||
|
"\u001b[?25hUsing cached tzdata-2024.1-py2.py3-none-any.whl (345 kB)\r\n",
|
||||||
|
"Installing collected packages: tzdata, pandas\r\n",
|
||||||
|
"Successfully installed pandas-2.2.2 tzdata-2024.1\r\n",
|
||||||
|
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "4dda90a02f3fb809",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
"version": "3.11"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user