mirror of
https://github.com/norandom/project_bookworm.git
synced 2024-11-25 09:13:42 +00:00
Created using Colaboratory
This commit is contained in:
parent
564ed54344
commit
eef5b798b4
@ -50,6 +50,8 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"import sys\n",
|
"import sys\n",
|
||||||
"import os\n",
|
"import os\n",
|
||||||
|
"import subprocess\n",
|
||||||
|
"\n",
|
||||||
"IN_COLAB = 'google.colab' in sys.modules\n",
|
"IN_COLAB = 'google.colab' in sys.modules\n",
|
||||||
"\n",
|
"\n",
|
||||||
"if not IN_COLAB:\n",
|
"if not IN_COLAB:\n",
|
||||||
@ -69,6 +71,12 @@
|
|||||||
" output_path_extracted_notes = \"/content/export.txt\"\n",
|
" output_path_extracted_notes = \"/content/export.txt\"\n",
|
||||||
" output_path_extracted_docs = \"/content/export.documents.txt\"\n",
|
" output_path_extracted_docs = \"/content/export.documents.txt\"\n",
|
||||||
" result_db = \"/content/evernote.db\"\n",
|
" result_db = \"/content/evernote.db\"\n",
|
||||||
|
" subprocess.run('''\n",
|
||||||
|
" source <(curl -s https://raw.githubusercontent.com/norandom/project_bookworm/main/scripts/prepare_colab_env.sh)\n",
|
||||||
|
" ''',\n",
|
||||||
|
" shell=True, check=True,\n",
|
||||||
|
" executable='/bin/bash')\n",
|
||||||
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# To suppress some warnings\n",
|
"# To suppress some warnings\n",
|
||||||
"import os\n",
|
"import os\n",
|
||||||
@ -76,16 +84,14 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "markdown",
|
||||||
"source": [
|
"source": [
|
||||||
"# Controls:"
|
"# Checks"
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"id": "8tcn27pzvpRi"
|
"id": "yuhXPdN_z2cW"
|
||||||
},
|
},
|
||||||
"id": "8tcn27pzvpRi",
|
"id": "yuhXPdN_z2cW"
|
||||||
"execution_count": 2,
|
|
||||||
"outputs": []
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
@ -111,6 +117,16 @@
|
|||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"## For the progress bars in Colab"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "B02AY_Gez61T"
|
||||||
|
},
|
||||||
|
"id": "B02AY_Gez61T"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"source": [
|
"source": [
|
||||||
@ -668,7 +684,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 7,
|
"execution_count": 28,
|
||||||
"id": "3081256c9cf22780",
|
"id": "3081256c9cf22780",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"ExecuteTime": {
|
"ExecuteTime": {
|
||||||
@ -679,7 +695,7 @@
|
|||||||
"base_uri": "https://localhost:8080/"
|
"base_uri": "https://localhost:8080/"
|
||||||
},
|
},
|
||||||
"id": "3081256c9cf22780",
|
"id": "3081256c9cf22780",
|
||||||
"outputId": "5630ab81-0756-4a31-dfd5-91f3e34365c1"
|
"outputId": "0a02f0bc-42ce-4f50-e670-fd8ca48111a9"
|
||||||
},
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -698,7 +714,7 @@
|
|||||||
"import torch\n",
|
"import torch\n",
|
||||||
"use_cuda = torch.cuda.is_available()\n",
|
"use_cuda = torch.cuda.is_available()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"USE_GPU=False\n",
|
"USE_GPU=True\n",
|
||||||
"\n",
|
"\n",
|
||||||
"if use_cuda:\n",
|
"if use_cuda:\n",
|
||||||
" print('__CUDNN VERSION:', torch.backends.cudnn.version())\n",
|
" print('__CUDNN VERSION:', torch.backends.cudnn.version())\n",
|
||||||
@ -709,7 +725,8 @@
|
|||||||
" print(\"GPU enabled\")\n",
|
" print(\"GPU enabled\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"if not use_cuda:\n",
|
"if not use_cuda:\n",
|
||||||
" print('No CUDA available')"
|
" print('No CUDA available')\n",
|
||||||
|
" USE_GPU=False\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -867,33 +884,33 @@
|
|||||||
"base_uri": "https://localhost:8080/",
|
"base_uri": "https://localhost:8080/",
|
||||||
"height": 49,
|
"height": 49,
|
||||||
"referenced_widgets": [
|
"referenced_widgets": [
|
||||||
"97a1b60642a848c28b7c9654dccd1de6",
|
"7639624db8b44b7194b33e1587015e7b",
|
||||||
"8e9c21cbf4e84b228589b04c28031986",
|
"d1f1b83ff52b4f339aaf7e3472e88f5f",
|
||||||
"c6b0f42e62b942ff93b0dd203b71afaf",
|
"a4c9656af7644c8794eea9cd95cdbb38",
|
||||||
"24988c14693f4b739617f6b148981712",
|
"eece25d48bd94132bd6e9c25001dd0a3",
|
||||||
"5c56bada895c45f8b955db0b322a30b4",
|
"77829cd4ef2341c58bd37ce7fb173fbf",
|
||||||
"b83ec3c178fa47a3b5466974280db85b",
|
"78e5715e33af4af9a72f348a3cff7a45",
|
||||||
"5d799e20928448c18071eecc9f513789",
|
"89b0f4fbd3c542c6abb5ea2ba0b937fc",
|
||||||
"4a251ff30d6448f3b256692453637a6a",
|
"6013ba1807144ee2b0b4c83d42cf1977",
|
||||||
"90238269c43647caa0a8731f0290d64e",
|
"20db3230c722479db16949f232e23fc8",
|
||||||
"4208c14a250c40feb246c67e8141ca99",
|
"54e9a4e180d74916b620c46cf4da6546",
|
||||||
"976ae07a9b19499991f724a7e40f7e6d"
|
"b1a5928250a94055a95a026804807cf0"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"id": "e6ffc345c26298ad",
|
"id": "e6ffc345c26298ad",
|
||||||
"outputId": "68ff33d9-8c75-4701-96ef-1484a0e5d9d4"
|
"outputId": "c63ea480-2c4f-447e-e948-f51ec0ea6224"
|
||||||
},
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"output_type": "display_data",
|
"output_type": "display_data",
|
||||||
"data": {
|
"data": {
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
"Processing batches: 0%| | 0/966 [00:00<?, ?it/s]"
|
"Processing batches: 0%| | 0/1448 [00:00<?, ?it/s]"
|
||||||
],
|
],
|
||||||
"application/vnd.jupyter.widget-view+json": {
|
"application/vnd.jupyter.widget-view+json": {
|
||||||
"version_major": 2,
|
"version_major": 2,
|
||||||
"version_minor": 0,
|
"version_minor": 0,
|
||||||
"model_id": "97a1b60642a848c28b7c9654dccd1de6"
|
"model_id": "7639624db8b44b7194b33e1587015e7b"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"metadata": {}
|
"metadata": {}
|
||||||
@ -925,7 +942,7 @@
|
|||||||
"def vectorize_data_in_batches(chunks, embeddings):\n",
|
"def vectorize_data_in_batches(chunks, embeddings):\n",
|
||||||
"\n",
|
"\n",
|
||||||
" num_workers = 3\n",
|
" num_workers = 3\n",
|
||||||
" batch_size = 750 # Adjust based on your needs and memory constraints\n",
|
" batch_size = 500 # Adjust based on your needs and memory constraints\n",
|
||||||
"\n",
|
"\n",
|
||||||
" batches = list(divide_chunks(chunks, batch_size))\n",
|
" batches = list(divide_chunks(chunks, batch_size))\n",
|
||||||
" faiss_db = None\n",
|
" faiss_db = None\n",
|
||||||
@ -970,11 +987,11 @@
|
|||||||
"print(type(faiss))"
|
"print(type(faiss))"
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"id": "v6bhYHU5_9oo",
|
|
||||||
"outputId": "01bbcac4-01dc-4efe-e9e9-31c8d2b6ca56",
|
|
||||||
"colab": {
|
"colab": {
|
||||||
"base_uri": "https://localhost:8080/"
|
"base_uri": "https://localhost:8080/"
|
||||||
}
|
},
|
||||||
|
"id": "v6bhYHU5_9oo",
|
||||||
|
"outputId": "01bbcac4-01dc-4efe-e9e9-31c8d2b6ca56"
|
||||||
},
|
},
|
||||||
"id": "v6bhYHU5_9oo",
|
"id": "v6bhYHU5_9oo",
|
||||||
"execution_count": 15,
|
"execution_count": 15,
|
||||||
@ -1013,7 +1030,7 @@
|
|||||||
"accelerator": "GPU",
|
"accelerator": "GPU",
|
||||||
"widgets": {
|
"widgets": {
|
||||||
"application/vnd.jupyter.widget-state+json": {
|
"application/vnd.jupyter.widget-state+json": {
|
||||||
"97a1b60642a848c28b7c9654dccd1de6": {
|
"7639624db8b44b7194b33e1587015e7b": {
|
||||||
"model_module": "@jupyter-widgets/controls",
|
"model_module": "@jupyter-widgets/controls",
|
||||||
"model_name": "HBoxModel",
|
"model_name": "HBoxModel",
|
||||||
"model_module_version": "1.5.0",
|
"model_module_version": "1.5.0",
|
||||||
@ -1028,14 +1045,14 @@
|
|||||||
"_view_name": "HBoxView",
|
"_view_name": "HBoxView",
|
||||||
"box_style": "",
|
"box_style": "",
|
||||||
"children": [
|
"children": [
|
||||||
"IPY_MODEL_8e9c21cbf4e84b228589b04c28031986",
|
"IPY_MODEL_d1f1b83ff52b4f339aaf7e3472e88f5f",
|
||||||
"IPY_MODEL_c6b0f42e62b942ff93b0dd203b71afaf",
|
"IPY_MODEL_a4c9656af7644c8794eea9cd95cdbb38",
|
||||||
"IPY_MODEL_24988c14693f4b739617f6b148981712"
|
"IPY_MODEL_eece25d48bd94132bd6e9c25001dd0a3"
|
||||||
],
|
],
|
||||||
"layout": "IPY_MODEL_5c56bada895c45f8b955db0b322a30b4"
|
"layout": "IPY_MODEL_77829cd4ef2341c58bd37ce7fb173fbf"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"8e9c21cbf4e84b228589b04c28031986": {
|
"d1f1b83ff52b4f339aaf7e3472e88f5f": {
|
||||||
"model_module": "@jupyter-widgets/controls",
|
"model_module": "@jupyter-widgets/controls",
|
||||||
"model_name": "HTMLModel",
|
"model_name": "HTMLModel",
|
||||||
"model_module_version": "1.5.0",
|
"model_module_version": "1.5.0",
|
||||||
@ -1050,13 +1067,13 @@
|
|||||||
"_view_name": "HTMLView",
|
"_view_name": "HTMLView",
|
||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_b83ec3c178fa47a3b5466974280db85b",
|
"layout": "IPY_MODEL_78e5715e33af4af9a72f348a3cff7a45",
|
||||||
"placeholder": "",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_5d799e20928448c18071eecc9f513789",
|
"style": "IPY_MODEL_89b0f4fbd3c542c6abb5ea2ba0b937fc",
|
||||||
"value": "Processing batches: 0%"
|
"value": "Processing batches: 66%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"c6b0f42e62b942ff93b0dd203b71afaf": {
|
"a4c9656af7644c8794eea9cd95cdbb38": {
|
||||||
"model_module": "@jupyter-widgets/controls",
|
"model_module": "@jupyter-widgets/controls",
|
||||||
"model_name": "FloatProgressModel",
|
"model_name": "FloatProgressModel",
|
||||||
"model_module_version": "1.5.0",
|
"model_module_version": "1.5.0",
|
||||||
@ -1072,15 +1089,15 @@
|
|||||||
"bar_style": "",
|
"bar_style": "",
|
||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_4a251ff30d6448f3b256692453637a6a",
|
"layout": "IPY_MODEL_6013ba1807144ee2b0b4c83d42cf1977",
|
||||||
"max": 966,
|
"max": 1448,
|
||||||
"min": 0,
|
"min": 0,
|
||||||
"orientation": "horizontal",
|
"orientation": "horizontal",
|
||||||
"style": "IPY_MODEL_90238269c43647caa0a8731f0290d64e",
|
"style": "IPY_MODEL_20db3230c722479db16949f232e23fc8",
|
||||||
"value": 3
|
"value": 957
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"24988c14693f4b739617f6b148981712": {
|
"eece25d48bd94132bd6e9c25001dd0a3": {
|
||||||
"model_module": "@jupyter-widgets/controls",
|
"model_module": "@jupyter-widgets/controls",
|
||||||
"model_name": "HTMLModel",
|
"model_name": "HTMLModel",
|
||||||
"model_module_version": "1.5.0",
|
"model_module_version": "1.5.0",
|
||||||
@ -1095,13 +1112,13 @@
|
|||||||
"_view_name": "HTMLView",
|
"_view_name": "HTMLView",
|
||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_4208c14a250c40feb246c67e8141ca99",
|
"layout": "IPY_MODEL_54e9a4e180d74916b620c46cf4da6546",
|
||||||
"placeholder": "",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_976ae07a9b19499991f724a7e40f7e6d",
|
"style": "IPY_MODEL_b1a5928250a94055a95a026804807cf0",
|
||||||
"value": " 3/966 [00:52<3:11:12, 11.91s/it]"
|
"value": " 957/1448 [3:10:16<1:30:31, 11.06s/it]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"5c56bada895c45f8b955db0b322a30b4": {
|
"77829cd4ef2341c58bd37ce7fb173fbf": {
|
||||||
"model_module": "@jupyter-widgets/base",
|
"model_module": "@jupyter-widgets/base",
|
||||||
"model_name": "LayoutModel",
|
"model_name": "LayoutModel",
|
||||||
"model_module_version": "1.2.0",
|
"model_module_version": "1.2.0",
|
||||||
@ -1153,7 +1170,7 @@
|
|||||||
"width": null
|
"width": null
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"b83ec3c178fa47a3b5466974280db85b": {
|
"78e5715e33af4af9a72f348a3cff7a45": {
|
||||||
"model_module": "@jupyter-widgets/base",
|
"model_module": "@jupyter-widgets/base",
|
||||||
"model_name": "LayoutModel",
|
"model_name": "LayoutModel",
|
||||||
"model_module_version": "1.2.0",
|
"model_module_version": "1.2.0",
|
||||||
@ -1205,7 +1222,7 @@
|
|||||||
"width": null
|
"width": null
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"5d799e20928448c18071eecc9f513789": {
|
"89b0f4fbd3c542c6abb5ea2ba0b937fc": {
|
||||||
"model_module": "@jupyter-widgets/controls",
|
"model_module": "@jupyter-widgets/controls",
|
||||||
"model_name": "DescriptionStyleModel",
|
"model_name": "DescriptionStyleModel",
|
||||||
"model_module_version": "1.5.0",
|
"model_module_version": "1.5.0",
|
||||||
@ -1220,7 +1237,7 @@
|
|||||||
"description_width": ""
|
"description_width": ""
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"4a251ff30d6448f3b256692453637a6a": {
|
"6013ba1807144ee2b0b4c83d42cf1977": {
|
||||||
"model_module": "@jupyter-widgets/base",
|
"model_module": "@jupyter-widgets/base",
|
||||||
"model_name": "LayoutModel",
|
"model_name": "LayoutModel",
|
||||||
"model_module_version": "1.2.0",
|
"model_module_version": "1.2.0",
|
||||||
@ -1272,7 +1289,7 @@
|
|||||||
"width": null
|
"width": null
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"90238269c43647caa0a8731f0290d64e": {
|
"20db3230c722479db16949f232e23fc8": {
|
||||||
"model_module": "@jupyter-widgets/controls",
|
"model_module": "@jupyter-widgets/controls",
|
||||||
"model_name": "ProgressStyleModel",
|
"model_name": "ProgressStyleModel",
|
||||||
"model_module_version": "1.5.0",
|
"model_module_version": "1.5.0",
|
||||||
@ -1288,7 +1305,7 @@
|
|||||||
"description_width": ""
|
"description_width": ""
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"4208c14a250c40feb246c67e8141ca99": {
|
"54e9a4e180d74916b620c46cf4da6546": {
|
||||||
"model_module": "@jupyter-widgets/base",
|
"model_module": "@jupyter-widgets/base",
|
||||||
"model_name": "LayoutModel",
|
"model_name": "LayoutModel",
|
||||||
"model_module_version": "1.2.0",
|
"model_module_version": "1.2.0",
|
||||||
@ -1340,7 +1357,7 @@
|
|||||||
"width": null
|
"width": null
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"976ae07a9b19499991f724a7e40f7e6d": {
|
"b1a5928250a94055a95a026804807cf0": {
|
||||||
"model_module": "@jupyter-widgets/controls",
|
"model_module": "@jupyter-widgets/controls",
|
||||||
"model_name": "DescriptionStyleModel",
|
"model_name": "DescriptionStyleModel",
|
||||||
"model_module_version": "1.5.0",
|
"model_module_version": "1.5.0",
|
||||||
|
Loading…
Reference in New Issue
Block a user