Created using Colaboratory

This commit is contained in:
Marius Ciepluch 2024-04-04 19:16:29 +02:00
parent c7ffd0dcaf
commit 24d111b1a4

View File

@ -1264,7 +1264,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 22,
"id": "e6ffc345c26298ad", "id": "e6ffc345c26298ad",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
@ -1273,7 +1273,7 @@
}, },
"colab": { "colab": {
"base_uri": "https://localhost:8080/", "base_uri": "https://localhost:8080/",
"height": 49, "height": 67,
"referenced_widgets": [ "referenced_widgets": [
"592f37baf1c74e149577e80678db668f", "592f37baf1c74e149577e80678db668f",
"2cdb27f1d7b14b558cf6f19fc0ab4fd9", "2cdb27f1d7b14b558cf6f19fc0ab4fd9",
@ -1305,6 +1305,13 @@
}, },
"metadata": {}, "metadata": {},
"output_type": "display_data" "output_type": "display_data"
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"All texts have been added to the database.\n"
]
} }
], ],
"source": [ "source": [
@ -1372,6 +1379,78 @@
"vectorize_data_in_batches(chunks=text_chunks, embeddings=embeddings)" "vectorize_data_in_batches(chunks=text_chunks, embeddings=embeddings)"
] ]
}, },
{
"cell_type": "markdown",
"source": [
"# Similarity search"
],
"metadata": {
"id": "WfjpAoJqE_L4"
},
"id": "WfjpAoJqE_L4"
},
{
"cell_type": "code",
"source": [
"loaded_db = FAISS.load_local(\"faiss_index_cosine\", embeddings, distance_strategy=\"COSINE\", allow_dangerous_deserialization=True)"
],
"metadata": {
"id": "nJG7eD8eFBnV"
},
"id": "nJG7eD8eFBnV",
"execution_count": 27,
"outputs": []
},
{
"cell_type": "code",
"source": [
"results_with_scores = loaded_db.similarity_search_with_score(\"What is an exploit?\")"
],
"metadata": {
"id": "Wwbebp6wF9fG"
},
"id": "Wwbebp6wF9fG",
"execution_count": 33,
"outputs": []
},
{
"cell_type": "code",
"source": [
"for doc, score in results_with_scores:\n",
" print(f\"Content: {doc.page_content}, Score: {score}\")"
],
"metadata": {
"id": "iWfiO0KFGZXN",
"outputId": "0dc2aed8-9689-4efa-e8e8-3d73bb81361b",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"id": "iWfiO0KFGZXN",
"execution_count": 34,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Content: `exploit', Score: 0.24956563115119934\n",
"Content: `exploit', Score: 0.24956563115119934\n",
"Content: `exploit', Score: 0.24956563115119934\n",
"Content: `exploit', Score: 0.24956563115119934\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"# Sandbox code - test area"
],
"metadata": {
"id": "82gFVyrNCYOF"
},
"id": "82gFVyrNCYOF"
},
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "source": [
@ -1381,17 +1460,19 @@
"faiss = FAISS.from_texts(texts, embeddings, distance_strategy=\"COSINE\")\n", "faiss = FAISS.from_texts(texts, embeddings, distance_strategy=\"COSINE\")\n",
"print(type(faiss))\n", "print(type(faiss))\n",
"\n", "\n",
"faiss.save_local(\"test\")" "faiss.save_local(\"test\")\n",
"\n",
"new_db = FAISS.load_local(\"test\", embeddings, allow_dangerous_deserialization=True)\n"
], ],
"metadata": { "metadata": {
"colab": { "colab": {
"base_uri": "https://localhost:8080/" "base_uri": "https://localhost:8080/"
}, },
"id": "v6bhYHU5_9oo", "id": "v6bhYHU5_9oo",
"outputId": "0fec4528-d10b-4a83-f5b5-c75476d2b4d1" "outputId": "a88691e1-3ee4-4a34-edbf-4fac688dd78d"
}, },
"id": "v6bhYHU5_9oo", "id": "v6bhYHU5_9oo",
"execution_count": 21, "execution_count": 26,
"outputs": [ "outputs": [
{ {
"output_type": "stream", "output_type": "stream",
@ -1422,7 +1503,6 @@
}, },
"colab": { "colab": {
"provenance": [], "provenance": [],
"toc_visible": true,
"machine_shape": "hm", "machine_shape": "hm",
"gpuType": "V100" "gpuType": "V100"
}, },
@ -5231,7 +5311,7 @@
"layout": "IPY_MODEL_0fbb5a8dd8c64e6c862779496a0c1867", "layout": "IPY_MODEL_0fbb5a8dd8c64e6c862779496a0c1867",
"placeholder": "", "placeholder": "",
"style": "IPY_MODEL_8c77c7def1804fd6884a601c76618fa7", "style": "IPY_MODEL_8c77c7def1804fd6884a601c76618fa7",
"value": "Processingbatches:49%" "value": "Processingbatches:100%"
} }
}, },
"ce8eed52d57c47479ab9a45b85296c04": { "ce8eed52d57c47479ab9a45b85296c04": {
@ -5247,7 +5327,7 @@
"_view_module": "@jupyter-widgets/controls", "_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0", "_view_module_version": "1.5.0",
"_view_name": "ProgressView", "_view_name": "ProgressView",
"bar_style": "", "bar_style": "success",
"description": "", "description": "",
"description_tooltip": null, "description_tooltip": null,
"layout": "IPY_MODEL_4f06cc3b83e641cd81deba9aaea93fbb", "layout": "IPY_MODEL_4f06cc3b83e641cd81deba9aaea93fbb",
@ -5255,7 +5335,7 @@
"min": 0, "min": 0,
"orientation": "horizontal", "orientation": "horizontal",
"style": "IPY_MODEL_b0e999f6c752439a8f4ba962815160ae", "style": "IPY_MODEL_b0e999f6c752439a8f4ba962815160ae",
"value": 703 "value": 1448
} }
}, },
"a18c165ea7fc485c91e64df34974d685": { "a18c165ea7fc485c91e64df34974d685": {
@ -5276,7 +5356,7 @@
"layout": "IPY_MODEL_a18f27c970524c048b424be9672e106f", "layout": "IPY_MODEL_a18f27c970524c048b424be9672e106f",
"placeholder": "", "placeholder": "",
"style": "IPY_MODEL_f6b2c8e5621143729c8d6e3129251f29", "style": "IPY_MODEL_f6b2c8e5621143729c8d6e3129251f29",
"value": "703/1448[14:11<15:18,1.23s/it]" "value": "1448/1448[30:16<00:00,1.23it/s]"
} }
}, },
"090543e0523a4d0e8dbd89e0152a3a15": { "090543e0523a4d0e8dbd89e0152a3a15": {