diff --git a/EverNote_Documents_To_FAISS_Colab_GPU.ipynb b/EverNote_Documents_To_FAISS_Colab_GPU.ipynb index a66fcbf..2565fd1 100644 --- a/EverNote_Documents_To_FAISS_Colab_GPU.ipynb +++ b/EverNote_Documents_To_FAISS_Colab_GPU.ipynb @@ -1264,7 +1264,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "id": "e6ffc345c26298ad", "metadata": { "ExecuteTime": { @@ -1273,7 +1273,7 @@ }, "colab": { "base_uri": "https://localhost:8080/", - "height": 49, + "height": 67, "referenced_widgets": [ "592f37baf1c74e149577e80678db668f", "2cdb27f1d7b14b558cf6f19fc0ab4fd9", @@ -1305,6 +1305,13 @@ }, "metadata": {}, "output_type": "display_data" + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "All texts have been added to the database.\n" + ] } ], "source": [ @@ -1372,6 +1379,78 @@ "vectorize_data_in_batches(chunks=text_chunks, embeddings=embeddings)" ] }, + { + "cell_type": "markdown", + "source": [ + "# Similarity search" + ], + "metadata": { + "id": "WfjpAoJqE_L4" + }, + "id": "WfjpAoJqE_L4" + }, + { + "cell_type": "code", + "source": [ + "loaded_db = FAISS.load_local(\"faiss_index_cosine\", embeddings, distance_strategy=\"COSINE\", allow_dangerous_deserialization=True)" + ], + "metadata": { + "id": "nJG7eD8eFBnV" + }, + "id": "nJG7eD8eFBnV", + "execution_count": 27, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "results_with_scores = loaded_db.similarity_search_with_score(\"What is an exploit?\")" + ], + "metadata": { + "id": "Wwbebp6wF9fG" + }, + "id": "Wwbebp6wF9fG", + "execution_count": 33, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "for doc, score in results_with_scores:\n", + " print(f\"Content: {doc.page_content}, Score: {score}\")" + ], + "metadata": { + "id": "iWfiO0KFGZXN", + "outputId": "0dc2aed8-9689-4efa-e8e8-3d73bb81361b", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "id": "iWfiO0KFGZXN", + "execution_count": 34, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Content: `exploit', Score: 0.24956563115119934\n", + "Content: `exploit', Score: 0.24956563115119934\n", + "Content: `exploit', Score: 0.24956563115119934\n", + "Content: `exploit', Score: 0.24956563115119934\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Sandbox code - test area" + ], + "metadata": { + "id": "82gFVyrNCYOF" + }, + "id": "82gFVyrNCYOF" + }, { "cell_type": "code", "source": [ @@ -1381,17 +1460,19 @@ "faiss = FAISS.from_texts(texts, embeddings, distance_strategy=\"COSINE\")\n", "print(type(faiss))\n", "\n", - "faiss.save_local(\"test\")" + "faiss.save_local(\"test\")\n", + "\n", + "new_db = FAISS.load_local(\"test\", embeddings, allow_dangerous_deserialization=True)\n" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "v6bhYHU5_9oo", - "outputId": "0fec4528-d10b-4a83-f5b5-c75476d2b4d1" + "outputId": "a88691e1-3ee4-4a34-edbf-4fac688dd78d" }, "id": "v6bhYHU5_9oo", - "execution_count": 21, + "execution_count": 26, "outputs": [ { "output_type": "stream", @@ -1422,7 +1503,6 @@ }, "colab": { "provenance": [], - "toc_visible": true, "machine_shape": "hm", "gpuType": "V100" }, @@ -5231,7 +5311,7 @@ "layout": "IPY_MODEL_0fbb5a8dd8c64e6c862779496a0c1867", "placeholder": "​", "style": "IPY_MODEL_8c77c7def1804fd6884a601c76618fa7", - "value": "Processing batches:  49%" + "value": "Processing batches: 100%" } }, "ce8eed52d57c47479ab9a45b85296c04": { @@ -5247,7 +5327,7 @@ "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", - "bar_style": "", + "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_4f06cc3b83e641cd81deba9aaea93fbb", @@ -5255,7 +5335,7 @@ "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_b0e999f6c752439a8f4ba962815160ae", - "value": 703 + "value": 1448 } }, "a18c165ea7fc485c91e64df34974d685": { @@ -5276,7 +5356,7 @@ "layout": "IPY_MODEL_a18f27c970524c048b424be9672e106f", "placeholder": "​", "style": "IPY_MODEL_f6b2c8e5621143729c8d6e3129251f29", - "value": " 703/1448 [14:11<15:18,  1.23s/it]" + "value": " 1448/1448 [30:16<00:00,  1.23it/s]" } }, "090543e0523a4d0e8dbd89e0152a3a15": {