Created using Colaboratory

main
Marius Ciepluch 2024-04-04 19:16:29 +02:00
parent c7ffd0dcaf
commit 24d111b1a4
1 changed files with 90 additions and 10 deletions

View File

@ -1264,7 +1264,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 22,
"id": "e6ffc345c26298ad",
"metadata": {
"ExecuteTime": {
@ -1273,7 +1273,7 @@
},
"colab": {
"base_uri": "https://localhost:8080/",
"height": 49,
"height": 67,
"referenced_widgets": [
"592f37baf1c74e149577e80678db668f",
"2cdb27f1d7b14b558cf6f19fc0ab4fd9",
@ -1305,6 +1305,13 @@
},
"metadata": {},
"output_type": "display_data"
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"All texts have been added to the database.\n"
]
}
],
"source": [
@ -1372,6 +1379,78 @@
"vectorize_data_in_batches(chunks=text_chunks, embeddings=embeddings)"
]
},
{
"cell_type": "markdown",
"source": [
"# Similarity search"
],
"metadata": {
"id": "WfjpAoJqE_L4"
},
"id": "WfjpAoJqE_L4"
},
{
"cell_type": "code",
"source": [
"loaded_db = FAISS.load_local(\"faiss_index_cosine\", embeddings, distance_strategy=\"COSINE\", allow_dangerous_deserialization=True)"
],
"metadata": {
"id": "nJG7eD8eFBnV"
},
"id": "nJG7eD8eFBnV",
"execution_count": 27,
"outputs": []
},
{
"cell_type": "code",
"source": [
"results_with_scores = loaded_db.similarity_search_with_score(\"What is an exploit?\")"
],
"metadata": {
"id": "Wwbebp6wF9fG"
},
"id": "Wwbebp6wF9fG",
"execution_count": 33,
"outputs": []
},
{
"cell_type": "code",
"source": [
"for doc, score in results_with_scores:\n",
" print(f\"Content: {doc.page_content}, Score: {score}\")"
],
"metadata": {
"id": "iWfiO0KFGZXN",
"outputId": "0dc2aed8-9689-4efa-e8e8-3d73bb81361b",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"id": "iWfiO0KFGZXN",
"execution_count": 34,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Content: `exploit', Score: 0.24956563115119934\n",
"Content: `exploit', Score: 0.24956563115119934\n",
"Content: `exploit', Score: 0.24956563115119934\n",
"Content: `exploit', Score: 0.24956563115119934\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"# Sandbox code - test area"
],
"metadata": {
"id": "82gFVyrNCYOF"
},
"id": "82gFVyrNCYOF"
},
{
"cell_type": "code",
"source": [
@ -1381,17 +1460,19 @@
"faiss = FAISS.from_texts(texts, embeddings, distance_strategy=\"COSINE\")\n",
"print(type(faiss))\n",
"\n",
"faiss.save_local(\"test\")"
"faiss.save_local(\"test\")\n",
"\n",
"new_db = FAISS.load_local(\"test\", embeddings, allow_dangerous_deserialization=True)\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "v6bhYHU5_9oo",
"outputId": "0fec4528-d10b-4a83-f5b5-c75476d2b4d1"
"outputId": "a88691e1-3ee4-4a34-edbf-4fac688dd78d"
},
"id": "v6bhYHU5_9oo",
"execution_count": 21,
"execution_count": 26,
"outputs": [
{
"output_type": "stream",
@ -1422,7 +1503,6 @@
},
"colab": {
"provenance": [],
"toc_visible": true,
"machine_shape": "hm",
"gpuType": "V100"
},
@ -5231,7 +5311,7 @@
"layout": "IPY_MODEL_0fbb5a8dd8c64e6c862779496a0c1867",
"placeholder": "",
"style": "IPY_MODEL_8c77c7def1804fd6884a601c76618fa7",
"value": "Processingbatches:49%"
"value": "Processingbatches:100%"
}
},
"ce8eed52d57c47479ab9a45b85296c04": {
@ -5247,7 +5327,7 @@
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_4f06cc3b83e641cd81deba9aaea93fbb",
@ -5255,7 +5335,7 @@
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_b0e999f6c752439a8f4ba962815160ae",
"value": 703
"value": 1448
}
},
"a18c165ea7fc485c91e64df34974d685": {
@ -5276,7 +5356,7 @@
"layout": "IPY_MODEL_a18f27c970524c048b424be9672e106f",
"placeholder": "",
"style": "IPY_MODEL_f6b2c8e5621143729c8d6e3129251f29",
"value": "703/1448[14:11<15:18,1.23s/it]"
"value": "1448/1448[30:16<00:00,1.23it/s]"
}
},
"090543e0523a4d0e8dbd89e0152a3a15": {