From 6fd67189d19ce7f5c8d553aebc92cf1d386ecf20 Mon Sep 17 00:00:00 2001 From: Marius Ciepluch <11855163+norandom@users.noreply.github.com> Date: Thu, 4 Apr 2024 13:20:46 +0200 Subject: [PATCH] Created using Colaboratory --- EverNote_Documents_To_FAISS_Colab_GPU.ipynb | 5411 +++++++++++++++++++ 1 file changed, 5411 insertions(+) create mode 100644 EverNote_Documents_To_FAISS_Colab_GPU.ipynb diff --git a/EverNote_Documents_To_FAISS_Colab_GPU.ipynb b/EverNote_Documents_To_FAISS_Colab_GPU.ipynb new file mode 100644 index 0000000..46e3026 --- /dev/null +++ b/EverNote_Documents_To_FAISS_Colab_GPU.ipynb @@ -0,0 +1,5411 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "18d62071e34b0d53", + "metadata": { + "collapsed": false, + "id": "18d62071e34b0d53" + }, + "source": [ + "# This is an experiment: create vectorized embeddings out of an EverNote DB (PDF, DOCX, HTML, TXT)\n", + "\n", + "## Features\n", + "\n", + "* vectorize text, html files, pdfs and docx into one vector DB, split in tables (sqlite vss)\n", + "* use local self-hosted embeddings (CPU or GPU computed)\n", + " * for sentences\n", + "* query a local sqlite vss vector db, use cache from LangChain (sqlite)\n", + "* use OpenAI API and (Ollama on-prem self-hosted) Mistral for the response processing\n", + "* compare with LLMware Bling\n", + "\n", + "## Anti-Features\n", + "\n", + "* due to cost reasons the OpenAI embeddings don't get used. So sorry :p" + ] + }, + { + "cell_type": "markdown", + "id": "94517a27e3148ff4", + "metadata": { + "collapsed": false, + "id": "94517a27e3148ff4" + }, + "source": [ + "# Configuration" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "fd9747a54ea8fcef", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-04T10:35:54.949214Z", + "start_time": "2024-04-04T10:35:54.945013Z" + }, + "id": "fd9747a54ea8fcef" + }, + "outputs": [], + "source": [ + "import sys\n", + "import os\n", + "IN_COLAB = 'google.colab' in sys.modules\n", + "\n", + "if not IN_COLAB:\n", + " # The Evernote DB path containing the extracted data.\n", + " extracted_evernote_db = \"/home/marius/data/it-sec-research-extracted/IT sec research\"\n", + "\n", + " # Output paths containing the Evernote text notes or documents data.\n", + " # These get generated by the data extraction process\n", + " output_path_extracted_notes = \"/home/marius/source/bookworm/export.txt\"\n", + " output_path_extracted_docs = \"/home/marius/source/bookworm/export.documents.txt\"\n", + "\n", + " # Resulting DB or vector store path.\n", + " result_db = \"/home/marius/source/bookworm/evernote.db\"\n", + "\n", + "else:\n", + " # For the Goog Colab env\n", + " output_path_extracted_notes = \"/content/export.txt\"\n", + " output_path_extracted_docs = \"/content/export.documents.txt\"\n", + " result_db = \"/content/evernote.db\"\n", + "\n", + "# To suppress some warnings\n", + "import os\n", + "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"True\"" + ] + }, + { + "cell_type": "code", + "source": [ + "# Controls:" + ], + "metadata": { + "id": "8tcn27pzvpRi" + }, + "id": "8tcn27pzvpRi", + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print(output_path_extracted_notes)" + ], + "metadata": { + "id": "6SPPaVEet9EO", + "outputId": "de267b51-fab0-4509-dfda-b8cfc7886d8b", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "id": "6SPPaVEet9EO", + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "/content/export.txt\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "%reload_ext autoreload\n", + "%autoreload 2" + ], + "metadata": { + "id": "XGYNhuvrvnUD" + }, + "id": "XGYNhuvrvnUD", + "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "markdown", + "id": "a8c8692786d83c00", + "metadata": { + "collapsed": false, + "id": "a8c8692786d83c00" + }, + "source": [ + "## Dependencies\n", + "\n", + "* Cryptography is used to handle some PDF functions here (signatures)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "bb34db1ea75a1edf", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-04T10:08:32.520341Z", + "start_time": "2024-04-04T10:08:30.353678Z" + }, + "id": "bb34db1ea75a1edf", + "outputId": "25aac151-5cae-44e7-887b-27008e986821", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Name: cryptography\n", + "Version: 42.0.5\n", + "Summary: cryptography is a package which provides cryptographic recipes and primitives to Python developers.\n", + "Home-page: \n", + "Author: \n", + "Author-email: The Python Cryptographic Authority and individual contributors \n", + "License: Apache-2.0 OR BSD-3-Clause\n", + "Location: /usr/local/lib/python3.10/dist-packages\n", + "Requires: cffi\n", + "Required-by: pyOpenSSL\n" + ] + } + ], + "source": [ + "%pip show cryptography" + ] + }, + { + "cell_type": "markdown", + "id": "297746c807e95fbf", + "metadata": { + "collapsed": false, + "id": "297746c807e95fbf" + }, + "source": [ + "* pikepdf is used to repair some PDFs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ebc8af0183532fc2", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-04T10:08:34.665865Z", + "start_time": "2024-04-04T10:08:32.522020Z" + }, + "id": "ebc8af0183532fc2", + "outputId": "2398386a-d6d5-4574-9416-c7f8f92a082c" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Name: pikepdf\r\n", + "Version: 8.13.0\r\n", + "Summary: Read and write PDFs with Python, powered by qpdf\r\n", + "Home-page: \r\n", + "Author: \r\n", + "Author-email: \"James R. Barlow\" \r\n", + "License: MPL-2.0\r\n", + "Location: /home/marius/miniconda3/envs/llm_langchain/lib/python3.11/site-packages\r\n", + "Requires: Deprecated, lxml, packaging, Pillow\r\n", + "Required-by: \r\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip show pikepdf" + ] + }, + { + "cell_type": "markdown", + "id": "7c7a7f6b0db3719e", + "metadata": { + "collapsed": false, + "id": "7c7a7f6b0db3719e" + }, + "source": [ + "* pypdf with all features is needed because this DB consists of 100+ PDFs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "779f81e2ab00f73c", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-04T10:08:37.436449Z", + "start_time": "2024-04-04T10:08:35.269255Z" + }, + "id": "779f81e2ab00f73c", + "outputId": "353f67d9-0d77-45ba-85d3-37d8a651580c" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Name: pypdf\r\n", + "Version: 4.0.2\r\n", + "Summary: A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files\r\n", + "Home-page: \r\n", + "Author: \r\n", + "Author-email: Mathieu Fenniak \r\n", + "License: \r\n", + "Location: /home/marius/miniconda3/envs/llm_langchain/lib/python3.11/site-packages\r\n", + "Requires: \r\n", + "Required-by: \r\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip show \"pypdf\"" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "de3f715519fda6c4", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-04T10:08:39.729429Z", + "start_time": "2024-04-04T10:08:37.438498Z" + }, + "id": "de3f715519fda6c4", + "outputId": "858cd8ae-32d3-4373-9ac4-971e424079bb", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Name: torch\n", + "Version: 2.2.1+cu121\n", + "Summary: Tensors and Dynamic neural networks in Python with strong GPU acceleration\n", + "Home-page: https://pytorch.org/\n", + "Author: PyTorch Team\n", + "Author-email: packages@pytorch.org\n", + "License: BSD-3\n", + "Location: /usr/local/lib/python3.10/dist-packages\n", + "Requires: filelock, fsspec, jinja2, networkx, nvidia-cublas-cu12, nvidia-cuda-cupti-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-runtime-cu12, nvidia-cudnn-cu12, nvidia-cufft-cu12, nvidia-curand-cu12, nvidia-cusolver-cu12, nvidia-cusparse-cu12, nvidia-nccl-cu12, nvidia-nvtx-cu12, sympy, triton, typing-extensions\n", + "Required-by: fastai, sentence-transformers, torchaudio, torchdata, torchtext, torchvision\n" + ] + } + ], + "source": [ + "%pip show torch" + ] + }, + { + "cell_type": "code", + "source": [ + "%pip show faiss_gpu" + ], + "metadata": { + "id": "HARY_QMJvttI", + "outputId": "1a3d2e43-b3d4-46f6-a526-ac0cb44bd1e6", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "id": "HARY_QMJvttI", + "execution_count": 8, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Name: faiss-gpu\n", + "Version: 1.7.2\n", + "Summary: A library for efficient similarity search and clustering of dense vectors.\n", + "Home-page: https://github.com/kyamagu/faiss-wheels\n", + "Author: Kota Yamaguchi\n", + "Author-email: KotaYamaguchi1984@gmail.com\n", + "License: MIT\n", + "Location: /usr/local/lib/python3.10/dist-packages\n", + "Requires: \n", + "Required-by: \n" + ] + } + ] + }, + { + "cell_type": "markdown", + "id": "ce1350d2d6e3ed63", + "metadata": { + "collapsed": false, + "id": "ce1350d2d6e3ed63" + }, + "source": [ + "## Text extraction\n", + "\n", + "* Here the html and text data is extracted into one txt file\n", + "* The PDF and DOCX data is extracted into another txt file\n", + "\n", + "This will be used for weighted data fusion later.\n", + "\n", + "* the texts are normalized:\n", + " * unicode normalization\n", + " * surrogate characters get replaced\n", + " * html gets converted to text\n", + " * pdfs get repaired\n", + " * docx files get read\n", + "\n", + "* exceptions get handled (UTF-16 issues, PDF reference errors)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b557444b8b1d4839", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-04T09:25:39.388933Z", + "start_time": "2024-04-04T09:25:39.320902Z" + }, + "id": "b557444b8b1d4839", + "outputId": "2f7f60e9-af8d-41c2-ff67-5b6f61574d7b" + }, + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'html2text'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[7], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mos\u001b[39;00m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01municodedata\u001b[39;00m \u001b[38;5;66;03m# to normalize text\u001b[39;00m\n\u001b[0;32m----> 5\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mhtml2text\u001b[39;00m \u001b[38;5;66;03m# to convert html to text\u001b[39;00m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdocument_loaders\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m PyPDFLoader, Docx2txtLoader\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpikepdf\u001b[39;00m \u001b[38;5;66;03m# to repair PDFs\u001b[39;00m\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'html2text'" + ] + } + ], + "source": [ + "import glob\n", + "import os\n", + "\n", + "import unicodedata # to normalize text\n", + "import html2text # to convert html to text\n", + "from langchain.document_loaders import PyPDFLoader, Docx2txtLoader\n", + "import pikepdf # to repair PDFs\n", + "from pathlib import Path\n", + "from tqdm.notebook import tqdm\n", + "from concurrent.futures import ThreadPoolExecutor, as_completed\n", + "\n", + "def convert_html_to_text(html_blob: str) -> str:\n", + " \"\"\"\n", + " Converts a html blob into a string.\n", + " \"\"\"\n", + " h = html2text.HTML2Text()\n", + " h.mark_code = True\n", + " h.escape_snob = True\n", + " h.unicode_snob = True\n", + " # h.use_automatic_links = True\n", + " h.images_as_html = True\n", + " h.single_line_break = True\n", + " h.ignore_links = True\n", + " return h.handle(html_blob)\n", + "\n", + "def normalize_text(txt_blob: str) -> str:\n", + " \"\"\"\n", + " Normalize a text blob using NFKD normalization.\n", + " \"\"\"\n", + " return unicodedata.normalize(\"NFKD\", txt_blob)\n", + "\n", + "def repair_pdf(file_path: str) -> bool:\n", + " \"\"\"\n", + " Attempts to repair a PDF file using pikepdf.\n", + " \"\"\"\n", + " try:\n", + " with pikepdf.open(file_path, allow_overwriting_input=True) as pdf:\n", + " pdf.save(file_path)\n", + " return True\n", + " except pikepdf.PdfError as e:\n", + " print(f\"Failed to repair PDF {file_path}: {e}\")\n", + " return False\n", + "\n", + "def read_and_convert_file(file_path: str, is_html: bool, is_pdf: bool, is_docx: bool) -> str:\n", + " \"\"\"\n", + " Reads and converts a file from HTML, PDF, DOCX, or plain text to text.\n", + " :param file_path:\n", + " :param is_html:\n", + " :param is_pdf:\n", + " :param is_docx:\n", + " :return:\n", + " \"\"\"\n", + "\n", + " content = \"\"\n", + " if is_html:\n", + " try:\n", + " with open(file_path, 'r', encoding='utf-8') as file:\n", + " content = file.read()\n", + " return convert_html_to_text(content)\n", + " except Exception as e:\n", + " print(f\"Error reading {file_path}: {e}\")\n", + " return \"\"\n", + "\n", + " elif is_pdf:\n", + " try:\n", + " loader = PyPDFLoader(file_path)\n", + " # ... fixes \"Multiple definitions in dictionary at byte 0xb32 for key /ExtGState\" error\n", + " documents = loader.load()\n", + " content = \"\\n\".join(doc.page_content for doc in documents if hasattr(doc, 'page_content'))\n", + " except Exception as e:\n", + " print(f\"Error loading PDF {file_path}: {e}. Attempting to repair...\")\n", + " if repair_pdf(file_path):\n", + " try:\n", + " loader = PyPDFLoader(file_path)\n", + " documents = loader.load()\n", + " content = \"\\n\".join(doc.page_content for doc in documents if hasattr(doc, 'page_content'))\n", + " except Exception as e:\n", + " print(f\"Failed to process PDF {file_path} after repair: {e}\")\n", + " return \"\"\n", + " return normalize_text(content)\n", + "\n", + " elif is_docx:\n", + " try:\n", + " loader = Docx2txtLoader(file_path)\n", + " content = loader.load()\n", + " if isinstance(content, list):\n", + " content = \"\\n\".join(content)\n", + " except Exception as e:\n", + " print(f\"Error reading DOCX {file_path}: {e}\")\n", + " return \"\"\n", + " return normalize_text(content)\n", + "\n", + " else: # For plain text files\n", + " try:\n", + " with open(file_path, 'r', encoding='utf-8') as file:\n", + " return normalize_text(file.read())\n", + " except Exception as e:\n", + " print(f\"Error reading {file_path}: {e}\")\n", + " return \"\"\n", + "\n", + "def sanitize_text(text):\n", + " \"\"\"\n", + " Removes or replaces surrogate characters from a string.\n", + " \"\"\"\n", + " return text.encode('utf-8', 'replace').decode('utf-8')\n", + "\n", + "def append_to_output(data: str, is_pdf: bool, is_docx: bool, output_path: str):\n", + " \"\"\"\n", + " Appends sanitized data to an output file.\n", + " \"\"\"\n", + " sanitized_data = sanitize_text(data)\n", + " if is_pdf or is_docx:\n", + " output_path = str(Path(output_path).with_suffix('')) + \".documents.txt\"\n", + "\n", + " with open(output_path, \"a\", encoding='utf-8') as output_file:\n", + " output_file.write(sanitized_data)\n", + "\n", + "def process_file(file):\n", + " is_html = file.endswith('.html')\n", + " is_pdf = file.endswith('.pdf')\n", + " is_docx = file.endswith('.docx')\n", + "\n", + " file_content = read_and_convert_file(file, is_html, is_pdf, is_docx)\n", + " append_to_output(file_content, is_pdf, is_docx, output_path=output_path)\n", + "\n", + "def process_files_in_directory(directory: str):\n", + " txt_html_files = glob.glob(os.path.join(directory, \"*.txt\")) + glob.glob(os.path.join(directory, \"*.html\"))\n", + " pdf_docx_files = glob.glob(os.path.join(directory, \"img\", \"*.pdf\")) + glob.glob(os.path.join(directory, \"img\", \"*.docx\"))\n", + " all_files = txt_html_files + pdf_docx_files\n", + "\n", + " # Initialize the progress bar\n", + " pbar = tqdm(total=len(all_files), desc=\"Processing files\")\n", + "\n", + " with ThreadPoolExecutor(max_workers=3) as executor:\n", + " # Submit all files to the executor and store future objects\n", + " futures = [executor.submit(process_file, file) for file in all_files]\n", + "\n", + " # As tasks complete, update the progress bar\n", + " for future in as_completed(futures):\n", + " pbar.update(1) # Update the progress bar by one for each task completed\n", + "\n", + " # Ensure the progress bar is closed upon completion\n", + " pbar.close()\n", + "\n", + "process_files_in_directory(extracted_evernote_db)" + ] + }, + { + "cell_type": "markdown", + "id": "e1bcc07f980c865f", + "metadata": { + "collapsed": false, + "id": "e1bcc07f980c865f" + }, + "source": [ + "## Chunking of the texts\n", + "\n", + "The texts need to get chunked (pre-processing) before the embedding process." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "de8d9f18d8342c57", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-04T10:09:23.408646Z", + "start_time": "2024-04-04T10:08:56.104045Z" + }, + "id": "de8d9f18d8342c57", + "outputId": "2368c73f-dd6c-4737-f997-a5b1a285a15d", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Now you have 723845 chunks in /content/export.txt\n", + "Now you have 151259 chunks in /content/export.documents.txt\n" + ] + } + ], + "source": [ + "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", + "\n", + "def chunk_text_data(txt_file=output_path_extracted_notes):\n", + "\n", + " with open(txt_file) as f:\n", + " text_notes = f.read()\n", + "\n", + " text_splitter = RecursiveCharacterTextSplitter(\n", + " chunk_size=100,\n", + " chunk_overlap=20,\n", + " length_function=len\n", + " )\n", + "\n", + " chunks = text_splitter.create_documents([text_notes])\n", + " print(f'Now you have {len(chunks)} chunks in {txt_file}')\n", + " return chunks\n", + "\n", + "# chunk individual text file containing the data\n", + "text_chunks = chunk_text_data(txt_file=output_path_extracted_notes)\n", + "doc_chunks = chunk_text_data(txt_file=output_path_extracted_docs)" + ] + }, + { + "cell_type": "markdown", + "id": "aea7ceb111fed5f3", + "metadata": { + "collapsed": false, + "id": "aea7ceb111fed5f3" + }, + "source": [ + "### Embedding costs - why no OpenAI?\n", + "\n", + "The OpenAI API has a cost for the embeddings.\n", + "At this point there seems to be no way to pre-estimate the costs reliably.\n", + "The following calculation is probably flawed:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "afb2c8feb9ca0bb4", + "metadata": { + "id": "afb2c8feb9ca0bb4" + }, + "outputs": [], + "source": [ + "def print_embedding_cost(texts):\n", + " import tiktoken\n", + " enc = tiktoken.encoding_for_model('gpt-4')\n", + " total_tokens = sum([len(enc.encode(page.page_content)) for page in texts])\n", + " print(f'Total Tokens: {total_tokens}')\n", + " print(f'Embedding Cost in USD: { (0.03 / 1_000) * total_tokens}')\n", + "\n", + "print_embedding_cost(text_chunks)" + ] + }, + { + "cell_type": "markdown", + "id": "8012516604037e2f", + "metadata": { + "collapsed": false, + "id": "8012516604037e2f" + }, + "source": [ + "## Use Hugging Face Embeddings Sentence Transformers\n", + "\n", + "Here we:\n", + "\n", + "* use a self-hosted on-premises model for the embedding and vectorization\n", + "* configure it for the use with the CPU or GPU\n", + "\n", + "This model is from the Beijing Academy of Artificial Intelligence\n", + "* https://huggingface.co/BAAI/bge-large-en-v1.5\n", + "* It uses: https://huggingface.co/docs/transformers/model_doc/auto\n", + "\n", + "It will produce embeddings of 1024 dimensions, roughly 500 less than OpenAI Embeddings." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "3081256c9cf22780", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-04T10:09:29.687485Z", + "start_time": "2024-04-04T10:09:23.410187Z" + }, + "id": "3081256c9cf22780", + "outputId": "33f99ec5-f3ec-4cf7-e489-4bb5c2b91f32", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "__CUDNN VERSION: 8902\n", + "__Number CUDA Devices: 1\n", + "__CUDA Device Name: Tesla V100-SXM2-16GB\n", + "__CUDA Device Total Memory [GB]: 16.935682048\n", + "GPU enabled\n" + ] + } + ], + "source": [ + "import torch\n", + "use_cuda = torch.cuda.is_available()\n", + "\n", + "USE_GPU=False\n", + "\n", + "if use_cuda:\n", + " print('__CUDNN VERSION:', torch.backends.cudnn.version())\n", + " print('__Number CUDA Devices:', torch.cuda.device_count())\n", + " print('__CUDA Device Name:',torch.cuda.get_device_name(0))\n", + " print('__CUDA Device Total Memory [GB]:',torch.cuda.get_device_properties(0).total_memory/1e9)\n", + " USE_GPU=True\n", + " print(\"GPU enabled\")\n", + "\n", + "if not use_cuda:\n", + " print('No CUDA available')" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "c1ca979bbc1610bb", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-04T10:09:29.889360Z", + "start_time": "2024-04-04T10:09:29.688832Z" + }, + "id": "c1ca979bbc1610bb" + }, + "outputs": [], + "source": [ + "from langchain.embeddings import HuggingFaceEmbeddings\n", + "\n", + "# pre-trained model path\n", + "modelPath = \"BAAI/bge-large-en-v1.5\"\n", + "\n", + "# Create a dictionary with model configuration options, specifying to use the CPU or GPU for computations\n", + "if not USE_GPU:\n", + " model_kwargs = {'device':'cpu'}\n", + "else:\n", + " model_kwargs = {}\n", + "\n", + "# Create a dictionary with encoding options, specifically setting 'normalize_embeddings' to True\n", + "encode_kwargs = {'normalize_embeddings': True}" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "3c2b9cd67f161714", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-04T10:09:55.733575Z", + "start_time": "2024-04-04T10:09:34.059018Z" + }, + "id": "3c2b9cd67f161714", + "outputId": "a6f1ac07-a1ae-4261-9108-7bb35f2a7048", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 496, + "referenced_widgets": [ + "8483f598cf82452e8cc1e72632e2776b", + "beef757d2c4f483eb81e81cdf4979c53", + "f2d44ed3ad784d85b89fe85eacb4902e", + "6323e24242ec448bbd4d0bcc8474c2be", + "2a2b2e89b8304c4298c844b8211de167", + "8f79ebe13fe54840bce483acaaca04ff", + "9c5f5ee9bbf340bcbb06221fda88634c", + "8a089f081a50414d9655f4a16d85cf12", + "697a8e3d4ed5431aacd0c2cf379efa0e", + "929a2b83e0ef405e8340aca838f67fae", + "a207830eae514dba968ea69ba57b8c83", + "632f1e5872804ecb95630168ddfedc67", + "9ca5b11369cf4f74aa54b1ad08a90d13", + "c38f5766a3ef4e3cbea0c7fea9e34b73", + "b4cf9727180945fea027e40338a3036c", + "32573efabe434e1a8b22ee40efffd06a", + "b49f8ef7eb7043de869049a5be4282c2", + "ee8cf1ab475e4251b9ff585b38d6ac49", + "18a1ae367b0d4c539807a2d0136d5c74", + "9ce5cd9f2dd946c79eebb2fee078128e", + "36d267e780e34370adfd3c3108155ae9", + "50348c7badbb4b34b0e3dea1c200843d", + "d49be4c05cfa40afb113d381d8e607c1", + "26b039146a21460c8398e17c2402d424", + "066de3c247694e61a40ee7ce164b93b0", + "fbb11ab5105e4b9dbac42887adf18ab3", + "9382fca054b44477ae20df3273749ee5", + "57bca8e245c1430494876a4b11253a56", + "bf4db72d99b84e5086fb9a4d63e5f600", + "56e6c08acc2f44f097294471ff59068d", + "14543db7b0d04b8bab4259f3e503a709", + "fee292c094444cb89bbe0faad87f000c", + "35947ca3bc824bba945274ebb3df15f3", + "99e0d045915b4a38b54ee54d51bb0a4b", + "21511e55ed8a4f80b02ead5371fb9fc6", + "58aaf75e5b1a4941807fdcd6545c2814", + "a7c480a65ee94121abe6cf2f6f6721a0", + "6fa4c19fcf304768bd8f9dc730e3136a", + "c0b1966a0124461596798fe4fe93e242", + "9c5e2ad4c55c43fcb4d4be2bacf5f13a", + "39fb1201bdb847c6b9f5c1f14afff125", + "c9381dbd77d9418fb67c8863b68c8659", + "caca4e93a0634784bfd46a9ac9eb402d", + "c94f4cecd5584206ba5367f98086c019", + "ce2de3d85d1744ddb534e772b9175e72", + "530d12787679463f91c95bbbabbdc27f", + "a5ca8277d58846c4b7af700ec2306d91", + "7e4d67cd575c4f478dfb5b51415d63f7", + "207707226b69440096d3a609314c3028", + "1f41ff403f6a4a73bd09077b3e71bee3", + "52235364698646a39662ab8c7f15d496", + "4cf0cd0027484070bd1987b19cd5e1e0", + "f070bc651b8e44bbb9ee27a8c5df4bc7", + "a14087c6a60e4ae89d4bb4ade00891be", + "eedf7d8e981f4e849a82546bf96937c7", + "691e9f7c36c244fb9c6c4ac823d200ea", + "43e6a4b9fa91493fb829720492d40d75", + "fdb3b06da2ee4c0b857b8e5e0fd68592", + "4e247055f9a0459c8ea06859af21da4e", + "4409342104f04c38b1a753ce74ebb749", + "7e2d3506b38b47ca896e65f0964f9289", + "2c2885076c684599aa01111f69a2797a", + "23e1537216bf4d02962ad389faf5e0d4", + "8f6a77d9dd664ddb9e05a2c1c6e7f48b", + "f74d0ace48c9436b9d1f601b17d82d05", + "93a2ada054574e509bba17f1a6fc88e8", + "470aeb10ce004cbbb9ab0b4454d1c7d2", + "1db0f861eec140ba907855d471afc8b4", + "38c02eaa287742bb9b9981ac9418b095", + "aafe7804e780441196d7bf8774e14dfb", + "96452021cacb4c819ae0496daf2686a9", + "e6cec6bb9c2341679e5b83c31881c389", + "770a2acc6b5342a3b3ec7cb2a87366cd", + "77eef81327aa4245b4e725393423296d", + "53d165b361764f45a3b7b6049cda67c7", + "0d7e4fa9f537434eb7656a85c99926fb", + "4950a63544b041d284938b798bd6b2a8", + "3de4705948064cab86533d9c85810082", + "6f6d31664256410987be90f2a05b7db2", + "1c83038d2bb346b49e04b5bb62af58d6", + "3a981dbfc24a41289b8f5bcc06abe820", + "936cf565614f4ac98a18efafb98e76c3", + "9296b09ff8ca442c995ccb3b76370638", + "cdab0625d4aa4e1fabfa034381d116a1", + "34b74f2be8bb4198885fbaf0b3aae845", + "2a565db566a049d1b2d5daa7ef030733", + "fcb7c210124641969eb43ea8cea973ad", + "d96b7c65c1ae4a3582c4e7fb5562471d", + "d9b684e20f2b444cb1120422b3027759", + "c7d30e7b587243caa3aaaffbe87c0a88", + "57995c76fc1a497193b9ca9ec6478cab", + "1d4bdd4783c344f59efc4c6328723cb9", + "f33c382245334d4aa58fe06d42fa902e", + "c05deca308774c6dbde37815c5bd916e", + "3101294e893541fd8f93e79fd303bcb3", + "fd9804cc2d1e4454982cf105de9d1efb", + "21d082c9eceb4321ad58a3cb37bc840a", + "a75ff77436274553b0b7965337ff5c04", + "4b103ceee9c44da9bf8938ddecb27c57", + "3d853e79133442c2a0d2eaac4525bef7", + "558975101c044828a619345b8661a600", + "2b2e2b073035442792276bd80dd3eb68", + "840cbc2c5d1a485a96903a8a4aa89479", + "b6fe5088145b45e7a1a0803f9ffd1905", + "f244fc7cc56c44e49f2efacfcc7eca54", + "76ba48b5c01749908aae0044d4d0eb73", + "9eb2a7d4dc63477ba64e9c45e09faa8c", + "d3ea136c2bf24aa098e46d7417b79d88", + "b503ace6123b49a5bfaf55d319db3aaf", + "27c1c34295f7427fb0dde07698f550b4", + "1c74a9271f52488d94f940fbc74e6b7d", + "5bbffaaa66e540aab0056be199e874e2", + "0f6026caa9f3449694a5b559a7e9ddf6", + "ba0dcb34176b4f389cbca76f14eaacc6", + "efba751c47f84807bf8400e4ab4dbe53", + "481ee1d08f494739afee50cd6669237f", + "482773282c9e424b85c2b606929d72e9", + "1722973c296f4ebfb04760daddeba44a", + "648dd0b1a8be4cd1be934beea9e70925", + "8feb439b07ee4c20a2e035b597681a44", + "9e1280773fae4460b8e72068ebea3e1f" + ] + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n", + "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", + "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", + "You will be able to reuse this secret in all of your notebooks.\n", + "Please note that authentication is recommended but still optional to access public models or datasets.\n", + " warnings.warn(\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "modules.json: 0%| | 0.00/349 [00:00\", line 619, in _exec\n", + " File \"\", line 883, in exec_module\n", + " File \"\", line 241, in _call_with_frames_removed\n", + " File \"/usr/local/lib/python3.10/dist-packages/faiss/swigfaiss.py\", line 711, in \n", + " class ParameterRangeVector(object):\n", + " File \"/usr/local/lib/python3.10/dist-packages/faiss/swigfaiss.py\", line 738, in ParameterRangeVector\n", + " __swig_destroy__ = _swigfaiss.delete_ParameterRangeVector\n", + "AttributeError: module 'faiss._swigfaiss' has no attribute 'delete_ParameterRangeVector'\n", + "]\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Exception occurred in add_texts_in_batches: name 'swig_ptr' is not defined\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Processing batches: 0%| | 0/14477 [00:00\u001b[0m in \u001b[0;36mvectorize_data_in_batches\u001b[0;34m(chunks, embeddings)\u001b[0m\n\u001b[1;32m 36\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 37\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mfuture\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mas_completed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfutures\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 38\u001b[0m \u001b[0;31m# Each time a future completes, update the progress and collect the result\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/lib/python3.10/concurrent/futures/_base.py\u001b[0m in \u001b[0;36mas_completed\u001b[0;34m(fs, timeout)\u001b[0m\n\u001b[1;32m 244\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 245\u001b[0;31m \u001b[0mwaiter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mevent\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mwait_timeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 246\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/lib/python3.10/threading.py\u001b[0m in \u001b[0;36mwait\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 606\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0msignaled\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 607\u001b[0;31m \u001b[0msignaled\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_cond\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 608\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0msignaled\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/lib/python3.10/threading.py\u001b[0m in \u001b[0;36mwait\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 319\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 320\u001b[0;31m \u001b[0mwaiter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0macquire\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 321\u001b[0m \u001b[0mgotit\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: ", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 53\u001b[0m \u001b[0mfaiss_db\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msave_local\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"faiss_index\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 54\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 55\u001b[0;31m \u001b[0mvectorize_data_in_batches\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mchunks\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtext_chunks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0membeddings\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0membeddings\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m\u001b[0m in \u001b[0;36mvectorize_data_in_batches\u001b[0;34m(chunks, embeddings)\u001b[0m\n\u001b[1;32m 28\u001b[0m \u001b[0mfaiss_db\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;31m# List to collect the returned db objects\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 29\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 30\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0mThreadPoolExecutor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmax_workers\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnum_workers\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mexecutor\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 31\u001b[0m \u001b[0;31m# Submit all the batches for processing\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[0mfutures\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0mexecutor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msubmit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0madd_texts_in_batches\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0membeddings\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0membeddings\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mbatch\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mbatch\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mbatches\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/lib/python3.10/concurrent/futures/_base.py\u001b[0m in \u001b[0;36m__exit__\u001b[0;34m(self, exc_type, exc_val, exc_tb)\u001b[0m\n\u001b[1;32m 647\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 648\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__exit__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mexc_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mexc_val\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mexc_tb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 649\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshutdown\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mwait\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 650\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 651\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/lib/python3.10/concurrent/futures/thread.py\u001b[0m in \u001b[0;36mshutdown\u001b[0;34m(self, wait, cancel_futures)\u001b[0m\n\u001b[1;32m 233\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mwait\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 234\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mt\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_threads\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 235\u001b[0;31m \u001b[0mt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 236\u001b[0m \u001b[0mshutdown\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__doc__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_base\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mExecutor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshutdown\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__doc__\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/lib/python3.10/threading.py\u001b[0m in \u001b[0;36mjoin\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 1094\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1095\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1096\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_wait_for_tstate_lock\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1097\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1098\u001b[0m \u001b[0;31m# the behavior of a negative timeout isn't documented, but\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/lib/python3.10/threading.py\u001b[0m in \u001b[0;36m_wait_for_tstate_lock\u001b[0;34m(self, block, timeout)\u001b[0m\n\u001b[1;32m 1114\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1115\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1116\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mlock\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0macquire\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mblock\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1117\u001b[0m \u001b[0mlock\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrelease\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1118\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_stop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], + "source": [ + "def add_texts_in_batches(batch: List[Document], sqlite_table: str = \"evernote\", embeddings=embeddings) -> None:\n", + " \"\"\"\n", + " Using type hints is a good idea here, because error messages get swallowed by the ThreadPoolExecutor.\n", + "\n", + " The exception handling serves the same purpose.\n", + " \"\"\"\n", + "\n", + " try:\n", + " db = FAISS.from_documents(batch, embeddings)\n", + " return db\n", + "\n", + " except Exception as e:\n", + " print(f\"Exception occurred in add_texts_in_batches: {e}\")\n", + "\n", + "def divide_chunks(chunks, n):\n", + " \"\"\"\n", + " Divide and conquer\n", + " \"\"\"\n", + " for i in range(0, len(chunks), n):\n", + " yield chunks[i:i + n]\n", + "\n", + "\n", + "def vectorize_data_in_batches(chunks, embeddings):\n", + " num_workers = 3\n", + " batch_size = 50 # Adjust based on your needs and memory constraints\n", + "\n", + " batches = list(divide_chunks(chunks, batch_size))\n", + " faiss_db = None # List to collect the returned db objects\n", + "\n", + " with ThreadPoolExecutor(max_workers=num_workers) as executor:\n", + " # Submit all the batches for processing\n", + " futures = {executor.submit(add_texts_in_batches, batch, embeddings=embeddings): batch for batch in batches}\n", + "\n", + " # Setup the tqdm progress bar\n", + " progress_bar = tqdm(total=len(futures), desc=\"Processing batches\")\n", + "\n", + " for future in as_completed(futures):\n", + " # Each time a future completes, update the progress and collect the result\n", + " progress_bar.update(1)\n", + " try:\n", + " db_result = future.result() # This is where you get the returned value from add_texts_in_batches\n", + " if faiss_db is not None:\n", + " faiss_db = db_result.merge_from(faiss_db)\n", + " else:\n", + " faiss_db = db_result\n", + "\n", + " except Exception as e:\n", + " print(f\"An error occurred: {e}\")\n", + "\n", + " progress_bar.close() # Ensure the progress bar is closed at the end\n", + "\n", + " print(\"All texts have been added to the database.\")\n", + " faiss_db.save_local(\"faiss_index\")\n", + "\n", + "vectorize_data_in_batches(chunks=text_chunks, embeddings=embeddings)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + }, + "colab": { + "provenance": [], + "gpuType": "V100" + }, + "accelerator": "GPU", + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "8483f598cf82452e8cc1e72632e2776b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_beef757d2c4f483eb81e81cdf4979c53", + "IPY_MODEL_f2d44ed3ad784d85b89fe85eacb4902e", + "IPY_MODEL_6323e24242ec448bbd4d0bcc8474c2be" + ], + "layout": "IPY_MODEL_2a2b2e89b8304c4298c844b8211de167" + } + }, + "beef757d2c4f483eb81e81cdf4979c53": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8f79ebe13fe54840bce483acaaca04ff", + "placeholder": "​", + "style": "IPY_MODEL_9c5f5ee9bbf340bcbb06221fda88634c", + "value": "modules.json: 100%" + } + }, + "f2d44ed3ad784d85b89fe85eacb4902e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8a089f081a50414d9655f4a16d85cf12", + "max": 349, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_697a8e3d4ed5431aacd0c2cf379efa0e", + "value": 349 + } + }, + "6323e24242ec448bbd4d0bcc8474c2be": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_929a2b83e0ef405e8340aca838f67fae", + "placeholder": "​", + "style": "IPY_MODEL_a207830eae514dba968ea69ba57b8c83", + "value": " 349/349 [00:00<00:00, 11.7kB/s]" + } + }, + "2a2b2e89b8304c4298c844b8211de167": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8f79ebe13fe54840bce483acaaca04ff": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9c5f5ee9bbf340bcbb06221fda88634c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "8a089f081a50414d9655f4a16d85cf12": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "697a8e3d4ed5431aacd0c2cf379efa0e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "929a2b83e0ef405e8340aca838f67fae": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a207830eae514dba968ea69ba57b8c83": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "632f1e5872804ecb95630168ddfedc67": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_9ca5b11369cf4f74aa54b1ad08a90d13", + "IPY_MODEL_c38f5766a3ef4e3cbea0c7fea9e34b73", + "IPY_MODEL_b4cf9727180945fea027e40338a3036c" + ], + "layout": "IPY_MODEL_32573efabe434e1a8b22ee40efffd06a" + } + }, + "9ca5b11369cf4f74aa54b1ad08a90d13": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b49f8ef7eb7043de869049a5be4282c2", + "placeholder": "​", + "style": "IPY_MODEL_ee8cf1ab475e4251b9ff585b38d6ac49", + "value": "config_sentence_transformers.json: 100%" + } + }, + "c38f5766a3ef4e3cbea0c7fea9e34b73": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_18a1ae367b0d4c539807a2d0136d5c74", + "max": 124, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_9ce5cd9f2dd946c79eebb2fee078128e", + "value": 124 + } + }, + "b4cf9727180945fea027e40338a3036c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_36d267e780e34370adfd3c3108155ae9", + "placeholder": "​", + "style": "IPY_MODEL_50348c7badbb4b34b0e3dea1c200843d", + "value": " 124/124 [00:00<00:00, 6.08kB/s]" + } + }, + "32573efabe434e1a8b22ee40efffd06a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b49f8ef7eb7043de869049a5be4282c2": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ee8cf1ab475e4251b9ff585b38d6ac49": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "18a1ae367b0d4c539807a2d0136d5c74": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9ce5cd9f2dd946c79eebb2fee078128e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "36d267e780e34370adfd3c3108155ae9": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "50348c7badbb4b34b0e3dea1c200843d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "d49be4c05cfa40afb113d381d8e607c1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_26b039146a21460c8398e17c2402d424", + "IPY_MODEL_066de3c247694e61a40ee7ce164b93b0", + "IPY_MODEL_fbb11ab5105e4b9dbac42887adf18ab3" + ], + "layout": "IPY_MODEL_9382fca054b44477ae20df3273749ee5" + } + }, + "26b039146a21460c8398e17c2402d424": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_57bca8e245c1430494876a4b11253a56", + "placeholder": "​", + "style": "IPY_MODEL_bf4db72d99b84e5086fb9a4d63e5f600", + "value": "README.md: 100%" + } + }, + "066de3c247694e61a40ee7ce164b93b0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_56e6c08acc2f44f097294471ff59068d", + "max": 94607, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_14543db7b0d04b8bab4259f3e503a709", + "value": 94607 + } + }, + "fbb11ab5105e4b9dbac42887adf18ab3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_fee292c094444cb89bbe0faad87f000c", + "placeholder": "​", + "style": "IPY_MODEL_35947ca3bc824bba945274ebb3df15f3", + "value": " 94.6k/94.6k [00:00<00:00, 471kB/s]" + } + }, + "9382fca054b44477ae20df3273749ee5": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "57bca8e245c1430494876a4b11253a56": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "bf4db72d99b84e5086fb9a4d63e5f600": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "56e6c08acc2f44f097294471ff59068d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "14543db7b0d04b8bab4259f3e503a709": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "fee292c094444cb89bbe0faad87f000c": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "35947ca3bc824bba945274ebb3df15f3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "99e0d045915b4a38b54ee54d51bb0a4b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_21511e55ed8a4f80b02ead5371fb9fc6", + "IPY_MODEL_58aaf75e5b1a4941807fdcd6545c2814", + "IPY_MODEL_a7c480a65ee94121abe6cf2f6f6721a0" + ], + "layout": "IPY_MODEL_6fa4c19fcf304768bd8f9dc730e3136a" + } + }, + "21511e55ed8a4f80b02ead5371fb9fc6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c0b1966a0124461596798fe4fe93e242", + "placeholder": "​", + "style": "IPY_MODEL_9c5e2ad4c55c43fcb4d4be2bacf5f13a", + "value": "sentence_bert_config.json: 100%" + } + }, + "58aaf75e5b1a4941807fdcd6545c2814": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_39fb1201bdb847c6b9f5c1f14afff125", + "max": 52, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_c9381dbd77d9418fb67c8863b68c8659", + "value": 52 + } + }, + "a7c480a65ee94121abe6cf2f6f6721a0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_caca4e93a0634784bfd46a9ac9eb402d", + "placeholder": "​", + "style": "IPY_MODEL_c94f4cecd5584206ba5367f98086c019", + "value": " 52.0/52.0 [00:00<00:00, 3.76kB/s]" + } + }, + "6fa4c19fcf304768bd8f9dc730e3136a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c0b1966a0124461596798fe4fe93e242": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9c5e2ad4c55c43fcb4d4be2bacf5f13a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "39fb1201bdb847c6b9f5c1f14afff125": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c9381dbd77d9418fb67c8863b68c8659": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "caca4e93a0634784bfd46a9ac9eb402d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c94f4cecd5584206ba5367f98086c019": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ce2de3d85d1744ddb534e772b9175e72": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_530d12787679463f91c95bbbabbdc27f", + "IPY_MODEL_a5ca8277d58846c4b7af700ec2306d91", + "IPY_MODEL_7e4d67cd575c4f478dfb5b51415d63f7" + ], + "layout": "IPY_MODEL_207707226b69440096d3a609314c3028" + } + }, + "530d12787679463f91c95bbbabbdc27f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1f41ff403f6a4a73bd09077b3e71bee3", + "placeholder": "​", + "style": "IPY_MODEL_52235364698646a39662ab8c7f15d496", + "value": "config.json: 100%" + } + }, + "a5ca8277d58846c4b7af700ec2306d91": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4cf0cd0027484070bd1987b19cd5e1e0", + "max": 779, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_f070bc651b8e44bbb9ee27a8c5df4bc7", + "value": 779 + } + }, + "7e4d67cd575c4f478dfb5b51415d63f7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a14087c6a60e4ae89d4bb4ade00891be", + "placeholder": "​", + "style": "IPY_MODEL_eedf7d8e981f4e849a82546bf96937c7", + "value": " 779/779 [00:00<00:00, 46.8kB/s]" + } + }, + "207707226b69440096d3a609314c3028": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1f41ff403f6a4a73bd09077b3e71bee3": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "52235364698646a39662ab8c7f15d496": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "4cf0cd0027484070bd1987b19cd5e1e0": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f070bc651b8e44bbb9ee27a8c5df4bc7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "a14087c6a60e4ae89d4bb4ade00891be": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "eedf7d8e981f4e849a82546bf96937c7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "691e9f7c36c244fb9c6c4ac823d200ea": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_43e6a4b9fa91493fb829720492d40d75", + "IPY_MODEL_fdb3b06da2ee4c0b857b8e5e0fd68592", + "IPY_MODEL_4e247055f9a0459c8ea06859af21da4e" + ], + "layout": "IPY_MODEL_4409342104f04c38b1a753ce74ebb749" + } + }, + "43e6a4b9fa91493fb829720492d40d75": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7e2d3506b38b47ca896e65f0964f9289", + "placeholder": "​", + "style": "IPY_MODEL_2c2885076c684599aa01111f69a2797a", + "value": "model.safetensors: 100%" + } + }, + "fdb3b06da2ee4c0b857b8e5e0fd68592": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_23e1537216bf4d02962ad389faf5e0d4", + "max": 1340616616, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_8f6a77d9dd664ddb9e05a2c1c6e7f48b", + "value": 1340616616 + } + }, + "4e247055f9a0459c8ea06859af21da4e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f74d0ace48c9436b9d1f601b17d82d05", + "placeholder": "​", + "style": "IPY_MODEL_93a2ada054574e509bba17f1a6fc88e8", + "value": " 1.34G/1.34G [00:18<00:00, 82.9MB/s]" + } + }, + "4409342104f04c38b1a753ce74ebb749": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7e2d3506b38b47ca896e65f0964f9289": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2c2885076c684599aa01111f69a2797a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "23e1537216bf4d02962ad389faf5e0d4": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8f6a77d9dd664ddb9e05a2c1c6e7f48b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "f74d0ace48c9436b9d1f601b17d82d05": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "93a2ada054574e509bba17f1a6fc88e8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "470aeb10ce004cbbb9ab0b4454d1c7d2": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_1db0f861eec140ba907855d471afc8b4", + "IPY_MODEL_38c02eaa287742bb9b9981ac9418b095", + "IPY_MODEL_aafe7804e780441196d7bf8774e14dfb" + ], + "layout": "IPY_MODEL_96452021cacb4c819ae0496daf2686a9" + } + }, + "1db0f861eec140ba907855d471afc8b4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e6cec6bb9c2341679e5b83c31881c389", + "placeholder": "​", + "style": "IPY_MODEL_770a2acc6b5342a3b3ec7cb2a87366cd", + "value": "tokenizer_config.json: 100%" + } + }, + "38c02eaa287742bb9b9981ac9418b095": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_77eef81327aa4245b4e725393423296d", + "max": 366, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_53d165b361764f45a3b7b6049cda67c7", + "value": 366 + } + }, + "aafe7804e780441196d7bf8774e14dfb": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0d7e4fa9f537434eb7656a85c99926fb", + "placeholder": "​", + "style": "IPY_MODEL_4950a63544b041d284938b798bd6b2a8", + "value": " 366/366 [00:00<00:00, 23.8kB/s]" + } + }, + "96452021cacb4c819ae0496daf2686a9": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e6cec6bb9c2341679e5b83c31881c389": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "770a2acc6b5342a3b3ec7cb2a87366cd": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "77eef81327aa4245b4e725393423296d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "53d165b361764f45a3b7b6049cda67c7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "0d7e4fa9f537434eb7656a85c99926fb": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4950a63544b041d284938b798bd6b2a8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "3de4705948064cab86533d9c85810082": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_6f6d31664256410987be90f2a05b7db2", + "IPY_MODEL_1c83038d2bb346b49e04b5bb62af58d6", + "IPY_MODEL_3a981dbfc24a41289b8f5bcc06abe820" + ], + "layout": "IPY_MODEL_936cf565614f4ac98a18efafb98e76c3" + } + }, + "6f6d31664256410987be90f2a05b7db2": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9296b09ff8ca442c995ccb3b76370638", + "placeholder": "​", + "style": "IPY_MODEL_cdab0625d4aa4e1fabfa034381d116a1", + "value": "vocab.txt: 100%" + } + }, + "1c83038d2bb346b49e04b5bb62af58d6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_34b74f2be8bb4198885fbaf0b3aae845", + "max": 231508, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_2a565db566a049d1b2d5daa7ef030733", + "value": 231508 + } + }, + "3a981dbfc24a41289b8f5bcc06abe820": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_fcb7c210124641969eb43ea8cea973ad", + "placeholder": "​", + "style": "IPY_MODEL_d96b7c65c1ae4a3582c4e7fb5562471d", + "value": " 232k/232k [00:00<00:00, 583kB/s]" + } + }, + "936cf565614f4ac98a18efafb98e76c3": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9296b09ff8ca442c995ccb3b76370638": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "cdab0625d4aa4e1fabfa034381d116a1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "34b74f2be8bb4198885fbaf0b3aae845": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2a565db566a049d1b2d5daa7ef030733": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "fcb7c210124641969eb43ea8cea973ad": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d96b7c65c1ae4a3582c4e7fb5562471d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "d9b684e20f2b444cb1120422b3027759": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_c7d30e7b587243caa3aaaffbe87c0a88", + "IPY_MODEL_57995c76fc1a497193b9ca9ec6478cab", + "IPY_MODEL_1d4bdd4783c344f59efc4c6328723cb9" + ], + "layout": "IPY_MODEL_f33c382245334d4aa58fe06d42fa902e" + } + }, + "c7d30e7b587243caa3aaaffbe87c0a88": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c05deca308774c6dbde37815c5bd916e", + "placeholder": "​", + "style": "IPY_MODEL_3101294e893541fd8f93e79fd303bcb3", + "value": "tokenizer.json: 100%" + } + }, + "57995c76fc1a497193b9ca9ec6478cab": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_fd9804cc2d1e4454982cf105de9d1efb", + "max": 711396, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_21d082c9eceb4321ad58a3cb37bc840a", + "value": 711396 + } + }, + "1d4bdd4783c344f59efc4c6328723cb9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a75ff77436274553b0b7965337ff5c04", + "placeholder": "​", + "style": "IPY_MODEL_4b103ceee9c44da9bf8938ddecb27c57", + "value": " 711k/711k [00:02<00:00, 256kB/s]" + } + }, + "f33c382245334d4aa58fe06d42fa902e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c05deca308774c6dbde37815c5bd916e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3101294e893541fd8f93e79fd303bcb3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "fd9804cc2d1e4454982cf105de9d1efb": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "21d082c9eceb4321ad58a3cb37bc840a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "a75ff77436274553b0b7965337ff5c04": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4b103ceee9c44da9bf8938ddecb27c57": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "3d853e79133442c2a0d2eaac4525bef7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_558975101c044828a619345b8661a600", + "IPY_MODEL_2b2e2b073035442792276bd80dd3eb68", + "IPY_MODEL_840cbc2c5d1a485a96903a8a4aa89479" + ], + "layout": "IPY_MODEL_b6fe5088145b45e7a1a0803f9ffd1905" + } + }, + "558975101c044828a619345b8661a600": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f244fc7cc56c44e49f2efacfcc7eca54", + "placeholder": "​", + "style": "IPY_MODEL_76ba48b5c01749908aae0044d4d0eb73", + "value": "special_tokens_map.json: 100%" + } + }, + "2b2e2b073035442792276bd80dd3eb68": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9eb2a7d4dc63477ba64e9c45e09faa8c", + "max": 125, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_d3ea136c2bf24aa098e46d7417b79d88", + "value": 125 + } + }, + "840cbc2c5d1a485a96903a8a4aa89479": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b503ace6123b49a5bfaf55d319db3aaf", + "placeholder": "​", + "style": "IPY_MODEL_27c1c34295f7427fb0dde07698f550b4", + "value": " 125/125 [00:00<00:00, 9.01kB/s]" + } + }, + "b6fe5088145b45e7a1a0803f9ffd1905": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f244fc7cc56c44e49f2efacfcc7eca54": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "76ba48b5c01749908aae0044d4d0eb73": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "9eb2a7d4dc63477ba64e9c45e09faa8c": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d3ea136c2bf24aa098e46d7417b79d88": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "b503ace6123b49a5bfaf55d319db3aaf": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "27c1c34295f7427fb0dde07698f550b4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "1c74a9271f52488d94f940fbc74e6b7d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_5bbffaaa66e540aab0056be199e874e2", + "IPY_MODEL_0f6026caa9f3449694a5b559a7e9ddf6", + "IPY_MODEL_ba0dcb34176b4f389cbca76f14eaacc6" + ], + "layout": "IPY_MODEL_efba751c47f84807bf8400e4ab4dbe53" + } + }, + "5bbffaaa66e540aab0056be199e874e2": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_481ee1d08f494739afee50cd6669237f", + "placeholder": "​", + "style": "IPY_MODEL_482773282c9e424b85c2b606929d72e9", + "value": "1_Pooling/config.json: 100%" + } + }, + "0f6026caa9f3449694a5b559a7e9ddf6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1722973c296f4ebfb04760daddeba44a", + "max": 191, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_648dd0b1a8be4cd1be934beea9e70925", + "value": 191 + } + }, + "ba0dcb34176b4f389cbca76f14eaacc6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8feb439b07ee4c20a2e035b597681a44", + "placeholder": "​", + "style": "IPY_MODEL_9e1280773fae4460b8e72068ebea3e1f", + "value": " 191/191 [00:00<00:00, 11.6kB/s]" + } + }, + "efba751c47f84807bf8400e4ab4dbe53": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "481ee1d08f494739afee50cd6669237f": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "482773282c9e424b85c2b606929d72e9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "1722973c296f4ebfb04760daddeba44a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "648dd0b1a8be4cd1be934beea9e70925": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "8feb439b07ee4c20a2e035b597681a44": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9e1280773fae4460b8e72068ebea3e1f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "2d7fc8cf21224a0290120a398d07ebf7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_a76ed11fca8c4d24bfba93e535e4d9dc", + "IPY_MODEL_f09b113f18eb4dd0b09d08bfdc77d425", + "IPY_MODEL_7f52f0bf4d6c43e1be130c3d4a39a1fd" + ], + "layout": "IPY_MODEL_9f775e37eac24e25a30cf8525a6c3695" + } + }, + "a76ed11fca8c4d24bfba93e535e4d9dc": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3440f2e53a8648458be111e704fdc67d", + "placeholder": "​", + "style": "IPY_MODEL_a511312d268248bf9b911345ceaca827", + "value": "Processing batches:   6%" + } + }, + "f09b113f18eb4dd0b09d08bfdc77d425": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_fa5b136b89714f8f893cd751b06d6e6b", + "max": 14477, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_38605ab14a234e5aa64293bfe7c1d622", + "value": 846 + } + }, + "7f52f0bf4d6c43e1be130c3d4a39a1fd": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4fd0894102314e0fb970a6459972d677", + "placeholder": "​", + "style": "IPY_MODEL_476a1f07c1b24f8a845a410857ffd80e", + "value": " 846/14477 [03:40<1:11:07,  3.19it/s]" + } + }, + "9f775e37eac24e25a30cf8525a6c3695": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3440f2e53a8648458be111e704fdc67d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a511312d268248bf9b911345ceaca827": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "fa5b136b89714f8f893cd751b06d6e6b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "38605ab14a234e5aa64293bfe7c1d622": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "4fd0894102314e0fb970a6459972d677": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "476a1f07c1b24f8a845a410857ffd80e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file