diff --git a/EverNote_Documents_To_FAISS_Colab_GPU.ipynb b/EverNote_Documents_To_FAISS_Colab_GPU.ipynb index 2565fd1..3102a27 100644 --- a/EverNote_Documents_To_FAISS_Colab_GPU.ipynb +++ b/EverNote_Documents_To_FAISS_Colab_GPU.ipynb @@ -1,5621 +1,5703 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "18d62071e34b0d53", - "metadata": { - "collapsed": false, - "id": "18d62071e34b0d53" - }, - "source": [ - "# This is an experiment: create vectorized embeddings out of an EverNote DB (PDF, DOCX, HTML, TXT)\n" - ] + "cells": [ + { + "cell_type": "markdown", + "id": "18d62071e34b0d53", + "metadata": { + "collapsed": false, + "id": "18d62071e34b0d53" + }, + "source": [ + "# This is an experiment: create vectorized embeddings out of an EverNote DB (PDF, DOCX, HTML, TXT)\n" + ] + }, + { + "cell_type": "markdown", + "id": "aLpLL0Wy2A8M", + "metadata": { + "id": "aLpLL0Wy2A8M" + }, + "source": [ + "\n", + "## Features\n", + "\n", + "* vectorize text, html files, pdfs and docx into one vector DB, split in tables (sqlite vss)\n", + "* use local self-hosted embeddings (CPU or GPU computed)\n", + " * for sentences\n", + "* query a local sqlite vss vector db, use cache from LangChain (sqlite)\n", + "* use OpenAI API and (Ollama on-prem self-hosted) Mistral for the response processing\n", + "* compare with LLMware Bling" + ] + }, + { + "cell_type": "markdown", + "id": "stgrzM3K2C-o", + "metadata": { + "id": "stgrzM3K2C-o" + }, + "source": [ + "## Anti-Features\n", + "\n", + "* due to cost reasons the OpenAI embeddings don't get used. So sorry ... not." 
+ ] + }, + { + "cell_type": "markdown", + "id": "94517a27e3148ff4", + "metadata": { + "collapsed": false, + "id": "94517a27e3148ff4" + }, + "source": [ + "# Setup and configuration\n", + "\n", + "⚠ This config is automated and executes a Bash script from a GitHub repo if you execute it on Goog Colab ⚠" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "fd9747a54ea8fcef", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-05T11:27:09.463344Z", + "start_time": "2024-04-05T11:27:09.458830Z" }, - { - "cell_type": "markdown", - "source": [ - "\n", - "## Features\n", - "\n", - "* vectorize text, html files, pdfs and docx into one vector DB, split in tables (sqlite vss)\n", - "* use local self-hosted embeddings (CPU or GPU computed)\n", - " * for sentences\n", - "* query a local sqlite vss vector db, use cache from LangChain (sqlite)\n", - "* use OpenAI API and (Ollama on-prem self-hosted) Mistral for the response processing\n", - "* compare with LLMware Bling" - ], - "metadata": { - "id": "aLpLL0Wy2A8M" - }, - "id": "aLpLL0Wy2A8M" + "id": "fd9747a54ea8fcef" + }, + "outputs": [], + "source": [ + "import sys\n", + "import os\n", + "import subprocess\n", + "\n", + "IN_COLAB = 'google.colab' in sys.modules\n", + "\n", + "if not IN_COLAB:\n", + " # The Evernote DB path containing the extracted data\n", + " # It will not be needed on Colab\n", + " extracted_evernote_db = \"/home/marius/data/it-sec-research-extracted/IT sec research\"\n", + "\n", + " # Output paths containing the Evernote text notes or documents data.\n", + " # These get generated by the data extraction process\n", + " output_path_extracted_notes = \"/home/marius/source/bookworm/export.txt\"\n", + " output_path_extracted_docs = \"/home/marius/source/bookworm/export.documents.txt\"\n", + "\n", + " # Resulting DB or vector store path.\n", + " result_db = \"/home/marius/source/bookworm/evernote.db\"\n", + "\n", + "else:\n", + " # For the Goog Colab env we use different paths\n", + " 
output_path_extracted_notes = \"/content/export.txt\"\n", + " output_path_extracted_docs = \"/content/export.documents.txt\"\n", + " result_db = \"/content/evernote.db\"\n", + "\n", + " # Download the data locally (just some txt files here)\n", + " # Install pip dependencies in Colab\n", + " subprocess.run('''\n", + " source <(curl -s https://raw.githubusercontent.com/norandom/project_bookworm/main/scripts/prepare_colab_env.sh)\n", + " ''',\n", + " shell=True, check=True, executable='/bin/bash')" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "oHbFM-721Uwf", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-05T11:27:14.298345Z", + "start_time": "2024-04-05T11:27:14.295581Z" }, - { - "cell_type": "markdown", - "source": [ - "## Anti-Features\n", - "\n", - "* due to cost reasons the OpenAI embeddings don't get used. So sorry ... not." - ], - "metadata": { - "id": "stgrzM3K2C-o" - }, - "id": "stgrzM3K2C-o" - }, - { - "cell_type": "markdown", - "id": "94517a27e3148ff4", - "metadata": { - "collapsed": false, - "id": "94517a27e3148ff4" - }, - "source": [ - "# Setup and configuration\n", - "\n", - "⚠ This config is automated and executes a Bash script from a GitHub repo if you execute it on Goog Colab ⚠" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "fd9747a54ea8fcef", - "metadata": { - "ExecuteTime": { - "end_time": "2024-04-04T10:35:54.949214Z", - "start_time": "2024-04-04T10:35:54.945013Z" - }, - "id": "fd9747a54ea8fcef" - }, - "outputs": [], - "source": [ - "import sys\n", - "import os\n", - "import subprocess\n", - "\n", - "IN_COLAB = 'google.colab' in sys.modules\n", - "\n", - "if not IN_COLAB:\n", - " # The Evernote DB path containing the extracted data\n", - " # It will not be needed on Colab\n", - " extracted_evernote_db = \"/home/marius/data/it-sec-research-extracted/IT sec research\"\n", - "\n", - " # Output paths containing the Evernote text notes or documents data.\n", - " # These get generated by the data 
extraction process\n", - " output_path_extracted_notes = \"/home/marius/source/bookworm/export.txt\"\n", - " output_path_extracted_docs = \"/home/marius/source/bookworm/export.documents.txt\"\n", - "\n", - " # Resulting DB or vector store path.\n", - " result_db = \"/home/marius/source/bookworm/evernote.db\"\n", - "\n", - "else:\n", - " # For the Goog Colab env we use different paths\n", - " output_path_extracted_notes = \"/content/export.txt\"\n", - " output_path_extracted_docs = \"/content/export.documents.txt\"\n", - " result_db = \"/content/evernote.db\"\n", - "\n", - " # Download the data locally (just some txt files here)\n", - " # Install pip dependencies in Colab\n", - " subprocess.run('''\n", - " source <(curl -s https://raw.githubusercontent.com/norandom/project_bookworm/main/scripts/prepare_colab_env.sh)\n", - " ''',\n", - " shell=True, check=True, executable='/bin/bash')" - ] - }, - { - "cell_type": "code", - "source": [ - "# To suppress some warnings\n", - "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"False\"" - ], - "metadata": { - "id": "oHbFM-721Uwf" - }, - "id": "oHbFM-721Uwf", - "execution_count": 3, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "## Checks" - ], - "metadata": { - "id": "yuhXPdN_z2cW" - }, - "id": "yuhXPdN_z2cW" - }, - { - "cell_type": "code", - "source": [ - "print(output_path_extracted_notes)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "6SPPaVEet9EO", - "outputId": "e7cb63ec-8192-43a4-9320-83ff5b3b2122" - }, - "id": "6SPPaVEet9EO", - "execution_count": 4, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "/content/export.txt\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "## For the progress bars in Colab\n", - "\n", - "⚛ If you don't add this magic commands the `tqdm` progress bars will not update properly ⚛" - ], - "metadata": { - "id": "B02AY_Gez61T" - }, - "id": "B02AY_Gez61T" - }, - { - "cell_type": "code", - 
"source": [ - "%reload_ext autoreload\n", - "%autoreload 2" - ], - "metadata": { - "id": "XGYNhuvrvnUD" - }, - "id": "XGYNhuvrvnUD", - "execution_count": 5, - "outputs": [] - }, - { - "cell_type": "markdown", - "id": "a8c8692786d83c00", - "metadata": { - "collapsed": false, - "id": "a8c8692786d83c00" - }, - "source": [ - "## Select key dependencies\n", - "\n", - "* `cryptography` is used to handle some PDF functions here (signatures)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "bb34db1ea75a1edf", - "metadata": { - "ExecuteTime": { - "end_time": "2024-04-04T10:08:32.520341Z", - "start_time": "2024-04-04T10:08:30.353678Z" - }, - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "bb34db1ea75a1edf", - "outputId": "26af1f05-f9c1-4849-88e1-dc5c18cd3884" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Name: cryptography\n", - "Version: 42.0.5\n", - "Summary: cryptography is a package which provides cryptographic recipes and primitives to Python developers.\n", - "Home-page: \n", - "Author: \n", - "Author-email: The Python Cryptographic Authority and individual contributors \n", - "License: Apache-2.0 OR BSD-3-Clause\n", - "Location: /usr/local/lib/python3.10/dist-packages\n", - "Requires: cffi\n", - "Required-by: pyOpenSSL\n" - ] - } - ], - "source": [ - "%pip show cryptography" - ] - }, - { - "cell_type": "markdown", - "id": "297746c807e95fbf", - "metadata": { - "collapsed": false, - "id": "297746c807e95fbf" - }, - "source": [ - "* `pikepdf` is used to repair some PDFs" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "ebc8af0183532fc2", - "metadata": { - "ExecuteTime": { - "end_time": "2024-04-04T10:08:34.665865Z", - "start_time": "2024-04-04T10:08:32.522020Z" - }, - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "ebc8af0183532fc2", - "outputId": "7966230e-ec57-4b64-acf2-09d013bb2608" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - 
"text": [ - "Name: pikepdf\n", - "Version: 8.13.0\n", - "Summary: Read and write PDFs with Python, powered by qpdf\n", - "Home-page: \n", - "Author: \n", - "Author-email: \"James R. Barlow\" \n", - "License: MPL-2.0\n", - "Location: /usr/local/lib/python3.10/dist-packages\n", - "Requires: Deprecated, lxml, packaging, Pillow\n", - "Required-by: \n" - ] - } - ], - "source": [ - "%pip show pikepdf" - ] - }, - { - "cell_type": "markdown", - "id": "7c7a7f6b0db3719e", - "metadata": { - "collapsed": false, - "id": "7c7a7f6b0db3719e" - }, - "source": [ - "* `pypdf` with all features is needed because this DB consists of 100+ PDFs" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "779f81e2ab00f73c", - "metadata": { - "ExecuteTime": { - "end_time": "2024-04-04T10:08:37.436449Z", - "start_time": "2024-04-04T10:08:35.269255Z" - }, - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "779f81e2ab00f73c", - "outputId": "e1fcc840-7e3c-4d4a-9457-745be8db60c4" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Name: pypdf\n", - "Version: 4.0.2\n", - "Summary: A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files\n", - "Home-page: \n", - "Author: \n", - "Author-email: Mathieu Fenniak \n", - "License: \n", - "Location: /usr/local/lib/python3.10/dist-packages\n", - "Requires: \n", - "Required-by: \n" - ] - } - ], - "source": [ - "%pip show \"pypdf\"" - ] - }, - { - "cell_type": "markdown", - "source": [ - "* `torch` is used for tensors, and GPU processing" - ], - "metadata": { - "id": "A5l3rFo03NKq" - }, - "id": "A5l3rFo03NKq" - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "de3f715519fda6c4", - "metadata": { - "ExecuteTime": { - "end_time": "2024-04-04T10:08:39.729429Z", - "start_time": "2024-04-04T10:08:37.438498Z" - }, - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "de3f715519fda6c4", - "outputId": "9f328109-f568-411b-c78a-0c1be41090bb" - }, - 
"outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Name: torch\n", - "Version: 2.2.1+cu121\n", - "Summary: Tensors and Dynamic neural networks in Python with strong GPU acceleration\n", - "Home-page: https://pytorch.org/\n", - "Author: PyTorch Team\n", - "Author-email: packages@pytorch.org\n", - "License: BSD-3\n", - "Location: /usr/local/lib/python3.10/dist-packages\n", - "Requires: filelock, fsspec, jinja2, networkx, nvidia-cublas-cu12, nvidia-cuda-cupti-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-runtime-cu12, nvidia-cudnn-cu12, nvidia-cufft-cu12, nvidia-curand-cu12, nvidia-cusolver-cu12, nvidia-cusparse-cu12, nvidia-nccl-cu12, nvidia-nvtx-cu12, sympy, triton, typing-extensions\n", - "Required-by: fastai, sentence-transformers, torchaudio, torchdata, torchtext, torchvision\n" - ] - } - ], - "source": [ - "%pip show torch" - ] - }, - { - "cell_type": "markdown", - "source": [ - "* `faiss` is used in the CPU version as a general vector store library. The data is being serialzed with `LangChain`. FAISS CPU version uses AVX2. The GPU port has some implementation issues with disk persistance and merging." 
- ], - "metadata": { - "id": "ZxyhRz6-3p-c" - }, - "id": "ZxyhRz6-3p-c" - }, - { - "cell_type": "code", - "source": [ - "%pip show faiss_cpu" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "HARY_QMJvttI", - "outputId": "bb7f5758-135e-435d-bf26-9aa3a1417e81" - }, - "id": "HARY_QMJvttI", - "execution_count": 10, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Name: faiss-cpu\n", - "Version: 1.8.0\n", - "Summary: A library for efficient similarity search and clustering of dense vectors.\n", - "Home-page: \n", - "Author: \n", - "Author-email: Kota Yamaguchi \n", - "License: MIT License\n", - "Location: /usr/local/lib/python3.10/dist-packages\n", - "Requires: numpy\n", - "Required-by: \n" - ] - } - ] - }, - { - "cell_type": "markdown", - "id": "ce1350d2d6e3ed63", - "metadata": { - "collapsed": false, - "id": "ce1350d2d6e3ed63" - }, - "source": [ - "## Text extraction\n", - "\n", - "⬛ This doesn't need to get executed if you already have the `*.txt` files.\n", - "\n", - "\n", - "* Here the html and text data is extracted into txt\n", - "* The PDF and DOCX data is extracted into another txt file. 
This will be used for weighted data fusion later.\n", - "\n", - "* the texts are normalized:\n", - " * unicode normalization\n", - " * surrogate characters get replaced\n", - " * html gets converted to text\n", - " * pdfs get repaired\n", - " * docx files get read\n", - "\n", - "* exceptions get handled (UTF-16 issues, PDF reference errors)" - ] - }, - { - "cell_type": "code", - "source": [ - "import glob\n", - "import os\n", - "\n", - "import unicodedata # to normalize text\n", - "import html2text # to convert html to text\n", - "from langchain.document_loaders import PyPDFLoader, Docx2txtLoader\n", - "import pikepdf # to repair PDFs\n", - "from pathlib import Path\n", - "from tqdm.notebook import tqdm\n", - "from concurrent.futures import ThreadPoolExecutor, as_completed" - ], - "metadata": { - "id": "yrzYeheF40jt" - }, - "id": "yrzYeheF40jt", - "execution_count": 11, - "outputs": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b557444b8b1d4839", - "metadata": { - "ExecuteTime": { - "end_time": "2024-04-04T09:25:39.388933Z", - "start_time": "2024-04-04T09:25:39.320902Z" - }, - "id": "b557444b8b1d4839" - }, - "outputs": [], - "source": [ - "def convert_html_to_text(html_blob: str) -> str:\n", - " \"\"\"\n", - " Converts a html blob into a string.\n", - " \"\"\"\n", - " h = html2text.HTML2Text()\n", - " h.mark_code = True\n", - " h.escape_snob = True\n", - " h.unicode_snob = True\n", - " # h.use_automatic_links = True\n", - " h.images_as_html = True\n", - " h.single_line_break = True\n", - " h.ignore_links = True\n", - " return h.handle(html_blob)\n", - "\n", - "def normalize_text(txt_blob: str) -> str:\n", - " \"\"\"\n", - " Normalize a text blob using NFKD normalization.\n", - " \"\"\"\n", - " return unicodedata.normalize(\"NFKD\", txt_blob)\n", - "\n", - "def repair_pdf(file_path: str) -> bool:\n", - " \"\"\"\n", - " Attempts to repair a PDF file using pikepdf.\n", - " \"\"\"\n", - " try:\n", - " with pikepdf.open(file_path, 
allow_overwriting_input=True) as pdf:\n", - " pdf.save(file_path)\n", - " return True\n", - " except pikepdf.PdfError as e:\n", - " print(f\"Failed to repair PDF {file_path}: {e}\")\n", - " return False\n", - "\n", - "def read_and_convert_file(file_path: str, is_html: bool, is_pdf: bool, is_docx: bool) -> str:\n", - " \"\"\"\n", - " Reads and converts a file from HTML, PDF, DOCX, or plain text to text.\n", - " :param file_path:\n", - " :param is_html:\n", - " :param is_pdf:\n", - " :param is_docx:\n", - " :return:\n", - " \"\"\"\n", - "\n", - " content = \"\"\n", - " if is_html:\n", - " try:\n", - " with open(file_path, 'r', encoding='utf-8') as file:\n", - " content = file.read()\n", - " return convert_html_to_text(content)\n", - " except Exception as e:\n", - " print(f\"Error reading {file_path}: {e}\")\n", - " return \"\"\n", - "\n", - " elif is_pdf:\n", - " try:\n", - " loader = PyPDFLoader(file_path)\n", - " # ... fixes \"Multiple definitions in dictionary at byte 0xb32 for key /ExtGState\" error\n", - " documents = loader.load()\n", - " content = \"\\n\".join(doc.page_content for doc in documents if hasattr(doc, 'page_content'))\n", - " except Exception as e:\n", - " print(f\"Error loading PDF {file_path}: {e}. 
Attempting to repair...\")\n", - " if repair_pdf(file_path):\n", - " try:\n", - " loader = PyPDFLoader(file_path)\n", - " documents = loader.load()\n", - " content = \"\\n\".join(doc.page_content for doc in documents if hasattr(doc, 'page_content'))\n", - " except Exception as e:\n", - " print(f\"Failed to process PDF {file_path} after repair: {e}\")\n", - " return \"\"\n", - " return normalize_text(content)\n", - "\n", - " elif is_docx:\n", - " try:\n", - " loader = Docx2txtLoader(file_path)\n", - " content = loader.load()\n", - " if isinstance(content, list):\n", - " content = \"\\n\".join(content)\n", - " except Exception as e:\n", - " print(f\"Error reading DOCX {file_path}: {e}\")\n", - " return \"\"\n", - " return normalize_text(content)\n", - "\n", - " else: # For plain text files\n", - " try:\n", - " with open(file_path, 'r', encoding='utf-8') as file:\n", - " return normalize_text(file.read())\n", - " except Exception as e:\n", - " print(f\"Error reading {file_path}: {e}\")\n", - " return \"\"\n", - "\n", - "def sanitize_text(text):\n", - " \"\"\"\n", - " Removes or replaces surrogate characters from a string.\n", - " \"\"\"\n", - " return text.encode('utf-8', 'replace').decode('utf-8')\n", - "\n", - "def append_to_output(data: str, is_pdf: bool, is_docx: bool, output_path: str):\n", - " \"\"\"\n", - " Appends sanitized data to an output file.\n", - " \"\"\"\n", - " sanitized_data = sanitize_text(data)\n", - " if is_pdf or is_docx:\n", - " output_path = str(Path(output_path).with_suffix('')) + \".documents.txt\"\n", - "\n", - " with open(output_path, \"a\", encoding='utf-8') as output_file:\n", - " output_file.write(sanitized_data)\n", - "\n", - "def process_file(file):\n", - " is_html = file.endswith('.html')\n", - " is_pdf = file.endswith('.pdf')\n", - " is_docx = file.endswith('.docx')\n", - "\n", - " file_content = read_and_convert_file(file, is_html, is_pdf, is_docx)\n", - " append_to_output(file_content, is_pdf, is_docx, output_path=output_path)\n", 
- "\n", - "def process_files_in_directory(directory: str):\n", - " txt_html_files = glob.glob(os.path.join(directory, \"*.txt\")) + glob.glob(os.path.join(directory, \"*.html\"))\n", - " pdf_docx_files = glob.glob(os.path.join(directory, \"img\", \"*.pdf\")) + glob.glob(os.path.join(directory, \"img\", \"*.docx\"))\n", - " all_files = txt_html_files + pdf_docx_files\n", - "\n", - " # Initialize the progress bar\n", - " pbar = tqdm(total=len(all_files), desc=\"Processing files\")\n", - "\n", - " with ThreadPoolExecutor(max_workers=3) as executor:\n", - " # Submit all files to the executor and store future objects\n", - " futures = [executor.submit(process_file, file) for file in all_files]\n", - "\n", - " # As tasks complete, update the progress bar\n", - " for future in as_completed(futures):\n", - " pbar.update(1) # Update the progress bar by one for each task completed\n", - "\n", - " # Ensure the progress bar is closed upon completion\n", - " pbar.close()\n", - "\n", - "process_files_in_directory(extracted_evernote_db)" - ] - }, - { - "cell_type": "markdown", - "id": "e1bcc07f980c865f", - "metadata": { - "collapsed": false, - "id": "e1bcc07f980c865f" - }, - "source": [ - "# Chunking of the texts\n", - "\n", - "The texts need to get chunked (pre-processing) before the embedding process. We are processing text for the sake of similarity detection. Therefore we can use overlaps. For log-processing and detection engineering, overlaps would be counter-productive." 
- ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "de8d9f18d8342c57", - "metadata": { - "ExecuteTime": { - "end_time": "2024-04-04T10:09:23.408646Z", - "start_time": "2024-04-04T10:08:56.104045Z" - }, - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "de8d9f18d8342c57", - "outputId": "31fd5ade-1af8-4592-8d18-aa2b8d635c44" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Now you have 723845 chunks in /content/export.txt\n", - "Now you have 151259 chunks in /content/export.documents.txt\n" - ] - } - ], - "source": [ - "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", - "\n", - "def chunk_text_data(txt_file=output_path_extracted_notes):\n", - "\n", - " with open(txt_file) as f:\n", - " text_notes = f.read()\n", - "\n", - " text_splitter = RecursiveCharacterTextSplitter(\n", - " chunk_size=100,\n", - " chunk_overlap=20,\n", - " length_function=len\n", - " )\n", - "\n", - " chunks = text_splitter.create_documents([text_notes])\n", - " print(f'Now you have {len(chunks)} chunks in {txt_file}')\n", - " return chunks\n", - "\n", - "# chunk individual text file containing the data\n", - "text_chunks = chunk_text_data(txt_file=output_path_extracted_notes)\n", - "doc_chunks = chunk_text_data(txt_file=output_path_extracted_docs)" - ] - }, - { - "cell_type": "markdown", - "id": "aea7ceb111fed5f3", - "metadata": { - "collapsed": false, - "id": "aea7ceb111fed5f3" - }, - "source": [ - "### Embedding costs - why no OpenAI?\n", - "\n", - "The OpenAI API has a cost for the embeddings.\n", - "At this point there seems to be no way to pre-estimate the costs reliably.\n", - "The following calculation is probably flawed. But if it's correct, I wish the OpenAPI team the best of luck with finding a new pricing model." 
- ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "afb2c8feb9ca0bb4", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "afb2c8feb9ca0bb4", - "outputId": "4bf4a714-b59e-4835-f3ac-3110b329b7b4" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Total Tokens: 15769414\n", - "Embedding Cost in USD: 473.08241999999996\n" - ] - } - ], - "source": [ - "def print_embedding_cost(texts):\n", - " import tiktoken\n", - " enc = tiktoken.encoding_for_model('gpt-4')\n", - " total_tokens = sum([len(enc.encode(page.page_content)) for page in texts])\n", - " print(f'Total Tokens: {total_tokens}')\n", - " print(f'Embedding Cost in USD: { (0.03 / 1_000) * total_tokens}')\n", - "\n", - "print_embedding_cost(text_chunks)" - ] - }, - { - "cell_type": "markdown", - "id": "8012516604037e2f", - "metadata": { - "collapsed": false, - "id": "8012516604037e2f" - }, - "source": [ - "# Use Hugging Face Embeddings Sentence Transformers\n", - "\n", - "Here we:\n", - "\n", - "* use a self-hosted on-premises model for the embedding and vectorization\n", - "* configure it for the use with the CPU or GPU\n", - "\n", - "This model is from the Beijing Academy of Artificial Intelligence\n", - "* https://huggingface.co/BAAI/bge-large-en-v1.5\n", - "* It uses: https://huggingface.co/docs/transformers/model_doc/auto\n", - "\n", - "It will produce embeddings of 1024 dimensions, roughly 500 less than OpenAI Embeddings I wanted to use initially." - ] - }, - { - "cell_type": "markdown", - "source": [ - "## GPU detection (CUDA)\n", - "\n", - "Here we detect whether a GPU is present, and if that is the case, we initialize the model to use it later. If not, we can use the CPU as a fallback. But for this use-case / implementation the Nvidia V100 GPU is about 60x faster (estimation)." 
- ], - "metadata": { - "id": "LJIwSxNf5sm7" - }, - "id": "LJIwSxNf5sm7" - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "3081256c9cf22780", - "metadata": { - "ExecuteTime": { - "end_time": "2024-04-04T10:09:29.687485Z", - "start_time": "2024-04-04T10:09:23.410187Z" - }, - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "3081256c9cf22780", - "outputId": "9b120f43-f2cd-47c2-b763-e089aca15ee2" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "__CUDNN VERSION: 8902\n", - "__Number CUDA Devices: 1\n", - "__CUDA Device Name: Tesla V100-SXM2-16GB\n", - "__CUDA Device Total Memory [GB]: 16.935682048\n", - "GPU enabled\n" - ] - } - ], - "source": [ - "import torch\n", - "use_cuda = torch.cuda.is_available()\n", - "\n", - "USE_GPU=True\n", - "\n", - "if use_cuda:\n", - " print('__CUDNN VERSION:', torch.backends.cudnn.version())\n", - " print('__Number CUDA Devices:', torch.cuda.device_count())\n", - " print('__CUDA Device Name:',torch.cuda.get_device_name(0))\n", - " print('__CUDA Device Total Memory [GB]:',torch.cuda.get_device_properties(0).total_memory/1e9)\n", - " USE_GPU=True\n", - " print(\"GPU enabled\")\n", - "\n", - "if not use_cuda:\n", - " print('No CUDA available')\n", - " USE_GPU=False\n" - ] - }, - { - "cell_type": "markdown", - "source": [ - "## BAAI Model\n", - "\n", - "The Beijing Academy of Artificial Intelligence (BAAI) is a leading organization, which provides state of the art models on HuggingFace. Here the model is being used to create the Embeddings. An Embedding here isn't a plain Word2Vec style projection of text to a vector space. It has a semantic integration. I still have to research the details." 
- ], - "metadata": { - "id": "GY_nYdSO6JTc" - }, - "id": "GY_nYdSO6JTc" - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "c1ca979bbc1610bb", - "metadata": { - "ExecuteTime": { - "end_time": "2024-04-04T10:09:29.889360Z", - "start_time": "2024-04-04T10:09:29.688832Z" - }, - "id": "c1ca979bbc1610bb" - }, - "outputs": [], - "source": [ - "from langchain.embeddings import HuggingFaceEmbeddings\n", - "\n", - "# pre-trained model path\n", - "modelPath = \"BAAI/bge-large-en-v1.5\"\n", - "\n", - "# Create a dictionary with model configuration options, specifying to use the CPU or GPU for computations\n", - "if not USE_GPU:\n", - " model_kwargs = {'device':'cpu'}\n", - "else:\n", - " model_kwargs = {}\n", - "\n", - "# Create a dictionary with encoding options, specifically setting 'normalize_embeddings' to True\n", - "encode_kwargs = {'normalize_embeddings': True}" - ] - }, - { - "cell_type": "markdown", - "source": [ - "### Initialization of the Embedding model" - ], - "metadata": { - "id": "JSTLqLQj6ref" - }, - "id": "JSTLqLQj6ref" - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "3c2b9cd67f161714", - "metadata": { - "ExecuteTime": { - "end_time": "2024-04-04T10:09:55.733575Z", - "start_time": "2024-04-04T10:09:34.059018Z" - }, - "colab": { - "base_uri": "https://localhost:8080/", - "height": 496, - "referenced_widgets": [ - "a1b39dadf1fd474296d47c00498b1d97", - "ce3cebc4664b4b4094f965e3d98b1ec3", - "cbd1ebc865344b2cbe09aaad9341f447", - "1fe3e7dd24a143b38fcc1f16048fce75", - "26fcfd7ef3784d02bed4f621377600b0", - "40346f9f5293495ea35a8b6a2234e8e5", - "32675b12f59c4b04ae03d4246c67145c", - "8ae260f36b444619be6f189b02dc54a4", - "e70eeeff503c4f2a83757cb0c202e7d0", - "5afae079bdb047a9adbe1352ca91899c", - "defeecb0c9034ef0835df87438c046e2", - "1b1bb145deac4bcbb720559e5a9f4cde", - "648ee76f36564501b7ae9a636aae4dde", - "3fa0a57165694311b6d5ad69f8e605de", - "96d9e6f90f974eaf9eb3b5a9d7bad983", - "9a19f5e7f06844ed9b6413aef416d180", - 
"249660f622c54831991237232196911b", - "a3fb4f16aea4427fa1218b61bd041d43", - "4c856a1624d54fd29b6abae9a395510c", - "c23cc4d3c457408eb352ab92dfbb86e0", - "c3d90bfa52bc40b3bf1d8dee186c48f9", - "072ba7cad09e4470bb04a44140eedb2c", - "2f3da07419074e548051002fecd36ce6", - "468f208672ab48ce83912d09913c18cc", - "308c05cc72f847588befc8c68696d752", - "fae60f6d7ecf4745b9e07b55f353036d", - "e75bdc1627624c878fde0f80ef9b71c5", - "fd4f5a1546e84d3f9741bb63a381b48f", - "b51b7655fb7546b2a4b61edc796af418", - "c1827bb2ba9047d3bae8a7bfa6702748", - "45d145a52f844606aed707cbb01e473f", - "c6d9dd3a6db445488bb8ad80e2e0554a", - "299413fffd184e28b7d8d03c741778cc", - "00473551f93a45fe8e8337c15d677848", - "8bf13bfe911c43798e87e8bf9e49047a", - "c7f64c8420074c469024b1b89ff0c114", - "94b6362a788b4c15ac67cc41e9f1b4ce", - "576c65f676f941bcb20c804191b1e63a", - "1388935349ac4673935f2521ed7d78d8", - "5c7f10c5efd14f29b76f91bdf8b13e11", - "5618a45a62f74f16899408521f6712b7", - "a205f9f2ec5543b7a50fd64d50fb53e8", - "d3a4973906bf490b87ac6b5905448b28", - "caebc821405542099a5e500f505d1169", - "9b7b90e2713f4f488a6921f89d96828c", - "2cf150ada6ca43449becf536e8444a23", - "cf604be4e8304922be58e20ee19ac70b", - "d201dbcd946d4173bb976a65bc24613b", - "b331d2862b3049eea1df4fb8b20f7927", - "c098aa33bf03498fa9a1762e88a82a93", - "964f3f6cda9a4815803d9c0e369ae64e", - "b9b38bfc63714441af3f22975eabed51", - "dfd40fb8dfe244b086f92f4299e11447", - "597f9a848328465eb48b5636039979ee", - "be67aea3e0b049e6b79f850f4082f449", - "612864d19f8a45618c574a6e9d90c0a7", - "4f820c0fbe5b4dba9186d726b54031ee", - "84eeb2a99f044c36bdf9428c62bdfee1", - "882898f5c0984ccca013458ac9246583", - "8236c6d4505c49869532d47e3c4bf9b2", - "47cb514ac09145148d657d1f43bd3343", - "77de5c5fe519498499d0703ad3d77523", - "915bf489198a4f518dcedc3a778b94cb", - "91a68937b987480c903f3ad73a35c30a", - "9981b06205f44f959182c06584909d46", - "12cb6ef492044740b8c0b48077d257de", - "9840d5d3dfc0421da994b1a48fc57690", - "0e9f877384c345bea8eddee0c2f896e4", - 
"141b953601f842f9a315cc254fff3925", - "012be145a1444889bfa30fae7812d62b", - "61cf60219cb94ae1a3413d27d2e5ed13", - "62c9641e6acd41ff915f0a86964560b3", - "ad2690ad145344e8a5744b400a2bb464", - "37b240a8a4c24e59bfc0b3f76e30b383", - "088a6b94e38247ad9f0d91d80202899f", - "d4438346655e45b0a029f2b99d3f02f9", - "5954d40f2cb1445a9ab1d4c814526f10", - "53b2f3605ae14ca9bb5e4fde8649f42b", - "f3561988bf9a438baab1e2c127d26b2a", - "af87303a2b084d128d4a5999c090ccf8", - "ae4c874164944325b74c7ac358bda6e6", - "46700f4115ff4084b740b10d7d6a9e93", - "119af24ab9b944de992ea90594e307a2", - "7a1e84942d694934ae4755034ce41d0c", - "149fe85c380e4cf79f3e511390243364", - "e20c0f77173f49468143522458560d4f", - "1441564670da4feaa7aec4be2e9dbf19", - "08d738c646b640a1a558d653a7c4f538", - "183f7840788b4409b954c244b02f94de", - "8b00ac065fc14f7da45586a43bf0226f", - "0e906e5ef8634475bb6dc19c484f2681", - "4511843fbda84081a0370376724082be", - "dce5f8fa907a40e1a96139028fd4466d", - "d56feb190ba54183a61baf6ffee1c74e", - "35b47fb99d604702b8da3b5f837c82ce", - "ed0871a86c2d4522bc9bca285be50677", - "30ead611258e4f7d971ac080e471c011", - "8b6525e5e421440c97b4d43146b5467c", - "00f1718ef79a405eb83b4190b80bc95d", - "5e70bb5e8d654635a510c83035366c34", - "cf86f986d20d41d4ae177a4a1c05cc21", - "3ea1b21763d044ffba9700a22b190beb", - "8dadf6a0f40e41d697a603d7ea746547", - "6d2cd8eb606c48df96f0680d161c753e", - "f1822ee028aa4920b224e0cd3b9ccc49", - "7268238e5b104504a2c1cd421973c8af", - "93aee68294744f4b9d6edc4db040b25a", - "be4f813c7272420cbb54a2e1b28be012", - "a54a03a4af42473f80d8808a5836654f", - "18ab9ee89f674b26bb23f620f8c217a6", - "61d05569258c45b88b6c59f73eff9ae1", - "1cceb8bd541d469ca7ffb02652201c9c", - "ad0a3c78287f443b93c185880652f14a", - "a059cdcef9d04fbeb4293185821c3243", - "adec61d016b1479481df33be3a74231a", - "edfb1273272a4625b9d10dba9c93af73", - "b2b7a587d64143439cfacdec2d1b9889", - "ce7c6faf4d884d60800a99a18ae4949b", - "42b2487a3e1e43b48083b6426aaaca81", - "120b025826cd4d288ae8715d6c53e830", - 
"ca452ea7819a4a6f90f70fe41454facb" - ] - }, - "id": "3c2b9cd67f161714", - "outputId": "4c3c646e-6b4a-4ec3-e975-930d445c2144" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n", - "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", - "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", - "You will be able to reuse this secret in all of your notebooks.\n", - "Please note that authentication is recommended but still optional to access public models or datasets.\n", - " warnings.warn(\n" - ] - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "modules.json: 0%| | 0.00/349 [00:00 None:\n", - " \"\"\"\n", - " Using type hints is a good idea here, because error messages get swallowed by the ThreadPoolExecutor.\n", - " The exception handling serves the same purpose.\n", - " Exceptions can cost performance, but only on the CPU level here.\n", - " \"\"\"\n", - "\n", - " try:\n", - " db = FAISS.from_documents(batch, embeddings, distance_strategy=\"COSINE\")\n", - " return db\n", - "\n", - " except Exception as e:\n", - " print(f\"Exception occurred in add_texts_in_batches: {e}\")\n", - "\n", - "\n", - "def divide_chunks(chunks, n):\n", - " \"\"\"\n", - " Divide and conquer :)\n", - " \"\"\"\n", - " for i in range(0, len(chunks), n):\n", - " yield chunks[i:i + n]\n", - "\n", - "\n", - "def vectorize_data_in_batches(chunks, embeddings):\n", - " \"\"\"\n", - " This function orchestrates the embedding vectorization in batches.\n", - " \"\"\"\n", - "\n", - " num_workers = 3\n", - " batch_size = 500 # Adjust based on your needs and memory constraints\n", - "\n", - " batches = list(divide_chunks(chunks, batch_size))\n", - " faiss_db = None\n", - "\n", - " with 
ThreadPoolExecutor(max_workers=num_workers) as executor:\n", - " # Submit all the batches for processing\n", - " futures = {executor.submit(add_texts_in_batches, batch, embeddings=embeddings): batch for batch in batches}\n", - "\n", - " # Setup the tqdm progress bar\n", - " progress_bar = tqdm(total=len(futures), desc=\"Processing batches\")\n", - "\n", - " for future in as_completed(futures):\n", - " # Each time a future completes, update the progress and collect the result\n", - " progress_bar.update(1)\n", - " try:\n", - " db_result = future.result() # This is where you get the returned value from add_texts_in_batches\n", - " if faiss_db is not None:\n", - " faiss_db.merge_from(db_result)\n", - "\n", - " else:\n", - " faiss_db = db_result\n", - "\n", - " except Exception as e:\n", - " print(f\"An error occurred: {e}\")\n", - "\n", - " progress_bar.close() # Ensure the progress bar is closed at the end\n", - "\n", - " faiss_db.save_local(\"faiss_index_cosine\")\n", - " print(\"All texts have been added to the database.\")\n", - "\n", - "\n", - "vectorize_data_in_batches(chunks=text_chunks, embeddings=embeddings)" - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Similarity search" - ], - "metadata": { - "id": "WfjpAoJqE_L4" - }, - "id": "WfjpAoJqE_L4" - }, - { - "cell_type": "code", - "source": [ - "loaded_db = FAISS.load_local(\"faiss_index_cosine\", embeddings, distance_strategy=\"COSINE\", allow_dangerous_deserialization=True)" - ], - "metadata": { - "id": "nJG7eD8eFBnV" - }, - "id": "nJG7eD8eFBnV", - "execution_count": 27, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "results_with_scores = loaded_db.similarity_search_with_score(\"What is an exploit?\")" - ], - "metadata": { - "id": "Wwbebp6wF9fG" - }, - "id": "Wwbebp6wF9fG", - "execution_count": 33, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "for doc, score in results_with_scores:\n", - " print(f\"Content: {doc.page_content}, Score: {score}\")" - ], - 
"metadata": { - "id": "iWfiO0KFGZXN", - "outputId": "0dc2aed8-9689-4efa-e8e8-3d73bb81361b", - "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "id": "iWfiO0KFGZXN", - "execution_count": 34, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Content: `exploit', Score: 0.24956563115119934\n", - "Content: `exploit', Score: 0.24956563115119934\n", - "Content: `exploit', Score: 0.24956563115119934\n", - "Content: `exploit', Score: 0.24956563115119934\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Sandbox code - test area" - ], - "metadata": { - "id": "82gFVyrNCYOF" - }, - "id": "82gFVyrNCYOF" - }, - { - "cell_type": "code", - "source": [ - "from langchain_community.vectorstores import FAISS\n", - "\n", - "texts = [\"FAISS is an important library\", \"LangChain supports FAISS\"]\n", - "faiss = FAISS.from_texts(texts, embeddings, distance_strategy=\"COSINE\")\n", - "print(type(faiss))\n", - "\n", - "faiss.save_local(\"test\")\n", - "\n", - "new_db = FAISS.load_local(\"test\", embeddings, allow_dangerous_deserialization=True)\n" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "v6bhYHU5_9oo", - "outputId": "a88691e1-3ee4-4a34-edbf-4fac688dd78d" - }, - "id": "v6bhYHU5_9oo", - "execution_count": 26, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\n" - ] - } - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" + "id": "oHbFM-721Uwf" + }, + "outputs": [], + "source": [ + "# To suppress some warnings\n", + "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"False\"" + ] + }, + { + "cell_type": "markdown", + "id": "yuhXPdN_z2cW", + "metadata": { + "id": 
"yuhXPdN_z2cW" + }, + "source": [ + "## Checks" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "6SPPaVEet9EO", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-05T11:27:18.222315Z", + "start_time": "2024-04-05T11:27:18.219343Z" }, "colab": { - "provenance": [], - "machine_shape": "hm", - "gpuType": "V100" + "base_uri": "https://localhost:8080/" }, - "accelerator": "GPU", - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "a1b39dadf1fd474296d47c00498b1d97": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_ce3cebc4664b4b4094f965e3d98b1ec3", - "IPY_MODEL_cbd1ebc865344b2cbe09aaad9341f447", - "IPY_MODEL_1fe3e7dd24a143b38fcc1f16048fce75" - ], - "layout": "IPY_MODEL_26fcfd7ef3784d02bed4f621377600b0" - } - }, - "ce3cebc4664b4b4094f965e3d98b1ec3": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_40346f9f5293495ea35a8b6a2234e8e5", - "placeholder": "​", - "style": "IPY_MODEL_32675b12f59c4b04ae03d4246c67145c", - "value": "modules.json: 100%" - } - }, - "cbd1ebc865344b2cbe09aaad9341f447": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - 
"_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_8ae260f36b444619be6f189b02dc54a4", - "max": 349, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_e70eeeff503c4f2a83757cb0c202e7d0", - "value": 349 - } - }, - "1fe3e7dd24a143b38fcc1f16048fce75": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_5afae079bdb047a9adbe1352ca91899c", - "placeholder": "​", - "style": "IPY_MODEL_defeecb0c9034ef0835df87438c046e2", - "value": " 349/349 [00:00<00:00, 25.8kB/s]" - } - }, - "26fcfd7ef3784d02bed4f621377600b0": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - 
"grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "40346f9f5293495ea35a8b6a2234e8e5": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "32675b12f59c4b04ae03d4246c67145c": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": 
"DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "8ae260f36b444619be6f189b02dc54a4": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e70eeeff503c4f2a83757cb0c202e7d0": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "5afae079bdb047a9adbe1352ca91899c": { - "model_module": "@jupyter-widgets/base", 
- "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "defeecb0c9034ef0835df87438c046e2": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "1b1bb145deac4bcbb720559e5a9f4cde": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - 
"_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_648ee76f36564501b7ae9a636aae4dde", - "IPY_MODEL_3fa0a57165694311b6d5ad69f8e605de", - "IPY_MODEL_96d9e6f90f974eaf9eb3b5a9d7bad983" - ], - "layout": "IPY_MODEL_9a19f5e7f06844ed9b6413aef416d180" - } - }, - "648ee76f36564501b7ae9a636aae4dde": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_249660f622c54831991237232196911b", - "placeholder": "​", - "style": "IPY_MODEL_a3fb4f16aea4427fa1218b61bd041d43", - "value": "config_sentence_transformers.json: 100%" - } - }, - "3fa0a57165694311b6d5ad69f8e605de": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_4c856a1624d54fd29b6abae9a395510c", - "max": 124, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_c23cc4d3c457408eb352ab92dfbb86e0", - "value": 124 - } - }, - "96d9e6f90f974eaf9eb3b5a9d7bad983": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - 
"_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_c3d90bfa52bc40b3bf1d8dee186c48f9", - "placeholder": "​", - "style": "IPY_MODEL_072ba7cad09e4470bb04a44140eedb2c", - "value": " 124/124 [00:00<00:00, 11.6kB/s]" - } - }, - "9a19f5e7f06844ed9b6413aef416d180": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "249660f622c54831991237232196911b": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - 
"_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "a3fb4f16aea4427fa1218b61bd041d43": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "4c856a1624d54fd29b6abae9a395510c": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - 
"grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "c23cc4d3c457408eb352ab92dfbb86e0": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "c3d90bfa52bc40b3bf1d8dee186c48f9": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": 
null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "072ba7cad09e4470bb04a44140eedb2c": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "2f3da07419074e548051002fecd36ce6": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_468f208672ab48ce83912d09913c18cc", - "IPY_MODEL_308c05cc72f847588befc8c68696d752", - "IPY_MODEL_fae60f6d7ecf4745b9e07b55f353036d" - ], - "layout": "IPY_MODEL_e75bdc1627624c878fde0f80ef9b71c5" - } - }, - "468f208672ab48ce83912d09913c18cc": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": 
"IPY_MODEL_fd4f5a1546e84d3f9741bb63a381b48f", - "placeholder": "​", - "style": "IPY_MODEL_b51b7655fb7546b2a4b61edc796af418", - "value": "README.md: 100%" - } - }, - "308c05cc72f847588befc8c68696d752": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_c1827bb2ba9047d3bae8a7bfa6702748", - "max": 94607, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_45d145a52f844606aed707cbb01e473f", - "value": 94607 - } - }, - "fae60f6d7ecf4745b9e07b55f353036d": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_c6d9dd3a6db445488bb8ad80e2e0554a", - "placeholder": "​", - "style": "IPY_MODEL_299413fffd184e28b7d8d03c741778cc", - "value": " 94.6k/94.6k [00:00<00:00, 1.16MB/s]" - } - }, - "e75bdc1627624c878fde0f80ef9b71c5": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - 
"align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "fd4f5a1546e84d3f9741bb63a381b48f": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": 
null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b51b7655fb7546b2a4b61edc796af418": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "c1827bb2ba9047d3bae8a7bfa6702748": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "45d145a52f844606aed707cbb01e473f": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": 
"@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "c6d9dd3a6db445488bb8ad80e2e0554a": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "299413fffd184e28b7d8d03c741778cc": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - 
}, - "00473551f93a45fe8e8337c15d677848": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_8bf13bfe911c43798e87e8bf9e49047a", - "IPY_MODEL_c7f64c8420074c469024b1b89ff0c114", - "IPY_MODEL_94b6362a788b4c15ac67cc41e9f1b4ce" - ], - "layout": "IPY_MODEL_576c65f676f941bcb20c804191b1e63a" - } - }, - "8bf13bfe911c43798e87e8bf9e49047a": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_1388935349ac4673935f2521ed7d78d8", - "placeholder": "​", - "style": "IPY_MODEL_5c7f10c5efd14f29b76f91bdf8b13e11", - "value": "sentence_bert_config.json: 100%" - } - }, - "c7f64c8420074c469024b1b89ff0c114": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_5618a45a62f74f16899408521f6712b7", - "max": 52, - "min": 0, - "orientation": 
"horizontal", - "style": "IPY_MODEL_a205f9f2ec5543b7a50fd64d50fb53e8", - "value": 52 - } - }, - "94b6362a788b4c15ac67cc41e9f1b4ce": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_d3a4973906bf490b87ac6b5905448b28", - "placeholder": "​", - "style": "IPY_MODEL_caebc821405542099a5e500f505d1169", - "value": " 52.0/52.0 [00:00<00:00, 5.21kB/s]" - } - }, - "576c65f676f941bcb20c804191b1e63a": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - 
"width": null - } - }, - "1388935349ac4673935f2521ed7d78d8": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "5c7f10c5efd14f29b76f91bdf8b13e11": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "5618a45a62f74f16899408521f6712b7": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": 
"LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "a205f9f2ec5543b7a50fd64d50fb53e8": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "d3a4973906bf490b87ac6b5905448b28": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - 
"flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "caebc821405542099a5e500f505d1169": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "9b7b90e2713f4f488a6921f89d96828c": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_2cf150ada6ca43449becf536e8444a23", - "IPY_MODEL_cf604be4e8304922be58e20ee19ac70b", - "IPY_MODEL_d201dbcd946d4173bb976a65bc24613b" - ], - "layout": "IPY_MODEL_b331d2862b3049eea1df4fb8b20f7927" - } - }, - "2cf150ada6ca43449becf536e8444a23": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - 
"model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_c098aa33bf03498fa9a1762e88a82a93", - "placeholder": "​", - "style": "IPY_MODEL_964f3f6cda9a4815803d9c0e369ae64e", - "value": "config.json: 100%" - } - }, - "cf604be4e8304922be58e20ee19ac70b": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_b9b38bfc63714441af3f22975eabed51", - "max": 779, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_dfd40fb8dfe244b086f92f4299e11447", - "value": 779 - } - }, - "d201dbcd946d4173bb976a65bc24613b": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_597f9a848328465eb48b5636039979ee", - "placeholder": "​", - "style": "IPY_MODEL_be67aea3e0b049e6b79f850f4082f449", - "value": " 779/779 [00:00<00:00, 76.1kB/s]" - } - }, - "b331d2862b3049eea1df4fb8b20f7927": { - 
"model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "c098aa33bf03498fa9a1762e88a82a93": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - 
"grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "964f3f6cda9a4815803d9c0e369ae64e": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "b9b38bfc63714441af3f22975eabed51": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - 
"overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "dfd40fb8dfe244b086f92f4299e11447": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "597f9a848328465eb48b5636039979ee": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "be67aea3e0b049e6b79f850f4082f449": { - "model_module": "@jupyter-widgets/controls", - "model_name": 
"DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "612864d19f8a45618c574a6e9d90c0a7": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_4f820c0fbe5b4dba9186d726b54031ee", - "IPY_MODEL_84eeb2a99f044c36bdf9428c62bdfee1", - "IPY_MODEL_882898f5c0984ccca013458ac9246583" - ], - "layout": "IPY_MODEL_8236c6d4505c49869532d47e3c4bf9b2" - } - }, - "4f820c0fbe5b4dba9186d726b54031ee": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_47cb514ac09145148d657d1f43bd3343", - "placeholder": "​", - "style": "IPY_MODEL_77de5c5fe519498499d0703ad3d77523", - "value": "model.safetensors: 100%" - } - }, - "84eeb2a99f044c36bdf9428c62bdfee1": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - 
"_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_915bf489198a4f518dcedc3a778b94cb", - "max": 1340616616, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_91a68937b987480c903f3ad73a35c30a", - "value": 1340616616 - } - }, - "882898f5c0984ccca013458ac9246583": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_9981b06205f44f959182c06584909d46", - "placeholder": "​", - "style": "IPY_MODEL_12cb6ef492044740b8c0b48077d257de", - "value": " 1.34G/1.34G [00:07<00:00, 134MB/s]" - } - }, - "8236c6d4505c49869532d47e3c4bf9b2": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - 
"justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "47cb514ac09145148d657d1f43bd3343": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "77de5c5fe519498499d0703ad3d77523": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": 
"@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "915bf489198a4f518dcedc3a778b94cb": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "91a68937b987480c903f3ad73a35c30a": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "9981b06205f44f959182c06584909d46": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", 
- "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "12cb6ef492044740b8c0b48077d257de": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "9840d5d3dfc0421da994b1a48fc57690": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - 
"box_style": "", - "children": [ - "IPY_MODEL_0e9f877384c345bea8eddee0c2f896e4", - "IPY_MODEL_141b953601f842f9a315cc254fff3925", - "IPY_MODEL_012be145a1444889bfa30fae7812d62b" - ], - "layout": "IPY_MODEL_61cf60219cb94ae1a3413d27d2e5ed13" - } - }, - "0e9f877384c345bea8eddee0c2f896e4": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_62c9641e6acd41ff915f0a86964560b3", - "placeholder": "​", - "style": "IPY_MODEL_ad2690ad145344e8a5744b400a2bb464", - "value": "tokenizer_config.json: 100%" - } - }, - "141b953601f842f9a315cc254fff3925": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_37b240a8a4c24e59bfc0b3f76e30b383", - "max": 366, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_088a6b94e38247ad9f0d91d80202899f", - "value": 366 - } - }, - "012be145a1444889bfa30fae7812d62b": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": 
"@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_d4438346655e45b0a029f2b99d3f02f9", - "placeholder": "​", - "style": "IPY_MODEL_5954d40f2cb1445a9ab1d4c814526f10", - "value": " 366/366 [00:00<00:00, 29.7kB/s]" - } - }, - "61cf60219cb94ae1a3413d27d2e5ed13": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "62c9641e6acd41ff915f0a86964560b3": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - 
"align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "ad2690ad145344e8a5744b400a2bb464": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "37b240a8a4c24e59bfc0b3f76e30b383": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - 
"grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "088a6b94e38247ad9f0d91d80202899f": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "d4438346655e45b0a029f2b99d3f02f9": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": 
null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "5954d40f2cb1445a9ab1d4c814526f10": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "53b2f3605ae14ca9bb5e4fde8649f42b": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_f3561988bf9a438baab1e2c127d26b2a", - "IPY_MODEL_af87303a2b084d128d4a5999c090ccf8", - "IPY_MODEL_ae4c874164944325b74c7ac358bda6e6" - ], - "layout": "IPY_MODEL_46700f4115ff4084b740b10d7d6a9e93" - } - }, - "f3561988bf9a438baab1e2c127d26b2a": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_119af24ab9b944de992ea90594e307a2", - "placeholder": "​", - 
"style": "IPY_MODEL_7a1e84942d694934ae4755034ce41d0c", - "value": "vocab.txt: 100%" - } - }, - "af87303a2b084d128d4a5999c090ccf8": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_149fe85c380e4cf79f3e511390243364", - "max": 231508, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_e20c0f77173f49468143522458560d4f", - "value": 231508 - } - }, - "ae4c874164944325b74c7ac358bda6e6": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_1441564670da4feaa7aec4be2e9dbf19", - "placeholder": "​", - "style": "IPY_MODEL_08d738c646b640a1a558d653a7c4f538", - "value": " 232k/232k [00:00<00:00, 1.41MB/s]" - } - }, - "46700f4115ff4084b740b10d7d6a9e93": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, 
- "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "119af24ab9b944de992ea90594e307a2": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - 
"7a1e84942d694934ae4755034ce41d0c": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "149fe85c380e4cf79f3e511390243364": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e20c0f77173f49468143522458560d4f": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": 
"ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "1441564670da4feaa7aec4be2e9dbf19": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "08d738c646b640a1a558d653a7c4f538": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "183f7840788b4409b954c244b02f94de": { - "model_module": 
"@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_8b00ac065fc14f7da45586a43bf0226f", - "IPY_MODEL_0e906e5ef8634475bb6dc19c484f2681", - "IPY_MODEL_4511843fbda84081a0370376724082be" - ], - "layout": "IPY_MODEL_dce5f8fa907a40e1a96139028fd4466d" - } - }, - "8b00ac065fc14f7da45586a43bf0226f": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_d56feb190ba54183a61baf6ffee1c74e", - "placeholder": "​", - "style": "IPY_MODEL_35b47fb99d604702b8da3b5f837c82ce", - "value": "tokenizer.json: 100%" - } - }, - "0e906e5ef8634475bb6dc19c484f2681": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_ed0871a86c2d4522bc9bca285be50677", - "max": 711396, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_30ead611258e4f7d971ac080e471c011", - 
"value": 711396 - } - }, - "4511843fbda84081a0370376724082be": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_8b6525e5e421440c97b4d43146b5467c", - "placeholder": "​", - "style": "IPY_MODEL_00f1718ef79a405eb83b4190b80bc95d", - "value": " 711k/711k [00:00<00:00, 2.17MB/s]" - } - }, - "dce5f8fa907a40e1a96139028fd4466d": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "d56feb190ba54183a61baf6ffee1c74e": { - 
"model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "35b47fb99d604702b8da3b5f837c82ce": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "ed0871a86c2d4522bc9bca285be50677": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": 
"@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "30ead611258e4f7d971ac080e471c011": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "8b6525e5e421440c97b4d43146b5467c": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, 
- "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "00f1718ef79a405eb83b4190b80bc95d": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "5e70bb5e8d654635a510c83035366c34": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_cf86f986d20d41d4ae177a4a1c05cc21", - "IPY_MODEL_3ea1b21763d044ffba9700a22b190beb", - "IPY_MODEL_8dadf6a0f40e41d697a603d7ea746547" - ], - "layout": "IPY_MODEL_6d2cd8eb606c48df96f0680d161c753e" - } - }, - "cf86f986d20d41d4ae177a4a1c05cc21": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - 
"_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_f1822ee028aa4920b224e0cd3b9ccc49", - "placeholder": "​", - "style": "IPY_MODEL_7268238e5b104504a2c1cd421973c8af", - "value": "special_tokens_map.json: 100%" - } - }, - "3ea1b21763d044ffba9700a22b190beb": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_93aee68294744f4b9d6edc4db040b25a", - "max": 125, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_be4f813c7272420cbb54a2e1b28be012", - "value": 125 - } - }, - "8dadf6a0f40e41d697a603d7ea746547": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_a54a03a4af42473f80d8808a5836654f", - "placeholder": "​", - "style": "IPY_MODEL_18ab9ee89f674b26bb23f620f8c217a6", - "value": " 125/125 [00:00<00:00, 9.90kB/s]" - } - }, - "6d2cd8eb606c48df96f0680d161c753e": { - "model_module": "@jupyter-widgets/base", - "model_name": 
"LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "f1822ee028aa4920b224e0cd3b9ccc49": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - 
"justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "7268238e5b104504a2c1cd421973c8af": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "93aee68294744f4b9d6edc4db040b25a": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - 
"overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "be4f813c7272420cbb54a2e1b28be012": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "a54a03a4af42473f80d8808a5836654f": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "18ab9ee89f674b26bb23f620f8c217a6": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": 
"1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "61d05569258c45b88b6c59f73eff9ae1": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_1cceb8bd541d469ca7ffb02652201c9c", - "IPY_MODEL_ad0a3c78287f443b93c185880652f14a", - "IPY_MODEL_a059cdcef9d04fbeb4293185821c3243" - ], - "layout": "IPY_MODEL_adec61d016b1479481df33be3a74231a" - } - }, - "1cceb8bd541d469ca7ffb02652201c9c": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_edfb1273272a4625b9d10dba9c93af73", - "placeholder": "​", - "style": "IPY_MODEL_b2b7a587d64143439cfacdec2d1b9889", - "value": "1_Pooling/config.json: 100%" - } - }, - "ad0a3c78287f443b93c185880652f14a": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - 
"_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_ce7c6faf4d884d60800a99a18ae4949b", - "max": 191, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_42b2487a3e1e43b48083b6426aaaca81", - "value": 191 - } - }, - "a059cdcef9d04fbeb4293185821c3243": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_120b025826cd4d288ae8715d6c53e830", - "placeholder": "​", - "style": "IPY_MODEL_ca452ea7819a4a6f90f70fe41454facb", - "value": " 191/191 [00:00<00:00, 17.4kB/s]" - } - }, - "adec61d016b1479481df33be3a74231a": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - 
"margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "edfb1273272a4625b9d10dba9c93af73": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b2b7a587d64143439cfacdec2d1b9889": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": 
"StyleView", - "description_width": "" - } - }, - "ce7c6faf4d884d60800a99a18ae4949b": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "42b2487a3e1e43b48083b6426aaaca81": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "120b025826cd4d288ae8715d6c53e830": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - 
"_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "ca452ea7819a4a6f90f70fe41454facb": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "592f37baf1c74e149577e80678db668f": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - 
"IPY_MODEL_2cdb27f1d7b14b558cf6f19fc0ab4fd9", - "IPY_MODEL_ce8eed52d57c47479ab9a45b85296c04", - "IPY_MODEL_a18c165ea7fc485c91e64df34974d685" - ], - "layout": "IPY_MODEL_090543e0523a4d0e8dbd89e0152a3a15" - } - }, - "2cdb27f1d7b14b558cf6f19fc0ab4fd9": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_0fbb5a8dd8c64e6c862779496a0c1867", - "placeholder": "​", - "style": "IPY_MODEL_8c77c7def1804fd6884a601c76618fa7", - "value": "Processing batches: 100%" - } - }, - "ce8eed52d57c47479ab9a45b85296c04": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_4f06cc3b83e641cd81deba9aaea93fbb", - "max": 1448, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_b0e999f6c752439a8f4ba962815160ae", - "value": 1448 - } - }, - "a18c165ea7fc485c91e64df34974d685": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - 
"_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_a18f27c970524c048b424be9672e106f", - "placeholder": "​", - "style": "IPY_MODEL_f6b2c8e5621143729c8d6e3129251f29", - "value": " 1448/1448 [30:16<00:00,  1.23it/s]" - } - }, - "090543e0523a4d0e8dbd89e0152a3a15": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "0fbb5a8dd8c64e6c862779496a0c1867": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": 
null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "8c77c7def1804fd6884a601c76618fa7": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "4f06cc3b83e641cd81deba9aaea93fbb": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - 
"grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b0e999f6c752439a8f4ba962815160ae": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "a18f27c970524c048b424be9672e106f": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - 
"min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "f6b2c8e5621143729c8d6e3129251f29": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - } - } + "id": "6SPPaVEet9EO", + "outputId": "e7cb63ec-8192-43a4-9320-83ff5b3b2122" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/marius/source/bookworm/export.txt\n" + ] } + ], + "source": [ + "print(output_path_extracted_notes)" + ] }, - "nbformat": 4, - "nbformat_minor": 5 -} \ No newline at end of file + { + "cell_type": "markdown", + "id": "B02AY_Gez61T", + "metadata": { + "id": "B02AY_Gez61T" + }, + "source": [ + "## For the progress bars in Colab\n", + "\n", + "⚛ If you don't add this magic commands the `tqdm` progress bars will not update properly ⚛" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "XGYNhuvrvnUD", + "metadata": { + "id": "XGYNhuvrvnUD" + }, + "outputs": [], + "source": [ + "%reload_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "markdown", + "id": "a8c8692786d83c00", + "metadata": { + "collapsed": false, + "id": "a8c8692786d83c00" + }, + "source": [ + "## Select key dependencies\n", + "\n", + "* `cryptography` is used to handle some PDF functions here (signatures)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "bb34db1ea75a1edf", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-04T10:08:32.520341Z", + "start_time": 
"2024-04-04T10:08:30.353678Z" + }, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "bb34db1ea75a1edf", + "outputId": "26af1f05-f9c1-4849-88e1-dc5c18cd3884" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Name: cryptography\n", + "Version: 42.0.5\n", + "Summary: cryptography is a package which provides cryptographic recipes and primitives to Python developers.\n", + "Home-page: \n", + "Author: \n", + "Author-email: The Python Cryptographic Authority and individual contributors \n", + "License: Apache-2.0 OR BSD-3-Clause\n", + "Location: /usr/local/lib/python3.10/dist-packages\n", + "Requires: cffi\n", + "Required-by: pyOpenSSL\n" + ] + } + ], + "source": [ + "%pip show cryptography" + ] + }, + { + "cell_type": "markdown", + "id": "297746c807e95fbf", + "metadata": { + "collapsed": false, + "id": "297746c807e95fbf" + }, + "source": [ + "* `pikepdf` is used to repair some PDFs" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "ebc8af0183532fc2", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-04T10:08:34.665865Z", + "start_time": "2024-04-04T10:08:32.522020Z" + }, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ebc8af0183532fc2", + "outputId": "7966230e-ec57-4b64-acf2-09d013bb2608" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Name: pikepdf\n", + "Version: 8.13.0\n", + "Summary: Read and write PDFs with Python, powered by qpdf\n", + "Home-page: \n", + "Author: \n", + "Author-email: \"James R. 
Barlow\" \n", + "License: MPL-2.0\n", + "Location: /usr/local/lib/python3.10/dist-packages\n", + "Requires: Deprecated, lxml, packaging, Pillow\n", + "Required-by: \n" + ] + } + ], + "source": [ + "%pip show pikepdf" + ] + }, + { + "cell_type": "markdown", + "id": "7c7a7f6b0db3719e", + "metadata": { + "collapsed": false, + "id": "7c7a7f6b0db3719e" + }, + "source": [ + "* `pypdf` with all features is needed because this DB consists of 100+ PDFs" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "779f81e2ab00f73c", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-04T10:08:37.436449Z", + "start_time": "2024-04-04T10:08:35.269255Z" + }, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "779f81e2ab00f73c", + "outputId": "e1fcc840-7e3c-4d4a-9457-745be8db60c4" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Name: pypdf\n", + "Version: 4.0.2\n", + "Summary: A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files\n", + "Home-page: \n", + "Author: \n", + "Author-email: Mathieu Fenniak \n", + "License: \n", + "Location: /usr/local/lib/python3.10/dist-packages\n", + "Requires: \n", + "Required-by: \n" + ] + } + ], + "source": [ + "%pip show \"pypdf\"" + ] + }, + { + "cell_type": "markdown", + "id": "A5l3rFo03NKq", + "metadata": { + "id": "A5l3rFo03NKq" + }, + "source": [ + "* `torch` is used for tensors, and GPU processing" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "de3f715519fda6c4", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-04T10:08:39.729429Z", + "start_time": "2024-04-04T10:08:37.438498Z" + }, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "de3f715519fda6c4", + "outputId": "9f328109-f568-411b-c78a-0c1be41090bb" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Name: torch\n", + "Version: 2.2.1+cu121\n", + "Summary: Tensors and Dynamic neural networks in Python 
with strong GPU acceleration\n", + "Home-page: https://pytorch.org/\n", + "Author: PyTorch Team\n", + "Author-email: packages@pytorch.org\n", + "License: BSD-3\n", + "Location: /usr/local/lib/python3.10/dist-packages\n", + "Requires: filelock, fsspec, jinja2, networkx, nvidia-cublas-cu12, nvidia-cuda-cupti-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-runtime-cu12, nvidia-cudnn-cu12, nvidia-cufft-cu12, nvidia-curand-cu12, nvidia-cusolver-cu12, nvidia-cusparse-cu12, nvidia-nccl-cu12, nvidia-nvtx-cu12, sympy, triton, typing-extensions\n", + "Required-by: fastai, sentence-transformers, torchaudio, torchdata, torchtext, torchvision\n" + ] + } + ], + "source": [ + "%pip show torch" + ] + }, + { + "cell_type": "markdown", + "id": "ZxyhRz6-3p-c", + "metadata": { + "id": "ZxyhRz6-3p-c" + }, + "source": [ + "* `faiss` is used in the CPU version as a general vector store library. The data is being serialized with `LangChain`. FAISS CPU version uses AVX2. The GPU port has some implementation issues with disk persistence and merging." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "HARY_QMJvttI", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "HARY_QMJvttI", + "outputId": "bb7f5758-135e-435d-bf26-9aa3a1417e81" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Name: faiss-cpu\n", + "Version: 1.8.0\n", + "Summary: A library for efficient similarity search and clustering of dense vectors.\n", + "Home-page: \n", + "Author: \n", + "Author-email: Kota Yamaguchi \n", + "License: MIT License\n", + "Location: /usr/local/lib/python3.10/dist-packages\n", + "Requires: numpy\n", + "Required-by: \n" + ] + } + ], + "source": [ + "%pip show faiss_cpu" + ] + }, + { + "cell_type": "markdown", + "id": "ce1350d2d6e3ed63", + "metadata": { + "collapsed": false, + "id": "ce1350d2d6e3ed63" + }, + "source": [ + "## Text extraction\n", + "\n", + "⬛ This doesn't need to get executed if you already have the `*.txt` files.\n", + "\n", + "\n", + "* Here the html and text data is extracted into txt\n", + "* The PDF and DOCX data is extracted into another txt file. 
This will be used for weighted data fusion later.\n", + "\n", + "* the texts are normalized:\n", + " * unicode normalization\n", + " * surrogate characters get replaced\n", + " * html gets converted to text\n", + " * pdfs get repaired\n", + " * docx files get read\n", + "\n", + "* exceptions get handled (UTF-16 issues, PDF reference errors)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "yrzYeheF40jt", + "metadata": { + "id": "yrzYeheF40jt" + }, + "outputs": [], + "source": [ + "import glob\n", + "import os\n", + "\n", + "import unicodedata # to normalize text\n", + "import html2text # to convert html to text\n", + "from langchain.document_loaders import PyPDFLoader, Docx2txtLoader\n", + "import pikepdf # to repair PDFs\n", + "from pathlib import Path\n", + "from tqdm.notebook import tqdm\n", + "from concurrent.futures import ThreadPoolExecutor, as_completed" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b557444b8b1d4839", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-04T09:25:39.388933Z", + "start_time": "2024-04-04T09:25:39.320902Z" + }, + "id": "b557444b8b1d4839" + }, + "outputs": [], + "source": [ + "def convert_html_to_text(html_blob: str) -> str:\n", + " \"\"\"\n", + " Converts a html blob into a string.\n", + " \"\"\"\n", + " h = html2text.HTML2Text()\n", + " h.mark_code = True\n", + " h.escape_snob = True\n", + " h.unicode_snob = True\n", + " # h.use_automatic_links = True\n", + " h.images_as_html = True\n", + " h.single_line_break = True\n", + " h.ignore_links = True\n", + " return h.handle(html_blob)\n", + "\n", + "def normalize_text(txt_blob: str) -> str:\n", + " \"\"\"\n", + " Normalize a text blob using NFKD normalization.\n", + " \"\"\"\n", + " return unicodedata.normalize(\"NFKD\", txt_blob)\n", + "\n", + "def repair_pdf(file_path: str) -> bool:\n", + " \"\"\"\n", + " Attempts to repair a PDF file using pikepdf.\n", + " \"\"\"\n", + " try:\n", + " with pikepdf.open(file_path, 
allow_overwriting_input=True) as pdf:\n", + " pdf.save(file_path)\n", + " return True\n", + " except pikepdf.PdfError as e:\n", + " print(f\"Failed to repair PDF {file_path}: {e}\")\n", + " return False\n", + "\n", + "def read_and_convert_file(file_path: str, is_html: bool, is_pdf: bool, is_docx: bool) -> str:\n", + " \"\"\"\n", + " Reads and converts a file from HTML, PDF, DOCX, or plain text to text.\n", + " :param file_path:\n", + " :param is_html:\n", + " :param is_pdf:\n", + " :param is_docx:\n", + " :return:\n", + " \"\"\"\n", + "\n", + " content = \"\"\n", + " if is_html:\n", + " try:\n", + " with open(file_path, 'r', encoding='utf-8') as file:\n", + " content = file.read()\n", + " return convert_html_to_text(content)\n", + " except Exception as e:\n", + " print(f\"Error reading {file_path}: {e}\")\n", + " return \"\"\n", + "\n", + " elif is_pdf:\n", + " try:\n", + " loader = PyPDFLoader(file_path)\n", + " # ... fixes \"Multiple definitions in dictionary at byte 0xb32 for key /ExtGState\" error\n", + " documents = loader.load()\n", + " content = \"\\n\".join(doc.page_content for doc in documents if hasattr(doc, 'page_content'))\n", + " except Exception as e:\n", + " print(f\"Error loading PDF {file_path}: {e}. 
Attempting to repair...\")\n", + " if repair_pdf(file_path):\n", + " try:\n", + " loader = PyPDFLoader(file_path)\n", + " documents = loader.load()\n", + " content = \"\\n\".join(doc.page_content for doc in documents if hasattr(doc, 'page_content'))\n", + " except Exception as e:\n", + " print(f\"Failed to process PDF {file_path} after repair: {e}\")\n", + " return \"\"\n", + " return normalize_text(content)\n", + "\n", + " elif is_docx:\n", + " try:\n", + " loader = Docx2txtLoader(file_path)\n", + " content = loader.load()\n", + " if isinstance(content, list):\n", + " content = \"\\n\".join(content)\n", + " except Exception as e:\n", + " print(f\"Error reading DOCX {file_path}: {e}\")\n", + " return \"\"\n", + " return normalize_text(content)\n", + "\n", + " else: # For plain text files\n", + " try:\n", + " with open(file_path, 'r', encoding='utf-8') as file:\n", + " return normalize_text(file.read())\n", + " except Exception as e:\n", + " print(f\"Error reading {file_path}: {e}\")\n", + " return \"\"\n", + "\n", + "def sanitize_text(text):\n", + " \"\"\"\n", + " Removes or replaces surrogate characters from a string.\n", + " \"\"\"\n", + " return text.encode('utf-8', 'replace').decode('utf-8')\n", + "\n", + "def append_to_output(data: str, is_pdf: bool, is_docx: bool, output_path: str):\n", + " \"\"\"\n", + " Appends sanitized data to an output file.\n", + " \"\"\"\n", + " sanitized_data = sanitize_text(data)\n", + " if is_pdf or is_docx:\n", + " output_path = str(Path(output_path).with_suffix('')) + \".documents.txt\"\n", + "\n", + " with open(output_path, \"a\", encoding='utf-8') as output_file:\n", + " output_file.write(sanitized_data)\n", + "\n", + "def process_file(file):\n", + " is_html = file.endswith('.html')\n", + " is_pdf = file.endswith('.pdf')\n", + " is_docx = file.endswith('.docx')\n", + "\n", + " file_content = read_and_convert_file(file, is_html, is_pdf, is_docx)\n", + " append_to_output(file_content, is_pdf, is_docx, output_path=output_path)\n", 
+ "\n", + "def process_files_in_directory(directory: str):\n", + " txt_html_files = glob.glob(os.path.join(directory, \"*.txt\")) + glob.glob(os.path.join(directory, \"*.html\"))\n", + " pdf_docx_files = glob.glob(os.path.join(directory, \"img\", \"*.pdf\")) + glob.glob(os.path.join(directory, \"img\", \"*.docx\"))\n", + " all_files = txt_html_files + pdf_docx_files\n", + "\n", + " # Initialize the progress bar\n", + " pbar = tqdm(total=len(all_files), desc=\"Processing files\")\n", + "\n", + " with ThreadPoolExecutor(max_workers=3) as executor:\n", + " # Submit all files to the executor and store future objects\n", + " futures = [executor.submit(process_file, file) for file in all_files]\n", + "\n", + " # As tasks complete, update the progress bar\n", + " for future in as_completed(futures):\n", + " pbar.update(1) # Update the progress bar by one for each task completed\n", + "\n", + " # Ensure the progress bar is closed upon completion\n", + " pbar.close()\n", + "\n", + "process_files_in_directory(extracted_evernote_db)" + ] + }, + { + "cell_type": "markdown", + "id": "e1bcc07f980c865f", + "metadata": { + "collapsed": false, + "id": "e1bcc07f980c865f" + }, + "source": [ + "# Chunking of the texts\n", + "\n", + "The texts need to get chunked (pre-processing) before the embedding process. We are processing text for the sake of similarity detection. Therefore we can use overlaps. For log-processing and detection engineering, overlaps would be counter-productive." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "de8d9f18d8342c57", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-05T11:28:01.724622Z", + "start_time": "2024-04-05T11:27:36.210305Z" + }, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "de8d9f18d8342c57", + "outputId": "31fd5ade-1af8-4592-8d18-aa2b8d635c44" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Now you have 723845 chunks in /home/marius/source/bookworm/export.txt\n", + "Now you have 151259 chunks in /home/marius/source/bookworm/export.documents.txt\n" + ] + } + ], + "source": [ + "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", + "\n", + "def chunk_text_data(txt_file=output_path_extracted_notes):\n", + "\n", + " with open(txt_file) as f:\n", + " text_notes = f.read()\n", + "\n", + " text_splitter = RecursiveCharacterTextSplitter(\n", + " chunk_size=100,\n", + " chunk_overlap=20,\n", + " length_function=len\n", + " )\n", + "\n", + " chunks = text_splitter.create_documents([text_notes])\n", + " print(f'Now you have {len(chunks)} chunks in {txt_file}')\n", + " return chunks\n", + "\n", + "# chunk individual text file containing the data\n", + "text_chunks = chunk_text_data(txt_file=output_path_extracted_notes)\n", + "doc_chunks = chunk_text_data(txt_file=output_path_extracted_docs)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "5c8dc13955c19d29", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-05T11:28:29.590616Z", + "start_time": "2024-04-05T11:28:29.586268Z" + }, + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'fields. Primitive fields are those finite fields in which the exponent n is 1. 
In primitive fields'" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "text_chunks[42].page_content" + ] + }, + { + "cell_type": "markdown", + "id": "aea7ceb111fed5f3", + "metadata": { + "collapsed": false, + "id": "aea7ceb111fed5f3" + }, + "source": [ + "### Embedding costs - why no OpenAI?\n", + "\n", + "The OpenAI API has a cost for the embeddings.\n", + "At this point there seems to be no way to pre-estimate the costs reliably.\n", + "The following calculation is probably flawed. But if it's correct, I wish the OpenAI team the best of luck with finding a new pricing model." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "afb2c8feb9ca0bb4", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "afb2c8feb9ca0bb4", + "outputId": "4bf4a714-b59e-4835-f3ac-3110b329b7b4" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total Tokens: 15769414\n", + "Embedding Cost in USD: 473.08241999999996\n" + ] + } + ], + "source": [ + "def print_embedding_cost(texts):\n", + " import tiktoken\n", + " enc = tiktoken.encoding_for_model('gpt-4')\n", + " total_tokens = sum([len(enc.encode(page.page_content)) for page in texts])\n", + " print(f'Total Tokens: {total_tokens}')\n", + " print(f'Embedding Cost in USD: { (0.03 / 1_000) * total_tokens}')\n", + "\n", + "print_embedding_cost(text_chunks)" + ] + }, + { + "cell_type": "markdown", + "id": "8012516604037e2f", + "metadata": { + "collapsed": false, + "id": "8012516604037e2f" + }, + "source": [ + "# Use Hugging Face Embeddings Sentence Transformers\n", + "\n", + "Here we:\n", + "\n", + "* use a self-hosted on-premises model for the embedding and vectorization\n", + "* configure it for the use with the CPU or GPU\n", + "\n", + "This model is from the Beijing Academy of Artificial Intelligence\n", + "* https://huggingface.co/BAAI/bge-large-en-v1.5\n", + "* It uses: 
https://huggingface.co/docs/transformers/model_doc/auto\n", + "\n", + "It will produce embeddings of 1024 dimensions, roughly 500 fewer than the OpenAI embeddings I initially wanted to use." + ] + }, + { + "cell_type": "markdown", + "id": "LJIwSxNf5sm7", + "metadata": { + "id": "LJIwSxNf5sm7" + }, + "source": [ + "## GPU detection (CUDA)\n", + "\n", + "Here we detect whether a GPU is present, and if that is the case, we initialize the model to use it later. If not, we can use the CPU as a fallback. But for this use-case / implementation the Nvidia V100 GPU is about 60x faster (an estimate)." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "3081256c9cf22780", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-05T11:16:40.738173Z", + "start_time": "2024-04-05T11:16:39.380789Z" + }, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "3081256c9cf22780", + "outputId": "9b120f43-f2cd-47c2-b763-e089aca15ee2" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "No CUDA available\n" + ] + } + ], + "source": [ + "import torch\n", + "use_cuda = torch.cuda.is_available()\n", + "\n", + "USE_GPU=True\n", + "\n", + "if use_cuda:\n", + " print('__CUDNN VERSION:', torch.backends.cudnn.version())\n", + " print('__Number CUDA Devices:', torch.cuda.device_count())\n", + " print('__CUDA Device Name:',torch.cuda.get_device_name(0))\n", + " print('__CUDA Device Total Memory [GB]:',torch.cuda.get_device_properties(0).total_memory/1e9)\n", + " USE_GPU=True\n", + " print(\"GPU enabled\")\n", + "\n", + "if not use_cuda:\n", + " print('No CUDA available')\n", + " USE_GPU=False\n" + ] + }, + { + "cell_type": "markdown", + "id": "GY_nYdSO6JTc", + "metadata": { + "id": "GY_nYdSO6JTc" + }, + "source": [ + "## BAAI BERT Model\n", + "\n", + "The Beijing Academy of Artificial Intelligence (BAAI) is a leading organization that provides state-of-the-art models on HuggingFace. Here the model is being used to create the Embeddings. 
An Embedding here isn't a plain Word2Vec style projection of text to a vector space. It has a semantic integration. I still have to research the details.\n", + "\n", + "Bidirectional Encoder Representations from Transformers (BERT) is a language model based on the transformer architecture, notable for its dramatic improvement over previous state of the art models. It was introduced in October 2018 by researchers at Google." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "c1ca979bbc1610bb", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-05T11:16:41.041Z", + "start_time": "2024-04-05T11:16:41.037371Z" + }, + "id": "c1ca979bbc1610bb" + }, + "outputs": [], + "source": [ + "from langchain.embeddings import HuggingFaceEmbeddings\n", + "\n", + "# pre-trained model path\n", + "modelPath = \"BAAI/bge-large-en-v1.5\"\n", + "\n", + "# Create a dictionary with model configuration options, specifying to use the CPU or GPU for computations\n", + "if not USE_GPU:\n", + " model_kwargs = {'device':'cpu'}\n", + "else:\n", + " model_kwargs = {}\n", + "\n", + "# Create a dictionary with encoding options, specifically setting 'normalize_embeddings' to True\n", + "encode_kwargs = {'normalize_embeddings': True}" + ] + }, + { + "cell_type": "markdown", + "id": "JSTLqLQj6ref", + "metadata": { + "id": "JSTLqLQj6ref" + }, + "source": [ + "### Initialization of the Embedding model" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "3c2b9cd67f161714", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-05T11:16:47.504267Z", + "start_time": "2024-04-05T11:16:44.983827Z" + }, + "colab": { + "base_uri": "https://localhost:8080/", + "height": 496, + "referenced_widgets": [ + "a1b39dadf1fd474296d47c00498b1d97", + "ce3cebc4664b4b4094f965e3d98b1ec3", + "cbd1ebc865344b2cbe09aaad9341f447", + "1fe3e7dd24a143b38fcc1f16048fce75", + "26fcfd7ef3784d02bed4f621377600b0", + "40346f9f5293495ea35a8b6a2234e8e5", + "32675b12f59c4b04ae03d4246c67145c", + 
"8ae260f36b444619be6f189b02dc54a4", + "e70eeeff503c4f2a83757cb0c202e7d0", + "5afae079bdb047a9adbe1352ca91899c", + "defeecb0c9034ef0835df87438c046e2", + "1b1bb145deac4bcbb720559e5a9f4cde", + "648ee76f36564501b7ae9a636aae4dde", + "3fa0a57165694311b6d5ad69f8e605de", + "96d9e6f90f974eaf9eb3b5a9d7bad983", + "9a19f5e7f06844ed9b6413aef416d180", + "249660f622c54831991237232196911b", + "a3fb4f16aea4427fa1218b61bd041d43", + "4c856a1624d54fd29b6abae9a395510c", + "c23cc4d3c457408eb352ab92dfbb86e0", + "c3d90bfa52bc40b3bf1d8dee186c48f9", + "072ba7cad09e4470bb04a44140eedb2c", + "2f3da07419074e548051002fecd36ce6", + "468f208672ab48ce83912d09913c18cc", + "308c05cc72f847588befc8c68696d752", + "fae60f6d7ecf4745b9e07b55f353036d", + "e75bdc1627624c878fde0f80ef9b71c5", + "fd4f5a1546e84d3f9741bb63a381b48f", + "b51b7655fb7546b2a4b61edc796af418", + "c1827bb2ba9047d3bae8a7bfa6702748", + "45d145a52f844606aed707cbb01e473f", + "c6d9dd3a6db445488bb8ad80e2e0554a", + "299413fffd184e28b7d8d03c741778cc", + "00473551f93a45fe8e8337c15d677848", + "8bf13bfe911c43798e87e8bf9e49047a", + "c7f64c8420074c469024b1b89ff0c114", + "94b6362a788b4c15ac67cc41e9f1b4ce", + "576c65f676f941bcb20c804191b1e63a", + "1388935349ac4673935f2521ed7d78d8", + "5c7f10c5efd14f29b76f91bdf8b13e11", + "5618a45a62f74f16899408521f6712b7", + "a205f9f2ec5543b7a50fd64d50fb53e8", + "d3a4973906bf490b87ac6b5905448b28", + "caebc821405542099a5e500f505d1169", + "9b7b90e2713f4f488a6921f89d96828c", + "2cf150ada6ca43449becf536e8444a23", + "cf604be4e8304922be58e20ee19ac70b", + "d201dbcd946d4173bb976a65bc24613b", + "b331d2862b3049eea1df4fb8b20f7927", + "c098aa33bf03498fa9a1762e88a82a93", + "964f3f6cda9a4815803d9c0e369ae64e", + "b9b38bfc63714441af3f22975eabed51", + "dfd40fb8dfe244b086f92f4299e11447", + "597f9a848328465eb48b5636039979ee", + "be67aea3e0b049e6b79f850f4082f449", + "612864d19f8a45618c574a6e9d90c0a7", + "4f820c0fbe5b4dba9186d726b54031ee", + "84eeb2a99f044c36bdf9428c62bdfee1", + "882898f5c0984ccca013458ac9246583", + 
"8236c6d4505c49869532d47e3c4bf9b2", + "47cb514ac09145148d657d1f43bd3343", + "77de5c5fe519498499d0703ad3d77523", + "915bf489198a4f518dcedc3a778b94cb", + "91a68937b987480c903f3ad73a35c30a", + "9981b06205f44f959182c06584909d46", + "12cb6ef492044740b8c0b48077d257de", + "9840d5d3dfc0421da994b1a48fc57690", + "0e9f877384c345bea8eddee0c2f896e4", + "141b953601f842f9a315cc254fff3925", + "012be145a1444889bfa30fae7812d62b", + "61cf60219cb94ae1a3413d27d2e5ed13", + "62c9641e6acd41ff915f0a86964560b3", + "ad2690ad145344e8a5744b400a2bb464", + "37b240a8a4c24e59bfc0b3f76e30b383", + "088a6b94e38247ad9f0d91d80202899f", + "d4438346655e45b0a029f2b99d3f02f9", + "5954d40f2cb1445a9ab1d4c814526f10", + "53b2f3605ae14ca9bb5e4fde8649f42b", + "f3561988bf9a438baab1e2c127d26b2a", + "af87303a2b084d128d4a5999c090ccf8", + "ae4c874164944325b74c7ac358bda6e6", + "46700f4115ff4084b740b10d7d6a9e93", + "119af24ab9b944de992ea90594e307a2", + "7a1e84942d694934ae4755034ce41d0c", + "149fe85c380e4cf79f3e511390243364", + "e20c0f77173f49468143522458560d4f", + "1441564670da4feaa7aec4be2e9dbf19", + "08d738c646b640a1a558d653a7c4f538", + "183f7840788b4409b954c244b02f94de", + "8b00ac065fc14f7da45586a43bf0226f", + "0e906e5ef8634475bb6dc19c484f2681", + "4511843fbda84081a0370376724082be", + "dce5f8fa907a40e1a96139028fd4466d", + "d56feb190ba54183a61baf6ffee1c74e", + "35b47fb99d604702b8da3b5f837c82ce", + "ed0871a86c2d4522bc9bca285be50677", + "30ead611258e4f7d971ac080e471c011", + "8b6525e5e421440c97b4d43146b5467c", + "00f1718ef79a405eb83b4190b80bc95d", + "5e70bb5e8d654635a510c83035366c34", + "cf86f986d20d41d4ae177a4a1c05cc21", + "3ea1b21763d044ffba9700a22b190beb", + "8dadf6a0f40e41d697a603d7ea746547", + "6d2cd8eb606c48df96f0680d161c753e", + "f1822ee028aa4920b224e0cd3b9ccc49", + "7268238e5b104504a2c1cd421973c8af", + "93aee68294744f4b9d6edc4db040b25a", + "be4f813c7272420cbb54a2e1b28be012", + "a54a03a4af42473f80d8808a5836654f", + "18ab9ee89f674b26bb23f620f8c217a6", + "61d05569258c45b88b6c59f73eff9ae1", + 
"1cceb8bd541d469ca7ffb02652201c9c", + "ad0a3c78287f443b93c185880652f14a", + "a059cdcef9d04fbeb4293185821c3243", + "adec61d016b1479481df33be3a74231a", + "edfb1273272a4625b9d10dba9c93af73", + "b2b7a587d64143439cfacdec2d1b9889", + "ce7c6faf4d884d60800a99a18ae4949b", + "42b2487a3e1e43b48083b6426aaaca81", + "120b025826cd4d288ae8715d6c53e830", + "ca452ea7819a4a6f90f70fe41454facb" + ] + }, + "id": "3c2b9cd67f161714", + "outputId": "4c3c646e-6b4a-4ec3-e975-930d445c2144" + }, + "outputs": [], + "source": [ + "# Initialize an instance of HuggingFaceEmbeddings with the specified parameters\n", + "# this model requires sentence_transformers\n", + "\n", + "embeddings = HuggingFaceEmbeddings(\n", + " model_name=modelPath, # Provide the pre-trained model's path\n", + " model_kwargs=model_kwargs, # Pass the model configuration options\n", + " encode_kwargs=encode_kwargs # Pass the encoding options\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "3b9ff8cad49442cf", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-05T11:16:56.483091Z", + "start_time": "2024-04-05T11:16:56.459721Z" + }, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "3b9ff8cad49442cf", + "outputId": "b289b59e-550c-4a81-f9c3-ef38b98e9761" + }, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'text_chunks' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[16], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m vector \u001b[38;5;241m=\u001b[39m embeddings\u001b[38;5;241m.\u001b[39membed_query(\u001b[43mtext_chunks\u001b[49m[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39mpage_content)\n\u001b[1;32m 2\u001b[0m \u001b[38;5;66;03m# print(vector)\u001b[39;00m\n\u001b[1;32m 3\u001b[0m n_dimensions \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28mlen\u001b[39m(vector)\n", + "\u001b[0;31mNameError\u001b[0m: name 'text_chunks' is not defined" + ] + } + ], + "source": [ + "vector = embeddings.embed_query(text_chunks[0].page_content)\n", + "# print(vector)\n", + "n_dimensions = len(vector)\n", + "print(n_dimensions, \" dimensions are going to be used\")" + ] + }, + { + "cell_type": "markdown", + "id": "e6ev40az7JZY", + "metadata": { + "id": "e6ev40az7JZY" + }, + "source": [ + "This means that for each line of the text file, the model creates one vector with 1024 dimensions." + ] + }, + { + "cell_type": "markdown", + "id": "b347fb5ee68daf60", + "metadata": { + "collapsed": false, + "id": "b347fb5ee68daf60" + }, + "source": [ + "## Batch process the embedding\n", + "\n", + "Many data-science tasks require splitting a larger processing operation into batch jobs.\n", + "Like in the good old Mainframe days.\n", + "\n", + "Initially I wanted to use the basic vector DB sqlite-vss again: https://github.com/asg017/sqlite-vss\n", + "\n", + "This is based on FAISS as well, but sqlite-vss doesn't seem to be able to handle concurrency. Recent sqlite versions can.\n", + "\n", + "### FAISS\n", + "\n", + "https://faiss.ai/ - a library for efficient similarity search and clustering of dense vectors.\n", + "\n", + "### Concurrency and batch processing\n", + "\n", + "We add vectors of 1024 dimensions per chunk (sentence, line break delimited) to a vector store based on FAISS and LangChain.\n", + "The processing is done in batches of 500 chunks, using 3 threads in parallel." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "b03bfcb6c666db1", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-04T10:10:08.134514Z", + "start_time": "2024-04-04T10:10:07.895943Z" + }, + "id": "b03bfcb6c666db1" + }, + "outputs": [], + "source": [ + "from concurrent.futures import ThreadPoolExecutor, as_completed\n", + "import os\n", + "\n", + "from tqdm.notebook import tqdm\n", + "from typing import List\n", + "from langchain.schema.document import Document\n", + "\n", + "from langchain_community.vectorstores import FAISS" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "e6ffc345c26298ad", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-04T10:32:48.905517Z", + "start_time": "2024-04-04T10:30:48.115043Z" + }, + "colab": { + "base_uri": "https://localhost:8080/", + "height": 67, + "referenced_widgets": [ + "592f37baf1c74e149577e80678db668f", + "2cdb27f1d7b14b558cf6f19fc0ab4fd9", + "ce8eed52d57c47479ab9a45b85296c04", + "a18c165ea7fc485c91e64df34974d685", + "090543e0523a4d0e8dbd89e0152a3a15", + "0fbb5a8dd8c64e6c862779496a0c1867", + "8c77c7def1804fd6884a601c76618fa7", + "4f06cc3b83e641cd81deba9aaea93fbb", + "b0e999f6c752439a8f4ba962815160ae", + "a18f27c970524c048b424be9672e106f", + "f6b2c8e5621143729c8d6e3129251f29" + ] + }, + "id": "e6ffc345c26298ad", + "outputId": "3d0276e2-aa07-4c66-c0a9-b20effd2cca5" + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "592f37baf1c74e149577e80678db668f", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Processing batches: 0%| | 0/1448 [00:00 None:\n", + " \"\"\"\n", + " Using type hints is a good idea here, because error messages get swallowed by the ThreadPoolExecutor.\n", + " The exception handling serves the same purpose.\n", + " Exceptions can cost performance, but only on the CPU level here.\n", + " \"\"\"\n", + "\n", + " try:\n", + " db = FAISS.from_documents(batch, embeddings, 
distance_strategy=\"COSINE\")\n", + " return db\n", + "\n", + " except Exception as e:\n", + " print(f\"Exception occurred in add_texts_in_batches: {e}\")\n", + "\n", + "\n", + "def divide_chunks(chunks, n):\n", + " \"\"\"\n", + " Divide and conquer :)\n", + " \"\"\"\n", + " for i in range(0, len(chunks), n):\n", + " yield chunks[i:i + n]\n", + "\n", + "\n", + "def vectorize_data_in_batches(chunks, embeddings):\n", + " \"\"\"\n", + " This function orchestrates the embedding vectorization in batches.\n", + " \"\"\"\n", + "\n", + " num_workers = 3\n", + " batch_size = 500 # Adjust based on your needs and memory constraints\n", + "\n", + " batches = list(divide_chunks(chunks, batch_size))\n", + " faiss_db = None\n", + "\n", + " with ThreadPoolExecutor(max_workers=num_workers) as executor:\n", + " # Submit all the batches for processing\n", + " futures = {executor.submit(add_texts_in_batches, batch, embeddings=embeddings): batch for batch in batches}\n", + "\n", + " # Setup the tqdm progress bar\n", + " progress_bar = tqdm(total=len(futures), desc=\"Processing batches\")\n", + "\n", + " for future in as_completed(futures):\n", + " # Each time a future completes, update the progress and collect the result\n", + " progress_bar.update(1)\n", + " try:\n", + " db_result = future.result() # This is where you get the returned value from add_texts_in_batches\n", + " if faiss_db is not None:\n", + " faiss_db.merge_from(db_result)\n", + "\n", + " else:\n", + " faiss_db = db_result\n", + "\n", + " except Exception as e:\n", + " print(f\"An error occurred: {e}\")\n", + "\n", + " progress_bar.close() # Ensure the progress bar is closed at the end\n", + "\n", + " faiss_db.save_local(\"faiss_index_cosine\")\n", + " print(\"All texts have been added to the database.\")\n", + "\n", + "\n", + "vectorize_data_in_batches(chunks=text_chunks, embeddings=embeddings)" + ] + }, + { + "cell_type": "markdown", + "id": "WfjpAoJqE_L4", + "metadata": { + "id": "WfjpAoJqE_L4" + }, + "source": [ + "# 
Similarity and MMR search\n", + "\n", + "* this works on the FAISS index without a GPU\n", + "* you can retrieve the data from Kaggle: https://www.kaggle.com/mariusciepluch/faiss-text-db-infosec-archive\n", + "* the data is a FAISS index with cosine similarity\n", + "* you can use this FAISS index with MMR search\n", + "\n", + "\n", + "\"Maximal Marginal Relevance a.k.a. MMR has been introduced in the paper The Use of MMR, Diversity-Based Reranking for Reordering Documents and Producing Summaries. MMR tries to reduce the redundancy of results while at the same time maintaining query relevance of results for already ranked documents/phrases etc.\" (https://medium.com/tech-that-works/maximal-marginal-relevance-to-rerank-results-in-unsupervised-keyphrase-extraction-22d95015c7c5)\n", + "\n", + "MMR search provides better results here." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "96e60ed06157e62d", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-05T11:02:12.762744Z", + "start_time": "2024-04-05T11:02:12.759789Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "from langchain_community.vectorstores import FAISS" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "9ea3f5a0d14cada", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-05T11:11:46.382509Z", + "start_time": "2024-04-05T11:10:10.900581Z" + }, + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloading faiss-text-db-infosec-archive.zip to /home/marius/source/bookworm\r\n", + "100%|██████████████████████████████████████| 2.59G/2.59G [01:34<00:00, 35.9MB/s]\r\n", + "100%|██████████████████████████████████████| 2.59G/2.59G [01:34<00:00, 29.4MB/s]\r\n" + ] + } + ], + "source": [ + "# DL FAISS index via API command (API key required afaik)\n", + "!/home/marius/miniconda3/envs/llm_langchain/bin/kaggle datasets download -d mariusciepluch/faiss-text-db-infosec-archive" + ] + }, + { + 
"cell_type": "code", + "id": "43458ad9399324dd", + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Unzip the downloaded file\n", + "!7z x faiss-text-db-infosec-archive.zip" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "nJG7eD8eFBnV", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-05T11:17:16.569043Z", + "start_time": "2024-04-05T11:17:06.997334Z" + }, + "id": "nJG7eD8eFBnV" + }, + "outputs": [], + "source": [ + "loaded_db = FAISS.load_local(\"faiss_index_cosine\", embeddings, distance_strategy=\"COSINE\", allow_dangerous_deserialization=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "1e9cb08cdf9cc837", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-05T11:33:25.027401Z", + "start_time": "2024-04-05T11:33:24.742258Z" + }, + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Document(page_content=\"`exploit'\"), Document(page_content='* **Exploit** : An exploit is a software or procedure that uses a vulnerability to effect some'), Document(page_content='exploit techniques disclosed?'), Document(page_content='Exploit-Entwicklung.')]\n" + ] + } + ], + "source": [ + "results = loaded_db.search(search_type=\"mmr\", query=\"What is an exploit?\")\n", + "print(results)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "Wwbebp6wF9fG", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-05T11:34:00.277813Z", + "start_time": "2024-04-05T11:33:59.955815Z" + }, + "id": "Wwbebp6wF9fG" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[(Document(page_content=\"`exploit'\"), 0.24956527), (Document(page_content=\"`exploit'\"), 0.24956527), (Document(page_content=\"`exploit'\"), 0.24956527), (Document(page_content=\"`exploit'\"), 0.24956527)]\n" + ] + } + ], + "source": [ + "results_with_scores = loaded_db.similarity_search_with_score(\"What is an exploit?\")\n", + 
"print(results_with_scores)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "f4f39812d0cebde4", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-05T11:31:58.672502Z", + "start_time": "2024-04-05T11:31:58.284632Z" + }, + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Document(page_content=\"`exploit'\"), Document(page_content='* **Exploit** : An exploit is a software or procedure that uses a vulnerability to effect some'), Document(page_content='exploit techniques disclosed?'), Document(page_content='Exploit-Entwicklung.')]\n" + ] + } + ], + "source": [ + "retriever = loaded_db.as_retriever( search_type=\"mmr\",)\n", + "docs = retriever.invoke(\"What is an exploit?\")\n", + "print(docs)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "a650e47f9e73351f", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-05T11:40:04.650321Z", + "start_time": "2024-04-05T11:40:00.463436Z" + }, + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Document(page_content='### Exploit Development'), Document(page_content='Reverse engineering, specific to computer science, is the act of deriving a'), Document(page_content='This article is about the reverse engineering of the exploit found in the leak'), Document(page_content='the Software Exploitation challenges and I designed all the Reverse\\nEngineering challenges.'), Document(page_content='understand the thought process behind reverse engineering modern malware of'), Document(page_content='# Gdbinit | Reverse Engineering Mac OS X\\n**Created:**| _1/3/2012 4:12:17 PM_ \\n---|---'), Document(page_content='series. First thing said: “what is an exploit?”. 
It could be described as a'), Document(page_content='How might an attacker benefit from capturing or modifying the data?'), Document(page_content='to do Reverse Engineering and exploit development, yet some of it is required'), Document(page_content='hacking, crack, hack, unlock, unprotect, break, reverse engineer, recover,'), Document(page_content='#### Putting it all together\\n\\nThe exploitation process is:'), Document(page_content='Place you reverse engineering questions for Linux related topics here... Tux'), Document(page_content='level bit hacks**. Bit hacks are ingenious little programming tricks that'), Document(page_content='was found and exploited?'), Document(page_content='**First things first how the hell does the dumping of Windows hashes actually'), Document(page_content='1. Using SDbgExt to aid your debugging and reverse engineering efforts \\\\(part 1\\\\). SDbgExt is the'), Document(page_content='hardware hackers, reverse-engineers and exploit developers.'), Document(page_content='\\\\[forum\\\\]reverse-engineering.net \\nReverse Engineering the World \\nReversing for Newbies'), Document(page_content='as anti-debugs, exception triggers, Get IPs...it uses several tricks to be'), Document(page_content='to understand more of the exploit process and more of how Internet Explorer\\nworked.'), Document(page_content='is exploited by hackers. Indeed, no open-software initiative helps explicitly with understanding'), Document(page_content='discuss the sandbox implementation itself, how it works, and also provide some'), Document(page_content='classic tools in order to debug the exploit. 
Furthermore, your structure'), Document(page_content='* **Exploit** : An exploit is a software or procedure that uses a vulnerability to effect some'), Document(page_content='# JAVA Exploit Kit Malware \\\\#1 | inREVERSE\\n**Created:**| _1/7/2010 1:29:44 PM_ \\n---|---'), Document(page_content='* Reverse Engineering Automation\\n * Binary Exploitation Techniques'), Document(page_content='exploit, because if you create the exploit yourself you should know exactly'), Document(page_content='The question is : How do exploit writers build their exploits ? What does the'), Document(page_content='using Metasploit’s pattern\\\\_create.rb tool to help us pinpoint the exact part'), Document(page_content='Description: A PowerShell Post-Exploitation Framework used in many PowerShell\\nattack tools.'), Document(page_content='and how they are exploited. Detailed technical information on how to exploit'), Document(page_content='gain a reverse shell. For the SQLi attack there are few basic steps :'), Document(page_content='* Struct Builder: Tool commonly used in game hacking to reverse data structures. This tool is'), Document(page_content='A Few Thoughts on Cryptographic Engineering: How does the NSA break SSL?\\nCreated:'), Document(page_content='**Answer:** Backdoors are tools used by attackers to help them maintain access'), Document(page_content='exploitation process. On the other hand, the “E” attribute has been removed'), Document(page_content='understand SEH in the context of exploit writing. I encourage you to read up'), Document(page_content='Exploit-DB. \\n \\nThe exploitation process for this vulnerability will examine overwritng EIP'), Document(page_content='boot process, the following files of a Windows 10 1607 build have been reverse-engineered:'), Document(page_content='process explaining some of the basics of exploiting. The whole topic can be'), Document(page_content='exploits. 
Low integrity processes are used for processing and handling of'), Document(page_content=\"I wanted to hack something in javascript to see how it's like to build\\nprototypes with it.\"), Document(page_content='The return-into-library technique is the root on which all return-oriented exploit approaches are'), Document(page_content='22. Down the Rabbit Hole \\nSummary of the Rogue File/Process'), Document(page_content='talent you have after reverse-engineering your star-exploit back in 2010'), Document(page_content='Labels: Exploitation, Reverse Engineering'), Document(page_content='What is the process for identifying and addressing vulnerabilities in the\\napplication?'), Document(page_content='But is this level of obfuscation where exploit countermeasures are headed? How'), Document(page_content='bug, they just say its a DoS. Then a really smart exploit developer comes along and says, \"Hey'), Document(page_content=\"iOS App Reverse Engineering is the world's 1st book of very detailed iOS App\")]\n" + ] + } + ], + "source": [ + "retriever = loaded_db.as_retriever( search_type=\"mmr\", search_kwargs={'k': 50, 'fetch_k': 5000})\n", + "docs = retriever.invoke(\"What is an exploit and what is the process of creating it? 
How does reverse engineering contribute to exploit development?\")\n", + "print(docs)" + ] + }, + { + "cell_type": "markdown", + "id": "b8ad09a6a2b98e12", + "metadata": { + "collapsed": false + }, + "source": [ + "# Use the FAISS index with Mistral" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "cfbc6c6bf2fd7caf", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-05T11:41:25.823973Z", + "start_time": "2024-04-05T11:41:24.910564Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "from langchain_community.llms import Ollama\n", + "from langchain.globals import set_llm_cache\n", + "from langchain.cache import InMemoryCache\n", + "\n", + "set_llm_cache(InMemoryCache())\n", + "\n", + "llm = Ollama(model=\"mistral\")" + ] + }, + { + "cell_type": "markdown", + "id": "f75b4231f798edec", + "metadata": { + "collapsed": false + }, + "source": [ + "## Pass MMR search results to Mistral\n", + "\n", + "* I am using Ollama and Mistral, self-hosted\n", + "* The Mistral model is a large language model, which can be used for text generation and QA\n", + "* The Mistral model is being used to generate a response to the MMR search results" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "56cc32c360a4bd3c", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-05T11:44:35.703318Z", + "start_time": "2024-04-05T11:44:34.995829Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "from langchain.chains import RetrievalQA\n", + "\n", + "chain = RetrievalQA.from_chain_type(llm=llm, chain_type=\"stuff\", retriever=retriever)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "2fec48eac2aa2531", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-05T11:49:52.931101Z", + "start_time": "2024-04-05T11:49:42.877730Z" + }, + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'query': 'What is an exploit and how does the process of 
reverse engineering contribute to exploit development?', 'result': \" An exploit is a software or procedure that uses a vulnerability to effect some unwanted or unintended action in a system. The process of reverse engineering contributes significantly to exploit development as it involves understanding the inner workings of a software or system, identifying vulnerabilities, and developing code (exploits) to take advantage of those vulnerabilities. Reverse engineering tools and techniques enable researchers and attackers to analyze software, disassemble code, and modify it to create exploits. Exploit development requires reverse engineering skills, custom shellcode payloads, and a deep understanding of the target system's vulnerabilities and exploitability.\"}\n" + ] + } + ], + "source": [ + "query = \"What is an exploit and how does the process of reverse engineering contribute to exploit development?\"\n", + "answer = chain.invoke(query)\n", + "print(answer)" + ] + }, + { + "cell_type": "markdown", + "id": "82gFVyrNCYOF", + "metadata": { + "id": "82gFVyrNCYOF" + }, + "source": [ + "# Sandbox code - test area" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "v6bhYHU5_9oo", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "v6bhYHU5_9oo", + "outputId": "a88691e1-3ee4-4a34-edbf-4fac688dd78d" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "from langchain_community.vectorstores import FAISS\n", + "\n", + "texts = [\"FAISS is an important library\", \"LangChain supports FAISS\"]\n", + "faiss = FAISS.from_texts(texts, embeddings, distance_strategy=\"COSINE\")\n", + "print(type(faiss))\n", + "\n", + "faiss.save_local(\"test\")\n", + "\n", + "new_db = FAISS.load_local(\"test\", embeddings, allow_dangerous_deserialization=True)" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "V100", + "machine_shape": "hm", + 
"provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "00473551f93a45fe8e8337c15d677848": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_8bf13bfe911c43798e87e8bf9e49047a", + "IPY_MODEL_c7f64c8420074c469024b1b89ff0c114", + "IPY_MODEL_94b6362a788b4c15ac67cc41e9f1b4ce" + ], + "layout": "IPY_MODEL_576c65f676f941bcb20c804191b1e63a" + } + }, + "00f1718ef79a405eb83b4190b80bc95d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "012be145a1444889bfa30fae7812d62b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": 
"1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d4438346655e45b0a029f2b99d3f02f9", + "placeholder": "​", + "style": "IPY_MODEL_5954d40f2cb1445a9ab1d4c814526f10", + "value": " 366/366 [00:00<00:00, 29.7kB/s]" + } + }, + "072ba7cad09e4470bb04a44140eedb2c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "088a6b94e38247ad9f0d91d80202899f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "08d738c646b640a1a558d653a7c4f538": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "090543e0523a4d0e8dbd89e0152a3a15": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0e906e5ef8634475bb6dc19c484f2681": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ed0871a86c2d4522bc9bca285be50677", + "max": 711396, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_30ead611258e4f7d971ac080e471c011", + "value": 711396 + } + }, + "0e9f877384c345bea8eddee0c2f896e4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": 
null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_62c9641e6acd41ff915f0a86964560b3", + "placeholder": "​", + "style": "IPY_MODEL_ad2690ad145344e8a5744b400a2bb464", + "value": "tokenizer_config.json: 100%" + } + }, + "0fbb5a8dd8c64e6c862779496a0c1867": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "119af24ab9b944de992ea90594e307a2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": 
"LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "120b025826cd4d288ae8715d6c53e830": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, 
+ "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "12cb6ef492044740b8c0b48077d257de": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "1388935349ac4673935f2521ed7d78d8": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "141b953601f842f9a315cc254fff3925": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": 
[], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_37b240a8a4c24e59bfc0b3f76e30b383", + "max": 366, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_088a6b94e38247ad9f0d91d80202899f", + "value": 366 + } + }, + "1441564670da4feaa7aec4be2e9dbf19": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "149fe85c380e4cf79f3e511390243364": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + 
"_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "183f7840788b4409b954c244b02f94de": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_8b00ac065fc14f7da45586a43bf0226f", + "IPY_MODEL_0e906e5ef8634475bb6dc19c484f2681", + "IPY_MODEL_4511843fbda84081a0370376724082be" + ], + "layout": "IPY_MODEL_dce5f8fa907a40e1a96139028fd4466d" + } + }, + "18ab9ee89f674b26bb23f620f8c217a6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "1b1bb145deac4bcbb720559e5a9f4cde": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_648ee76f36564501b7ae9a636aae4dde", + "IPY_MODEL_3fa0a57165694311b6d5ad69f8e605de", + "IPY_MODEL_96d9e6f90f974eaf9eb3b5a9d7bad983" + ], + "layout": "IPY_MODEL_9a19f5e7f06844ed9b6413aef416d180" + } + }, + "1cceb8bd541d469ca7ffb02652201c9c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_edfb1273272a4625b9d10dba9c93af73", + "placeholder": "​", + "style": "IPY_MODEL_b2b7a587d64143439cfacdec2d1b9889", + "value": "1_Pooling/config.json: 100%" + } + }, + "1fe3e7dd24a143b38fcc1f16048fce75": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + 
"description_tooltip": null, + "layout": "IPY_MODEL_5afae079bdb047a9adbe1352ca91899c", + "placeholder": "​", + "style": "IPY_MODEL_defeecb0c9034ef0835df87438c046e2", + "value": " 349/349 [00:00<00:00, 25.8kB/s]" + } + }, + "249660f622c54831991237232196911b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "26fcfd7ef3784d02bed4f621377600b0": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + 
"flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "299413fffd184e28b7d8d03c741778cc": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "2cdb27f1d7b14b558cf6f19fc0ab4fd9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0fbb5a8dd8c64e6c862779496a0c1867", + "placeholder": "​", + "style": "IPY_MODEL_8c77c7def1804fd6884a601c76618fa7", + "value": "Processing batches: 100%" + } + }, + "2cf150ada6ca43449becf536e8444a23": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", 
+ "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c098aa33bf03498fa9a1762e88a82a93", + "placeholder": "​", + "style": "IPY_MODEL_964f3f6cda9a4815803d9c0e369ae64e", + "value": "config.json: 100%" + } + }, + "2f3da07419074e548051002fecd36ce6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_468f208672ab48ce83912d09913c18cc", + "IPY_MODEL_308c05cc72f847588befc8c68696d752", + "IPY_MODEL_fae60f6d7ecf4745b9e07b55f353036d" + ], + "layout": "IPY_MODEL_e75bdc1627624c878fde0f80ef9b71c5" + } + }, + "308c05cc72f847588befc8c68696d752": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c1827bb2ba9047d3bae8a7bfa6702748", + "max": 94607, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_45d145a52f844606aed707cbb01e473f", + "value": 94607 + } + }, + "30ead611258e4f7d971ac080e471c011": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "32675b12f59c4b04ae03d4246c67145c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "35b47fb99d604702b8da3b5f837c82ce": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "37b240a8a4c24e59bfc0b3f76e30b383": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": 
null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3ea1b21763d044ffba9700a22b190beb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_93aee68294744f4b9d6edc4db040b25a", + "max": 125, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_be4f813c7272420cbb54a2e1b28be012", + "value": 125 + } + }, + "3fa0a57165694311b6d5ad69f8e605de": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4c856a1624d54fd29b6abae9a395510c", + "max": 124, + "min": 0, + "orientation": "horizontal", + "style": 
"IPY_MODEL_c23cc4d3c457408eb352ab92dfbb86e0", + "value": 124 + } + }, + "40346f9f5293495ea35a8b6a2234e8e5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "42b2487a3e1e43b48083b6426aaaca81": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "4511843fbda84081a0370376724082be": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + 
"_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8b6525e5e421440c97b4d43146b5467c", + "placeholder": "​", + "style": "IPY_MODEL_00f1718ef79a405eb83b4190b80bc95d", + "value": " 711k/711k [00:00<00:00, 2.17MB/s]" + } + }, + "45d145a52f844606aed707cbb01e473f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "46700f4115ff4084b740b10d7d6a9e93": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + 
"object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "468f208672ab48ce83912d09913c18cc": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_fd4f5a1546e84d3f9741bb63a381b48f", + "placeholder": "​", + "style": "IPY_MODEL_b51b7655fb7546b2a4b61edc796af418", + "value": "README.md: 100%" + } + }, + "47cb514ac09145148d657d1f43bd3343": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + 
"overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4c856a1624d54fd29b6abae9a395510c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4f06cc3b83e641cd81deba9aaea93fbb": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": 
null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4f820c0fbe5b4dba9186d726b54031ee": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_47cb514ac09145148d657d1f43bd3343", + "placeholder": "​", + "style": "IPY_MODEL_77de5c5fe519498499d0703ad3d77523", + "value": "model.safetensors: 100%" + } + }, + "53b2f3605ae14ca9bb5e4fde8649f42b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_f3561988bf9a438baab1e2c127d26b2a", + "IPY_MODEL_af87303a2b084d128d4a5999c090ccf8", + "IPY_MODEL_ae4c874164944325b74c7ac358bda6e6" + ], + "layout": "IPY_MODEL_46700f4115ff4084b740b10d7d6a9e93" + } + }, + 
"5618a45a62f74f16899408521f6712b7": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "576c65f676f941bcb20c804191b1e63a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": 
null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "592f37baf1c74e149577e80678db668f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_2cdb27f1d7b14b558cf6f19fc0ab4fd9", + "IPY_MODEL_ce8eed52d57c47479ab9a45b85296c04", + "IPY_MODEL_a18c165ea7fc485c91e64df34974d685" + ], + "layout": "IPY_MODEL_090543e0523a4d0e8dbd89e0152a3a15" + } + }, + "5954d40f2cb1445a9ab1d4c814526f10": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "597f9a848328465eb48b5636039979ee": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5afae079bdb047a9adbe1352ca91899c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": 
null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5c7f10c5efd14f29b76f91bdf8b13e11": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "5e70bb5e8d654635a510c83035366c34": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_cf86f986d20d41d4ae177a4a1c05cc21", + "IPY_MODEL_3ea1b21763d044ffba9700a22b190beb", + "IPY_MODEL_8dadf6a0f40e41d697a603d7ea746547" + ], + "layout": "IPY_MODEL_6d2cd8eb606c48df96f0680d161c753e" + } + }, + "612864d19f8a45618c574a6e9d90c0a7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_4f820c0fbe5b4dba9186d726b54031ee", + "IPY_MODEL_84eeb2a99f044c36bdf9428c62bdfee1", + "IPY_MODEL_882898f5c0984ccca013458ac9246583" + ], + "layout": "IPY_MODEL_8236c6d4505c49869532d47e3c4bf9b2" + } + }, + 
"61cf60219cb94ae1a3413d27d2e5ed13": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "61d05569258c45b88b6c59f73eff9ae1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_1cceb8bd541d469ca7ffb02652201c9c", + "IPY_MODEL_ad0a3c78287f443b93c185880652f14a", + "IPY_MODEL_a059cdcef9d04fbeb4293185821c3243" + ], + "layout": "IPY_MODEL_adec61d016b1479481df33be3a74231a" + } + }, + "62c9641e6acd41ff915f0a86964560b3": { + "model_module": 
"@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "648ee76f36564501b7ae9a636aae4dde": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_249660f622c54831991237232196911b", + "placeholder": "​", + "style": "IPY_MODEL_a3fb4f16aea4427fa1218b61bd041d43", + "value": "config_sentence_transformers.json: 100%" + } + }, + "6d2cd8eb606c48df96f0680d161c753e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + 
"model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7268238e5b104504a2c1cd421973c8af": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "77de5c5fe519498499d0703ad3d77523": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + 
"_view_name": "StyleView", + "description_width": "" + } + }, + "7a1e84942d694934ae4755034ce41d0c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "8236c6d4505c49869532d47e3c4bf9b2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "84eeb2a99f044c36bdf9428c62bdfee1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_915bf489198a4f518dcedc3a778b94cb", + "max": 1340616616, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_91a68937b987480c903f3ad73a35c30a", + "value": 1340616616 + } + }, + "882898f5c0984ccca013458ac9246583": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9981b06205f44f959182c06584909d46", + "placeholder": "​", + "style": "IPY_MODEL_12cb6ef492044740b8c0b48077d257de", + "value": " 1.34G/1.34G [00:07<00:00, 134MB/s]" + } + }, + "8ae260f36b444619be6f189b02dc54a4": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": 
null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8b00ac065fc14f7da45586a43bf0226f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d56feb190ba54183a61baf6ffee1c74e", + "placeholder": "​", + "style": "IPY_MODEL_35b47fb99d604702b8da3b5f837c82ce", + "value": "tokenizer.json: 100%" + } + }, + "8b6525e5e421440c97b4d43146b5467c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + 
"justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8bf13bfe911c43798e87e8bf9e49047a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1388935349ac4673935f2521ed7d78d8", + "placeholder": "​", + "style": "IPY_MODEL_5c7f10c5efd14f29b76f91bdf8b13e11", + "value": "sentence_bert_config.json: 100%" + } + }, + "8c77c7def1804fd6884a601c76618fa7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "8dadf6a0f40e41d697a603d7ea746547": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, 
+ "layout": "IPY_MODEL_a54a03a4af42473f80d8808a5836654f", + "placeholder": "​", + "style": "IPY_MODEL_18ab9ee89f674b26bb23f620f8c217a6", + "value": " 125/125 [00:00<00:00, 9.90kB/s]" + } + }, + "915bf489198a4f518dcedc3a778b94cb": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "91a68937b987480c903f3ad73a35c30a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "93aee68294744f4b9d6edc4db040b25a": { + "model_module": 
"@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "94b6362a788b4c15ac67cc41e9f1b4ce": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d3a4973906bf490b87ac6b5905448b28", + "placeholder": "​", + "style": "IPY_MODEL_caebc821405542099a5e500f505d1169", + "value": " 52.0/52.0 [00:00<00:00, 5.21kB/s]" + } + }, + "964f3f6cda9a4815803d9c0e369ae64e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + 
"model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "96d9e6f90f974eaf9eb3b5a9d7bad983": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c3d90bfa52bc40b3bf1d8dee186c48f9", + "placeholder": "​", + "style": "IPY_MODEL_072ba7cad09e4470bb04a44140eedb2c", + "value": " 124/124 [00:00<00:00, 11.6kB/s]" + } + }, + "9840d5d3dfc0421da994b1a48fc57690": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_0e9f877384c345bea8eddee0c2f896e4", + "IPY_MODEL_141b953601f842f9a315cc254fff3925", + "IPY_MODEL_012be145a1444889bfa30fae7812d62b" + ], + "layout": "IPY_MODEL_61cf60219cb94ae1a3413d27d2e5ed13" + } + }, + "9981b06205f44f959182c06584909d46": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, 
+ "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9a19f5e7f06844ed9b6413aef416d180": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + 
"object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9b7b90e2713f4f488a6921f89d96828c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_2cf150ada6ca43449becf536e8444a23", + "IPY_MODEL_cf604be4e8304922be58e20ee19ac70b", + "IPY_MODEL_d201dbcd946d4173bb976a65bc24613b" + ], + "layout": "IPY_MODEL_b331d2862b3049eea1df4fb8b20f7927" + } + }, + "a059cdcef9d04fbeb4293185821c3243": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_120b025826cd4d288ae8715d6c53e830", + "placeholder": "​", + "style": "IPY_MODEL_ca452ea7819a4a6f90f70fe41454facb", + "value": " 191/191 [00:00<00:00, 17.4kB/s]" + } + }, + "a18c165ea7fc485c91e64df34974d685": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + 
"description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a18f27c970524c048b424be9672e106f", + "placeholder": "​", + "style": "IPY_MODEL_f6b2c8e5621143729c8d6e3129251f29", + "value": " 1448/1448 [30:16<00:00,  1.23it/s]" + } + }, + "a18f27c970524c048b424be9672e106f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a1b39dadf1fd474296d47c00498b1d97": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + 
"IPY_MODEL_ce3cebc4664b4b4094f965e3d98b1ec3", + "IPY_MODEL_cbd1ebc865344b2cbe09aaad9341f447", + "IPY_MODEL_1fe3e7dd24a143b38fcc1f16048fce75" + ], + "layout": "IPY_MODEL_26fcfd7ef3784d02bed4f621377600b0" + } + }, + "a205f9f2ec5543b7a50fd64d50fb53e8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "a3fb4f16aea4427fa1218b61bd041d43": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a54a03a4af42473f80d8808a5836654f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": 
null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ad0a3c78287f443b93c185880652f14a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ce7c6faf4d884d60800a99a18ae4949b", + "max": 191, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_42b2487a3e1e43b48083b6426aaaca81", + "value": 191 + } + }, + "ad2690ad145344e8a5744b400a2bb464": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "adec61d016b1479481df33be3a74231a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": 
"LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ae4c874164944325b74c7ac358bda6e6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1441564670da4feaa7aec4be2e9dbf19", + "placeholder": "​", + "style": "IPY_MODEL_08d738c646b640a1a558d653a7c4f538", + "value": " 232k/232k [00:00<00:00, 1.41MB/s]" + } + }, + "af87303a2b084d128d4a5999c090ccf8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + 
"bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_149fe85c380e4cf79f3e511390243364", + "max": 231508, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_e20c0f77173f49468143522458560d4f", + "value": 231508 + } + }, + "b0e999f6c752439a8f4ba962815160ae": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "b2b7a587d64143439cfacdec2d1b9889": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b331d2862b3049eea1df4fb8b20f7927": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + 
"grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b51b7655fb7546b2a4b61edc796af418": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b9b38bfc63714441af3f22975eabed51": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + 
"object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "be4f813c7272420cbb54a2e1b28be012": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "be67aea3e0b049e6b79f850f4082f449": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c098aa33bf03498fa9a1762e88a82a93": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + 
"justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c1827bb2ba9047d3bae8a7bfa6702748": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c23cc4d3c457408eb352ab92dfbb86e0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "c3d90bfa52bc40b3bf1d8dee186c48f9": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c6d9dd3a6db445488bb8ad80e2e0554a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": 
null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c7f64c8420074c469024b1b89ff0c114": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5618a45a62f74f16899408521f6712b7", + "max": 52, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_a205f9f2ec5543b7a50fd64d50fb53e8", + "value": 52 + } + }, + "ca452ea7819a4a6f90f70fe41454facb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "caebc821405542099a5e500f505d1169": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + 
"state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "cbd1ebc865344b2cbe09aaad9341f447": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8ae260f36b444619be6f189b02dc54a4", + "max": 349, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_e70eeeff503c4f2a83757cb0c202e7d0", + "value": 349 + } + }, + "ce3cebc4664b4b4094f965e3d98b1ec3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_40346f9f5293495ea35a8b6a2234e8e5", + "placeholder": "​", + "style": "IPY_MODEL_32675b12f59c4b04ae03d4246c67145c", + "value": "modules.json: 100%" + } + }, + "ce7c6faf4d884d60800a99a18ae4949b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ce8eed52d57c47479ab9a45b85296c04": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4f06cc3b83e641cd81deba9aaea93fbb", + "max": 1448, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_b0e999f6c752439a8f4ba962815160ae", + "value": 1448 + } + }, + "cf604be4e8304922be58e20ee19ac70b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b9b38bfc63714441af3f22975eabed51", + "max": 779, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_dfd40fb8dfe244b086f92f4299e11447", + "value": 779 + } + }, + "cf86f986d20d41d4ae177a4a1c05cc21": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f1822ee028aa4920b224e0cd3b9ccc49", + "placeholder": "​", + "style": "IPY_MODEL_7268238e5b104504a2c1cd421973c8af", + "value": "special_tokens_map.json: 100%" + } + }, + "d201dbcd946d4173bb976a65bc24613b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_597f9a848328465eb48b5636039979ee", + "placeholder": "​", + "style": "IPY_MODEL_be67aea3e0b049e6b79f850f4082f449", + "value": " 779/779 [00:00<00:00, 76.1kB/s]" + } + }, + "d3a4973906bf490b87ac6b5905448b28": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": 
"1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d4438346655e45b0a029f2b99d3f02f9": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": 
null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d56feb190ba54183a61baf6ffee1c74e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "dce5f8fa907a40e1a96139028fd4466d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + 
"bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "defeecb0c9034ef0835df87438c046e2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "dfd40fb8dfe244b086f92f4299e11447": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "e20c0f77173f49468143522458560d4f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "e70eeeff503c4f2a83757cb0c202e7d0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "e75bdc1627624c878fde0f80ef9b71c5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ed0871a86c2d4522bc9bca285be50677": { + "model_module": "@jupyter-widgets/base", + 
"model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "edfb1273272a4625b9d10dba9c93af73": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": 
null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f1822ee028aa4920b224e0cd3b9ccc49": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f3561988bf9a438baab1e2c127d26b2a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_119af24ab9b944de992ea90594e307a2", + "placeholder": "​", + "style": "IPY_MODEL_7a1e84942d694934ae4755034ce41d0c", + "value": "vocab.txt: 100%" + } + }, + "f6b2c8e5621143729c8d6e3129251f29": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "fae60f6d7ecf4745b9e07b55f353036d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c6d9dd3a6db445488bb8ad80e2e0554a", + "placeholder": "​", + "style": "IPY_MODEL_299413fffd184e28b7d8d03c741778cc", + "value": " 94.6k/94.6k [00:00<00:00, 1.16MB/s]" + } + }, + "fd4f5a1546e84d3f9741bb63a381b48f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + 
"flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}