mirror of
https://github.com/norandom/project_bookworm.git
synced 2025-01-13 01:53:43 +00:00
Example Notebook with LLMware CPU LLM and LangChain
This commit is contained in:
parent
ad0d782a9c
commit
69dd45cf3c
255
Local_CPU_LLM_Bling_Non_Interactive.ipynb
Normal file
255
Local_CPU_LLM_Bling_Non_Interactive.ipynb
Normal file
@ -0,0 +1,255 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ea47b0b7196331ed",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"# Use a local CPU Large Language Model (LLM) to generate text\n",
|
||||
"\n",
|
||||
"This is a basic LLM, which:\n",
|
||||
"\n",
|
||||
"* does not require a GPU\n",
|
||||
"* is not fine-tuned for a specific task\n",
|
||||
"* is not optimized for speed\n",
|
||||
"* is not optimized for memory usage\n",
|
||||
"* has a smaller model size\n",
|
||||
"* ...\n",
|
||||
"* is not as good as a GPU LLM\n",
|
||||
"* is not as good as a fine-tuned LLM\n",
|
||||
"* is not as good as a larger LLM\n",
|
||||
"* ...\n",
|
||||
"\n",
|
||||
"Its purpose is to allow on-premises and self-hosted use of LLMs. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "initial_id",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-03-17T11:26:30.714741Z",
|
||||
"start_time": "2024-03-17T11:26:30.711615Z"
|
||||
},
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# You need to manage the dependencies of LangChain with\n",
|
||||
"# the requirements.txt file. The versions are pinned.\n",
|
||||
"# %pip install -r requirements.txt"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c96a287c1fc724d2",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"## Use the Hugging Face pipeline with LLMware Bling\n",
|
||||
"\n",
|
||||
"* The Hugging Face pipeline is a convenient way to use a pre-trained model.\n",
|
||||
"* LLMware Bling is a CPU LLM.\n",
|
||||
"* Loading this model requires `trust_remote_code=True`, which lets `transformers` run the model's custom code downloaded from Hugging Face."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "2108b1c9373e0ec8",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-03-17T11:23:02.052134Z",
|
||||
"start_time": "2024-03-17T11:22:45.974223Z"
|
||||
},
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"loading file vocab.json from cache at None\n",
|
||||
"loading file merges.txt from cache at None\n",
|
||||
"loading file tokenizer.json from cache at /home/marius/.cache/huggingface/hub/models--llmware--bling-stable-lm-3b-4e1t-v0/snapshots/a9e4d8d478d76dd062d9acd01b6ce3417217a344/tokenizer.json\n",
|
||||
"loading file added_tokens.json from cache at None\n",
|
||||
"loading file special_tokens_map.json from cache at /home/marius/.cache/huggingface/hub/models--llmware--bling-stable-lm-3b-4e1t-v0/snapshots/a9e4d8d478d76dd062d9acd01b6ce3417217a344/special_tokens_map.json\n",
|
||||
"loading file tokenizer_config.json from cache at /home/marius/.cache/huggingface/hub/models--llmware--bling-stable-lm-3b-4e1t-v0/snapshots/a9e4d8d478d76dd062d9acd01b6ce3417217a344/tokenizer_config.json\n",
|
||||
"Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
|
||||
"loading configuration file config.json from cache at /home/marius/.cache/huggingface/hub/models--llmware--bling-stable-lm-3b-4e1t-v0/snapshots/a9e4d8d478d76dd062d9acd01b6ce3417217a344/config.json\n",
|
||||
"loading configuration file config.json from cache at /home/marius/.cache/huggingface/hub/models--llmware--bling-stable-lm-3b-4e1t-v0/snapshots/a9e4d8d478d76dd062d9acd01b6ce3417217a344/config.json\n",
|
||||
"Model config StableLMEpochConfig {\n",
|
||||
" \"_name_or_path\": \"llmware/bling-stable-lm-3b-4e1t-v0\",\n",
|
||||
" \"architectures\": [\n",
|
||||
" \"StableLMEpochForCausalLM\"\n",
|
||||
" ],\n",
|
||||
" \"auto_map\": {\n",
|
||||
" \"AutoConfig\": \"llmware/bling-stable-lm-3b-4e1t-v0--configuration_stablelm_epoch.StableLMEpochConfig\",\n",
|
||||
" \"AutoModelForCausalLM\": \"llmware/bling-stable-lm-3b-4e1t-v0--modeling_stablelm_epoch.StableLMEpochForCausalLM\"\n",
|
||||
" },\n",
|
||||
" \"bos_token_id\": 0,\n",
|
||||
" \"eos_token_id\": 0,\n",
|
||||
" \"hidden_act\": \"silu\",\n",
|
||||
" \"hidden_size\": 2560,\n",
|
||||
" \"initializer_range\": 0.02,\n",
|
||||
" \"intermediate_size\": 6912,\n",
|
||||
" \"max_position_embeddings\": 4096,\n",
|
||||
" \"model_type\": \"stablelm_epoch\",\n",
|
||||
" \"norm_eps\": 1e-05,\n",
|
||||
" \"num_attention_heads\": 32,\n",
|
||||
" \"num_heads\": 32,\n",
|
||||
" \"num_hidden_layers\": 32,\n",
|
||||
" \"num_key_value_heads\": 32,\n",
|
||||
" \"rope_pct\": 0.25,\n",
|
||||
" \"rope_theta\": 10000,\n",
|
||||
" \"rotary_scaling_factor\": 1.0,\n",
|
||||
" \"tie_word_embeddings\": false,\n",
|
||||
" \"torch_dtype\": \"bfloat16\",\n",
|
||||
" \"transformers_version\": \"4.38.2\",\n",
|
||||
" \"use_cache\": true,\n",
|
||||
" \"vocab_size\": 50304\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"loading weights file pytorch_model.bin from cache at /home/marius/.cache/huggingface/hub/models--llmware--bling-stable-lm-3b-4e1t-v0/snapshots/a9e4d8d478d76dd062d9acd01b6ce3417217a344/pytorch_model.bin\n",
|
||||
"Generate config GenerationConfig {\n",
|
||||
" \"bos_token_id\": 0,\n",
|
||||
" \"eos_token_id\": 0\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"All model checkpoint weights were used when initializing StableLMEpochForCausalLM.\n",
|
||||
"\n",
|
||||
"All the weights of StableLMEpochForCausalLM were initialized from the model checkpoint at llmware/bling-stable-lm-3b-4e1t-v0.\n",
|
||||
"If your task is similar to the task the model of the checkpoint was trained on, you can already use StableLMEpochForCausalLM for predictions without further training.\n",
|
||||
"loading configuration file generation_config.json from cache at /home/marius/.cache/huggingface/hub/models--llmware--bling-stable-lm-3b-4e1t-v0/snapshots/a9e4d8d478d76dd062d9acd01b6ce3417217a344/generation_config.json\n",
|
||||
"Generate config GenerationConfig {\n",
|
||||
" \"bos_token_id\": 0,\n",
|
||||
" \"eos_token_id\": 0\n",
|
||||
"}\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline\n",
|
||||
"from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline\n",
|
||||
"\n",
|
||||
"model_id = \"llmware/bling-stable-lm-3b-4e1t-v0\"\n",
|
||||
"\n",
|
||||
"# Ensure the directory for saving models is created and specified in your environment\n",
|
||||
"# This is more about ensuring that the model download doesn't prompt for storage location or confirmation\n",
|
||||
"import os\n",
|
||||
"from transformers import logging\n",
|
||||
"\n",
|
||||
"# Optional: raise the log verbosity to INFO to see details about the download and loading process\n",
|
||||
"logging.set_verbosity_info()\n",
|
||||
"\n",
|
||||
"# Optional: to use a non-default cache directory, set TRANSFORMERS_CACHE in your environment before starting the kernel\n",
|
||||
"# os.environ[\"TRANSFORMERS_CACHE\"] = \"/path/to/your/preferred/cache/directory\"\n",
|
||||
"\n",
|
||||
"tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
|
||||
"model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)\n",
|
||||
"\n",
|
||||
"pipe = pipeline(\"text-generation\", model=model, tokenizer=tokenizer, max_new_tokens=500)\n",
|
||||
"hf = HuggingFacePipeline(pipeline=pipe)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "904e6bf72c2ecf27",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"## Use the Hugging Face pipeline with LLMware Bling via LangChain\n",
|
||||
"\n",
|
||||
"* This is a basic prompt template with LangChain\n",
|
||||
"* The question is passed to the model via a chain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "1827b8c3423066b0",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-03-17T11:23:25.839334Z",
|
||||
"start_time": "2024-03-17T11:23:02.070024Z"
|
||||
},
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Disabling tokenizer parallelism, we're using DataLoader multithreading already\n",
|
||||
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.prompts import PromptTemplate\n",
|
||||
"\n",
|
||||
"template = \"\"\"Question: {question}\n",
|
||||
"\n",
|
||||
"Answer: Let's think step by step.\"\"\"\n",
|
||||
"prompt = PromptTemplate.from_template(template)\n",
|
||||
"\n",
|
||||
"chain = prompt | hf\n",
|
||||
"\n",
|
||||
"question = \"What is electroencephalography?\"\n",
|
||||
"\n",
|
||||
"test = chain.invoke({\"question\": question})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "ac2a19b6fb9aa3e2",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-03-17T11:23:25.847308Z",
|
||||
"start_time": "2024-03-17T11:23:25.841002Z"
|
||||
},
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" First, electroencephalography (EEG) is a medical test that measures electrical activity in the brain. Second, EEG is a type of electrodiagnostic test. Third, electrodiagnostic tests are used to evaluate neurological conditions.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(test)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 2
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython2",
|
||||
"version": "2.7.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
Loading…
Reference in New Issue
Block a user