v0.1.3-pre1 first bits of full pipeline code
christianazinn committed Apr 18, 2024
1 parent 47682ad commit 6e07762
Showing 9 changed files with 279 additions and 38 deletions.
9 changes: 6 additions & 3 deletions Homepage.py
@@ -1,5 +1,5 @@
# main.py
- # Version 0.1.2: Queue fully implemented, primary pipeline is functional
+ # Version 0.1.3-pre1: First bits of full pipeline UI code
import streamlit as st
st.set_page_config(layout="wide")
from st_pages import Page, Section, show_pages, add_indentation
@@ -11,15 +11,18 @@
[
Page("Homepage.py", "Home", ":house:"),
Page("pages/Docs.py", "Docs", ":books:"),
Section("Manually convert models", icon=":arrows_counterclockwise:"),
Section("Manually convert models", icon=":open_hands:"),
Page("pages/Full_Pipeline.py", "Full Pipeline Queue", ":arrows_clockwise:"),
Page("pages/Queue_GUI.py", "Queue GUI", ":arrows_counterclockwise:"),
Section("Manually convert models - Legacy", icon=":cd:"),
Page("pages/Hugging_Face_Downloader.py", "Download model", ":inbox_tray:"),
Page("pages/Convert_Safetensors.py", "Safetensors to GGUF", ":gem:"),
Page("pages/Create_IMatrix.py", "Create Importance Matrix", ":chart_with_upwards_trend:"),
Page("pages/Quantize_GGUF.py", "Quantize GGUF", ":heavy_plus_sign:" ),
Page("pages/Upload_Converted_To_HF.py", "Upload model to HuggingFace", ":outbox_tray:"),
Section("Extra Tools", icon=":toolbox:"),
Page("pages/HF_Token_Encrypter.py", "Security", ":lock:"),
- ]
+ ]
)

add_indentation()
3 changes: 1 addition & 2 deletions pages/Convert_Safetensors.py
@@ -1,4 +1,4 @@
- # Last updated v0.1.2
+ # Last updated v0.1.3-pre1
# IMPORTS ---------------------------------------------------------------------------------
import streamlit as st
st.set_page_config(layout="wide")
@@ -61,7 +61,6 @@ def queue_command(model_folder, out_type, input_dir, target_dir, vocab, ctx, pad


# UI CODE ---------------------------------------------------------------------------------
- # TODO can you do gpu offloading?

add_indentation()

219 changes: 219 additions & 0 deletions pages/Full_Pipeline.py
@@ -0,0 +1,219 @@
# Last updated v0.1.3-pre1
# IMPORTS ---------------------------------------------------------------------------------
import streamlit as st
st.set_page_config(layout="wide")
from st_pages import add_indentation
from util.constants import config
from util.scheduler import *
from util.paths import *
from util.utils import *

# FUNCTIONS ---------------------------------------------------------------------------------

# UI CODE ---------------------------------------------------------------------------------

add_indentation()

st.title("Full Pipeline Queue")

# Let the user decide which steps to use:
# Download can be used on its own
# Convert must either be used with Download or with a supplied model folder but is not required
# Quantize must either be used with Convert or with a supplied high-precision model but is not required
# Upload must be used with Quantize but is not required

st.markdown("Select which steps to use.")
optcols = st.columns(5)
with optcols[0]:
download = st.checkbox("Download", value=True)

with optcols[1]:
convert = st.checkbox("Convert")

with optcols[2]:
imatrix = st.checkbox("Imatrix")

with optcols[3]:
quantize = st.checkbox("Quantize")

with optcols[4]:
upload = st.checkbox("Upload")

# Download
# DONE
if download:
st.write("----")
st.markdown("### Download from HuggingFace")
model_name = st.text_input("Download PyTorch models from Huggingface", "Use the HuggingfaceUsername/Modelname")
if st.button("Get File List"):
_, file_links = get_files_from_repo(f"https://huggingface.co/api/models/{model_name}/tree/main", model_name)
if file_links:
st.session_state['file_links_dict'] = file_links
st.session_state['model_name'] = model_name
files_info = "\n".join(f"{name}, Size: {size}" for name, size in file_links.items())
st.text_area("Files Information", files_info, height=300)
else:
st.error("Unable to retrieve file links.")
if 'file_links_dict' in st.session_state:
del st.session_state['file_links_dict']
del st.session_state['model_name']
else: # if download is not selected, remove the file_links_dict and model_name from the session state
if 'file_links_dict' in st.session_state:
del st.session_state['file_links_dict']
del st.session_state['model_name']

# Convert
# TODO
if convert:
st.write("----")
st.markdown("### Convert Safetensors to High Precision")
if download: # automatically determine where download.py will download it
if 'model_name' in st.session_state:
original_model_name = st.session_state['model_name'].split("/")[1]
convert_model_folder = models_dir() / original_model_name
st.markdown(f"Using to-be-created model directory `{convert_model_folder}`")
else:
st.error("Please choose a model to download first.")
else:
convert_model_folders = [f.name for f in models_dir().iterdir() if f.is_dir()] if models_dir().exists() else ["Directory not found"]
convert_model_folder = st.selectbox("Select a model folder", convert_model_folders, key="convert_select_folder")

# write conversion options (fp16, fp32, int8)
conversion_cols = st.columns(len(config['conversion_quants']))
conversion_options = {}
for i in range(0, len(config['conversion_quants'])):
with conversion_cols[i]:
option = config['conversion_quants'][i]
conversion_options.update({option: st.checkbox(label=option, key=f"convert_{option}")})

# write cli flags (vocab, ctx, pad, skip)
with st.expander("Options/flags for conversion"):
conversion_optcols = st.columns(4)

with conversion_optcols[0]:
conversion_use_vocab = st.checkbox("Change vocab type, --vocabtype", key="convert_use_vocab")
conversion_vocab = st.selectbox("Vocab type", ["spm", "bpe", "hfft"], index=0, disabled=not conversion_use_vocab, key="convert_vocab")

with conversion_optcols[1]:
conversion_use_c = st.checkbox("Change context length, --ctx", key="convert_use_c")
conversion_c = st.number_input("Size of the prompt context, -c", value=2048, disabled=not conversion_use_c, key="convert_c")

with conversion_optcols[2]:
conversion_use_pad = st.checkbox("Pad vocab, --pad-vocab", key="convert_pad")

with conversion_optcols[3]:
conversion_use_skip = st.checkbox("Skip unknown, --skip-unknown", key="convert_skip")
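# Illustrative only (not built in this commit): the checkboxes above map to
# llama.cpp convert.py flags, so a queued conversion would resemble
#   python convert.py <model_folder> --outtype f16 --vocabtype spm --ctx 2048 --pad-vocab --skip-unknown
# with each flag appended only when its checkbox is ticked, as the legacy
# Convert_Safetensors.py page does via its queue_command() helper.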

# Imatrix
# TODO
if imatrix:
st.write("----")
st.markdown("### Imatrix Creation")
if convert:
num_selected_high_precision = sum(1 for value in conversion_options.values() if value)
if num_selected_high_precision == 0:
st.error('Please select at least one high precision conversion option, or deselect the "Convert" option.')
elif num_selected_high_precision == 1:
for option, selected in conversion_options.items():
if selected:
imatrix_selected_gguf_file = get_high_precision_outfile(convert_model_folder, option)
else:
high_precision_gguf_files = [get_high_precision_outfile(convert_model_folder, option) for option, selected in conversion_options.items() if selected]
imatrix_selected_gguf_file = st.selectbox("Select a high precision GGUF file to be created", high_precision_gguf_files, key="imatrix_select_gguf_multiple")
else:
high_precision_gguf_files = list_gguf_files()
imatrix_selected_gguf_file = st.selectbox("Select a high precision GGUF File", high_precision_gguf_files, key="imatrix_select_gguf")

data_files = list_data_files()
selected_data_file = st.selectbox("Select training data", data_files, key="imatrix_select_data")

with st.expander("Options/flags for imatrix creation"):
imatrix_optcols = st.columns(4)

with imatrix_optcols[0]:
imatrix_use_c = st.checkbox("Change context length", key="imatrix_use_c")
imatrix_c = st.number_input("Size of the prompt context, -c", value=512, disabled=not imatrix_use_c, key="imatrix_c")

with imatrix_optcols[1]:
imatrix_use_b = st.checkbox("Change processing batch size", key="imatrix_use_b")
imatrix_b = st.number_input("Logical maximum batch size, -b", value=2048, disabled=not imatrix_use_b, key="imatrix_b")

with imatrix_optcols[2]:
# only activate the ngl field if this box is checked
imatrix_use_ngl = st.checkbox("Use GPU offloading", key="imatrix_use_ngl")
imatrix_ngl = st.number_input("Number of GPU offloaded layers, -ngl", value=0, disabled=not imatrix_use_ngl, key="imatrix_ngl")

with imatrix_optcols[3]:
imatrix_use_t = st.checkbox("Change thread count", key="imatrix_use_t")
imatrix_t = st.number_input("Number of threads to use, -t", value=4, disabled=not imatrix_use_t, key="imatrix_t")
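# Illustrative only (not built in this commit): the flags above mirror
# llama.cpp's imatrix tool, so a queued job would resemble (paths hypothetical)
#   ./imatrix -m <model>-fp16.GGUF -f <training_data> -o <model>.imatrix -c 512 -b 2048 -ngl 0 -t 4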

# Quantize
# TODO
if quantize:
st.write("----")
st.markdown("### Quantize GGUF")
if convert:
num_selected_high_precision = sum(1 for value in conversion_options.values() if value)
if num_selected_high_precision == 0:
st.error('Please select at least one high precision conversion option, or deselect the "Convert" option.')
elif num_selected_high_precision == 1:
for option, selected in conversion_options.items():
if selected:
quantize_selected_gguf_file = get_high_precision_outfile(convert_model_folder, option)
# TODO rework get_high_precision_outfile when you bring trigger_commands over
else:
high_precision_gguf_files = [get_high_precision_outfile(convert_model_folder, option) for option, selected in conversion_options.items() if selected]
quantize_selected_gguf_file = st.selectbox("Select a high precision GGUF file to be created", high_precision_gguf_files, key="quantize_select_gguf_multiple")
else:
high_precision_gguf_files = list_gguf_files()
quantize_selected_gguf_file = st.selectbox("Select a high precision GGUF File", high_precision_gguf_files, key="quantize_select_gguf")

icol, kcol, lcol = st.columns(3)
with icol:
st.markdown("### I-Quants")
ioptions = {option: st.checkbox(label=option, key=f"i_quantize_{option}") for option in config['quantization_I']}

with kcol:
st.markdown("### K-Quants")
koptions = {option: st.checkbox(label=option, key=f"k_quantize_{option}") for option in config['quantization_K']}

with lcol:
st.markdown("### Legacy Quants")
legacy_options = {option: st.checkbox(label=option, key=f"legacy_quantize_{option}") for option in config['quantization_legacy']}

with st.expander("Options/parameters for quantization"):
quantize_optcols = st.columns(2)

with quantize_optcols[0]:
quantize_use_imatrix = st.checkbox("Use importance matrix, --imatrix", key="quantize_imatrix")
if imatrix:
# TODO make it automatically determine the imatrix file
# TODO likewise rework it when you bring over trigger_commands
pass
else:
imatrix_files = list_imatrix_files()
# TODO the disabling is not working properly
selected_imatrix = st.selectbox("Select imatrix file", imatrix_files, disabled=not quantize_use_imatrix, key="quantize_select_imatrix")

with quantize_optcols[1]:
quantize_use_nthreads = st.checkbox("Change thread count", key="quantize_use_t")
quantize_t = st.number_input("Number of threads to use, -nthreads", value=4, disabled=not quantize_use_nthreads, key="quantize_t")
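# Illustrative only (not built in this commit): each selected quant would
# queue a llama.cpp quantize call along the lines of (paths hypothetical)
#   ./quantize --imatrix <file>.imatrix <model>-fp16.GGUF <model>-q4_k_m.GGUF Q4_K_M 4
# where --imatrix and the trailing thread count are included only when the
# corresponding options above are enabled.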

# Upload
# TODO
if upload:
st.write("----")
st.markdown("### Upload to HuggingFace")
if quantize or convert:
# make it automatically determine the quantized file, hp or mp
st.markdown("Placeholder - QorC")
else:
# let the user choose a local quantized folder, hp or mp
st.markdown("Placeholder - NQorC")
# still needs things like the HF token
st.markdown("Placeholder")

if st.button("Queue All"):
# manage logic to block queueing if input is bad
# manage logic to queue the correct steps
pass
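# A sketch of the validation this stub will need, per the dependency rules
# at the top of the page; this helper is hypothetical, not part of the
# commit, and simply reuses the checkbox variables defined above.
def validate_pipeline_selection():
    # Convert needs a source: a fresh download or a selected local folder.
    if convert and not download and not convert_model_folder:
        return "Convert requires a downloaded model or a local model folder."
    # Imatrix and Quantize need a high-precision GGUF, converted or on disk.
    if (imatrix or quantize) and not convert and not list_gguf_files():
        return "Imatrix/Quantize require a high-precision GGUF file."
    # Upload needs output from an earlier step.
    if upload and not (quantize or convert):
        return "Upload requires Quantize or Convert to be selected."
    return None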
4 changes: 2 additions & 2 deletions pages/Hugging_Face_Downloader.py
@@ -1,11 +1,11 @@
- # Last updated v0.1.2
+ # Last updated v0.1.3-pre1
# IMPORTS ---------------------------------------------------------------------------------
import requests, streamlit as st
st.set_page_config(layout="wide")
from pathlib import Path
from st_pages import add_indentation
from util.scheduler import *
from util.paths import *
+ from util.utils import get_files_from_repo

# FUNCTIONS ---------------------------------------------------------------------------------

3 changes: 1 addition & 2 deletions pages/Quantize_GGUF.py
@@ -1,4 +1,4 @@
- # Last updated v0.1.2
+ # Last updated v0.1.3-pre1
# IMPORTS ---------------------------------------------------------------------------------
import os, streamlit as st
st.set_page_config(layout="wide")
@@ -51,7 +51,6 @@ def queue_command(source_path, output_path, option, imatrix, nthreads):
return " ".join(command)

# UI CODE ---------------------------------------------------------------------------------
- # TODO can you do gpu offloading?

add_indentation()

4 changes: 2 additions & 2 deletions pages/Queue_GUI.py
@@ -1,4 +1,4 @@
- # Last updated v0.1.2
+ # Last updated v0.1.3-pre1
# IMPORTS ---------------------------------------------------------------------------------
import streamlit as st
st.set_page_config(layout="wide")
@@ -43,7 +43,7 @@ def update_active():
st.rerun()

# functionality you need:
- # TODO be able to queue later jobs for files that don't yet exist (i.e. convert then quantize) and handle errors in ordering
+ # TODO eventually be able to handle errors if prequeued (Full_Pipeline.py) jobs are deleted and it breaks the chain

st.write('----')

16 changes: 14 additions & 2 deletions requirements.txt
@@ -7,6 +7,18 @@ torch~=2.1.1
einops~=0.7.0
numpy~=1.24.4
sentencepiece~=0.1.98
- transformers>=4.35.2,<5.0.0
+ transformers>=4.37.2,<5.0.0
gguf>=0.1.0
- protobuf>=4.21.0,<5.0.0
+ protobuf>=4.21.0,<5.0.0
+ datasets>=2.14.3
+ accelerate>=0.27.2
+ peft>=0.10.0
+ trl>=0.8.1
+ gradio>=4.0.0
+ scipy
+ uvicorn
+ pydantic
+ fastapi
+ sse-starlette
+ matplotlib
+ fire
27 changes: 2 additions & 25 deletions util/paths.py
@@ -1,4 +1,4 @@
- # Last updated v0.1.2
+ # Last updated v0.1.3-pre1
# IMPORTS ---------------------------------------------------------------------------------
from pathlib import Path
import streamlit as st, os
@@ -49,27 +49,4 @@ def list_data_files():
for file in os.listdir(data_dir()):
if file.lower().endswith('.dat') or file.lower().endswith('.txt'):
data_files.append(file)
- return data_files
- 
- # get the files from the Hugging Face repo - kept basically the same implementation as in the original
- def get_files_from_repo(url, repo_name):
- try:
- response = requests.get(url)
- if response.status_code == 200:
- files_info = response.json()
- file_info_dict = {}
- file_links_dict = {}
- 
- base_url = f"https://huggingface.co/{repo_name}/resolve/main/"
- for file in files_info:
- name = file.get('path', 'Unknown')
- size = file.get('size', 0)
- human_readable_size = f"{size / 1024 / 1024:.2f} MB"
- file_info_dict[name] = human_readable_size
- file_links_dict[name] = base_url + name
- 
- return file_info_dict, file_links_dict
- else:
- return {}, {}
- except Exception as e:
- return {}, {}
+ return data_files
32 changes: 32 additions & 0 deletions util/utils.py
@@ -0,0 +1,32 @@
# Last updated v0.1.3-pre1
# IMPORTS ---------------------------------------------------------------------------------
import requests
from util.paths import models_dir

# FUNCTIONS ---------------------------------------------------------------------------------
# get the files from the Hugging Face repo - kept basically the same implementation as in the original
def get_files_from_repo(url, repo_name):
try:
response = requests.get(url)
if response.status_code == 200:
files_info = response.json()
file_info_dict = {}
file_links_dict = {}

base_url = f"https://huggingface.co/{repo_name}/resolve/main/"
for file in files_info:
name = file.get('path', 'Unknown')
size = file.get('size', 0)
human_readable_size = f"{size / 1024 / 1024:.2f} MB"
file_info_dict[name] = human_readable_size
file_links_dict[name] = base_url + name

return file_info_dict, file_links_dict
else:
return {}, {}
except Exception as e:
return {}, {}

# Return a high precision outfile path given model folder and conversion option for Full_Pipeline.py
def get_high_precision_outfile(model_folder, option):
return str(models_dir() / model_folder / "High-Precision-Quantization" / f"{model_folder}-{option.lower()}.GGUF")
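# For reference, a standalone sanity check of the two helpers above; the
# repo name and the "FP16" quant label are illustrative, and this assumes
# the project root is on the import path.
if __name__ == "__main__":
    repo = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
    info, links = get_files_from_repo(
        f"https://huggingface.co/api/models/{repo}/tree/main", repo)
    for name, size in info.items():
        print(name, size)  # file name and human-readable size in MB
    # Path where the fp16 conversion would be written:
    print(get_high_precision_outfile("TinyLlama-1.1B-Chat-v1.0", "FP16"))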
