Turbine Tank Turbine Changes #437

Merged Feb 29, 2024 · 24 commits (diff shows changes from 22 commits)
4 changes: 4 additions & 0 deletions models/requirements.txt
@@ -5,3 +5,7 @@ transformers==4.37.1
accelerate
diffusers==0.24.0
brevitas @ git+https://github.com/Xilinx/brevitas.git@6695e8df7f6a2c7715b9ed69c4b78157376bb60b
# turbine tank downloading/uploading
azure-storage-blob
# microsoft/phi model
einops
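
The turbine_tank module that consumes this new azure-storage-blob dependency is not part of the diff; purely as a sketch, an uploadToBlobStorage helper along the lines the exporters below assume might look like this (the container name and the connection-string environment variable are assumptions, not taken from this PR):

import os
from azure.storage.blob import BlobServiceClient

def uploadToBlobStorage(file_path: str, blob_name: str) -> None:
    # Assumption: credentials come from a connection string in the environment.
    connection_string = os.environ["AZURE_STORAGE_CONNECTION_STRING"]
    service = BlobServiceClient.from_connection_string(connection_string)
    # Assumption: container name; the real one would live in turbine_tank.py.
    blob_client = service.get_blob_client(container="turbine-tank", blob=blob_name)
    with open(file_path, "rb") as data:
        blob_client.upload_blob(data, overwrite=True)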
2 changes: 2 additions & 0 deletions models/setup.py
@@ -61,5 +61,7 @@ def load_version_info():
"transformers==4.37.1",
"accelerate",
"diffusers==0.24.0",
"azure-storage-blob",
"einops",
],
)
12 changes: 12 additions & 0 deletions models/turbine_models/custom_models/sd_inference/clip.py
@@ -16,6 +16,7 @@
import torch
import torch._dynamo as dynamo
from transformers import CLIPTextModel, CLIPTokenizer
from turbine_models.turbine_tank import turbine_tank

import argparse

@@ -57,13 +58,15 @@ def export_clip_model(
device=None,
target_triple=None,
max_alloc=None,
upload_ir=False,
):
# Load the tokenizer and text encoder to tokenize and encode the text.
tokenizer = CLIPTokenizer.from_pretrained(
hf_model_name,
subfolder="tokenizer",
token=hf_auth_token,
)

text_encoder_model = CLIPTextModel.from_pretrained(
hf_model_name,
subfolder="text_encoder",
@@ -94,6 +97,15 @@ def main(self, inp=AbstractTensor(1, 77, dtype=torch.int64)):

module_str = str(CompiledModule.get_mlir_module(inst))
safe_name = utils.create_safe_name(hf_model_name, "-clip")
if upload_ir:
with open(f"{safe_name}.mlir", "w+") as f:
f.write(module_str)
model_name_upload = hf_model_name.replace("/", "_")
model_name_upload += "-clip"
turbine_tank.uploadToBlobStorage(
str(os.path.abspath(f"{safe_name}.mlir")),
f"{model_name_upload}/{model_name_upload}.mlir",
)
if compile_to != "vmfb":
return module_str, tokenizer
else:
12 changes: 12 additions & 0 deletions models/turbine_models/custom_models/sd_inference/schedulers.py
@@ -23,6 +23,8 @@
import safetensors
import argparse

from turbine_models.turbine_tank import turbine_tank

parser = argparse.ArgumentParser()
parser.add_argument(
"--hf_auth_token", type=str, help="The Hugging Face auth token, required"
@@ -111,6 +113,7 @@ def export_scheduler(
device=None,
target_triple=None,
max_alloc=None,
upload_ir=False,
):
mapper = {}
utils.save_external_weights(
@@ -145,6 +148,15 @@ def main(

module_str = str(CompiledModule.get_mlir_module(inst))
safe_name = utils.create_safe_name(hf_model_name, "-scheduler")
if upload_ir:
with open(f"{safe_name}.mlir", "w+") as f:
f.write(module_str)
model_name_upload = hf_model_name.replace("/", "_")
model_name_upload = model_name_upload + "-scheduler"
turbine_tank.uploadToBlobStorage(
str(os.path.abspath(f"{safe_name}.mlir")),
f"{model_name_upload}/{model_name_upload}.mlir",
)
if compile_to != "vmfb":
return module_str
else:
11 changes: 11 additions & 0 deletions models/turbine_models/custom_models/sd_inference/unet.py
@@ -18,6 +18,7 @@

import safetensors
import argparse
from turbine_models.turbine_tank import turbine_tank

parser = argparse.ArgumentParser()
parser.add_argument(
@@ -90,6 +91,7 @@ def export_unet_model(
device=None,
target_triple=None,
max_alloc=None,
upload_ir=False,
):
mapper = {}
utils.save_external_weights(
@@ -125,6 +127,15 @@ def main(

module_str = str(CompiledModule.get_mlir_module(inst))
safe_name = utils.create_safe_name(hf_model_name, "-unet")
if upload_ir:
with open(f"{safe_name}.mlir", "w+") as f:
f.write(module_str)
model_name_upload = hf_model_name.replace("/", "_")
model_name_upload += "-unet"
turbine_tank.uploadToBlobStorage(
str(os.path.abspath(f"{safe_name}.mlir")),
f"{model_name_upload}/{model_name_upload}.mlir",
)
if compile_to != "vmfb":
return module_str
else:
11 changes: 11 additions & 0 deletions models/turbine_models/custom_models/sd_inference/vae.py
@@ -18,6 +18,7 @@

import safetensors
import argparse
from turbine_models.turbine_tank import turbine_tank

parser = argparse.ArgumentParser()
parser.add_argument(
@@ -89,6 +90,7 @@ def export_vae_model(
target_triple=None,
max_alloc=None,
variant="decode",
upload_ir=False,
):
mapper = {}
utils.save_external_weights(
@@ -113,6 +115,15 @@ def main(self, inp=AbstractTensor(*sample, dtype=torch.float32)):

module_str = str(CompiledModule.get_mlir_module(inst))
safe_name = utils.create_safe_name(hf_model_name, "-vae")
if upload_ir:
with open(f"{safe_name}.mlir", "w+") as f:
f.write(module_str)
model_name_upload = hf_model_name.replace("/", "_")
model_name_upload = model_name_upload + "-vae-" + variant
[Review thread]
Member: nit: use underscores instead of dashes for consistency here and elsewhere
Contributor Author: the underscore is only used to separate the org and model name. The rest all uses '-' (CompVis_stable-diffusion-v1-4-vae-decode).

turbine_tank.uploadToBlobStorage(
str(os.path.abspath(f"{safe_name}.mlir")),
f"{model_name_upload}/{model_name_upload}.mlir",
)
if compile_to != "vmfb":
return module_str
else:
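
The same write-then-upload block recurs in clip.py, schedulers.py, and unet.py with only the suffix changing. Following the naming convention discussed in the review thread above, a quick sketch of the blob paths the VAE exporter produces (model name taken from the SD tests further down):

hf_model_name = "CompVis/stable-diffusion-v1-4"
for variant in ("decode", "encode"):
    name = hf_model_name.replace("/", "_") + "-vae-" + variant
    print(f"{name}/{name}.mlir")
# CompVis_stable-diffusion-v1-4-vae-decode/CompVis_stable-diffusion-v1-4-vae-decode.mlir
# CompVis_stable-diffusion-v1-4-vae-encode/CompVis_stable-diffusion-v1-4-vae-encode.mlir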
22 changes: 17 additions & 5 deletions models/turbine_models/custom_models/stateless_llama.py
@@ -2,6 +2,7 @@
import sys
import re
import json
from turbine_models.turbine_tank import turbine_tank

os.environ["TORCH_LOGS"] = "dynamic"
from transformers import AutoTokenizer, AutoModelForCausalLM
@@ -107,7 +108,14 @@ def export_transformer_model(
vulkan_max_allocation=None,
streaming_llm=False,
vmfb_path=None,
upload_ir=False,
):
tokenizer = AutoTokenizer.from_pretrained(
hf_model_name,
use_fast=False,
token=hf_auth_token,
)

mod = AutoModelForCausalLM.from_pretrained(
hf_model_name,
torch_dtype=torch.float,
@@ -121,11 +129,7 @@
if precision == "f16":
mod = mod.half()
dtype = torch.float16
tokenizer = AutoTokenizer.from_pretrained(
hf_model_name,
use_fast=False,
token=hf_auth_token,
)

# TODO: generate these values instead of magic numbers
NUM_LAYERS = mod.config.num_hidden_layers
HEADS = getattr(mod.config, "num_key_value_heads", None)
@@ -319,6 +323,14 @@ def evict_kvcache_space(self):
module_str = str(CompiledModule.get_mlir_module(inst))
safe_name = hf_model_name.split("/")[-1].strip()
safe_name = re.sub("-", "_", safe_name)
if upload_ir:
with open(f"{safe_name}.mlir", "w+") as f:
f.write(module_str)
model_name_upload = hf_model_name.replace("/", "_")
turbine_tank.uploadToBlobStorage(
str(os.path.abspath(f"{safe_name}.mlir")),
f"{model_name_upload}/{model_name_upload}.mlir",
)
if compile_to != "vmfb":
return module_str, tokenizer
else:
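
Note that stateless_llama.py derives two different names above: the local .mlir filename replaces dashes in the model's short name with underscores, while the blob path keeps the full Hugging Face id with only the '/' replaced. A sketch with a hypothetical model id:

import os
import re

hf_model_name = "meta-llama/Llama-2-7b-chat-hf"  # hypothetical id, for illustration
safe_name = re.sub("-", "_", hf_model_name.split("/")[-1].strip())
local_path = os.path.abspath(f"{safe_name}.mlir")  # .../Llama_2_7b_chat_hf.mlir
model_name_upload = hf_model_name.replace("/", "_")
blob_path = f"{model_name_upload}/{model_name_upload}.mlir"
# meta-llama_Llama-2-7b-chat-hf/meta-llama_Llama-2-7b-chat-hf.mlir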
58 changes: 44 additions & 14 deletions models/turbine_models/model_builder.py
@@ -1,6 +1,9 @@
from transformers import AutoModel, AutoTokenizer, AutoConfig
import torch
import shark_turbine.aot as aot
from turbine_models.turbine_tank import turbine_tank
import os
import re


class HFTransformerBuilder:
@@ -18,36 +21,45 @@ class HFTransformerBuilder:
def __init__(
self,
example_input: torch.Tensor,
hf_id: str,
hf_id: str = None,
auto_model: AutoModel = AutoModel,
auto_tokenizer: AutoTokenizer = None,
auto_config: AutoConfig = None,
hf_auth_token=None,
upload_ir=False,
model=None,
model_type: str = None,
compile_to_vmfb: bool = None,
) -> None:
self.example_input = example_input
self.hf_id = hf_id
self.auto_model = auto_model
self.auto_tokenizer = auto_tokenizer
self.auto_config = auto_config
self.hf_auth_token = hf_auth_token
self.model = None
self.model = model
self.tokenizer = None
self.build_model()
self.upload_ir = upload_ir
self.model_type = model_type
self.compile_to_vmfb = compile_to_vmfb
if self.model is None:
self.build_model()

def build_model(self) -> None:
"""
Builds a PyTorch model using Hugging Face's transformers library.
"""
# TODO: check cloud storage for existing ir
self.model = self.auto_model.from_pretrained(
self.hf_id, token=self.hf_auth_token, config=self.auto_config
)
if self.auto_tokenizer is not None:
self.tokenizer = self.auto_tokenizer.from_pretrained(
self.hf_id, token=self.hf_auth_token
if self.hf_id:
self.model = self.auto_model.from_pretrained(
self.hf_id, token=self.hf_auth_token, config=self.auto_config
)
else:
self.tokenizer = None
if self.auto_tokenizer is not None:
self.tokenizer = self.auto_tokenizer.from_pretrained(
self.hf_id, token=self.hf_auth_token
)
else:
self.tokenizer = None

def get_compiled_module(self, save_to: str = None) -> aot.CompiledModule:
"""
@@ -59,6 +71,24 @@ def get_compiled_module(self, save_to: str = None) -> aot.CompiledModule:
Returns:
aot.CompiledModule: The compiled module binary.
"""
module = aot.export(self.model, self.example_input)
compiled_binary = module.compile(save_to=save_to)
return compiled_binary
if self.model_type == "hf_seq2seq":
module = aot.export(self.model, *self.example_input)
else:
module = aot.export(self.model, self.example_input)
if self.hf_id:
module_str = str(module.mlir_module)
safe_name = self.hf_id.split("/")[-1].strip()
safe_name = re.sub("-", "_", safe_name)
if self.upload_ir:
with open(f"{safe_name}.mlir", "w+") as f:
f.write(module_str)
model_name_upload = self.hf_id.replace("/", "_")
turbine_tank.uploadToBlobStorage(
str(os.path.abspath(f"{safe_name}.mlir")),
f"{model_name_upload}/{model_name_upload}.mlir",
)
os.remove(f"{safe_name}.mlir")
# Skip compiling when compile_to_vmfb is explicitly set to False (IR-only runs).
if self.compile_to_vmfb is not None and not self.compile_to_vmfb:
return
compiled_binary = module.compile(save_to=save_to)
return compiled_binary
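
With the new constructor parameters, a caller can either hand HFTransformerBuilder a pre-built model or let it pull one from Hugging Face by id. A usage sketch, gated on the same environment variable the tests below use (the model id and input shape here are illustrative, not from this PR):

import os
import torch
from turbine_models.model_builder import HFTransformerBuilder

upload = os.environ.get("TURBINE_TANK_ACTION", "not_upload") == "upload"
builder = HFTransformerBuilder(
    example_input=torch.ones(1, 128, dtype=torch.int64),  # illustrative shape
    hf_id="bert-base-uncased",  # illustrative model id
    upload_ir=upload,
)
compiled_binary = builder.get_compiled_module(save_to=None)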
10 changes: 10 additions & 0 deletions models/turbine_models/tests/sd_test.py
@@ -68,6 +68,7 @@

class StableDiffusionTest(unittest.TestCase):
def testExportClipModel(self):
upload_ir_var = os.environ.get("TURBINE_TANK_ACTION", "not_upload")
with self.assertRaises(SystemExit) as cm:
clip.export_clip_model(
# This is a public model, so no auth required
@@ -77,6 +78,7 @@ def testExportClipModel(self):
"safetensors",
"stable_diffusion_v1_4_clip.safetensors",
"cpu",
upload_ir=upload_ir_var == "upload",
)
self.assertEqual(cm.exception.code, None)
arguments["external_weight_path"] = "stable_diffusion_v1_4_clip.safetensors"
@@ -98,6 +100,7 @@ def testExportClipModel(self):
os.remove("stable_diffusion_v1_4_clip.vmfb")

def testExportUnetModel(self):
upload_ir_var = os.environ.get("TURBINE_TANK_ACTION", "not_upload")
with self.assertRaises(SystemExit) as cm:
unet.export_unet_model(
unet_model,
@@ -111,6 +114,7 @@ def testExportUnetModel(self):
"safetensors",
"stable_diffusion_v1_4_unet.safetensors",
"cpu",
upload_ir=upload_ir_var == "upload",
)
self.assertEqual(cm.exception.code, None)
arguments["external_weight_path"] = "stable_diffusion_v1_4_unet.safetensors"
@@ -148,6 +152,7 @@ def testExportUnetModel(self):
os.remove("stable_diffusion_v1_4_unet.vmfb")

def testExportVaeModelDecode(self):
upload_ir_var = os.environ.get("TURBINE_TANK_ACTION", "not_upload")
with self.assertRaises(SystemExit) as cm:
vae.export_vae_model(
vae_model,
@@ -162,6 +167,7 @@ def testExportVaeModelDecode(self):
"stable_diffusion_v1_4_vae.safetensors",
"cpu",
variant="decode",
upload_ir=upload_ir_var == "upload",
)
self.assertEqual(cm.exception.code, None)
arguments["external_weight_path"] = "stable_diffusion_v1_4_vae.safetensors"
@@ -193,6 +199,7 @@ def testExportVaeModelDecode(self):
os.remove("stable_diffusion_v1_4_vae.vmfb")

def testExportVaeModelEncode(self):
upload_ir_var = os.environ.get("TURBINE_TANK_ACTION", "not_upload")
with self.assertRaises(SystemExit) as cm:
vae.export_vae_model(
vae_model,
@@ -207,6 +214,7 @@ def testExportVaeModelEncode(self):
"stable_diffusion_v1_4_vae.safetensors",
"cpu",
variant="encode",
upload_ir=upload_ir_var == "upload",
)
self.assertEqual(cm.exception.code, None)
arguments["external_weight_path"] = "stable_diffusion_v1_4_vae.safetensors"
@@ -239,6 +247,7 @@ def testExportVaeModelEncode(self):

@unittest.expectedFailure
def testExportPNDMScheduler(self):
upload_ir_var = os.environ.get("TURBINE_TANK_ACTION", "not_upload")
with self.assertRaises(SystemExit) as cm:
schedulers.export_scheduler(
scheduler_module,
@@ -252,6 +261,7 @@ def testExportPNDMScheduler(self):
"safetensors",
"stable_diffusion_v1_4_scheduler.safetensors",
"cpu",
upload_ir=upload_ir_var == "upload",
)
self.assertEqual(cm.exception.code, None)
arguments[
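As the tests show, uploading is opt-in: each test reads TURBINE_TANK_ACTION from the environment and passes upload_ir=True only when it equals "upload", so a normal local run (variable unset or set to anything else) never touches blob storage. Assuming pytest as the runner, the upload path would be exercised with TURBINE_TANK_ACTION=upload pytest models/turbine_models/tests/sd_test.py.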