From eeba2997a13e88fedc6056c97ba6e27f08aa759c Mon Sep 17 00:00:00 2001
From: zR <2448370773@qq.com>
Date: Sat, 5 Oct 2024 00:03:34 +0800
Subject: [PATCH] update

---
 show_model.py         | 91 -------------------------------------------
 show_model_cogview.py | 25 ------------
 2 files changed, 116 deletions(-)
 delete mode 100644 show_model.py
 delete mode 100644 show_model_cogview.py

diff --git a/show_model.py b/show_model.py
deleted file mode 100644
index 0127243117bd..000000000000
--- a/show_model.py
+++ /dev/null
@@ -1,91 +0,0 @@
-import torch
-from diffusers.loaders.single_file_utils import convert_ldm_vae_checkpoint
-from diffusers import AutoencoderKL
-from huggingface_hub import hf_hub_download
-from sgm.models.autoencoder import AutoencodingEngine
-
-# (1) create vae_sat
-# AutoencodingEngine initialization arguments:
-encoder_config = {'target': 'sgm.modules.diffusionmodules.model.Encoder', 'params': {'attn_type': 'vanilla', 'double_z': True, 'z_channels': 16, 'resolution': 256, 'in_channels': 3, 'out_ch': 3, 'ch': 128, 'ch_mult': [1, 4, 8, 8], 'num_res_blocks': 3, 'attn_resolutions': [], 'mid_attn': False, 'dropout': 0.0}}
-decoder_config = {'target': 'sgm.modules.diffusionmodules.model.Decoder', 'params': {'attn_type': 'vanilla', 'double_z': True, 'z_channels': 16, 'resolution': 256, 'in_channels': 3, 'out_ch': 3, 'ch': 128, 'ch_mult': [1, 4, 8, 8], 'num_res_blocks': 3, 'attn_resolutions': [], 'mid_attn': False, 'dropout': 0.0}}
-loss_config = {'target': 'torch.nn.Identity'}
-regularizer_config = {'target': 'sgm.modules.autoencoding.regularizers.DiagonalGaussianRegularizer'}
-optimizer_config = None
-lr_g_factor = 1.0
-ckpt_path = "/raid/.cache/huggingface/models--ZP2HF--CogView3-SAT/snapshots/ca86ce9ba94f9a7f2dd109e7a59e4c8ad04121be/3plus_ae/imagekl_ch16.pt"
-ignore_keys = []
-kwargs = {"monitor": "val/rec_loss"}
-vae_sat = AutoencodingEngine(
-    encoder_config=encoder_config,
-    decoder_config=decoder_config,
-    loss_config=loss_config,
-    regularizer_config=regularizer_config,
-    optimizer_config=optimizer_config,
-    lr_g_factor=lr_g_factor,
-    ckpt_path=ckpt_path,
-    ignore_keys=ignore_keys,
-    **kwargs)
-
-
-
-# (2) create vae (diffusers)
-ckpt_path_vae_cogview3 = hf_hub_download(repo_id="ZP2HF/CogView3-SAT", subfolder="3plus_ae", filename="imagekl_ch16.pt")
-cogview3_ckpt = torch.load(ckpt_path_vae_cogview3, map_location='cpu')["state_dict"]
-
-in_channels = 3  # Inferred from encoder.conv_in.weight shape
-out_channels = 3  # Inferred from decoder.conv_out.weight shape
-down_block_types = ("DownEncoderBlock2D",) * 4  # Inferred from the presence of 4 encoder.down blocks
-up_block_types = ("UpDecoderBlock2D",) * 4  # Inferred from the presence of 4 decoder.up blocks
-block_out_channels = (128, 512, 1024, 1024)  # Inferred from the channel sizes in encoder.down blocks
-layers_per_block = 3  # Inferred from the number of blocks in each encoder.down and decoder.up
-act_fn = "silu"  # This is the default, cannot be inferred from state_dict
-latent_channels = 16  # Inferred from decoder.conv_in.weight shape
-norm_num_groups = 32  # This is the default, cannot be inferred from state_dict
-sample_size = 1024  # This is the default, cannot be inferred from state_dict
-scaling_factor = 0.18215  # This is the default, cannot be inferred from state_dict
-force_upcast = True  # This is the default, cannot be inferred from state_dict
-use_quant_conv = False  # Inferred from the presence of encoder.conv_out
-use_post_quant_conv = False  # Inferred from the presence of decoder.conv_in
-mid_block_add_attention = False  # Inferred from the absence of attention layers in mid blocks
-
-vae = AutoencoderKL(
-    in_channels=in_channels,
-    out_channels=out_channels,
-    down_block_types=down_block_types,
-    up_block_types=up_block_types,
-    block_out_channels=block_out_channels,
-    layers_per_block=layers_per_block,
-    act_fn=act_fn,
-    latent_channels=latent_channels,
-    norm_num_groups=norm_num_groups,
-    sample_size=sample_size,
-    scaling_factor=scaling_factor,
-    force_upcast=force_upcast,
-    use_quant_conv=use_quant_conv,
-    use_post_quant_conv=use_post_quant_conv,
-    mid_block_add_attention=mid_block_add_attention,
-)
-
-vae.eval()
-vae_sat.eval()
-
-converted_vae_state_dict = convert_ldm_vae_checkpoint(cogview3_ckpt, vae.config)
-vae.load_state_dict(converted_vae_state_dict, strict=False)
-
-# (3) run forward pass for both models
-
-# [2, 16, 128, 128] -> [2, 3, 1024, 1024]
-z = torch.load("z.pt").float().to("cpu")
-
-with torch.no_grad():
-    print(" ")
-    print(f" running forward pass for diffusers vae")
-    out = vae.decode(z).sample
-    print(f" ")
-    print(f" running forward pass for sgm vae")
-    out_sat = vae_sat.decode(z)
-
-print(f" output shape: {out.shape}")
-print(f" expected output shape: {out_sat.shape}")
-assert out.shape == out_sat.shape
-assert (out - out_sat).abs().max() < 1e-4, f"max diff: {(out - out_sat).abs().max()}"
\ No newline at end of file
diff --git a/show_model_cogview.py b/show_model_cogview.py
deleted file mode 100644
index 5314930cb127..000000000000
--- a/show_model_cogview.py
+++ /dev/null
@@ -1,25 +0,0 @@
-import torch
-from diffusers import CogView3PlusTransformer2DModel
-
-model = CogView3PlusTransformer2DModel.from_pretrained("/share/home/zyx/Models/CogView3Plus_hf/transformer", torch_dtype=torch.bfloat16)
-
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model.to(device)
-
-batch_size = 1
-hidden_states = torch.ones((batch_size, 16, 256, 256), device=device, dtype=torch.bfloat16)
-timestep = torch.full((batch_size,), 999.0, device=device, dtype=torch.bfloat16)
-y = torch.ones((batch_size, 1536), device=device, dtype=torch.bfloat16)
-
-# Simulate a call to the forward method
-outputs = model(
-    hidden_states=hidden_states,  # hidden_states input
-    timestep=timestep,  # timestep input
-    y=y,  # label input
-    block_controlnet_hidden_states=None,  # can be omitted if not needed
-    return_dict=True,  # keep the default value
-    target_size=[(2048, 2048)],
-)
-
-# Print the model output
-print("Output shape:", outputs.sample.shape)
\ No newline at end of file