Add profiling and a small conv3d reproducer
This commit is contained in:
24
run.py
24
run.py
@ -6,6 +6,8 @@ import json
|
||||
from vae.autoencoder_kl_causal_3d import AutoencoderKLCausal3D
|
||||
import time
|
||||
|
||||
from torch.profiler import profile, record_function, ProfilerActivity
|
||||
|
||||
|
||||
def load_vae(path: str, compile_vae: bool):
|
||||
with open("hy_vae_config.json") as f:
|
||||
@ -25,6 +27,10 @@ def load_vae(path: str, compile_vae: bool):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
props = torch.cuda.get_device_properties(0)
|
||||
|
||||
print(f"Device: {props.name}")
|
||||
|
||||
latents = torch.randn((1, 16, 19, 120, 68)).to(torch.device(0), torch.bfloat16)
|
||||
print(f"Latent dims: {latents.size()}")
|
||||
|
||||
@ -40,11 +46,15 @@ if __name__ == "__main__":
|
||||
vae.enable_tiling()
|
||||
|
||||
print("decodeing")
|
||||
start = time.perf_counter()
|
||||
generator = torch.Generator(device=torch.device("cpu"))
|
||||
decoded = vae.decode(
|
||||
latents, return_dict=False, generator=generator
|
||||
)[0]
|
||||
print(f"decoded in {time.perf_counter() - start} seconds")
|
||||
print(f"decoded dims: {decoded.size()}")
|
||||
|
||||
with profile(activities=[ProfilerActivity.CUDA, ProfilerActivity.CPU], record_shapes=True, with_flops=True) as prof:
|
||||
start = time.perf_counter()
|
||||
generator = torch.Generator(device=torch.device("cpu"))
|
||||
decoded = vae.decode(
|
||||
latents, return_dict=False, generator=generator
|
||||
)[0]
|
||||
print(f"decoded in {time.perf_counter() - start} seconds")
|
||||
print(f"decoded dims: {decoded.size()}")
|
||||
|
||||
print(prof.key_averages(group_by_input_shape=True).table(sort_by="cuda_time_total", row_limit=100))
|
||||
prof.export_chrome_trace("trace.json")
|
||||
|
Reference in New Issue
Block a user