Skip to content

Commit

Permalink
add Wave Execution Efficiency
Browse files Browse the repository at this point in the history
  • Loading branch information
feizheng10 committed Oct 19, 2024
1 parent a236fe0 commit 42d92bb
Show file tree
Hide file tree
Showing 9 changed files with 140 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -139,4 +139,16 @@ Panel Config:
max: MAX((SQ_ACCUM_PREV_HIRES / $GRBM_GUI_ACTIVE_PER_XCD))
unit: Wavefronts
coll_level: SQ_LEVEL_WAVES
tips:
tips:
Wave Execution Efficiency:
avg:
AVG(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU / $wave_size * 100) if (SQ_ACTIVE_INST_VALU
!= 0) else None))
min:
MIN(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU / $wave_size * 100) if (SQ_ACTIVE_INST_VALU
!= 0) else None))
max:
MAX(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU / $wave_size * 100) if (SQ_ACTIVE_INST_VALU
!= 0) else None))
unit: pct
tips: # (VALU Active Threads / wavefront size) * 100; None when SQ_ACTIVE_INST_VALU is 0
Original file line number Diff line number Diff line change
Expand Up @@ -139,4 +139,16 @@ Panel Config:
max: MAX((SQ_ACCUM_PREV_HIRES / $GRBM_GUI_ACTIVE_PER_XCD))
unit: Wavefronts
coll_level: SQ_LEVEL_WAVES
tips:
tips:
Wave Execution Efficiency:
avg:
AVG(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU / $wave_size * 100) if (SQ_ACTIVE_INST_VALU
!= 0) else None))
min:
MIN(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU / $wave_size * 100) if (SQ_ACTIVE_INST_VALU
!= 0) else None))
max:
MAX(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU / $wave_size * 100) if (SQ_ACTIVE_INST_VALU
!= 0) else None))
unit: pct
tips: # (VALU Active Threads / wavefront size) * 100; None when SQ_ACTIVE_INST_VALU is 0
48 changes: 30 additions & 18 deletions src/omniperf_soc/analysis_configs/gfx90a/0700_wavefront-launch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,61 +24,61 @@ Panel Config:
min: MIN(Grid_Size)
max: MAX(Grid_Size)
unit: Work Items
tips:
tips:
Workgroup Size:
avg: AVG(Workgroup_Size)
min: MIN(Workgroup_Size)
max: MAX(Workgroup_Size)
unit: Work Items
tips:
tips:
Total Wavefronts:
avg: AVG(SPI_CSN_WAVE)
min: MIN(SPI_CSN_WAVE)
max: MAX(SPI_CSN_WAVE)
unit: Wavefronts
tips:
tips:
Saved Wavefronts:
avg: AVG(SQ_WAVES_SAVED)
min: MIN(SQ_WAVES_SAVED)
max: MAX(SQ_WAVES_SAVED)
unit: Wavefronts
tips:
tips:
Restored Wavefronts:
avg: AVG(SQ_WAVES_RESTORED)
min: MIN(SQ_WAVES_RESTORED)
max: MAX(SQ_WAVES_RESTORED)
unit: Wavefronts
tips:
tips:
VGPRs:
avg: AVG(Arch_VGPR)
min: MIN(Arch_VGPR)
max: MAX(Arch_VGPR)
unit: Registers
tips:
tips:
AGPRs:
avg: AVG(Accum_VGPR)
min: MIN(Accum_VGPR)
max: MAX(Accum_VGPR)
unit: Registers
tips:
tips:
SGPRs:
avg: AVG(SGPR)
min: MIN(SGPR)
max: MAX(SGPR)
unit: Registers
tips:
tips:
LDS Allocation:
avg: AVG(LDS_Per_Workgroup)
min: MIN(LDS_Per_Workgroup)
max: MAX(LDS_Per_Workgroup)
unit: Bytes
tips:
tips:
Scratch Allocation:
avg: AVG(Scratch_Per_Workitem)
min: MIN(Scratch_Per_Workitem)
max: MAX(Scratch_Per_Workitem)
unit: Bytes/Workitem
tips:
tips:

- metric_table:
id: 702
Expand All @@ -96,47 +96,59 @@ Panel Config:
min: MIN((End_Timestamp - Start_Timestamp))
max: MAX((End_Timestamp - Start_Timestamp))
unit: ns
tips:
tips:
Kernel Time (Cycles):
avg: AVG($GRBM_GUI_ACTIVE_PER_XCD)
min: MIN($GRBM_GUI_ACTIVE_PER_XCD)
max: MAX($GRBM_GUI_ACTIVE_PER_XCD)
unit: Cycle
tips:
tips:
Instructions per wavefront:
avg: AVG((SQ_INSTS / SQ_WAVES))
min: MIN((SQ_INSTS / SQ_WAVES))
max: MAX((SQ_INSTS / SQ_WAVES))
unit: Instr/wavefront
tips:
tips:
Wave Cycles:
avg: AVG(((4 * SQ_WAVE_CYCLES) / $denom))
min: MIN(((4 * SQ_WAVE_CYCLES) / $denom))
max: MAX(((4 * SQ_WAVE_CYCLES) / $denom))
unit: (Cycles + $normUnit)
tips:
tips:
Dependency Wait Cycles:
avg: AVG(((4 * SQ_WAIT_ANY) / $denom))
min: MIN(((4 * SQ_WAIT_ANY) / $denom))
max: MAX(((4 * SQ_WAIT_ANY) / $denom))
unit: (Cycles + $normUnit)
tips:
tips:
Issue Wait Cycles:
avg: AVG(((4 * SQ_WAIT_INST_ANY) / $denom))
min: MIN(((4 * SQ_WAIT_INST_ANY) / $denom))
max: MAX(((4 * SQ_WAIT_INST_ANY) / $denom))
unit: (Cycles + $normUnit)
tips:
tips:
Active Cycles:
avg: AVG(((4 * SQ_ACTIVE_INST_ANY) / $denom))
min: MIN(((4 * SQ_ACTIVE_INST_ANY) / $denom))
max: MAX(((4 * SQ_ACTIVE_INST_ANY) / $denom))
unit: (Cycles + $normUnit)
tips:
tips:
Wavefront Occupancy:
avg: AVG((SQ_ACCUM_PREV_HIRES / $GRBM_GUI_ACTIVE_PER_XCD))
min: MIN((SQ_ACCUM_PREV_HIRES / $GRBM_GUI_ACTIVE_PER_XCD))
max: MAX((SQ_ACCUM_PREV_HIRES / $GRBM_GUI_ACTIVE_PER_XCD))
unit: Wavefronts
coll_level: SQ_LEVEL_WAVES
tips:
tips:
Wave Execution Efficiency:
avg:
AVG(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU / $wave_size * 100) if (SQ_ACTIVE_INST_VALU
!= 0) else None))
min:
MIN(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU / $wave_size * 100) if (SQ_ACTIVE_INST_VALU
!= 0) else None))
max:
MAX(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU / $wave_size * 100) if (SQ_ACTIVE_INST_VALU
!= 0) else None))
unit: pct
tips: # (VALU Active Threads / wavefront size) * 100; None when SQ_ACTIVE_INST_VALU is 0
Original file line number Diff line number Diff line change
Expand Up @@ -140,3 +140,15 @@ Panel Config:
unit: Wavefronts
coll_level: SQ_LEVEL_WAVES
tips:
Wave Execution Efficiency:
avg:
AVG(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU / $wave_size * 100) if (SQ_ACTIVE_INST_VALU
!= 0) else None))
min:
MIN(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU / $wave_size * 100) if (SQ_ACTIVE_INST_VALU
!= 0) else None))
max:
MAX(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU / $wave_size * 100) if (SQ_ACTIVE_INST_VALU
!= 0) else None))
unit: pct
tips: # (VALU Active Threads / wavefront size) * 100; None when SQ_ACTIVE_INST_VALU is 0
Original file line number Diff line number Diff line change
Expand Up @@ -140,3 +140,15 @@ Panel Config:
unit: Wavefronts
coll_level: SQ_LEVEL_WAVES
tips:
Wave Execution Efficiency:
avg:
AVG(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU / $wave_size * 100) if (SQ_ACTIVE_INST_VALU
!= 0) else None))
min:
MIN(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU / $wave_size * 100) if (SQ_ACTIVE_INST_VALU
!= 0) else None))
max:
MAX(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU / $wave_size * 100) if (SQ_ACTIVE_INST_VALU
!= 0) else None))
unit: pct
tips: # (VALU Active Threads / wavefront size) * 100; None when SQ_ACTIVE_INST_VALU is 0
Original file line number Diff line number Diff line change
Expand Up @@ -140,3 +140,15 @@ Panel Config:
unit: Wavefronts
coll_level: SQ_LEVEL_WAVES
tips:
Wave Execution Efficiency:
avg:
AVG(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU / $wave_size * 100) if (SQ_ACTIVE_INST_VALU
!= 0) else None))
min:
MIN(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU / $wave_size * 100) if (SQ_ACTIVE_INST_VALU
!= 0) else None))
max:
MAX(((SQ_THREAD_CYCLES_VALU / SQ_ACTIVE_INST_VALU / $wave_size * 100) if (SQ_ACTIVE_INST_VALU
!= 0) else None))
unit: pct
tips: # (VALU Active Threads / wavefront size) * 100; None when SQ_ACTIVE_INST_VALU is 0
2 changes: 1 addition & 1 deletion src/utils/file_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
},
}

time_units = {"s": 10**9, "ms": 10**6, "us": 10**3, "ns": 1}
time_units = {"s": 10 ** 9, "ms": 10 ** 6, "us": 10 ** 3, "ns": 1}


def load_sys_info(f):
Expand Down
1 change: 1 addition & 0 deletions src/utils/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -698,6 +698,7 @@ def eval_metric(dfs, dfs_type, sys_info, raw_pmc_df, debug):
ammolite__hbm_bw = sys_info.hbm_bw
ammolite__total_l2_chan = calc_builtin_var("$total_l2_chan", sys_info)
ammolite__num_xcd = sys_info.num_xcd
ammolite__wave_size = sys_info.wave_size

# TODO: fix all $normUnit in Unit column or title

Expand Down
58 changes: 46 additions & 12 deletions src/utils/roofline_calc.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,11 +195,21 @@ def calc_ai(sort_type, ret_df):
df = df.sort_values(by=["Kernel_Name"])
df = df.reset_index(drop=True)

total_flops = valu_flops = mfma_flops_bf16 = mfma_flops_f16 = mfma_iops_i8 = (
total_flops = (
valu_flops
) = (
mfma_flops_bf16
) = (
mfma_flops_f16
) = (
mfma_iops_i8
) = (
mfma_flops_f32
) = mfma_flops_f64 = lds_data = L1cache_data = L2cache_data = hbm_data = calls = (
totalDuration
) = avgDuration = 0.0
) = (
mfma_flops_f64
) = (
lds_data
) = L1cache_data = L2cache_data = hbm_data = calls = totalDuration = avgDuration = 0.0

kernelName = ""

Expand Down Expand Up @@ -381,11 +391,23 @@ def calc_ai(sort_type, ret_df):
kernelName, idx, calls
)
)
total_flops = valu_flops = mfma_flops_bf16 = mfma_flops_f16 = mfma_iops_i8 = (
total_flops = (
valu_flops
) = (
mfma_flops_bf16
) = (
mfma_flops_f16
) = (
mfma_iops_i8
) = (
mfma_flops_f32
) = mfma_flops_f64 = lds_data = L1cache_data = L2cache_data = hbm_data = (
calls
) = totalDuration = avgDuration = 0.0
) = (
mfma_flops_f64
) = (
lds_data
) = (
L1cache_data
) = L2cache_data = hbm_data = calls = totalDuration = avgDuration = 0.0

if sort_type == "dispatches":
myList.append(
Expand All @@ -407,11 +429,23 @@ def calc_ai(sort_type, ret_df):
avgDuration,
)
)
total_flops = valu_flops = mfma_flops_bf16 = mfma_flops_f16 = mfma_iops_i8 = (
total_flops = (
valu_flops
) = (
mfma_flops_bf16
) = (
mfma_flops_f16
) = (
mfma_iops_i8
) = (
mfma_flops_f32
) = mfma_flops_f64 = lds_data = L1cache_data = L2cache_data = hbm_data = (
calls
) = totalDuration = avgDuration = 0.0
) = (
mfma_flops_f64
) = (
lds_data
) = (
L1cache_data
) = L2cache_data = hbm_data = calls = totalDuration = avgDuration = 0.0

myList.sort(key=lambda x: x.totalDuration, reverse=True)

Expand Down

0 comments on commit 42d92bb

Please sign in to comment.