Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updates for TMA 5.01 support #257

Merged
merged 10 commits into from
Dec 9, 2024
286 changes: 179 additions & 107 deletions scripts/create_perf_json.py

Large diffs are not rendered by default.

3,637 changes: 2,871 additions & 766 deletions scripts/perf/alderlake/adl-metrics.json

Large diffs are not rendered by default.

292 changes: 251 additions & 41 deletions scripts/perf/alderlake/cache.json

Large diffs are not rendered by default.

19 changes: 18 additions & 1 deletion scripts/perf/alderlake/floating-point.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,13 @@
[
{
"BriefDescription": "Counts the number of cycles the floating point divider is in the loop stage.",
"Counter": "0,1,2,3,4,5",
"EventCode": "0xcd",
"EventName": "ARITH.FPDIV_ACTIVE",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_atom"
},
{
"BriefDescription": "ARITH.FPDIV_ACTIVE",
"Counter": "0,1,2,3,4,5,6,7",
Expand All @@ -9,6 +18,15 @@
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Counts the number of floating point divider uops executed per cycle.",
"Counter": "0,1,2,3,4,5",
"EventCode": "0xcd",
"EventName": "ARITH.FPDIV_UOPS",
"SampleAfterValue": "1000003",
"UMask": "0x8",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Counts all microcode FP assists.",
"Counter": "0,1,2,3,4,5,6,7",
Expand Down Expand Up @@ -187,7 +205,6 @@
"Counter": "0,1,2,3,4,5",
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.FPDIV",
"PEBS": "1",
"SampleAfterValue": "2000003",
"UMask": "0x8",
"Unit": "cpu_atom"
Expand Down
19 changes: 0 additions & 19 deletions scripts/perf/alderlake/frontend.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@
"EventName": "FRONTEND_RETIRED.ANY_DSB_MISS",
"MSRIndex": "0x3F7",
"MSRValue": "0x1",
"PEBS": "1",
"PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.",
"SampleAfterValue": "100007",
"UMask": "0x1",
Expand All @@ -68,7 +67,6 @@
"EventName": "FRONTEND_RETIRED.DSB_MISS",
"MSRIndex": "0x3F7",
"MSRValue": "0x11",
"PEBS": "1",
"PublicDescription": "Number of retired Instructions that experienced a critical DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. Critical means stalls were exposed to the back-end as a result of the DSB miss.",
"SampleAfterValue": "100007",
"UMask": "0x1",
Expand All @@ -81,7 +79,6 @@
"EventName": "FRONTEND_RETIRED.ITLB_MISS",
"MSRIndex": "0x3F7",
"MSRValue": "0x14",
"PEBS": "1",
"PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.",
"SampleAfterValue": "100007",
"UMask": "0x1",
Expand All @@ -94,7 +91,6 @@
"EventName": "FRONTEND_RETIRED.L1I_MISS",
"MSRIndex": "0x3F7",
"MSRValue": "0x12",
"PEBS": "1",
"PublicDescription": "Counts retired Instructions who experienced Instruction L1 Cache true miss.",
"SampleAfterValue": "100007",
"UMask": "0x1",
Expand All @@ -107,7 +103,6 @@
"EventName": "FRONTEND_RETIRED.L2_MISS",
"MSRIndex": "0x3F7",
"MSRValue": "0x13",
"PEBS": "1",
"PublicDescription": "Counts retired Instructions who experienced Instruction L2 Cache true miss.",
"SampleAfterValue": "100007",
"UMask": "0x1",
Expand All @@ -120,7 +115,6 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_1",
"MSRIndex": "0x3F7",
"MSRValue": "0x600106",
"PEBS": "1",
"PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 1 cycle which was not interrupted by a back-end stall.",
"SampleAfterValue": "100007",
"UMask": "0x1",
Expand All @@ -133,7 +127,6 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_128",
"MSRIndex": "0x3F7",
"MSRValue": "0x608006",
"PEBS": "1",
"PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.",
"SampleAfterValue": "100007",
"UMask": "0x1",
Expand All @@ -146,7 +139,6 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_16",
"MSRIndex": "0x3F7",
"MSRValue": "0x601006",
"PEBS": "1",
"PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.",
"SampleAfterValue": "100007",
"UMask": "0x1",
Expand All @@ -159,7 +151,6 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_2",
"MSRIndex": "0x3F7",
"MSRValue": "0x600206",
"PEBS": "1",
"PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 2 cycles which was not interrupted by a back-end stall.",
"SampleAfterValue": "100007",
"UMask": "0x1",
Expand All @@ -172,7 +163,6 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_256",
"MSRIndex": "0x3F7",
"MSRValue": "0x610006",
"PEBS": "1",
"PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.",
"SampleAfterValue": "100007",
"UMask": "0x1",
Expand All @@ -185,7 +175,6 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1",
"MSRIndex": "0x3F7",
"MSRValue": "0x100206",
"PEBS": "1",
"PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.",
"SampleAfterValue": "100007",
"UMask": "0x1",
Expand All @@ -198,7 +187,6 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_32",
"MSRIndex": "0x3F7",
"MSRValue": "0x602006",
"PEBS": "1",
"PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.",
"SampleAfterValue": "100007",
"UMask": "0x1",
Expand All @@ -211,7 +199,6 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_4",
"MSRIndex": "0x3F7",
"MSRValue": "0x600406",
"PEBS": "1",
"PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.",
"SampleAfterValue": "100007",
"UMask": "0x1",
Expand All @@ -224,7 +211,6 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_512",
"MSRIndex": "0x3F7",
"MSRValue": "0x620006",
"PEBS": "1",
"PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.",
"SampleAfterValue": "100007",
"UMask": "0x1",
Expand All @@ -237,7 +223,6 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_64",
"MSRIndex": "0x3F7",
"MSRValue": "0x604006",
"PEBS": "1",
"PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.",
"SampleAfterValue": "100007",
"UMask": "0x1",
Expand All @@ -250,7 +235,6 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_8",
"MSRIndex": "0x3F7",
"MSRValue": "0x600806",
"PEBS": "1",
"PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.",
"SampleAfterValue": "100007",
"UMask": "0x1",
Expand All @@ -263,7 +247,6 @@
"EventName": "FRONTEND_RETIRED.MS_FLOWS",
"MSRIndex": "0x3F7",
"MSRValue": "0x8",
"PEBS": "1",
"SampleAfterValue": "100007",
"UMask": "0x1",
"Unit": "cpu_core"
Expand All @@ -275,7 +258,6 @@
"EventName": "FRONTEND_RETIRED.STLB_MISS",
"MSRIndex": "0x3F7",
"MSRValue": "0x15",
"PEBS": "1",
"PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.",
"SampleAfterValue": "100007",
"UMask": "0x1",
Expand All @@ -288,7 +270,6 @@
"EventName": "FRONTEND_RETIRED.UNKNOWN_BRANCH",
"MSRIndex": "0x3F7",
"MSRValue": "0x17",
"PEBS": "1",
"SampleAfterValue": "100007",
"UMask": "0x1",
"Unit": "cpu_core"
Expand Down
32 changes: 22 additions & 10 deletions scripts/perf/alderlake/memory.json
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,6 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_1024",
"MSRIndex": "0x3F6",
"MSRValue": "0x400",
"PEBS": "2",
"PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 1024 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "53",
"UMask": "0x1",
Expand All @@ -147,7 +146,6 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
"MSRIndex": "0x3F6",
"MSRValue": "0x80",
"PEBS": "2",
"PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "1009",
"UMask": "0x1",
Expand All @@ -161,7 +159,6 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
"MSRIndex": "0x3F6",
"MSRValue": "0x10",
"PEBS": "2",
"PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "20011",
"UMask": "0x1",
Expand All @@ -175,7 +172,6 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
"MSRIndex": "0x3F6",
"MSRValue": "0x100",
"PEBS": "2",
"PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "503",
"UMask": "0x1",
Expand All @@ -189,7 +185,6 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
"MSRIndex": "0x3F6",
"MSRValue": "0x20",
"PEBS": "2",
"PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "100007",
"UMask": "0x1",
Expand All @@ -203,7 +198,6 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
"MSRIndex": "0x3F6",
"MSRValue": "0x4",
"PEBS": "2",
"PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "100003",
"UMask": "0x1",
Expand All @@ -217,7 +211,6 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
"MSRIndex": "0x3F6",
"MSRValue": "0x200",
"PEBS": "2",
"PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "101",
"UMask": "0x1",
Expand All @@ -231,7 +224,6 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
"MSRIndex": "0x3F6",
"MSRValue": "0x40",
"PEBS": "2",
"PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "2003",
"UMask": "0x1",
Expand All @@ -245,7 +237,6 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
"MSRIndex": "0x3F6",
"MSRValue": "0x8",
"PEBS": "2",
"PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "50021",
"UMask": "0x1",
Expand All @@ -257,12 +248,22 @@
"Data_LA": "1",
"EventCode": "0xcd",
"EventName": "MEM_TRANS_RETIRED.STORE_SAMPLE",
"PEBS": "2",
"PublicDescription": "Counts Retired memory accesses with at least 1 store operation. This PEBS event is the precisely-distributed (PDist) trigger covering all stores uops for sampling by the PEBS Store Latency Facility. The facility is described in Intel SDM Volume 3 section 19.9.8",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
"BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that were not supplied by the L3 cache.",
"Counter": "0,1,2,3,4,5",
"EventCode": "0xB7",
"EventName": "OCR.DEMAND_CODE_RD.L3_MISS",
"MSRIndex": "0x1a6,0x1a7",
"MSRValue": "0x3F84400004",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.",
"Counter": "0,1,2,3,4,5",
Expand Down Expand Up @@ -329,6 +330,17 @@
"UMask": "0x1",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Counts L1 data cache software prefetches which include T0/T1/T2 and NTA (except PREFETCHW) that were not supplied by the L3 cache.",
"Counter": "0,1,2,3,4,5",
"EventCode": "0xB7",
"EventName": "OCR.SWPF_RD.L3_MISS",
"MSRIndex": "0x1a6,0x1a7",
"MSRValue": "0x3F84404000",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Counts demand data read requests that miss the L3 cache.",
"Counter": "0,1,2,3",
Expand Down
10 changes: 9 additions & 1 deletion scripts/perf/alderlake/metricgroups.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
"L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
"LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
"Load_Store_Miss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
"LockCont": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
"MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
"Machine_Clears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
"Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
Expand Down Expand Up @@ -89,7 +90,9 @@
"tma_bad_speculation_group": "Metrics contributing to tma_bad_speculation category",
"tma_branch_mispredicts_group": "Metrics contributing to tma_branch_mispredicts category",
"tma_branch_resteers_group": "Metrics contributing to tma_branch_resteers category",
"tma_code_stlb_miss_group": "Metrics contributing to tma_code_stlb_miss category",
"tma_core_bound_group": "Metrics contributing to tma_core_bound category",
"tma_divider_group": "Metrics contributing to tma_divider category",
"tma_dram_bound_group": "Metrics contributing to tma_dram_bound category",
"tma_dtlb_load_group": "Metrics contributing to tma_dtlb_load category",
"tma_dtlb_store_group": "Metrics contributing to tma_dtlb_store category",
Expand All @@ -99,6 +102,7 @@
"tma_fp_vector_group": "Metrics contributing to tma_fp_vector category",
"tma_frontend_bound_group": "Metrics contributing to tma_frontend_bound category",
"tma_heavy_operations_group": "Metrics contributing to tma_heavy_operations category",
"tma_icache_misses_group": "Metrics contributing to tma_icache_misses category",
"tma_ifetch_bandwidth_group": "Metrics contributing to tma_ifetch_bandwidth category",
"tma_ifetch_latency_group": "Metrics contributing to tma_ifetch_latency category",
"tma_int_operations_group": "Metrics contributing to tma_int_operations category",
Expand All @@ -121,10 +125,13 @@
"tma_issueSpSt": "Metrics related by the issue $issueSpSt",
"tma_issueSyncxn": "Metrics related by the issue $issueSyncxn",
"tma_issueTLB": "Metrics related by the issue $issueTLB",
"tma_itlb_misses_group": "Metrics contributing to tma_itlb_misses category",
"tma_l1_bound_group": "Metrics contributing to tma_l1_bound category",
"tma_l2_bound_group": "Metrics contributing to tma_l2_bound category",
"tma_l3_bound_group": "Metrics contributing to tma_l3_bound category",
"tma_light_operations_group": "Metrics contributing to tma_light_operations category",
"tma_load_op_utilization_group": "Metrics contributing to tma_load_op_utilization category",
"tma_load_stlb_miss_group": "Metrics contributing to tma_load_stlb_miss category",
"tma_machine_clears_group": "Metrics contributing to tma_machine_clears category",
"tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
"tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
Expand All @@ -138,5 +145,6 @@
"tma_retiring_group": "Metrics contributing to tma_retiring category",
"tma_serializing_operation_group": "Metrics contributing to tma_serializing_operation category",
"tma_store_bound_group": "Metrics contributing to tma_store_bound category",
"tma_store_op_utilization_group": "Metrics contributing to tma_store_op_utilization category"
"tma_store_op_utilization_group": "Metrics contributing to tma_store_op_utilization category",
"tma_store_stlb_miss_group": "Metrics contributing to tma_store_stlb_miss category"
}
Loading
Loading