# Section "SpeedOfLight_HierarchicalTensorRooflineChart" (Extends "SpeedOfLight"): roofline chart definition for tensor-core operations.
# Metrics lists the raw cycle-count metrics; MetricDefinitions derives the "derived__*_bytes" metrics from them by multiplying with a constant (128 for L1/TEX LSU writeback, 32 for lts2xbar).
# NOTE(review): the 128 / 32 factors are presumably bytes per active cycle -- confirm against the metric documentation.
# Body holds, for each architecture Filter (MinArch/MaxArch) visible here, a RooflineChart item followed by a SuffixTable item listing the same tensor-op base metrics.
Identifier: "SpeedOfLight_HierarchicalTensorRooflineChart" DisplayName: "GPU Speed Of Light Hierarchical Roofline Chart (Tensor Core)" Extends: "SpeedOfLight" Description: "High-level overview of the utilization for compute and memory resources of the GPU presented as a roofline chart." Order: 12 Sets { Identifier: "roofline" } Metrics { Metrics { Label: "L1/TEX peak writeback cycles" Name: "l1tex__lsu_writeback_active.sum.peak_sustained" Filter { MaxArch: CC_72 } } Metrics { Label: "L1/TEX peak local/global writeback cycles" Name: "l1tex__lsu_writeback_active_mem_lg.sum.peak_sustained" Filter { MinArch: CC_75 MaxArch: CC_89 } Options { Name: "l1tex__lsu_writeback_active_mem_lgds.sum.peak_sustained" Filter { MinArch: CC_90 } } } Metrics { Label: "L1/TEX active writeback cycles per second" Name: "l1tex__lsu_writeback_active.sum.per_second" Filter { MaxArch: CC_72 } } Metrics { Label: "L1/TEX active local/global writeback cycles per second" Name: "l1tex__lsu_writeback_active_mem_lg.sum.per_second" Filter { MinArch: CC_75 MaxArch: CC_89 } Options { Name: "l1tex__lsu_writeback_active_mem_lgds.sum.per_second" Filter { MinArch: CC_90 } } } Metrics { Label: "L2 peak writeback cycles" Name: "lts__lts2xbar_cycles_active.sum.peak_sustained" } Metrics { Label: "L2 active writeback cycles per second" Name: "lts__lts2xbar_cycles_active.sum.per_second" } } MetricDefinitions { MetricDefinitions { Name: "derived__l1tex__lsu_writeback_bytes.sum.peak_sustained" Expression: "l1tex__lsu_writeback_active.sum.peak_sustained * 128" Filter { MaxArch: CC_72 } } MetricDefinitions { Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" Expression: "l1tex__lsu_writeback_active_mem_lg.sum.peak_sustained * 128" Filter { MinArch: CC_75 MaxArch: CC_89 } } MetricDefinitions { Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.peak_sustained" Expression: "l1tex__lsu_writeback_active_mem_lgds.sum.peak_sustained * 128" Filter { MinArch: CC_90 } } MetricDefinitions { Name: 
"derived__l1tex__lsu_writeback_bytes.sum.per_second" Expression: "l1tex__lsu_writeback_active.sum.per_second * 128" Filter { MaxArch: CC_72 } } MetricDefinitions { Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" Expression: "l1tex__lsu_writeback_active_mem_lg.sum.per_second * 128" Filter { MinArch: CC_75 MaxArch: CC_89 } } MetricDefinitions { Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.per_second" Expression: "l1tex__lsu_writeback_active_mem_lgds.sum.per_second * 128" Filter { MinArch: CC_90 } } MetricDefinitions { Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" Expression: "lts__lts2xbar_cycles_active.sum.peak_sustained * 32" } MetricDefinitions { Name: "derived__lts__lts2xbar_bytes.sum.per_second" Expression: "lts__lts2xbar_cycles_active.sum.per_second * 32" } } Body { DisplayName: "Roofline Tensor Core" # ================================================================================================ # GV100 # ================================================================================================ Items { Filter: { MinArch: CC_70 MaxArch: CC_70 } RooflineChart { Label: "Tensor Core Operations Roofline" AxisIntensity { Label: "Arithmetic Intensity [OP/byte]" } AxisWork { Label: "Performance [OP/s]" } # -- Work:src_fp16_dst_fp16 Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp16.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:fp16 Dst:fp16)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per 
Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:fp16 Dst:fp16)" } } # -- Work:src_fp16_dst_fp16 Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp16.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:fp16 Dst:fp16)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:fp16 Dst:fp16)" } } # -- Work:src_fp16_dst_fp16 Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { 
ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp16.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:fp16 Dst:fp16)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:fp16 Dst:fp16)" } } # -- Work:src_fp16_dst_fp32 Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp32.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:fp16 Dst:fp32)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: 
"sm__ops_path_tensor_src_fp16_dst_fp32.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:fp16 Dst:fp32)" } } # -- Work:src_fp16_dst_fp32 Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp32.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:fp16 Dst:fp32)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:fp16 Dst:fp32)" } } # -- Work:src_fp16_dst_fp32 Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: 
"Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp32.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:fp16 Dst:fp32)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:fp16 Dst:fp32)" } } } } Items { Filter: { MinArch: CC_70 MaxArch: CC_70 } SuffixTable: { Suffixes { Suffix { Label: "# Operations" Name: ".sum" } Suffix { Label: "# Operations / Cycle" Name: ".sum.per_cycle_elapsed" } Suffix { Label: "# Operations / s" Name: ".sum.per_second" } Suffix { Label: "Peak %" Name: ".sum.pct_of_peak_sustained_elapsed" } Suffix { Label: "Peak Operations / Cycle" Name: ".sum.peak_sustained" } Suffix { Label: "Peak Operations / s" Name: ".sum.peak_sustained_elapsed.per_second" } } BaseNames { BaseName { Label: "Src:fp16 Dst:fp16" Name: "sm__ops_path_tensor_src_fp16_dst_fp16" } BaseName { Label: "Src:fp16 Dst:fp32" Name: "sm__ops_path_tensor_src_fp16_dst_fp32" } } } } # ================================================================================================ # GV11b # 
================================================================================================ Items { Filter: { MinArch: CC_72 MaxArch: CC_72 } RooflineChart { Label: "Tensor Core Operations Roofline" AxisIntensity { Label: "Arithmetic Intensity [OP/byte]" } AxisWork { Label: "Performance [OP/s]" } # -- Work:src_fp16_dst_fp16 Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp16.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:fp16 Dst:fp16)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:fp16 Dst:fp16)" } } # -- Work:src_fp16_dst_fp16 Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp16.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { 
ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:fp16 Dst:fp16)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:fp16 Dst:fp16)" } } # -- Work:src_fp16_dst_fp32 Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp32.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:fp16 Dst:fp32)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32.sum.per_cycle_elapsed" 
} CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:fp16 Dst:fp32)" } } # -- Work:src_fp16_dst_fp32 Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp32.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:fp16 Dst:fp32)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:fp16 Dst:fp32)" } } # -- Work:src_int8 Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int8.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { 
ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:int8)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:int8)" } } # -- Work:src_int8 Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int8.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:int8)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } 
} } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:int8)" } } } } Items { Filter: { MinArch: CC_72 MaxArch: CC_72 } SuffixTable: { Suffixes { Suffix { Label: "# Operations" Name: ".sum" } Suffix { Label: "# Operations / Cycle" Name: ".sum.per_cycle_elapsed" } Suffix { Label: "# Operations / s" Name: ".sum.per_second" } Suffix { Label: "Peak %" Name: ".sum.pct_of_peak_sustained_elapsed" } Suffix { Label: "Peak Operations / Cycle" Name: ".sum.peak_sustained" } Suffix { Label: "Peak Operations / s" Name: ".sum.peak_sustained_elapsed.per_second" } } BaseNames { BaseName { Label: "Src:fp16 Dst:fp16" Name: "sm__ops_path_tensor_src_fp16_dst_fp16" } BaseName { Label: "Src:fp16 Dst:fp32" Name: "sm__ops_path_tensor_src_fp16_dst_fp32" } BaseName { Label: "Src:int8" Name: "sm__ops_path_tensor_src_int8" } } } } # ================================================================================================ # TU10x # ================================================================================================ Items { Filter: { MinArch: CC_75 MaxArch: CC_75 } RooflineChart { Label: "Tensor Core Operations Roofline" AxisIntensity { Label: "Arithmetic Intensity [OP/byte]" } AxisWork { Label: "Performance [OP/s]" } # -- Work:src_fp16_bf16_tf32_dst_fp32 Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_bf16_tf32_dst_fp32.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: 
"DRAM Roofline (Src:fp16,bf16,tf32 Dst:fp32)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_bf16_tf32_dst_fp32.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_bf16_tf32_dst_fp32.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:fp16,bf16,tf32 Dst:fp32)" } } # -- Work:src_fp16_bf16_tf32_dst_fp32 Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_bf16_tf32_dst_fp32.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:fp16,bf16,tf32 Dst:fp32)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_bf16_tf32_dst_fp32.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_bf16_tf32_dst_fp32.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value 
(Src:fp16,bf16,tf32 Dst:fp32)" } } # -- Work:src_fp16_bf16_tf32_dst_fp32 Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_bf16_tf32_dst_fp32.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:fp16,bf16,tf32 Dst:fp32)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_bf16_tf32_dst_fp32.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_bf16_tf32_dst_fp32.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:fp16,bf16,tf32 Dst:fp32)" } } # -- Work:src_fp16_dst_fp16 Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp16.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { 
Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:fp16 Dst:fp16)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:fp16 Dst:fp16)" } } # -- Work:src_fp16_dst_fp16 Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp16.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:fp16 Dst:fp16)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value 
(Src:fp16 Dst:fp16)" } } # -- Work:src_fp16_dst_fp16 Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp16.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:fp16 Dst:fp16)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:fp16 Dst:fp16)" } } # -- Work:src_int1 Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int1.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { 
Label: "DRAM Roofline (Src:int1)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int1.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int1.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:int1)" } } # -- Work:src_int1 Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int1.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:int1)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int1.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int1.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:int1)" } } # -- Work:src_int1 Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { 
ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int1.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:int1)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int1.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int1.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:int1)" } } # -- Work:src_int4 Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int4.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:int4)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { 
ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:int4)" } } # -- Work:src_int4 Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int4.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:int4)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:int4)" } } # -- Work:src_int4 Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int4.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } 
PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:int4)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:int4)" } } # -- Work:src_int8 Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int8.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:int8)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: 
"sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:int8)" } } # -- Work:src_int8 Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int8.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:int8)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:int8)" } } # -- Work:src_int8 Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int8.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } 
CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:int8)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:int8)" } } } }

# Suffix table item. Its Filter sets MinArch and MaxArch both to CC_75, so it
# is restricted to that single architecture.
Items {
  Filter: {
    MinArch: CC_75
    MaxArch: CC_75
  }
  SuffixTable: {
    # Metric-name suffixes, each paired with the label given for it.
    # NOTE(review): presumably every BaseName below is concatenated with every
    # Suffix to form a full metric name, e.g. "sm__ops_path_tensor_src_int8" +
    # ".sum.per_cycle_elapsed" yields the metric used by the int8 rooflines.
    # Confirm against the section-file schema.
    Suffixes {
      Suffix {
        Label: "# Operations"
        Name: ".sum"
      }
      Suffix {
        Label: "# Operations / Cycle"
        Name: ".sum.per_cycle_elapsed"
      }
      Suffix {
        Label: "# Operations / s"
        Name: ".sum.per_second"
      }
      Suffix {
        Label: "Peak %"
        Name: ".sum.pct_of_peak_sustained_elapsed"
      }
      Suffix {
        Label: "Peak Operations / Cycle"
        Name: ".sum.peak_sustained"
      }
      Suffix {
        Label: "Peak Operations / s"
        Name: ".sum.peak_sustained_elapsed.per_second"
      }
    }
    # Tensor-operation metric base names (no rollup or submetric suffix), one
    # per source/destination data-type combination listed in this table.
    BaseNames {
      BaseName {
        Label: "Src:fp16,bf16,tf32 Dst:fp32"
        Name: "sm__ops_path_tensor_src_fp16_bf16_tf32_dst_fp32"
      }
      BaseName {
        Label: "Src:fp16 Dst:fp16"
        Name: "sm__ops_path_tensor_src_fp16_dst_fp16"
      }
      BaseName {
        Label: "Src:int1"
        Name: "sm__ops_path_tensor_src_int1"
      }
      BaseName {
        Label: "Src:int4"
        Name: "sm__ops_path_tensor_src_int4"
      }
      BaseName {
        Label: "Src:int8"
        Name: "sm__ops_path_tensor_src_int8"
      }
    }
  }
}

# ================================================================================================
# GA100
# ================================================================================================
Items { Filter: { MinArch: CC_80 MaxArch: CC_80 } RooflineChart { Label: "Tensor Core Operations Roofline" AxisIntensity {
Label: "Arithmetic Intensity [OP/byte]" } AxisWork { Label: "Performance [OP/s]" } # -- Work:src_bf16_dst_fp32_sparsity_off Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:bf16 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:bf16 Dst:fp32 Sparsity:off)" } } # -- Work:src_bf16_dst_fp32_sparsity_off Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } 
CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:bf16 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:bf16 Dst:fp32 Sparsity:off)" } } # -- Work:src_bf16_dst_fp32_sparsity_off Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:bf16 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" 
Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:bf16 Dst:fp32 Sparsity:off)" } } # -- Work:src_bf16_dst_fp32_sparsity_on Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:bf16 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:bf16 Dst:fp32 Sparsity:on)" } } # -- Work:src_bf16_dst_fp32_sparsity_on Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { 
ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:bf16 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:bf16 Dst:fp32 Sparsity:on)" } } # -- Work:src_bf16_dst_fp32_sparsity_on Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:bf16 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: 
"Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:bf16 Dst:fp32 Sparsity:on)" } } # -- Work:src_fp16_dst_fp16_sparsity_off Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:fp16 Dst:fp16 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:fp16 Dst:fp16 Sparsity:off)" } } # -- Work:src_fp16_dst_fp16_sparsity_off Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: 
"sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:fp16 Dst:fp16 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:fp16 Dst:fp16 Sparsity:off)" } } # -- Work:src_fp16_dst_fp16_sparsity_off Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:fp16 Dst:fp16 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" 
Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:fp16 Dst:fp16 Sparsity:off)" } } # -- Work:src_fp16_dst_fp16_sparsity_on Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:fp16 Dst:fp16 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:fp16 Dst:fp16 Sparsity:on)" } } # -- Work:src_fp16_dst_fp16_sparsity_on Traffic: L2 Cache 
----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:fp16 Dst:fp16 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:fp16 Dst:fp16 Sparsity:on)" } } # -- Work:src_fp16_dst_fp16_sparsity_on Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: 
"l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:fp16 Dst:fp16 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:fp16 Dst:fp16 Sparsity:on)" } } # -- Work:src_fp16_dst_fp32_sparsity_off Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:fp16 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" 
Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:fp16 Dst:fp32 Sparsity:off)" } } # -- Work:src_fp16_dst_fp32_sparsity_off Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:fp16 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:fp16 Dst:fp32 Sparsity:off)" } } # -- Work:src_fp16_dst_fp32_sparsity_off Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX 
Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:fp16 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:fp16 Dst:fp32 Sparsity:off)" } } # -- Work:src_fp16_dst_fp32_sparsity_on Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:fp16 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: 
"sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:fp16 Dst:fp32 Sparsity:on)" } } # -- Work:src_fp16_dst_fp32_sparsity_on Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:fp16 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:fp16 Dst:fp32 Sparsity:on)" } } # -- Work:src_fp16_dst_fp32_sparsity_on Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: 
"sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:fp16 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:fp16 Dst:fp32 Sparsity:on)" } } # -- Work:src_fp64 Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp64.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:fp64)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp64.sum.per_cycle_elapsed" } } } } AchievedValues { 
AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp64.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:fp64)" } } # -- Work:src_fp64 Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp64.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:fp64)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp64.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp64.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:fp64)" } } # -- Work:src_fp64 Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp64.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: 
"sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:fp64)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp64.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp64.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:fp64)" } } # -- Work:src_int1 Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int1.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:int1)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int1.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int1.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM 
Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:int1)" } } # -- Work:src_int1 Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int1.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:int1)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int1.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int1.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:int1)" } } # -- Work:src_int1 Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int1.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: 
"derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:int1)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int1.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int1.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:int1)" } } # -- Work:src_int4_sparsity_off Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int4_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:int4 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" 
Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:int4 Sparsity:off)" } } # -- Work:src_int4_sparsity_off Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int4_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:int4 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:int4 Sparsity:off)" } } # -- Work:src_int4_sparsity_off Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int4_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: 
"derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:int4 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:int4 Sparsity:off)" } } # -- Work:src_int4_sparsity_on Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int4_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:int4 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } 
AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:int4 Sparsity:on)" } } # -- Work:src_int4_sparsity_on Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int4_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:int4 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:int4 Sparsity:on)" } } # -- Work:src_int4_sparsity_on Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int4_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: 
"derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:int4 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:int4 Sparsity:on)" } } # -- Work:src_int8_sparsity_off Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int8_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:int8 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } 
AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:int8 Sparsity:off)" } } # -- Work:src_int8_sparsity_off Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int8_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:int8 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:int8 Sparsity:off)" } } # -- Work:src_int8_sparsity_off Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int8_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name:
"derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:int8 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:int8 Sparsity:off)" } } # -- Work:src_int8_sparsity_on Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int8_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:int8 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } }
AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:int8 Sparsity:on)" } } # -- Work:src_int8_sparsity_on Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int8_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:int8 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:int8 Sparsity:on)" } } # -- Work:src_int8_sparsity_on Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int8_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name:
"derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:int8 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:int8 Sparsity:on)" } } # -- Work:src_tf32_dst_fp32_sparsity_off Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:tf32 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: 
"sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:tf32 Dst:fp32 Sparsity:off)" } } # -- Work:src_tf32_dst_fp32_sparsity_off Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:tf32 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:tf32 Dst:fp32 Sparsity:off)" } } # -- Work:src_tf32_dst_fp32_sparsity_off Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } 
PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:tf32 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:tf32 Dst:fp32 Sparsity:off)" } } # -- Work:src_tf32_dst_fp32_sparsity_on Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:tf32 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: 
"Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:tf32 Dst:fp32 Sparsity:on)" } } # -- Work:src_tf32_dst_fp32_sparsity_on Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:tf32 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:tf32 Dst:fp32 Sparsity:on)" } } # -- Work:src_tf32_dst_fp32_sparsity_on Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: 
"sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:tf32 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:tf32 Dst:fp32 Sparsity:on)" } } } } Items { Filter: { MinArch: CC_80 MaxArch: CC_80 } SuffixTable: { Suffixes { Suffix { Label: "# Operations" Name: ".sum" } Suffix { Label: "# Operations / Cycle" Name: ".sum.per_cycle_elapsed" } Suffix { Label: "# Operations / s" Name: ".sum.per_second" } Suffix { Label: "Peak %" Name: ".sum.pct_of_peak_sustained_elapsed" } Suffix { Label: "Peak Operations / Cycle" Name: ".sum.peak_sustained" } Suffix { Label: "Peak Operations / s" Name: ".sum.peak_sustained_elapsed.per_second" } } BaseNames { BaseName { Label: "Src:bf16 Dst:fp32 Sparsity:off" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off" } BaseName { Label: "Src:bf16 Dst:fp32 Sparsity:on" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on" } BaseName { Label: "Src:fp16 Dst:fp16 Sparsity:off" Name: 
"sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off" } BaseName { Label: "Src:fp16 Dst:fp16 Sparsity:on" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on" } BaseName { Label: "Src:fp16 Dst:fp32 Sparsity:off" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off" } BaseName { Label: "Src:fp16 Dst:fp32 Sparsity:on" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on" } BaseName { Label: "Src:fp64" Name: "sm__ops_path_tensor_src_fp64" } BaseName { Label: "Src:int1" Name: "sm__ops_path_tensor_src_int1" } BaseName { Label: "Src:int4 Sparsity:off" Name: "sm__ops_path_tensor_src_int4_sparsity_off" } BaseName { Label: "Src:int4 Sparsity:on" Name: "sm__ops_path_tensor_src_int4_sparsity_on" } BaseName { Label: "Src:int8 Sparsity:off" Name: "sm__ops_path_tensor_src_int8_sparsity_off" } BaseName { Label: "Src:int8 Sparsity:on" Name: "sm__ops_path_tensor_src_int8_sparsity_on" } BaseName { Label: "Src:tf32 Dst:fp32 Sparsity:off" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off" } BaseName { Label: "Src:tf32 Dst:fp32 Sparsity:on" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on" } } } } # ================================================================================================ # GA10x # ================================================================================================ Items { Filter: { MinArch: CC_86 MaxArch: CC_86 } RooflineChart { Label: "Tensor Core Operations Roofline" AxisIntensity { Label: "Arithmetic Intensity [OP/byte]" } AxisWork { Label: "Performance [OP/s]" } # -- Work:src_bf16_dst_fp32_sparsity_off Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { 
Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:bf16 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:bf16 Dst:fp32 Sparsity:off)" } } # -- Work:src_bf16_dst_fp32_sparsity_off Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:bf16 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { 
Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:bf16 Dst:fp32 Sparsity:off)" } } # -- Work:src_bf16_dst_fp32_sparsity_off Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:bf16 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:bf16 Dst:fp32 Sparsity:off)" } } # -- Work:src_bf16_dst_fp32_sparsity_on Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on.sum.peak_sustained" } 
CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:bf16 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:bf16 Dst:fp32 Sparsity:on)" } } # -- Work:src_bf16_dst_fp32_sparsity_on Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:bf16 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { 
ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:bf16 Dst:fp32 Sparsity:on)" } } # -- Work:src_bf16_dst_fp32_sparsity_on Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:bf16 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:bf16 Dst:fp32 Sparsity:on)" } } # -- Work:src_fp16_dst_fp16_sparsity_off Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { 
ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:fp16 Dst:fp16 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:fp16 Dst:fp16 Sparsity:off)" } } # -- Work:src_fp16_dst_fp16_sparsity_off Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:fp16 Dst:fp16 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor 
Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:fp16 Dst:fp16 Sparsity:off)" } } # -- Work:src_fp16_dst_fp16_sparsity_off Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:fp16 Dst:fp16 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache 
Achieved Value (Src:fp16 Dst:fp16 Sparsity:off)" } } # -- Work:src_fp16_dst_fp16_sparsity_on Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:fp16 Dst:fp16 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:fp16 Dst:fp16 Sparsity:on)" } } # -- Work:src_fp16_dst_fp16_sparsity_on Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" 
Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:fp16 Dst:fp16 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:fp16 Dst:fp16 Sparsity:on)" } } # -- Work:src_fp16_dst_fp16_sparsity_on Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:fp16 Dst:fp16 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } 
AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:fp16 Dst:fp16 Sparsity:on)" } } # -- Work:src_fp16_dst_fp32_sparsity_off Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:fp16 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:fp16 Dst:fp32 Sparsity:off)" } } # -- Work:src_fp16_dst_fp32_sparsity_off Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { 
ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:fp16 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:fp16 Dst:fp32 Sparsity:off)" } } # -- Work:src_fp16_dst_fp32_sparsity_off Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:fp16 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" 
Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:fp16 Dst:fp32 Sparsity:off)" } } # -- Work:src_fp16_dst_fp32_sparsity_on Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:fp16 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:fp16 Dst:fp32 Sparsity:on)" } } # -- Work:src_fp16_dst_fp32_sparsity_on Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: 
"sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:fp16 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:fp16 Dst:fp32 Sparsity:on)" } } # -- Work:src_fp16_dst_fp32_sparsity_on Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:fp16 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: 
"sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:fp16 Dst:fp32 Sparsity:on)" } } # -- Work:src_fp64 Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp64.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:fp64)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp64.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp64.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:fp64)" } } # -- Work:src_fp64 Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: 
"sm__ops_path_tensor_src_fp64.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:fp64)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp64.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp64.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:fp64)" } } # -- Work:src_fp64 Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp64.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:fp64)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp64.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor 
Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp64.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:fp64)" } } # -- Work:src_int1 Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int1.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:int1)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int1.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int1.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:int1)" } } # -- Work:src_int1 Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int1.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: 
"Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:int1)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int1.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int1.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:int1)" } } # -- Work:src_int1 Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int1.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:int1)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int1.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int1.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache 
Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:int1)" } } # -- Work:src_int4_sparsity_off Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int4_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:int4 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:int4 Sparsity:off)" } } # -- Work:src_int4_sparsity_off Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int4_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: 
"L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:int4 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:int4 Sparsity:off)" } } # -- Work:src_int4_sparsity_off Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int4_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:int4 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: 
"derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:int4 Sparsity:off)" } } # -- Work:src_int4_sparsity_on Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int4_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:int4 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:int4 Sparsity:on)" } } # -- Work:src_int4_sparsity_on Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int4_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache 
Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:int4 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:int4 Sparsity:on)" } } # -- Work:src_int4_sparsity_on Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int4_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:int4 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: 
"derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:int4 Sparsity:on)" } } # -- Work:src_int8_sparsity_off Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int8_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:int8 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:int8 Sparsity:off)" } } # -- Work:src_int8_sparsity_off Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int8_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 
Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:int8 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:int8 Sparsity:off)" } } # -- Work:src_int8_sparsity_off Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int8_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:int8 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: 
"derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:int8 Sparsity:off)" } } # -- Work:src_int8_sparsity_on Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int8_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:int8 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:int8 Sparsity:on)" } } # -- Work:src_int8_sparsity_on Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int8_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache 
Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:int8 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:int8 Sparsity:on)" } } # -- Work:src_int8_sparsity_on Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int8_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:int8 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: 
"derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:int8 Sparsity:on)" } } # -- Work:src_tf32_dst_fp32_sparsity_off Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:tf32 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:tf32 Dst:fp32 Sparsity:off)" } } # -- Work:src_tf32_dst_fp32_sparsity_off Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: 
"derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:tf32 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:tf32 Dst:fp32 Sparsity:off)" } } # -- Work:src_tf32_dst_fp32_sparsity_off Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:tf32 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: 
"sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:tf32 Dst:fp32 Sparsity:off)" } } # -- Work:src_tf32_dst_fp32_sparsity_on Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:tf32 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:tf32 Dst:fp32 Sparsity:on)" } } # -- Work:src_tf32_dst_fp32_sparsity_on Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: 
"sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:tf32 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:tf32 Dst:fp32 Sparsity:on)" } } # -- Work:src_tf32_dst_fp32_sparsity_on Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:tf32 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: 
"sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:tf32 Dst:fp32 Sparsity:on)" } } } } Items { Filter: { MinArch: CC_86 MaxArch: CC_86 } SuffixTable: { Suffixes { Suffix { Label: "# Operations" Name: ".sum" } Suffix { Label: "# Operations / Cycle" Name: ".sum.per_cycle_elapsed" } Suffix { Label: "# Operations / s" Name: ".sum.per_second" } Suffix { Label: "Peak %" Name: ".sum.pct_of_peak_sustained_elapsed" } Suffix { Label: "Peak Operations / Cycle" Name: ".sum.peak_sustained" } Suffix { Label: "Peak Operations / s" Name: ".sum.peak_sustained_elapsed.per_second" } } BaseNames { BaseName { Label: "Src:bf16 Dst:fp32 Sparsity:off" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off" } BaseName { Label: "Src:bf16 Dst:fp32 Sparsity:on" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on" } BaseName { Label: "Src:fp16 Dst:fp16 Sparsity:off" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off" } BaseName { Label: "Src:fp16 Dst:fp16 Sparsity:on" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on" } BaseName { Label: "Src:fp16 Dst:fp32 Sparsity:off" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off" } BaseName { Label: "Src:fp16 Dst:fp32 Sparsity:on" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on" } BaseName { Label: "Src:fp64" Name: "sm__ops_path_tensor_src_fp64" } BaseName { Label: "Src:int1" Name: "sm__ops_path_tensor_src_int1" } BaseName { Label: "Src:int4 Sparsity:off" Name: "sm__ops_path_tensor_src_int4_sparsity_off" } 
BaseName { Label: "Src:int4 Sparsity:on" Name: "sm__ops_path_tensor_src_int4_sparsity_on" } BaseName { Label: "Src:int8 Sparsity:off" Name: "sm__ops_path_tensor_src_int8_sparsity_off" } BaseName { Label: "Src:int8 Sparsity:on" Name: "sm__ops_path_tensor_src_int8_sparsity_on" } BaseName { Label: "Src:tf32 Dst:fp32 Sparsity:off" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off" } BaseName { Label: "Src:tf32 Dst:fp32 Sparsity:on" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on" } } } } # ================================================================================================ # GA10y # ================================================================================================ Items { Filter: { MinArch: CC_87 MaxArch: CC_87 } RooflineChart { Label: "Tensor Core Operations Roofline" AxisIntensity { Label: "Arithmetic Intensity [OP/byte]" } AxisWork { Label: "Performance [OP/s]" } # -- Work:src_bf16_dst_fp32_sparsity_off Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "mcc__dram_data_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:bf16 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } 
CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "mcc__dram_data_bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:bf16 Dst:fp32 Sparsity:off)" } } # -- Work:src_bf16_dst_fp32_sparsity_off Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:bf16 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:bf16 Dst:fp32 Sparsity:off)" } } # -- Work:src_bf16_dst_fp32_sparsity_off Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM 
Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:bf16 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:bf16 Dst:fp32 Sparsity:off)" } } # -- Work:src_bf16_dst_fp32_sparsity_on Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "mcc__dram_data_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:bf16 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork 
{ ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "mcc__dram_data_bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:bf16 Dst:fp32 Sparsity:on)" } }

# Layout of the entries below: per work type, one Rooflines entry followed by
# its AchievedValues entry for DRAM, then for L2 Cache, then for L1/TEX Cache.

# -- Work:src_bf16_dst_fp32_sparsity_on Traffic: L2 Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L2 Cache Roofline (Src:bf16 Dst:fp32 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" }
  }
  Options { Label: "L2 Cache Achieved Value (Src:bf16 Dst:fp32 Sparsity:on)" }
}

# -- Work:src_bf16_dst_fp32_sparsity_on Traffic: L1/TEX Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L1/TEX Cache Roofline (Src:bf16 Dst:fp32 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" }
  }
  Options { Label: "L1/TEX Cache Achieved Value (Src:bf16 Dst:fp32 Sparsity:on)" }
}

# -- Work:src_fp16_dst_fp16_sparsity_off Traffic: DRAM -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "mcc__dram_data_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "DRAM Roofline (Src:fp16 Dst:fp16 Sparsity:off)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "DRAM Bandwidth" Name: "mcc__dram_data_bytes.sum.per_second" }
  }
  Options { Label: "DRAM Achieved Value (Src:fp16 Dst:fp16 Sparsity:off)" }
}

# -- Work:src_fp16_dst_fp16_sparsity_off Traffic: L2 Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L2 Cache Roofline (Src:fp16 Dst:fp16 Sparsity:off)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" }
  }
  Options { Label: "L2 Cache Achieved Value (Src:fp16 Dst:fp16 Sparsity:off)" }
}

# -- Work:src_fp16_dst_fp16_sparsity_off Traffic: L1/TEX Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L1/TEX Cache Roofline (Src:fp16 Dst:fp16 Sparsity:off)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" }
  }
  Options { Label: "L1/TEX Cache Achieved Value (Src:fp16 Dst:fp16 Sparsity:off)" }
}

# -- Work:src_fp16_dst_fp16_sparsity_on Traffic: DRAM -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "mcc__dram_data_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "DRAM Roofline (Src:fp16 Dst:fp16 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "DRAM Bandwidth" Name: "mcc__dram_data_bytes.sum.per_second" }
  }
  Options { Label: "DRAM Achieved Value (Src:fp16 Dst:fp16 Sparsity:on)" }
}

# -- Work:src_fp16_dst_fp16_sparsity_on Traffic: L2 Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L2 Cache Roofline (Src:fp16 Dst:fp16 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" }
  }
  Options { Label: "L2 Cache Achieved Value (Src:fp16 Dst:fp16 Sparsity:on)" }
}

# -- Work:src_fp16_dst_fp16_sparsity_on Traffic: L1/TEX Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L1/TEX Cache Roofline (Src:fp16 Dst:fp16 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" }
  }
  Options { Label: "L1/TEX Cache Achieved Value (Src:fp16 Dst:fp16 Sparsity:on)" }
}

# -- Work:src_fp16_dst_fp32_sparsity_off Traffic: DRAM -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "mcc__dram_data_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "DRAM Roofline (Src:fp16 Dst:fp32 Sparsity:off)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "DRAM Bandwidth" Name: "mcc__dram_data_bytes.sum.per_second" }
  }
  Options { Label: "DRAM Achieved Value (Src:fp16 Dst:fp32 Sparsity:off)" }
}

# -- Work:src_fp16_dst_fp32_sparsity_off Traffic: L2 Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L2 Cache Roofline (Src:fp16 Dst:fp32 Sparsity:off)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" }
  }
  Options { Label: "L2 Cache Achieved Value (Src:fp16 Dst:fp32 Sparsity:off)" }
}

# -- Work:src_fp16_dst_fp32_sparsity_off Traffic: L1/TEX Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L1/TEX Cache Roofline (Src:fp16 Dst:fp32 Sparsity:off)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" }
  }
  Options { Label: "L1/TEX Cache Achieved Value (Src:fp16 Dst:fp32 Sparsity:off)" }
}

# -- Work:src_fp16_dst_fp32_sparsity_on Traffic: DRAM -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "mcc__dram_data_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "DRAM Roofline (Src:fp16 Dst:fp32 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "DRAM Bandwidth" Name: "mcc__dram_data_bytes.sum.per_second" }
  }
  Options { Label: "DRAM Achieved Value (Src:fp16 Dst:fp32 Sparsity:on)" }
}

# -- Work:src_fp16_dst_fp32_sparsity_on Traffic: L2 Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L2 Cache Roofline (Src:fp16 Dst:fp32 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" }
  }
  Options { Label: "L2 Cache Achieved Value (Src:fp16 Dst:fp32 Sparsity:on)" }
}

# -- Work:src_fp16_dst_fp32_sparsity_on Traffic: L1/TEX Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L1/TEX Cache Roofline (Src:fp16 Dst:fp32 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" }
  }
  Options { Label: "L1/TEX Cache Achieved Value (Src:fp16 Dst:fp32 Sparsity:on)" }
}

# -- Work:src_fp64 Traffic: DRAM -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp64.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "mcc__dram_data_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "DRAM Roofline (Src:fp64)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp64.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp64.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "DRAM Bandwidth" Name: "mcc__dram_data_bytes.sum.per_second" }
  }
  Options { Label: "DRAM Achieved Value (Src:fp64)" }
}

# -- Work:src_fp64 Traffic: L2 Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp64.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L2 Cache Roofline (Src:fp64)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp64.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp64.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" }
  }
  Options { Label: "L2 Cache Achieved Value (Src:fp64)" }
}

# -- Work:src_fp64 Traffic: L1/TEX Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp64.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L1/TEX Cache Roofline (Src:fp64)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp64.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp64.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" }
  }
  Options { Label: "L1/TEX Cache Achieved Value (Src:fp64)" }
}

# -- Work:src_int1 Traffic: DRAM -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int1.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "mcc__dram_data_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "DRAM Roofline (Src:int1)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int1.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int1.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "DRAM Bandwidth" Name: "mcc__dram_data_bytes.sum.per_second" }
  }
  Options { Label: "DRAM Achieved Value (Src:int1)" }
}

# -- Work:src_int1 Traffic: L2 Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int1.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L2 Cache Roofline (Src:int1)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int1.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int1.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" }
  }
  Options { Label: "L2 Cache Achieved Value (Src:int1)" }
}

# -- Work:src_int1 Traffic: L1/TEX Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int1.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L1/TEX Cache Roofline (Src:int1)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int1.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int1.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" }
  }
  Options { Label: "L1/TEX Cache Achieved Value (Src:int1)" }
}

# -- Work:src_int4_sparsity_off Traffic: DRAM -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int4_sparsity_off.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "mcc__dram_data_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "DRAM Roofline (Src:int4 Sparsity:off)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_off.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_off.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "DRAM Bandwidth" Name: "mcc__dram_data_bytes.sum.per_second" }
  }
  Options { Label: "DRAM Achieved Value (Src:int4 Sparsity:off)" }
}

# -- Work:src_int4_sparsity_off Traffic: L2 Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int4_sparsity_off.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L2 Cache Roofline (Src:int4 Sparsity:off)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_off.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_off.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" }
  }
  Options { Label: "L2 Cache Achieved Value (Src:int4 Sparsity:off)" }
}

# -- Work:src_int4_sparsity_off Traffic: L1/TEX Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int4_sparsity_off.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L1/TEX Cache Roofline (Src:int4 Sparsity:off)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_off.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_off.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" }
  }
  Options { Label: "L1/TEX Cache Achieved Value (Src:int4 Sparsity:off)" }
}

# -- Work:src_int4_sparsity_on Traffic: DRAM -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int4_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "mcc__dram_data_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "DRAM Roofline (Src:int4 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_on.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "DRAM Bandwidth" Name: "mcc__dram_data_bytes.sum.per_second" }
  }
  Options { Label: "DRAM Achieved Value (Src:int4 Sparsity:on)" }
}

# -- Work:src_int4_sparsity_on Traffic: L2 Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int4_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L2 Cache Roofline (Src:int4 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_on.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" }
  }
  Options { Label: "L2 Cache Achieved Value (Src:int4 Sparsity:on)" }
}

# -- Work:src_int4_sparsity_on Traffic: L1/TEX Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int4_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L1/TEX Cache Roofline (Src:int4 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_on.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" }
  }
  Options { Label: "L1/TEX Cache Achieved Value (Src:int4 Sparsity:on)" }
}

# -- Work:src_int8_sparsity_off Traffic: DRAM -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int8_sparsity_off.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "mcc__dram_data_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "DRAM Roofline (Src:int8 Sparsity:off)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_off.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_off.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "DRAM Bandwidth" Name: "mcc__dram_data_bytes.sum.per_second" }
  }
  Options { Label: "DRAM Achieved Value (Src:int8 Sparsity:off)" }
}

# -- Work:src_int8_sparsity_off Traffic: L2 Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int8_sparsity_off.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L2 Cache Roofline (Src:int8 Sparsity:off)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_off.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_off.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" }
  }
  Options { Label: "L2 Cache Achieved Value (Src:int8 Sparsity:off)" }
}

# -- Work:src_int8_sparsity_off Traffic: L1/TEX Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int8_sparsity_off.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L1/TEX Cache Roofline (Src:int8 Sparsity:off)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_off.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_off.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" }
  }
  Options { Label: "L1/TEX Cache Achieved Value (Src:int8 Sparsity:off)" }
}

# -- Work:src_int8_sparsity_on Traffic: DRAM -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int8_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "mcc__dram_data_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "DRAM Roofline (Src:int8 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_on.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "DRAM Bandwidth" Name: "mcc__dram_data_bytes.sum.per_second" }
  }
  Options { Label: "DRAM Achieved Value (Src:int8 Sparsity:on)" }
}

# -- Work:src_int8_sparsity_on Traffic: L2 Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int8_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L2 Cache Roofline (Src:int8 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues { AchievedWork { ValueCyclesPerSecondExpression {
ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:int8 Sparsity:on)" } } # -- Work:src_int8_sparsity_on Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int8_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:int8 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:int8 Sparsity:on)" } } # -- Work:src_tf32_dst_fp32_sparsity_off Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor 
Operations" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "mcc__dram_data_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:tf32 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "mcc__dram_data_bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:tf32 Dst:fp32 Sparsity:off)" } } # -- Work:src_tf32_dst_fp32_sparsity_off Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:tf32 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: 
"sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:tf32 Dst:fp32 Sparsity:off)" } } # -- Work:src_tf32_dst_fp32_sparsity_off Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:tf32 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:tf32 Dst:fp32 
Sparsity:off)" } } # -- Work:src_tf32_dst_fp32_sparsity_on Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "mcc__dram_data_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:tf32 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "mcc__dram_data_bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:tf32 Dst:fp32 Sparsity:on)" } } # -- Work:src_tf32_dst_fp32_sparsity_on Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: 
"lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:tf32 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:tf32 Dst:fp32 Sparsity:on)" } } # -- Work:src_tf32_dst_fp32_sparsity_on Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:tf32 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { 
Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" }
        }
        Options { Label: "L1/TEX Cache Achieved Value (Src:tf32 Dst:fp32 Sparsity:on)" }
      }
    }
  }

  # -- Tensor operation metric table -------------------------------------------------------------
  # The filter pins this item to a single architecture (MinArch and MaxArch are both CC_87).
  # NOTE(review): presumably every BaseName below is combined with every Suffix to form a full
  # metric name (e.g. "<BaseName>.sum") -- confirm against the section-file schema.
  Items {
    Filter: { MinArch: CC_87 MaxArch: CC_87 }
    SuffixTable: {
      # Labelled metric-name suffixes.
      Suffixes {
        Suffix {
          Label: "# Operations"
          Name: ".sum"
        }
        Suffix {
          Label: "# Operations / Cycle"
          Name: ".sum.per_cycle_elapsed"
        }
        Suffix {
          Label: "# Operations / s"
          Name: ".sum.per_second"
        }
        Suffix {
          Label: "Peak %"
          Name: ".sum.pct_of_peak_sustained_elapsed"
        }
        Suffix {
          Label: "Peak Operations / Cycle"
          Name: ".sum.peak_sustained"
        }
        Suffix {
          Label: "Peak Operations / s"
          Name: ".sum.peak_sustained_elapsed.per_second"
        }
      }
      # Labelled metric base names, one per tensor source/destination/sparsity variant.
      BaseNames {
        BaseName {
          Label: "Src:bf16 Dst:fp32 Sparsity:off"
          Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off"
        }
        BaseName {
          Label: "Src:bf16 Dst:fp32 Sparsity:on"
          Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on"
        }
        BaseName {
          Label: "Src:fp16 Dst:fp16 Sparsity:off"
          Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off"
        }
        BaseName {
          Label: "Src:fp16 Dst:fp16 Sparsity:on"
          Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on"
        }
        BaseName {
          Label: "Src:fp16 Dst:fp32 Sparsity:off"
          Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off"
        }
        BaseName {
          Label: "Src:fp16 Dst:fp32 Sparsity:on"
          Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on"
        }
        BaseName {
          Label: "Src:fp64"
          Name: "sm__ops_path_tensor_src_fp64"
        }
        BaseName {
          Label: "Src:int1"
          Name: "sm__ops_path_tensor_src_int1"
        }
        BaseName {
          Label: "Src:int4 Sparsity:off"
          Name: "sm__ops_path_tensor_src_int4_sparsity_off"
        }
        BaseName {
          Label: "Src:int4 Sparsity:on"
          Name: "sm__ops_path_tensor_src_int4_sparsity_on"
        }
        BaseName {
          Label: "Src:int8 Sparsity:off"
          Name: "sm__ops_path_tensor_src_int8_sparsity_off"
        }
        BaseName {
          Label: "Src:int8 Sparsity:on"
          Name: "sm__ops_path_tensor_src_int8_sparsity_on"
        }
        BaseName {
          Label: "Src:tf32 Dst:fp32 Sparsity:off"
          Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off"
        }
        BaseName {
          Label: "Src:tf32 Dst:fp32 Sparsity:on"
          Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on"
        }
      }
    }
  }

  # ================================================================================================
  # AD10x
  # ================================================================================================
  Items {
    Filter: { MinArch: CC_89 MaxArch: CC_89 }
    RooflineChart {
      Label: "Tensor Core Operations Roofline"
      AxisIntensity { Label: "Arithmetic Intensity [OP/byte]" }
      AxisWork { Label: "Performance [OP/s]" }

      # -- Work:src_bf16_dst_fp32_sparsity_off Traffic: DRAM -----------------------------------------
      Rooflines {
        PeakWork {
          ValueCyclesPerSecondExpression {
            ValuePerCycleMetrics {
              Label: "Theoretical Tensor Operations"
              Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off.sum.peak_sustained"
            }
            CyclesPerSecondMetric {
              Label: "SM Frequency"
              Name: "sm__cycles_elapsed.avg.per_second"
            }
          }
        }
        PeakTraffic {
          ValueCyclesPerSecondExpression {
            ValuePerCycleMetrics {
              Label: "Theoretical DRAM Bytes Accessible"
              Name: "dram__bytes.sum.peak_sustained"
            }
            CyclesPerSecondMetric {
              Label: "DRAM Frequency"
              Name: "dram__cycles_elapsed.avg.per_second"
            }
          }
        }
        Options {
          Label: "DRAM Roofline (Src:bf16 Dst:fp32 Sparsity:off)"
          ShowOnlyIfNotZero {
            Metric {
              Label: "Tensor Operations Per Cycle"
              Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off.sum.per_cycle_elapsed"
            }
          }
        }
      }
      AchievedValues {
        AchievedWork {
          ValueCyclesPerSecondExpression {
            ValuePerCycleMetrics {
              Label: "Tensor Operations Per Cycle"
              Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off.sum.per_cycle_elapsed"
            }
            CyclesPerSecondMetric {
              Label: "SM Frequency"
              Name: "sm__cycles_elapsed.avg.per_second"
            }
          }
        }
        AchievedTraffic {
          Metric {
            Label: "DRAM Bandwidth"
            Name: "dram__bytes.sum.per_second"
          }
        }
        Options {
          Label: "DRAM Achieved Value (Src:bf16 Dst:fp32 Sparsity:off)"
        }
      }

      # -- Work:src_bf16_dst_fp32_sparsity_off Traffic: L2 Cache -----------------------------------------
      Rooflines {
        PeakWork {
          ValueCyclesPerSecondExpression {
ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:bf16 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:bf16 Dst:fp32 Sparsity:off)" } } # -- Work:src_bf16_dst_fp32_sparsity_off Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:bf16 Dst:fp32 
Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:bf16 Dst:fp32 Sparsity:off)" } } # -- Work:src_bf16_dst_fp32_sparsity_on Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:bf16 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:bf16 Dst:fp32 
Sparsity:on)" } } # -- Work:src_bf16_dst_fp32_sparsity_on Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:bf16 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:bf16 Dst:fp32 Sparsity:on)" } } # -- Work:src_bf16_dst_fp32_sparsity_on Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } 
CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:bf16 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:bf16 Dst:fp32 Sparsity:on)" } } # -- Work:src_fp16_dst_fp16_sparsity_off Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:fp16 Dst:fp16 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: 
"sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:fp16 Dst:fp16 Sparsity:off)" } } # -- Work:src_fp16_dst_fp16_sparsity_off Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:fp16 Dst:fp16 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:fp16 Dst:fp16 Sparsity:off)" } } # -- Work:src_fp16_dst_fp16_sparsity_off Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } 
PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:fp16 Dst:fp16 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:fp16 Dst:fp16 Sparsity:off)" } } # -- Work:src_fp16_dst_fp16_sparsity_on Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:fp16 Dst:fp16 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: 
"Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:fp16 Dst:fp16 Sparsity:on)" } } # -- Work:src_fp16_dst_fp16_sparsity_on Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:fp16 Dst:fp16 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:fp16 Dst:fp16 Sparsity:on)" } } # -- Work:src_fp16_dst_fp16_sparsity_on Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: 
"sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:fp16 Dst:fp16 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:fp16 Dst:fp16 Sparsity:on)" } } # -- Work:src_fp16_dst_fp32_sparsity_off Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:fp16 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: 
"sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:fp16 Dst:fp32 Sparsity:off)" } } # -- Work:src_fp16_dst_fp32_sparsity_off Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:fp16 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:fp16 Dst:fp32 Sparsity:off)" } } # -- Work:src_fp16_dst_fp32_sparsity_off Traffic: L1/TEX 
# Cache -----------------------------------------
# (tail of the wrapped header comment: Work:src_fp16_dst_fp32_sparsity_off Traffic: L1/TEX Cache)
#
# Layout of this span: every Rooflines entry declares a PeakWork expression, a
# PeakTraffic expression and display Options; the AchievedValues entry right after
# it declares the achieved work/traffic metrics for the same operand type and
# memory level.
# NOTE(review): the per-cycle metric and the cycles-per-second metric are presumably
# combined by the tool into a per-second value - confirm against the section-file schema.
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L1/TEX Cache Roofline (Src:fp16 Dst:fp32 Sparsity:off)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" }
  }
  Options { Label: "L1/TEX Cache Achieved Value (Src:fp16 Dst:fp32 Sparsity:off)" }
}

# -- Work:src_fp16_dst_fp32_sparsity_on Traffic: DRAM -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "DRAM Roofline (Src:fp16 Dst:fp32 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" }
  }
  Options { Label: "DRAM Achieved Value (Src:fp16 Dst:fp32 Sparsity:on)" }
}

# -- Work:src_fp16_dst_fp32_sparsity_on Traffic: L2 Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L2 Cache Roofline (Src:fp16 Dst:fp32 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" }
  }
  Options { Label: "L2 Cache Achieved Value (Src:fp16 Dst:fp32 Sparsity:on)" }
}

# -- Work:src_fp16_dst_fp32_sparsity_on Traffic: L1/TEX Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L1/TEX Cache Roofline (Src:fp16 Dst:fp32 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" }
  }
  Options { Label: "L1/TEX Cache Achieved Value (Src:fp16 Dst:fp32 Sparsity:on)" }
}

# -- Work:src_fp64 Traffic: DRAM -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp64.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "DRAM Roofline (Src:fp64)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp64.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp64.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" }
  }
  Options { Label: "DRAM Achieved Value (Src:fp64)" }
}

# -- Work:src_fp64 Traffic: L2 Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp64.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L2 Cache Roofline (Src:fp64)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp64.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp64.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" }
  }
  Options { Label: "L2 Cache Achieved Value (Src:fp64)" }
}

# -- Work:src_fp64 Traffic: L1/TEX Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp64.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L1/TEX Cache Roofline (Src:fp64)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp64.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp64.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" }
  }
  Options { Label: "L1/TEX Cache Achieved Value (Src:fp64)" }
}

# -- Work:src_fp8_sparsity_off Traffic: DRAM -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp8_sparsity_off.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "DRAM Roofline (Src:fp8 Sparsity:off)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp8_sparsity_off.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp8_sparsity_off.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" }
  }
  Options { Label: "DRAM Achieved Value (Src:fp8 Sparsity:off)" }
}

# -- Work:src_fp8_sparsity_off Traffic: L2 Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp8_sparsity_off.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L2 Cache Roofline (Src:fp8 Sparsity:off)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp8_sparsity_off.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp8_sparsity_off.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" }
  }
  Options { Label: "L2 Cache Achieved Value (Src:fp8 Sparsity:off)" }
}

# -- Work:src_fp8_sparsity_off Traffic: L1/TEX Cache
# -----------------------------------------
# L1/TEX Cache roofline for src_fp8_sparsity_off, followed by its achieved value.
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp8_sparsity_off.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L1/TEX Cache Roofline (Src:fp8 Sparsity:off)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp8_sparsity_off.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp8_sparsity_off.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" }
  }
  Options { Label: "L1/TEX Cache Achieved Value (Src:fp8 Sparsity:off)" }
}

# -- Work:src_fp8_sparsity_on Traffic: DRAM -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp8_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "DRAM Roofline (Src:fp8 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp8_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp8_sparsity_on.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" }
  }
  Options { Label: "DRAM Achieved Value (Src:fp8 Sparsity:on)" }
}

# -- Work:src_fp8_sparsity_on Traffic: L2 Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp8_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L2 Cache Roofline (Src:fp8 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp8_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp8_sparsity_on.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" }
  }
  Options { Label: "L2 Cache Achieved Value (Src:fp8 Sparsity:on)" }
}

# -- Work:src_fp8_sparsity_on Traffic: L1/TEX Cache
# -----------------------------------------
# L1/TEX Cache roofline for src_fp8_sparsity_on, followed by its achieved value.
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp8_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L1/TEX Cache Roofline (Src:fp8 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp8_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp8_sparsity_on.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" }
  }
  Options { Label: "L1/TEX Cache Achieved Value (Src:fp8 Sparsity:on)" }
}

# -- Work:src_int1 Traffic: DRAM -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int1.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "DRAM Roofline (Src:int1)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int1.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int1.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" }
  }
  Options { Label: "DRAM Achieved Value (Src:int1)" }
}

# -- Work:src_int1 Traffic: L2 Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int1.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L2 Cache Roofline (Src:int1)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int1.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int1.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" }
  }
  Options { Label: "L2 Cache Achieved Value (Src:int1)" }
}

# -- Work:src_int1 Traffic: L1/TEX Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int1.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L1/TEX Cache Roofline (Src:int1)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int1.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int1.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" }
  }
  Options { Label: "L1/TEX Cache Achieved Value (Src:int1)" }
}

# -- Work:src_int4_sparsity_off Traffic: DRAM -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int4_sparsity_off.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "DRAM Roofline (Src:int4 Sparsity:off)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_off.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_off.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" }
  }
  Options { Label: "DRAM Achieved Value (Src:int4 Sparsity:off)" }
}

# -- Work:src_int4_sparsity_off Traffic: L2 Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int4_sparsity_off.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L2 Cache Roofline (Src:int4 Sparsity:off)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_off.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_off.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" }
  }
  Options { Label: "L2 Cache Achieved Value (Src:int4 Sparsity:off)" }
}

# -- Work:src_int4_sparsity_off Traffic: L1/TEX Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int4_sparsity_off.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L1/TEX Cache Roofline (Src:int4 Sparsity:off)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_off.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_off.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" }
  }
  Options { Label: "L1/TEX Cache Achieved Value (Src:int4 Sparsity:off)" }
}

# -- Work:src_int4_sparsity_on Traffic: DRAM -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int4_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "DRAM Roofline (Src:int4 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
# Achieved value plotted against the DRAM roofline for src_int4_sparsity_on.
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_on.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" }
  }
  Options { Label: "DRAM Achieved Value (Src:int4 Sparsity:on)" }
}

# -- Work:src_int4_sparsity_on Traffic: L2 Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int4_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L2 Cache Roofline (Src:int4 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_on.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" }
  }
  Options { Label: "L2 Cache Achieved Value (Src:int4 Sparsity:on)" }
}

# -- Work:src_int4_sparsity_on Traffic: L1/TEX Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int4_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L1/TEX Cache Roofline (Src:int4 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int4_sparsity_on.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" }
  }
  Options { Label: "L1/TEX Cache Achieved Value (Src:int4 Sparsity:on)" }
}

# -- Work:src_int8_sparsity_off Traffic: DRAM -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int8_sparsity_off.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "DRAM Roofline (Src:int8 Sparsity:off)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_off.sum.per_cycle_elapsed" }
    }
  }
}
# Achieved value plotted against the DRAM roofline for src_int8_sparsity_off.
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_off.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" }
  }
  Options { Label: "DRAM Achieved Value (Src:int8 Sparsity:off)" }
}

# -- Work:src_int8_sparsity_off Traffic: L2 Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int8_sparsity_off.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L2 Cache Roofline (Src:int8 Sparsity:off)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_off.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_off.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" }
  }
  Options { Label: "L2 Cache Achieved Value (Src:int8 Sparsity:off)" }
}

# -- Work:src_int8_sparsity_off Traffic: L1/TEX Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int8_sparsity_off.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L1/TEX Cache Roofline (Src:int8 Sparsity:off)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_off.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_off.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" }
  }
  Options { Label: "L1/TEX Cache Achieved Value (Src:int8 Sparsity:off)" }
}

# -- Work:src_int8_sparsity_on Traffic: DRAM -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int8_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "DRAM Roofline (Src:int8 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:int8 Sparsity:on)" } } # -- Work:src_int8_sparsity_on Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int8_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:int8 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:int8 Sparsity:on)" } } # -- Work:src_int8_sparsity_on Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: 
"sm__ops_path_tensor_src_int8_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:int8 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:int8 Sparsity:on)" } } # -- Work:src_tf32_dst_fp32_sparsity_off Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:tf32 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: 
"sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:tf32 Dst:fp32 Sparsity:off)" } } # -- Work:src_tf32_dst_fp32_sparsity_off Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:tf32 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:tf32 Dst:fp32 Sparsity:off)" } } # -- Work:src_tf32_dst_fp32_sparsity_off Traffic: L1/TEX 
Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:tf32 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:tf32 Dst:fp32 Sparsity:off)" } } # -- Work:src_tf32_dst_fp32_sparsity_on Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: 
"dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:tf32 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:tf32 Dst:fp32 Sparsity:on)" } } # -- Work:src_tf32_dst_fp32_sparsity_on Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:tf32 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: 
"derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:tf32 Dst:fp32 Sparsity:on)" } } # -- Work:src_tf32_dst_fp32_sparsity_on Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:tf32 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lg.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:tf32 Dst:fp32 Sparsity:on)" } } } } Items { Filter: { MinArch: CC_89 MaxArch: CC_89 } SuffixTable: { Suffixes { Suffix { Label: "# Operations" Name: ".sum" } Suffix { Label: "# Operations / Cycle" Name: ".sum.per_cycle_elapsed" } Suffix { Label: "# Operations / s" Name: ".sum.per_second" } Suffix { Label: "Peak %" Name: ".sum.pct_of_peak_sustained_elapsed" } Suffix { Label: "Peak Operations / Cycle" Name: ".sum.peak_sustained" } Suffix { Label: "Peak Operations / 
s" Name: ".sum.peak_sustained_elapsed.per_second" } } BaseNames { BaseName { Label: "Src:bf16 Dst:fp32 Sparsity:off" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_off" } BaseName { Label: "Src:bf16 Dst:fp32 Sparsity:on" Name: "sm__ops_path_tensor_src_bf16_dst_fp32_sparsity_on" } BaseName { Label: "Src:fp16 Dst:fp16 Sparsity:off" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_off" } BaseName { Label: "Src:fp16 Dst:fp16 Sparsity:on" Name: "sm__ops_path_tensor_src_fp16_dst_fp16_sparsity_on" } BaseName { Label: "Src:fp16 Dst:fp32 Sparsity:off" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_off" } BaseName { Label: "Src:fp16 Dst:fp32 Sparsity:on" Name: "sm__ops_path_tensor_src_fp16_dst_fp32_sparsity_on" } BaseName { Label: "Src:fp64" Name: "sm__ops_path_tensor_src_fp64" } BaseName { Label: "Src:fp8 Sparsity:off" Name: "sm__ops_path_tensor_src_fp8_sparsity_off" } BaseName { Label: "Src:fp8 Sparsity:on" Name: "sm__ops_path_tensor_src_fp8_sparsity_on" } BaseName { Label: "Src:int1" Name: "sm__ops_path_tensor_src_int1" } BaseName { Label: "Src:int4 Sparsity:off" Name: "sm__ops_path_tensor_src_int4_sparsity_off" } BaseName { Label: "Src:int4 Sparsity:on" Name: "sm__ops_path_tensor_src_int4_sparsity_on" } BaseName { Label: "Src:int8 Sparsity:off" Name: "sm__ops_path_tensor_src_int8_sparsity_off" } BaseName { Label: "Src:int8 Sparsity:on" Name: "sm__ops_path_tensor_src_int8_sparsity_on" } BaseName { Label: "Src:tf32 Dst:fp32 Sparsity:off" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_off" } BaseName { Label: "Src:tf32 Dst:fp32 Sparsity:on" Name: "sm__ops_path_tensor_src_tf32_dst_fp32_sparsity_on" } } } } # ================================================================================================ # GH100 # ================================================================================================ Items { Filter: { MinArch: CC_90 MaxArch: CC_90 } RooflineChart { Label: "Tensor Core Operations Roofline" AxisIntensity { Label: "Arithmetic 
Intensity [OP/byte]" } AxisWork { Label: "Performance [OP/s]" } # -- Work:op_bgmma_src_int1 Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_bgmma_src_int1.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Op:bgmma Src:int1)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_bgmma_src_int1.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_bgmma_src_int1.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Op:bgmma Src:int1)" } } # -- Work:op_bgmma_src_int1 Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_bgmma_src_int1.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline 
(Op:bgmma Src:int1)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_bgmma_src_int1.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_bgmma_src_int1.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Op:bgmma Src:int1)" } } # -- Work:op_bgmma_src_int1 Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_bgmma_src_int1.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Op:bgmma Src:int1)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_bgmma_src_int1.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_bgmma_src_int1.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Op:bgmma Src:int1)" } } # 
-- Work:op_bmma_src_int1 Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_bmma_src_int1.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Op:bmma Src:int1)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_bmma_src_int1.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_bmma_src_int1.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Op:bmma Src:int1)" } } # -- Work:op_bmma_src_int1 Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_bmma_src_int1.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Op:bmma Src:int1)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per 
Cycle" Name: "sm__ops_path_tensor_op_bmma_src_int1.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_bmma_src_int1.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Op:bmma Src:int1)" } } # -- Work:op_bmma_src_int1 Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_bmma_src_int1.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Op:bmma Src:int1)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_bmma_src_int1.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_bmma_src_int1.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Op:bmma Src:int1)" } } # -- Work:op_hgmma_src_bf16_dst_fp32_sparsity_off Traffic: DRAM 
----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hgmma_src_bf16_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Op:hgmma Src:bf16 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hgmma_src_bf16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hgmma_src_bf16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Op:hgmma Src:bf16 Dst:fp32 Sparsity:off)" } } # -- Work:op_hgmma_src_bf16_dst_fp32_sparsity_off Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hgmma_src_bf16_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: 
"lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Op:hgmma Src:bf16 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hgmma_src_bf16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hgmma_src_bf16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Op:hgmma Src:bf16 Dst:fp32 Sparsity:off)" } } # -- Work:op_hgmma_src_bf16_dst_fp32_sparsity_off Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hgmma_src_bf16_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Op:hgmma Src:bf16 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hgmma_src_bf16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hgmma_src_bf16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric 
{ Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Op:hgmma Src:bf16 Dst:fp32 Sparsity:off)" } } # -- Work:op_hgmma_src_bf16_dst_fp32_sparsity_on Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hgmma_src_bf16_dst_fp32_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Op:hgmma Src:bf16 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hgmma_src_bf16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hgmma_src_bf16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Op:hgmma Src:bf16 Dst:fp32 Sparsity:on)" } } # -- Work:op_hgmma_src_bf16_dst_fp32_sparsity_on Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hgmma_src_bf16_dst_fp32_sparsity_on.sum.peak_sustained" } 
CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Op:hgmma Src:bf16 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hgmma_src_bf16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hgmma_src_bf16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Op:hgmma Src:bf16 Dst:fp32 Sparsity:on)" } } # -- Work:op_hgmma_src_bf16_dst_fp32_sparsity_on Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hgmma_src_bf16_dst_fp32_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Op:hgmma Src:bf16 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: 
"sm__ops_path_tensor_op_hgmma_src_bf16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hgmma_src_bf16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Op:hgmma Src:bf16 Dst:fp32 Sparsity:on)" } } # -- Work:op_hgmma_src_fp16_sparsity_off Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hgmma_src_fp16_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Op:hgmma Src:fp16 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hgmma_src_fp16_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hgmma_src_fp16_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Op:hgmma Src:fp16 Sparsity:off)" } } # -- Work:op_hgmma_src_fp16_sparsity_off 
Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hgmma_src_fp16_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Op:hgmma Src:fp16 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hgmma_src_fp16_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hgmma_src_fp16_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Op:hgmma Src:fp16 Sparsity:off)" } } # -- Work:op_hgmma_src_fp16_sparsity_off Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hgmma_src_fp16_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: 
"l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Op:hgmma Src:fp16 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hgmma_src_fp16_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hgmma_src_fp16_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Op:hgmma Src:fp16 Sparsity:off)" } } # -- Work:op_hgmma_src_fp16_sparsity_on Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hgmma_src_fp16_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Op:hgmma Src:fp16 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hgmma_src_fp16_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hgmma_src_fp16_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM 
Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Op:hgmma Src:fp16 Sparsity:on)" } } # -- Work:op_hgmma_src_fp16_sparsity_on Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hgmma_src_fp16_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Op:hgmma Src:fp16 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hgmma_src_fp16_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hgmma_src_fp16_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Op:hgmma Src:fp16 Sparsity:on)" } } # -- Work:op_hgmma_src_fp16_sparsity_on Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hgmma_src_fp16_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX 
Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Op:hgmma Src:fp16 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hgmma_src_fp16_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hgmma_src_fp16_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Op:hgmma Src:fp16 Sparsity:on)" } } # -- Work:op_hgmma_src_tf32_dst_fp32_sparsity_off Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hgmma_src_tf32_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Op:hgmma Src:tf32 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hgmma_src_tf32_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: 
"sm__ops_path_tensor_op_hgmma_src_tf32_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Op:hgmma Src:tf32 Dst:fp32 Sparsity:off)" } } # -- Work:op_hgmma_src_tf32_dst_fp32_sparsity_off Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hgmma_src_tf32_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Op:hgmma Src:tf32 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hgmma_src_tf32_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hgmma_src_tf32_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Op:hgmma Src:tf32 Dst:fp32 Sparsity:off)" } } # -- Work:op_hgmma_src_tf32_dst_fp32_sparsity_off Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: 
"Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hgmma_src_tf32_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Op:hgmma Src:tf32 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hgmma_src_tf32_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hgmma_src_tf32_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Op:hgmma Src:tf32 Dst:fp32 Sparsity:off)" } } # -- Work:op_hgmma_src_tf32_dst_fp32_sparsity_on Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hgmma_src_tf32_dst_fp32_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Op:hgmma 
Src:tf32 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hgmma_src_tf32_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hgmma_src_tf32_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Op:hgmma Src:tf32 Dst:fp32 Sparsity:on)" } } # -- Work:op_hgmma_src_tf32_dst_fp32_sparsity_on Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hgmma_src_tf32_dst_fp32_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Op:hgmma Src:tf32 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hgmma_src_tf32_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hgmma_src_tf32_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: 
"derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Op:hgmma Src:tf32 Dst:fp32 Sparsity:on)" } } # -- Work:op_hgmma_src_tf32_dst_fp32_sparsity_on Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hgmma_src_tf32_dst_fp32_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Op:hgmma Src:tf32 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hgmma_src_tf32_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hgmma_src_tf32_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Op:hgmma Src:tf32 Dst:fp32 Sparsity:on)" } } # -- Work:op_hmma_src_bf16_dst_fp32_sparsity_off Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hmma_src_bf16_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: 
"sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Op:hmma Src:bf16 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_bf16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_bf16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Op:hmma Src:bf16 Dst:fp32 Sparsity:off)" } } # -- Work:op_hmma_src_bf16_dst_fp32_sparsity_off Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hmma_src_bf16_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Op:hmma Src:bf16 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_bf16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { 
ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_bf16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Op:hmma Src:bf16 Dst:fp32 Sparsity:off)" } } # -- Work:op_hmma_src_bf16_dst_fp32_sparsity_off Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hmma_src_bf16_dst_fp32_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Op:hmma Src:bf16 Dst:fp32 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_bf16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_bf16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Op:hmma Src:bf16 Dst:fp32 Sparsity:off)" } } # -- 
Work:op_hmma_src_bf16_dst_fp32_sparsity_on Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hmma_src_bf16_dst_fp32_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Op:hmma Src:bf16 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_bf16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_bf16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Op:hmma Src:bf16 Dst:fp32 Sparsity:on)" } } # -- Work:op_hmma_src_bf16_dst_fp32_sparsity_on Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hmma_src_bf16_dst_fp32_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache 
Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Op:hmma Src:bf16 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_bf16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_bf16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Op:hmma Src:bf16 Dst:fp32 Sparsity:on)" } } # -- Work:op_hmma_src_bf16_dst_fp32_sparsity_on Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hmma_src_bf16_dst_fp32_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Op:hmma Src:bf16 Dst:fp32 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_bf16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_bf16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric 
{ Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Op:hmma Src:bf16 Dst:fp32 Sparsity:on)" } } # -- Work:op_hmma_src_fp16_dst_fp16_sparsity_off Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp16_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Op:hmma Src:fp16 Dst:fp16 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp16_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp16_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Op:hmma Src:fp16 Dst:fp16 Sparsity:off)" } } # -- Work:op_hmma_src_fp16_dst_fp16_sparsity_off Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp16_sparsity_off.sum.peak_sustained" } 
CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Op:hmma Src:fp16 Dst:fp16 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp16_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp16_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Op:hmma Src:fp16 Dst:fp16 Sparsity:off)" } } # -- Work:op_hmma_src_fp16_dst_fp16_sparsity_off Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp16_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Op:hmma Src:fp16 Dst:fp16 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: 
"sm__ops_path_tensor_op_hmma_src_fp16_dst_fp16_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp16_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Op:hmma Src:fp16 Dst:fp16 Sparsity:off)" } } # -- Work:op_hmma_src_fp16_dst_fp16_sparsity_on Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp16_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Op:hmma Src:fp16 Dst:fp16 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp16_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp16_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Op:hmma Src:fp16 Dst:fp16 Sparsity:on)" } } # 
# -- Work:op_hmma_src_fp16_dst_fp16_sparsity_on Traffic: L2 Cache -----------------------------------------
# Every Rooflines/AchievedValues pair in this run has the same shape: one
# tensor-op work metric (sm__ops_path_tensor_*) paired with one traffic level
# (DRAM, L2 Cache or L1/TEX Cache). ShowOnlyIfNotZero names the variant's
# per_cycle_elapsed metric, so each roofline is presumably displayed only when
# that counter is non-zero. The L1/TEX entries reference the *_mem_lgds
# derived metrics.
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp16_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L2 Cache Roofline (Op:hmma Src:fp16 Dst:fp16 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp16_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp16_sparsity_on.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" }
  }
  Options { Label: "L2 Cache Achieved Value (Op:hmma Src:fp16 Dst:fp16 Sparsity:on)" }
}

# -- Work:op_hmma_src_fp16_dst_fp16_sparsity_on Traffic: L1/TEX Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp16_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L1/TEX Cache Roofline (Op:hmma Src:fp16 Dst:fp16 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp16_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp16_sparsity_on.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.per_second" }
  }
  Options { Label: "L1/TEX Cache Achieved Value (Op:hmma Src:fp16 Dst:fp16 Sparsity:on)" }
}

# -- Work:op_hmma_src_fp16_dst_fp32_sparsity_off Traffic: DRAM -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp32_sparsity_off.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "DRAM Roofline (Op:hmma Src:fp16 Dst:fp32 Sparsity:off)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" }
  }
  Options { Label: "DRAM Achieved Value (Op:hmma Src:fp16 Dst:fp32 Sparsity:off)" }
}

# -- Work:op_hmma_src_fp16_dst_fp32_sparsity_off Traffic: L2 Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp32_sparsity_off.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L2 Cache Roofline (Op:hmma Src:fp16 Dst:fp32 Sparsity:off)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" }
  }
  Options { Label: "L2 Cache Achieved Value (Op:hmma Src:fp16 Dst:fp32 Sparsity:off)" }
}

# -- Work:op_hmma_src_fp16_dst_fp32_sparsity_off Traffic: L1/TEX Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp32_sparsity_off.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L1/TEX Cache Roofline (Op:hmma Src:fp16 Dst:fp32 Sparsity:off)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp32_sparsity_off.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.per_second" }
  }
  Options { Label: "L1/TEX Cache Achieved Value (Op:hmma Src:fp16 Dst:fp32 Sparsity:off)" }
}

# -- Work:op_hmma_src_fp16_dst_fp32_sparsity_on Traffic: DRAM -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp32_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "DRAM Roofline (Op:hmma Src:fp16 Dst:fp32 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" }
  }
  Options { Label: "DRAM Achieved Value (Op:hmma Src:fp16 Dst:fp32 Sparsity:on)" }
}

# -- Work:op_hmma_src_fp16_dst_fp32_sparsity_on Traffic: L2 Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp32_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L2 Cache Roofline (Op:hmma Src:fp16 Dst:fp32 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" }
  }
  Options { Label: "L2 Cache Achieved Value (Op:hmma Src:fp16 Dst:fp32 Sparsity:on)" }
}

# -- Work:op_hmma_src_fp16_dst_fp32_sparsity_on Traffic: L1/TEX Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp32_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L1/TEX Cache Roofline (Op:hmma Src:fp16 Dst:fp32 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp32_sparsity_on.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.per_second" }
  }
  Options { Label: "L1/TEX Cache Achieved Value (Op:hmma Src:fp16 Dst:fp32 Sparsity:on)" }
}

# -- Work:op_hmma_src_tf32_dst_fp32_sparsity_off Traffic: DRAM -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hmma_src_tf32_dst_fp32_sparsity_off.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "DRAM Roofline (Op:hmma Src:tf32 Dst:fp32 Sparsity:off)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_tf32_dst_fp32_sparsity_off.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_tf32_dst_fp32_sparsity_off.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" }
  }
  Options { Label: "DRAM Achieved Value (Op:hmma Src:tf32 Dst:fp32 Sparsity:off)" }
}

# -- Work:op_hmma_src_tf32_dst_fp32_sparsity_off Traffic: L2 Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hmma_src_tf32_dst_fp32_sparsity_off.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L2 Cache Roofline (Op:hmma Src:tf32 Dst:fp32 Sparsity:off)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_tf32_dst_fp32_sparsity_off.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_tf32_dst_fp32_sparsity_off.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" }
  }
  Options { Label: "L2 Cache Achieved Value (Op:hmma Src:tf32 Dst:fp32 Sparsity:off)" }
}

# -- Work:op_hmma_src_tf32_dst_fp32_sparsity_off Traffic: L1/TEX Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hmma_src_tf32_dst_fp32_sparsity_off.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L1/TEX Cache Roofline (Op:hmma Src:tf32 Dst:fp32 Sparsity:off)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_tf32_dst_fp32_sparsity_off.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_tf32_dst_fp32_sparsity_off.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.per_second" }
  }
  Options { Label: "L1/TEX Cache Achieved Value (Op:hmma Src:tf32 Dst:fp32 Sparsity:off)" }
}

# -- Work:op_hmma_src_tf32_dst_fp32_sparsity_on Traffic: DRAM -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hmma_src_tf32_dst_fp32_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "DRAM Roofline (Op:hmma Src:tf32 Dst:fp32 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_tf32_dst_fp32_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_tf32_dst_fp32_sparsity_on.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" }
  }
  Options { Label: "DRAM Achieved Value (Op:hmma Src:tf32 Dst:fp32 Sparsity:on)" }
}

# -- Work:op_hmma_src_tf32_dst_fp32_sparsity_on Traffic: L2 Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hmma_src_tf32_dst_fp32_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L2 Cache Roofline (Op:hmma Src:tf32 Dst:fp32 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_tf32_dst_fp32_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_tf32_dst_fp32_sparsity_on.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" }
  }
  Options { Label: "L2 Cache Achieved Value (Op:hmma Src:tf32 Dst:fp32 Sparsity:on)" }
}

# -- Work:op_hmma_src_tf32_dst_fp32_sparsity_on Traffic: L1/TEX Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_hmma_src_tf32_dst_fp32_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L1/TEX Cache Roofline (Op:hmma Src:tf32 Dst:fp32 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_tf32_dst_fp32_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_hmma_src_tf32_dst_fp32_sparsity_on.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.per_second" }
  }
  Options { Label: "L1/TEX Cache Achieved Value (Op:hmma Src:tf32 Dst:fp32 Sparsity:on)" }
}

# -- Work:op_igmma_src_int8_sparsity_off Traffic: DRAM -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_igmma_src_int8_sparsity_off.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "DRAM Roofline (Op:igmma Src:int8 Sparsity:off)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_igmma_src_int8_sparsity_off.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_igmma_src_int8_sparsity_off.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" }
  }
  Options { Label: "DRAM Achieved Value (Op:igmma Src:int8 Sparsity:off)" }
}

# -- Work:op_igmma_src_int8_sparsity_off Traffic: L2 Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_igmma_src_int8_sparsity_off.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L2 Cache Roofline (Op:igmma Src:int8 Sparsity:off)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_igmma_src_int8_sparsity_off.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_igmma_src_int8_sparsity_off.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" }
  }
  Options { Label: "L2 Cache Achieved Value (Op:igmma Src:int8 Sparsity:off)" }
}

# -- Work:op_igmma_src_int8_sparsity_off Traffic: L1/TEX Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_igmma_src_int8_sparsity_off.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L1/TEX Cache Roofline (Op:igmma Src:int8 Sparsity:off)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_igmma_src_int8_sparsity_off.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_igmma_src_int8_sparsity_off.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.per_second" }
  }
  Options { Label: "L1/TEX Cache Achieved Value (Op:igmma Src:int8 Sparsity:off)" }
}

# -- Work:op_igmma_src_int8_sparsity_on Traffic: DRAM -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_igmma_src_int8_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "DRAM Roofline (Op:igmma Src:int8 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_igmma_src_int8_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_igmma_src_int8_sparsity_on.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" }
  }
  Options { Label: "DRAM Achieved Value (Op:igmma Src:int8 Sparsity:on)" }
}

# -- Work:op_igmma_src_int8_sparsity_on Traffic: L2 Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_igmma_src_int8_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L2 Cache Roofline (Op:igmma Src:int8 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_igmma_src_int8_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_igmma_src_int8_sparsity_on.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" }
  }
  Options { Label: "L2 Cache Achieved Value (Op:igmma Src:int8 Sparsity:on)" }
}

# -- Work:op_igmma_src_int8_sparsity_on Traffic: L1/TEX Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_igmma_src_int8_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L1/TEX Cache Roofline (Op:igmma Src:int8 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_igmma_src_int8_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_igmma_src_int8_sparsity_on.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.per_second" }
  }
  Options { Label: "L1/TEX Cache Achieved Value (Op:igmma Src:int8 Sparsity:on)" }
}

# -- Work:op_imma_src_int8_sparsity_off Traffic: DRAM -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_imma_src_int8_sparsity_off.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "DRAM Roofline (Op:imma Src:int8 Sparsity:off)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_imma_src_int8_sparsity_off.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_imma_src_int8_sparsity_off.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" }
  }
  Options { Label: "DRAM Achieved Value (Op:imma Src:int8 Sparsity:off)" }
}

# -- Work:op_imma_src_int8_sparsity_off Traffic: L2 Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_imma_src_int8_sparsity_off.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L2 Cache Roofline (Op:imma Src:int8 Sparsity:off)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_imma_src_int8_sparsity_off.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_imma_src_int8_sparsity_off.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" }
  }
  Options { Label: "L2 Cache Achieved Value (Op:imma Src:int8 Sparsity:off)" }
}

# -- Work:op_imma_src_int8_sparsity_off Traffic: L1/TEX Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_imma_src_int8_sparsity_off.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L1/TEX Cache Roofline (Op:imma Src:int8 Sparsity:off)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_imma_src_int8_sparsity_off.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_imma_src_int8_sparsity_off.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.per_second" }
  }
  Options { Label: "L1/TEX Cache Achieved Value (Op:imma Src:int8 Sparsity:off)" }
}

# -- Work:op_imma_src_int8_sparsity_on Traffic: DRAM -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_imma_src_int8_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "DRAM Roofline (Op:imma Src:int8 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_imma_src_int8_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_imma_src_int8_sparsity_on.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" }
  }
  Options { Label: "DRAM Achieved Value (Op:imma Src:int8 Sparsity:on)" }
}

# -- Work:op_imma_src_int8_sparsity_on Traffic: L2 Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_imma_src_int8_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L2 Cache Roofline (Op:imma Src:int8 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_imma_src_int8_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_imma_src_int8_sparsity_on.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" }
  }
  Options { Label: "L2 Cache Achieved Value (Op:imma Src:int8 Sparsity:on)" }
}

# -- Work:op_imma_src_int8_sparsity_on Traffic: L1/TEX Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_op_imma_src_int8_sparsity_on.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L1/TEX Cache Roofline (Op:imma Src:int8 Sparsity:on)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_imma_src_int8_sparsity_on.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_op_imma_src_int8_sparsity_on.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.per_second" }
  }
  Options { Label: "L1/TEX Cache Achieved Value (Op:imma Src:int8 Sparsity:on)" }
}

# -- Work:src_bf16_dst_fp32 Traffic: DRAM -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_bf16_dst_fp32.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "DRAM Roofline (Src:bf16 Dst:fp32)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" }
  }
  Options { Label: "DRAM Achieved Value (Src:bf16 Dst:fp32)" }
}

# -- Work:src_bf16_dst_fp32 Traffic: L2 Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_bf16_dst_fp32.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L2 Cache Roofline (Src:bf16 Dst:fp32)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" }
  }
  Options { Label: "L2 Cache Achieved Value (Src:bf16 Dst:fp32)" }
}

# -- Work:src_bf16_dst_fp32 Traffic: L1/TEX Cache -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_bf16_dst_fp32.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "L1/TEX Cache Roofline (Src:bf16 Dst:fp32)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_bf16_dst_fp32.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.per_second" }
  }
  Options { Label: "L1/TEX Cache Achieved Value (Src:bf16 Dst:fp32)" }
}

# -- Work:src_fp16 Traffic: DRAM -----------------------------------------
Rooflines {
  PeakWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  PeakTraffic {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" }
      CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" }
    }
  }
  Options {
    Label: "DRAM Roofline (Src:fp16)"
    ShowOnlyIfNotZero {
      Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16.sum.per_cycle_elapsed" }
    }
  }
}
AchievedValues {
  AchievedWork {
    ValueCyclesPerSecondExpression {
      ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16.sum.per_cycle_elapsed" }
      CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" }
    }
  }
  AchievedTraffic {
    Metric { Label: "DRAM Bandwidth" Name:
"dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:fp16)" } } # -- Work:src_fp16 Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:fp16)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:fp16)" } } # -- Work:src_fp16 Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp16.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } 
Options { Label: "L1/TEX Cache Roofline (Src:fp16)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp16.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:fp16)" } } # -- Work:src_fp64 Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp64.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:fp64)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp64.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp64.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:fp64)" } } # -- Work:src_fp64 Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { 
ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp64.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:fp64)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp64.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp64.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:fp64)" } } # -- Work:src_fp64 Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp64.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:fp64)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp64.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { 
ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp64.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:fp64)" } } # -- Work:src_fp8_sparsity_off Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp8_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:fp8 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp8_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp8_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:fp8 Sparsity:off)" } } # -- Work:src_fp8_sparsity_off Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp8_sparsity_off.sum.peak_sustained" } 
CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:fp8 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp8_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp8_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:fp8 Sparsity:off)" } } # -- Work:src_fp8_sparsity_off Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp8_sparsity_off.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:fp8 Sparsity:off)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp8_sparsity_off.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression 
{ ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp8_sparsity_off.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:fp8 Sparsity:off)" } } # -- Work:src_fp8_sparsity_on Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp8_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:fp8 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp8_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp8_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:fp8 Sparsity:on)" } } # -- Work:src_fp8_sparsity_on Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp8_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { 
Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:fp8 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp8_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp8_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:fp8 Sparsity:on)" } } # -- Work:src_fp8_sparsity_on Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_fp8_sparsity_on.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:fp8 Sparsity:on)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp8_sparsity_on.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: 
"Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_fp8_sparsity_on.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:fp8 Sparsity:on)" } } # -- Work:src_int1 Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int1.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:int1)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int1.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int1.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:int1)" } } # -- Work:src_int1 Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int1.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { 
ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } Options { Label: "L2 Cache Roofline (Src:int1)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int1.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int1.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "L2 Cache Bandwidth" Name: "derived__lts__lts2xbar_bytes.sum.per_second" } } Options { Label: "L2 Cache Achieved Value (Src:int1)" } } # -- Work:src_int1 Traffic: L1/TEX Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int1.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L1/TEX Cache Bytes Accessible" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L1/TEX Cache Frequency" Name: "l1tex__cycles_elapsed.avg.per_second" } } } Options { Label: "L1/TEX Cache Roofline (Src:int1)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int1.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int1.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { 
Metric { Label: "L1/TEX Cache Bandwidth" Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.per_second" } } Options { Label: "L1/TEX Cache Achieved Value (Src:int1)" } } # -- Work:src_int8 Traffic: DRAM ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int8.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical DRAM Bytes Accessible" Name: "dram__bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "DRAM Frequency" Name: "dram__cycles_elapsed.avg.per_second" } } } Options { Label: "DRAM Roofline (Src:int8)" ShowOnlyIfNotZero { Metric { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8.sum.per_cycle_elapsed" } } } } AchievedValues { AchievedWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Tensor Operations Per Cycle" Name: "sm__ops_path_tensor_src_int8.sum.per_cycle_elapsed" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } AchievedTraffic { Metric { Label: "DRAM Bandwidth" Name: "dram__bytes.sum.per_second" } } Options { Label: "DRAM Achieved Value (Src:int8)" } } # -- Work:src_int8 Traffic: L2 Cache ----------------------------------------- Rooflines { PeakWork { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical Tensor Operations" Name: "sm__ops_path_tensor_src_int8.sum.peak_sustained" } CyclesPerSecondMetric { Label: "SM Frequency" Name: "sm__cycles_elapsed.avg.per_second" } } } PeakTraffic { ValueCyclesPerSecondExpression { ValuePerCycleMetrics { Label: "Theoretical L2 Cache Bytes Accessible" Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained" } CyclesPerSecondMetric { Label: "L2 Cache Frequency" Name: "lts__cycles_elapsed.avg.per_second" } } } 
Options {
          Label: "L2 Cache Roofline (Src:int8)"
          ShowOnlyIfNotZero {
            Metric {
              Label: "Tensor Operations Per Cycle"
              Name: "sm__ops_path_tensor_src_int8.sum.per_cycle_elapsed"
            }
          }
        }
      }
      AchievedValues {
        AchievedWork {
          ValueCyclesPerSecondExpression {
            ValuePerCycleMetrics {
              Label: "Tensor Operations Per Cycle"
              Name: "sm__ops_path_tensor_src_int8.sum.per_cycle_elapsed"
            }
            CyclesPerSecondMetric {
              Label: "SM Frequency"
              Name: "sm__cycles_elapsed.avg.per_second"
            }
          }
        }
        AchievedTraffic {
          Metric {
            Label: "L2 Cache Bandwidth"
            Name: "derived__lts__lts2xbar_bytes.sum.per_second"
          }
        }
        Options {
          Label: "L2 Cache Achieved Value (Src:int8)"
        }
      }
      # -- Work:src_int8 Traffic: L1/TEX Cache -----------------------------------------
      Rooflines {
        PeakWork {
          ValueCyclesPerSecondExpression {
            ValuePerCycleMetrics {
              Label: "Theoretical Tensor Operations"
              Name: "sm__ops_path_tensor_src_int8.sum.peak_sustained"
            }
            CyclesPerSecondMetric {
              Label: "SM Frequency"
              Name: "sm__cycles_elapsed.avg.per_second"
            }
          }
        }
        PeakTraffic {
          ValueCyclesPerSecondExpression {
            ValuePerCycleMetrics {
              Label: "Theoretical L1/TEX Cache Bytes Accessible"
              Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.peak_sustained"
            }
            CyclesPerSecondMetric {
              Label: "L1/TEX Cache Frequency"
              Name: "l1tex__cycles_elapsed.avg.per_second"
            }
          }
        }
        Options {
          Label: "L1/TEX Cache Roofline (Src:int8)"
          ShowOnlyIfNotZero {
            Metric {
              Label: "Tensor Operations Per Cycle"
              Name: "sm__ops_path_tensor_src_int8.sum.per_cycle_elapsed"
            }
          }
        }
      }
      AchievedValues {
        AchievedWork {
          ValueCyclesPerSecondExpression {
            ValuePerCycleMetrics {
              Label: "Tensor Operations Per Cycle"
              Name: "sm__ops_path_tensor_src_int8.sum.per_cycle_elapsed"
            }
            CyclesPerSecondMetric {
              Label: "SM Frequency"
              Name: "sm__cycles_elapsed.avg.per_second"
            }
          }
        }
        AchievedTraffic {
          Metric {
            Label: "L1/TEX Cache Bandwidth"
            Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.per_second"
          }
        }
        Options {
          Label: "L1/TEX Cache Achieved Value (Src:int8)"
        }
      }
      # -- Work:src_tf32_dst_fp32 Traffic: DRAM -----------------------------------------
      Rooflines {
        PeakWork {
          ValueCyclesPerSecondExpression {
            ValuePerCycleMetrics {
              Label: "Theoretical Tensor Operations"
              Name: "sm__ops_path_tensor_src_tf32_dst_fp32.sum.peak_sustained"
            }
            CyclesPerSecondMetric {
              Label: "SM Frequency"
              Name: "sm__cycles_elapsed.avg.per_second"
            }
          }
        }
        PeakTraffic {
          ValueCyclesPerSecondExpression {
            ValuePerCycleMetrics {
              Label: "Theoretical DRAM Bytes Accessible"
              Name: "dram__bytes.sum.peak_sustained"
            }
            CyclesPerSecondMetric {
              Label: "DRAM Frequency"
              Name: "dram__cycles_elapsed.avg.per_second"
            }
          }
        }
        Options {
          Label: "DRAM Roofline (Src:tf32 Dst:fp32)"
          ShowOnlyIfNotZero {
            Metric {
              Label: "Tensor Operations Per Cycle"
              Name: "sm__ops_path_tensor_src_tf32_dst_fp32.sum.per_cycle_elapsed"
            }
          }
        }
      }
      AchievedValues {
        AchievedWork {
          ValueCyclesPerSecondExpression {
            ValuePerCycleMetrics {
              Label: "Tensor Operations Per Cycle"
              Name: "sm__ops_path_tensor_src_tf32_dst_fp32.sum.per_cycle_elapsed"
            }
            CyclesPerSecondMetric {
              Label: "SM Frequency"
              Name: "sm__cycles_elapsed.avg.per_second"
            }
          }
        }
        AchievedTraffic {
          Metric {
            Label: "DRAM Bandwidth"
            Name: "dram__bytes.sum.per_second"
          }
        }
        Options {
          Label: "DRAM Achieved Value (Src:tf32 Dst:fp32)"
        }
      }
      # -- Work:src_tf32_dst_fp32 Traffic: L2 Cache -----------------------------------------
      Rooflines {
        PeakWork {
          ValueCyclesPerSecondExpression {
            ValuePerCycleMetrics {
              Label: "Theoretical Tensor Operations"
              Name: "sm__ops_path_tensor_src_tf32_dst_fp32.sum.peak_sustained"
            }
            CyclesPerSecondMetric {
              Label: "SM Frequency"
              Name: "sm__cycles_elapsed.avg.per_second"
            }
          }
        }
        PeakTraffic {
          ValueCyclesPerSecondExpression {
            ValuePerCycleMetrics {
              Label: "Theoretical L2 Cache Bytes Accessible"
              Name: "derived__lts__lts2xbar_bytes.sum.peak_sustained"
            }
            CyclesPerSecondMetric {
              Label: "L2 Cache Frequency"
              Name: "lts__cycles_elapsed.avg.per_second"
            }
          }
        }
        Options {
          Label: "L2 Cache Roofline (Src:tf32 Dst:fp32)"
          ShowOnlyIfNotZero {
            Metric {
              Label: "Tensor Operations Per Cycle"
              Name: "sm__ops_path_tensor_src_tf32_dst_fp32.sum.per_cycle_elapsed"
            }
          }
        }
      }
      AchievedValues {
        AchievedWork {
          ValueCyclesPerSecondExpression {
            ValuePerCycleMetrics {
              Label: "Tensor Operations Per Cycle"
              Name: "sm__ops_path_tensor_src_tf32_dst_fp32.sum.per_cycle_elapsed"
            }
            CyclesPerSecondMetric {
              Label: "SM Frequency"
              Name: "sm__cycles_elapsed.avg.per_second"
            }
          }
        }
        AchievedTraffic {
          Metric {
            Label: "L2 Cache Bandwidth"
            Name: "derived__lts__lts2xbar_bytes.sum.per_second"
          }
        }
        Options {
          Label: "L2 Cache Achieved Value (Src:tf32 Dst:fp32)"
        }
      }
      # -- Work:src_tf32_dst_fp32 Traffic: L1/TEX Cache -----------------------------------------
      Rooflines {
        PeakWork {
          ValueCyclesPerSecondExpression {
            ValuePerCycleMetrics {
              Label: "Theoretical Tensor Operations"
              Name: "sm__ops_path_tensor_src_tf32_dst_fp32.sum.peak_sustained"
            }
            CyclesPerSecondMetric {
              Label: "SM Frequency"
              Name: "sm__cycles_elapsed.avg.per_second"
            }
          }
        }
        PeakTraffic {
          ValueCyclesPerSecondExpression {
            ValuePerCycleMetrics {
              Label: "Theoretical L1/TEX Cache Bytes Accessible"
              Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.peak_sustained"
            }
            CyclesPerSecondMetric {
              Label: "L1/TEX Cache Frequency"
              Name: "l1tex__cycles_elapsed.avg.per_second"
            }
          }
        }
        Options {
          Label: "L1/TEX Cache Roofline (Src:tf32 Dst:fp32)"
          ShowOnlyIfNotZero {
            Metric {
              Label: "Tensor Operations Per Cycle"
              Name: "sm__ops_path_tensor_src_tf32_dst_fp32.sum.per_cycle_elapsed"
            }
          }
        }
      }
      AchievedValues {
        AchievedWork {
          ValueCyclesPerSecondExpression {
            ValuePerCycleMetrics {
              Label: "Tensor Operations Per Cycle"
              Name: "sm__ops_path_tensor_src_tf32_dst_fp32.sum.per_cycle_elapsed"
            }
            CyclesPerSecondMetric {
              Label: "SM Frequency"
              Name: "sm__cycles_elapsed.avg.per_second"
            }
          }
        }
        AchievedTraffic {
          Metric {
            Label: "L1/TEX Cache Bandwidth"
            Name: "derived__l1tex__lsu_writeback_bytes_mem_lgds.sum.per_second"
          }
        }
        Options {
          Label: "L1/TEX Cache Achieved Value (Src:tf32 Dst:fp32)"
        }
      }
    }
  }
  # ------------------------------------------------------------------------------------------------
  # Tensor operation metric table, restricted by its Filter to CC_90 (MinArch == MaxArch == CC_90).
  # The table lists labelled metric-name suffixes (Suffixes) and labelled metric base names
  # (BaseNames).
  # NOTE(review): presumably each base name is combined with each suffix to form a full metric
  # name -- confirm against the section-file schema.
  # ------------------------------------------------------------------------------------------------
  Items {
    Filter: {
      MinArch: CC_90
      MaxArch: CC_90
    }
    SuffixTable: {
      # Suffix strings, each starting with the ".sum" rollup.
      Suffixes {
        Suffix {
          Label: "# Operations"
          Name: ".sum"
        }
        Suffix {
          Label: "# Operations / Cycle"
          Name: ".sum.per_cycle_elapsed"
        }
        Suffix {
          Label: "# Operations / s"
          Name: ".sum.per_second"
        }
        Suffix {
          Label: "Peak %"
          Name: ".sum.pct_of_peak_sustained_elapsed"
        }
        Suffix {
          Label: "Peak Operations / Cycle"
          Name: ".sum.peak_sustained"
        }
        Suffix {
          Label: "Peak Operations / s"
          Name: ".sum.peak_sustained_elapsed.per_second"
        }
      }
      # Base names: first the per-opcode entries ("Op:..."), then the entries keyed only by
      # source/destination type ("Src:...").
      BaseNames {
        BaseName {
          Label: "Op:bgmma Src:int1"
          Name: "sm__ops_path_tensor_op_bgmma_src_int1"
        }
        BaseName {
          Label: "Op:bmma Src:int1"
          Name: "sm__ops_path_tensor_op_bmma_src_int1"
        }
        BaseName {
          Label: "Op:hgmma Src:bf16 Dst:fp32 Sparsity:off"
          Name: "sm__ops_path_tensor_op_hgmma_src_bf16_dst_fp32_sparsity_off"
        }
        BaseName {
          Label: "Op:hgmma Src:bf16 Dst:fp32 Sparsity:on"
          Name: "sm__ops_path_tensor_op_hgmma_src_bf16_dst_fp32_sparsity_on"
        }
        BaseName {
          Label: "Op:hgmma Src:fp16 Sparsity:off"
          Name: "sm__ops_path_tensor_op_hgmma_src_fp16_sparsity_off"
        }
        BaseName {
          Label: "Op:hgmma Src:fp16 Sparsity:on"
          Name: "sm__ops_path_tensor_op_hgmma_src_fp16_sparsity_on"
        }
        BaseName {
          Label: "Op:hgmma Src:tf32 Dst:fp32 Sparsity:off"
          Name: "sm__ops_path_tensor_op_hgmma_src_tf32_dst_fp32_sparsity_off"
        }
        BaseName {
          Label: "Op:hgmma Src:tf32 Dst:fp32 Sparsity:on"
          Name: "sm__ops_path_tensor_op_hgmma_src_tf32_dst_fp32_sparsity_on"
        }
        BaseName {
          Label: "Op:hmma Src:bf16 Dst:fp32 Sparsity:off"
          Name: "sm__ops_path_tensor_op_hmma_src_bf16_dst_fp32_sparsity_off"
        }
        BaseName {
          Label: "Op:hmma Src:bf16 Dst:fp32 Sparsity:on"
          Name: "sm__ops_path_tensor_op_hmma_src_bf16_dst_fp32_sparsity_on"
        }
        BaseName {
          Label: "Op:hmma Src:fp16 Dst:fp16 Sparsity:off"
          Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp16_sparsity_off"
        }
        BaseName {
          Label: "Op:hmma Src:fp16 Dst:fp16 Sparsity:on"
          Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp16_sparsity_on"
        }
        BaseName {
          Label: "Op:hmma Src:fp16 Dst:fp32 Sparsity:off"
          Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp32_sparsity_off"
        }
        BaseName {
          Label: "Op:hmma Src:fp16 Dst:fp32 Sparsity:on"
          Name: "sm__ops_path_tensor_op_hmma_src_fp16_dst_fp32_sparsity_on"
        }
        BaseName {
          Label: "Op:hmma Src:tf32 Dst:fp32 Sparsity:off"
          Name: "sm__ops_path_tensor_op_hmma_src_tf32_dst_fp32_sparsity_off"
        }
        BaseName {
          Label: "Op:hmma Src:tf32 Dst:fp32 Sparsity:on"
          Name: "sm__ops_path_tensor_op_hmma_src_tf32_dst_fp32_sparsity_on"
        }
        BaseName {
          Label: "Op:igmma Src:int8 Sparsity:off"
          Name: "sm__ops_path_tensor_op_igmma_src_int8_sparsity_off"
        }
        BaseName {
          Label: "Op:igmma Src:int8 Sparsity:on"
          Name: "sm__ops_path_tensor_op_igmma_src_int8_sparsity_on"
        }
        BaseName {
          Label: "Op:imma Src:int8 Sparsity:off"
          Name: "sm__ops_path_tensor_op_imma_src_int8_sparsity_off"
        }
        BaseName {
          Label: "Op:imma Src:int8 Sparsity:on"
          Name: "sm__ops_path_tensor_op_imma_src_int8_sparsity_on"
        }
        BaseName {
          Label: "Src:bf16 Dst:fp32"
          Name: "sm__ops_path_tensor_src_bf16_dst_fp32"
        }
        BaseName {
          Label: "Src:fp16"
          Name: "sm__ops_path_tensor_src_fp16"
        }
        BaseName {
          Label: "Src:fp64"
          Name: "sm__ops_path_tensor_src_fp64"
        }
        BaseName {
          Label: "Src:fp8 Sparsity:off"
          Name: "sm__ops_path_tensor_src_fp8_sparsity_off"
        }
        BaseName {
          Label: "Src:fp8 Sparsity:on"
          Name: "sm__ops_path_tensor_src_fp8_sparsity_on"
        }
        BaseName {
          Label: "Src:int1"
          Name: "sm__ops_path_tensor_src_int1"
        }
        BaseName {
          Label: "Src:int8"
          Name: "sm__ops_path_tensor_src_int8"
        }
        BaseName {
          Label: "Src:tf32 Dst:fp32"
          Name: "sm__ops_path_tensor_src_tf32_dst_fp32"
        }
      }
    }
  }
}