AMD-AGI · ajassani · May 26, 2025 · May 26, 2025
diff --git a/examples/custom_workflows/roofline_analyzer/.gitignore b/examples/custom_workflows/roofline_analyzer/.gitignore
@@ -0,0 +1,4 @@
+**build**
+**.xlsx
+**egg-info**
+**venv**
diff --git a/examples/custom_workflows/roofline_analyzer/README.md b/examples/custom_workflows/roofline_analyzer/README.md
@@ -0,0 +1,117 @@
+# Roofline Analysis Tool
+
+A functional programming-oriented Python application for performing roofline analysis on GEMM kernels. This tool ingests Excel files containing kernel performance data, performs calculations to determine memory and compute bounds, and exports the results with visualizations.
+
+## Overview
+
+The Roofline Model is a visually intuitive performance model used to provide performance estimates of applications running on multicore, manycore, or accelerator processor architectures. This tool helps analyze the performance of GEMM (General Matrix Multiplication) kernels by:
+
+1. Reading kernel performance data from Excel files
+2. Calculating roofline model parameters
+3. Determining if kernels are memory or compute bound
+4. Visualizing the results with a roofline plot
+5. Exporting the analysis to a new Excel file
+
+## Installation
+
+### Requirements
+
+- Python 3.8 or higher
+- Dependencies listed in pyproject.toml
+
+### Installation Steps
+
+1. Clone this repository:
+```bash
+git clone <repository-url>
+cd <repository>/examples/custom_workflows/roofline_analyzer
+```
+2. Install the package:
+```bash
+pip install -e .
+```
+## Configuration
+The tool requires a configuration file in TOML format. Create a config.toml file with the following sections:
+```toml
+[accelerator]
+max_memory_bandwidth = 1.5  # TB/s
+max_compute_teraflops = 19.5  # TFLOPS
+max_achievable_teraflops = 16.0  # TFLOPS
+
+[excel]
+sheet_name = "gemm"
+flops_per_byte_column = "FLOPS/byte"
+performance_column = "Non-Data-Mov TFLOPS/s_mean"
+
+[output]
+prefix = "export-roofline"
+```
+
+### Configuration Options
+
+* **accelerator**: Hardware parameters
+    * **max_memory_bandwidth**: Maximum memory bandwidth in TB/s
+    * **max_compute_teraflops**: Maximum theoretical compute throughput in TFLOPS
+    * **max_achievable_teraflops**: Maximum achievable compute throughput in TFLOPS
+* **excel**: Excel file configuration
+    * **sheet_name**: The name of the worksheet containing kernel data
+    * **flops_per_byte_column**: Column name containing arithmetic intensity values
+    * **performance_column**: Column name containing kernel performance values
+* **output**: Output configuration
+    * **prefix**: Prefix for the output Excel file name
+
+## Usage
+Basic Command
+```bash
+python main.py <excel-file> <config-file>
+```
+Command-line Options
+```bash
+Options:
+  --plot-output PATH  Path to save a separate copy of the roofline plot
+  --skip-plot         Skip generating the roofline plot
+  --help              Show this message and exit.
+```
+Real-world example
+```bash
+python main.py mi300x_013_profile_output_5_steps_step_10_performance_report.xlsx config.toml
+```
+### Input Requirements
+The Excel file should contain a worksheet (its name is set by `sheet_name` in the config, e.g. "gemm") with at least the following columns:
+
+1. A column for arithmetic intensity (FLOPS/byte)
+2. A column for kernel performance (TFLOPS/s)
+
+The exact column names are specified in the config.toml file.
+
+### Output Description
+The tool generates an Excel file with the following content:
+
+1. **{sheet_name}_analyzed**: The original data with additional calculated columns:
+    * **kernel_memory_roofline**: Memory bandwidth limit for each kernel
+    * **bound_type_maf**: Whether the kernel is "memory" or "compute" bound
+    * **bound_distance**: Distance to the nearest roofline
+    * **bound_distance_pct**: Percentage distance to the nearest roofline
+2. **{sheet_name}_original**: A copy of the original data
+
+3. **ScalarValues**: Key calculated values including:
+    * **AI_ridge_mtf**: The arithmetic intensity ridge point based on max theoretical FLOPS
+    * **AI_ridge_maf**: The arithmetic intensity ridge point based on max achievable FLOPS
+4. **RooflinePlot**: A visual representation of the roofline model with:
+    * Memory bandwidth roofline
+    * Max theoretical compute roofline
+    * Max achievable compute roofline
+    * Kernel data points
+
+## Project Structure
+The project follows a functional programming approach with the following structure:
+
+* **main.py**: The main entry point
+* **src/cli.py**: Command line interface and config handling
+* **src/xlsx.py**: Excel file processing functions
+* **src/roofline.py**: Roofline analysis calculations
+* **pyproject.toml**: Project metadata and dependencies
+* **config.toml**: Example configuration
+
+## License
+Copyright AMD 2025.
diff --git a/examples/custom_workflows/roofline_analyzer/main.py b/examples/custom_workflows/roofline_analyzer/main.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python3
+import typer
+from pathlib import Path
+from typing import Optional
+
+from cli import read_toml_config, display_summary
+from xlsx import read_xlsx, add_analysis_columns, export_to_xlsx
+from roofline import calculate_ridge_points, calculate_rooflines, plot_roofline
+
+app = typer.Typer(help="Roofline Analysis Tool")
+
+@app.command()
+def analyze(
+    excel_file: Path = typer.Argument(..., help="Path to the Excel file containing kernel data"),
+    config_file: Path = typer.Argument(..., help="Path to the TOML config file"),
+    plot_output: Optional[Path] = typer.Option(None, help="Path to save a separate copy of the roofline plot"),
+    skip_plot: bool = typer.Option(False, help="Skip generating the roofline plot")
+):
+    """
+    Analyze kernel performance data using the roofline model.
+    """
+    try:
+        # Read configuration (the helpers take plain string paths, hence str())
+        typer.echo(f"Reading configuration from {config_file}")
+        config = read_toml_config(str(config_file))
+
+        # Read Excel data; the config is passed along to read_xlsx
+        typer.echo(f"Reading data from {excel_file}")
+        df = read_xlsx(str(excel_file), config)
+
+        # Keep a copy of the input as read; it is exported and summarized separately from the analyzed frame
+        df_orig = df.copy()
+
+        # Calculate ridge points (MTF = max theoretical FLOPS, MAF = max achievable FLOPS) and rooflines
+        typer.echo("Calculating roofline model parameters")
+        ai_ridge_mtf, ai_ridge_maf = calculate_ridge_points(config)
+        rooflines = calculate_rooflines(config)
+
+        # Add analysis columns; df_result is what gets plotted, exported and summarized
+        typer.echo("Analyzing kernel performance")
+        df_result = add_analysis_columns(df, config, ai_ridge_mtf, ai_ridge_maf)
+
+        # Generate roofline plot; plot_fig stays None when skip_plot is set, and plot_output is then unused
+        plot_fig = None
+        if not skip_plot:
+            typer.echo("Generating roofline plot")
+            plot_fig = plot_roofline(df_result, rooflines, config, ai_ridge_mtf, ai_ridge_maf, 
+                                     str(plot_output) if plot_output else None)
+
+        # Export results; export_to_xlsx returns the value reported as the output file in the summary
+        typer.echo("Exporting results to Excel (including plot and original data)")
+        output_file = export_to_xlsx(df_result, df_orig, config, ai_ridge_mtf, ai_ridge_maf, plot_fig)
+
+        # Display summary
+        display_summary(df_orig, df_result, output_file, config, ai_ridge_mtf, ai_ridge_maf)
+
+    except Exception as e:  # top-level CLI boundary: any failure in the steps above lands here
+        typer.echo(f"Error: {e}", err=True)  # err=True sends the message to stderr
+        raise typer.Exit(code=1)  # exit with status 1 instead of showing a traceback
+
+if __name__ == "__main__":
+    app()
diff --git a/examples/custom_workflows/roofline_analyzer/mi300x_bfloat16.toml b/examples/custom_workflows/roofline_analyzer/mi300x_bfloat16.toml
@@ -0,0 +1,18 @@
+[accelerator]
+max_memory_bandwidth = 5.3  # TB/s
+# MI300X Peak memory bandwidth
+# https://www.amd.com/en/products/accelerators/instinct/mi300/mi300x.html
+max_compute_teraflops = 1300  # TFLOPS
+# MI300X Peak bfloat16
+# https://www.amd.com/en/products/accelerators/instinct/mi300/mi300x.html
+max_achievable_teraflops = 708  # TFLOPS
+# MI300X MAF bfloat16
+# https://rocm.blogs.amd.com/software-tools-optimization/measuring-max-achievable-flops-part2/README.html
+
+[excel]
+sheet_name = "gemm"
+flops_per_byte_column = "FLOPS/Byte_first"
+performance_column = "Non-Data-Mov TFLOPS/s_mean"
+
+[output]
+prefix = "export-roofline"
diff --git a/examples/custom_workflows/roofline_analyzer/pyproject.toml b/examples/custom_workflows/roofline_analyzer/pyproject.toml
@@ -0,0 +1,25 @@
+[build-system]
+requires = ["setuptools>=42", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "roofline_analyzer"
+version = "0.1.1"
+description = "A functional programming oriented roofline analysis tool"
+readme = "README.md"
+authors = [
+    {name = "Tyko Niemi", email = "tyko.niemi@amd.com"}
+]
+requires-python = ">=3.8"
+dependencies = [
+    "pandas>=1.3.0",
+    "openpyxl>=3.0.9",
+    "matplotlib>=3.4.0",
+    "numpy>=1.20.0",
+    "tomli>=2.0.0",
+    "typer>=0.4.0",
+    "rich>=10.0.0",
+]
+
+[project.scripts]
+roofline-analyze = "roofline_analyzer.main:app"
diff --git a/examples/custom_workflows/roofline_analyzer/src/cli.py b/examples/custom_workflows/roofline_analyzer/src/cli.py
@@ -0,0 +1,92 @@
+import typer
+import tomli
+from pathlib import Path
+from typing import Dict, Any, Optional
+from rich.console import Console
+from rich.table import Table
+
+app = typer.Typer(help="Roofline Analysis Tool")
+console = Console()
+
+def read_toml_config(config_path: str) -> Dict[str, Any]:
+    """
+    Read and parse the TOML configuration file.
+
+    Args:
+        config_path: Path to the TOML config file
+
+    Returns:
+        Dictionary with configuration values
+    """
+    try:
+        with open(config_path, "rb") as f:  # tomli.load requires a binary file object
+            config = tomli.load(f)
+
+        # Validate required config sections and keys (section name -> keys that must be present)
+        required_sections = {
+            "accelerator": ["max_memory_bandwidth", "max_compute_teraflops", "max_achievable_teraflops"],
+            "excel": ["sheet_name", "flops_per_byte_column", "performance_column"],
+            "output": ["prefix"]
+        }
+
+        for section, keys in required_sections.items():
+            if section not in config:
+                raise ValueError(f"Missing required section in config: {section}")
+
+            for key in keys:
+                if key not in config[section]:
+                    raise ValueError(f"Missing required key in config[{section}]: {key}")
+
+        return config  # only presence of sections/keys is checked; values are returned as parsed
+    except Exception as e:  # NOTE: also catches the ValueErrors raised above, so I/O, parse and validation failures all surface as RuntimeError
+        raise RuntimeError(f"Error reading config file: {e}")
+
+def display_summary(
+    df_orig: Any, 
+    df_result: Any, 
+    output_file: str,
+    config: Dict[str, Any],
+    ai_ridge_mtf: float,
+    ai_ridge_maf: float
+) -> None:
+    """
+    Display a summary of the analysis.
+
+    Args:
+        df_orig: Original DataFrame
+        df_result: Resulting DataFrame after analysis
+        output_file: Path to the output Excel file
+        config: Configuration dictionary
+        ai_ridge_mtf: The arithmetic intensity ridge point for max theoretical flops
+        ai_ridge_maf: The arithmetic intensity ridge point for max achievable flops
+    """
+    console.print(f"\n[bold green]Roofline Analysis Complete[/bold green]")
+
+    # Hardware information: echoes the three [accelerator] config values with their units
+    hw_table = Table(title="Hardware Configuration")
+    hw_table.add_column("Parameter", style="cyan")
+    hw_table.add_column("Value", style="green")
+
+    hw_table.add_row("Max Memory Bandwidth", f"{config['accelerator']['max_memory_bandwidth']} TB/s")
+    hw_table.add_row("Max Compute", f"{config['accelerator']['max_compute_teraflops']} TFLOPS")
+    hw_table.add_row("Max Achievable Compute", f"{config['accelerator']['max_achievable_teraflops']} TFLOPS")
+
+    console.print(hw_table)
+
+    # Analysis information: row counts per bound type and the two ridge points
+    analysis_table = Table(title="Analysis Results")
+    analysis_table.add_column("Parameter", style="cyan")
+    analysis_table.add_column("Value", style="green")
+
+    analysis_table.add_row("Kernels Analyzed", str(len(df_orig)))  # number of rows in the original data
+    analysis_table.add_row("Compute Bound Kernels", 
+                          str(len(df_result[df_result['bound_type_maf'] == 'compute'])))  # rows labelled 'compute'
+    analysis_table.add_row("Memory Bound Kernels", 
+                          str(len(df_result[df_result['bound_type_maf'] == 'memory'])))  # rows labelled 'memory'
+    analysis_table.add_row("AI Ridge MTF", f"{ai_ridge_mtf:.4f}")  # shown with four decimal places
+    analysis_table.add_row("AI Ridge MAF", f"{ai_ridge_maf:.4f}")
+
+    console.print(analysis_table)
+
+    # Output information: the path passed in as output_file
+    console.print(f"\n[bold]Output file:[/bold] {output_file}")