diff --git a/examples/custom_workflows/roofline_analyzer/.gitignore b/examples/custom_workflows/roofline_analyzer/.gitignore new file mode 100644 index 00000000..17d3af3a --- /dev/null +++ b/examples/custom_workflows/roofline_analyzer/.gitignore @@ -0,0 +1,4 @@ +**build** +**.xlsx +**egg-info** +**venv** \ No newline at end of file diff --git a/examples/custom_workflows/roofline_analyzer/README.md b/examples/custom_workflows/roofline_analyzer/README.md new file mode 100644 index 00000000..bd4c732b --- /dev/null +++ b/examples/custom_workflows/roofline_analyzer/README.md @@ -0,0 +1,117 @@ +# Roofline Analysis Tool + +A functional programming-oriented Python application for performing roofline analysis on GEMM kernels. This tool ingests Excel files containing kernel performance data, performs calculations to determine memory and compute bounds, and exports the results with visualizations. + +## Overview + +The Roofline Model is a visually intuitive performance model used to provide performance estimates of applications running on multicore, manycore, or accelerator processor architectures. This tool helps analyze the performance of GEMM (General Matrix Multiplication) kernels by: + +1. Reading kernel performance data from Excel files +2. Calculating roofline model parameters +3. Determining if kernels are memory or compute bound +4. Visualizing the results with a roofline plot +5. Exporting the analysis to a new Excel file + +## Installation + +### Requirements + +- Python 3.8 or higher +- Dependencies listed in pyproject.toml + +### Installation Steps + +1. Clone this repository: +```bash +git clone <repository-url> +cd path/to/roofline-analysis +``` +2. Install the package: +```bash +pip install -e . +``` +## Configuration +The tool requires a configuration file in TOML format. 
Create a config.toml file with the following sections: +```toml +[accelerator] +max_memory_bandwidth = 1.5 # TB/s +max_compute_teraflops = 19.5 # TFLOPS +max_achievable_teraflops = 16.0 # TFLOPS + +[excel] +sheet_name = "gemm" +flops_per_byte_column = "FLOPS/byte" +performance_column = "Non-Data-Mov TFLOPS/s_mean" + +[output] +prefix = "export-roofline" +``` + +### Configuration Options + +* **accelerator**: Hardware parameters + * **max_memory_bandwidth**: Maximum memory bandwidth in TB/s + * **max_compute_teraflops**: Maximum theoretical compute throughput in TFLOPS + * **max_achievable_teraflops**: Maximum achievable compute throughput in TFLOPS +* **excel**: Excel file configuration + * **sheet_name**: The name of the worksheet containing kernel data + * **flops_per_byte_column**: Column name containing arithmetic intensity values + * **performance_column**: Column name containing kernel performance values +* **output**: Output configuration + * **prefix**: Prefix for the output Excel file name + +## Usage +Basic Command +```bash +python main.py <excel_file> <config_file> +``` +Command-line Options +```bash +Options: + --plot-output PATH Path to save a separate copy of the roofline plot + --skip-plot Skip generating the roofline plot + --help Show this message and exit. +``` +Real-world example +```bash +python main.py mi300x_013_profile_output_5_steps_step_10_performance_report.xlsx config.toml +``` +### Input Requirements +The Excel file should contain a worksheet (named by `sheet_name` in the config file, e.g. "gemm") with at least the following columns: + +1. A column for arithmetic intensity (FLOPS/byte) +2. A column for kernel performance (TFLOPS/s) + +The exact column names are specified in the config.toml file. + +### Output Description +The tool generates an Excel file with the following content: + +1. 
**{sheet_name}_analyzed**: The original data with additional calculated columns: + * **kernel_memory_roofline**: Memory bandwidth limit for each kernel + * **bound_type_maf**: Whether the kernel is "memory" or "compute" bound + * **bound_distance**: Gap between the applicable roofline (the memory roofline for memory-bound kernels, the max achievable FLOPS for compute-bound kernels) and the measured performance + * **bound_distance_pct**: The same gap expressed as a percentage of the applicable roofline +2. **{sheet_name}_original**: A copy of the original data + +3. **ScalarValues**: Key calculated values including: + * **AI_ridge_mtf**: The arithmetic intensity ridge point based on max theoretical FLOPS + * **AI_ridge_maf**: The arithmetic intensity ridge point based on max achievable FLOPS +4. **RooflinePlot**: A visual representation of the roofline model with: + * Memory bandwidth roofline + * Max theoretical compute roofline + * Max achievable compute roofline + * Kernel data points + +## Project Structure +The project follows a functional programming approach with the following structure: + +* **main.py**: The main entry point +* **src/cli.py**: Command line interface and config handling +* **src/xlsx.py**: Excel file processing functions +* **src/roofline.py**: Roofline analysis calculations +* **pyproject.toml**: Project metadata and dependencies +* **mi300x_bfloat16.toml**: Example configuration (MI300X, bfloat16) + +## License +Copyright AMD 2025. 
@app.command()
def analyze(
    excel_file: Path = typer.Argument(..., help="Path to the Excel file containing kernel data"),
    config_file: Path = typer.Argument(..., help="Path to the TOML config file"),
    plot_output: Optional[Path] = typer.Option(None, help="Path to save a separate copy of the roofline plot"),
    skip_plot: bool = typer.Option(False, help="Skip generating the roofline plot"),
):
    """
    Analyze kernel performance data using the roofline model.
    """
    # NOTE: typer shows the docstring above as the command's --help text, so
    # it is kept short; implementation notes live in comments instead.
    #
    # Pipeline: read config -> read Excel sheet -> compute ridge points and
    # rooflines -> add per-kernel analysis columns -> (optionally) plot ->
    # export everything to a new workbook -> print a summary.
    # Any failure is reported on stderr and turned into exit code 1.

    # --plot-output only has an effect when a plot is generated; tell the
    # user instead of silently dropping the option.
    if skip_plot and plot_output is not None:
        typer.echo(
            "Warning: --plot-output is ignored because --skip-plot was given",
            err=True,
        )

    try:
        # Read configuration
        typer.echo(f"Reading configuration from {config_file}")
        config = read_toml_config(str(config_file))

        # Read Excel data
        typer.echo(f"Reading data from {excel_file}")
        df = read_xlsx(str(excel_file), config)

        # Keep an untouched copy so the export can include the original sheet
        df_orig = df.copy()

        # Calculate ridge points and rooflines
        typer.echo("Calculating roofline model parameters")
        ai_ridge_mtf, ai_ridge_maf = calculate_ridge_points(config)
        rooflines = calculate_rooflines(config)

        # Add analysis columns
        typer.echo("Analyzing kernel performance")
        df_result = add_analysis_columns(df, config, ai_ridge_mtf, ai_ridge_maf)

        # Generate roofline plot (plot_fig stays None when plotting is skipped)
        plot_fig = None
        if not skip_plot:
            typer.echo("Generating roofline plot")
            plot_fig = plot_roofline(
                df_result,
                rooflines,
                config,
                ai_ridge_mtf,
                ai_ridge_maf,
                str(plot_output) if plot_output else None,
            )

        # Export results
        typer.echo("Exporting results to Excel (including plot and original data)")
        output_file = export_to_xlsx(df_result, df_orig, config, ai_ridge_mtf, ai_ridge_maf, plot_fig)

        # Display summary
        display_summary(df_orig, df_result, output_file, config, ai_ridge_mtf, ai_ridge_maf)

    except Exception as e:
        # Top-level boundary: report the problem and exit non-zero, keeping
        # the original exception attached as the explicit cause.
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(code=1) from e
def read_toml_config(config_path: str) -> Dict[str, Any]:
    """
    Read, parse and validate the TOML configuration file.

    Args:
        config_path: Path to the TOML config file

    Returns:
        Dictionary with configuration values. The sections ``accelerator``,
        ``excel`` and ``output`` and their required keys are present.

    Raises:
        RuntimeError: If the file cannot be opened or parsed, or if a
            required section or key is missing. The underlying error is
            attached as the exception's cause.
    """
    # Prefer the standard-library parser (Python 3.11+) and fall back to the
    # API-compatible "tomli" backport on older interpreters.
    try:
        import tomllib as toml_parser
    except ImportError:
        import tomli as toml_parser

    required_sections = {
        "accelerator": ["max_memory_bandwidth", "max_compute_teraflops", "max_achievable_teraflops"],
        "excel": ["sheet_name", "flops_per_byte_column", "performance_column"],
        "output": ["prefix"],
    }

    try:
        # TOML parsers require a binary file handle.
        with open(config_path, "rb") as f:
            config = toml_parser.load(f)

        for section, keys in required_sections.items():
            if section not in config:
                raise ValueError(f"Missing required section in config: {section}")

            # A scalar such as `accelerator = 5` would otherwise surface as
            # an opaque "argument of type 'int' is not iterable" error.
            if not isinstance(config[section], dict):
                raise ValueError(
                    f"Config entry [{section}] must be a table, "
                    f"got {type(config[section]).__name__}"
                )

            for key in keys:
                if key not in config[section]:
                    raise ValueError(f"Missing required key in config[{section}]: {key}")

        return config
    except Exception as e:
        # Single error type for callers; chain the cause for debugging.
        raise RuntimeError(f"Error reading config file: {e}") from e


def display_summary(
    df_orig: Any,
    df_result: Any,
    output_file: str,
    config: Dict[str, Any],
    ai_ridge_mtf: float,
    ai_ridge_maf: float
) -> None:
    """
    Display a summary of the analysis on the module-level rich console.

    Args:
        df_orig: Original DataFrame
        df_result: Resulting DataFrame after analysis; must contain the
            'bound_type_maf' column
        output_file: Path to the output Excel file
        config: Configuration dictionary
        ai_ridge_mtf: The arithmetic intensity ridge point for max theoretical flops
        ai_ridge_maf: The arithmetic intensity ridge point for max achievable flops
    """
    # Local import: only needed here, and rich is already a dependency of
    # this module.
    from rich.markup import escape

    accelerator = config['accelerator']

    console.print("\n[bold green]Roofline Analysis Complete[/bold green]")

    # Hardware information
    hw_table = Table(title="Hardware Configuration")
    hw_table.add_column("Parameter", style="cyan")
    hw_table.add_column("Value", style="green")

    hw_table.add_row("Max Memory Bandwidth", f"{accelerator['max_memory_bandwidth']} TB/s")
    hw_table.add_row("Max Compute", f"{accelerator['max_compute_teraflops']} TFLOPS")
    hw_table.add_row("Max Achievable Compute", f"{accelerator['max_achievable_teraflops']} TFLOPS")

    console.print(hw_table)

    # Analysis information
    bound_type = df_result['bound_type_maf']
    compute_bound_count = int((bound_type == 'compute').sum())
    memory_bound_count = int((bound_type == 'memory').sum())

    analysis_table = Table(title="Analysis Results")
    analysis_table.add_column("Parameter", style="cyan")
    analysis_table.add_column("Value", style="green")

    analysis_table.add_row("Kernels Analyzed", str(len(df_orig)))
    analysis_table.add_row("Compute Bound Kernels", str(compute_bound_count))
    analysis_table.add_row("Memory Bound Kernels", str(memory_bound_count))
    analysis_table.add_row("AI Ridge MTF", f"{ai_ridge_mtf:.4f}")
    analysis_table.add_row("AI Ridge MAF", f"{ai_ridge_maf:.4f}")

    console.print(analysis_table)

    # Output information. The file name is built from the user-supplied
    # output prefix, so escape it: square brackets would otherwise be parsed
    # as rich markup tags.
    console.print(f"\n[bold]Output file:[/bold] {escape(str(output_file))}")
def calculate_ridge_points(config: Dict[str, Any]) -> Tuple[float, float]:
    """
    Calculate the arithmetic intensity ridge points.

    The ridge point is the arithmetic intensity at which the memory roofline
    meets a compute roofline: peak compute divided by memory bandwidth.

    Args:
        config: Configuration dictionary; reads the three values of the
            'accelerator' section

    Returns:
        Tuple of (AI_ridge_mtf, AI_ridge_maf)

    Raises:
        ValueError: If 'max_memory_bandwidth' is not a positive number
    """
    accelerator = config['accelerator']
    max_memory_bandwidth = accelerator['max_memory_bandwidth']

    # `not x > 0` also rejects NaN. Without this check a zero bandwidth
    # raises a bare ZeroDivisionError and a negative one silently yields
    # negative ridge points.
    if not max_memory_bandwidth > 0:
        raise ValueError(
            f"max_memory_bandwidth must be positive, got {max_memory_bandwidth!r}"
        )

    # Ridge point = Peak Compute / Memory Bandwidth
    ai_ridge_mtf = accelerator['max_compute_teraflops'] / max_memory_bandwidth
    ai_ridge_maf = accelerator['max_achievable_teraflops'] / max_memory_bandwidth

    return ai_ridge_mtf, ai_ridge_maf


def calculate_rooflines(
    config: Dict[str, Any],
    *,
    min_exponent: float = -2,
    max_exponent: float = 3,
    num_points: int = 1000,
) -> Dict[str, np.ndarray]:
    """
    Calculate the roofline model values.

    Args:
        config: Configuration dictionary; reads the three values of the
            'accelerator' section
        min_exponent: log10 of the smallest arithmetic intensity sampled
            (default -2, i.e. 0.01 FLOPS/byte)
        max_exponent: log10 of the largest arithmetic intensity sampled
            (default 3, i.e. 1000 FLOPS/byte); raise it when kernels have a
            higher arithmetic intensity than the default range covers
        num_points: Number of logarithmically spaced sample points

    Returns:
        Dictionary of equally sized arrays:
            'x_values': sampled arithmetic intensities
            'memory_roof': x * memory bandwidth
            'theoretical_roof': constant max theoretical compute
            'achievable_roof': constant max achievable compute
            'combined_theoretical': min(memory_roof, theoretical_roof)
            'combined_achievable': min(memory_roof, achievable_roof)
    """
    accelerator = config['accelerator']
    max_memory_bandwidth = accelerator['max_memory_bandwidth']
    max_compute_teraflops = accelerator['max_compute_teraflops']
    max_achievable_teraflops = accelerator['max_achievable_teraflops']

    # Create x-values (operational intensity) range, log-spaced because the
    # roofline is drawn on log-log axes.
    x_values = np.logspace(min_exponent, max_exponent, num_points)

    # Calculate memory roofline: y = x * bandwidth
    memory_roof = x_values * max_memory_bandwidth

    # Calculate compute rooflines (horizontal lines)
    theoretical_roof = np.full_like(x_values, max_compute_teraflops)
    achievable_roof = np.full_like(x_values, max_achievable_teraflops)

    return {
        'x_values': x_values,
        'memory_roof': memory_roof,
        'theoretical_roof': theoretical_roof,
        'achievable_roof': achievable_roof,
        # Attainable performance is capped by whichever roof is lower.
        'combined_theoretical': np.minimum(memory_roof, theoretical_roof),
        'combined_achievable': np.minimum(memory_roof, achievable_roof),
    }
def plot_roofline(
    df: pd.DataFrame,
    rooflines: Dict[str, np.ndarray],
    config: Dict[str, Any],
    ai_ridge_mtf: float,
    ai_ridge_maf: float,
    output_file: str = None
) -> plt.Figure:
    """
    Generate and optionally save the roofline plot.

    Args:
        df: DataFrame with kernel data
        rooflines: Dictionary with roofline data
        config: Configuration dictionary
        ai_ridge_mtf: The arithmetic intensity ridge point for max theoretical flops
        ai_ridge_maf: The arithmetic intensity ridge point for max achievable flops
        output_file: Optional file path to save the plot; nothing is saved
            when it is None or empty. Missing parent directories are created.

    Returns:
        Matplotlib figure object with the roofline plot
    """
    # Local import keeps this block self-contained; pathlib is stdlib.
    from pathlib import Path

    fig = create_roofline_plot(df, rooflines, config, ai_ridge_mtf, ai_ridge_maf)

    # Save if output file is specified
    if output_file:
        # savefig does not create directories, so a path such as
        # "plots/run1/roofline.png" would fail after all the analysis work
        # has been done. Create the parent directory first.
        Path(output_file).parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(output_file, dpi=300, bbox_inches='tight')
        print(f"Roofline plot saved to {output_file}")

    return fig
def add_analysis_columns(
    df: pd.DataFrame,
    config: Dict[str, Any],
    ai_ridge_mtf: float,
    ai_ridge_maf: float
) -> pd.DataFrame:
    """
    Add calculated columns to the DataFrame for roofline analysis.

    The input DataFrame is not modified; a copy with these extra columns is
    returned:

        ID (inserted first): 1-based row number used to label kernels
        kernel_memory_roofline: arithmetic intensity * max memory bandwidth
        bound_type_maf: "compute" if the arithmetic intensity is at or above
            ai_ridge_maf, otherwise "memory"
        reference_roofline: max achievable TFLOPS for compute-bound rows,
            kernel_memory_roofline for memory-bound rows
        bound_distance: reference_roofline - measured performance
        bound_distance_pct: bound_distance as a percentage of
            reference_roofline

    Side effect: config['excel']['kernel_name_column'] is set to the name of
    the inserted ID column so that the plotting code labels points with it.

    Args:
        df: DataFrame with kernel data
        config: Configuration dictionary (mutated, see above)
        ai_ridge_mtf: The arithmetic intensity ridge point for max theoretical
            flops (currently unused; kept for interface compatibility)
        ai_ridge_maf: The arithmetic intensity ridge point for max achievable flops

    Returns:
        DataFrame with added analysis columns
    """
    # Column name mappings from config
    flops_per_byte_col = config['excel']['flops_per_byte_column']
    performance_col = config['excel']['performance_column']

    # Constants from config
    max_memory_bandwidth = config['accelerator']['max_memory_bandwidth']
    max_achievable_tflops = config['accelerator']['max_achievable_teraflops']

    df = df.copy()

    # Add the ID column as the first column (1-based index). DataFrame.insert
    # raises if the name already exists (for example when the tool is re-run
    # on one of its own "_analyzed" sheets), so pick a free name instead.
    id_col = 'ID'
    suffix = 0
    while id_col in df.columns:
        suffix += 1
        id_col = f'ID_{suffix}'
    df.insert(0, id_col, range(1, len(df) + 1))
    # Tell the plotting code which column labels the kernels.
    config['excel']['kernel_name_column'] = id_col

    # Everything below is plain column arithmetic. Unlike row-wise
    # DataFrame.apply(axis=1) it also works for an empty sheet, and avoids
    # one Python-level pass over the rows per derived column.
    intensity = df[flops_per_byte_col]

    # Memory roofline for each kernel
    df['kernel_memory_roofline'] = intensity * max_memory_bandwidth

    # Kernels at or beyond the ridge point are compute bound. NaN compares
    # False and is therefore classified as "memory".
    is_compute_bound = intensity >= ai_ridge_maf
    df['bound_type_maf'] = is_compute_bound.map({True: 'compute', False: 'memory'})

    # Roofline each kernel is measured against: keep the memory roofline for
    # memory-bound rows, use the achievable compute peak for the others.
    df['reference_roofline'] = df['kernel_memory_roofline'].where(
        ~is_compute_bound, max_achievable_tflops
    )

    # Distance to that roofline, absolute and relative. A zero reference
    # roofline (zero arithmetic intensity) yields inf/NaN rather than an
    # exception.
    df['bound_distance'] = df['reference_roofline'] - df[performance_col]
    df['bound_distance_pct'] = (df['bound_distance'] / df['reference_roofline']) * 100

    return df
+ + Args: + df_result: DataFrame with analyzed data (with calculated columns) + df_orig: Original DataFrame without calculated columns + config: Configuration dictionary + ai_ridge_mtf: The arithmetic intensity ridge point for max theoretical flops + ai_ridge_maf: The arithmetic intensity ridge point for max achievable flops + plot_figure: Optional matplotlib figure to include in the Excel file + + Returns: + Path to the exported file + """ + timestamp = int(time.time()) + output_file = f"{config['output']['prefix']}-{timestamp}.xlsx" + + with pd.ExcelWriter(output_file, engine='openpyxl') as writer: + # Export the analyzed data + sheet_name = config['excel']['sheet_name'] + df_result.to_excel(writer, sheet_name=f"{sheet_name}_analyzed", index=False) + + # Export the original data + df_orig.to_excel(writer, sheet_name=f"{sheet_name}_original", index=False) + + # Create a sheet for scalar values + scalar_df = pd.DataFrame({ + 'Value': [ai_ridge_mtf, ai_ridge_maf] + }, index=['AI_ridge_mtf', 'AI_ridge_maf']) + + scalar_df.to_excel(writer, sheet_name='ScalarValues') + + # Include the plot if provided + if plot_figure: + # Create a 'Plots' sheet + workbook = writer.book + plot_sheet = workbook.create_sheet(title='RooflinePlot') + + # Save the figure to a BytesIO object + img_data = BytesIO() + plot_figure.savefig(img_data, format='png', dpi=300) + img_data.seek(0) + + # Add the image to the workbook + from openpyxl.drawing.image import Image + img = Image(img_data) + + # You can adjust the size and position as needed + img.width = 800 + img.height = 500 + + # Add the image to the sheet + plot_sheet.add_image(img, 'A1') + + # Add title and description + plot_sheet['A30'] = 'Roofline Analysis Plot' + plot_sheet['A31'] = f'Generated on: {time.strftime("%Y-%m-%d %H:%M:%S")}' + + return output_file \ No newline at end of file