add subparsers (#63)

johncalesp · web-flow · commit 437e924aa85b · 2024-09-16T13:49:57.000-04:00
Co-authored-by: John Calderon <john.calderon@centml.ai>
diff --git a/pyproject.toml b/pyproject.toml
@@ -14,7 +14,8 @@ dependencies = [
     "transformers",
     "sentencepiece",
     "aiohttp",
-    "pydantic"
+    "pydantic",
+    "matplotlib"
 ]
 
 classifiers = [
@@ -28,4 +29,4 @@ classifiers = [
 package-dir = {"" = "src"}
 
 [project.scripts]
-inference-benchmark = "flexible_inference_benchmark.main:main"
+fib = "flexible_inference_benchmark.main:main"
diff --git a/scripts/lint/format.sh b/scripts/lint/format.sh
@@ -13,5 +13,4 @@ python -m black \
     --exclude=".*pb2.*" \
     --line-length 120 \
     $additional_opts \
-    ../../src/flexible_inference_benchmark \
-    ../../data_postprocessors 
+    ../../src/flexible_inference_benchmark 
diff --git a/scripts/lint/mypy.ini b/scripts/lint/mypy.ini
@@ -4,4 +4,4 @@ strict = True
 follow_imports = silent
 no_warn_unused_ignores = True
 allow_redefinition = True
-
+exclude = data_postprocessors
diff --git a/src/flexible_inference_benchmark/data_postprocessors/itl.py b/src/flexible_inference_benchmark/data_postprocessors/itl.py
@@ -7,12 +7,11 @@
 import matplotlib.pyplot as plt
 
 
-def parse_args():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--datapath", type=str, required=True, help="Path to the data file")
-    parser.add_argument("--output", type=str, required=False, help="Path to save the plot")
-    parser.add_argument('--request-num', type=int, default=0, help='Request number to plot')
-    return parser.parse_args()
+def add_itl_parser(subparsers: argparse._SubParsersAction):
+    itl_parser = subparsers.add_parser("generate-itl-plot")
+    itl_parser.add_argument("--datapath", type=str, required=True, help="Path to the data file")
+    itl_parser.add_argument("--output", type=str, required=False, help="Path to save the plot")
+    itl_parser.add_argument('--request-num', type=int, default=0, help='Request number to plot')
 
 
 def plot_itl(data, idx, output):
@@ -26,12 +25,7 @@ def plot_itl(data, idx, output):
     plt.show()
 
 
-def main():
-    args = parse_args()
+def run(args: argparse.Namespace):
     with open(args.datapath, 'r') as f:
         data = json.load(f)
     plot_itl(data, args.request_num, args.output)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/src/flexible_inference_benchmark/data_postprocessors/performance.py b/src/flexible_inference_benchmark/data_postprocessors/performance.py
@@ -4,15 +4,13 @@
 
 import json
 import argparse
-
 import numpy as np
 from transformers import AutoTokenizer
 
 
-def parse_args():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--datapath", type=str, required=True, help="Path to the data file")
-    return parser.parse_args()
+def add_performance_parser(subparsers: argparse._SubParsersAction) -> None:
+    performance_parser = subparsers.add_parser('analyse')
+    performance_parser.add_argument("--datapath", type=str, required=True, help='Path to the json file')
 
 
 def calculate_metrics(input_requests, outputs, benchmark_duration, tokenizer, stream):
@@ -74,13 +72,8 @@ def calculate_metrics(input_requests, outputs, benchmark_duration, tokenizer, st
         print("=" * 50)
 
 
-def main():
-    args = parse_args()
+def run(args: argparse.Namespace):
     with open(args.datapath, 'r') as f:
         data = json.load(f)
     tokenizer = AutoTokenizer.from_pretrained(data["tokenizer"])
     calculate_metrics(data["inputs"], data["outputs"], data["time"], tokenizer, data["stream"])
-
-
-if __name__ == "__main__":
-    main()
diff --git a/src/flexible_inference_benchmark/data_postprocessors/ttft.py b/src/flexible_inference_benchmark/data_postprocessors/ttft.py
@@ -1,3 +1,7 @@
+"""
+Simple example of a data postprocessor script with minimal error checking and typing that shows a plot of TTFT.
+"""
+
 import argparse
 import json
 import matplotlib.pyplot as plt
@@ -17,14 +21,13 @@ def color_scheme_generator(num_colors):
 
 
 def generate_plot(name, data, color, axis):
-    axis.set_ylabel('time (sec)')
+    axis.ecdf(data, orientation="horizontal", color=color)
     axis.set_xlabel('CDF')
-    axis.hist(data, orientation="horizontal", bins=len(data) // 2, fill=False, edgecolor=color, label=name)
-    # axis.legend()
-    axis.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05), fancybox=True, shadow=True, ncol=5)
+    axis.set_ylabel('time (sec)')
 
     ax2 = axis.twiny()
-    ax2.ecdf(data, orientation="horizontal", color=color)
+    ax2.hist(data, orientation="horizontal", bins=len(data) // 2, fill=False, edgecolor=color, label=name)
+    ax2.set_xticks([])
 
 
 def plot_ttft(files, color_scheme):
@@ -36,14 +39,16 @@ def plot_ttft(files, color_scheme):
         generate_plot(data["backend"], ttft_arr, color_scheme[i], ax1)
 
     fig.tight_layout()
-    plt.title('TTFS')
+    plt.title('TTFT')
     plt.tight_layout()
     plt.savefig("ttft.pdf")
 
 
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--files", nargs="+", help="list of json files")
-    args = parser.parse_args()
+def add_ttft_parser(subparsers: argparse._SubParsersAction):
+    ttft_parser = subparsers.add_parser("generate-ttft-plot")
+    ttft_parser.add_argument("--files", nargs="+", help="list of json files")
+
+
+def run(args: argparse.Namespace):
     color_scheme = color_scheme_generator(len(args.files))
     plot_ttft(args.files, color_scheme)
diff --git a/src/flexible_inference_benchmark/main.py b/src/flexible_inference_benchmark/main.py
@@ -14,6 +14,9 @@
 from flexible_inference_benchmark.engine.client import Client
 from flexible_inference_benchmark.engine.backend_functions import ASYNC_REQUEST_FUNCS
 from flexible_inference_benchmark.engine.workloads import WORKLOADS_TYPES
+from flexible_inference_benchmark.data_postprocessors.performance import add_performance_parser
+from flexible_inference_benchmark.data_postprocessors.ttft import add_ttft_parser
+from flexible_inference_benchmark.data_postprocessors.itl import add_itl_parser
 
 logger = logging.getLogger(__name__)
 
@@ -99,134 +102,148 @@ def send_requests(
     return asyncio.run(client.benchmark(requests_prompts, requests_times))
 
 
-def parse_args() -> argparse.Namespace:
+def add_benchmark_subparser(subparsers: argparse._SubParsersAction) -> None:  # type: ignore [type-arg]
 
-    parser = argparse.ArgumentParser(description="CentML Inference Benchmark")
+    benchmark_parser = subparsers.add_parser('benchmark')
 
-    parser.add_argument("--seed", type=int, default=None, help="seed for reproducibility")
+    benchmark_parser.add_argument("--seed", type=int, default=None, help="seed for reproducibility")
 
-    parser.add_argument(
+    benchmark_parser.add_argument(
         "--backend",
         type=str,
         default='cserve',
         choices=list(ASYNC_REQUEST_FUNCS.keys()),
         help="Backend inference engine.",
     )
 
-    parser.add_argument(
+    benchmark_parser.add_argument(
         "--workload-type",
         type=str,
         default=None,
         choices=list(WORKLOADS_TYPES.keys()),
         help="choose a workload type, this will overwrite some arguments",
     )
 
-    url_group = parser.add_mutually_exclusive_group()
+    url_group = benchmark_parser.add_mutually_exclusive_group()
 
     url_group.add_argument(
         "--base-url", type=str, default=None, help="Server or API base url if not using http host and port."
     )
 
-    parser.add_argument(
+    benchmark_parser.add_argument(
         "--https-ssl", default=True, help="whether to check for ssl certificate for https endpoints, default is True"
     )
 
-    parser.add_argument("--endpoint", type=str, default="/v1/completions", help="API endpoint.")
+    benchmark_parser.add_argument("--endpoint", type=str, default="/v1/completions", help="API endpoint.")
 
-    req_group = parser.add_mutually_exclusive_group()
+    req_group = benchmark_parser.add_mutually_exclusive_group()
 
     req_group.add_argument("--num-of-req", type=int, default=None, help="Total number of request.")
 
     req_group.add_argument("--max-time-for-reqs", type=int, default=None, help="Max time for requests in seconds.")
 
-    parser.add_argument(
+    benchmark_parser.add_argument(
         "--request-distribution",
         nargs="*",
         default=["exponential", 1],
         help="Request distribution [Distribution_type (inputs to distribution)]",
     )
 
-    parser.add_argument(
+    benchmark_parser.add_argument(
         "--input-token-distribution",
         nargs="*",
         default=["uniform", 0, 255],
         help="Request distribution [Distribution_type (inputs to distribution)]",
     )
 
-    parser.add_argument(
+    benchmark_parser.add_argument(
         "--output-token-distribution",
         nargs="*",
         default=["uniform", 0, 255],
         help="Request distribution [Distribution_type (inputs to distribution)]",
     )
 
-    prefix_group = parser.add_mutually_exclusive_group()
+    prefix_group = benchmark_parser.add_mutually_exclusive_group()
 
     prefix_group.add_argument("--prefix-text", type=str, default=None, help="Text to use as prefix for all requests.")
 
     prefix_group.add_argument("--prefix-len", type=int, default=None, help="Length of prefix to use for all requests.")
 
     prefix_group.add_argument('--no-prefix', action='store_true', help='No prefix for requests.')
 
-    parser.add_argument("--disable-ignore-eos", action="store_true", help="Disables ignoring the eos token")
+    benchmark_parser.add_argument("--disable-ignore-eos", action="store_true", help="Disables ignoring the eos token")
 
-    parser.add_argument("--disable-stream", action="store_true", help="Disable stream response from API")
+    benchmark_parser.add_argument("--disable-stream", action="store_true", help="Disable stream response from API")
 
-    parser.add_argument("--cookies", default={}, help="Insert cookies in the request")
+    benchmark_parser.add_argument("--cookies", default={}, help="Insert cookies in the request")
 
-    parser.add_argument(
+    benchmark_parser.add_argument(
         "--dataset-name",
         type=str,
         default="random",
         choices=["sharegpt", "other", "random"],
         help="Name of the dataset to benchmark on.",
     )
 
-    parser.add_argument("--dataset-path", type=str, default=None, help="Path to the dataset.")
+    benchmark_parser.add_argument("--dataset-path", type=str, default=None, help="Path to the dataset.")
 
-    parser.add_argument("--model", type=str, help="Name of the model.")
+    benchmark_parser.add_argument("--model", type=str, help="Name of the model.")
 
-    parser.add_argument(
+    benchmark_parser.add_argument(
         "--tokenizer", type=str, default=None, help="Name or path of the tokenizer, if not using the default tokenizer."
     )
 
-    parser.add_argument("--disable-tqdm", action="store_true", help="Specify to disable tqdm progress bar.")
+    benchmark_parser.add_argument("--disable-tqdm", action="store_true", help="Specify to disable tqdm progress bar.")
 
-    parser.add_argument("--best-of", type=int, default=1, help="Number of best completions to return.")
+    benchmark_parser.add_argument("--best-of", type=int, default=1, help="Number of best completions to return.")
 
-    parser.add_argument("--use-beam-search", action="store_true", help="Use beam search for completions.")
+    benchmark_parser.add_argument("--use-beam-search", action="store_true", help="Use beam search for completions.")
 
-    parser.add_argument(
+    benchmark_parser.add_argument(
         "--output-file",
         type=str,
         default='output-file.json',
         required=False,
         help="Output json file to save the results.",
     )
 
-    parser.add_argument("--debug", action="store_true", help="Log debug messages")
+    benchmark_parser.add_argument("--debug", action="store_true", help="Log debug messages")
 
-    parser.add_argument("--verbose", action="store_true", help="Print short description of each request")
+    benchmark_parser.add_argument("--verbose", action="store_true", help="Print short description of each request")
 
-    parser.add_argument("--config-file", default=None, help="configuration file")
+    benchmark_parser.add_argument("--config-file", default=None, help="configuration file")
+
+
+def parse_args() -> argparse.Namespace:
+
+    parser = argparse.ArgumentParser(description="CentML Inference Benchmark")
+
+    subparsers = parser.add_subparsers(title='Subcommands', dest='subcommand')
+
+    add_performance_parser(subparsers)
+    add_benchmark_subparser(subparsers)
+    add_ttft_parser(subparsers)
+    add_itl_parser(subparsers)
 
     args = parser.parse_args()
-    if args.config_file:
-        with open(args.config_file, 'r') as f:
-            parser.set_defaults(**json.load(f))
-    # Reload arguments to override config file values with command line values
-    args = parser.parse_args()
-    if not (args.prefix_text or args.prefix_len or args.no_prefix):
-        parser.error("Please provide either prefix text or prefix length or specify no prefix.")
-    if not (args.num_of_req or args.max_time_for_reqs):
-        parser.error("Please provide either number of requests or max time for requests.")
-    if not args.model:
-        parser.error("Please provide the model name.")
+    if args.subcommand == 'benchmark':
+        if args.config_file:
+            with open(args.config_file, 'r') as f:
+                file_data = json.load(f)
+            for k, v in file_data.items():
+                # NOTE: values from the config file overwrite parsed args, including ones set on the command line
+                setattr(args, k, v)
+        if not (args.prefix_text or args.prefix_len or args.no_prefix):
+            parser.error("Please provide either prefix text or prefix length or specify no prefix.")
+        if not (args.num_of_req or args.max_time_for_reqs):
+            parser.error("Please provide either number of requests or max time for requests.")
+        if not args.model:
+            parser.error("Please provide the model name.")
+
     return args
 
 
-def main() -> None:
-    args = parse_args()
+def run_main(args: argparse.Namespace) -> None:
     configure_logging(args)
     if args.workload_type:
         workload_type = WORKLOADS_TYPES[args.workload_type]()
@@ -285,5 +302,23 @@ def main() -> None:
         logger.debug(f"{output_list}")
 
 
+def main() -> None:
+    args = parse_args()
+    if args.subcommand == "analyse":
+        from flexible_inference_benchmark.data_postprocessors.performance import run
+
+        run(args)
+    elif args.subcommand == "generate-ttft-plot":
+        from flexible_inference_benchmark.data_postprocessors.ttft import run
+
+        run(args)
+    elif args.subcommand == "generate-itl-plot":
+        from flexible_inference_benchmark.data_postprocessors.itl import run
+
+        run(args)
+    else:
+        run_main(args)
+
+
 if __name__ == '__main__':
     main()