awsdocs
diff --git a/‎aws_doc_sdk_examples_tools/agent/bin/main.py
Lines changed: 16 additions & 14 deletions b/‎aws_doc_sdk_examples_tools/agent/bin/main.py
Lines changed: 16 additions & 14 deletions
diff --git a/‎aws_doc_sdk_examples_tools/agent/make_prompts.py
Lines changed: 18 additions & 20 deletions b/‎aws_doc_sdk_examples_tools/agent/make_prompts.py
Lines changed: 18 additions & 20 deletions
diff --git a/‎aws_doc_sdk_examples_tools/agent/parse_json_files.py
Lines changed: 0 additions & 56 deletions b/‎aws_doc_sdk_examples_tools/agent/parse_json_files.py
Lines changed: 0 additions & 56 deletions
diff --git a/‎aws_doc_sdk_examples_tools/agent/process_ailly_files.py
Lines changed: 125 additions & 0 deletions b/‎aws_doc_sdk_examples_tools/agent/process_ailly_files.py
Lines changed: 125 additions & 0 deletions
@@ -4,9 +4,9 @@
 
 import typer
 
-from aws_doc_sdk_examples_tools.agent.make_prompts import main as make_prompts
-from aws_doc_sdk_examples_tools.agent.parse_json_files import main as parse_json_files
-from aws_doc_sdk_examples_tools.agent.update_doc_gen import update as update_doc_gen
+from aws_doc_sdk_examples_tools.agent.make_prompts import make_prompts
+from aws_doc_sdk_examples_tools.agent.process_ailly_files import process_ailly_files
+from aws_doc_sdk_examples_tools.agent.update_doc_gen import update_doc_gen
 from aws_doc_sdk_examples_tools.yaml_writer import prepare_write, write_many
 
 app = typer.Typer()
@@ -16,26 +16,28 @@
 IAM_UPDATES_PATH = AILLY_DIR_PATH / "iam_updates.json"
 
 
-def get_ailly_files(dir: Path) -> List[Path]:
-    return [
-        file
-        for file in dir.iterdir()
-        if file.is_file() and file.name.endswith(".ailly.md")
-    ]
-
-
 @app.command()
 def update(iam_tributary_root: str, system_prompts: List[str] = []) -> None:
+    """
+    Generate new IAM policy metadata for a tributary.
+    """
+    # Step 1: write one prompt file per example, plus the .aillyrc, into AILLY_DIR_PATH.
     doc_gen_root = Path(iam_tributary_root)
     make_prompts(
-        doc_gen_root=doc_gen_root, system_prompts=system_prompts, out=AILLY_DIR_PATH
+        doc_gen_root=doc_gen_root,
+        system_prompts=system_prompts,
+        out_dir=AILLY_DIR_PATH,
+        language="IAMPolicyGrammar",
     )
+    # Step 2: run the Ailly CLI over that directory.
     run(["npx", "@ailly/cli", "--root", AILLY_DIR])
-    file_paths = get_ailly_files(AILLY_DIR_PATH)
-    parse_json_files(file_paths=file_paths, out=IAM_UPDATES_PATH)
+
+    # Step 3: collect the key/value pairs from the generated files into one JSON file.
+    # NOTE(review): process_ailly_files logs its own errors instead of raising, so a
+    # failure there does not stop this command before update_doc_gen runs.
+    process_ailly_files(
+        input_dir=str(AILLY_DIR_PATH), output_file=str(IAM_UPDATES_PATH)
+    )
+
+    # Step 4: hand that JSON file to update_doc_gen, then write the resulting examples
+    # back under the tributary root.
     doc_gen = update_doc_gen(
         doc_gen_root=doc_gen_root, iam_updates_path=IAM_UPDATES_PATH
     )
+
     writes = prepare_write(doc_gen.examples)
     write_many(doc_gen_root, writes)
 
 
@@ -19,16 +19,13 @@ def make_doc_gen(root: Path) -> DocGen:
     return doc_gen
 
 
-def write_prompts(doc_gen: DocGen, out: Path) -> None:
-    out.mkdir(parents=True, exist_ok=True)
+def write_prompts(doc_gen: DocGen, out_dir: Path, language: str) -> None:
     examples = doc_gen.examples
     snippets = doc_gen.snippets
     for example_id, example in examples.items():
-        # Postfix with `.md` so Ailly will pick it up.
-        prompt_path = out / f"{example_id}.md"
-        # This assumes we're running DocGen specifically on AWSIAMPolicyExampleReservoir.
+        prompt_path = out_dir / f"{example_id}.md"
         snippet_key = (
-            example.languages["IAMPolicyGrammar"]
+            example.languages[language]
             .versions[0]
             .excerpts[0]
             .snippet_files[0]
@@ -38,7 +35,7 @@ def write_prompts(doc_gen: DocGen, out: Path) -> None:
         prompt_path.write_text(snippet.code, encoding="utf-8")
 
 
-def setup_ailly(system_prompts: List[str], out: Path) -> None:
+def setup_ailly(system_prompts: List[str], out_dir: Path) -> None:
     """Create the .aillyrc configuration file."""
     fence = "---"
     options = {"isolated": "true"}
@@ -47,32 +44,33 @@ def setup_ailly(system_prompts: List[str], out: Path) -> None:
 
     content = f"{fence}\n{options_block}\n{fence}\n{prompts_block}"
 
-    aillyrc_path = out / ".aillyrc"
-    aillyrc_path.parent.mkdir(parents=True, exist_ok=True)
+    aillyrc_path = out_dir / ".aillyrc"
     aillyrc_path.write_text(content, encoding="utf-8")
 
 
-def read_system_prompts(values: List[str]) -> List[str]:
-    """Parse system prompts from a list of strings or file paths."""
-    prompts = []
+def read_files(values: List[str]) -> List[str]:
+    """Resolve each value to text: file paths are read, anything else is kept as-is.
+
+    A value that names an existing file is replaced by that file's UTF-8 contents;
+    any other value is treated as literal text and passed through unchanged.
+    The returned list has one entry per input value, in the same order.
+    """
+    contents = []
     for value in values:
         if os.path.isfile(value):
             with open(value, "r", encoding="utf-8") as f:
-                prompts.append(f.read())
+                contents.append(f.read())
         else:
-            prompts.append(value)
-    return prompts
+            # Not an existing file: keep the string itself (e.g. an inline prompt).
+            contents.append(value)
+    return contents
 
 
 def validate_root_path(doc_gen_root: Path):
+    """Assert that doc_gen_root is an existing directory."""
+    # NOTE(review): `assert` statements are skipped when Python runs with -O, so this
+    # check disappears in optimized mode; consider raising an explicit exception.
-    assert "AWSIAMPolicyExampleReservoir" in str(doc_gen_root)
     assert doc_gen_root.is_dir()
 
 
-def main(doc_gen_root: Path, system_prompts: List[str], out: Path) -> None:
+def make_prompts(
+    doc_gen_root: Path, system_prompts: List[str], out_dir: Path, language: str
+) -> None:
     """Generate prompts and configuration files for Ailly."""
-    system_prompts = read_system_prompts(system_prompts)
-    setup_ailly(system_prompts, out)
+    # Validate before touching the filesystem so nothing is written for a bad root.
     validate_root_path(doc_gen_root)
+    # Create the output directory once here: setup_ailly and write_prompts write into
+    # out_dir without creating it themselves.
+    out_dir.mkdir(parents=True, exist_ok=True)
+    system_prompts = read_files(system_prompts)
+    setup_ailly(system_prompts, out_dir)
     doc_gen = make_doc_gen(doc_gen_root)
-    write_prompts(doc_gen, out)
+    # `language` selects which entry of each example's `languages` supplies the snippet.
+    write_prompts(doc_gen=doc_gen, out_dir=out_dir, language=language)
@@ -0,0 +1,125 @@
+"""
+Parse generated Ailly output for key: value pairs.
+
+This module processes *.md.ailly.md files, extracts key-value pairs,
+converts them to JSON entries in an array, and writes the JSON array
+to a specified output file.
+"""
+
+import json
+import logging
+from pathlib import Path
+from typing import Any, Dict, List, Set
+
+# NOTE(review): this configures the root logger at import time with level WARNING.
+logging.basicConfig(level=logging.WARNING)
+logger = logging.getLogger(__name__)
+
+# Keys that must be present once a fenced block has been parsed.
+EXPECTED_KEYS: Set[str] = {"title", "title_abbrev"}
+# Text prepended to the parsed value of each listed key.
+VALUE_PREFIXES: Dict[str, str] = {
+    "title": "Example policy: ",
+    "title_abbrev": "Example: ",
+}
+
+
+class MissingExpectedKeys(Exception):
+    """Raised when the parsed key/value pairs lack one or more expected keys."""
+
+
+def parse_fenced_blocks(content: str, fence="===") -> List[List[str]]:
+    """
+    Split content into the groups of lines enclosed by matching fence lines.
+
+    A line is a fence when, after stripping surrounding whitespace, it equals `fence`.
+    Fences toggle between "outside" and "inside"; lines seen while inside are kept
+    verbatim (not stripped). A block is only emitted when its closing fence is
+    reached, so a trailing block with no closing fence is dropped.
+
+    Args:
+        content: Full text to scan
+        fence: Marker line that opens and closes a block (default: "===")
+
+    Returns:
+        One list of lines per closed block, in order of appearance
+    """
+    collected: List[List[str]] = []
+    pending: List[str] = []
+    is_open = False
+
+    for raw_line in content.splitlines():
+        if raw_line.strip() != fence:
+            # Ordinary line: only kept while a block is open.
+            if is_open:
+                pending.append(raw_line)
+            continue
+
+        # Fence line: a closing fence flushes the block gathered so far.
+        if is_open:
+            collected.append(pending)
+            pending = []
+        is_open = not is_open
+
+    return collected
+
+
+def parse_block_lines(
+    block: List[str], key_pairs: Dict[str, str], expected_keys=EXPECTED_KEYS
+) -> None:
+    """
+    Extract `key => value` pairs from a block of lines into key_pairs.
+
+    Each line containing "=>" is split at the first "=>"; both sides are stripped and
+    stored in key_pairs, overwriting any existing entry for that key. Lines without
+    "=>" are ignored. key_pairs is mutated in place.
+
+    Args:
+        block: Lines of one fenced block
+        key_pairs: Dictionary that receives the parsed pairs
+        expected_keys: Keys that must be present in key_pairs after this block
+
+    Raises:
+        MissingExpectedKeys: If key_pairs (including entries added by earlier calls)
+            still lacks any of expected_keys; the missing keys are the exception argument
+    """
+    for line in block:
+        # partition splits at the first "=>" and reports whether one was found,
+        # so no separate membership test or length check is needed.
+        key, arrow, value = line.partition("=>")
+        if not arrow:
+            continue
+        key_pairs[key.strip()] = value.strip()
+    if missing_keys := expected_keys - key_pairs.keys():
+        raise MissingExpectedKeys(missing_keys)
+
+
+def parse_ailly_file(
+    file_path: str, value_prefixes: Dict[str, str] = VALUE_PREFIXES
+) -> Dict[str, Any]:
+    """
+    Parse an .md.ailly.md file and extract key-value pairs that are between === fence markers. Each
+    key value pair is assumed to be on one line and in the form of `key => value`. This formatting is
+    totally dependent on the LLM output written by Ailly.
+
+    Args:
+        file_path: Path to the .md.ailly.md file
+        value_prefixes: Maps a key to the text prepended to that key's parsed value
+
+    Returns:
+        Dictionary containing the extracted key-value pairs plus `id` (the file name
+        up to ".md.ailly.md") and `_source_file`, or an empty dictionary if reading
+        or parsing the file failed. Failures are logged, not raised.
+    """
+    result: Dict[str, str] = {}
+
+    try:
+        with open(file_path, "r", encoding="utf-8") as file:
+            content = file.read()
+
+        # NOTE(review): expected keys are only checked inside parse_block_lines, so a
+        # file with no fenced block yields just `id` and `_source_file` — confirm intended.
+        for block in parse_fenced_blocks(content):
+            parse_block_lines(block, result)
+
+        for key, prefix in value_prefixes.items():
+            if key in result:
+                result[key] = f"{prefix}{result[key]}"
+
+        result["id"] = Path(file_path).name.split(".md.ailly.md")[0]
+        result["_source_file"] = file_path
+
+    except Exception as e:
+        logger.error(f"Error parsing file {file_path}", exc_info=e)
+        # Drop whatever was parsed before the failure: a partial dict (for example a
+        # title without an id) is truthy and would otherwise be treated as a valid entry.
+        return {}
+
+    return result
+
+
+def process_ailly_files(
+    input_dir: str, output_file: str, file_pattern: str = "*.md.ailly.md"
+) -> None:
+    """
+    Process all .md.ailly.md files in the input directory and write the results as JSON to the output file.
+
+    Files are processed in sorted path order so the output is the same from run to
+    run. Files for which parse_ailly_file returns an empty result are left out.
+    Any exception raised while globbing or writing is logged, not raised.
+
+    Args:
+        input_dir: Directory containing .md.ailly.md files
+        output_file: Path to the output JSON file
+        file_pattern: Pattern to match files (default: "*.md.ailly.md")
+    """
+    results = []
+    input_path = Path(input_dir)
+
+    try:
+        # glob yields matches in filesystem order, which is not stable; sort them so
+        # the JSON array order does not change between runs or machines.
+        for file_path in sorted(input_path.glob(file_pattern)):
+            logger.info(f"Processing file: {file_path}")
+            parsed_data = parse_ailly_file(str(file_path))
+            if parsed_data:
+                results.append(parsed_data)
+
+        with open(output_file, "w", encoding="utf-8") as out_file:
+            json.dump(results, out_file, indent=2)
+
+        logger.info(
+            f"Successfully processed {len(results)} files. Output written to {output_file}"
+        )
+
+    except Exception as e:
+        logger.error("Error processing files", exc_info=e)