Skip to content

Commit 105b2a6

Browse files
authored
Format change and policy title prefix (#168)
Change the parser's expected format to something less complex than JSON. Also add example policy prefixes.
1 parent 85924b0 commit 105b2a6

File tree

6 files changed

+334
-92
lines changed

6 files changed

+334
-92
lines changed

aws_doc_sdk_examples_tools/agent/bin/main.py

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@
44

55
import typer
66

7-
from aws_doc_sdk_examples_tools.agent.make_prompts import main as make_prompts
8-
from aws_doc_sdk_examples_tools.agent.parse_json_files import main as parse_json_files
9-
from aws_doc_sdk_examples_tools.agent.update_doc_gen import update as update_doc_gen
7+
from aws_doc_sdk_examples_tools.agent.make_prompts import make_prompts
8+
from aws_doc_sdk_examples_tools.agent.process_ailly_files import process_ailly_files
9+
from aws_doc_sdk_examples_tools.agent.update_doc_gen import update_doc_gen
1010
from aws_doc_sdk_examples_tools.yaml_writer import prepare_write, write_many
1111

1212
app = typer.Typer()
@@ -16,26 +16,28 @@
1616
IAM_UPDATES_PATH = AILLY_DIR_PATH / "iam_updates.json"
1717

1818

19-
def get_ailly_files(dir: Path) -> List[Path]:
20-
return [
21-
file
22-
for file in dir.iterdir()
23-
if file.is_file() and file.name.endswith(".ailly.md")
24-
]
25-
26-
2719
@app.command()
2820
def update(iam_tributary_root: str, system_prompts: List[str] = []) -> None:
21+
"""
22+
Generate new IAM policy metadata for a tributary.
23+
"""
2924
doc_gen_root = Path(iam_tributary_root)
3025
make_prompts(
31-
doc_gen_root=doc_gen_root, system_prompts=system_prompts, out=AILLY_DIR_PATH
26+
doc_gen_root=doc_gen_root,
27+
system_prompts=system_prompts,
28+
out_dir=AILLY_DIR_PATH,
29+
language="IAMPolicyGrammar",
3230
)
3331
run(["npx", "@ailly/cli", "--root", AILLY_DIR])
34-
file_paths = get_ailly_files(AILLY_DIR_PATH)
35-
parse_json_files(file_paths=file_paths, out=IAM_UPDATES_PATH)
32+
33+
process_ailly_files(
34+
input_dir=str(AILLY_DIR_PATH), output_file=str(IAM_UPDATES_PATH)
35+
)
36+
3637
doc_gen = update_doc_gen(
3738
doc_gen_root=doc_gen_root, iam_updates_path=IAM_UPDATES_PATH
3839
)
40+
3941
writes = prepare_write(doc_gen.examples)
4042
write_many(doc_gen_root, writes)
4143

aws_doc_sdk_examples_tools/agent/make_prompts.py

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,13 @@ def make_doc_gen(root: Path) -> DocGen:
1919
return doc_gen
2020

2121

22-
def write_prompts(doc_gen: DocGen, out: Path) -> None:
23-
out.mkdir(parents=True, exist_ok=True)
22+
def write_prompts(doc_gen: DocGen, out_dir: Path, language: str) -> None:
2423
examples = doc_gen.examples
2524
snippets = doc_gen.snippets
2625
for example_id, example in examples.items():
27-
# Postfix with `.md` so Ailly will pick it up.
28-
prompt_path = out / f"{example_id}.md"
29-
# This assumes we're running DocGen specifically on AWSIAMPolicyExampleReservoir.
26+
prompt_path = out_dir / f"{example_id}.md"
3027
snippet_key = (
31-
example.languages["IAMPolicyGrammar"]
28+
example.languages[language]
3229
.versions[0]
3330
.excerpts[0]
3431
.snippet_files[0]
@@ -38,7 +35,7 @@ def write_prompts(doc_gen: DocGen, out: Path) -> None:
3835
prompt_path.write_text(snippet.code, encoding="utf-8")
3936

4037

41-
def setup_ailly(system_prompts: List[str], out: Path) -> None:
38+
def setup_ailly(system_prompts: List[str], out_dir: Path) -> None:
4239
"""Create the .aillyrc configuration file."""
4340
fence = "---"
4441
options = {"isolated": "true"}
@@ -47,32 +44,33 @@ def setup_ailly(system_prompts: List[str], out: Path) -> None:
4744

4845
content = f"{fence}\n{options_block}\n{fence}\n{prompts_block}"
4946

50-
aillyrc_path = out / ".aillyrc"
51-
aillyrc_path.parent.mkdir(parents=True, exist_ok=True)
47+
aillyrc_path = out_dir / ".aillyrc"
5248
aillyrc_path.write_text(content, encoding="utf-8")
5349

5450

55-
def read_system_prompts(values: List[str]) -> List[str]:
56-
"""Parse system prompts from a list of strings or file paths."""
57-
prompts = []
51+
def read_files(values: List[str]) -> List[str]:
52+
"""Read contents of files into a list of file contents."""
53+
contents = []
5854
for value in values:
5955
if os.path.isfile(value):
6056
with open(value, "r", encoding="utf-8") as f:
61-
prompts.append(f.read())
57+
contents.append(f.read())
6258
else:
63-
prompts.append(value)
64-
return prompts
59+
contents.append(value)
60+
return contents
6561

6662

6763
def validate_root_path(doc_gen_root: Path):
68-
assert "AWSIAMPolicyExampleReservoir" in str(doc_gen_root)
6964
assert doc_gen_root.is_dir()
7065

7166

72-
def main(doc_gen_root: Path, system_prompts: List[str], out: Path) -> None:
67+
def make_prompts(
68+
doc_gen_root: Path, system_prompts: List[str], out_dir: Path, language: str
69+
) -> None:
7370
"""Generate prompts and configuration files for Ailly."""
74-
system_prompts = read_system_prompts(system_prompts)
75-
setup_ailly(system_prompts, out)
7671
validate_root_path(doc_gen_root)
72+
out_dir.mkdir(parents=True, exist_ok=True)
73+
system_prompts = read_files(system_prompts)
74+
setup_ailly(system_prompts, out_dir)
7775
doc_gen = make_doc_gen(doc_gen_root)
78-
write_prompts(doc_gen, out)
76+
write_prompts(doc_gen=doc_gen, out_dir=out_dir, language=language)

aws_doc_sdk_examples_tools/agent/parse_json_files.py

Lines changed: 0 additions & 56 deletions
This file was deleted.
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
"""
2+
Parse generated Ailly output for key: value pairs.
3+
4+
This module processes *.md.ailly.md files, extracts key-value pairs,
5+
converts them to JSON entries in an array, and writes the JSON array
6+
to a specified output file.
7+
"""
8+
9+
import json
10+
import logging
11+
from pathlib import Path
12+
from typing import Any, Dict, List, Set
13+
14+
logging.basicConfig(level=logging.WARNING)
15+
logger = logging.getLogger(__name__)
16+
17+
# Keys that the parsed key/value pairs are checked against by default.
EXPECTED_KEYS: Set[str] = {"title", "title_abbrev"}

# Per-key text placed in front of the parsed value for that key.
VALUE_PREFIXES: Dict[str, str] = dict(
    title="Example policy: ",
    title_abbrev="Example: ",
)
22+
23+
24+
class MissingExpectedKeys(Exception):
    """Signals that one or more expected keys were absent from parsed output."""
26+
27+
28+
def parse_fenced_blocks(content: str, fence="===") -> List[List[str]]:
    """
    Collect the lines found between pairs of fence markers.

    A line counts as a fence when, after stripping surrounding whitespace,
    it equals `fence`. Fences toggle between "outside" and "inside"; lines
    seen while inside are gathered, and each closing fence emits the
    gathered lines as one block. A block that is opened but never closed
    is not emitted.

    Args:
        content: Full text to scan.
        fence: Marker that opens and closes a block.

    Returns:
        One list of raw (unstripped) lines per closed block, in order.
    """
    collected: List[List[str]] = []
    pending: List[str] = []
    is_open = False

    for raw_line in content.splitlines():
        if raw_line.strip() != fence:
            # Ordinary line: keep it only while a block is open.
            if is_open:
                pending.append(raw_line)
            continue

        # Fence line: a closing fence flushes the pending block.
        if is_open:
            collected.append(pending)
            pending = []
        is_open = not is_open

    return collected
43+
44+
45+
def parse_block_lines(
    block: List[str], key_pairs: Dict[str, str], expected_keys=EXPECTED_KEYS
):
    """
    Read `key => value` lines from a block into `key_pairs`, in place.

    Lines without `=>` are ignored. Each matching line is split at its
    first `=>`; both sides are stripped of surrounding whitespace. A key
    that is already present is overwritten.

    Args:
        block: Lines of a single fenced block.
        key_pairs: Mapping that receives the parsed pairs (mutated).
        expected_keys: Keys that must be present in `key_pairs` once the
            block has been read.

    Raises:
        MissingExpectedKeys: If any expected key is absent from `key_pairs`
            after the block is processed; carries the set of missing keys.
    """
    for entry in block:
        name, arrow, remainder = entry.partition("=>")
        if not arrow:
            # No separator on this line, so it is not a key/value pair.
            continue
        key_pairs[name.strip()] = remainder.strip()

    absent = expected_keys - key_pairs.keys()
    if absent:
        raise MissingExpectedKeys(absent)
56+
57+
58+
def parse_ailly_file(
    file_path: str, value_prefixes: Dict[str, str] = VALUE_PREFIXES
) -> Dict[str, Any]:
    """
    Parse an .md.ailly.md file and extract key-value pairs that are between === fence markers. Each
    key value pair is assumed to be on one line and in the form of `key => value`. This formatting is
    totally dependent on the LLM output written by Ailly.

    On success the result also carries an `id` (the file name with its
    `.md.ailly.md` suffix removed) and `_source_file` (the path as given).

    Args:
        file_path: Path to the .md.ailly.md file
        value_prefixes: Mapping of key to text that is prepended to that
            key's parsed value, when the key is present.

    Returns:
        Dictionary containing the extracted key-value pairs, or an empty
        dictionary if the file could not be read or parsed. Any failure is
        logged rather than raised.
    """
    result: Dict[str, str] = {}

    try:
        with open(file_path, "r", encoding="utf-8") as file:
            content = file.read()

        for block in parse_fenced_blocks(content):
            parse_block_lines(block, result)

        for key, prefix in value_prefixes.items():
            if key in result:
                result[key] = f"{prefix}{result[key]}"

        result["id"] = Path(file_path).name.split(".md.ailly.md")[0]
        result["_source_file"] = file_path

    except Exception as e:
        logger.error(f"Error parsing file {file_path}", exc_info=e)
        # A failure part-way through can leave `result` holding some keys
        # but no `id`, `_source_file`, or prefixes. Returning that partial
        # dict would look like a valid entry to callers that only test
        # truthiness, so discard it.
        return {}

    return result
94+
95+
96+
def process_ailly_files(
97+
input_dir: str, output_file: str, file_pattern: str = "*.md.ailly.md"
98+
) -> None:
99+
"""
100+
Process all .md.ailly.md files in the input directory and write the results as JSON to the output file.
101+
102+
Args:
103+
input_dir: Directory containing .md.ailly.md files
104+
output_file: Path to the output JSON file
105+
file_pattern: Pattern to match files (default: "*.md.ailly.md")
106+
"""
107+
results = []
108+
input_path = Path(input_dir)
109+
110+
try:
111+
for file_path in input_path.glob(file_pattern):
112+
logger.info(f"Processing file: {file_path}")
113+
parsed_data = parse_ailly_file(str(file_path))
114+
if parsed_data:
115+
results.append(parsed_data)
116+
117+
with open(output_file, "w", encoding="utf-8") as out_file:
118+
json.dump(results, out_file, indent=2)
119+
120+
logger.info(
121+
f"Successfully processed {len(results)} files. Output written to {output_file}"
122+
)
123+
124+
except Exception as e:
125+
logger.error("Error processing files", exc_info=e)

0 commit comments

Comments
 (0)