Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion aws_doc_sdk_examples_tools/agent/update_doc_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def examples_from_updates(updates_path: Path) -> Iterable[Example]:
examples = [
Example(
id=id,
file=None,
file=Path(),
languages={},
title=update.get("title"),
title_abbrev=update.get("title_abbrev"),
Expand Down
Empty file.
79 changes: 62 additions & 17 deletions aws_doc_sdk_examples_tools/lliam/service_layer/dedupe_reservoir.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import logging
import re

from collections import Counter
from dataclasses import replace
import logging
from typing import Dict
from pathlib import Path
from typing import Dict, Iterable, List

from aws_doc_sdk_examples_tools.doc_gen import DocGen
from aws_doc_sdk_examples_tools.lliam.domain.commands import DedupeReservoir
Expand All @@ -12,33 +15,75 @@
logger = logging.getLogger(__name__)


def make_title_abbreviation(example: Example, counter: Counter):
def make_abbrev(example: Example, counter: Counter) -> str:
    """Return the example's title abbreviation, numbered if already seen.

    The first time a given abbreviation is encountered it is returned
    unchanged. Each later occurrence is returned as ``"<abbrev> (<n>)"``,
    where ``n`` is the number of times the abbreviation has now been seen.
    ``counter`` is updated in place so that numbering carries across calls.

    Args:
        example: The example whose ``title_abbrev`` is being made unique.
        counter: Running tally of abbreviations seen so far; mutated here.

    Returns:
        The (possibly numbered) abbreviation, or ``""`` when the example
        has no title abbreviation. In that case ``counter`` is untouched.
    """
    base = example.title_abbrev
    if not base:
        return ""

    seen = counter[base]
    counter[base] = seen + 1

    if seen:
        return f"{base} ({seen + 1})"
    return base


def handle_dedupe_reservoir(cmd: DedupeReservoir, uow: None):
doc_gen = DocGen.from_root(cmd.root, validation=ValidationConfig(check_aws=False))
def reset_abbrev_count(examples: Dict[str, Example]) -> Dict[str, Example]:
    """Strip enumeration suffixes from every example's title abbreviation.

    Any trailing run of ``" (<digits>)"`` groups is removed, so
    ``"some policy (2)"`` and ``"some policy (2) (2)"`` both become
    ``"some policy"``. A missing (``None``) abbreviation becomes ``""``.

    Resetting everything is a compromise. Ideally only newly added
    ``title_abbrev`` fields would receive an incremented count, but there
    is no way to know which entries are new, or which one is the original:

        title_abbrev: some policy
        title_abbrev: some policy (2)
        title_abbrev: some policy
        title_abbrev: some policy

    Args:
        examples: Mapping of example id to example.

    Returns:
        A new mapping with the same keys whose values are copies of the
        input examples with the suffix removed. The input mapping and its
        examples are not modified.
    """
    # Compiled once rather than per example; matches zero or more
    # " (<digits>)" groups anchored at the end of the string.
    enumeration_suffix = re.compile(r"(\s\(\d+\))*$")

    return {
        id: replace(
            example,
            title_abbrev=enumeration_suffix.sub("", example.title_abbrev or ""),
        )
        for id, example in examples.items()
    }


def example_in_packages(example: Example, packages: List[str]) -> bool:
    """Report whether an example belongs to one of the requested packages.

    The package name is taken to be the part of the metadata file's name
    that precedes ``"_metadata.yaml"``.

    Args:
        example: The example to check; only its ``file`` is inspected.
        packages: Package names to keep. An empty list means "no filter".

    Returns:
        ``True`` when no packages were requested, when the example has no
        (falsy) ``file``, or when the file's package name is listed in
        ``packages``; ``False`` otherwise.
    """
    # No filter requested, or nothing to filter on: keep the example.
    if not packages or not example.file:
        return True

    example_pkg_name, *_ = example.file.name.split("_metadata.yaml")
    return example_pkg_name in packages


def dedupe_examples(
    examples: Dict[str, Example], packages: List[str]
) -> Dict[str, Example]:
    """Give every selected example a unique title abbreviation.

    Examples are first filtered with ``example_in_packages``, then their
    abbreviations are normalised with ``reset_abbrev_count``, and finally
    renumbered with ``make_abbrev`` in mapping iteration order.

    Args:
        examples: Mapping of example id to example.
        packages: Package names to restrict the operation to.

    Returns:
        A new mapping containing only the selected examples, each replaced
        by a copy carrying its de-duplicated ``title_abbrev``.
    """
    selected: Dict[str, Example] = {}
    for key, example in examples.items():
        if example_in_packages(example, packages):
            selected[key] = example

    normalized = reset_abbrev_count(selected)

    seen: Counter = Counter()
    deduped: Dict[str, Example] = {}
    for key, example in normalized.items():
        deduped[key] = replace(example, title_abbrev=make_abbrev(example, seen))
    return deduped


def write_examples(examples: Dict[str, Example], root: Path):
    """Write the given examples back out under ``root``.

    The examples are passed to ``prepare_write`` and the result is handed
    to ``write_many`` together with ``root``.

    Args:
        examples: Mapping of example id to example to be written.
        root: Root path forwarded to ``write_many``.
    """
    # NOTE(review): prepare_write / write_many are defined outside this view;
    # presumably they serialise the examples and write the files under root.
    writes = prepare_write(examples)
    write_many(root, writes)


def handle_dedupe_reservoir(cmd: DedupeReservoir, uow: None):
    """Handle a ``DedupeReservoir`` command.

    Loads the examples under ``cmd.root`` with AWS checks disabled,
    de-duplicates the title abbreviations of the examples selected by
    ``cmd.packages``, and writes the result back under ``cmd.root``.

    Args:
        cmd: The command carrying ``root`` and ``packages``.
        uow: Unused by this handler.
    """
    validation = ValidationConfig(check_aws=False)
    doc_gen = DocGen.from_root(cmd.root, validation=validation)
    write_examples(dedupe_examples(doc_gen.examples, cmd.packages), cmd.root)
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
"ailly",
"--max-depth",
"10",
"--no-overwrite",
"--root",
str(AILLY_DIR_PATH),
]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from dataclasses import replace
import json
import logging
from collections import Counter
from pathlib import Path
from typing import Dict, Iterable, List

Expand Down Expand Up @@ -37,7 +36,7 @@ def examples_from_updates(updates: Updates) -> Iterable[Example]:
examples = [
Example(
id=id,
file=None,
file=Path(),
languages={},
title=update.get("title"),
title_abbrev=update.get("title_abbrev"),
Expand Down
122 changes: 122 additions & 0 deletions aws_doc_sdk_examples_tools/lliam/test/dedupe_reservoir_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
from collections import Counter
from pathlib import Path

from aws_doc_sdk_examples_tools.metadata import Example
from aws_doc_sdk_examples_tools.lliam.service_layer.dedupe_reservoir import (
make_abbrev,
example_in_packages,
reset_abbrev_count,
dedupe_examples,
)


def test_make_abbrev_continues_numbering():
    """An abbreviation already counted twice is numbered as the third."""
    seen = Counter({"Some abbrev": 2})
    subject = Example(
        id="test", file=Path(), languages={}, title_abbrev="Some abbrev"
    )

    assert make_abbrev(subject, seen) == "Some abbrev (3)"


def test_make_abbrev_first_occurrence():
    """A never-seen abbreviation is returned as-is and counted once."""
    seen = Counter()
    subject = Example(
        id="test", file=Path(), languages={}, title_abbrev="New abbrev"
    )

    abbrev = make_abbrev(subject, seen)

    assert abbrev == "New abbrev"
    assert seen["New abbrev"] == 1


def test_example_in_packages_no_packages():
    """With an empty package list, every example is included."""
    subject = Example(id="test", file=Path("test_metadata.yaml"), languages={})

    assert example_in_packages(subject, []) is True


def test_example_in_packages_matching_package():
    """An example whose metadata file matches a listed package is included."""
    wanted = ["pkg1", "pkg2"]
    subject = Example(id="test", file=Path("pkg1_metadata.yaml"), languages={})

    assert example_in_packages(subject, wanted) is True


def test_example_in_packages_non_matching_package():
    """An example whose metadata file matches no listed package is excluded."""
    wanted = ["pkg1", "pkg2"]
    subject = Example(id="test", file=Path("pkg3_metadata.yaml"), languages={})

    assert example_in_packages(subject, wanted) is False


def test_build_abbrev_counter():
    """reset_abbrev_count strips numbering and leaves plain titles alone."""
    original_titles = {
        "1": "Test (1)",
        "2": "Test (2)",
        "3": "Other",
        "4": "Test",
    }
    examples = {
        key: Example(id=key, file=Path(), languages={}, title_abbrev=title)
        for key, title in original_titles.items()
    }

    result = reset_abbrev_count(examples)

    expected_titles = {"1": "Test", "2": "Test", "3": "Other", "4": "Test"}
    for key, title in expected_titles.items():
        assert result[key].title_abbrev == title


def test_build_abbrev_counter_empty():
    """reset_abbrev_count on an empty mapping yields an empty result."""
    assert len(reset_abbrev_count({})) == 0


def test_dedupe_examples():
    """Stale, repeated numbering is replaced with one clean 1..n sequence."""
    # (example id, metadata file name, starting title_abbrev)
    specs = [
        ("ex1", "pkg1_metadata.yaml", "Test (2) (2)"),
        ("ex2", "pkg1_metadata.yaml", "Test (3) (3) (3)"),
        ("ex3", "pkg1_metadata.yaml", "Test"),
        ("ex4", "pkg1_metadata.yaml", "Test"),
        ("ex5", "pkg1_metadata.yaml", "Test"),
        ("ex6", "pkg2_metadata.yaml", "Test"),
    ]
    examples = {
        ex_id: Example(
            id=ex_id,
            file=Path(file_name),
            languages={},
            title_abbrev=abbrev,
        )
        for ex_id, file_name, abbrev in specs
    }

    result = dedupe_examples(examples, [])

    assert len(result) == 6
    title_abbrevs = sorted(
        ex.title_abbrev for ex in result.values() if ex.title_abbrev
    )
    assert title_abbrevs == [
        "Test",
        "Test (2)",
        "Test (3)",
        "Test (4)",
        "Test (5)",
        "Test (6)",
    ]
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def test_update_examples_title_abbrev(doc_gen_tributary: DocGen):
# Create an example with a title_abbrev to update
update_example = Example(
id="iam_policies_example",
file=None,
file=Path(),
languages={},
title_abbrev="Updated Title Abbrev",
)
Expand Down
2 changes: 1 addition & 1 deletion aws_doc_sdk_examples_tools/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ def validate(self, errors: MetadataErrors, root: Path):
@dataclass
class Example:
id: str
file: Optional[Path]
file: Path
languages: Dict[str, Language]
# Human readable title.
title: Optional[str] = field(default="")
Expand Down
3 changes: 2 additions & 1 deletion aws_doc_sdk_examples_tools/yaml_mapper.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

from pathlib import Path
from typing import Dict, Set, Tuple, Any, List, Optional, Union
from .metadata import (
Example,
Expand Down Expand Up @@ -112,7 +113,7 @@ def example_from_yaml(
return (
Example(
id="",
file=None,
file=Path(),
title=title,
title_abbrev=title_abbrev,
category=category,
Expand Down