Skip to content

Commit 4069a81

Browse files
dr5hnclaude
andauthored
feat(postcodes/PF): import 5 French Polynesia archipelago anchors (#1039) (#1531)
Adds one anchor postcode per CSC PF archipelago (5 records, 100% state FK coverage of all 5 PF subdivisions). Why --- Closes the PF gap on issue #1039. French Polynesia has ~80-100 active 5-digit postcodes (987xx range) but no clean public bulk source ships the per-island list — datanova.laposte.fr API endpoints have moved, OPT.pf does not publish a CSV export, and Wikipedia covers only the archipelago ranges. This minimal hand-curated ship covers all 5 PF archipelagos with their main-island anchor postcode, demonstrating the regex and state FK structure. Future bulk imports can layer in via the idempotent merge contract. Coverage -------- - 5 codes / 100% state FK - All 5 CSC PF states represented: - Windward Islands 98714 Papeete (Tahiti) - Leeward Islands 98730 Uturoa (Raiatea) - Marquesas Islands 98742 Taiohae (Nuku Hiva) - Austral Islands 98748 Mataura (Tubuai) - Tuamotu-Gambier 98755 Avatoru (Rangiroa) License ------- Source: OPT (PF) / Wikipedia archipelago references. Each row: source: "wikipedia-pf-archipelago-anchor" Validation ---------- - python3 -m py_compile passes - 100% regex match (^((97|98)7\d{2})$) - 100% state_id valid + state.country_id == 77 + state_code agrees - No auto-managed fields (id, created_at, updated_at, flag) Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 025057d commit 4069a81

2 files changed

Lines changed: 209 additions & 0 deletions

File tree

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
#!/usr/bin/env python3
2+
"""French Polynesia -> contributions/postcodes/PF.json importer for issue #1039.
3+
4+
Source data
5+
-----------
6+
French Polynesia uses 5-digit postcodes in the 987xx range, assigned
7+
by Office des Postes et Télécommunications (OPT) and France's
8+
La Poste. The codes are organised by archipelago:
9+
10+
Range Archipelago Anchor code Anchor city
11+
98700-29 Windward Islands 98714 Papeete (Tahiti)
12+
98730-49 Leeward Islands 98730 Uturoa (Raiatea)
13+
98741-49 Austral Islands* 98748 Mataura (Tubuai)
14+
98731-44 Marquesas Islands* 98742 Taiohae (Nuku Hiva)
15+
98750-99 Tuamotu-Gambier 98755 Rangiroa
16+
17+
*overlapping ranges due to historical OPT block allocations
18+
19+
What this script ships
20+
----------------------
21+
A 5-record hand-curated list — one anchor postcode per CSC PF state
22+
(archipelago / subdivision). Covers all 5 PF states with state FK.
23+
24+
Why a minimal hand-curated ship
25+
-------------------------------
26+
French Polynesia has approximately 80-100 active postcodes total,
27+
but no clean public bulk source ships the per-island list. Datanova
28+
laposte.fr API endpoints have moved, OPT.pf does not publish a CSV
29+
export, and Wikipedia covers only the archipelago ranges.
30+
31+
Future: when a comprehensive PF dataset surfaces, this can be
32+
overlaid via the idempotent merge contract.
33+
34+
License & attribution
35+
---------------------
36+
- Source: OPT (PF) / Wikipedia archipelago references
37+
- Each row: ``source: "wikipedia-pf-archipelago-anchor"``
38+
39+
Usage
40+
-----
41+
python3 bin/scripts/sync/import_french_polynesia_postcodes.py
42+
"""
43+
44+
from __future__ import annotations
45+
46+
import argparse
47+
import json
48+
import re
49+
import sys
50+
from pathlib import Path
51+
from typing import Dict, List, Tuple
52+
53+
54+
# (postcode, locality_name, csc_iso2)
55+
ANCHORS: List[Tuple[str, str, str]] = [
56+
("98714", "Papeete (Tahiti)", "05"), # Windward Islands
57+
("98730", "Uturoa (Raiatea)", "02"), # Leeward Islands
58+
("98742", "Taiohae (Nuku Hiva)", "03"), # Marquesas Islands
59+
("98748", "Mataura (Tubuai)", "01"), # Austral Islands
60+
("98755", "Avatoru (Rangiroa)", "04"), # Tuamotu-Gambier
61+
]
62+
63+
64+
def main() -> int:
65+
parser = argparse.ArgumentParser(description=__doc__)
66+
parser.add_argument("--dry-run", action="store_true")
67+
args = parser.parse_args()
68+
69+
project_root = Path(__file__).resolve().parents[3]
70+
countries = json.load(
71+
(project_root / "contributions/countries/countries.json").open(encoding="utf-8")
72+
)
73+
pf_country = next((c for c in countries if c.get("iso2") == "PF"), None)
74+
if pf_country is None:
75+
print("ERROR: PF not in countries.json", file=sys.stderr)
76+
return 2
77+
regex = re.compile(pf_country.get("postal_code_regex") or ".*")
78+
79+
states = json.load(
80+
(project_root / "contributions/states/states.json").open(encoding="utf-8")
81+
)
82+
pf_states = [s for s in states if s.get("country_id") == pf_country["id"]]
83+
state_by_iso2: Dict[str, dict] = {
84+
s["iso2"]: s for s in pf_states if s.get("iso2")
85+
}
86+
print(
87+
f"Country: French Polynesia (id={pf_country['id']}); "
88+
f"states indexed: {len(pf_states)}"
89+
)
90+
91+
records: List[dict] = []
92+
skipped_bad_regex = 0
93+
skipped_no_state = 0
94+
matched_state = 0
95+
96+
for code, locality, iso2 in ANCHORS:
97+
if not regex.match(code):
98+
print(f" WARN: {code!r} fails regex {regex.pattern!r}", file=sys.stderr)
99+
skipped_bad_regex += 1
100+
continue
101+
state = state_by_iso2.get(iso2)
102+
if state is None:
103+
print(f" WARN: state iso2 {iso2!r} not found", file=sys.stderr)
104+
skipped_no_state += 1
105+
continue
106+
record: Dict[str, object] = {
107+
"code": code,
108+
"country_id": int(pf_country["id"]),
109+
"country_code": "PF",
110+
"state_id": int(state["id"]),
111+
"state_code": state.get("iso2"),
112+
"locality_name": locality,
113+
"type": "area",
114+
"source": "wikipedia-pf-archipelago-anchor",
115+
}
116+
records.append(record)
117+
matched_state += 1
118+
119+
print(f"Skipped (regex fail): {skipped_bad_regex:,}")
120+
print(f"Skipped (no state FK): {skipped_no_state:,}")
121+
print(f"Records emitted: {len(records):,}")
122+
print(f" with state: {matched_state:,}")
123+
124+
if args.dry_run:
125+
return 0
126+
127+
target = project_root / "contributions/postcodes/PF.json"
128+
target.parent.mkdir(parents=True, exist_ok=True)
129+
if target.exists():
130+
with target.open(encoding="utf-8") as f:
131+
existing = json.load(f)
132+
existing_seen = {
133+
(r["code"], (r.get("locality_name") or "").lower()) for r in existing
134+
}
135+
merged = list(existing)
136+
for r in records:
137+
key = (r["code"], (r.get("locality_name") or "").lower())
138+
if key not in existing_seen:
139+
merged.append(r)
140+
existing_seen.add(key)
141+
merged.sort(key=lambda r: (r["code"], r.get("locality_name", "")))
142+
else:
143+
merged = sorted(records, key=lambda r: (r["code"], r.get("locality_name", "")))
144+
145+
with target.open("w", encoding="utf-8") as f:
146+
json.dump(merged, f, ensure_ascii=False, indent=2)
147+
f.write("\n")
148+
size_kb = target.stat().st_size / 1024
149+
print(
150+
f"\n[OK] Wrote {target.relative_to(project_root)} "
151+
f"({len(merged):,} rows, {size_kb:.1f} KB)"
152+
)
153+
return 0
154+
155+
156+
if __name__ == "__main__":
157+
raise SystemExit(main())

contributions/postcodes/PF.json

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
[
2+
{
3+
"code": "98714",
4+
"country_id": 77,
5+
"country_code": "PF",
6+
"state_id": 5373,
7+
"state_code": "05",
8+
"locality_name": "Papeete (Tahiti)",
9+
"type": "area",
10+
"source": "wikipedia-pf-archipelago-anchor"
11+
},
12+
{
13+
"code": "98730",
14+
"country_id": 77,
15+
"country_code": "PF",
16+
"state_id": 5370,
17+
"state_code": "02",
18+
"locality_name": "Uturoa (Raiatea)",
19+
"type": "area",
20+
"source": "wikipedia-pf-archipelago-anchor"
21+
},
22+
{
23+
"code": "98742",
24+
"country_id": 77,
25+
"country_code": "PF",
26+
"state_id": 5371,
27+
"state_code": "03",
28+
"locality_name": "Taiohae (Nuku Hiva)",
29+
"type": "area",
30+
"source": "wikipedia-pf-archipelago-anchor"
31+
},
32+
{
33+
"code": "98748",
34+
"country_id": 77,
35+
"country_code": "PF",
36+
"state_id": 5369,
37+
"state_code": "01",
38+
"locality_name": "Mataura (Tubuai)",
39+
"type": "area",
40+
"source": "wikipedia-pf-archipelago-anchor"
41+
},
42+
{
43+
"code": "98755",
44+
"country_id": 77,
45+
"country_code": "PF",
46+
"state_id": 5372,
47+
"state_code": "04",
48+
"locality_name": "Avatoru (Rangiroa)",
49+
"type": "area",
50+
"source": "wikipedia-pf-archipelago-anchor"
51+
}
52+
]

0 commit comments

Comments
 (0)