Skip to content

Commit 10add14

Browse files
dr5hn and claude committed
feat(postcodes/GU): import 7 Guam ZIPs (#1039)
Mirrors GU-mapped ZIPs from US.json into Guam's own country namespace, FK'd to nearest GU village by centroid distance.

Why
---
Guam uses US ZIPs in the 969xx range, but CSC represents Guam as its own country (iso2=GU, country_id=89) with 19 villages as states. Without this mirror, postcode lookups for GU return empty.

Coverage
--------
- 7 ZIPs / 100% state FK
- 7 of 19 GU villages covered (the 12 absent villages have no ZIP centroid in the Census ZCTA file)

State FK strategy
-----------------
Centroid-distance matching against cities/GU.json (25 GU localities), then FK to that city's state_id (one of 19 villages).

License
-------
Original source: US Census ZCTA (CC-0).
Each row: source: "us-census-via-gu-mirror"

Validation
----------
- python3 -m py_compile passes
- 100% regex match (^969\d{2}$)
- 100% state_id valid + state.country_id == 89 + state_code agrees
- No auto-managed fields (id, created_at, updated_at, flag)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 085bfd5 commit 10add14

2 files changed

Lines changed: 286 additions & 0 deletions

File tree

Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
#!/usr/bin/env python3
2+
"""Guam -> contributions/postcodes/GU.json importer for issue #1039.
3+
4+
Source data
5+
-----------
6+
Guam uses US ZIP codes 96910-96932. The US Census ZCTA file
7+
(already shipped to contributions/postcodes/US.json under
8+
state_code='GU') contains 7 GU-mapped postcodes with WGS-84
9+
lat/lng centroids.
10+
11+
CSC represents Guam as its own country (iso2=GU, country_id=89)
12+
with 19 villages as states. This importer mirrors the same codes
13+
into GU.json under the GU country namespace and FK'd to the
14+
nearest GU city by centroid distance.
15+
16+
What this script does
17+
---------------------
18+
1. Reads existing US.json filtered to state_code='GU'.
19+
2. Loads contributions/cities/GU.json (25 GU localities).
20+
3. For each GU ZIP, finds the nearest GU city by haversine distance,
21+
uses that city's state_id (one of 19 villages).
22+
4. Writes contributions/postcodes/GU.json with country_id=89.
23+
24+
License & attribution
25+
---------------------
26+
- Original source: US Census ZCTA Gazetteer (CC-0, public domain)
27+
- Each row: ``source: "us-census-via-gu-mirror"``
28+
29+
Usage
30+
-----
31+
python3 bin/scripts/sync/import_guam_postcodes.py
32+
"""
33+
34+
from __future__ import annotations
35+
36+
import argparse
37+
import json
38+
import math
39+
import re
40+
import sys
41+
from pathlib import Path
42+
from typing import Dict, List
43+
44+
45+
def haversine_km(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """Return the great-circle distance in kilometres between two points.

    Uses the haversine formula on a sphere of radius 6371 km.

    Args:
        lat1: Latitude of the first point, in decimal degrees.
        lon1: Longitude of the first point, in decimal degrees.
        lat2: Latitude of the second point, in decimal degrees.
        lon2: Longitude of the second point, in decimal degrees.

    Returns:
        The distance along the sphere's surface, in kilometres.
    """
    R = 6371.0  # sphere radius in km (conventional mean Earth radius)
    p1 = math.radians(lat1)
    p2 = math.radians(lat2)
    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)
    a = (
        math.sin(dlat / 2) ** 2
        + math.cos(p1) * math.cos(p2) * math.sin(dlon / 2) ** 2
    )
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points; unclamped, sqrt/asin would raise ValueError.
    a = min(1.0, max(0.0, a))
    return 2 * R * math.asin(math.sqrt(a))
56+
57+
58+
def _read_json(path: Path):
    """Return the parsed UTF-8 JSON document stored at *path*.

    The file is opened in a ``with`` block so the handle is closed
    deterministically instead of being left to the garbage collector.
    """
    with path.open(encoding="utf-8") as handle:
        return json.load(handle)


def _record_key(row: dict) -> tuple:
    """Return the de-duplication identity ``(code, lower-cased locality)``."""
    return (row["code"], (row.get("locality_name") or "").lower())


def _sort_key(row: dict) -> tuple:
    """Return the output ordering key ``(code, locality)``.

    A missing *or* null ``locality_name`` sorts as ``""`` so rows never end
    up comparing ``None`` against ``str``.
    """
    return (row["code"], row.get("locality_name") or "")


def _nearest_city(lat, lon, cities_with_geo):
    """Return the city dict closest to ``(lat, lon)``, or ``None``.

    ``cities_with_geo`` is a list of ``(lat, lon, city_dict)`` tuples.
    ``None`` is returned when the coordinates are unknown or the list is
    empty. On ties the first city in list order wins.
    """
    if lat is None or lon is None:
        return None
    best_city = None
    best_d = float("inf")
    for clat, clon, city in cities_with_geo:
        d = haversine_km(lat, lon, clat, clon)
        if d < best_d:
            best_d = d
            best_city = city
    return best_city


def main() -> int:
    """Mirror the GU rows of postcodes/US.json into postcodes/GU.json.

    Steps:
      1. Look up Guam (iso2 == "GU") in countries.json and compile its
         ``postal_code_regex`` (any code is accepted when it is absent).
      2. Select the rows of US.json whose ``state_code`` is "GU".
      3. For each row, find the nearest GU city by haversine distance and
         take that city's ``state_id`` / ``name`` as state FK and locality.
      4. Print summary statistics and, unless ``--dry-run`` is given, merge
         the new rows into contributions/postcodes/GU.json (existing rows
         are kept; rows are de-duplicated on code + locality and sorted).

    Returns:
        0 on success (including ``--dry-run``); 2 when countries.json has
        no entry for GU.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--dry-run", action="store_true")
    args = parser.parse_args()

    # The script is expected to live three directories below the repo root.
    project_root = Path(__file__).resolve().parents[3]

    countries = _read_json(project_root / "contributions/countries/countries.json")
    gu_country = next((c for c in countries if c.get("iso2") == "GU"), None)
    if gu_country is None:
        print("ERROR: GU not in countries.json", file=sys.stderr)
        return 2
    regex = re.compile(gu_country.get("postal_code_regex") or ".*")

    us_data = _read_json(project_root / "contributions/postcodes/US.json")
    gu_zips = [r for r in us_data if r.get("state_code") == "GU"]
    print(f"GU-mapped ZIPs in US.json: {len(gu_zips)}")

    gu_cities = _read_json(project_root / "contributions/cities/GU.json")
    gu_cities_with_geo = []
    for c in gu_cities:
        try:
            lat = float(c.get("latitude") or 0)
            lon = float(c.get("longitude") or 0)
        except (ValueError, TypeError):
            continue
        # A (0, 0) coordinate is treated as "no geo data" and ignored.
        if lat or lon:
            gu_cities_with_geo.append((lat, lon, c))
    print(f"GU cities with geo: {len(gu_cities_with_geo)}")

    states = _read_json(project_root / "contributions/states/states.json")
    gu_states = {s["id"]: s for s in states if s.get("country_id") == gu_country["id"]}
    print(
        f"Country: Guam (id={gu_country['id']}); "
        f"states indexed: {len(gu_states)}"
    )

    seen: set = set()
    records: List[dict] = []
    skipped_bad_regex = 0
    without_state = 0
    matched_state = 0

    for r in gu_zips:
        code = r.get("code")
        # A missing or non-string code cannot match the postal regex; count
        # it as a regex failure instead of crashing the whole import.
        if not isinstance(code, str) or not regex.match(code):
            skipped_bad_regex += 1
            continue

        try:
            lat = float(r["latitude"])
            lon = float(r["longitude"])
        except (ValueError, TypeError, KeyError):
            lat = lon = None

        nearest_city = _nearest_city(lat, lon, gu_cities_with_geo)
        state = None
        locality = None
        if nearest_city:
            state = gu_states.get(nearest_city.get("state_id"))
            locality = nearest_city.get("name")

        key = (code, (locality or "").lower())
        if key in seen:
            continue
        seen.add(key)

        # Count only rows that are actually emitted, so the statistics below
        # always add up to len(records) and the percentage cannot exceed 100.
        if state is None:
            without_state += 1
        else:
            matched_state += 1

        record: Dict[str, object] = {
            "code": code,
            "country_id": int(gu_country["id"]),
            "country_code": "GU",
        }
        if state is not None:
            record["state_id"] = int(state["id"])
            record["state_code"] = state.get("iso2")
        if locality:
            record["locality_name"] = locality
        if lat is not None and lon is not None:
            record["latitude"] = f"{lat:.6f}"
            record["longitude"] = f"{lon:.6f}"
        record["type"] = "full"
        record["source"] = "us-census-via-gu-mirror"
        records.append(record)

    print(f"Skipped (regex fail): {skipped_bad_regex:,}")
    # Rows without a state FK are still written (minus state_id/state_code),
    # so they are reported as emitted rather than as skipped.
    print(f"Emitted without state FK: {without_state:,}")
    print(f"Records emitted: {len(records):,}")
    pct = matched_state * 100 // max(1, len(records))
    print(f" with state: {matched_state:,} ({pct}%)")

    if args.dry_run:
        return 0

    target = project_root / "contributions/postcodes/GU.json"
    target.parent.mkdir(parents=True, exist_ok=True)
    if target.exists():
        # Keep every row already on disk; append only unseen (code, locality).
        existing = _read_json(target)
        existing_seen = {_record_key(r) for r in existing}
        merged = list(existing)
        for r in records:
            key = _record_key(r)
            if key not in existing_seen:
                merged.append(r)
                existing_seen.add(key)
        merged.sort(key=_sort_key)
    else:
        merged = sorted(records, key=_sort_key)

    with target.open("w", encoding="utf-8") as f:
        json.dump(merged, f, ensure_ascii=False, indent=2)
        f.write("\n")
    size_kb = target.stat().st_size / 1024
    print(
        f"\n[OK] Wrote {target.relative_to(project_root)} "
        f"({len(merged):,} rows, {size_kb:.0f} KB)"
    )
    return 0
197+
198+
199+
if __name__ == "__main__":
    # Script entry point: propagate main()'s return value as the exit status.
    exit_code = main()
    raise SystemExit(exit_code)

contributions/postcodes/GU.json

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
[
2+
{
3+
"code": "96910",
4+
"country_id": 89,
5+
"country_code": "GU",
6+
"state_id": 5257,
7+
"state_code": "04",
8+
"locality_name": "Chalan Pago-Ordot Village",
9+
"latitude": "13.450511",
10+
"longitude": "144.750706",
11+
"type": "full",
12+
"source": "us-census-via-gu-mirror"
13+
},
14+
{
15+
"code": "96913",
16+
"country_id": 89,
17+
"country_code": "GU",
18+
"state_id": 5256,
19+
"state_code": "03",
20+
"locality_name": "Barrigada Village",
21+
"latitude": "13.478311",
22+
"longitude": "144.815058",
23+
"type": "full",
24+
"source": "us-census-via-gu-mirror"
25+
},
26+
{
27+
"code": "96915",
28+
"country_id": 89,
29+
"country_code": "GU",
30+
"state_id": 5267,
31+
"state_code": "14",
32+
"locality_name": "Santa Rita Village",
33+
"latitude": "13.374452",
34+
"longitude": "144.708125",
35+
"type": "full",
36+
"source": "us-census-via-gu-mirror"
37+
},
38+
{
39+
"code": "96916",
40+
"country_id": 89,
41+
"country_code": "GU",
42+
"state_id": 5262,
43+
"state_code": "09",
44+
"locality_name": "Merizo Village",
45+
"latitude": "13.265897",
46+
"longitude": "144.689161",
47+
"type": "full",
48+
"source": "us-census-via-gu-mirror"
49+
},
50+
{
51+
"code": "96917",
52+
"country_id": 89,
53+
"country_code": "GU",
54+
"state_id": 5261,
55+
"state_code": "08",
56+
"locality_name": "Inarajan Village",
57+
"latitude": "13.282464",
58+
"longitude": "144.742688",
59+
"type": "full",
60+
"source": "us-census-via-gu-mirror"
61+
},
62+
{
63+
"code": "96928",
64+
"country_id": 89,
65+
"country_code": "GU",
66+
"state_id": 5259,
67+
"state_code": "06",
68+
"locality_name": "Agat Village",
69+
"latitude": "13.355917",
70+
"longitude": "144.657305",
71+
"type": "full",
72+
"source": "us-census-via-gu-mirror"
73+
},
74+
{
75+
"code": "96929",
76+
"country_id": 89,
77+
"country_code": "GU",
78+
"state_id": 5271,
79+
"state_code": "18",
80+
"locality_name": "Chaguian",
81+
"latitude": "13.567326",
82+
"longitude": "144.878059",
83+
"type": "full",
84+
"source": "us-census-via-gu-mirror"
85+
}
86+
]

0 commit comments

Comments
 (0)