Skip to content

Commit 69601e3

Browse files
dr5hn and claude authored
feat(postcodes/SE): bulk-import 16,361 codes via SE community mirror (#1039) (#1442)
Adds Swedish postcodes via the zegl/sweden-zipcode community CSV. 1. bin/scripts/sync/import_sweden_postcodes.py — pipeline reading Zip,City CSV. Formats codes in '### ##' form to match the SE regex. 2. contributions/postcodes/SE.json — 16k+ records. state_id is null on every row: Swedish postcodes are organised by sorting-route geography, not administrative county. Without a kommune-postcode crosswalk the source CSV does not include, no reliable state assignment is possible. State backfill remains a follow-up using Skatteverket open ortsdata. License & attribution - Mirror: github.com/zegl/sweden-zipcode - Each row: source: 'se-zipcode' Refs: #1039 Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent c51cfd8 commit 69601e3

2 files changed

Lines changed: 131260 additions & 0 deletions

File tree

Lines changed: 122 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,122 @@
1+
#!/usr/bin/env python3
2+
"""Sweden -> contributions/postcodes/SE.json importer for issue #1039.
3+
4+
Source data
5+
-----------
6+
The community-maintained ``zegl/sweden-zipcode`` archive redistributes a
7+
plain Zip,City CSV covering 16,393 Swedish postcodes.
8+
9+
https://github.com/zegl/sweden-zipcode
10+
11+
What this script does
12+
---------------------
13+
1. Reads sweden-zipcode.csv (UTF-8, header: Zip,City)
14+
2. Picks one canonical record per unique zip (first city alphabetically
15+
when multiple rows exist for one code)
16+
3. Formats codes in the canonical "### ##" form expected by
17+
countries.postal_code_regex (^(?:SE)?\\d{3}\\s\\d{2}$)
18+
4. Writes contributions/postcodes/SE.json
19+
20+
Why state_id is null
21+
--------------------
22+
Swedish postcodes were assigned by sorting-route geography rather than
23+
by administrative county boundaries, so there is no reliable prefix-to-
24+
county mapping. The source CSV has no kommune column either. State
25+
resolution would require a separate kommune-postcode crosswalk that
26+
isn't redistributable. Records ship with country_id only; state_id can
27+
be backfilled in a follow-up PR once a clean kommune crosswalk is wired
28+
in (e.g. via Skatteverket's open ortsdata).
29+
30+
License & attribution
31+
---------------------
32+
- Mirror: github.com/zegl/sweden-zipcode (community redistribution)
33+
- Each row: source: "se-zipcode"
34+
"""
35+
36+
from __future__ import annotations
37+
38+
import argparse
39+
import csv
40+
import json
41+
import sys
42+
from pathlib import Path
43+
from typing import Dict, List
44+
45+
46+
def _read_cities_by_code(src: Path) -> Dict[str, List[str]]:
    """Group the City values of the Zip,City CSV by formatted postcode.

    Rows whose Zip is not exactly five ASCII digits are skipped; the number
    of skipped rows is reported on stderr so bad input is not silently lost.

    Returns a mapping of "### ##" code -> list of (possibly empty) city names
    in file order.
    """
    by_code: Dict[str, List[str]] = {}
    skipped = 0
    # utf-8-sig transparently strips a leading BOM; with plain utf-8 a BOM
    # would turn the header into "\ufeffZip" and every row would be dropped.
    with src.open(encoding="utf-8-sig", newline="") as f:
        for row in csv.DictReader(f):
            zip_raw = (row.get("Zip") or "").strip()
            city = (row.get("City") or "").strip()
            # str.isdigit() alone accepts non-ASCII digits and superscripts,
            # which would produce codes that do not match the "### ##" form.
            if len(zip_raw) != 5 or not (zip_raw.isascii() and zip_raw.isdigit()):
                skipped += 1
                continue
            # Format ### ##
            by_code.setdefault(f"{zip_raw[:3]} {zip_raw[3:]}", []).append(city)
    if skipped:
        print(
            f"WARNING: skipped {skipped:,} row(s) without a 5-digit Zip",
            file=sys.stderr,
        )
    return by_code


def _build_records(by_code: Dict[str, List[str]], country_id: int) -> List[dict]:
    """Build one record per postcode, ordered by code.

    The locality is the first non-empty city name in case-insensitive
    alphabetical order; the key is omitted when no row carried a name.
    """
    records: List[dict] = []
    for code in sorted(by_code):
        record = {
            "code": code,
            "country_id": country_id,
            "country_code": "SE",
        }
        # Drop blank names first: an empty string sorts before every real
        # name and would otherwise hide a locality that another row provides.
        named = [city for city in by_code[code] if city]
        if named:
            record["locality_name"] = min(named, key=str.upper)
        record["type"] = "full"
        record["source"] = "se-zipcode"
        records.append(record)
    return records


def _locality(record: dict) -> str:
    """Return the record's locality name, treating a missing or null value as ""."""
    return record.get("locality_name") or ""


def _merge_records(existing: List[dict], records: List[dict]) -> List[dict]:
    """Append records not already present in *existing* and sort the result.

    Two records are considered the same when their code and lower-cased
    locality name match. Existing records are kept unchanged.
    """
    seen = {(r["code"], _locality(r).lower()) for r in existing}
    merged = list(existing)
    for r in records:
        key = (r["code"], _locality(r).lower())
        if key not in seen:
            merged.append(r)
            seen.add(key)
    # Sort with the same null-safe locality as the dedup key: an explicit
    # JSON null would otherwise be compared against str and raise TypeError.
    merged.sort(key=lambda r: (r["code"], _locality(r)))
    return merged


def main() -> int:
    """Import Swedish postcodes from the Zip,City CSV into SE.json.

    Command-line options:
        --input    path of the source CSV (default /tmp/se_zipcodes.csv)
        --dry-run  parse and report counts without writing the target file

    Returns:
        0 on success, 2 when the input file is missing, Sweden is absent
        from countries.json, or the CSV yields no valid postcode rows.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--input", default="/tmp/se_zipcodes.csv")
    parser.add_argument("--dry-run", action="store_true")
    args = parser.parse_args()

    src = Path(args.input)
    if not src.exists():
        print(f"ERROR: input not found: {src}", file=sys.stderr)
        return 2

    # The script lives three directories below the repository root.
    project_root = Path(__file__).resolve().parents[3]
    countries_path = project_root / "contributions/countries/countries.json"
    # Use a context manager so the handle is closed deterministically.
    with countries_path.open(encoding="utf-8") as f:
        countries = json.load(f)
    se = next((c for c in countries if c.get("iso2") == "SE"), None)
    if se is None:
        print("ERROR: SE not in countries.json", file=sys.stderr)
        return 2
    print(f"Country: Sweden (id={se['id']})")

    by_code = _read_cities_by_code(src)
    print(f"Unique zips: {len(by_code):,}")
    if not by_code:
        # An import that yields nothing means the wrong file or header;
        # fail loudly instead of writing an empty or unchanged target.
        print(f"ERROR: no valid Zip,City rows found in {src}", file=sys.stderr)
        return 2

    records = _build_records(by_code, int(se["id"]))
    print(f"Records: {len(records):,}")

    if args.dry_run:
        return 0

    target = project_root / "contributions/postcodes/SE.json"
    existing: List[dict] = []
    if target.exists():
        with target.open(encoding="utf-8") as f:
            existing = json.load(f)
    merged = _merge_records(existing, records)

    with target.open("w", encoding="utf-8") as f:
        json.dump(merged, f, ensure_ascii=False, indent=2)
        f.write("\n")
    size_kb = target.stat().st_size / 1024
    print(f"\n[OK] Wrote {target.relative_to(project_root)} ({len(merged):,} rows, {size_kb:.0f} KB)")
    return 0
119+
120+
121+
# Script entry point: run the importer and use its return value as the
# process exit status (0 on success, non-zero on failure).
if __name__ == "__main__":
    sys.exit(main())

0 commit comments

Comments
 (0)