Skip to content

Commit bc2285c

Browse files
dr5hn and claude authored
feat(postcodes/RS): bulk-import 1,170 Serbia postcodes via Pošta Srbije (#1039) (#1466)
Source: Pošta Srbije catalogue redistributed via the nebjak/serbia-zip-codes-js MIT package. Ships country-only — the source carries city + zip_code only with no okrug (district) mapping (matches the SE/SI/ZA/GR/VN/LV/KE precedent). Refs #1039. Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 6685cb1 commit bc2285c

2 files changed

Lines changed: 9505 additions & 0 deletions

File tree

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
#!/usr/bin/env python3
2+
"""Serbia -> contributions/postcodes/RS.json importer for #1039.
3+
4+
Source data
5+
-----------
6+
The community ``nebjak/serbia-zip-codes-js`` package ships Pošta
7+
Srbije's catalogue as a flat JSON list of city + zip_code pairs:
8+
9+
[{"city": "Beograd", "zip_code": "11000"}, ...]
10+
11+
Source URL: https://raw.githubusercontent.com/nebjak/serbia-zip-codes-js/master/data/serbia_zip_codes.json
12+
13+
What this script does
14+
---------------------
15+
1. Fetches the JSON via urllib (curl is blocked).
16+
2. Ships country-only (the source has no okrug / district mapping).
17+
3. Writes contributions/postcodes/RS.json idempotently.
18+
19+
License & attribution
20+
---------------------
21+
- Source: nebjak/serbia-zip-codes-js (MIT) which redistributes the
22+
publicly published Pošta Srbije index.
23+
- Each row: ``source: "posta-srbije-via-nebjak"``
24+
25+
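Usage
-----
    python3 bin/scripts/sync/import_serbia_postcodes.py
    python3 bin/scripts/sync/import_serbia_postcodes.py --dry-run
    python3 bin/scripts/sync/import_serbia_postcodes.py --input serbia_zip_codes.json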
28+
"""
29+
30+
from __future__ import annotations
31+
32+
import argparse
33+
import json
34+
import re
35+
import sys
36+
import urllib.request
37+
from pathlib import Path
38+
from typing import Dict, List
39+
40+
41+
# Raw GitHub location of the nebjak/serbia-zip-codes-js dataset: a flat JSON
# list of {"city": ..., "zip_code": ...} objects.
SOURCE_URL = (
    "https://raw.githubusercontent.com/"
    "nebjak/serbia-zip-codes-js/master/"
    "data/serbia_zip_codes.json"
)
45+
46+
47+
def fetch_json(url: str) -> List[dict]:
    """Download *url* and return its JSON payload as a list of rows.

    The request carries an explicit ``User-Agent`` header and a 60 second
    timeout. The response body is decoded as UTF-8 before JSON parsing.

    Args:
        url: Location of the JSON document to download.

    Returns:
        The decoded top-level JSON array.

    Raises:
        ValueError: If the decoded payload is not a JSON array. Failing here
            gives a clear message instead of an unrelated error later, when
            the caller iterates the rows.

    Errors raised by ``urllib`` or ``json`` while fetching or decoding are not
    caught and propagate to the caller.
    """
    req = urllib.request.Request(
        url, headers={"User-Agent": "csc-database-postcode-importer"}
    )
    with urllib.request.urlopen(req, timeout=60) as r:
        payload = json.loads(r.read().decode("utf-8"))
    if not isinstance(payload, list):
        raise ValueError(
            f"expected a JSON array from {url}, got {type(payload).__name__}"
        )
    return payload
53+
54+
55+
def _dedup_key(record: dict) -> tuple:
    """Return a record's identity: (code, lower-cased locality or "")."""
    return (record["code"], (record.get("locality_name") or "").lower())


def _sort_key(record: dict) -> tuple:
    """Return the output ordering key: (code, locality), null locality as ""."""
    # ``or ""`` (rather than a .get default) keeps an explicit null locality
    # comparable with string localities when sorting.
    return (record["code"], record.get("locality_name") or "")


def _build_records(rows: List[dict], country_id: int, regex: re.Pattern) -> tuple:
    """Convert source rows to postcode records.

    Rows with an empty ``zip_code`` are dropped silently. Rows whose code does
    not match *regex* are dropped and counted. Rows repeating an earlier
    (code, case-insensitive city) pair are dropped silently.

    Returns:
        A ``(records, skipped_bad_regex)`` tuple.
    """
    seen: set = set()
    records: List[dict] = []
    skipped_bad_regex = 0

    for row in rows:
        code = (row.get("zip_code") or "").strip()
        if not code:
            continue
        if not regex.match(code):
            skipped_bad_regex += 1
            continue
        city = (row.get("city") or "").strip()
        key = (code, city.lower())
        if key in seen:
            continue
        seen.add(key)

        record: Dict[str, object] = {
            "code": code,
            "country_id": country_id,
            "country_code": "RS",
        }
        # locality_name is omitted entirely when the source has no city.
        if city:
            record["locality_name"] = city
        record["type"] = "full"
        record["source"] = "posta-srbije-via-nebjak"
        records.append(record)

    return records, skipped_bad_regex


def _merge_records(existing: List[dict], records: List[dict]) -> List[dict]:
    """Return *existing* plus the records it lacks, sorted by code and locality."""
    known = {_dedup_key(r) for r in existing}
    merged = list(existing)
    for record in records:
        key = _dedup_key(record)
        if key not in known:
            merged.append(record)
            known.add(key)
    merged.sort(key=_sort_key)
    return merged


def main() -> int:
    """Import the Serbia postcodes into ``contributions/postcodes/RS.json``.

    Steps:
      1. Load the source rows from ``--input`` when given, otherwise download
         them from ``SOURCE_URL``.
      2. Look up the RS entry in ``contributions/countries/countries.json``
         for its id and postal-code regex (any code is accepted when the
         regex is missing or empty).
      3. Build de-duplicated records and print the counts.
      4. Unless ``--dry-run`` is set, merge the records into the existing
         ``RS.json`` (existing rows are kept; only unseen code/locality pairs
         are appended) and write the sorted result.

    Returns:
        0 on success (including a dry run), 2 when RS is missing from
        ``countries.json``.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        # Keep the module docstring's sectioned layout intact in --help
        # instead of letting argparse re-wrap it into one paragraph.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--input",
        default=None,
        help="path to a local copy of the source JSON (skips the download)",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="print the counts without writing RS.json",
    )
    args = parser.parse_args()

    rows = (
        json.loads(Path(args.input).read_text(encoding="utf-8"))
        if args.input
        else fetch_json(SOURCE_URL)
    )

    # The project root is taken to be three directories above this script's
    # own directory.
    project_root = Path(__file__).resolve().parents[3]
    countries_path = project_root / "contributions/countries/countries.json"
    # Use a context manager so the file handle is closed deterministically.
    with countries_path.open(encoding="utf-8") as f:
        countries = json.load(f)
    rs_country = next((c for c in countries if c.get("iso2") == "RS"), None)
    if rs_country is None:
        print("ERROR: RS not in countries.json", file=sys.stderr)
        return 2
    regex = re.compile(rs_country.get("postal_code_regex") or ".*")
    print(f"Country: Serbia (id={rs_country['id']})")
    print(f"Source rows: {len(rows):,}")

    records, skipped_bad_regex = _build_records(
        rows, int(rs_country["id"]), regex
    )

    print(f"Skipped (regex fail): {skipped_bad_regex:,}")
    print(f"Records emitted: {len(records):,}")

    if args.dry_run:
        return 0

    target = project_root / "contributions/postcodes/RS.json"
    target.parent.mkdir(parents=True, exist_ok=True)
    existing: List[dict] = []
    if target.exists():
        with target.open(encoding="utf-8") as f:
            existing = json.load(f)
    # With no existing file this reduces to sorting the fresh records.
    merged = _merge_records(existing, records)

    with target.open("w", encoding="utf-8") as f:
        json.dump(merged, f, ensure_ascii=False, indent=2)
        f.write("\n")
    size_kb = target.stat().st_size / 1024
    print(
        f"\n[OK] Wrote {target.relative_to(project_root)} "
        f"({len(merged):,} rows, {size_kb:.0f} KB)"
    )
    return 0
140+
141+
142+
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())

0 commit comments

Comments
 (0)