-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathprune-sitemap.js
More file actions
70 lines (59 loc) · 2.42 KB
/
prune-sitemap.js
File metadata and controls
70 lines (59 loc) · 2.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
const fs = require('fs');
const path = require('path');
const { parseStringPromise, Builder } = require('xml2js');
// === CONFIGURATION ===
// Directory that is scanned for *.xml files and that receives the rebuilt sitemap index.
const OUTPUT_DIR = './dist'; // Antora bundle output folder
// Prefix used to build the absolute <loc> of every entry in the rebuilt sitemap index.
const SITE_URL = 'https://typedb.com/docs'; // Absolute site URL
// Component names to drop. They are matched as plain substrings against sitemap-index
// <loc> values and against sitemap file names, so any name containing one of them is excluded.
const EXCLUDE_COMPONENTS = ['manual', 'typeql', 'drivers'];
// Page URLs whose path matches this pattern are removed from <urlset> sitemaps.
const EXCLUDE_VERSION_PATTERN = /\/2\.[^/]*\//; // matches /2.x/ in URL paths
// File name (inside OUTPUT_DIR) of the sitemap index that gets rebuilt.
const SITEMAP_INDEX_FILE = 'sitemap.xml';
// === FUNCTIONS ===
/**
 * Prune one sitemap XML file in place.
 *
 * - A `<sitemapindex>` document loses every `<sitemap>` entry whose first
 *   `<loc>` contains one of the names in EXCLUDE_COMPONENTS.
 * - A `<urlset>` document loses every `<url>` entry whose first `<loc>`
 *   matches EXCLUDE_VERSION_PATTERN.
 * - Any other document is left untouched on disk: nothing would be filtered,
 *   and round-tripping through xml2js is not guaranteed to be lossless.
 *
 * Entries without a plain-text `<loc>` are kept as they are.
 *
 * @param {string} filePath - Path of the XML file to read and rewrite.
 * @returns {Promise<void>} Resolves once the file has been rewritten or skipped.
 * @throws Propagates read/write errors from `fs` and parse errors from xml2js.
 */
async function processSitemapFile(filePath) {
  const xml = fs.readFileSync(filePath, 'utf-8');
  const json = await parseStringPromise(xml);

  const isObject = (value) => value !== null && typeof value === 'object';

  // Text of an entry's first <loc>, or '' when it is missing or not plain
  // text, so that a malformed entry is kept instead of aborting the whole run.
  const firstLoc = (entry) => {
    const loc = isObject(entry) && Array.isArray(entry.loc) ? entry.loc[0] : undefined;
    return typeof loc === 'string' ? loc : '';
  };

  // The parse result may not be an object (e.g. for an empty document).
  const root = isObject(json) ? json : {};

  if (isObject(root.sitemapindex)) {
    const index = root.sitemapindex;
    // The child array is absent when the index holds no <sitemap> entries.
    if (Array.isArray(index.sitemap)) {
      // Filter out excluded component sitemaps
      index.sitemap = index.sitemap.filter(
        (entry) => !EXCLUDE_COMPONENTS.some((comp) => firstLoc(entry).includes(comp))
      );
    }
  } else if (isObject(root.urlset)) {
    const urlset = root.urlset;
    // The child array is absent when the set holds no <url> entries.
    if (Array.isArray(urlset.url)) {
      // Filter URLs with excluded versions
      urlset.url = urlset.url.filter(
        (entry) => !EXCLUDE_VERSION_PATTERN.test(firstLoc(entry))
      );
    }
  } else {
    console.log(`Skipped ${filePath} (not a sitemap)`);
    return;
  }

  const builder = new Builder();
  const updatedXml = builder.buildObject(json);
  fs.writeFileSync(filePath, updatedXml, 'utf-8');
  console.log(`Processed ${filePath}`);
}
/**
 * Regenerate the sitemap index in OUTPUT_DIR from the component sitemap
 * files found there.
 *
 * A file is listed when its name starts with `sitemap-`, ends with `.xml`
 * and contains none of the names in EXCLUDE_COMPONENTS. Each entry gets an
 * absolute `<loc>` under SITE_URL and the current time as `<lastmod>`.
 *
 * @returns {void}
 */
function rebuildSitemapIndex() {
  const keptNames = [];
  for (const name of fs.readdirSync(OUTPUT_DIR)) {
    const isComponentSitemap = name.startsWith('sitemap-') && name.endsWith('.xml');
    if (!isComponentSitemap) {
      continue;
    }
    const isAllowed = EXCLUDE_COMPONENTS.every((component) => !name.includes(component));
    if (isAllowed) {
      keptNames.push(name);
    }
  }

  const entries = [];
  for (const name of keptNames) {
    entries.push({
      loc: [`${SITE_URL}/${name}`], // absolute URLs
      lastmod: [new Date().toISOString()],
    });
  }

  const indexDocument = {
    sitemapindex: {
      $: { xmlns: 'http://www.sitemaps.org/schemas/sitemap/0.9' },
      sitemap: entries,
    },
  };

  const serialized = new Builder().buildObject(indexDocument);
  const indexPath = path.join(OUTPUT_DIR, SITEMAP_INDEX_FILE);
  fs.writeFileSync(indexPath, serialized, 'utf-8');
  console.log(`Rebuilt ${SITEMAP_INDEX_FILE} with ${keptNames.length} component sitemaps`);
}
/**
 * Script entry point: prune every `.xml` file found directly in OUTPUT_DIR,
 * one after another, then rebuild the sitemap index.
 *
 * @returns {Promise<void>} Resolves when all files are processed and the
 *   index has been rebuilt; rejects with the first error encountered.
 */
async function main() {
  const xmlPaths = fs
    .readdirSync(OUTPUT_DIR)
    .filter((name) => name.endsWith('.xml'))
    .map((name) => path.join(OUTPUT_DIR, name));

  // Files are handled strictly in sequence, never concurrently.
  for (const xmlPath of xmlPaths) {
    await processSitemapFile(xmlPath);
  }

  rebuildSitemapIndex();
  console.log('Sitemap filtering complete.');
}
// === RUN SCRIPT ===
// Report any failure and mark the process as failed. Without a non-zero exit
// code, a broken prune step would look successful to the calling build/CI job.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});