Skip to content

Commit fb9103e

Browse files
authored
Merge pull request #17981 from github/check-for-outdated-links
Script to update internal links
2 parents e1cc751 + f8c6a62 commit fb9103e

File tree

2 files changed

+201
-0
lines changed

2 files changed

+201
-0
lines changed

lib/render-content/renderContent.js

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,10 @@ module.exports = async function renderContent (
6666
.trim()
6767
}
6868

69+
if (options.cheerioObject) {
70+
return cheerio.load(html, { xmlMode: true })
71+
}
72+
6973
if (options.encodeEntities) html = entities.encode(html)
7074

7175
return html.trim()

script/update-internal-links.js

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
#!/usr/bin/env node
2+
3+
const fs = require('fs')
4+
const walk = require('walk-sync')
5+
const path = require('path')
6+
const astFromMarkdown = require('mdast-util-from-markdown')
7+
const visit = require('unist-util-visit')
8+
const { loadPages, loadPageMap } = require('../lib/pages')
9+
const loadSiteData = require('../lib/site-data')
10+
const loadRedirects = require('../lib/redirects/precompile')
11+
const { getPathWithoutLanguage, getPathWithoutVersion } = require('../lib/path-utils')
12+
const allVersions = Object.keys(require('../lib/all-versions'))
13+
const frontmatter = require('../lib/read-frontmatter')
14+
const renderContent = require('../lib/render-content')
15+
const patterns = require('../lib/patterns')
16+
17+
// List the Markdown files found under `pathToWalk` (a directory given relative to the
// parent of this script's directory). Directories are excluded from the walk and the
// base path is included in each result. README.md files and anything whose path
// contains /early-access/ are left out.
const walkFiles = (pathToWalk) => {
  const baseDir = path.posix.join(__dirname, '..', pathToWalk)
  const everyFile = walk(baseDir, { includeBasePath: true, directories: false })

  return everyFile.filter(file => {
    if (!file.endsWith('.md')) return false
    if (file.endsWith('README.md')) return false
    // ignore EA for now
    return !file.includes('/early-access/')
  })
}
22+
23+
// Every Markdown file the script will inspect: content files first, then data files.
const allFiles = walkFiles('content').concat(walkFiles('data'))

// Maps the AST node type wrapping a link title to the Markdown delimiter that produces it.
// The script will log an error and exit if it finds any markup not represented here.
// Hacky but it captures the current rare edge cases.
const linkInlineMarkup = {
  emphasis: '*',
  strong: '**'
}

// Used to normalize the spaced Liquid form to the unspaced one before parsing.
const currentVersionWithSpacesRegex = /\/enterprise\/{{ currentVersion }}/g
const currentVersionWithoutSpaces = '/enterprise/{{currentVersion}}'

// [start-readme]
//
// Run this script to find internal links in all content and data Markdown files, check if either the title or link
// (or both) are outdated, and automatically update them if so.
//
// Exceptions:
// * Links with fragments (e.g., [Bar](/foo#bar)) will get their root links updated if necessary, but the fragment
// and title will be unchanged (e.g., [Bar](/noo#bar)).
// * Links with hardcoded versions (e.g., [Foo](/enterprise-server/baz)) will get their root links updated if
// necessary, but the hardcoded versions will be preserved (e.g., [Foo](/enterprise-server/qux)).
// * Links with Liquid in the titles will have their root links updated if necessary, but the titles will be preserved.
//
// [end-readme]

// main() is async: surface any rejection and exit non-zero rather than leaving it unhandled.
main().catch(err => {
  console.error(err)
  process.exit(1)
})
50+
51+
/**
 * Rewrite outdated internal links in every file listed in `allFiles`.
 *
 * For each Markdown link whose URL starts with `/` (minus a few excluded prefixes), the
 * link is rendered once per supported version until a page is found in the page map
 * (directly or through a redirect). The link path, and in some cases the title, is then
 * replaced in the file content and the file is written back with its frontmatter.
 *
 * Exits the process with code 1 when a link title uses inline markup that has no entry in
 * `linkInlineMarkup`, or when no page can be found for a link in any version.
 *
 * @returns {Promise<void>}
 */
async function main () {
  console.log('Working...')
  const pageList = await loadPages()
  const pageMap = await loadPageMap(pageList)
  const redirects = await loadRedirects(pageList)
  const site = await loadSiteData()

  const context = {
    pages: pageMap,
    redirects,
    site: site.en.site,
    currentLanguage: 'en'
  }

  for (const file of allFiles) {
    const { data, content } = frontmatter(fs.readFileSync(file, 'utf8'))
    let newContent = content

    // Do a blanket find-replace for /enterprise/{{ currentVersion }}/ to /enterprise/{{currentVersion}}/
    // so that the AST parser recognizes the link as a link node. The spaces prevent it from doing so.
    newContent = newContent.replace(currentVersionWithSpacesRegex, currentVersionWithoutSpaces)

    const ast = astFromMarkdown(newContent)

    // We can't do async functions within visit, so gather the nodes upfront
    const nodesPerFile = []

    visit(ast, node => {
      if (node.type !== 'link') return
      if (!node.url.startsWith('/')) return
      if (node.url.startsWith('/assets')) return
      if (node.url.startsWith('/public')) return
      if (node.url.includes('/11.10.340/')) return
      if (node.url.includes('/2.1/')) return
      if (node.url === '/') return

      nodesPerFile.push(node)
    })

    // For every Markdown link...
    for (const node of nodesPerFile) {
      const oldLink = node.url

      // Find and preserve any inline markup in link titles, like [*Foo*](/foo)
      let inlineMarkup = ''
      if (node.children[0].children) {
        inlineMarkup = linkInlineMarkup[node.children[0].type]

        if (!inlineMarkup) {
          console.error(`Cannot find an inline markup entry for ${node.children[0].type}!`)
          process.exit(1)
        }
      }

      const oldTitle = node.children[0].value || node.children[0].children[0].value
      const oldMarkdownLink = `[${inlineMarkup}${oldTitle}${inlineMarkup}](${oldLink})`

      // As a blanket rule, only update titles in links that begin with quotes. (Many links
      // have punctuation before the closing quotes, so we'll only check for opening quotes.)
      // Update: "[Foo](/foo)
      // Do not update: [Bar](/bar)
      const hasQuotesAroundLink = newContent.includes(`"${oldMarkdownLink}`)

      // We need to preserve fragments and hardcoded versions if any are found.
      // Both depend only on the original link, so compute them once rather than per version.
      const fragmentMatch = oldLink.match(/(#.*$)/)
      const versionMatch = oldLink.match(/(enterprise-server(?:@.[^/]*?)?)\//)

      let foundPage

      // Run through all supported versions...
      for (const version of allVersions) {
        context.currentVersion = version
        // Render the link for each version using the renderContent pipeline, which includes the rewrite-local-links plugin.
        const $ = await renderContent(oldMarkdownLink, context, { cheerioObject: true })
        const renderedHref = $('a').attr('href')

        // If no anchor with an href was rendered for this version there is nothing to look up;
        // move on so a link that never resolves is reported by the error below.
        if (!renderedHref) continue

        // Remove the fragment for now.
        const linkToCheck = renderedHref
          .replace(/#.*$/, '')
          .replace(patterns.trailingSlash, '$1')

        // Try to find the rendered link in the set of pages!
        foundPage = findPage(linkToCheck, pageMap, redirects)

        // Once a page is found for a particular version, exit immediately; we don't need to check the other versions
        // because all we care about is the page title and path.
        if (foundPage) {
          break
        }
      }

      if (!foundPage) {
        console.error(`Can't find link in pageMap! ${oldLink} in ${file.replace(process.cwd(), '')}`)
        process.exit(1)
      }

      // If the original link includes a fragment OR the original title includes Liquid, do not change;
      // otherwise, use the found page title. (We don't want to update the title if a fragment is found because
      // the title likely points to the fragment section header, not the page title.)
      const newTitle = fragmentMatch || oldTitle.includes('{%') || !hasQuotesAroundLink ? oldTitle : foundPage.title

      // If the original link includes a fragment, append it to the found page path.
      // Also remove the language code because Markdown links don't include language codes.
      let newLink = getPathWithoutLanguage(fragmentMatch ? foundPage.path + fragmentMatch[1] : foundPage.path)

      // If the original link includes a hardcoded version, preserve it; otherwise, remove versioning
      // because Markdown links don't include versioning.
      newLink = versionMatch ? `/${versionMatch[1]}${getPathWithoutVersion(newLink)}` : getPathWithoutVersion(newLink)

      let newMarkdownLink = `[${inlineMarkup}${newTitle}${inlineMarkup}](${newLink})`

      // Handle a few misplaced quotation marks.
      if (oldMarkdownLink.includes('["')) {
        newMarkdownLink = `"${newMarkdownLink}`
      }

      // Stream the results to console as we find them.
      if (oldMarkdownLink !== newMarkdownLink) {
        console.log('old link', oldMarkdownLink)
        console.log('new link', newMarkdownLink)
        console.log('-------')
      }

      // Use a replacer function so that `$` sequences in the new title or path (e.g. `$&`, `$1`)
      // are inserted literally instead of being treated as replacement patterns.
      newContent = newContent.replace(oldMarkdownLink, () => newMarkdownLink)
    }

    fs.writeFileSync(file, frontmatter.stringify(newContent, data, { lineWidth: 10000 }))
  }

  console.log('Done!')
}
182+
183+
/**
 * Look up a page by path, falling back to the path's redirect target.
 *
 * Only own properties of `pageMap` and `redirects` are consulted, so a path that happens
 * to match an inherited object member (such as `constructor`) is not treated as a page.
 *
 * @param {string} tryPath - path to look up
 * @param {Object} pageMap - pages keyed by path; each page is read for its `title`
 * @param {Object} redirects - redirect targets keyed by the path they redirect from
 * @returns {{title: *, path: string}|undefined} the page title and the path it was found
 *   under, or undefined when neither the path nor its redirect target is in `pageMap`
 */
function findPage (tryPath, pageMap, redirects) {
  const ownValue = (obj, key) =>
    Object.prototype.hasOwnProperty.call(obj, key) ? obj[key] : undefined

  const directPage = ownValue(pageMap, tryPath)
  if (directPage) {
    return {
      title: directPage.title,
      path: tryPath
    }
  }

  // Resolve the redirect once, then check whether its target is a known page.
  const redirectedPath = ownValue(redirects, tryPath)
  const redirectedPage = ownValue(pageMap, redirectedPath)
  if (redirectedPage) {
    return {
      title: redirectedPage.title,
      path: redirectedPath
    }
  }
}

0 commit comments

Comments
 (0)