|
| 1 | +#!/usr/bin/env node |
| 2 | + |
| 3 | +const fs = require('fs') |
| 4 | +const walk = require('walk-sync') |
| 5 | +const path = require('path') |
| 6 | +const astFromMarkdown = require('mdast-util-from-markdown') |
| 7 | +const visit = require('unist-util-visit') |
| 8 | +const { loadPages, loadPageMap } = require('../lib/pages') |
| 9 | +const loadSiteData = require('../lib/site-data') |
| 10 | +const loadRedirects = require('../lib/redirects/precompile') |
| 11 | +const { getPathWithoutLanguage, getPathWithoutVersion } = require('../lib/path-utils') |
| 12 | +const allVersions = Object.keys(require('../lib/all-versions')) |
| 13 | +const frontmatter = require('../lib/read-frontmatter') |
| 14 | +const renderContent = require('../lib/render-content') |
| 15 | +const patterns = require('../lib/patterns') |
| 16 | + |
// List the Markdown files found under `pathToWalk` (resolved relative to the
// directory above this script). README files and anything inside an
// `/early-access/` directory are left out.
const walkFiles = (pathToWalk) => {
  const baseDir = path.posix.join(__dirname, '..', pathToWalk)

  const isEligible = (file) => {
    if (!file.endsWith('.md')) return false
    if (file.endsWith('README.md')) return false
    // ignore EA for now
    return !file.includes('/early-access/')
  }

  return walk(baseDir, { includeBasePath: true, directories: false }).filter(isEligible)
}
| 22 | + |
// Every Markdown file the script will inspect: content files first, then data files.
const allFiles = [...walkFiles('content'), ...walkFiles('data')]

// Lookup from the node type that wraps a link title to the Markdown delimiter
// needed to rebuild it. The script exits with an error when it meets a wrapping
// node type that has no entry here. Hacky, but it covers the current rare edge cases.
const linkInlineMarkup = {
  emphasis: '*',
  strong: '**'
}
| 31 | + |
// Matches `/enterprise/{{ currentVersion }}` with any amount of whitespace (including none)
// on either side of `currentVersion`. Whitespace inside a link destination stops the Markdown
// parser from recognizing the link, so every spaced variant is normalized to the compact form
// below. The braces are escaped so the pattern does not rely on lenient regex parsing.
const currentVersionWithSpacesRegex = /\/enterprise\/\{\{\s*currentVersion\s*\}\}/g

// Canonical, whitespace-free replacement for the pattern above.
const currentVersionWithoutSpaces = '/enterprise/{{currentVersion}}'
| 34 | + |
| 35 | +// [start-readme] |
| 36 | +// |
| 37 | +// Run this script to find internal links in all content and data Markdown files, check if either the title or link |
| 38 | +// (or both) are outdated, and automatically update them if so. |
| 39 | +// |
| 40 | +// Exceptions: |
| 41 | +// * Links with fragments (e.g., [Bar](/foo#bar)) will get their root links updated if necessary, but the fragment |
| 42 | +// and title will be unchanged (e.g., [Bar](/noo#bar)). |
| 43 | +// * Links with hardcoded versions (e.g., [Foo](/enterprise-server/baz)) will get their root links updated if |
| 44 | +// necessary, but the hardcoded versions will be preserved (e.g., [Foo](/enterprise-server/qux)). |
| 45 | +// * Links with Liquid in the titles will have their root links updated if necessary, but the titles will be preserved. |
| 46 | +// |
| 47 | +// [end-readme] |
| 48 | + |
main().catch((error) => {
  // Report unexpected failures and exit non-zero instead of leaving the rejection unhandled.
  console.error(error)
  process.exit(1)
})

/**
 * Scan every file in `allFiles` for internal Markdown links, look each one up in the
 * page map (following redirects), and rewrite the link path and, where allowed, the
 * link title in place. Each file is written back to disk after its links are processed.
 *
 * Exits the process with code 1 when a link wraps its title in markup that is not listed
 * in `linkInlineMarkup`, or when a link cannot be resolved to a page for any version.
 *
 * @returns {Promise<void>}
 */
async function main () {
  console.log('Working...')
  const pageList = await loadPages()
  const pageMap = await loadPageMap(pageList)
  const redirects = await loadRedirects(pageList)
  const site = await loadSiteData()

  const context = {
    pages: pageMap,
    redirects,
    site: site.en.site,
    currentLanguage: 'en'
  }

  for (const file of allFiles) {
    const { data, content } = frontmatter(fs.readFileSync(file, 'utf8'))
    let newContent = content

    // Do a blanket find-replace for /enterprise/{{ currentVersion }}/ to /enterprise/{{currentVersion}}/
    // so that the AST parser recognizes the link as a link node. The spaces prevent it from doing so.
    newContent = newContent.replace(currentVersionWithSpacesRegex, currentVersionWithoutSpaces)

    const ast = astFromMarkdown(newContent)

    // We can't do async functions within visit, so gather the nodes upfront
    const nodesPerFile = []

    visit(ast, node => {
      if (node.type !== 'link') return
      if (!node.url.startsWith('/')) return
      if (node.url.startsWith('/assets')) return
      if (node.url.startsWith('/public')) return
      if (node.url.includes('/11.10.340/')) return
      if (node.url.includes('/2.1/')) return
      if (node.url === '/') return

      nodesPerFile.push(node)
    })

    // For every Markdown link...
    for (const node of nodesPerFile) {
      const oldLink = node.url
      const firstChild = node.children[0]

      // Find and preserve any inline markup in link titles, like [*Foo*](/foo)
      let inlineMarkup = ''
      if (firstChild && firstChild.children) {
        inlineMarkup = linkInlineMarkup[firstChild.type]

        if (!inlineMarkup) {
          console.error(`Cannot find an inline markup entry for ${firstChild.type}!`)
          process.exit(1)
        }
      }

      const oldTitle = firstChild &&
        (firstChild.value || (firstChild.children && firstChild.children[0] && firstChild.children[0].value))

      // A link with no readable title (for example empty link text) cannot be rebuilt as
      // [title](link), so leave it untouched rather than crashing on a missing property.
      if (!oldTitle) {
        console.warn(`Skipping link without a text title: ${oldLink} in ${file.replace(process.cwd(), '')}`)
        continue
      }

      const oldMarkdownLink = `[${inlineMarkup}${oldTitle}${inlineMarkup}](${oldLink})`

      // As a blanket rule, only update titles in links that begin with quotes. (Many links
      // have punctuation before the closing quotes, so we'll only check for opening quotes.)
      // Update: "[Foo](/foo)
      // Do not update: [Bar](/bar)
      const hasQuotesAroundLink = newContent.includes(`"${oldMarkdownLink}`)

      // We need to preserve fragments and hardcoded versions if any are found. Both depend
      // only on the original link, so compute them once rather than once per version.
      const fragmentMatch = oldLink.match(/(#.*$)/)
      const versionMatch = oldLink.match(/(enterprise-server(?:@.[^/]*?)?)\//)

      let foundPage

      // Run through all supported versions...
      for (const version of allVersions) {
        context.currentVersion = version
        // Render the link for each version using the renderContent pipeline, which includes the rewrite-local-links plugin.
        const $ = await renderContent(oldMarkdownLink, context, { cheerioObject: true })
        const renderedHref = $('a').attr('href')

        // No anchor (or no href) was rendered for this version, so there is nothing to look up.
        if (!renderedHref) continue

        // Remove the fragment for now.
        const linkToCheck = renderedHref
          .replace(/#.*$/, '')
          .replace(patterns.trailingSlash, '$1')

        // Try to find the rendered link in the set of pages!
        foundPage = findPage(linkToCheck, pageMap, redirects)

        // Once a page is found for a particular version, exit immediately; we don't need to check the other versions
        // because all we care about is the page title and path.
        if (foundPage) {
          break
        }
      }

      if (!foundPage) {
        console.error(`Can't find link in pageMap! ${oldLink} in ${file.replace(process.cwd(), '')}`)
        process.exit(1)
      }

      // Keep the original title when the original link includes a fragment, the original title
      // includes Liquid, or the link is not preceded by an opening quote; otherwise, use the found
      // page title. (We don't want to update the title if a fragment is found because the title
      // likely points to the fragment section header, not the page title.)
      const keepOldTitle = fragmentMatch || oldTitle.includes('{%') || !hasQuotesAroundLink
      const newTitle = keepOldTitle ? oldTitle : foundPage.title

      // If the original link includes a fragment, append it to the found page path.
      // Also remove the language code because Markdown links don't include language codes.
      let newLink = getPathWithoutLanguage(fragmentMatch ? foundPage.path + fragmentMatch[1] : foundPage.path)

      // If the original link includes a hardcoded version, preserve it; otherwise, remove versioning
      // because Markdown links don't include versioning.
      newLink = versionMatch ? `/${versionMatch[1]}${getPathWithoutVersion(newLink)}` : getPathWithoutVersion(newLink)

      let newMarkdownLink = `[${inlineMarkup}${newTitle}${inlineMarkup}](${newLink})`

      // Handle a few misplaced quotation marks.
      if (oldMarkdownLink.includes('["')) {
        newMarkdownLink = `"${newMarkdownLink}`
      }

      // Stream the results to console as we find them.
      if (oldMarkdownLink !== newMarkdownLink) {
        console.log('old link', oldMarkdownLink)
        console.log('new link', newMarkdownLink)
        console.log('-------')
      }

      // Use a replacer function so that `$` sequences in the new title or link (such as `$&`
      // or `$$`) are inserted literally instead of being treated as replacement patterns.
      newContent = newContent.replace(oldMarkdownLink, () => newMarkdownLink)
    }

    fs.writeFileSync(file, frontmatter.stringify(newContent, data, { lineWidth: 10000 }))
  }

  console.log('Done!')
}
| 182 | + |
/**
 * Look up a page by path, falling back to the path's redirect target.
 *
 * @param {string} tryPath - Path to look up.
 * @param {Object} pageMap - Object keyed by path whose values expose a `title`.
 * @param {Object} redirects - Object mapping old paths to their redirect targets.
 * @returns {{title: *, path: string}|undefined} The matching page's title and the path it
 *   was found under, or `undefined` when neither the path nor its redirect is in `pageMap`.
 */
function findPage (tryPath, pageMap, redirects) {
  const directPage = pageMap[tryPath]
  if (directPage) {
    return { title: directPage.title, path: tryPath }
  }

  // Not a known path as-is; see whether it redirects to a known page.
  const redirectedPath = redirects[tryPath]
  const redirectedPage = pageMap[redirectedPath]
  if (redirectedPage) {
    return { title: redirectedPage.title, path: redirectedPath }
  }

  return undefined
}
0 commit comments