Skip to content

repo sync #2200

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 17, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion middleware/archived-enterprise-versions-assets.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ module.exports = async (req, res, next) => {
res.set('content-type', r.headers['content-type'])
res.set('content-length', r.headers['content-length'])
res.set('x-is-archived', 'true')
res.set('x-robots-tag', 'none')
res.set('x-robots-tag', 'noindex')
res.send(r.body)
} catch (err) {
next()
Expand Down
2 changes: 1 addition & 1 deletion middleware/archived-enterprise-versions.js
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ module.exports = async (req, res, next) => {
try {
const r = await got(getProxyPath(req.path, requestedVersion))
res.set('content-type', r.headers['content-type'])
res.set('x-robots-tag', 'none')
res.set('x-robots-tag', 'noindex')

// make the stubbed redirect files added in >=2.18 return 301 instead of 200
const staticRedirect = r.body.match(patterns.staticRedirect)
Expand Down
40 changes: 40 additions & 0 deletions middleware/block-robots.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
const languages = require('../lib/languages')
const products = require('../lib/all-products')
const { deprecated } = require('../lib/enterprise-server-releases.js')

const pathRegExps = [
// Disallow indexing of WIP localized content
...Object.values(languages)
.filter(language => language.wip)
.map(language => new RegExp(`^/${language.code}(/.*)?$`, 'i')),

// Disallow indexing of WIP products
...Object.values(products)
.filter(product => product.wip || product.hidden)
.map(product => [
new RegExp(`^/.*?${product.href}`, 'i'),
...product.versions.map(
version => new RegExp(`^/.*?${version}/${product.id}`, 'i')
)
]),

// Disallow indexing of deprecated enterprise versions
...deprecated
.map(version => [
new RegExp(`^/.*?/enterprise-server@${version}/.*?`, 'i'),
new RegExp(`^/.*?/enterprise/${version}/.*?`, 'i')
])
].flat()

function blockIndex (path) {
return pathRegExps.some(pathRe => pathRe.test(path))
}

const middleware = (req, res, next) => {
if (blockIndex(req.path)) res.set('x-robots-tag', 'noindex')
return next()
}

middleware.blockIndex = blockIndex

module.exports = middleware
1 change: 1 addition & 0 deletions middleware/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ module.exports = function (app) {

// *** Config and context for rendering ***
app.use(require('./find-page')) // Must come before archived-enterprise-versions, breadcrumbs, featured-links, products, render-page
app.use(require('./block-robots'))

// *** Rendering, 2xx responses ***
// I largely ordered these by use frequency
Expand Down
31 changes: 1 addition & 30 deletions middleware/robots.js
Original file line number Diff line number Diff line change
@@ -1,33 +1,4 @@
const languages = require('../lib/languages')
const products = require('../lib/all-products')
const { deprecated } = require('../lib/enterprise-server-releases.js')

let defaultResponse = 'User-agent: *'

// Disallow crawling of WIP localized content
Object.values(languages)
.filter(language => language.wip)
.forEach(language => {
defaultResponse = defaultResponse.concat(`\nDisallow: /${language.code}\nDisallow: /${language.code}/*\n`)
})

// Disallow crawling of WIP products
Object.values(products)
.filter(product => product.wip || product.hidden)
.forEach(product => {
defaultResponse = defaultResponse.concat(`\nDisallow: /*${product.href}`)
product.versions.forEach(version => {
defaultResponse = defaultResponse.concat(`\nDisallow: /*${version}/${product.id}`)
})
})

// Disallow crawling of Deprecated enterprise versions
deprecated
.forEach(version => {
defaultResponse = defaultResponse
.concat(`\nDisallow: /*/enterprise-server@${version}/*`)
.concat(`\nDisallow: /*/enterprise/${version}/*`)
})
const defaultResponse = 'User-agent: *'

const disallowAll = `User-agent: *
Disallow: /`
Expand Down
117 changes: 117 additions & 0 deletions tests/rendering/block-robots.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
const { blockIndex } = require('../../middleware/block-robots')
const languages = require('../../lib/languages')
const products = require('../../lib/all-products')
const enterpriseServerReleases = require('../../lib/enterprise-server-releases')

function allowIndex (path) {
return !blockIndex(path)
}

describe('block robots', () => {
it('allows crawling of the homepage and English content', async () => {
expect(allowIndex('/')).toBe(true)
expect(allowIndex('/en')).toBe(true)
expect(allowIndex('/en/articles/verifying-your-email-address')).toBe(true)
})

it('allows crawling of generally available localized content', async () => {
Object.values(languages)
.filter(language => !language.wip)
.forEach(language => {
expect(allowIndex(`/${language.code}`)).toBe(true)
expect(allowIndex(`/${language.code}/articles/verifying-your-email-address`)).toBe(true)
})
})

it('disallows crawling of WIP localized content', async () => {
Object.values(languages)
.filter(language => language.wip)
.forEach(language => {
expect(allowIndex(`/${language.code}`)).toBe(false)
expect(allowIndex(`/${language.code}/articles/verifying-your-email-address`)).toBe(false)
})
})

it('disallows crawling of WIP products', async () => {
const wipProductIds = Object.values(products)
.filter(product => product.wip)
.map(product => product.id)

wipProductIds.forEach(id => {
const { href } = products[id]
const blockedPaths = [
// English
`/en${href}`,
`/en${href}/overview`,
`/en${href}/overview/intro`,
`/en/enterprise/${enterpriseServerReleases.latest}/user${href}`,
`/en/enterprise/${enterpriseServerReleases.oldestSupported}/user${href}`,

// Japanese
`/ja${href}`,
`/ja${href}/overview`,
`/ja${href}/overview/intro`,
`/ja/enterprise/${enterpriseServerReleases.latest}/user${href}`,
`/ja/enterprise/${enterpriseServerReleases.oldestSupported}/user${href}`
]

blockedPaths.forEach(path => {
expect(allowIndex(path)).toBe(false)
})
})
})

it('disallows crawling of early access "hidden" products', async () => {
const hiddenProductIds = Object.values(products)
.filter(product => product.hidden)
.map(product => product.id)

hiddenProductIds.forEach(id => {
const { versions } = products[id]
const blockedPaths = versions.map(version => {
return [
// English
`/en/${version}/${id}`,
`/en/${version}/${id}/some-early-access-article`,
// Japanese
`/ja/${version}/${id}`,
`/ja/${version}/${id}/some-early-access-article`
]
}).flat()

blockedPaths.forEach(path => {
expect(allowIndex(path)).toBe(false)
})
})
})

it('allows crawling of non-WIP products', async () => {
expect('actions' in products).toBe(true)
expect(allowIndex('/en/actions')).toBe(true)
expect(allowIndex('/en/actions/overview')).toBe(true)
expect(allowIndex('/en/actions/overview/intro')).toBe(true)
expect(allowIndex(`/en/enterprise/${enterpriseServerReleases.latest}/user/actions`)).toBe(true)
expect(allowIndex(`/en/enterprise/${enterpriseServerReleases.oldestSupported}/user/actions`)).toBe(true)
})

it('disallows crawling of deprecated enterprise releases', async () => {
enterpriseServerReleases.deprecated.forEach(version => {
const blockedPaths = [
// English
`/en/enterprise-server@${version}/actions`,
`/en/enterprise/${version}/actions`,
`/en/enterprise-server@${version}/actions/overview`,
`/en/enterprise/${version}/actions/overview`,
// Japanese
`/ja/enterprise-server@${version}/actions`,
`/ja/enterprise/${version}/actions`,
`/ja/enterprise-server@${version}/actions/overview`,
`/ja/enterprise/${version}/actions/overview`
]

blockedPaths.forEach(path => {
expect(allowIndex(path)).toBe(false)
})
})
})
})
94 changes: 0 additions & 94 deletions tests/rendering/robots-txt.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@ const robotsParser = require('robots-parser')
const robotsMiddleware = require('../../middleware/robots')
const { get } = require('../helpers/supertest')
const MockExpressResponse = require('mock-express-response')
const products = require('../../lib/all-products')
const enterpriseServerReleases = require('../../lib/enterprise-server-releases')

describe('robots.txt', () => {
jest.setTimeout(5 * 60 * 1000)
Expand All @@ -31,15 +29,6 @@ describe('robots.txt', () => {
})
})

it('disallows indexing of WIP localized content', async () => {
Object.values(languages)
.filter(language => language.wip)
.forEach(language => {
expect(robots.isAllowed(`https://docs.github.com/${language.code}`)).toBe(false)
expect(robots.isAllowed(`https://docs.github.com/${language.code}/articles/verifying-your-email-address`)).toBe(false)
})
})

it('disallows indexing of herokuapp.com domains', async () => {
const req = {
hostname: 'docs-internal-12345--my-branch.herokuapp.com',
Expand All @@ -52,89 +41,6 @@ describe('robots.txt', () => {
expect(res._getString()).toEqual('User-agent: *\nDisallow: /')
})

it('disallows indexing of WIP products', async () => {
const wipProductIds = Object.values(products)
.filter(product => product.wip)
.map(product => product.id)

wipProductIds.forEach(id => {
const { href } = products[id]
const blockedPaths = [
// English
`https://docs.github.com/en${href}`,
`https://docs.github.com/en${href}/overview`,
`https://docs.github.com/en${href}/overview/intro`,
`https://docs.github.com/en/enterprise/${enterpriseServerReleases.latest}/user${href}`,
`https://docs.github.com/en/enterprise/${enterpriseServerReleases.oldestSupported}/user${href}`,

// Japanese
`https://docs.github.com/ja${href}`,
`https://docs.github.com/ja${href}/overview`,
`https://docs.github.com/ja${href}/overview/intro`,
`https://docs.github.com/ja/enterprise/${enterpriseServerReleases.latest}/user${href}`,
`https://docs.github.com/ja/enterprise/${enterpriseServerReleases.oldestSupported}/user${href}`
]

blockedPaths.forEach(path => {
expect(robots.isAllowed(path)).toBe(false)
})
})
})

it('disallows indexing of early access "hidden" products', async () => {
const hiddenProductIds = Object.values(products)
.filter(product => product.hidden)
.map(product => product.id)

hiddenProductIds.forEach(id => {
const { versions } = products[id]
const blockedPaths = versions.map(version => {
return [
// English
`https://docs.github.com/en/${version}/${id}`,
`https://docs.github.com/en/${version}/${id}/some-early-access-article`,
// Japanese
`https://docs.github.com/ja/${version}/${id}`,
`https://docs.github.com/ja/${version}/${id}/some-early-access-article`
]
}).flat()

blockedPaths.forEach(path => {
expect(robots.isAllowed(path)).toBe(false)
})
})
})

it('allows indexing of non-WIP products', async () => {
expect('actions' in products).toBe(true)
expect(robots.isAllowed('https://docs.github.com/en/actions')).toBe(true)
expect(robots.isAllowed('https://docs.github.com/en/actions/overview')).toBe(true)
expect(robots.isAllowed('https://docs.github.com/en/actions/overview/intro')).toBe(true)
expect(robots.isAllowed(`https://docs.github.com/en/enterprise/${enterpriseServerReleases.latest}/user/actions`)).toBe(true)
expect(robots.isAllowed(`https://docs.github.com/en/enterprise/${enterpriseServerReleases.oldestSupported}/user/actions`)).toBe(true)
})

it('disallows indexing of deprecated enterprise releases', async () => {
enterpriseServerReleases.deprecated.forEach(version => {
const blockedPaths = [
// English
`https://docs.github.com/en/enterprise-server@${version}/actions`,
`https://docs.github.com/en/enterprise/${version}/actions`,
`https://docs.github.com/en/enterprise-server@${version}/actions/overview`,
`https://docs.github.com/en/enterprise/${version}/actions/overview`,
// Japanese
`https://docs.github.com/ja/enterprise-server@${version}/actions`,
`https://docs.github.com/ja/enterprise/${version}/actions`,
`https://docs.github.com/ja/enterprise-server@${version}/actions/overview`,
`https://docs.github.com/ja/enterprise/${version}/actions/overview`
]

blockedPaths.forEach(path => {
expect(robots.isAllowed(path)).toBe(false)
})
})
})

it('does not have duplicate lines', () => {
const lines = new Set()
for (const line of res.text.split('\n')) {
Expand Down
2 changes: 1 addition & 1 deletion tests/routing/deprecated-enterprise-versions.js
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ describe('enterprise deprecation', () => {
test('sets the expected x-robots-tag header for deprecated Enterprise pages', async () => {
const res = await get('/en/enterprise/2.13/user/articles/about-branches')
expect(res.statusCode).toBe(200)
expect(res.get('x-robots-tag')).toBe('none')
expect(res.get('x-robots-tag')).toBe('noindex')
})

test('handles requests for deprecated Enterprise pages ( <2.13 )', async () => {
Expand Down