10
10
11
11
import path from 'path'
12
12
import fs from 'fs'
13
- import { execSync } from 'child_process'
14
13
import scrape from 'website-scraper'
15
14
import { program } from 'commander'
16
15
import { rimraf } from 'rimraf'
@@ -35,6 +34,7 @@ program
35
34
'-o, --output <PATH>' ,
36
35
`output directory to place scraped HTML files and redirects. By default, this temp directory is named 'tmpArchivalDir_<VERSION_TO_DEPRECATE>'`
37
36
)
37
+ . option ( '-l, --local-dev' , 'Do not rewrite asset paths to enable testing scraped content locally' )
38
38
. option ( '-d, --dry-run' , 'only scrape the first 10 pages for testing purposes' )
39
39
. option (
40
40
'-p, --page <PATH>' ,
@@ -45,6 +45,7 @@ program
45
45
const output = program . opts ( ) . output
46
46
const dryRun = program . opts ( ) . dryRun
47
47
const singlePage = program . opts ( ) . page
48
+ const localDev = program . opts ( ) . localDev
48
49
const tmpArchivalDirectory = output
49
50
? path . join ( process . cwd ( ) , output )
50
51
: path . join ( process . cwd ( ) , `tmpArchivalDir_${ version } ` )
@@ -67,19 +68,30 @@ class RewriteAssetPathsPlugin {
67
68
68
69
// Get the text contents of the resource
69
70
const text = resource . getText ( )
70
- let newBody = ''
71
+ let newBody = text
71
72
72
73
// Rewrite HTML asset paths. Example:
73
74
// ../assets/images/foo/bar.png ->
74
75
// https://githubdocs.azureedge.net/github-images/enterprise/2.17/assets/images/foo/bar.png
76
+
75
77
if ( resource . isHtml ( ) ) {
76
- newBody = text . replace (
77
- / (?< attribute > s r c | h r e f ) = " (?: \. \. \/ | \/ ) * (?< basepath > _ n e x t \/ s t a t i c | j a v a s c r i p t s | s t y l e s h e e t s | a s s e t s \/ f o n t s | a s s e t s \/ c b - \d + \/ i m a g e s | n o d e _ m o d u l e s ) / g,
78
- ( match , attribute , basepath ) => {
79
- const replaced = `${ REMOTE_ENTERPRISE_STORAGE_URL } /${ this . version } /${ basepath } `
80
- return `${ attribute } ="${ replaced } `
81
- }
78
+ // Remove nextjs scripts and manifest.json link
79
+ newBody = newBody . replace (
80
+ / < s c r i p t \s s r c = " ( \. \. \/ ) * _ n e x t \/ s t a t i c \/ [ \w ] + \/ ( _ b u i l d M a n i f e s t | _ s s g M a n i f e s t ) .j s ? " .* ?> < \/ s c r i p t > / g,
81
+ ''
82
82
)
83
+ newBody = newBody . replace ( / < l i n k h r e f = " .* m a n i f e s t .j s o n " .* ?> / g, '' )
84
+
85
+ if ( ! localDev ) {
86
+ // Rewrite asset paths
87
+ newBody = newBody . replace (
88
+ / (?< attribute > s r c | h r e f ) = " (?: \. \. \/ | \/ ) * (?< basepath > _ n e x t \/ s t a t i c | j a v a s c r i p t s | s t y l e s h e e t s | a s s e t s \/ f o n t s | a s s e t s \/ c b - \d + \/ i m a g e s | n o d e _ m o d u l e s ) / g,
89
+ ( match , attribute , basepath ) => {
90
+ const replaced = `${ REMOTE_ENTERPRISE_STORAGE_URL } /${ this . version } /${ basepath } `
91
+ return `${ attribute } ="${ replaced } `
92
+ }
93
+ )
94
+ }
83
95
}
84
96
85
97
// Rewrite CSS asset paths. Example
@@ -88,25 +100,25 @@ class RewriteAssetPathsPlugin {
88
100
// url(../../../assets/cb-303/images/octicons/search-24.svg) ->
89
101
// url(https://githubdocs.azureedge.net/github-images/enterprise/2.20/assets/cb-303/images/octicons/search-24.svg)
90
102
if ( resource . isCss ( ) ) {
91
- newBody = text . replace (
92
- / (?< attribute > u r l ) (?< paren > \( " | \( ) (?: \. \. \/ ) * (?< basepath > _ n e x t \/ s t a t i c | a s s e t s \/ f o n t s | a s s e t s \/ i m a g e s | a s s e t s \/ c b - \d + \/ i m a g e s ) / g,
93
- ( match , attribute , paren , basepath ) => {
94
- const replaced = `${ REMOTE_ENTERPRISE_STORAGE_URL } /${ this . version } /${ basepath } `
95
- return `${ attribute } ${ paren } ${ replaced } `
96
- }
97
- )
103
+ if ( ! localDev ) {
104
+ newBody = newBody . replace (
105
+ / (?< attribute > u r l ) (?< paren > \( " | \( ) (?: \. \. \/ ) * (?< basepath > _ n e x t \/ s t a t i c | a s s e t s \/ f o n t s | a s s e t s \/ i m a g e s | a s s e t s \/ c b - \d + \/ i m a g e s ) / g,
106
+ ( match , attribute , paren , basepath ) => {
107
+ const replaced = `${ REMOTE_ENTERPRISE_STORAGE_URL } /${ this . version } /${ basepath } `
108
+ return `${ attribute } ${ paren } ${ replaced } `
109
+ }
110
+ )
111
+ }
98
112
}
99
113
100
114
const filePath = path . join ( this . tempDirectory , resource . getFilename ( ) )
101
- await fs . promises . writeFile ( filePath , newBody , 'binary' )
115
+ await fs . promises . writeFile ( filePath , newBody , resource . encoding )
102
116
} )
103
117
}
104
118
}
105
119
106
120
async function main ( ) {
107
121
// Build the production assets, to simulate a production deployment
108
- console . log ( 'Running `npm run build` for production assets' )
109
- execSync ( 'npm run build' , { stdio : 'inherit' } )
110
122
console . log ( 'Finish building production assets' )
111
123
if ( dryRun ) {
112
124
console . log (
0 commit comments