
Commit 3b1abfa

normalize out unicode ligatures
Fix: GHSA-r6q2-hw4h-h46w
1 parent a43478c

File tree

6 files changed (+88 −12 lines)


src/normalize-unicode.ts

Lines changed: 5 additions & 1 deletion
@@ -9,7 +9,11 @@ const MAX = 10000
 const cache = new Set<string>()
 export const normalizeUnicode = (s: string): string => {
   if (!cache.has(s)) {
-    normalizeCache[s] = s.normalize('NFD')
+    // shake out identical accents and ligatures
+    normalizeCache[s] = s
+      .normalize('NFD')
+      .toLocaleLowerCase('en')
+      .toLocaleUpperCase('en')
   } else {
     cache.delete(s)
   }
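As context (not part of the commit): NFD alone only decomposes accents, while ligature code points such as U+FB01 'ﬁ' survive it; the added lower/upper-case round trip expands them via Unicode case mapping, so spellings that collide on APFS fold to the same cache key. A minimal sketch, using a hypothetical fold helper that simply mirrors the chain above:

// hypothetical helper mirroring the normalization chain in the diff above
const fold = (s: string): string =>
  s.normalize('NFD').toLocaleLowerCase('en').toLocaleUpperCase('en')

// U+FB01 'ﬁ' upper-cases to 'FI', so the ligature spelling and the ASCII
// spelling now produce the same key
console.log(fold('\uFB01le.txt') === fold('file.txt')) // true ('FILE.TXT')

// accent equivalence is still handled by the NFD step
console.log(fold('caf\u00E9') === fold('cafe\u0301')) // true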

src/path-reservations.ts

Lines changed: 1 addition & 3 deletions
@@ -56,9 +56,7 @@ export class PathReservations {
       ['win32 parallelization disabled']
     : paths.map(p => {
         // don't need normPath, because we skip this entirely for windows
-        return stripTrailingSlashes(
-          join(normalizeUnicode(p)),
-        ).toLowerCase()
+        return stripTrailingSlashes(join(normalizeUnicode(p)))
       })
 
   const dirs = new Set<string>(

tap-snapshots/test/normalize-unicode.js-win32.test.cjs

Lines changed: 6 additions & 6 deletions
@@ -6,25 +6,25 @@
  */
 'use strict'
 exports[`test/normalize-unicode.js win32 > TAP > normalize with strip slashes > "\\\\\eee\\\\\\" > normalized 1`] = `
-\\\\\eee\\\\\\
+\\\\\EEE\\\\\\
 `
 
 exports[`test/normalize-unicode.js win32 > TAP > normalize with strip slashes > "\\\\a\\\\b\\\\c\\\\d\\\\" > normalized 1`] = `
-/a/b/c/d
+/A/B/C/D
 `
 
 exports[`test/normalize-unicode.js win32 > TAP > normalize with strip slashes > "﹨aaaa﹨dddd﹨" > normalized 1`] = `
-aaaa﹨dddd
+AAAA﹨DDDD
 `
 
 exports[`test/normalize-unicode.js win32 > TAP > normalize with strip slashes > "\bbb\eee\" > normalized 1`] = `
-bbb\eee
+BBB\EEE
 `
 
 exports[`test/normalize-unicode.js win32 > TAP > normalize with strip slashes > "1/4foo.txt" > normalized 1`] = `
-1/4foo.txt
+1/4FOO.TXT
 `
 
 exports[`test/normalize-unicode.js win32 > TAP > normalize with strip slashes > "¼foo.txt" > normalized 1`] = `
-¼foo.txt
+¼FOO.TXT
 `

test/ghsa-8qq5-rm4j-mr97.ts

Lines changed: 4 additions & 1 deletion
@@ -42,7 +42,10 @@ t.test('verify that linkpaths get sanitized properly', async t => {
   })
 
   writeFileSync(resolve(out, 'exploit_hard'), 'OVERWRITTEN')
-  t.equal(readFileSync(resolve(dir, 'secret.txt'), 'utf8'), 'ORIGINAL DATA')
+  t.equal(
+    readFileSync(resolve(dir, 'secret.txt'), 'utf8'),
+    'ORIGINAL DATA',
+  )
 
   t.not(readlinkSync(resolve(out, 'exploit_sym')), targetSym)
 })

test/ghsa-r6q2-hw4h-h46w.ts

Lines changed: 71 additions & 0 deletions
@@ -0,0 +1,71 @@
+import t from 'tap'
+import { normalizeUnicode } from '../src/normalize-unicode.js'
+import { Header } from '../src/header.js'
+import { extract } from '../src/extract.js'
+import { resolve } from 'node:path'
+import { lstatSync, readFileSync, statSync } from 'node:fs'
+
+// these characters are problems on macOS's APFS
+const chars = {
+  ['ﬀ'.normalize('NFC')]: 'FF',
+  ['ﬁ'.normalize('NFC')]: 'FI',
+  ['ﬂ'.normalize('NFC')]: 'FL',
+  ['ﬃ'.normalize('NFC')]: 'FFI',
+  ['ﬄ'.normalize('NFC')]: 'FFL',
+  ['ﬅ'.normalize('NFC')]: 'ST',
+  ['ﬆ'.normalize('NFC')]: 'ST',
+  ['ẛ'.normalize('NFC')]: 'Ṡ',
+  ['ß'.normalize('NFC')]: 'SS',
+  ['ẞ'.normalize('NFC')]: 'SS',
+  ['ſ'.normalize('NFC')]: 'S',
+}
+
+for (const [c, n] of Object.entries(chars)) {
+  t.test(`${c} => ${n}`, async t => {
+    t.equal(normalizeUnicode(c), n)
+
+    t.test('link then file', async t => {
+      const tarball = Buffer.alloc(2048)
+      new Header({
+        path: c,
+        type: 'SymbolicLink',
+        linkpath: './target',
+      }).encode(tarball, 0)
+      new Header({
+        path: n,
+        type: 'File',
+        size: 1,
+      }).encode(tarball, 512)
+      tarball[1024] = 'x'.charCodeAt(0)
+
+      const cwd = t.testdir({ tarball })
+
+      await extract({ cwd, file: resolve(cwd, 'tarball') })
+
+      t.throws(() => statSync(resolve(cwd, 'target')))
+      t.equal(readFileSync(resolve(cwd, n), 'utf8'), 'x')
+    })
+
+    t.test('file then link', { saveFixture: true }, async t => {
+      const tarball = Buffer.alloc(2048)
+      new Header({
+        path: n,
+        type: 'File',
+        size: 1,
+      }).encode(tarball, 0)
+      tarball[512] = 'x'.charCodeAt(0)
+      new Header({
+        path: c,
+        type: 'SymbolicLink',
+        linkpath: './target',
+      }).encode(tarball, 1024)
+
+      const cwd = t.testdir({ tarball })
+
+      await extract({ cwd, file: resolve(cwd, 'tarball') })
+
+      t.throws(() => statSync(resolve(cwd, 'target')))
+      t.equal(lstatSync(resolve(cwd, c)).isSymbolicLink(), true)
+    })
+  })
+}

test/normalize-unicode.js

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@ t.equal(
   'matching unicodes',
 )
 t.equal(normalizeUnicode(cafe1), normalizeUnicode(cafe2), 'cached')
-t.equal(normalizeUnicode('foo'), 'foo', 'non-unicode string')
+t.equal(normalizeUnicode('foo'), 'FOO', 'non-unicode string')
 
 if (fakePlatform === 'win32') {
   t.test('normalize with strip slashes', t => {
