Skip to content

Commit 2fdc8ec

Browse files
committed
update _Bytes_read_string to use array instead of string as accumulator
1 parent df3c1e4 commit 2fdc8ec

File tree

4 files changed

+86
-9
lines changed

4 files changed

+86
-9
lines changed

eslint.config.mjs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ export default defineConfig([
2121
{ files: ["lib/browser.js"], languageOptions: { globals: globals.browser } },
2222
{ files: ["lib/node.js"], languageOptions: { globals: globals.node } },
2323
{ files: ["try/**/*.{js,mjs,cjs}"], languageOptions: { globals: { ...globals.browser, ...globals.node } } },
24+
{ files: ["scripts/*.js"], languageOptions: { globals: globals.node } },
2425
{
2526
files: ["**/*.{js,mjs,cjs}"],
2627
plugins: { js },

scripts/build.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ js="$filepath.js"
2727
min="$filepath.min.js"
2828

2929
guida make --optimize --output=$js $elm_entry
30+
node scripts/replace-bytes-read-string.js $js
3031

3132
uglifyjs $js --compress "pure_funcs=[F2,F3,F4,F5,F6,F7,F8,F9,A2,A3,A4,A5,A6,A7,A8,A9],pure_getters,keep_fargs=false,unsafe_comps,unsafe" | uglifyjs --mangle --output $min
3233

scripts/replace-bytes-read-string.js

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
#!/usr/bin/env node
2+
3+
/* Patches the generated `_Bytes_read_string` to collect decoded characters in an array and join them once, which prevents V8 from retaining large "concatenated string" chains that can cause OOMs.
4+
Tested against `rtfeldman/elm-css` compilation.
5+
6+
See the related discussion for context: https://discourse.elm-lang.org/t/guida-compiler-was-there-are-3-elm-compilers-written-in-elm/10329/34
7+
and issue: https://github.com/guida-lang/compiler/issues/107
8+
*/
9+
10+
// Node's built-in file-system module; used below for one synchronous read and one synchronous write.
const fs = require('node:fs');
11+
12+
// Keep only the arguments that follow the first two entries of process.argv.
const argv = process.argv.slice(2);
13+
// First remaining argument: the file to patch (scripts/build.sh invokes this script with `$js`).
// NOTE(review): there is no check that an argument was supplied; with none, `path` is undefined
// and the readFileSync call below is expected to throw — confirm that is the intended failure mode.
const path = argv[0];
14+
15+
// Read the whole file as UTF-8 and swap the `_Bytes_read_string` definition that accumulates with
// `string +=` for one that pushes each decoded piece onto an array and joins once on return.
// The UTF-8 decoding expression is the same in both template literals; only the accumulator differs.
// NOTE(review): `replace` is given a string pattern, so it replaces the first exact match only and
// returns the text unchanged when there is no match — any whitespace difference in the generated
// code would skip the patch without an error. Confirm whether a failed match should abort the build.
const data = fs
16+
.readFileSync(path, { encoding: 'utf8', flag: 'r' })
17+
.replace(`var _Bytes_read_string = F3(function(len, bytes, offset)
18+
{
19+
var string = '';
20+
var end = offset + len;
21+
for (; offset < end;)
22+
{
23+
var byte = bytes.getUint8(offset++);
24+
string +=
25+
(byte < 128)
26+
? String.fromCharCode(byte)
27+
:
28+
((byte & 0xE0 /* 0b11100000 */) === 0xC0 /* 0b11000000 */)
29+
? String.fromCharCode((byte & 0x1F /* 0b00011111 */) << 6 | bytes.getUint8(offset++) & 0x3F /* 0b00111111 */)
30+
:
31+
((byte & 0xF0 /* 0b11110000 */) === 0xE0 /* 0b11100000 */)
32+
? String.fromCharCode(
33+
(byte & 0xF /* 0b00001111 */) << 12
34+
| (bytes.getUint8(offset++) & 0x3F /* 0b00111111 */) << 6
35+
| bytes.getUint8(offset++) & 0x3F /* 0b00111111 */
36+
)
37+
:
38+
(byte =
39+
((byte & 0x7 /* 0b00000111 */) << 18
40+
| (bytes.getUint8(offset++) & 0x3F /* 0b00111111 */) << 12
41+
| (bytes.getUint8(offset++) & 0x3F /* 0b00111111 */) << 6
42+
| bytes.getUint8(offset++) & 0x3F /* 0b00111111 */
43+
) - 0x10000
44+
, String.fromCharCode(Math.floor(byte / 0x400) + 0xD800, byte % 0x400 + 0xDC00)
45+
);
46+
}
47+
return _Utils_Tuple2(offset, string);
48+
});`, `var _Bytes_read_string = F3(function(len, bytes, offset)
49+
{
50+
var string = [];
51+
var end = offset + len;
52+
for (; offset < end;)
53+
{
54+
var byte = bytes.getUint8(offset++);
55+
string.push(
56+
(byte < 128)
57+
? String.fromCharCode(byte)
58+
:
59+
((byte & 0xE0 /* 0b11100000 */) === 0xC0 /* 0b11000000 */)
60+
? String.fromCharCode((byte & 0x1F /* 0b00011111 */) << 6 | bytes.getUint8(offset++) & 0x3F /* 0b00111111 */)
61+
:
62+
((byte & 0xF0 /* 0b11110000 */) === 0xE0 /* 0b11100000 */)
63+
? String.fromCharCode(
64+
(byte & 0xF /* 0b00001111 */) << 12
65+
| (bytes.getUint8(offset++) & 0x3F /* 0b00111111 */) << 6
66+
| bytes.getUint8(offset++) & 0x3F /* 0b00111111 */
67+
)
68+
:
69+
(byte =
70+
((byte & 0x7 /* 0b00000111 */) << 18
71+
| (bytes.getUint8(offset++) & 0x3F /* 0b00111111 */) << 12
72+
| (bytes.getUint8(offset++) & 0x3F /* 0b00111111 */) << 6
73+
| bytes.getUint8(offset++) & 0x3F /* 0b00111111 */
74+
) - 0x10000
75+
, String.fromCharCode(Math.floor(byte / 0x400) + 0xD800, byte % 0x400 + 0xDC00)
76+
)
77+
);
78+
}
79+
return _Utils_Tuple2(offset, string.join(''));
80+
});`);
81+
82+
// Overwrite the input file in place with the resulting text (unchanged if the pattern did not match).
fs.writeFileSync(path, data, { encoding: 'utf8', flag: 'w' });

src/Compiler/Parse/Primitives.elm

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -483,24 +483,17 @@ snippetEncoder (Snippet { fptr, offset, length, offRow, offCol }) =
483483

484484
snippetDecoder : BD.Decoder Snippet
485485
snippetDecoder =
486-
-- This `String.toList` -> `String.fromList` round-trip prevents V8 from retaining
487-
-- large "concatenated string" chains, which can cause OOMs.
488-
-- Tested against `rtfeldman/elm-css` compilation.
489-
-- The split call avoids elm-review flags for this pattern.
490-
--
491-
-- See the related discussion for context:
492-
-- https://discourse.elm-lang.org/t/guida-compiler-was-there-are-3-elm-compilers-written-in-elm/10329/25
493486
BD.map5
494487
(\fptr offset length offRow offCol ->
495488
Snippet
496-
{ fptr = String.fromList fptr
489+
{ fptr = fptr
497490
, offset = offset
498491
, length = length
499492
, offRow = offRow
500493
, offCol = offCol
501494
}
502495
)
503-
(BD.map String.toList BD.string)
496+
BD.string
504497
BD.int
505498
BD.int
506499
BD.int

0 commit comments

Comments
 (0)