Description
- Version: v8.11.4
- Platform: Linux xyz 4.17.5-200.fc28.x86_64 #1 SMP Tue Jul 10 13:39:04 UTC 2018 x86_64 x86_64 x86_64 GNU/Linux
- Subsystem:
Decoding a latin1 buffer larger than about 1 MB to a string and then encoding that string as UTF-8 produces a buffer with the same number of bytes as the latin1 input, even though non-ASCII characters take more bytes in UTF-8 and should make the UTF-8 buffer larger.
This seems to work properly on v10.x but not v8.x or v9.x.
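For a small string the expected size difference is easy to see (a minimal illustration, separate from the repro below; the values are what a correctly behaving Node prints): each of the non-ASCII characters in 'Räksmörgås' fits in one byte in latin1 but needs two bytes in UTF-8.

```js
// 'Räksmörgås' is 10 characters; ä, ö and å each take 2 bytes in UTF-8.
const sample = 'Räksmörgås';
console.log(Buffer.byteLength(sample, 'latin1')); // 10
console.log(Buffer.byteLength(sample, 'utf8'));   // 13
console.log(Buffer.from(sample, 'latin1').length); // 10
console.log(Buffer.from(sample, 'utf8').length);   // 13
```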
Code that demonstrates the problem:
const s = 'Räksmörgås';
let ss = '';
const SIZE = (1024 * 1024);
// works:
//const SIZE = (1024 * 512);
while (ss.length < SIZE) {
  ss = ss + ss.length + ' ' + s + '\n';
}
// create latin1 buffer we can decode
let l1Buffer = Buffer.from(ss, 'latin1');
let l1String = l1Buffer.toString('latin1');
// also fixes it:
// l1String = ('x' + l1String).substring(1, l1String.length + 1);
// create utf8 buffer from decoded latin1 string
let u8Buffer = Buffer.from(l1String, 'utf8');
console.log(l1Buffer.length);
console.log(u8Buffer.length);
if (l1Buffer.length === u8Buffer.length) {
  console.log('failed, should be different size');
} else {
  console.log('works');
}
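For reference, a sketch of the workaround mentioned in the commented-out line above; the repro only notes that it "also fixes it", and my assumption is that the concatenate-and-substring step forces a fresh copy of the decoded string before it is re-encoded:

```js
// Hypothetical helper based on the commented-out workaround above.
// Assumption: prepending a character and slicing it back off makes V8
// produce a fresh (flattened) copy of the string, after which encoding
// to UTF-8 yields the expected, larger byte count on affected versions.
function copyString(str) {
  return ('x' + str).substring(1, str.length + 1);
}

// Usage with the repro above (untested sketch):
// const u8Fixed = Buffer.from(copyString(l1String), 'utf8');
// console.log(u8Fixed.length); // expected to be larger than l1Buffer.length
```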