Skip to content

Commit d38aeb6

Browse files
authored
Merge pull request #3411 from microsoft/ulugbekna/cand-toLines-trailing-newline
nes: fix: correctly split chunks to lines for trailing newline
2 parents c175434 + 7fa27e4 commit d38aeb6

File tree

2 files changed

+168
-5
lines changed

2 files changed

+168
-5
lines changed

src/extension/xtab/node/xtabUtils.ts

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,25 +4,25 @@
44
*--------------------------------------------------------------------------------------------*/
55

66
import { Raw } from '@vscode/prompt-tsx';
7-
import { IResponsePart } from '../../../platform/chat/common/chatMLFetcher';
87
import { toTextParts } from '../../../platform/chat/common/globalStringUtils';
98
import { AsyncIterableObject } from '../../../util/vs/base/common/async';
109

1110

12-
export function toLines(stream: AsyncIterableObject<IResponsePart>) {
11+
export function toLines(stream: AsyncIterableObject<{ delta: { text: string } }>) {
1312
return new AsyncIterableObject<string>(async (emitter) => {
14-
let buffer = '';
13+
let buffer: string | null = null;
1514

1615
for await (const chunk of stream) {
16+
buffer ??= '';
1717
buffer += chunk.delta.text;
1818

19-
const parts = buffer.split(/\r?\n/);
19+
const parts: string[] = buffer.split(/\r?\n/);
2020
buffer = parts.pop() ?? '';
2121

2222
emitter.emitMany(parts);
2323
}
2424

25-
if (buffer) {
25+
if (buffer !== null) {
2626
emitter.emitOne(buffer);
2727
}
2828
});
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
/*---------------------------------------------------------------------------------------------
2+
* Copyright (c) Microsoft Corporation. All rights reserved.
3+
* Licensed under the MIT License. See License.txt in the project root for license information.
4+
*--------------------------------------------------------------------------------------------*/
5+
6+
import { describe, expect, it } from 'vitest';
7+
import { AsyncIterableObject } from '../../../../util/vs/base/common/async';
8+
import { toLines } from '../../node/xtabUtils';
9+
10+
describe('toLines', () => {
11+
12+
async function chunksToLines(chunks: string[]) {
13+
const iter = AsyncIterableObject.fromArray(chunks.map(text => ({ delta: { text } })));
14+
const arr: string[] = [];
15+
for await (const line of toLines(iter)) {
16+
arr.push(line);
17+
}
18+
return arr;
19+
}
20+
21+
describe('empty and minimal inputs', () => {
22+
it('handles empty stream', async () => {
23+
const arr = await chunksToLines([]);
24+
expect(arr).toEqual([]);
25+
});
26+
27+
it('handles single empty chunk', async () => {
28+
const arr = await chunksToLines(['']);
29+
expect(arr).toEqual(['']);
30+
});
31+
32+
it('handles multiple empty chunks', async () => {
33+
const arr = await chunksToLines(['', '', '']);
34+
expect(arr).toEqual(['']);
35+
});
36+
});
37+
38+
describe('single chunk inputs', () => {
39+
it('handles single line without newline', async () => {
40+
const arr = await chunksToLines(['hello']);
41+
expect(arr).toEqual(['hello']);
42+
});
43+
44+
it('handles single line with trailing newline', async () => {
45+
const arr = await chunksToLines(['hello\n']);
46+
expect(arr).toEqual(['hello', '']);
47+
});
48+
49+
it('handles multiple lines in single chunk', async () => {
50+
const arr = await chunksToLines(['line1\nline2\nline3']);
51+
expect(arr).toEqual(['line1', 'line2', 'line3']);
52+
});
53+
54+
it('handles multiple lines with trailing newline', async () => {
55+
const arr = await chunksToLines(['line1\nline2\nline3\n']);
56+
expect(arr).toEqual(['line1', 'line2', 'line3', '']);
57+
});
58+
});
59+
60+
describe('multiple chunks', () => {
61+
it('handles each line as separate chunk', async () => {
62+
const arr = await chunksToLines(['line1\n', 'line2\n', 'line3']);
63+
expect(arr).toEqual(['line1', 'line2', 'line3']);
64+
});
65+
66+
it('handles line split across two chunks', async () => {
67+
const arr = await chunksToLines(['hel', 'lo']);
68+
expect(arr).toEqual(['hello']);
69+
});
70+
71+
it('handles line split across multiple chunks', async () => {
72+
const arr = await chunksToLines(['h', 'e', 'l', 'l', 'o']);
73+
expect(arr).toEqual(['hello']);
74+
});
75+
76+
it('handles newline split between chunks', async () => {
77+
const arr = await chunksToLines(['line1', '\nline2']);
78+
expect(arr).toEqual(['line1', 'line2']);
79+
});
80+
81+
it('handles complex split across chunks', async () => {
82+
const arr = await chunksToLines(['li', 'ne1\nli', 'ne2\n', 'line3']);
83+
expect(arr).toEqual(['line1', 'line2', 'line3']);
84+
});
85+
});
86+
87+
describe('line endings', () => {
88+
it('handles Windows-style line endings (CRLF)', async () => {
89+
const arr = await chunksToLines(['line1\r\nline2\r\nline3']);
90+
expect(arr).toEqual(['line1', 'line2', 'line3']);
91+
});
92+
93+
it('handles Windows-style line endings with trailing CRLF', async () => {
94+
const arr = await chunksToLines(['line1\r\nline2\r\n']);
95+
expect(arr).toEqual(['line1', 'line2', '']);
96+
});
97+
98+
it('handles mixed line endings', async () => {
99+
const arr = await chunksToLines(['line1\nline2\r\nline3']);
100+
expect(arr).toEqual(['line1', 'line2', 'line3']);
101+
});
102+
103+
it('handles CRLF split across chunks', async () => {
104+
const arr = await chunksToLines(['line1\r', '\nline2']);
105+
expect(arr).toEqual(['line1', 'line2']);
106+
});
107+
});
108+
109+
describe('empty lines', () => {
110+
it('handles single empty line', async () => {
111+
const arr = await chunksToLines(['\n']);
112+
expect(arr).toEqual(['', '']);
113+
});
114+
115+
it('handles multiple consecutive empty lines', async () => {
116+
const arr = await chunksToLines(['\n\n\n']);
117+
expect(arr).toEqual(['', '', '', '']);
118+
});
119+
120+
it('handles empty lines between content', async () => {
121+
const arr = await chunksToLines(['line1\n\nline2']);
122+
expect(arr).toEqual(['line1', '', 'line2']);
123+
});
124+
125+
it('handles multiple empty lines between content', async () => {
126+
const arr = await chunksToLines(['line1\n\n\nline2']);
127+
expect(arr).toEqual(['line1', '', '', 'line2']);
128+
});
129+
});
130+
131+
describe('edge cases', () => {
132+
it('handles only newlines in separate chunks', async () => {
133+
const arr = await chunksToLines(['\n', '\n', '\n']);
134+
expect(arr).toEqual(['', '', '', '']);
135+
});
136+
137+
it('handles chunk that is just a newline after content', async () => {
138+
const arr = await chunksToLines(['hello', '\n']);
139+
expect(arr).toEqual(['hello', '']);
140+
});
141+
142+
it('handles whitespace-only lines', async () => {
143+
const arr = await chunksToLines([' \n\t\n ']);
144+
expect(arr).toEqual([' ', '\t', ' ']);
145+
});
146+
147+
it('handles unicode content', async () => {
148+
const arr = await chunksToLines(['héllo\nwörld\n日本語']);
149+
expect(arr).toEqual(['héllo', 'wörld', '日本語']);
150+
});
151+
152+
it('handles emoji content', async () => {
153+
const arr = await chunksToLines(['👋\n🌍']);
154+
expect(arr).toEqual(['👋', '🌍']);
155+
});
156+
157+
it('simulates character-by-character streaming', async () => {
158+
const text = 'ab\ncd';
159+
const arr = await chunksToLines(text.split(''));
160+
expect(arr).toEqual(['ab', 'cd']);
161+
});
162+
});
163+
});

0 commit comments

Comments
 (0)