Skip to content

Commit d3bb665

Browse files
committed
fix(sentence): support continuous whitespace nodes
1 parent 050ef88 commit d3bb665

File tree

2 files changed

+47
-6
lines changed

2 files changed

+47
-6
lines changed

src/sentence-splitter.js

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ export function split(text, options = {}) {
6363
let startPoint = 0;
6464
let currentIndex = 0;
6565
let isSplitPoint = false;
66+
let isInSentence = false;
6667
const newLineCharactersLength = newLineCharacters.length;
6768
for (; currentIndex < text.length; currentIndex++) {
6869
let char = text[currentIndex];
@@ -90,12 +91,16 @@ export function split(text, options = {}) {
9091
// reset state
9192
startPoint = currentIndex;
9293
isSplitPoint = false;
93-
// Sentence<WhiteSpace>Sentence
94-
if (whiteSpaceCharacters.indexOf(char) !== -1) {
95-
results.push(createNode(Syntax.WhiteSpace, currentIndex, currentIndex + 1));
96-
startPoint++;
97-
currentIndex++;
98-
}
94+
isInSentence = false;
95+
}
96+
// Sentence<WhiteSpace>*Sentence
97+
if (isInSentence === false && whiteSpaceCharacters.indexOf(char) !== -1) {
98+
// Add WhiteSpace
99+
results.push(createNode(Syntax.WhiteSpace, startPoint, currentIndex + 1));
100+
startPoint++;
101+
} else {
102+
// New sentence start
103+
isInSentence = true;
99104
}
100105
}
101106
}

test/sentence-utils-test.js

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,42 @@ describe("sentence-utils", function() {
108108
assert.deepEqual(sentence2.loc.start, { line: 1, column: 10 });
109109
assert.deepEqual(sentence2.loc.end, { line: 1, column: 18 });
110110
});
111+
it("should return sentences split by multiple whitespaces", function() {
112+
const sentences = splitSentences("1st text.   2nd text");
113+
assert.equal(sentences.length, 2 + 3);
114+
const [sentence0, whitespace0, whitespace1, whitespace2, sentence1] = sentences;
115+
assert.strictEqual(sentence0.raw, "1st text.");
116+
assert.deepEqual(sentence0.range, [0, 9]);
117+
assert.strictEqual(whitespace0.type, Syntax.WhiteSpace);
118+
assert.strictEqual(whitespace0.value, " ");
119+
assert.deepEqual(whitespace0.range, [9, 10]);
120+
assert.strictEqual(whitespace1.type, Syntax.WhiteSpace);
121+
assert.strictEqual(whitespace1.value, " ");
122+
assert.deepEqual(whitespace1.range, [10, 11]);
123+
assert.strictEqual(whitespace2.type, Syntax.WhiteSpace);
124+
assert.strictEqual(whitespace2.value, " ");
125+
assert.deepEqual(whitespace2.range, [11, 12]);
126+
assert.strictEqual(sentence1.raw, "2nd text");
127+
assert.deepEqual(sentence1.range, [12, 20]);
128+
});
129+
it("should return sentences split by text and whitespaces, and new line", function() {
130+
const sentences = splitSentences("1st text. \n 2nd text");
131+
assert.equal(sentences.length, 2 + 2 + 1);
132+
const [sentence0, whitespace0, lineBreak, whitespace1, sentence1] = sentences;
133+
assert.strictEqual(sentence0.raw, "1st text.");
134+
assert.deepEqual(sentence0.range, [0, 9]);
135+
assert.strictEqual(whitespace0.type, Syntax.WhiteSpace);
136+
assert.strictEqual(whitespace0.value, " ");
137+
assert.deepEqual(whitespace0.range, [9, 10]);
138+
assert.strictEqual(lineBreak.type, Syntax.WhiteSpace);
139+
assert.strictEqual(lineBreak.value, "\n");
140+
assert.deepEqual(lineBreak.range, [10, 11]);
141+
assert.strictEqual(whitespace1.type, Syntax.WhiteSpace);
142+
assert.strictEqual(whitespace1.value, " ");
143+
assert.deepEqual(whitespace1.range, [11, 12]);
144+
assert.strictEqual(sentence1.raw, "2nd text");
145+
assert.deepEqual(sentence1.range, [12, 20]);
146+
});
111147
it("should return sentences split by !?", function() {
112148
let sentences = splitSentences("text!?text");
113149
assert.equal(sentences.length, 2);

0 commit comments

Comments
 (0)