Skip to content

Commit e969782

Browse files
authored
feat: add separatorChars instead of charRegExp option (#5)
* feat: add `splitChars` instead of `charRegExp` option. `charRegExp` is deprecated. It will be removed in the next major update. * fix: splitChars -> separatorChars * chore: use babel-register instead of espower-babel * chore: fix test message
1 parent ae5b403 commit e969782

File tree

6 files changed

+73
-21
lines changed

6 files changed

+73
-21
lines changed

.babelrc

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,10 @@
66
"development": {
77
"plugins": [
88
"jsdoc-to-assert"
9+
],
10+
"presets": [
11+
"power-assert"
912
]
1013
}
1114
}
12-
}
15+
}

README.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ console.log(JSON.stringify(sentences, null, 4));
105105

106106
// with splitting char options
107107
let sentences = split("text¶text", {
108-
charRegExp: /¶/
108+
separatorChars: ["¶"]
109109
});
110110
sentences.length; // 2
111111
```
@@ -117,7 +117,10 @@ See more detail on [Why do `line` of location in JavaScript AST(ESTree) start wi
117117

118118
### Options
119119

120-
- `charRegExp`
120+
- `separatorChars`
121+
- default: [".", "。", "?", "!", "?", "!"]
122+
- separator chars of sentences.
123+
- `charRegExp` (**Deprecated**)
121124
- default: `/[\.。\?\!?!]/`
122125
- separator of sentences.
123126
- `newLineCharacters`
@@ -135,6 +138,7 @@ Get these `Syntax` constants value from the module:
135138
```js
136139
import {Syntax} from "sentence-splitter";
137140
console.log(Syntax.Sentence);// "Sentence"
141+
````
138142

139143
### Treat Markdown break line
140144

package.json

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,9 @@
4545
"babel-plugin-jsdoc-to-assert": "^1.3.0",
4646
"babel-plugin-transform-es2015-modules-commonjs": "^6.1.18",
4747
"babel-preset-es2015": "^6.1.18",
48-
"espower-babel": "^4.0.0",
48+
"babel-preset-power-assert": "^1.0.0",
49+
"babel-register": "^6.18.0",
4950
"mocha": "^2.3.3",
50-
"power-assert": "^1.1.0"
51+
"power-assert": "^1.4.2"
5152
}
5253
}

src/sentence-splitter.js

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
// LICENSE : MIT
22
"use strict";
3+
const assert = require("assert");
34
import StructureSource from "structured-source";
45
const defaultOptions = {
6+
// charRegExp is deprecated
57
charRegExp: /[\.。\?\!?!]/,
8+
// separator char list
9+
separatorChars: [".", "。", "?", "!", "?", "!"],
610
newLineCharacters: "\n"
711
};
812
export const Syntax = {
@@ -13,15 +17,30 @@ export const Syntax = {
1317
* @param {string} text
1418
* @param {{
1519
* charRegExp: ?Object,
20+
* separatorChars: ?string[],
1621
* newLineCharacters: ?String
1722
* }} options
1823
* @returns {Array}
1924
*/
2025
export function split(text, options = {}) {
21-
const matchChar = options.charRegExp || defaultOptions.charRegExp;
26+
const charRegExp = options.charRegExp;
27+
const separatorChars = options.separatorChars || defaultOptions.separatorChars;
28+
assert(!(options.charRegExp && options.separatorChars), "should use either one `charRegExp` or `separatorChars`.\n"
29+
+ "`charRegExp` is deprecated.");
30+
/**
31+
* Is the `char` separator symbol?
32+
* @param {string} char
33+
* @returns {boolean}
34+
*/
35+
const testCharIsSeparator = (char) => {
36+
if (charRegExp) {
37+
return charRegExp.test(char);
38+
}
39+
return separatorChars.indexOf(char) !== -1;
40+
};
2241
const newLineCharacters = options.newLineCharacters || defaultOptions.newLineCharacters;
2342
const src = new StructureSource(text);
24-
let createNode = (type, start, end)=> {
43+
let createNode = (type, start, end) => {
2544
let range = [start, end];
2645
let location = src.rangeToLocation(range);
2746
let slicedText = text.slice(start, end);
@@ -54,7 +73,7 @@ export function split(text, options = {}) {
5473
// string\n|
5574
startPoint = currentIndex + newLineCharactersLength;
5675
isSplitPoint = false;
57-
} else if (matchChar.test(char)) {
76+
} else if (testCharIsSeparator(char)) {
5877
isSplitPoint = true;
5978
} else {
6079
// why `else`

test/mocha.opts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
--compilers js:espower-babel/guess
1+
--compilers js:babel-register

test/sentence-utils-test.js

Lines changed: 37 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import assert from "power-assert";
22
import {Syntax, split as splitSentences} from "../src/sentence-splitter";
3-
describe("sentence-utils", function () {
4-
it("should return array", function () {
3+
describe("sentence-utils", function() {
4+
it("should return array", function() {
55
let sentences = splitSentences("text");
66
assert.equal(sentences.length, 1);
77
let sentence = sentences[0];
@@ -10,7 +10,7 @@ describe("sentence-utils", function () {
1010
assert.deepEqual(sentences[0].loc.start, {line: 1, column: 0});
1111
assert.deepEqual(sentences[0].loc.end, {line: 1, column: 4});
1212
});
13-
it("should return sentences split by first line break", function () {
13+
it("should return sentences split by first line break", function() {
1414
let sentences = splitSentences("\ntext");
1515
assert.equal(sentences.length, 2);
1616
var whiteSpace0 = sentences[0];
@@ -24,7 +24,7 @@ describe("sentence-utils", function () {
2424
assert.deepEqual(sentence1.loc.start, {line: 2, column: 0});
2525
assert.deepEqual(sentence1.loc.end, {line: 2, column: 4});
2626
});
27-
it("should return sentences split by last line break", function () {
27+
it("should return sentences split by last line break", function() {
2828
let sentences = splitSentences("text\n");
2929
assert.equal(sentences.length, 2);
3030
var sentence0 = sentences[0];
@@ -38,7 +38,7 @@ describe("sentence-utils", function () {
3838
assert.deepEqual(whiteSpace1.loc.start, {line: 1, column: 4});
3939
assert.deepEqual(whiteSpace1.loc.end, {line: 2, column: 0});
4040
});
41-
it("should return sentences split by line break*2", function () {
41+
it("should return sentences split by line break*2", function() {
4242
let sentences = splitSentences("text\n\ntext");
4343
assert.equal(sentences.length, 4);
4444
var sentence0 = sentences[0];
@@ -63,7 +63,7 @@ describe("sentence-utils", function () {
6363
assert.deepEqual(sentence3.loc.end, {line: 3, column: 4});
6464

6565
});
66-
it("should return sentences split by 。", function () {
66+
it("should return sentences split by 。", function() {
6767
let sentences = splitSentences("text。。text");
6868
assert.equal(sentences.length, 2);
6969
var sentence0 = sentences[0];
@@ -75,7 +75,7 @@ describe("sentence-utils", function () {
7575
assert.deepEqual(sentence1.loc.start, {line: 1, column: 6});
7676
assert.deepEqual(sentence1.loc.end, {line: 1, column: 10});
7777
});
78-
it("should return sentences split by 。 and linebreak", function () {
78+
it("should return sentences split by 。 and linebreak", function() {
7979
let sentences = splitSentences("text。\ntext");
8080
assert.equal(sentences.length, 3);
8181
var sentence0 = sentences[0];
@@ -91,7 +91,7 @@ describe("sentence-utils", function () {
9191
assert.deepEqual(sentence2.loc.start, {line: 2, column: 0});
9292
assert.deepEqual(sentence2.loc.end, {line: 2, column: 4});
9393
});
94-
it("should return sentences split by !?", function () {
94+
it("should return sentences split by !?", function() {
9595
let sentences = splitSentences("text!?text");
9696
assert.equal(sentences.length, 2);
9797
var sentence0 = sentences[0];
@@ -103,16 +103,16 @@ describe("sentence-utils", function () {
103103
assert.deepEqual(sentence1.loc.start, {line: 1, column: 6});
104104
assert.deepEqual(sentence1.loc.end, {line: 1, column: 10});
105105
});
106-
it("should sentences split by last 。", function () {
106+
it("should sentences split by last 。", function() {
107107
let sentences = splitSentences("text。");
108108
assert.equal(sentences.length, 1);
109109
let sentence = sentences[0];
110110
assert.strictEqual(sentence.raw, "text。");
111111
assert.deepEqual(sentences[0].loc.start, {line: 1, column: 0});
112112
assert.deepEqual(sentences[0].loc.end, {line: 1, column: 5});
113113
});
114-
context("with options", function () {
115-
it("should separate by whiteSpace", function () {
114+
context("with options", function() {
115+
it("should separate by whiteSpace", function() {
116116
var options = {
117117
newLineCharacters: "\n\n"
118118
};
@@ -135,7 +135,7 @@ describe("sentence-utils", function () {
135135
assert.deepEqual(sentence3.loc.start, {line: 3, column: 0});
136136
assert.deepEqual(sentence3.loc.end, {line: 3, column: 4});
137137
});
138-
it("should separate by charRegExp", function () {
138+
it("should separate by charRegExp", function() {
139139
let sentences = splitSentences("text¶text", {
140140
charRegExp: /¶/
141141
});
@@ -149,5 +149,30 @@ describe("sentence-utils", function () {
149149
assert.deepEqual(sentence1.loc.start, {line: 1, column: 5});
150150
assert.deepEqual(sentence1.loc.end, {line: 1, column: 9});
151151
});
152+
it("should separate by splitChars", function() {
153+
let sentences = splitSentences("text¶text", {
154+
separatorChars: ["¶"]
155+
});
156+
assert.equal(sentences.length, 2);
157+
var sentence0 = sentences[0];
158+
assert.strictEqual(sentence0.raw, "text¶");
159+
assert.deepEqual(sentence0.loc.start, {line: 1, column: 0});
160+
assert.deepEqual(sentence0.loc.end, {line: 1, column: 5});
161+
var sentence1 = sentences[1];
162+
assert.strictEqual(sentence1.raw, "text");
163+
assert.deepEqual(sentence1.loc.start, {line: 1, column: 5});
164+
assert.deepEqual(sentence1.loc.end, {line: 1, column: 9});
165+
});
166+
it("should not set separatorChars and charRegExp", function() {
167+
try {
168+
splitSentences("text¶text", {
169+
separatorChars: ["¶"],
170+
charRegExp: /¶/
171+
});
172+
throw new Error("FAIL");
173+
} catch (error) {
174+
assert.equal(error.name, "AssertionError");
175+
}
176+
});
152177
});
153178
});

0 commit comments

Comments
 (0)