Skip to content

Commit 1fddf36

Browse files
authored
feat(sentence): add WhiteSpace node between sentences (#7)
1 parent 5217714 commit 1fddf36

File tree

4 files changed

+133
-83
lines changed

4 files changed

+133
-83
lines changed

bin/cmd.js

Lines changed: 16 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1,16 +1,18 @@
11
#!/usr/bin/env node
2-
var split = require('../').split;
3-
var Syntax = require('../').Syntax;
4-
var concat = require('concat-stream');
5-
var fs = require('fs');
2+
var split = require("../").split;
3+
var Syntax = require("../").Syntax;
4+
var concat = require("concat-stream");
5+
var fs = require("fs");
66
var file = process.argv[2];
7-
var input = file && file !== '-'
8-
? fs.createReadStream(process.argv[2])
9-
: process.stdin;
10-
input.pipe(concat(function(buf) {
11-
split(buf.toString('utf8')).filter(function(node) {
12-
return node.type === Syntax.Sentence;
13-
}).forEach(function(sentence) {
14-
console.log(sentence.value);
15-
});
16-
}));
7+
var input = file && file !== "-" ? fs.createReadStream(process.argv[2]) : process.stdin;
8+
input.pipe(
9+
concat(function(buf) {
10+
split(buf.toString("utf8"))
11+
.filter(function(node) {
12+
return node.type === Syntax.Sentence;
13+
})
14+
.forEach(function(sentence) {
15+
console.log(sentence.value);
16+
});
17+
})
18+
);

package.json

Lines changed: 19 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,10 @@
2929
"build": "NODE_ENV=production babel src --out-dir lib --source-maps",
3030
"watch": "babel src --out-dir lib --watch --source-maps",
3131
"prepublish": "npm run --if-present build",
32-
"test": "mocha"
32+
"test": "mocha",
33+
"prettier": "prettier --write '**/*.{js,jsx,ts,tsx,css}'",
34+
"precommit": "lint-staged",
35+
"postcommit": "git reset"
3336
},
3437
"keywords": [
3538
"text",
@@ -47,7 +50,20 @@
4750
"babel-preset-es2015": "^6.1.18",
4851
"babel-preset-power-assert": "^1.0.0",
4952
"babel-register": "^6.18.0",
53+
"husky": "^0.14.3",
54+
"lint-staged": "^4.3.0",
5055
"mocha": "^2.3.3",
51-
"power-assert": "^1.4.2"
56+
"power-assert": "^1.4.2",
57+
"prettier": "^1.7.4"
58+
},
59+
"prettier": {
60+
"printWidth": 120,
61+
"tabWidth": 4
62+
},
63+
"lint-staged": {
64+
"*.{js,jsx,ts,tsx,css}": [
65+
"prettier --write",
66+
"git add"
67+
]
5268
}
53-
}
69+
}

src/sentence-splitter.js

Lines changed: 26 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -2,45 +2,52 @@
22
"use strict";
33
const assert = require("assert");
44
import StructureSource from "structured-source";
5+
56
const defaultOptions = {
67
// charRegExp is deprecated
78
charRegExp: /[\.\?\!]/,
89
// separator char list
910
separatorChars: [".", "。", "?", "!", "?", "!"],
10-
newLineCharacters: "\n"
11+
newLineCharacters: "\n",
12+
whiteSpaceCharacters: [" ", " "]
1113
};
1214
export const Syntax = {
13-
"WhiteSpace": "WhiteSpace",
14-
"Sentence": "Sentence"
15+
WhiteSpace: "WhiteSpace",
16+
Sentence: "Sentence"
1517
};
18+
1619
/**
1720
* @param {string} text
1821
* @param {{
1922
* charRegExp: ?Object,
2023
* separatorChars: ?string[],
21-
* newLineCharacters: ?String
24+
* newLineCharacters: ?String,
25+
* whiteSpaceCharacters: ?string[]
2226
* }} options
2327
* @returns {Array}
2428
*/
2529
export function split(text, options = {}) {
2630
const charRegExp = options.charRegExp;
2731
const separatorChars = options.separatorChars || defaultOptions.separatorChars;
28-
assert(!(options.charRegExp && options.separatorChars), "should use either one `charRegExp` or `separatorChars`.\n"
29-
+ "`charRegExp` is deprecated.");
32+
const whiteSpaceCharacters = options.whiteSpaceCharacters || defaultOptions.whiteSpaceCharacters;
33+
assert(
34+
!(options.charRegExp && options.separatorChars),
35+
"should use either one `charRegExp` or `separatorChars`.\n" + "`charRegExp` is deprecated."
36+
);
3037
/**
3138
* Is the `char` separator symbol?
3239
* @param {string} char
3340
* @returns {boolean}
3441
*/
35-
const testCharIsSeparator = (char) => {
42+
const testCharIsSeparator = char => {
3643
if (charRegExp) {
3744
return charRegExp.test(char);
3845
}
3946
return separatorChars.indexOf(char) !== -1;
4047
};
4148
const newLineCharacters = options.newLineCharacters || defaultOptions.newLineCharacters;
4249
const src = new StructureSource(text);
43-
let createNode = (type, start, end) => {
50+
const createNode = (type, start, end) => {
4451
let range = [start, end];
4552
let location = src.rangeToLocation(range);
4653
let slicedText = text.slice(start, end);
@@ -84,6 +91,12 @@ export function split(text, options = {}) {
8491
startPoint = currentIndex;
8592
isSplitPoint = false;
8693
}
94+
// Sentence<WhiteSpace>Sentence
95+
if (whiteSpaceCharacters.indexOf(char) !== -1) {
96+
results.push(createNode(Syntax.WhiteSpace, currentIndex, currentIndex + 1));
97+
startPoint++;
98+
currentIndex++;
99+
}
87100
}
88101
}
89102

@@ -92,6 +105,7 @@ export function split(text, options = {}) {
92105
}
93106
return results;
94107
}
108+
95109
/**
96110
* @param {string} text
97111
* @param {Object} loc
@@ -105,8 +119,9 @@ export function createWhiteSpaceNode(text, loc, range) {
105119
value: text,
106120
loc: loc,
107121
range: range
108-
}
122+
};
109123
}
124+
110125
/**
111126
* @param {string} text
112127
* @param {Object} loc
@@ -120,5 +135,5 @@ export function createSentenceNode(text, loc, range) {
120135
value: text,
121136
loc: loc,
122137
range: range
123-
}
124-
}
138+
};
139+
}

0 commit comments

Comments
 (0)