22"use strict" ;
33const assert = require ( "assert" ) ;
44import StructureSource from "structured-source" ;
5+
/**
 * Fallback values used when the caller's options omit a field.
 *
 * The full-width characters are spelled as `\u` escapes so they cannot be
 * silently folded into their ASCII look-alikes by width-normalising tools,
 * which would leave duplicate entries in the lists below.
 */
const defaultOptions = {
    // charRegExp is deprecated; prefer `separatorChars`.
    // ASCII and full-width sentence terminators (U+FF1F "?", U+FF01 "!").
    charRegExp: /[.。?!\uFF1F\uFF01]/,
    // Characters that terminate a sentence.
    separatorChars: [".", "。", "?", "!", "\uFF1F", "\uFF01"],
    // Line-break sequence.
    newLineCharacters: "\n",
    // ASCII space and U+3000 IDEOGRAPHIC SPACE.
    whiteSpaceCharacters: [" ", "\u3000"]
};
/**
 * Node type names emitted by the splitter.
 *
 * Frozen so that consumers sharing this table cannot add, remove or
 * reassign type names at runtime.
 */
export const Syntax = Object.freeze({
    WhiteSpace: "WhiteSpace",
    Sentence: "Sentence"
});
18+
1619/**
1720 * @param {string } text
1821 * @param {{
1922 * charRegExp: ?Object,
2023 * separatorChars: ?string[],
21- * newLineCharacters: ?String
24+ * newLineCharacters: ?String,
25+ * whiteSpaceCharacters: ?string[]
2226 * }} options
2327 * @returns {Array }
2428 */
2529export function split ( text , options = { } ) {
2630 const charRegExp = options . charRegExp ;
2731 const separatorChars = options . separatorChars || defaultOptions . separatorChars ;
28- assert ( ! ( options . charRegExp && options . separatorChars ) , "should use either one `charRegExp` or `separatorChars`.\n"
29- + "`charRegExp` is deprecated." ) ;
32+ const whiteSpaceCharacters = options . whiteSpaceCharacters || defaultOptions . whiteSpaceCharacters ;
33+ assert (
34+ ! ( options . charRegExp && options . separatorChars ) ,
35+ "should use either one `charRegExp` or `separatorChars`.\n" + "`charRegExp` is deprecated."
36+ ) ;
3037 /**
3138 * Is the `char` separator symbol?
3239 * @param {string } char
3340 * @returns {boolean }
3441 */
35- const testCharIsSeparator = ( char ) => {
42+ const testCharIsSeparator = char => {
3643 if ( charRegExp ) {
3744 return charRegExp . test ( char ) ;
3845 }
3946 return separatorChars . indexOf ( char ) !== - 1 ;
4047 } ;
4148 const newLineCharacters = options . newLineCharacters || defaultOptions . newLineCharacters ;
4249 const src = new StructureSource ( text ) ;
43- let createNode = ( type , start , end ) => {
50+ const createNode = ( type , start , end ) => {
4451 let range = [ start , end ] ;
4552 let location = src . rangeToLocation ( range ) ;
4653 let slicedText = text . slice ( start , end ) ;
@@ -84,6 +91,12 @@ export function split(text, options = {}) {
8491 startPoint = currentIndex ;
8592 isSplitPoint = false ;
8693 }
94+ // Sentence<WhiteSpace>Sentence
95+ if ( whiteSpaceCharacters . indexOf ( char ) !== - 1 ) {
96+ results . push ( createNode ( Syntax . WhiteSpace , currentIndex , currentIndex + 1 ) ) ;
97+ startPoint ++ ;
98+ currentIndex ++ ;
99+ }
87100 }
88101 }
89102
@@ -92,6 +105,7 @@ export function split(text, options = {}) {
92105 }
93106 return results ;
94107}
108+
95109/**
96110 * @param {string } text
97111 * @param {Object } loc
@@ -105,8 +119,9 @@ export function createWhiteSpaceNode(text, loc, range) {
105119 value : text ,
106120 loc : loc ,
107121 range : range
108- }
122+ } ;
109123}
124+
110125/**
111126 * @param {string } text
112127 * @param {Object } loc
@@ -120,5 +135,5 @@ export function createSentenceNode(text, loc, range) {
120135 value : text ,
121136 loc : loc ,
122137 range : range
123- }
124- }
138+ } ;
139+ }
0 commit comments