@@ -134,6 +134,7 @@ function consumeSpecialNameChar(upper, NEXT_STATE){
134
134
function Tokenizer ( options , cbs ) {
135
135
this . _state = TEXT ;
136
136
this . _buffer = "" ;
137
+ this . _content = "" ;
137
138
this . _sectionStart = 0 ;
138
139
this . _index = 0 ;
139
140
this . _bufferOffset = 0 ; //chars removed from _buffer
@@ -144,19 +145,18 @@ function Tokenizer(options, cbs){
144
145
this . _ended = false ;
145
146
this . _xmlMode = ! ! ( options && options . xmlMode ) ;
146
147
this . _decodeEntities = ! ! ( options && options . decodeEntities ) ;
148
+ this . _eagerTextCapture = ! ! ( options && options . eagerTextCapture ) ;
149
+
150
+
147
151
}
148
152
149
153
Tokenizer . prototype . _stateText = function ( c ) {
150
154
if ( c === "<" ) {
151
- if ( this . _index > this . _sectionStart ) {
152
- this . _cbs . ontext ( this . _getSection ( ) ) ;
153
- }
155
+ this . _captureText ( ) ;
154
156
this . _state = BEFORE_TAG_NAME ;
155
157
this . _sectionStart = this . _index ;
156
158
} else if ( this . _decodeEntities && this . _special === SPECIAL_NONE && c === "&" ) {
157
- if ( this . _index > this . _sectionStart ) {
158
- this . _cbs . ontext ( this . _getSection ( ) ) ;
159
- }
159
+ this . _captureText ( ) ;
160
160
this . _baseState = TEXT ;
161
161
this . _state = BEFORE_ENTITY ;
162
162
this . _sectionStart = this . _index ;
@@ -175,7 +175,7 @@ Tokenizer.prototype._stateBeforeTagName = function(c){
175
175
this . _state = IN_PROCESSING_INSTRUCTION ;
176
176
this . _sectionStart = this . _index + 1 ;
177
177
} else if ( c === "<" ) {
178
- this . _cbs . ontext ( this . _getSection ( ) ) ;
178
+ this . _captureText ( ) ;
179
179
this . _sectionStart = this . _index ;
180
180
} else {
181
181
this . _state = ( ! this . _xmlMode && ( c === "s" || c === "S" ) ) ?
@@ -186,6 +186,7 @@ Tokenizer.prototype._stateBeforeTagName = function(c){
186
186
187
187
Tokenizer . prototype . _stateInTagName = function ( c ) {
188
188
if ( c === "/" || c === ">" || whitespace ( c ) ) {
189
+ this . _flushText ( ) ;
189
190
this . _emitToken ( "onopentagname" ) ;
190
191
this . _state = BEFORE_ATTRIBUTE_NAME ;
191
192
this . _index -- ;
@@ -211,6 +212,7 @@ Tokenizer.prototype._stateBeforeCloseingTagName = function(c){
211
212
212
213
Tokenizer . prototype . _stateInCloseingTagName = function ( c ) {
213
214
if ( c === ">" || whitespace ( c ) ) {
215
+ this . _flushText ( ) ;
214
216
this . _emitToken ( "onclosetag" ) ;
215
217
this . _state = AFTER_CLOSING_TAG_NAME ;
216
218
this . _index -- ;
@@ -602,9 +604,7 @@ Tokenizer.prototype._cleanup = function (){
602
604
this . _bufferOffset += this . _index ;
603
605
} else if ( this . _running ) {
604
606
if ( this . _state === TEXT ) {
605
- if ( this . _sectionStart !== this . _index ) {
606
- this . _cbs . ontext ( this . _buffer . substr ( this . _sectionStart ) ) ;
607
- }
607
+ this . _captureText ( ) ;
608
608
this . _buffer = "" ;
609
609
this . _index = 0 ;
610
610
this . _bufferOffset += this . _index ;
@@ -831,51 +831,49 @@ Tokenizer.prototype.end = function(chunk){
831
831
832
832
//Called when the input has ended: lets _handleTrailingData deal with
//whatever is still pending, then tells the consumer that parsing is over.
Tokenizer.prototype._finish = function(){
	//_handleTrailingData checks on its own whether anything is left to emit
	this._handleTrailingData();

	this._cbs.onend();
};
840
838
841
839
//Emits the data that is still pending when the input ends.
//
//If the current section (from _sectionStart up to _index) is non-empty,
//it is handled according to the current state:
// - CDATA states: the rest of the buffer is passed to oncdata
// - comment states: the rest of the buffer is passed to oncomment
// - entity states (non-XML mode only): the entity is decoded, the state
//   falls back to _baseState and the remainder is handled recursively
// - tag and attribute states: the unfinished tag is ignored
// - any other state: the section is captured as text
//
//Finally, text that is still waiting in _content is flushed, whatever
//the state is. Without this, text gathered in eagerTextCapture mode was
//lost whenever the input ended inside an unfinished tag or with trailing
//CDATA/comment data. The flush does nothing when _content is empty.
Tokenizer.prototype._handleTrailingData = function(){
	if(this._sectionStart < this._index){
		var data = this._buffer.substr(this._sectionStart);

		if(this._state === IN_CDATA || this._state === AFTER_CDATA_1 || this._state === AFTER_CDATA_2){
			this._cbs.oncdata(data);
		} else if(this._state === IN_COMMENT || this._state === AFTER_COMMENT_1 || this._state === AFTER_COMMENT_2){
			this._cbs.oncomment(data);
		} else if(this._state === IN_NAMED_ENTITY && !this._xmlMode){
			this._parseLegacyEntity();
			this._state = this._baseState;
			this._handleTrailingData();
		} else if(this._state === IN_NUMERIC_ENTITY && !this._xmlMode){
			this._decodeNumericEntity(2, 10);
			this._state = this._baseState;
			this._handleTrailingData();
		} else if(this._state === IN_HEX_ENTITY && !this._xmlMode){
			this._decodeNumericEntity(3, 16);
			this._state = this._baseState;
			this._handleTrailingData();
		} else if(
			this._state !== IN_TAG_NAME &&
			this._state !== BEFORE_ATTRIBUTE_NAME &&
			this._state !== BEFORE_ATTRIBUTE_VALUE &&
			this._state !== AFTER_ATTRIBUTE_NAME &&
			this._state !== IN_ATTRIBUTE_NAME &&
			this._state !== IN_ATTRIBUTE_VALUE_SQ &&
			this._state !== IN_ATTRIBUTE_VALUE_DQ &&
			this._state !== IN_ATTRIBUTE_VALUE_NQ &&
			this._state !== IN_CLOSING_TAG_NAME
		){
			this._captureText();
		}
		//else, ignore remaining data
		//TODO add a way to remove current tag
	}

	//never drop text that is still buffered in _content
	this._flushText();
};
@@ -901,6 +899,31 @@ Tokenizer.prototype._emitPartial = function(value){
901
899
if ( this . _baseState !== TEXT ) {
902
900
this . _cbs . onattribdata ( value ) ; //TODO implement the new event
903
901
} else {
904
- this . _cbs . ontext ( value ) ;
902
+ if ( this . _eagerTextCapture ) {
903
+ this . _content += value ;
904
+ } else {
905
+ this . _cbs . ontext ( value ) ;
906
+ }
907
+ }
908
+ } ;
909
+
910
//Takes the current section (as returned by _getSection) and either
//appends it to _content, when eagerTextCapture is enabled, or passes it
//straight to the ontext callback. Does nothing when _index has not moved
//past _sectionStart.
Tokenizer.prototype._captureText = function(){
	if(this._sectionStart < this._index){
		var section = this._getSection();

		if(this._eagerTextCapture){
			//keep the text around until _flushText hands it out
			this._content += section;
		} else {
			this._cbs.ontext(section);
		}
	}
};
921
+
922
//Passes the text gathered in _content to the ontext callback and clears
//the buffer afterwards. Does nothing while _content is empty.
Tokenizer.prototype._flushText = function(){
	var pending = this._content;

	if(pending){
		this._cbs.ontext(pending);
		this._content = "";
	}
};
929
+
0 commit comments