   escape,
   findClosingBracket
 } from './helpers.ts';
+import type { Rules } from './rules.ts';
 import type { _Lexer } from './Lexer.ts';
 import type { Links, Tokens } from './Tokens.ts';
 import type { MarkedOptions } from './MarkedOptions.ts';
@@ -69,9 +70,8 @@ function indentCodeCompensation(raw: string, text: string) {
  */
 export class _Tokenizer {
   options: MarkedOptions;
-  // TODO: Fix this rules type
-  rules: any;
-  lexer!: _Lexer;
+  rules!: Rules; // set by the lexer
+  lexer!: _Lexer; // set by the lexer
 
   constructor(options?: MarkedOptions) {
     this.options = options || _defaults;
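The `!:` definite assignment assertion is what lets `rules` gain a real type without a constructor initializer: the lexer wires both fields up immediately after constructing the tokenizer. A minimal sketch of the pattern, using a hypothetical `DemoRules` shape rather than marked's actual `Rules` type:

```ts
// Definite assignment: no initializer, but TypeScript trusts the `!`
// that the field is set before first use. The lexer plays the role of
// the external assigner in marked.
type DemoRules = { block: Record<string, RegExp> };

class DemoTokenizer {
  rules!: DemoRules; // assigned externally right after construction
}

const t = new DemoTokenizer();
t.rules = { block: { heading: /^#{1,6} / } };
console.log(t.rules.block.heading.test('# hi')); // true
```

The trade-off is that a tokenizer used without a lexer would hit `undefined` at runtime; the `// set by the lexer` comments document that contract.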
@@ -111,7 +111,7 @@ export class _Tokenizer {
       return {
         type: 'code',
         raw,
-        lang: cap[2] ? cap[2].trim().replace(this.rules.inline._escapes, '$1') : cap[2],
+        lang: cap[2] ? cap[2].trim().replace(this.rules.inline.anyPunctuation, '$1') : cap[2],
         text
       };
     }
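`anyPunctuation` is the renamed escape-stripping rule (formerly `_escapes`): it removes the backslash from escaped punctuation in the fence's info string. A rough sketch of the behavior with an approximated character class (the exact pattern lives in rules.ts):

```ts
// Approximation of the unescape step applied to the fence info string.
const anyPunctuation = /\\([!"#$%&'()*+,\-./:;<=>?@[\]\\^_`{|}~])/g;

const infoString = 'c\\+\\+'; // e.g. an info string written as c\+\+
console.log(infoString.replace(anyPunctuation, '$1')); // "c++"
```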
@@ -182,7 +182,7 @@ export class _Tokenizer {
       ordered: isordered,
       start: isordered ? +bull.slice(0, -1) : '',
       loose: false,
-      items: [] as Tokens.ListItem[]
+      items: []
     };
 
     bull = isordered ? `\\d{1,9}\\${bull.slice(-1)}` : `\\${bull}`;
@@ -207,10 +207,10 @@ export class _Tokenizer {
         break;
       }
 
-      raw = cap[0] as string;
+      raw = cap[0];
       src = src.substring(raw.length);
 
-      let line = cap[2].split('\n', 1)[0].replace(/^\t+/, (t: string) => ' '.repeat(3 * t.length)) as string;
+      let line = cap[2].split('\n', 1)[0].replace(/^\t+/, (t: string) => ' '.repeat(3 * t.length));
       let nextLine = src.split('\n', 1)[0];
 
       let indent = 0;
@@ -338,7 +338,7 @@ export class _Tokenizer {
 
     // Do not consume newlines at end of final item. Alternatively, make itemRegex *start* with any newlines to simplify/speed up endsWithBlankLine logic
     list.items[list.items.length - 1].raw = raw.trimEnd();
-    (list.items[list.items.length - 1] as Tokens.ListItem).text = itemContents.trimEnd();
+    (list.items[list.items.length - 1]).text = itemContents.trimEnd();
     list.raw = list.raw.trimEnd();
 
     // Item child tokens handled here at end because we needed to have the final item to trim it first
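Dropping the `as Tokens.ListItem` / `as Tokens.ListItem[]` casts falls out of the typed `rules`: once the `list` literal is declared as `Tokens.List`, `items` is already `Tokens.ListItem[]` and element access needs no assertion. A reduced sketch of why:

```ts
// Once the literal has a declared type, the casts are redundant:
// `items` is ListItem[] and indexing it yields ListItem.
interface ListItem { raw: string; text: string; }
interface List { type: 'list'; raw: string; items: ListItem[]; }

const list: List = { type: 'list', raw: '', items: [] };
list.items.push({ raw: '- a\n', text: 'a' });
list.items[list.items.length - 1].text = 'a'; // no `as` needed
```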
@@ -384,8 +384,8 @@ export class _Tokenizer {
     const cap = this.rules.block.def.exec(src);
     if (cap) {
       const tag = cap[1].toLowerCase().replace(/\s+/g, ' ');
-      const href = cap[2] ? cap[2].replace(/^<(.*)>$/, '$1').replace(this.rules.inline._escapes, '$1') : '';
-      const title = cap[3] ? cap[3].substring(1, cap[3].length - 1).replace(this.rules.inline._escapes, '$1') : cap[3];
+      const href = cap[2] ? cap[2].replace(/^<(.*)>$/, '$1').replace(this.rules.inline.anyPunctuation, '$1') : '';
+      const title = cap[3] ? cap[3].substring(1, cap[3].length - 1).replace(this.rules.inline.anyPunctuation, '$1') : cap[3];
       return {
         type: 'def',
         tag,
@@ -398,67 +398,61 @@ export class _Tokenizer {
 
   table(src: string): Tokens.Table | undefined {
     const cap = this.rules.block.table.exec(src);
-    if (cap) {
-      if (!/[:|]/.test(cap[2])) {
-        // delimiter row must have a pipe (|) or colon (:) otherwise it is a setext heading
-        return;
-      }
+    if (!cap) {
+      return;
+    }
 
-      const item: Tokens.Table = {
-        type: 'table',
-        raw: cap[0],
-        header: splitCells(cap[1]).map(c => {
-          return { text: c, tokens: [] };
-        }),
-        align: cap[2].replace(/^\||\| *$/g, '').split('|'),
-        rows: cap[3] && cap[3].trim() ? cap[3].replace(/\n[ \t]*$/, '').split('\n') : []
-      };
+    if (!/[:|]/.test(cap[2])) {
+      // delimiter row must have a pipe (|) or colon (:) otherwise it is a setext heading
+      return;
+    }
 
-      if (item.header.length === item.align.length) {
-        let l = item.align.length;
-        let i, j, k, row;
-        for (i = 0; i < l; i++) {
-          const align = item.align[i];
-          if (align) {
-            if (/^ *-+: *$/.test(align)) {
-              item.align[i] = 'right';
-            } else if (/^ *:-+: *$/.test(align)) {
-              item.align[i] = 'center';
-            } else if (/^ *:-+ *$/.test(align)) {
-              item.align[i] = 'left';
-            } else {
-              item.align[i] = null;
-            }
-          }
-        }
+    const headers = splitCells(cap[1]);
+    const aligns = cap[2].replace(/^\||\| *$/g, '').split('|');
+    const rows = cap[3] && cap[3].trim() ? cap[3].replace(/\n[ \t]*$/, '').split('\n') : [];
 
-        l = item.rows.length;
-        for (i = 0; i < l; i++) {
-          item.rows[i] = splitCells(item.rows[i] as unknown as string, item.header.length).map(c => {
-            return { text: c, tokens: [] };
-          });
-        }
+    const item: Tokens.Table = {
+      type: 'table',
+      raw: cap[0],
+      header: [],
+      align: [],
+      rows: []
+    };
 
-        // parse child tokens inside headers and cells
+    if (headers.length !== aligns.length) {
+      // header and align columns must be equal, rows can be different.
+      return;
+    }
 
-        // header child tokens
-        l = item.header.length;
-        for (j = 0; j < l; j++) {
-          item.header[j].tokens = this.lexer.inline(item.header[j].text);
-        }
+    for (const align of aligns) {
+      if (/^ *-+: *$/.test(align)) {
+        item.align.push('right');
+      } else if (/^ *:-+: *$/.test(align)) {
+        item.align.push('center');
+      } else if (/^ *:-+ *$/.test(align)) {
+        item.align.push('left');
+      } else {
+        item.align.push(null);
+      }
+    }
 
-        // cell child tokens
-        l = item.rows.length;
-        for (j = 0; j < l; j++) {
-          row = item.rows[j];
-          for (k = 0; k < row.length; k++) {
-            row[k].tokens = this.lexer.inline(row[k].text);
-          }
-        }
+    for (const header of headers) {
+      item.header.push({
+        text: header,
+        tokens: this.lexer.inline(header)
+      });
+    }
 
-      return item;
-    }
+    for (const row of rows) {
+      item.rows.push(splitCells(row, item.header.length).map(cell => {
+        return {
+          text: cell,
+          tokens: this.lexer.inline(cell)
+        };
+      }));
     }
+
+    return item;
   }
 
   lheading(src: string): Tokens.Heading | undefined {
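The table rewrite replaces the nested index loops with early returns plus three `for...of` passes and builds inline child tokens in the same pass, but the emitted token should be unchanged. A quick behavioral check (a sketch; the exact token fields follow `Tokens.Table` as used above):

```ts
import { Lexer } from 'marked';

const src = '| a | b |\n| :- | -: |\n| 1 | 2 |\n';
const [table] = new Lexer().lex(src);

// Expect: type 'table', align deduced from the delimiter row.
console.log(table.type);           // 'table'
console.log((table as any).align); // ['left', 'right']
```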
@@ -587,8 +581,8 @@ export class _Tokenizer {
         }
       }
       return outputLink(cap, {
-        href: href ? href.replace(this.rules.inline._escapes, '$1') : href,
-        title: title ? title.replace(this.rules.inline._escapes, '$1') : title
+        href: href ? href.replace(this.rules.inline.anyPunctuation, '$1') : href,
+        title: title ? title.replace(this.rules.inline.anyPunctuation, '$1') : title
       }, cap[0], this.lexer);
     }
   }
@@ -597,8 +591,8 @@ export class _Tokenizer {
     let cap;
     if ((cap = this.rules.inline.reflink.exec(src))
       || (cap = this.rules.inline.nolink.exec(src))) {
-      let link = (cap[2] || cap[1]).replace(/\s+/g, ' ');
-      link = links[link.toLowerCase()];
+      const linkString = (cap[2] || cap[1]).replace(/\s+/g, ' ');
+      const link = links[linkString.toLowerCase()];
       if (!link) {
         const text = cap[0].charAt(0);
         return {
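Reusing one `let link` for both the raw label string and the looked-up `Links` entry is what forced the loose typing; splitting it into two `const`s gives each value its own type. The same idea in isolation, with a simplified, assumed `Links` shape:

```ts
// Two consts, two types: a string key and a possibly-missing link record.
type DemoLinks = Record<string, { href: string; title?: string } | undefined>;

const links: DemoLinks = { foo: { href: '/foo' } };
const linkString = 'Foo  Bar'.replace(/\s+/g, ' '); // normalize whitespace
const link = links[linkString.toLowerCase()];       // typed as entry | undefined
if (!link) {
  // fall back to emitting the opening character as plain text, as above
}
```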
@@ -612,7 +606,7 @@ export class _Tokenizer {
   }
 
   emStrong(src: string, maskedSrc: string, prevChar = ''): Tokens.Em | Tokens.Strong | undefined {
-    let match = this.rules.inline.emStrong.lDelim.exec(src);
+    let match = this.rules.inline.emStrongLDelim.exec(src);
     if (!match) return;
 
     // _ can't be between two alphanumerics. \p{L}\p{N} includes non-english alphabet/numbers as well
@@ -625,7 +619,7 @@ export class _Tokenizer {
     const lLength = [...match[0]].length - 1;
     let rDelim, rLength, delimTotal = lLength, midDelimTotal = 0;
 
-    const endReg = match[0][0] === '*' ? this.rules.inline.emStrong.rDelimAst : this.rules.inline.emStrong.rDelimUnd;
+    const endReg = match[0][0] === '*' ? this.rules.inline.emStrongRDelimAst : this.rules.inline.emStrongRDelimUnd;
     endReg.lastIndex = 0;
 
     // Clip maskedSrc to same section of string as src (move to lexer?)
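Flattening `emStrong.lDelim` / `emStrong.rDelimAst` / `emStrong.rDelimUnd` into `emStrongLDelim` / `emStrongRDelimAst` / `emStrongRDelimUnd` keeps `rules.inline` a uniform map of `RegExp` values, which is what makes the new `Rules` type straightforward. A hypothetical sketch of the shape (placeholder patterns, not marked's real ones):

```ts
// Uniform rule map: every value is a RegExp, so one mapped type covers it.
type DemoInlineRules = Record<string, RegExp>;

const inline: DemoInlineRules = {
  emStrongLDelim: /^(?:\*+|_+)/,
  emStrongRDelimAst: /[^*]\*+/g,
  emStrongRDelimUnd: /[^_]_+/g,
};
```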
@@ -761,7 +755,7 @@ export class _Tokenizer {
       let prevCapZero;
       do {
         prevCapZero = cap[0];
-        cap[0] = this.rules.inline._backpedal.exec(cap[0])[0];
+        cap[0] = this.rules.inline._backpedal.exec(cap[0])?.[0] ?? '';
       } while (prevCapZero !== cap[0]);
       text = escape(cap[0]);
       if (cap[1] === 'www.') {
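`RegExp#exec` returns `null` when nothing matches, so the old unguarded `[0]` could throw a `TypeError`; the optional chain plus `?? ''` keeps the backpedal loop total, since an empty string stops changing and the `while` exits. The fix in isolation:

```ts
// Null-safe first-match helper mirroring the backpedal guard above.
const word = /\w+/;
function firstMatch(s: string): string {
  return word.exec(s)?.[0] ?? ''; // '' instead of a TypeError on no match
}
console.log(firstMatch('abc!')); // 'abc'
console.log(firstMatch('---'));  // ''
```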