@@ -580,52 +580,58 @@ def test_unescape_method(self):
580
580
s = '""""""&#bad;'
581
581
self .assertEqual (p .unescape (s ), unescape (s ))
582
582
583
def test_bogus_comments(self):
    """Check that malformed ``<!...>`` constructs are reported as comments.

    None of the pieces below is a well-formed ``<!--...-->`` comment.
    Each one is expected to produce a single 'comment' event whose data
    is the text found between the opening ``<!`` and the closing ``>``.
    """
    html = ('<! not really a comment >'
            '<! not a comment either -->'
            '<! -- close enough -->'
            '<!><!<-- this was an empty comment>'
            '<!!! another bogus comment !!!>'
            # see #32876
            '<![with square brackets]!>'
            '<![\n multiline\n bogusness\n ]!>'
            '<![more brackets]-[and a hyphen]!>'
            '<![cdata[should be uppercase]]>')
    # One expected event per construct above, in input order.
    expected = [
        ('comment', ' not really a comment '),
        ('comment', ' not a comment either --'),
        ('comment', ' -- close enough --'),
        ('comment', ''),
        ('comment', '<-- this was an empty comment'),
        ('comment', '!! another bogus comment !!!'),
        ('comment', '[with square brackets]!'),
        ('comment', '[\n multiline\n bogusness\n ]!'),
        ('comment', '[more brackets]-[and a hyphen]!'),
        ('comment', '[cdata[should be uppercase]]'),
    ]
    self._run_check(html, expected)
598
607
599
608
def test_broken_condcoms(self):
    """Check that broken conditional comments are reported as comments.

    Each ``<![if ...]>`` / ``<![endif]>`` marker is expected to produce
    a 'comment' event whose data is the text between ``<!`` and ``>``
    (square brackets included); the markup in between is expected to
    be reported as ordinary data and tag events.
    """
    # These condcoms are missing the '--' after '<!' and before the '>',
    # and they are considered bogus comments according to
    # "8.2.4.42. Markup declaration open state".
    html = ('<![if !(IE)]>broken condcom<![endif]>'
            '<![if ! IE]><link href="favicon.tiff"/><![endif]>'
            '<![if !IE 6]><img src="firefox.png" /><![endif]>'
            '<![if !ie 6]><b>foo</b><![endif]>'
            '<![if (!IE)|(lt IE 9)]><img src="mammoth.bmp" /><![endif]>')
    # Event names and comment data carry no padding: the name is exactly
    # 'comment' and the data is exactly what sits between '<!' and '>'.
    expected = [
        ('comment', '[if !(IE)]'),
        ('data', 'broken condcom'),
        ('comment', '[endif]'),
        ('comment', '[if ! IE]'),
        ('startendtag', 'link', [('href', 'favicon.tiff')]),
        ('comment', '[endif]'),
        ('comment', '[if !IE 6]'),
        ('startendtag', 'img', [('src', 'firefox.png')]),
        ('comment', '[endif]'),
        ('comment', '[if !ie 6]'),
        ('starttag', 'b', []),
        ('data', 'foo'),
        ('endtag', 'b'),
        ('comment', '[endif]'),
        ('comment', '[if (!IE)|(lt IE 9)]'),
        ('startendtag', 'img', [('src', 'mammoth.bmp')]),
        ('comment', '[endif]'),
    ]
    self._run_check(html, expected)
631
637
@@ -642,6 +648,7 @@ def test_convert_charrefs_dropped_text(self):
642
648
)
643
649
644
650
651
+
645
652
class AttributesTestCase (TestCaseBase ):
646
653
647
654
def test_attr_syntax (self ):
0 commit comments