@@ -8,7 +8,7 @@ use std::rt::io::{Reader, Writer};
8
8
use std:: rt:: io:: extensions:: ReaderUtil ;
9
9
use extra:: time:: { Tm , strptime} ;
10
10
use extra:: url:: Url ;
11
- use rfc2616:: { is_token_item, is_separator, CR , LF , SP , HT , COLON , DOUBLE_QUOTE , BACKSLASH } ;
11
+ use rfc2616:: { is_token_item, is_separator, CR , LF , SP , HT , COLON } ;
12
12
use method:: Method ;
13
13
14
14
use self :: serialization_utils:: { normalise_header_name} ;
@@ -132,11 +132,7 @@ pub fn header_enum_from_stream<R: Reader, E: HeaderEnum>(reader: &mut R)
132
132
#[ deriving( Eq ) ]
133
133
enum HeaderValueByteIteratorState {
134
134
Normal , // Anything other than the rest.
135
- InsideQuotedString , // no LWS compacting here. TODO: check spec on CR LF SP in quoted-string.
136
- InsideQuotedStringEscape , // don't let a " close quoted-string if it comes next
137
- CompactingLWS , // Got at least one linear white space character; turning it into just one SP
138
- GotCR , // Last character was CR
139
- GotCRLF , // Last two characters were CR LF
135
+ GotLF , // Last two characters were CR LF
140
136
Finished , // Finished, so next() should always return ``None`` immediately (no side effects)
141
137
}
142
138
@@ -226,14 +222,21 @@ impl<'self, R: Reader> HeaderValueByteIterator<'self, R> {
226
222
out
227
223
}
228
224
225
+ // It doesn't handle CR or LF at the moment since next() only returns SP or HT when it finds any LWS.
229
226
pub fn consume_optional_lws ( & mut self ) {
230
- match self . next ( ) {
231
- Some ( b) if b != ' ' as u8 => {
232
- // TODO: manually verify this holds
233
- assert_eq ! ( self . next_byte, None ) ;
234
- self . next_byte = Some ( b) ;
235
- } ,
236
- _ => ( ) ,
227
+ loop {
228
+ match self . next ( ) {
229
+ Some ( b) if b == SP || b == HT => {
230
+ continue ;
231
+ } ,
232
+ Some ( b) => {
233
+ // TODO: manually verify this holds
234
+ assert_eq ! ( self . next_byte, None ) ;
235
+ self . next_byte = Some ( b) ;
236
+ break ;
237
+ } ,
238
+ None => { break ; } ,
239
+ }
237
240
}
238
241
}
239
242
@@ -391,6 +394,7 @@ impl<'self, R: Reader> HeaderValueByteIterator<'self, R> {
391
394
Some ( b) if is_separator ( b) => {
392
395
assert_eq ! ( self . next_byte, None ) ;
393
396
self . next_byte = Some ( b) ;
397
+ //self.consume_optional_lws();
394
398
break ;
395
399
} ,
396
400
Some ( b) if is_token_item ( b) => {
@@ -431,83 +435,47 @@ impl<'self, R: Reader> Iterator<u8> for HeaderValueByteIterator<'self, R> {
431
435
}
432
436
} ;
433
437
match self . state {
434
- Normal | CompactingLWS if b == CR => {
435
- // It's OK to go to GotCR on CompactingLWS: if it ends up ``CR LF SP`` it'll get
436
- // back to CompactingLWS, and if it ends up ``CR LF`` we didn't need the
437
- // trailing whitespace anyway.
438
- self . state = GotCR ;
439
- continue ;
438
+ Normal if b == SP || b == HT => {
439
+ if self . at_start {
440
+ continue ;
441
+ } else {
442
+ return Some ( b ) ;
443
+ }
440
444
} ,
441
-
442
- // TODO: fix up these quoted-string rules, they're probably wrong (CRLF inside it?)
443
- Normal | CompactingLWS if b == DOUBLE_QUOTE => {
444
- self . at_start = false ;
445
- self . state = InsideQuotedString ;
446
- return Some ( b ) ;
445
+ Normal if b == CR => {
446
+ // RFC 2616 section 19.3, paragraph 3: "The line terminator for message-header
447
+ // fields is the sequence CRLF. However, we recommend that applications, when
448
+ // parsing such headers, recognize a single LF as a line terminator and ignore
449
+ // the leading CR."
450
+ continue ;
447
451
} ,
448
- InsideQuotedString if b == BACKSLASH => {
449
- self . state = InsideQuotedStringEscape ;
450
- return Some ( b) ;
451
- }
452
- InsideQuotedStringEscape => {
453
- self . state = InsideQuotedString ;
454
- return Some ( b) ;
455
- }
456
- InsideQuotedString if b == DOUBLE_QUOTE => {
457
- self . state = Normal ;
458
- return Some ( b) ;
459
- }
460
- InsideQuotedString => {
461
- return Some ( b) ;
462
- }
463
-
464
- GotCR | Normal if b == LF => {
465
- // TODO: check RFC 2616's precise rules, I think it does say that a server
466
- // should also accept missing CR
467
- self . state = GotCRLF ;
452
+ Normal if b == LF => {
453
+ self . state = GotLF ;
468
454
continue ;
469
455
} ,
470
- GotCR => {
471
- // False alarm, CR without LF. Hmm... was it LWS then? TODO.
472
- // FIXME: this is BAD, but I'm dropping the CR for now;
473
- // when we can have yield it'd be better. Also again, check speck.
474
- self . next_byte = Some ( b) ;
456
+ GotLF if b == SP || b == HT => {
457
+ // Header value can be split into multiple lines.
458
+ // New line is a candidate if it starts with LWS (SP or HT).
459
+ // RFC2616 Section 4.2 paragraph 1:
460
+ // "... Header fields can be extended over multiple lines by preceding each extra line
461
+ // with at least one SP or HT."
462
+
475
463
self . state = Normal ;
476
- return Some ( CR ) ;
477
- } ,
478
- GotCRLF if b == SP || b == HT => {
479
- // CR LF SP is a suitable linear whitespace, so don't stop yet
480
- self . state = CompactingLWS ;
481
- continue ;
464
+ return Some ( b) ;
482
465
} ,
483
- GotCRLF => {
466
+ GotLF => {
484
467
// Ooh! We got to a genuine end of line, so we're done.
485
468
// But first, we must makes sure not to lose that byte.
486
469
self . next_byte = Some ( b) ;
487
470
self . state = Finished ;
488
471
return None ;
489
472
} ,
490
- Normal | CompactingLWS if b == SP || b == HT => {
491
- // Start or continue to compact linear whitespace
492
- self . state = CompactingLWS ;
493
- continue ;
494
- } ,
495
- CompactingLWS => {
496
- // End of LWS, compact it down to a single space, unless it's at the start.
497
- self . state = Normal ;
498
- if self . at_start {
499
- return Some ( b) ;
500
- } else {
501
- self . next_byte = Some ( b) ;
502
- return Some ( SP ) ;
503
- }
504
- } ,
505
473
Normal => {
506
474
self . at_start = false ;
507
475
return Some ( b) ;
508
476
} ,
509
477
Finished => unreachable ! ( ) ,
510
- } ;
478
+ }
511
479
}
512
480
}
513
481
}
@@ -517,8 +485,7 @@ impl<'self, R: Reader> Iterator<u8> for HeaderValueByteIterator<'self, R> {
517
485
*/
518
486
pub trait HeaderConvertible : Eq + Clone {
519
487
/**
520
- * Read a header value from an iterator over the raw value. That iterator compacts linear white
521
- * space to a single SP, so this static method should just expect a single SP. There will be no
488
+ * Read a header value from an iterator over the raw value. There will be no
522
489
* leading or trailing space, either; also the ``CR LF`` which would indicate the end of the
523
490
* header line in the stream is removed.
524
491
*
@@ -718,26 +685,7 @@ impl HeaderConvertible for Tm {
718
685
Err ( * ) => ( )
719
686
}
720
687
721
- // FIXME: remove this if/when I take automatic LWS collapsing out of
722
- // HeaderValueByteIterator.next(). This is a nasty hack. asctime is formatted with a leading
723
- // zero on a single-digit day of month, so:
724
- //
725
- // Sun Nov 6 08:49:37 1994
726
- //
727
- // But the LWS collapsing currently (and unsatisfactorily) employed breaks that.
728
- // There's no suitable format for this in strptime, so we must (gulp) add it back in.
729
-
730
- let mut asctime_value;
731
- if value[ 9 ] == ' ' as u8 {
732
- // Single digit day of month
733
- asctime_value = value. slice_to ( 8 ) . to_owned ( ) ;
734
- asctime_value. push_char ( ' ' ) ;
735
- asctime_value. push_str ( value. slice_from ( 8 ) ) ;
736
- } else {
737
- // Double digit day of month
738
- asctime_value = value. to_owned ( ) ;
739
- }
740
- match strptime ( asctime_value, "%c" ) { // ANSI C's asctime() format
688
+ match strptime ( value, "%c" ) { // ANSI C's asctime() format
741
689
Ok ( time) => Some ( time) ,
742
690
Err ( * ) => None
743
691
}
@@ -826,9 +774,11 @@ mod test {
826
774
#[test]
827
775
fn test_from_stream_asctime() {
828
776
assert_eq!(from_stream_with_str(" Sun Nov 6 08 : 49 : 37 1994 "), Some(sample_tm(~" ")));
829
- // According to the spec, this should fail; but at present it succeeds. See
830
- // HeaderConvertible::from_stream<Tm> above for more info.
831
- // This is here so that I don't leave slowing-down code in there when I fix it. :-)
777
+ }
778
+
779
+ #[test]
780
+ #[should_fail]
781
+ fn test_from_stream_asctime_remove_LWS() {
832
782
assert_eq!(from_stream_with_str(" Sun Nov 6 08 : 49 : 37 1994 "), Some(sample_tm(~" ")));
833
783
}
834
784
0 commit comments