Skip to content

Commit d2d682e

Browse files
committed
Merge pull request sfackler#29 from eerden/removeLWS
Remove LWS collapsing
2 parents e919c54 + df0655e commit d2d682e

File tree

3 files changed

+53
-102
lines changed

3 files changed

+53
-102
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
*~
22
*.dSYM/
3+
*.swp
34
src/libhttp/codegen/codegen
45
src/libhttp/generated/
56
build/

src/libhttp/headers/connection.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ fn test_connection_7() {
8383
::headers::test_utils::assert_interpretation_correct("foo", ~[Token(~"Foo")]);
8484
}
8585
#[test]
86-
#[ignore(reason="lws collapse bug")]
86+
//#[ignore(reason="lws collapse bug")]
8787
fn test_connection_8() {
8888
::headers::test_utils::assert_interpretation_correct("close \r\n , keep-ALIVE", ~[Close, Token(~"Keep-Alive")]);
8989
}
@@ -100,17 +100,17 @@ fn test_connection_11() {
100100
::headers::test_utils::assert_interpretation_correct("CLOSE", Close);
101101
}
102102
#[test]
103-
#[ignore(reason="lws collapse bug")]
103+
//#[ignore(reason="lws collapse bug")]
104104
fn test_connection_12() {
105105
::headers::test_utils::assert_invalid::<~[Connection]>("foo bar");
106106
}
107107
#[test]
108-
#[ignore(reason="lws collapse bug")]
108+
//#[ignore(reason="lws collapse bug")]
109109
fn test_connection_13() {
110110
::headers::test_utils::assert_invalid::<~[Connection]>("foo bar");
111111
}
112112
#[test]
113-
#[ignore(reason="lws collapse bug")]
113+
//#[ignore(reason="lws collapse bug")]
114114
fn test_connection_14() {
115115
::headers::test_utils::assert_invalid::<~[Connection]>("foo, bar baz");
116116
}

src/libhttp/headers/mod.rs

Lines changed: 48 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use std::rt::io::{Reader, Writer};
88
use std::rt::io::extensions::ReaderUtil;
99
use extra::time::{Tm, strptime};
1010
use extra::url::Url;
11-
use rfc2616::{is_token_item, is_separator, CR, LF, SP, HT, COLON, DOUBLE_QUOTE, BACKSLASH};
11+
use rfc2616::{is_token_item, is_separator, CR, LF, SP, HT, COLON};
1212
use method::Method;
1313

1414
use self::serialization_utils::{normalise_header_name};
@@ -132,11 +132,7 @@ pub fn header_enum_from_stream<R: Reader, E: HeaderEnum>(reader: &mut R)
132132
#[deriving(Eq)]
133133
enum HeaderValueByteIteratorState {
134134
Normal, // Anything other than the rest.
135-
InsideQuotedString, // no LWS compacting here. TODO: check spec on CR LF SP in quoted-string.
136-
InsideQuotedStringEscape, // don't let a " close quoted-string if it comes next
137-
CompactingLWS, // Got at least one linear white space character; turning it into just one SP
138-
GotCR, // Last character was CR
139-
GotCRLF, // Last two characters were CR LF
135+
GotLF, // Last two characters were CR LF
140136
Finished, // Finished, so next() should always return ``None`` immediately (no side effects)
141137
}
142138

@@ -226,14 +222,21 @@ impl<'self, R: Reader> HeaderValueByteIterator<'self, R> {
226222
out
227223
}
228224

225+
// It doesn't handle CR or LF at the moment since next() only returns SP or HT when it finds any LWS.
229226
pub fn consume_optional_lws(&mut self) {
230-
match self.next() {
231-
Some(b) if b != ' ' as u8 => {
232-
// TODO: manually verify this holds
233-
assert_eq!(self.next_byte, None);
234-
self.next_byte = Some(b);
235-
},
236-
_ => (),
227+
loop {
228+
match self.next() {
229+
Some(b) if b == SP || b == HT => {
230+
continue;
231+
},
232+
Some(b) => {
233+
// TODO: manually verify this holds
234+
assert_eq!(self.next_byte, None);
235+
self.next_byte = Some(b);
236+
break;
237+
},
238+
None => { break; },
239+
}
237240
}
238241
}
239242

@@ -391,6 +394,7 @@ impl<'self, R: Reader> HeaderValueByteIterator<'self, R> {
391394
Some(b) if is_separator(b) => {
392395
assert_eq!(self.next_byte, None);
393396
self.next_byte = Some(b);
397+
//self.consume_optional_lws();
394398
break;
395399
},
396400
Some(b) if is_token_item(b) => {
@@ -431,83 +435,47 @@ impl<'self, R: Reader> Iterator<u8> for HeaderValueByteIterator<'self, R> {
431435
}
432436
};
433437
match self.state {
434-
Normal | CompactingLWS if b == CR => {
435-
// It's OK to go to GotCR on CompactingLWS: if it ends up ``CR LF SP`` it'll get
436-
// back to CompactingLWS, and if it ends up ``CR LF`` we didn't need the
437-
// trailing whitespace anyway.
438-
self.state = GotCR;
439-
continue;
438+
Normal if b == SP || b == HT => {
439+
if self.at_start {
440+
continue;
441+
} else {
442+
return Some(b);
443+
}
440444
},
441-
442-
// TODO: fix up these quoted-string rules, they're probably wrong (CRLF inside it?)
443-
Normal | CompactingLWS if b == DOUBLE_QUOTE => {
444-
self.at_start = false;
445-
self.state = InsideQuotedString;
446-
return Some(b);
445+
Normal if b == CR => {
446+
// RFC 2616 section 19.3, paragraph 3: "The line terminator for message-header
447+
// fields is the sequence CRLF. However, we recommend that applications, when
448+
// parsing such headers, recognize a single LF as a line terminator and ignore
449+
// the leading CR."
450+
continue;
447451
},
448-
InsideQuotedString if b == BACKSLASH => {
449-
self.state = InsideQuotedStringEscape;
450-
return Some(b);
451-
}
452-
InsideQuotedStringEscape => {
453-
self.state = InsideQuotedString;
454-
return Some(b);
455-
}
456-
InsideQuotedString if b == DOUBLE_QUOTE => {
457-
self.state = Normal;
458-
return Some(b);
459-
}
460-
InsideQuotedString => {
461-
return Some(b);
462-
}
463-
464-
GotCR | Normal if b == LF => {
465-
// TODO: check RFC 2616's precise rules, I think it does say that a server
466-
// should also accept missing CR
467-
self.state = GotCRLF;
452+
Normal if b == LF => {
453+
self.state = GotLF;
468454
continue;
469455
},
470-
GotCR => {
471-
// False alarm, CR without LF. Hmm... was it LWS then? TODO.
472-
// FIXME: this is BAD, but I'm dropping the CR for now;
473-
// when we can have yield it'd be better. Also again, check speck.
474-
self.next_byte = Some(b);
456+
GotLF if b == SP || b == HT => {
457+
// Header value can be split into multiple lines.
458+
// New line is a candidate if it starts with LWS (SP or HT).
459+
// RFC2616 Section 4.2 paragraph 1:
460+
// "... Header fields can be extended over multiple lines by preceding each extra line
461+
// with at least one SP or HT."
462+
475463
self.state = Normal;
476-
return Some(CR);
477-
},
478-
GotCRLF if b == SP || b == HT => {
479-
// CR LF SP is a suitable linear whitespace, so don't stop yet
480-
self.state = CompactingLWS;
481-
continue;
464+
return Some(b);
482465
},
483-
GotCRLF => {
466+
GotLF => {
484467
// Ooh! We got to a genuine end of line, so we're done.
485468
// But first, we must makes sure not to lose that byte.
486469
self.next_byte = Some(b);
487470
self.state = Finished;
488471
return None;
489472
},
490-
Normal | CompactingLWS if b == SP || b == HT => {
491-
// Start or continue to compact linear whitespace
492-
self.state = CompactingLWS;
493-
continue;
494-
},
495-
CompactingLWS => {
496-
// End of LWS, compact it down to a single space, unless it's at the start.
497-
self.state = Normal;
498-
if self.at_start {
499-
return Some(b);
500-
} else {
501-
self.next_byte = Some(b);
502-
return Some(SP);
503-
}
504-
},
505473
Normal => {
506474
self.at_start = false;
507475
return Some(b);
508476
},
509477
Finished => unreachable!(),
510-
};
478+
}
511479
}
512480
}
513481
}
@@ -517,8 +485,7 @@ impl<'self, R: Reader> Iterator<u8> for HeaderValueByteIterator<'self, R> {
517485
*/
518486
pub trait HeaderConvertible: Eq + Clone {
519487
/**
520-
* Read a header value from an iterator over the raw value. That iterator compacts linear white
521-
* space to a single SP, so this static method should just expect a single SP. There will be no
488+
* Read a header value from an iterator over the raw value. There will be no
522489
* leading or trailing space, either; also the ``CR LF`` which would indicate the end of the
523490
* header line in the stream is removed.
524491
*
@@ -718,26 +685,7 @@ impl HeaderConvertible for Tm {
718685
Err(*) => ()
719686
}
720687

721-
// FIXME: remove this if/when I take automatic LWS collapsing out of
722-
// HeaderValueByteIterator.next(). This is a nasty hack. asctime is formatted with a leading
723-
// zero on a single-digit day of month, so:
724-
//
725-
// Sun Nov 6 08:49:37 1994
726-
//
727-
// But the LWS collapsing currently (and unsatisfactorily) employed breaks that.
728-
// There's no suitable format for this in strptime, so we must (gulp) add it back in.
729-
730-
let mut asctime_value;
731-
if value[9] == ' ' as u8 {
732-
// Single digit day of month
733-
asctime_value = value.slice_to(8).to_owned();
734-
asctime_value.push_char(' ');
735-
asctime_value.push_str(value.slice_from(8));
736-
} else {
737-
// Double digit day of month
738-
asctime_value = value.to_owned();
739-
}
740-
match strptime(asctime_value, "%c") { // ANSI C's asctime() format
688+
match strptime(value, "%c") { // ANSI C's asctime() format
741689
Ok(time) => Some(time),
742690
Err(*) => None
743691
}
@@ -826,9 +774,11 @@ mod test {
826774
#[test]
827775
fn test_from_stream_asctime() {
828776
assert_eq!(from_stream_with_str("Sun Nov 6 08:49:37 1994"), Some(sample_tm(~"")));
829-
// According to the spec, this should fail; but at present it succeeds. See
830-
// HeaderConvertible::from_stream<Tm> above for more info.
831-
// This is here so that I don't leave slowing-down code in there when I fix it. :-)
777+
}
778+
779+
#[test]
780+
#[should_fail]
781+
fn test_from_stream_asctime_remove_LWS() {
832782
assert_eq!(from_stream_with_str("Sun Nov 6 08:49:37 1994"), Some(sample_tm(~"")));
833783
}
834784

0 commit comments

Comments
 (0)