Skip to content

Commit 50e55f6

Browse files
authored
Add a Codec for the chunked transfer coding. (flutter#8)
1 parent 7f0467d commit 50e55f6

File tree

7 files changed

+698
-1
lines changed

7 files changed

+698
-1
lines changed

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
## 3.1.0
2+
3+
* Add `chunkedCoding`, a `Codec` that supports encoding and decoding the
4+
[chunked transfer coding][].
5+
6+
[chunked transfer coding]: https://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.6.1
7+
18
## 3.0.2
29

310
* Support `string_scanner` 1.0.0.

lib/http_parser.dart

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,6 @@
44

55
export 'src/authentication_challenge.dart';
66
export 'src/case_insensitive_map.dart';
7+
export 'src/chunked_coding.dart';
78
export 'src/http_date.dart';
89
export 'src/media_type.dart';

lib/src/chunked_coding.dart

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
// Copyright (c) 2016, the Dart project authors. Please see the AUTHORS file
2+
// for details. All rights reserved. Use of this source code is governed by a
3+
// BSD-style license that can be found in the LICENSE file.
4+
5+
import 'dart:convert';
6+
7+
import 'chunked_coding/encoder.dart';
8+
import 'chunked_coding/decoder.dart';
9+
10+
export 'chunked_coding/encoder.dart' hide chunkedCodingEncoder;
11+
export 'chunked_coding/decoder.dart' hide chunkedCodingDecoder;
12+
13+
/// The canonical instance of [ChunkedCodingCodec].
14+
const chunkedCoding = const ChunkedCodingCodec._();
15+
16+
/// A [Codec] for the [chunked transfer coding][].
///
/// [chunked transfer coding]: https://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.6.1
///
/// Each call to [ChunkedCodingEncoder.convert] or
/// [ChunkedCodingEncoder.startChunkedConversion] on the [encoder] produces
/// exactly *one* chunked message, so an end-of-message footer is always
/// appended once conversion has finished. Chunk extensions and trailing
/// headers are never generated.
///
/// The [decoder] likewise consumes exactly *one* chunked message and emits a
/// stream of byte arrays that must be concatenated to obtain the full payload
/// (like most Dart byte streams). A stream holding several chunked messages,
/// or chunked data mixed with other kinds of data, is not supported.
///
/// At present the [decoder] fails to parse chunk extensions and trailing
/// headers. It may be updated to silently ignore them in the future.
class ChunkedCodingCodec extends Codec<List<int>, List<int>> {
  const ChunkedCodingCodec._();

  /// The shared encoder instance used by this codec.
  @override
  ChunkedCodingEncoder get encoder {
    return chunkedCodingEncoder;
  }

  /// The shared decoder instance used by this codec.
  @override
  ChunkedCodingDecoder get decoder {
    return chunkedCodingDecoder;
  }
}
Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
// Copyright (c) 2016, the Dart project authors. Please see the AUTHORS file
2+
// for details. All rights reserved. Use of this source code is governed by a
3+
// BSD-style license that can be found in the LICENSE file.
4+
5+
import 'dart:convert';
6+
import 'dart:math' as math;
7+
import 'dart:typed_data';
8+
9+
import 'package:charcode/ascii.dart';
10+
import 'package:typed_data/typed_data.dart';
11+
12+
/// The canonical instance of [ChunkedCodingDecoder].
const chunkedCodingDecoder = const ChunkedCodingDecoder._();

/// A converter that decodes byte arrays encoded with the chunked transfer
/// coding, stripping the chunk size headers and the end-of-message footer.
class ChunkedCodingDecoder extends Converter<List<int>, List<int>> {
  const ChunkedCodingDecoder._();

  /// Decodes [bytes], which must contain one complete chunked message.
  ///
  /// Throws a [FormatException] if the parser has not reached its final state
  /// once all of [bytes] has been consumed.
  @override
  List<int> convert(List<int> bytes) {
    // The underlying sink is never touched by `_decode`, so `null` is safe
    // here; only the parsing state machine is needed.
    var decodingSink = new _Sink(null);
    var decoded = decodingSink._decode(bytes, 0, bytes.length);

    if (decodingSink._state != _State.end) {
      throw new FormatException(
          "Input ended unexpectedly.", bytes, bytes.length);
    }
    return decoded;
  }

  /// Returns a sink that decodes incrementally and forwards the decoded bytes
  /// to [sink].
  @override
  ByteConversionSink startChunkedConversion(Sink<List<int>> sink) {
    return new _Sink(sink);
  }
}
31+
32+
/// A conversion sink that decodes the chunked transfer coding.
class _Sink extends ByteConversionSinkBase {
  /// The underlying sink to which decoded byte arrays will be passed.
  final Sink<List<int>> _sink;

  /// The current state of the sink's parsing.
  var _state = _State.boundary;

  /// The number of body bytes still expected for the chunk being parsed.
  ///
  /// This is `null` until the first digit of the first size header has been
  /// parsed, accumulates the size while the header is being read, and is
  /// counted down to zero while the chunk body is consumed.
  int _size;

  _Sink(this._sink);

  void add(List<int> chunk) => addSlice(chunk, 0, chunk.length, false);

  void addSlice(List<int> chunk, int start, int end, bool isLast) {
    RangeError.checkValidRange(start, end, chunk.length);
    var output = _decode(chunk, start, end);
    // Header-only input produces no decoded bytes; don't forward empty lists.
    if (output.isNotEmpty) _sink.add(output);
    if (isLast) _close(chunk, end);
  }

  void close() => _close();

  /// Like [close], but includes [chunk] and [index] in the [FormatException] if
  /// one is thrown.
  ///
  /// Throws a [FormatException] if the end-of-message footer has not been
  /// fully parsed yet.
  void _close([List<int> chunk, int index]) {
    if (_state != _State.end) {
      throw new FormatException("Input ended unexpectedly.", chunk, index);
    }

    _sink.close();
  }

  /// Decodes the data in [bytes] from [start] to [end].
  ///
  /// Returns the decoded body bytes found in that range (possibly empty) and
  /// updates [_state] and [_size] so that parsing can resume with the next
  /// call. Throws a [FormatException] on malformed input or if data follows
  /// the end of the message.
  Uint8List _decode(List<int> bytes, int start, int end) {
    /// Throws a [FormatException] if `bytes[start] != char`. Uses [name] to
    /// describe the expected character in the exception text.
    assertCurrentChar(int char, String name) {
      if (bytes[start] != char) {
        throw new FormatException("Expected $name.", bytes, start);
      }
    }

    var buffer = new Uint8Buffer();
    while (start != end) {
      switch (_state) {
        case _State.boundary:
          // The first byte of a size header must be a hex digit.
          _size = _digitForByte(bytes, start);
          _state = _State.size;
          start++;
          break;

        case _State.size:
          if (bytes[start] == $cr) {
            _state = _State.beforeLF;
          } else {
            // Shift four bits left since a single hex digit contains four bits
            // of information.
            _size = (_size << 4) + _digitForByte(bytes, start);
          }
          start++;
          break;

        case _State.beforeLF:
          assertCurrentChar($lf, "LF");
          // A zero-length chunk marks the end of the message.
          _state = _size == 0 ? _State.endBeforeCR : _State.body;
          start++;
          break;

        case _State.body:
          // Consume as much of the chunk body as this input slice contains.
          var chunkEnd = math.min(end, start + _size);
          buffer.addAll(bytes, start, chunkEnd);
          _size -= chunkEnd - start;
          start = chunkEnd;
          // NOTE(review): the next size header is expected immediately after
          // the chunk data. RFC 2616 section 3.6.1 places a CRLF after
          // chunk-data; confirm whether that should be consumed here (this
          // would need matching changes in the encoder).
          if (_size == 0) _state = _State.boundary;
          break;

        case _State.endBeforeCR:
          assertCurrentChar($cr, "CR");
          _state = _State.endBeforeLF;
          start++;
          break;

        case _State.endBeforeLF:
          assertCurrentChar($lf, "LF");
          _state = _State.end;
          start++;
          break;

        case _State.end:
          throw new FormatException("Expected no more data.", bytes, start);
      }
    }
    return buffer.buffer.asUint8List(0, buffer.length);
  }

  /// Returns the hex digit (0 through 15) corresponding to the byte at
  /// [index] in [bytes].
  ///
  /// If the given byte isn't a hexadecimal ASCII character, throws a
  /// [FormatException].
  int _digitForByte(List<int> bytes, int index) {
    // If the byte is a numeral, get its value. XOR works because 0 in ASCII is
    // `0b110000` and the other numerals come after it in ascending order and
    // take up at most four bits.
    //
    // We check for digits first because it ensures there's only a single branch
    // for 10 out of 16 of the expected cases. We don't count the `digit >= 0`
    // check because branch prediction will always work on it for valid data.
    var byte = bytes[index];
    var digit = $0 ^ byte;
    if (digit <= 9) {
      if (digit >= 0) return digit;
    } else {
      // If the byte is an uppercase letter, convert it to lowercase. This works
      // because uppercase letters in ASCII are exactly `0b100000 = 0x20` less
      // than lowercase letters, so if we ensure that that bit is 1 we ensure that
      // the letter is lowercase.
      var letter = 0x20 | byte;
      if ($a <= letter && letter <= $f) return letter - $a + 10;
    }

    throw new FormatException(
        "Invalid hexadecimal byte 0x${byte.toRadixString(16).toUpperCase()}.",
        bytes, index);
  }
}
161+
162+
/// An enumeration of states that [_Sink] can exist in when decoding a chunked
/// message.
///
/// Each state carries only a human-readable name, which is returned by
/// [toString].
class _State {
  /// The parser has fully parsed one chunk and is expecting the header for the
  /// next chunk.
  ///
  /// Transitions to [size].
  static const boundary = const _State._("boundary");

  /// The parser has parsed at least one digit of the chunk size header, but has
  /// not yet parsed the `CR LF` sequence that indicates the end of that header.
  ///
  /// Transitions to [beforeLF].
  static const size = const _State._("size");

  /// The parser has parsed the chunk size header and the CR character after it,
  /// but not the LF.
  ///
  /// Transitions to [body] or [endBeforeCR].
  static const beforeLF = const _State._("before LF");

  /// The parser has parsed a chunk header and possibly some of the body, but
  /// still needs to consume more bytes.
  ///
  /// Transitions to [boundary].
  static const body = const _State._("body");

  /// The parser has parsed the final empty chunk but not the CR LF sequence
  /// that follows it.
  ///
  /// Transitions to [endBeforeLF].
  static const endBeforeCR = const _State._("end before CR");

  /// The parser has parsed the final empty chunk and the CR that follows it,
  /// but not the LF after that.
  ///
  /// Transitions to [end].
  static const endBeforeLF = const _State._("end before LF");

  /// The parser has parsed the final empty chunk as well as the CR LF that
  /// follows, and expects no more data.
  static const end = const _State._("end");

  /// The human-readable name of this state, returned by [toString].
  final String _name;

  const _State._(this._name);

  String toString() => _name;
}
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
// Copyright (c) 2016, the Dart project authors. Please see the AUTHORS file
2+
// for details. All rights reserved. Use of this source code is governed by a
3+
// BSD-style license that can be found in the LICENSE file.
4+
5+
import 'dart:convert';
6+
import 'dart:typed_data';
7+
8+
import 'package:charcode/ascii.dart';
9+
10+
/// The canonical instance of [ChunkedCodingEncoder].
const chunkedCodingEncoder = const ChunkedCodingEncoder._();

/// The zero-length chunk (`0 CR LF CR LF`) that signals the end of a chunked
/// message.
final _doneChunk = new Uint8List.fromList([$0, $cr, $lf, $cr, $lf]);

/// A converter that encodes byte arrays into chunks with size tags.
class ChunkedCodingEncoder extends Converter<List<int>, List<int>> {
  const ChunkedCodingEncoder._();

  /// Encodes all of [bytes] as a single, complete chunked message, including
  /// the end-of-message footer.
  @override
  List<int> convert(List<int> bytes) {
    return _convert(bytes, 0, bytes.length, isLast: true);
  }

  /// Returns a sink that encodes each added byte array as one chunk and
  /// forwards the result to [sink].
  @override
  ByteConversionSink startChunkedConversion(Sink<List<int>> sink) {
    return new _Sink(sink);
  }
}
26+
27+
/// A conversion sink that encodes data with the chunked transfer coding.
class _Sink extends ByteConversionSinkBase {
  /// The underlying sink to which encoded byte arrays will be passed.
  final Sink<List<int>> _sink;

  _Sink(this._sink);

  /// Encodes all of [chunk] as one chunk and forwards it to the underlying
  /// sink.
  @override
  void add(List<int> chunk) => _sink.add(_convert(chunk, 0, chunk.length));

  /// Encodes the slice of [chunk] from [start] to [end] as one chunk.
  ///
  /// If [isLast] is `true`, the end-of-message footer is appended and the
  /// underlying sink is closed.
  @override
  void addSlice(List<int> chunk, int start, int end, bool isLast) {
    RangeError.checkValidRange(start, end, chunk.length);

    var encoded = _convert(chunk, start, end, isLast: isLast);
    _sink.add(encoded);

    if (!isLast) return;
    _sink.close();
  }

  /// Emits the end-of-message footer and closes the underlying sink.
  @override
  void close() {
    _sink
      ..add(_doneChunk)
      ..close();
  }
}
49+
50+
/// Returns a list containing a chunked transfer coding size header followed
/// by the slice of [bytes] from [start] to [end].
///
/// If [isLast] is `true`, the footer that indicates that the chunked message
/// is complete is appended as well. An empty slice yields only that footer
/// when [isLast] is `true`, and an empty list otherwise.
List<int> _convert(List<int> bytes, int start, int end, {bool isLast: false}) {
  var payloadLength = end - start;
  if (payloadLength == 0) {
    if (isLast) return _doneChunk;
    return const [];
  }

  // The size header is the payload length written in hexadecimal ASCII.
  var header = payloadLength.toRadixString(16).codeUnits;

  // The payload begins after the header and its trailing CRLF (2 bytes).
  var bodyStart = header.length + 2;
  var bodyEnd = bodyStart + payloadLength;
  var totalLength = isLast ? bodyEnd + _doneChunk.length : bodyEnd;

  var result = new Uint8List(totalLength);
  result.setRange(0, header.length, header);
  result[header.length] = $cr;
  result[header.length + 1] = $lf;
  result.setRange(bodyStart, bodyEnd, bytes, start);
  if (isLast) result.setRange(bodyEnd, totalLength, _doneChunk);
  return result;
}

pubspec.yaml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
name: http_parser
2-
version: 3.0.3
2+
version: 3.1.0
33
author: "Dart Team <[email protected]>"
44
homepage: https://github.com/dart-lang/http_parser
55
description: >
66
A platform-independent package for parsing and serializing HTTP formats.
77
dependencies:
8+
charcode: "^1.1.0"
89
collection: ">=0.9.1 <2.0.0"
910
source_span: "^1.0.0"
1011
string_scanner: ">=0.0.0 <2.0.0"
12+
typed_data: "^1.1.0"
1113
dev_dependencies:
1214
test: "^0.12.0"
1315
environment:

0 commit comments

Comments
 (0)