Skip to content

Commit 4ef2695

Browse files
committed
Add a DataUri class.
This supports encoding and decoding data URIs, using both bytes and strings. [email protected] Review URL: https://codereview.chromium.org//1390353008 .
1 parent 0aea125 commit 4ef2695

File tree

5 files changed

+577
-3
lines changed

5 files changed

+577
-3
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
## 1.1.0
22

3+
* Added a `DataUri` class for encoding and decoding data URIs.
4+
35
* The MIME spec says that media types and their parameter names are
46
case-insensitive. Accordingly, `MediaType` now uses a case-insensitive map for
57
its parameters and its `type` and `subtype` fields are now always lowercase.

lib/http_parser.dart

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ library http_parser;
66

77
export 'src/authentication_challenge.dart';
88
export 'src/case_insensitive_map.dart';
9+
export 'src/data_uri.dart';
910
export 'src/http_date.dart';
1011
export 'src/media_type.dart';
1112
export 'src/web_socket.dart';

lib/src/data_uri.dart

Lines changed: 328 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,328 @@
1+
// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
2+
// for details. All rights reserved. Use of this source code is governed by a
3+
// BSD-style license that can be found in the LICENSE file.
4+
5+
import 'dart:convert';
6+
7+
import 'package:convert/convert.dart';
8+
import 'package:crypto/crypto.dart';
9+
import 'package:string_scanner/string_scanner.dart';
10+
11+
import 'media_type.dart';
12+
import 'scan.dart';
13+
import 'utils.dart';
14+
15+
/// Like [whitespace] from scan.dart, except that it matches URI-encoded
16+
/// whitespace rather than literal characters.
///
/// Each repetition is an optional percent-encoded CRLF (`%0D%0A`) followed by
/// one or more percent-encoded spaces (`%20`) or tabs (`%09`). Because the
/// whole group is starred, the pattern also matches the empty string.
// NOTE(review): nothing in this file as shown references `_whitespace` --
// confirm whether it is still needed.
17+
final _whitespace = new RegExp(r'(?:(?:%0D%0A)?(?:%20|%09)+)*');
18+
19+
/// A converter for percent encoding strings using UTF-8.
///
/// Encoding turns a string into UTF-8 bytes and then percent-encodes those
/// bytes; decoding applies the same two steps in reverse.
20+
final _utf8Percent = UTF8.fuse(percent);
21+
22+
/// A class representing a `data:` URI that provides access to its [mediaType]
/// and the [data] it contains.
///
/// Data can be encoded as a `data:` URI using [encode] or [encodeString], and
/// decoded using [decode].
///
/// This implementation is based on [RFC 2397][rfc], but as that RFC is
/// [notoriously ambiguous][ambiguities], some judgment calls have been made.
/// This class tries to match browsers' data URI logic, to ensure that it can
/// losslessly parse its own output, and to accept as much input as it can make
/// sense of. A balance has been struck between these goals so that while none
/// of them have been accomplished perfectly, all of them are close enough for
/// practical use.
///
/// [rfc]: http://tools.ietf.org/html/rfc2397
/// [ambiguities]: https://simonsapin.github.io/data-urls/
///
/// Some particular notes on the behavior:
///
/// * When encoding, all characters that are not [unreserved][] in the type,
///   subtype, parameter names, and parameter values of media types are
///   percent-encoded using UTF-8.
///
/// * When decoding, the type, subtype, parameter names, and parameter values of
///   media types are percent-decoded using UTF-8. Parameter values are allowed
///   to contain non-token characters once decoded, but the other tokens are
///   not.
///
/// * As per the spec, quoted-string parameters are not supported when decoding.
///
/// * Query components are included in the decoding algorithm, but fragments are
///   not.
///
/// * Invalid media types and parameters will raise exceptions when decoding.
///   This is standard for Dart parsers but contrary to browser behavior.
///
/// * The URL and filename-safe base64 alphabet is accepted when decoding but
///   never emitted when encoding, since browsers don't support it.
///
/// [unreserved]: https://tools.ietf.org/html/rfc3986#section-2.3
class DataUri implements Uri {
  /// The inner URI to which all [Uri] methods are forwarded.
  final Uri _inner;

  /// The byte data contained in the data URI.
  final List<int> data;

  /// The media type declared for the data URI.
  ///
  /// This defaults to `text/plain;charset=US-ASCII`.
  final MediaType mediaType;

  /// The encoding declared by the `charset` parameter in [mediaType].
  ///
  /// If [mediaType] has no `charset` parameter, this defaults to [ASCII]. If
  /// the `charset` parameter declares an encoding that can't be found using
  /// [Encoding.getByName], this returns `null`.
  Encoding get declaredEncoding {
    var charset = mediaType.parameters["charset"];
    return charset == null ? ASCII : Encoding.getByName(charset);
  }

  /// Creates a new data URI with the given [mediaType] and string [data].
  ///
  /// If [base64] is `true` (the default), the data is base64-encoded;
  /// otherwise, it's percent-encoded.
  ///
  /// If [encoding] is passed or [mediaType] declares a `charset` parameter,
  /// [data] is encoded using that encoding. Otherwise, it's encoded using
  /// [UTF8] or [ASCII] depending on whether it contains any non-ASCII
  /// characters.
  ///
  /// Throws [ArgumentError] if [mediaType] and [encoding] disagree on the
  /// encoding, and an [UnsupportedError] if [mediaType] defines an encoding
  /// that's not supported by [Encoding.getByName]. When [encoding] is passed
  /// together with an unrecognized `charset`, the disagreement check runs
  /// first, so an [ArgumentError] is thrown in that case.
  factory DataUri.encodeString(String data, {bool base64: true,
      MediaType mediaType, Encoding encoding}) {
    // Use the same null-defaulting form as [DataUri.encode].
    mediaType ??= new MediaType("text", "plain");

    var charset = mediaType.parameters["charset"];
    var bytes;
    if (encoding != null) {
      if (charset == null) {
        // Record the explicitly requested encoding in the media type so that
        // the resulting URI declares how its data was encoded.
        mediaType = mediaType.change(parameters: {"charset": encoding.name});
      } else if (Encoding.getByName(charset) != encoding) {
        throw new ArgumentError("Media type charset '$charset' disagrees with "
            "encoding '${encoding.name}'.");
      }
      bytes = encoding.encode(data);
    } else if (charset != null) {
      encoding = Encoding.getByName(charset);
      if (encoding == null) {
        throw new UnsupportedError(
            'Unsupported media type charset "$charset".');
      }
      bytes = encoding.encode(data);
    } else if (data.codeUnits.every((codeUnit) => codeUnit < 0x80)) {
      // If the data is pure ASCII, don't bother explicitly defining a charset.
      bytes = data.codeUnits;
    } else {
      // If the data isn't pure ASCII, default to UTF-8.
      bytes = UTF8.encode(data);
      mediaType = mediaType.change(parameters: {"charset": "utf-8"});
    }

    return new DataUri.encode(bytes, base64: base64, mediaType: mediaType);
  }

  /// Creates a new data URI with the given [mediaType] and byte [data].
  ///
  /// If [mediaType] is omitted it defaults to `text/plain`, which is left out
  /// of the generated URI text.
  ///
  /// If [base64] is `true` (the default), the data is base64-encoded;
  /// otherwise, it's percent-encoded.
  factory DataUri.encode(List<int> data, {bool base64: true,
      MediaType mediaType}) {
    mediaType ??= new MediaType('text', 'plain');

    var buffer = new StringBuffer();

    // Manually stringify the media type because [section 3][rfc] requires that
    // parameter values should have non-token characters URL-escaped rather than
    // emitting them as quoted-strings. This also allows us to omit text/plain
    // if possible.
    //
    // [rfc]: http://tools.ietf.org/html/rfc2397#section-3
    if (mediaType.type != 'text' || mediaType.subtype != 'plain') {
      buffer.write(_utf8Percent.encode(mediaType.type));
      buffer.write("/");
      buffer.write(_utf8Percent.encode(mediaType.subtype));
    }

    mediaType.parameters.forEach((attribute, value) {
      buffer.write(";${_utf8Percent.encode(attribute)}=");
      buffer.write(_utf8Percent.encode(value));
    });

    if (base64) {
      buffer.write(";base64,");
      // *Don't* use the URL-safe encoding scheme, since browsers don't actually
      // support it.
      buffer.write(CryptoUtils.bytesToBase64(data));
    } else {
      buffer.write(",");
      buffer.write(percent.encode(data));
    }

    return new DataUri._(data, mediaType,
        new Uri(scheme: 'data', path: buffer.toString()));
  }

  /// Decodes [uri] to make its [data] and [mediaType] available.
  ///
  /// [uri] may be a [Uri] or a [String].
  ///
  /// Throws an [ArgumentError] if [uri] is an invalid type or has a scheme
  /// other than `data:`. Throws a [FormatException] if parsing fails.
  factory DataUri.decode(uri) {
    if (uri is String) {
      uri = Uri.parse(uri);
    } else if (uri is! Uri) {
      throw new ArgumentError.value(uri, "uri", "Must be a String or a Uri.");
    }

    if (uri.scheme != 'data') {
      throw new ArgumentError.value(uri, "uri", "Can only decode a data: URI.");
    }

    return wrapFormatException("data URI", uri.toString(), () {
      // Remove the fragment, as per https://simonsapin.github.io/data-urls/.
      // TODO(nweiz): Use Uri.removeFragment once sdk#24593 is fixed.
      var string = uri.toString();
      var fragment = string.indexOf('#');
      if (fragment != -1) string = string.substring(0, fragment);
      var scanner = new StringScanner(string);
      scanner.expect('data:');

      // Manually scan the media type for three reasons:
      //
      // * Media type parameter values that aren't valid tokens are URL-encoded
      //   rather than quoted.
      //
      // * The media type may be omitted without omitting the parameters.
      //
      // * We need to be able to stop once we reach `;base64,`, even though at
      //   first it looks like a parameter.
      var type;
      var subtype;
      var implicitType = false;
      if (scanner.scan(token)) {
        type = _verifyToken(scanner);
        scanner.expect('/');
        subtype = _expectToken(scanner);
      } else {
        type = 'text';
        subtype = 'plain';
        implicitType = true;
      }

      // Scan the parameters, up through ";base64" or a comma.
      var parameters = {};
      var base64 = false;
      while (scanner.scan(';')) {
        var attribute = _expectToken(scanner);

        if (attribute != 'base64') {
          scanner.expect('=');
        } else if (!scanner.scan('=')) {
          // A bare `;base64` (no `=value`) is the encoding marker, not a
          // parameter, and it ends the parameter list.
          base64 = true;
          break;
        }

        // Don't use [_expectToken] because the value uses percent-encoding to
        // escape non-token characters.
        scanner.expect(token);
        parameters[attribute] = _utf8Percent.decode(scanner.lastMatch[0]);
      }
      scanner.expect(',');

      // With neither a media type nor parameters, fall back to the default
      // charset documented on [mediaType].
      if (implicitType && parameters.isEmpty) {
        parameters = {"charset": "US-ASCII"};
      }

      var mediaType = new MediaType(type, subtype, parameters);

      var data = base64
          ? CryptoUtils.base64StringToBytes(scanner.rest)
          : percent.decode(scanner.rest);

      return new DataUri._(data, mediaType, uri);
    });
  }

  /// Returns the percent-decoded value of the last MIME token scanned by
  /// [scanner].
  ///
  /// Throws a [FormatException] if it's not a valid token after
  /// percent-decoding.
  static String _verifyToken(StringScanner scanner) {
    var value = _utf8Percent.decode(scanner.lastMatch[0]);
    if (!value.contains(nonToken)) return value;
    scanner.error("Invalid token.");
    return null;
  }

  /// Scans [scanner] through a MIME token and returns its percent-decoded
  /// value.
  ///
  /// Throws a [FormatException] if it's not a valid token after
  /// percent-decoding.
  static String _expectToken(StringScanner scanner) {
    scanner.expect(token, name: "a token");
    return _verifyToken(scanner);
  }

  /// Creates a data URI from already-computed parts.
  ///
  /// [_inner] is the URI that every [Uri] member below is forwarded to.
  DataUri._(this.data, this.mediaType, this._inner);

  /// Returns [data] decoded as a string using [encoding].
  ///
  /// [encoding] defaults to [declaredEncoding]. If the declared encoding isn't
  /// supported by [Encoding.getByName] and [encoding] isn't passed, this throws
  /// an [UnsupportedError].
  String dataAsString({Encoding encoding}) {
    encoding ??= declaredEncoding;
    if (encoding == null) {
      throw new UnsupportedError(
          'Unsupported media type charset '
          '"${mediaType.parameters["charset"]}".');
    }

    return encoding.decode(data);
  }

  // The remaining members implement [Uri] by forwarding to [_inner]. Members
  // that return a [Uri] return whatever [_inner] produces, not a [DataUri].

  String get scheme => _inner.scheme;
  String get authority => _inner.authority;
  String get userInfo => _inner.userInfo;
  String get host => _inner.host;
  int get port => _inner.port;
  String get path => _inner.path;
  String get query => _inner.query;
  String get fragment => _inner.fragment;
  Uri replace({String scheme, String userInfo, String host, int port,
          String path, Iterable<String> pathSegments, String query,
          Map<String, String> queryParameters, String fragment}) =>
      _inner.replace(
          scheme: scheme, userInfo: userInfo, host: host, port: port,
          path: path, pathSegments: pathSegments, query: query,
          queryParameters: queryParameters, fragment: fragment);
  Uri removeFragment() => _inner.removeFragment();
  List<String> get pathSegments => _inner.pathSegments;
  Map<String, String> get queryParameters => _inner.queryParameters;
  Uri normalizePath() => _inner.normalizePath();
  bool get isAbsolute => _inner.isAbsolute;
  Uri resolve(String reference) => _inner.resolve(reference);
  Uri resolveUri(Uri reference) => _inner.resolveUri(reference);
  bool get hasScheme => _inner.hasScheme;
  bool get hasAuthority => _inner.hasAuthority;
  bool get hasPort => _inner.hasPort;
  bool get hasQuery => _inner.hasQuery;
  bool get hasFragment => _inner.hasFragment;
  bool get hasEmptyPath => _inner.hasEmptyPath;
  bool get hasAbsolutePath => _inner.hasAbsolutePath;
  String get origin => _inner.origin;
  String toFilePath({bool windows}) => _inner.toFilePath(windows: windows);
  String toString() => _inner.toString();

  // Equality and hashing are delegated to the inner URI as well.
  bool operator==(other) => _inner == other;
  int get hashCode => _inner.hashCode;
}

pubspec.yaml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,17 @@
11
name: http_parser
2-
version: 1.1.0-dev
2+
version: 1.1.0
33
author: "Dart Team <misc@dartlang.org>"
44
homepage: https://github.com/dart-lang/http_parser
55
description: >
66
A platform-independent package for parsing and serializing HTTP formats.
77
dependencies:
8-
crypto: "^0.9.0"
8+
convert: "^1.0.0"
99
collection: ">=0.9.1 <2.0.0"
10+
crypto: "^0.9.0"
1011
source_span: "^1.0.0"
1112
string_scanner: ">=0.0.0 <0.2.0"
1213
dev_dependencies:
14+
charcode: "^1.1.0"
1315
test: "^0.12.0"
1416
environment:
15-
sdk: ">=1.8.0 <2.0.0"
17+
sdk: ">=1.12.0 <2.0.0"

0 commit comments

Comments
 (0)