|
| 1 | +// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file |
| 2 | +// for details. All rights reserved. Use of this source code is governed by a |
| 3 | +// BSD-style license that can be found in the LICENSE file. |
| 4 | + |
| 5 | +import 'dart:convert'; |
| 6 | + |
| 7 | +import 'package:convert/convert.dart'; |
| 8 | +import 'package:crypto/crypto.dart'; |
| 9 | +import 'package:string_scanner/string_scanner.dart'; |
| 10 | + |
| 11 | +import 'media_type.dart'; |
| 12 | +import 'scan.dart'; |
| 13 | +import 'utils.dart'; |
| 14 | + |
/// A pattern for URI-encoded linear whitespace.
///
/// Like [whitespace] from scan.dart, except that it matches the
/// percent-encoded forms (`%20`, `%09`, and an optional leading `%0D%0A`)
/// rather than the literal characters.
final RegExp _whitespace = new RegExp(r'(?:(?:%0D%0A)?(?:%20|%09)+)*');

/// A codec that UTF-8 encodes a string and then percent-encodes the resulting
/// bytes; decoding applies the two steps in reverse.
final Codec<String, String> _utf8Percent = UTF8.fuse(percent);
| 21 | + |
/// A class representing a `data:` URI that provides access to its [mediaType]
/// and the [data] it contains.
///
/// Data can be encoded as a `data:` URI using [encode] or [encodeString], and
/// decoded using [decode].
///
/// This implementation is based on [RFC 2397][rfc], but as that RFC is
/// [notoriously ambiguous][ambiguities], some judgment calls have been made.
/// This class tries to match browsers' data URI logic, to ensure that it can
/// losslessly parse its own output, and to accept as much input as it can make
/// sense of. A balance has been struck between these goals so that while none
/// of them have been accomplished perfectly, all of them are close enough for
/// practical use.
///
/// [rfc]: http://tools.ietf.org/html/rfc2397
/// [ambiguities]: https://simonsapin.github.io/data-urls/
///
/// Some particular notes on the behavior:
///
/// * When encoding, all characters that are not [unreserved][] in the type,
///   subtype, parameter names, and parameter values of media types are
///   percent-encoded using UTF-8.
///
/// * When decoding, the type, subtype, parameter names, and parameter values of
///   media types are percent-decoded using UTF-8. Parameter values are allowed
///   to contain non-token characters once decoded, but the other tokens are
///   not.
///
/// * As per the spec, quoted-string parameters are not supported when decoding.
///
/// * Query components are included in the decoding algorithm, but fragments are
///   not.
///
/// * Invalid media types and parameters will raise exceptions when decoding.
///   This is standard for Dart parsers but contrary to browser behavior.
///
/// * The URL and filename-safe base64 alphabet is accepted when decoding but
///   never emitted when encoding, since browsers don't support it.
///
/// [unreserved]: https://tools.ietf.org/html/rfc3986#section-2.3
class DataUri implements Uri {
  /// The inner URI to which all [Uri] methods are forwarded.
  final Uri _inner;

  /// The byte data contained in the data URI.
  final List<int> data;

  /// The media type declared for the data URI.
  ///
  /// This defaults to `text/plain;charset=US-ASCII`.
  final MediaType mediaType;

  /// The encoding declared by the `charset` parameter in [mediaType].
  ///
  /// If [mediaType] has no `charset` parameter, this defaults to [ASCII]. If
  /// the `charset` parameter declares an encoding that can't be found using
  /// [Encoding.getByName], this returns `null`.
  Encoding get declaredEncoding {
    var charset = mediaType.parameters["charset"];
    return charset == null ? ASCII : Encoding.getByName(charset);
  }

  /// Creates a new data URI with the given [mediaType] and [data].
  ///
  /// If [base64] is `true` (the default), the data is base64-encoded;
  /// otherwise, it's percent-encoded.
  ///
  /// If [encoding] is passed or [mediaType] declares a `charset` parameter,
  /// [data] is encoded using that encoding. Otherwise, it's encoded using
  /// [UTF8] or [ASCII] depending on whether it contains any non-ASCII
  /// characters.
  ///
  /// If [mediaType] is omitted, it defaults to `text/plain`.
  ///
  /// Throws [ArgumentError] if [mediaType] and [encoding] disagree on the
  /// encoding, and an [UnsupportedError] if [mediaType] defines an encoding
  /// that's not supported by [Encoding.getByName].
  factory DataUri.encodeString(String data, {bool base64: true,
      MediaType mediaType, Encoding encoding}) {
    mediaType ??= new MediaType("text", "plain");

    var charset = mediaType.parameters["charset"];
    List<int> bytes;
    if (encoding != null) {
      if (charset == null) {
        // Record the explicitly requested encoding in the media type so that
        // the URI describes its own contents.
        mediaType = mediaType.change(parameters: {"charset": encoding.name});
      } else if (Encoding.getByName(charset) != encoding) {
        throw new ArgumentError("Media type charset '$charset' disagrees with "
            "encoding '${encoding.name}'.");
      }
      bytes = encoding.encode(data);
    } else if (charset != null) {
      encoding = Encoding.getByName(charset);
      if (encoding == null) {
        throw new UnsupportedError(
            'Unsupported media type charset "$charset".');
      }
      bytes = encoding.encode(data);
    } else if (data.codeUnits.every((codeUnit) => codeUnit < 0x80)) {
      // If the data is pure ASCII, don't bother explicitly defining a charset.
      bytes = data.codeUnits;
    } else {
      // If the data isn't pure ASCII, default to UTF-8.
      bytes = UTF8.encode(data);
      mediaType = mediaType.change(parameters: {"charset": "utf-8"});
    }

    return new DataUri.encode(bytes, base64: base64, mediaType: mediaType);
  }

  /// Creates a new data URI with the given [mediaType] and [data].
  ///
  /// If [base64] is `true` (the default), the data is base64-encoded;
  /// otherwise, it's percent-encoded.
  ///
  /// If [mediaType] is omitted, it defaults to `text/plain`.
  factory DataUri.encode(List<int> data, {bool base64: true,
      MediaType mediaType}) {
    mediaType ??= new MediaType('text', 'plain');

    var buffer = new StringBuffer();

    // Manually stringify the media type because [section 3][rfc] requires that
    // parameter values should have non-token characters URL-escaped rather than
    // emitting them as quoted-strings. This also allows us to omit text/plain
    // if possible.
    //
    // [rfc]: http://tools.ietf.org/html/rfc2397#section-3
    if (mediaType.type != 'text' || mediaType.subtype != 'plain') {
      buffer.write(_utf8Percent.encode(mediaType.type));
      buffer.write("/");
      buffer.write(_utf8Percent.encode(mediaType.subtype));
    }

    mediaType.parameters.forEach((attribute, value) {
      buffer.write(";${_utf8Percent.encode(attribute)}=");
      buffer.write(_utf8Percent.encode(value));
    });

    if (base64) {
      buffer.write(";base64,");
      // *Don't* use the URL-safe encoding scheme, since browsers don't actually
      // support it.
      buffer.write(CryptoUtils.bytesToBase64(data));
    } else {
      buffer.write(",");
      buffer.write(percent.encode(data));
    }

    return new DataUri._(data, mediaType,
        new Uri(scheme: 'data', path: buffer.toString()));
  }

  /// Decodes [uri] to make its [data] and [mediaType] available.
  ///
  /// [uri] may be a [Uri] or a [String].
  ///
  /// Throws an [ArgumentError] if [uri] is an invalid type or has a scheme
  /// other than `data:`. Throws a [FormatException] if parsing fails.
  factory DataUri.decode(uri) {
    // Normalize the dynamically-typed argument into a statically-typed [Uri]
    // so the rest of the method doesn't rely on dynamic member access.
    Uri url;
    if (uri is String) {
      url = Uri.parse(uri);
    } else if (uri is Uri) {
      url = uri;
    } else {
      throw new ArgumentError.value(uri, "uri", "Must be a String or a Uri.");
    }

    if (url.scheme != 'data') {
      throw new ArgumentError.value(url, "uri", "Can only decode a data: URI.");
    }

    return wrapFormatException("data URI", url.toString(), () {
      // Remove the fragment, as per https://simonsapin.github.io/data-urls/.
      // TODO(nweiz): Use Uri.removeFragment once sdk#24593 is fixed.
      var string = url.toString();
      var fragment = string.indexOf('#');
      if (fragment != -1) string = string.substring(0, fragment);
      var scanner = new StringScanner(string);
      scanner.expect('data:');

      // Manually scan the media type for three reasons:
      //
      // * Media type parameter values that aren't valid tokens are URL-encoded
      //   rather than quoted.
      //
      // * The media type may be omitted without omitting the parameters.
      //
      // * We need to be able to stop once we reach `;base64,`, even though at
      //   first it looks like a parameter.
      String type;
      String subtype;
      var implicitType = false;
      if (scanner.scan(token)) {
        type = _verifyToken(scanner);
        scanner.expect('/');
        subtype = _expectToken(scanner);
      } else {
        type = 'text';
        subtype = 'plain';
        implicitType = true;
      }

      // Scan the parameters, up through ";base64" or a comma.
      var parameters = <String, String>{};
      var base64 = false;
      while (scanner.scan(';')) {
        var attribute = _expectToken(scanner);

        if (attribute != 'base64') {
          scanner.expect('=');
        } else if (!scanner.scan('=')) {
          // A bare "base64" with no "=" is the encoding marker rather than a
          // parameter named "base64", and it ends the parameter list.
          base64 = true;
          break;
        }

        // Don't use [_expectToken] because the value uses percent-encoding to
        // escape non-token characters.
        scanner.expect(token);
        parameters[attribute] = _utf8Percent.decode(scanner.lastMatch[0]);
      }
      scanner.expect(',');

      // Only a URI that declares neither a type nor any parameters gets the
      // default charset.
      if (implicitType && parameters.isEmpty) {
        parameters = {"charset": "US-ASCII"};
      }

      var mediaType = new MediaType(type, subtype, parameters);

      var data = base64
          ? CryptoUtils.base64StringToBytes(scanner.rest)
          : percent.decode(scanner.rest);

      return new DataUri._(data, mediaType, url);
    });
  }

  /// Returns the percent-decoded value of the last MIME token scanned by
  /// [scanner].
  ///
  /// Throws a [FormatException] if it's not a valid token after
  /// percent-decoding.
  static String _verifyToken(StringScanner scanner) {
    var value = _utf8Percent.decode(scanner.lastMatch[0]);
    if (!value.contains(nonToken)) return value;
    scanner.error("Invalid token.");
    return null;
  }

  /// Scans [scanner] through a MIME token and returns its percent-decoded
  /// value.
  ///
  /// Throws a [FormatException] if it's not a valid token after
  /// percent-decoding.
  static String _expectToken(StringScanner scanner) {
    scanner.expect(token, name: "a token");
    return _verifyToken(scanner);
  }

  /// Creates a data URI from already-computed parts, without validating that
  /// [_inner] actually encodes [data] and [mediaType].
  DataUri._(this.data, this.mediaType, this._inner);

  /// Returns the decoded [data] decoded using [encoding].
  ///
  /// [encoding] defaults to [declaredEncoding]. If the declared encoding isn't
  /// supported by [Encoding.getByName] and [encoding] isn't passed, this throws
  /// an [UnsupportedError].
  String dataAsString({Encoding encoding}) {
    encoding ??= declaredEncoding;
    if (encoding == null) {
      throw new UnsupportedError(
          'Unsupported media type charset '
          '"${mediaType.parameters["charset"]}".');
    }

    return encoding.decode(data);
  }

  // Everything below forwards directly to [_inner] to satisfy the [Uri]
  // interface.

  String get scheme => _inner.scheme;
  String get authority => _inner.authority;
  String get userInfo => _inner.userInfo;
  String get host => _inner.host;
  int get port => _inner.port;
  String get path => _inner.path;
  String get query => _inner.query;
  String get fragment => _inner.fragment;
  Uri replace({String scheme, String userInfo, String host, int port,
          String path, Iterable<String> pathSegments, String query,
          Map<String, String> queryParameters, String fragment}) =>
      _inner.replace(
          scheme: scheme, userInfo: userInfo, host: host, port: port,
          path: path, pathSegments: pathSegments, query: query,
          queryParameters: queryParameters, fragment: fragment);
  Uri removeFragment() => _inner.removeFragment();
  List<String> get pathSegments => _inner.pathSegments;
  Map<String, String> get queryParameters => _inner.queryParameters;
  Uri normalizePath() => _inner.normalizePath();
  bool get isAbsolute => _inner.isAbsolute;
  Uri resolve(String reference) => _inner.resolve(reference);
  Uri resolveUri(Uri reference) => _inner.resolveUri(reference);
  bool get hasScheme => _inner.hasScheme;
  bool get hasAuthority => _inner.hasAuthority;
  bool get hasPort => _inner.hasPort;
  bool get hasQuery => _inner.hasQuery;
  bool get hasFragment => _inner.hasFragment;
  bool get hasEmptyPath => _inner.hasEmptyPath;
  bool get hasAbsolutePath => _inner.hasAbsolutePath;
  String get origin => _inner.origin;
  String toFilePath({bool windows}) => _inner.toFilePath(windows: windows);
  String toString() => _inner.toString();
  bool operator==(other) => _inner == other;
  int get hashCode => _inner.hashCode;
}
0 commit comments