Don't depend on package:utf (flutter#95)

rhencke · Apr 9, 2019 · 38756a0
1 parent eeed53e
commit 38756a0
Show file tree

Hide file tree

Showing 4 changed files with 8 additions and 280 deletions.
diff --git a/lib/src/char_encodings.dart b/lib/src/char_encodings.dart
@@ -1,4 +1,4 @@
-import 'utf.dart';
+import 'dart:convert' show ascii, utf8;
 
 // TODO(jmesserly): this function is conspicuously absent from dart:utf.
 /// Returns true if the [bytes] starts with a UTF-8 byte order mark.
@@ -20,29 +20,12 @@ bool hasUtf8Bom(List<int> bytes, [int offset = 0, int length]) {
 Iterable<int> decodeBytes(String encoding, List<int> bytes) {
   switch (encoding) {
     case 'ascii':
-      // TODO(jmesserly): this was taken from runtime/bin/string_stream.dart
-      for (int byte in bytes) {
-        if (byte > 127) {
-          // TODO(jmesserly): ideally this would be DecoderException, like the
-          // one thrown in runtime/bin/string_stream.dart, but we don't want to
-          // depend on dart:io.
-          throw FormatException("Illegal ASCII character $byte");
-        }
-      }
-      return bytes;
+      return ascii.decode(bytes).runes;
 
     case 'utf-8':
-      // NOTE: to match the behavior of the other decode functions, we eat the
-      // utf-8 BOM here.
-
-      var offset = 0;
-      var length = bytes.length;
-
-      if (hasUtf8Bom(bytes)) {
-        offset += 3;
-        length -= 3;
-      }
-      return decodeUtf8AsIterable(bytes, offset, length);
+      // NOTE: To match the behavior of the other decode functions, we eat the
+      // UTF-8 BOM here. This is the default behavior of `utf8.decode`.
+      return utf8.decode(bytes).runes;
 
     default:
       throw ArgumentError('Encoding $encoding not supported');
@@ -53,20 +36,5 @@ Iterable<int> decodeBytes(String encoding, List<int> bytes) {
 /// Returns the code points for the [input]. This works like [String.charCodes]
 /// but it decodes UTF-16 surrogate pairs.
 List<int> toCodepoints(String input) {
-  var newCodes = <int>[];
-  for (int i = 0; i < input.length; i++) {
-    var c = input.codeUnitAt(i);
-    if (0xD800 <= c && c <= 0xDBFF) {
-      int next = i + 1;
-      if (next < input.length) {
-        var d = input.codeUnitAt(next);
-        if (0xDC00 <= d && d <= 0xDFFF) {
-          c = 0x10000 + ((c - 0xD800) << 10) + (d - 0xDC00);
-          i = next;
-        }
-      }
-    }
-    newCodes.add(c);
-  }
-  return newCodes;
+  return input.runes.toList();
 }
diff --git a/lib/src/utf.dart b/lib/src/utf.dart
diff --git a/pubspec.yaml b/pubspec.yaml
@@ -1,5 +1,5 @@
 name: html
-version: 0.14.0
+version: 0.14.1-dev
 
 description: APIs for parsing and manipulating HTML content outside the browser.
 author: Dart Team <[email protected]>
@@ -16,4 +16,3 @@ dev_dependencies:
   path: ^1.6.2
   pedantic: ^1.3.0
   test: ^1.3.0
-  utf: '>=0.9.0 <0.10.0'
diff --git a/test/tokenizer_test.dart b/test/tokenizer_test.dart
@@ -7,10 +7,8 @@ import 'dart:io';
 import 'dart:mirrors';
 import 'package:path/path.dart' as pathos;
 import 'package:test/test.dart';
-import 'package:html/src/char_encodings.dart';
 import 'package:html/src/token.dart';
 import 'package:html/src/tokenizer.dart';
-import 'package:utf/utf.dart';
 import 'support.dart';
 
 class TokenizerTestParser {
@@ -24,7 +22,7 @@ class TokenizerTestParser {
 
   List parse(String str) {
     // Note: we need to pass bytes to the tokenizer if we want it to handle BOM.
-    var bytes = codepointsToUtf8(toCodepoints(str));
+    var bytes = utf8.encode(str);
     var tokenizer = HtmlTokenizer(bytes, encoding: 'utf-8');
     outputTokens = [];