Skip to content

Commit

Permalink
Don't depend on package:utf (flutter#95)
Browse files Browse the repository at this point in the history
  • Loading branch information
lrhn authored and kevmoo committed Apr 9, 2019
1 parent eeed53e commit 38756a0
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 280 deletions.
44 changes: 6 additions & 38 deletions lib/src/char_encodings.dart
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import 'utf.dart';
import 'dart:convert' show ascii, utf8;

// TODO(jmesserly): this function is conspicuously absent from dart:utf.
/// Returns true if the [bytes] starts with a UTF-8 byte order mark.
Expand All @@ -20,29 +20,12 @@ bool hasUtf8Bom(List<int> bytes, [int offset = 0, int length]) {
Iterable<int> decodeBytes(String encoding, List<int> bytes) {
switch (encoding) {
case 'ascii':
// TODO(jmesserly): this was taken from runtime/bin/string_stream.dart
for (int byte in bytes) {
if (byte > 127) {
// TODO(jmesserly): ideally this would be DecoderException, like the
// one thrown in runtime/bin/string_stream.dart, but we don't want to
// depend on dart:io.
throw FormatException("Illegal ASCII character $byte");
}
}
return bytes;
return ascii.decode(bytes).runes;

case 'utf-8':
// NOTE: to match the behavior of the other decode functions, we eat the
// utf-8 BOM here.

var offset = 0;
var length = bytes.length;

if (hasUtf8Bom(bytes)) {
offset += 3;
length -= 3;
}
return decodeUtf8AsIterable(bytes, offset, length);
// NOTE: To match the behavior of the other decode functions, we eat the
// UTF-8 BOM here. This is the default behavior of `utf8.decode`.
return utf8.decode(bytes).runes;

default:
throw ArgumentError('Encoding $encoding not supported');
Expand All @@ -53,20 +36,5 @@ Iterable<int> decodeBytes(String encoding, List<int> bytes) {
/// Returns the code points for the [input]. This works like [String.codeUnits]
/// but it decodes UTF-16 surrogate pairs.
List<int> toCodepoints(String input) {
var newCodes = <int>[];
for (int i = 0; i < input.length; i++) {
var c = input.codeUnitAt(i);
if (0xD800 <= c && c <= 0xDBFF) {
int next = i + 1;
if (next < input.length) {
var d = input.codeUnitAt(next);
if (0xDC00 <= d && d <= 0xDFFF) {
c = 0x10000 + ((c - 0xD800) << 10) + (d - 0xDC00);
i = next;
}
}
}
newCodes.add(c);
}
return newCodes;
return input.runes.toList();
}
237 changes: 0 additions & 237 deletions lib/src/utf.dart

This file was deleted.

3 changes: 1 addition & 2 deletions pubspec.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: html
version: 0.14.0
version: 0.14.1-dev

description: APIs for parsing and manipulating HTML content outside the browser.
author: Dart Team <[email protected]>
Expand All @@ -16,4 +16,3 @@ dev_dependencies:
path: ^1.6.2
pedantic: ^1.3.0
test: ^1.3.0
utf: '>=0.9.0 <0.10.0'
4 changes: 1 addition & 3 deletions test/tokenizer_test.dart
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,8 @@ import 'dart:io';
import 'dart:mirrors';
import 'package:path/path.dart' as pathos;
import 'package:test/test.dart';
import 'package:html/src/char_encodings.dart';
import 'package:html/src/token.dart';
import 'package:html/src/tokenizer.dart';
import 'package:utf/utf.dart';
import 'support.dart';

class TokenizerTestParser {
Expand All @@ -24,7 +22,7 @@ class TokenizerTestParser {

List parse(String str) {
// Note: we need to pass bytes to the tokenizer if we want it to handle BOM.
var bytes = codepointsToUtf8(toCodepoints(str));
var bytes = utf8.encode(str);
var tokenizer = HtmlTokenizer(bytes, encoding: 'utf-8');
outputTokens = [];

Expand Down

0 comments on commit 38756a0

Please sign in to comment.