Skip to content

Commit

Permalink
sql: modify BYTES->STRING casts to match postgres
Browse files Browse the repository at this point in the history
Fixes #17969.

Also introduce the hex form of ENCODE/DECODE to replace this
functionality.

Detailed explanation of why this was necessary:
#17969 (comment)

This is a backwards-incompatible (but important) change and I think it
should be cherry-picked to 1.1.
  • Loading branch information
Justin Jaffray committed Oct 12, 2017
1 parent 62b7495 commit d66d914
Show file tree
Hide file tree
Showing 13 changed files with 160 additions and 38 deletions.
4 changes: 4 additions & 0 deletions docs/generated/sql/functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,10 @@ Compatible elements: hour, minute, second, millisecond, microsecond.</p>
<tr><td><code>concat_ws(<a href="string.html">string</a>...) &rarr; <a href="string.html">string</a></code></td><td><span class="funcdesc"><p>Uses the first argument as a separator between the concatenation of the subsequent arguments.</p>
<p>For example <code>concat_ws('!','wow','great')</code> returns <code>wow!great</code>.</p>
</span></td></tr>
<tr><td><code>decode(text: <a href="string.html">string</a>, format: <a href="string.html">string</a>) &rarr; <a href="string.html">string</a></code></td><td><span class="funcdesc"><p>Decodes <code>data</code> as the format specified by <code>format</code> (only “hex” is supported).</p>
</span></td></tr>
<tr><td><code>encode(data: <a href="bytes.html">bytes</a>, format: <a href="string.html">string</a>) &rarr; <a href="string.html">string</a></code></td><td><span class="funcdesc"><p>Encodes <code>data</code> in the text format specified by <code>format</code> (only “hex” is supported).</p>
</span></td></tr>
<tr><td><code>from_ip(val: <a href="bytes.html">bytes</a>) &rarr; <a href="string.html">string</a></code></td><td><span class="funcdesc"><p>Converts the byte string representation of an IP to its character string representation.</p>
</span></td></tr>
<tr><td><code>from_uuid(val: <a href="bytes.html">bytes</a>) &rarr; <a href="string.html">string</a></code></td><td><span class="funcdesc"><p>Converts the byte string representation of a UUID to its character string representation.</p>
Expand Down
6 changes: 2 additions & 4 deletions pkg/cli/dump_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ func TestDumpRow(t *testing.T) {
1,
2.3,
'striiing',
b'\141\061\142\062\143\063', '2016-03-26', '2016-01-25 10:10:10' ,
'\x613162326333', '2016-03-26', '2016-01-25 10:10:10' ,
'2h30m30s',
true,
1.2345,
Expand Down Expand Up @@ -124,7 +124,7 @@ CREATE TABLE t (
);
INSERT INTO t (i, f, s, b, d, t, n, o, e, u, ip, tz, e1, e2, s1) VALUES
(1, 2.3, 'striiing', b'a1b2c3', '2016-03-26', '2016-01-25 10:10:10+00:00', '2h30m30s', true, 1.2345, 'e9716c74-2638-443d-90ed-ffde7bea7d1d', '192.168.0.1', '2016-01-25 10:10:10+00:00', 3, 4.5, 's'),
(1, 2.3, 'striiing', '\x613162326333', '2016-03-26', '2016-01-25 10:10:10+00:00', '2h30m30s', true, 1.2345, 'e9716c74-2638-443d-90ed-ffde7bea7d1d', '192.168.0.1', '2016-01-25 10:10:10+00:00', 3, 4.5, 's'),
(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL),
(NULL, '+Inf', NULL, NULL, NULL, NULL, NULL, NULL, 'Infinity', NULL, NULL, NULL, NULL, NULL, NULL),
(NULL, '-Inf', NULL, NULL, NULL, NULL, NULL, NULL, '-Infinity', NULL, NULL, NULL, NULL, NULL, NULL),
Expand Down Expand Up @@ -335,8 +335,6 @@ func init() {
func TestDumpRandom(t *testing.T) {
defer leaktest.AfterTest(t)()

t.Skip("https://github.com/cockroachdb/cockroach/issues/17969")

c := newCLITest(cliTestParams{t: t})
defer c.cleanup()

Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/logictest/testdata/logic_test/array
Original file line number Diff line number Diff line change
Expand Up @@ -565,7 +565,7 @@ INSERT INTO a VALUES (ARRAY['foo','bar','baz'])
query T
SELECT b FROM a
----
{b'foo',b'bar',b'baz'}
{'\x666f6f','\x626172','\x62617a'}

statement ok
DROP TABLE a
Expand Down
21 changes: 14 additions & 7 deletions pkg/sql/logictest/testdata/logic_test/builtin_function
Original file line number Diff line number Diff line change
Expand Up @@ -114,11 +114,6 @@ SELECT SUBSTR('RoacH', -2, 4)
----
R

# See #6601 and #8210. We use three arguments to guarantee a very high
# probability that at least one of the UUIDs is invalid UTF8.
query error invalid UTF-8
select cast(uuid_v4() as string), cast(uuid_v4() as string), cast(uuid_v4() as string)

query T
SELECT SUBSTR('12345', 2, 77)
----
Expand Down Expand Up @@ -1519,6 +1514,20 @@ SELECT array_upper(ARRAY[ARRAY[1, 2]], 2)
----
2

query error only 'hex' format is supported for ENCODE
SELECT ENCODE('abc', 'escape')

query error only 'hex' format is supported for DECODE
SELECT DECODE('abc', 'escape')

query error invalid UTF-8
SELECT ENCODE('\xa7', 'hex')

query TT
SELECT ENCODE('\x616263', 'hex'), DECODE('abc', 'hex')
----
abc 616263

query T
SELECT FROM_IP(b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff\x01\x02\x03\x04')
----
Expand Down Expand Up @@ -1788,7 +1797,6 @@ SELECT i FROM (VALUES
(sha512('3m'::interval::string)),
(sha512('2016-11-12'::date::string)),
(sha512('2015-08-24 23:45:45.53453'::timestamptz::string)),
(sha512(b'bar':::bytes::string)),
(sha512(b'bar'::bytes))
) AS a(i)
----
Expand All @@ -1802,7 +1810,6 @@ f7fbba6e0636f890e56fbbf3283e524c6fa3204ae298382d624741d0dc6638326e282c41be5e4254
b2d173023893f71caadf7cb2f9557355462570de2c9c971b9cfa5494936e28df8e13d0db4d550aab66d5e7a002f678ddb02def092c069ce473cf5fb293953986
960b0fed9378be1e9adefd91e1be6ac9c1de7208008dfec438ff845135727bebea0f7458a5181079f61288176e0168cfea501b900c3e495b3ab9bbe4d372486d
d82c4eb5261cb9c8aa9855edd67d1bd10482f41529858d925094d173fa662aa91ff39bc5b188615273484021dfb16fd8284cf684ccf0fc795be3aa2fc1e6c181
d82c4eb5261cb9c8aa9855edd67d1bd10482f41529858d925094d173fa662aa91ff39bc5b188615273484021dfb16fd8284cf684ccf0fc795be3aa2fc1e6c181

# We only support one encoding, UTF8, which is hardcoded to id 6 just like in
# Postgres.
Expand Down
41 changes: 41 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/bytes
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# LogicTest: default

query T
SELECT 'non-escaped-string':::BYTES::STRING
----
\x6e6f6e2d657363617065642d737472696e67

query T
SELECT '\Xabcd':::BYTES::STRING
----
\xabcd

query T
SELECT b'\x5c\x78':::BYTES
----
\x

query T
SELECT b'\x5c\x78':::BYTES::STRING
----
\x5c78

query T
SELECT b'\x5c\x58':::BYTES::STRING
----
\x5c58

query T
SELECT e'\x5c\x78'::STRING
----
\x

query T
SELECT '\X':::BYTES::STRING
----
\x

query T
SELECT '日本語':::STRING::BYTES::STRING
----
\xe697a5e69cace8aa9e
36 changes: 36 additions & 0 deletions pkg/sql/parser/builtins.go
Original file line number Diff line number Diff line change
Expand Up @@ -672,6 +672,42 @@ var Builtins = map[string][]Builtin{
},
},

// https://www.postgresql.org/docs/10/static/functions-binarystring.html
"encode": {
Builtin{
Types: ArgTypes{{"data", TypeBytes}, {"format", TypeString}},
ReturnType: fixedReturnType(TypeString),
fn: func(evalCtx *EvalContext, args Datums) (_ Datum, err error) {
data, format := string(*args[0].(*DBytes)), string(MustBeDString(args[1]))
if format != "hex" {
return nil, pgerror.NewError(pgerror.CodeInvalidParameterValueError, "only 'hex' format is supported for ENCODE")
}
if !utf8.ValidString(data) {
return nil, pgerror.NewError(pgerror.CodeCharacterNotInRepertoireError, "invalid UTF-8 sequence")
}
return NewDString(data), nil
},
Info: "Encodes `data` in the text format specified by `format` (only \"hex\" is supported).",
},
},

"decode": {
Builtin{
Types: ArgTypes{{"text", TypeString}, {"format", TypeString}},
ReturnType: fixedReturnType(TypeString),
fn: func(evalCtx *EvalContext, args Datums) (_ Datum, err error) {
data, format := string(MustBeDString(args[0])), string(MustBeDString(args[1]))
if format != "hex" {
return nil, pgerror.NewError(pgerror.CodeInvalidParameterValueError, "only 'hex' format is supported for DECODE")
}
var buf bytes.Buffer
hexEncodeString(&buf, data)
return NewDString(buf.String()), nil
},
Info: "Decodes `data` as the format specified by `format` (only \"hex\" is supported).",
},
},

"ascii": {stringBuiltin1(func(_ *EvalContext, s string) (Datum, error) {
for _, ch := range s {
return NewDInt(DInt(ch)), nil
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/parser/constant.go
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,7 @@ func (expr *StrVal) ResolveAsType(ctx *SemaContext, typ Type) (Datum, error) {
expr.resString = DString(expr.s)
return NewDNameFromDString(&expr.resString), nil
case TypeBytes:
s, err := ParseDByte(expr.s)
s, err := ParseDByte(expr.s, !expr.bytesEsc)
if err == nil {
expr.resBytes = *s
}
Expand Down
14 changes: 10 additions & 4 deletions pkg/sql/parser/datum.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,9 @@ func ParseDBool(s string) (*DBool, error) {
}

// ParseDByte parses a string representation of hex encoded binary data.
func ParseDByte(s string) (*DBytes, error) {
if len(s) > 2 && (s[0] == '\\' && (s[1] == 'x' || s[1] == 'X')) {
// allowBackslashXFormat determines if the `\x` format of inputting a hex string should be allowed.
func ParseDByte(s string, allowBackslashXFormat bool) (*DBytes, error) {
if allowBackslashXFormat && len(s) >= 2 && (s[0] == '\\' && (s[1] == 'x' || s[1] == 'X')) {
hexstr, err := hex.DecodeString(s[2:])
if err != nil {
return nil, makeParseError(s, TypeBytes, err)
Expand Down Expand Up @@ -1011,11 +1012,16 @@ func (d *DBytes) max() (Datum, bool) {
}

// AmbiguousFormat implements the Datum interface.
func (*DBytes) AmbiguousFormat() bool { return false }
func (*DBytes) AmbiguousFormat() bool { return true }

// Format implements the NodeFormatter interface.
func (d *DBytes) Format(buf *bytes.Buffer, f FmtFlags) {
encodeSQLBytes(buf, string(*d))
buf.WriteString("'\\x")
b := string(*d)
for i := 0; i < len(b); i++ {
buf.Write(rawHexMap[b[i]])
}
buf.WriteByte('\'')
}

// Size implements the Datum interface.
Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/parser/datum_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,8 @@ func TestDatumOrdering(t *testing.T) {
{`'':::string`, valIsMin, `e'\x00'`, `''`, noMax},
{`e'\x00'`, noPrev, `e'\x00\x00'`, `''`, noMax},
{`'abc':::string`, noPrev, `e'abc\x00'`, `''`, noMax},
{`'':::bytes`, valIsMin, `b'\x00'`, `b''`, noMax},
{`'abc':::bytes`, noPrev, `b'abc\x00'`, `b''`, noMax},
{`'':::bytes`, valIsMin, `'\x00'`, `'\x'`, noMax},
{`'abc':::bytes`, noPrev, `'\x61626300'`, `'\x'`, noMax},

// Dates
{`'2006-01-02':::date`, `'2006-01-01'`, `'2006-01-03'`, noMin, noMax},
Expand Down
35 changes: 33 additions & 2 deletions pkg/sql/parser/encode.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,10 @@ var (
'{': true,
'}': true,
}
// hexMap is a mapping from each byte to the `\x%%` hex form as a []byte.
hexMap [256][]byte
// rawHexMap is a mapping from each byte to the `%%` hex form as a []byte.
rawHexMap [256][]byte
)

// encodeSQLString writes a string literal to buf. All unicode and
Expand All @@ -59,6 +62,12 @@ func EscapeSQLString(in string) string {
return buf.String()
}

func hexEncodeString(buf *bytes.Buffer, in string) {
for i := 0; i < len(in); i++ {
buf.Write(rawHexMap[in[i]])
}
}

// encodeEscapedChar is used internally to write out a character from a larger
// string that needs to be escaped to a buffer.
func encodeEscapedChar(
Expand Down Expand Up @@ -239,7 +248,29 @@ func init() {
}
}

for i := range hexMap {
hexMap[i] = []byte(fmt.Sprintf("\\x%02x", i))
// underlyingHexMap contains the string "\x00\x01\x02..." which hexMap and
// rawHexMap then index into.
var underlyingHexMap bytes.Buffer
underlyingHexMap.Grow(1024)

for i := 0; i < 256; i++ {
underlyingHexMap.WriteString("\\x")
writeHexDigit(&underlyingHexMap, i/16)
writeHexDigit(&underlyingHexMap, i%16)
}

underlyingHexBytes := underlyingHexMap.Bytes()

for i := 0; i < 256; i++ {
hexMap[i] = underlyingHexBytes[i*4 : i*4+4]
rawHexMap[i] = underlyingHexBytes[i*4+2 : i*4+4]
}
}

func writeHexDigit(buf *bytes.Buffer, v int) {
if v < 10 {
buf.WriteByte('0' + byte(v))
} else {
buf.WriteByte('a' + byte(v-10))
}
}
12 changes: 6 additions & 6 deletions pkg/sql/parser/eval.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
package parser

import (
"bytes"
"fmt"
"math"
"math/big"
Expand Down Expand Up @@ -2440,11 +2441,10 @@ func performCast(ctx *EvalContext, d Datum, t CastTargetType) (Datum, error) {
case *DCollatedString:
s = t.Contents
case *DBytes:
if !utf8.ValidString(string(*t)) {
return nil, pgerror.NewErrorf(
pgerror.CodeCharacterNotInRepertoireError, "invalid UTF-8: %q", string(*t))
}
s = string(*t)
var buf bytes.Buffer
buf.WriteString("\\x")
hexEncodeString(&buf, string(*t))
s = buf.String()
case *DOid:
s = t.name
}
Expand All @@ -2470,7 +2470,7 @@ func performCast(ctx *EvalContext, d Datum, t CastTargetType) (Datum, error) {
case *BytesColType:
switch t := d.(type) {
case *DString:
return ParseDByte(string(*t))
return ParseDByte(string(*t), true)
case *DCollatedString:
return NewDBytes(DBytes(t.Contents)), nil
case *DUuid:
Expand Down
19 changes: 9 additions & 10 deletions pkg/sql/parser/eval_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,17 +100,17 @@ func TestEval(t *testing.T) {
{`0xa`, `10`},
{`0xcafe1111`, `3405648145`},
// Hexadecimal bytes literals.
{`x'636174'`, `b'cat'`},
{`X'636174'`, `b'cat'`},
{`x'636174'`, `'\x636174'`},
{`X'636174'`, `'\x636174'`},
{`x'636174'::string`, `'cat'`},
{`e'\\x636174'::BYTES`, `b'cat'`},
{`e'\\X636174'::BYTES`, `b'cat'`},
{`e'\\x636174'::STRING::BYTES`, `b'cat'`},
{`e'\\x636174'::BYTES`, `'\x636174'`},
{`e'\\X636174'::BYTES`, `'\x636174'`},
{`e'\\x636174'::STRING::BYTES`, `'\x636174'`},
{`e'\\x636174'::STRING`, `e'\\x636174'`},
// String concatenation.
{`'a' || 'b'`, `'ab'`},
{`'a' || (1 + 2)::char`, `'a3'`},
{`b'hello' || 'world'`, `b'helloworld'`},
{`b'hello' || 'world'`, `'\x68656c6c6f776f726c64'`},
// Bit shift operators.
{`1 << 2`, `4`},
{`4 >> 2`, `1`},
Expand Down Expand Up @@ -606,9 +606,9 @@ func TestEval(t *testing.T) {
{`CAST('123' AS int) + 1`, `124`},
{`CAST(NULL AS int)`, `NULL`},
{`'hello'::char(2)`, `'he'`},
{`'hello'::bytes`, `b'hello'`},
{`'hello'::bytes`, `'\x68656c6c6f'`},
{`b'hello'::string`, `'hello'`},
{`b'\xff'`, `b'\xff'`},
{`b'\xff'`, `'\xff'`},
{`123::text`, `'123'`},
{`date '2010-09-28'`, `'2010-09-28'`},
{`CAST('2010-09-28' AS date)`, `'2010-09-28'`},
Expand Down Expand Up @@ -673,7 +673,7 @@ func TestEval(t *testing.T) {
{`10123456::int::interval`, `'10s123ms456µs'`},
// Type annotation expressions.
{`ANNOTATE_TYPE('s', string)`, `'s'`},
{`ANNOTATE_TYPE('s', bytes)`, `b's'`},
{`ANNOTATE_TYPE('s', bytes)`, `'\x73'`},
{`ANNOTATE_TYPE('2010-09-28', date)`, `'2010-09-28'`},
{`ANNOTATE_TYPE('PT12H2M', interval)`, `'12h2m'`},
{`ANNOTATE_TYPE('2 02:12', interval)`, `'2d2h12m'`},
Expand Down Expand Up @@ -1063,7 +1063,6 @@ func TestEvalError(t *testing.T) {
`could not parse "abcd" as type interval: interval: missing unit`},
{`'1- 2:3:4 9'::interval`,
`could not parse "1- 2:3:4 9" as type interval: invalid input syntax for type interval 1- 2:3:4 9`},
{`b'\xff\xfe\xfd'::string`, `invalid UTF-8: "\xff\xfe\xfd"`},
{`e'\\x6sdfsd36174'::BYTES`, `could not parse "\\x6sdfsd36174" as type bytes: encoding/hex: odd length hex string`},
{`ARRAY[NULL, ARRAY[1, 2]]`, `multidimensional arrays must have array expressions with matching dimensions`},
{`ARRAY[ARRAY[1, 2], NULL]`, `multidimensional arrays must have array expressions with matching dimensions`},
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/parser/format_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ func TestFormatExpr(t *testing.T) {
{`'abc'`, FmtShowTypes,
`('abc')[string]`},
{`b'abc'`, FmtShowTypes,
`(b'abc')[bytes]`},
`('\x616263')[bytes]`},
{`interval '3s'`, FmtShowTypes,
`('3s')[interval]`},
{`date '2003-01-01'`, FmtShowTypes,
Expand Down

0 comments on commit d66d914

Please sign in to comment.