diff --git a/docs/stdlib/bytes.rst b/docs/stdlib/bytes.rst index 3e85e4892a9..726e58014b1 100644 --- a/docs/stdlib/bytes.rst +++ b/docs/stdlib/bytes.rst @@ -33,9 +33,7 @@ Bytes .. eql:type:: std::bytes - A sequence of bytes. - - Bytes cannot be cast into any other type. They represent raw data. + A sequence of bytes representing raw data. There's a special byte literal: @@ -54,6 +52,15 @@ Bytes db> SELECT contains(b'qwerty', b'42'); {false} + It is possible to :eql:op:`cast <cast>` between :eql:type:`bytes` and + :eql:type:`json`. Bytes are represented as base64 encoded strings in JSON: + + .. code-block:: edgeql-repl + + db> SELECT <json>b'Hello EdgeDB!'; + {"\"SGVsbG8gRWRnZURCIQ==\""} + db> SELECT <bytes>to_json('"SGVsbG8gRWRnZURCIQ=="'); + {b'Hello EdgeDB!'} ---------- diff --git a/docs/stdlib/json.rst b/docs/stdlib/json.rst index a56611bb505..46993dd6d4e 100644 --- a/docs/stdlib/json.rst +++ b/docs/stdlib/json.rst @@ -119,8 +119,7 @@ possible to cast a JSON value directly into a :eql:type:`tuple`. Arbitrary JSON data. - Any other type (except for :eql:type:`bytes`) can be - :eql:op:`cast <cast>` to and from JSON: + Any other type can be :eql:op:`cast <cast>` to and from JSON: .. 
code-block:: edgeql-repl diff --git a/edb/edgeql/compiler/casts.py b/edb/edgeql/compiler/casts.py index 34c396f91db..522c7a2cb15 100644 --- a/edb/edgeql/compiler/casts.py +++ b/edb/edgeql/compiler/casts.py @@ -92,6 +92,9 @@ def compile_cast( f'`...[IS {new_stype.get_displayname(ctx.env.schema)}]` instead', context=srcctx) + json_t = ctx.env.get_track_schema_type( + sn.QualName('std', 'json')) + if isinstance(ir_set.expr, irast.Array): return _cast_array_literal( ir_set, orig_stype, new_stype, srcctx=srcctx, ctx=ctx) @@ -124,8 +127,6 @@ def compile_cast( cardinality_mod=cardinality_mod, ctx=ctx) else: - json_t = ctx.env.get_track_schema_type( - sn.QualName('std', 'json')) if (new_stype.issubclass(ctx.env.schema, json_t) and ir_set.path_id.is_objtype_path()): # JSON casts of objects are special: we want the full shape @@ -626,8 +627,15 @@ def _cast_array_literal( context=srcctx) from None assert isinstance(new_stype, s_types.Array) el_type = new_stype.get_subtypes(ctx.env.schema)[0] + intermediate_stype = orig_stype + else: el_type = new_stype + ctx.env.schema, intermediate_stype = s_types.Array.from_subtypes( + ctx.env.schema, [el_type]) + + intermediate_typeref = typegen.type_to_typeref( + intermediate_stype, env=ctx.env) casted_els = [] for el in ir_set.expr.elements: @@ -637,12 +645,12 @@ def _cast_array_literal( casted_els.append(el) new_array = setgen.ensure_set( - irast.Array(elements=casted_els, typeref=orig_typeref), + irast.Array(elements=casted_els, typeref=intermediate_typeref), ctx=ctx) if direct_cast is not None: return _cast_to_ir( - new_array, direct_cast, orig_stype, new_stype, ctx=ctx) + new_array, direct_cast, intermediate_stype, new_stype, ctx=ctx) else: cast_ir = irast.TypeCast( diff --git a/edb/ir/ast.py b/edb/ir/ast.py index 53369cd9d9e..285d4bb9477 100644 --- a/edb/ir/ast.py +++ b/edb/ir/ast.py @@ -167,6 +167,10 @@ def __repr__(self) -> str: def real_material_type(self) -> TypeRef: return self.material_type or self + @property + def 
real_base_type(self) -> TypeRef: + return self.base_type or self + def __eq__(self, other: object) -> bool: if not isinstance(other, self.__class__): return False diff --git a/edb/ir/typeutils.py b/edb/ir/typeutils.py index 9b55673b26c..e945a667e88 100644 --- a/edb/ir/typeutils.py +++ b/edb/ir/typeutils.py @@ -32,6 +32,7 @@ from edb.schema import scalars as s_scalars from edb.schema import types as s_types from edb.schema import objtypes as s_objtypes +from edb.schema import objects as s_obj from edb.schema import utils as s_utils from . import ast as irast @@ -104,6 +105,16 @@ def is_abstract(typeref: irast.TypeRef) -> bool: return typeref.is_abstract +def is_json(typeref: irast.TypeRef) -> bool: + """Return True if *typeref* describes the json type.""" + return typeref.real_base_type.id == s_obj.get_known_type_id('std::json') + + +def is_bytes(typeref: irast.TypeRef) -> bool: + """Return True if *typeref* describes the bytes type.""" + return typeref.real_base_type.id == s_obj.get_known_type_id('std::bytes') + + def is_persistent_tuple(typeref: irast.TypeRef) -> bool: if is_tuple(typeref): if typeref.material_type is not None: diff --git a/edb/lib/std/30-jsonfuncs.edgeql b/edb/lib/std/30-jsonfuncs.edgeql index 66993170910..19cc6ab1b03 100644 --- a/edb/lib/std/30-jsonfuncs.edgeql +++ b/edb/lib/std/30-jsonfuncs.edgeql @@ -261,6 +261,14 @@ CREATE CAST FROM std::bool TO std::json { }; +CREATE CAST FROM std::bytes TO std::json { + SET volatility := 'Stable'; + USING SQL $$ + SELECT to_jsonb(encode(val, 'base64')); + $$; +}; + + CREATE CAST FROM std::uuid TO std::json { SET volatility := 'Stable'; USING SQL FUNCTION 'to_jsonb'; @@ -337,6 +345,14 @@ CREATE CAST FROM std::json TO std::uuid { }; +CREATE CAST FROM std::json TO std::bytes { + SET volatility := 'Stable'; + USING SQL $$ + SELECT decode(edgedb.jsonb_extract_scalar(val, 'string'), 'base64')::bytea; + $$; +}; + + CREATE CAST FROM std::json TO std::str { SET volatility := 'Stable'; USING SQL $$ diff --git 
a/edb/pgsql/compiler/output.py b/edb/pgsql/compiler/output.py index 9f1b195c27c..b0b2ce2fba6 100644 --- a/edb/pgsql/compiler/output.py +++ b/edb/pgsql/compiler/output.py @@ -28,8 +28,10 @@ from edb.ir import typeutils as irtyputils from edb.schema import defines as s_defs +from edb.schema import casts as s_casts from edb.pgsql import ast as pgast +from edb.pgsql import common from edb.pgsql import types as pgtypes from . import astutils @@ -152,39 +154,54 @@ def array_as_json_object( ) -> pgast.BaseExpr: el_type = styperef.subtypes[0] - if irtyputils.is_tuple(el_type): + is_tuple = irtyputils.is_tuple(el_type) + # Tuples and bytes might need underlying casts to be done + if is_tuple or irtyputils.is_bytes(el_type): coldeflist = [] - json_args: List[pgast.BaseExpr] = [] - is_named = any(st.element_name for st in el_type.subtypes) - - for i, st in enumerate(el_type.subtypes): - if is_named: - colname = st.element_name - assert colname - json_args.append(pgast.StringConstant(val=colname)) - else: - colname = str(i) - - val: pgast.BaseExpr = pgast.ColumnRef(name=[colname]) - if irtyputils.is_collection(st): - val = coll_as_json_object(val, styperef=st, env=env) - - json_args.append(val) - - if not irtyputils.is_persistent_tuple(el_type): - # Column definition list is only allowed for functions - # returning "record", i.e. an anonymous tuple, which - # would not be the case for schema-persistent tuple types. 
- coldeflist.append( - pgast.ColumnDef( - name=colname, - typename=pgast.TypeName( - name=pgtypes.pg_type_from_ir_typeref(st) + + out_alias = env.aliases.get('q') + + val: pgast.BaseExpr + if is_tuple: + json_args: List[pgast.BaseExpr] = [] + is_named = any(st.element_name for st in el_type.subtypes) + for i, st in enumerate(el_type.subtypes): + if is_named: + colname = st.element_name + assert colname + json_args.append(pgast.StringConstant(val=colname)) + else: + colname = str(i) + + val = pgast.ColumnRef(name=[colname]) + val = serialize_expr_to_json( + val, styperef=st, nested=True, env=env) + + json_args.append(val) + + if not irtyputils.is_persistent_tuple(el_type): + # Column definition list is only allowed for functions + # returning "record", i.e. an anonymous tuple, which + # would not be the case for schema-persistent tuple types. + coldeflist.append( + pgast.ColumnDef( + name=colname, + typename=pgast.TypeName( + name=pgtypes.pg_type_from_ir_typeref(st) + ) ) ) - ) - json_func = 'build_object' if is_named else 'build_array' + json_func = 'build_object' if is_named else 'build_array' + agg_arg = _build_json(json_func, json_args, env=env) + + needs_unnest = bool(el_type.subtypes) + else: + assert not el_type.subtypes + val = pgast.ColumnRef(name=[out_alias]) + agg_arg = serialize_expr_to_json( + val, styperef=el_type, nested=True, env=env) + needs_unnest = True return pgast.SelectStmt( target_list=[ @@ -193,9 +210,7 @@ def array_as_json_object( args=[ pgast.FuncCall( name=_get_json_func('agg', env=env), - args=[ - _build_json(json_func, json_args, env=env) - ] + args=[agg_arg], ), pgast.StringConstant(val='[]'), ] @@ -205,9 +220,7 @@ def array_as_json_object( ], from_clause=[ pgast.RangeFunction( - alias=pgast.Alias( - aliasname=env.aliases.get('q'), - ), + alias=pgast.Alias(aliasname=out_alias), is_rowsfrom=True, functions=[ pgast.FuncCall( @@ -217,7 +230,7 @@ def array_as_json_object( ) ] ) - ] if el_type.subtypes else [], + ] if needs_unnest else [], ) 
else: return pgast.FuncCall( @@ -255,8 +268,8 @@ def unnamed_tuple_as_json_object( ), ], ) - if irtyputils.is_collection(el_type): - val = coll_as_json_object(val, styperef=el_type, env=env) + val = serialize_expr_to_json( + val, styperef=el_type, nested=True, env=env) vals.append(val) return _build_json( @@ -282,8 +295,8 @@ def unnamed_tuple_as_json_object( val = pgast.ColumnRef(name=[str(el_idx)]) - if irtyputils.is_collection(el_type): - val = coll_as_json_object(val, styperef=el_type, env=env) + val = serialize_expr_to_json( + val, styperef=el_type, nested=True, env=env) vals.append(val) @@ -340,8 +353,8 @@ def named_tuple_as_json_object( ) ] ) - if irtyputils.is_collection(el_type): - val = coll_as_json_object(val, styperef=el_type, env=env) + val = serialize_expr_to_json( + val, styperef=el_type, nested=True, env=env) keyvals.append(val) return _build_json( @@ -369,8 +382,8 @@ def named_tuple_as_json_object( val = pgast.ColumnRef(name=[el_type.element_name]) - if irtyputils.is_collection(el_type): - val = coll_as_json_object(val, styperef=el_type, env=env) + val = serialize_expr_to_json( + val, styperef=el_type, nested=True, env=env) keyvals.append(val) @@ -410,7 +423,7 @@ def named_tuple_as_json_object( def tuple_var_as_json_object( tvar: pgast.TupleVar, *, - path_id: irast.PathId, + styperef: irast.TypeRef, env: context.Environment, ) -> pgast.BaseExpr: @@ -512,14 +525,14 @@ def serialize_expr_if_needed( def serialize_expr_to_json( expr: pgast.BaseExpr, *, - path_id: irast.PathId, + styperef: irast.TypeRef, nested: bool=False, env: context.Environment) -> pgast.BaseExpr: val: pgast.BaseExpr if isinstance(expr, pgast.TupleVar): - val = tuple_var_as_json_object(expr, path_id=path_id, env=env) + val = tuple_var_as_json_object(expr, styperef=styperef, env=env) elif isinstance(expr, (pgast.RowExpr, pgast.ImplicitRowExpr)): val = _build_json( @@ -530,8 +543,20 @@ def serialize_expr_to_json( env=env, ) - elif path_id.is_collection_path() and not expr.ser_safe: - 
val = coll_as_json_object(expr, styperef=path_id.target, env=env) + elif irtyputils.is_collection(styperef) and not expr.ser_safe: + val = coll_as_json_object(expr, styperef=styperef, env=env) + + # TODO: We'll probably want to generalize this to other custom JSON + # casts once they exist. + elif ( + irtyputils.is_bytes(styperef) + and not expr.ser_safe + ): + cast_name = s_casts.get_cast_fullname_from_names( + 'std', 'std::bytes', 'std::json') + val = pgast.FuncCall( + name=common.get_cast_backend_name(cast_name, aspect='function'), + args=[expr], null_safe=True, ser_safe=True) elif not nested: val = pgast.FuncCall( @@ -554,7 +579,7 @@ def serialize_expr( context.OutputFormat.JSON_ELEMENTS, context.OutputFormat.JSONB): val = serialize_expr_to_json( - expr, path_id=path_id, nested=nested, env=env) + expr, styperef=path_id.target, nested=nested, env=env) elif env.output_format in (context.OutputFormat.NATIVE, context.OutputFormat.NATIVE_INTERNAL, diff --git a/edb/pgsql/compiler/relgen.py b/edb/pgsql/compiler/relgen.py index 269afd87899..67b200e098a 100644 --- a/edb/pgsql/compiler/relgen.py +++ b/edb/pgsql/compiler/relgen.py @@ -1832,7 +1832,7 @@ def process_set_as_type_cast( if serialized is not None: if irtyputils.is_collection(inner_set.typeref): serialized = output.serialize_expr_to_json( - serialized, path_id=inner_set.path_id, + serialized, styperef=inner_set.path_id.target, env=subctx.env) pathctx.put_path_value_var( diff --git a/edb/schema/casts.py b/edb/schema/casts.py index b274332a1e8..7272eff60a9 100644 --- a/edb/schema/casts.py +++ b/edb/schema/casts.py @@ -173,13 +173,12 @@ def is_castable( return False -def get_cast_fullname( - schema: s_schema.Schema, +def get_cast_fullname_from_names( module: str, - from_type: s_types.TypeShell[s_types.Type], - to_type: s_types.TypeShell[s_types.Type], + from_type: str, + to_type: str, ) -> sn.QualName: - quals = [str(from_type.get_name(schema)), str(to_type.get_name(schema))] + quals = [from_type, to_type] 
shortname = sn.QualName(module, 'cast') return sn.QualName( module=shortname.module, @@ -187,6 +186,19 @@ ) +def get_cast_fullname( + schema: s_schema.Schema, + module: str, + from_type: s_types.TypeShell[s_types.Type], + to_type: s_types.TypeShell[s_types.Type], +) -> sn.QualName: + return get_cast_fullname_from_names( + module, + str(from_type.get_name(schema)), + str(to_type.get_name(schema)), + ) + + class Cast( so.QualifiedObject, s_anno.AnnotationSubject, diff --git a/edb/testbase/serutils.py b/edb/testbase/serutils.py index 9d48886b319..ae14b44f21e 100644 --- a/edb/testbase/serutils.py +++ b/edb/testbase/serutils.py @@ -94,6 +94,7 @@ def _stringify(o): @serialize.register(int) @serialize.register(float) @serialize.register(str) +@serialize.register(bytes) @serialize.register(bool) @serialize.register(type(None)) @serialize.register(decimal.Decimal) diff --git a/edb/testbase/server.py b/edb/testbase/server.py index e863eef2d35..21aca6eb5d4 100644 --- a/edb/testbase/server.py +++ b/edb/testbase/server.py @@ -945,8 +945,8 @@ def _assert_generic_shape(path, data, shape): self.fail( f'{message}: {data!r} != {shape!r} ' f'{_format_path(path)}') - elif isinstance(shape, (str, int, timedelta, decimal.Decimal, - edgedb.RelativeDuration)): + elif isinstance(shape, (str, int, bytes, timedelta, + decimal.Decimal, edgedb.RelativeDuration)): if data != shape: self.fail( f'{message}: {data!r} != {shape!r} ' diff --git a/tests/test_edgeql_casts.py b/tests/test_edgeql_casts.py index d250b2df47b..01ff0f49ddd 100644 --- a/tests/test_edgeql_casts.py +++ b/tests/test_edgeql_casts.py @@ -81,9 +81,26 @@ async def test_edgeql_casts_bytes_03(self): async def test_edgeql_casts_bytes_04(self): async with self.assertRaisesRegexTx( - edgedb.QueryError, r'cannot cast'): - await self.con.execute(""" - SELECT <bytes>to_json('1'); + edgedb.InvalidValueError, r'expected json string or null'): + await self.con.query_one("""SELECT <bytes>to_json('1');"""), + + self.assertEqual( + 
await self.con.query_one(r''' + SELECT <bytes>to_json('"aGVsbG8="'); + '''), + b'hello', + ) + + async with self.assertRaisesRegexTx( + edgedb.InvalidValueError, r'invalid symbol'): + await self.con.query_one(""" + SELECT <bytes>to_json('"not base64!"'); + """) + + async with self.assertRaisesRegexTx( + edgedb.InvalidValueError, r'invalid base64 end sequence'): + await self.con.query_one(""" + SELECT <bytes>to_json('"a"'); """) async def test_edgeql_casts_bytes_05(self): diff --git a/tests/test_edgeql_json.py b/tests/test_edgeql_json.py index 41a53f81865..1cd277b484b 100644 --- a/tests/test_edgeql_json.py +++ b/tests/test_edgeql_json.py @@ -1379,12 +1379,113 @@ async def test_edgeql_json_slice_03(self): """) async def test_edgeql_json_bytes_cast_01(self): - async with self.assertRaisesRegexTx( - edgedb.QueryError, r'cannot cast.*bytes.*to.*json.*'): + await self.assert_query_result( + r"""SELECT <json>b'foo';""", + ['Zm9v'], + ['"Zm9v"'], + ) - await self.con.execute(r""" - SELECT <json>b'foo'; - """) + await self.assert_query_result( + r"""SELECT <json>(foo := b'hello', bar := [b'world']);""", + [{'bar': ['d29ybGQ='], 'foo': 'aGVsbG8='}], + ['{"bar": ["d29ybGQ="], "foo": "aGVsbG8="}'], + ) + + await self.assert_query_result( + r"""SELECT <json>{ x := b'hello' };""", + [{'x': 'aGVsbG8='}], + ['{"x": "aGVsbG8="}'], + ) + + await self.assert_query_result( + r"""SELECT <json>[b'foo'];""", + [['Zm9v']], + ['["Zm9v"]'], + ) + + await self.assert_query_result( + r"""SELECT <json>(b'foo',)""", + [['Zm9v']], + ['["Zm9v"]'], + ) + + await self.assert_query_result( + r"""SELECT <json>[(b'foo',)][0]""", + [['Zm9v']], + ['["Zm9v"]'], + ) + + await self.assert_query_result( + r"""SELECT <json>(a := b'foo')""", + [{"a": "Zm9v"}], + ['{"a": "Zm9v"}'], + ) + + await self.assert_query_result( + r"""SELECT <json>[(a := b'foo')][0]""", + [{"a": "Zm9v"}], + ['{"a": "Zm9v"}'], + ) + + async def test_edgeql_json_bytes_output_01(self): + await self.assert_query_result( + r"""SELECT b'foo';""", + ['Zm9v'], + [b'foo'], + ) + + await self.assert_query_result( + 
r"""SELECT { x := b'hello' };""", + [{'x': 'aGVsbG8='}], + [{'x': b'hello'}], + ) + + await self.assert_query_result( + r"""SELECT (b'foo',)""", + [['Zm9v']], + [[b'foo']], + ) + + await self.assert_query_result( + r"""SELECT [(b'foo',)][0]""", + [['Zm9v']], + [[b'foo']], + ) + + await self.assert_query_result( + r"""SELECT (a := b'foo')""", + [{"a": "Zm9v"}], + [{"a": b'foo'}], + ) + + await self.assert_query_result( + r"""SELECT [(a := b'foo')][0]""", + [{"a": "Zm9v"}], + [{"a": b'foo'}], + ) + + await self.assert_query_result( + r"""SELECT [b'foo'];""", + [['Zm9v']], + [[b'foo']], + ) + + await self.assert_query_result( + r"""SELECT (foo := b'hello', bar := [b'world']);""", + [{'bar': ['d29ybGQ='], 'foo': 'aGVsbG8='}], + [{'bar': [b'world'], 'foo': b'hello'}], + ) + + async def test_edgeql_json_bytes_output_02(self): + await self.con.execute(r''' + CREATE SCALAR TYPE bytes2 EXTENDING bytes; + ''') + + await self.assert_query_result( + r"""SELECT [b'foo'];""", + [['Zm9v']], + [[b'foo']], + ) async def test_edgeql_json_alias_01(self): await self.assert_query_result(