Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

implement casting between bytes and json #2482

Merged
merged 3 commits into from
Nov 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions docs/stdlib/bytes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,7 @@ Bytes

.. eql:type:: std::bytes

A sequence of bytes.

Bytes cannot be cast into any other type. They represent raw data.
A sequence of bytes representing raw data.

There's a special byte literal:

Expand All @@ -54,6 +52,15 @@ Bytes
db> SELECT contains(b'qwerty', b'42');
{false}

It is possible to :eql:op:`cast <CAST>` between :eql:type:`bytes` and
:eql:type:`json`. Bytes are represented as base64-encoded strings in JSON:

.. code-block:: edgeql-repl

db> SELECT <json>b'Hello EdgeDB!';
{"\"SGVsbG8gRWRnZURCIQ==\""}
db> SELECT <bytes>to_json('"SGVsbG8gRWRnZURCIQ=="');
{b'Hello EdgeDB!'}

----------

Expand Down
3 changes: 1 addition & 2 deletions docs/stdlib/json.rst
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,7 @@ possible to cast a JSON value directly into a :eql:type:`tuple`.

Arbitrary JSON data.

Any other type (except for :eql:type:`bytes`) can be
:eql:op:`cast <CAST>` to and from JSON:
Any other type can be :eql:op:`cast <CAST>` to and from JSON:

.. code-block:: edgeql-repl

Expand Down
16 changes: 12 additions & 4 deletions edb/edgeql/compiler/casts.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@ def compile_cast(
f'`...[IS {new_stype.get_displayname(ctx.env.schema)}]` instead',
context=srcctx)

json_t = ctx.env.get_track_schema_type(
sn.QualName('std', 'json'))

if isinstance(ir_set.expr, irast.Array):
return _cast_array_literal(
ir_set, orig_stype, new_stype, srcctx=srcctx, ctx=ctx)
Expand Down Expand Up @@ -124,8 +127,6 @@ def compile_cast(
cardinality_mod=cardinality_mod, ctx=ctx)

else:
json_t = ctx.env.get_track_schema_type(
sn.QualName('std', 'json'))
if (new_stype.issubclass(ctx.env.schema, json_t) and
ir_set.path_id.is_objtype_path()):
# JSON casts of objects are special: we want the full shape
Expand Down Expand Up @@ -626,8 +627,15 @@ def _cast_array_literal(
context=srcctx) from None
assert isinstance(new_stype, s_types.Array)
el_type = new_stype.get_subtypes(ctx.env.schema)[0]
intermediate_stype = orig_stype

else:
el_type = new_stype
ctx.env.schema, intermediate_stype = s_types.Array.from_subtypes(
ctx.env.schema, [el_type])

intermediate_typeref = typegen.type_to_typeref(
intermediate_stype, env=ctx.env)

casted_els = []
for el in ir_set.expr.elements:
Expand All @@ -637,12 +645,12 @@ def _cast_array_literal(
casted_els.append(el)

new_array = setgen.ensure_set(
irast.Array(elements=casted_els, typeref=orig_typeref),
irast.Array(elements=casted_els, typeref=intermediate_typeref),
ctx=ctx)

if direct_cast is not None:
return _cast_to_ir(
new_array, direct_cast, orig_stype, new_stype, ctx=ctx)
new_array, direct_cast, intermediate_stype, new_stype, ctx=ctx)

else:
cast_ir = irast.TypeCast(
Expand Down
4 changes: 4 additions & 0 deletions edb/ir/ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,10 @@ def __repr__(self) -> str:
def real_material_type(self) -> TypeRef:
return self.material_type or self

@property
def real_base_type(self) -> TypeRef:
    """Return the underlying base type, falling back to this typeref.

    Mirrors ``real_material_type`` above: when no ``base_type`` is
    recorded, the typeref is its own base.
    """
    base = self.base_type
    return base if base else self

def __eq__(self, other: object) -> bool:
if not isinstance(other, self.__class__):
return False
Expand Down
11 changes: 11 additions & 0 deletions edb/ir/typeutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from edb.schema import scalars as s_scalars
from edb.schema import types as s_types
from edb.schema import objtypes as s_objtypes
from edb.schema import objects as s_obj
from edb.schema import utils as s_utils

from . import ast as irast
Expand Down Expand Up @@ -104,6 +105,16 @@ def is_abstract(typeref: irast.TypeRef) -> bool:
return typeref.is_abstract


def is_json(typeref: irast.TypeRef) -> bool:
    """Return True if *typeref* describes the json type."""
    json_id = s_obj.get_known_type_id('std::json')
    return typeref.real_base_type.id == json_id


def is_bytes(typeref: irast.TypeRef) -> bool:
    """Return True if *typeref* describes the bytes type."""
    bytes_id = s_obj.get_known_type_id('std::bytes')
    return typeref.real_base_type.id == bytes_id


def is_persistent_tuple(typeref: irast.TypeRef) -> bool:
if is_tuple(typeref):
if typeref.material_type is not None:
Expand Down
16 changes: 16 additions & 0 deletions edb/lib/std/30-jsonfuncs.edgeql
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,14 @@ CREATE CAST FROM std::bool TO std::json {
};


# Bytes are represented in JSON as a base64-encoded string
# (see docs/stdlib/bytes.rst). 'Stable' volatility matches the
# neighboring std casts to json in this file.
CREATE CAST FROM std::bytes TO std::json {
    SET volatility := 'Stable';
    USING SQL $$
    SELECT to_jsonb(encode(val, 'base64'));
    $$;
};


CREATE CAST FROM std::uuid TO std::json {
SET volatility := 'Stable';
USING SQL FUNCTION 'to_jsonb';
Expand Down Expand Up @@ -337,6 +345,14 @@ CREATE CAST FROM std::json TO std::uuid {
};


# Inverse of the bytes-to-json cast: extract the JSON string scalar
# and base64-decode it back into raw bytes.
CREATE CAST FROM std::json TO std::bytes {
    SET volatility := 'Stable';
    USING SQL $$
    SELECT decode(edgedb.jsonb_extract_scalar(val, 'string'), 'base64')::bytea;
    $$;
};


CREATE CAST FROM std::json TO std::str {
SET volatility := 'Stable';
USING SQL $$
Expand Down
125 changes: 75 additions & 50 deletions edb/pgsql/compiler/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,10 @@
from edb.ir import typeutils as irtyputils

from edb.schema import defines as s_defs
from edb.schema import casts as s_casts

from edb.pgsql import ast as pgast
from edb.pgsql import common
from edb.pgsql import types as pgtypes

from . import astutils
Expand Down Expand Up @@ -152,39 +154,54 @@ def array_as_json_object(
) -> pgast.BaseExpr:
el_type = styperef.subtypes[0]

if irtyputils.is_tuple(el_type):
is_tuple = irtyputils.is_tuple(el_type)
# Tuples and bytes might need underlying casts to be done
if is_tuple or irtyputils.is_bytes(el_type):
coldeflist = []
json_args: List[pgast.BaseExpr] = []
is_named = any(st.element_name for st in el_type.subtypes)

for i, st in enumerate(el_type.subtypes):
if is_named:
colname = st.element_name
assert colname
json_args.append(pgast.StringConstant(val=colname))
else:
colname = str(i)

val: pgast.BaseExpr = pgast.ColumnRef(name=[colname])
if irtyputils.is_collection(st):
val = coll_as_json_object(val, styperef=st, env=env)

json_args.append(val)

if not irtyputils.is_persistent_tuple(el_type):
# Column definition list is only allowed for functions
# returning "record", i.e. an anonymous tuple, which
# would not be the case for schema-persistent tuple types.
coldeflist.append(
pgast.ColumnDef(
name=colname,
typename=pgast.TypeName(
name=pgtypes.pg_type_from_ir_typeref(st)

out_alias = env.aliases.get('q')

val: pgast.BaseExpr
if is_tuple:
json_args: List[pgast.BaseExpr] = []
is_named = any(st.element_name for st in el_type.subtypes)
for i, st in enumerate(el_type.subtypes):
if is_named:
colname = st.element_name
assert colname
json_args.append(pgast.StringConstant(val=colname))
else:
colname = str(i)

val = pgast.ColumnRef(name=[colname])
val = serialize_expr_to_json(
val, styperef=st, nested=True, env=env)

json_args.append(val)

if not irtyputils.is_persistent_tuple(el_type):
# Column definition list is only allowed for functions
# returning "record", i.e. an anonymous tuple, which
# would not be the case for schema-persistent tuple types.
coldeflist.append(
pgast.ColumnDef(
name=colname,
typename=pgast.TypeName(
name=pgtypes.pg_type_from_ir_typeref(st)
)
)
)
)

json_func = 'build_object' if is_named else 'build_array'
json_func = 'build_object' if is_named else 'build_array'
agg_arg = _build_json(json_func, json_args, env=env)

needs_unnest = bool(el_type.subtypes)
else:
assert not el_type.subtypes
val = pgast.ColumnRef(name=[out_alias])
agg_arg = serialize_expr_to_json(
val, styperef=el_type, nested=True, env=env)
needs_unnest = True

return pgast.SelectStmt(
target_list=[
Expand All @@ -193,9 +210,7 @@ def array_as_json_object(
args=[
pgast.FuncCall(
name=_get_json_func('agg', env=env),
args=[
_build_json(json_func, json_args, env=env)
]
args=[agg_arg],
),
pgast.StringConstant(val='[]'),
]
Expand All @@ -205,9 +220,7 @@ def array_as_json_object(
],
from_clause=[
pgast.RangeFunction(
alias=pgast.Alias(
aliasname=env.aliases.get('q'),
),
alias=pgast.Alias(aliasname=out_alias),
is_rowsfrom=True,
functions=[
pgast.FuncCall(
Expand All @@ -217,7 +230,7 @@ def array_as_json_object(
)
]
)
] if el_type.subtypes else [],
] if needs_unnest else [],
)
else:
return pgast.FuncCall(
Expand Down Expand Up @@ -255,8 +268,8 @@ def unnamed_tuple_as_json_object(
),
],
)
if irtyputils.is_collection(el_type):
val = coll_as_json_object(val, styperef=el_type, env=env)
val = serialize_expr_to_json(
val, styperef=el_type, nested=True, env=env)
vals.append(val)

return _build_json(
Expand All @@ -282,8 +295,8 @@ def unnamed_tuple_as_json_object(

val = pgast.ColumnRef(name=[str(el_idx)])

if irtyputils.is_collection(el_type):
val = coll_as_json_object(val, styperef=el_type, env=env)
val = serialize_expr_to_json(
val, styperef=el_type, nested=True, env=env)

vals.append(val)

Expand Down Expand Up @@ -340,8 +353,8 @@ def named_tuple_as_json_object(
)
]
)
if irtyputils.is_collection(el_type):
val = coll_as_json_object(val, styperef=el_type, env=env)
val = serialize_expr_to_json(
val, styperef=el_type, nested=True, env=env)
keyvals.append(val)

return _build_json(
Expand Down Expand Up @@ -369,8 +382,8 @@ def named_tuple_as_json_object(

val = pgast.ColumnRef(name=[el_type.element_name])

if irtyputils.is_collection(el_type):
val = coll_as_json_object(val, styperef=el_type, env=env)
val = serialize_expr_to_json(
val, styperef=el_type, nested=True, env=env)

keyvals.append(val)

Expand Down Expand Up @@ -410,7 +423,7 @@ def named_tuple_as_json_object(
def tuple_var_as_json_object(
tvar: pgast.TupleVar,
*,
path_id: irast.PathId,
styperef: irast.TypeRef,
env: context.Environment,
) -> pgast.BaseExpr:

Expand Down Expand Up @@ -512,14 +525,14 @@ def serialize_expr_if_needed(

def serialize_expr_to_json(
expr: pgast.BaseExpr, *,
path_id: irast.PathId,
styperef: irast.TypeRef,
nested: bool=False,
env: context.Environment) -> pgast.BaseExpr:

val: pgast.BaseExpr

if isinstance(expr, pgast.TupleVar):
val = tuple_var_as_json_object(expr, path_id=path_id, env=env)
val = tuple_var_as_json_object(expr, styperef=styperef, env=env)

elif isinstance(expr, (pgast.RowExpr, pgast.ImplicitRowExpr)):
val = _build_json(
Expand All @@ -530,8 +543,20 @@ def serialize_expr_to_json(
env=env,
)

elif path_id.is_collection_path() and not expr.ser_safe:
val = coll_as_json_object(expr, styperef=path_id.target, env=env)
elif irtyputils.is_collection(styperef) and not expr.ser_safe:
val = coll_as_json_object(expr, styperef=styperef, env=env)

# TODO: We'll probably want to generalize this to other custom JSON
# casts once they exist.
elif (
irtyputils.is_bytes(styperef)
and not expr.ser_safe
):
cast_name = s_casts.get_cast_fullname_from_names(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd add a TODO: note here to remind us to find a way to generalize this, since bytes wouldn't be the last scalar with a custom JSON cast.

'std', 'std::bytes', 'std::json')
val = pgast.FuncCall(
name=common.get_cast_backend_name(cast_name, aspect='function'),
args=[expr], null_safe=True, ser_safe=True)

elif not nested:
val = pgast.FuncCall(
Expand All @@ -554,7 +579,7 @@ def serialize_expr(
context.OutputFormat.JSON_ELEMENTS,
context.OutputFormat.JSONB):
val = serialize_expr_to_json(
expr, path_id=path_id, nested=nested, env=env)
expr, styperef=path_id.target, nested=nested, env=env)

elif env.output_format in (context.OutputFormat.NATIVE,
context.OutputFormat.NATIVE_INTERNAL,
Expand Down
2 changes: 1 addition & 1 deletion edb/pgsql/compiler/relgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -1832,7 +1832,7 @@ def process_set_as_type_cast(
if serialized is not None:
if irtyputils.is_collection(inner_set.typeref):
serialized = output.serialize_expr_to_json(
serialized, path_id=inner_set.path_id,
serialized, styperef=inner_set.path_id.target,
env=subctx.env)

pathctx.put_path_value_var(
Expand Down
Loading