diff --git a/ceto/codegen.py b/ceto/codegen.py
index 6bf0022..8f66131 100755
--- a/ceto/codegen.py
+++ b/ceto/codegen.py
@@ -2143,6 +2143,10 @@ def codegen_call(node: Call, cx: Scope):
             return "namespace " + name_code + " {\n" + block_code + "\n}"
         elif func_name == "defmacro":
             return "\n"
+        elif func_name == "overparenthesized_decltype" and len(node.args) == 1:
+            # calling plain "decltype" in ceto will always strip outer double parentheses
+            # (they are often accidentally added by codegen's overeager parenthesization)
+            return "decltype((" + codegen_node(node.args[0], cx) + "))"
         else:
             arg_strs = [codegen_node(a, cx) for a in node.args]
             args_inner = ", ".join(arg_strs)
@@ -2782,7 +2786,7 @@ def codegen_node(node: Node, cx: Scope):
                 # no longer necessary CTAD reimplementation:
                 # return "std::vector<{}>{{{}}}".format(decltype_str(node.args[0], cx), ", ".join(elements))
         else:
-            raise CodeGenError("Cannot create vector without elements")
+            raise CodeGenError("Cannot create vector without elements", node)
     elif isinstance(node, BracedLiteral):
         if isinstance(node.parent, Block):
             raise CodeGenError("Curly brace expression is invalid here. Use 'scope' for an anonymous scope.", node)
diff --git a/ceto/compiler.py b/ceto/compiler.py
index 2af113b..ec2e4f9 100755
--- a/ceto/compiler.py
+++ b/ceto/compiler.py
@@ -165,6 +165,7 @@ def main():
     ap.add_argument("-m", "--compileonly", action='store_true', help="Compile ceto code only. Do not compile C++. Do not run program.")
     ap.add_argument("--donotexecute", action='store_true', help="If compiling C++, do not attempt to run an executable")
     ap.add_argument("--_nostandardlibmacros", action='store_true', help="Do not include standard lib macros during compilation (not recommended unless compiling the standard lib macros themselves)")
+    ap.add_argument("--_norefs", action='store_true', help="Enable experimental mode to ban unsafe use of C++ references (without unsafe annotation). Currently implemented: ban all C++ references from subexpressions: An expression returning a reference must either be discarded or must be on the lhs of an Assignment (requiring a 'ref' type annotation if the reference is to be preserved instead of a copy). TODO: additional unsafe annotation for const:ref / mut:ref locals/members and mut:ref params")
     ap.add_argument("-I", "--include", type=str, nargs="*", help="Additional search directory for ceto headers (.cth files). Directory of transpiled file (first positional arg) takes priority in search.")
     ap.add_argument("filename")
     ap.add_argument("args", nargs="*")
diff --git a/ceto/semanticanalysis.py b/ceto/semanticanalysis.py
index 7d532dd..323e632 100644
--- a/ceto/semanticanalysis.py
+++ b/ceto/semanticanalysis.py
@@ -91,6 +91,131 @@ def visitor(node):
     return visitor(node)
 
 
+def no_references_in_subexpressions(node):
+    """Wrap subexpressions in a compile-time check that they are not C++ references.
+
+    Does nothing (returns ``node`` unchanged) unless the ``--_norefs`` flag is set.
+    Otherwise the tree is walked recursively and every subexpression that is
+    not exempted below is replaced by an immediately invoked ``lambda[ref]``:
+    its body static_asserts that ``decltype((expr))`` is not a reference type
+    and then returns the expression.
+
+    Returns ``node`` itself for the Module root, for the ``in`` expression of a
+    ``for`` loop and for statement-level compound assignments (children are
+    rewritten in place), ``None`` when ``node`` needs no wrapper, and otherwise
+    the new wrapping Call, which the caller splices in where ``node`` was.
+    """
+
+    from ceto.compiler import cmdargs
+    if not cmdargs._norefs:
+        return node
+
+    if isinstance(node, Template) and node.func.name == "include":
+        return None
+
+    if isinstance(node, BinOp) and node.op == "in" and node.parent.func and node.parent.func.name == "for":
+        rhs = no_references_in_subexpressions(node.rhs)
+        if rhs:
+            node.args = [node.lhs, rhs]
+        return node
+
+    if isinstance(node, BinOp) and isinstance(node.parent, Block) and node.op in ["+=", "-=", "*=", "/=", "%=", "&=", "|=", "^=", "~=", ">>=", "<<="]:
+        rhs = no_references_in_subexpressions(node.rhs)
+        if rhs:
+            node.args = [node.lhs, rhs]
+        return node
+
+    new_args = []
+    found_new = False
+    for a in node.args:
+        new = no_references_in_subexpressions(a)
+        if new:
+            new_args.append(new)
+            found_new = True
+        else:
+            new_args.append(a)
+
+    if found_new:
+        node.args = new_args
+
+    if node.func:
+        new = no_references_in_subexpressions(node.func)
+        if new:
+            node.func = new
+
+    if isinstance(node, Module):
+        return node
+
+    # Note we should do a better job banning Assign in subexpressions even though the Assign vs NamedParameter
+    # distinction was meant to tackle this in the simple case of x=y in a Call
+    if isinstance(node, (Block, TypeOp, ListLiteral, BracedLiteral, ScopeResolution, Template, Identifier, Assign, NamedParameter, IntegerLiteral, FloatLiteral, StringLiteral)):
+        return None
+
+    if isinstance(node.parent, (Block, TypeOp)):
+        return None
+
+    if isinstance(node.parent, (Call, BracedCall, Template, ArrayAccess)) and node is node.parent.func:
+        return None
+
+    if isinstance(node.parent, Call) and node.parent.func.name in ["if", "while"]:
+        return None
+
+    if isinstance(node.parent, Call) and is_def_or_class_like(node.parent):
+        return None
+
+    if isinstance(node, Call) and is_call_lambda(node):
+        return None
+
+    if isinstance(node.parent, Call) and node.parent.func.name == "for":
+        return None
+
+    if isinstance(node.parent, ArrayAccess) and node.parent.func.name == "lambda":
+        return None
+
+    if isinstance(node, AttributeAccess):
+        attr_lhs = node.lhs
+        while isinstance(attr_lhs, (AttributeAccess, ScopeResolution)):
+            attr_lhs = attr_lhs.lhs
+
+        if isinstance(attr_lhs, Identifier) and attr_lhs.name != "self" and not attr_lhs.scope.find_def(attr_lhs):
+            # implicit scope resolution
+            return None
+
+    # note that banning e.g. dereferenceable (requires { *foo } ) types on the lhs of an assignment should use
+    # the "no implicit conversions in assignments" logic in codegen_assign
+
+    if isinstance(node.parent, Assign):
+        # a C++ reference is ok as the type of the lhs of an assignment because C++ will perform a copy without
+        # an explicit reference type (which, for a local variable, will TODO require an unsafe annotation even if const)
+        return None
+
+#    code = """(lambda[ref] (:
+#        static_assert(not std::is_reference_v)
+#        return ceto_private_placeholder
+#):decltype(auto))()"""
+
+#    from .parser import parse
+#    ban_references_lambda = parse(code).args[0]
+#    print(ban_references_lambda.ast_repr(ceto_evalable=False, preserve_source_loc=False))
+#    import sys
+#    sys.exit(-1)
+
+    clone = node.clone()
+
+    ban_references_lambda = Call(TypeOp(":", [Call(ArrayAccess(Identifier("lambda", ), [Identifier("ref", ), ], ), [Block([Call(Identifier("static_assert", ), [UnOp("not", [ScopeResolution("::", [Identifier("std", ), Template(Identifier("is_reference_v", ), [Call(Identifier("overparenthesized_decltype", ), [clone, ], ), ], ), ], )], ), ], ), SyntaxTypeOp(":", [Identifier("return"), node], ), ], ), ], ), Call(Identifier("decltype", ), [Identifier("auto", ), ], ), ], ), [], )
+
+    #ban_references_lambda = Call(Identifier("CETO_BAN_REFS"), [node])
+
+    ban_references_lambda.parent = node.parent
+    node.parent = ban_references_lambda
+    clone.parent = ban_references_lambda
+    clone.scope = node.scope
+    ban_references_lambda.scope = node.scope
+    #ban_references_lambda = basic_semantic_analysis(ban_references_lambda)
+
+    return ban_references_lambda
+
+
 def type_inorder_traversal(typenode: Node, func):
     if isinstance(typenode, TypeOp):
         if not type_inorder_traversal(typenode.lhs, func):
@@ -1018,17 +1143,35 @@ def macro_expansion(expr: Module):
     return expr
 
 
-def semantic_analysis(expr: Module):
-    assert isinstance(expr, Module)  # enforced by parser
+def basic_semantic_analysis(expr: Module) -> Module:
+    """Run build_types, build_parents, ScopeVisitor and ImplicitLambdaCaptureVisitor."""
 
-    expr = one_liner_expander(expr)
-    expr = assign_to_named_parameter(expr)
-    expr = warn_and_remove_redundant_parenthesese(expr)
     expr = build_types(expr)
     expr = build_parents(expr)
     expr = apply_replacers(expr, [ScopeVisitor()])
     expr = apply_replacers(expr, [ImplicitLambdaCaptureVisitor()])
 
+    return expr
+
+
+def semantic_analysis(expr: Module) -> Module:
+    assert isinstance(expr, Module)  # enforced by parser
+
+    expr = one_liner_expander(expr)
+    expr = assign_to_named_parameter(expr)
+    expr = warn_and_remove_redundant_parenthesese(expr)
+
+    expr = basic_semantic_analysis(expr)
+    expr = no_references_in_subexpressions(expr)
+
+    # no_references_in_subexpressions may have inserted new nodes, so reset
+    # the scopes and redo the basic passes over the rewritten tree.
+    def noscope(n):
+        n.scope = None
+        return n
+    expr = replace_node(expr, noscope)
+
+    expr = basic_semantic_analysis(expr)
 
     def defs(node):
         if not isinstance(node, Node):
diff --git a/include/ceto.h b/include/ceto.h
index 28f9351..1be35ce 100644
--- a/include/ceto.h
+++ b/include/ceto.h
@@ -73,6 +73,8 @@ static inline void issue_null_deref_message() {
 
 #endif
 
+#define CETO_BAN_REFS(expr) [&]() -> decltype(auto) { static_assert(!(std::is_reference_v<decltype((expr))>)); return expr; }()
+
 // mad = maybe allow deref
 // Based on answer of user Nawaz at https://stackoverflow.com/questions/14466620/c-template-specialization-calling-methods-on-types-that-could-be-pointers-or?noredirect=1&lq=1
 
diff --git a/tests/regression/ban_refs_for_loop.ctp b/tests/regression/ban_refs_for_loop.ctp
new file mode 100644
index 0000000..f8220ad
--- /dev/null
+++ b/tests/regression/ban_refs_for_loop.ctp
@@ -0,0 +1,30 @@
+# Test Output 1
+# Test Output 2
+# Test Output 3
+# Test Output 123
+
+# requires --_norefs
+
+class (Foo:
+    a = [1, 2, 3]
+
+    def (foo:
+        #for (x in self.a: # static_assert this->a is_reference_v (same with self.a but self rewritten to this)
+        #for (x in this->a: # same
+        a = self.a
+        for (x in a:
+            std.cout << x # << std.endl # static_assert std.cout << x is_reference_v
+            std.cout << "\n"
+        )
+        # unsafe:mut:auto:ref:ref in the future!
+        b: mut:auto:ref:ref = self.a
+        for (x in b:
+            std.cout << x
+        )
+    )
+)
+
+def (main:
+    f = Foo()
+    f.foo()
+)
diff --git a/tests/regression/ban_refs_for_loop.donotedit.autogenerated.cpp b/tests/regression/ban_refs_for_loop.donotedit.autogenerated.cpp
new file mode 100644
index 0000000..5b1918b
--- /dev/null
+++ b/tests/regression/ban_refs_for_loop.donotedit.autogenerated.cpp
@@ -0,0 +1,46 @@
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include // for <=>
+#include
+#include
+
+
+#include "ceto.h"
+//#include "ceto_private_boundscheck.donotedit.autogenerated.h"
+
+#include "ceto_private_listcomp.donotedit.autogenerated.h"
+;
+#include "ceto_private_boundscheck.donotedit.autogenerated.h"
+;
+#include "ceto_private_convenience.donotedit.autogenerated.h"
+;
+struct Foo : public ceto::shared_object, public std::enable_shared_from_this {
+
+    decltype(std::vector {{1, 2, 3}}) a = std::vector {{1, 2, 3}};
+
+        inline auto foo() const -> void {
+            const auto a = (this -> a);
+            for(const auto& x : a) {
+                std::cout << x;
+                std::cout << "\n";
+            }
+            auto && b { (this -> a) } ;
+            for(const auto& x : b) {
+                std::cout << x;
+            }
+        }
+
+};
+
+    auto main() -> int {
+        const auto f = std::make_shared();
+        (*ceto::mad(f)).foo();
+    }
+