Skip to content

Commit

Permalink
experimental mode to ban unsafe use of C++ references (without unsafe…
Browse files Browse the repository at this point in the history
… annotation)

Currently implemented: ban all C++ references from subexpressions: An expression returning a reference must either be discarded or must be on the lhs of an Assignment (requiring a 'ref' type annotation if the reference is to be preserved instead of a copy). TODO: additional unsafe annotation for const:ref / mut:ref locals/members and mut:ref params.
  • Loading branch information
ehren committed Jan 7, 2025
1 parent 6860090 commit 9ddebb2
Show file tree
Hide file tree
Showing 6 changed files with 219 additions and 6 deletions.
6 changes: 5 additions & 1 deletion ceto/codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -2143,6 +2143,10 @@ def codegen_call(node: Call, cx: Scope):
return "namespace " + name_code + " {\n" + block_code + "\n}"
elif func_name == "defmacro":
return "\n"
elif func_name == "overparenthesized_decltype" and len(node.args) == 1:
# calling plain "decltype" in ceto will always strip outer double parentheses
# (they are often accidentally added by codegen's overeager parenthesization)
return "decltype((" + codegen_node(node.args[0], cx) + "))"
else:
arg_strs = [codegen_node(a, cx) for a in node.args]
args_inner = ", ".join(arg_strs)
Expand Down Expand Up @@ -2782,7 +2786,7 @@ def codegen_node(node: Node, cx: Scope):
# no longer necessary CTAD reimplementation:
# return "std::vector<{}>{{{}}}".format(decltype_str(node.args[0], cx), ", ".join(elements))
else:
raise CodeGenError("Cannot create vector without elements")
raise CodeGenError("Cannot create vector without elements", node)
elif isinstance(node, BracedLiteral):
if isinstance(node.parent, Block):
raise CodeGenError("Curly brace expression is invalid here. Use 'scope' for an anonymous scope.", node)
Expand Down
1 change: 1 addition & 0 deletions ceto/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ def main():
ap.add_argument("-m", "--compileonly", action='store_true', help="Compile ceto code only. Do not compile C++. Do not run program.")
ap.add_argument("--donotexecute", action='store_true', help="If compiling C++, do not attempt to run an executable")
ap.add_argument("--_nostandardlibmacros", action='store_true', help="Do not include standard lib macros during compilation (not recommended unless compiling the standard lib macros themselves)")
ap.add_argument("--_norefs", action='store_true', help="Enable experimental mode to ban unsafe use of C++ references (without unsafe annotation). Currently implemented: ban all C++ references from subexpressions: An expression returning a reference must either be discarded or must be on the lhs of an Assignment (requiring a 'ref' type annotion if the reference is to be preserved instead of a copy). TODO: additional unsafe annotation for const:ref / mut:ref locals/members and mut:ref params")
ap.add_argument("-I", "--include", type=str, nargs="*", help="Additional search directory for ceto headers (.cth files). Directory of transpiled file (first positional arg) takes priority in search.")
ap.add_argument("filename")
ap.add_argument("args", nargs="*")
Expand Down
140 changes: 135 additions & 5 deletions ceto/semanticanalysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,121 @@ def visitor(node):
return visitor(node)


# Experimental pass, active only when cmdargs._norefs is set.
# Recursively walks `node` and wraps qualifying subexpressions in an
# immediately-invoked `lambda[ref]` whose body static_asserts that
# overparenthesized_decltype(<expr>) is not a reference and then returns the
# expression (lambda return type decltype(auto)).
# Returns: the replacement wrapper node, `node` itself in a few cases
# (pass disabled, Module, the for-in / compound-assignment cases), or None
# when the caller should keep the node it already has.
def no_references_in_subexpressions(node):

# Function-local import (presumably to avoid a circular import with
# ceto.compiler - TODO confirm).
from ceto.compiler import cmdargs
# Pass is opt-in: when the flag is off the input is handed back untouched.
if not cmdargs._norefs:
return node

# include<...> templates are never wrapped and never descended into.
if isinstance(node, Template) and node.func.name == "include":
return None

# `lhs in rhs` whose parent is a `for` call: only the rhs is processed;
# the lhs is left alone.
if isinstance(node, BinOp) and node.op == "in" and node.parent.func and node.parent.func.name == "for":
rhs = no_references_in_subexpressions(node.rhs)
if rhs:
node.args = [node.lhs, rhs]
return node

# Compound assignment whose parent is a Block: only the rhs is processed.
if isinstance(node, BinOp) and isinstance(node.parent, Block) and node.op in ["+=", "-=", "*=", "/=", "%=", "&=", "|=", "^=", "~=", ">>=", "<<="]:
rhs = no_references_in_subexpressions(node.rhs)
if rhs:
node.args = [node.lhs, rhs]
return node

# Generic case: process every argument, keeping the original argument
# wherever the recursive call returned a falsy value.
new_args = []
found_new = False
for a in node.args:
new = no_references_in_subexpressions(a)
if new:
new_args.append(new)
found_new = True
else:
new_args.append(a)

# Only reassign node.args when at least one argument was replaced.
if found_new:
node.args = new_args

# The func sub-node, when present, is processed the same way.
if node.func:
new = no_references_in_subexpressions(node.func)
if new:
node.func = new

# The Module root is returned as is (never wrapped).
if isinstance(node, Module):
return node

# Note we should do a better job banning Assign in subexpressions even though the Assign vs NamedParameter
# distinction was meant to tackle this in the simple case of x=y in a Call
# Node kinds that are never wrapped themselves.
if isinstance(node, (Block, TypeOp, ListLiteral, BracedLiteral, ScopeResolution, Template, Identifier, Assign, NamedParameter, IntegerLiteral, FloatLiteral, StringLiteral)):
return None

# A node whose direct parent is a Block or a TypeOp is not wrapped.
if isinstance(node.parent, (Block, TypeOp)):
return None

# The func of a Call / BracedCall / Template / ArrayAccess is not wrapped.
if isinstance(node.parent, (Call, BracedCall, Template, ArrayAccess)) and node is node.parent.func:
return None

# Direct children of `if` / `while` calls are not wrapped.
if isinstance(node.parent, Call) and node.parent.func.name in ["if", "while"]:
return None

# Direct children of calls for which is_def_or_class_like() is true are not wrapped.
if isinstance(node.parent, Call) and is_def_or_class_like(node.parent):
return None

# Calls for which is_call_lambda() is true are not wrapped.
if isinstance(node, Call) and is_call_lambda(node):
return None

# NOTE(review): no-op branch; it has no effect as written.
if isinstance(node, Call):
pass

# Direct children of a `for` call are not wrapped.
if isinstance(node.parent, Call) and node.parent.func.name == "for":
return None

# Children of `lambda[...]` (an ArrayAccess whose func is named "lambda") are not wrapped.
if isinstance(node.parent, ArrayAccess) and node.parent.func.name == "lambda":
return None

if isinstance(node, AttributeAccess):
# Walk down to the leftmost operand of the attribute-access / scope-resolution chain.
attr_lhs = node.lhs
while isinstance(attr_lhs, (AttributeAccess, ScopeResolution)):
attr_lhs = attr_lhs.lhs

# Leftmost name is not `self` and scope.find_def() finds no definition for it.
if isinstance(attr_lhs, Identifier) and attr_lhs.name != "self" and not attr_lhs.scope.find_def(attr_lhs):
# implicit scope resolution
return None

# note that banning e.g. dereferenceable (requires { *foo } ) types on the lhs of an assignment should use
# the "no implicit conversions in assignments" logic in codegen_assign

if isinstance(node.parent, Assign):
# a C++ reference is ok as the type of the lhs of an assignment because C++ will perform a copy without
# an explicit reference type (which, for a local variable, will TODO require an unsafe annotation even if const)
return None

# Commented-out ceto source template that the hand-built AST below is modeled on:
# code = """(lambda[ref] (:
# static_assert(not std::is_reference_v<decltype(ceto_private_placeholder)>)
# return ceto_private_placeholder
#):decltype(auto))()"""

# from .parser import parse
# ban_references_lambda = parse(code).args[0]
# print(ban_references_lambda.ast_repr(ceto_evalable=False, preserve_source_loc=False))
# import sys
# sys.exit(-1)

# The clone goes inside the static_assert's decltype; the original node is
# the one placed in the generated `return`.
clone = node.clone()

# Hand-built AST for the wrapper; unlike the template above, the static_assert
# uses overparenthesized_decltype(clone) rather than plain decltype.
ban_references_lambda = Call(TypeOp(":", [Call(ArrayAccess(Identifier("lambda", ), [Identifier("ref", ), ], ), [Block([Call(Identifier("static_assert", ), [UnOp("not", [ScopeResolution("::", [Identifier("std", ), Template(Identifier("is_reference_v", ), [Call(Identifier("overparenthesized_decltype", ), [clone, ], ), ], ), ], )], ), ], ), SyntaxTypeOp(":", [Identifier("return"), node], ), ], ), ], ), Call(Identifier("decltype", ), [Identifier("auto", ), ], ), ], ), [], )

#ban_references_lambda = Call(Identifier("CETO_BAN_REFS"), [node])

# The wrapper takes over node's parent; node and clone are parented to the
# wrapper; wrapper and clone get node's scope.
ban_references_lambda.parent = node.parent
node.parent = ban_references_lambda
clone.parent = ban_references_lambda
clone.scope = node.scope
ban_references_lambda.scope = node.scope
#ban_references_lambda = basic_semantic_analysis(ban_references_lambda)

return ban_references_lambda


def type_inorder_traversal(typenode: Node, func):
if isinstance(typenode, TypeOp):
if not type_inorder_traversal(typenode.lhs, func):
Expand Down Expand Up @@ -1018,17 +1133,32 @@ def macro_expansion(expr: Module):
return expr


def semantic_analysis(expr: Module):
assert isinstance(expr, Module) # enforced by parser
def basic_semantic_analysis(expr: Module) -> Module:

expr = one_liner_expander(expr)
expr = assign_to_named_parameter(expr)
expr = warn_and_remove_redundant_parenthesese(expr)

expr = build_types(expr)
expr = build_parents(expr)
expr = apply_replacers(expr, [ScopeVisitor()])
expr = apply_replacers(expr, [ImplicitLambdaCaptureVisitor()])
return expr


def semantic_analysis(expr: Module) -> Module:
assert isinstance(expr, Module) # enforced by parser

expr = one_liner_expander(expr)
expr = assign_to_named_parameter(expr)
expr = warn_and_remove_redundant_parenthesese(expr)

expr = basic_semantic_analysis(expr)
expr = no_references_in_subexpressions(expr)

def noscope(n):
n.scope = None
return n
expr = replace_node(expr, noscope)

expr = basic_semantic_analysis(expr)

def defs(node):
if not isinstance(node, Node):
Expand Down
2 changes: 2 additions & 0 deletions include/ceto.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ static inline void issue_null_deref_message() {

#endif

#define CETO_BAN_REFS(expr) [&]() -> decltype(auto) { static_assert(!(std::is_reference_v<decltype((expr))>)); return expr; }()

// mad = maybe allow deref

// Based on answer of user Nawaz at https://stackoverflow.com/questions/14466620/c-template-specialization-calling-methods-on-types-that-could-be-pointers-or?noredirect=1&lq=1
Expand Down
30 changes: 30 additions & 0 deletions tests/regression/ban_refs_for_loop.ctp
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Test Output 1
# Test Output 2
# Test Output 3
# Test Output 123

# requires --_norefs

class (Foo:
a = [1, 2, 3]

def (foo:
#for (x in self.a: # static_assert this->a is_reference_v (same with self.a but self rewritten to this)
#for (x in this->a: # same
a = self.a
for (x in a:
std.cout << x # << std.endl # static_assert std.cout << x is_reference_v
std.cout << "\n"
)
# unsafe:mut:auto:ref:ref in the future!
b: mut:auto:ref:ref = self.a
for (x in b:
std.cout << x
)
)
)

def (main:
f = Foo()
f.foo()
)
46 changes: 46 additions & 0 deletions tests/regression/ban_refs_for_loop.donotedit.autogenerated.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@

#include <string>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <fstream>
#include <sstream>
#include <functional>
#include <cassert>
#include <compare> // for <=>
#include <thread>
#include <optional>


#include "ceto.h"
//#include "ceto_private_boundscheck.donotedit.autogenerated.h"

#include "ceto_private_listcomp.donotedit.autogenerated.h"
;
#include "ceto_private_boundscheck.donotedit.autogenerated.h"
;
#include "ceto_private_convenience.donotedit.autogenerated.h"
;
struct Foo : public ceto::shared_object, public std::enable_shared_from_this<Foo> {

decltype(std::vector {{1, 2, 3}}) a = std::vector {{1, 2, 3}};

inline auto foo() const -> void {
const auto a = (this -> a);
for(const auto& x : a) {
std::cout << x;
std::cout << "\n";
}
auto && b { (this -> a) } ;
for(const auto& x : b) {
std::cout << x;
}
}

};

auto main() -> int {
const auto f = std::make_shared<const Foo>();
(*ceto::mad(f)).foo();
}

0 comments on commit 9ddebb2

Please sign in to comment.