Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

only allow certain characters after interpolated vars #25234

Merged
merged 5 commits into from
Dec 25, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,12 @@ Language changes
backslashes and the end of the literal while 2n+1 backslashes followed by a quote encodes n
backslashes followed by a quote character ([#22926]).

* The syntax `(x...)` for constructing a tuple is deprecated; use `(x...,)` instead (#24452).
* The syntax `(x...)` for constructing a tuple is deprecated; use `(x...,)` instead ([#24452]).

* Non-parenthesized interpolated variables in strings, e.g. `"$x"`, must be followed
by a character that will never be an allowed identifier character (currently
operators, space/control characters, or common punctuation characters) ([#25231]).


Breaking changes
----------------
Expand Down
26 changes: 26 additions & 0 deletions src/flisp/julia_extensions.c
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,21 @@ int jl_op_suffix_char(uint32_t wc)
return HT_NOTFOUND != wcharhash_get_r(&jl_opsuffs, (void*)((uintptr_t)wc), NULL);
}

// Returns 1 if `wc` is a code point that we will never allow to be part of a
// valid non-operator identifier, and 0 otherwise.
static int never_id_char(uint32_t wc)
{
    utf8proc_category_t category = utf8proc_category((utf8proc_int32_t) wc);

    // spaces and control characters
    if (category >= UTF8PROC_CATEGORY_ZS && category <= UTF8PROC_CATEGORY_CS) {
        return 1;
    }

    // non-connector punctuation in the ASCII / Latin1 range (wc < 0xff)
    if (wc < 0xff &&
        category >= UTF8PROC_CATEGORY_PD && category <= UTF8PROC_CATEGORY_PO) {
        return 1;
    }

    // the backtick is always excluded
    return wc == '`';
}

value_t fl_julia_identifier_char(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
{
argcount(fl_ctx, "identifier-char?", nargs, 1);
Expand All @@ -180,6 +195,16 @@ value_t fl_julia_identifier_start_char(fl_context_t *fl_ctx, value_t *args, uint
return jl_id_start_char(wc) ? fl_ctx->T : fl_ctx->F;
}

// flisp builtin `never-identifier-char?`.
// Expects a single wchar argument (the count is checked with argcount, and a
// type error is reported for anything that is not a wchar cprim). Returns the
// flisp true value when never_id_char() accepts the character and the flisp
// false value otherwise.
value_t fl_julia_never_identifier_char(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
{
    argcount(fl_ctx, "never-identifier-char?", nargs, 1);

    value_t arg = args[0];
    // only inspect the cprim's type once we know the value is a cprim
    int is_wchar = iscprim(arg) && ((cprim_t*)ptr(arg))->type == fl_ctx->wchartype;
    if (!is_wchar) {
        type_error(fl_ctx, "never-identifier-char?", "wchar", arg);
    }

    uint32_t ch = *(uint32_t*)cp_data((cprim_t*)ptr(arg));
    if (never_id_char(ch)) {
        return fl_ctx->T;
    }
    return fl_ctx->F;
}


value_t fl_julia_op_suffix_char(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
{
argcount(fl_ctx, "op-suffix-char?", nargs, 1);
Expand Down Expand Up @@ -307,6 +332,7 @@ static const builtinspec_t julia_flisp_func_info[] = {
{ "accum-julia-symbol", fl_accum_julia_symbol },
{ "identifier-char?", fl_julia_identifier_char },
{ "identifier-start-char?", fl_julia_identifier_start_char },
{ "never-identifier-char?", fl_julia_never_identifier_char },
{ "op-suffix-char?", fl_julia_op_suffix_char },
{ "strip-op-suffix", fl_julia_strip_op_suffix },
{ "underscore-symbol?", fl_julia_underscore_symbolp },
Expand Down
9 changes: 8 additions & 1 deletion src/julia-parser.scm
Original file line number Diff line number Diff line change
Expand Up @@ -2072,11 +2072,18 @@
;; Split `s` on `a`, then join the resulting pieces back together with `b`
;; placed between them.
(define (string-replace s a b)
  (let ((pieces (string-split s a)))
    (string.join pieces b)))

;; True when `c` may directly follow a non-parenthesized interpolated
;; variable: end of input, an operator character, or a character for which
;; never-identifier-char? holds.
(define (ends-interpolated-atom? c)
  (let ((at-eof (eof-object? c)))
    (if at-eof
        at-eof
        (or (opchar? c)
            (never-identifier-char? c)))))

(define (parse-interpolate s)
(let* ((p (ts:port s))
(c (peek-char p)))
(cond ((identifier-start-char? c)
(parse-atom s))
(let* ((atom (parse-atom s))
(c (peek-char p)))
(if (ends-interpolated-atom? c)
atom
(error (string "interpolated variable $" atom " ends with invalid character \"" c "\"; use \"$(" atom ")\" instead.")))))
((eqv? c #\()
(read-char p)
(let ((ex (parse-eq* s))
Expand Down
4 changes: 4 additions & 0 deletions test/parse.jl
Original file line number Diff line number Diff line change
Expand Up @@ -258,3 +258,7 @@ end
# added ⟂ to operator precedence (#24404)
@test Meta.parse("a ⟂ b ⟂ c") == Expr(:comparison, :a, :⟂, :b, :⟂, :c)
@test Meta.parse("a ⟂ b ∥ c") == Expr(:comparison, :a, :⟂, :b, :∥, :c)

# only allow certain characters after interpolated vars (#25231)
@test Meta.parse("\"\$x෴ \"",raise=false) == Expr(:error, "interpolated variable \$x ends with invalid character \"෴\"; use \"\$(x)\" instead.")
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

෴ (U+0DF4) looks just like my signature 😉.

# an unterminated string literal that ends in an interpolated variable is tagged as an incomplete :string
@test Base.incomplete_tag(Meta.parse("\"\$foo", raise=false)) == :string