From b29ff6d1dc542de2572899741fc2a3c6f37dc1ef Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Fri, 26 May 2017 14:51:38 -0400 Subject: [PATCH 01/10] allow operator suffixes: combining characters and primes --- NEWS.md | 4 ++++ doc/src/manual/variables.md | 3 ++- src/flisp/flisp.h | 1 + src/flisp/julia_extensions.c | 44 ++++++++++++++++++++++++++++++++++++ src/julia-parser.scm | 38 ++++++++++++++++++++----------- test/parse.jl | 5 ++++ 6 files changed, 81 insertions(+), 14 deletions(-) diff --git a/NEWS.md b/NEWS.md index b9041977cec39..0234e26caeef9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,6 +7,10 @@ New language features * Local variables can be tested for being defined using the new `@isdefined variable` macro ([#TBD]). + * Custom infix operators can now be defined by appending Unicode + combining marks and primes to other operators. For example, `+̂″` is parsed + as an infix operator with the same precedence as `+` ([#22089]). + Language changes ---------------- diff --git a/doc/src/manual/variables.md b/doc/src/manual/variables.md index 8683b3ace795b..ccea2dba6e6fe 100644 --- a/doc/src/manual/variables.md +++ b/doc/src/manual/variables.md @@ -96,7 +96,8 @@ Operators like `+` are also valid identifiers, but are parsed specially. In some can be used just like variables; for example `(+)` refers to the addition function, and `(+) = f` will reassign it. Most of the Unicode infix operators (in category Sm), such as `⊕`, are parsed as infix operators and are available for user-defined methods (e.g. you can use `const ⊗ = kron` -to define `⊗` as an infix Kronecker product). +to define `⊗` as an infix Kronecker product). Operators can also be suffixed with modifying marks +and primes, e.g. `+̂″` is parsed as an infix operator with the same precedence as `+`. The only explicitly disallowed names for variables are the names of built-in statements: diff --git a/src/flisp/flisp.h b/src/flisp/flisp.h index c5c122afda7a7..8efc21122e68e 100644 --- a/src/flisp/flisp.h +++ b/src/flisp/flisp.h @@ -378,6 +378,7 @@ int fl_load_system_image_str(fl_context_t *fl_ctx, char* str, size_t len); /* julia extensions */ JL_DLLEXPORT int jl_id_char(uint32_t wc); JL_DLLEXPORT int jl_id_start_char(uint32_t wc); +JL_DLLEXPORT int jl_op_suffix_char(uint32_t wc); struct _fl_context_t { symbol_t *symtab; diff --git a/src/flisp/julia_extensions.c b/src/flisp/julia_extensions.c index b2c5abe508bea..aca8c83ea31e3 100644 --- a/src/flisp/julia_extensions.c +++ b/src/flisp/julia_extensions.c @@ -134,6 +134,17 @@ JL_DLLEXPORT int jl_id_char(uint32_t wc) return 0; } +// chars that can follow an operator (e.g. +) and be parsed as part of the operator +JL_DLLEXPORT int jl_op_suffix_char(uint32_t wc) +{ + if (wc < 0xA1 || wc > 0x10ffff) return 0; + utf8proc_category_t cat = utf8proc_category((utf8proc_int32_t) wc); + return (cat == UTF8PROC_CATEGORY_MN || cat == UTF8PROC_CATEGORY_MC || + cat == UTF8PROC_CATEGORY_ME || + // primes (single, double, triple, their reverses, and quadruple) + (wc >= 0x2032 && wc <= 0x2037) || (wc == 0x2057)); +} + value_t fl_julia_identifier_char(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) { argcount(fl_ctx, "identifier-char?", nargs, 1); @@ -152,6 +163,37 @@ value_t fl_julia_identifier_start_char(fl_context_t *fl_ctx, value_t *args, uint return jl_id_start_char(wc) ? fl_ctx->T : fl_ctx->F; } +value_t fl_julia_op_suffix_char(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) +{ + argcount(fl_ctx, "op-suffix-char?", nargs, 1); + if (!iscprim(args[0]) || ((cprim_t*)ptr(args[0]))->type != fl_ctx->wchartype) + type_error(fl_ctx, "op-suffix-char?", "wchar", args[0]); + uint32_t wc = *(uint32_t*)cp_data((cprim_t*)ptr(args[0])); + return jl_op_suffix_char(wc) ? fl_ctx->T : fl_ctx->F; +} + +value_t fl_julia_strip_op_suffix(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) +{ + argcount(fl_ctx, "strip-op-suffix", nargs, 1); + if (!issymbol(args[0])) + type_error(fl_ctx, "strip-op-suffix", "symbol", args[0]); + char *op = symbol_name(fl_ctx, args[0]); + size_t i = 0; + while (op[i]) { + size_t j = i; + if (jl_op_suffix_char(u8_nextchar(op, &j))) + break; + i = j; + } + if (!op[i]) return args[0]; // no suffix to strip + if (!i) lerror(fl_ctx, symbol(fl_ctx, "error"), "invalid operator"); + char *opnew = strncpy(malloc(i+1), op, i); + opnew[i] = 0; + value_t *opnew_symbol = symbol(fl_ctx, opnew); + free(opnew); + return opnew_symbol; +} + #include "julia_charmap.h" #define _equal_wchar_(x, y, ctx) ((x) == (y)) #define _hash_wchar_(x, ctx) inthash((uint32_t) ((uintptr_t) (x))) @@ -245,6 +287,8 @@ static const builtinspec_t julia_flisp_func_info[] = { { "accum-julia-symbol", fl_accum_julia_symbol }, { "identifier-char?", fl_julia_identifier_char }, { "identifier-start-char?", fl_julia_identifier_start_char }, + { "op-suffix-char?", fl_julia_op_suffix_char }, + { "strip-op-suffix", fl_julia_strip_op_suffix }, { NULL, NULL } }; diff --git a/src/julia-parser.scm b/src/julia-parser.scm index f9048eb39f52e..09027b7b014ef 100644 --- a/src/julia-parser.scm +++ b/src/julia-parser.scm @@ -54,9 +54,13 @@ (lambda (x) (has? t x)))))) +; as for Set, but strip operator suffixes before testing membership +(define (SuffSet l) (let ((S (Set l))) + (lambda (op) (and (symbol? op) (S (strip-op-suffix op)))))) + ;; for each prec-x generate an is-prec-x? procedure (for-each (lambda (name) - (eval `(define ,(symbol (string "is-" name "?")) (Set ,name)))) + (eval `(define ,(symbol (string "is-" name "?")) (SuffSet ,name)))) prec-names) ;; hash table of binary operators -> precedence @@ -68,7 +72,7 @@ (pushprec (cdr L) (+ prec 1))))) (pushprec (map eval prec-names) 1) t)) -(define (operator-precedence op) (get prec-table op 0)) +(define (operator-precedence op) (get prec-table (strip-op-suffix op) 0)) (define unary-ops (append! '(|<:| |>:|) (add-dots '(+ - ! ~ ¬ √ ∛ ∜)))) @@ -117,7 +121,7 @@ (delete-duplicates (map (lambda (op) (string.char (string op) 1)) (cons `|..| (filter dotop? operators)))))) -(define operator? (Set operators)) +(define operator? (SuffSet operators)) (define initial-reserved-words '(begin while if for try return break continue function macro quote let local global const do @@ -198,21 +202,29 @@ (else (read-char port) (skip-to-eol port))))) +(define (op-or-sufchar? c) (or (op-suffix-char? c) (opchar? c))) + (define (read-operator port c) (if (and (eqv? c #\*) (eqv? (peek-char port) #\*)) (error "use \"^\" instead of \"**\"")) - (if (or (eof-object? (peek-char port)) (not (opchar? (peek-char port)))) + (if (or (eof-object? (peek-char port)) (not (op-or-sufchar? (peek-char port)))) (symbol (string c)) ; 1-char operator (let ((str (let loop ((str (string c)) - (c (peek-char port))) - (if (and (not (eof-object? c)) (opchar? c)) - (let* ((newop (string str c)) - (opsym (string->symbol newop))) - (if (operator? opsym) - (begin (read-char port) - (loop newop (peek-char port))) - str)) - str)))) + (c (peek-char port)) + (in-suffix? #f)) + (if (eof-object? c) + str + (let ((sufchar? (op-suffix-char? c))) + (if (if in-suffix? + sufchar? + (or sufchar? (opchar? c))) + (let* ((newop (string str c)) + (opsym (string->symbol newop))) + (if (operator? opsym) + (begin (read-char port) + (loop newop (peek-char port) sufchar?)) + str)) + str)))))) (if (equal? str "--") (error (string "invalid operator \"" str "\""))) (string->symbol str)))) diff --git a/test/parse.jl b/test/parse.jl index 357e70ce711ad..a4f3f04726bb3 100644 --- a/test/parse.jl +++ b/test/parse.jl @@ -1228,6 +1228,11 @@ end === 2 f("") end === (3, String) +# operator suffixes +@test parse("3 +̂ 4") == Expr(:call, :+̂, 3, 4) +@test parse("3 +̂′ 4") == Expr(:call, :+̂′, 3, 4) +@test Base.operator_precedence(:+̂) == Base.operator_precedence(:+) + # issue #19351 # adding return type decl should not affect parse of function body @test :(t(abc) = 3).args[2] == :(t(abc)::Int = 3).args[2] From aaac3c148592bffdd448744994839b434cc34227 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Thu, 1 Jun 2017 08:27:05 -0400 Subject: [PATCH 02/10] blacklist key syntactic operators from suffixing --- src/julia-parser.scm | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/src/julia-parser.scm b/src/julia-parser.scm index 09027b7b014ef..7a58dbd1d6228 100644 --- a/src/julia-parser.scm +++ b/src/julia-parser.scm @@ -37,6 +37,10 @@ prec-pipe prec-colon prec-plus prec-bitshift prec-times prec-rational prec-power prec-decl prec-dot)) +(define trans-op (string->symbol ".'")) +(define ctrans-op (string->symbol "'")) +(define vararg-op (string->symbol "...")) + (define (Set l) ;; construct a length-specialized membership tester (cond ((length= l 1) @@ -54,9 +58,21 @@ (lambda (x) (has? t x)))))) -; as for Set, but strip operator suffixes before testing membership -(define (SuffSet l) (let ((S (Set l))) - (lambda (op) (and (symbol? op) (S (strip-op-suffix op)))))) +; only allow/strip suffixes for some operators +(define no-suffix? (Set (append prec-assignment prec-conditional prec-lazy-or prec-lazy-and + prec-colon prec-decl prec-dot + '(-- --> -> |<:| |>:| in isa $) + (list ctrans-op trans-op vararg-op)))) +(define (maybe-strip-op-suffix op) + (if (symbol? op) + (let ((op_ (strip-op-suffix op))) + (if (or (eqv? op op_) (no-suffix? op_)) + op + op_)) + op)) + +; like Set, but strip operator suffixes before testing membership +(define (SuffSet l) (let ((S (Set l))) (lambda (op) (S (maybe-strip-op-suffix op))))) ;; for each prec-x generate an is-prec-x? procedure (for-each (lambda (name) @@ -72,7 +88,9 @@ (pushprec (cdr L) (+ prec 1))))) (pushprec (map eval prec-names) 1) t)) -(define (operator-precedence op) (get prec-table (strip-op-suffix op) 0)) +(define (operator-precedence op) (get prec-table + (maybe-strip-op-suffix op) + 0)) (define unary-ops (append! '(|<:| |>:|) (add-dots '(+ - ! ~ ¬ √ ∛ ∜)))) @@ -96,10 +114,6 @@ (and (pair? ex) (eq? '$ (car ex))))) -(define trans-op (string->symbol ".'")) -(define ctrans-op (string->symbol "'")) -(define vararg-op (string->symbol "...")) - (define (is-word-operator? op) (every identifier-start-char? (string->list (symbol->string op)))) From c5a32be3ca58b25f4fa4aa1896e93b663177352a Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Thu, 1 Jun 2017 09:36:26 -0400 Subject: [PATCH 03/10] fix incorrect pointer type --- src/flisp/julia_extensions.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/flisp/julia_extensions.c b/src/flisp/julia_extensions.c index aca8c83ea31e3..e5b57f9cd2e37 100644 --- a/src/flisp/julia_extensions.c +++ b/src/flisp/julia_extensions.c @@ -189,7 +189,7 @@ value_t fl_julia_strip_op_suffix(fl_context_t *fl_ctx, value_t *args, uint32_t n if (!i) lerror(fl_ctx, symbol(fl_ctx, "error"), "invalid operator"); char *opnew = strncpy(malloc(i+1), op, i); opnew[i] = 0; - value_t *opnew_symbol = symbol(fl_ctx, opnew); + value_t opnew_symbol = symbol(fl_ctx, opnew); free(opnew); return opnew_symbol; } From cb131e0758138e432127ad2606f38545252ee033 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Thu, 1 Jun 2017 09:39:37 -0400 Subject: [PATCH 04/10] allow sub/superscript operator suffixes --- NEWS.md | 5 +- doc/src/manual/variables.md | 3 +- src/flisp/flisp.c | 2 - src/flisp/flisp.h | 1 - src/flisp/julia_extensions.c | 71 ++++++++++++---------- src/flisp/julia_opsuffs.h | 112 +++++++++++++++++++++++++++++++++++ test/parse.jl | 1 + 7 files changed, 158 insertions(+), 37 deletions(-) create mode 100644 src/flisp/julia_opsuffs.h diff --git a/NEWS.md b/NEWS.md index 0234e26caeef9..0083a3fa6ee14 100644 --- a/NEWS.md +++ b/NEWS.md @@ -8,8 +8,9 @@ New language features using the new `@isdefined variable` macro ([#TBD]). * Custom infix operators can now be defined by appending Unicode - combining marks and primes to other operators. For example, `+̂″` is parsed - as an infix operator with the same precedence as `+` ([#22089]). + combining marks, primes, and sub/superscripts to other operators. + For example, `+̂ₐ″` is parsed as an infix operator with the same + precedence as `+` ([#22089]). Language changes ---------------- diff --git a/doc/src/manual/variables.md b/doc/src/manual/variables.md index ccea2dba6e6fe..eaabb0413a9ac 100644 --- a/doc/src/manual/variables.md +++ b/doc/src/manual/variables.md @@ -96,8 +96,7 @@ Operators like `+` are also valid identifiers, but are parsed specially. In some can be used just like variables; for example `(+)` refers to the addition function, and `(+) = f` will reassign it. Most of the Unicode infix operators (in category Sm), such as `⊕`, are parsed as infix operators and are available for user-defined methods (e.g. you can use `const ⊗ = kron` -to define `⊗` as an infix Kronecker product). Operators can also be suffixed with modifying marks -and primes, e.g. `+̂″` is parsed as an infix operator with the same precedence as `+`. +to define `⊗` as an infix Kronecker product). Operators can also be suffixed with modifying marks, primes, and sub/superscripts, e.g. `+̂ₐ″` is parsed as an infix operator with the same precedence as `+`. The only explicitly disallowed names for variables are the names of built-in statements: diff --git a/src/flisp/flisp.c b/src/flisp/flisp.c index fe25401f07421..417a087418c51 100644 --- a/src/flisp/flisp.c +++ b/src/flisp/flisp.c @@ -2304,7 +2304,6 @@ static const builtinspec_t core_builtin_info[] = { extern void builtins_init(fl_context_t *fl_ctx); extern void comparehash_init(fl_context_t *fl_ctx); -extern void jl_charmap_init(fl_context_t *fl_ctx); static void lisp_init(fl_context_t *fl_ctx, size_t initial_heapsize) { @@ -2337,7 +2336,6 @@ static void lisp_init(fl_context_t *fl_ctx, size_t initial_heapsize) fl_ctx->consflags = bitvector_new(fl_ctx->heapsize/sizeof(cons_t), 1); fl_print_init(fl_ctx); comparehash_init(fl_ctx); - jl_charmap_init(fl_ctx); fl_ctx->N_STACK = 262144; fl_ctx->Stack = (value_t*)malloc(fl_ctx->N_STACK*sizeof(value_t)); CHECK_ALIGN8(fl_ctx->Stack); diff --git a/src/flisp/flisp.h b/src/flisp/flisp.h index 8efc21122e68e..9239e2b7d18a4 100644 --- a/src/flisp/flisp.h +++ b/src/flisp/flisp.h @@ -407,7 +407,6 @@ struct _fl_context_t { fltype_t *builtintype; htable_t equal_eq_hashtable; - htable_t jl_charmap; value_t tablesym; fltype_t *tabletype; diff --git a/src/flisp/julia_extensions.c b/src/flisp/julia_extensions.c index e5b57f9cd2e37..5c754788eda09 100644 --- a/src/flisp/julia_extensions.c +++ b/src/flisp/julia_extensions.c @@ -12,6 +12,11 @@ extern "C" { #endif +#define _equal_wchar_(x, y, ctx) ((x) == (y)) +#define _hash_wchar_(x, ctx) inthash((uint32_t) ((uintptr_t) (x))) +#include "htable.inc" +HTIMPL_R(wcharhash, _hash_wchar_, _equal_wchar_) + static int is_uws(uint32_t wc) { return (wc==9 || wc==10 || wc==11 || wc==12 || wc==13 || wc==32 || @@ -134,15 +139,28 @@ JL_DLLEXPORT int jl_id_char(uint32_t wc) return 0; } +#include "julia_opsuffs.h" + // chars that can follow an operator (e.g. +) and be parsed as part of the operator -JL_DLLEXPORT int jl_op_suffix_char(uint32_t wc) +int jl_op_suffix_char(uint32_t wc) { + static htable_t jl_opsuffs; + if (!jl_opsuffs.size) { // initialize hash table of suffixes + size_t i, opsuffs_len = sizeof(opsuffs) / (sizeof(uint32_t)); + htable_t *h = htable_new(&jl_opsuffs, opsuffs_len); + assert(sizeof(uint32_t) <= sizeof(void*)); + for (i = 0; i < opsuffs_len; ++i) { + assert((void*)(uintptr_t)opsuffs[i] != HT_NOTFOUND); + wcharhash_put_r(h, (void*)((uintptr_t)opsuffs[i]), NULL, NULL); + } + } if (wc < 0xA1 || wc > 0x10ffff) return 0; utf8proc_category_t cat = utf8proc_category((utf8proc_int32_t) wc); - return (cat == UTF8PROC_CATEGORY_MN || cat == UTF8PROC_CATEGORY_MC || - cat == UTF8PROC_CATEGORY_ME || - // primes (single, double, triple, their reverses, and quadruple) - (wc >= 0x2032 && wc <= 0x2037) || (wc == 0x2057)); + if (cat == UTF8PROC_CATEGORY_MN || cat == UTF8PROC_CATEGORY_MC || + cat == UTF8PROC_CATEGORY_ME) + return 1; + // use hash table of other allowed characters: primes and sub/superscripts + return HT_NOTFOUND != wcharhash_get_r(&jl_opsuffs, (void*)((uintptr_t)wc), NULL); } value_t fl_julia_identifier_char(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) @@ -195,32 +213,25 @@ value_t fl_julia_strip_op_suffix(fl_context_t *fl_ctx, value_t *args, uint32_t n } #include "julia_charmap.h" -#define _equal_wchar_(x, y, ctx) ((x) == (y)) -#define _hash_wchar_(x, ctx) inthash((uint32_t) ((uintptr_t) (x))) -#include "htable.inc" -HTIMPL_R(wcharhash, _hash_wchar_, _equal_wchar_) -void jl_charmap_init(fl_context_t *fl_ctx) +utf8proc_int32_t jl_charmap_map(utf8proc_int32_t c, void *ctx) { - size_t charmap_len = sizeof(charmap) / (2*sizeof(uint32_t)); - size_t i; - htable_t *h = htable_new(&fl_ctx->jl_charmap, charmap_len); - assert(sizeof(uint32_t) <= sizeof(void*)); - for (i = 0; i < charmap_len; ++i) { - /* Store charmap in a hash table. Typecasting codepoints - directly to pointer keys works because pointers are at - least 32 bits on all Julia-supported systems, and because - we never map anything to U+0001 (since HT_NOTFOUND is (void*)1). */ - assert((void*)(uintptr_t)charmap[i][1] != HT_NOTFOUND); - wcharhash_put_r(h, (void*)((uintptr_t)charmap[i][0]), - (void*)((uintptr_t)charmap[i][1]), (void*)fl_ctx); + static htable_t jl_charmap; + if (!jl_charmap.size) { // initialize hash table + size_t i, charmap_len = sizeof(charmap) / (2*sizeof(uint32_t)); + htable_t *h = htable_new(&jl_charmap, charmap_len); + assert(sizeof(uint32_t) <= sizeof(void*)); + for (i = 0; i < charmap_len; ++i) { + /* Store charmap in a hash table. Typecasting codepoints + directly to pointer keys works because pointers are at + least 32 bits on all Julia-supported systems, and because + we never map anything to U+0001 (since HT_NOTFOUND is (void*)1). */ + assert((void*)(uintptr_t)charmap[i][1] != HT_NOTFOUND); + wcharhash_put_r(h, (void*)((uintptr_t)charmap[i][0]), + (void*)((uintptr_t)charmap[i][1]), NULL); + } } -} -utf8proc_int32_t jl_charmap_map(utf8proc_int32_t c, void *fl_ctx_) -{ - fl_context_t *fl_ctx = (fl_context_t *) fl_ctx_; - htable_t *h = &fl_ctx->jl_charmap; - void *v = wcharhash_get_r(h, (void*)((uintptr_t)c), (void*) fl_ctx); + void *v = wcharhash_get_r(&jl_charmap, (void*)((uintptr_t)c), NULL); return v == HT_NOTFOUND ? c : (utf8proc_int32_t) ((uintptr_t) v); } @@ -233,7 +244,7 @@ static char *normalize(fl_context_t *fl_ctx, char *s) ssize_t result; size_t newlen; result = utf8proc_decompose_custom((uint8_t*) s, 0, NULL, 0, (utf8proc_option_t)options, - jl_charmap_map, (void*) fl_ctx); + jl_charmap_map, NULL); if (result < 0) goto error; newlen = result * sizeof(int32_t) + 1; if (newlen > fl_ctx->jlbuflen) { @@ -242,7 +253,7 @@ static char *normalize(fl_context_t *fl_ctx, char *s) if (!fl_ctx->jlbuf) lerror(fl_ctx, fl_ctx->OutOfMemoryError, "error allocating UTF8 buffer"); } result = utf8proc_decompose_custom((uint8_t*)s,0, (int32_t*)fl_ctx->jlbuf,result, (utf8proc_option_t)options, - jl_charmap_map, (void*) fl_ctx); + jl_charmap_map, NULL); if (result < 0) goto error; result = utf8proc_reencode((int32_t*)fl_ctx->jlbuf,result, (utf8proc_option_t)options); if (result < 0) goto error; diff --git a/src/flisp/julia_opsuffs.h b/src/flisp/julia_opsuffs.h new file mode 100644 index 0000000000000..4769e119790bb --- /dev/null +++ b/src/flisp/julia_opsuffs.h @@ -0,0 +1,112 @@ +/* Array of codepoints allowed as operator suffixes in Julia: + primes and Latin/Greek/math super/subscripts. + + produced by: + + for c in "²³¹ʰʲʳʷʸˡˢˣᴬᴮᴰᴱᴳᴴᴵᴶᴷᴸᴹᴺᴼᴾᴿᵀᵁᵂᵃᵇᵈᵉᵍᵏᵐᵒᵖᵗᵘᵛᵝᵞᵟᵠᵡᵢᵣᵤᵥᵦᵧᵨᵩᵪᶜᶠᶥᶦᶫᶰᶸᶻᶿ ⁰ⁱ⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ⁿₐₑₒₓₕₖₗₘₙₚₛₜⱼⱽ" * "′″‴‵‶‷⁗" + println(" ", repr(UInt32(c)), ", // ", c) + end +*/ + +static const uint32_t opsuffs[] = { + 0x000000b2, // ² + 0x000000b3, // ³ + 0x000000b9, // ¹ + 0x000002b0, // ʰ + 0x000002b2, // ʲ + 0x000002b3, // ʳ + 0x000002b7, // ʷ + 0x000002b8, // ʸ + 0x000002e1, // ˡ + 0x000002e2, // ˢ + 0x000002e3, // ˣ + 0x00001d2c, // ᴬ + 0x00001d2e, // ᴮ + 0x00001d30, // ᴰ + 0x00001d31, // ᴱ + 0x00001d33, // ᴳ + 0x00001d34, // ᴴ + 0x00001d35, // ᴵ + 0x00001d36, // ᴶ + 0x00001d37, // ᴷ + 0x00001d38, // ᴸ + 0x00001d39, // ᴹ + 0x00001d3a, // ᴺ + 0x00001d3c, // ᴼ + 0x00001d3e, // ᴾ + 0x00001d3f, // ᴿ + 0x00001d40, // ᵀ + 0x00001d41, // ᵁ + 0x00001d42, // ᵂ + 0x00001d43, // ᵃ + 0x00001d47, // ᵇ + 0x00001d48, // ᵈ + 0x00001d49, // ᵉ + 0x00001d4d, // ᵍ + 0x00001d4f, // ᵏ + 0x00001d50, // ᵐ + 0x00001d52, // ᵒ + 0x00001d56, // ᵖ + 0x00001d57, // ᵗ + 0x00001d58, // ᵘ + 0x00001d5b, // ᵛ + 0x00001d5d, // ᵝ + 0x00001d5e, // ᵞ + 0x00001d5f, // ᵟ + 0x00001d60, // ᵠ + 0x00001d61, // ᵡ + 0x00001d62, // ᵢ + 0x00001d63, // ᵣ + 0x00001d64, // ᵤ + 0x00001d65, // ᵥ + 0x00001d66, // ᵦ + 0x00001d67, // ᵧ + 0x00001d68, // ᵨ + 0x00001d69, // ᵩ + 0x00001d6a, // ᵪ + 0x00001d9c, // ᶜ + 0x00001da0, // ᶠ + 0x00001da5, // ᶥ + 0x00001da6, // ᶦ + 0x00001dab, // ᶫ + 0x00001db0, // ᶰ + 0x00001db8, // ᶸ + 0x00001dbb, // ᶻ + 0x00001dbf, // ᶿ + 0x00002009, //   + 0x00002070, // ⁰ + 0x00002071, // ⁱ + 0x00002074, // ⁴ + 0x00002075, // ⁵ + 0x00002076, // ⁶ + 0x00002077, // ⁷ + 0x00002078, // ⁸ + 0x00002079, // ⁹ + 0x0000207a, // ⁺ + 0x0000207b, // ⁻ + 0x0000207c, // ⁼ + 0x0000207d, // ⁽ + 0x0000207e, // ⁾ + 0x0000207f, // ⁿ + 0x00002090, // ₐ + 0x00002091, // ₑ + 0x00002092, // ₒ + 0x00002093, // ₓ + 0x00002095, // ₕ + 0x00002096, // ₖ + 0x00002097, // ₗ + 0x00002098, // ₘ + 0x00002099, // ₙ + 0x0000209a, // ₚ + 0x0000209b, // ₛ + 0x0000209c, // ₜ + 0x00002c7c, // ⱼ + 0x00002c7d, // ⱽ + 0x00002032, // ′ + 0x00002033, // ″ + 0x00002034, // ‴ + 0x00002035, // ‵ + 0x00002036, // ‶ + 0x00002037, // ‷ + 0x00002057 // ⁗ +}; diff --git a/test/parse.jl b/test/parse.jl index a4f3f04726bb3..c712cddc27498 100644 --- a/test/parse.jl +++ b/test/parse.jl @@ -1231,6 +1231,7 @@ end === (3, String) # operator suffixes @test parse("3 +̂ 4") == Expr(:call, :+̂, 3, 4) @test parse("3 +̂′ 4") == Expr(:call, :+̂′, 3, 4) +@test parse("3 +⁽¹⁾ 4") == Expr(:call, :+⁽¹⁾, 3, 4) @test Base.operator_precedence(:+̂) == Base.operator_precedence(:+) # issue #19351 From 9e537207cdb79b5c59c1bc72979a1fd308c1aef5 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Thu, 1 Jun 2017 09:47:39 -0400 Subject: [PATCH 05/10] add missing subscripts --- src/flisp/julia_opsuffs.h | 33 ++++++++++++++++++++++++--------- test/parse.jl | 1 + 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/src/flisp/julia_opsuffs.h b/src/flisp/julia_opsuffs.h index 4769e119790bb..bade253c97ef4 100644 --- a/src/flisp/julia_opsuffs.h +++ b/src/flisp/julia_opsuffs.h @@ -3,7 +3,7 @@ produced by: - for c in "²³¹ʰʲʳʷʸˡˢˣᴬᴮᴰᴱᴳᴴᴵᴶᴷᴸᴹᴺᴼᴾᴿᵀᵁᵂᵃᵇᵈᵉᵍᵏᵐᵒᵖᵗᵘᵛᵝᵞᵟᵠᵡᵢᵣᵤᵥᵦᵧᵨᵩᵪᶜᶠᶥᶦᶫᶰᶸᶻᶿ ⁰ⁱ⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ⁿₐₑₒₓₕₖₗₘₙₚₛₜⱼⱽ" * "′″‴‵‶‷⁗" + for c in sort(unique(collect("₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎²³¹ʰʲʳʷʸˡˢˣᴬᴮᴰᴱᴳᴴᴵᴶᴷᴸᴹᴺᴼᴾᴿᵀᵁᵂᵃᵇᵈᵉᵍᵏᵐᵒᵖᵗᵘᵛᵝᵞᵟᵠᵡᵢᵣᵤᵥᵦᵧᵨᵩᵪᶜᶠᶥᶦᶫᶰᶸᶻᶿ ⁰ⁱ⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ⁿₐₑₒₓₕₖₗₘₙₚₛₜⱼⱽ" * "′″‴‵‶‷⁗"))) println(" ", repr(UInt32(c)), ", // ", c) end */ @@ -74,6 +74,13 @@ static const uint32_t opsuffs[] = { 0x00001dbb, // ᶻ 0x00001dbf, // ᶿ 0x00002009, //   + 0x00002032, // ′ + 0x00002033, // ″ + 0x00002034, // ‴ + 0x00002035, // ‵ + 0x00002036, // ‶ + 0x00002037, // ‷ + 0x00002057, // ⁗ 0x00002070, // ⁰ 0x00002071, // ⁱ 0x00002074, // ⁴ @@ -88,6 +95,21 @@ static const uint32_t opsuffs[] = { 0x0000207d, // ⁽ 0x0000207e, // ⁾ 0x0000207f, // ⁿ + 0x00002080, // ₀ + 0x00002081, // ₁ + 0x00002082, // ₂ + 0x00002083, // ₃ + 0x00002084, // ₄ + 0x00002085, // ₅ + 0x00002086, // ₆ + 0x00002087, // ₇ + 0x00002088, // ₈ + 0x00002089, // ₉ + 0x0000208a, // ₊ + 0x0000208b, // ₋ + 0x0000208c, // ₌ + 0x0000208d, // ₍ + 0x0000208e, // ₎ 0x00002090, // ₐ 0x00002091, // ₑ 0x00002092, // ₒ @@ -101,12 +123,5 @@ static const uint32_t opsuffs[] = { 0x0000209b, // ₛ 0x0000209c, // ₜ 0x00002c7c, // ⱼ - 0x00002c7d, // ⱽ - 0x00002032, // ′ - 0x00002033, // ″ - 0x00002034, // ‴ - 0x00002035, // ‵ - 0x00002036, // ‶ - 0x00002037, // ‷ - 0x00002057 // ⁗ + 0x00002c7d // ⱽ }; diff --git a/test/parse.jl b/test/parse.jl index c712cddc27498..9a4dded4bd353 100644 --- a/test/parse.jl +++ b/test/parse.jl @@ -1232,6 +1232,7 @@ end === (3, String) @test parse("3 +̂ 4") == Expr(:call, :+̂, 3, 4) @test parse("3 +̂′ 4") == Expr(:call, :+̂′, 3, 4) @test parse("3 +⁽¹⁾ 4") == Expr(:call, :+⁽¹⁾, 3, 4) +@test parse("3 +₍₀₎ 4") == Expr(:call, :+₍₀₎, 3, 4) @test Base.operator_precedence(:+̂) == Base.operator_precedence(:+) # issue #19351 From 6a118ef82cc9e3fbd1e943c6d221d9e630b509f0 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Mon, 5 Jun 2017 14:05:39 -0400 Subject: [PATCH 06/10] rm unnecessary assertion (only need for value, as in jl_charmap, not for key) --- src/flisp/julia_extensions.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/flisp/julia_extensions.c b/src/flisp/julia_extensions.c index 5c754788eda09..2b75282863b8f 100644 --- a/src/flisp/julia_extensions.c +++ b/src/flisp/julia_extensions.c @@ -149,10 +149,8 @@ int jl_op_suffix_char(uint32_t wc) size_t i, opsuffs_len = sizeof(opsuffs) / (sizeof(uint32_t)); htable_t *h = htable_new(&jl_opsuffs, opsuffs_len); assert(sizeof(uint32_t) <= sizeof(void*)); - for (i = 0; i < opsuffs_len; ++i) { - assert((void*)(uintptr_t)opsuffs[i] != HT_NOTFOUND); + for (i = 0; i < opsuffs_len; ++i) wcharhash_put_r(h, (void*)((uintptr_t)opsuffs[i]), NULL, NULL); - } } if (wc < 0xA1 || wc > 0x10ffff) return 0; utf8proc_category_t cat = utf8proc_category((utf8proc_int32_t) wc); From 052b0f2c70a58ff6741c5075c74de2454fd667e2 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Thu, 15 Jun 2017 12:44:57 -0400 Subject: [PATCH 07/10] add tests for operators that are not supposed to be suffixable --- test/parse.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/parse.jl b/test/parse.jl index 9a4dded4bd353..2af911dbe05c1 100644 --- a/test/parse.jl +++ b/test/parse.jl @@ -1233,6 +1233,9 @@ end === (3, String) @test parse("3 +̂′ 4") == Expr(:call, :+̂′, 3, 4) @test parse("3 +⁽¹⁾ 4") == Expr(:call, :+⁽¹⁾, 3, 4) @test parse("3 +₍₀₎ 4") == Expr(:call, :+₍₀₎, 3, 4) +for bad in ('=', '$', ':', "||", "&&", "->", "<:") + @test_throws ParseError parse("3 $(bad)⁽¹⁾ 4") +end @test Base.operator_precedence(:+̂) == Base.operator_precedence(:+) # issue #19351 From 07a2014f9aaebf3b91e1a8c1a4b808662beee992 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Thu, 15 Jun 2017 13:18:12 -0400 Subject: [PATCH 08/10] slight optimization --- src/julia-parser.scm | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/julia-parser.scm b/src/julia-parser.scm index 7a58dbd1d6228..15665fda6223c 100644 --- a/src/julia-parser.scm +++ b/src/julia-parser.scm @@ -72,7 +72,11 @@ op)) ; like Set, but strip operator suffixes before testing membership -(define (SuffSet l) (let ((S (Set l))) (lambda (op) (S (maybe-strip-op-suffix op))))) +(define (SuffSet l) + (let ((S (Set l))) + (if (every no-suffix? l) + S ; suffixes not allowed for anything in l + (lambda (op) (S (maybe-strip-op-suffix op)))))) ;; for each prec-x generate an is-prec-x? procedure (for-each (lambda (name) From e9764488975588d4ba37b44f111a834cf9d0cccf Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Wed, 19 Jul 2017 10:47:17 -0400 Subject: [PATCH 09/10] another slight optimization --- src/julia-parser.scm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/julia-parser.scm b/src/julia-parser.scm index 15665fda6223c..22325d2444485 100644 --- a/src/julia-parser.scm +++ b/src/julia-parser.scm @@ -66,7 +66,7 @@ (define (maybe-strip-op-suffix op) (if (symbol? op) (let ((op_ (strip-op-suffix op))) - (if (or (eqv? op op_) (no-suffix? op_)) + (if (or (eq? op op_) (no-suffix? op_)) op op_)) op)) From 31b7edb3253718e261687034910dfbb901430425 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Mon, 24 Jul 2017 11:29:50 -0400 Subject: [PATCH 10/10] line length --- doc/src/manual/variables.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/src/manual/variables.md b/doc/src/manual/variables.md index eaabb0413a9ac..2f20f15a33b4a 100644 --- a/doc/src/manual/variables.md +++ b/doc/src/manual/variables.md @@ -96,7 +96,8 @@ Operators like `+` are also valid identifiers, but are parsed specially. In some can be used just like variables; for example `(+)` refers to the addition function, and `(+) = f` will reassign it. Most of the Unicode infix operators (in category Sm), such as `⊕`, are parsed as infix operators and are available for user-defined methods (e.g. you can use `const ⊗ = kron` -to define `⊗` as an infix Kronecker product). Operators can also be suffixed with modifying marks, primes, and sub/superscripts, e.g. `+̂ₐ″` is parsed as an infix operator with the same precedence as `+`. +to define `⊗` as an infix Kronecker product). Operators can also be suffixed with modifying marks, +primes, and sub/superscripts, e.g. `+̂ₐ″` is parsed as an infix operator with the same precedence as `+`. The only explicitly disallowed names for variables are the names of built-in statements: