From d224635190bdc48839a5f4b079f36f6f9152f82f Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Wed, 3 Apr 2024 20:53:36 -0400 Subject: [PATCH] RFC: Curried getproperty syntax MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR adds support for parsing `.a` as `x->x.a`. This kind of thing has come up multiple times in the past, but I'm currently finding myself doing a lot of work on nested structs where this operation is very common. In general, we've had the position that this kind of thing should be a special case of the short-currying syntax (e.g. #38713), but I actually think that might be a false constraint. In particular, `.a` is a bit of a worst case for the curry syntax. If there is no requirement for `.a` to be excessively short in an eventual underscore curry syntax, I think that could open more options. That said, any syntax proposal of course needs to stand on its own, so let me motivate the cases where I think this plays: A. Curried getfield I think this is probably the most obvious and often requested. The syntax here is very useful for situations where higher order functions operate on collections of records: 1. `map(.a, vec)` and reductions for getting the fields of an object - also includes things like `sum(.price, items)` 2. Predicates like `sort(vecs, by=.x)` or `filter(!.deleted, entries)` 3. In pipelines `vecs |> .x |> sqrt |> sum` I think that's mostly what people are thinking of, but the use case for this syntax is more general. B. A syntax for lenses Packages like Accessors.jl provide lens-like abstractions. Currently these are written as `lens = @optic _.a`. An example use of Accessors.jl is (from their documentation) ``` julia> modify(lowercase, (;a="AA", b="BB"), @optic _.a) T("aa", "BB") ``` This PR can be thought of as providing lenses first class syntax, as in: ``` julia> modify(lowercase, (;a="AA", b="BB"), .a) T("aa", "BB") ``` C. 
Symbol index generalization to hierarchical structures We have a lot of packages in the ecosystem that support named axes of various forms (canonical examples might be DataFrames and NamedArrays, but there are probably two dozen of these). Generally the way that this syntax works is that people use quoted symbols for indexing: ``` df[5, :col] ``` However, this breaks down when there is hierarchical composition involved. For example, for simulation models, you often build parameter sets and solutions out of hierarchies of simpler models. There are a couple of solutions that people have come up with for this problem: 1. Some packages parse out hierarchy from symbol names: `sol[:var"my.nested.hierarchy.state"]` 2. Other packages have a global root object: `sol[○.my.nested.hierarchy.state]` 2a. A variant of this is using the object as its own root `sol[sol.my.nested.hierarchy.state]` 2b. Yet another variant is having the root object be context specific `sol[sys.my.nested.hierarchy.state]` 3. Yet other packages put symbolic names into the global namespace `sol[my.nested.hierarchy.state]` These solutions are all lacking. 1 requires string manipulation for composition; the various variants of 2 are ok, but there is no agreement among packages what the root object looks like or is spelled, and even so, it's an extra export; and 3 pollutes the global namespace. 
By using the same mechanism here, we essentially standardize the solution `2`, but make the root object implicit. --- base/operators.jl | 14 +++++++ src/julia-parser.scm | 95 ++++++++++++++++++++++++++++++-------------- src/julia-syntax.scm | 12 ++++-- 3 files changed, 88 insertions(+), 33 deletions(-) diff --git a/base/operators.jl b/base/operators.jl index 26274307ba05c..d90e202a0b796 100644 --- a/base/operators.jl +++ b/base/operators.jl @@ -1479,3 +1479,17 @@ julia> [1, 2] .∉ ([2, 3],) ``` """ ∉, ∌ + +struct GetPropertyLens + syms::Tuple{Vararg{Symbol}} +end +getproperty() = GetPropertyLens(()) +getproperty(lens::GetPropertyLens, s::Symbol) = + GetPropertyLens(tuple(getfield(lens, :syms)..., s)) + +function (lens::GetPropertyLens)(strct) + syms = getfield(lens, :syms) + isempty(syms) && return strct + sym = first(syms) + return GetPropertyLens(tail(syms))(getproperty(strct, sym)) +end \ No newline at end of file diff --git a/src/julia-parser.scm b/src/julia-parser.scm index 891a26bb0ea49..fccb8eea8719b 100644 --- a/src/julia-parser.scm +++ b/src/julia-parser.scm @@ -640,12 +640,15 @@ ;; parse left-to-right binary operator ;; produces structures like (+ (+ (+ 2 3) 4) 5) -(define-macro (parse-LtoR s down ops) +(define-macro (parse-LtoR s down ops syntactic) `(let loop ((ex (,down ,s)) (t (peek-token ,s))) (if (,ops t) (begin (take-token ,s) - (loop (list 'call t ex (,down ,s)) (peek-token ,s))) + (loop (if ,syntactic + (list t ex (,down ,s)) + (list 'call t ex (,down ,s))) + (peek-token ,s))) ex))) ;; parse right-to-left binary operator @@ -849,7 +852,7 @@ (else ex))))) (define (parse-pipe< s) (parse-RtoL s parse-pipe> is-prec-pipe<?)) -(define (parse-pipe> s) (parse-LtoR s parse-range is-prec-pipe>?)) +(define (parse-pipe> s) (parse-LtoR s parse-range is-prec-pipe>? #f)) ;; parse ranges and postfix ... ;; colon is strange; 3 arguments with 2 colons yields one call: @@ -929,8 +932,8 @@ (define (parse-expr s) (parse-with-chains s parse-term is-prec-plus? 
'(+ ++))) (define (parse-term s) (parse-with-chains s parse-rational is-prec-times? '(*))) -(define (parse-rational s) (parse-LtoR s parse-shift is-prec-rational?)) -(define (parse-shift s) (parse-LtoR s parse-unary-subtype is-prec-bitshift?)) +(define (parse-rational s) (parse-LtoR s parse-shift is-prec-rational? #f)) +(define (parse-shift s) (parse-LtoR s parse-unary-subtype is-prec-bitshift? #f)) ;; parse `<: A where B` as `<: (A where B)` (issue #21545) (define (parse-unary-subtype s) @@ -2156,11 +2159,11 @@ ((eqv? nxt #\) ) ;; empty tuple () (begin (take-token s) '((tuple) . #t))) - ((syntactic-op? nxt) + ((and (syntactic-op? nxt) (not (eq? nxt '|.|))) ;; allow (=) etc. (let ((tok (take-token s))) (if (not (eqv? (require-token s) #\) )) (error (string "invalid identifier name \"" tok "\"")) (take-token s)) (if checked (check-identifier tok)) (cons tok #f))) @@ -2473,6 +2476,55 @@ ;; process escape sequences using lisp read (read (open-input-string (string #\" s #\")))))) +;; parse an identifier, having previously obtained symbol token `t` +(define (parse-identifier s t checked) + (if checked + (begin (check-identifier t) + (if (closing-token? t) + (error (string "unexpected \"" (take-token s) "\""))))) + (take-token s) + (cond ((and (eq? t 'var) + (if (or (ts:pbtok s) (ts:last-tok s)) + (and (eqv? (peek-token s) #\") (not (ts:space? s))) + ;; Hack: avoid peek-token if possible to preserve + ;; (io.pos (ts:port s)) for non-greedy Meta.parse + (eqv? (peek-char (ts:port s)) #\"))) + ;; var"funky identifier" syntax + (peek-token s) + (take-token s) ;; leading " + (let ((str (parse-raw-literal s #\")) + (nxt (peek-token s))) + (if (and (symbol? nxt) (not (operator? nxt)) (not (ts:space? s))) + (error (string "suffix not allowed after `var\"" str "\"`"))) + (symbol str))) + ((eq? t 'true) '(true)) + ((eq? 
t 'false) '(false)) + (else t))) + +(define (parse-quoted-identifier s checked) + (let ((t (require-token s))) + (if (symbol? t) + `(quote ,(parse-identifier s t checked)) + (error (string "unexpected \"" (take-token s) "\""))))) + +(define (parse-lens s checked) + (list '|.| + (let loop ((ex (parse-quoted-identifier s checked)) + (t (peek-token s))) + (cond + ((eqv? t #\( ) + (error "Call syntax after lens is reserved. Add parentheses.")) + ((eq? t '|.|) + (let ((spc (ts:space? s))) + (take-token s) + (let ((nxt (peek-token s))) + (if (eqv? nxt #\( ) + ;; lens broadcast .a.b.c.(vec). + ;; (begin (ts:put-back! s t spc) ex) + (error "lens broadcast syntax is reserved. Add parentheses.") + (loop (list '|.| ex (parse-quoted-identifier s checked)) (peek-token s)))))) + (else ex))))) + ;; parse numbers, identifiers, parenthesized expressions, lists, vectors, etc. (define (parse-atom s (checked #t)) (let ((t (require-token s))) @@ -2519,33 +2571,16 @@ (with-bindings ((end-symbol #f)) (parse-atom s #f)))))))) + ;; lens .a.b.c + ((eq? t '|.|) + (take-token s) + (parse-lens s checked)) + ;; misplaced = ((eq? t '=) (error "unexpected \"=\"")) ;; identifier - ((symbol? t) - (if checked - (begin (check-identifier t) - (if (closing-token? t) - (error (string "unexpected \"" (take-token s) "\""))))) - (take-token s) - (cond ((and (eq? t 'var) - (if (or (ts:pbtok s) (ts:last-tok s)) - (and (eqv? (peek-token s) #\") (not (ts:space? s))) - ;; Hack: avoid peek-token if possible to preserve - ;; (io.pos (ts:port s)) for non-greedy Meta.parse - (eqv? (peek-char (ts:port s)) #\"))) - ;; var"funky identifier" syntax - (peek-token s) - (take-token s) ;; leading " - (let ((str (parse-raw-literal s #\")) - (nxt (peek-token s))) - (if (and (symbol? nxt) (not (operator? nxt)) (not (ts:space? s))) - (error (string "suffix not allowed after `var\"" str "\"`"))) - (symbol str))) - ((eq? t 'true) '(true)) - ((eq? t 'false) '(false)) - (else t))) + ((symbol? 
t) (parse-identifier s t checked)) ;; parens or tuple ((eqv? t #\( ) diff --git a/src/julia-syntax.scm b/src/julia-syntax.scm index abe8165c42787..2ecb6fb87032d 100644 --- a/src/julia-syntax.scm +++ b/src/julia-syntax.scm @@ -2439,8 +2439,14 @@ '|.| (lambda (e) (if (length= e 2) - ;; e = (|.| op) - `(call (top BroadcastFunction) ,(cadr e)) + (let ((oplens (cadr e))) + (if (symbol? oplens) + ;; e = (|.| op) + `(call (top BroadcastFunction) ,oplens) + ;; e = (|.| lens) + (expand-fuse-broadcast '() + (let nest ((chain oplens)) + (if (eq? (car chain) '|.|) `(|.| ,(nest (cadr chain)) ,(caddr chain)) `(|.| (call (top getproperty)) ,chain)))))) ;; e = (|.| f x) (expand-fuse-broadcast '() e))) @@ -2624,7 +2630,7 @@ (cond ((dotop-named? f) (expand-fuse-broadcast '() `(|.| ,(undotop f) (tuple ,@(cddr e))))) ;; "(.op)(...)" - ((and (length= f 2) (eq? (car f) '|.|)) + ((and (length= f 2) (eq? (car f) '|.|) (symbol? (cadr f))) (expand-fuse-broadcast '() `(|.| ,(cadr f) (tuple ,@(cddr e))))) ((eq? f 'ccall) (if (not (length> e 4)) (error "too few arguments to ccall"))