diff --git a/examples/examples-siu.ipynb b/examples/examples-siu.ipynb index 4eaeb55f..dd63ffc7 100644 --- a/examples/examples-siu.ipynb +++ b/examples/examples-siu.ipynb @@ -11,9 +11,18 @@ "cell_type": "code", "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/machow/.virtualenvs/siuba/lib/python3.8/site-packages/pandas/compat/__init__.py:124: UserWarning: Could not import the lzma module. Your installed Python is incomplete. Attempting to use lzma compression will result in a RuntimeError.\n", + " warnings.warn(msg)\n" + ] + } + ], "source": [ - "from siuba.siu import _, explain" + "from siuba.siu import _, explain, strip_symbolic" ] }, { @@ -21,13 +30,6 @@ "execution_count": 2, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "_.somecol.min()\n" - ] - }, { "data": { "text/plain": [ @@ -301,15 +303,18 @@ "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "((_.a + (_.b / 2) + _.c**_.d) >> _) & _\n" - ] + "data": { + "text/plain": [ + "'_.a + _.b / 2 + _.c**_.d << _ & _'" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "f = _.a + _.b / 2 + _.c**_.d >> _ & _\n", + "f = _.a + _.b / 2 + _.c**_.d << _ & _\n", "\n", "explain(f)" ] @@ -364,7 +369,7 @@ { "data": { "text/plain": [ - "{'a', 'b', 'c'}" + "{'a', 'b'}" ] }, "execution_count": 12, @@ -376,7 +381,7 @@ "symbol = _.a[_.b + 1] + _['c']\n", "\n", "# hacky way to go from symbol to call for now\n", - "call = symbol.source\n", + "call = strip_symbolic(symbol)\n", "\n", "call.op_vars()" ] @@ -415,7 +420,15 @@ "name": "stdout", "output_type": "stream", "text": [ - "(1,_('a') + _('b'))\n" + "(1,_['a'] + _['b'])\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/machow/repos/siuba/siuba/meta_hook.py:20: UserWarning: The siuba.meta_hook module is DEPRECATED 
and will be removed in a future release.\n", + " warnings.warn(\n" ] }, { @@ -435,7 +448,7 @@ "from siuba.meta_hook.pandas import DataFrame\n", "\n", "f = add(1, _['a'] + _['b'])\n", - "explain(f)\n", + "print(explain(f))\n", "\n", "f({'a': 1, 'b': 2})" ] @@ -549,7 +562,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "212 µs ± 50.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" + "154 µs ± 177 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n" ] } ], @@ -569,7 +582,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "7.29 µs ± 199 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)\n" + "2.74 µs ± 10 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n" ] } ], @@ -672,7 +685,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -686,7 +699,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.8.12" }, "toc": { "base_numbering": 1, diff --git a/siuba/siu/__init__.py b/siuba/siu/__init__.py index 60f36fb5..5f753f9a 100644 --- a/siuba/siu/__init__.py +++ b/siuba/siu/__init__.py @@ -10,11 +10,10 @@ ) from .symbolic import Symbolic, strip_symbolic, create_sym_call, explain from .visitors import CallTreeLocal, CallVisitor, FunctionLookupBound, FunctionLookupError, ExecutionValidatorVisitor -from .dispatchers import symbolic_dispatch, singledispatch2, pipe_no_args, Pipeable +from .dispatchers import symbolic_dispatch, singledispatch2, pipe_no_args, Pipeable, pipe, call Lam = Lazy _ = Symbolic() -pipe = Pipeable diff --git a/siuba/siu/calls.py b/siuba/siu/calls.py index 50cc5420..fa295e37 100644 --- a/siuba/siu/calls.py +++ b/siuba/siu/calls.py @@ -117,6 +117,8 @@ class Call: """ + + def __init__(self, func, *args, **kwargs): self.func = func self.args = args @@ -189,6 +191,27 @@ def __call__(self, x): f_op = getattr(operator, self.func) 
return f_op(inst, *rest, **kwargs) + # TODO: type checks will be very useful here. Will need to import symbolic. + # Let's do this once types are in a _typing.py submodule. + def __rshift__(self, x): + """Create a PipeCall that applies this call, then x, to the piped data.""" + from .symbolic import strip_symbolic + + stripped = strip_symbolic(x) + + if isinstance(stripped, Call): + return self._construct_pipe(MetaArg("_"), self, stripped) + + raise TypeError() + + def __rrshift__(self, x): + from .symbolic import strip_symbolic + if isinstance(strip_symbolic(x), (Call)): + # only allow non-calls (i.e. data) on the left. + raise TypeError() + + return self(x) + @staticmethod def evaluate_calls(arg, x): if isinstance(arg, Call): return arg(x) @@ -284,6 +307,10 @@ def obj_name(self): return None + @classmethod + def _construct_pipe(cls, *args): + return PipeCall(*args) + class Lazy(Call): """Lazily return calls rather than evaluating them.""" @@ -586,4 +613,37 @@ def __call__(self, x): return self.args[0] +# Pipe =================================================================================== + +class PipeCall(Call): + """ + pipe(df, a, b, c) + pipe(_, a, b, c) + + should options for first arg be only MetaArg or a non-call? + """ + + def __init__(self, func, *args, **kwargs): + self.func = "__siu_pipe_call__" + self.args = (func, *args) + if kwargs: + raise ValueError("Keyword arguments are not allowed.") + self.kwargs = {} + def __call__(self, x=None): + # Note that most calls map_subcalls to pass in the same data for each argument. + # In contrast, PipeCall passes data from the prev step to the next. 
+ crnt_data, *calls = self.args + + if isinstance(crnt_data, MetaArg): + crnt_data = crnt_data(x) + + for call in calls: + new_data = call(crnt_data) + crnt_data = new_data + + return crnt_data + + def __repr__(self): + args_repr = ",".join(map(repr, self.args)) + return f"{type(self).__name__}({args_repr})" diff --git a/siuba/siu/dispatchers.py b/siuba/siu/dispatchers.py index cccf68b6..bd71655b 100644 --- a/siuba/siu/dispatchers.py +++ b/siuba/siu/dispatchers.py @@ -3,9 +3,11 @@ from functools import singledispatch, update_wrapper, wraps import inspect -from .calls import Call, FuncArg, MetaArg, Lazy +from .calls import Call, FuncArg, MetaArg, Lazy, PipeCall from .symbolic import Symbolic, create_sym_call, strip_symbolic +from typing import Callable + def _dispatch_not_impl(func_name): def f(x, *args, **kwargs): raise TypeError("singledispatch function {func_name} not implemented for type {type}" @@ -173,7 +175,9 @@ def wrapper(*args, **kwargs): # Pipe ======================================================================== class Pipeable: - """Enable function composition through the right bitshift (>>) operator. + """DEPRECATED: please use the siuba.siu.call function. + + Enable function composition through the right bitshift (>>) operator. Parameters ---------- @@ -210,6 +214,8 @@ class Pipeable: """ def __init__(self, f = None, calls = None): + import warnings + warnings.warn("Pipeable is deprecated. Please use siuba.siu.call.") # symbolics like _.some_attr need to be stripped down to a call, because # calling _.some_attr() returns another symbolic. 
f = strip_symbolic(f) @@ -262,15 +268,162 @@ def __call__(self, x): res = f(res) return res +def _prep_lazy_args(*args): + result = [] + for ii, arg in enumerate(args): + if ii == 0: + result.append(strip_symbolic(arg)) + else: + result.append(Lazy(strip_symbolic(arg))) + + return result -def create_pipe_call(obj, *args, **kwargs) -> Pipeable: + +def create_pipe_call(obj, *args, **kwargs) -> Call: - """Return a Call of a function on its args and kwargs, wrapped in a Pipeable.""" + """Return a Call of a function on its args and kwargs; all but the first positional arg are wrapped in Lazy.""" - first, *rest = args - return Pipeable(Call( + + stripped_args = _prep_lazy_args(*args) + + return Call( "__call__", strip_symbolic(obj), - strip_symbolic(first), - *(Lazy(strip_symbolic(x)) for x in rest), + *stripped_args, **{k: Lazy(strip_symbolic(v)) for k,v in kwargs.items()} - )) + ) + +def create_eager_pipe_call(obj, *args, **kwargs) -> Call: + + return Call( + "__call__", + strip_symbolic(obj), + *map(strip_symbolic, args), + **{k: strip_symbolic(v) for k,v in kwargs.items()} + ) + + +def call(__func: "Callable | Call | Symbolic", *args, **kwargs): + """Allow a function call to be used in a pipe (with >>). + + Parameters + ---------- + __func: + A function to be called as part of a pipe. + *args: + Additional positional arguments to pass to the function. + **kwargs: + Additional keyword arguments to pass to the function. + + Examples + -------- + + The simplest use of the call is passing just the to-be-called function. + + >>> s = "a string" + >>> s >> call(print) + a string + + This is equivalent to explicitly passing ``_`` as a placeholder. + + >>> from siuba import _ + >>> s >> call(print, _) + a string + + The explicit syntax is useful, because it allows us to pass more arguments. + For example, the code below passes additional arguments to print. + + >>> "a" >> call(print, _, "string", sep=" ") + a string + + You can transform the input data. For example, the code below passes "shout".upper() + to print. 
+ + >>> "shout" >> call(print, _.upper()) + SHOUT + + Since ``_`` is just a placeholder for the data on the left-hand-side of >>, you + can pass it multiple times to the to-be-called function (e.g. print). + + >>> "nice" >> call(print, _, _, sep=" ") + nice nice + + Alternatively, you can pass a siu expression straight to call. + + >>> "abc" >> call(_[0].upper()) + 'A' + + """ + + if isinstance(__func, (Symbolic, Call)): + if args or kwargs: + raise NotImplementedError( + "If a siu expression (e.g. _) is the first argument to call, it must " + "be the only argument. You can pass arguments using the form, " + "call(_.some_method(1, 2, c = 3))." + ) + return strip_symbolic(__func) + if not args and not kwargs: + # handle implicit case, call(some_func) -> call(some_func, _) + return create_eager_pipe_call(__func, MetaArg("_")) + + return create_eager_pipe_call(__func, *args, **kwargs) + + +def pipe(__data, *args: Callable): + """Pipe data through a chain of callables. Return the final result. + + Examples + -------- + + Case 1: pipe regular functions + + >>> pipe({"a": 1}, lambda x: x["a"], lambda x: x + 1) + 2 + + Case 2: pipe to siu expressions + + >>> from siuba import _ + >>> pipe({"a": 1}, _["a"], _ + 1) + 2 + + Case 3: call external function on siu expression + + >>> from siuba.siu import call + >>> pipe({"a": 1}, call(isinstance, _["a"], int)) + True + + Case 4: _ as first arg to delay + + >>> f = pipe(_, lambda x: x["a"]) + >>> f + PipeCall(...) + + >>> f({"a": 1}) + 1 + + Example: using with verb + + >>> from siuba import _, summarize + >>> from siuba.data import mtcars + >>> pipe(mtcars, summarize(res = _.hp.mean())) + res + 0 146.6875 + """ + + stripped = strip_symbolic(__data) + + # Special case: support backwards compatibility with old pipe() behavior ---- + # call() and Call.__rrshift__ now handle this behavior. 
+ if len(args) == 0: + if isinstance(stripped, Call): + return stripped + else: + return call(stripped) + + + # When data is _, return a pipe call + pipe_call = PipeCall(stripped, *map(strip_symbolic, args)) + + if isinstance(stripped, MetaArg): + return pipe_call + return pipe_call() diff --git a/siuba/siu/symbolic.py b/siuba/siu/symbolic.py index 483c0086..4a710d6e 100644 --- a/siuba/siu/symbolic.py +++ b/siuba/siu/symbolic.py @@ -1,11 +1,30 @@ from functools import singledispatch -from .calls import BINARY_OPS, UNARY_OPS, Call, BinaryOp, BinaryRightOp, MetaArg, UnaryOp, SliceOp, FuncArg +from .calls import Call, BinaryOp, BinaryRightOp, MetaArg, UnaryOp, SliceOp, FuncArg from .format import Formatter # Symbolic # ============================================================================= +def create_binary_op(op_name, left_op = True): + def _binary_op(self, x): + if left_op: + node = BinaryOp(op_name, strip_symbolic(self), strip_symbolic(x)) + else: + node = BinaryRightOp(op_name, strip_symbolic(self), strip_symbolic(x)) + + return self.__class__(node, ready_to_call = True) + return _binary_op + +def create_unary_op(op_name): + def _unary_op(self): + node = UnaryOp(op_name, strip_symbolic(self)) + + return self.__class__(node, ready_to_call = True) + + return _unary_op + + class Symbolic(object): def __init__(self, source = None, ready_to_call = False): self.__source = MetaArg("_") if source is None else source @@ -18,6 +37,9 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): def __array_function__(self, func, types, args, kwargs): return array_function(self, func, types, *args, **kwargs) + # since we override __eq__, we must explicitly set the hash method back to default + __hash__ = object.__hash__ + # allowed methods ---- def __getattr__(self, x): @@ -56,6 +78,24 @@ def __invert__(self): def __op_invert(self): return Symbolic(UnaryOp('__invert__', self.__source), ready_to_call = True) + def __rshift__(self, x): + + # Note that this and 
__rrshift__ are copied from Call + stripped = strip_symbolic(x) + + if isinstance(stripped, Call): + lhs_call = self.__source + return Call._construct_pipe(MetaArg("_"), lhs_call, stripped) + # strip_symbolic(self)(x) + # x is a symbolic + raise NotImplementedError("Symbolic may only be used on right-hand side of >> operator.") + + def __rrshift__(self, x): + if isinstance(x, (Symbolic, Call)): + raise NotImplementedError() + + return strip_symbolic(self)(x) + # banned methods ---- @@ -72,11 +112,54 @@ def __bool__(self): def __repr__(self): return Formatter().format(self.__source) - -def create_sym_call(source, *args, **kwargs): + # unary operators ---- + # note that __invert__ is handled in a custom way above + __neg__ = create_unary_op("__neg__") + __pos__ = create_unary_op("__pos__") + __abs__ = create_unary_op("__abs__") + + + # binary operators ---- + __add__ = create_binary_op("__add__") + __sub__ = create_binary_op("__sub__") + __mul__ = create_binary_op("__mul__") + __matmul__ = create_binary_op("__matmul__") + __truediv__ = create_binary_op("__truediv__") + __floordiv__ = create_binary_op("__floordiv__") + __mod__ = create_binary_op("__mod__") + __divmod__ = create_binary_op("__divmod__") + __pow__ = create_binary_op("__pow__") + __lshift__ = create_binary_op("__lshift__") + __and__ = create_binary_op("__and__") + __xor__ = create_binary_op("__xor__") + __or__ = create_binary_op("__or__") + __gt__ = create_binary_op("__gt__") + __lt__ = create_binary_op("__lt__") + __eq__ = create_binary_op("__eq__") + __ne__ = create_binary_op("__ne__") + __ge__ = create_binary_op("__ge__") + __le__ = create_binary_op("__le__") + + + __radd__ = create_binary_op("__radd__", False) + __rsub__ = create_binary_op("__rsub__", False) + __rmul__ = create_binary_op("__rmul__", False) + __rmatmul__ = create_binary_op("__rmatmul__", False) + __rtruediv__ = create_binary_op("__rtruediv__", False) + __rfloordiv__ = create_binary_op("__rfloordiv__", False) + __rmod__ = 
create_binary_op("__rmod__", False) + __rdivmod__ = create_binary_op("__rdivmod__", False) + __rpow__ = create_binary_op("__rpow__", False) + __rlshift__ = create_binary_op("__rlshift__", False) + __rand__ = create_binary_op("__rand__", False) + __rxor__ = create_binary_op("__rxor__", False) + __ror__ = create_binary_op("__ror__", False) + + +def create_sym_call(__source, *args, **kwargs): return Symbolic(Call( "__call__", - strip_symbolic(source), + strip_symbolic(__source), *map(strip_symbolic, args), **{k: strip_symbolic(v) for k,v in kwargs.items()} ), @@ -174,32 +257,3 @@ def _array_ufunc_sym(self, ufunc, method, *inputs, **kwargs): # Do some gnarly method setting on Symbolic ----------------------------------- # ============================================================================= -def create_binary_op(op_name, left_op = True): - def _binary_op(self, x): - if left_op: - node = BinaryOp(op_name, strip_symbolic(self), strip_symbolic(x)) - else: - node = BinaryRightOp(op_name, strip_symbolic(self), strip_symbolic(x)) - - return Symbolic(node, ready_to_call = True) - return _binary_op - -def create_unary_op(op_name): - def _unary_op(self): - node = UnaryOp(op_name, strip_symbolic(self)) - - return Symbolic(node, ready_to_call = True) - - return _unary_op - -for k, v in BINARY_OPS.items(): - if k in {"__getattr__", "__getitem__"}: continue - rop = k.replace("__", "__r", 1) - setattr(Symbolic, k, create_binary_op(k)) - setattr(Symbolic, rop, create_binary_op(rop, left_op = False)) - -for k, v in UNARY_OPS.items(): - if k != "__invert__": - setattr(Symbolic, k, create_unary_op(k)) - - diff --git a/siuba/tests/test_siu.py b/siuba/tests/test_siu.py index 01a1408f..e5dfab74 100644 --- a/siuba/tests/test_siu.py +++ b/siuba/tests/test_siu.py @@ -13,7 +13,7 @@ "%", #"_ ** _", #TODO: uses different formatting "<<", - ">>", + #">>", "&", "^", "|" diff --git a/siuba/tests/test_siu_dispatchers.py b/siuba/tests/test_siu_dispatchers.py new file mode 100644 index 
00000000..f2aa1ad7 --- /dev/null +++ b/siuba/tests/test_siu_dispatchers.py @@ -0,0 +1,45 @@ +import pytest + +from siuba.siu.dispatchers import call +from siuba.siu import _ + +# TODO: direct test of lazy elements +# TODO: NSECall - no map subcalls + +def test_siu_call_no_args(): + assert 1 >> call(range) == range(1) + +def test_siu_call_no_args_explicit(): + assert 1 >> call(range, _) == range(1) + +def test_siu_call_pos_arg(): + assert 1 >> call(range, _, 2) == range(1, 2) + + +def test_siu_call_kwarg(): + assert "," >> call("a,b,c".split, _, maxsplit=1) == ["a", "b,c"] + + +def test_siu_call_onlykwargs(): + def f(*, x): + return x + + res = 1 >> call(f, x = _) + assert res == 1 + + +def test_siu_call_arg_kwarg(): + assert 1 >> call("{0}_{1}_{b}".format, _, 2, b=3) == "1_2_3" + + +def test_siu_call_underscore_arg(): + assert 1 >> call(range, 2, 3, _) == range(2, 3, 1) + + +def test_siu_call_underscore_method(): + assert "a,b" >> call(_.split(",")) == ["a", "b"] + + +def test_siu_call_underscore_method_args(): + with pytest.raises(NotImplementedError): + "a,b" >> call(_.split, _, ",") diff --git a/siuba/tests/test_verb_utils.py b/siuba/tests/test_verb_utils.py index 4059afcd..a048bfad 100644 --- a/siuba/tests/test_verb_utils.py +++ b/siuba/tests/test_verb_utils.py @@ -1,6 +1,6 @@ from siuba.siu import Symbolic from siuba.sql.verbs import collect, show_query, LazyTbl -from siuba.dply.verbs import Pipeable +from siuba.dply.verbs import Call from .helpers import data_frame import pandas as pd @@ -15,18 +15,9 @@ def df(backend): def test_show_query(df): assert isinstance(show_query(df), df.__class__) assert isinstance(df >> show_query(), df.__class__) - assert isinstance(show_query(), Pipeable) + assert isinstance(show_query(), Call) def test_collect(df): assert isinstance(collect(df), pd.DataFrame) assert isinstance(df >> collect(), pd.DataFrame) - assert isinstance(collect(), Pipeable) - - -# Pipeable 
-------------------------------------------------------------------- - -def test_pipe_symbolic_attr(): - class A: a = 1 - - assert Pipeable(_.a)(A) == 1 - + assert isinstance(collect(), Call)