feat: implement Output widget that mimics a frontend
This is a port of voila-dashboards/voila#91 and subsequent fixes.
maartenbreddels committed May 26, 2020
1 parent 6510bd9 commit 2d88880
Showing 4 changed files with 1,117 additions and 1 deletion.
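For orientation, here is a minimal usage sketch of what this commit enables (illustrative only; it assumes ipywidgets is installed in the kernel, and that the executor class is named NotebookClient, which is not shown in this diff): output produced inside an ipywidgets Output widget is captured the way a real frontend would capture it, and ends up in the executor's widget_state.

import nbformat
from nbclient import NotebookClient

# A one-cell notebook that captures a print() into an Output widget.
nb = nbformat.v4.new_notebook(cells=[nbformat.v4.new_code_cell(
    "import ipywidgets as widgets\n"
    "out = widgets.Output()\n"
    "display(out)\n"
    "with out:\n"
    "    print('captured by the mimicked frontend')\n"
)])

client = NotebookClient(nb)
client.execute()
# The stream text was routed into the Output widget's state rather than the
# cell's own outputs, and is available under the widget's comm_id:
print(client.widget_state)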
60 changes: 59 additions & 1 deletion nbclient/client.py
@@ -1,5 +1,6 @@
import datetime
import base64
import collections
import datetime
from textwrap import dedent

from async_generator import asynccontextmanager
@@ -22,6 +23,7 @@
CellExecutionError
)
from .util import run_sync, ensure_async
from .output_widget import OutputWidget


def timestamp():
@@ -307,6 +309,11 @@ def reset_execution_trackers(self):
self._display_id_map = {}
self.widget_state = {}
self.widget_buffers = {}
# maps a parent msg_id to a list of hooks; the last registered hook is
# used, which supports nested use of output widgets.
self.output_hook_stack = collections.defaultdict(list)
# our frontend-mimicking Output widgets
self.output_widget_objects = {}

def start_kernel_manager(self):
"""Creates a new kernel manager.
@@ -787,6 +794,14 @@ def process_message(self, msg, cell, cell_index):
def output(self, outs, msg, display_id, cell_index):
msg_type = msg['msg_type']

parent_msg_id = msg['parent_header'].get('msg_id')
if self.output_hook_stack[parent_msg_id]:
# if we have a hook registered, it will override our
# default output behaviour (e.g. OutputWidget)
hook = self.output_hook_stack[parent_msg_id][-1]
hook.output(outs, msg, display_id, cell_index)
return

try:
out = output_from_msg(msg)
except ValueError:
@@ -812,6 +827,15 @@ def output(self, outs, msg, display_id, cell_index):

def clear_output(self, outs, msg, cell_index):
content = msg['content']

parent_msg_id = msg['parent_header'].get('msg_id')
if self.output_hook_stack[parent_msg_id]:
# if we have a hook registered, it will override our
# default clear_output behaviour (e.g. OutputWidget)
hook = self.output_hook_stack[parent_msg_id][-1]
hook.clear_output(outs, msg, cell_index)
return

if content.get('wait'):
self.log.debug('Wait to clear output')
self.clear_before_next_output = True
@@ -832,6 +856,24 @@ def handle_comm_msg(self, outs, msg, cell_index):
self.widget_state.setdefault(content['comm_id'], {}).update(data['state'])
if 'buffer_paths' in data and data['buffer_paths']:
self.widget_buffers[content['comm_id']] = self._get_buffer_data(msg)
# There are cases where we need to mimic a frontend, to get behaviour similar
# to that of the Output widget in JupyterLab or the classic notebook
if msg['msg_type'] == 'comm_open' and msg['content'].get('target_name') == 'jupyter.widget':
content = msg['content']
data = content['data']
state = data['state']
comm_id = msg['content']['comm_id']
if state['_model_module'] == '@jupyter-widgets/output' and\
state['_model_name'] == 'OutputModel':
self.output_widget_objects[comm_id] = OutputWidget(comm_id, state, self.kc, self)
elif msg['msg_type'] == 'comm_msg':
content = msg['content']
data = content['data']
if 'state' in data:
state = data['state']
comm_id = msg['content']['comm_id']
if comm_id in self.output_widget_objects:
self.output_widget_objects[comm_id].set_state(state)

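# Illustrative only (ids and values invented): the comm_open message matched
# above looks roughly like this when the kernel creates an ipywidgets Output
# widget:
#
#   {'msg_type': 'comm_open',
#    'content': {'comm_id': 'a1b2c3',
#                'target_name': 'jupyter.widget',
#                'data': {'state': {'_model_module': '@jupyter-widgets/output',
#                                   '_model_name': 'OutputModel',
#                                   'outputs': []}}}}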
def _serialize_widget_state(self, state):
"""Serialize a widget state, following format in @jupyter-widgets/schema."""
@@ -856,6 +898,22 @@ def _get_buffer_data(self, msg):
)
return encoded_buffers

def register_output_hook(self, msg_id, hook):
"""Registers an override object that handles output/clear_output instead.
Multiple hooks can be registered, where the last one will be used (stack based)
"""
# mimics
# https://jupyterlab.github.io/jupyterlab/services/interfaces/kernel.ikernelconnection.html#registermessagehook
self.output_hook_stack[msg_id].append(hook)

def remove_output_hook(self, msg_id, hook):
"""Unregisters an override object that handles output/clear_output instead"""
# mimics
# https://jupyterlab.github.io/jupyterlab/services/interfaces/kernel.ikernelconnection.html#removemessagehook
removed_hook = self.output_hook_stack[msg_id].pop()
assert removed_hook == hook

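# A hypothetical sketch of the duck-typed hook protocol used by
# register_output_hook/remove_output_hook: any object exposing output() and
# clear_output() with these signatures can intercept a cell's iopub messages
# (OutputWidget is the in-tree example).
#
#   class TracingHook:
#       def output(self, outs, msg, display_id, cell_index):
#           print('intercepted', msg['msg_type'], 'for cell', cell_index)
#
#       def clear_output(self, outs, msg, cell_index):
#           print('intercepted clear_output for cell', cell_index)
#
#   hook = TracingHook()
#   client.register_output_hook(parent_msg_id, hook)   # pushed onto the stack
#   ...                                                 # messages for that msg_id now go to hook
#   client.remove_output_hook(parent_msg_id, hook)      # pops and checks it is the same object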

def execute(nb, cwd=None, km=None, **kwargs):
"""Execute a notebook's code, updating outputs within the notebook object.
205 changes: 205 additions & 0 deletions nbclient/jsonutil.py
@@ -0,0 +1,205 @@
"""Utilities to manipulate JSON objects."""

# NOTE: this is a copy of ipykernel/jsonutil.py (reformatted with black)

# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.

from binascii import b2a_base64
import math
import re
import types
from datetime import datetime
import numbers


from ipython_genutils import py3compat
from ipython_genutils.py3compat import unicode_type, iteritems

next_attr_name = '__next__' if py3compat.PY3 else 'next'

# -----------------------------------------------------------------------------
# Globals and constants
# -----------------------------------------------------------------------------

# timestamp formats
ISO8601 = "%Y-%m-%dT%H:%M:%S.%f"
ISO8601_PAT = re.compile(
r"^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})(\.\d{1,6})?Z?([\+\-]\d{2}:?\d{2})?$"
)

# holy crap, strptime is not threadsafe.
# Calling it once at import seems to help.
datetime.strptime("1", "%d")

# -----------------------------------------------------------------------------
# Classes and functions
# -----------------------------------------------------------------------------


# constants for identifying png/jpeg data
PNG = b'\x89PNG\r\n\x1a\n'
# front of PNG base64-encoded
PNG64 = b'iVBORw0KG'
JPEG = b'\xff\xd8'
# front of JPEG base64-encoded
JPEG64 = b'/9'
# constants for identifying gif data
GIF_64 = b'R0lGODdh'
GIF89_64 = b'R0lGODlh'
# front of PDF base64-encoded
PDF64 = b'JVBER'


def encode_images(format_dict):
"""b64-encodes images in a displaypub format dict
Perhaps this should be handled in json_clean itself?
Parameters
----------
format_dict : dict
A dictionary of display data keyed by mime-type
Returns
-------
format_dict : dict
A copy of the same dictionary,
but binary image data ('image/png', 'image/jpeg' or 'application/pdf')
is base64-encoded.
"""

# no need for handling of ambiguous bytestrings on Python 3,
# where bytes objects always represent binary data and are thus
# base64-encoded (see json_clean).
if py3compat.PY3:
return format_dict

encoded = format_dict.copy()

pngdata = format_dict.get('image/png')
if isinstance(pngdata, bytes):
# make sure we don't double-encode
if not pngdata.startswith(PNG64):
pngdata = b2a_base64(pngdata)
encoded['image/png'] = pngdata.decode('ascii')

jpegdata = format_dict.get('image/jpeg')
if isinstance(jpegdata, bytes):
# make sure we don't double-encode
if not jpegdata.startswith(JPEG64):
jpegdata = b2a_base64(jpegdata)
encoded['image/jpeg'] = jpegdata.decode('ascii')

gifdata = format_dict.get('image/gif')
if isinstance(gifdata, bytes):
# make sure we don't double-encode
if not gifdata.startswith((GIF_64, GIF89_64)):
gifdata = b2a_base64(gifdata)
encoded['image/gif'] = gifdata.decode('ascii')

pdfdata = format_dict.get('application/pdf')
if isinstance(pdfdata, bytes):
# make sure we don't double-encode
if not pdfdata.startswith(PDF64):
pdfdata = b2a_base64(pdfdata)
encoded['application/pdf'] = pdfdata.decode('ascii')

return encoded

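# Illustration only (values invented): raw image bytes are encoded at most
# once, thanks to the base64-signature checks above.
#
#   raw = {'image/png': PNG + b'\x00' * 16}       # binary PNG data
#   already = {'image/png': PNG64 + b'AAAA'}      # already base64-encoded
#   encode_images(raw)       # Python 2: base64-encodes; Python 3: returned unchanged
#   encode_images(already)   # the startswith(PNG64) guard prevents re-encoding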

def json_clean(obj):
"""Clean an object to ensure it's safe to encode in JSON.
Atomic, immutable objects are returned unmodified. Sets and tuples are
converted to lists, lists are copied and dicts are also copied.
Note: dicts whose keys could cause collisions upon encoding (such as a dict
with both the number 1 and the string '1' as keys) will cause a ValueError
to be raised.
Parameters
----------
obj : any python object
Returns
-------
out : object
A version of the input which will not cause an encoding error when
encoded as JSON. Note that this function does not *encode* its input;
it simply sanitizes it so that there will be no encoding errors later.
"""
# types that are 'atomic' and ok in json as-is.
atomic_ok = (unicode_type, type(None))

# containers that we need to convert into lists
container_to_list = (tuple, set, types.GeneratorType)

# Since bools are a subtype of Integrals, which are a subtype of Reals,
# we have to check them in that order.

if isinstance(obj, bool):
return obj

if isinstance(obj, numbers.Integral):
# cast integer subclasses to plain int, in case they override __str__ (e.g. boost enum, #4598)
return int(obj)

if isinstance(obj, numbers.Real):
# cast out-of-range floats to their reprs
if math.isnan(obj) or math.isinf(obj):
return repr(obj)
return float(obj)

if isinstance(obj, atomic_ok):
return obj

if isinstance(obj, bytes):
if py3compat.PY3:
# unambiguous binary data is base64-encoded
# (this probably should have happened upstream)
return b2a_base64(obj).decode('ascii')
else:
# Python 2 bytestr is ambiguous,
# needs special handling for possible binary bytestrings.
# imperfect workaround: if ascii, assume text.
# otherwise assume binary, base64-encode (py3 behavior).
try:
return obj.decode('ascii')
except UnicodeDecodeError:
return b2a_base64(obj).decode('ascii')

if isinstance(obj, container_to_list) or (
hasattr(obj, '__iter__') and hasattr(obj, next_attr_name)
):
obj = list(obj)

if isinstance(obj, list):
return [json_clean(x) for x in obj]

if isinstance(obj, dict):
# First, validate that the dict won't lose data in conversion due to
# key collisions after stringification. This can happen with keys like
# True and 'true' or 1 and '1', which collide in JSON.
nkeys = len(obj)
nkeys_collapsed = len(set(map(unicode_type, obj)))
if nkeys != nkeys_collapsed:
raise ValueError(
'dict cannot be safely converted to JSON: '
'key collision would lead to dropped values'
)
# If all OK, proceed by making the new dict that will be json-safe
out = {}
for k, v in iteritems(obj):
out[unicode_type(k)] = json_clean(v)
return out
if isinstance(obj, datetime):
return obj.strftime(ISO8601)

# we don't understand it, it's probably an unserializable object
raise ValueError("Can't clean for JSON: %r" % obj)
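# A brief, illustrative summary of the conversions above (values invented):
#
#   json_clean({'when': datetime(2020, 5, 26), 'ids': (1, 2, 3), 'bad': float('inf')})
#   # -> {'when': '2020-05-26T00:00:00.000000', 'ids': [1, 2, 3], 'bad': 'inf'}
#
#   json_clean({1: 'a', '1': 'b'})
#   # raises ValueError: the keys 1 and '1' would collide after stringification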
77 changes: 77 additions & 0 deletions nbclient/output_widget.py
@@ -0,0 +1,77 @@
from .jsonutil import json_clean
from nbformat.v4 import output_from_msg


class OutputWidget:
"""This class mimics a front end output widget"""
def __init__(self, comm_id, state, kernel_client, executor):
self.comm_id = comm_id
self.state = state
self.kernel_client = kernel_client
self.executor = executor
self.topic = ('comm-%s' % self.comm_id).encode('ascii')
self.outputs = self.state['outputs']
self.clear_before_next_output = False

def clear_output(self, outs, msg, cell_index):
self.parent_header = msg['parent_header']
content = msg['content']
if content.get('wait'):
self.clear_before_next_output = True
else:
self.outputs = []
# sync back the state to the kernel
self.sync_state()
if hasattr(self.executor, 'widget_state'):
# also sync the state to the executor's widget_state, since that is used for testing
self.executor.widget_state[self.comm_id]['outputs'] = self.outputs

def sync_state(self):
state = {'outputs': self.outputs}
msg = {'method': 'update', 'state': state, 'buffer_paths': []}
self.send(msg)

def _publish_msg(self, msg_type, data=None, metadata=None, buffers=None, **keys):
"""Helper for sending a comm message on IOPub"""
data = {} if data is None else data
metadata = {} if metadata is None else metadata
content = json_clean(dict(data=data, comm_id=self.comm_id, **keys))
msg = self.kernel_client.session.msg(msg_type, content=content, parent=self.parent_header,
metadata=metadata)
self.kernel_client.shell_channel.send(msg)

def send(self, data=None, metadata=None, buffers=None):
self._publish_msg('comm_msg', data=data, metadata=metadata, buffers=buffers)

def output(self, outs, msg, display_id, cell_index):
if self.clear_before_next_output:
self.outputs = []
self.clear_before_next_output = False
self.parent_header = msg['parent_header']
output = output_from_msg(msg)

if self.outputs:
# try to coalesce/merge output text
last_output = self.outputs[-1]
if (last_output['output_type'] == 'stream'
and output['output_type'] == 'stream'
and last_output['name'] == output['name']):
last_output['text'] += output['text']
else:
self.outputs.append(output)
else:
self.outputs.append(output)
self.sync_state()
if hasattr(self.executor, 'widget_state'):
# also sync the state to the executor's widget_state, since that is used for testing
self.executor.widget_state[self.comm_id]['outputs'] = self.outputs

def set_state(self, state):
if 'msg_id' in state:
msg_id = state.get('msg_id')
if msg_id:
self.executor.register_output_hook(msg_id, self)
self.msg_id = msg_id
else:
self.executor.remove_output_hook(self.msg_id, self)
self.msg_id = msg_id
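# A rough, illustrative sketch (comm ids and msg ids invented) of the message
# sequence this class reacts to, mirroring what a real frontend does:
#
#   1. comm_open, target 'jupyter.widget', state {'_model_name': 'OutputModel', ...}
#        -> the client creates an OutputWidget for comm_id 'out-1'
#   2. comm_msg, state {'msg_id': 'exec-42'}       (kernel entered `with out:`)
#        -> set_state() registers this widget as the output hook for 'exec-42'
#   3. stream / display_data messages whose parent msg_id is 'exec-42'
#        -> output() appends them (coalescing consecutive streams of the same
#           name) and sync_state() sends the updated 'outputs' back over the comm
#   4. comm_msg, state {'msg_id': ''}              (kernel left the context)
#        -> set_state() removes the hook again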