"""Nbconvert preprocessor to embed markdown-cell images as attachments."""

import base64
import os
import re

from ipython_genutils.ipstruct import Struct
from nbconvert.preprocessors import Preprocessor
from traitlets import Bool, Unicode

try:
    from urllib.request import urlopen  # py3
except ImportError:
    from urllib2 import urlopen


class EmbedImagesPreprocessor(Preprocessor):
    """
    :mod:`nbconvert` Preprocessor to embed images in a markdown cell as
    attachment inside the notebook itself.

    This :class:`~nbconvert.preprocessors.Preprocessor` reads every image
    referenced as ``![alt](path)`` in a markdown cell, stores it base64
    encoded in the attachments of that cell and rewrites the reference to
    ``![alt](attachment:path)``.

    Embedding is disabled by default. To enable embedding images with
    NbConvert, you need to set the configuration parameter
    `EmbedImagesPreprocessor.embed_images=True`.
    This can be done either in the `jupyter_nbconvert_config.py` file::

        c.EmbedImagesPreprocessor.embed_images=True

    or using a command line parameter when calling NbConvert::

        $ jupyter nbconvert --to html --EmbedImagesPreprocessor.embed_images=True mynotebook.ipynb

    Further options are::

        EmbedImagesPreprocessor.embed_remote_images=True

    to additionally embed all images referenced by an url
    (e.g. http://jupyter.org/assets/nav_logo.svg) instead of a local file name.

    Another configuration option is::

        EmbedImagesPreprocessor.resize=small

    This lets you scale down the size of an image. It is useful if you want to
    save space by not embedding large images and instead use a smaller (scaled)
    version. Works only for raster images (i.e. png, jpg).
    Valid resize settings are: small = 500px, mid = 1000px, large = 2000px
    for maximum size in length or width. No upscaling of small images will
    be performed. The Python package `PIL` needs to be installed for this
    option to work.

    Example::

        $ jupyter nbconvert --to html --EmbedImagesPreprocessor.embed_images=True
        --EmbedImagesPreprocessor.resize=large mynotebook.ipynb

    *Note:* To embed images after conversion to HTML you can also use the
    `html_embed` exporter
    """

    embed_images = Bool(False, help="Embed images as attachment").tag(config=True)
    embed_remote_images = Bool(False, help="Embed images referenced by an url as attachment").tag(config=True)
    resize = Unicode('', help="Resize images to save space (reduce size)").tag(config=True)
    # Maximum edge length in pixels for each accepted value of ``resize``
    imgsizes = {'small': 500, 'mid': 1000, 'large': 2000}

    # Markdown image reference ``![alt](target)``. The alt text stops at the
    # first ``]`` and the target at the first ``)`` so that several images in
    # one cell are matched one by one instead of as a single greedy match.
    _image_regex = re.compile(r'!\[([^\]"]*)\]\(([^)"]+)\)')
    # File extensions whose MIME subtype differs from the extension itself
    _mime_subtypes = {'jpg': 'jpeg', 'svg': 'svg+xml'}

    def preprocess(self, nb, resources):
        """Run the preprocessor only if ``embed_images`` is enabled.

        Parameters
        ----------
        nb : NotebookNode
            Notebook being converted
        resources : dictionary
            Additional resources used in the conversion process

        Returns
        -------
        The (possibly modified) notebook and the resources.
        """
        if self.embed_images:
            nb, resources = super(EmbedImagesPreprocessor, self).preprocess(nb, resources)
        return nb, resources

    def resize_image(self, imgname, imgformat, imgdata):
        """Resize images if desired and PIL is installed

        The image is only ever scaled down, keeping its aspect ratio, so that
        its longest edge does not exceed ``imgsizes[self.resize]`` pixels.

        Parameters
        ----------
        imgname: str
            Name of image (only used for logging)
        imgformat: str
            Lower-case format of image (png, jpg or jpeg); any other format
            is returned untouched
        imgdata:
            Binary image data

        Returns
        -------
        The binary data of the resized image, or `imgdata` unchanged if no
        resizing was necessary or possible.
        """
        maxsize = self.imgsizes.get(self.resize)
        if maxsize is None or imgformat not in ('png', 'jpg', 'jpeg'):
            return imgdata

        from io import BytesIO
        try:
            from PIL import Image
        except ImportError:
            self.log.info("Pillow library not available to resize images")
            return imgdata

        im = Image.open(BytesIO(imgdata))
        # float() forces a true division: on Python 2 the integer division
        # would truncate the factor to 0 and produce a 0 x 0 image.
        factor = float(maxsize) / max(im.size)
        # Only make images smaller when rescaling
        if factor >= 1.0:
            return imgdata

        # Never let the short edge of a very elongated image round down to 0
        newsize = tuple(max(1, int(edge * factor)) for edge in im.size)
        fp = BytesIO()
        try:
            # PIL requires JPEG instead of JPG
            im.resize(newsize).save(fp, format=imgformat.replace('jpg', 'jpeg'))
            imgdata = fp.getvalue()
        finally:
            fp.close()
        self.log.debug("Resized %d x %d image %s to size %d x %d pixels",
                       im.size[0], im.size[1], imgname, newsize[0], newsize[1])
        return imgdata

    def replfunc_md(self, match):
        """Read image and store as base64 encoded attachment

        Used as replacement callback for the image regular expression.
        Expects ``self.path`` and ``self.attachments`` to be set, which is
        done by :meth:`preprocess_cell`.

        Parameters
        ----------
        match:
            Regular expression match, group 1 is the alt text and group 2
            the image target (local file name or url)

        Returns
        -------
        str
            The image reference pointing to the new attachment, or the
            original text if the image is not embedded (it already is an
            attachment, it is remote and `embed_remote_images` is not set,
            or its format cannot be determined).

        Raises
        ------
        IOError
            If a local image file cannot be read.
        """
        url = match.group(2)
        if url.startswith('attachment:'):
            return match.group(0)

        is_remote = url.startswith(('http://', 'https://'))
        if is_remote and not self.embed_remote_images:
            return match.group(0)

        # A remote url may carry a query string or fragment after the file
        # name, which must not end up in the image format.
        name = url.split('#', 1)[0].split('?', 1)[0] if is_remote else url
        imgformat = os.path.splitext(name)[1][1:].lower()
        if not imgformat:
            self.log.warning("Unknown image format, not embedding url: %s", url)
            return match.group(0)

        if is_remote:
            response = urlopen(url)
            try:
                data = response.read()
            finally:
                response.close()
        else:
            filename = os.path.join(self.path, url)
            with open(filename, 'rb') as f:
                data = f.read()

        if self.resize in self.imgsizes:
            data = self.resize_image(url, imgformat, data)

        self.log.debug("Embedding url: %s, format: %s", url, imgformat)
        b64_data = base64.b64encode(data).decode("utf-8")
        # Attachments are keyed by MIME type, e.g. image/jpeg, image/svg+xml
        mimetype = 'image/' + self._mime_subtypes.get(imgformat, imgformat)
        self.attachments[url] = {mimetype: b64_data}

        return '![' + match.group(1) + '](attachment:' + url + ')'

    def preprocess_cell(self, cell, resources, index):
        """
        Preprocess cell

        Parameters
        ----------
        cell : NotebookNode cell
            Notebook cell being processed
        resources : dictionary
            Additional resources used in the conversion process.  Allows
            preprocessors to pass variables into the Jinja engine.
        index : int
            Index of the cell being processed (see base.py)
        """
        # Local image names are resolved relative to the notebook path; fall
        # back to the working directory if the resources carry no path.
        self.path = (resources.get('metadata') or {}).get('path') or ''
        self.attachments = getattr(cell, 'attachments', Struct())

        if cell.cell_type == "markdown":
            cell.source = self._image_regex.sub(self.replfunc_md, cell.source)
            # Do not add an empty attachments entry to cells without images
            if self.attachments:
                cell.attachments = self.attachments
        return cell, resources
def test_preprocessor_embedimages():
    """Embedding a local png image must add an image/png attachment."""
    # check import shortcut
    from jupyter_contrib_nbextensions.nbconvert_support import EmbedImagesPreprocessor  # noqa E501

    cells = [
        nbf.new_code_cell(source="a = 'world'"),
        nbf.new_markdown_cell(
            source="![testimage]({})".format(path_in_data('icon.png'))
        ),
    ]
    notebook_node = nbf.new_notebook(cells=cells)
    customconfig = Config(EmbedImagesPreprocessor={'embed_images': True})
    body, resources = export_through_preprocessor(
        notebook_node, EmbedImagesPreprocessor, NotebookExporter, 'ipynb',
        customconfig)

    expected = 'image/png'
    assert_in(expected, body, 'Attachment {} is missing'.format(expected))


def test_preprocessor_embedimages_resize():
    """Exported size must grow with the allowed size of the embedded image."""
    # check import shortcut
    from jupyter_contrib_nbextensions.nbconvert_support import EmbedImagesPreprocessor  # noqa E501

    try:
        from PIL import Image  # noqa F401
    except ImportError:
        raise SkipTest('PIL not found')

    cells = [
        nbf.new_code_cell(source="a = 'world'"),
        nbf.new_markdown_cell(
            source="![testimage]({})".format(path_in_data('large_image.png'))
        ),
    ]
    notebook_node = nbf.new_notebook(cells=cells)

    def exported_length(*config):
        # Length of the notebook exported with the optional configuration
        body, resources = export_through_preprocessor(
            notebook_node, EmbedImagesPreprocessor, NotebookExporter, 'ipynb',
            *config)
        return len(body)

    # Without configuration the preprocessor is disabled: nothing is embedded
    len_noembed = exported_length()

    len_small, len_mid, len_large = [
        exported_length(Config(EmbedImagesPreprocessor={'embed_images': True,
                                                        'resize': setting}))
        for setting in ('small', 'mid', 'large')
    ]

    # Embedding without resizing stores the image at its full size
    len_noresize = exported_length(
        Config(EmbedImagesPreprocessor={'embed_images': True}))

    assert len_noembed < len_small
    assert len_small < len_mid
    assert len_mid < len_large
    assert len_large < len_noresize