Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Preprocessor to embed markdown images #1067

Merged
merged 13 commits into from
Feb 17, 2018
5 changes: 5 additions & 0 deletions docs/source/exporting.rst
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ Converting linked SVG to PDF
.. autoclass:: SVG2PDFPreprocessor


Embedding images in notebooks
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. autoclass:: EmbedImagesPreprocessor


Postprocessors
--------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from .pp_highlighter import HighlighterPostProcessor, HighlighterPreprocessor
from .pre_codefolding import CodeFoldingPreprocessor
from .pre_pymarkdown import PyMarkdownPreprocessor
from .pre_embedimages import EmbedImagesPreprocessor
from .pre_svg2pdf import SVG2PDFPreprocessor
from .toc2 import TocExporter

Expand All @@ -22,6 +23,7 @@
'ExporterInliner',
'HighlighterPostProcessor',
'HighlighterPreprocessor',
'EmbedImagesPreprocessor',
'NotebookLangExporter',
'PyMarkdownPreprocessor',
'SVG2PDFPreprocessor',
Expand Down
157 changes: 157 additions & 0 deletions src/jupyter_contrib_nbextensions/nbconvert_support/pre_embedimages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
"""Nbconvert preprocessor to embed images of markdown cells as attachments."""

import base64
import os
import re

from ipython_genutils.ipstruct import Struct
from nbconvert.preprocessors import Preprocessor
from traitlets import Bool, Unicode

try:
from urllib.request import urlopen # py3
except ImportError:
from urllib2 import urlopen


class EmbedImagesPreprocessor(Preprocessor):
    """
    :mod:`nbconvert` Preprocessor to embed images in a markdown cell as
    attachment inside the notebook itself.

    This :class:`~nbconvert.preprocessors.Preprocessor` rewrites markdown
    image references (``![alt](path)``) into ``attachment:`` links and stores
    the base64 encoded image data in the attachments of the markdown cell.

    The preprocessor is installed by default. To enable embedding images with
    NbConvert, you need to set the configuration parameter
    `EmbedImagesPreprocessor.embed_images=True`.
    This can be done either in the `jupyter_nbconvert_config.py` file::

        c.EmbedImagesPreprocessor.embed_images=True

    or using a command line parameter when calling NbConvert::

        $ jupyter nbconvert --to html --EmbedImagesPreprocessor.embed_images=True mynotebook.ipynb

    Further options are::

        EmbedImagesPreprocessor.embed_remote_images=True

    to additionally embed all images referenced by a url
    (e.g. http://jupyter.org/assets/nav_logo.svg) instead of a local file name.

    Another configuration option is::

        EmbedImagesPreprocessor.resize=small

    This lets you scale down the size of an image. It is useful if you want to
    save space by not embedding large images and instead use a smaller (scaled)
    version. Works only for raster images (i.e. png, jpg).
    Valid resize settings are: small = 500px, mid = 1000px, large = 2000px
    for maximum size in length or width. No upscaling of small images will
    be performed. The Python package `PIL` needs to be installed for this
    option to work.

    Example::

        $ jupyter nbconvert --to html --EmbedImagesPreprocessor.embed_images=True
            --EmbedImagesPreprocessor.resize=large mynotebook.ipynb

    *Note:* To embed images after conversion to HTML you can also use the
    `html_embed` exporter
    """

    embed_images = Bool(False, help="Embed images as attachment").tag(config=True)
    embed_remote_images = Bool(False, help="Embed images referenced by an url as attachment").tag(config=True)
    resize = Unicode('', help="Resize images to save space (reduce size)").tag(config=True)
    # Maximum edge length in pixels for each accepted value of ``resize``.
    imgsizes = {'small': 500, 'mid': 1000, 'large': 2000}

    # File extensions whose MIME subtype is not simply the extension itself.
    _mime_subtypes = {'jpg': 'jpeg', 'svg': 'svg+xml'}

    # Matches ``![alt](target)``. Neither part may contain a double quote
    # (so images with a title are left alone, as before). Brackets inside the
    # alt text and parentheses inside the target are accepted one level deep;
    # anything else terminates the match, so that several images in one cell
    # are matched individually instead of being swallowed by a single greedy
    # match.
    _image_regex = re.compile(
        r'!\[((?:[^\[\]"]|\[[^\[\]"]*\])*)\]'
        r'\(((?:[^()"]|\([^()"]*\))+)\)')

    def preprocess(self, nb, resources):
        """Skip preprocessor if not enabled"""
        if self.embed_images:
            nb, resources = super(EmbedImagesPreprocessor, self).preprocess(nb, resources)
        return nb, resources

    def resize_image(self, imgname, imgformat, imgdata):
        """Resize images if desired and PIL is installed

        Parameters
        ----------
        imgname: str
            Name of image (only used for logging)
        imgformat: str
            Format of image (JPG, JPEG or PNG); other formats are returned
            unchanged
        imgdata:
            Binary image data

        Returns
        -------
        Binary image data, scaled down if the image exceeded the configured
        size, otherwise the unmodified input.
        """
        imgformat = imgformat.lower()
        if imgformat not in ('png', 'jpg', 'jpeg'):
            return imgdata
        if self.resize not in self.imgsizes:
            # No (valid) size configured: nothing to do.
            return imgdata

        from io import BytesIO
        try:
            from PIL import Image
        except ImportError:
            self.log.info("Pillow library not available to resize images")
            return imgdata

        im = Image.open(BytesIO(imgdata))
        # float() keeps this a true division on Python 2 as well; integer
        # division would yield 0 and collapse the image to 0 x 0 pixels.
        factor = float(self.imgsizes[self.resize]) / max(im.size)
        # Only make images smaller when rescaling
        if factor >= 1.0:
            return imgdata

        # Never let an edge shrink to 0 pixels (very elongated images).
        newsize = (max(1, int(im.size[0] * factor)),
                   max(1, int(im.size[1] * factor)))
        newim = im.resize(newsize)
        # PIL requires JPEG instead of JPG
        pil_format = 'jpeg' if imgformat in ('jpg', 'jpeg') else imgformat
        with BytesIO() as fp:
            newim.save(fp, format=pil_format)
            imgdata = fp.getvalue()
        self.log.debug("Resized %d x %d image %s to size %d x %d pixels",
                       im.size[0], im.size[1], imgname,
                       newsize[0], newsize[1])
        return imgdata

    def replfunc_md(self, match):
        """Read image and store as base64 encoded attachment

        Parameters
        ----------
        match:
            Regex match of a markdown image; group 1 is the alt text and
            group 2 the image target (local path or url).

        Returns
        -------
        str
            The markdown image rewritten to an ``attachment:`` link, or the
            unmodified matched text when the image is not embedded (already
            an attachment, or a remote image while ``embed_remote_images``
            is disabled).
        """
        alt_text = match.group(1)
        url = match.group(2)

        if url.startswith('attachment:'):
            return match.group(0)

        is_remote = url.startswith(('http://', 'https://'))
        if is_remote:
            if not self.embed_remote_images:
                return match.group(0)
            response = urlopen(url)
            try:
                data = response.read()
            finally:
                # urllib2 responses are no context managers on Python 2
                response.close()
        else:
            filename = os.path.join(self.path, url)
            with open(filename, 'rb') as f:
                data = f.read()

        # Derive the format from the file extension; for remote images a
        # query string or fragment is not part of the file name.
        name_part = url
        if is_remote:
            name_part = url.split('#', 1)[0].split('?', 1)[0]
        imgformat = os.path.splitext(name_part)[1][1:].lower()

        if self.resize in self.imgsizes:
            data = self.resize_image(url, imgformat, data)

        self.log.debug("Embedding url: %s, format: %s", url, imgformat)
        b64_data = base64.b64encode(data).decode("utf-8")
        # e.g. "jpg" files have to be declared as image/jpeg
        mimetype = 'image/' + self._mime_subtypes.get(imgformat, imgformat)
        self.attachments[url] = {mimetype: b64_data}

        return '![{}](attachment:{})'.format(alt_text, url)

    def preprocess_cell(self, cell, resources, index):
        """
        Preprocess cell

        Only markdown cells are modified: code cells have no ``attachments``
        field in the nbformat v4 schema, so they are returned untouched.

        Parameters
        ----------
        cell : NotebookNode cell
            Notebook cell being processed
        resources : dictionary
            Additional resources used in the conversion process.  Allows
            preprocessors to pass variables into the Jinja engine.
        index : int
            Index of the cell being processed (see base.py)
        """
        if cell.cell_type != "markdown":
            return cell, resources

        # Directory that relative image paths are resolved against; falls
        # back to the current working directory when it is not provided.
        metadata = (resources or {}).get('metadata') or {}
        self.path = metadata.get('path', '')
        self.attachments = getattr(cell, 'attachments', Struct())

        cell.source = self._image_regex.sub(self.replfunc_md, cell.source)
        if self.attachments:
            cell.attachments = self.attachments
        return cell, resources
Binary file added tests/data/large_image.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
73 changes: 73 additions & 0 deletions tests/test_preprocessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,78 @@ def test_preprocessor_svg2pdf():
'exported pdf should be referenced in exported notebook')


def test_preprocessor_embedimages():
    """Exporting with ``embed_images`` enabled must yield a PNG attachment."""
    # importing from the package root doubles as a check of the shortcut
    from jupyter_contrib_nbextensions.nbconvert_support import EmbedImagesPreprocessor  # noqa E501
    cells = [
        nbf.new_code_cell(source="a = 'world'"),
        nbf.new_markdown_cell(
            source="![testimage]({})".format(path_in_data('icon.png'))
        ),
    ]
    notebook_node = nbf.new_notebook(cells=cells)
    config = Config(EmbedImagesPreprocessor={'embed_images': True})
    body, _ = export_through_preprocessor(
        notebook_node, EmbedImagesPreprocessor, NotebookExporter, 'ipynb',
        config)

    mimetype = 'image/png'
    assert_in(mimetype, body, 'Attachment {} is missing'.format(mimetype))


def test_preprocessor_embedimages_resize():
    """Exported notebook size must grow with the configured resize limit.

    The export is run without embedding, with each of the resize settings
    ``small``, ``mid`` and ``large``, and finally with embedding but without
    resizing; every step has to produce a strictly longer document than the
    previous one.
    """
    # importing from the package root doubles as a check of the shortcut
    from jupyter_contrib_nbextensions.nbconvert_support import EmbedImagesPreprocessor  # noqa E501

    try:
        from PIL import Image  # noqa F401
    except ImportError:
        raise SkipTest('PIL not found')

    notebook_node = nbf.new_notebook(cells=[
        nbf.new_code_cell(source="a = 'world'"),
        nbf.new_markdown_cell(
            source="![testimage]({})".format(path_in_data('large_image.png'))
        ),
    ])

    def exported_length(*config):
        # ``config`` is either empty (preprocessor defaults) or one Config
        body, _ = export_through_preprocessor(
            notebook_node, EmbedImagesPreprocessor, NotebookExporter, 'ipynb',
            *config)
        return len(body)

    # ordered from the expected smallest to the expected largest export
    lengths = [exported_length()]
    for size in ('small', 'mid', 'large'):
        lengths.append(exported_length(
            Config(EmbedImagesPreprocessor={'embed_images': True,
                                            'resize': size})))
    lengths.append(exported_length(
        Config(EmbedImagesPreprocessor={'embed_images': True})))

    for shorter, longer in zip(lengths, lengths[1:]):
        assert(shorter < longer)


def _normalize_iso8601_timezone(timestamp_str):
# Zulu -> +00:00 offset
timestamp_str = re.sub(r'Z$', r'+00:00', timestamp_str)
Expand Down Expand Up @@ -146,3 +218,4 @@ def test_preprocessor_execute_time():
_normalize_iso8601_timezone(etmd['end_time']),
_normalize_iso8601_timezone(etmd['start_time']),
'end_time should not be before start_time')