Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add remove regex default to TemplateExporter, don't remove cells with outputs but no source #616

Merged
merged 7 commits into from
Jul 11, 2017
13 changes: 11 additions & 2 deletions nbconvert/exporters/templateexporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import json

from traitlets import HasTraits, Unicode, List, Dict, Bool, default, observe
from traitlets.config import Config
from traitlets.utils.importstring import import_item
from ipython_genutils import py3compat
from jinja2 import (
Expand All @@ -21,7 +22,7 @@
from nbconvert import filters
from .exporter import Exporter

#Jinja2 extensions to load.
# Jinja2 extensions to load.
JINJA_EXTENSIONS = ['jinja2.ext.loopcontrols']

default_filters = {
Expand Down Expand Up @@ -68,7 +69,6 @@ class ExtensionTolerantLoader(BaseLoader):
def __init__(self, loader, extension):
    """
    Remember the wrapped loader and the extension on the instance.
    """
    self.extension = extension
    self.loader = loader


def get_source(self, environment, template):
try:
Expand Down Expand Up @@ -123,6 +123,15 @@ def environment(self):
self._environment_cached = self._create_environment()
return self._environment_cached

@property
def default_config(self):
    """
    Default configuration for this exporter.

    Starts from a config that enables ``RegexRemovePreprocessor`` and then
    merges the parent class's ``default_config`` into it.
    """
    regex_remove_defaults = {'RegexRemovePreprocessor': {'enabled': True}}
    config = Config(regex_remove_defaults)
    # NOTE(review): presumably values from the parent config win on
    # conflicting keys when merged -- confirm against traitlets.
    config.merge(super(TemplateExporter, self).default_config)
    return config

template_file = Unicode(
help="Name of the template file to use"
Expand Down
157 changes: 83 additions & 74 deletions nbconvert/exporters/tests/files/prompt_numbers.ipynb
Original file line number Diff line number Diff line change
@@ -1,81 +1,90 @@
{
"metadata": {
"name": "notebook2"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
"cells": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import numpy as np"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"evs = np.zeros(100)",
"evs.shape"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 10,
"text": [
"(100,)"
]
}
],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": null
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": "*"
},
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"data": {
"text/plain": [
"(100,)"
]
},
"execution_count": 10,
"metadata": {},
"outputs": [],
"prompt_number": 0
"output_type": "execute_result"
}
],
"metadata": {}
"source": [
"evs = np.zeros(100)\n",
"evs.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
" "
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
" "
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
]
}
},
"nbformat": 4,
"nbformat_minor": 1
}
2 changes: 1 addition & 1 deletion nbconvert/exporters/tests/test_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def test_prompt_number(self):
in_regex = r"In \[(.*)\]:"
out_regex = r"Out\[(.*)\]:"

ins = ["2", "10", " ", " ", "*", "0"]
ins = ["2", "10", " ", " ", "0"]
outs = ["10"]

assert re.findall(in_regex, output) == ins
Expand Down
2 changes: 1 addition & 1 deletion nbconvert/exporters/tests/test_latex.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def test_prompt_number_color(self):
in_regex = r"In \[\{\\color\{incolor\}(.*)\}\]:"
out_regex = r"Out\[\{\\color\{outcolor\}(.*)\}\]:"

ins = ["2", "10", " ", " ", "*", "0"]
ins = ["2", "10", " ", " ", "0"]
outs = ["10"]

assert re.findall(in_regex, output) == ins
Expand Down
31 changes: 23 additions & 8 deletions nbconvert/preprocessors/regexremove.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from traitlets import List, Unicode
from .base import Preprocessor


class RegexRemovePreprocessor(Preprocessor):
"""
Removes cells from a notebook that match one or more regular expression.
Expand Down Expand Up @@ -39,19 +40,33 @@ class RegexRemovePreprocessor(Preprocessor):

patterns = List(Unicode, default_value=[r'\Z']).tag(config=True)

def preprocess(self, nb, resources):
def check_conditions(self, cell):
"""
Preprocessing to apply to each notebook. See base.py for details.
Checks that a cell matches the pattern and that (if a code cell)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment here doesn't quite match the code below: The function checks that the cell does not match the pattern or is a code cell with output.

As an aside, are there any other cell types that can have output? We might just be able to check not pattern.match(cell.source) or cell.get('outputs'). Then cell.get('outputs') will evaluate to false if the key doesn't exist or the list is empty.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No other cell types have outputs, so we should be in the clear.

it does not have any outputs; only such cells are removed.

Returns: Boolean.
True means cell should *not* be removed.
"""
# Skip preprocessing if the list of patterns is empty
if not self.patterns:
return nb, resources

# Compile all the patterns into one: each pattern is first wrapped
# by a non-capturing group to ensure the correct order of precedence
# and the patterns are joined with a logical or
pattern = re.compile('|'.join('(?:%s)' % pattern
for pattern in self.patterns))
# Filter out cells that match any of the patterns
nb.cells = [cell for cell in nb.cells
if not pattern.match(cell.source)]

# Filter out cells that meet the pattern and have no outputs
return cell.get('outputs') or not pattern.match(cell.source)

def preprocess(self, nb, resources):
    """
    Preprocessing to apply to each notebook. See base.py for details.

    Keeps only the cells for which ``check_conditions`` returns a truthy
    value. When no patterns are configured the notebook is returned
    untouched.

    Returns: the (nb, resources) pair.
    """
    if self.patterns:
        # Rebuild the cell list from the cells that pass the check.
        nb.cells = list(filter(self.check_conditions, nb.cells))

    return nb, resources
26 changes: 24 additions & 2 deletions nbconvert/preprocessors/tests/test_regexremove.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# Distributed under the terms of the Modified BSD License.

import re
from nbformat import v4 as nbformat
from nbformat import v4 as nbformat, from_dict

from .base import PreprocessorTestsBase
from ..regexremove import RegexRemovePreprocessor
Expand Down Expand Up @@ -46,7 +46,7 @@ def test_output(self):
expected_cell_count = {
'default': 5, # only strictly empty cells
'disallow_whitespace': 2, # all "empty" cells are removed
'disallow_tab_newline': 3, # all "empty" cells but the single space
'disallow_tab_newline': 3, # all "empty" cells but the single space
'none': 6,
}
for method in ['default', 'disallow_whitespace', 'disallow_tab_newline', 'none']:
Expand All @@ -68,3 +68,25 @@ def test_output(self):
for cell in nb.cells:
for pattern in patterns:
self.assertFalse(pattern.match(cell.source))

def test_nosource_with_output(self):
    """
    A code cell with an empty source but at least one output must be
    reported by check_conditions as a cell to keep (truthy result).
    """
    stream_output = {
        'name': 'stdout',
        'output_type': 'stream',
        'text': 'I exist.\n',
    }
    raw_cell = {
        'cell_type': 'code',
        'execution_count': 2,
        'metadata': {},
        'outputs': [stream_output],
        'source': '',
    }
    # Convert the plain dict into a notebook node before checking it.
    node = from_dict(raw_cell)
    preprocessor = self.build_preprocessor()
    assert preprocessor.check_conditions(node)