Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changed clearmetadata preprocessor to remove all metadata by default #1314

Merged
merged 2 commits into from
Sep 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 68 additions & 5 deletions nbconvert/preprocessors/clearmetadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,83 @@
# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.

from traitlets import Set
from traitlets import Bool, Set
from .base import Preprocessor

class ClearMetadataPreprocessor(Preprocessor):
    """
    Removes metadata from a notebook and from its code cells.

    Notebook-level and cell-level clearing can be switched off
    independently, and the ``preserve_*_mask`` traits name the keys
    (or nested key paths) that survive the clearing.
    """

    clear_cell_metadata = Bool(True,
        help=("Flag to choose if cell metadata is to be cleared "
              "in addition to notebook metadata.")).tag(config=True)
    clear_notebook_metadata = Bool(True,
        help=("Flag to choose if notebook metadata is to be cleared "
              "in addition to cell metadata.")).tag(config=True)
    # Only the language name is kept at notebook level by default.
    preserve_nb_metadata_mask = Set([('language_info', 'name')],
        help=("Indicates the key paths to preserve when deleting "
              "notebook metadata. Tuples of keys can be passed to "
              "preserve specific nested values")).tag(config=True)
    preserve_cell_metadata_mask = Set(
        help=("Indicates the key paths to preserve when deleting "
              "cell metadata. Tuples of keys can be passed to "
              "preserve specific nested values")).tag(config=True)

    def current_key(self, mask_key):
        """
        Return the top-level key addressed by one mask entry.

        A string entry is its own key; a non-empty sequence contributes
        its first element; an empty sequence yields ``None``.
        """
        if isinstance(mask_key, str):
            return mask_key
        if len(mask_key) == 0:
            # Safeguard against empty key paths
            return None
        return mask_key[0]

    def current_mask(self, mask):
        """
        Return the set of top-level keys named by the entries of ``mask``.

        Entries that resolve to ``None`` (empty key paths) are ignored.
        """
        top_level = (self.current_key(entry) for entry in mask)
        return {key for key in top_level if key is not None}

    def nested_masks(self, mask):
        """
        Map each top-level key to the mask that applies beneath it.

        Only non-string entries with more than one element contribute.
        When several entries share the same top-level key their remainders
        are combined, so every requested nested path is preserved instead
        of one entry silently replacing another.
        """
        nested = {}
        for entry in mask:
            if not entry or isinstance(entry, str) or len(entry) <= 1:
                continue
            head = self.current_key(entry[0])
            tail = entry[1:]
            if head in nested:
                # Merge rather than overwrite: both tails are masks for
                # the same parent key.
                tail = tuple(nested[head]) + tuple(tail)
            nested[head] = tail
        return nested

    def nested_filter(self, items, mask):
        """
        Yield the ``(key, value)`` pairs from ``items`` allowed by ``mask``.

        Keys absent from the mask are dropped. A key with a nested mask is
        yielded with its dict value filtered recursively; if its value is
        not a dict nothing is yielded for it. A key without a nested mask
        is yielded unchanged.
        """
        keep_current = self.current_mask(mask)
        keep_nested_lookup = self.nested_masks(mask)
        for key, value in items:
            if key not in keep_current:
                continue
            keep_nested = keep_nested_lookup.get(key)
            if keep_nested is None:
                yield key, value
            elif isinstance(value, dict):
                yield key, dict(self.nested_filter(value.items(), keep_nested))

    def preprocess_cell(self, cell, resources, cell_index):
        """
        Clear the metadata of a code cell, keeping only the keys named in
        ``preserve_cell_metadata_mask``.

        Nothing is changed when ``clear_cell_metadata`` is false, when the
        cell is not a code cell, or when it has no ``metadata`` entry.
        Returns the ``(cell, resources)`` pair.
        """
        if (self.clear_cell_metadata
                and cell.cell_type == 'code'
                and 'metadata' in cell):
            cell.metadata = dict(self.nested_filter(
                cell.metadata.items(), self.preserve_cell_metadata_mask))
        return cell, resources

    def preprocess(self, nb, resources):
        """
        Preprocessing to apply on each notebook.

        Must return modified nb, resources.

        Parameters
        ----------
        nb : NotebookNode
            Notebook being converted
        resources : dictionary
            Additional resources used in the conversion process.  Allows
            preprocessors to pass variables into the Jinja engine.
        """
        # Run the base implementation first, then filter the
        # notebook-level metadata.
        nb, resources = super().preprocess(nb, resources)
        if self.clear_notebook_metadata and 'metadata' in nb:
            nb.metadata = dict(self.nested_filter(
                nb.metadata.items(), self.preserve_nb_metadata_mask))
        return nb, resources
97 changes: 94 additions & 3 deletions nbconvert/preprocessors/tests/test_clearmetadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,29 +14,120 @@ class TestClearMetadata(PreprocessorTestsBase):

def build_notebook(self):
    """Build the base notebook and attach notebook and cell metadata to it"""
    nb = super().build_notebook()
    nb.metadata = {
        'language_info': {'name': 'python', 'version': '3.6.7'},
        'kernelspec': {'language': 'python', 'name': 'python3'},
    }
    # The first cell carries a flat field, a nested field and timing info
    first_cell = nb.cells[0]
    if 'metadata' not in first_cell:
        first_cell.metadata = {}
    first_cell.metadata['test_field'] = 'test_value'
    first_cell.metadata['test_nested'] = {
        'test_keep': 'keep',
        'test_filtered': 'filter',
    }
    first_cell.metadata['executeTime'] = {
        'end_time': '09:31:50',
        'start_time': '09:31:49',
    }
    return nb

def build_preprocessor(self, **kwargs):
    """Make an enabled instance of the preprocessor.

    Keyword arguments are forwarded to ``ClearMetadataPreprocessor`` so
    individual tests can override its traits.
    """
    preprocessor = ClearMetadataPreprocessor(**kwargs)
    preprocessor.enabled = True
    return preprocessor

def test_constructor(self):
    """Building a ClearMetadataPreprocessor with defaults must not raise"""
    self.build_preprocessor()

def test_default_output(self):
    """With default settings cell metadata is emptied and only the language name is kept"""
    nb = self.build_notebook()
    res = self.build_resources()
    preprocessor = self.build_preprocessor()
    nb, res = preprocessor(nb, res)

    assert not nb.cells[0].metadata
    # By default we only preserve the language name
    assert nb.metadata == {'language_info': {'name': 'python'}}

def test_cell_only(self):
    """Notebook metadata survives when notebook clearing is switched off"""
    notebook, resources = self.build_notebook(), self.build_resources()
    clearer = self.build_preprocessor(clear_notebook_metadata=False)
    notebook, resources = clearer(notebook, resources)

    first_cell = notebook.cells[0]
    assert not first_cell.metadata
    assert notebook.metadata

def test_notebook_only(self):
    """Cell metadata survives when cell clearing is switched off"""
    notebook, resources = self.build_notebook(), self.build_resources()
    # An empty notebook mask means nothing at notebook level is preserved
    clearer = self.build_preprocessor(
        clear_cell_metadata=False,
        preserve_nb_metadata_mask=set(),
    )
    notebook, resources = clearer(notebook, resources)

    first_cell = notebook.cells[0]
    assert first_cell.metadata
    assert not notebook.metadata

def test_selective_cell_metadata(self):
    """A plain string key in the cell mask keeps just that cell field"""
    notebook, resources = self.build_notebook(), self.build_resources()
    options = {
        'preserve_cell_metadata_mask': ['test_field'],
        'preserve_nb_metadata_mask': set(),
    }
    clearer = self.build_preprocessor(**options)
    notebook, resources = clearer(notebook, resources)

    first_cell = notebook.cells[0]
    assert first_cell.metadata == { 'test_field': 'test_value' }
    assert not notebook.metadata

def test_selective_cell_tuple_metadata(self):
    """A one-element tuple in the cell mask behaves like the bare string key"""
    notebook, resources = self.build_notebook(), self.build_resources()
    # ('test_field',) must select the same field as 'test_field'
    options = {
        'preserve_cell_metadata_mask': [('test_field',)],
        'preserve_nb_metadata_mask': set(),
    }
    clearer = self.build_preprocessor(**options)
    notebook, resources = clearer(notebook, resources)

    first_cell = notebook.cells[0]
    assert first_cell.metadata == { 'test_field': 'test_value' }
    assert not notebook.metadata

def test_nested_cell_metadata(self):
    """A two-element key path keeps only the named nested cell value"""
    notebook, resources = self.build_notebook(), self.build_resources()
    options = {
        'preserve_cell_metadata_mask': [('test_nested', 'test_keep')],
        'preserve_nb_metadata_mask': set(),
    }
    clearer = self.build_preprocessor(**options)
    notebook, resources = clearer(notebook, resources)

    first_cell = notebook.cells[0]
    assert first_cell.metadata == { 'test_nested': { 'test_keep': 'keep' } }
    assert not notebook.metadata

def test_nested_cell_tuple_metadata(self):
    """A nested key given as an inner one-element tuple keeps the same nested value"""
    notebook, resources = self.build_notebook(), self.build_resources()
    # The inner key is wrapped in its own tuple instead of being a bare string
    options = {
        'preserve_cell_metadata_mask': [('test_nested', ('test_keep',))],
        'preserve_nb_metadata_mask': set(),
    }
    clearer = self.build_preprocessor(**options)
    notebook, resources = clearer(notebook, resources)

    first_cell = notebook.cells[0]
    assert first_cell.metadata == { 'test_nested': { 'test_keep': 'keep' } }
    assert not notebook.metadata

def test_selective_notebook_metadata(self):
    """A string key in the notebook mask keeps that whole notebook entry"""
    notebook, resources = self.build_notebook(), self.build_resources()
    clearer = self.build_preprocessor(preserve_nb_metadata_mask=['kernelspec'])
    notebook, resources = clearer(notebook, resources)

    first_cell = notebook.cells[0]
    assert not first_cell.metadata
    assert notebook.metadata == { 'kernelspec': { 'language': 'python', 'name': 'python3' } }