diff --git a/nbconvert/preprocessors/clearmetadata.py b/nbconvert/preprocessors/clearmetadata.py index 4f690f203..62fe9c5cd 100644 --- a/nbconvert/preprocessors/clearmetadata.py +++ b/nbconvert/preprocessors/clearmetadata.py @@ -11,21 +11,57 @@ class ClearMetadataPreprocessor(Preprocessor): Removes all the metadata from all code cells in a notebook. """ + clear_cell_metadata = Bool(True, + help=("Flag to choose if cell metadata is to be cleared " + "in addition to notebook metadata.")).tag(config=True) clear_notebook_metadata = Bool(True, help=("Flag to choose if notebook metadata is to be cleared " "in addition to cell metadata.")).tag(config=True) - preserve_metadata_keys = Set( - help=("Indicates the keys to preserve when deleting metadata " - "across both cells and notebook metadata fields.")).tag(config=True) + preserve_nb_metadata_mask = Set([('language_info', 'name')], + help=("Indicates the key paths to preserve when deleting metadata " + "across both cells and notebook metadata fields. Tuples of " + "keys can be passed to preserved specific nested values")).tag(config=True) + preserve_cell_metadata_mask = Set( + help=("Indicates the key paths to preserve when deleting metadata " + "across both cells and notebook metadata fields. 
Tuples of " + "keys can be passed to preserved specific nested values")).tag(config=True) + + def current_key(self, mask_key): + if isinstance(mask_key, str): + return mask_key + elif len(mask_key) == 0: + # Safeguard + return None + else: + return mask_key[0] + + def current_mask(self, mask): + return { self.current_key(k) for k in mask if self.current_key(k) is not None } + + def nested_masks(self, mask): + return { self.current_key(k[0]): k[1:] for k in mask if k and not isinstance(k, str) and len(k) > 1 } + + def nested_filter(self, items, mask): + keep_current = self.current_mask(mask) + keep_nested_lookup = self.nested_masks(mask) + for k, v in items: + keep_nested = keep_nested_lookup.get(k) + if k in keep_current: + if keep_nested is not None: + if isinstance(v, dict): + yield k, dict(self.nested_filter(v.items(), keep_nested)) + else: + yield k, v def preprocess_cell(self, cell, resources, cell_index): """ All the code cells are returned with an empty metadata field. """ - if cell.cell_type == 'code': - # Remove metadata - if 'metadata' in cell: - cell.metadata = { k: v for k,v in cell.metadata.items() if k in self.preserve_metadata_keys } + if self.clear_cell_metadata: + if cell.cell_type == 'code': + # Remove metadata + if 'metadata' in cell: + cell.metadata = dict(self.nested_filter(cell.metadata.items(), self.preserve_cell_metadata_mask)) return cell, resources def preprocess(self, nb, resources): @@ -45,5 +81,5 @@ def preprocess(self, nb, resources): nb, resources = super().preprocess(nb, resources) if self.clear_notebook_metadata: if 'metadata' in nb: - nb.metadata = { k: v for k,v in nb.metadata.items() if k in self.preserve_metadata_keys } + nb.metadata = dict(self.nested_filter(nb.metadata.items(), self.preserve_nb_metadata_mask)) return nb, resources diff --git a/nbconvert/preprocessors/tests/test_clearmetadata.py b/nbconvert/preprocessors/tests/test_clearmetadata.py index 0eaf37d2d..37b035365 100644 --- 
a/nbconvert/preprocessors/tests/test_clearmetadata.py +++ b/nbconvert/preprocessors/tests/test_clearmetadata.py @@ -14,11 +14,15 @@ class TestClearMetadata(PreprocessorTestsBase): def build_notebook(self): notebook = super().build_notebook() - notebook.metadata = {'language': 'python'} + notebook.metadata = { + 'language_info': {'name': 'python', 'version': '3.6.7'}, + 'kernelspec': {'language': 'python', 'name': 'python3'} + } # Add a test field to the first cell if 'metadata' not in notebook.cells[0]: notebook.cells[0].metadata = {} notebook.cells[0].metadata['test_field'] = 'test_value' + notebook.cells[0].metadata['test_nested'] = { 'test_keep': 'keep', 'test_filtered': 'filter' } notebook.cells[0].metadata['executeTime'] = dict([('end_time', '09:31:50'), ('start_time', '09:31:49')]) return notebook @@ -41,7 +45,8 @@ def test_default_output(self): nb, res = preprocessor(nb, res) assert not nb.cells[0].metadata - assert not nb.metadata + # By default we only preserve the language name + assert nb.metadata == {'language_info': {'name': 'python'}} def test_cell_only(self): """Test the output of the ClearMetadataPreprocessor""" @@ -53,22 +58,76 @@ def test_cell_only(self): assert not nb.cells[0].metadata assert nb.metadata + def test_notebook_only(self): + """Test the output of the ClearMetadataPreprocessor""" + nb = self.build_notebook() + res = self.build_resources() + preprocessor = self.build_preprocessor(clear_cell_metadata=False, preserve_nb_metadata_mask=set()) + nb, res = preprocessor(nb, res) + + assert nb.cells[0].metadata + assert not nb.metadata + def test_selective_cell_metadata(self): """Test the output of the ClearMetadataPreprocessor""" nb = self.build_notebook() res = self.build_resources() - preprocessor = self.build_preprocessor(preserve_metadata_keys=['test_field']) + preprocessor = self.build_preprocessor( + preserve_cell_metadata_mask=['test_field'], + preserve_nb_metadata_mask=set() + ) + nb, res = preprocessor(nb, res) + + assert 
nb.cells[0].metadata == { 'test_field': 'test_value' } + assert not nb.metadata + + def test_selective_cell_tuple_metadata(self): + """Test the output of the ClearMetadataPreprocessor""" + nb = self.build_notebook() + res = self.build_resources() + # Ensure that a tuple of length 1 works as well as a string key + preprocessor = self.build_preprocessor( + preserve_cell_metadata_mask=[('test_field',)], + preserve_nb_metadata_mask=set() + ) nb, res = preprocessor(nb, res) assert nb.cells[0].metadata == { 'test_field': 'test_value' } assert not nb.metadata + def test_nested_cell_metadata(self): + """Test the output of the ClearMetadataPreprocessor""" + nb = self.build_notebook() + res = self.build_resources() + preprocessor = self.build_preprocessor( + preserve_cell_metadata_mask=[('test_nested', 'test_keep')], + preserve_nb_metadata_mask=set() + ) + nb, res = preprocessor(nb, res) + + assert nb.cells[0].metadata == { 'test_nested': { 'test_keep': 'keep' } } + assert not nb.metadata + + def test_nested_cell_tuple_metadata(self): + """Test the output of the ClearMetadataPreprocessor""" + nb = self.build_notebook() + res = self.build_resources() + # Ensure that a nested tuple of length 1 works as well as a string key + preprocessor = self.build_preprocessor( + preserve_cell_metadata_mask=[('test_nested', ('test_keep',))], + preserve_nb_metadata_mask=set() + ) + nb, res = preprocessor(nb, res) + + assert nb.cells[0].metadata == { 'test_nested': { 'test_keep': 'keep' } } + assert not nb.metadata + def test_selective_notebook_metadata(self): """Test the output of the ClearMetadataPreprocessor""" nb = self.build_notebook() res = self.build_resources() - preprocessor = self.build_preprocessor(preserve_metadata_keys=['language']) + preprocessor = self.build_preprocessor(preserve_nb_metadata_mask=['kernelspec']) nb, res = preprocessor(nb, res) assert not nb.cells[0].metadata - assert nb.metadata == { 'language': 'python' } + assert nb.metadata == { 'kernelspec': { 'language': 'python', 
'name': 'python3' } }