From bb863c3dd0b7548acb462ac7575676ebed02562d Mon Sep 17 00:00:00 2001 From: krande Date: Tue, 14 Sep 2021 19:30:35 +0200 Subject: [PATCH] rework table implementation to allow granular control of tabular formatting --- files/doc_table/00-main/table.md | 1 + files/doc_table/01-app/table.md | 1 + files/doc_table/metadata.yaml | 4 +++ src/paradoc/concepts.py | 20 ++++++++++++++ src/paradoc/document.py | 47 ++++++++++++++++++++++++++++---- src/paradoc/formatting/utils.py | 27 +++++++++++++----- src/paradoc/references.py | 4 +-- src/paradoc/utils.py | 11 +++++++- tests/test_doc_math.py | 4 +-- tests/test_tables.py | 22 +++++++++++++++ 10 files changed, 124 insertions(+), 17 deletions(-) create mode 100644 files/doc_table/00-main/table.md create mode 100644 files/doc_table/01-app/table.md create mode 100644 files/doc_table/metadata.yaml create mode 100644 src/paradoc/concepts.py create mode 100644 tests/test_tables.py diff --git a/files/doc_table/00-main/table.md b/files/doc_table/00-main/table.md new file mode 100644 index 0000000..f89dde1 --- /dev/null +++ b/files/doc_table/00-main/table.md @@ -0,0 +1 @@ +{{__my_table__}} \ No newline at end of file diff --git a/files/doc_table/01-app/table.md b/files/doc_table/01-app/table.md new file mode 100644 index 0000000..52b5f1a --- /dev/null +++ b/files/doc_table/01-app/table.md @@ -0,0 +1 @@ +{{__my_table_2__}} \ No newline at end of file diff --git a/files/doc_table/metadata.yaml b/files/doc_table/metadata.yaml new file mode 100644 index 0000000..c6f1f2f --- /dev/null +++ b/files/doc_table/metadata.yaml @@ -0,0 +1,4 @@ +linkReferences: true +nameInLink: true +figPrefix: "Figure" +tblPrefix: "Table" \ No newline at end of file diff --git a/src/paradoc/concepts.py b/src/paradoc/concepts.py new file mode 100644 index 0000000..b9c3a3b --- /dev/null +++ b/src/paradoc/concepts.py @@ -0,0 +1,20 @@ +from dataclasses import dataclass +import pandas as pd +from .formatting import TableFormat + + +@dataclass +class Table: + name: str + df: pd.DataFrame + caption: str + format: TableFormat = TableFormat() + + def to_markdown(self, include_name_in_cell=False): + df = self.df.copy() + if include_name_in_cell: + col_name = df.columns[0] + df.iloc[0, df.columns.get_loc(col_name)] = self.name + tbl_str = df.to_markdown(index=False, tablefmt="grid") + tbl_str += f"\nTable: {self.caption}" + return tbl_str diff --git a/src/paradoc/document.py b/src/paradoc/document.py index 3d9eeba..420c2c4 100644 --- a/src/paradoc/document.py +++ b/src/paradoc/document.py @@ -1,5 +1,7 @@ +from __future__ import annotations import logging import os +import pandas as pd import pathlib import shutil from dataclasses import dataclass @@ -7,7 +9,8 @@ import pypandoc from docx import Document from docxcompose.composer import Composer - +from typing import Dict +from .concepts import Table from .formatting import TableFormat from .utils import close_word_docs_by_name, docx_update, get_list_of_files @@ -63,7 +66,7 @@ def __init__( self._app_prefix = app_prefix self.export_format = export_format self.variables = dict() - self.tables = dict() + self.tables: Dict[str, Table] = dict() self.equations = dict() # Style info: https://python-docx.readthedocs.io/en/latest/user/styles-using.html @@ -114,7 +117,7 @@ def compile(self, output_name, auto_open=False, metadata_file=None): from .formatting import Formatting from .formatting.utils import ( apply_custom_styles_to_docx, - fix_headers_after_compose, + fix_headers_after_compose ) from .utils import variable_sub @@ -172,7 +175,7 @@ def compile(self, output_name, auto_open=False, metadata_file=None): logging.info(f"Added {md.new_file}") main_format = Formatting(False, self.paragraph_style_map, self.table_format) - _ = apply_custom_styles_to_docx(composer_main.doc, main_format) + _ = self._reformat_doc(composer_main.doc, False) composer_main.doc.add_page_break() # Appendix - Format Style @@ -188,7 +191,7 @@ def compile(self, output_name, auto_open=False, metadata_file=None): app_paragraph_style.update(self.paragraph_style_map) app_format = Formatting(True, app_paragraph_style, self.table_format) - _ = apply_custom_styles_to_docx(composer_app.doc, app_format) + _ = self._reformat_doc(composer_main.doc, True) composer_main.append(composer_app.doc) @@ -205,6 +208,40 @@ def compile(self, output_name, auto_open=False, metadata_file=None): if auto_open is True: os.startfile(dest_file) + def add_table(self, name, df: pd.DataFrame, caption: str, tbl_format: TableFormat = None): + self.tables[name] = Table(name, df, caption, tbl_format) + + def _reformat_doc(self, doc: Document, is_appendix, style_doc=None): + from paradoc import MY_DOCX_TMPL + from paradoc.utils import iter_block_items + from docx.table import Table as DocxTable + from docx.text.paragraph import Paragraph + from .formatting.utils import format_table, format_paragraph, format_captions, get_table_ref + + document = style_doc if style_doc is not None else Document(MY_DOCX_TMPL) + prev_table = False + refs = dict() + + for block in iter_block_items(doc): + if type(block) == Paragraph: + if prev_table and len(block.runs) > 0: + block.runs[0].text = "\n" + block.runs[0].text + prev_table = False + block.paragraph_format.space_before = None + if block.style.name in ("Image Caption", "Table Caption"): + ref_ = format_captions(block, is_appendix) + refs.update(ref_) + else: + format_paragraph(block, document, self.paragraph_style_map) + + elif type(block) == DocxTable: + tbl_source = get_table_ref(block, self.tables) + if tbl_source is not None and tbl_source.format is not None: + format_table(block, document, tbl_source.format) + prev_table = True + + return refs + @property def main_dir(self): return self.source_dir / self._main_prefix diff --git a/src/paradoc/formatting/utils.py b/src/paradoc/formatting/utils.py index acd3c0f..479cb12 100644 --- a/src/paradoc/formatting/utils.py +++ b/src/paradoc/formatting/utils.py @@ -2,10 +2,12 @@ from docx import Document from docx.shared import Pt -from docx.table import Table +from docx.table import Table as DocxTable from docx.text.paragraph import Paragraph - +from typing import Dict from .concepts import Formatting, TableFormat +from paradoc.concepts import Table +from typing import Union def add_indented_normal(doc): @@ -24,10 +26,9 @@ def add_indented_normal(doc): return style -def format_paragraph(pg, document, paragraph_formatting: Formatting): +def format_paragraph(pg, document, paragraph_style_map: dict): from docx.shared import Mm - paragraph_style_map = paragraph_formatting.paragraph_style_map style_name = pg.style.name logging.debug(style_name) if style_name == "Compact": # Is a bullet point list @@ -74,9 +75,9 @@ def apply_custom_styles_to_docx(doc, doc_format: Formatting = None, style_doc=No ref_ = format_captions(block, doc_format) refs.update(ref_) else: - format_paragraph(block, document, doc_format) + format_paragraph(block, document, doc_format.paragraph_style_map) - elif type(block) == Table: + elif type(block) == DocxTable: if doc_format.table_format: format_table(block, document, doc_format.table_format) prev_table = True @@ -84,7 +85,19 @@ def apply_custom_styles_to_docx(doc, doc_format: Formatting = None, style_doc=No return refs -def format_table(tbl, document, tbl_format: TableFormat): +def get_table_ref(docx_table: DocxTable, tables: Dict[str, Table]) -> Union[Table, None]: + cell0 = docx_table.rows[1].cells[0].paragraphs[0] + cell0_str = cell0.text + for key, tbl in tables.items(): + if key == cell0_str: + df = tbl.df + col_name = df.columns[0] + cell0.text = str(df.iloc[0, df.columns.get_loc(col_name)]) + return tbl + return None + + +def format_table(tbl: DocxTable, document, tbl_format: TableFormat): new_tbl_style = document.styles[tbl_format.style] tbl.style = new_tbl_style logging.info(f'Changed Table style from "{tbl.style}" to "{new_tbl_style}"') diff --git a/src/paradoc/references.py b/src/paradoc/references.py index e436924..7b9aa27 100644 --- a/src/paradoc/references.py +++ b/src/paradoc/references.py @@ -100,7 +100,7 @@ def add_bookmark(paragraph, bookmark_text, bookmark_name): tag.append(end) -def insert_caption(pg, prefix, run, text, doc_format): +def insert_caption(pg, prefix, run, text, is_appendix: bool): """ :param pg: @@ -113,7 +113,7 @@ def insert_caption(pg, prefix, run, text, doc_format): """ from docx.text.run import Run - heading_ref = "Appendix" if doc_format.is_appendix is True else '"Heading 1"' + heading_ref = "Appendix" if is_appendix is True else '"Heading 1"' seq1 = pg._element._new_r() add_seq_reference(seq1, f"STYLEREF \\s {heading_ref} \\n", run._parent) diff --git a/src/paradoc/utils.py b/src/paradoc/utils.py index 65879b0..9df7d96 100644 --- a/src/paradoc/utils.py +++ b/src/paradoc/utils.py @@ -286,10 +286,19 @@ def basic_equation_compiler(f, print_latex=False, print_formula=False): def variable_sub(md_doc_str, variable_dict): + from .concepts import Table + + def sub_table(tbl: Table) -> str: + return tbl.to_markdown(True) + for key, value in variable_dict.items(): key_str = f"{{{{__{key}__}}}}" if key_str in md_doc_str: - md_doc_str = md_doc_str.replace(key_str, str(value)) + if type(value) is Table: + value_str = sub_table(value) + else: + value_str = str(value) + md_doc_str = md_doc_str.replace(key_str, value_str) return md_doc_str diff --git a/tests/test_doc_math.py b/tests/test_doc_math.py index 32e6339..40c7b30 100644 --- a/tests/test_doc_math.py +++ b/tests/test_doc_math.py @@ -19,8 +19,8 @@ def test_math_doc(self): one.equations["my_equation"] = basic_equation_compiler(my_calc_example_1) one.equations["my_equation_2"] = basic_equation_compiler(my_calc_example_2) - one.tables["results"] = df1.to_markdown(index=False, tablefmt="grid") - one.tables["results_2"] = df2.to_markdown(index=False, tablefmt="grid") + one.add_table("results", df1) + one.add_table("results_2", df2) one.compile("MathDoc") diff --git a/tests/test_tables.py b/tests/test_tables.py new file mode 100644 index 0000000..0a20eee --- /dev/null +++ b/tests/test_tables.py @@ -0,0 +1,22 @@ +import unittest +from paradoc import OneDoc +from paradoc.formatting import TableFormat +import pandas as pd + +from common import files_dir, test_dir + + +class TableTests(unittest.TestCase): + def test_table(self): + report_dir = files_dir / "doc_table" + one = OneDoc(report_dir, work_dir=test_dir / "doc_table") + df = pd.DataFrame([(0, 0), (1, 2)], columns=["a", "b"]) + + one.add_table("my_table", df, "A basic table") + one.add_table("my_table_2", df, "A slightly smaller table", TableFormat(font_size=8)) + + one.compile("TableDoc") + + +if __name__ == "__main__": + unittest.main()