Skip to content

Commit

Permalink
rework table implementation to allow granular control of tabular formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
krande committed Sep 14, 2021
1 parent bc11923 commit bb863c3
Show file tree
Hide file tree
Showing 10 changed files with 124 additions and 17 deletions.
1 change: 1 addition & 0 deletions files/doc_table/00-main/table.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{{__my_table__}}
1 change: 1 addition & 0 deletions files/doc_table/01-app/table.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{{__my_table_2__}}
4 changes: 4 additions & 0 deletions files/doc_table/metadata.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
linkReferences: true
nameInLink: true
figPrefix: "Figure"
tblPrefix: "Table"
20 changes: 20 additions & 0 deletions src/paradoc/concepts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from dataclasses import dataclass, field

import pandas as pd

from .formatting import TableFormat


@dataclass
class Table:
    """A named table backed by a DataFrame, with a caption and its own format.

    :param name: identifier of the table; optionally embedded in the first
        data cell of the rendered markdown (see :meth:`to_markdown`).
    :param df: tabular data to render.
    :param caption: caption text appended below the rendered table.
    :param format: formatting settings for this table. Each instance gets its
        own ``TableFormat`` unless one is passed explicitly.
    """

    name: str
    df: pd.DataFrame
    caption: str
    # default_factory avoids sharing one TableFormat instance between all
    # tables (and the ValueError dataclasses raises for unhashable defaults
    # on newer Python versions).
    format: TableFormat = field(default_factory=TableFormat)

    def to_markdown(self, include_name_in_cell=False):
        """Render the table as a grid-formatted markdown string with caption.

        :param include_name_in_cell: when True, the first cell of the first
            data row is overwritten (on a copy of ``df``) with ``self.name``.
        :return: the markdown table followed by a ``Table: <caption>`` line.
        """
        df = self.df.copy()
        if include_name_in_cell:
            col_name = df.columns[0]
            # Cast to object first so writing a string into a numeric column
            # does not depend on pandas' implicit dtype upcasting.
            df[col_name] = df[col_name].astype(object)
            df.iloc[0, df.columns.get_loc(col_name)] = self.name
        tbl_str = df.to_markdown(index=False, tablefmt="grid")
        tbl_str += f"\nTable: {self.caption}"
        return tbl_str
47 changes: 42 additions & 5 deletions src/paradoc/document.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
from __future__ import annotations
import logging
import os
import pandas as pd
import pathlib
import shutil
from dataclasses import dataclass

import pypandoc
from docx import Document
from docxcompose.composer import Composer

from typing import Dict
from .concepts import Table
from .formatting import TableFormat
from .utils import close_word_docs_by_name, docx_update, get_list_of_files

Expand Down Expand Up @@ -63,7 +66,7 @@ def __init__(
self._app_prefix = app_prefix
self.export_format = export_format
self.variables = dict()
self.tables = dict()
self.tables: Dict[str, Table] = dict()
self.equations = dict()

# Style info: https://python-docx.readthedocs.io/en/latest/user/styles-using.html
Expand Down Expand Up @@ -114,7 +117,7 @@ def compile(self, output_name, auto_open=False, metadata_file=None):
from .formatting import Formatting
from .formatting.utils import (
apply_custom_styles_to_docx,
fix_headers_after_compose,
fix_headers_after_compose
)
from .utils import variable_sub

Expand Down Expand Up @@ -172,7 +175,7 @@ def compile(self, output_name, auto_open=False, metadata_file=None):
logging.info(f"Added {md.new_file}")

main_format = Formatting(False, self.paragraph_style_map, self.table_format)
_ = apply_custom_styles_to_docx(composer_main.doc, main_format)
_ = self._reformat_doc(composer_main.doc, False)
composer_main.doc.add_page_break()

# Appendix - Format Style
Expand All @@ -188,7 +191,7 @@ def compile(self, output_name, auto_open=False, metadata_file=None):
app_paragraph_style.update(self.paragraph_style_map)

app_format = Formatting(True, app_paragraph_style, self.table_format)
_ = apply_custom_styles_to_docx(composer_app.doc, app_format)
_ = self._reformat_doc(composer_main.doc, True)

composer_main.append(composer_app.doc)

Expand All @@ -205,6 +208,40 @@ def compile(self, output_name, auto_open=False, metadata_file=None):
if auto_open is True:
os.startfile(dest_file)

def add_table(self, name, df: pd.DataFrame, caption: str, tbl_format: TableFormat = None):
    """Register a table under ``name`` in ``self.tables``.

    :param name: key used to store the table.
    :param df: data of the table.
    :param caption: caption text of the table.
    :param tbl_format: formatting for this table. When omitted, a default
        ``TableFormat()`` is used instead of storing ``None``, so the table
        is not silently skipped by code that checks ``format is not None``.
    """
    if tbl_format is None:
        tbl_format = TableFormat()
    self.tables[name] = Table(name, df, caption, tbl_format)

def _reformat_doc(self, doc: Document, is_appendix, style_doc=None):
    """Apply caption, paragraph and per-table formatting to ``doc`` in place.

    :param doc: document whose block items are walked and reformatted.
    :param is_appendix: forwarded to ``format_captions``.
    :param style_doc: document passed on to the paragraph/table formatters;
        defaults to ``Document(MY_DOCX_TMPL)`` when not given.
    :return: dict accumulated from the results of ``format_captions``.
    """
    from docx.table import Table as DocxTable
    from docx.text.paragraph import Paragraph

    from paradoc import MY_DOCX_TMPL
    from paradoc.utils import iter_block_items

    from .formatting.utils import format_captions, format_paragraph, format_table, get_table_ref

    document = style_doc if style_doc is not None else Document(MY_DOCX_TMPL)
    prev_table = False
    refs = dict()

    for block in iter_block_items(doc):
        if isinstance(block, Paragraph):
            # First non-empty paragraph after a table: prepend a newline to
            # its first run and clear its "space before" setting.
            # NOTE(review): nesting of the two statements after the text
            # assignment was reconstructed from a flattened source - confirm.
            if prev_table and len(block.runs) > 0:
                block.runs[0].text = "\n" + block.runs[0].text
                prev_table = False
                block.paragraph_format.space_before = None
            if block.style.name in ("Image Caption", "Table Caption"):
                ref_ = format_captions(block, is_appendix)
                refs.update(ref_)
            else:
                format_paragraph(block, document, self.paragraph_style_map)

        elif isinstance(block, DocxTable):
            # Only tables that map back to a registered Table with a format
            # are restyled.
            tbl_source = get_table_ref(block, self.tables)
            if tbl_source is not None and tbl_source.format is not None:
                format_table(block, document, tbl_source.format)
            prev_table = True

    return refs

@property
def main_dir(self):
return self.source_dir / self._main_prefix
Expand Down
27 changes: 20 additions & 7 deletions src/paradoc/formatting/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@

from docx import Document
from docx.shared import Pt
from docx.table import Table
from docx.table import Table as DocxTable
from docx.text.paragraph import Paragraph

from typing import Dict
from .concepts import Formatting, TableFormat
from paradoc.concepts import Table
from typing import Union


def add_indented_normal(doc):
Expand All @@ -24,10 +26,9 @@ def add_indented_normal(doc):
return style


def format_paragraph(pg, document, paragraph_formatting: Formatting):
def format_paragraph(pg, document, paragraph_style_map: dict):
from docx.shared import Mm

paragraph_style_map = paragraph_formatting.paragraph_style_map
style_name = pg.style.name
logging.debug(style_name)
if style_name == "Compact": # Is a bullet point list
Expand Down Expand Up @@ -74,17 +75,29 @@ def apply_custom_styles_to_docx(doc, doc_format: Formatting = None, style_doc=No
ref_ = format_captions(block, doc_format)
refs.update(ref_)
else:
format_paragraph(block, document, doc_format)
format_paragraph(block, document, doc_format.paragraph_style_map)

elif type(block) == Table:
elif type(block) == DocxTable:
if doc_format.table_format:
format_table(block, document, doc_format.table_format)
prev_table = True

return refs


def format_table(tbl, document, tbl_format: TableFormat):
def get_table_ref(docx_table: DocxTable, tables: Dict[str, Table]) -> Union[Table, None]:
    """Find the registered ``Table`` that a docx table was generated from.

    The text of the first cell in the second row (index 1) is used as the
    lookup key into ``tables``. On a match, that cell's text is replaced by
    the first value of the first column of the matched table's DataFrame.

    :param docx_table: table found in the docx document.
    :param tables: registered tables keyed by name.
    :return: the matching ``Table``, or ``None`` if the docx table has fewer
        than two rows or its key cell matches no registered name.
    """
    rows = docx_table.rows
    if len(rows) < 2:
        # No row at index 1 to read the key from.
        return None

    key_paragraph = rows[1].cells[0].paragraphs[0]
    tbl = tables.get(key_paragraph.text)
    if tbl is None:
        return None

    # Put the real data value back into the cell that carried the table name.
    key_paragraph.text = str(tbl.df.iloc[0, 0])
    return tbl


def format_table(tbl: DocxTable, document, tbl_format: TableFormat):
new_tbl_style = document.styles[tbl_format.style]
tbl.style = new_tbl_style
logging.info(f'Changed Table style from "{tbl.style}" to "{new_tbl_style}"')
Expand Down
4 changes: 2 additions & 2 deletions src/paradoc/references.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def add_bookmark(paragraph, bookmark_text, bookmark_name):
tag.append(end)


def insert_caption(pg, prefix, run, text, doc_format):
def insert_caption(pg, prefix, run, text, is_appendix: bool):
"""
:param pg:
Expand All @@ -113,7 +113,7 @@ def insert_caption(pg, prefix, run, text, doc_format):
"""
from docx.text.run import Run

heading_ref = "Appendix" if doc_format.is_appendix is True else '"Heading 1"'
heading_ref = "Appendix" if is_appendix is True else '"Heading 1"'

seq1 = pg._element._new_r()
add_seq_reference(seq1, f"STYLEREF \\s {heading_ref} \\n", run._parent)
Expand Down
11 changes: 10 additions & 1 deletion src/paradoc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,10 +286,19 @@ def basic_equation_compiler(f, print_latex=False, print_formula=False):


def variable_sub(md_doc_str, variable_dict):
    """Substitute ``{{__key__}}`` placeholders in a markdown string.

    :param md_doc_str: markdown text that may contain placeholders.
    :param variable_dict: mapping of placeholder key to value. ``Table``
        values are rendered with ``Table.to_markdown(True)``; any other value
        is inserted as ``str(value)``.
    :return: the markdown text with all found placeholders replaced.
    """
    from .concepts import Table

    for key, value in variable_dict.items():
        key_str = f"{{{{__{key}__}}}}"
        if key_str not in md_doc_str:
            continue
        if isinstance(value, Table):
            # True -> the table name is embedded in its first data cell.
            value_str = value.to_markdown(True)
        else:
            value_str = str(value)
        md_doc_str = md_doc_str.replace(key_str, value_str)
    return md_doc_str


Expand Down
4 changes: 2 additions & 2 deletions tests/test_doc_math.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ def test_math_doc(self):

one.equations["my_equation"] = basic_equation_compiler(my_calc_example_1)
one.equations["my_equation_2"] = basic_equation_compiler(my_calc_example_2)
one.tables["results"] = df1.to_markdown(index=False, tablefmt="grid")
one.tables["results_2"] = df2.to_markdown(index=False, tablefmt="grid")
one.add_table("results", df1)
one.add_table("results_2", df2)

one.compile("MathDoc")

Expand Down
22 changes: 22 additions & 0 deletions tests/test_tables.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import unittest
from paradoc import OneDoc
from paradoc.formatting import TableFormat
import pandas as pd

from common import files_dir, test_dir


class TableTests(unittest.TestCase):
    """Smoke test: a document with two registered tables compiles."""

    def test_table(self):
        """Add one table without and one with an explicit format, then compile."""
        frame = pd.DataFrame([(0, 0), (1, 2)], columns=["a", "b"])
        doc = OneDoc(files_dir / "doc_table", work_dir=test_dir / "doc_table")

        doc.add_table("my_table", frame, "A basic table")
        small_format = TableFormat(font_size=8)
        doc.add_table("my_table_2", frame, "A slightly smaller table", small_format)

        doc.compile("TableDoc")


if __name__ == "__main__":
unittest.main()

0 comments on commit bb863c3

Please sign in to comment.