Skip to content

Commit

Permalink
Merge pull request #5 from Krande/dev
Browse files Browse the repository at this point in the history
Major improvement on handling Table formatting and numbering on docx export
  • Loading branch information
Krande authored Sep 17, 2021
2 parents c30714f + 1202b68 commit 251790a
Show file tree
Hide file tree
Showing 19 changed files with 387 additions and 246 deletions.
2 changes: 0 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,13 @@ jobs:
activate-environment: anaconda-client-env
python-version: ${{ matrix.pyver.distver }}
environment-file: conda/environment.yml

- name: build, test and upload conda package
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
env:
ANACONDA_TOKEN: ${{ secrets.ANACONDA_TOKEN }}
shell: bash -l {0}
run: |
conda-build -c krande -c conda-forge conda --python=${{ matrix.pyver.distver }} --token=$ANACONDA_TOKEN --user krande --override-channels
- name: build and test conda package
if: github.event_name == 'push' && github.ref != 'refs/heads/main'
shell: bash -l {0}
Expand Down
1 change: 1 addition & 0 deletions conda/environment.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
name: anaconda-client-env
channels:
- krande
- conda-forge
dependencies:
- conda-build
Expand Down
4 changes: 2 additions & 2 deletions files/doc_math/00-main/00-intro.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# A List of functions

Here are two basic function @eq:my_equation and @eq:my_equation_2.
Here are two basic functions @eq:my_equation_1 and @eq:my_equation_2.

{{__my_equation__}}
{{__my_equation_1__}}


And here is a small edit of that function
Expand Down
21 changes: 0 additions & 21 deletions files/doc_math/00-main/00-results-tables.md

This file was deleted.

4 changes: 3 additions & 1 deletion files/doc_math/metadata.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
lang: en-GB
date: \today
urlcolor: "black"
linkReferences: true
nameInLink: true
figPrefix: "Figure"
tblPrefix: "Table"
tblPrefix: "Table"
2 changes: 2 additions & 0 deletions files/doc_table/00-main/table.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,6 @@ Some text before the table

{{__my_table__}}

{{__my_table_3__}}

And some text after
4 changes: 4 additions & 0 deletions files/doc_table/01-app/table.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,8 @@ Some text before the table

{{__my_table_2__}}

{{__my_table_4__}}

{{__my_table_5__}}

and some text after
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from setuptools import setup

setup(version="0.0.3")
setup(version="0.0.4")
50 changes: 43 additions & 7 deletions src/paradoc/common.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from __future__ import annotations

import pathlib
from dataclasses import dataclass
from typing import Callable
import re
from dataclasses import dataclass, field
from typing import Callable, List, Union

import pandas as pd

Expand All @@ -15,6 +16,7 @@ class TableFormat:
style: str = "Grid Table 1 Light"
font_size: float = 11
font_style: str = "Arial"
float_fmt: Union[str, tuple] = None


class TableFlags:
Expand All @@ -27,19 +29,29 @@ class Table:
df: pd.DataFrame
caption: str
format: TableFormat = TableFormat()
add_link: bool = False
add_link: bool = True
md_instances: List[MarkDownFile] = field(default_factory=list)
docx_instances: List[object] = field(default_factory=list)

# def get_cell0(self):
# col_name = self.df.columns[0]
# df.iloc[0, df.columns.get_loc(col_name)]

def to_markdown(self, include_name_in_cell=False, flags=None):
df = self.df.copy()
if include_name_in_cell:
col_name = df.columns[0]
df.iloc[0, df.columns.get_loc(col_name)] = self.name
tbl_str = df.to_markdown(index=False, tablefmt="grid")

props = dict(index=False, tablefmt="grid")
if self.format.float_fmt is not None:
props["floatfmt"] = self.format.float_fmt
tbl_str = df.to_markdown(**props)
if flags is not None and TableFlags.NO_CAPTION in flags:
return tbl_str
tbl_str += f"\n\nTable: {self.caption}"
if self.add_link:
tbl_str += f"{{#tbl:{self.name}}}"
tbl_str += f" {{#tbl:{self.name}}}"
return tbl_str


Expand All @@ -48,12 +60,16 @@ class Equation:
name: str
func: Callable
custom_eq_str_compiler: Callable = None
add_link: bool = True
include_python_code: bool = False
md_instances: List[MarkDownFile] = field(default_factory=list)
docx_instances: List[object] = field(default_factory=list)

def to_latex(self, print_latex=False, print_formula=False, flags=None):
if self.custom_eq_str_compiler is not None:
return self.custom_eq_str_compiler(self.func)

from inspect import getsourcelines
from inspect import getsource, getsourcelines

import pytexit

Expand All @@ -68,8 +84,14 @@ def to_latex(self, print_latex=False, print_formula=False, flags=None):
continue
if dots >= 6 or dots == 0:
eq_latex += pytexit.py2tex(line, print_latex=print_latex, print_formula=print_formula) + "\n"
eq_str = eq_latex

if self.add_link:
eq_str += f"{{#eq:{self.name}}}"

return eq_latex + f"{{#eq:{self.name}}}"
if self.include_python_code:
eq_str = f"\n\n```python\n{getsource(self.func)}\n```\n\n" + eq_str
return eq_str


@dataclass
Expand All @@ -85,6 +107,20 @@ class MarkDownFile:
new_file: pathlib.Path
build_file: pathlib.Path

def read_original_file(self):
    """Return the full text of the source Markdown file at ``self.path``."""
    with open(self.path, mode="r") as source:
        contents = source.read()
    return contents

def read_built_file(self):
    """Return the text of ``self.build_file``.

    The build file is the copy of the Markdown document that is written
    once variable substitution has been performed.
    """
    with open(self.build_file, mode="r") as built:
        contents = built.read()
    return contents

def get_variables(self):
    """Find every ``{{...}}`` placeholder in the original Markdown file.

    Returns:
        An iterator of ``re.Match`` objects. ``group(0)`` is the whole
        placeholder including the braces, ``group(1)`` is the text between
        the braces (key plus any ``|``-separated flags).
    """
    md_doc_str = self.read_original_file()
    # Non-greedy on purpose: with a greedy ``.*`` two placeholders on the
    # same line ("{{a}} and {{b}}") were captured as one single match.
    key_re = re.compile(r"\{\{(.*?)\}\}")
    return key_re.finditer(md_doc_str)


class ExportFormats:
DOCX = "docx"
Expand Down
136 changes: 88 additions & 48 deletions src/paradoc/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from typing import Dict

import pandas as pd
import pypandoc

from .common import (
DocXFormat,
Expand All @@ -17,7 +16,8 @@
Table,
TableFormat,
)
from .utils import get_list_of_files, variable_sub
from .exceptions import LatexNotInstalled
from .utils import get_list_of_files


class OneDoc:
Expand Down Expand Up @@ -49,11 +49,11 @@ class OneDoc:
"Body Text": "Normal Indent",
"Compact": "Normal Indent",
}
FORMATS = ExportFormats

def __init__(
self,
source_dir=None,
export_format=ExportFormats.DOCX,
main_prefix="00-main",
app_prefix="01-app",
clean_build_dir=True,
Expand All @@ -64,7 +64,6 @@ def __init__(
self.work_dir = kwargs.get("work_dir", pathlib.Path("").resolve().absolute())
self._main_prefix = main_prefix
self._app_prefix = app_prefix
self.export_format = export_format
self.variables = dict()
self.tables: Dict[str, Table] = dict()
self.equations: Dict[str, Equation] = dict()
Expand All @@ -73,9 +72,9 @@ def __init__(
# Style info: https://python-docx.readthedocs.io/en/latest/user/styles-using.html
self.paragraph_style_map = kwargs.get("paragraph_style_map", OneDoc.default_paragraph_map)
self.appendix_heading_map = kwargs.get("appendix_heading_map", OneDoc.default_app_map)

self.md_files_main = []
self.md_files_app = []
self.metadata_file = None

for md_file in get_list_of_files(self.source_dir, ".md"):
is_appendix = True if app_prefix in md_file else False
Expand All @@ -101,65 +100,106 @@ def __init__(
if clean_build_dir is True:
shutil.rmtree(self.build_dir, ignore_errors=True)

def compile(self, output_name, auto_open=False, metadata_file=None):
dest_file = (self.dist_dir / output_name).with_suffix(f".{self.export_format}").resolve().absolute()
def compile(self, output_name, auto_open=False, metadata_file=None, export_format=ExportFormats.DOCX, **kwargs):
dest_file = (self.dist_dir / output_name).with_suffix(f".{export_format}").resolve().absolute()

logging.debug(f'Compiling report to "{dest_file}"')
os.makedirs(self.build_dir, exist_ok=True)
os.makedirs(self.dist_dir, exist_ok=True)

for mdf in self.md_files_main + self.md_files_app:
md_file = mdf.path
os.makedirs(mdf.new_file.parent, exist_ok=True)
self.metadata_file = self.source_dir / "metadata.yaml" if metadata_file is None else pathlib.Path(metadata_file)

# Substitute parameters/tables in the creation of the document
with open(md_file, "r") as f:
tmp_md_doc = f.read()
tmp_md_doc = variable_sub(tmp_md_doc, self.tables)
tmp_md_doc = variable_sub(tmp_md_doc, self.variables)
tmp_md_doc = variable_sub(tmp_md_doc, self.equations)
if self.metadata_file.exists() is False:
with open(self.metadata_file, "w") as f:
f.write('linkReferences: true\nnameInLink: true\nfigPrefix: "Figure"\ntblPrefix: "Table"')

with open(mdf.build_file, "w") as f:
f.write(tmp_md_doc)

metadata_file = self.source_dir / "metadata.yaml" if metadata_file is None else metadata_file
if metadata_file.exists() is False:
with open(metadata_file, "w") as f:
f.write('linkReferences: true\nnameInLink: true\nfigPrefix: "Figure"\ntblPrefix: "Table"')

pypandoc.convert_file(
str(mdf.build_file),
self.export_format,
outputfile=str(mdf.new_file),
format="markdown",
extra_args=[
"-M2GB",
"+RTS",
"-K64m",
"-RTS",
f"--resource-path={md_file.parent}",
f"--metadata-file={metadata_file}"
# f"--reference-doc={MY_DOCX_TMPL}",
],
filters=["pandoc-crossref"],
encoding="utf8",
)
if self.export_format == ExportFormats.DOCX:
if export_format == ExportFormats.DOCX:
from paradoc.io.word.exporter import WordExporter

wordx = WordExporter(self)
wordx.convert_to_docx(output_name, dest_file)
use_custom_compile = kwargs.get("use_custom_docx_compile", True)
if use_custom_compile is False:
use_table_name_in_cell_as_index = False
else:
use_table_name_in_cell_as_index = True

self._perform_variable_substitution(use_table_name_in_cell_as_index)

wordx = WordExporter(self, **kwargs)
wordx.export(output_name, dest_file)
elif export_format == ExportFormats.PDF:
from paradoc.io.pdf.exporter import PdfExporter

latex_path = shutil.which("latex")
if latex_path is None:
latex_url = "https://www.latex-project.org/get/"
raise LatexNotInstalled(
"Latex was not installed on your system. "
f'Please install latex before exporting to pdf. See "{latex_url}" for installation packages'
)
self._perform_variable_substitution(False)
pdf = PdfExporter(self)
pdf.export(dest_file)
else:
raise NotImplementedError(f'Export format "{export_format}" is not yet supported')

if auto_open is True:
os.startfile(dest_file)

def add_table(self, name, df: pd.DataFrame, caption: str, tbl_format: TableFormat = TableFormat()):
def add_table(self, name, df: pd.DataFrame, caption: str, tbl_format: TableFormat = TableFormat(), **kwargs):
if '"' in caption:
raise ValueError('Using characters such as " currently breaks the caption search in the docs compiler')
self.tables[name] = Table(name, df, caption, tbl_format)
self._uniqueness_check(name)
self.tables[name] = Table(name, df, caption, tbl_format, **kwargs)

def add_equation(self, name, eq, custom_eq_str_compiler=None):
self.equations[name] = Equation(name, eq, custom_eq_str_compiler=custom_eq_str_compiler)
def add_equation(self, name, eq, custom_eq_str_compiler=None, **kwargs):
self._uniqueness_check(name)
self.equations[name] = Equation(name, eq, custom_eq_str_compiler=custom_eq_str_compiler, **kwargs)

def _perform_variable_substitution(self, use_table_var_substitution):
    """Write a build copy of every Markdown file with placeholders filled in.

    For each file in ``md_files_main`` and ``md_files_app`` the original
    text is read, every ``{{...}}`` placeholder reported by
    ``get_variables()`` is looked up among the registered tables,
    equations and variables (in that order of precedence), and the result
    is written to the file's ``build_file``.

    Args:
        use_table_var_substitution: forwarded as the first argument to
            ``Table.to_markdown`` for every table placeholder.
    """
    logging.info("Performing variable substitution")
    for md_doc in self.md_files_main + self.md_files_app:
        source_path = md_doc.path
        os.makedirs(md_doc.new_file.parent, exist_ok=True)
        text = md_doc.read_original_file()

        for match in md_doc.get_variables():
            placeholder = match.group(0)
            inner = match.group(1)

            # "key|flag1|flag2" -> key plus list of flags; no "|" means no flags.
            if "|" in inner:
                key, *flags = inner.split("|")
            else:
                key, flags = inner, None

            # Drop the two leading and two trailing characters of the key
            # (the "__" wrapping used by placeholders such as {{__my_table__}}).
            name = key[2:-2]

            table = self.tables.get(name, None)
            equation = self.equations.get(name, None)
            value = self.variables.get(name, None)

            if table is not None:
                table.md_instances.append(md_doc)
                replacement = table.to_markdown(use_table_var_substitution, flags)
            elif equation is not None:
                equation.md_instances.append(md_doc)
                replacement = equation.to_latex()
            elif value is not None:
                replacement = str(value)
            else:
                # Unknown key: report it and leave the placeholder untouched.
                logging.error(f'key "{name}" located in {source_path} has not been substituted')
                replacement = placeholder

            text = text.replace(placeholder, replacement)

        with open(md_doc.build_file, "w") as out:
            out.write(text)

def _uniqueness_check(self, name):
    """Ensure ``name`` is not already used by a table, equation or variable.

    Tables, equations and variables are all looked up by the same key when
    placeholders are substituted, so a name may only be registered once
    across the three containers.

    Args:
        name: the key about to be registered.

    Raises:
        ValueError: if ``name`` already maps to a non-``None`` entry in
            ``self.tables``, ``self.equations`` or ``self.variables``.
    """
    # This check also runs when adding equations, so the message must not
    # claim the offending name is a table name.
    error_msg = 'Name "{name}" must be unique. This name is already used by {cont_type}="{container}"'

    registries = (
        ("Table", self.tables),
        ("Equation", self.equations),
        ("Variable", self.variables),
    )
    for cont_type, registry in registries:
        existing = registry.get(name, None)
        if existing is not None:
            raise ValueError(error_msg.format(name=name, cont_type=cont_type, container=existing))

@property
def main_dir(self):
Expand Down
2 changes: 2 additions & 0 deletions src/paradoc/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
class LatexNotInstalled(Exception):
    """Raised when a PDF export is requested but no ``latex`` executable is found."""
Empty file added src/paradoc/io/pdf/__init__.py
Empty file.
Loading

0 comments on commit 251790a

Please sign in to comment.