Skip to content
This repository has been archived by the owner on Jan 6, 2025. It is now read-only.

[MRG + 1] Create a new figure and test each plot type #127 #179

Merged
merged 9 commits into from
Nov 2, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ install:
pip install ".[dev]"

test:
pytest --verbose --cov-config .coveragerc --cov-report term --cov-report xml --cov=camelot tests
pytest --verbose --cov-config .coveragerc --cov-report term --cov-report xml --cov=camelot --mpl tests

docs:
cd docs && make html
Expand All @@ -25,4 +25,4 @@ publish:
pip install twine
python setup.py sdist
twine upload dist/*
rm -fr build dist .egg camelot_py.egg-info
rm -fr build dist .egg camelot_py.egg-info
1 change: 1 addition & 0 deletions camelot/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from .__version__ import __version__
from .io import read_pdf
from .plotting import plot


def _write_usage(self, prog, args='', prefix='Usage: '):
Expand Down
12 changes: 8 additions & 4 deletions camelot/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
import logging

import click
import matplotlib.pyplot as plt

from . import __version__
from .io import read_pdf
from .plotting import plot


logger = logging.getLogger('camelot')
Expand Down Expand Up @@ -80,7 +82,7 @@ def cli(ctx, *args, **kwargs):
help='Number of times for erosion/dilation will be applied.')
@click.option('-plot', '--plot_type',
type=click.Choice(['text', 'table', 'contour', 'joint', 'line']),
help='Plot geometry found on PDF page, for debugging.')
help='Plot elements found on PDF page for visual debugging.')
@click.argument('filepath', type=click.Path(exists=True))
@pass_config
def lattice(c, *args, **kwargs):
Expand All @@ -106,7 +108,8 @@ def lattice(c, *args, **kwargs):
click.echo('Found {} tables'.format(tables.n))
if plot_type is not None:
for table in tables:
table.plot(plot_type)
plot(table, plot_type=plot_type)
plt.show()
else:
if output is None:
raise click.UsageError('Please specify output file path using --output')
Expand All @@ -127,7 +130,7 @@ def lattice(c, *args, **kwargs):
' used to combine text horizontally, to generate columns.')
@click.option('-plot', '--plot_type',
type=click.Choice(['text', 'table']),
help='Plot geometry found on PDF page for debugging.')
help='Plot elements found on PDF page for visual debugging.')
@click.argument('filepath', type=click.Path(exists=True))
@pass_config
def stream(c, *args, **kwargs):
Expand All @@ -152,7 +155,8 @@ def stream(c, *args, **kwargs):
click.echo('Found {} tables'.format(tables.n))
if plot_type is not None:
for table in tables:
table.plot(plot_type)
plot(table, plot_type=plot_type)
plt.show()
else:
if output is None:
raise click.UsageError('Please specify output file path using --output')
Expand Down
29 changes: 0 additions & 29 deletions camelot/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
import numpy as np
import pandas as pd

from .plotting import *


class Cell(object):
"""Defines a cell in a table with coordinates relative to a
Expand Down Expand Up @@ -321,33 +319,6 @@ def set_span(self):
cell.hspan = True
return self

def plot(self, geometry_type):
"""Plot geometry found on PDF page based on geometry_type
specified, useful for debugging and playing with different
parameters to get the best output.

Parameters
----------
geometry_type : str
The geometry type for which a plot should be generated.
Can be 'text', 'table', 'contour', 'joint', 'line'

"""
if self.flavor == 'stream' and geometry_type in ['contour', 'joint', 'line']:
raise NotImplementedError("{} cannot be plotted with flavor='stream'".format(
geometry_type))

if geometry_type == 'text':
plot_text(self._text)
elif geometry_type == 'table':
plot_table(self)
elif geometry_type == 'contour':
plot_contour(self._image)
elif geometry_type == 'joint':
plot_joint(self._image)
elif geometry_type == 'line':
plot_line(self._segments)

def to_csv(self, path, **kwargs):
"""Writes Table to a comma-separated values (csv) file.

Expand Down
3 changes: 0 additions & 3 deletions camelot/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,9 +130,6 @@ def parse(self, flavor='lattice', **kwargs):
-------
tables : camelot.core.TableList
List of tables found in PDF.
geometry : camelot.core.GeometryList
List of geometry objects (contours, lines, joints) found
in PDF.

"""
tables = []
Expand Down
125 changes: 101 additions & 24 deletions camelot/plotting.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,59 @@
import cv2
# -*- coding: utf-8 -*-

import matplotlib.pyplot as plt
import matplotlib.patches as patches


def plot(table, plot_type='text', filepath=None):
    """Plot elements found on PDF page based on plot_type
    specified, useful for debugging and playing with different
    parameters to get the best output.

    Parameters
    ----------
    table: Table
        A Camelot Table.
    plot_type : str, optional (default: 'text')
        {'text', 'table', 'contour', 'joint', 'line'}
        The element type for which a plot should be generated.
    filepath: str, optional (default: None)
        Path for saving the generated plot. Nothing is saved when
        this is None or empty.

    Returns
    -------
    fig : matplotlib.figure.Figure

    Raises
    ------
    NotImplementedError
        If plot_type is 'contour', 'joint' or 'line' and the table
        was extracted with flavor='stream'.
    ValueError
        If plot_type is not one of the supported element types.

    """
    if table.flavor == 'stream' and plot_type in ['contour', 'joint', 'line']:
        raise NotImplementedError("{} cannot be plotted with flavor='stream'".format(
            plot_type))
    if plot_type == 'text':
        fig = plot_text(table._text)
    elif plot_type == 'table':
        fig = plot_table(table)
    elif plot_type == 'contour':
        fig = plot_contour(table._image)
    elif plot_type == 'joint':
        fig = plot_joint(table._image)
    elif plot_type == 'line':
        fig = plot_line(table._segments)
    else:
        # Without this branch an unknown plot_type would reach `return fig`
        # with `fig` unbound and fail with an opaque UnboundLocalError.
        raise ValueError(
            "plot_type must be one of 'text', 'table', 'contour', 'joint', "
            "'line'; got {!r}".format(plot_type))
    if filepath:
        # Save the figure that was just built rather than whatever pyplot
        # currently considers the "current" figure.
        fig.savefig(filepath)
    return fig


def plot_text(text):
"""Generates a plot for all text present on the PDF page.
"""Generates a plot for all text elements present
on the PDF page.

Parameters
----------
text : list

Returns
-------
fig : matplotlib.fig.Figure

"""
fig = plt.figure()
ax = fig.add_subplot(111, aspect='equal')
Expand All @@ -26,83 +70,116 @@ def plot_text(text):
)
ax.set_xlim(min(xs) - 10, max(xs) + 10)
ax.set_ylim(min(ys) - 10, max(ys) + 10)
plt.show()
return fig


def plot_table(table):
"""Generates a plot for the table.
"""Generates a plot for the detected tables
on the PDF page.

Parameters
----------
table : camelot.core.Table

Returns
-------
fig : matplotlib.fig.Figure

"""
fig = plt.figure()
ax = fig.add_subplot(111, aspect='equal')
for row in table.cells:
for cell in row:
if cell.left:
plt.plot([cell.lb[0], cell.lt[0]],
ax.plot([cell.lb[0], cell.lt[0]],
[cell.lb[1], cell.lt[1]])
if cell.right:
plt.plot([cell.rb[0], cell.rt[0]],
ax.plot([cell.rb[0], cell.rt[0]],
[cell.rb[1], cell.rt[1]])
if cell.top:
plt.plot([cell.lt[0], cell.rt[0]],
ax.plot([cell.lt[0], cell.rt[0]],
[cell.lt[1], cell.rt[1]])
if cell.bottom:
plt.plot([cell.lb[0], cell.rb[0]],
ax.plot([cell.lb[0], cell.rb[0]],
[cell.lb[1], cell.rb[1]])
plt.show()
return fig
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@suyash458 Are you returning fig from the plot_* functions only for those additional asserts in the tests?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pytest-mpl's image comparison decorator requires the decorated test function to return a matplotlib figure, so the plot functions need to return the figure they create.



def plot_contour(image):
"""Generates a plot for all table boundaries present on the
PDF page.
"""Generates a plot for all table boundaries present
on the PDF page.

Parameters
----------
image : tuple

Returns
-------
fig : matplotlib.fig.Figure

"""
img, table_bbox = image
fig = plt.figure()
ax = fig.add_subplot(111, aspect='equal')
for t in table_bbox.keys():
cv2.rectangle(img, (t[0], t[1]),
(t[2], t[3]), (255, 0, 0), 20)
plt.imshow(img)
plt.show()
ax.add_patch(
patches.Rectangle(
(t[0], t[1]),
t[2] - t[0],
t[3] - t[1],
fill=None,
edgecolor='red'
)
)
ax.imshow(img)
return fig


def plot_joint(image):
"""Generates a plot for all line intersections present on the
PDF page.
"""Generates a plot for all line intersections present
on the PDF page.

Parameters
----------
image : tuple

Returns
-------
fig : matplotlib.fig.Figure

"""
img, table_bbox = image
fig = plt.figure()
ax = fig.add_subplot(111, aspect='equal')
x_coord = []
y_coord = []
for k in table_bbox.keys():
for coord in table_bbox[k]:
x_coord.append(coord[0])
y_coord.append(coord[1])
plt.plot(x_coord, y_coord, 'ro')
plt.imshow(img)
plt.show()
ax.plot(x_coord, y_coord, 'ro')
ax.imshow(img)
return fig


def plot_line(segments):
"""Generates a plot for all line segments present on the PDF page.
"""Generates a plot for all line segments present
on the PDF page.

Parameters
----------
segments : tuple

Returns
-------
fig : matplotlib.fig.Figure

"""
fig = plt.figure()
ax = fig.add_subplot(111, aspect='equal')
vertical, horizontal = segments
for v in vertical:
plt.plot([v[0], v[2]], [v[1], v[3]])
ax.plot([v[0], v[2]], [v[1], v[3]])
for h in horizontal:
plt.plot([h[0], h[2]], [h[1], h[3]])
plt.show()
ax.plot([h[0], h[2]], [h[1], h[3]])
return fig
Loading