Skip to content
This repository has been archived by the owner on Jan 6, 2025. It is now read-only.

Commit

Permalink
Change method name and add pep8
Browse files Browse the repository at this point in the history
  • Loading branch information
vinayak-mehta committed Nov 2, 2018
1 parent d0edd43 commit efe48da
Show file tree
Hide file tree
Showing 7 changed files with 95 additions and 66 deletions.
3 changes: 2 additions & 1 deletion camelot/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@

from .__version__ import __version__
from .io import read_pdf
from .plotting import plot_pdf
from .plotting import plot


def _write_usage(self, prog, args='', prefix='Usage: '):
return self._write_usage('camelot', args, prefix=prefix)
Expand Down
13 changes: 8 additions & 5 deletions camelot/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
import logging

import click
import matplotlib.pyplot as plt

from . import __version__
from .io import read_pdf
from .plotting import plot_pdf
from .plotting import plot


logger = logging.getLogger('camelot')
Expand Down Expand Up @@ -81,7 +82,7 @@ def cli(ctx, *args, **kwargs):
help='Number of times for erosion/dilation will be applied.')
@click.option('-plot', '--plot_type',
type=click.Choice(['text', 'table', 'contour', 'joint', 'line']),
help='Plot geometry found on PDF page, for debugging.')
help='Plot elements found on PDF page for visual debugging.')
@click.argument('filepath', type=click.Path(exists=True))
@pass_config
def lattice(c, *args, **kwargs):
Expand All @@ -107,7 +108,8 @@ def lattice(c, *args, **kwargs):
click.echo('Found {} tables'.format(tables.n))
if plot_type is not None:
for table in tables:
plot_pdf(table, plot_type)
plot(table, plot_type=plot_type)
plt.show()
else:
if output is None:
raise click.UsageError('Please specify output file path using --output')
Expand All @@ -128,7 +130,7 @@ def lattice(c, *args, **kwargs):
' used to combine text horizontally, to generate columns.')
@click.option('-plot', '--plot_type',
type=click.Choice(['text', 'table']),
help='Plot geometry found on PDF page for debugging.')
help='Plot elements found on PDF page for visual debugging.')
@click.argument('filepath', type=click.Path(exists=True))
@pass_config
def stream(c, *args, **kwargs):
Expand All @@ -153,7 +155,8 @@ def stream(c, *args, **kwargs):
click.echo('Found {} tables'.format(tables.n))
if plot_type is not None:
for table in tables:
plot_pdf(table, plot_type)
plot(table, plot_type=plot_type)
plt.show()
else:
if output is None:
raise click.UsageError('Please specify output file path using --output')
Expand Down
3 changes: 0 additions & 3 deletions camelot/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,9 +130,6 @@ def parse(self, flavor='lattice', **kwargs):
-------
tables : camelot.core.TableList
List of tables found in PDF.
geometry : camelot.core.GeometryList
List of geometry objects (contours, lines, joints) found
in PDF.
"""
tables = []
Expand Down
104 changes: 66 additions & 38 deletions camelot/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,46 +4,56 @@
import matplotlib.patches as patches


def plot_pdf(table, geometry_type, filename=None):
"""Plot geometry found on PDF page based on geometry_type
specified, useful for debugging and playing with different
parameters to get the best output.
Parameters
----------
table: Table
The table object to plot data from
geometry_type : str
The geometry type for which a plot should be generated.
Can be 'text', 'table', 'contour', 'joint', 'line'
filename: str
If specified, saves the plot to a file with the given name
"""
if table.flavor == 'stream' and geometry_type in ['contour', 'joint', 'line']:
raise NotImplementedError("{} cannot be plotted with flavor='stream'".format(
geometry_type))
if geometry_type == 'text':
plot_text(table._text)
elif geometry_type == 'table':
plot_table(table)
elif geometry_type == 'contour':
plot_contour(table._image)
elif geometry_type == 'joint':
plot_joint(table._image)
elif geometry_type == 'line':
plot_line(table._segments)
if filename:
plt.savefig(filename)
plt.show()
def plot(table, plot_type='text', filepath=None):
    """Plot elements found on a PDF page, selected by plot_type.

    Useful for debugging and for playing with different parameters
    to get the best output.

    Parameters
    ----------
    table : Table
        A Camelot Table.
    plot_type : str, optional (default: 'text')
        {'text', 'table', 'contour', 'joint', 'line'}
        The element type for which a plot should be generated.
    filepath : str, optional (default: None)
        Path for saving the generated plot. Nothing is written to
        disk when this is left empty.

    Returns
    -------
    fig : matplotlib.figure.Figure

    Raises
    ------
    NotImplementedError
        If plot_type is 'contour', 'joint' or 'line' and
        table.flavor is 'stream'.
    ValueError
        If plot_type is not one of the supported plot types.

    """
    if table.flavor == 'stream' and plot_type in ['contour', 'joint', 'line']:
        raise NotImplementedError("{} cannot be plotted with flavor='stream'".format(
            plot_type))
    if plot_type == 'text':
        fig = plot_text(table._text)
    elif plot_type == 'table':
        fig = plot_table(table)
    elif plot_type == 'contour':
        fig = plot_contour(table._image)
    elif plot_type == 'joint':
        fig = plot_joint(table._image)
    elif plot_type == 'line':
        fig = plot_line(table._segments)
    else:
        # Without this branch an unknown plot_type would reach
        # `return fig` with fig unbound and fail with an opaque
        # UnboundLocalError; report the real problem instead.
        raise ValueError(
            "Unknown plot_type {!r}; expected one of 'text', 'table', "
            "'contour', 'joint', 'line'".format(plot_type))
    if filepath:
        # The plot_* helpers build their figure with plt.figure(), which
        # makes it the current pyplot figure, so this saves that figure.
        plt.savefig(filepath)
    return fig


def plot_text(text):
"""Generates a plot for all text present on the PDF page.
"""Generates a plot for all text elements present
on the PDF page.
Parameters
----------
text : list
Returns
-------
fig : matplotlib.figure.Figure
"""
fig = plt.figure()
ax = fig.add_subplot(111, aspect='equal')
Expand All @@ -64,12 +74,17 @@ def plot_text(text):


def plot_table(table):
"""Generates a plot for the table.
"""Generates a plot for the detected tables
on the PDF page.
Parameters
----------
table : camelot.core.Table
Returns
-------
fig : matplotlib.figure.Figure
"""
fig = plt.figure()
ax = fig.add_subplot(111, aspect='equal')
Expand All @@ -91,13 +106,17 @@ def plot_table(table):


def plot_contour(image):
"""Generates a plot for all table boundaries present on the
PDF page.
"""Generates a plot for all table boundaries present
on the PDF page.
Parameters
----------
image : tuple
Returns
-------
fig : matplotlib.figure.Figure
"""
img, table_bbox = image
fig = plt.figure()
Expand All @@ -117,13 +136,17 @@ def plot_contour(image):


def plot_joint(image):
"""Generates a plot for all line intersections present on the
PDF page.
"""Generates a plot for all line intersections present
on the PDF page.
Parameters
----------
image : tuple
Returns
-------
fig : matplotlib.figure.Figure
"""
img, table_bbox = image
fig = plt.figure()
Expand All @@ -140,12 +163,17 @@ def plot_joint(image):


def plot_line(segments):
"""Generates a plot for all line segments present on the PDF page.
"""Generates a plot for all line segments present
on the PDF page.
Parameters
----------
segments : tuple
Returns
-------
fig : matplotlib.figure.Figure
"""
fig = plt.figure()
ax = fig.add_subplot(111, aspect='equal')
Expand Down
15 changes: 8 additions & 7 deletions docs/user/advanced.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,21 +27,22 @@ To process background lines, you can pass ``process_background=True``.
.. csv-table::
:file: ../_static/csv/background_lines.csv

Plot geometry
-------------
Visual debugging
----------------

You can use camelot's :meth:`plot_pdf() <camelot.plotting.plot_pdf>` method to plot various geometries that were detected by Camelot while processing the PDF page. This can help you select table areas, column separators and debug bad table outputs, by tweaking different configuration parameters.
The following geometries are available for plotting. You can pass them to the :meth:`plot_pdf() <camelot.plotting.plot_pdf>` method, which will then generate a `matplotlib <https://matplotlib.org/>`_ plot for the passed geometry. The plot can be saved to a file by passing a ``filename`` param to :meth:`plot_pdf() <camelot.plotting.plot_pdf>`
You can use the :meth:`plot() <camelot.plotting.plot>` method to generate a `matplotlib <https://matplotlib.org/>`_ plot of various elements that were detected on the PDF page while processing it. This can help you select table areas, column separators and debug bad table outputs, by tweaking different configuration parameters.

You can specify the type of element you want to plot using the ``plot_type`` keyword argument. The generated plot can be saved to a file by passing a ``filepath`` keyword argument. The following plot types are supported:

- 'text'
- 'table'
- 'contour'
- 'line'
- 'joint'

.. note:: The last three geometries can only be used with :ref:`Lattice <lattice>`, i.e. when ``flavor='lattice'``.
.. note:: The last three plot types can only be used with :ref:`Lattice <lattice>`, i.e. when ``flavor='lattice'``.

Let's generate a plot for each geometry using this `PDF <../_static/pdf/foo.pdf>`__ as an example. First, let's get all the tables out.
Let's generate a plot for each type using this `PDF <../_static/pdf/foo.pdf>`__ as an example. First, let's get all the tables out.

::

Expand Down Expand Up @@ -76,7 +77,7 @@ This, as we shall later see, is very helpful with :ref:`Stream <stream>` for not
table
^^^^^

Let's plot the table (to see if it was detected correctly or not). This geometry type, along with contour, line and joint is useful for debugging and improving the extraction output, in case the table wasn't detected correctly. (More on that later.)
Let's plot the table (to see if it was detected correctly or not). This plot type, along with contour, line and joint is useful for debugging and improving the extraction output, in case the table wasn't detected correctly. (More on that later.)

::

Expand Down
5 changes: 2 additions & 3 deletions tests/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
import matplotlib

matplotlib.use('Agg')
import matplotlib
matplotlib.use('agg')
18 changes: 9 additions & 9 deletions tests/test_plotting.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# -*- coding: utf-8 -*-

import camelot
import os

import pytest

from camelot.plotting import *
import camelot


testdir = os.path.dirname(os.path.abspath(__file__))
Expand All @@ -16,36 +16,36 @@
def test_text_plot():
filename = os.path.join(testdir, "foo.pdf")
tables = camelot.read_pdf(filename)
return plot_text(tables[0]._text)
return camelot.plot(tables[0], plot_type='text')


@pytest.mark.mpl_image_compare(
baseline_dir="files/baseline_plots", remove_text=True)
def test_contour_plot():
def test_table_plot():
filename = os.path.join(testdir, "foo.pdf")
tables = camelot.read_pdf(filename)
return plot_contour(tables[0]._image)
return camelot.plot(tables[0], plot_type='table')


@pytest.mark.mpl_image_compare(
baseline_dir="files/baseline_plots", remove_text=True)
def test_table_plot():
def test_contour_plot():
filename = os.path.join(testdir, "foo.pdf")
tables = camelot.read_pdf(filename)
return plot_table(tables[0])
return camelot.plot(tables[0], plot_type='contour')


@pytest.mark.mpl_image_compare(
baseline_dir="files/baseline_plots", remove_text=True)
def test_line_plot():
filename = os.path.join(testdir, "foo.pdf")
tables = camelot.read_pdf(filename)
return plot_line(tables[0]._segments)
return camelot.plot(tables[0], plot_type='line')


@pytest.mark.mpl_image_compare(
baseline_dir="files/baseline_plots", remove_text=True)
def test_joint_plot():
filename = os.path.join(testdir, "foo.pdf")
tables = camelot.read_pdf(filename)
return plot_joint(tables[0]._image)
return camelot.plot(tables[0], plot_type='joint')

0 comments on commit efe48da

Please sign in to comment.