-
Notifications
You must be signed in to change notification settings - Fork 952
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix grouping textlines when bounding box of parent container is wrong (#386)
* Default value for --all-texts should be false, because using the flag enables it
* Fix edge case: when no neighbors are found a line should form its own text box
* Added test for grouping textlines where 1 is outside the parent bounding box
* Added CHANGELOG.md line
- Loading branch information
1 parent
7e91d4e
commit 1d773dc
Showing
4 changed files
with
28 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
import unittest | ||
|
||
from pdfminer.layout import LTLayoutContainer, LAParams, LTTextLineHorizontal | ||
|
||
|
||
class TestGroupTextLines(unittest.TestCase):
    """Tests for LTLayoutContainer.group_textlines()."""

    def test_parent_with_wrong_bbox_returns_non_empty_neighbour_list(self):
        """
        Non-overlapping lines must each be returned in a separate text box
        by LTLayoutContainer.group_textlines(), even when the bounding box
        of the parent container does not contain every line.
        """
        params = LAParams()
        container = LTLayoutContainer((0, 0, 50, 50))

        # The first bbox lies within the container's (0, 0, 50, 50) bbox;
        # the second one extends beyond it (its last coordinate is 55).
        text_lines = []
        for bbox in ((0, 0, 50, 5), (0, 50, 50, 55)):
            text_line = LTTextLineHorizontal(params.word_margin)
            text_line.set_bbox(bbox)
            text_lines.append(text_line)

        grouped = list(container.group_textlines(params, text_lines))

        # One group per line: neither line may be dropped or merged.
        self.assertEqual(len(grouped), 2)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters