Merge pull request #120 from joschrew/dockerfile #121
64 passed, 12 failed and 1 skipped
❌ 3.13-junit.xml
77 tests were completed in 9s with 64 passed, 12 failed and 1 skipped.
Test suite | Passed | Failed | Skipped | Time |
---|---|---|---|---|
pytest | 64✅ | 12❌ | 1⚪ | 9s |
❌ pytest
src.dinglehopper.tests.extracted_text_test
✅ test_text
✅ test_normalization_check
✅ test_align
✅ test_textequiv[attributes0-None-None]
✅ test_textequiv[attributes1-0-None]
✅ test_textequiv[attributes2-0-None]
✅ test_textequiv[attributes3-0-None]
✅ test_textequiv[attributes4-1-None]
✅ test_textequiv[attributes5-0-TextEquiv without index]
✅ test_textequiv[attributes6-2-No index attributes, use 'conf' attribute to sort TextEquiv]
✅ test_textequiv[attributes7-0-TextEquiv without index]
✅ test_textequiv[attributes8-1-No index attributes, use 'conf' attribute to sort TextEquiv]
✅ test_textequiv[attributes9-0-No index attributes, use first TextEquiv]
src.dinglehopper.tests.test_align
✅ test_left_empty
✅ test_right_empty
✅ test_left_longer
✅ test_right_longer
✅ test_some_diff
✅ test_longer
✅ test_completely_different
✅ test_with_some_fake_ocr_errors
✅ test_lines
⚪ test_lines_similar
✅ test_score_hint
src.dinglehopper.tests.test_character_error_rate
✅ test_character_error_rate
✅ test_character_error_rate_hard
src.dinglehopper.tests.test_edit_distance
✅ test_distance
src.dinglehopper.tests.test_editops
✅ test_editops
✅ test_editops_canonically_equivalent
src.dinglehopper.tests.test_integ_align
✅ test_align_page_files
src.dinglehopper.tests.test_integ_bigger_texts
✅ test_bigger_texts
src.dinglehopper.tests.test_integ_character_error_rate_ocr
✅ test_character_error_rate_between_page_files
✅ test_character_error_rate_between_page_alto
✅ test_character_error_rate_between_page_alto_2
src.dinglehopper.tests.test_integ_cli_dir
❌ test_cli_directory
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/test_cli_directory0')
❌ test_cli_fail_without_gt
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/test_cli_fail_without_gt0')
src.dinglehopper.tests.test_integ_cli_valid_json
❌ test_cli_json
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/test_cli_json0')
❌ test_cli_json_cer_is_infinity
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/test_cli_json_cer_is_infinity0')
src.dinglehopper.tests.test_integ_differences
❌ test_cli_differences
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/test_cli_differences0')
src.dinglehopper.tests.test_integ_edit_distance_ocr
✅ test_distance_between_page_files
✅ test_distance_between_page_alto
✅ test_distance_between_page_alto_2
src.dinglehopper.tests.test_integ_empty_files
✅ test_empty_files[-Lorem ipsum-inf]
✅ test_empty_files[Lorem ipsum--1.0]
✅ test_empty_files[\ufeff-Lorem ipsum-inf]
✅ test_empty_files[Lorem ipsum-\ufeff-1.0]
✅ test_empty_files[--0.0]
✅ test_empty_files[\ufeff--0.0]
✅ test_empty_files[-\ufeff-0.0]
src.dinglehopper.tests.test_integ_ocrd_cli
❌ test_ocrd_cli
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/test_ocrd_cli0')
src.dinglehopper.tests.test_integ_summarize
✅ test_cli_summarize_json
✅ test_cli_summarize_html
✅ test_cli_summarize_html_skip_invalid
src.dinglehopper.tests.test_integ_table_extraction
✅ test_reading_order_settings[table-order-0001.xml-1\n2\n3\n4\n5\n6\n7\n8\n9]
✅ test_reading_order_settings[table-order-0002.xml-1\n4\n7\n2\n5\n8\n3\n6\n9]
✅ test_reading_order_settings[table-region.xml-1\n2\n3\n4\n5\n6\n7\n8\n9]
✅ test_reading_order_settings[table-no-reading-order.xml-5\n6\n7\n8\n9\n1\n2\n3\n4]
✅ test_reading_order_settings[table-unordered.xml-1\n2\n3\n4\n5\n6\n7\n8\n9]
src.dinglehopper.tests.test_integ_word_error_rate_ocr
❌ test_word_error_rate_between_page_files
@pytest.mark.integration
❌ test_word_error_rate_between_page_alto
self = {(<class 'str'>, <class 'str'>): <function word_error_rate_n at 0x7f4f5d42fc40>, (<class 'dinglehopper.extracted_text....f4f5d42fd80>, (<class 'collections.abc.Iterable'>, <class 'collections.abc.Iterable'>): <function _ at 0x7f4f5d42fe20>}
❌ test_word_error_rate_between_page_alto_2
@pytest.mark.integration
src.dinglehopper.tests.test_ocr_files
✅ test_alto_namespace
✅ test_alto_text
✅ test_alto_text_ALTO1
✅ test_alto_text_ALTO2
✅ test_alto_text_ALTO3
✅ test_page_namespace
✅ test_page_test
✅ test_page_with_empty_region
✅ test_page_order
✅ test_page_mixed_regions
✅ test_page_level
✅ test_text
✅ test_plain
src.dinglehopper.tests.test_word_error_rate
❌ test_words
def test_words():
❌ test_words_private_use_area
def test_words_private_use_area():
❌ test_word_error_rate
self = {(<class 'str'>, <class 'str'>): <function word_error_rate_n at 0x7f4f5d42fc40>, (<class 'dinglehopper.extracted_text....f4f5d42fd80>, (<class 'collections.abc.Iterable'>, <class 'collections.abc.Iterable'>): <function _ at 0x7f4f5d42fe20>}
Annotations
Check failure on line 0 in 3.13-junit.xml
github-actions / Tests Results - 3.13
pytest ► src.dinglehopper.tests.test_integ_cli_dir ► test_cli_directory
Failed test found in:
3.13-junit.xml
Error:
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/test_cli_directory0')
Raw output
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/test_cli_directory0')
@pytest.mark.integration
def test_cli_directory(tmp_path):
"""
Test that the cli/process_dir() processes a directory of files and
yields JSON and HTML reports.
"""
initLogging()
> process_dir(
os.path.join(data_dir, "directory-test", "gt"),
os.path.join(data_dir, "directory-test", "ocr"),
"report",
str(tmp_path / "reports"),
metrics=False,
differences=True,
textequiv_level="line",
)
dinglehopper/tests/test_integ_cli_dir.py:19:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
dinglehopper/cli.py:204: in process_dir
process(
dinglehopper/cli.py:126: in process
gt_words: List[str] = list(words_normalized(gt_text))
dinglehopper/word_error_rate.py:77: in _
yield from words_normalized(s.text)
dinglehopper/word_error_rate.py:72: in words_normalized
yield from words(unicodedata.normalize("NFC", s))
dinglehopper/word_error_rate.py:58: in words
for word in uniseg.wordbreak.words(s):
/opt/hostedtoolcache/Python/3.13.1/x64/lib/python3.13/site-packages/uniseg/wordbreak.py:250: in words
breakables = word_breakables(s)
/opt/hostedtoolcache/Python/3.13.1/x64/lib/python3.13/site-packages/uniseg/wordbreak.py:108: in word_breakables
run = Run(s, word_break)
/opt/hostedtoolcache/Python/3.13.1/x64/lib/python3.13/site-packages/uniseg/breaking.py:53: in __init__
self._attributes = [func(c) for c in text]
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
c = 'ü', index = 0
def new_word_break(c, index=0):
if 0xE000 <= ord(c) <= 0xF8FF: # Private Use Area
return uniseg.wordbreak.WordBreak.ALETTER
else:
> return old_word_break(c, index)
E TypeError: word_break() takes 1 positional argument but 2 were given
dinglehopper/word_error_rate.py:29: TypeError
Check failure on line 0 in 3.13-junit.xml
github-actions / Tests Results - 3.13
pytest ► src.dinglehopper.tests.test_integ_cli_dir ► test_cli_fail_without_gt
Failed test found in:
3.13-junit.xml
Error:
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/test_cli_fail_without_gt0')
Raw output
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/test_cli_fail_without_gt0')
@pytest.mark.integration
def test_cli_fail_without_gt(tmp_path):
"""
Test that the cli/process_dir skips a file if there is no corresponding file
in the other directory.
"""
initLogging()
> process_dir(
os.path.join(data_dir, "directory-test", "gt"),
os.path.join(data_dir, "directory-test", "ocr"),
"report",
str(tmp_path / "reports"),
metrics=False,
differences=True,
textequiv_level="line",
)
dinglehopper/tests/test_integ_cli_dir.py:43:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
dinglehopper/cli.py:204: in process_dir
process(
dinglehopper/cli.py:126: in process
gt_words: List[str] = list(words_normalized(gt_text))
dinglehopper/word_error_rate.py:77: in _
yield from words_normalized(s.text)
dinglehopper/word_error_rate.py:72: in words_normalized
yield from words(unicodedata.normalize("NFC", s))
dinglehopper/word_error_rate.py:58: in words
for word in uniseg.wordbreak.words(s):
/opt/hostedtoolcache/Python/3.13.1/x64/lib/python3.13/site-packages/uniseg/wordbreak.py:250: in words
breakables = word_breakables(s)
/opt/hostedtoolcache/Python/3.13.1/x64/lib/python3.13/site-packages/uniseg/wordbreak.py:108: in word_breakables
run = Run(s, word_break)
/opt/hostedtoolcache/Python/3.13.1/x64/lib/python3.13/site-packages/uniseg/breaking.py:53: in __init__
self._attributes = [func(c) for c in text]
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
c = 'ü', index = 0
def new_word_break(c, index=0):
if 0xE000 <= ord(c) <= 0xF8FF: # Private Use Area
return uniseg.wordbreak.WordBreak.ALETTER
else:
> return old_word_break(c, index)
E TypeError: word_break() takes 1 positional argument but 2 were given
dinglehopper/word_error_rate.py:29: TypeError
Check failure on line 0 in 3.13-junit.xml
github-actions / Tests Results - 3.13
pytest ► src.dinglehopper.tests.test_integ_cli_valid_json ► test_cli_json
Failed test found in:
3.13-junit.xml
Error:
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/test_cli_json0')
Raw output
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/test_cli_json0')
@pytest.mark.integration
def test_cli_json(tmp_path):
"""Test that the cli/process() yields a loadable JSON report"""
with working_directory(tmp_path):
with open("gt.txt", "w") as gtf:
gtf.write("AAAAA")
with open("ocr.txt", "w") as ocrf:
ocrf.write("AAAAB")
with open("gt.txt", "r") as gtf:
print(gtf.read())
> process("gt.txt", "ocr.txt", "report")
dinglehopper/tests/test_integ_cli_valid_json.py:21:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
dinglehopper/cli.py:126: in process
gt_words: List[str] = list(words_normalized(gt_text))
dinglehopper/word_error_rate.py:77: in _
yield from words_normalized(s.text)
dinglehopper/word_error_rate.py:72: in words_normalized
yield from words(unicodedata.normalize("NFC", s))
dinglehopper/word_error_rate.py:58: in words
for word in uniseg.wordbreak.words(s):
/opt/hostedtoolcache/Python/3.13.1/x64/lib/python3.13/site-packages/uniseg/wordbreak.py:250: in words
breakables = word_breakables(s)
/opt/hostedtoolcache/Python/3.13.1/x64/lib/python3.13/site-packages/uniseg/wordbreak.py:108: in word_breakables
run = Run(s, word_break)
/opt/hostedtoolcache/Python/3.13.1/x64/lib/python3.13/site-packages/uniseg/breaking.py:53: in __init__
self._attributes = [func(c) for c in text]
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
c = 'A', index = 0
def new_word_break(c, index=0):
if 0xE000 <= ord(c) <= 0xF8FF: # Private Use Area
return uniseg.wordbreak.WordBreak.ALETTER
else:
> return old_word_break(c, index)
E TypeError: word_break() takes 1 positional argument but 2 were given
dinglehopper/word_error_rate.py:29: TypeError
Check failure on line 0 in 3.13-junit.xml
github-actions / Tests Results - 3.13
pytest ► src.dinglehopper.tests.test_integ_cli_valid_json ► test_cli_json_cer_is_infinity
Failed test found in:
3.13-junit.xml
Error:
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/test_cli_json_cer_is_infinity0')
Raw output
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/test_cli_json_cer_is_infinity0')
@pytest.mark.integration
def test_cli_json_cer_is_infinity(tmp_path):
"""Test that the cli/process() yields a loadable JSON report when CER == inf"""
with working_directory(tmp_path):
with open("gt.txt", "w") as gtf:
gtf.write("") # Empty to yield CER == inf
with open("ocr.txt", "w") as ocrf:
ocrf.write("Not important")
> process("gt.txt", "ocr.txt", "report")
dinglehopper/tests/test_integ_cli_valid_json.py:39:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
dinglehopper/cli.py:127: in process
ocr_words: List[str] = list(words_normalized(ocr_text))
dinglehopper/word_error_rate.py:77: in _
yield from words_normalized(s.text)
dinglehopper/word_error_rate.py:72: in words_normalized
yield from words(unicodedata.normalize("NFC", s))
dinglehopper/word_error_rate.py:58: in words
for word in uniseg.wordbreak.words(s):
/opt/hostedtoolcache/Python/3.13.1/x64/lib/python3.13/site-packages/uniseg/wordbreak.py:250: in words
breakables = word_breakables(s)
/opt/hostedtoolcache/Python/3.13.1/x64/lib/python3.13/site-packages/uniseg/wordbreak.py:108: in word_breakables
run = Run(s, word_break)
/opt/hostedtoolcache/Python/3.13.1/x64/lib/python3.13/site-packages/uniseg/breaking.py:53: in __init__
self._attributes = [func(c) for c in text]
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
c = 'N', index = 0
def new_word_break(c, index=0):
if 0xE000 <= ord(c) <= 0xF8FF: # Private Use Area
return uniseg.wordbreak.WordBreak.ALETTER
else:
> return old_word_break(c, index)
E TypeError: word_break() takes 1 positional argument but 2 were given
dinglehopper/word_error_rate.py:29: TypeError
Check failure on line 0 in 3.13-junit.xml
github-actions / Tests Results - 3.13
pytest ► src.dinglehopper.tests.test_integ_differences ► test_cli_differences
Failed test found in:
3.13-junit.xml
Error:
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/test_cli_differences0')
Raw output
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/test_cli_differences0')
@pytest.mark.integration
def test_cli_differences(tmp_path):
"""Test that the cli/process() yields a JSON report that includes
the differences found between the GT and OCR text"""
initLogging()
> process(
os.path.join(data_dir, "test-gt.page2018.xml"),
os.path.join(data_dir, "test-fake-ocr.page2018.xml"),
"report",
tmp_path,
differences=True,
)
dinglehopper/tests/test_integ_differences.py:18:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
dinglehopper/cli.py:126: in process
gt_words: List[str] = list(words_normalized(gt_text))
dinglehopper/word_error_rate.py:77: in _
yield from words_normalized(s.text)
dinglehopper/word_error_rate.py:72: in words_normalized
yield from words(unicodedata.normalize("NFC", s))
dinglehopper/word_error_rate.py:58: in words
for word in uniseg.wordbreak.words(s):
/opt/hostedtoolcache/Python/3.13.1/x64/lib/python3.13/site-packages/uniseg/wordbreak.py:250: in words
breakables = word_breakables(s)
/opt/hostedtoolcache/Python/3.13.1/x64/lib/python3.13/site-packages/uniseg/wordbreak.py:108: in word_breakables
run = Run(s, word_break)
/opt/hostedtoolcache/Python/3.13.1/x64/lib/python3.13/site-packages/uniseg/breaking.py:53: in __init__
self._attributes = [func(c) for c in text]
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
c = 'ü', index = 0
def new_word_break(c, index=0):
if 0xE000 <= ord(c) <= 0xF8FF: # Private Use Area
return uniseg.wordbreak.WordBreak.ALETTER
else:
> return old_word_break(c, index)
E TypeError: word_break() takes 1 positional argument but 2 were given
dinglehopper/word_error_rate.py:29: TypeError
Check failure on line 0 in 3.13-junit.xml
github-actions / Tests Results - 3.13
pytest ► src.dinglehopper.tests.test_integ_ocrd_cli ► test_ocrd_cli
Failed test found in:
3.13-junit.xml
Error:
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/test_ocrd_cli0')
Raw output
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/test_ocrd_cli0')
@pytest.mark.integration
@pytest.mark.skipif(sys.platform == "win32", reason="only on unix")
def test_ocrd_cli(tmp_path):
"""Test OCR-D interface"""
# Copy test workspace
test_workspace_dir_source = Path(data_dir) / "actevedef_718448162"
test_workspace_dir = tmp_path / "test_ocrd_cli"
shutil.copytree(test_workspace_dir_source, test_workspace_dir)
# Run through the OCR-D interface
with working_directory(test_workspace_dir):
runner = CliRunner()
args = [
"-m",
"mets.xml",
"-I",
"OCR-D-GT-PAGE,OCR-D-OCR-CALAMARI",
"-O",
"OCR-D-OCR-CALAMARI-EVAL",
]
# Hack to satisfy ocrd_cli_wrap_processor() check for arguments
sys.argv[1:] = args
result = runner.invoke(ocrd_dinglehopper, args)
> assert result.exit_code == 0
E AssertionError: assert 1 == 0
E + where 1 = <Result TypeError('word_break() takes 1 positional argument but 2 were given')>.exit_code
dinglehopper/tests/test_integ_ocrd_cli.py:40: AssertionError
Check failure on line 0 in 3.13-junit.xml
github-actions / Tests Results - 3.13
pytest ► src.dinglehopper.tests.test_integ_word_error_rate_ocr ► test_word_error_rate_between_page_files
Failed test found in:
3.13-junit.xml
Error:
@pytest.mark.integration
Raw output
@pytest.mark.integration
def test_word_error_rate_between_page_files():
# In the fake OCR file, we changed 2 characters and replaced a fi ligature with fi.
# So we have 3 changed words, the ligature does not count → 2 errors
gt = page_text(ET.parse(os.path.join(data_dir, "test-gt.page2018.xml")))
gt_word_count = (
7 + 6 + 5 + 8 + 7 + 6 + 7 + 8 + 6 + 7 + 7 + 5 + 6 + 8 + 8 + 7 + 7 + 6 + 5 + 4
) # Manually verified word count per line
> assert len(list(words(gt))) == gt_word_count
dinglehopper/tests/test_integ_word_error_rate_ocr.py:22:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
dinglehopper/word_error_rate.py:58: in words
for word in uniseg.wordbreak.words(s):
/opt/hostedtoolcache/Python/3.13.1/x64/lib/python3.13/site-packages/uniseg/wordbreak.py:250: in words
breakables = word_breakables(s)
/opt/hostedtoolcache/Python/3.13.1/x64/lib/python3.13/site-packages/uniseg/wordbreak.py:108: in word_breakables
run = Run(s, word_break)
/opt/hostedtoolcache/Python/3.13.1/x64/lib/python3.13/site-packages/uniseg/breaking.py:53: in __init__
self._attributes = [func(c) for c in text]
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
c = 'ü', index = 0
def new_word_break(c, index=0):
if 0xE000 <= ord(c) <= 0xF8FF: # Private Use Area
return uniseg.wordbreak.WordBreak.ALETTER
else:
> return old_word_break(c, index)
E TypeError: word_break() takes 1 positional argument but 2 were given
dinglehopper/word_error_rate.py:29: TypeError
Check failure on line 0 in 3.13-junit.xml
github-actions / Tests Results - 3.13
pytest ► src.dinglehopper.tests.test_integ_word_error_rate_ocr ► test_word_error_rate_between_page_alto
Failed test found in:
3.13-junit.xml
Error:
self = {(<class 'str'>, <class 'str'>): <function word_error_rate_n at 0x7f4f5d42fc40>, (<class 'dinglehopper.extracted_text....f4f5d42fd80>, (<class 'collections.abc.Iterable'>, <class 'collections.abc.Iterable'>): <function _ at 0x7f4f5d42fe20>}
Raw output
self = {(<class 'str'>, <class 'str'>): <function word_error_rate_n at 0x7f4f5d42fc40>, (<class 'dinglehopper.extracted_text....f4f5d42fd80>, (<class 'collections.abc.Iterable'>, <class 'collections.abc.Iterable'>): <function _ at 0x7f4f5d42fe20>}
args = ('Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt\nut labore et dolore...t\njusto duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum\ndolor sit amet.')
kwargs = {}, func = <function word_error_rate_n at 0x7f4f5d42fc40>
def __call__(self, *args, **kwargs):
"""Resolve and dispatch to best method."""
self.evaluate()
func = self.dispatch(*args)
try:
> return func(*args, **kwargs)
/opt/hostedtoolcache/Python/3.13.1/x64/lib/python3.13/site-packages/multimethod/__init__.py:350:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
dinglehopper/word_error_rate.py:82: in word_error_rate_n
reference_seq = list(words_normalized(reference))
dinglehopper/word_error_rate.py:72: in words_normalized
yield from words(unicodedata.normalize("NFC", s))
dinglehopper/word_error_rate.py:58: in words
for word in uniseg.wordbreak.words(s):
/opt/hostedtoolcache/Python/3.13.1/x64/lib/python3.13/site-packages/uniseg/wordbreak.py:250: in words
breakables = word_breakables(s)
/opt/hostedtoolcache/Python/3.13.1/x64/lib/python3.13/site-packages/uniseg/wordbreak.py:108: in word_breakables
run = Run(s, word_break)
/opt/hostedtoolcache/Python/3.13.1/x64/lib/python3.13/site-packages/uniseg/breaking.py:53: in __init__
self._attributes = [func(c) for c in text]
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
c = 'L', index = 0
def new_word_break(c, index=0):
if 0xE000 <= ord(c) <= 0xF8FF: # Private Use Area
return uniseg.wordbreak.WordBreak.ALETTER
else:
> return old_word_break(c, index)
E TypeError: word_break() takes 1 positional argument but 2 were given
dinglehopper/word_error_rate.py:29: TypeError
The above exception was the direct cause of the following exception:
@pytest.mark.integration
def test_word_error_rate_between_page_alto():
gt = page_text(
ET.parse(os.path.join(data_dir, "lorem-ipsum", "lorem-ipsum-scan.gt.page.xml"))
)
ocr = alto_text(
ET.parse(
os.path.join(
data_dir, "lorem-ipsum", "lorem-ipsum-scan.ocr.tesseract.alto.xml"
)
)
)
assert gt == ocr
> assert word_error_rate(gt, ocr) == 0
dinglehopper/tests/test_integ_word_error_rate_ocr.py:42:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
dinglehopper/word_error_rate.py:111: in word_error_rate
wer, _ = word_error_rate_n(reference, compared)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = {(<class 'str'>, <class 'str'>): <function word_error_rate_n at 0x7f4f5d42fc40>, (<class 'dinglehopper.extracted_text....f4f5d42fd80>, (<class 'collections.abc.Iterable'>, <class 'collections.abc.Iterable'>): <function _ at 0x7f4f5d42fe20>}
args = ('Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt\nut labore et dolore...t\njusto duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum\ndolor sit amet.')
kwargs = {}, func = <function word_error_rate_n at 0x7f4f5d42fc40>
def __call__(self, *args, **kwargs):
"""Resolve and dispatch to best method."""
self.evaluate()
func = self.dispatch(*args)
try:
return func(*args, **kwargs)
except TypeError as ex:
> raise DispatchError(f"Function {func.__code__}") from ex
E multimethod.DispatchError: Function <code object word_error_rate_n at 0x7f4f5f3f9930, file "/home/runner/work/dinglehopper/dinglehopper/src/dinglehopper/word_error_rate.py", line 80>
/opt/hostedtoolcache/Python/3.13.1/x64/lib/python3.13/site-packages/multimethod/__init__.py:352: DispatchError
Check failure on line 0 in 3.13-junit.xml
github-actions / Tests Results - 3.13
pytest ► src.dinglehopper.tests.test_integ_word_error_rate_ocr ► test_word_error_rate_between_page_alto_2
Failed test found in:
3.13-junit.xml
Error:
@pytest.mark.integration
Raw output
@pytest.mark.integration
def test_word_error_rate_between_page_alto_2():
gt = page_text(
ET.parse(
os.path.join(data_dir, "lorem-ipsum", "lorem-ipsum-scan-bad.gt.page.xml")
)
)
gt_word_count = (
14 + 18 + 17 + 14 + 17 + 17 + 3
) # Manually verified word count per line
> assert len(list(words(gt))) == gt_word_count
dinglehopper/tests/test_integ_word_error_rate_ocr.py:56:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
dinglehopper/word_error_rate.py:58: in words
for word in uniseg.wordbreak.words(s):
/opt/hostedtoolcache/Python/3.13.1/x64/lib/python3.13/site-packages/uniseg/wordbreak.py:250: in words
breakables = word_breakables(s)
/opt/hostedtoolcache/Python/3.13.1/x64/lib/python3.13/site-packages/uniseg/wordbreak.py:108: in word_breakables
run = Run(s, word_break)
/opt/hostedtoolcache/Python/3.13.1/x64/lib/python3.13/site-packages/uniseg/breaking.py:53: in __init__
self._attributes = [func(c) for c in text]
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
c = 'L', index = 0
def new_word_break(c, index=0):
if 0xE000 <= ord(c) <= 0xF8FF: # Private Use Area
return uniseg.wordbreak.WordBreak.ALETTER
else:
> return old_word_break(c, index)
E TypeError: word_break() takes 1 positional argument but 2 were given
dinglehopper/word_error_rate.py:29: TypeError
Check failure on line 0 in 3.13-junit.xml
github-actions / Tests Results - 3.13
pytest ► src.dinglehopper.tests.test_word_error_rate ► test_words
Failed test found in:
3.13-junit.xml
Error:
def test_words():
Raw output
def test_words():
> result = list(
words("Der schnelle [„braune“] Fuchs kann keine 3,14 Meter springen, oder?")
)
dinglehopper/tests/test_word_error_rate.py:9:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
dinglehopper/word_error_rate.py:58: in words
for word in uniseg.wordbreak.words(s):
/opt/hostedtoolcache/Python/3.13.1/x64/lib/python3.13/site-packages/uniseg/wordbreak.py:250: in words
breakables = word_breakables(s)
/opt/hostedtoolcache/Python/3.13.1/x64/lib/python3.13/site-packages/uniseg/wordbreak.py:108: in word_breakables
run = Run(s, word_break)
/opt/hostedtoolcache/Python/3.13.1/x64/lib/python3.13/site-packages/uniseg/breaking.py:53: in __init__
self._attributes = [func(c) for c in text]
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
c = 'D', index = 0
def new_word_break(c, index=0):
if 0xE000 <= ord(c) <= 0xF8FF: # Private Use Area
return uniseg.wordbreak.WordBreak.ALETTER
else:
> return old_word_break(c, index)
E TypeError: word_break() takes 1 positional argument but 2 were given
dinglehopper/word_error_rate.py:29: TypeError
Check failure on line 0 in 3.13-junit.xml
github-actions / Tests Results - 3.13
pytest ► src.dinglehopper.tests.test_word_error_rate ► test_words_private_use_area
Failed test found in:
3.13-junit.xml
Error:
def test_words_private_use_area():
Raw output
def test_words_private_use_area():
> result = list(
words(
"ber die vielen Sorgen wegen deelben vergaß Hartkopf, "
"der Frau Amtmnnin das ver⸗\n"
"ſproene zu berliefern."
)
)
dinglehopper/tests/test_word_error_rate.py:28:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
dinglehopper/word_error_rate.py:58: in words
for word in uniseg.wordbreak.words(s):
/opt/hostedtoolcache/Python/3.13.1/x64/lib/python3.13/site-packages/uniseg/wordbreak.py:250: in words
breakables = word_breakables(s)
/opt/hostedtoolcache/Python/3.13.1/x64/lib/python3.13/site-packages/uniseg/wordbreak.py:108: in word_breakables
run = Run(s, word_break)
/opt/hostedtoolcache/Python/3.13.1/x64/lib/python3.13/site-packages/uniseg/breaking.py:53: in __init__
self._attributes = [func(c) for c in text]
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
c = '\ue72b', index = 0
def new_word_break(c, index=0):
if 0xE000 <= ord(c) <= 0xF8FF: # Private Use Area
> return uniseg.wordbreak.WordBreak.ALETTER
E AttributeError: module 'uniseg.wordbreak' has no attribute 'WordBreak'. Did you mean: 'Word_Break'?
dinglehopper/word_error_rate.py:27: AttributeError