@@ -131,7 +131,7 @@ def test_read_field_file_tesseract_only():
131
131
assert len (result ["species_image" ]) == 1
132
132
assert result ["species_image" ][0 ] == image
133
133
assert len (result ['species_ocr_results' ]) == 1
134
- assert result ["species_ocr_results" ][0 ]['ocr' ] == '_Tesseract '
134
+ assert result ["species_ocr_results" ][0 ]['ocr' ] == 'Tesseract '
135
135
assert result ["species_ocr_results" ][0 ]['original_text_detected' ] == 'zOstericolaXX'
136
136
assert result ["species_ocr_results" ][0 ]['adjusted_text' ] == 'zostericola'
137
137
assert result ["species_ocr_results" ][0 ]['match_score' ] == 0.917
@@ -150,12 +150,12 @@ def test_read_field_file_htr():
150
150
assert result ["species_image" ][0 ] == image
151
151
assert len (result ["species_ocr_results" ]) == 2
152
152
153
- assert result ["species_ocr_results" ][0 ]['ocr' ] == '_TrOCR '
153
+ assert result ["species_ocr_results" ][0 ]['ocr' ] == 'TrOCR '
154
154
assert result ["species_ocr_results" ][0 ]['original_text_detected' ] == 'zostericolaX'
155
155
assert result ["species_ocr_results" ][0 ]['adjusted_text' ] == 'zostericolax'
156
156
assert result ["species_ocr_results" ][0 ]['match_score' ] == ""
157
157
158
- assert result ["species_ocr_results" ][1 ]['ocr' ] == '_Tesseract '
158
+ assert result ["species_ocr_results" ][1 ]['ocr' ] == 'Tesseract '
159
159
assert result ["species_ocr_results" ][1 ]['original_text_detected' ] == 'zOstericolaXX'
160
160
assert result ["species_ocr_results" ][1 ]['adjusted_text' ] == 'zostericolaxx'
161
161
assert result ["species_ocr_results" ][1 ]['match_score' ] == ""
@@ -174,12 +174,12 @@ def test_read_field_file_fuzzy():
174
174
assert result ["species_image" ][0 ] == image
175
175
assert len (result ["species_ocr_results" ]) == 2
176
176
177
- assert result ["species_ocr_results" ][0 ]['ocr' ] == '_TrOCR '
177
+ assert result ["species_ocr_results" ][0 ]['ocr' ] == 'TrOCR '
178
178
assert result ["species_ocr_results" ][0 ]['original_text_detected' ] == 'zostericolaX'
179
179
assert result ["species_ocr_results" ][0 ]['adjusted_text' ] == 'zostericola'
180
180
assert result ["species_ocr_results" ][0 ]['match_score' ] == 0.957
181
181
182
- assert result ["species_ocr_results" ][1 ]['ocr' ] == '_Tesseract '
182
+ assert result ["species_ocr_results" ][1 ]['ocr' ] == 'Tesseract '
183
183
assert result ["species_ocr_results" ][1 ]['original_text_detected' ] == 'zOstericolaXX'
184
184
assert result ["species_ocr_results" ][1 ]['adjusted_text' ] == 'zostericola'
185
185
assert result ["species_ocr_results" ][1 ]['match_score' ] == 0.917
@@ -216,12 +216,12 @@ def test_institutional_label_detect(mock_yolo_output):
216
216
assert len (result ["species_image" ]) == 1
217
217
assert len (result ["species_ocr_results" ]) == 2
218
218
219
- assert result ["species_ocr_results" ][0 ]['ocr' ] == '_TrOCR '
219
+ assert result ["species_ocr_results" ][0 ]['ocr' ] == 'TrOCR '
220
220
assert result ["species_ocr_results" ][0 ]['original_text_detected' ] == 'zostericolaX'
221
221
assert result ["species_ocr_results" ][0 ]['adjusted_text' ] == 'zostericola'
222
222
assert result ["species_ocr_results" ][0 ]['match_score' ] == 0.957
223
223
224
- assert result ["species_ocr_results" ][1 ]['ocr' ] == '_Tesseract '
224
+ assert result ["species_ocr_results" ][1 ]['ocr' ] == 'Tesseract '
225
225
assert result ["species_ocr_results" ][1 ]['original_text_detected' ] == 'zOstericolaXX'
226
226
assert result ["species_ocr_results" ][1 ]['adjusted_text' ] == 'zostericola'
227
227
assert result ["species_ocr_results" ][1 ]['match_score' ] == 0.917
@@ -329,7 +329,7 @@ def test_determine_best_ocr_result_non_reference():
329
329
assert best_match_score == ""
330
330
assert best_engine == ""
331
331
332
- best_text , best_match_score , best_engine = hespi .determine_best_ocr_result (result ['location_ocr_results' ], preferred_engine = "_TrOCR " )
332
+ best_text , best_match_score , best_engine = hespi .determine_best_ocr_result (result ['location_ocr_results' ], preferred_engine = "TrOCR " )
333
333
assert best_text == "Queenscliff"
334
334
assert best_match_score == ""
335
335
assert best_engine == ""
@@ -345,12 +345,12 @@ def test_determine_best_ocr_result_reference():
345
345
best_text , best_match_score , best_engine = hespi .determine_best_ocr_result (result ['species_ocr_results' ])
346
346
assert best_text == "zostericola"
347
347
assert best_match_score == 0.957
348
- assert best_engine == "_TrOCR "
348
+ assert best_engine == "TrOCR "
349
349
350
- best_text , best_match_score , best_engine = hespi .determine_best_ocr_result (result ['species_ocr_results' ], preferred_engine = "_Tesseract " )
350
+ best_text , best_match_score , best_engine = hespi .determine_best_ocr_result (result ['species_ocr_results' ], preferred_engine = "Tesseract " )
351
351
assert best_text == "zostericola"
352
352
assert best_match_score == 0.957
353
- assert best_engine == "_TrOCR "
353
+ assert best_engine == "TrOCR "
354
354
355
355
356
356
def test_determine_best_ocr_result_single ():
0 commit comments