Skip to content

Commit b47064d

Browse files
committed
🎨 removing unnecessary underscores
1 parent 444838e commit b47064d

File tree

4 files changed

+20
-20
lines changed

4 files changed

+20
-20
lines changed

hespi/hespi.py

+4-4
Original file line number | Diff line number | Diff line change
@@ -286,9 +286,9 @@ def institutional_label_detect(self, component, stub, output_dir) -> Dict:
286286
counter = Counter(best_engine_results)
287287
preferred_engine = counter.most_common(1)[0][0]
288288
elif detection_results.get('label_classification', None) in ["printed", "typewriter"]:
289-
preferred_engine = '_Tesseract'
289+
preferred_engine = 'Tesseract'
290290
else:
291-
preferred_engine = '_TrOCR'
291+
preferred_engine = 'TrOCR'
292292

293293
# Determining Recognised Text for fields not in the reference database
294294
for key, detection_result in detection_results.items():
@@ -362,7 +362,7 @@ def read_field_file(
362362

363363
detection_results[f"{field}_ocr_results"].append(
364364
{
365-
'ocr': '_TrOCR',
365+
'ocr': 'TrOCR',
366366
'original_text_detected': htr_text,
367367
'adjusted_text': adjusted_text,
368368
'match_score': match_score,
@@ -386,7 +386,7 @@ def read_field_file(
386386

387387
detection_results[f"{field}_ocr_results"].append(
388388
{
389-
'ocr': '_Tesseract',
389+
'ocr': 'Tesseract',
390390
'original_text_detected': tesseract_text,
391391
'adjusted_text': adjusted_text,
392392
'match_score': match_score,

hespi/util.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -62,11 +62,11 @@ def process_row_ocr_results(row, field_name):
6262
tesseract_match_score = []
6363

6464
for d in row:
65-
if d['ocr'] == '_TrOCR':
65+
if d['ocr'] == 'TrOCR':
6666
trocr_original.append(d['original_text_detected'])
6767
trocr_adjusted.append(d['adjusted_text'])
6868
trocr_match_score.append(d['match_score'])
69-
elif d['ocr'] == '_Tesseract':
69+
elif d['ocr'] == 'Tesseract':
7070
tesseract_original.append(d['original_text_detected'])
7171
tesseract_adjusted.append(d['adjusted_text'])
7272
tesseract_match_score.append(d['match_score'])

tests/test_hespi.py

+11-11
Original file line number | Diff line number | Diff line change
@@ -131,7 +131,7 @@ def test_read_field_file_tesseract_only():
131131
assert len(result["species_image"]) == 1
132132
assert result["species_image"][0] == image
133133
assert len(result['species_ocr_results']) == 1
134-
assert result["species_ocr_results"][0]['ocr'] == '_Tesseract'
134+
assert result["species_ocr_results"][0]['ocr'] == 'Tesseract'
135135
assert result["species_ocr_results"][0]['original_text_detected'] == 'zOstericolaXX'
136136
assert result["species_ocr_results"][0]['adjusted_text'] == 'zostericola'
137137
assert result["species_ocr_results"][0]['match_score'] == 0.917
@@ -150,12 +150,12 @@ def test_read_field_file_htr():
150150
assert result["species_image"][0] == image
151151
assert len(result["species_ocr_results"]) == 2
152152

153-
assert result["species_ocr_results"][0]['ocr'] == '_TrOCR'
153+
assert result["species_ocr_results"][0]['ocr'] == 'TrOCR'
154154
assert result["species_ocr_results"][0]['original_text_detected'] == 'zostericolaX'
155155
assert result["species_ocr_results"][0]['adjusted_text'] == 'zostericolax'
156156
assert result["species_ocr_results"][0]['match_score'] == ""
157157

158-
assert result["species_ocr_results"][1]['ocr'] == '_Tesseract'
158+
assert result["species_ocr_results"][1]['ocr'] == 'Tesseract'
159159
assert result["species_ocr_results"][1]['original_text_detected'] == 'zOstericolaXX'
160160
assert result["species_ocr_results"][1]['adjusted_text'] == 'zostericolaxx'
161161
assert result["species_ocr_results"][1]['match_score'] == ""
@@ -174,12 +174,12 @@ def test_read_field_file_fuzzy():
174174
assert result["species_image"][0] == image
175175
assert len(result["species_ocr_results"]) == 2
176176

177-
assert result["species_ocr_results"][0]['ocr'] == '_TrOCR'
177+
assert result["species_ocr_results"][0]['ocr'] == 'TrOCR'
178178
assert result["species_ocr_results"][0]['original_text_detected'] == 'zostericolaX'
179179
assert result["species_ocr_results"][0]['adjusted_text'] == 'zostericola'
180180
assert result["species_ocr_results"][0]['match_score'] == 0.957
181181

182-
assert result["species_ocr_results"][1]['ocr'] == '_Tesseract'
182+
assert result["species_ocr_results"][1]['ocr'] == 'Tesseract'
183183
assert result["species_ocr_results"][1]['original_text_detected'] == 'zOstericolaXX'
184184
assert result["species_ocr_results"][1]['adjusted_text'] == 'zostericola'
185185
assert result["species_ocr_results"][1]['match_score'] == 0.917
@@ -216,12 +216,12 @@ def test_institutional_label_detect(mock_yolo_output):
216216
assert len(result["species_image"]) == 1
217217
assert len(result["species_ocr_results"]) == 2
218218

219-
assert result["species_ocr_results"][0]['ocr'] == '_TrOCR'
219+
assert result["species_ocr_results"][0]['ocr'] == 'TrOCR'
220220
assert result["species_ocr_results"][0]['original_text_detected'] == 'zostericolaX'
221221
assert result["species_ocr_results"][0]['adjusted_text'] == 'zostericola'
222222
assert result["species_ocr_results"][0]['match_score'] == 0.957
223223

224-
assert result["species_ocr_results"][1]['ocr'] == '_Tesseract'
224+
assert result["species_ocr_results"][1]['ocr'] == 'Tesseract'
225225
assert result["species_ocr_results"][1]['original_text_detected'] == 'zOstericolaXX'
226226
assert result["species_ocr_results"][1]['adjusted_text'] == 'zostericola'
227227
assert result["species_ocr_results"][1]['match_score'] == 0.917
@@ -329,7 +329,7 @@ def test_determine_best_ocr_result_non_reference():
329329
assert best_match_score == ""
330330
assert best_engine == ""
331331

332-
best_text, best_match_score, best_engine = hespi.determine_best_ocr_result(result['location_ocr_results'], preferred_engine="_TrOCR")
332+
best_text, best_match_score, best_engine = hespi.determine_best_ocr_result(result['location_ocr_results'], preferred_engine="TrOCR")
333333
assert best_text == "Queenscliff"
334334
assert best_match_score == ""
335335
assert best_engine == ""
@@ -345,12 +345,12 @@ def test_determine_best_ocr_result_reference():
345345
best_text, best_match_score, best_engine = hespi.determine_best_ocr_result(result['species_ocr_results'])
346346
assert best_text == "zostericola"
347347
assert best_match_score == 0.957
348-
assert best_engine == "_TrOCR"
348+
assert best_engine == "TrOCR"
349349

350-
best_text, best_match_score, best_engine = hespi.determine_best_ocr_result(result['species_ocr_results'], preferred_engine="_Tesseract")
350+
best_text, best_match_score, best_engine = hespi.determine_best_ocr_result(result['species_ocr_results'], preferred_engine="Tesseract")
351351
assert best_text == "zostericola"
352352
assert best_match_score == 0.957
353-
assert best_engine == "_TrOCR"
353+
assert best_engine == "TrOCR"
354354

355355

356356
def test_determine_best_ocr_result_single():

tests/test_util.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -128,9 +128,9 @@ def test_ocr_data_df_ocr_results():
128128
"family":"family",
129129
"id":"id",
130130
"species_ocr_results": [
131-
dict(ocr="_TrOCR", original_text_detected="zostericolumXX", adjusted_text="zostericolum", match_score=0.9),
132-
dict(ocr="_TrOCR", original_text_detected="z", adjusted_text="z", match_score=0),
133-
dict(ocr="_Tesseract", original_text_detected="zasdfoppasf", adjusted_text="zasdfoppasf", match_score=''),
131+
dict(ocr="TrOCR", original_text_detected="zostericolumXX", adjusted_text="zostericolum", match_score=0.9),
132+
dict(ocr="TrOCR", original_text_detected="z", adjusted_text="z", match_score=0),
133+
dict(ocr="Tesseract", original_text_detected="zasdfoppasf", adjusted_text="zasdfoppasf", match_score=''),
134134
],
135135
"extra": [],
136136
}

0 commit comments

Comments
 (0)