Skip to content

Commit

Permalink
Merge branch 'bugger' into dev
Browse files Browse the repository at this point in the history
# Conflicts:
#	include/tesseract/baseapi.h
#	src/api/baseapi.cpp
#	src/api/pagerenderer.cpp
#	src/ccutil/ccutil.cpp
#	src/tesseract.cpp
  • Loading branch information
GerHobbelt committed Jul 7, 2024
2 parents ced709c + e84f1f4 commit da9d4d1
Show file tree
Hide file tree
Showing 13 changed files with 33 additions and 32 deletions.
5 changes: 3 additions & 2 deletions include/tesseract/baseapi.h
Original file line number Diff line number Diff line change
Expand Up @@ -830,7 +830,7 @@ class TESS_API TessBaseAPI {
*/
char *GetAltoText(int page_number);

/**
/**
* Make an XML-formatted string with PAGE markup from the internal
* data structures.
*
Expand All @@ -841,12 +841,13 @@ class TESS_API TessBaseAPI {
/**
* Make an XML-formatted string with PAGE markup from the internal
* data structures.
*
* Returned string must be freed with the delete [] operator.
*/
char *GetPAGEText(int page_number);

/**
* Make a TSV-formatted string from the internal data structures.
* Allows an additional column with the detected language.
* page_number is 0-based but will appear in the output as 1-based.
*
* Returned string must be freed with the delete [] operator.
Expand Down
2 changes: 0 additions & 2 deletions src/api/baseapi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3518,8 +3518,6 @@ void TessBaseAPI::SetupDebugAllPreset() {
tess.debug_baseline_fit.set_value(1, SRC); // 0..3
tess.debug_baseline_y_coord.set_value(-2000, SRC);

tess.showcase_threshold_methods.set_value((debug_all > 2), SRC);

tess.debug_write_unlv.set_value(true, SRC);
tess.debug_line_finding.set_value(true, SRC);
tess.debug_image_normalization.set_value(true, SRC);
Expand Down
19 changes: 4 additions & 15 deletions src/api/pagerenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@
#include <unordered_set>

#include <leptonica/allheaders.h>
#include <leptonica/pix_internal.h>
#include <leptonica/array_internal.h>
#include <leptonica/pix_internal.h>

#include <tesseract/renderer.h>
#include "tesseractclass.h" // for Tesseract
Expand Down Expand Up @@ -722,19 +722,6 @@ char *TessBaseAPI::GetPAGEText(ETEXT_DESC *monitor, int page_number) {
SetInputName(nullptr);
}

#if defined(_WIN32) && 0
// convert input name from ANSI encoding to utf-8
int str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, nullptr, 0);
wchar_t *uni16_str = new WCHAR[str16_len];
str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, uni16_str, str16_len);
int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr, 0, nullptr, nullptr);
char *utf8_str = new char[utf8_len];
WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, utf8_len, nullptr, nullptr);
input_file_ = utf8_str;
delete[] uni16_str;
delete[] utf8_str;
#endif

// Used variables

std::stringstream reading_order_str;
Expand Down Expand Up @@ -771,7 +758,9 @@ char *TessBaseAPI::GetPAGEText(ETEXT_DESC *monitor, int page_number) {
#define LEVELFLAG tesseract_->page_xml_level

if (LEVELFLAG != 0 && LEVELFLAG != 1) {
tprintError("For now, only line level and word level are available, and the level is reset to line level.\n");
tprintWarn(
"page_xml_level: for now, only line level (0) and word level (1) are available, and the level "
"is reset to line level.\n");
LEVELFLAG = 0;
}

Expand Down
6 changes: 3 additions & 3 deletions src/ccstruct/pageres.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1227,7 +1227,7 @@ int PAGE_RES_IT::cmp(const PAGE_RES_IT &other) const {
return 1;
}
}
ASSERT_HOST(!"Error: Incomparable PAGE_RES_ITs");
ASSERT_HOST_MSG(false, "Error: Incomparable PAGE_RES_ITs");
}

// we both point to the same block, but different rows.
Expand All @@ -1240,7 +1240,7 @@ int PAGE_RES_IT::cmp(const PAGE_RES_IT &other) const {
return 1;
}
}
ASSERT_HOST(!"Error: Incomparable PAGE_RES_ITs");
ASSERT_HOST_MSG(false, "Error: Incomparable PAGE_RES_ITs");
}

// We point to different blocks.
Expand All @@ -1254,7 +1254,7 @@ int PAGE_RES_IT::cmp(const PAGE_RES_IT &other) const {
}
}
// Shouldn't happen...
ASSERT_HOST(!"Error: Incomparable PAGE_RES_ITs");
ASSERT_HOST_MSG(false, "Error: Incomparable PAGE_RES_ITs");
return 0;
}

Expand Down
2 changes: 1 addition & 1 deletion src/ccutil/ccutil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ static bool determine_datadir(std::string &datadir, const std::string &argv0, co

// Ignore TESSDATA_PREFIX if there is no matching filesystem entry.
if (tessdata_prefix != nullptr && !std::filesystem::exists(tessdata_prefix)) {
tprintf("Warning: TESSDATA_PREFIX %s does not exist, ignore it\n", tessdata_prefix);
tprintWarn("Environment variable TESSDATA_PREFIX's value '{}' is not a directory that exists in your filesystem; tesseract will ignore it.\n", tessdata_prefix);
tessdata_prefix = nullptr;
}

Expand Down
2 changes: 1 addition & 1 deletion src/ccutil/tprintf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ static loglevel_return_type ParseLogLevel(const char *loglevel) {
std::transform(loglevel_string.begin(), loglevel_string.end(), loglevel_string.begin(), ::toupper);
int loglevel = loglevels.at(loglevel_string);
return loglevel;
} catch (const std::out_of_range &e) {
} catch (const std::out_of_range &) {
// TODO: Allow numeric argument?
tprintError("Unsupported --loglevel {}\n", loglevel);
return cpp::fail(false);
Expand Down
10 changes: 10 additions & 0 deletions src/ccutil/winutils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,16 @@ std::string Utf16ToUtf8(const wchar_t* utf16Str) {
return result;
}

// Convert a NUL-terminated string from the system ANSI code page (CP_ACP)
// to UTF-8.
//
// AnsiStr: NUL-terminated input text; a null pointer is treated as an
//          empty string.
// Returns the UTF-8 encoded text, or an empty string when the input cannot
// be converted (MultiByteToWideChar signals failure by returning 0).
std::string AnsiToUtf8(const char *AnsiStr) {
  if (AnsiStr == nullptr) {
    return std::string();
  }

  // First pass: ask for the required buffer size in wide characters. Because
  // the input length is given as -1, the reported size includes the
  // terminating NUL.
  const int required_len = MultiByteToWideChar(CP_ACP, 0, AnsiStr, -1, nullptr, 0);
  if (required_len <= 0) {
    return std::string();
  }

  // std::wstring owns the intermediate UTF-16 buffer, so it is released on
  // every exit path without a manual delete[].
  std::wstring uni16_str(static_cast<std::wstring::size_type>(required_len), L'\0');

  // Second pass: perform the actual conversion into the buffer.
  const int written_len =
      MultiByteToWideChar(CP_ACP, 0, AnsiStr, -1, &uni16_str[0], required_len);
  if (written_len <= 0) {
    // Never hand an unconverted (possibly unterminated) buffer to Utf16ToUtf8.
    return std::string();
  }

  return Utf16ToUtf8(uni16_str.c_str());
}

} // namespace winutils
} // namespace tesseract

Expand Down
1 change: 1 addition & 0 deletions src/ccutil/winutils.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ namespace winutils {

// String encoding helpers for Windows builds.

// Takes a NUL-terminated narrow string and returns a std::wstring.
// NOTE(review): presumably a UTF-8 -> UTF-16 conversion, going by the name;
// confirm against the definition in winutils.cpp.
std::wstring Utf8ToUtf16(const char* utf8Str);
// Takes a NUL-terminated wide string and returns a std::string.
// NOTE(review): presumably a UTF-16 -> UTF-8 conversion, going by the name;
// confirm against the definition in winutils.cpp.
std::string Utf16ToUtf8(const wchar_t* utf16Str);
// Converts a NUL-terminated string in the system ANSI code page (CP_ACP) to
// UTF-8, going through UTF-16 via MultiByteToWideChar and Utf16ToUtf8.
std::string AnsiToUtf8(const char *AnsiStr);

} // namespace winutils
} // namespace tesseract
Expand Down
6 changes: 3 additions & 3 deletions src/tesseract.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -767,7 +767,7 @@ static bool PreloadRenderers(tesseract::TessBaseAPI &api,
if (renderer->happy()) {
renderers.push_back(std::move(renderer));
} else {
tprintError("Could not create PAGE output file: %s\n", strerror(errno));
tprintError("Could not create PAGE output file: {}\n", strerror(errno));
error = true;
}
}
Expand Down Expand Up @@ -899,7 +899,7 @@ static inline auto format_as(WritingDirection d) {

}

static void SetupDebugAllPreset(TessBaseAPI &api)
void SetupDebugAllPreset(TessBaseAPI &api)
{
if (debug_all) {
api.SetupDebugAllPreset();
Expand Down Expand Up @@ -1067,7 +1067,7 @@ extern "C" int tesseract_main(int argc, const char** argv)
// second: grab the output_base_path if we haven't already:
if (!outputbasepath_is_specified) {
if (path_params.size() < 2) {
tprintError("Error, missing outputbase command line argument\n");
tprintError("Missing outputbase command line argument\n");
return EXIT_FAILURE;
}

Expand Down
8 changes: 4 additions & 4 deletions src/textord/textord.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,12 +224,12 @@ void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int wi
// Make the rows in the block.
// Do it the old fashioned way.
if (PSM_LINE_FIND_ENABLED(pageseg_mode)) {
gradient = make_rows(page_tr_, to_blocks);
*gradient = make_rows(page_tr_, to_blocks);
} else if (!PSM_SPARSE(pageseg_mode)) {
// RAW_LINE, SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row.
gradient = make_single_row(page_tr_, pageseg_mode != PSM_RAW_LINE, to_block, to_blocks);
*gradient = make_single_row(page_tr_, pageseg_mode != PSM_RAW_LINE, to_block, to_blocks);
} else {
gradient = 0.0f;
*gradient = 0.0f;
}
const int debug_level_offset = std::max(0, textord_baseline_debug + 0 /* to implicitly use operator int() on the referenced variable */);
int old_level = debug_baseline_detector_level;
Expand All @@ -241,7 +241,7 @@ void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int wi
// Now make the words in the lines.
if (PSM_WORD_FIND_ENABLED(pageseg_mode)) {
// SINGLE_LINE uses the old word maker on the single line.
make_words(this, page_tr_, gradient, blocks, to_blocks);
make_words(this, page_tr_, *gradient, blocks, to_blocks);
} else {
// SINGLE_WORD and SINGLE_CHAR cram all the blobs into a
// single word, and in SINGLE_CHAR mode, all the outlines
Expand Down
2 changes: 1 addition & 1 deletion src/training/combine_tessdata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ extern "C" int tesseract_combine_tessdata_main(int argc, const char** argv)
tprintError("No LSTM Component found in {}!\n", argv[2]);
return EXIT_FAILURE;
}
tesseract::LSTMRecognizer recognizer;
tesseract::LSTMRecognizer recognizer(nullptr);
recognizer.SetDebug(tess_debug_lstm);
if (!recognizer.DeSerialize(&tm, &fp)) {
tprintError("Failed to deserialize LSTM in {}!\n", argv[2]);
Expand Down
1 change: 1 addition & 0 deletions src/training/common/commontraining.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ int ParseArguments(int* argc, const char ***argv) {
# include "mastertrainer.h"
# include "mf.h"
# include "oldlist.h"
# include "commandlineflags.h"
# include <tesseract/params.h>
# include "shapetable.h"
# include "tessdatamanager.h"
Expand Down
1 change: 1 addition & 0 deletions src/training/common/trainingsampleset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "trainingsample.h"
#include "trainingsampleset.h"
#include "unicity_table.h"
#include "commandlineflags.h"

namespace tesseract {

Expand Down

0 comments on commit da9d4d1

Please sign in to comment.