Skip to content

Commit

Permalink
- introducing spdlog / libdiagnostics based logging/diagnostics outpu…
Browse files Browse the repository at this point in the history
…t over tprintf*() functions.

- remove experimental LogLevelElevation code.
- make sure the debug_all preset is set up BEFORE any command-line arguments direct tesseract to set some arbitrary parameters just below, for otherwise those `-c xyz=v` commands may be overruled by the debug_all preset!
- clean up the CLI options/commands parsing & processing: detect and process the various commands more obviously and use an enum/int variable to encode the active command to be executed by tesseract.
  • Loading branch information
GerHobbelt committed May 7, 2024
1 parent 27b130b commit 4c97f35
Show file tree
Hide file tree
Showing 6 changed files with 434 additions and 503 deletions.
49 changes: 49 additions & 0 deletions src/ccmain/tessedit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,55 @@ void Tesseract::ParseLanguageString(const std::string &lang_str, std::vector<std
}
}

// Parse a string of the form `<box>[+<box>]*` where each box is given as
// `lNtNwNhN` (left/top/width/height) or `lNtNrNbN` (left/top/right/bottom),
// with every `N` being a numeric value.
//
// Boxes may be separated by any mix of '+', ',', ';', ':' and spaces; leading
// and trailing separators are ignored. The field letters may be given in upper
// or lower case.
//
// Returns a BOXA instance (array of BOX coordinates) on success; the caller
// receives the only reference to it. Returns nullptr on failure: a null spec,
// a spec without any box, a box that matches neither format, or a box for
// which boxCreateValid() does not produce a BOX.
// Errors are reported via tprintError() as they happen.
BOXA *Tesseract::ParseRectsString(const char *rects_str) {
  if (rects_str == nullptr) {
    tprintError("Rectangle spec is missing: no string was specified.\n");
    return nullptr;
  }

  // Dev Note: the tokenizer is classic C (tokens are NUL-terminated in place);
  // the std::string only owns the scratch copy, so that copy is released on
  // every return path.
  std::string scratch(rects_str);

  // also match ',', ';', ':' and ' ', as well as '+', in case the user used one of those separators instead of '+':
  static const char separators[] = " ,:;+";

  BOXA *boxa = boxaCreate(100);
  int box_count = 0;
  char *token = scratch.data();
  for (;;) {
    // skip the separators in front of the next token.
    token += strspn(token, separators);
    if (*token == 0) {
      // nothing but (optional) trailing separators was left.
      break;
    }

    // the token runs up to the next separator or the end of the spec.
    const size_t len = strcspn(token, separators);
    const bool eol = (token[len] == 0);
    if (!eol) {
      token[len] = 0;
    }

    // as an extra service, convert to lowercase before parsing:
    strlwr(token);

    BOX *box = nullptr;
    int left, top, width, height, right, bottom;
    if (sscanf(token, "l%dt%dw%dh%d", &left, &top, &width, &height) == 4) {
      box = boxCreateValid(left, top, width, height);
    } else if (sscanf(token, "l%dt%dr%db%d", &left, &top, &right, &bottom) == 4) {
      box = boxCreateValid(left, top, right - left, bottom - top);
    } else {
      tprintError("Rectangle spec line part '{}' does not match either of the supported formats LTWH or LTRB, f.e. something akin to 'l30t60w50h100'. Your line:\n {}\n", token, rects_str);
      boxaDestroy(&boxa);
      return nullptr;
    }

    if (box == nullptr) {
      // the numbers parsed fine, but they do not describe a rectangle leptonica accepts.
      tprintError("Rectangle spec line part '{}' does not describe a valid rectangle. Your line:\n {}\n", token, rects_str);
      boxaDestroy(&boxa);
      return nullptr;
    }
    // L_INSERT hands the box over to the array.
    boxaAddBox(boxa, box, L_INSERT);
    ++box_count;

    if (eol) {
      break;
    }
    // step over the token and the NUL that replaced its separator.
    token += len + 1;
  }

  if (box_count == 0) {
    tprintError("Rectangle spec does not contain any rectangle; expected something akin to 'l30t60w50h100'. Your line:\n {}\n", rects_str);
    boxaDestroy(&boxa);
    return nullptr;
  }
  return boxa;
}


// Initialize for potentially a set of languages defined by the language
// string and recursively any additional languages required by any language
// traineddata file (via tessedit_load_sublangs in its config) that is loaded.
Expand Down
14 changes: 7 additions & 7 deletions src/ccutil/helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ inline void chomp_string(char *str) {
}

// return the smallest multiple of block_size greater than or equal to n.
inline int RoundUp(int n, int block_size) {
static inline int RoundUp(int n, int block_size) {
  // Number of blocks needed to hold n: integer division, rounded up by
  // adding (block_size - 1) before dividing.
  const int num_blocks = (n + block_size - 1) / block_size;
  return num_blocks * block_size;
}

Expand Down Expand Up @@ -187,7 +187,7 @@ inline void IntersectRange(const T &lower1, const T &upper1, T *lower2, T *upper
// Proper modulo arithmetic operator. Returns a mod b that works for -ve a.
// For any integer a and positive b, returns r : 0<=r<b and a=n*b + r for
// some integer n.
inline int Modulo(int a, int b) {
static inline int Modulo(int a, int b) {
  // `%` may yield a negative remainder for negative a; shifting by b and
  // reducing once more maps it into [0, b) for positive b.
  const int remainder = a % b;
  const int shifted = remainder + b;
  return shifted % b;
}

Expand All @@ -196,29 +196,29 @@ inline int Modulo(int a, int b) {
// counting at 0. With simple rounding 1/3 = 0, 0/3 = 0, -1/3 = 0, -2/3 = 0,
// -3/3 = 0 and -4/3 = -1.
// I want 1/3 = 0, 0/3 = 0, -1/3 = 0, -2/3 = -1, -3/3 = -1 and -4/3 = -1.
inline int DivRounded(int a, int b) {
static inline int DivRounded(int a, int b) {
  // Work with a non-negative divisor and remember whether the result has to
  // be negated afterwards.
  const bool negate = (b < 0);
  const int divisor = negate ? -b : b;
  const int half = divisor / 2;

  // Push the numerator half a divisor away from zero before the truncating
  // division, so the quotient is rounded to nearest.
  int quotient;
  if (a >= 0) {
    quotient = (a + half) / divisor;
  } else {
    quotient = (a - half) / divisor;
  }
  return negate ? -quotient : quotient;
}

// Return a double cast to int with rounding.
inline int IntCastRounded(double x) {
static inline int IntCastRounded(double x) {
  // The value must be a real number strictly inside the int range.
  ASSERT0(std::isfinite(x));
  ASSERT0(x < INT_MAX);
  ASSERT0(x > INT_MIN);
  // Round half away from zero: add 0.5 to the magnitude, truncate, then
  // restore the sign.
  if (x >= 0.0) {
    return static_cast<int>(x + 0.5);
  }
  return -static_cast<int>(-x + 0.5);
}

// Return a float cast to int with rounding.
inline int IntCastRounded(float x) {
static inline int IntCastRounded(float x) {
  ASSERT0(std::isfinite(x));
  // Round half away from zero, staying in single precision: add 0.5 to the
  // magnitude, truncate, then restore the sign.
  if (x >= 0.0F) {
    return static_cast<int>(x + 0.5F);
  }
  return -static_cast<int>(-x + 0.5F);
}

// Reverse the order of bytes in a n byte quantity for big/little-endian switch.
inline void ReverseN(void *ptr, int num_bytes) {
static inline void ReverseN(void *ptr, int num_bytes) {
ASSERT0(num_bytes == 1 || num_bytes == 2 || num_bytes == 4 || num_bytes == 8);
char *cptr = static_cast<char *>(ptr);
int halfsize = num_bytes / 2;
Expand All @@ -230,7 +230,7 @@ inline void ReverseN(void *ptr, int num_bytes) {
}

// Reverse the order of bytes in a 32 bit quantity for big/little-endian switch.
inline void Reverse32(void *ptr) {
static inline void Reverse32(void *ptr) {
  // A 32 bit quantity occupies 4 bytes; ReverseN does the actual byte swap.
  constexpr int kNumBytes = 4;
  ReverseN(ptr, kNumBytes);
}

Expand Down
2 changes: 1 addition & 1 deletion src/ccutil/pathutils.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ namespace fs {
using namespace ::std::filesystem;
#endif

bool exists(const char *filename) {
static inline bool exists(const char *filename) {
#if defined(WIN32) || defined(_WIN32) || defined(_WIN64)
return _access(filename, 0) == 0;
#else
Expand Down
103 changes: 62 additions & 41 deletions src/ccutil/tprintf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
#include <climits> // for INT_MAX
#include <cstdio>

#include <diagnostics/diagnostics.h>

#ifdef HAVE_MUPDF

#include "mupdf/fitz/config.h"
Expand All @@ -40,62 +42,81 @@

namespace tesseract {

#ifdef HAVE_MUPDF
static void assert_that_a_spdlog_sink_and_logger_are_active() {
std::shared_ptr<spdlog::logger> logger = spdlog::default_logger();
ASSERT0(!!logger);
const std::vector<spdlog::sink_ptr> &sinks = logger->sinks();
ASSERT0(!sinks.empty());
}


// Warning: tprintf() is invoked in tesseract for PARTIAL lines, so we SHOULD gather these fragments
// here before dispatching the gathered lines to the appropriate back-end API!
static void fz_tess_tprintf(int level, fmt::string_view format, fmt::format_args args) {
static int block_level = T_LOG_DEBUG;
// Formats one tprintf() fragment and, once a complete (newline-terminated)
// message has been gathered, forwards that message to spdlog.
//
// - level:  severity of this fragment (a LogLevel value; lower value = more severe).
// - format, args: fmt-style format string and its packed arguments.
//
// Fragments that do not end in '\n' are kept in a static buffer until a
// fragment that does end in '\n' arrives. The completed message is logged at
// the most severe level of all its fragments. A message starting with
// "ERROR: " or "WARNING: " has that prefix stripped and is logged at no less
// than the matching severity.
//
// NOTE(review): the gathering state lives in function-local statics without
// any locking — confirm tprintf output is never produced concurrently.
static void gather_and_log_a_single_tprintf_line(int level, fmt::string_view format, fmt::format_args args) {
  // most severe level seen so far among the buffered fragments;
  // T_LOG_TRACE is the least severe level, i.e. "nothing noteworthy yet".
  static int block_level = T_LOG_TRACE;
  // fragments of a message that has not been terminated by a newline yet.
  static std::string msg_buffer;

  // sanity check/clipping: there's no log level beyond ERROR severity: ERROR is the highest it can possibly get.
  if (level < T_LOG_ERROR) {
    level = T_LOG_ERROR;
  }

  auto msg = fmt::vformat(format, args);

  // when this is a partial message, store it in the buffer until later, when the message is completed.
  if (!msg.ends_with('\n')) {
    // make the entire message line have the most severe log level given for any part of the line:
    if (level < block_level) {
      block_level = level;
    }
    msg_buffer += msg;
    return;
  }

  // `msg` carries a complete message, or at least the end of it:
  // when there's some old stuff waiting for us: append and pick up the tracked error level.
  if (!msg_buffer.empty()) {
    // the closing fragment takes part in the "most severe wins" rule as well,
    // so only adopt the tracked level when it is the more severe one.
    if (block_level < level) {
      level = block_level;
    }
    msg = msg_buffer + msg;
    msg_buffer.clear();
  }

  // We've gathered a single, entire, message: hand it to spdlog as one record.
  const char *s = msg.c_str();

  if (!strncmp(s, "ERROR: ", 7)) {
    s += 7;
    if (level > T_LOG_ERROR)
      level = T_LOG_ERROR;
  } else if (!strncmp(s, "WARNING: ", 9)) {
    s += 9;
    if (level > T_LOG_WARN)
      level = T_LOG_WARN;
  }

  switch (level) {
    case T_LOG_ERROR:
      spdlog::error(s);
      break;
    case T_LOG_WARN:
      spdlog::warn(s);
      break;
    case T_LOG_INFO:
      spdlog::info(s);
      break;
    case T_LOG_TRACE:
      spdlog::trace(s);
      break;
    case T_LOG_DEBUG:
    default:
      spdlog::debug(s);
      break;
  }

  // reset next line log level to lowest possible (same value it starts out with):
  block_level = T_LOG_TRACE;
}

#endif

#define MAX_MSG_LEN 2048

// when we use tesseract as part of MuPDF (or mixed with it), we use the fz_error/fz_warn/fz_info APIs to
// output any error/info/debug messages and have the callbacks which MAY be registered with those APIs
Expand Down Expand Up @@ -126,9 +147,10 @@ const int tprintGetLevelElevation(void)

// Trace printf
void vTessPrint(int level, fmt::string_view format, fmt::format_args args) {
#ifdef HAVE_MUPDF
fz_tess_tprintf(level, format, args);
#else
assert_that_a_spdlog_sink_and_logger_are_active();

gather_and_log_a_single_tprintf_line(level, format, args);

const char *debug_file_name = debug_file.c_str();
static FILE *debugfp = nullptr; // debug file

Expand Down Expand Up @@ -158,7 +180,6 @@ void vTessPrint(int level, fmt::string_view format, fmt::format_args args) {
} else {
fmt::vprint(stderr, format, args);
}
#endif
}

} // namespace tesseract
17 changes: 12 additions & 5 deletions src/ccutil/tprintf.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,12 @@ namespace tesseract {
// Note: You can disable some log messages by setting FLAGS_tlog_level > 0.

// Severity levels for the tprint*() family, listed from most severe (lowest
// value) to least severe (highest value). Level comparisons rely on this
// ordering: a numerically lower level is a more severe one.
enum LogLevel : int {
  //T_LOG_CRITICAL,
  T_LOG_ERROR,
  T_LOG_WARN,
  T_LOG_INFO,
  T_LOG_DEBUG,
  T_LOG_TRACE,
};

// Helper function for tprintf.
Expand All @@ -44,22 +46,27 @@ extern TESS_API const int tprintGetLevelElevation(void);

// Report a message at T_LOG_ERROR severity.
// `format` is a fmt-style format string; `args` are the values for its `{}` placeholders.
template <typename S, typename... Args>
void tprintError(const S *format, Args &&...args) {
  // forward exactly once, at the fixed level (no level elevation offset).
  vTessPrint(T_LOG_ERROR, format, fmt::make_format_args(args...));
}

// Report a message at T_LOG_WARN severity.
// `format` is a fmt-style format string; `args` are the values for its `{}` placeholders.
template <typename S, typename... Args>
void tprintWarn(const S *format, Args &&...args) {
  // forward exactly once, at the fixed level (no level elevation offset).
  vTessPrint(T_LOG_WARN, format, fmt::make_format_args(args...));
}

// Report a message at T_LOG_INFO severity.
// `format` is a fmt-style format string; `args` are the values for its `{}` placeholders.
template <typename S, typename... Args>
void tprintInfo(const S *format, Args &&...args) {
  // forward exactly once, at the fixed level (no level elevation offset).
  vTessPrint(T_LOG_INFO, format, fmt::make_format_args(args...));
}

// Report a message at T_LOG_DEBUG severity.
// `format` is a fmt-style format string; `args` are the values for its `{}` placeholders.
template <typename S, typename... Args>
void tprintDebug(const S *format, Args &&...args) {
  // forward exactly once, at the fixed level (no level elevation offset).
  vTessPrint(T_LOG_DEBUG, format, fmt::make_format_args(args...));
}

// Report a message at T_LOG_TRACE severity.
// `format` is a fmt-style format string; `args` are the values for its `{}` placeholders.
template <typename S, typename... Args>
void tprintTrace(const S *format, Args &&...args) {
  // pack the arguments first, then hand everything to the common print routine.
  const auto packed_args = fmt::make_format_args(args...);
  vTessPrint(T_LOG_TRACE, format, packed_args);
}

} // namespace tesseract
Expand Down
Loading

0 comments on commit 4c97f35

Please sign in to comment.