Skip to content

Commit

Permalink
Merge branch 'bugger' into dev
Browse files Browse the repository at this point in the history
# Conflicts:
#	include/tesseract/tprintf.h
#	src/api/baseapi.cpp
#	src/ccmain/control.cpp
#	src/ccmain/paramsd.cpp
#	src/ccmain/tesseractclass.cpp
#	src/ccmain/tesseractclass.h
#	src/ccstruct/debugpixa.cpp
#	src/ccstruct/pageres.cpp
#	src/ccutil/ambigs.cpp
#	src/ccutil/errcode.h
#	src/ccutil/params.cpp
#	src/ccutil/params.h
#	src/ccutil/unicharset.cpp
#	src/lstm/lstmrecognizer.h
#	src/tesseract.cpp
#	src/training/wordlist2dawg.cpp
  • Loading branch information
GerHobbelt committed Jul 9, 2024
2 parents da9d4d1 + 8dc3cdd commit f05dcf2
Show file tree
Hide file tree
Showing 335 changed files with 3,143 additions and 1,142 deletions.
16 changes: 11 additions & 5 deletions include/tesseract/baseapi.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,7 @@
#ifndef TESSERACT_API_BASEAPI_H_
#define TESSERACT_API_BASEAPI_H_

#ifdef HAVE_TESSERACT_CONFIG_H
# include "config_auto.h" // DISABLED_LEGACY_ENGINE
#endif
#include <tesseract/preparation.h> // compiler config, etc.

#include "export.h"
#include "pageiterator.h"
Expand Down Expand Up @@ -66,8 +64,12 @@ class Tesseract;
// Returns false on failure.
using FileReader = bool (*)(const char *filename, std::vector<char> *data);

using DictFunc = int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID,
enum PermuterType : int;
// function prototype:
// PermuterType Dict::letter_is_okay(void *void_dawg_args, const UNICHARSET &unicharset, UNICHAR_ID unichar_id, bool word_end)
using DictFunc = PermuterType (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID,
bool) const;

using ProbabilityInContextFunc = double (Dict::*)(const char *, const char *,
int, const char *, int);

Expand Down Expand Up @@ -287,7 +289,11 @@ class TESS_API TessBaseAPI {
bool GetVariableAsString(const char *name, std::string *val) const;

/**
* Take all the internally gathered diagnostics data (including the tprintError/Warn/Info/Debug/Trace messages issued thus far, plus all collected image snapshots representing the intermediate state of the tesseract process at that time) and produce a HTML report from it for human consumption.
* Take all the internally gathered diagnostics data (including the
* tprintError/Warn/Info/Debug/Trace messages issued thus far, plus all
* collected image snapshots representing the intermediate state of the
* tesseract process at that time) and produce an HTML report from it
* for human consumption.
*/
void FinalizeAndWriteDiagnosticsReport(); // --> ReportDebugInfo()

Expand Down
4 changes: 1 addition & 3 deletions include/tesseract/fmt-support.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,7 @@
#ifndef TESSERACT_FMT_SUPPORT_H_
#define TESSERACT_FMT_SUPPORT_H_

#ifdef HAVE_TESSERACT_CONFIG_H
# include "config_auto.h" // DISABLED_LEGACY_ENGINE
#endif
#include <tesseract/preparation.h> // compiler config, etc.

#include <fmt/base.h>
#include <fmt/format.h>
Expand Down
2 changes: 1 addition & 1 deletion include/tesseract/ltrresultiterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ class TESS_API LTRResultIterator : public PageIterator {
int scaled_yres, int rect_left, int rect_top,
int rect_width, int rect_height);

~LTRResultIterator() override;
virtual ~LTRResultIterator() override;

// LTRResultIterators may be copied! This makes it possible to iterate over
// all the objects at a lower level, while maintaining an iterator to
Expand Down
17 changes: 17 additions & 0 deletions include/tesseract/preparation.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@

// Common preamble header: the library's headers and source files pull this in
// first (`#include <tesseract/preparation.h>`) so that compiler/build
// configuration is in place before any other header is processed.
//
// Order matters in this file: the macro below has to be visible before the
// math header is included for the first time anywhere in the translation unit.
#define _USE_MATH_DEFINES // for M_PI, when you load math.h

// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_TESSERACT_CONFIG_H
# include "config_auto.h"
#endif

// Microsoft compiler only: pull in the C runtime debug header.
#if defined(_MSC_VER)
#include <crtdbg.h>
// The Windows socket / platform headers below are currently compiled out.
#if 0
#include <winsock2.h>
#include <windows.h>
#endif
#endif

// NOTE(review): presumably the project's debug-heap instrumentation; it is
// included last, after the configuration above has been established -- confirm
// that this ordering is a requirement of debugheap.h.
#include <tesseract/debugheap.h>
2 changes: 1 addition & 1 deletion include/tesseract/resultiterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class TESS_API ResultIterator : public LTRResultIterator {
* ResultIterator is copy constructible!
* The default copy constructor works just fine for us.
*/
~ResultIterator() override = default;
virtual ~ResultIterator() override = default;

// ============= Moving around within the page ============.
/**
Expand Down
4 changes: 3 additions & 1 deletion src/api/altorenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include <tesseract/debugheap.h>
#include <tesseract/preparation.h> // compiler config, etc.

#include "errcode.h" // for ASSERT_HOST
#include "helpers.h" // for copy_string

Expand Down Expand Up @@ -189,6 +190,7 @@ char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) {
continue;
case PT_NOISE:
ASSERT_HOST_MSG(false, "TODO: Please report image which triggers the noise case.\n");
break;
default:
break;
}
Expand Down
33 changes: 13 additions & 20 deletions src/api/baseapi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,8 @@
*
**********************************************************************/

#define _USE_MATH_DEFINES // for M_PI

// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_TESSERACT_CONFIG_H
# include "config_auto.h"
#endif
#include <tesseract/preparation.h> // compiler config, etc.

#include <tesseract/debugheap.h>
#include "boxword.h" // for BoxWord
Expand Down Expand Up @@ -264,23 +260,10 @@ TessBaseAPI::TessBaseAPI()
rect_height_(0),
image_width_(0),
image_height_(0) {
// make sure the debug_all preset is set up BEFORE any command-line arguments
// direct tesseract to set some arbitrary parameters just below,
// for otherwise those `-c xyz=v` commands may be overruled by the
// debug_all preset!
debug_all.set_on_modify_handler([this](decltype(debug_all) &target,
const int32_t old_value,
int32_t &new_value,
const int32_t default_value,
ParamSetBySourceType source_type,
ParamPtr optional_setter) {
this->SetupDebugAllPreset();
});
}

TessBaseAPI::~TessBaseAPI() {
End();
debug_all.set_on_modify_handler(0);
}

/**
Expand Down Expand Up @@ -1996,7 +1979,7 @@ bool TessBaseAPI::ProcessPage(Pix *pix, const char *filename,
} else {
p1 = GetInputImage();
}
tess.AddPixDebugPage(p1, fmt::format("(normalized) image to process @ graynorm_mode = {}", graynorm_mode));
tess.AddPixDebugPage(p1, fmt::format("Greyscale normalized image to process @ graynorm_mode = {}", graynorm_mode));
}
}

Expand Down Expand Up @@ -3004,7 +2987,17 @@ bool TessBaseAPI::Threshold(Pix **pix) {
const char *sequence = "c1.1 + d3.3";
const int dispsep = 0;
Image pix_post = pixMorphSequence(pix_binary, sequence, dispsep);
tesseract_->AddPixCompedOverOrigDebugPage(pix_post, fmt::format("Otsu (tesseract) : post-processed: {} (just an example to showcase what leptonica can do for us!)", sequence));
tesseract_->AddPixCompedOverOrigDebugPage(pix_post, fmt::format("Otsu (tesseract) : post-processed: {} -- just an example to showcase what leptonica can do for us!", sequence));

l_int32 w, h, d;
Image composite = tesseract_->pix_grey().copy();
pixGetDimensions(composite, &w, &h, &d);
Image mask = pixConvert1To8(nullptr, pix_post, 255, 0);
pixRasterop(composite, 0, 0, w, h, PIX_PAINT, mask, 0, 0);
tesseract_->AddPixCompedOverOrigDebugPage(composite, fmt::format("post-processed & masked with: {} -- this should remove all image noise that's not very close to the text, i.e. is considered *not part of the text to OCR*.", sequence));

mask.destroy();
composite.destroy();
pix_post.destroy();
}
} else {
Expand Down
3 changes: 2 additions & 1 deletion src/api/capi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
//
///////////////////////////////////////////////////////////////////////

#include <tesseract/debugheap.h>
#include <tesseract/preparation.h> // compiler config, etc.

#include <tesseract/capi.h>


Expand Down
4 changes: 3 additions & 1 deletion src/api/hocrrenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
*
**********************************************************************/

#include <tesseract/debugheap.h>
#include <tesseract/preparation.h> // compiler config, etc.

#include <tesseract/baseapi.h> // for TessBaseAPI
#include <locale> // for std::locale::classic
#include <memory> // for std::unique_ptr
Expand Down Expand Up @@ -194,6 +195,7 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
continue;
case PT_NOISE:
ASSERT_HOST_MSG(false, "TODO: Please report image which triggers the noise case.\n");
break;
default:
break;
}
Expand Down
3 changes: 2 additions & 1 deletion src/api/lstmboxrenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
*
**********************************************************************/

#include <tesseract/debugheap.h>
#include <tesseract/preparation.h> // compiler config, etc.

#include <tesseract/baseapi.h> // for TessBaseAPI
#include <tesseract/renderer.h>
#include "helpers.h" // for copy_string
Expand Down
18 changes: 11 additions & 7 deletions src/api/pagerenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
// limitations under the License.
**********************************************************************/

#include <tesseract/debugheap.h>
#include <tesseract/preparation.h> // compiler config, etc.

#include "errcode.h" // for ASSERT_HOST
#include "helpers.h" // for copy_string
#include <tesseract/baseapi.h> // for TessBaseAPI
Expand Down Expand Up @@ -807,6 +808,7 @@ char *TessBaseAPI::GetPAGEText(ETEXT_DESC *monitor, int page_number) {
continue;
case PT_NOISE:
ASSERT_HOST_MSG(false, "TODO: Please report image which triggers the noise case.\n");
break;
default:
break;
}
Expand Down Expand Up @@ -927,8 +929,10 @@ char *TessBaseAPI::GetPAGEText(ETEXT_DESC *monitor, int page_number) {

if (LEVELFLAG > 0 || (POLYGONFLAG && !skewed_flag)) {
// Sort wordpolygons
word_top_pts = RecalcPolygonline(word_top_pts, 1 - ttb_flag);
word_bottom_pts = RecalcPolygonline(word_bottom_pts, 0 + ttb_flag);
//
// warning C4800: Implicit conversion from 'int' to bool. Possible information loss
word_top_pts = RecalcPolygonline(word_top_pts, !ttb_flag);
word_bottom_pts = RecalcPolygonline(word_bottom_pts, ttb_flag);

// AppendLinePolygon
AppendLinePolygon(line_top_ltr_pts, line_top_rtl_pts, word_top_pts,
Expand Down Expand Up @@ -1010,13 +1014,13 @@ char *TessBaseAPI::GetPAGEText(ETEXT_DESC *monitor, int page_number) {
}
if ((POLYGONFLAG && !skewed_flag) || LEVELFLAG > 0) {
// Recalc Polygonlines
line_top_ltr_pts = RecalcPolygonline(line_top_ltr_pts, 1 - ttb_flag);
line_top_ltr_pts = RecalcPolygonline(line_top_ltr_pts, !ttb_flag);
line_bottom_ltr_pts =
RecalcPolygonline(line_bottom_ltr_pts, 0 + ttb_flag);
RecalcPolygonline(line_bottom_ltr_pts, ttb_flag);

// Smooth the polygonline
SimplifyLinePolygon(line_top_ltr_pts, 5, 1 - ttb_flag);
SimplifyLinePolygon(line_bottom_ltr_pts, 5, 0 + ttb_flag);
SimplifyLinePolygon(line_top_ltr_pts, 5, !ttb_flag);
SimplifyLinePolygon(line_bottom_ltr_pts, 5, ttb_flag);

// Fit linepolygon matching the baselinepoints
line_baseline_pts = SortBaseline(line_baseline_pts, writing_direction);
Expand Down
6 changes: 1 addition & 5 deletions src/api/pdfrenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,7 @@
///////////////////////////////////////////////////////////////////////

// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_TESSERACT_CONFIG_H
# include "config_auto.h"
#endif

#include <tesseract/debugheap.h>
#include <tesseract/preparation.h> // compiler config, etc.

#include "pdf_ttf.h"
#include <tesseract/tprintf.h>
Expand Down
7 changes: 2 additions & 5 deletions src/api/renderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,7 @@
//
///////////////////////////////////////////////////////////////////////

#ifdef HAVE_TESSERACT_CONFIG_H
# include "config_auto.h"
#endif
#include <tesseract/debugheap.h>
#include <tesseract/preparation.h> // compiler config, etc.
#include <tesseract/baseapi.h>
#include <tesseract/renderer.h>
#include <cstring>
Expand Down Expand Up @@ -136,7 +133,7 @@ void TessResultRenderer::AppendData(const char *s, int len) {
if (!tesseract::Serialize(fout_, s, len)) {
happy_ = false;
}
fflush(fout_);
//fflush(fout_); -- only slows down performance.
}

bool TessResultRenderer::BeginDocumentHandler() {
Expand Down
3 changes: 2 additions & 1 deletion src/api/wordstrboxrenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
*
**********************************************************************/

#include <tesseract/debugheap.h>
#include <tesseract/preparation.h> // compiler config, etc.

#include <tesseract/baseapi.h> // for TessBaseAPI
#include <tesseract/renderer.h>
#include "helpers.h" // for copy_string
Expand Down
2 changes: 2 additions & 0 deletions src/arch/dotproduct.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
// limitations under the License.
///////////////////////////////////////////////////////////////////////

#include <tesseract/preparation.h> // compiler config, etc.

#include "dotproduct.h"

namespace tesseract {
Expand Down
2 changes: 2 additions & 0 deletions src/arch/dotproductavx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
// limitations under the License.
///////////////////////////////////////////////////////////////////////

#include <tesseract/preparation.h> // compiler config, etc.

#include "dotproduct.h"
#include "intsimdmatrix.h"

Expand Down
2 changes: 2 additions & 0 deletions src/arch/dotproductavx512.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
// limitations under the License.
///////////////////////////////////////////////////////////////////////

#include <tesseract/preparation.h> // compiler config, etc.

// General Notice:
//
// This is not about whether the compiler is optimizing **the rest of your code using FMA instructions**.
Expand Down
2 changes: 2 additions & 0 deletions src/arch/dotproductfma.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
// limitations under the License.
///////////////////////////////////////////////////////////////////////

#include <tesseract/preparation.h> // compiler config, etc.

#include "dotproduct.h"

// General Notice:
Expand Down
2 changes: 2 additions & 0 deletions src/arch/dotproductneon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
// limitations under the License.
///////////////////////////////////////////////////////////////////////

#include <tesseract/preparation.h> // compiler config, etc.

#include "dotproduct.h"
#include "tesstypes.h"

Expand Down
2 changes: 2 additions & 0 deletions src/arch/dotproductsse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
// limitations under the License.
///////////////////////////////////////////////////////////////////////

#include <tesseract/preparation.h> // compiler config, etc.

#include "dotproduct.h"

// General Notice:
Expand Down
2 changes: 2 additions & 0 deletions src/arch/intsimdmatrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
// limitations under the License.
///////////////////////////////////////////////////////////////////////

#include <tesseract/preparation.h> // compiler config, etc.

#include "intsimdmatrix.h"
#include "matrix.h" // for GENERIC_2D_ARRAY
#include "simddetect.h" // for SIMDDetect
Expand Down
2 changes: 2 additions & 0 deletions src/arch/intsimdmatrixavx2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
// limitations under the License.
///////////////////////////////////////////////////////////////////////

#include <tesseract/preparation.h> // compiler config, etc.

#include "intsimdmatrix.h"

// General Notice:
Expand Down
2 changes: 2 additions & 0 deletions src/arch/intsimdmatrixavx512vnni.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
// limitations under the License.
///////////////////////////////////////////////////////////////////////

#include <tesseract/preparation.h> // compiler config, etc.

#include "intsimdmatrix.h"

// General Notice:
Expand Down
2 changes: 2 additions & 0 deletions src/arch/intsimdmatrixneon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
// limitations under the License.
///////////////////////////////////////////////////////////////////////

#include <tesseract/preparation.h> // compiler config, etc.

#include "intsimdmatrix.h"
#include "tesstypes.h"

Expand Down
Loading

0 comments on commit f05dcf2

Please sign in to comment.