-
Notifications
You must be signed in to change notification settings - Fork 9.7k
/
Copy paththresholder.cpp
330 lines (303 loc) · 11.4 KB
/
thresholder.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
///////////////////////////////////////////////////////////////////////
// File: thresholder.cpp
// Description: Base API for thresholding images in tesseract.
// Author: Ray Smith
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include <allheaders.h>
#include <tesseract/thresholder.h>
#include <cstdint> // for uint32_t
#include <cstring>
#include "otsuthr.h"
#include "tprintf.h" // for tprintf
#if defined(USE_OPENCL)
# include "openclwrapper.h" // for OpenclDevice
#endif
namespace tesseract {
ImageThresholder::ImageThresholder()
: pix_(nullptr)
, image_width_(0)
, image_height_(0)
, pix_channels_(0)
, pix_wpl_(0)
, scale_(1)
, yres_(300)
, estimated_res_(300) {
SetRectangle(0, 0, 0, 0);
}
// Releases the source image, if one is still held, by way of Clear().
ImageThresholder::~ImageThresholder() {
  ImageThresholder::Clear();
}
// Frees the memory of the source Pix, if there is one, by handing it to
// pixDestroy.
void ImageThresholder::Clear() {
  if (pix_ != nullptr) {
    pixDestroy(&pix_);
  }
}
// Reports whether the thresholder currently has no source image, i.e. pix_
// is null.
bool ImageThresholder::IsEmpty() const {
  return !pix_;
}
// SetImage makes a copy of all the image data, so it may be deleted
// immediately after this call.
// Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
// Palette color images will not work properly and must be converted to
// 24 bit.
// Binary images of 1 bit per pixel may also be given but they must be
// byte packed with the MSB of the first byte being the first pixel, and a
// one pixel is WHITE. For binary images set bytes_per_pixel=0.
void ImageThresholder::SetImage(const unsigned char *imagedata, int width, int height,
int bytes_per_pixel, int bytes_per_line) {
int bpp = bytes_per_pixel * 8;
if (bpp == 0)
bpp = 1;
Pix *pix = pixCreate(width, height, bpp == 24 ? 32 : bpp);
l_uint32 *data = pixGetData(pix);
int wpl = pixGetWpl(pix);
switch (bpp) {
case 1:
for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) {
for (int x = 0; x < width; ++x) {
if (imagedata[x / 8] & (0x80 >> (x % 8)))
CLEAR_DATA_BIT(data, x);
else
SET_DATA_BIT(data, x);
}
}
break;
case 8:
// Greyscale just copies the bytes in the right order.
for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) {
for (int x = 0; x < width; ++x)
SET_DATA_BYTE(data, x, imagedata[x]);
}
break;
case 24:
// Put the colors in the correct places in the line buffer.
for (int y = 0; y < height; ++y, imagedata += bytes_per_line) {
for (int x = 0; x < width; ++x, ++data) {
SET_DATA_BYTE(data, COLOR_RED, imagedata[3 * x]);
SET_DATA_BYTE(data, COLOR_GREEN, imagedata[3 * x + 1]);
SET_DATA_BYTE(data, COLOR_BLUE, imagedata[3 * x + 2]);
}
}
break;
case 32:
// Maintain byte order consistency across different endianness.
for (int y = 0; y < height; ++y, imagedata += bytes_per_line, data += wpl) {
for (int x = 0; x < width; ++x) {
data[x] = (imagedata[x * 4] << 24) | (imagedata[x * 4 + 1] << 16) |
(imagedata[x * 4 + 2] << 8) | imagedata[x * 4 + 3];
}
}
break;
default:
tprintf("Cannot convert RAW image to Pix with bpp = %d\n", bpp);
}
SetImage(pix);
pixDestroy(&pix);
}
// Remembers the rectangle that later calls will process. Nothing is
// thresholded here; the four values are only stored.
void ImageThresholder::SetRectangle(int left, int top, int width, int height) {
  // Position of the rectangle's top-left corner.
  rect_left_ = left;
  rect_top_ = top;
  // Extent of the rectangle.
  rect_width_ = width;
  rect_height_ = height;
}
// Reports everything needed to map bounding boxes found inside the rectangle
// back to the original image. With top-down coordinates left and top are
// sufficient; bottom-up coordinates also need the heights of the rectangle
// and of the image.
// Every output pointer is written unconditionally, so none of them may be
// null.
void ImageThresholder::GetImageSizes(int *left, int *top, int *width, int *height, int *imagewidth,
                                     int *imageheight) {
  // The stored processing rectangle.
  *left = rect_left_;
  *top = rect_top_;
  *width = rect_width_;
  *height = rect_height_;
  // The full size of the source image.
  *imagewidth = image_width_;
  *imageheight = image_height_;
}
// Pix vs raw, which to use? Pix is the preferred input for efficiency,
// since raw buffers are copied.
// SetImage for Pix clones its input, so the source pix may be pixDestroyed
// immediately after, but may not go away until after the Thresholder has
// finished with it.
void ImageThresholder::SetImage(const Pix *pix) {
if (pix_ != nullptr)
pixDestroy(&pix_);
Pix *src = const_cast<Pix *>(pix);
int depth;
pixGetDimensions(src, &image_width_, &image_height_, &depth);
// Convert the image as necessary so it is one of binary, plain RGB, or
// 8 bit with no colormap. Guarantee that we always end up with our own copy,
// not just a clone of the input.
if (pixGetColormap(src)) {
Pix *tmp = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC);
depth = pixGetDepth(tmp);
if (depth > 1 && depth < 8) {
pix_ = pixConvertTo8(tmp, false);
pixDestroy(&tmp);
} else {
pix_ = tmp;
}
} else if (depth > 1 && depth < 8) {
pix_ = pixConvertTo8(src, false);
} else {
pix_ = pixCopy(nullptr, src);
}
depth = pixGetDepth(pix_);
pix_channels_ = depth / 8;
pix_wpl_ = pixGetWpl(pix_);
scale_ = 1;
estimated_res_ = yres_ = pixGetYRes(pix_);
Init();
}
// Thresholds the source image into a newly created Pix and stores the result
// in *pix. The caller owns the result and must free it with pixDestroy.
// pageseg_mode is not used by this implementation.
/// Returns false, after printing a message, if either image dimension
/// exceeds INT16_MAX; returns true otherwise.
bool ImageThresholder::ThresholdToPix(PageSegMode pageseg_mode, Pix **pix) {
  const bool too_wide = image_width_ > INT16_MAX;
  const bool too_tall = image_height_ > INT16_MAX;
  if (too_wide || too_tall) {
    tprintf("Image too large: (%d, %d)\n", image_width_, image_height_);
    return false;
  }
  if (pix_channels_ != 0) {
    // Grey or color input goes through Otsu thresholding.
    OtsuThresholdRectToPix(pix_, pix);
    return true;
  }
  // The image is already binary, but it still has to be copied because this
  // API allows the caller to modify the output.
  Pix *rect_pix = GetPixRect();
  *pix = pixCopy(nullptr, rect_pix);
  pixDestroy(&rect_pix);
  return true;
}
// Returns a Pix holding an 8 bit threshold value at each pixel, or nullptr
// if the input is binary. The caller must pixDestroy the result.
// The general contract allows the result to be an integer reduction of the
// binary image (down to 1x1), with the scale factor inferred from the ratio
// of the sizes, and the values should ideally be the thresholds used by
// ThresholdToPix, though that is not a hard constraint.
// This implementation returns a Pix the size of the grey rectangle, filled
// with one value: the first Otsu threshold computed over that rectangle, or
// 128 when that threshold is not positive.
Pix *ImageThresholder::GetPixRectThresholds() {
  if (IsBinary()) {
    return nullptr;
  }
  Pix *grey = GetPixRectGrey();
  const int width = pixGetWidth(grey);
  const int height = pixGetHeight(grey);
  int *thresholds;
  int *hi_values;
  OtsuThreshold(grey, 0, 0, width, height, &thresholds, &hi_values);
  pixDestroy(&grey);
  // Only the first threshold is used; pick the fill value and then release
  // both arrays produced by OtsuThreshold.
  int fill_value = 128;
  if (thresholds[0] > 0) {
    fill_value = thresholds[0];
  }
  delete[] thresholds;
  delete[] hi_values;
  Pix *result = pixCreate(width, height, 8);
  pixSetAllArbitrary(result, fill_value);
  return result;
}
// Initialization step shared by the SetImage methods: makes the processing
// rectangle cover the whole image.
void ImageThresholder::Init() {
  const int full_width = image_width_;
  const int full_height = image_height_;
  SetRectangle(0, 0, full_width, full_height);
}
// Returns the part of the source image selected by the processing rectangle:
// a clone of the whole image when the rectangle covers all of it, otherwise
// a crop to the rectangle.
// The returned Pix must be pixDestroyed by the caller.
// Intended for use by page layout analysis, which is only available with
// Leptonica, so there is no raw equivalent.
Pix *ImageThresholder::GetPixRect() {
  if (IsFullImage()) {
    // Nothing to cut away: just clone the whole thing.
    return pixClone(pix_);
  }
  // Crop to the given rectangle; the Box is only needed for the call.
  Box *rect_box = boxCreate(rect_left_, rect_top_, rect_width_, rect_height_);
  Pix *clipped = pixClipRectangle(pix_, rect_box, nullptr);
  boxDestroy(&rect_box);
  return clipped;
}
// Returns the source image rectangle reduced to 8 bit greyscale, at the same
// resolution as the output binary.
// The returned Pix must be pixDestroyed by the caller.
// Provided to the classifier to extract features from the greyscale image.
Pix *ImageThresholder::GetPixRectGrey() {
  Pix *rect_pix = GetPixRect();
  const int depth = pixGetDepth(rect_pix);
  if (depth == 8) {
    // Already 8 bits deep: hand it back unchanged.
    return rect_pix;
  }
  if (depth == 24) {
    // 24 bit data is widened to 32 bit before the conversion to 8 bit.
    Pix *widened = pixConvert24To32(rect_pix);
    pixDestroy(&rect_pix);
    rect_pix = widened;
  }
  Pix *grey = pixConvertTo8(rect_pix, false);
  pixDestroy(&rect_pix);
  return grey;
}
// Otsu thresholds the rectangle stored in *this.
// Computes per-channel thresholds for src_pix over the rectangle with
// OtsuThreshold and then applies them, writing the resulting Pix to
// *out_pix. The threshold arrays are released before returning.
void ImageThresholder::OtsuThresholdRectToPix(Pix *src_pix, Pix **out_pix) const {
  int *thresholds;
  int *hi_values;
  const int num_channels = OtsuThreshold(src_pix, rect_left_, rect_top_, rect_width_,
                                         rect_height_, &thresholds, &hi_values);
  // Set once the OpenCL path has produced the output.
  bool done_by_opencl = false;
#ifdef USE_OPENCL
  // Only use OpenCL if compiled with OpenCL and the selected device is
  // OpenCL; that path is taken only for 4 channel images whose rectangle
  // starts at the image origin.
  OpenclDevice od;
  const bool rect_at_origin = rect_top_ == 0 && rect_left_ == 0;
  if (num_channels == 4 && od.selectedDeviceIsOpenCL() && rect_at_origin) {
    od.ThresholdRectToPixOCL(reinterpret_cast<unsigned char *>(pixGetData(src_pix)), num_channels,
                             pixGetWpl(src_pix) * 4, thresholds, hi_values, out_pix /*pix_OCL*/,
                             rect_height_, rect_width_, rect_top_, rect_left_);
    done_by_opencl = true;
  }
#endif
  if (!done_by_opencl) {
    ThresholdRectToPix(src_pix, num_channels, thresholds, hi_values, out_pix);
  }
  delete[] thresholds;
  delete[] hi_values;
}
/// Threshold the rectangle, taking everything except the src_pix
/// from the class, using thresholds/hi_values to the output pix.
/// NOTE that num_channels is the size of the thresholds and hi_values
/// arrays and also the bytes per pixel in src_pix.
/// A 1 bit Pix of the rectangle's size is created and stored in *pix, with
/// the X and Y resolution copied from src_pix. For each pixel, channels
/// with a negative hi_value are ignored; the output bit is set as soon as
/// one channel satisfies (pixel > threshold) == (hi_value == 0), and is
/// cleared if no channel does.
/// If the output Pix cannot be created, an error is printed and *pix is
/// left null instead of being written through.
void ImageThresholder::ThresholdRectToPix(Pix *src_pix, int num_channels, const int *thresholds,
                                          const int *hi_values, Pix **pix) const {
  *pix = pixCreate(rect_width_, rect_height_, 1);
  if (*pix == nullptr) {
    // No output buffer exists, so the loops below must not run.
    tprintf("Cannot create %d x %d binary Pix for thresholding\n", rect_width_, rect_height_);
    return;
  }
  uint32_t *pixdata = pixGetData(*pix);
  const int wpl = pixGetWpl(*pix);
  const int src_wpl = pixGetWpl(src_pix);
  const uint32_t *srcdata = pixGetData(src_pix);
  pixSetXRes(*pix, pixGetXRes(src_pix));
  pixSetYRes(*pix, pixGetYRes(src_pix));
  for (int y = 0; y < rect_height_; ++y) {
    // Source lines are offset by the rectangle's top; output lines are not.
    const uint32_t *linedata = srcdata + (y + rect_top_) * src_wpl;
    uint32_t *pixline = pixdata + y * wpl;
    for (int x = 0; x < rect_width_; ++x) {
      bool white_result = true;
      for (int ch = 0; ch < num_channels; ++ch) {
        // Channel bytes of one pixel are adjacent in the source line.
        const int pixel = GET_DATA_BYTE(linedata, (x + rect_left_) * num_channels + ch);
        if (hi_values[ch] >= 0 && (pixel > thresholds[ch]) == (hi_values[ch] == 0)) {
          white_result = false;
          break;
        }
      }
      if (white_result) {
        CLEAR_DATA_BIT(pixline, x);
      } else {
        SET_DATA_BIT(pixline, x);
      }
    }
  }
}
} // namespace tesseract.