-
Notifications
You must be signed in to change notification settings - Fork 30
/
Copy pathtextlineprojection.h
206 lines (178 loc) · 10.3 KB
/
textlineprojection.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
// Copyright 2011 Google Inc. All Rights Reserved.
// Author: rays@google.com (Ray Smith)
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_TEXTORD_TEXTLINEPROJECTION_H_
#define TESSERACT_TEXTORD_TEXTLINEPROJECTION_H_
#include "blobgrid.h" // For BlobGrid
// Forward declarations: this header only names these types in function
// declarations and as pointers, so their full definitions are not needed here.
class DENORM;
struct Pix;
struct TPOINT;
namespace tesseract {
// Forward declaration; this header only takes ColPartition by const reference.
class ColPartition;
// TextlineProjection wraps the computation of an image that represents local
// textline density, together with the function(s) that make use of it.
// Principle: smear each connected component horizontally (vertically for
// components that sit on a vertically written textline) and count the number
// of smeared components at every pixel. The resulting image then shows the
// density of the textlines at each image position.
// NOTE(review): a destructor is declared and a raw Pix* member is held, but no
// copy constructor or assignment operator is declared here. Confirm whether
// pix_ is owned by this class and, if so, whether copying should be disabled.
class TextlineProjection {
 public:
  // The down-scaling factor is derived so that the projection ends up with a
  // resolution of roughly 100 dpi, whatever the input.
  explicit TextlineProjection(int resolution);
  ~TextlineProjection();

  // Builds the projection profile.
  //   input_block: holds the lists of blobs to project.
  //   rotation:    converts to image coords.
  //   nontext_map: full-resolution map marking out areas to avoid.
  // Assumptions made during construction:
  //   * The rotation is a multiple of 90 degrees, ie no deskew yet.
  //   * The blobs have had their left and right rules set, which also limits
  //     the range of projection.
  void ConstructProjection(TO_BLOCK* input_block,
                           const FCOORD& rotation,
                           Pix* nontext_map);

  // Displays the blobs in the window, colored according to textline quality.
  void PlotGradedBlobs(BLOBNBOX_LIST* blobs, ScrollView* win);

  // Transfers blobs that do not look like they sit well on a textline from
  // the input blobs list to the output small_blobs list.
  // Doing so keeps them out of initial textline finding, which stops
  // diacritics from forming incorrect textlines. (Introduced mainly to fix
  // Thai.)
  void MoveNonTextlineBlobs(BLOBNBOX_LIST* blobs,
                            BLOBNBOX_LIST* small_blobs) const;

  // Creates a window and displays the projection in it.
  void DisplayProjection() const;

  // Computes the distance of the box from the partition using curved
  // projection space. Same as DistanceOfBoxFromBox, except that the direction
  // comes from the ColPartition and the median bounds of the ColPartition
  // serve as the to_box.
  int DistanceOfBoxFromPartition(const TBOX& box,
                                 const ColPartition& part,
                                 const DENORM* denorm,
                                 bool debug) const;

  // Computes the distance from the from_box to the to_box using curved
  // projection space. Moving from the from_box to the to_box, separation that
  // involves a decrease in projection density is weighted more heavily than
  // constant density, and an increase is weighted less.
  // When horizontal_textline is true, curved space is used vertically, as for
  // a diacritic on the edge of a textline.
  // The projection works in original image coords, so denorm is used to get
  // back to the image coords from box/part space.
  int DistanceOfBoxFromBox(const TBOX& from_box,
                           const TBOX& to_box,
                           bool horizontal_textline,
                           const DENORM* denorm,
                           bool debug) const;

  // Computes the distance between (x, y1) and (x, y2) under the rule that a
  // decrease in textline density is weighted more heavily than an increase.
  // The coordinates are in source image space, ie already processed by any
  // denorm, but not yet scaled by scale_factor_.
  // Going from the outside of a textline to the inside should measure much
  // less distance than going from the inside of a textline to the outside.
  int VerticalDistance(bool debug, int x, int y1, int y2) const;

  // Computes the distance between (x1, y) and (x2, y) under the rule that a
  // decrease in textline density is weighted more heavily than an increase.
  int HorizontalDistance(bool debug, int x1, int x2, int y) const;

  // Returns true if the blob appears to lie outside of a horizontal textline.
  // Such blobs are potentially diacritics (even if large in Thai) and should
  // be kept away from initial textline finding.
  bool BoxOutOfHTextline(const TBOX& box,
                         const DENORM* denorm,
                         bool debug) const;

  // Evaluates the textlineiness of a ColPartition. Relies on EvaluateBox, but
  // substitutes the median top/bottom for horizontal and the median
  // left/right for vertical in place of the bounding box edges.
  // Both horizontal and vertical are evaluated and the best result is
  // returned: positive for horizontal, negative for vertical.
  int EvaluateColPartition(const ColPartition& part,
                           const DENORM* denorm,
                           bool debug) const;

  // Computes the mean projection gradients over the horizontal and vertical
  // edges of the box:
  //
  //    -h-h-h-h-h-h
  //   |------------| mean=htop   -v|+v--------+v|-v
  //   |+h+h+h+h+h+h|             -v|+v        +v|-v
  //   |            |             -v|+v        +v|-v
  //   |    box     |             -v|+v  box   +v|-v
  //   |            |             -v|+v        +v|-v
  //   |+h+h+h+h+h+h|             -v|+v        +v|-v
  //   |------------| mean=hbot   -v|+v--------+v|-v
  //    -h-h-h-h-h-h
  //                           mean=vleft  mean=vright
  //
  // Returns MAX(htop,hbot) - MAX(vleft,vright): a positive number for a
  // horizontal textline, a negative number for a vertical textline, and a
  // value near zero for undecided. Undecided is most likely non-text.
  int EvaluateBox(const TBOX& box, const DENORM* denorm, bool debug) const;

 private:
  // Internal flavour of EvaluateBox: in addition to the EvaluateBox result it
  // hands back the unclipped gradients.
  // hgrad1 and hgrad2 receive the gradients for the horizontal textline.
  // NOTE(review): vgrad1 and vgrad2 are presumably the corresponding gradients
  // for the vertical textline - confirm against the implementation.
  int EvaluateBoxInternal(const TBOX& box,
                          const DENORM* denorm,
                          bool debug,
                          int* hgrad1,
                          int* hgrad2,
                          int* vgrad1,
                          int* vgrad2) const;

  // Helper: mean gradient value for the horizontal row at the given y (in the
  // external coordinates), obtained by subtracting the mean of the
  // transformed row 2 pixels above from the mean of the transformed row
  // 2 pixels below.
  // The value is positive for a good top edge and negative for bottom.
  // The best result out of +2/-2, +3/-1, +1/-3 pixels from the edge is
  // returned.
  // NOTE(review): best_is_max presumably selects whether "best" means the
  // maximum or the minimum - confirm against the implementation.
  int BestMeanGradientInRow(const DENORM* denorm,
                            inT16 min_x,
                            inT16 max_x,
                            inT16 y,
                            bool best_is_max) const;

  // Helper: mean gradient value for the vertical column at the given x (in
  // the external coordinates), obtained by subtracting the mean of the
  // transformed column 2 pixels left from the mean of the transformed column
  // 2 pixels to the right.
  // The value is positive for a good left edge and negative for right.
  // The best result out of +2/-2, +3/-1, +1/-3 pixels from the edge is
  // returned.
  int BestMeanGradientInColumn(const DENORM* denorm,
                               inT16 x,
                               inT16 min_y,
                               inT16 max_y,
                               bool best_is_max) const;

  // Helper: mean pixel value over the line between start_pt and end_pt
  // (inclusive), shifted perpendicular to the line in the projection image by
  // offset pixels. For simplicity the vector is assumed to be either nearly
  // horizontal or nearly vertical. It works on skewed textlines!
  // The end points are given in external coordinates and are denormalized
  // with the denorm, if it is not NULL, before further conversion to pix
  // coordinates. Only after all the conversions is the offset added, in the
  // direction perpendicular to the line direction. The offset is therefore in
  // projection image coordinates, which lets the caller get a guaranteed
  // displacement between the pixels used to calculate gradients.
  int MeanPixelsInLineSegment(const DENORM* denorm,
                              int offset,
                              TPOINT start_pt,
                              TPOINT end_pt) const;

  // Helper: adds 1 over a rectangle, given in source image coords, to the
  // internal projection pix_.
  void IncrementRectangle8Bit(const TBOX& box);

  // Inserts a list of blobs into the projection.
  // rotation is a multiple of 90 degrees and takes blob coords to nontext_map
  // coords; image_box is the bounds of the nontext_map.
  // Each blob is spread horizontally or vertically according to its internal
  // flags. The spreading is truncated by set pixels in the nontext_map and
  // also by the horizontal rule line limits on the blobs.
  void ProjectBlobs(BLOBNBOX_LIST* blobs,
                    const FCOORD& rotation,
                    const TBOX& image_box,
                    Pix* nontext_map);

  // Pads the bounding box of the given blob depending on whether it lies on a
  // horizontal or vertical text line, taking tab-stops near the blob into
  // account. Returns true if the padding was in the horizontal direction.
  bool PadBlobBox(BLOBNBOX* blob, TBOX* bbox);

  // Helper: denormalizes the TPOINT with the denorm if it is not NULL, then
  // converts it to pix_ coordinates.
  void TransformToPixCoords(const DENORM* denorm, TPOINT* pt) const;

  // Helper: truncates the TPOINT so that it lies within the pix_.
  void TruncateToImageBounds(TPOINT* pt) const;

  // Transform tesseract coordinates to the coordinates used in the pix.
  int ImageXToProjectionX(int x) const;
  int ImageYToProjectionY(int y) const;

  // Down-sampling scale factor used in building the image.
  int scale_factor_;

  // Blob coordinates of the top-left (the origin of the pix_), expressed in
  // tesseract coordinates. Used to transform the bottom-up tesseract
  // coordinates to the top-down coordinates of the pix.
  int x_origin_;
  int y_origin_;

  // Image of horizontally smeared blob boxes, summed to provide a textline
  // density map. As with a horizontal projection, the map has dips in the
  // gaps between textlines.
  Pix* pix_;
};
} // namespace tesseract.
#endif // TESSERACT_TEXTORD_TEXTLINEPROJECTION_H_