Skip to content
This repository has been archived by the owner on Aug 8, 2023. It is now read-only.

Commit

Permalink
Port raggedness-minimizing line breaking from gl-js.
Browse files Browse the repository at this point in the history
  • Loading branch information
ChrisLoer committed Dec 21, 2016
1 parent 9a6f81a commit a88357d
Show file tree
Hide file tree
Showing 7 changed files with 130 additions and 71 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
"lodash": "^4.16.4",
"mapbox-gl": "mapbox/mapbox-gl-js#ab836206d415ca3a74257a3066d11a54ab2838cb",
"mapbox-gl-style-spec": "mapbox/mapbox-gl-style-spec#49e8b407bdbbe6f7c92dbcb56d3d51f425fc2653",
"mapbox-gl-test-suite": "mapbox/mapbox-gl-test-suite#e1ada02a706fd124fc3441fd3a2b3cda67960ff5",
"mapbox-gl-test-suite": "mapbox/mapbox-gl-test-suite#e1208969a7855e524bd174bddf4e46f0bb697cb6",
"mkdirp": "^0.5.1",
"node-cmake": "^1.2.1",
"pixelmatch": "^4.0.2",
Expand Down
24 changes: 10 additions & 14 deletions platform/default/bidi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,8 @@ class BiDiImpl {
UBiDi* bidiLine = nullptr;
};

BiDi::BiDi() : impl(std::make_unique<BiDiImpl>())
{
}

BiDi::~BiDi() {
}
BiDi::BiDi() : impl(std::make_unique<BiDiImpl>()) {}
BiDi::~BiDi() = default;

// Takes UTF16 input in logical order and applies Arabic shaping to the input while maintaining
// logical order. Output won't be intelligible until the bidirectional algorithm is applied
Expand Down Expand Up @@ -53,7 +49,7 @@ std::u16string applyArabicShaping(const std::u16string& input) {
return std::u16string(outputText.get(), outputLength);
}

void BiDi::mergeParagraphLineBreaks(std::set<int32_t>& lineBreakPoints) {
void BiDi::mergeParagraphLineBreaks(std::set<size_t>& lineBreakPoints) {
int32_t paragraphCount = ubidi_countParagraphs(impl->bidiText);
for (int32_t i = 0; i < paragraphCount; i++) {
UErrorCode errorCode = U_ZERO_ERROR;
Expand All @@ -65,20 +61,20 @@ void BiDi::mergeParagraphLineBreaks(std::set<int32_t>& lineBreakPoints) {
u_errorName(errorCode));
}

lineBreakPoints.insert(paragraphEndIndex);
lineBreakPoints.insert(static_cast<std::size_t>(paragraphEndIndex));
}
}

std::vector<std::u16string> BiDi::applyLineBreaking(std::set<int32_t> lineBreakPoints) {
std::vector<std::u16string> BiDi::applyLineBreaking(std::set<std::size_t> lineBreakPoints) {
// BiDi::getLine will error if called across a paragraph boundary, so we need to ensure that all
// paragraph boundaries are included in the set of line break points. The calling code might not
// include the line break because it didn't need to wrap at that point, or because the text was
// separated with a more exotic code point such as (U+001C)
mergeParagraphLineBreaks(lineBreakPoints);

std::vector<std::u16string> transformedLines;
int32_t start = 0;
for (int32_t lineBreakPoint : lineBreakPoints) {
std::size_t start = 0;
for (std::size_t lineBreakPoint : lineBreakPoints) {
transformedLines.push_back(getLine(start, lineBreakPoint));
start = lineBreakPoint;
}
Expand All @@ -87,7 +83,7 @@ std::vector<std::u16string> BiDi::applyLineBreaking(std::set<int32_t> lineBreakP
}

std::vector<std::u16string> BiDi::processText(const std::u16string& input,
std::set<int32_t> lineBreakPoints) {
std::set<std::size_t> lineBreakPoints) {
UErrorCode errorCode = U_ZERO_ERROR;

ubidi_setPara(impl->bidiText, input.c_str(), static_cast<int32_t>(input.size()),
Expand All @@ -100,9 +96,9 @@ std::vector<std::u16string> BiDi::processText(const std::u16string& input,
return applyLineBreaking(lineBreakPoints);
}

std::u16string BiDi::getLine(int32_t start, int32_t end) {
std::u16string BiDi::getLine(std::size_t start, std::size_t end) {
UErrorCode errorCode = U_ZERO_ERROR;
ubidi_setLine(impl->bidiText, start, end, impl->bidiLine, &errorCode);
ubidi_setLine(impl->bidiText, static_cast<int32_t>(start), static_cast<int32_t>(end), impl->bidiLine, &errorCode);

if (U_FAILURE(errorCode)) {
throw std::runtime_error(std::string("BiDi::getLine (setLine): ") + u_errorName(errorCode));
Expand Down
19 changes: 9 additions & 10 deletions platform/qt/src/bidi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,17 @@ std::u16string applyArabicShaping(const std::u16string& input) {
return utf16string.toStdU16String();
}

void BiDi::mergeParagraphLineBreaks(std::set<int32_t>& lineBreakPoints) {
lineBreakPoints.insert(bidi.impl->string.length());
void BiDi::mergeParagraphLineBreaks(std::set<std::size_t>& lineBreakPoints) {
lineBreakPoints.insert(static_cast<std::size_t>(bidi.impl->string.length()));
}

std::vector<std::u16string>
BiDi::applyLineBreaking(std::set<int32_t> lineBreakPoints) {
BiDi::applyLineBreaking(std::set<std::size_t> lineBreakPoints) {
mergeParagraphLineBreaks(lineBreakPoints);

std::vector<std::u16string> transformedLines;
int32_t start = 0;
for (int32_t lineBreakPoint : lineBreakPoints) {
std::size_t start = 0;
for (std::size_t lineBreakPoint : lineBreakPoints) {
transformedLines.push_back(bidi.getLine(start, lineBreakPoint));
start = lineBreakPoint;
}
Expand All @@ -38,16 +38,15 @@ BiDi::BiDi() : impl(std::make_unique<BiDiImpl>())
{
}

BiDi::~BiDi() {
}
BiDi::~BiDi() = default;

std::vector<std::u16string> BiDi::processText(const std::u16string& input, std::set<int32_t> lineBreakPoints) {
std::vector<std::u16string> BiDi::processText(const std::u16string& input, std::set<std::size_t> lineBreakPoints) {
impl->string = QString::fromStdU16String(input);
return applyLineBreaking(lineBreakPoints);
}

std::u16string BiDi::getLine(int32_t start, int32_t end) {
return impl->string.mid(start, end - start).toStdU16String();
std::u16string BiDi::getLine(std::size_t start, std::size_t end) {
return impl->string.mid(static_cast<int32_t>(start), static_cast<int32_t>(end - start)).toStdU16String();
}

} // end namespace mbgl
8 changes: 4 additions & 4 deletions src/mbgl/text/bidi.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@ class BiDi : private util::noncopyable {
BiDi();
~BiDi();

std::vector<std::u16string> processText(const std::u16string&, std::set<int32_t>);
std::vector<std::u16string> processText(const std::u16string&, std::set<std::size_t>);

private:
void mergeParagraphLineBreaks(std::set<int32_t>&);
std::vector<std::u16string> applyLineBreaking(std::set<int32_t>);
std::u16string getLine(int32_t start, int32_t end);
void mergeParagraphLineBreaks(std::set<std::size_t>&);
std::vector<std::u16string> applyLineBreaking(std::set<std::size_t>);
std::u16string getLine(std::size_t start, std::size_t end);

std::unique_ptr<BiDiImpl> impl;
};
Expand Down
142 changes: 102 additions & 40 deletions src/mbgl/text/glyph_set.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ void align(Shaping& shaping,
const float verticalAlign,
const float maxLineLength,
const float lineHeight,
const uint32_t lineCount,
const std::size_t lineCount,
const Point<float>& translate) {
const float shiftX =
(justify - horizontalAlign) * maxLineLength + ::round(translate.x * 24 /* one em */);
Expand All @@ -80,8 +80,8 @@ void align(Shaping& shaping,
// justify left = 0, right = 1, center = .5
void justifyLine(std::vector<PositionedGlyph>& positionedGlyphs,
const std::map<uint32_t, SDFGlyph>& sdfs,
uint32_t start,
uint32_t end,
std::size_t start,
std::size_t end,
float justify) {
if (!justify) {
return;
Expand All @@ -93,34 +93,104 @@ void justifyLine(std::vector<PositionedGlyph>& positionedGlyphs,
const uint32_t lastAdvance = it->second.metrics.advance;
const float lineIndent = float(glyph.x + lastAdvance) * justify;

for (uint32_t j = start; j <= end; j++) {
for (std::size_t j = start; j <= end; j++) {
positionedGlyphs[j].x -= lineIndent;
}
}
}

float GlyphSet::determineIdeographicLineWidth(const std::u16string& logicalInput,
float GlyphSet::determineAverageLineWidth(const std::u16string& logicalInput,
const float spacing,
float maxWidth) const {
float totalWidth = 0;

// totalWidth doesn't include the last character for magical tuning reasons. This makes the
// algorithm a little more aggressive about trying to fit the text into fewer lines, taking
// advantage of the tolerance for going a little over maxWidth
for (uint32_t i = 0; i < logicalInput.size() - 1; i++) {
auto it = sdfs.find(logicalInput[i]);
for (char16_t chr : logicalInput) {
auto it = sdfs.find(chr);
if (it != sdfs.end()) {
totalWidth += it->second.metrics.advance + spacing;
}
}

int32_t lineCount = std::fmax(1, std::ceil(totalWidth / maxWidth));
return totalWidth / lineCount;
int32_t targetLineCount = std::fmax(1, std::ceil(totalWidth / maxWidth));
return totalWidth / targetLineCount;
}

// Scores a candidate line of width `lineWidth` against `targetWidth`; lower is better.
//
// The base score ("raggedness") is the squared difference between the two widths.
//  - A final line that is shorter than the target gets 150 subtracted from its raggedness
//    (clamped at zero) and ignores `penalty` entirely.
//  - Otherwise the squared penalty is subtracted when `penalty` is negative (making the break
//    more attractive) and added when it is zero or positive.
float calculateBadness(const float lineWidth, const float targetWidth, const float penalty, const bool isLastBreak) {
    const float deviation = lineWidth - targetWidth;
    const float raggedness = std::pow(deviation, 2);

    // Short final lines are tolerated: discount their raggedness and skip the penalty.
    const bool shortFinalLine = isLastBreak && lineWidth < targetWidth;
    if (shortFinalLine) {
        return std::fmax(0, raggedness - 150);
    }

    // The sign of the penalty decides whether its square counts for or against this break.
    const auto squaredPenalty = std::pow(penalty, 2);
    return penalty < 0 ? raggedness - squaredPenalty : raggedness + squaredPenalty;
}

// Returns the penalty for breaking a line at `codePoint`, given the code point that precedes it
// (`previousCodePoint`, where 0 matches none of the checks below).
//
// Contributions are additive:
//  - -10000 when `codePoint` is a newline (0x0a), which forces the break;
//  - +50 when the previous code point is an open parenthesis (0x28 or 0xff08), since the
//    parenthesis would end the line;
//  - +50 when `codePoint` is a close parenthesis (0x29 or 0xff09), since it would start the
//    next line.
float calculatePenalty(char16_t codePoint, char16_t previousCodePoint) {
    const bool isNewline = codePoint == 0x0a;
    const bool followsOpenParenthesis = previousCodePoint == 0x28 || previousCodePoint == 0xff08;
    const bool isCloseParenthesis = codePoint == 0x29 || codePoint == 0xff09;

    float total = isNewline ? -10000.0f : 0.0f;
    if (followsOpenParenthesis) {
        total += 50;
    }
    if (isCloseParenthesis) {
        total += 50;
    }
    return total;
}

// A candidate line break, linked back to the break chosen to precede it.
// Following `priorBreak` until it is null yields the whole chain of breaks ending here.
struct PotentialBreak {
    // Index of this break within the input.
    const std::size_t index;
    // Horizontal position at which this break occurs.
    const float x;
    // The break selected to precede this one; null when there is none.
    const PotentialBreak* priorBreak;
    // Badness score stored for this break.
    const float badness;

    PotentialBreak(const std::size_t breakIndex,
                   const float breakX,
                   const PotentialBreak* precedingBreak,
                   const float breakBadness)
        : index(breakIndex),
          x(breakX),
          priorBreak(precedingBreak),
          badness(breakBadness) {}
};


// Builds the PotentialBreak for a break at `breakIndex` / `breakX` by picking the prior break
// that gives the lowest total badness.
//
// The starting candidate is "no prior break": the line runs from x = 0 to `breakX`. Each entry
// of `potentialBreaks` is then tried as the prior break, scoring the line between it and
// `breakX` and adding that entry's stored badness. Ties go to the entry visited last, because
// the comparison is `<=`. The result's `priorBreak` points into `potentialBreaks`, or is null
// when starting from the beginning was best.
PotentialBreak evaluateBreak(const std::size_t breakIndex, const float breakX, const float targetWidth, const std::list<PotentialBreak>& potentialBreaks, const float penalty, const bool isLastBreak) {
    // We could skip evaluating breaks where the line length (breakX - priorBreak.x) > maxWidth
    // ...but in fact we allow lines longer than maxWidth (if there's no break points)
    // ...and when targetWidth and maxWidth are close, strictly enforcing maxWidth can give
    // more lopsided results.

    const PotentialBreak* chosenPrior = nullptr;
    float lowestBadness = calculateBadness(breakX, targetWidth, penalty, isLastBreak);

    for (auto candidate = potentialBreaks.begin(); candidate != potentialBreaks.end(); ++candidate) {
        const float candidateBadness =
            calculateBadness(breakX - candidate->x, targetWidth, penalty, isLastBreak) + candidate->badness;
        const bool atLeastAsGood = candidateBadness <= lowestBadness;
        if (atLeastAsGood) {
            chosenPrior = &*candidate;
            lowestBadness = candidateBadness;
        }
    }

    return PotentialBreak(breakIndex, breakX, chosenPrior, lowestBadness);
}

std::set<std::size_t> leastBadBreaks(const PotentialBreak& lastLineBreak) {
std::set<std::size_t> leastBadBreaks = { lastLineBreak.index };
const PotentialBreak* priorBreak = lastLineBreak.priorBreak;
while (priorBreak) {
leastBadBreaks.insert(priorBreak->index);
priorBreak = priorBreak->priorBreak;
}
return leastBadBreaks;
}


// We determine line breaks based on shaped text in logical order. Working in visual order would be
// more intuitive, but we can't do that because the visual order may be changed by line breaks!
std::set<int32_t> GlyphSet::determineLineBreaks(const std::u16string& logicalInput,
std::set<std::size_t> GlyphSet::determineLineBreaks(const std::u16string& logicalInput,
const float spacing,
float maxWidth) const {
if (!maxWidth) {
Expand All @@ -130,42 +200,34 @@ std::set<int32_t> GlyphSet::determineLineBreaks(const std::u16string& logicalInp
if (logicalInput.empty()) {
return {};
}

if (util::i18n::allowsIdeographicBreaking(logicalInput)) {
maxWidth = determineIdeographicLineWidth(logicalInput, spacing, maxWidth);
}

std::set<int32_t> lineBreakPoints;

const float targetWidth = determineAverageLineWidth(logicalInput, spacing, maxWidth);

std::list<PotentialBreak> potentialBreaks;
float currentX = 0;
uint32_t lastSafeBreak = 0;
float lastSafeBreakX = 0;

for (uint32_t i = 0; i < logicalInput.size(); i++) {
auto it = sdfs.find(logicalInput[i]);
for (std::size_t i = 0; i < logicalInput.size(); i++) {
const char16_t codePoint = logicalInput[i];
auto it = sdfs.find(codePoint);
if (it == sdfs.end()) {
continue;
}

const SDFGlyph& glyph = it->second;

// Ideographic characters, spaces, and word-breaking punctuation that often appear without
// surrounding spaces.
if (util::i18n::allowsWordBreaking(glyph.id) ||
util::i18n::allowsIdeographicBreaking(glyph.id)) {
lastSafeBreak = i;
lastSafeBreakX = currentX;
}
if (util::i18n::allowsWordBreaking(codePoint) ||
util::i18n::allowsIdeographicBreaking(codePoint)) {
const char16_t previousCodePoint = i > 0 ? logicalInput[i-1] : 0;

if (currentX > maxWidth && lastSafeBreak > 0) {
lineBreakPoints.insert(lastSafeBreak);
currentX -= lastSafeBreakX;
lastSafeBreakX = 0;
potentialBreaks.push_back(evaluateBreak(i, currentX, targetWidth, potentialBreaks,
calculatePenalty(codePoint, previousCodePoint),
false));
}

currentX += glyph.metrics.advance + spacing;
currentX += it->second.metrics.advance + spacing;
}

return lineBreakPoints;
return leastBadBreaks(evaluateBreak(logicalInput.size(), currentX, targetWidth, potentialBreaks, 0, true));
}

void GlyphSet::shapeLines(Shaping& shaping,
Expand Down Expand Up @@ -194,7 +256,7 @@ void GlyphSet::shapeLines(Shaping& shaping,
continue;
}

uint32_t lineStartIndex = static_cast<uint32_t>(shaping.positionedGlyphs.size());
std::size_t lineStartIndex = shaping.positionedGlyphs.size();
for (char16_t chr : line) {
auto it = sdfs.find(chr);
if (it == sdfs.end()) {
Expand All @@ -207,20 +269,20 @@ void GlyphSet::shapeLines(Shaping& shaping,
}

// Only justify if we placed at least one glyph
if (static_cast<uint32_t>(shaping.positionedGlyphs.size()) != lineStartIndex) {
if (shaping.positionedGlyphs.size() != lineStartIndex) {
float lineLength = x - spacing; // Don't count trailing spacing
maxLineLength = util::max(lineLength, maxLineLength);

justifyLine(shaping.positionedGlyphs, sdfs, lineStartIndex,
static_cast<uint32_t>(shaping.positionedGlyphs.size()) - 1, justify);
shaping.positionedGlyphs.size() - 1, justify);
}

x = 0;
y += lineHeight;
}

align(shaping, justify, horizontalAlign, verticalAlign, maxLineLength, lineHeight,
static_cast<uint32_t>(lines.size()), translate);
lines.size(), translate);
const uint32_t height = lines.size() * lineHeight;

// Calculate the bounding box
Expand Down
4 changes: 2 additions & 2 deletions src/mbgl/text/glyph_set.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@ class GlyphSet {
BiDi& bidi) const;

private:
float determineIdeographicLineWidth(const std::u16string& logicalInput,
float determineAverageLineWidth(const std::u16string& logicalInput,
const float spacing,
float maxWidth) const;
std::set<int32_t> determineLineBreaks(const std::u16string& logicalInput,
std::set<std::size_t> determineLineBreaks(const std::u16string& logicalInput,
const float spacing,
float maxWidth) const;

Expand Down
2 changes: 2 additions & 0 deletions src/mbgl/util/i18n.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,8 @@ bool allowsWordBreaking(uint16_t chr) {
return (chr == 0x0a /* newline */
|| chr == 0x20 /* space */
|| chr == 0x26 /* ampersand */
|| chr == 0x28 /* open parenthesis */
|| chr == 0x29 /* close parenthesis */
|| chr == 0x2b /* plus sign */
|| chr == 0x2d /* hyphen-minus */
|| chr == 0x2f /* solidus */
Expand Down

0 comments on commit a88357d

Please sign in to comment.